diff options
| author | Ellie Huxtable <ellie@elliehuxtable.com> | 2026-03-16 15:22:49 -0700 |
|---|---|---|
| committer | Ellie Huxtable <ellie@elliehuxtable.com> | 2026-03-16 15:22:49 -0700 |
| commit | 8f9777ce7aecfe1a163a915e3245466b9dd9ac2e (patch) | |
| tree | a878a0495e5f84266b7e36918a9e1a9432b0ddd8 /matcher/src/matrix.rs | |
| download | atuin-8f9777ce7aecfe1a163a915e3245466b9dd9ac2e.zip | |
Squashed 'crates/atuin-nucleo/' content from commit 4253de9f
git-subtree-dir: crates/atuin-nucleo
git-subtree-split: 4253de9faabb4e5c6d81d946a5e35a90f87347ee
Diffstat (limited to 'matcher/src/matrix.rs')
| -rw-r--r-- | matcher/src/matrix.rs | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/matcher/src/matrix.rs b/matcher/src/matrix.rs new file mode 100644 index 00000000..a91ed95f --- /dev/null +++ b/matcher/src/matrix.rs @@ -0,0 +1,198 @@ +use std::alloc::{alloc_zeroed, dealloc, handle_alloc_error, Layout}; +use std::marker::PhantomData; +use std::mem::size_of; +use std::panic::{RefUnwindSafe, UnwindSafe}; +use std::ptr::{slice_from_raw_parts_mut, NonNull}; + +use crate::chars::Char; + +const MAX_MATRIX_SIZE: usize = 100 * 1024; // 100*1024 = 100KB + +// these two aren't hard maxima, instead we simply allow whatever will fit into memory +const MAX_HAYSTACK_LEN: usize = 2048; // 64KB +const MAX_NEEDLE_LEN: usize = 2048; // 64KB + +struct MatrixLayout<C: Char> { + haystack_len: usize, + needle_len: usize, + layout: Layout, + haystack_off: usize, + bonus_off: usize, + rows_off: usize, + score_off: usize, + matrix_off: usize, + _phantom: PhantomData<C>, +} +impl<C: Char> MatrixLayout<C> { + fn new(haystack_len: usize, needle_len: usize) -> MatrixLayout<C> { + assert!(haystack_len >= needle_len); + assert!(haystack_len <= u32::MAX as usize); + let mut layout = Layout::from_size_align(0, 1).unwrap(); + let haystack_layout = Layout::array::<C>(haystack_len).unwrap(); + let bonus_layout = Layout::array::<u8>(haystack_len).unwrap(); + let rows_layout = Layout::array::<u16>(needle_len).unwrap(); + let score_layout = Layout::array::<ScoreCell>(haystack_len + 1 - needle_len).unwrap(); + let matrix_layout = + Layout::array::<MatrixCell>((haystack_len + 1 - needle_len) * needle_len).unwrap(); + + let haystack_off; + (layout, haystack_off) = layout.extend(haystack_layout).unwrap(); + let bonus_off; + (layout, bonus_off) = layout.extend(bonus_layout).unwrap(); + let rows_off; + (layout, rows_off) = layout.extend(rows_layout).unwrap(); + let score_off; + (layout, score_off) = layout.extend(score_layout).unwrap(); + let matrix_off; + (layout, matrix_off) = layout.extend(matrix_layout).unwrap(); + MatrixLayout { + haystack_len, + needle_len, + layout, + haystack_off, + bonus_off, + rows_off, + score_off, + matrix_off, + _phantom: PhantomData, + } + } + /// # Safety + /// + /// `ptr` must point at an allocated with MARTIX_ALLOC_LAYOUT + #[allow(clippy::type_complexity)] + unsafe fn fieds_from_ptr( + &self, + ptr: NonNull<u8>, + ) -> ( + *mut [C], + *mut [u8], + *mut [u16], + *mut [ScoreCell], + *mut [MatrixCell], + ) { + let base = ptr.as_ptr(); + let haystack = base.add(self.haystack_off) as *mut C; + let haystack = slice_from_raw_parts_mut(haystack, self.haystack_len); + let bonus = base.add(self.bonus_off); + let bonus = slice_from_raw_parts_mut(bonus, self.haystack_len); + let rows = base.add(self.rows_off) as *mut u16; + let rows = slice_from_raw_parts_mut(rows, self.needle_len); + let cells = base.add(self.score_off) as *mut ScoreCell; + let cells = slice_from_raw_parts_mut(cells, self.haystack_len + 1 - self.needle_len); + let matrix = base.add(self.matrix_off) as *mut MatrixCell; + let matrix = slice_from_raw_parts_mut( + matrix, + (self.haystack_len + 1 - self.needle_len) * self.haystack_len, + ); + (haystack, bonus, rows, cells, matrix) + } +} + +const _SIZE_CHECK: () = { + if size_of::<ScoreCell>() != 8 { + panic!() + } +}; + +// make this act like a u64 +#[repr(align(8))] +#[derive(Clone, Copy, PartialEq, Eq)] +pub(crate) struct ScoreCell { + pub score: u16, + pub consecutive_bonus: u8, + pub matched: bool, +} + +pub(crate) struct MatcherDataView<'a, C: Char> { + pub haystack: &'a mut [C], + // stored as a separate array instead of struct + // to avoid padding since char is too large and u8 too small :/ + pub bonus: &'a mut [u8], + pub current_row: &'a mut [ScoreCell], + pub row_offs: &'a mut [u16], + pub matrix_cells: &'a mut [MatrixCell], +} +#[repr(transparent)] +pub struct MatrixCell(pub(crate) u8); + +impl MatrixCell { + pub fn set(&mut self, p_match: bool, m_match: bool) { + self.0 = p_match as u8 | ((m_match as u8) << 1); + } + + pub fn get(&self, m_matrix: bool) -> bool { + let mask = m_matrix as u8 + 1; + (self.0 & mask) != 0 + } +} + +// we only use this to construct the layout for the slab allocation +#[allow(unused)] +struct MatcherData { + haystack: [char; MAX_HAYSTACK_LEN], + bonus: [u8; MAX_HAYSTACK_LEN], + row_offs: [u16; MAX_NEEDLE_LEN], + scratch_space: [ScoreCell; MAX_HAYSTACK_LEN], + matrix: [u8; MAX_MATRIX_SIZE], +} + +pub(crate) struct MatrixSlab(NonNull<u8>); +unsafe impl Sync for MatrixSlab {} +unsafe impl Send for MatrixSlab {} +impl UnwindSafe for MatrixSlab {} +impl RefUnwindSafe for MatrixSlab {} + +impl MatrixSlab { + pub fn new() -> Self { + let layout = Layout::new::<MatcherData>(); + // safety: the matrix is never zero sized (hardcoded constants) + let ptr = unsafe { alloc_zeroed(layout) }; + let Some(ptr) = NonNull::new(ptr) else { + handle_alloc_error(layout) + }; + MatrixSlab(ptr.cast()) + } + + pub(crate) fn alloc<C: Char>( + &mut self, + haystack_: &[C], + needle_len: usize, + ) -> Option<MatcherDataView<'_, C>> { + let cells = haystack_.len() * needle_len; + if cells > MAX_MATRIX_SIZE + || haystack_.len() > u16::MAX as usize + // ensures that scores never overflow + || needle_len > MAX_NEEDLE_LEN + { + return None; + } + let matrix_layout = MatrixLayout::<C>::new(haystack_.len(), needle_len); + if matrix_layout.layout.size() > size_of::<MatcherData>() { + return None; + } + unsafe { + // safely: this allocation is valid for MATRIX_ALLOC_LAYOUT + let (haystack, bonus, rows, current_row, matrix_cells) = + matrix_layout.fieds_from_ptr(self.0); + // copy haystack before creating references to ensure we don't create + // references to invalid chars (which may or may not be UB) + haystack_ + .as_ptr() + .copy_to_nonoverlapping(haystack as *mut _, haystack_.len()); + Some(MatcherDataView { + haystack: &mut *haystack, + row_offs: &mut *rows, + bonus: &mut *bonus, + current_row: &mut *current_row, + matrix_cells: &mut *matrix_cells, + }) + } + } +} + +impl Drop for MatrixSlab { + fn drop(&mut self) { + unsafe { dealloc(self.0.as_ptr(), Layout::new::<MatcherData>()) }; + } +} |
