diff options
Diffstat (limited to 'src/lib.rs')
| -rw-r--r-- | src/lib.rs | 462 |
1 files changed, 0 insertions, 462 deletions
diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 7ddb7407..00000000 --- a/src/lib.rs +++ /dev/null @@ -1,462 +0,0 @@ -/*! -`nucleo` is a high level crate that provides a high level matcher API that -provides a highly effective (parallel) matcher worker. It's designed to allow -quickly plugging a fully featured (and faster) fzf/skim like fuzzy matcher into -your TUI application. - -It's designed to run matching on a background threadpool while providing a -snapshot of the last complete match. That means the matcher can update the -results live while the user is typing while never blocking the main UI thread -(beyond a user provided timeout). Nucleo also supports fully concurrent lock-free -(and wait-free) streaming of input items. - -The [`Nucleo`] struct serves as the main API entrypoint for this crate. - -# Status - -Nucleo is used in the helix-editor and therefore has a large user base with lots -or real world testing. The core matcher implementation is considered complete -and is unlikely to see major changes. The `nucleo-matcher` crate is finished and -ready for widespread use, breaking changes should be very rare (a 1.0 release -should not be far away). - -While the high level `nucleo` crate also works well (and is also used in helix), -there are still additional features that will be added in the future. The high -level crate also need better documentation and will likely see a few minor API -changes in the future. - -*/ -use std::ops::{Bound, RangeBounds}; -use std::sync::atomic::{self, AtomicBool, Ordering}; -use std::sync::Arc; -use std::time::Duration; - -use parking_lot::Mutex; -use rayon::ThreadPool; - -use crate::pattern::MultiPattern; -use crate::worker::Worker; -pub use nucleo_matcher::{chars, Config, Matcher, Utf32Str, Utf32String}; - -mod boxcar; -mod par_sort; -pub mod pattern; -mod worker; - -#[cfg(test)] -mod tests; - -/// A match candidate stored in a [`Nucleo`] worker. -pub struct Item<'a, T> { - pub data: &'a T, - pub matcher_columns: &'a [Utf32String], -} - -/// A handle that allows adding new items to a [`Nucleo`] worker. -/// -/// It's internally reference counted and can be cheaply cloned -/// and sent across threads. -pub struct Injector<T> { - items: Arc<boxcar::Vec<T>>, - notify: Arc<(dyn Fn() + Sync + Send)>, -} - -impl<T> Clone for Injector<T> { - fn clone(&self) -> Self { - Injector { - items: self.items.clone(), - notify: self.notify.clone(), - } - } -} - -impl<T> Injector<T> { - /// Appends an element to the list of matched items. - /// This function is lock-free and wait-free. - pub fn push(&self, value: T, fill_columns: impl FnOnce(&T, &mut [Utf32String])) -> u32 { - let idx = self.items.push(value, fill_columns); - (self.notify)(); - idx - } - - /// Appends multiple elements to the list of matched items. - /// This function is lock-free and wait-free. - /// - /// You should favor this function over `push` if at least one of the following is true: - /// - the number of items you're adding can be computed beforehand and is typically larger - /// than 1k - /// - you're able to batch incoming items - /// - you're adding items from multiple threads concurrently (this function results in less - /// contention) - pub fn extend<I>(&self, values: I, fill_columns: impl Fn(&T, &mut [Utf32String])) - where - I: IntoIterator<Item = T> + ExactSizeIterator, - { - self.items.extend(values, fill_columns); - (self.notify)(); - } - - /// Returns the total number of items injected in the matcher. This might - /// not match the number of items in the match snapshot (if the matcher - /// is still running) - pub fn injected_items(&self) -> u32 { - self.items.count() - } - - /// Returns a reference to the item at the given index. - /// - /// # Safety - /// - /// Item at `index` must be initialized. That means you must have observed - /// `push` returning this value or `get` returning `Some` for this value. - /// Just because a later index is initialized doesn't mean that this index - /// is initialized - pub unsafe fn get_unchecked(&self, index: u32) -> Item<'_, T> { - self.items.get_unchecked(index) - } - - /// Returns a reference to the element at the given index. - pub fn get(&self, index: u32) -> Option<Item<'_, T>> { - self.items.get(index) - } -} - -/// An [item](crate::Item) that was successfully matched by a [`Nucleo`] worker. -#[derive(PartialEq, Eq, Debug, Clone, Copy)] -pub struct Match { - pub score: u32, - pub idx: u32, -} - -/// That status of a [`Nucleo`] worker after a match. -#[derive(PartialEq, Eq, Debug, Clone, Copy)] -pub struct Status { - /// Whether the current snapshot has changed. - pub changed: bool, - /// Whether the matcher is still processing in the background. - pub running: bool, -} - -/// A snapshot represent the results of a [`Nucleo`] worker after -/// finishing a [`tick`](Nucleo::tick). -pub struct Snapshot<T: Sync + Send + 'static> { - item_count: u32, - matches: Vec<Match>, - pattern: MultiPattern, - items: Arc<boxcar::Vec<T>>, -} - -impl<T: Sync + Send + 'static> Snapshot<T> { - fn clear(&mut self, new_items: Arc<boxcar::Vec<T>>) { - self.item_count = 0; - self.matches.clear(); - self.items = new_items - } - - fn update(&mut self, worker: &Worker<T>) { - self.item_count = worker.item_count(); - self.pattern.clone_from(&worker.pattern); - self.matches.clone_from(&worker.matches); - if !Arc::ptr_eq(&worker.items, &self.items) { - self.items = worker.items.clone() - } - } - - /// Returns that total number of items - pub fn item_count(&self) -> u32 { - self.item_count - } - - /// Returns the pattern which items were matched against - pub fn pattern(&self) -> &MultiPattern { - &self.pattern - } - - /// Returns that number of items that matched the pattern - pub fn matched_item_count(&self) -> u32 { - self.matches.len() as u32 - } - - /// Returns an iterator over the items that correspond to a subrange of - /// all the matches in this snapshot. - /// - /// # Panics - /// Panics if `range` has a range bound that is larger than - /// the matched item count - pub fn matched_items( - &self, - range: impl RangeBounds<u32>, - ) -> impl ExactSizeIterator<Item = Item<'_, T>> + DoubleEndedIterator + '_ { - // TODO: use TAIT - let start = match range.start_bound() { - Bound::Included(&start) => start as usize, - Bound::Excluded(&start) => start as usize + 1, - Bound::Unbounded => 0, - }; - let end = match range.end_bound() { - Bound::Included(&end) => end as usize + 1, - Bound::Excluded(&end) => end as usize, - Bound::Unbounded => self.matches.len(), - }; - self.matches[start..end] - .iter() - .map(|&m| unsafe { self.items.get_unchecked(m.idx) }) - } - - /// Returns a reference to the item at the given index. - /// - /// # Safety - /// - /// Item at `index` must be initialized. That means you must have observed a - /// match with the corresponding index in this exact snapshot. Observing - /// a higher index is not enough as item indices can be non-contigously - /// initialized - #[inline] - pub unsafe fn get_item_unchecked(&self, index: u32) -> Item<'_, T> { - self.items.get_unchecked(index) - } - - /// Returns a reference to the item at the given index. - /// - /// Returns `None` if the given `index` is not initialized. This function - /// is only guarteed to return `Some` for item indices that can be found in - /// the `matches` of this struct. Both smaller and larger indices may return - /// `None`. - #[inline] - pub fn get_item(&self, index: u32) -> Option<Item<'_, T>> { - self.items.get(index) - } - - /// Return the matches corresponding to this snapshot. - #[inline] - pub fn matches(&self) -> &[Match] { - &self.matches - } - - /// A convenience function to return the [`Item`] corresponding to the - /// `n`th match. - /// - /// Returns `None` if `n` is greater than or equal to the match count. - #[inline] - pub fn get_matched_item(&self, n: u32) -> Option<Item<'_, T>> { - // SAFETY: A match index is guaranteed to corresponding to a valid global index in this - // snapshot. - unsafe { Some(self.get_item_unchecked(self.matches.get(n as usize)?.idx)) } - } -} - -#[repr(u8)] -#[derive(Clone, Copy, PartialEq, Eq)] -enum State { - Init, - /// items have been cleared but snapshot and items are still outdated - Cleared, - /// items are fresh - Fresh, -} - -impl State { - fn matcher_item_refs(self) -> usize { - match self { - State::Cleared => 1, - State::Init | State::Fresh => 2, - } - } - - fn canceled(self) -> bool { - self != State::Fresh - } - - fn cleared(self) -> bool { - self != State::Fresh - } -} - -/// A high level matcher worker that quickly computes matches in a background -/// threadpool. -pub struct Nucleo<T: Sync + Send + 'static> { - // the way the API is build we totally don't actually need these to be Arcs - // but this lets us avoid some unsafe - canceled: Arc<AtomicBool>, - should_notify: Arc<AtomicBool>, - worker: Arc<Mutex<Worker<T>>>, - pool: ThreadPool, - state: State, - items: Arc<boxcar::Vec<T>>, - notify: Arc<(dyn Fn() + Sync + Send)>, - snapshot: Snapshot<T>, - /// The pattern matched by this matcher. To update the match pattern - /// [`MultiPattern::reparse`](`pattern::MultiPattern::reparse`) should be used. - /// Note that the matcher worker will only become aware of the new pattern - /// after a call to [`tick`](Nucleo::tick). - pub pattern: MultiPattern, -} - -impl<T: Sync + Send + 'static> Nucleo<T> { - /// Constructs a new `nucleo` worker threadpool with the provided `config`. - /// - /// `notify` is called everytime new information is available and - /// [`tick`](Nucleo::tick) should be called. Note that `notify` is not - /// debounced, that should be handled by the downstream crate (for example - /// debouncing to only redraw at most every 1/60 seconds). - /// - /// If `None` is passed for the number of worker threads, nucleo will use - /// one thread per hardware thread. - /// - /// Nucleo can match items with multiple orthogonal properties. `columns` - /// indicates how many matching columns each item (and the pattern) has. The - /// number of columns cannot be changed after construction. - pub fn new( - config: Config, - notify: Arc<(dyn Fn() + Sync + Send)>, - num_threads: Option<usize>, - columns: u32, - ) -> Self { - let (pool, worker) = Worker::new(num_threads, config, notify.clone(), columns); - Self { - canceled: worker.canceled.clone(), - should_notify: worker.should_notify.clone(), - items: worker.items.clone(), - pool, - pattern: MultiPattern::new(columns as usize), - snapshot: Snapshot { - matches: Vec::with_capacity(2 * 1024), - pattern: MultiPattern::new(columns as usize), - item_count: 0, - items: worker.items.clone(), - }, - worker: Arc::new(Mutex::new(worker)), - state: State::Init, - notify, - } - } - - /// Returns the total number of active injectors - pub fn active_injectors(&self) -> usize { - Arc::strong_count(&self.items) - - self.state.matcher_item_refs() - - (Arc::ptr_eq(&self.snapshot.items, &self.items)) as usize - } - - /// Returns a snapshot of the current matcher state. - pub fn snapshot(&self) -> &Snapshot<T> { - &self.snapshot - } - - /// Returns an injector that can be used for adding candidates to the matcher. - pub fn injector(&self) -> Injector<T> { - Injector { - items: self.items.clone(), - notify: self.notify.clone(), - } - } - - /// Restart the the item stream. Removes all items and disconnects all - /// previously created injectors from this instance. If `clear_snapshot` - /// is `true` then all items and matched are removed from the [`Snapshot`] - /// immediately. Otherwise the snapshot will keep the current matches until - /// the matcher has run again. - /// - /// # Note - /// - /// The injectors will continue to function but they will not affect this - /// instance anymore. The old items will only be dropped when all injectors - /// were dropped. - pub fn restart(&mut self, clear_snapshot: bool) { - self.canceled.store(true, Ordering::Relaxed); - self.items = Arc::new(boxcar::Vec::with_capacity(1024, self.items.columns())); - self.state = State::Cleared; - if clear_snapshot { - self.snapshot.clear(self.items.clone()); - } - } - - /// Update the internal configuration. - pub fn update_config(&mut self, config: Config) { - self.worker.lock().update_config(config) - } - - // Set whether the matcher should sort search results by score after - // matching. Defaults to true. - pub fn sort_results(&mut self, sort_results: bool) { - self.worker.lock().sort_results(sort_results) - } - - // Set whether the matcher should reverse the order of the input. - // Defaults to false. - pub fn reverse_items(&mut self, reverse_items: bool) { - self.worker.lock().reverse_items(reverse_items) - } - - /// The main way to interact with the matcher, this should be called - /// regularly (for example each time a frame is rendered). To avoid - /// excessive redraws this method will wait `timeout` milliseconds for the - /// worker thread to finish. It is recommend to set the timeout to 10ms. - pub fn tick(&mut self, timeout: u64) -> Status { - self.should_notify.store(false, atomic::Ordering::Relaxed); - let status = self.pattern.status(); - let canceled = status != pattern::Status::Unchanged || self.state.canceled(); - let mut res = self.tick_inner(timeout, canceled, status); - if !canceled { - return res; - } - self.state = State::Fresh; - let status2 = self.tick_inner(timeout, false, pattern::Status::Unchanged); - res.changed |= status2.changed; - res.running = status2.running; - res - } - - fn tick_inner(&mut self, timeout: u64, canceled: bool, status: pattern::Status) -> Status { - let mut inner = if canceled { - self.pattern.reset_status(); - self.canceled.store(true, atomic::Ordering::Relaxed); - self.worker.lock_arc() - } else { - let Some(worker) = self.worker.try_lock_arc_for(Duration::from_millis(timeout)) else { - self.should_notify.store(true, Ordering::Release); - return Status { - changed: false, - running: true, - }; - }; - worker - }; - - let changed = inner.running; - - let running = canceled || self.items.count() > inner.item_count(); - if inner.running { - inner.running = false; - if !inner.was_canceled && !self.state.canceled() { - self.snapshot.update(&inner) - } - } - if running { - inner.pattern.clone_from(&self.pattern); - self.canceled.store(false, atomic::Ordering::Relaxed); - if !canceled { - self.should_notify.store(true, atomic::Ordering::Release); - } - let cleared = self.state.cleared(); - if cleared { - inner.items = self.items.clone(); - } - self.pool - .spawn(move || unsafe { inner.run(status, cleared) }) - } - Status { changed, running } - } -} - -impl<T: Sync + Send> Drop for Nucleo<T> { - fn drop(&mut self) { - // we ensure the worker quits before dropping items to ensure that - // the worker can always assume the items outlive it - self.canceled.store(true, atomic::Ordering::Relaxed); - let lock = self.worker.try_lock_for(Duration::from_secs(1)); - if lock.is_none() { - unreachable!("thread pool failed to shutdown properly") - } - } -} |
