1 files changed, 0 insertions, 566 deletions
diff --git a/matcher/src/pattern.rs b/matcher/src/pattern.rs
deleted file mode 100644
index 495feede..00000000
--- a/matcher/src/pattern.rs
+++ /dev/null
@@ -1,566 +0,0 @@
-//! This module provides a slightly higher level API for matching strings.
-
-use std::cmp::Reverse;
-
-use crate::{chars, Matcher, Utf32Str};
-
-#[cfg(test)]
-mod tests;
-
-use crate::Utf32String;
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
-#[non_exhaustive]
-/// How to treat a case mismatch between two characters.
-pub enum CaseMatching {
-    /// Characters never match their case folded version (`a != A`).
-    #[cfg_attr(not(feature = "unicode-casefold"), default)]
-    Respect,
-    /// Characters always match their case folded version (`a == A`).
-    #[cfg(feature = "unicode-casefold")]
-    Ignore,
-    /// Acts like [`Ignore`](CaseMatching::Ignore) if all characters in a pattern atom are
-    /// lowercase and like [`Respect`](CaseMatching::Respect) otherwise.
-    #[default]
-    #[cfg(feature = "unicode-casefold")]
-    Smart,
-}
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
-#[non_exhaustive]
-/// How to handle unicode normalization,
-pub enum Normalization {
-    /// Characters never match their normalized version (`a != ä`).
-    #[cfg_attr(not(feature = "unicode-normalization"), default)]
-    Never,
-    /// Acts like [`Never`](Normalization::Never) if any character in a pattern atom
-    /// would need to be normalized. Otherwise normalization occurs (`a == ä` but `ä != a`).
-    #[default]
-    #[cfg(feature = "unicode-normalization")]
-    Smart,
-}
-
-#[derive(Debug, PartialEq, Eq, Clone, Copy)]
-#[non_exhaustive]
-/// The kind of matching algorithm to run for an atom.
-pub enum AtomKind {
-    /// Fuzzy matching where the needle must match any haystack characters
-    /// (match can contain gaps). This atom kind is used by default if no
-    /// special syntax is used. There is no negated fuzzy matching (too
-    /// many false positives).
-    ///
-    /// See also [`Matcher::fuzzy_match`](crate::Matcher::fuzzy_match).
-    Fuzzy,
-    /// The needle must match a contiguous sequence of haystack characters
-    /// without gaps.  This atom kind is parsed from the following syntax:
-    /// `'foo` and `!foo` (negated).
-    ///
-    /// See also [`Matcher::substring_match`](crate::Matcher::substring_match).
-    Substring,
-    /// The needle must match all leading haystack characters without gaps or
-    /// prefix. This atom kind is parsed from the following syntax: `^foo` and
-    /// `!^foo` (negated).
-    ///
-    /// See also [`Matcher::prefix_match`](crate::Matcher::prefix_match).
-    Prefix,
-    /// The needle must match all trailing haystack characters without gaps or
-    /// postfix. This atom kind is parsed from the following syntax: `foo$` and
-    /// `!foo$` (negated).
-    ///
-    /// See also [`Matcher::postfix_match`](crate::Matcher::postfix_match).
-    Postfix,
-    /// The needle must match all haystack characters without gaps or prefix.
-    /// This atom kind is parsed from the following syntax: `^foo$` and `!^foo$`
-    /// (negated).
-    ///
-    /// See also [`Matcher::exact_match`](crate::Matcher::exact_match).
-    Exact,
-}
-
-/// A single pattern component that is matched with a single [`Matcher`] function
-#[derive(Debug, PartialEq, Eq, Clone)]
-pub struct Atom {
-    /// Whether this pattern atom is a negative match.
-    /// A negative pattern atom will prevent haystacks matching it from
-    /// being matchend. It does not contribute to scoring/indices
-    pub negative: bool,
-    /// The kind of match that this pattern performs
-    pub kind: AtomKind,
-    needle: Utf32String,
-    ignore_case: bool,
-    normalize: bool,
-}
-
-impl Atom {
-    /// Creates a single [`Atom`] from a string by performing unicode
-    /// normalization and case folding (if necessary). Optionally `\ ` can
-    /// be escaped to ` `.
-    pub fn new(
-        needle: &str,
-        case: CaseMatching,
-        normalize: Normalization,
-        kind: AtomKind,
-        escape_whitespace: bool,
-    ) -> Atom {
-        Atom::new_inner(needle, case, normalize, kind, escape_whitespace, false)
-    }
-
-    fn new_inner(
-        needle: &str,
-        case: CaseMatching,
-        normalization: Normalization,
-        kind: AtomKind,
-        escape_whitespace: bool,
-        append_dollar: bool,
-    ) -> Atom {
-        let mut ignore_case;
-        let mut normalize;
-        #[cfg(feature = "unicode-normalization")]
-        {
-            normalize = matches!(normalization, Normalization::Smart);
-        }
-        #[cfg(not(feature = "unicode-normalization"))]
-        {
-            normalize = false;
-        }
-        let needle = if needle.is_ascii() {
-            let mut needle = if escape_whitespace {
-                if let Some((start, rem)) = needle.split_once("\\ ") {
-                    let mut needle = start.to_owned();
-                    for rem in rem.split("\\ ") {
-                        needle.push(' ');
-                        needle.push_str(rem);
-                    }
-                    needle
-                } else {
-                    needle.to_owned()
-                }
-            } else {
-                needle.to_owned()
-            };
-
-            match case {
-                #[cfg(feature = "unicode-casefold")]
-                CaseMatching::Ignore => {
-                    ignore_case = true;
-                    needle.make_ascii_lowercase()
-                }
-                #[cfg(feature = "unicode-casefold")]
-                CaseMatching::Smart => {
-                    ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
-                }
-                CaseMatching::Respect => ignore_case = false,
-            }
-            if append_dollar {
-                needle.push('$');
-            }
-            Utf32String::Ascii(needle.into_boxed_str())
-        } else {
-            let mut needle_ = Vec::with_capacity(needle.len());
-            #[cfg(feature = "unicode-casefold")]
-            {
-                ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
-            }
-            #[cfg(not(feature = "unicode-casefold"))]
-            {
-                ignore_case = false;
-            }
-            #[cfg(feature = "unicode-normalization")]
-            {
-                normalize = matches!(normalization, Normalization::Smart);
-            }
-            if escape_whitespace {
-                let mut saw_backslash = false;
-                for mut c in chars::graphemes(needle) {
-                    if saw_backslash {
-                        if c == ' ' {
-                            needle_.push(' ');
-                            saw_backslash = false;
-                            continue;
-                        } else {
-                            needle_.push('\\');
-                        }
-                    }
-                    saw_backslash = c == '\\';
-                    match case {
-                        #[cfg(feature = "unicode-casefold")]
-                        CaseMatching::Ignore => c = chars::to_lower_case(c),
-                        #[cfg(feature = "unicode-casefold")]
-                        CaseMatching::Smart => {
-                            ignore_case = ignore_case && !chars::is_upper_case(c)
-                        }
-                        CaseMatching::Respect => (),
-                    }
-                    match normalization {
-                        #[cfg(feature = "unicode-normalization")]
-                        Normalization::Smart => {
-                            normalize = normalize && chars::normalize(c) == c;
-                        }
-                        Normalization::Never => (),
-                    }
-                    needle_.push(c);
-                }
-            } else {
-                let chars = chars::graphemes(needle).map(|mut c| {
-                    match case {
-                        #[cfg(feature = "unicode-casefold")]
-                        CaseMatching::Ignore => c = chars::to_lower_case(c),
-                        #[cfg(feature = "unicode-casefold")]
-                        CaseMatching::Smart => {
-                            ignore_case = ignore_case && !chars::is_upper_case(c);
-                        }
-                        CaseMatching::Respect => (),
-                    }
-                    match normalization {
-                        #[cfg(feature = "unicode-normalization")]
-                        Normalization::Smart => {
-                            normalize = normalize && chars::normalize(c) == c;
-                        }
-                        Normalization::Never => (),
-                    }
-                    c
-                });
-                needle_.extend(chars);
-            };
-            if append_dollar {
-                needle_.push('$');
-            }
-            Utf32String::Unicode(needle_.into_boxed_slice())
-        };
-        Atom {
-            kind,
-            needle,
-            negative: false,
-            ignore_case,
-            normalize,
-        }
-    }
-
-    /// Parse a pattern atom from a string. Some special trailing and leading
-    /// characters can be used to control the atom kind. See [`AtomKind`] for
-    /// details.
-    pub fn parse(raw: &str, case: CaseMatching, normalize: Normalization) -> Atom {
-        let mut atom = raw;
-        let invert = match atom.as_bytes() {
-            [b'!', ..] => {
-                atom = &atom[1..];
-                true
-            }
-            [b'\\', b'!', ..] => {
-                atom = &atom[1..];
-                false
-            }
-            _ => false,
-        };
-
-        let mut kind = match atom.as_bytes() {
-            [b'^', ..] => {
-                atom = &atom[1..];
-                AtomKind::Prefix
-            }
-            [b'\'', ..] => {
-                atom = &atom[1..];
-                AtomKind::Substring
-            }
-            [b'\\', b'^' | b'\'', ..] => {
-                atom = &atom[1..];
-                AtomKind::Fuzzy
-            }
-            _ => AtomKind::Fuzzy,
-        };
-
-        let mut append_dollar = false;
-        match atom.as_bytes() {
-            [.., b'\\', b'$'] => {
-                append_dollar = true;
-                atom = &atom[..atom.len() - 2]
-            }
-            [.., b'$'] => {
-                kind = if kind == AtomKind::Fuzzy {
-                    AtomKind::Postfix
-                } else {
-                    AtomKind::Exact
-                };
-                atom = &atom[..atom.len() - 1]
-            }
-            _ => (),
-        }
-
-        if invert && kind == AtomKind::Fuzzy {
-            kind = AtomKind::Substring
-        }
-
-        let mut pattern = Atom::new_inner(atom, case, normalize, kind, true, append_dollar);
-        pattern.negative = invert;
-        pattern
-    }
-
-    /// Matches this pattern against `haystack` (using the allocation and configuration
-    /// from `matcher`) and calculates a ranking score. See the [`Matcher`].
-    /// Documentation for more details.
-    ///
-    /// *Note:*  The `ignore_case` setting is overwritten to match the casing of
-    /// each pattern atom.
-    pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u16> {
-        matcher.config.ignore_case = self.ignore_case;
-        matcher.config.normalize = self.normalize;
-        let pattern_score = match self.kind {
-            AtomKind::Exact => matcher.exact_match(haystack, self.needle.slice(..)),
-            AtomKind::Fuzzy => matcher.fuzzy_match(haystack, self.needle.slice(..)),
-            AtomKind::Substring => matcher.substring_match(haystack, self.needle.slice(..)),
-            AtomKind::Prefix => matcher.prefix_match(haystack, self.needle.slice(..)),
-            AtomKind::Postfix => matcher.postfix_match(haystack, self.needle.slice(..)),
-        };
-        if self.negative {
-            if pattern_score.is_some() {
-                return None;
-            }
-            Some(0)
-        } else {
-            pattern_score
-        }
-    }
-
-    /// Matches this pattern against `haystack` (using the allocation and
-    /// configuration from `matcher`), calculates a ranking score and the match
-    /// indices. See the [`Matcher`]. Documentation for more
-    /// details.
-    ///
-    /// *Note:*  The `ignore_case` setting is overwritten to match the casing of
-    /// each pattern atom.
-    ///
-    /// *Note:*  The `indices` vector is not cleared by this function.
-    pub fn indices(
-        &self,
-        haystack: Utf32Str<'_>,
-        matcher: &mut Matcher,
-        indices: &mut Vec<u32>,
-    ) -> Option<u16> {
-        matcher.config.ignore_case = self.ignore_case;
-        matcher.config.normalize = self.normalize;
-        if self.negative {
-            let pattern_score = match self.kind {
-                AtomKind::Exact => matcher.exact_match(haystack, self.needle.slice(..)),
-                AtomKind::Fuzzy => matcher.fuzzy_match(haystack, self.needle.slice(..)),
-                AtomKind::Substring => matcher.substring_match(haystack, self.needle.slice(..)),
-                AtomKind::Prefix => matcher.prefix_match(haystack, self.needle.slice(..)),
-                AtomKind::Postfix => matcher.postfix_match(haystack, self.needle.slice(..)),
-            };
-            pattern_score.is_none().then_some(0)
-        } else {
-            match self.kind {
-                AtomKind::Exact => matcher.exact_indices(haystack, self.needle.slice(..), indices),
-                AtomKind::Fuzzy => matcher.fuzzy_indices(haystack, self.needle.slice(..), indices),
-                AtomKind::Substring => {
-                    matcher.substring_indices(haystack, self.needle.slice(..), indices)
-                }
-                AtomKind::Prefix => {
-                    matcher.prefix_indices(haystack, self.needle.slice(..), indices)
-                }
-                AtomKind::Postfix => {
-                    matcher.postfix_indices(haystack, self.needle.slice(..), indices)
-                }
-            }
-        }
-    }
-
-    /// Returns the needle text that is passed to the matcher. All indices
-    /// produced by the `indices` functions produce char indices used to index
-    /// this text
-    pub fn needle_text(&self) -> Utf32Str<'_> {
-        self.needle.slice(..)
-    }
-    /// Convenience function to easily match (and sort) a (relatively small)
-    /// list of inputs.
-    ///
-    /// *Note* This function is not recommended for building a full fuzzy
-    /// matching application that can match large numbers of matches (like all
-    /// files in a directory) as all matching is done on the current thread,
-    /// effectively blocking the UI. For such applications the high level
-    /// `nucleo` crate can be used instead.
-    pub fn match_list<T: AsRef<str>>(
-        &self,
-        items: impl IntoIterator<Item = T>,
-        matcher: &mut Matcher,
-    ) -> Vec<(T, u16)> {
-        if self.needle.is_empty() {
-            return items.into_iter().map(|item| (item, 0)).collect();
-        }
-        let mut buf = Vec::new();
-        let mut items: Vec<_> = items
-            .into_iter()
-            .filter_map(|item| {
-                self.score(Utf32Str::new(item.as_ref(), &mut buf), matcher)
-                    .map(|score| (item, score))
-            })
-            .collect();
-        items.sort_by_key(|(_, score)| Reverse(*score));
-        items
-    }
-}
-
-fn pattern_atoms(pattern: &str) -> impl Iterator<Item = &str> + '_ {
-    let mut saw_backslash = false;
-    pattern.split(move |c| {
-        saw_backslash = match c {
-            c if c.is_whitespace() && !saw_backslash => return true,
-            '\\' => true,
-            _ => false,
-        };
-        false
-    })
-}
-
-#[derive(Debug, Default)]
-/// A text pattern made up of (potentially multiple) [atoms](crate::pattern::Atom).
-#[non_exhaustive]
-pub struct Pattern {
-    /// The individual pattern (words) in this pattern
-    pub atoms: Vec<Atom>,
-}
-
-impl Pattern {
-    /// Creates a pattern where each word is matched individually (whitespaces
-    /// can be escaped with `\`). Otherwise no parsing is performed (so `$`, `!`,
-    /// `'` and `^` don't receive special treatment). If you want to match the entire
-    /// pattern as a single needle use a single [`Atom`] instead.
-    pub fn new(
-        pattern: &str,
-        case_matching: CaseMatching,
-        normalize: Normalization,
-        kind: AtomKind,
-    ) -> Pattern {
-        let atoms = pattern_atoms(pattern)
-            .filter_map(|pat| {
-                let pat = Atom::new(pat, case_matching, normalize, kind, true);
-                (!pat.needle.is_empty()).then_some(pat)
-            })
-            .collect();
-        Pattern { atoms }
-    }
-    /// Creates a pattern where each word is matched individually (whitespaces
-    /// can be escaped with `\`). And `$`, `!`, `'` and `^` at word boundaries will
-    /// cause different matching behaviour (see [`AtomKind`]). These can be
-    /// escaped with backslash.
-    pub fn parse(pattern: &str, case_matching: CaseMatching, normalize: Normalization) -> Pattern {
-        let atoms = pattern_atoms(pattern)
-            .filter_map(|pat| {
-                let pat = Atom::parse(pat, case_matching, normalize);
-                (!pat.needle.is_empty()).then_some(pat)
-            })
-            .collect();
-        Pattern { atoms }
-    }
-
-    /// Convenience function to easily match (and sort) a (relatively small)
-    /// list of inputs.
-    ///
-    /// *Note* This function is not recommended for building a full fuzzy
-    /// matching application that can match large numbers of matches (like all
-    /// files in a directory) as all matching is done on the current thread,
-    /// effectively blocking the UI. For such applications the high level
-    /// `nucleo` crate can be used instead.
-    pub fn match_list<T: AsRef<str>>(
-        &self,
-        items: impl IntoIterator<Item = T>,
-        matcher: &mut Matcher,
-    ) -> Vec<(T, u32)> {
-        if self.atoms.is_empty() {
-            return items.into_iter().map(|item| (item, 0)).collect();
-        }
-        let mut buf = Vec::new();
-        let mut items: Vec<_> = items
-            .into_iter()
-            .filter_map(|item| {
-                self.score(Utf32Str::new(item.as_ref(), &mut buf), matcher)
-                    .map(|score| (item, score))
-            })
-            .collect();
-        items.sort_by_key(|(_, score)| Reverse(*score));
-        items
-    }
-
-    /// Matches this pattern against `haystack` (using the allocation and configuration
-    /// from `matcher`) and calculates a ranking score. See the [`Matcher`]
-    /// documentation for more details.
-    ///
-    /// *Note:*  The `ignore_case` setting is overwritten to match the casing of
-    /// each pattern atom.
-    pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u32> {
-        if self.atoms.is_empty() {
-            return Some(0);
-        }
-        let mut score = 0;
-        for pattern in &self.atoms {
-            score += pattern.score(haystack, matcher)? as u32;
-        }
-        Some(score)
-    }
-
-    /// Matches this pattern against `haystack` (using the allocation and
-    /// configuration from `matcher`), calculates a ranking score and the match
-    /// indices. See the [`Matcher`] documentation for more
-    /// details.
-    ///
-    /// *Note:*  The `ignore_case` setting is overwritten to match the casing of
-    /// each pattern atom.
-    ///
-    /// *Note:*  The indices for each pattern are calculated individually
-    /// and simply appended to the `indices` vector and not deduplicated/sorted.
-    /// This allows associating the match indices to their source pattern. If
-    /// required (like for highlighting) unique/sorted indices can be obtained
-    /// as follows:
-    ///
-    /// ```
-    /// # let mut indices: Vec<u32> = Vec::new();
-    /// indices.sort_unstable();
-    /// indices.dedup();
-    /// ```
-    pub fn indices(
-        &self,
-        haystack: Utf32Str<'_>,
-        matcher: &mut Matcher,
-        indices: &mut Vec<u32>,
-    ) -> Option<u32> {
-        if self.atoms.is_empty() {
-            return Some(0);
-        }
-        let mut score = 0;
-        for pattern in &self.atoms {
-            score += pattern.indices(haystack, matcher, indices)? as u32;
-        }
-        Some(score)
-    }
-
-    /// Refreshes this pattern by reparsing it from a string. This is mostly
-    /// equivalent to just constructing a new pattern using [`Pattern::parse`]
-    /// but is slightly more efficient by reusing some allocations
-    pub fn reparse(
-        &mut self,
-        pattern: &str,
-        case_matching: CaseMatching,
-        normalize: Normalization,
-    ) {
-        self.atoms.clear();
-        let atoms = pattern_atoms(pattern).filter_map(|atom| {
-            let atom = Atom::parse(atom, case_matching, normalize);
-            if atom.needle.is_empty() {
-                return None;
-            }
-            Some(atom)
-        });
-        self.atoms.extend(atoms);
-    }
-}
-
-impl Clone for Pattern {
-    fn clone(&self) -> Self {
-        Self {
-            atoms: self.atoms.clone(),
-        }
-    }
-
-    fn clone_from(&mut self, source: &Self) {
-        self.atoms.clone_from(&source.atoms);
-    }
-}