Squashed 'crates/atuin-nucleo/' content from commit 4253de9f

git-subtree-dir: crates/atuin-nucleo git-subtree-split: 4253de9faabb4e5c6d81d946a5e35a90f87347ee
author: Ellie Huxtable <ellie@elliehuxtable.com> 2026-03-16 15:22:49 -0700
committer: Ellie Huxtable <ellie@elliehuxtable.com> 2026-03-16 15:22:49 -0700
commit: 8f9777ce7aecfe1a163a915e3245466b9dd9ac2e (patch)
tree: a878a0495e5f84266b7e36918a9e1a9432b0ddd8 /matcher/src/score.rs
download: atuin-8f9777ce7aecfe1a163a915e3245466b9dd9ac2e.zip
1 files changed, 158 insertions, 0 deletions
diff --git a/matcher/src/score.rs b/matcher/src/score.rs
new file mode 100644
index 00000000..c934a8ef
--- /dev/null
+++ b/matcher/src/score.rs
@@ -0,0 +1,158 @@
+use std::cmp::max;
+
+use crate::chars::{Char, CharClass};
+use crate::{Config, Matcher};
+
+pub(crate) const SCORE_MATCH: u16 = 16;
+pub(crate) const PENALTY_GAP_START: u16 = 3;
+pub(crate) const PENALTY_GAP_EXTENSION: u16 = 1;
+/// If the prefer_prefix option is enabled we want to penalize
+/// the initial gap. The prefix should not be too much  
+pub(crate) const PREFIX_BONUS_SCALE: u16 = 2;
+pub(crate) const MAX_PREFIX_BONUS: u16 = BONUS_BOUNDARY;
+
+// We prefer matches at the beginning of a word, but the bonus should not be
+// too great to prevent the longer acronym matches from always winning over
+// shorter fuzzy matches. The bonus point here was specifically chosen that
+// the bonus is cancelled when the gap between the acronyms grows over
+// 8 characters, which is approximately the average length of the words found
+// in web2 dictionary and my file system.
+pub(crate) const BONUS_BOUNDARY: u16 = SCORE_MATCH / 2;
+
+// Edge-triggered bonus for matches in camelCase words.
+// Their value should be BONUS_BOUNDARY - PENALTY_GAP_EXTENSION = 7.
+// However, this priporitzes camel case over non-camel case.
+// In fzf/skim this is not a problem since they score off the max
+// consecutive bonus. However, we don't do that (because its incorrect)
+// so to avoids prioritizing camel we use a lower bonus. I think that's fine
+// usually camel case is wekaer boundary than actual wourd boundaries anyway
+// This also has the nice sideeffect of perfectly balancing out
+// camel case, snake case and the consecutive version of the word
+pub(crate) const BONUS_CAMEL123: u16 = BONUS_BOUNDARY - PENALTY_GAP_START;
+
+/// Although bonus point for non-word characters is non-contextual, we need it
+/// for computing bonus points for consecutive chunks starting with a non-word
+/// character.
+pub(crate) const BONUS_NON_WORD: u16 = BONUS_BOUNDARY;
+
+// Minimum bonus point given to characters in consecutive chunks.
+// Note that bonus points for consecutive matches shouldn't have needed if we
+// used fixed match score as in the original algorithm.
+pub(crate) const BONUS_CONSECUTIVE: u16 = PENALTY_GAP_START + PENALTY_GAP_EXTENSION;
+
+// The first character in the typed pattern usually has more significance
+// than the rest so it's important that it appears at special positions where
+// bonus points are given, e.g. "to-go" vs. "ongoing" on "og" or on "ogo".
+// The amount of the extra bonus should be limited so that the gap penalty is
+// still respected.
+pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 2;
+
+impl Config {
+    #[inline]
+    pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
+        if class > CharClass::Delimiter {
+            // transition from non word to word
+            match prev_class {
+                CharClass::Whitespace => return self.bonus_boundary_white,
+                CharClass::Delimiter => return self.bonus_boundary_delimiter,
+                CharClass::NonWord => return BONUS_BOUNDARY,
+                _ => (),
+            }
+        }
+        if prev_class == CharClass::Lower && class == CharClass::Upper
+            || prev_class != CharClass::Number && class == CharClass::Number
+        {
+            // camelCase letter123
+            BONUS_CAMEL123
+        } else if class == CharClass::Whitespace {
+            self.bonus_boundary_white
+        } else if class == CharClass::NonWord {
+            return BONUS_NON_WORD;
+        } else {
+            0
+        }
+    }
+}
+impl Matcher {
+    #[inline(always)]
+    pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
+        self.config.bonus_for(prev_class, class)
+    }
+
+    pub(crate) fn calculate_score<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
+        &mut self,
+        haystack: &[H],
+        needle: &[N],
+        start: usize,
+        end: usize,
+        indices: &mut Vec<u32>,
+    ) -> u16 {
+        if INDICES {
+            indices.reserve(needle.len());
+        }
+
+        let mut prev_class = start
+            .checked_sub(1)
+            .map(|i| haystack[i].char_class(&self.config))
+            .unwrap_or(self.config.initial_char_class);
+        let mut needle_iter = needle.iter();
+        let mut needle_char = *needle_iter.next().unwrap();
+
+        let mut in_gap = false;
+        let mut consecutive = 1;
+
+        // unrolled the first iteration to make applying the first char multiplier less awkward
+        if INDICES {
+            indices.push(start as u32)
+        }
+        let class = haystack[start].char_class(&self.config);
+        let mut first_bonus = self.bonus_for(prev_class, class);
+        let mut score = SCORE_MATCH + first_bonus * BONUS_FIRST_CHAR_MULTIPLIER;
+        prev_class = class;
+        needle_char = *needle_iter.next().unwrap_or(&needle_char);
+
+        for (i, c) in haystack[start + 1..end].iter().enumerate() {
+            let (c, class) = c.char_class_and_normalize(&self.config);
+            if c == needle_char {
+                if INDICES {
+                    indices.push(i as u32 + start as u32 + 1)
+                }
+                let mut bonus = self.bonus_for(prev_class, class);
+                if consecutive != 0 {
+                    if bonus >= BONUS_BOUNDARY && bonus > first_bonus {
+                        first_bonus = bonus
+                    }
+                    bonus = max(max(bonus, first_bonus), BONUS_CONSECUTIVE);
+                } else {
+                    first_bonus = bonus;
+                }
+                score += SCORE_MATCH + bonus;
+                in_gap = false;
+                consecutive += 1;
+                if let Some(&next) = needle_iter.next() {
+                    needle_char = next;
+                }
+            } else {
+                let penalty = if in_gap {
+                    PENALTY_GAP_EXTENSION
+                } else {
+                    PENALTY_GAP_START
+                };
+                score = score.saturating_sub(penalty);
+                in_gap = true;
+                consecutive = 0;
+            }
+            prev_class = class;
+        }
+        if self.config.prefer_prefix {
+            if start != 0 {
+                let penalty = PENALTY_GAP_START
+                    + PENALTY_GAP_START * (start - 1).min(u16::MAX as usize) as u16;
+                score += MAX_PREFIX_BONUS.saturating_sub(penalty / PREFIX_BONUS_SCALE);
+            } else {
+                score += MAX_PREFIX_BONUS;
+            }
+        }
+        score
+    }
+}
author	Ellie Huxtable <ellie@elliehuxtable.com>	2026-03-16 15:22:49 -0700
committer	Ellie Huxtable <ellie@elliehuxtable.com>	2026-03-16 15:22:49 -0700
commit	8f9777ce7aecfe1a163a915e3245466b9dd9ac2e (patch)
tree	a878a0495e5f84266b7e36918a9e1a9432b0ddd8 /matcher/src/score.rs
download	atuin-8f9777ce7aecfe1a163a915e3245466b9dd9ac2e.zip