crates/atuin-nucleo/matcher/src/score.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158

use std::cmp::max;

use crate::chars::{Char, CharClass};
use crate::{Config, Matcher};

pub(crate) const SCORE_MATCH: u16 = 16;
pub(crate) const PENALTY_GAP_START: u16 = 3;
pub(crate) const PENALTY_GAP_EXTENSION: u16 = 1;
/// If the prefer_prefix option is enabled we want to penalize
/// the initial gap. The prefix should not be too much  
pub(crate) const PREFIX_BONUS_SCALE: u16 = 2;
pub(crate) const MAX_PREFIX_BONUS: u16 = BONUS_BOUNDARY;

// We prefer matches at the beginning of a word, but the bonus should not be
// too great to prevent the longer acronym matches from always winning over
// shorter fuzzy matches. The bonus point here was specifically chosen that
// the bonus is cancelled when the gap between the acronyms grows over
// 8 characters, which is approximately the average length of the words found
// in web2 dictionary and my file system.
pub(crate) const BONUS_BOUNDARY: u16 = SCORE_MATCH / 2;

// Edge-triggered bonus for matches in camelCase words.
// Their value should be BONUS_BOUNDARY - PENALTY_GAP_EXTENSION = 7.
// However, this priporitzes camel case over non-camel case.
// In fzf/skim this is not a problem since they score off the max
// consecutive bonus. However, we don't do that (because its incorrect)
// so to avoids prioritizing camel we use a lower bonus. I think that's fine
// usually camel case is wekaer boundary than actual wourd boundaries anyway
// This also has the nice sideeffect of perfectly balancing out
// camel case, snake case and the consecutive version of the word
pub(crate) const BONUS_CAMEL123: u16 = BONUS_BOUNDARY - PENALTY_GAP_START;

/// Although bonus point for non-word characters is non-contextual, we need it
/// for computing bonus points for consecutive chunks starting with a non-word
/// character.
pub(crate) const BONUS_NON_WORD: u16 = BONUS_BOUNDARY;

// Minimum bonus point given to characters in consecutive chunks.
// Note that bonus points for consecutive matches shouldn't have needed if we
// used fixed match score as in the original algorithm.
pub(crate) const BONUS_CONSECUTIVE: u16 = PENALTY_GAP_START + PENALTY_GAP_EXTENSION;

// The first character in the typed pattern usually has more significance
// than the rest so it's important that it appears at special positions where
// bonus points are given, e.g. "to-go" vs. "ongoing" on "og" or on "ogo".
// The amount of the extra bonus should be limited so that the gap penalty is
// still respected.
pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 2;

impl Config {
    #[inline]
    pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
        if class > CharClass::Delimiter {
            // transition from non word to word
            match prev_class {
                CharClass::Whitespace => return self.bonus_boundary_white,
                CharClass::Delimiter => return self.bonus_boundary_delimiter,
                CharClass::NonWord => return BONUS_BOUNDARY,
                _ => (),
            }
        }
        if prev_class == CharClass::Lower && class == CharClass::Upper
            || prev_class != CharClass::Number && class == CharClass::Number
        {
            // camelCase letter123
            BONUS_CAMEL123
        } else if class == CharClass::Whitespace {
            self.bonus_boundary_white
        } else if class == CharClass::NonWord {
            return BONUS_NON_WORD;
        } else {
            0
        }
    }
}
impl Matcher {
    #[inline(always)]
    pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
        self.config.bonus_for(prev_class, class)
    }

    pub(crate) fn calculate_score<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
        &mut self,
        haystack: &[H],
        needle: &[N],
        start: usize,
        end: usize,
        indices: &mut Vec<u32>,
    ) -> u16 {
        if INDICES {
            indices.reserve(needle.len());
        }

        let mut prev_class = start
            .checked_sub(1)
            .map(|i| haystack[i].char_class(&self.config))
            .unwrap_or(self.config.initial_char_class);
        let mut needle_iter = needle.iter();
        let mut needle_char = *needle_iter.next().unwrap();

        let mut in_gap = false;
        let mut consecutive = 1;

        // unrolled the first iteration to make applying the first char multiplier less awkward
        if INDICES {
            indices.push(start as u32)
        }
        let class = haystack[start].char_class(&self.config);
        let mut first_bonus = self.bonus_for(prev_class, class);
        let mut score = SCORE_MATCH + first_bonus * BONUS_FIRST_CHAR_MULTIPLIER;
        prev_class = class;
        needle_char = *needle_iter.next().unwrap_or(&needle_char);

        for (i, c) in haystack[start + 1..end].iter().enumerate() {
            let (c, class) = c.char_class_and_normalize(&self.config);
            if c == needle_char {
                if INDICES {
                    indices.push(i as u32 + start as u32 + 1)
                }
                let mut bonus = self.bonus_for(prev_class, class);
                if consecutive != 0 {
                    if bonus >= BONUS_BOUNDARY && bonus > first_bonus {
                        first_bonus = bonus
                    }
                    bonus = max(max(bonus, first_bonus), BONUS_CONSECUTIVE);
                } else {
                    first_bonus = bonus;
                }
                score += SCORE_MATCH + bonus;
                in_gap = false;
                consecutive += 1;
                if let Some(&next) = needle_iter.next() {
                    needle_char = next;
                }
            } else {
                let penalty = if in_gap {
                    PENALTY_GAP_EXTENSION
                } else {
                    PENALTY_GAP_START
                };
                score = score.saturating_sub(penalty);
                in_gap = true;
                consecutive = 0;
            }
            prev_class = class;
        }
        if self.config.prefer_prefix {
            if start != 0 {
                let penalty = PENALTY_GAP_START
                    + PENALTY_GAP_START * (start - 1).min(u16::MAX as usize) as u16;
                score += MAX_PREFIX_BONUS.saturating_sub(penalty / PREFIX_BONUS_SCALE);
            } else {
                score += MAX_PREFIX_BONUS;
            }
        }
        score
    }
}