Squashed 'crates/atuin-nucleo/' content from commit 4253de9f

git-subtree-dir: crates/atuin-nucleo git-subtree-split: 4253de9faabb4e5c6d81d946a5e35a90f87347ee
author: Ellie Huxtable <ellie@elliehuxtable.com> 2026-03-16 15:22:49 -0700
committer: Ellie Huxtable <ellie@elliehuxtable.com> 2026-03-16 15:22:49 -0700
commit: 8f9777ce7aecfe1a163a915e3245466b9dd9ac2e (patch)
tree: a878a0495e5f84266b7e36918a9e1a9432b0ddd8 /matcher/src
download: atuin-8f9777ce7aecfe1a163a915e3245466b9dd9ac2e.zip
17 files changed, 5476 insertions, 0 deletions
diff --git a/matcher/src/chars.rs b/matcher/src/chars.rs
new file mode 100644
index 00000000..d13a2466
--- /dev/null
+++ b/matcher/src/chars.rs
@@ -0,0 +1,207 @@
+//! Utilities for working with (unicode) characters/codepoints
+
+use std::fmt::{self, Debug, Display};
+
+#[cfg(feature = "unicode-casefold")]
+use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
+use crate::Config;
+
+//autogenerated by generate-ucd
+#[allow(warnings)]
+#[rustfmt::skip]
+#[cfg(feature = "unicode-casefold")]
+mod case_fold;
+#[cfg(feature = "unicode-normalization")]
+mod normalize;
+
+pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
+    const ASCII: bool;
+    fn char_class(self, config: &Config) -> CharClass;
+    fn char_class_and_normalize(self, config: &Config) -> (Self, CharClass);
+    fn normalize(self, config: &Config) -> Self;
+}
+
+/// repr tansparent wrapper around u8 with better formatting and `PartialEq<char>` implementation
+#[repr(transparent)]
+#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
+pub(crate) struct AsciiChar(pub u8);
+
+impl AsciiChar {
+    pub fn cast(bytes: &[u8]) -> &[AsciiChar] {
+        unsafe { &*(bytes as *const [u8] as *const [AsciiChar]) }
+    }
+}
+
+impl fmt::Display for AsciiChar {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        Display::fmt(&(self.0 as char), f)
+    }
+}
+
+impl PartialEq<AsciiChar> for char {
+    fn eq(&self, other: &AsciiChar) -> bool {
+        other.0 as char == *self
+    }
+}
+
+impl Char for AsciiChar {
+    const ASCII: bool = true;
+    #[inline]
+    fn char_class(self, config: &Config) -> CharClass {
+        let c = self.0;
+        // using manual if conditions instead optimizes better
+        if c >= b'a' && c <= b'z' {
+            CharClass::Lower
+        } else if c >= b'A' && c <= b'Z' {
+            CharClass::Upper
+        } else if c >= b'0' && c <= b'9' {
+            CharClass::Number
+        } else if c.is_ascii_whitespace() {
+            CharClass::Whitespace
+        } else if config.delimiter_chars.contains(&c) {
+            CharClass::Delimiter
+        } else {
+            CharClass::NonWord
+        }
+    }
+
+    #[inline(always)]
+    fn char_class_and_normalize(mut self, config: &Config) -> (Self, CharClass) {
+        let char_class = self.char_class(config);
+        if config.ignore_case && char_class == CharClass::Upper {
+            self.0 += 32
+        }
+        (self, char_class)
+    }
+
+    #[inline(always)]
+    fn normalize(mut self, config: &Config) -> Self {
+        if config.ignore_case && self.0 >= b'A' && self.0 <= b'Z' {
+            self.0 += 32
+        }
+        self
+    }
+}
+fn char_class_non_ascii(c: char) -> CharClass {
+    if c.is_lowercase() {
+        CharClass::Lower
+    } else if is_upper_case(c) {
+        CharClass::Upper
+    } else if c.is_numeric() {
+        CharClass::Number
+    } else if c.is_alphabetic() {
+        CharClass::Letter
+    } else if c.is_whitespace() {
+        CharClass::Whitespace
+    } else {
+        CharClass::NonWord
+    }
+}
+impl Char for char {
+    const ASCII: bool = false;
+    #[inline(always)]
+    fn char_class(self, config: &Config) -> CharClass {
+        if self.is_ascii() {
+            return AsciiChar(self as u8).char_class(config);
+        }
+        char_class_non_ascii(self)
+    }
+
+    #[inline(always)]
+    fn char_class_and_normalize(mut self, config: &Config) -> (Self, CharClass) {
+        if self.is_ascii() {
+            let (c, class) = AsciiChar(self as u8).char_class_and_normalize(config);
+            return (c.0 as char, class);
+        }
+        let char_class = char_class_non_ascii(self);
+        #[cfg(feature = "unicode-casefold")]
+        let mut case_fold = char_class == CharClass::Upper;
+        #[cfg(feature = "unicode-normalization")]
+        if config.normalize {
+            self = normalize::normalize(self);
+            case_fold = true
+        }
+        #[cfg(feature = "unicode-casefold")]
+        if case_fold && config.ignore_case {
+            self = CASE_FOLDING_SIMPLE
+                .binary_search_by_key(&self, |(upper, _)| *upper)
+                .map_or(self, |idx| CASE_FOLDING_SIMPLE[idx].1)
+        }
+        (self, char_class)
+    }
+
+    #[inline(always)]
+    fn normalize(mut self, config: &Config) -> Self {
+        #[cfg(feature = "unicode-normalization")]
+        if config.normalize {
+            self = normalize::normalize(self);
+        }
+        #[cfg(feature = "unicode-casefold")]
+        if config.ignore_case {
+            self = to_lower_case(self)
+        }
+        self
+    }
+}
+
+#[cfg(feature = "unicode-normalization")]
+pub use normalize::normalize;
+#[cfg(feature = "unicode-segmentation")]
+use unicode_segmentation::UnicodeSegmentation;
+
+/// Converts a character to lower case using simple unicode case folding
+#[cfg(feature = "unicode-casefold")]
+#[inline(always)]
+pub fn to_lower_case(c: char) -> char {
+    CASE_FOLDING_SIMPLE
+        .binary_search_by_key(&c, |(upper, _)| *upper)
+        .map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
+}
+
+/// Checks if a character is upper case according to simple unicode case folding.
+/// if the `unicode-casefold` feature is disable the equivalent std function is used
+#[inline(always)]
+pub fn is_upper_case(c: char) -> bool {
+    #[cfg(feature = "unicode-casefold")]
+    let val = CASE_FOLDING_SIMPLE
+        .binary_search_by_key(&c, |(upper, _)| *upper)
+        .is_ok();
+    #[cfg(not(feature = "unicode-casefold"))]
+    let val = c.is_uppercase();
+    val
+}
+
+#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
+pub(crate) enum CharClass {
+    Whitespace,
+    NonWord,
+    Delimiter,
+    Lower,
+    Upper,
+    Letter,
+    Number,
+}
+
+/// Nucleo cannot match graphemes as single units. To work around
+/// that we only use the first codepoint of each grapheme. This
+/// iterator returns the first character of each unicode grapheme
+/// in a string and is used for constructing `Utf32Str(ing)`.
+pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
+    #[cfg(feature = "unicode-segmentation")]
+    let res = text.graphemes(true).map(|grapheme| {
+        // we need to special-case this check since `\r\n` is a single grapheme and is
+        // therefore the exception to the rule that normalization of a grapheme should
+        // map to the first character.
+        if grapheme == "\r\n" {
+            '\n'
+        } else {
+            grapheme
+                .chars()
+                .next()
+                .expect("graphemes must be non-empty")
+        }
+    });
+    #[cfg(not(feature = "unicode-segmentation"))]
+    let res = text.chars();
+    res
+}
diff --git a/matcher/src/chars/case_fold.rs b/matcher/src/chars/case_fold.rs
new file mode 100644
index 00000000..aacbe461
--- /dev/null
+++ b/matcher/src/chars/case_fold.rs
@@ -0,0 +1,347 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//   ucd-generate case-folding-simple /tmp/ucd-15.0.0 --chars
+//
+// Unicode version: 15.0.0.
+//
+// ucd-generate 0.3.0 is available on crates.io.
+
+pub const CASE_FOLDING_SIMPLE: &'static [(char, char)] = &[
+  ('A', 'a'), ('B', 'b'), ('C', 'c'), ('D', 'd'), ('E', 'e'), ('F', 'f'),
+  ('G', 'g'), ('H', 'h'), ('I', 'i'), ('J', 'j'), ('K', 'k'), ('L', 'l'),
+  ('M', 'm'), ('N', 'n'), ('O', 'o'), ('P', 'p'), ('Q', 'q'), ('R', 'r'),
+  ('S', 's'), ('T', 't'), ('U', 'u'), ('V', 'v'), ('W', 'w'), ('X', 'x'),
+  ('Y', 'y'), ('Z', 'z'), ('µ', 'μ'), ('À', 'à'), ('Á', 'á'),
+  ('Â', 'â'), ('Ã', 'ã'), ('Ä', 'ä'), ('Å', 'å'), ('Æ', 'æ'),
+  ('Ç', 'ç'), ('È', 'è'), ('É', 'é'), ('Ê', 'ê'), ('Ë', 'ë'),
+  ('Ì', 'ì'), ('Í', 'í'), ('Î', 'î'), ('Ï', 'ï'), ('Ð', 'ð'),
+  ('Ñ', 'ñ'), ('Ò', 'ò'), ('Ó', 'ó'), ('Ô', 'ô'), ('Õ', 'õ'),
+  ('Ö', 'ö'), ('Ø', 'ø'), ('Ù', 'ù'), ('Ú', 'ú'), ('Û', 'û'),
+  ('Ü', 'ü'), ('Ý', 'ý'), ('Þ', 'þ'), ('Ā', 'ā'), ('Ă', 'ă'),
+  ('Ą', 'ą'), ('Ć', 'ć'), ('Ĉ', 'ĉ'), ('Ċ', 'ċ'), ('Č', 'č'),
+  ('Ď', 'ď'), ('Đ', 'đ'), ('Ē', 'ē'), ('Ĕ', 'ĕ'), ('Ė', 'ė'),
+  ('Ę', 'ę'), ('Ě', 'ě'), ('Ĝ', 'ĝ'), ('Ğ', 'ğ'), ('Ġ', 'ġ'),
+  ('Ģ', 'ģ'), ('Ĥ', 'ĥ'), ('Ħ', 'ħ'), ('Ĩ', 'ĩ'), ('Ī', 'ī'),
+  ('Ĭ', 'ĭ'), ('Į', 'į'), ('Ĳ', 'ĳ'), ('Ĵ', 'ĵ'), ('Ķ', 'ķ'),
+  ('Ĺ', 'ĺ'), ('Ļ', 'ļ'), ('Ľ', 'ľ'), ('Ŀ', 'ŀ'), ('Ł', 'ł'),
+  ('Ń', 'ń'), ('Ņ', 'ņ'), ('Ň', 'ň'), ('Ŋ', 'ŋ'), ('Ō', 'ō'),
+  ('Ŏ', 'ŏ'), ('Ő', 'ő'), ('Œ', 'œ'), ('Ŕ', 'ŕ'), ('Ŗ', 'ŗ'),
+  ('Ř', 'ř'), ('Ś', 'ś'), ('Ŝ', 'ŝ'), ('Ş', 'ş'), ('Š', 'š'),
+  ('Ţ', 'ţ'), ('Ť', 'ť'), ('Ŧ', 'ŧ'), ('Ũ', 'ũ'), ('Ū', 'ū'),
+  ('Ŭ', 'ŭ'), ('Ů', 'ů'), ('Ű', 'ű'), ('Ų', 'ų'), ('Ŵ', 'ŵ'),
+  ('Ŷ', 'ŷ'), ('Ÿ', 'ÿ'), ('Ź', 'ź'), ('Ż', 'ż'), ('Ž', 'ž'),
+  ('ſ', 's'), ('Ɓ', 'ɓ'), ('Ƃ', 'ƃ'), ('Ƅ', 'ƅ'), ('Ɔ', 'ɔ'),
+  ('Ƈ', 'ƈ'), ('Ɖ', 'ɖ'), ('Ɗ', 'ɗ'), ('Ƌ', 'ƌ'), ('Ǝ', 'ǝ'),
+  ('Ə', 'ə'), ('Ɛ', 'ɛ'), ('Ƒ', 'ƒ'), ('Ɠ', 'ɠ'), ('Ɣ', 'ɣ'),
+  ('Ɩ', 'ɩ'), ('Ɨ', 'ɨ'), ('Ƙ', 'ƙ'), ('Ɯ', 'ɯ'), ('Ɲ', 'ɲ'),
+  ('Ɵ', 'ɵ'), ('Ơ', 'ơ'), ('Ƣ', 'ƣ'), ('Ƥ', 'ƥ'), ('Ʀ', 'ʀ'),
+  ('Ƨ', 'ƨ'), ('Ʃ', 'ʃ'), ('Ƭ', 'ƭ'), ('Ʈ', 'ʈ'), ('Ư', 'ư'),
+  ('Ʊ', 'ʊ'), ('Ʋ', 'ʋ'), ('Ƴ', 'ƴ'), ('Ƶ', 'ƶ'), ('Ʒ', 'ʒ'),
+  ('Ƹ', 'ƹ'), ('Ƽ', 'ƽ'), ('Ǆ', 'ǆ'), ('ǅ', 'ǆ'), ('Ǉ', 'ǉ'),
+  ('ǈ', 'ǉ'), ('Ǌ', 'ǌ'), ('ǋ', 'ǌ'), ('Ǎ', 'ǎ'), ('Ǐ', 'ǐ'),
+  ('Ǒ', 'ǒ'), ('Ǔ', 'ǔ'), ('Ǖ', 'ǖ'), ('Ǘ', 'ǘ'), ('Ǚ', 'ǚ'),
+  ('Ǜ', 'ǜ'), ('Ǟ', 'ǟ'), ('Ǡ', 'ǡ'), ('Ǣ', 'ǣ'), ('Ǥ', 'ǥ'),
+  ('Ǧ', 'ǧ'), ('Ǩ', 'ǩ'), ('Ǫ', 'ǫ'), ('Ǭ', 'ǭ'), ('Ǯ', 'ǯ'),
+  ('Ǳ', 'ǳ'), ('ǲ', 'ǳ'), ('Ǵ', 'ǵ'), ('Ƕ', 'ƕ'), ('Ƿ', 'ƿ'),
+  ('Ǹ', 'ǹ'), ('Ǻ', 'ǻ'), ('Ǽ', 'ǽ'), ('Ǿ', 'ǿ'), ('Ȁ', 'ȁ'),
+  ('Ȃ', 'ȃ'), ('Ȅ', 'ȅ'), ('Ȇ', 'ȇ'), ('Ȉ', 'ȉ'), ('Ȋ', 'ȋ'),
+  ('Ȍ', 'ȍ'), ('Ȏ', 'ȏ'), ('Ȑ', 'ȑ'), ('Ȓ', 'ȓ'), ('Ȕ', 'ȕ'),
+  ('Ȗ', 'ȗ'), ('Ș', 'ș'), ('Ț', 'ț'), ('Ȝ', 'ȝ'), ('Ȟ', 'ȟ'),
+  ('Ƞ', 'ƞ'), ('Ȣ', 'ȣ'), ('Ȥ', 'ȥ'), ('Ȧ', 'ȧ'), ('Ȩ', 'ȩ'),
+  ('Ȫ', 'ȫ'), ('Ȭ', 'ȭ'), ('Ȯ', 'ȯ'), ('Ȱ', 'ȱ'), ('Ȳ', 'ȳ'),
+  ('Ⱥ', 'ⱥ'), ('Ȼ', 'ȼ'), ('Ƚ', 'ƚ'), ('Ⱦ', 'ⱦ'), ('Ɂ', 'ɂ'),
+  ('Ƀ', 'ƀ'), ('Ʉ', 'ʉ'), ('Ʌ', 'ʌ'), ('Ɇ', 'ɇ'), ('Ɉ', 'ɉ'),
+  ('Ɋ', 'ɋ'), ('Ɍ', 'ɍ'), ('Ɏ', 'ɏ'), ('\u{345}', 'ι'), ('Ͱ', 'ͱ'),
+  ('Ͳ', 'ͳ'), ('Ͷ', 'ͷ'), ('Ϳ', 'ϳ'), ('Ά', 'ά'), ('Έ', 'έ'),
+  ('Ή', 'ή'), ('Ί', 'ί'), ('Ό', 'ό'), ('Ύ', 'ύ'), ('Ώ', 'ώ'),
+  ('Α', 'α'), ('Β', 'β'), ('Γ', 'γ'), ('Δ', 'δ'), ('Ε', 'ε'),
+  ('Ζ', 'ζ'), ('Η', 'η'), ('Θ', 'θ'), ('Ι', 'ι'), ('Κ', 'κ'),
+  ('Λ', 'λ'), ('Μ', 'μ'), ('Ν', 'ν'), ('Ξ', 'ξ'), ('Ο', 'ο'),
+  ('Π', 'π'), ('Ρ', 'ρ'), ('Σ', 'σ'), ('Τ', 'τ'), ('Υ', 'υ'),
+  ('Φ', 'φ'), ('Χ', 'χ'), ('Ψ', 'ψ'), ('Ω', 'ω'), ('Ϊ', 'ϊ'),
+  ('Ϋ', 'ϋ'), ('ς', 'σ'), ('Ϗ', 'ϗ'), ('ϐ', 'β'), ('ϑ', 'θ'),
+  ('ϕ', 'φ'), ('ϖ', 'π'), ('Ϙ', 'ϙ'), ('Ϛ', 'ϛ'), ('Ϝ', 'ϝ'),
+  ('Ϟ', 'ϟ'), ('Ϡ', 'ϡ'), ('Ϣ', 'ϣ'), ('Ϥ', 'ϥ'), ('Ϧ', 'ϧ'),
+  ('Ϩ', 'ϩ'), ('Ϫ', 'ϫ'), ('Ϭ', 'ϭ'), ('Ϯ', 'ϯ'), ('ϰ', 'κ'),
+  ('ϱ', 'ρ'), ('ϴ', 'θ'), ('ϵ', 'ε'), ('Ϸ', 'ϸ'), ('Ϲ', 'ϲ'),
+  ('Ϻ', 'ϻ'), ('Ͻ', 'ͻ'), ('Ͼ', 'ͼ'), ('Ͽ', 'ͽ'), ('Ѐ', 'ѐ'),
+  ('Ё', 'ё'), ('Ђ', 'ђ'), ('Ѓ', 'ѓ'), ('Є', 'є'), ('Ѕ', 'ѕ'),
+  ('І', 'і'), ('Ї', 'ї'), ('Ј', 'ј'), ('Љ', 'љ'), ('Њ', 'њ'),
+  ('Ћ', 'ћ'), ('Ќ', 'ќ'), ('Ѝ', 'ѝ'), ('Ў', 'ў'), ('Џ', 'џ'),
+  ('А', 'а'), ('Б', 'б'), ('В', 'в'), ('Г', 'г'), ('Д', 'д'),
+  ('Е', 'е'), ('Ж', 'ж'), ('З', 'з'), ('И', 'и'), ('Й', 'й'),
+  ('К', 'к'), ('Л', 'л'), ('М', 'м'), ('Н', 'н'), ('О', 'о'),
+  ('П', 'п'), ('Р', 'р'), ('С', 'с'), ('Т', 'т'), ('У', 'у'),
+  ('Ф', 'ф'), ('Х', 'х'), ('Ц', 'ц'), ('Ч', 'ч'), ('Ш', 'ш'),
+  ('Щ', 'щ'), ('Ъ', 'ъ'), ('Ы', 'ы'), ('Ь', 'ь'), ('Э', 'э'),
+  ('Ю', 'ю'), ('Я', 'я'), ('Ѡ', 'ѡ'), ('Ѣ', 'ѣ'), ('Ѥ', 'ѥ'),
+  ('Ѧ', 'ѧ'), ('Ѩ', 'ѩ'), ('Ѫ', 'ѫ'), ('Ѭ', 'ѭ'), ('Ѯ', 'ѯ'),
+  ('Ѱ', 'ѱ'), ('Ѳ', 'ѳ'), ('Ѵ', 'ѵ'), ('Ѷ', 'ѷ'), ('Ѹ', 'ѹ'),
+  ('Ѻ', 'ѻ'), ('Ѽ', 'ѽ'), ('Ѿ', 'ѿ'), ('Ҁ', 'ҁ'), ('Ҋ', 'ҋ'),
+  ('Ҍ', 'ҍ'), ('Ҏ', 'ҏ'), ('Ґ', 'ґ'), ('Ғ', 'ғ'), ('Ҕ', 'ҕ'),
+  ('Җ', 'җ'), ('Ҙ', 'ҙ'), ('Қ', 'қ'), ('Ҝ', 'ҝ'), ('Ҟ', 'ҟ'),
+  ('Ҡ', 'ҡ'), ('Ң', 'ң'), ('Ҥ', 'ҥ'), ('Ҧ', 'ҧ'), ('Ҩ', 'ҩ'),
+  ('Ҫ', 'ҫ'), ('Ҭ', 'ҭ'), ('Ү', 'ү'), ('Ұ', 'ұ'), ('Ҳ', 'ҳ'),
+  ('Ҵ', 'ҵ'), ('Ҷ', 'ҷ'), ('Ҹ', 'ҹ'), ('Һ', 'һ'), ('Ҽ', 'ҽ'),
+  ('Ҿ', 'ҿ'), ('Ӏ', 'ӏ'), ('Ӂ', 'ӂ'), ('Ӄ', 'ӄ'), ('Ӆ', 'ӆ'),
+  ('Ӈ', 'ӈ'), ('Ӊ', 'ӊ'), ('Ӌ', 'ӌ'), ('Ӎ', 'ӎ'), ('Ӑ', 'ӑ'),
+  ('Ӓ', 'ӓ'), ('Ӕ', 'ӕ'), ('Ӗ', 'ӗ'), ('Ә', 'ә'), ('Ӛ', 'ӛ'),
+  ('Ӝ', 'ӝ'), ('Ӟ', 'ӟ'), ('Ӡ', 'ӡ'), ('Ӣ', 'ӣ'), ('Ӥ', 'ӥ'),
+  ('Ӧ', 'ӧ'), ('Ө', 'ө'), ('Ӫ', 'ӫ'), ('Ӭ', 'ӭ'), ('Ӯ', 'ӯ'),
+  ('Ӱ', 'ӱ'), ('Ӳ', 'ӳ'), ('Ӵ', 'ӵ'), ('Ӷ', 'ӷ'), ('Ӹ', 'ӹ'),
+  ('Ӻ', 'ӻ'), ('Ӽ', 'ӽ'), ('Ӿ', 'ӿ'), ('Ԁ', 'ԁ'), ('Ԃ', 'ԃ'),
+  ('Ԅ', 'ԅ'), ('Ԇ', 'ԇ'), ('Ԉ', 'ԉ'), ('Ԋ', 'ԋ'), ('Ԍ', 'ԍ'),
+  ('Ԏ', 'ԏ'), ('Ԑ', 'ԑ'), ('Ԓ', 'ԓ'), ('Ԕ', 'ԕ'), ('Ԗ', 'ԗ'),
+  ('Ԙ', 'ԙ'), ('Ԛ', 'ԛ'), ('Ԝ', 'ԝ'), ('Ԟ', 'ԟ'), ('Ԡ', 'ԡ'),
+  ('Ԣ', 'ԣ'), ('Ԥ', 'ԥ'), ('Ԧ', 'ԧ'), ('Ԩ', 'ԩ'), ('Ԫ', 'ԫ'),
+  ('Ԭ', 'ԭ'), ('Ԯ', 'ԯ'), ('Ա', 'ա'), ('Բ', 'բ'), ('Գ', 'գ'),
+  ('Դ', 'դ'), ('Ե', 'ե'), ('Զ', 'զ'), ('Է', 'է'), ('Ը', 'ը'),
+  ('Թ', 'թ'), ('Ժ', 'ժ'), ('Ի', 'ի'), ('Լ', 'լ'), ('Խ', 'խ'),
+  ('Ծ', 'ծ'), ('Կ', 'կ'), ('Հ', 'հ'), ('Ձ', 'ձ'), ('Ղ', 'ղ'),
+  ('Ճ', 'ճ'), ('Մ', 'մ'), ('Յ', 'յ'), ('Ն', 'ն'), ('Շ', 'շ'),
+  ('Ո', 'ո'), ('Չ', 'չ'), ('Պ', 'պ'), ('Ջ', 'ջ'), ('Ռ', 'ռ'),
+  ('Ս', 'ս'), ('Վ', 'վ'), ('Տ', 'տ'), ('Ր', 'ր'), ('Ց', 'ց'),
+  ('Ւ', 'ւ'), ('Փ', 'փ'), ('Ք', 'ք'), ('Օ', 'օ'), ('Ֆ', 'ֆ'),
+  ('Ⴀ', 'ⴀ'), ('Ⴁ', 'ⴁ'), ('Ⴂ', 'ⴂ'), ('Ⴃ', 'ⴃ'),
+  ('Ⴄ', 'ⴄ'), ('Ⴅ', 'ⴅ'), ('Ⴆ', 'ⴆ'), ('Ⴇ', 'ⴇ'),
+  ('Ⴈ', 'ⴈ'), ('Ⴉ', 'ⴉ'), ('Ⴊ', 'ⴊ'), ('Ⴋ', 'ⴋ'),
+  ('Ⴌ', 'ⴌ'), ('Ⴍ', 'ⴍ'), ('Ⴎ', 'ⴎ'), ('Ⴏ', 'ⴏ'),
+  ('Ⴐ', 'ⴐ'), ('Ⴑ', 'ⴑ'), ('Ⴒ', 'ⴒ'), ('Ⴓ', 'ⴓ'),
+  ('Ⴔ', 'ⴔ'), ('Ⴕ', 'ⴕ'), ('Ⴖ', 'ⴖ'), ('Ⴗ', 'ⴗ'),
+  ('Ⴘ', 'ⴘ'), ('Ⴙ', 'ⴙ'), ('Ⴚ', 'ⴚ'), ('Ⴛ', 'ⴛ'),
+  ('Ⴜ', 'ⴜ'), ('Ⴝ', 'ⴝ'), ('Ⴞ', 'ⴞ'), ('Ⴟ', 'ⴟ'),
+  ('Ⴠ', 'ⴠ'), ('Ⴡ', 'ⴡ'), ('Ⴢ', 'ⴢ'), ('Ⴣ', 'ⴣ'),
+  ('Ⴤ', 'ⴤ'), ('Ⴥ', 'ⴥ'), ('Ⴧ', 'ⴧ'), ('Ⴭ', 'ⴭ'),
+  ('ᏸ', 'Ᏸ'), ('ᏹ', 'Ᏹ'), ('ᏺ', 'Ᏺ'), ('ᏻ', 'Ᏻ'),
+  ('ᏼ', 'Ᏼ'), ('ᏽ', 'Ᏽ'), ('ᲀ', 'в'), ('ᲁ', 'д'), ('ᲂ', 'о'),
+  ('ᲃ', 'с'), ('ᲄ', 'т'), ('ᲅ', 'т'), ('ᲆ', 'ъ'), ('ᲇ', 'ѣ'),
+  ('ᲈ', 'ꙋ'), ('Ა', 'ა'), ('Ბ', 'ბ'), ('Გ', 'გ'),
+  ('Დ', 'დ'), ('Ე', 'ე'), ('Ვ', 'ვ'), ('Ზ', 'ზ'),
+  ('Თ', 'თ'), ('Ი', 'ი'), ('Კ', 'კ'), ('Ლ', 'ლ'),
+  ('Მ', 'მ'), ('Ნ', 'ნ'), ('Ო', 'ო'), ('Პ', 'პ'),
+  ('Ჟ', 'ჟ'), ('Რ', 'რ'), ('Ს', 'ს'), ('Ტ', 'ტ'),
+  ('Უ', 'უ'), ('Ფ', 'ფ'), ('Ქ', 'ქ'), ('Ღ', 'ღ'),
+  ('Ყ', 'ყ'), ('Შ', 'შ'), ('Ჩ', 'ჩ'), ('Ც', 'ც'),
+  ('Ძ', 'ძ'), ('Წ', 'წ'), ('Ჭ', 'ჭ'), ('Ხ', 'ხ'),
+  ('Ჯ', 'ჯ'), ('Ჰ', 'ჰ'), ('Ჱ', 'ჱ'), ('Ჲ', 'ჲ'),
+  ('Ჳ', 'ჳ'), ('Ჴ', 'ჴ'), ('Ჵ', 'ჵ'), ('Ჶ', 'ჶ'),
+  ('Ჷ', 'ჷ'), ('Ჸ', 'ჸ'), ('Ჹ', 'ჹ'), ('Ჺ', 'ჺ'),
+  ('Ჽ', 'ჽ'), ('Ჾ', 'ჾ'), ('Ჿ', 'ჿ'), ('Ḁ', 'ḁ'),
+  ('Ḃ', 'ḃ'), ('Ḅ', 'ḅ'), ('Ḇ', 'ḇ'), ('Ḉ', 'ḉ'),
+  ('Ḋ', 'ḋ'), ('Ḍ', 'ḍ'), ('Ḏ', 'ḏ'), ('Ḑ', 'ḑ'),
+  ('Ḓ', 'ḓ'), ('Ḕ', 'ḕ'), ('Ḗ', 'ḗ'), ('Ḙ', 'ḙ'),
+  ('Ḛ', 'ḛ'), ('Ḝ', 'ḝ'), ('Ḟ', 'ḟ'), ('Ḡ', 'ḡ'),
+  ('Ḣ', 'ḣ'), ('Ḥ', 'ḥ'), ('Ḧ', 'ḧ'), ('Ḩ', 'ḩ'),
+  ('Ḫ', 'ḫ'), ('Ḭ', 'ḭ'), ('Ḯ', 'ḯ'), ('Ḱ', 'ḱ'),
+  ('Ḳ', 'ḳ'), ('Ḵ', 'ḵ'), ('Ḷ', 'ḷ'), ('Ḹ', 'ḹ'),
+  ('Ḻ', 'ḻ'), ('Ḽ', 'ḽ'), ('Ḿ', 'ḿ'), ('Ṁ', 'ṁ'),
+  ('Ṃ', 'ṃ'), ('Ṅ', 'ṅ'), ('Ṇ', 'ṇ'), ('Ṉ', 'ṉ'),
+  ('Ṋ', 'ṋ'), ('Ṍ', 'ṍ'), ('Ṏ', 'ṏ'), ('Ṑ', 'ṑ'),
+  ('Ṓ', 'ṓ'), ('Ṕ', 'ṕ'), ('Ṗ', 'ṗ'), ('Ṙ', 'ṙ'),
+  ('Ṛ', 'ṛ'), ('Ṝ', 'ṝ'), ('Ṟ', 'ṟ'), ('Ṡ', 'ṡ'),
+  ('Ṣ', 'ṣ'), ('Ṥ', 'ṥ'), ('Ṧ', 'ṧ'), ('Ṩ', 'ṩ'),
+  ('Ṫ', 'ṫ'), ('Ṭ', 'ṭ'), ('Ṯ', 'ṯ'), ('Ṱ', 'ṱ'),
+  ('Ṳ', 'ṳ'), ('Ṵ', 'ṵ'), ('Ṷ', 'ṷ'), ('Ṹ', 'ṹ'),
+  ('Ṻ', 'ṻ'), ('Ṽ', 'ṽ'), ('Ṿ', 'ṿ'), ('Ẁ', 'ẁ'),
+  ('Ẃ', 'ẃ'), ('Ẅ', 'ẅ'), ('Ẇ', 'ẇ'), ('Ẉ', 'ẉ'),
+  ('Ẋ', 'ẋ'), ('Ẍ', 'ẍ'), ('Ẏ', 'ẏ'), ('Ẑ', 'ẑ'),
+  ('Ẓ', 'ẓ'), ('Ẕ', 'ẕ'), ('ẛ', 'ṡ'), ('ẞ', 'ß'),
+  ('Ạ', 'ạ'), ('Ả', 'ả'), ('Ấ', 'ấ'), ('Ầ', 'ầ'),
+  ('Ẩ', 'ẩ'), ('Ẫ', 'ẫ'), ('Ậ', 'ậ'), ('Ắ', 'ắ'),
+  ('Ằ', 'ằ'), ('Ẳ', 'ẳ'), ('Ẵ', 'ẵ'), ('Ặ', 'ặ'),
+  ('Ẹ', 'ẹ'), ('Ẻ', 'ẻ'), ('Ẽ', 'ẽ'), ('Ế', 'ế'),
+  ('Ề', 'ề'), ('Ể', 'ể'), ('Ễ', 'ễ'), ('Ệ', 'ệ'),
+  ('Ỉ', 'ỉ'), ('Ị', 'ị'), ('Ọ', 'ọ'), ('Ỏ', 'ỏ'),
+  ('Ố', 'ố'), ('Ồ', 'ồ'), ('Ổ', 'ổ'), ('Ỗ', 'ỗ'),
+  ('Ộ', 'ộ'), ('Ớ', 'ớ'), ('Ờ', 'ờ'), ('Ở', 'ở'),
+  ('Ỡ', 'ỡ'), ('Ợ', 'ợ'), ('Ụ', 'ụ'), ('Ủ', 'ủ'),
+  ('Ứ', 'ứ'), ('Ừ', 'ừ'), ('Ử', 'ử'), ('Ữ', 'ữ'),
+  ('Ự', 'ự'), ('Ỳ', 'ỳ'), ('Ỵ', 'ỵ'), ('Ỷ', 'ỷ'),
+  ('Ỹ', 'ỹ'), ('Ỻ', 'ỻ'), ('Ỽ', 'ỽ'), ('Ỿ', 'ỿ'),
+  ('Ἀ', 'ἀ'), ('Ἁ', 'ἁ'), ('Ἂ', 'ἂ'), ('Ἃ', 'ἃ'),
+  ('Ἄ', 'ἄ'), ('Ἅ', 'ἅ'), ('Ἆ', 'ἆ'), ('Ἇ', 'ἇ'),
+  ('Ἐ', 'ἐ'), ('Ἑ', 'ἑ'), ('Ἒ', 'ἒ'), ('Ἓ', 'ἓ'),
+  ('Ἔ', 'ἔ'), ('Ἕ', 'ἕ'), ('Ἠ', 'ἠ'), ('Ἡ', 'ἡ'),
+  ('Ἢ', 'ἢ'), ('Ἣ', 'ἣ'), ('Ἤ', 'ἤ'), ('Ἥ', 'ἥ'),
+  ('Ἦ', 'ἦ'), ('Ἧ', 'ἧ'), ('Ἰ', 'ἰ'), ('Ἱ', 'ἱ'),
+  ('Ἲ', 'ἲ'), ('Ἳ', 'ἳ'), ('Ἴ', 'ἴ'), ('Ἵ', 'ἵ'),
+  ('Ἶ', 'ἶ'), ('Ἷ', 'ἷ'), ('Ὀ', 'ὀ'), ('Ὁ', 'ὁ'),
+  ('Ὂ', 'ὂ'), ('Ὃ', 'ὃ'), ('Ὄ', 'ὄ'), ('Ὅ', 'ὅ'),
+  ('Ὑ', 'ὑ'), ('Ὓ', 'ὓ'), ('Ὕ', 'ὕ'), ('Ὗ', 'ὗ'),
+  ('Ὠ', 'ὠ'), ('Ὡ', 'ὡ'), ('Ὢ', 'ὢ'), ('Ὣ', 'ὣ'),
+  ('Ὤ', 'ὤ'), ('Ὥ', 'ὥ'), ('Ὦ', 'ὦ'), ('Ὧ', 'ὧ'),
+  ('ᾈ', 'ᾀ'), ('ᾉ', 'ᾁ'), ('ᾊ', 'ᾂ'), ('ᾋ', 'ᾃ'),
+  ('ᾌ', 'ᾄ'), ('ᾍ', 'ᾅ'), ('ᾎ', 'ᾆ'), ('ᾏ', 'ᾇ'),
+  ('ᾘ', 'ᾐ'), ('ᾙ', 'ᾑ'), ('ᾚ', 'ᾒ'), ('ᾛ', 'ᾓ'),
+  ('ᾜ', 'ᾔ'), ('ᾝ', 'ᾕ'), ('ᾞ', 'ᾖ'), ('ᾟ', 'ᾗ'),
+  ('ᾨ', 'ᾠ'), ('ᾩ', 'ᾡ'), ('ᾪ', 'ᾢ'), ('ᾫ', 'ᾣ'),
+  ('ᾬ', 'ᾤ'), ('ᾭ', 'ᾥ'), ('ᾮ', 'ᾦ'), ('ᾯ', 'ᾧ'),
+  ('Ᾰ', 'ᾰ'), ('Ᾱ', 'ᾱ'), ('Ὰ', 'ὰ'), ('Ά', 'ά'),
+  ('ᾼ', 'ᾳ'), ('ι', 'ι'), ('Ὲ', 'ὲ'), ('Έ', 'έ'),
+  ('Ὴ', 'ὴ'), ('Ή', 'ή'), ('ῌ', 'ῃ'), ('Ῐ', 'ῐ'),
+  ('Ῑ', 'ῑ'), ('Ὶ', 'ὶ'), ('Ί', 'ί'), ('Ῠ', 'ῠ'),
+  ('Ῡ', 'ῡ'), ('Ὺ', 'ὺ'), ('Ύ', 'ύ'), ('Ῥ', 'ῥ'),
+  ('Ὸ', 'ὸ'), ('Ό', 'ό'), ('Ὼ', 'ὼ'), ('Ώ', 'ώ'),
+  ('ῼ', 'ῳ'), ('Ω', 'ω'), ('K', 'k'), ('Å', 'å'), ('Ⅎ', 'ⅎ'),
+  ('Ⅰ', 'ⅰ'), ('Ⅱ', 'ⅱ'), ('Ⅲ', 'ⅲ'), ('Ⅳ', 'ⅳ'),
+  ('Ⅴ', 'ⅴ'), ('Ⅵ', 'ⅵ'), ('Ⅶ', 'ⅶ'), ('Ⅷ', 'ⅷ'),
+  ('Ⅸ', 'ⅸ'), ('Ⅹ', 'ⅹ'), ('Ⅺ', 'ⅺ'), ('Ⅻ', 'ⅻ'),
+  ('Ⅼ', 'ⅼ'), ('Ⅽ', 'ⅽ'), ('Ⅾ', 'ⅾ'), ('Ⅿ', 'ⅿ'),
+  ('Ↄ', 'ↄ'), ('Ⓐ', 'ⓐ'), ('Ⓑ', 'ⓑ'), ('Ⓒ', 'ⓒ'),
+  ('Ⓓ', 'ⓓ'), ('Ⓔ', 'ⓔ'), ('Ⓕ', 'ⓕ'), ('Ⓖ', 'ⓖ'),
+  ('Ⓗ', 'ⓗ'), ('Ⓘ', 'ⓘ'), ('Ⓙ', 'ⓙ'), ('Ⓚ', 'ⓚ'),
+  ('Ⓛ', 'ⓛ'), ('Ⓜ', 'ⓜ'), ('Ⓝ', 'ⓝ'), ('Ⓞ', 'ⓞ'),
+  ('Ⓟ', 'ⓟ'), ('Ⓠ', 'ⓠ'), ('Ⓡ', 'ⓡ'), ('Ⓢ', 'ⓢ'),
+  ('Ⓣ', 'ⓣ'), ('Ⓤ', 'ⓤ'), ('Ⓥ', 'ⓥ'), ('Ⓦ', 'ⓦ'),
+  ('Ⓧ', 'ⓧ'), ('Ⓨ', 'ⓨ'), ('Ⓩ', 'ⓩ'), ('Ⰰ', 'ⰰ'),
+  ('Ⰱ', 'ⰱ'), ('Ⰲ', 'ⰲ'), ('Ⰳ', 'ⰳ'), ('Ⰴ', 'ⰴ'),
+  ('Ⰵ', 'ⰵ'), ('Ⰶ', 'ⰶ'), ('Ⰷ', 'ⰷ'), ('Ⰸ', 'ⰸ'),
+  ('Ⰹ', 'ⰹ'), ('Ⰺ', 'ⰺ'), ('Ⰻ', 'ⰻ'), ('Ⰼ', 'ⰼ'),
+  ('Ⰽ', 'ⰽ'), ('Ⰾ', 'ⰾ'), ('Ⰿ', 'ⰿ'), ('Ⱀ', 'ⱀ'),
+  ('Ⱁ', 'ⱁ'), ('Ⱂ', 'ⱂ'), ('Ⱃ', 'ⱃ'), ('Ⱄ', 'ⱄ'),
+  ('Ⱅ', 'ⱅ'), ('Ⱆ', 'ⱆ'), ('Ⱇ', 'ⱇ'), ('Ⱈ', 'ⱈ'),
+  ('Ⱉ', 'ⱉ'), ('Ⱊ', 'ⱊ'), ('Ⱋ', 'ⱋ'), ('Ⱌ', 'ⱌ'),
+  ('Ⱍ', 'ⱍ'), ('Ⱎ', 'ⱎ'), ('Ⱏ', 'ⱏ'), ('Ⱐ', 'ⱐ'),
+  ('Ⱑ', 'ⱑ'), ('Ⱒ', 'ⱒ'), ('Ⱓ', 'ⱓ'), ('Ⱔ', 'ⱔ'),
+  ('Ⱕ', 'ⱕ'), ('Ⱖ', 'ⱖ'), ('Ⱗ', 'ⱗ'), ('Ⱘ', 'ⱘ'),
+  ('Ⱙ', 'ⱙ'), ('Ⱚ', 'ⱚ'), ('Ⱛ', 'ⱛ'), ('Ⱜ', 'ⱜ'),
+  ('Ⱝ', 'ⱝ'), ('Ⱞ', 'ⱞ'), ('Ⱟ', 'ⱟ'), ('Ⱡ', 'ⱡ'),
+  ('Ɫ', 'ɫ'), ('Ᵽ', 'ᵽ'), ('Ɽ', 'ɽ'), ('Ⱨ', 'ⱨ'),
+  ('Ⱪ', 'ⱪ'), ('Ⱬ', 'ⱬ'), ('Ɑ', 'ɑ'), ('Ɱ', 'ɱ'), ('Ɐ', 'ɐ'),
+  ('Ɒ', 'ɒ'), ('Ⱳ', 'ⱳ'), ('Ⱶ', 'ⱶ'), ('Ȿ', 'ȿ'), ('Ɀ', 'ɀ'),
+  ('Ⲁ', 'ⲁ'), ('Ⲃ', 'ⲃ'), ('Ⲅ', 'ⲅ'), ('Ⲇ', 'ⲇ'),
+  ('Ⲉ', 'ⲉ'), ('Ⲋ', 'ⲋ'), ('Ⲍ', 'ⲍ'), ('Ⲏ', 'ⲏ'),
+  ('Ⲑ', 'ⲑ'), ('Ⲓ', 'ⲓ'), ('Ⲕ', 'ⲕ'), ('Ⲗ', 'ⲗ'),
+  ('Ⲙ', 'ⲙ'), ('Ⲛ', 'ⲛ'), ('Ⲝ', 'ⲝ'), ('Ⲟ', 'ⲟ'),
+  ('Ⲡ', 'ⲡ'), ('Ⲣ', 'ⲣ'), ('Ⲥ', 'ⲥ'), ('Ⲧ', 'ⲧ'),
+  ('Ⲩ', 'ⲩ'), ('Ⲫ', 'ⲫ'), ('Ⲭ', 'ⲭ'), ('Ⲯ', 'ⲯ'),
+  ('Ⲱ', 'ⲱ'), ('Ⲳ', 'ⲳ'), ('Ⲵ', 'ⲵ'), ('Ⲷ', 'ⲷ'),
+  ('Ⲹ', 'ⲹ'), ('Ⲻ', 'ⲻ'), ('Ⲽ', 'ⲽ'), ('Ⲿ', 'ⲿ'),
+  ('Ⳁ', 'ⳁ'), ('Ⳃ', 'ⳃ'), ('Ⳅ', 'ⳅ'), ('Ⳇ', 'ⳇ'),
+  ('Ⳉ', 'ⳉ'), ('Ⳋ', 'ⳋ'), ('Ⳍ', 'ⳍ'), ('Ⳏ', 'ⳏ'),
+  ('Ⳑ', 'ⳑ'), ('Ⳓ', 'ⳓ'), ('Ⳕ', 'ⳕ'), ('Ⳗ', 'ⳗ'),
+  ('Ⳙ', 'ⳙ'), ('Ⳛ', 'ⳛ'), ('Ⳝ', 'ⳝ'), ('Ⳟ', 'ⳟ'),
+  ('Ⳡ', 'ⳡ'), ('Ⳣ', 'ⳣ'), ('Ⳬ', 'ⳬ'), ('Ⳮ', 'ⳮ'),
+  ('Ⳳ', 'ⳳ'), ('Ꙁ', 'ꙁ'), ('Ꙃ', 'ꙃ'), ('Ꙅ', 'ꙅ'),
+  ('Ꙇ', 'ꙇ'), ('Ꙉ', 'ꙉ'), ('Ꙋ', 'ꙋ'), ('Ꙍ', 'ꙍ'),
+  ('Ꙏ', 'ꙏ'), ('Ꙑ', 'ꙑ'), ('Ꙓ', 'ꙓ'), ('Ꙕ', 'ꙕ'),
+  ('Ꙗ', 'ꙗ'), ('Ꙙ', 'ꙙ'), ('Ꙛ', 'ꙛ'), ('Ꙝ', 'ꙝ'),
+  ('Ꙟ', 'ꙟ'), ('Ꙡ', 'ꙡ'), ('Ꙣ', 'ꙣ'), ('Ꙥ', 'ꙥ'),
+  ('Ꙧ', 'ꙧ'), ('Ꙩ', 'ꙩ'), ('Ꙫ', 'ꙫ'), ('Ꙭ', 'ꙭ'),
+  ('Ꚁ', 'ꚁ'), ('Ꚃ', 'ꚃ'), ('Ꚅ', 'ꚅ'), ('Ꚇ', 'ꚇ'),
+  ('Ꚉ', 'ꚉ'), ('Ꚋ', 'ꚋ'), ('Ꚍ', 'ꚍ'), ('Ꚏ', 'ꚏ'),
+  ('Ꚑ', 'ꚑ'), ('Ꚓ', 'ꚓ'), ('Ꚕ', 'ꚕ'), ('Ꚗ', 'ꚗ'),
+  ('Ꚙ', 'ꚙ'), ('Ꚛ', 'ꚛ'), ('Ꜣ', 'ꜣ'), ('Ꜥ', 'ꜥ'),
+  ('Ꜧ', 'ꜧ'), ('Ꜩ', 'ꜩ'), ('Ꜫ', 'ꜫ'), ('Ꜭ', 'ꜭ'),
+  ('Ꜯ', 'ꜯ'), ('Ꜳ', 'ꜳ'), ('Ꜵ', 'ꜵ'), ('Ꜷ', 'ꜷ'),
+  ('Ꜹ', 'ꜹ'), ('Ꜻ', 'ꜻ'), ('Ꜽ', 'ꜽ'), ('Ꜿ', 'ꜿ'),
+  ('Ꝁ', 'ꝁ'), ('Ꝃ', 'ꝃ'), ('Ꝅ', 'ꝅ'), ('Ꝇ', 'ꝇ'),
+  ('Ꝉ', 'ꝉ'), ('Ꝋ', 'ꝋ'), ('Ꝍ', 'ꝍ'), ('Ꝏ', 'ꝏ'),
+  ('Ꝑ', 'ꝑ'), ('Ꝓ', 'ꝓ'), ('Ꝕ', 'ꝕ'), ('Ꝗ', 'ꝗ'),
+  ('Ꝙ', 'ꝙ'), ('Ꝛ', 'ꝛ'), ('Ꝝ', 'ꝝ'), ('Ꝟ', 'ꝟ'),
+  ('Ꝡ', 'ꝡ'), ('Ꝣ', 'ꝣ'), ('Ꝥ', 'ꝥ'), ('Ꝧ', 'ꝧ'),
+  ('Ꝩ', 'ꝩ'), ('Ꝫ', 'ꝫ'), ('Ꝭ', 'ꝭ'), ('Ꝯ', 'ꝯ'),
+  ('Ꝺ', 'ꝺ'), ('Ꝼ', 'ꝼ'), ('Ᵹ', 'ᵹ'), ('Ꝿ', 'ꝿ'),
+  ('Ꞁ', 'ꞁ'), ('Ꞃ', 'ꞃ'), ('Ꞅ', 'ꞅ'), ('Ꞇ', 'ꞇ'),
+  ('Ꞌ', 'ꞌ'), ('Ɥ', 'ɥ'), ('Ꞑ', 'ꞑ'), ('Ꞓ', 'ꞓ'),
+  ('Ꞗ', 'ꞗ'), ('Ꞙ', 'ꞙ'), ('Ꞛ', 'ꞛ'), ('Ꞝ', 'ꞝ'),
+  ('Ꞟ', 'ꞟ'), ('Ꞡ', 'ꞡ'), ('Ꞣ', 'ꞣ'), ('Ꞥ', 'ꞥ'),
+  ('Ꞧ', 'ꞧ'), ('Ꞩ', 'ꞩ'), ('Ɦ', 'ɦ'), ('Ɜ', 'ɜ'), ('Ɡ', 'ɡ'),
+  ('Ɬ', 'ɬ'), ('Ɪ', 'ɪ'), ('Ʞ', 'ʞ'), ('Ʇ', 'ʇ'), ('Ʝ', 'ʝ'),
+  ('Ꭓ', 'ꭓ'), ('Ꞵ', 'ꞵ'), ('Ꞷ', 'ꞷ'), ('Ꞹ', 'ꞹ'),
+  ('Ꞻ', 'ꞻ'), ('Ꞽ', 'ꞽ'), ('Ꞿ', 'ꞿ'), ('Ꟁ', 'ꟁ'),
+  ('Ꟃ', 'ꟃ'), ('Ꞔ', 'ꞔ'), ('Ʂ', 'ʂ'), ('Ᶎ', 'ᶎ'),
+  ('Ꟈ', 'ꟈ'), ('Ꟊ', 'ꟊ'), ('Ꟑ', 'ꟑ'), ('Ꟗ', 'ꟗ'),
+  ('Ꟙ', 'ꟙ'), ('Ꟶ', 'ꟶ'), ('ꭰ', 'Ꭰ'), ('ꭱ', 'Ꭱ'),
+  ('ꭲ', 'Ꭲ'), ('ꭳ', 'Ꭳ'), ('ꭴ', 'Ꭴ'), ('ꭵ', 'Ꭵ'),
+  ('ꭶ', 'Ꭶ'), ('ꭷ', 'Ꭷ'), ('ꭸ', 'Ꭸ'), ('ꭹ', 'Ꭹ'),
+  ('ꭺ', 'Ꭺ'), ('ꭻ', 'Ꭻ'), ('ꭼ', 'Ꭼ'), ('ꭽ', 'Ꭽ'),
+  ('ꭾ', 'Ꭾ'), ('ꭿ', 'Ꭿ'), ('ꮀ', 'Ꮀ'), ('ꮁ', 'Ꮁ'),
+  ('ꮂ', 'Ꮂ'), ('ꮃ', 'Ꮃ'), ('ꮄ', 'Ꮄ'), ('ꮅ', 'Ꮅ'),
+  ('ꮆ', 'Ꮆ'), ('ꮇ', 'Ꮇ'), ('ꮈ', 'Ꮈ'), ('ꮉ', 'Ꮉ'),
+  ('ꮊ', 'Ꮊ'), ('ꮋ', 'Ꮋ'), ('ꮌ', 'Ꮌ'), ('ꮍ', 'Ꮍ'),
+  ('ꮎ', 'Ꮎ'), ('ꮏ', 'Ꮏ'), ('ꮐ', 'Ꮐ'), ('ꮑ', 'Ꮑ'),
+  ('ꮒ', 'Ꮒ'), ('ꮓ', 'Ꮓ'), ('ꮔ', 'Ꮔ'), ('ꮕ', 'Ꮕ'),
+  ('ꮖ', 'Ꮖ'), ('ꮗ', 'Ꮗ'), ('ꮘ', 'Ꮘ'), ('ꮙ', 'Ꮙ'),
+  ('ꮚ', 'Ꮚ'), ('ꮛ', 'Ꮛ'), ('ꮜ', 'Ꮜ'), ('ꮝ', 'Ꮝ'),
+  ('ꮞ', 'Ꮞ'), ('ꮟ', 'Ꮟ'), ('ꮠ', 'Ꮠ'), ('ꮡ', 'Ꮡ'),
+  ('ꮢ', 'Ꮢ'), ('ꮣ', 'Ꮣ'), ('ꮤ', 'Ꮤ'), ('ꮥ', 'Ꮥ'),
+  ('ꮦ', 'Ꮦ'), ('ꮧ', 'Ꮧ'), ('ꮨ', 'Ꮨ'), ('ꮩ', 'Ꮩ'),
+  ('ꮪ', 'Ꮪ'), ('ꮫ', 'Ꮫ'), ('ꮬ', 'Ꮬ'), ('ꮭ', 'Ꮭ'),
+  ('ꮮ', 'Ꮮ'), ('ꮯ', 'Ꮯ'), ('ꮰ', 'Ꮰ'), ('ꮱ', 'Ꮱ'),
+  ('ꮲ', 'Ꮲ'), ('ꮳ', 'Ꮳ'), ('ꮴ', 'Ꮴ'), ('ꮵ', 'Ꮵ'),
+  ('ꮶ', 'Ꮶ'), ('ꮷ', 'Ꮷ'), ('ꮸ', 'Ꮸ'), ('ꮹ', 'Ꮹ'),
+  ('ꮺ', 'Ꮺ'), ('ꮻ', 'Ꮻ'), ('ꮼ', 'Ꮼ'), ('ꮽ', 'Ꮽ'),
+  ('ꮾ', 'Ꮾ'), ('ꮿ', 'Ꮿ'), ('Ａ', 'ａ'), ('Ｂ', 'ｂ'),
+  ('Ｃ', 'ｃ'), ('Ｄ', 'ｄ'), ('Ｅ', 'ｅ'), ('Ｆ', 'ｆ'),
+  ('Ｇ', 'ｇ'), ('Ｈ', 'ｈ'), ('Ｉ', 'ｉ'), ('Ｊ', 'ｊ'),
+  ('Ｋ', 'ｋ'), ('Ｌ', 'ｌ'), ('Ｍ', 'ｍ'), ('Ｎ', 'ｎ'),
+  ('Ｏ', 'ｏ'), ('Ｐ', 'ｐ'), ('Ｑ', 'ｑ'), ('Ｒ', 'ｒ'),
+  ('Ｓ', 'ｓ'), ('Ｔ', 'ｔ'), ('Ｕ', 'ｕ'), ('Ｖ', 'ｖ'),
+  ('Ｗ', 'ｗ'), ('Ｘ', 'ｘ'), ('Ｙ', 'ｙ'), ('Ｚ', 'ｚ'),
+  ('𐐀', '𐐨'), ('𐐁', '𐐩'), ('𐐂', '𐐪'), ('𐐃', '𐐫'),
+  ('𐐄', '𐐬'), ('𐐅', '𐐭'), ('𐐆', '𐐮'), ('𐐇', '𐐯'),
+  ('𐐈', '𐐰'), ('𐐉', '𐐱'), ('𐐊', '𐐲'), ('𐐋', '𐐳'),
+  ('𐐌', '𐐴'), ('𐐍', '𐐵'), ('𐐎', '𐐶'), ('𐐏', '𐐷'),
+  ('𐐐', '𐐸'), ('𐐑', '𐐹'), ('𐐒', '𐐺'), ('𐐓', '𐐻'),
+  ('𐐔', '𐐼'), ('𐐕', '𐐽'), ('𐐖', '𐐾'), ('𐐗', '𐐿'),
+  ('𐐘', '𐑀'), ('𐐙', '𐑁'), ('𐐚', '𐑂'), ('𐐛', '𐑃'),
+  ('𐐜', '𐑄'), ('𐐝', '𐑅'), ('𐐞', '𐑆'), ('𐐟', '𐑇'),
+  ('𐐠', '𐑈'), ('𐐡', '𐑉'), ('𐐢', '𐑊'), ('𐐣', '𐑋'),
+  ('𐐤', '𐑌'), ('𐐥', '𐑍'), ('𐐦', '𐑎'), ('𐐧', '𐑏'),
+  ('𐒰', '𐓘'), ('𐒱', '𐓙'), ('𐒲', '𐓚'), ('𐒳', '𐓛'),
+  ('𐒴', '𐓜'), ('𐒵', '𐓝'), ('𐒶', '𐓞'), ('𐒷', '𐓟'),
+  ('𐒸', '𐓠'), ('𐒹', '𐓡'), ('𐒺', '𐓢'), ('𐒻', '𐓣'),
+  ('𐒼', '𐓤'), ('𐒽', '𐓥'), ('𐒾', '𐓦'), ('𐒿', '𐓧'),
+  ('𐓀', '𐓨'), ('𐓁', '𐓩'), ('𐓂', '𐓪'), ('𐓃', '𐓫'),
+  ('𐓄', '𐓬'), ('𐓅', '𐓭'), ('𐓆', '𐓮'), ('𐓇', '𐓯'),
+  ('𐓈', '𐓰'), ('𐓉', '𐓱'), ('𐓊', '𐓲'), ('𐓋', '𐓳'),
+  ('𐓌', '𐓴'), ('𐓍', '𐓵'), ('𐓎', '𐓶'), ('𐓏', '𐓷'),
+  ('𐓐', '𐓸'), ('𐓑', '𐓹'), ('𐓒', '𐓺'), ('𐓓', '𐓻'),
+  ('𐕰', '𐖗'), ('𐕱', '𐖘'), ('𐕲', '𐖙'), ('𐕳', '𐖚'),
+  ('𐕴', '𐖛'), ('𐕵', '𐖜'), ('𐕶', '𐖝'), ('𐕷', '𐖞'),
+  ('𐕸', '𐖟'), ('𐕹', '𐖠'), ('𐕺', '𐖡'), ('𐕼', '𐖣'),
+  ('𐕽', '𐖤'), ('𐕾', '𐖥'), ('𐕿', '𐖦'), ('𐖀', '𐖧'),
+  ('𐖁', '𐖨'), ('𐖂', '𐖩'), ('𐖃', '𐖪'), ('𐖄', '𐖫'),
+  ('𐖅', '𐖬'), ('𐖆', '𐖭'), ('𐖇', '𐖮'), ('𐖈', '𐖯'),
+  ('𐖉', '𐖰'), ('𐖊', '𐖱'), ('𐖌', '𐖳'), ('𐖍', '𐖴'),
+  ('𐖎', '𐖵'), ('𐖏', '𐖶'), ('𐖐', '𐖷'), ('𐖑', '𐖸'),
+  ('𐖒', '𐖹'), ('𐖔', '𐖻'), ('𐖕', '𐖼'), ('𐲀', '𐳀'),
+  ('𐲁', '𐳁'), ('𐲂', '𐳂'), ('𐲃', '𐳃'), ('𐲄', '𐳄'),
+  ('𐲅', '𐳅'), ('𐲆', '𐳆'), ('𐲇', '𐳇'), ('𐲈', '𐳈'),
+  ('𐲉', '𐳉'), ('𐲊', '𐳊'), ('𐲋', '𐳋'), ('𐲌', '𐳌'),
+  ('𐲍', '𐳍'), ('𐲎', '𐳎'), ('𐲏', '𐳏'), ('𐲐', '𐳐'),
+  ('𐲑', '𐳑'), ('𐲒', '𐳒'), ('𐲓', '𐳓'), ('𐲔', '𐳔'),
+  ('𐲕', '𐳕'), ('𐲖', '𐳖'), ('𐲗', '𐳗'), ('𐲘', '𐳘'),
+  ('𐲙', '𐳙'), ('𐲚', '𐳚'), ('𐲛', '𐳛'), ('𐲜', '𐳜'),
+  ('𐲝', '𐳝'), ('𐲞', '𐳞'), ('𐲟', '𐳟'), ('𐲠', '𐳠'),
+  ('𐲡', '𐳡'), ('𐲢', '𐳢'), ('𐲣', '𐳣'), ('𐲤', '𐳤'),
+  ('𐲥', '𐳥'), ('𐲦', '𐳦'), ('𐲧', '𐳧'), ('𐲨', '𐳨'),
+  ('𐲩', '𐳩'), ('𐲪', '𐳪'), ('𐲫', '𐳫'), ('𐲬', '𐳬'),
+  ('𐲭', '𐳭'), ('𐲮', '𐳮'), ('𐲯', '𐳯'), ('𐲰', '𐳰'),
+  ('𐲱', '𐳱'), ('𐲲', '𐳲'), ('𑢠', '𑣀'), ('𑢡', '𑣁'),
+  ('𑢢', '𑣂'), ('𑢣', '𑣃'), ('𑢤', '𑣄'), ('𑢥', '𑣅'),
+  ('𑢦', '𑣆'), ('𑢧', '𑣇'), ('𑢨', '𑣈'), ('𑢩', '𑣉'),
+  ('𑢪', '𑣊'), ('𑢫', '𑣋'), ('𑢬', '𑣌'), ('𑢭', '𑣍'),
+  ('𑢮', '𑣎'), ('𑢯', '𑣏'), ('𑢰', '𑣐'), ('𑢱', '𑣑'),
+  ('𑢲', '𑣒'), ('𑢳', '𑣓'), ('𑢴', '𑣔'), ('𑢵', '𑣕'),
+  ('𑢶', '𑣖'), ('𑢷', '𑣗'), ('𑢸', '𑣘'), ('𑢹', '𑣙'),
+  ('𑢺', '𑣚'), ('𑢻', '𑣛'), ('𑢼', '𑣜'), ('𑢽', '𑣝'),
+  ('𑢾', '𑣞'), ('𑢿', '𑣟'), ('𖹀', '𖹠'), ('𖹁', '𖹡'),
+  ('𖹂', '𖹢'), ('𖹃', '𖹣'), ('𖹄', '𖹤'), ('𖹅', '𖹥'),
+  ('𖹆', '𖹦'), ('𖹇', '𖹧'), ('𖹈', '𖹨'), ('𖹉', '𖹩'),
+  ('𖹊', '𖹪'), ('𖹋', '𖹫'), ('𖹌', '𖹬'), ('𖹍', '𖹭'),
+  ('𖹎', '𖹮'), ('𖹏', '𖹯'), ('𖹐', '𖹰'), ('𖹑', '𖹱'),
+  ('𖹒', '𖹲'), ('𖹓', '𖹳'), ('𖹔', '𖹴'), ('𖹕', '𖹵'),
+  ('𖹖', '𖹶'), ('𖹗', '𖹷'), ('𖹘', '𖹸'), ('𖹙', '𖹹'),
+  ('𖹚', '𖹺'), ('𖹛', '𖹻'), ('𖹜', '𖹼'), ('𖹝', '𖹽'),
+  ('𖹞', '𖹾'), ('𖹟', '𖹿'), ('𞤀', '𞤢'), ('𞤁', '𞤣'),
+  ('𞤂', '𞤤'), ('𞤃', '𞤥'), ('𞤄', '𞤦'), ('𞤅', '𞤧'),
+  ('𞤆', '𞤨'), ('𞤇', '𞤩'), ('𞤈', '𞤪'), ('𞤉', '𞤫'),
+  ('𞤊', '𞤬'), ('𞤋', '𞤭'), ('𞤌', '𞤮'), ('𞤍', '𞤯'),
+  ('𞤎', '𞤰'), ('𞤏', '𞤱'), ('𞤐', '𞤲'), ('𞤑', '𞤳'),
+  ('𞤒', '𞤴'), ('𞤓', '𞤵'), ('𞤔', '𞤶'), ('𞤕', '𞤷'),
+  ('𞤖', '𞤸'), ('𞤗', '𞤹'), ('𞤘', '𞤺'), ('𞤙', '𞤻'),
+  ('𞤚', '𞤼'), ('𞤛', '𞤽'), ('𞤜', '𞤾'), ('𞤝', '𞤿'),
+  ('𞤞', '𞥀'), ('𞤟', '𞥁'), ('𞤠', '𞥂'), ('𞤡', '𞥃'),
+];
diff --git a/matcher/src/chars/normalize.rs b/matcher/src/chars/normalize.rs
new file mode 100644
index 00000000..3de501aa
--- /dev/null
+++ b/matcher/src/chars/normalize.rs
@@ -0,0 +1,972 @@
+/// Normalize a Unicode character by converting Latin characters which are variants
+/// of ASCII characters to their latin equivalent.
+///
+/// Note that this method acts on single `char`s: if you want to perform full normalization, you
+/// should first split on graphemes, and then normalize each grapheme by normalizing the first
+/// `char` in the grapheme.
+///
+/// If a character does not normalize to a single ASCII character, no normalization is performed.
+///
+/// This performs normalization within the following Unicode blocks:
+///
+/// - [Latin-1 Supplement](https://en.wikipedia.org/wiki/Latin-1_Supplement)
+/// - [Latin Extended-A](https://en.wikipedia.org/wiki/Latin_Extended-A)
+/// - [Latin Extended-B](https://en.wikipedia.org/wiki/Latin_Extended-B)
+/// - [Latin Extended Additional](https://en.wikipedia.org/wiki/Latin_Extended_Additional)
+/// - [Superscripts and Subscripts](https://en.wikipedia.org/wiki/Superscripts_and_Subscripts)
+///
+/// If the character does not fall in this block, it is not normalized.
+///
+/// # Example
+/// ```
+/// # use nucleo_matcher::chars::normalize;
+/// assert_eq!(normalize('ä'), 'a');
+/// assert_eq!(normalize('Æ'), 'Æ');
+/// assert_eq!(normalize('ữ'), 'u');
+/// ```
+pub fn normalize(c: char) -> char {
+    // outside checked blocks
+    if c < '\u{a0}' || c >= '\u{20A0}' {
+        return c;
+    }
+    // Latin-1 Supplement, Extended-A, Extended-B
+    if c <= '\u{29f}' {
+        return LATIN_1AB[c as usize - '\u{a0}' as usize];
+    }
+    // between blocks
+    if c < '\u{1e00}' {
+        return c;
+    }
+    // Latin Extended Additional
+    if c <= '\u{1eff}' {
+        return LATIN_EXTENDED_ADDITIONAL[c as usize - '\u{1e00}' as usize];
+    }
+    // between blocks
+    if c < '\u{2070}' {
+        return c;
+    }
+    // Superscripts and subscripts
+    SUPERSCRIPTS_AND_SUBSCRIPTS[c as usize - '\u{2070}' as usize]
+}
+
+/// A char array corresponding to the following contiguous Unicode blocks:
+///
+/// - [Latin-1 Supplement](https://en.wikipedia.org/wiki/Latin-1_Supplement)
+/// - [Latin Extended-A](https://en.wikipedia.org/wiki/Latin_Extended-A)
+/// - [Latin Extended-B](https://en.wikipedia.org/wiki/Latin_Extended-B)
+///
+/// This covers the range `'\u{a0}'..='\u{29f}'`.
+static LATIN_1AB: [char; 512] = [
+    '\u{a0}', // invisible NON BREAKING SPACE
+    '!',      // '¡'; '\u{a1}'
+    '¢',      // '¢'; '\u{a2}'
+    '£',      // '£'; '\u{a3}'
+    '¤',      // '¤'; '\u{a4}'
+    '¥',      // '¥'; '\u{a5}'
+    '¦',      // '¦'; '\u{a6}'
+    '§',      // '§'; '\u{a7}'
+    '¨',      // '¨'; '\u{a8}'
+    '©',      // '©'; '\u{a9}'
+    'a',      // 'ª'; '\u{aa}'
+    '«',      // '«'; '\u{ab}'
+    '¬',      // '¬'; '\u{ac}'
+    '\u{ad}', // invisible SOFT HYPHEN
+    '®',      // '®'; '\u{ae}'
+    '¯',      // '¯'; '\u{af}'
+    '°',      // '°'; '\u{b0}'
+    '±',      // '±'; '\u{b1}'
+    '2',      // '²'; '\u{b2}'
+    '3',      // '³'; '\u{b3}'
+    '´',      // '´'; '\u{b4}'
+    'µ',      // 'µ'; '\u{b5}'
+    '¶',      // '¶'; '\u{b6}'
+    '·',      // '·'; '\u{b7}'
+    '¸',      // '¸'; '\u{b8}'
+    '1',      // '¹'; '\u{b9}'
+    '0',      // 'º'; '\u{ba}'
+    '»',      // '»'; '\u{bb}'
+    '¼',      // '¼'; '\u{bc}'
+    '½',      // '½'; '\u{bd}'
+    '¾',      // '¾'; '\u{be}'
+    '?',      // '¿'; '\u{bf}'
+    'A',      // 'À'; '\u{c0}'
+    'A',      // 'Á'; '\u{c1}'
+    'A',      // 'Â'; '\u{c2}'
+    'A',      // 'Ã'; '\u{c3}'
+    'A',      // 'Ä'; '\u{c4}'
+    'A',      // 'Å'; '\u{c5}'
+    'Æ',      // 'Æ'; '\u{c6}'
+    'C',      // 'Ç'; '\u{c7}'
+    'E',      // 'È'; '\u{c8}'
+    'E',      // 'É'; '\u{c9}'
+    'E',      // 'Ê'; '\u{ca}'
+    'E',      // 'Ë'; '\u{cb}'
+    'I',      // 'Ì'; '\u{cc}'
+    'I',      // 'Í'; '\u{cd}'
+    'I',      // 'Î'; '\u{ce}'
+    'I',      // 'Ï'; '\u{cf}'
+    'D',      // 'Ð'; '\u{d0}'
+    'N',      // 'Ñ'; '\u{d1}'
+    'O',      // 'Ò'; '\u{d2}'
+    'O',      // 'Ó'; '\u{d3}'
+    'O',      // 'Ô'; '\u{d4}'
+    'O',      // 'Õ'; '\u{d5}'
+    'O',      // 'Ö'; '\u{d6}'
+    '×',      // '×'; '\u{d7}'
+    'O',      // 'Ø'; '\u{d8}'
+    'U',      // 'Ù'; '\u{d9}'
+    'U',      // 'Ú'; '\u{da}'
+    'U',      // 'Û'; '\u{db}'
+    'U',      // 'Ü'; '\u{dc}'
+    'Y',      // 'Ý'; '\u{dd}'
+    'Þ',      // 'Þ'; '\u{de}'
+    's',      // 'ß'; '\u{df}'
+    'a',      // 'à'; '\u{e0}'
+    'a',      // 'á'; '\u{e1}'
+    'a',      // 'â'; '\u{e2}'
+    'a',      // 'ã'; '\u{e3}'
+    'a',      // 'ä'; '\u{e4}'
+    'a',      // 'å'; '\u{e5}'
+    'æ',      // 'æ'; '\u{e6}'
+    'c',      // 'ç'; '\u{e7}'
+    'e',      // 'è'; '\u{e8}'
+    'e',      // 'é'; '\u{e9}'
+    'e',      // 'ê'; '\u{ea}'
+    'e',      // 'ë'; '\u{eb}'
+    'i',      // 'ì'; '\u{ec}'
+    'i',      // 'í'; '\u{ed}'
+    'i',      // 'î'; '\u{ee}'
+    'i',      // 'ï'; '\u{ef}'
+    'd',      // 'ð'; '\u{f0}'
+    'n',      // 'ñ'; '\u{f1}'
+    'o',      // 'ò'; '\u{f2}'
+    'o',      // 'ó'; '\u{f3}'
+    'o',      // 'ô'; '\u{f4}'
+    'o',      // 'õ'; '\u{f5}'
+    'o',      // 'ö'; '\u{f6}'
+    '÷',      // '÷'; '\u{f7}'
+    'o',      // 'ø'; '\u{f8}'
+    'u',      // 'ù'; '\u{f9}'
+    'u',      // 'ú'; '\u{fa}'
+    'u',      // 'û'; '\u{fb}'
+    'u',      // 'ü'; '\u{fc}'
+    'y',      // 'ý'; '\u{fd}'
+    'þ',      // 'þ'; '\u{fe}'
+    'y',      // 'ÿ'; '\u{ff}'
+    'A',      // 'Ā'; '\u{100}'
+    'a',      // 'ā'; '\u{101}'
+    'A',      // 'Ă'; '\u{102}'
+    'a',      // 'ă'; '\u{103}'
+    'A',      // 'Ą'; '\u{104}'
+    'a',      // 'ą'; '\u{105}'
+    'C',      // 'Ć'; '\u{106}'
+    'c',      // 'ć'; '\u{107}'
+    'C',      // 'Ĉ'; '\u{108}'
+    'c',      // 'ĉ'; '\u{109}'
+    'C',      // 'Ċ'; '\u{10a}'
+    'c',      // 'ċ'; '\u{10b}'
+    'C',      // 'Č'; '\u{10c}'
+    'c',      // 'č'; '\u{10d}'
+    'D',      // 'Ď'; '\u{10e}'
+    'd',      // 'ď'; '\u{10f}'
+    'D',      // 'Đ'; '\u{110}'
+    'd',      // 'đ'; '\u{111}'
+    'E',      // 'Ē'; '\u{112}'
+    'e',      // 'ē'; '\u{113}'
+    'E',      // 'Ĕ'; '\u{114}'
+    'e',      // 'ĕ'; '\u{115}'
+    'E',      // 'Ė'; '\u{116}'
+    'e',      // 'ė'; '\u{117}'
+    'E',      // 'Ę'; '\u{118}'
+    'e',      // 'ę'; '\u{119}'
+    'E',      // 'Ě'; '\u{11a}'
+    'e',      // 'ě'; '\u{11b}'
+    'G',      // 'Ĝ'; '\u{11c}'
+    'g',      // 'ĝ'; '\u{11d}'
+    'G',      // 'Ğ'; '\u{11e}'
+    'g',      // 'ğ'; '\u{11f}'
+    'G',      // 'Ġ'; '\u{120}'
+    'g',      // 'ġ'; '\u{121}'
+    'G',      // 'Ģ'; '\u{122}'
+    'g',      // 'ģ'; '\u{123}'
+    'H',      // 'Ĥ'; '\u{124}'
+    'h',      // 'ĥ'; '\u{125}'
+    'H',      // 'Ħ'; '\u{126}'
+    'h',      // 'ħ'; '\u{127}'
+    'I',      // 'Ĩ'; '\u{128}'
+    'i',      // 'ĩ'; '\u{129}'
+    'I',      // 'Ī'; '\u{12a}'
+    'i',      // 'ī'; '\u{12b}'
+    'I',      // 'Ĭ'; '\u{12c}'
+    'i',      // 'ĭ'; '\u{12d}'
+    'I',      // 'Į'; '\u{12e}'
+    'i',      // 'į'; '\u{12f}'
+    'I',      // 'İ'; '\u{130}'
+    'i',      // 'ı'; '\u{131}'
+    'Ĳ',      // 'Ĳ'; '\u{132}'
+    'ĳ',      // 'ĳ'; '\u{133}'
+    'J',      // 'Ĵ'; '\u{134}'
+    'j',      // 'ĵ'; '\u{135}'
+    'K',      // 'Ķ'; '\u{136}'
+    'k',      // 'ķ'; '\u{137}'
+    'ĸ',      // 'ĸ'; '\u{138}'
+    'L',      // 'Ĺ'; '\u{139}'
+    'l',      // 'ĺ'; '\u{13a}'
+    'L',      // 'Ļ'; '\u{13b}'
+    'l',      // 'ļ'; '\u{13c}'
+    'L',      // 'Ľ'; '\u{13d}'
+    'l',      // 'ľ'; '\u{13e}'
+    'L',      // 'Ŀ'; '\u{13f}'
+    'l',      // 'ŀ'; '\u{140}'
+    'L',      // 'Ł'; '\u{141}'
+    'l',      // 'ł'; '\u{142}'
+    'N',      // 'Ń'; '\u{143}'
+    'n',      // 'ń'; '\u{144}'
+    'N',      // 'Ņ'; '\u{145}'
+    'n',      // 'ņ'; '\u{146}'
+    'N',      // 'Ň'; '\u{147}'
+    'n',      // 'ň'; '\u{148}'
+    'n',      // 'ŉ'; '\u{149}'
+    'N',      // 'Ŋ'; '\u{14a}'
+    'n',      // 'ŋ'; '\u{14b}'
+    'O',      // 'Ō'; '\u{14c}'
+    'o',      // 'ō'; '\u{14d}'
+    'O',      // 'Ŏ'; '\u{14e}'
+    'o',      // 'ŏ'; '\u{14f}'
+    'O',      // 'Ő'; '\u{150}'
+    'o',      // 'ő'; '\u{151}'
+    'Œ',      // 'Œ'; '\u{152}'
+    'œ',      // 'œ'; '\u{153}'
+    'R',      // 'Ŕ'; '\u{154}'
+    'r',      // 'ŕ'; '\u{155}'
+    'R',      // 'Ŗ'; '\u{156}'
+    'r',      // 'ŗ'; '\u{157}'
+    'R',      // 'Ř'; '\u{158}'
+    'r',      // 'ř'; '\u{159}'
+    'S',      // 'Ś'; '\u{15a}'
+    's',      // 'ś'; '\u{15b}'
+    'S',      // 'Ŝ'; '\u{15c}'
+    's',      // 'ŝ'; '\u{15d}'
+    'S',      // 'Ş'; '\u{15e}'
+    's',      // 'ş'; '\u{15f}'
+    'S',      // 'Š'; '\u{160}'
+    's',      // 'š'; '\u{161}'
+    'T',      // 'Ţ'; '\u{162}'
+    't',      // 'ţ'; '\u{163}'
+    'T',      // 'Ť'; '\u{164}'
+    't',      // 'ť'; '\u{165}'
+    'T',      // 'Ŧ'; '\u{166}'
+    't',      // 'ŧ'; '\u{167}'
+    'U',      // 'Ũ'; '\u{168}'
+    'u',      // 'ũ'; '\u{169}'
+    'U',      // 'Ū'; '\u{16a}'
+    'u',      // 'ū'; '\u{16b}'
+    'U',      // 'Ŭ'; '\u{16c}'
+    'u',      // 'ŭ'; '\u{16d}'
+    'U',      // 'Ů'; '\u{16e}'
+    'u',      // 'ů'; '\u{16f}'
+    'U',      // 'Ű'; '\u{170}'
+    'u',      // 'ű'; '\u{171}'
+    'U',      // 'Ų'; '\u{172}'
+    'u',      // 'ų'; '\u{173}'
+    'W',      // 'Ŵ'; '\u{174}'
+    'w',      // 'ŵ'; '\u{175}'
+    'Y',      // 'Ŷ'; '\u{176}'
+    'y',      // 'ŷ'; '\u{177}'
+    'Y',      // 'Ÿ'; '\u{178}'
+    'Z',      // 'Ź'; '\u{179}'
+    'z',      // 'ź'; '\u{17a}'
+    'Z',      // 'Ż'; '\u{17b}'
+    'z',      // 'ż'; '\u{17c}'
+    'Z',      // 'Ž'; '\u{17d}'
+    'z',      // 'ž'; '\u{17e}'
+    's',      // 'ſ'; '\u{17f}'
+    'b',      // 'ƀ'; '\u{180}'
+    'B',      // 'Ɓ'; '\u{181}'
+    'b',      // 'Ƃ'; '\u{182}'
+    'b',      // 'ƃ'; '\u{183}'
+    'b',      // 'Ƅ'; '\u{184}'
+    'ƅ',      // 'ƅ'; '\u{185}'
+    'O',      // 'Ɔ'; '\u{186}'
+    'C',      // 'Ƈ'; '\u{187}'
+    'c',      // 'ƈ'; '\u{188}'
+    'D',      // 'Ɖ'; '\u{189}'
+    'D',      // 'Ɗ'; '\u{18a}'
+    'd',      // 'Ƌ'; '\u{18b}'
+    'd',      // 'ƌ'; '\u{18c}'
+    'ƍ',      // 'ƍ'; '\u{18d}'
+    'E',      // 'Ǝ'; '\u{18e}'
+    'e',      // 'Ə'; '\u{18f}'
+    'E',      // 'Ɛ'; '\u{190}'
+    'F',      // 'Ƒ'; '\u{191}'
+    'f',      // 'ƒ'; '\u{192}'
+    'G',      // 'Ɠ'; '\u{193}'
+    'Ɣ',      // 'Ɣ'; '\u{194}'
+    'h',      // 'ƕ'; '\u{195}'
+    'I',      // 'Ɩ'; '\u{196}'
+    'I',      // 'Ɨ'; '\u{197}'
+    'Ƙ',      // 'Ƙ'; '\u{198}'
+    'k',      // 'ƙ'; '\u{199}'
+    'l',      // 'ƚ'; '\u{19a}'
+    'ƛ',      // 'ƛ'; '\u{19b}'
+    'M',      // 'Ɯ'; '\u{19c}'
+    'N',      // 'Ɲ'; '\u{19d}'
+    'n',      // 'ƞ'; '\u{19e}'
+    'O',      // 'Ɵ'; '\u{19f}'
+    'O',      // 'Ơ'; '\u{1a0}'
+    'o',      // 'ơ'; '\u{1a1}'
+    'Ƣ',      // 'Ƣ'; '\u{1a2}'
+    'ƣ',      // 'ƣ'; '\u{1a3}'
+    'P',      // 'Ƥ'; '\u{1a4}'
+    'p',      // 'ƥ'; '\u{1a5}'
+    'R',      // 'Ʀ'; '\u{1a6}'
+    'S',      // 'Ƨ'; '\u{1a7}'
+    's',      // 'ƨ'; '\u{1a8}'
+    'Ʃ',      // 'Ʃ'; '\u{1a9}'
+    'l',      // 'ƪ'; '\u{1aa}'
+    't',      // 'ƫ'; '\u{1ab}'
+    'T',      // 'Ƭ'; '\u{1ac}'
+    't',      // 'ƭ'; '\u{1ad}'
+    'T',      // 'Ʈ'; '\u{1ae}'
+    'U',      // 'Ư'; '\u{1af}'
+    'u',      // 'ư'; '\u{1b0}'
+    'Ʊ',      // 'Ʊ'; '\u{1b1}'
+    'V',      // 'Ʋ'; '\u{1b2}'
+    'Y',      // 'Ƴ'; '\u{1b3}'
+    'y',      // 'ƴ'; '\u{1b4}'
+    'Z',      // 'Ƶ'; '\u{1b5}'
+    'z',      // 'ƶ'; '\u{1b6}'
+    'Ʒ',      // 'Ʒ'; '\u{1b7}'
+    'Ƹ',      // 'Ƹ'; '\u{1b8}'
+    'ƹ',      // 'ƹ'; '\u{1b9}'
+    'ƺ',      // 'ƺ'; '\u{1ba}'
+    'ƻ',      // 'ƻ'; '\u{1bb}'
+    'Ƽ',      // 'Ƽ'; '\u{1bc}'
+    'ƽ',      // 'ƽ'; '\u{1bd}'
+    'ƾ',      // 'ƾ'; '\u{1be}'
+    'ƿ',      // 'ƿ'; '\u{1bf}'
+    'ǀ',      // 'ǀ'; '\u{1c0}'
+    'ǁ',      // 'ǁ'; '\u{1c1}'
+    'ǂ',      // 'ǂ'; '\u{1c2}'
+    '!',      // 'ǃ'; '\u{1c3}'
+    'Ǆ',      // 'Ǆ'; '\u{1c4}'
+    'ǅ',      // 'ǅ'; '\u{1c5}'
+    'ǆ',      // 'ǆ'; '\u{1c6}'
+    'Ǉ',      // 'Ǉ'; '\u{1c7}'
+    'ǈ',      // 'ǈ'; '\u{1c8}'
+    'ǉ',      // 'ǉ'; '\u{1c9}'
+    'Ǌ',      // 'Ǌ'; '\u{1ca}'
+    'ǋ',      // 'ǋ'; '\u{1cb}'
+    'ǌ',      // 'ǌ'; '\u{1cc}'
+    'A',      // 'Ǎ'; '\u{1cd}'
+    'a',      // 'ǎ'; '\u{1ce}'
+    'I',      // 'Ǐ'; '\u{1cf}'
+    'i',      // 'ǐ'; '\u{1d0}'
+    'O',      // 'Ǒ'; '\u{1d1}'
+    'o',      // 'ǒ'; '\u{1d2}'
+    'U',      // 'Ǔ'; '\u{1d3}'
+    'u',      // 'ǔ'; '\u{1d4}'
+    'U',      // 'Ǖ'; '\u{1d5}'
+    'u',      // 'ǖ'; '\u{1d6}'
+    'U',      // 'Ǘ'; '\u{1d7}'
+    'u',      // 'ǘ'; '\u{1d8}'
+    'U',      // 'Ǚ'; '\u{1d9}'
+    'u',      // 'ǚ'; '\u{1da}'
+    'U',      // 'Ǜ'; '\u{1db}'
+    'u',      // 'ǜ'; '\u{1dc}'
+    'e',      // 'ǝ'; '\u{1dd}'
+    'A',      // 'Ǟ'; '\u{1de}'
+    'a',      // 'ǟ'; '\u{1df}'
+    'A',      // 'Ǡ'; '\u{1e0}'
+    'a',      // 'ǡ'; '\u{1e1}'
+    'Æ',      // 'Ǣ'; '\u{1e2}'
+    'æ',      // 'ǣ'; '\u{1e3}'
+    'G',      // 'Ǥ'; '\u{1e4}'
+    'g',      // 'ǥ'; '\u{1e5}'
+    'G',      // 'Ǧ'; '\u{1e6}'
+    'g',      // 'ǧ'; '\u{1e7}'
+    'K',      // 'Ǩ'; '\u{1e8}'
+    'k',      // 'ǩ'; '\u{1e9}'
+    'O',      // 'Ǫ'; '\u{1ea}'
+    'o',      // 'ǫ'; '\u{1eb}'
+    'O',      // 'Ǭ'; '\u{1ec}'
+    'o',      // 'ǭ'; '\u{1ed}'
+    'Ǯ',      // 'Ǯ'; '\u{1ee}'
+    'ǯ',      // 'ǯ'; '\u{1ef}'
+    'j',      // 'ǰ'; '\u{1f0}'
+    'Ǳ',      // 'Ǳ'; '\u{1f1}'
+    'ǲ',      // 'ǲ'; '\u{1f2}'
+    'ǳ',      // 'ǳ'; '\u{1f3}'
+    'G',      // 'Ǵ'; '\u{1f4}'
+    'g',      // 'ǵ'; '\u{1f5}'
+    'Ƕ',      // 'Ƕ'; '\u{1f6}'
+    'Ƿ',      // 'Ƿ'; '\u{1f7}'
+    'N',      // 'Ǹ'; '\u{1f8}'
+    'n',      // 'ǹ'; '\u{1f9}'
+    'A',      // 'Ǻ'; '\u{1fa}'
+    'a',      // 'ǻ'; '\u{1fb}'
+    'Æ',      // 'Ǽ'; '\u{1fc}'
+    'æ',      // 'ǽ'; '\u{1fd}'
+    'O',      // 'Ǿ'; '\u{1fe}'
+    'o',      // 'ǿ'; '\u{1ff}'
+    'A',      // 'Ȁ'; '\u{200}'
+    'a',      // 'ȁ'; '\u{201}'
+    'A',      // 'Ȃ'; '\u{202}'
+    'a',      // 'ȃ'; '\u{203}'
+    'E',      // 'Ȅ'; '\u{204}'
+    'e',      // 'ȅ'; '\u{205}'
+    'E',      // 'Ȇ'; '\u{206}'
+    'e',      // 'ȇ'; '\u{207}'
+    'I',      // 'Ȉ'; '\u{208}'
+    'i',      // 'ȉ'; '\u{209}'
+    'I',      // 'Ȋ'; '\u{20a}'
+    'i',      // 'ȋ'; '\u{20b}'
+    'O',      // 'Ȍ'; '\u{20c}'
+    'o',      // 'ȍ'; '\u{20d}'
+    'O',      // 'Ȏ'; '\u{20e}'
+    'o',      // 'ȏ'; '\u{20f}'
+    'R',      // 'Ȑ'; '\u{210}'
+    'r',      // 'ȑ'; '\u{211}'
+    'R',      // 'Ȓ'; '\u{212}'
+    'r',      // 'ȓ'; '\u{213}'
+    'U',      // 'Ȕ'; '\u{214}'
+    'u',      // 'ȕ'; '\u{215}'
+    'U',      // 'Ȗ'; '\u{216}'
+    'u',      // 'ȗ'; '\u{217}'
+    'S',      // 'Ș'; '\u{218}'
+    's',      // 'ș'; '\u{219}'
+    'T',      // 'Ț'; '\u{21a}'
+    't',      // 'ț'; '\u{21b}'
+    'Ȝ',      // 'Ȝ'; '\u{21c}'
+    'ȝ',      // 'ȝ'; '\u{21d}'
+    'H',      // 'Ȟ'; '\u{21e}'
+    'h',      // 'ȟ'; '\u{21f}'
+    'N',      // 'Ƞ'; '\u{220}'
+    'd',      // 'ȡ'; '\u{221}'
+    'Ȣ',      // 'Ȣ'; '\u{222}'
+    'ȣ',      // 'ȣ'; '\u{223}'
+    'Z',      // 'Ȥ'; '\u{224}'
+    'z',      // 'ȥ'; '\u{225}'
+    'A',      // 'Ȧ'; '\u{226}'
+    'a',      // 'ȧ'; '\u{227}'
+    'E',      // 'Ȩ'; '\u{228}'
+    'e',      // 'ȩ'; '\u{229}'
+    'O',      // 'Ȫ'; '\u{22a}'
+    'o',      // 'ȫ'; '\u{22b}'
+    'O',      // 'Ȭ'; '\u{22c}'
+    'o',      // 'ȭ'; '\u{22d}'
+    'O',      // 'Ȯ'; '\u{22e}'
+    'o',      // 'ȯ'; '\u{22f}'
+    'O',      // 'Ȱ'; '\u{230}'
+    'o',      // 'ȱ'; '\u{231}'
+    'Y',      // 'Ȳ'; '\u{232}'
+    'y',      // 'ȳ'; '\u{233}'
+    'l',      // 'ȴ'; '\u{234}'
+    'n',      // 'ȵ'; '\u{235}'
+    't',      // 'ȶ'; '\u{236}'
+    'j',      // 'ȷ'; '\u{237}'
+    'ȸ',      // 'ȸ'; '\u{238}'
+    'ȹ',      // 'ȹ'; '\u{239}'
+    'A',      // 'Ⱥ'; '\u{23a}'
+    'C',      // 'Ȼ'; '\u{23b}'
+    'c',      // 'ȼ'; '\u{23c}'
+    'L',      // 'Ƚ'; '\u{23d}'
+    'T',      // 'Ⱦ'; '\u{23e}'
+    's',      // 'ȿ'; '\u{23f}'
+    'z',      // 'ɀ'; '\u{240}'
+    'Ɂ',      // 'Ɂ'; '\u{241}'
+    'ɂ',      // 'ɂ'; '\u{242}'
+    'B',      // 'Ƀ'; '\u{243}'
+    'U',      // 'Ʉ'; '\u{244}'
+    'V',      // 'Ʌ'; '\u{245}'
+    'E',      // 'Ɇ'; '\u{246}'
+    'e',      // 'ɇ'; '\u{247}'
+    'J',      // 'Ɉ'; '\u{248}'
+    'j',      // 'ɉ'; '\u{249}'
+    'Q',      // 'Ɋ'; '\u{24a}'
+    'q',      // 'ɋ'; '\u{24b}'
+    'R',      // 'Ɍ'; '\u{24c}'
+    'r',      // 'ɍ'; '\u{24d}'
+    'Y',      // 'Ɏ'; '\u{24e}'
+    'y',      // 'ɏ'; '\u{24f}'
+    'a',      // 'ɐ'; '\u{250}'
+    'a',      // 'ɑ'; '\u{251}'
+    'a',      // 'ɒ'; '\u{252}'
+    'b',      // 'ɓ'; '\u{253}'
+    'c',      // 'ɔ'; '\u{254}'
+    'c',      // 'ɕ'; '\u{255}'
+    'd',      // 'ɖ'; '\u{256}'
+    'd',      // 'ɗ'; '\u{257}'
+    'e',      // 'ɘ'; '\u{258}'
+    'e',      // 'ə'; '\u{259}'
+    'e',      // 'ɚ'; '\u{25a}'
+    'e',      // 'ɛ'; '\u{25b}'
+    'e',      // 'ɜ'; '\u{25c}'
+    'e',      // 'ɝ'; '\u{25d}'
+    'e',      // 'ɞ'; '\u{25e}'
+    'j',      // 'ɟ'; '\u{25f}'
+    'g',      // 'ɠ'; '\u{260}'
+    'g',      // 'ɡ'; '\u{261}'
+    'G',      // 'ɢ'; '\u{262}'
+    'g',      // 'ɣ'; '\u{263}'
+    'u',      // 'ɤ'; '\u{264}'
+    'h',      // 'ɥ'; '\u{265}'
+    'h',      // 'ɦ'; '\u{266}'
+    'h',      // 'ɧ'; '\u{267}'
+    'i',      // 'ɨ'; '\u{268}'
+    'i',      // 'ɩ'; '\u{269}'
+    'I',      // 'ɪ'; '\u{26a}'
+    'l',      // 'ɫ'; '\u{26b}'
+    'l',      // 'ɬ'; '\u{26c}'
+    'l',      // 'ɭ'; '\u{26d}'
+    'ɮ',      // 'ɮ'; '\u{26e}'
+    'm',      // 'ɯ'; '\u{26f}'
+    'm',      // 'ɰ'; '\u{270}'
+    'm',      // 'ɱ'; '\u{271}'
+    'n',      // 'ɲ'; '\u{272}'
+    'n',      // 'ɳ'; '\u{273}'
+    'N',      // 'ɴ'; '\u{274}'
+    'o',      // 'ɵ'; '\u{275}'
+    'ɶ',      // 'ɶ'; '\u{276}'
+    'ɷ',      // 'ɷ'; '\u{277}'
+    'ɸ',      // 'ɸ'; '\u{278}'
+    'r',      // 'ɹ'; '\u{279}'
+    'r',      // 'ɺ'; '\u{27a}'
+    'r',      // 'ɻ'; '\u{27b}'
+    'r',      // 'ɼ'; '\u{27c}'
+    'r',      // 'ɽ'; '\u{27d}'
+    'r',      // 'ɾ'; '\u{27e}'
+    'r',      // 'ɿ'; '\u{27f}'
+    'R',      // 'ʀ'; '\u{280}'
+    'R',      // 'ʁ'; '\u{281}'
+    's',      // 'ʂ'; '\u{282}'
+    'ʃ',      // 'ʃ'; '\u{283}'
+    'ʄ',      // 'ʄ'; '\u{284}'
+    'ʅ',      // 'ʅ'; '\u{285}'
+    'ʆ',      // 'ʆ'; '\u{286}'
+    't',      // 'ʇ'; '\u{287}'
+    't',      // 'ʈ'; '\u{288}'
+    'u',      // 'ʉ'; '\u{289}'
+    'ʊ',      // 'ʊ'; '\u{28a}'
+    'v',      // 'ʋ'; '\u{28b}'
+    'v',      // 'ʌ'; '\u{28c}'
+    'w',      // 'ʍ'; '\u{28d}'
+    'y',      // 'ʎ'; '\u{28e}'
+    'Y',      // 'ʏ'; '\u{28f}'
+    'z',      // 'ʐ'; '\u{290}'
+    'z',      // 'ʑ'; '\u{291}'
+    'ʒ',      // 'ʒ'; '\u{292}'
+    'ʓ',      // 'ʓ'; '\u{293}'
+    'ʔ',      // 'ʔ'; '\u{294}'
+    'ʕ',      // 'ʕ'; '\u{295}'
+    'ʖ',      // 'ʖ'; '\u{296}'
+    'c',      // 'ʗ'; '\u{297}'
+    'ʘ',      // 'ʘ'; '\u{298}'
+    'B',      // 'ʙ'; '\u{299}'
+    'e',      // 'ʚ'; '\u{29a}'
+    'G',      // 'ʛ'; '\u{29b}'
+    'H',      // 'ʜ'; '\u{29c}'
+    'j',      // 'ʝ'; '\u{29d}'
+    'k',      // 'ʞ'; '\u{29e}'
+    'L',      // 'ʟ'; '\u{29f}'
+];
+
+/// A char array corresponding to the following Unicode block:
+///
+/// - [Latin Extended Additional](https://en.wikipedia.org/wiki/Latin_Extended_Additional)
+///
+/// This covers the range `'\u{1e00}'..='\u{1eff}'`.
+static LATIN_EXTENDED_ADDITIONAL: [char; 256] = [
+    'A', // 'Ḁ'; '\u{1e00}'
+    'a', // 'ḁ'; '\u{1e01}'
+    'B', // 'Ḃ'; '\u{1e02}'
+    'b', // 'ḃ'; '\u{1e03}'
+    'B', // 'Ḅ'; '\u{1e04}'
+    'b', // 'ḅ'; '\u{1e05}'
+    'B', // 'Ḇ'; '\u{1e06}'
+    'b', // 'ḇ'; '\u{1e07}'
+    'C', // 'Ḉ'; '\u{1e08}'
+    'c', // 'ḉ'; '\u{1e09}'
+    'D', // 'Ḋ'; '\u{1e0a}'
+    'e', // 'ḋ'; '\u{1e0b}'
+    'D', // 'Ḍ'; '\u{1e0c}'
+    'd', // 'ḍ'; '\u{1e0d}'
+    'D', // 'Ḏ'; '\u{1e0e}'
+    'd', // 'ḏ'; '\u{1e0f}'
+    'D', // 'Ḑ'; '\u{1e10}'
+    'd', // 'ḑ'; '\u{1e11}'
+    'D', // 'Ḓ'; '\u{1e12}'
+    'd', // 'ḓ'; '\u{1e13}'
+    'E', // 'Ḕ'; '\u{1e14}'
+    'e', // 'ḕ'; '\u{1e15}'
+    'E', // 'Ḗ'; '\u{1e16}'
+    'e', // 'ḗ'; '\u{1e17}'
+    'E', // 'Ḙ'; '\u{1e18}'
+    'e', // 'ḙ'; '\u{1e19}'
+    'E', // 'Ḛ'; '\u{1e1a}'
+    'e', // 'ḛ'; '\u{1e1b}'
+    'E', // 'Ḝ'; '\u{1e1c}'
+    'e', // 'ḝ'; '\u{1e1d}'
+    'F', // 'Ḟ'; '\u{1e1e}'
+    'f', // 'ḟ'; '\u{1e1f}'
+    'G', // 'Ḡ'; '\u{1e20}'
+    'g', // 'ḡ'; '\u{1e21}'
+    'H', // 'Ḣ'; '\u{1e22}'
+    'g', // 'ḣ'; '\u{1e23}'
+    'H', // 'Ḥ'; '\u{1e24}'
+    'g', // 'ḥ'; '\u{1e25}'
+    'H', // 'Ḧ'; '\u{1e26}'
+    'g', // 'ḧ'; '\u{1e27}'
+    'H', // 'Ḩ'; '\u{1e28}'
+    'g', // 'ḩ'; '\u{1e29}'
+    'H', // 'Ḫ'; '\u{1e2a}'
+    'h', // 'ḫ'; '\u{1e2b}'
+    'I', // 'Ḭ'; '\u{1e2c}'
+    'i', // 'ḭ'; '\u{1e2d}'
+    'I', // 'Ḯ'; '\u{1e2e}'
+    'i', // 'ḯ'; '\u{1e2f}'
+    'K', // 'Ḱ'; '\u{1e30}'
+    'k', // 'ḱ'; '\u{1e31}'
+    'K', // 'Ḳ'; '\u{1e32}'
+    'k', // 'ḳ'; '\u{1e33}'
+    'K', // 'Ḵ'; '\u{1e34}'
+    'k', // 'ḵ'; '\u{1e35}'
+    'L', // 'Ḷ'; '\u{1e36}'
+    'l', // 'ḷ'; '\u{1e37}'
+    'L', // 'Ḹ'; '\u{1e38}'
+    'l', // 'ḹ'; '\u{1e39}'
+    'L', // 'Ḻ'; '\u{1e3a}'
+    'l', // 'ḻ'; '\u{1e3b}'
+    'L', // 'Ḽ'; '\u{1e3c}'
+    'l', // 'ḽ'; '\u{1e3d}'
+    'M', // 'Ḿ'; '\u{1e3e}'
+    'm', // 'ḿ'; '\u{1e3f}'
+    'M', // 'Ṁ'; '\u{1e40}'
+    'm', // 'ṁ'; '\u{1e41}'
+    'M', // 'Ṃ'; '\u{1e42}'
+    'm', // 'ṃ'; '\u{1e43}'
+    'N', // 'Ṅ'; '\u{1e44}'
+    'n', // 'ṅ'; '\u{1e45}'
+    'N', // 'Ṇ'; '\u{1e46}'
+    'n', // 'ṇ'; '\u{1e47}'
+    'N', // 'Ṉ'; '\u{1e48}'
+    'n', // 'ṉ'; '\u{1e49}'
+    'N', // 'Ṋ'; '\u{1e4a}'
+    'n', // 'ṋ'; '\u{1e4b}'
+    'O', // 'Ṍ'; '\u{1e4c}'
+    'o', // 'ṍ'; '\u{1e4d}'
+    'O', // 'Ṏ'; '\u{1e4e}'
+    'o', // 'ṏ'; '\u{1e4f}'
+    'O', // 'Ṑ'; '\u{1e50}'
+    'o', // 'ṑ'; '\u{1e51}'
+    'O', // 'Ṓ'; '\u{1e52}'
+    'o', // 'ṓ'; '\u{1e53}'
+    'P', // 'Ṕ'; '\u{1e54}'
+    'p', // 'ṕ'; '\u{1e55}'
+    'P', // 'Ṗ'; '\u{1e56}'
+    'p', // 'ṗ'; '\u{1e57}'
+    'R', // 'Ṙ'; '\u{1e58}'
+    'r', // 'ṙ'; '\u{1e59}'
+    'R', // 'Ṛ'; '\u{1e5a}'
+    'r', // 'ṛ'; '\u{1e5b}'
+    'R', // 'Ṝ'; '\u{1e5c}'
+    'r', // 'ṝ'; '\u{1e5d}'
+    'R', // 'Ṟ'; '\u{1e5e}'
+    'r', // 'ṟ'; '\u{1e5f}'
+    'S', // 'Ṡ'; '\u{1e60}'
+    's', // 'ṡ'; '\u{1e61}'
+    'S', // 'Ṣ'; '\u{1e62}'
+    's', // 'ṣ'; '\u{1e63}'
+    'S', // 'Ṥ'; '\u{1e64}'
+    's', // 'ṥ'; '\u{1e65}'
+    'S', // 'Ṧ'; '\u{1e66}'
+    's', // 'ṧ'; '\u{1e67}'
+    'S', // 'Ṩ'; '\u{1e68}'
+    's', // 'ṩ'; '\u{1e69}'
+    'T', // 'Ṫ'; '\u{1e6a}'
+    't', // 'ṫ'; '\u{1e6b}'
+    'T', // 'Ṭ'; '\u{1e6c}'
+    't', // 'ṭ'; '\u{1e6d}'
+    'T', // 'Ṯ'; '\u{1e6e}'
+    't', // 'ṯ'; '\u{1e6f}'
+    'T', // 'Ṱ'; '\u{1e70}'
+    't', // 'ṱ'; '\u{1e71}'
+    'U', // 'Ṳ'; '\u{1e72}'
+    'u', // 'ṳ'; '\u{1e73}'
+    'U', // 'Ṵ'; '\u{1e74}'
+    'u', // 'ṵ'; '\u{1e75}'
+    'U', // 'Ṷ'; '\u{1e76}'
+    'u', // 'ṷ'; '\u{1e77}'
+    'U', // 'Ṹ'; '\u{1e78}'
+    'u', // 'ṹ'; '\u{1e79}'
+    'U', // 'Ṻ'; '\u{1e7a}'
+    'u', // 'ṻ'; '\u{1e7b}'
+    'V', // 'Ṽ'; '\u{1e7c}'
+    'v', // 'ṽ'; '\u{1e7d}'
+    'V', // 'Ṿ'; '\u{1e7e}'
+    'v', // 'ṿ'; '\u{1e7f}'
+    'W', // 'Ẁ'; '\u{1e80}'
+    'w', // 'ẁ'; '\u{1e81}'
+    'W', // 'Ẃ'; '\u{1e82}'
+    'w', // 'ẃ'; '\u{1e83}'
+    'W', // 'Ẅ'; '\u{1e84}'
+    'w', // 'ẅ'; '\u{1e85}'
+    'W', // 'Ẇ'; '\u{1e86}'
+    'w', // 'ẇ'; '\u{1e87}'
+    'W', // 'Ẉ'; '\u{1e88}'
+    'j', // 'ẉ'; '\u{1e89}'
+    'X', // 'Ẋ'; '\u{1e8a}'
+    'x', // 'ẋ'; '\u{1e8b}'
+    'X', // 'Ẍ'; '\u{1e8c}'
+    'x', // 'ẍ'; '\u{1e8d}'
+    'Y', // 'Ẏ'; '\u{1e8e}'
+    'y', // 'ẏ'; '\u{1e8f}'
+    'Z', // 'Ẑ'; '\u{1e90}'
+    'z', // 'ẑ'; '\u{1e91}'
+    'Z', // 'Ẓ'; '\u{1e92}'
+    'z', // 'ẓ'; '\u{1e93}'
+    'Z', // 'Ẕ'; '\u{1e94}'
+    'z', // 'ẕ'; '\u{1e95}'
+    'h', // 'ẖ'; '\u{1e96}'
+    't', // 'ẗ'; '\u{1e97}'
+    'w', // 'ẘ'; '\u{1e98}'
+    'y', // 'ẙ'; '\u{1e99}'
+    'a', // 'ẚ'; '\u{1e9a}'
+    'i', // 'ẛ'; '\u{1e9b}'
+    'f', // 'ẜ'; '\u{1e9c}'
+    'f', // 'ẝ'; '\u{1e9d}'
+    'ẞ', // 'ẞ'; '\u{1e9e}'
+    'ẟ', // 'ẟ'; '\u{1e9f}'
+    'A', // 'Ạ'; '\u{1ea0}'
+    'a', // 'ạ'; '\u{1ea1}'
+    'A', // 'Ả'; '\u{1ea2}'
+    'a', // 'ả'; '\u{1ea3}'
+    'A', // 'Ấ'; '\u{1ea4}'
+    'a', // 'ấ'; '\u{1ea5}'
+    'A', // 'Ầ'; '\u{1ea6}'
+    'a', // 'ầ'; '\u{1ea7}'
+    'A', // 'Ẩ'; '\u{1ea8}'
+    'a', // 'ẩ'; '\u{1ea9}'
+    'A', // 'Ẫ'; '\u{1eaa}'
+    'a', // 'ẫ'; '\u{1eab}'
+    'A', // 'Ậ'; '\u{1eac}'
+    'a', // 'ậ'; '\u{1ead}'
+    'A', // 'Ắ'; '\u{1eae}'
+    'a', // 'ắ'; '\u{1eaf}'
+    'A', // 'Ằ'; '\u{1eb0}'
+    'a', // 'ằ'; '\u{1eb1}'
+    'A', // 'Ẳ'; '\u{1eb2}'
+    'a', // 'ẳ'; '\u{1eb3}'
+    'A', // 'Ẵ'; '\u{1eb4}'
+    'a', // 'ẵ'; '\u{1eb5}'
+    'A', // 'Ặ'; '\u{1eb6}'
+    'a', // 'ặ'; '\u{1eb7}'
+    'E', // 'Ẹ'; '\u{1eb8}'
+    'e', // 'ẹ'; '\u{1eb9}'
+    'E', // 'Ẻ'; '\u{1eba}'
+    'e', // 'ẻ'; '\u{1ebb}'
+    'E', // 'Ẽ'; '\u{1ebc}'
+    'e', // 'ẽ'; '\u{1ebd}'
+    'E', // 'Ế'; '\u{1ebe}'
+    'e', // 'ế'; '\u{1ebf}'
+    'E', // 'Ề'; '\u{1ec0}'
+    'e', // 'ề'; '\u{1ec1}'
+    'E', // 'Ể'; '\u{1ec2}'
+    'e', // 'ể'; '\u{1ec3}'
+    'E', // 'Ễ'; '\u{1ec4}'
+    'e', // 'ễ'; '\u{1ec5}'
+    'E', // 'Ệ'; '\u{1ec6}'
+    'e', // 'ệ'; '\u{1ec7}'
+    'I', // 'Ỉ'; '\u{1ec8}'
+    'i', // 'ỉ'; '\u{1ec9}'
+    'I', // 'Ị'; '\u{1eca}'
+    'i', // 'ị'; '\u{1ecb}'
+    'O', // 'Ọ'; '\u{1ecc}'
+    'o', // 'ọ'; '\u{1ecd}'
+    'O', // 'Ỏ'; '\u{1ece}'
+    'o', // 'ỏ'; '\u{1ecf}'
+    'O', // 'Ố'; '\u{1ed0}'
+    'o', // 'ố'; '\u{1ed1}'
+    'O', // 'Ồ'; '\u{1ed2}'
+    'o', // 'ồ'; '\u{1ed3}'
+    'O', // 'Ổ'; '\u{1ed4}'
+    'o', // 'ổ'; '\u{1ed5}'
+    'O', // 'Ỗ'; '\u{1ed6}'
+    'o', // 'ỗ'; '\u{1ed7}'
+    'O', // 'Ộ'; '\u{1ed8}'
+    'o', // 'ộ'; '\u{1ed9}'
+    'O', // 'Ớ'; '\u{1eda}'
+    'o', // 'ớ'; '\u{1edb}'
+    'O', // 'Ờ'; '\u{1edc}'
+    'o', // 'ờ'; '\u{1edd}'
+    'O', // 'Ở'; '\u{1ede}'
+    'o', // 'ở'; '\u{1edf}'
+    'O', // 'Ỡ'; '\u{1ee0}'
+    'o', // 'ỡ'; '\u{1ee1}'
+    'O', // 'Ợ'; '\u{1ee2}'
+    'o', // 'ợ'; '\u{1ee3}'
+    'U', // 'Ụ'; '\u{1ee4}'
+    'u', // 'ụ'; '\u{1ee5}'
+    'U', // 'Ủ'; '\u{1ee6}'
+    'u', // 'ủ'; '\u{1ee7}'
+    'U', // 'Ứ'; '\u{1ee8}'
+    'u', // 'ứ'; '\u{1ee9}'
+    'U', // 'Ừ'; '\u{1eea}'
+    'u', // 'ừ'; '\u{1eeb}'
+    'U', // 'Ử'; '\u{1eec}'
+    'u', // 'ử'; '\u{1eed}'
+    'U', // 'Ữ'; '\u{1eee}'
+    'u', // 'ữ'; '\u{1eef}'
+    'U', // 'Ự'; '\u{1ef0}'
+    'u', // 'ự'; '\u{1ef1}'
+    'Y', // 'Ỳ'; '\u{1ef2}'
+    'y', // 'ỳ'; '\u{1ef3}'
+    'Y', // 'Ỵ'; '\u{1ef4}'
+    'y', // 'ỵ'; '\u{1ef5}'
+    'Y', // 'Ỷ'; '\u{1ef6}'
+    'y', // 'ỷ'; '\u{1ef7}'
+    'Y', // 'Ỹ'; '\u{1ef8}'
+    'y', // 'ỹ'; '\u{1ef9}'
+    'Ỻ', // 'Ỻ'; '\u{1efa}'
+    'ỻ', // 'ỻ'; '\u{1efb}'
+    'Ỽ', // 'Ỽ'; '\u{1efc}'
+    'ỽ', // 'ỽ'; '\u{1efd}'
+    'Ỿ', // 'Ỿ'; '\u{1efe}'
+    'ỿ', // 'ỿ'; '\u{1eff}'
+];
+
+/// A char array corresponding to the following Unicode block:
+///
+/// - [Superscripts and Subscripts](https://en.wikipedia.org/wiki/Superscripts_and_Subscripts)
+///
+/// This covers the range `'\u{2070}'..='\u{209f}'`.
+static SUPERSCRIPTS_AND_SUBSCRIPTS: [char; 48] = [
+    '0', // '⁰'; '\u{2070}'
+    'i', // 'ⁱ'; '\u{2071}'
+    '⁲', // '⁲'; '\u{2072}'
+    '⁳', // '⁳'; '\u{2073}'
+    '4', // '⁴'; '\u{2074}'
+    '5', // '⁵'; '\u{2075}'
+    '6', // '⁶'; '\u{2076}'
+    '7', // '⁷'; '\u{2077}'
+    '8', // '⁸'; '\u{2078}'
+    '0', // '⁹'; '\u{2079}'
+    '+', // '⁺'; '\u{207a}'
+    '-', // '⁻'; '\u{207b}'
+    '=', // '⁼'; '\u{207c}'
+    '(', // '⁽'; '\u{207d}'
+    ')', // '⁾'; '\u{207e}'
+    'n', // 'ⁿ'; '\u{207f}'
+    '0', // '₀'; '\u{2080}'
+    '1', // '₁'; '\u{2081}'
+    '2', // '₂'; '\u{2082}'
+    '3', // '₃'; '\u{2083}'
+    '4', // '₄'; '\u{2084}'
+    '5', // '₅'; '\u{2085}'
+    '6', // '₆'; '\u{2086}'
+    '7', // '₇'; '\u{2087}'
+    '8', // '₈'; '\u{2088}'
+    '9', // '₉'; '\u{2089}'
+    '+', // '₊'; '\u{208a}'
+    '-', // '₋'; '\u{208b}'
+    '=', // '₌'; '\u{208c}'
+    '(', // '₍'; '\u{208d}'
+    ')', // '₎'; '\u{208e}'
+    '₏', // '₏'; '\u{208f}'
+    'a', // 'ₐ'; '\u{2090}'
+    'e', // 'ₑ'; '\u{2091}'
+    'o', // 'ₒ'; '\u{2092}'
+    'x', // 'ₓ'; '\u{2093}'
+    'e', // 'ₔ'; '\u{2094}'
+    'h', // 'ₕ'; '\u{2095}'
+    'k', // 'ₖ'; '\u{2096}'
+    'l', // 'ₗ'; '\u{2097}'
+    'm', // 'ₘ'; '\u{2098}'
+    'n', // 'ₙ'; '\u{2099}'
+    'p', // 'ₚ'; '\u{209a}'
+    's', // 'ₛ'; '\u{209b}'
+    't', // 'ₜ'; '\u{209c}'
+    '₝', // '₝'; '\u{209d}'
+    '₞', // '₞'; '\u{209e}'
+    '₟', // '₟'; '\u{209f}'
+];
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Helper function for test assertions.
+    fn check_conversions(pairs: &[(char, char)]) {
+        for (original, normalized) in pairs {
+            assert_eq!(normalize(*original), *normalized);
+        }
+    }
+
+    /// General conversion checks
+    #[test]
+    fn general() {
+        check_conversions(&[
+            ('ą', 'a'),
+            ('À', 'A'),
+            ('ć', 'c'),
+            ('ę', 'e'),
+            ('ł', 'l'),
+            ('ń', 'n'),
+            ('ó', 'o'),
+            ('ś', 's'),
+            ('ź', 'z'),
+            ('ż', 'z'),
+            ('Ą', 'A'),
+            ('Ć', 'C'),
+            ('Ę', 'E'),
+            ('ł', 'l'),
+            ('Ł', 'L'),
+            ('Ń', 'N'),
+            ('Ó', 'O'),
+            ('Ś', 'S'),
+            ('Ź', 'Z'),
+            ('Ż', 'Z'),
+            ('¡', '!'),
+        ]);
+    }
+
+    /// Some checks for characters which are not visible.
+    #[test]
+    fn invisible_chars() {
+        check_conversions(&[('\u{a0}', '\u{a0}'), ('\u{ad}', '\u{ad}')]);
+    }
+
+    /// Check boundary cases in case ranges are modified.
+    #[test]
+    fn boundary_cases() {
+        check_conversions(&[
+            ('\u{9f}', '\u{9f}'),
+            ('\u{a0}', '\u{a0}'),
+            ('¡', '!'),
+            ('ʟ', 'L'),
+            ('\u{2a0}', '\u{2a0}'),
+            ('\u{1dff}', '\u{1dff}'),
+            ('Ḁ', 'A'),
+            ('ỹ', 'y'),
+            ('\u{1eff}', '\u{1eff}'),
+            ('\u{1f00}', '\u{1f00}'),
+            ('⁰', '0'),
+            ('\u{209c}', 't'),
+            ('\u{209f}', '\u{209f}'),
+            ('\u{20a0}', '\u{20a0}'),
+        ]);
+    }
+
+    /// Check that conversions outside the blocks are unchanged.
+    #[test]
+    fn unchanged_outside_blocks() {
+        check_conversions(&[
+            ('a', 'a'),
+            ('⟁', '⟁'),
+            ('┍', '┍'),
+            ('ω', 'ω'),
+            ('⁕', '⁕'),
+            ('ה', 'ה'),
+        ]);
+    }
+}
diff --git a/matcher/src/config.rs b/matcher/src/config.rs
new file mode 100644
index 00000000..eca7ae38
--- /dev/null
+++ b/matcher/src/config.rs
@@ -0,0 +1,70 @@
+use crate::chars::CharClass;
+use crate::score::BONUS_BOUNDARY;
+
+/// Configuration data that controls how a matcher behaves
+#[non_exhaustive]
+#[derive(PartialEq, Eq, Debug, Clone)]
+pub struct Config {
+    /// Characters that act as delimiters and provide bonus
+    /// for matching the following char
+    pub(crate) delimiter_chars: &'static [u8],
+    /// Extra bonus for word boundary after whitespace character or beginning of the string
+    pub(crate) bonus_boundary_white: u16,
+    /// Extra bonus for word boundary after slash, colon, semi-colon, and comma
+    pub(crate) bonus_boundary_delimiter: u16,
+    pub(crate) initial_char_class: CharClass,
+
+    /// Whether to normalize latin script characters to ASCII (enabled by default)
+    pub normalize: bool,
+    /// whether to ignore casing
+    pub ignore_case: bool,
+    /// Whether to provide a bonus to matches by their distance from the start
+    /// of the haystack. The bonus is fairly small compared to the normal gap
+    /// penalty to avoid messing with the normal score heuristic. This setting
+    /// is not turned on by default and only recommended for autocompletion
+    /// usecases where the expectation is that the user is typing the entire
+    /// match. For a full fzf-like fuzzy matcher/picker word segmentation and
+    /// explicit prefix literals should be used instead.
+    pub prefer_prefix: bool,
+}
+
+impl Config {
+    /// The default config for nucleo, implemented as a constant since
+    /// Default::default can not be called in a const context
+    pub const DEFAULT: Self = {
+        Config {
+            delimiter_chars: b"/,:;|",
+            bonus_boundary_white: BONUS_BOUNDARY + 2,
+            bonus_boundary_delimiter: BONUS_BOUNDARY + 1,
+            initial_char_class: CharClass::Whitespace,
+            normalize: true,
+            ignore_case: true,
+            prefer_prefix: false,
+        }
+    };
+}
+
+impl Config {
+    /// Configures the matcher with bonuses appropriate for matching file paths.
+    pub fn set_match_paths(&mut self) {
+        if cfg!(windows) {
+            self.delimiter_chars = b"/:\\";
+        } else {
+            self.delimiter_chars = b"/:";
+        }
+        self.bonus_boundary_white = BONUS_BOUNDARY;
+        self.initial_char_class = CharClass::Delimiter;
+    }
+
+    /// Configures the matcher with bonuses appropriate for matching file paths.
+    pub const fn match_paths(mut self) -> Self {
+        if cfg!(windows) {
+            self.delimiter_chars = b"/\\";
+        } else {
+            self.delimiter_chars = b"/";
+        }
+        self.bonus_boundary_white = BONUS_BOUNDARY;
+        self.initial_char_class = CharClass::Delimiter;
+        self
+    }
+}
diff --git a/matcher/src/debug.rs b/matcher/src/debug.rs
new file mode 100644
index 00000000..b8369f32
--- /dev/null
+++ b/matcher/src/debug.rs
@@ -0,0 +1,14 @@
+use crate::matrix::{MatrixCell, ScoreCell};
+use std::fmt::{Debug, Formatter, Result};
+
+impl Debug for ScoreCell {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        write!(f, "({}, {})", self.score, self.matched)
+    }
+}
+
+impl Debug for MatrixCell {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        write!(f, "({}, {})", (self.0 & 1) != 0, (self.0 & 2) != 0)
+    }
+}
diff --git a/matcher/src/exact.rs b/matcher/src/exact.rs
new file mode 100644
index 00000000..3cb3ceb2
--- /dev/null
+++ b/matcher/src/exact.rs
@@ -0,0 +1,275 @@
+use memchr::memmem;
+use memchr::{Memchr, Memchr2};
+
+use crate::chars::{AsciiChar, Char};
+use crate::score::{BONUS_FIRST_CHAR_MULTIPLIER, SCORE_MATCH};
+use crate::Matcher;
+
+impl Matcher {
+    pub(crate) fn substring_match_1_ascii<const INDICES: bool>(
+        &mut self,
+        haystack: &[u8],
+        c: u8,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        let mut max_score = 0;
+        let mut max_pos = 0;
+        if self.config.ignore_case && c >= b'a' && c <= b'z' {
+            for i in Memchr2::new(c, c - 32, haystack) {
+                let prev_char_class = i
+                    .checked_sub(1)
+                    .map(|i| AsciiChar(haystack[i]).char_class(&self.config))
+                    .unwrap_or(self.config.initial_char_class);
+                let char_class = AsciiChar(haystack[i]).char_class(&self.config);
+                let bonus = self.config.bonus_for(prev_char_class, char_class);
+                let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
+                if score > max_score {
+                    max_pos = i as u32;
+                    max_score = score;
+                    // can't get better than this
+                    if bonus >= self.config.bonus_boundary_white {
+                        break;
+                    }
+                }
+            }
+        } else {
+            let char_class = AsciiChar(c).char_class(&self.config);
+            for i in Memchr::new(c, haystack) {
+                let prev_char_class = i
+                    .checked_sub(1)
+                    .map(|i| AsciiChar(haystack[i]).char_class(&self.config))
+                    .unwrap_or(self.config.initial_char_class);
+                let bonus = self.config.bonus_for(prev_char_class, char_class);
+                let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
+                if score > max_score {
+                    max_pos = i as u32;
+                    max_score = score;
+                    // can't get better than this
+                    if bonus >= self.config.bonus_boundary_white {
+                        break;
+                    }
+                }
+            }
+        }
+        if max_score == 0 {
+            return None;
+        }
+
+        if INDICES {
+            indices.push(max_pos);
+        }
+        Some(max_score)
+    }
+
+    pub(crate) fn substring_match_ascii_with_prefilter(
+        &mut self,
+        haystack: &[u8],
+        needle: &[u8],
+        prefilter_len: usize,
+        prefilter: impl Iterator<Item = usize>,
+    ) -> (u16, usize) {
+        let needle_without_prefilter = &needle[prefilter_len..];
+        let mut max_score = 0;
+        let mut max_pos = 0;
+        for i in prefilter {
+            let prev_char_class = i
+                .checked_sub(1)
+                .map(|i| AsciiChar(haystack[i]).char_class(&self.config))
+                .unwrap_or(self.config.initial_char_class);
+            let char_class = AsciiChar(haystack[i]).char_class(&self.config);
+            let bonus = self.config.bonus_for(prev_char_class, char_class);
+            let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
+            if score > max_score
+                && haystack[i + prefilter_len..(i + needle.len()).min(haystack.len())]
+                    .iter()
+                    .map(|&c| AsciiChar(c).normalize(&self.config).0)
+                    .eq(needle_without_prefilter.iter().copied())
+            {
+                max_pos = i;
+                max_score = score;
+                // can't get better than this
+                if bonus >= self.config.bonus_boundary_white {
+                    break;
+                }
+            }
+        }
+        (max_score, max_pos)
+    }
+
+    pub(crate) fn substring_match_ascii<const INDICES: bool>(
+        &mut self,
+        haystack: &[u8],
+        needle: &[u8],
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        let mut max_score = 0;
+        let mut max_pos = 0;
+        if self.config.ignore_case {
+            match needle.iter().position(|&c| c >= b'a' && c <= b'z') {
+                // start with char do case insensitive search
+                Some(0) => {
+                    (max_score, max_pos) = self.substring_match_ascii_with_prefilter(
+                        haystack,
+                        needle,
+                        1,
+                        Memchr2::new(
+                            needle[0],
+                            needle[0] - 32,
+                            &haystack[..haystack.len() - needle.len() + 1],
+                        ),
+                    );
+                    if max_score == 0 {
+                        return None;
+                    }
+                }
+                Some(1) => {
+                    (max_score, max_pos) = self.substring_match_ascii_with_prefilter(
+                        haystack,
+                        needle,
+                        1,
+                        Memchr::new(needle[0], &haystack[..haystack.len() - needle.len() + 1]),
+                    );
+                    if max_score == 0 {
+                        return None;
+                    }
+                }
+                Some(len) => {
+                    (max_score, max_pos) = self.substring_match_ascii_with_prefilter(
+                        haystack,
+                        needle,
+                        1,
+                        memmem::find_iter(&haystack[..haystack.len() - needle.len() + len], needle),
+                    );
+                    if max_score == 0 {
+                        return None;
+                    }
+                }
+                // in case we don't have any letter in the needle
+                // we can treat the search as case sensitive and use memmem directly which is way faster
+                None => (),
+            }
+        }
+
+        if max_score == 0 {
+            let char_class = AsciiChar(needle[0]).char_class(&self.config);
+            for i in memmem::find_iter(haystack, needle) {
+                let prev_char_class = i
+                    .checked_sub(1)
+                    .map(|i| AsciiChar(haystack[i]).char_class(&self.config))
+                    .unwrap_or(self.config.initial_char_class);
+                let bonus = self.config.bonus_for(prev_char_class, char_class);
+                let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
+                if score > max_score {
+                    max_pos = i;
+                    max_score = score;
+                    // can't get better than this
+                    if bonus >= self.config.bonus_boundary_white {
+                        break;
+                    }
+                }
+            }
+            if max_score == 0 {
+                return None;
+            }
+        }
+        let score = self.calculate_score::<INDICES, _, _>(
+            AsciiChar::cast(haystack),
+            AsciiChar::cast(needle),
+            max_pos,
+            max_pos + needle.len(),
+            indices,
+        );
+        Some(score)
+    }
+
+    pub(crate) fn substring_match_1_non_ascii<const INDICES: bool>(
+        &mut self,
+        haystack: &[char],
+        needle: char,
+        start: usize,
+        indices: &mut Vec<u32>,
+    ) -> u16 {
+        let mut max_score = 0;
+        let mut max_pos = 0;
+        let mut prev_class = start
+            .checked_sub(1)
+            .map(|i| haystack[i].char_class(&self.config))
+            .unwrap_or(self.config.initial_char_class);
+        for (i, &c) in haystack[start..].iter().enumerate() {
+            let (c, char_class) = c.char_class_and_normalize(&self.config);
+            if c != needle {
+                continue;
+            }
+            let bonus = self.config.bonus_for(prev_class, char_class);
+            prev_class = char_class;
+            let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
+            if score > max_score {
+                max_pos = i as u32;
+                max_score = score;
+                // can't get better than this
+                if bonus >= self.config.bonus_boundary_white {
+                    break;
+                }
+            }
+        }
+
+        if INDICES {
+            indices.push(max_pos + start as u32);
+        }
+        max_score
+    }
+
+    pub(crate) fn substring_match_non_ascii<const INDICES: bool, N>(
+        &mut self,
+        haystack: &[char],
+        needle: &[N],
+        start: usize,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16>
+    where
+        N: Char,
+        char: PartialEq<N>,
+    {
+        let mut max_score = 0;
+        let mut max_pos = 0;
+        let mut prev_class = start
+            .checked_sub(1)
+            .map(|i| haystack[i].char_class(&self.config))
+            .unwrap_or(self.config.initial_char_class);
+        let end = haystack.len() - needle.len();
+        for (i, &c) in haystack[start..end].iter().enumerate() {
+            let (c, char_class) = c.char_class_and_normalize(&self.config);
+            if c != needle[0] {
+                continue;
+            }
+            let bonus = self.config.bonus_for(prev_class, char_class);
+            prev_class = char_class;
+            let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
+            if score > max_score
+                && haystack[start + i + 1..start + i + needle.len()]
+                    .iter()
+                    .map(|c| c.normalize(&self.config))
+                    .eq(needle[1..].iter().copied())
+            {
+                max_pos = i;
+                max_score = score;
+                // can't get better than this
+                if bonus >= self.config.bonus_boundary_white {
+                    break;
+                }
+            }
+        }
+        if max_score == 0 {
+            return None;
+        }
+
+        let score = self.calculate_score::<INDICES, _, _>(
+            haystack,
+            needle,
+            start + max_pos,
+            start + max_pos + needle.len(),
+            indices,
+        );
+        Some(score)
+    }
+}
diff --git a/matcher/src/fuzzy_greedy.rs b/matcher/src/fuzzy_greedy.rs
new file mode 100644
index 00000000..8215bf31
--- /dev/null
+++ b/matcher/src/fuzzy_greedy.rs
@@ -0,0 +1,51 @@
+use crate::chars::Char;
+use crate::Matcher;
+
+impl Matcher {
+    /// greedy fallback algorithm, much faster (linear time) but reported scores/indicies
+    /// might not be the best match
+    pub(crate) fn fuzzy_match_greedy_<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
+        &mut self,
+        haystack: &[H],
+        needle: &[N],
+        mut start: usize,
+        mut end: usize,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        let first_char_end = if H::ASCII && N::ASCII { start + 1 } else { end };
+        'nonascii: {
+            if !H::ASCII || !N::ASCII {
+                let mut needle_iter = needle[1..].iter().copied();
+                if let Some(mut needle_char) = needle_iter.next() {
+                    for (i, &c) in haystack[first_char_end..].iter().enumerate() {
+                        if c.normalize(&self.config) == needle_char {
+                            let Some(next_needle_char) = needle_iter.next() else {
+                                // we found a match so we are now in the same state
+                                // as the prefilter would produce
+                                end = first_char_end + i + 1;
+                                break 'nonascii;
+                            };
+                            needle_char = next_needle_char;
+                        }
+                    }
+                    // some needle chars were not matched bail out
+                    return None;
+                }
+            }
+        } // minimize the greedly match by greedy matching in reverse
+
+        let mut needle_iter = needle.iter().rev().copied();
+        let mut needle_char = needle_iter.next().unwrap();
+        for (i, &c) in haystack[start..end].iter().enumerate().rev() {
+            let c = c.normalize(&self.config);
+            if c == needle_char {
+                let Some(next_needle_char) = needle_iter.next() else {
+                    start += i;
+                    break;
+                };
+                needle_char = next_needle_char;
+            }
+        }
+        Some(self.calculate_score::<INDICES, H, N>(haystack, needle, start, end, indices))
+    }
+}
diff --git a/matcher/src/fuzzy_optimal.rs b/matcher/src/fuzzy_optimal.rs
new file mode 100644
index 00000000..5d53ecfb
--- /dev/null
+++ b/matcher/src/fuzzy_optimal.rs
@@ -0,0 +1,348 @@
+use std::cmp::max;
+
+use crate::chars::{Char, CharClass};
+use crate::matrix::{MatcherDataView, MatrixCell, ScoreCell};
+use crate::score::{
+    BONUS_BOUNDARY, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, MAX_PREFIX_BONUS,
+    PENALTY_GAP_EXTENSION, PENALTY_GAP_START, PREFIX_BONUS_SCALE, SCORE_MATCH,
+};
+use crate::{Config, Matcher};
+
+impl Matcher {
+    pub(crate) fn fuzzy_match_optimal<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
+        &mut self,
+        haystack: &[H],
+        needle: &[N],
+        start: usize,
+        greedy_end: usize,
+        end: usize,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        // construct a matrix (and copy the haystack), the matrix and haystack size are bounded
+        // to avoid the slow O(mn) time complexity for large inputs. Furthermore, it allows
+        // us to treat needle indices as u16
+        let Some(mut matrix) = self.slab.alloc(&haystack[start..end], needle.len()) else {
+            return self.fuzzy_match_greedy_::<INDICES, H, N>(
+                haystack, needle, start, greedy_end, indices,
+            );
+        };
+
+        let prev_class = start
+            .checked_sub(1)
+            .map(|i| haystack[i].char_class(&self.config))
+            .unwrap_or(self.config.initial_char_class);
+        let matched = matrix.setup::<INDICES, _>(needle, prev_class, &self.config, start as u32);
+        // this only happened with unicode haystacks, for ASCII the prefilter handles all rejects
+        if !matched {
+            assert!(
+                !N::ASCII || !H::ASCII,
+                "Non-match should have been caught by prefilter. Maybe `needle` is not normalized?"
+            );
+            return None;
+        }
+
+        // populate the matrix and find the best score
+        let matrix_len = matrix.populate_matrix::<INDICES, _>(needle);
+        let last_row_off = matrix.row_offs[needle.len() - 1];
+        let relative_last_row_off = last_row_off as usize + 1 - needle.len();
+        let (match_end, match_score_cell) = matrix.current_row[relative_last_row_off..]
+            .iter()
+            .enumerate()
+            .max_by_key(|(_, cell)| cell.score)
+            .expect("there must be atleast one match");
+        if INDICES {
+            matrix.reconstruct_optimal_path(match_end as u16, indices, matrix_len, start as u32);
+        }
+        Some(match_score_cell.score)
+    }
+}
+
+const UNMATCHED: ScoreCell = ScoreCell {
+    score: 0,
+    // if matched is true then the consecutive bonus
+    // is always atleast BONUS_CONSECUTIVE so
+    // this constant can never occur naturally
+    consecutive_bonus: 0,
+    matched: true,
+};
+
+fn next_m_cell(p_score: u16, bonus: u16, m_cell: ScoreCell) -> ScoreCell {
+    if m_cell == UNMATCHED {
+        return ScoreCell {
+            score: p_score + bonus + SCORE_MATCH,
+            matched: false,
+            consecutive_bonus: bonus as u8,
+        };
+    }
+
+    let mut consecutive_bonus = max(m_cell.consecutive_bonus as u16, BONUS_CONSECUTIVE);
+    if bonus >= BONUS_BOUNDARY && bonus > consecutive_bonus {
+        consecutive_bonus = bonus
+    }
+
+    let score_match = m_cell.score + max(consecutive_bonus, bonus);
+    let score_skip = p_score + bonus;
+    if score_match > score_skip {
+        ScoreCell {
+            score: score_match + SCORE_MATCH,
+            matched: true,
+            consecutive_bonus: consecutive_bonus as u8,
+        }
+    } else {
+        ScoreCell {
+            score: score_skip + SCORE_MATCH,
+            matched: false,
+            consecutive_bonus: bonus as u8,
+        }
+    }
+}
+
+fn p_score(prev_p_score: u16, prev_m_score: u16) -> (u16, bool) {
+    let score_match = prev_m_score.saturating_sub(PENALTY_GAP_START);
+    let score_skip = prev_p_score.saturating_sub(PENALTY_GAP_EXTENSION);
+    if score_match > score_skip {
+        (score_match, true)
+    } else {
+        (score_skip, false)
+    }
+}
+
+impl<H: Char> MatcherDataView<'_, H> {
+    fn setup<const INDICES: bool, N: Char>(
+        &mut self,
+        needle: &[N],
+        mut prev_class: CharClass,
+        config: &Config,
+        start: u32,
+    ) -> bool
+    where
+        H: PartialEq<N>,
+    {
+        let mut row_iter = needle.iter().copied().zip(self.row_offs.iter_mut());
+        let (mut needle_char, mut row_start) = row_iter.next().unwrap();
+
+        let col_iter = self
+            .haystack
+            .iter_mut()
+            .zip(self.bonus.iter_mut())
+            .enumerate();
+
+        let mut matched = false;
+        for (i, (c_, bonus_)) in col_iter {
+            let (c, class) = c_.char_class_and_normalize(config);
+            *c_ = c;
+
+            let bonus = config.bonus_for(prev_class, class);
+            // save bonus for later so we don't have to recompute it each time
+            *bonus_ = bonus as u8;
+            prev_class = class;
+
+            let i = i as u16;
+            if c == needle_char {
+                // save the first idx of each char
+                if let Some(next) = row_iter.next() {
+                    *row_start = i;
+                    (needle_char, row_start) = next;
+                } else if !matched {
+                    *row_start = i;
+                    // we have atleast one match
+                    matched = true;
+                }
+            }
+        }
+        if !matched {
+            return false;
+        }
+        debug_assert_eq!(self.row_offs[0], 0);
+        Self::score_row::<true, INDICES, _>(
+            self.current_row,
+            self.matrix_cells,
+            self.haystack,
+            self.bonus,
+            0,
+            self.row_offs[1],
+            0,
+            needle[0],
+            needle[1],
+            if config.prefer_prefix {
+                if start == 0 {
+                    MAX_PREFIX_BONUS * PREFIX_BONUS_SCALE
+                } else {
+                    (MAX_PREFIX_BONUS * PREFIX_BONUS_SCALE - PENALTY_GAP_START).saturating_sub(
+                        (start - 1).min(u16::MAX as u32) as u16 * PENALTY_GAP_EXTENSION,
+                    )
+                }
+            } else {
+                0
+            },
+        );
+        true
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    fn score_row<const FIRST_ROW: bool, const INDICES: bool, N: Char>(
+        current_row: &mut [ScoreCell],
+        matrix_cells: &mut [MatrixCell],
+        haystack: &[H],
+        bonus: &[u8],
+        row_off: u16,
+        mut next_row_off: u16,
+        needle_idx: u16,
+        needle_char: N,
+        next_needle_char: N,
+        mut prefix_bonus: u16,
+    ) where
+        H: PartialEq<N>,
+    {
+        next_row_off -= 1;
+        let relative_row_off = row_off - needle_idx;
+        let next_relative_row_off = next_row_off - needle_idx;
+        let skipped_col_iter = haystack[row_off as usize..next_row_off as usize]
+            .iter()
+            .zip(bonus[row_off as usize..next_row_off as usize].iter())
+            .zip(current_row[relative_row_off as usize..next_relative_row_off as usize].iter_mut())
+            .zip(matrix_cells.iter_mut());
+        let mut prev_p_score = 0;
+        let mut prev_m_score = 0;
+        for (((&c, bonus), score_cell), matrix_cell) in skipped_col_iter {
+            let (p_score, p_matched) = p_score(prev_p_score, prev_m_score);
+            let m_cell = if FIRST_ROW {
+                let cell = if c == needle_char {
+                    ScoreCell {
+                        score: *bonus as u16 * BONUS_FIRST_CHAR_MULTIPLIER
+                            + SCORE_MATCH
+                            + prefix_bonus / PREFIX_BONUS_SCALE,
+                        matched: false,
+                        consecutive_bonus: *bonus,
+                    }
+                } else {
+                    UNMATCHED
+                };
+                prefix_bonus = prefix_bonus.saturating_sub(PENALTY_GAP_EXTENSION);
+                cell
+            } else {
+                *score_cell
+            };
+            if INDICES {
+                matrix_cell.set(p_matched, m_cell.matched);
+            }
+            prev_p_score = p_score;
+            prev_m_score = m_cell.score;
+        }
+        let col_iter = haystack[next_row_off as usize..]
+            .windows(2)
+            .zip(bonus[next_row_off as usize..].windows(2))
+            .zip(current_row[next_relative_row_off as usize..].iter_mut())
+            .zip(matrix_cells[(next_relative_row_off - relative_row_off) as usize..].iter_mut());
+        for (((c, bonus), score_cell), matrix_cell) in col_iter {
+            let (p_score, p_matched) = p_score(prev_p_score, prev_m_score);
+            let m_cell = if FIRST_ROW {
+                let cell = if c[0] == needle_char {
+                    ScoreCell {
+                        score: bonus[0] as u16 * BONUS_FIRST_CHAR_MULTIPLIER
+                            + SCORE_MATCH
+                            + prefix_bonus / PREFIX_BONUS_SCALE,
+                        matched: false,
+                        consecutive_bonus: bonus[0],
+                    }
+                } else {
+                    UNMATCHED
+                };
+                prefix_bonus = prefix_bonus.saturating_sub(PENALTY_GAP_EXTENSION);
+                cell
+            } else {
+                *score_cell
+            };
+            *score_cell = if c[1] == next_needle_char {
+                next_m_cell(p_score, bonus[1] as u16, m_cell)
+            } else {
+                UNMATCHED
+            };
+            if INDICES {
+                matrix_cell.set(p_matched, m_cell.matched);
+            }
+            prev_p_score = p_score;
+            prev_m_score = m_cell.score;
+        }
+    }
+
+    fn populate_matrix<const INDICES: bool, N: Char>(&mut self, needle: &[N]) -> usize
+    where
+        H: PartialEq<N>,
+    {
+        let mut matrix_cells = &mut self.matrix_cells[self.current_row.len()..];
+        let mut row_iter = needle[1..]
+            .iter()
+            .copied()
+            .zip(self.row_offs[1..].iter().copied())
+            .enumerate();
+        let (mut needle_idx, (mut needle_char, mut row_off)) = row_iter.next().unwrap();
+        for (next_needle_idx, (next_needle_char, next_row_off)) in row_iter {
+            Self::score_row::<false, INDICES, _>(
+                self.current_row,
+                matrix_cells,
+                self.haystack,
+                self.bonus,
+                row_off,
+                next_row_off,
+                needle_idx as u16 + 1,
+                needle_char,
+                next_needle_char,
+                0,
+            );
+            let len = self.current_row.len() + needle_idx + 1 - row_off as usize;
+            matrix_cells = &mut matrix_cells[len..];
+            (needle_idx, needle_char, row_off) = (next_needle_idx, next_needle_char, next_row_off);
+        }
+        matrix_cells.as_ptr() as usize - self.matrix_cells.as_ptr() as usize
+    }
+
+    fn reconstruct_optimal_path(
+        &self,
+        max_score_end: u16,
+        indices: &mut Vec<u32>,
+        matrix_len: usize,
+        start: u32,
+    ) {
+        let indices_start = indices.len();
+        indices.resize(indices_start + self.row_offs.len(), 0);
+        let indices = &mut indices[indices_start..];
+        let last_row_off = *self.row_offs.last().unwrap();
+        indices[self.row_offs.len() - 1] = start + max_score_end as u32 + last_row_off as u32;
+
+        let mut matrix_cells = &self.matrix_cells[..matrix_len];
+        let width = self.current_row.len();
+        let mut row_iter = self.row_offs[..self.row_offs.len() - 1]
+            .iter()
+            .copied()
+            .enumerate()
+            .rev()
+            .map(|(i, off)| {
+                let relative_off = off as usize - i;
+                let row;
+                (matrix_cells, row) =
+                    matrix_cells.split_at(matrix_cells.len() - (width - relative_off));
+                (i, off, row)
+            });
+        let (mut row_idx, mut row_off, mut row) = row_iter.next().unwrap();
+        let mut col = max_score_end;
+        let relative_last_row_off = last_row_off as usize + 1 - self.row_offs.len();
+        let mut matched = self.current_row[col as usize + relative_last_row_off].matched;
+        col += last_row_off - row_off - 1;
+        loop {
+            if matched {
+                indices[row_idx] = start + col as u32 + row_off as u32;
+            }
+            let next_matched = row[col as usize].get(matched);
+            if matched {
+                let Some((next_row_idx, next_row_off, next_row)) = row_iter.next() else {
+                    break;
+                };
+                col += row_off - next_row_off;
+                (row_idx, row_off, row) = (next_row_idx, next_row_off, next_row)
+            }
+            col -= 1;
+            matched = next_matched;
+        }
+    }
+}
diff --git a/matcher/src/lib.rs b/matcher/src/lib.rs
new file mode 100644
index 00000000..3e8874c5
--- /dev/null
+++ b/matcher/src/lib.rs
@@ -0,0 +1,780 @@
+/*!
+`nucleo_matcher` is a low level crate that contains the matcher implementation
+used by the high level `nucleo` crate.
+
+**NOTE**: If you are building an fzf-like interactive fuzzy finder that is
+meant to match a reasonably large number of items (> 100) using the high level
+`nucleo` crate is highly recommended. Using `nucleo-matcher` directly in you ui
+loop will be very slow. Implementing this logic yourself is very complex.
+
+The matcher is hightly optimized and can significantly outperform `fzf` and
+`skim` (the `fuzzy-matcher` crate). However some of these optimizations require
+a slightly less convenient API. Be sure to carefully read the documentation of
+the [`Matcher`] to avoid unexpected behaviour.
+# Examples
+
+For almost all usecases the [`pattern`] API should be used instead of calling
+the matcher methods directly. [`Pattern::parse`](pattern::Pattern::parse) will
+construct a single Atom (a single match operation) for each word. The pattern
+can contain special characters to control what kind of match is performed (see
+[`AtomKind`](crate::pattern::AtomKind)).
+
+```
+# use nucleo_matcher::{Matcher, Config};
+# use nucleo_matcher::pattern::{Pattern, Normalization, CaseMatching};
+let paths = ["foo/bar", "bar/foo", "foobar"];
+let mut matcher = Matcher::new(Config::DEFAULT.match_paths());
+let matches = Pattern::parse("foo bar", CaseMatching::Ignore, Normalization::Smart).match_list(paths, &mut matcher);
+assert_eq!(matches, vec![("foo/bar", 168), ("bar/foo", 168), ("foobar", 140)]);
+let matches = Pattern::parse("^foo bar", CaseMatching::Ignore, Normalization::Smart).match_list(paths, &mut matcher);
+assert_eq!(matches, vec![("foo/bar", 168), ("foobar", 140)]);
+```
+
+If the pattern should be matched literally (without this special parsing)
+[`Pattern::new`](pattern::Pattern::new) can be used instead.
+
+```
+# use nucleo_matcher::{Matcher, Config};
+# use nucleo_matcher::pattern::{Pattern, CaseMatching, AtomKind, Normalization};
+let paths = ["foo/bar", "bar/foo", "foobar"];
+let mut matcher = Matcher::new(Config::DEFAULT.match_paths());
+let matches = Pattern::new("foo bar", CaseMatching::Ignore, Normalization::Smart, AtomKind::Fuzzy).match_list(paths, &mut matcher);
+assert_eq!(matches, vec![("foo/bar", 168), ("bar/foo", 168), ("foobar", 140)]);
+let paths = ["^foo/bar", "bar/^foo", "foobar"];
+let matches = Pattern::new("^foo bar", CaseMatching::Ignore, Normalization::Smart, AtomKind::Fuzzy).match_list(paths, &mut matcher);
+assert_eq!(matches, vec![("^foo/bar", 188), ("bar/^foo", 188)]);
+```
+
+Word segmentation is performed automatically on any unescaped character for which [`is_whitespace`](char::is_whitespace) returns true.
+This is relevant, for instance, with non-english keyboard input.
+
+```
+# use nucleo_matcher::pattern::{Atom, Pattern, Normalization, CaseMatching};
+assert_eq!(
+    // double-width 'Ideographic Space', i.e. `'\u{3000}'`
+    Pattern::parse("ほげ　ふが", CaseMatching::Smart, Normalization::Smart).atoms,
+    vec![
+        Atom::parse("ほげ", CaseMatching::Smart, Normalization::Smart),
+        Atom::parse("ふが", CaseMatching::Smart, Normalization::Smart),
+    ],
+);
+```
+
+If word segmentation is also not desired, a single `Atom` can be constructed directly.
+
+```
+# use nucleo_matcher::{Matcher, Config};
+# use nucleo_matcher::pattern::{Pattern, Atom, CaseMatching, Normalization, AtomKind};
+let paths = ["foobar", "foo bar"];
+let mut matcher = Matcher::new(Config::DEFAULT);
+let matches = Atom::new("foo bar", CaseMatching::Ignore, Normalization::Smart, AtomKind::Fuzzy, false).match_list(paths, &mut matcher);
+assert_eq!(matches, vec![("foo bar", 192)]);
+```
+
+
+# Status
+
+Nucleo is used in the helix-editor and therefore has a large user base with lots or real world testing. The core matcher implementation is considered complete and is unlikely to see major changes. The `nucleo-matcher` crate is finished and ready for widespread use, breaking changes should be very rare (a 1.0 release should not be far away).
+
+*/
+
+// sadly ranges don't optmimzie well
+#![allow(clippy::manual_range_contains)]
+#![warn(missing_docs)]
+
+pub mod chars;
+mod config;
+#[cfg(test)]
+mod debug;
+mod exact;
+mod fuzzy_greedy;
+mod fuzzy_optimal;
+mod matrix;
+pub mod pattern;
+mod prefilter;
+mod score;
+mod utf32_str;
+
+#[cfg(test)]
+mod tests;
+
+pub use crate::config::Config;
+pub use crate::utf32_str::{Utf32Str, Utf32String};
+
+use crate::chars::{AsciiChar, Char};
+use crate::matrix::MatrixSlab;
+
+/// A matcher engine that can execute (fuzzy) matches.
+///
+/// A matches contains **heap allocated** scratch memory that is reused during
+/// matching. This scratch memory allows the matcher to guarantee that it will
+/// **never allocate** during matching (with the exception of pushing to the
+/// `indices` vector if there isn't enough capacity). However this scratch
+/// memory is fairly large (around 135KB) so creating a matcher is expensive.
+///
+/// All `.._match` functions will not compute the indices  of the matched
+/// characters. These should be used to prefilter to filter and rank all
+/// matches. All `.._indices` functions will also compute the indices of the
+/// matched characters but are slower compared to the `..match` variant. These
+/// should be used when rendering the best N matches. Note that the `indices`
+/// argument is **never cleared**. This allows running multiple different
+/// matches on the same haystack and merging the indices by sorting and
+/// deduplicating the vector.
+///
+/// The `needle` argument for each function must always be normalized by the
+/// caller (unicode normalization and case folding). Otherwise, the matcher
+/// may fail to produce a match. The [`pattern`] modules provides utilities
+/// to preprocess needles and **should usually be preferred over invoking the
+/// matcher directly**.  Additionally it's recommend to perform separate matches
+/// for each word in the needle. Consider the folloling example:
+///
+/// If `foo bar` is used as the needle it matches both `foo test baaar` and
+/// `foo hello-world bar`. However, `foo test baaar` will receive a higher
+/// score than `foo hello-world bar`. `baaar` contains a 2 character gap which
+/// will receive a penalty and therefore the user will likely expect it to rank
+/// lower. However, if `foo bar` is matched as a single query `hello-world` and
+/// `test` are both considered gaps too. As `hello-world` is a much longer gap
+/// then `test` the extra penalty for `baaar` is canceled out. If both words
+/// are matched individually the interspersed words do not receive a penalty and
+/// `foo hello-world bar` ranks higher.
+///
+/// In general nucleo is a **substring matching tool** (except for the prefix/
+/// postfix matching modes) with no penalty assigned to matches that start
+/// later within the same pattern (which enables matching words individually
+/// as shown above). If patterns show a large variety in length and the syntax
+/// described above is not used it may be preferable to give preference to
+/// matches closer to the start of a haystack. To accommodate that usecase the
+/// [`prefer_prefix`](Config::prefer_prefix) option can be set to true.
+///
+/// Matching is limited to 2^32-1 codepoints, if the haystack is longer than
+/// that the matcher **will panic**. The caller must decide whether it wants to
+/// filter out long haystacks or truncate them.
+pub struct Matcher {
+    #[allow(missing_docs)]
+    pub config: Config,
+    slab: MatrixSlab,
+}
+
+// this is just here for convenience not sure if we should implement this
+impl Clone for Matcher {
+    fn clone(&self) -> Self {
+        Matcher {
+            config: self.config.clone(),
+            slab: MatrixSlab::new(),
+        }
+    }
+}
+
+impl std::fmt::Debug for Matcher {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Matcher")
+            .field("config", &self.config)
+            .finish_non_exhaustive()
+    }
+}
+
+impl Default for Matcher {
+    fn default() -> Self {
+        Matcher {
+            config: Config::DEFAULT,
+            slab: MatrixSlab::new(),
+        }
+    }
+}
+
+impl Matcher {
+    /// Creates a new matcher instance, note that this will eagerly allocate a
+    /// fairly large chunk of heap memory (around 135KB currently but subject to
+    /// change) so matchers should be reused if called often (like in a loop).
+    pub fn new(config: Config) -> Self {
+        Self {
+            config,
+            slab: MatrixSlab::new(),
+        }
+    }
+
+    /// Find the fuzzy match with the highest score in the `haystack`.
+    ///
+    /// This functions has `O(mn)` time complexity for short inputs.
+    /// To avoid slowdowns it automatically falls back to
+    /// [greedy matching](crate::Matcher::fuzzy_match_greedy) for large
+    /// needles and haystacks.
+    ///
+    /// See the [matcher documentation](crate::Matcher) for more details.
+    pub fn fuzzy_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
+        assert!(haystack.len() <= u32::MAX as usize);
+        self.fuzzy_matcher_impl::<false>(haystack, needle, &mut Vec::new())
+    }
+
+    /// Find the fuzzy match with the highest score in the `haystack` and
+    /// compute its indices.
+    ///
+    /// This functions has `O(mn)` time complexity for short inputs. To
+    /// avoid slowdowns it automatically falls back to
+    /// [greedy matching](crate::Matcher::fuzzy_match_greedy) for large needles
+    /// and haystacks
+    ///
+    /// See the [matcher documentation](crate::Matcher) for more details.
+    pub fn fuzzy_indices(
+        &mut self,
+        haystack: Utf32Str<'_>,
+        needle: Utf32Str<'_>,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        assert!(haystack.len() <= u32::MAX as usize);
+        self.fuzzy_matcher_impl::<true>(haystack, needle, indices)
+    }
+
+    fn fuzzy_matcher_impl<const INDICES: bool>(
+        &mut self,
+        haystack_: Utf32Str<'_>,
+        needle_: Utf32Str<'_>,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        if needle_.len() > haystack_.len() {
+            return None;
+        }
+        if needle_.is_empty() {
+            return Some(0);
+        }
+        if needle_.len() == haystack_.len() {
+            return self.exact_match_impl::<INDICES>(
+                haystack_,
+                needle_,
+                0,
+                haystack_.len(),
+                indices,
+            );
+        }
+        assert!(
+            haystack_.len() <= u32::MAX as usize,
+            "fuzzy matching is only support for up to 2^32-1 codepoints"
+        );
+        match (haystack_, needle_) {
+            (Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
+                if let &[needle] = needle {
+                    return self.substring_match_1_ascii::<INDICES>(haystack, needle, indices);
+                }
+                let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle, false)?;
+                if needle_.len() == end - start {
+                    return Some(self.calculate_score::<INDICES, _, _>(
+                        AsciiChar::cast(haystack),
+                        AsciiChar::cast(needle),
+                        start,
+                        greedy_end,
+                        indices,
+                    ));
+                }
+                self.fuzzy_match_optimal::<INDICES, AsciiChar, AsciiChar>(
+                    AsciiChar::cast(haystack),
+                    AsciiChar::cast(needle),
+                    start,
+                    greedy_end,
+                    end,
+                    indices,
+                )
+            }
+            (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
+                // a purely ascii haystack can never be transformed to match
+                // a needle that contains non-ascii chars since we don't allow gaps
+                None
+            }
+            (Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
+                if let &[needle] = needle {
+                    let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
+                    let res = self.substring_match_1_non_ascii::<INDICES>(
+                        haystack,
+                        needle as char,
+                        start,
+                        indices,
+                    );
+                    return Some(res);
+                }
+                let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
+                if needle_.len() == end - start {
+                    return self
+                        .exact_match_impl::<INDICES>(haystack_, needle_, start, end, indices);
+                }
+                self.fuzzy_match_optimal::<INDICES, char, AsciiChar>(
+                    haystack,
+                    AsciiChar::cast(needle),
+                    start,
+                    start + 1,
+                    end,
+                    indices,
+                )
+            }
+            (Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
+                if let &[needle] = needle {
+                    let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
+                    let res = self
+                        .substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices);
+                    return Some(res);
+                }
+                let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
+                if needle_.len() == end - start {
+                    return self
+                        .exact_match_impl::<INDICES>(haystack_, needle_, start, end, indices);
+                }
+                self.fuzzy_match_optimal::<INDICES, char, char>(
+                    haystack,
+                    needle,
+                    start,
+                    start + 1,
+                    end,
+                    indices,
+                )
+            }
+        }
+    }
+
+    /// Greedly find a fuzzy match in the `haystack`.
+    ///
+    /// This functions has `O(n)` time complexity but may provide unintutive (non-optimal)
+    /// indices and scores. Usually [fuzzy_match](crate::Matcher::fuzzy_match) should
+    /// be preferred.
+    ///
+    /// See the [matcher documentation](crate::Matcher) for more details.
+    pub fn fuzzy_match_greedy(
+        &mut self,
+        haystack: Utf32Str<'_>,
+        needle: Utf32Str<'_>,
+    ) -> Option<u16> {
+        assert!(haystack.len() <= u32::MAX as usize);
+        self.fuzzy_match_greedy_impl::<false>(haystack, needle, &mut Vec::new())
+    }
+
+    /// Greedly find a fuzzy match in the `haystack` and compute its indices.
+    ///
+    /// This functions has `O(n)` time complexity but may provide unintuitive (non-optimal)
+    /// indices and scores. Usually [fuzzy_indices](crate::Matcher::fuzzy_indices) should
+    /// be preferred.
+    ///
+    /// See the [matcher documentation](crate::Matcher) for more details.
+    pub fn fuzzy_indices_greedy(
+        &mut self,
+        haystack: Utf32Str<'_>,
+        needle: Utf32Str<'_>,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        assert!(haystack.len() <= u32::MAX as usize);
+        self.fuzzy_match_greedy_impl::<true>(haystack, needle, indices)
+    }
+
+    fn fuzzy_match_greedy_impl<const INDICES: bool>(
+        &mut self,
+        haystack: Utf32Str<'_>,
+        needle_: Utf32Str<'_>,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        if needle_.len() > haystack.len() {
+            return None;
+        }
+        if needle_.is_empty() {
+            return Some(0);
+        }
+        if needle_.len() == haystack.len() {
+            return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
+        }
+        assert!(
+            haystack.len() <= u32::MAX as usize,
+            "matching is only support for up to 2^32-1 codepoints"
+        );
+        match (haystack, needle_) {
+            (Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
+                let (start, greedy_end, _) = self.prefilter_ascii(haystack, needle, true)?;
+                if needle_.len() == greedy_end - start {
+                    return Some(self.calculate_score::<INDICES, _, _>(
+                        AsciiChar::cast(haystack),
+                        AsciiChar::cast(needle),
+                        start,
+                        greedy_end,
+                        indices,
+                    ));
+                }
+                self.fuzzy_match_greedy_::<INDICES, AsciiChar, AsciiChar>(
+                    AsciiChar::cast(haystack),
+                    AsciiChar::cast(needle),
+                    start,
+                    greedy_end,
+                    indices,
+                )
+            }
+            (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
+                // a purely ascii haystack can never be transformed to match
+                // a needle that contains non-ascii chars since we don't allow gaps
+                None
+            }
+            (Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
+                let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
+                self.fuzzy_match_greedy_::<INDICES, char, AsciiChar>(
+                    haystack,
+                    AsciiChar::cast(needle),
+                    start,
+                    start + 1,
+                    indices,
+                )
+            }
+            (Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
+                let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
+                self.fuzzy_match_greedy_::<INDICES, char, char>(
+                    haystack,
+                    needle,
+                    start,
+                    start + 1,
+                    indices,
+                )
+            }
+        }
+    }
+
+    /// Finds the substring match with the highest score in the `haystack`.
+    ///
+    /// This functions has `O(nm)` time complexity. However many cases can
+    /// be significantly accelerated using prefilters so it's usually very fast
+    /// in practice.
+    ///
+    /// See the [matcher documentation](crate::Matcher) for more details.
+    pub fn substring_match(
+        &mut self,
+        haystack: Utf32Str<'_>,
+        needle_: Utf32Str<'_>,
+    ) -> Option<u16> {
+        self.substring_match_impl::<false>(haystack, needle_, &mut Vec::new())
+    }
+
+    /// Finds the substring match with the highest score in the `haystack` and
+    /// compute its indices.
+    ///
+    /// This functions has `O(nm)` time complexity. However many cases can
+    /// be significantly accelerated using prefilters so it's usually fast
+    /// in practice.
+    ///
+    /// See the [matcher documentation](crate::Matcher) for more details.
+    pub fn substring_indices(
+        &mut self,
+        haystack: Utf32Str<'_>,
+        needle_: Utf32Str<'_>,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        self.substring_match_impl::<true>(haystack, needle_, indices)
+    }
+
+    fn substring_match_impl<const INDICES: bool>(
+        &mut self,
+        haystack: Utf32Str<'_>,
+        needle_: Utf32Str<'_>,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        if needle_.len() > haystack.len() {
+            return None;
+        }
+        if needle_.is_empty() {
+            return Some(0);
+        }
+        if needle_.len() == haystack.len() {
+            return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
+        }
+        assert!(
+            haystack.len() <= u32::MAX as usize,
+            "matching is only support for up to 2^32-1 codepoints"
+        );
+        match (haystack, needle_) {
+            (Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
+                if let &[needle] = needle {
+                    return self.substring_match_1_ascii::<INDICES>(haystack, needle, indices);
+                }
+                self.substring_match_ascii::<INDICES>(haystack, needle, indices)
+            }
+            (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
+                // a purely ascii haystack can never be transformed to match
+                // a needle that contains non-ascii chars since we don't allow gaps
+                None
+            }
+            (Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
+                if let &[needle] = needle {
+                    let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
+                    let res = self.substring_match_1_non_ascii::<INDICES>(
+                        haystack,
+                        needle as char,
+                        start,
+                        indices,
+                    );
+                    return Some(res);
+                }
+                let (start, _) = self.prefilter_non_ascii(haystack, needle_, false)?;
+                self.substring_match_non_ascii::<INDICES, _>(
+                    haystack,
+                    AsciiChar::cast(needle),
+                    start,
+                    indices,
+                )
+            }
+            (Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
+                if let &[needle] = needle {
+                    let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
+                    let res = self
+                        .substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices);
+                    return Some(res);
+                }
+                let (start, _) = self.prefilter_non_ascii(haystack, needle_, false)?;
+                self.substring_match_non_ascii::<INDICES, _>(haystack, needle, start, indices)
+            }
+        }
+    }
+
+    /// Checks whether needle and haystack match exactly.
+    ///
+    /// This functions has `O(n)` time complexity.
+    ///
+    /// See the [matcher documentation](crate::Matcher) for more details.
+    pub fn exact_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
+        if needle.is_empty() {
+            return Some(0);
+        }
+        let mut leading_space = 0;
+        let mut trailing_space = 0;
+        if !needle.first().is_whitespace() {
+            leading_space = haystack.leading_white_space()
+        }
+        if !needle.last().is_whitespace() {
+            trailing_space = haystack.trailing_white_space()
+        }
+        // avoid wraparound in size check
+        if trailing_space == haystack.len() {
+            return None;
+        }
+        self.exact_match_impl::<false>(
+            haystack,
+            needle,
+            leading_space,
+            haystack.len() - trailing_space,
+            &mut Vec::new(),
+        )
+    }
+
+    /// Checks whether needle and haystack match exactly and compute the matches indices.
+    ///
+    /// This functions has `O(n)` time complexity.
+    ///
+    /// See the [matcher documentation](crate::Matcher) for more details.
+    pub fn exact_indices(
+        &mut self,
+        haystack: Utf32Str<'_>,
+        needle: Utf32Str<'_>,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        if needle.is_empty() {
+            return Some(0);
+        }
+        let mut leading_space = 0;
+        let mut trailing_space = 0;
+        if !needle.first().is_whitespace() {
+            leading_space = haystack.leading_white_space()
+        }
+        if !needle.last().is_whitespace() {
+            trailing_space = haystack.trailing_white_space()
+        }
+        // avoid wraparound in size check
+        if trailing_space == haystack.len() {
+            return None;
+        }
+        self.exact_match_impl::<true>(
+            haystack,
+            needle,
+            leading_space,
+            haystack.len() - trailing_space,
+            indices,
+        )
+    }
+
+    /// Checks whether needle is a prefix of the haystack.
+    ///
+    /// This functions has `O(n)` time complexity.
+    ///
+    /// See the [matcher documentation](crate::Matcher) for more details.
+    pub fn prefix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
+        if needle.is_empty() {
+            return Some(0);
+        }
+        let mut leading_space = 0;
+        if !needle.first().is_whitespace() {
+            leading_space = haystack.leading_white_space()
+        }
+        if haystack.len() - leading_space < needle.len() {
+            None
+        } else {
+            self.exact_match_impl::<false>(
+                haystack,
+                needle,
+                leading_space,
+                needle.len() + leading_space,
+                &mut Vec::new(),
+            )
+        }
+    }
+
+    /// Checks whether needle is a prefix of the haystack and compute the matches indices.
+    ///
+    /// This functions has `O(n)` time complexity.
+    ///
+    /// See the [matcher documentation](crate::Matcher) for more details.
+    pub fn prefix_indices(
+        &mut self,
+        haystack: Utf32Str<'_>,
+        needle: Utf32Str<'_>,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        if needle.is_empty() {
+            return Some(0);
+        }
+        let mut leading_space = 0;
+        if !needle.first().is_whitespace() {
+            leading_space = haystack.leading_white_space()
+        }
+        if haystack.len() - leading_space < needle.len() {
+            None
+        } else {
+            self.exact_match_impl::<true>(
+                haystack,
+                needle,
+                leading_space,
+                needle.len() + leading_space,
+                indices,
+            )
+        }
+    }
+
+    /// Checks whether needle is a postfix of the haystack.
+    ///
+    /// This functions has `O(n)` time complexity.
+    ///
+    /// See the [matcher documentation](crate::Matcher) for more details.
+    pub fn postfix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
+        if needle.is_empty() {
+            return Some(0);
+        }
+        let mut trailing_spaces = 0;
+        if !needle.last().is_whitespace() {
+            trailing_spaces = haystack.trailing_white_space()
+        }
+        if haystack.len() - trailing_spaces < needle.len() {
+            None
+        } else {
+            self.exact_match_impl::<false>(
+                haystack,
+                needle,
+                haystack.len() - needle.len() - trailing_spaces,
+                haystack.len() - trailing_spaces,
+                &mut Vec::new(),
+            )
+        }
+    }
+
+    /// Checks whether needle is a postfix of the haystack and compute the matches indices.
+    ///
+    /// This functions has `O(n)` time complexity.
+    ///
+    /// See the [matcher documentation](crate::Matcher) for more details.
+    pub fn postfix_indices(
+        &mut self,
+        haystack: Utf32Str<'_>,
+        needle: Utf32Str<'_>,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        if needle.is_empty() {
+            return Some(0);
+        }
+        let mut trailing_spaces = 0;
+        if !needle.last().is_whitespace() {
+            trailing_spaces = haystack.trailing_white_space()
+        }
+        if haystack.len() - trailing_spaces < needle.len() {
+            None
+        } else {
+            self.exact_match_impl::<true>(
+                haystack,
+                needle,
+                haystack.len() - needle.len() - trailing_spaces,
+                haystack.len() - trailing_spaces,
+                indices,
+            )
+        }
+    }
+
+    fn exact_match_impl<const INDICES: bool>(
+        &mut self,
+        haystack: Utf32Str<'_>,
+        needle_: Utf32Str<'_>,
+        start: usize,
+        end: usize,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        if needle_.len() != end - start {
+            return None;
+        }
+        assert!(
+            haystack.len() <= u32::MAX as usize,
+            "matching is only support for up to 2^32-1 codepoints"
+        );
+        let score = match (haystack, needle_) {
+            (Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
+                let matched = if self.config.ignore_case {
+                    AsciiChar::cast(haystack)[start..end]
+                        .iter()
+                        .map(|c| c.normalize(&self.config))
+                        .eq(AsciiChar::cast(needle)
+                            .iter()
+                            .map(|c| c.normalize(&self.config)))
+                } else {
+                    &haystack[start..end] == needle
+                };
+                if !matched {
+                    return None;
+                }
+                self.calculate_score::<INDICES, _, _>(
+                    AsciiChar::cast(haystack),
+                    AsciiChar::cast(needle),
+                    start,
+                    end,
+                    indices,
+                )
+            }
+            (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
+                // a purely ascii haystack can never be transformed to match
+                // a needle that contains non-ascii chars since we don't allow gaps
+                return None;
+            }
+            (Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
+                let matched = haystack[start..end]
+                    .iter()
+                    .map(|c| c.normalize(&self.config))
+                    .eq(AsciiChar::cast(needle)
+                        .iter()
+                        .map(|c| c.normalize(&self.config)));
+                if !matched {
+                    return None;
+                }
+
+                self.calculate_score::<INDICES, _, _>(
+                    haystack,
+                    AsciiChar::cast(needle),
+                    start,
+                    end,
+                    indices,
+                )
+            }
+            (Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
+                let matched = haystack[start..end]
+                    .iter()
+                    .map(|c| c.normalize(&self.config))
+                    .eq(needle.iter().map(|c| c.normalize(&self.config)));
+                if !matched {
+                    return None;
+                }
+                self.calculate_score::<INDICES, _, _>(haystack, needle, start, end, indices)
+            }
+        };
+        Some(score)
+    }
+}
diff --git a/matcher/src/matrix.rs b/matcher/src/matrix.rs
new file mode 100644
index 00000000..a91ed95f
--- /dev/null
+++ b/matcher/src/matrix.rs
@@ -0,0 +1,198 @@
+use std::alloc::{alloc_zeroed, dealloc, handle_alloc_error, Layout};
+use std::marker::PhantomData;
+use std::mem::size_of;
+use std::panic::{RefUnwindSafe, UnwindSafe};
+use std::ptr::{slice_from_raw_parts_mut, NonNull};
+
+use crate::chars::Char;
+
+const MAX_MATRIX_SIZE: usize = 100 * 1024; // 100*1024 = 100KB
+
+// these two aren't hard maxima, instead we simply allow whatever will fit into memory
+const MAX_HAYSTACK_LEN: usize = 2048; // 64KB
+const MAX_NEEDLE_LEN: usize = 2048; // 64KB
+
+struct MatrixLayout<C: Char> {
+    haystack_len: usize,
+    needle_len: usize,
+    layout: Layout,
+    haystack_off: usize,
+    bonus_off: usize,
+    rows_off: usize,
+    score_off: usize,
+    matrix_off: usize,
+    _phantom: PhantomData<C>,
+}
+impl<C: Char> MatrixLayout<C> {
+    fn new(haystack_len: usize, needle_len: usize) -> MatrixLayout<C> {
+        assert!(haystack_len >= needle_len);
+        assert!(haystack_len <= u32::MAX as usize);
+        let mut layout = Layout::from_size_align(0, 1).unwrap();
+        let haystack_layout = Layout::array::<C>(haystack_len).unwrap();
+        let bonus_layout = Layout::array::<u8>(haystack_len).unwrap();
+        let rows_layout = Layout::array::<u16>(needle_len).unwrap();
+        let score_layout = Layout::array::<ScoreCell>(haystack_len + 1 - needle_len).unwrap();
+        let matrix_layout =
+            Layout::array::<MatrixCell>((haystack_len + 1 - needle_len) * needle_len).unwrap();
+
+        let haystack_off;
+        (layout, haystack_off) = layout.extend(haystack_layout).unwrap();
+        let bonus_off;
+        (layout, bonus_off) = layout.extend(bonus_layout).unwrap();
+        let rows_off;
+        (layout, rows_off) = layout.extend(rows_layout).unwrap();
+        let score_off;
+        (layout, score_off) = layout.extend(score_layout).unwrap();
+        let matrix_off;
+        (layout, matrix_off) = layout.extend(matrix_layout).unwrap();
+        MatrixLayout {
+            haystack_len,
+            needle_len,
+            layout,
+            haystack_off,
+            bonus_off,
+            rows_off,
+            score_off,
+            matrix_off,
+            _phantom: PhantomData,
+        }
+    }
+    /// # Safety
+    ///
+    /// `ptr` must point at an allocated with MARTIX_ALLOC_LAYOUT
+    #[allow(clippy::type_complexity)]
+    unsafe fn fieds_from_ptr(
+        &self,
+        ptr: NonNull<u8>,
+    ) -> (
+        *mut [C],
+        *mut [u8],
+        *mut [u16],
+        *mut [ScoreCell],
+        *mut [MatrixCell],
+    ) {
+        let base = ptr.as_ptr();
+        let haystack = base.add(self.haystack_off) as *mut C;
+        let haystack = slice_from_raw_parts_mut(haystack, self.haystack_len);
+        let bonus = base.add(self.bonus_off);
+        let bonus = slice_from_raw_parts_mut(bonus, self.haystack_len);
+        let rows = base.add(self.rows_off) as *mut u16;
+        let rows = slice_from_raw_parts_mut(rows, self.needle_len);
+        let cells = base.add(self.score_off) as *mut ScoreCell;
+        let cells = slice_from_raw_parts_mut(cells, self.haystack_len + 1 - self.needle_len);
+        let matrix = base.add(self.matrix_off) as *mut MatrixCell;
+        let matrix = slice_from_raw_parts_mut(
+            matrix,
+            (self.haystack_len + 1 - self.needle_len) * self.haystack_len,
+        );
+        (haystack, bonus, rows, cells, matrix)
+    }
+}
+
+const _SIZE_CHECK: () = {
+    if size_of::<ScoreCell>() != 8 {
+        panic!()
+    }
+};
+
+// make this act like a u64
+#[repr(align(8))]
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub(crate) struct ScoreCell {
+    pub score: u16,
+    pub consecutive_bonus: u8,
+    pub matched: bool,
+}
+
+pub(crate) struct MatcherDataView<'a, C: Char> {
+    pub haystack: &'a mut [C],
+    // stored as a separate array instead of struct
+    // to avoid padding since char is too large and u8 too small :/
+    pub bonus: &'a mut [u8],
+    pub current_row: &'a mut [ScoreCell],
+    pub row_offs: &'a mut [u16],
+    pub matrix_cells: &'a mut [MatrixCell],
+}
+#[repr(transparent)]
+pub struct MatrixCell(pub(crate) u8);
+
+impl MatrixCell {
+    pub fn set(&mut self, p_match: bool, m_match: bool) {
+        self.0 = p_match as u8 | ((m_match as u8) << 1);
+    }
+
+    pub fn get(&self, m_matrix: bool) -> bool {
+        let mask = m_matrix as u8 + 1;
+        (self.0 & mask) != 0
+    }
+}
+
+// we only use this to construct the layout for the slab allocation
+#[allow(unused)]
+struct MatcherData {
+    haystack: [char; MAX_HAYSTACK_LEN],
+    bonus: [u8; MAX_HAYSTACK_LEN],
+    row_offs: [u16; MAX_NEEDLE_LEN],
+    scratch_space: [ScoreCell; MAX_HAYSTACK_LEN],
+    matrix: [u8; MAX_MATRIX_SIZE],
+}
+
+pub(crate) struct MatrixSlab(NonNull<u8>);
+unsafe impl Sync for MatrixSlab {}
+unsafe impl Send for MatrixSlab {}
+impl UnwindSafe for MatrixSlab {}
+impl RefUnwindSafe for MatrixSlab {}
+
+impl MatrixSlab {
+    pub fn new() -> Self {
+        let layout = Layout::new::<MatcherData>();
+        // safety: the matrix is never zero sized (hardcoded constants)
+        let ptr = unsafe { alloc_zeroed(layout) };
+        let Some(ptr) = NonNull::new(ptr) else {
+            handle_alloc_error(layout)
+        };
+        MatrixSlab(ptr.cast())
+    }
+
+    pub(crate) fn alloc<C: Char>(
+        &mut self,
+        haystack_: &[C],
+        needle_len: usize,
+    ) -> Option<MatcherDataView<'_, C>> {
+        let cells = haystack_.len() * needle_len;
+        if cells > MAX_MATRIX_SIZE
+            || haystack_.len() > u16::MAX as usize
+            // ensures that scores never overflow
+            || needle_len > MAX_NEEDLE_LEN
+        {
+            return None;
+        }
+        let matrix_layout = MatrixLayout::<C>::new(haystack_.len(), needle_len);
+        if matrix_layout.layout.size() > size_of::<MatcherData>() {
+            return None;
+        }
+        unsafe {
+            // safely: this allocation is valid for MATRIX_ALLOC_LAYOUT
+            let (haystack, bonus, rows, current_row, matrix_cells) =
+                matrix_layout.fieds_from_ptr(self.0);
+            // copy haystack before creating references to ensure we don't create
+            // references to invalid chars (which may or may not be UB)
+            haystack_
+                .as_ptr()
+                .copy_to_nonoverlapping(haystack as *mut _, haystack_.len());
+            Some(MatcherDataView {
+                haystack: &mut *haystack,
+                row_offs: &mut *rows,
+                bonus: &mut *bonus,
+                current_row: &mut *current_row,
+                matrix_cells: &mut *matrix_cells,
+            })
+        }
+    }
+}
+
+impl Drop for MatrixSlab {
+    fn drop(&mut self) {
+        unsafe { dealloc(self.0.as_ptr(), Layout::new::<MatcherData>()) };
+    }
+}
diff --git a/matcher/src/pattern.rs b/matcher/src/pattern.rs
new file mode 100644
index 00000000..495feede
--- /dev/null
+++ b/matcher/src/pattern.rs
@@ -0,0 +1,566 @@
+//! This module provides a slightly higher level API for matching strings.
+
+use std::cmp::Reverse;
+
+use crate::{chars, Matcher, Utf32Str};
+
+#[cfg(test)]
+mod tests;
+
+use crate::Utf32String;
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
+#[non_exhaustive]
+/// How to treat a case mismatch between two characters.
+pub enum CaseMatching {
+    /// Characters never match their case folded version (`a != A`).
+    #[cfg_attr(not(feature = "unicode-casefold"), default)]
+    Respect,
+    /// Characters always match their case folded version (`a == A`).
+    #[cfg(feature = "unicode-casefold")]
+    Ignore,
+    /// Acts like [`Ignore`](CaseMatching::Ignore) if all characters in a pattern atom are
+    /// lowercase and like [`Respect`](CaseMatching::Respect) otherwise.
+    #[default]
+    #[cfg(feature = "unicode-casefold")]
+    Smart,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
+#[non_exhaustive]
+/// How to handle unicode normalization,
+pub enum Normalization {
+    /// Characters never match their normalized version (`a != ä`).
+    #[cfg_attr(not(feature = "unicode-normalization"), default)]
+    Never,
+    /// Acts like [`Never`](Normalization::Never) if any character in a pattern atom
+    /// would need to be normalized. Otherwise normalization occurs (`a == ä` but `ä != a`).
+    #[default]
+    #[cfg(feature = "unicode-normalization")]
+    Smart,
+}
+
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+#[non_exhaustive]
+/// The kind of matching algorithm to run for an atom.
+pub enum AtomKind {
+    /// Fuzzy matching where the needle must match any haystack characters
+    /// (match can contain gaps). This atom kind is used by default if no
+    /// special syntax is used. There is no negated fuzzy matching (too
+    /// many false positives).
+    ///
+    /// See also [`Matcher::fuzzy_match`](crate::Matcher::fuzzy_match).
+    Fuzzy,
+    /// The needle must match a contiguous sequence of haystack characters
+    /// without gaps.  This atom kind is parsed from the following syntax:
+    /// `'foo` and `!foo` (negated).
+    ///
+    /// See also [`Matcher::substring_match`](crate::Matcher::substring_match).
+    Substring,
+    /// The needle must match all leading haystack characters without gaps or
+    /// prefix. This atom kind is parsed from the following syntax: `^foo` and
+    /// `!^foo` (negated).
+    ///
+    /// See also [`Matcher::prefix_match`](crate::Matcher::prefix_match).
+    Prefix,
+    /// The needle must match all trailing haystack characters without gaps or
+    /// postfix. This atom kind is parsed from the following syntax: `foo$` and
+    /// `!foo$` (negated).
+    ///
+    /// See also [`Matcher::postfix_match`](crate::Matcher::postfix_match).
+    Postfix,
+    /// The needle must match all haystack characters without gaps or prefix.
+    /// This atom kind is parsed from the following syntax: `^foo$` and `!^foo$`
+    /// (negated).
+    ///
+    /// See also [`Matcher::exact_match`](crate::Matcher::exact_match).
+    Exact,
+}
+
+/// A single pattern component that is matched with a single [`Matcher`] function
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct Atom {
+    /// Whether this pattern atom is a negative match.
+    /// A negative pattern atom will prevent haystacks matching it from
+    /// being matchend. It does not contribute to scoring/indices
+    pub negative: bool,
+    /// The kind of match that this pattern performs
+    pub kind: AtomKind,
+    needle: Utf32String,
+    ignore_case: bool,
+    normalize: bool,
+}
+
+impl Atom {
+    /// Creates a single [`Atom`] from a string by performing unicode
+    /// normalization and case folding (if necessary). Optionally `\ ` can
+    /// be escaped to ` `.
+    pub fn new(
+        needle: &str,
+        case: CaseMatching,
+        normalize: Normalization,
+        kind: AtomKind,
+        escape_whitespace: bool,
+    ) -> Atom {
+        Atom::new_inner(needle, case, normalize, kind, escape_whitespace, false)
+    }
+
+    fn new_inner(
+        needle: &str,
+        case: CaseMatching,
+        normalization: Normalization,
+        kind: AtomKind,
+        escape_whitespace: bool,
+        append_dollar: bool,
+    ) -> Atom {
+        let mut ignore_case;
+        let mut normalize;
+        #[cfg(feature = "unicode-normalization")]
+        {
+            normalize = matches!(normalization, Normalization::Smart);
+        }
+        #[cfg(not(feature = "unicode-normalization"))]
+        {
+            normalize = false;
+        }
+        let needle = if needle.is_ascii() {
+            let mut needle = if escape_whitespace {
+                if let Some((start, rem)) = needle.split_once("\\ ") {
+                    let mut needle = start.to_owned();
+                    for rem in rem.split("\\ ") {
+                        needle.push(' ');
+                        needle.push_str(rem);
+                    }
+                    needle
+                } else {
+                    needle.to_owned()
+                }
+            } else {
+                needle.to_owned()
+            };
+
+            match case {
+                #[cfg(feature = "unicode-casefold")]
+                CaseMatching::Ignore => {
+                    ignore_case = true;
+                    needle.make_ascii_lowercase()
+                }
+                #[cfg(feature = "unicode-casefold")]
+                CaseMatching::Smart => {
+                    ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
+                }
+                CaseMatching::Respect => ignore_case = false,
+            }
+            if append_dollar {
+                needle.push('$');
+            }
+            Utf32String::Ascii(needle.into_boxed_str())
+        } else {
+            let mut needle_ = Vec::with_capacity(needle.len());
+            #[cfg(feature = "unicode-casefold")]
+            {
+                ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
+            }
+            #[cfg(not(feature = "unicode-casefold"))]
+            {
+                ignore_case = false;
+            }
+            #[cfg(feature = "unicode-normalization")]
+            {
+                normalize = matches!(normalization, Normalization::Smart);
+            }
+            if escape_whitespace {
+                let mut saw_backslash = false;
+                for mut c in chars::graphemes(needle) {
+                    if saw_backslash {
+                        if c == ' ' {
+                            needle_.push(' ');
+                            saw_backslash = false;
+                            continue;
+                        } else {
+                            needle_.push('\\');
+                        }
+                    }
+                    saw_backslash = c == '\\';
+                    match case {
+                        #[cfg(feature = "unicode-casefold")]
+                        CaseMatching::Ignore => c = chars::to_lower_case(c),
+                        #[cfg(feature = "unicode-casefold")]
+                        CaseMatching::Smart => {
+                            ignore_case = ignore_case && !chars::is_upper_case(c)
+                        }
+                        CaseMatching::Respect => (),
+                    }
+                    match normalization {
+                        #[cfg(feature = "unicode-normalization")]
+                        Normalization::Smart => {
+                            normalize = normalize && chars::normalize(c) == c;
+                        }
+                        Normalization::Never => (),
+                    }
+                    needle_.push(c);
+                }
+            } else {
+                let chars = chars::graphemes(needle).map(|mut c| {
+                    match case {
+                        #[cfg(feature = "unicode-casefold")]
+                        CaseMatching::Ignore => c = chars::to_lower_case(c),
+                        #[cfg(feature = "unicode-casefold")]
+                        CaseMatching::Smart => {
+                            ignore_case = ignore_case && !chars::is_upper_case(c);
+                        }
+                        CaseMatching::Respect => (),
+                    }
+                    match normalization {
+                        #[cfg(feature = "unicode-normalization")]
+                        Normalization::Smart => {
+                            normalize = normalize && chars::normalize(c) == c;
+                        }
+                        Normalization::Never => (),
+                    }
+                    c
+                });
+                needle_.extend(chars);
+            };
+            if append_dollar {
+                needle_.push('$');
+            }
+            Utf32String::Unicode(needle_.into_boxed_slice())
+        };
+        Atom {
+            kind,
+            needle,
+            negative: false,
+            ignore_case,
+            normalize,
+        }
+    }
+
+    /// Parse a pattern atom from a string. Some special trailing and leading
+    /// characters can be used to control the atom kind. See [`AtomKind`] for
+    /// details.
+    pub fn parse(raw: &str, case: CaseMatching, normalize: Normalization) -> Atom {
+        let mut atom = raw;
+        let invert = match atom.as_bytes() {
+            [b'!', ..] => {
+                atom = &atom[1..];
+                true
+            }
+            [b'\\', b'!', ..] => {
+                atom = &atom[1..];
+                false
+            }
+            _ => false,
+        };
+
+        let mut kind = match atom.as_bytes() {
+            [b'^', ..] => {
+                atom = &atom[1..];
+                AtomKind::Prefix
+            }
+            [b'\'', ..] => {
+                atom = &atom[1..];
+                AtomKind::Substring
+            }
+            [b'\\', b'^' | b'\'', ..] => {
+                atom = &atom[1..];
+                AtomKind::Fuzzy
+            }
+            _ => AtomKind::Fuzzy,
+        };
+
+        let mut append_dollar = false;
+        match atom.as_bytes() {
+            [.., b'\\', b'$'] => {
+                append_dollar = true;
+                atom = &atom[..atom.len() - 2]
+            }
+            [.., b'$'] => {
+                kind = if kind == AtomKind::Fuzzy {
+                    AtomKind::Postfix
+                } else {
+                    AtomKind::Exact
+                };
+                atom = &atom[..atom.len() - 1]
+            }
+            _ => (),
+        }
+
+        if invert && kind == AtomKind::Fuzzy {
+            kind = AtomKind::Substring
+        }
+
+        let mut pattern = Atom::new_inner(atom, case, normalize, kind, true, append_dollar);
+        pattern.negative = invert;
+        pattern
+    }
+
+    /// Matches this pattern against `haystack` (using the allocation and configuration
+    /// from `matcher`) and calculates a ranking score. See the [`Matcher`].
+    /// Documentation for more details.
+    ///
+    /// *Note:*  The `ignore_case` setting is overwritten to match the casing of
+    /// each pattern atom.
+    pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u16> {
+        matcher.config.ignore_case = self.ignore_case;
+        matcher.config.normalize = self.normalize;
+        let pattern_score = match self.kind {
+            AtomKind::Exact => matcher.exact_match(haystack, self.needle.slice(..)),
+            AtomKind::Fuzzy => matcher.fuzzy_match(haystack, self.needle.slice(..)),
+            AtomKind::Substring => matcher.substring_match(haystack, self.needle.slice(..)),
+            AtomKind::Prefix => matcher.prefix_match(haystack, self.needle.slice(..)),
+            AtomKind::Postfix => matcher.postfix_match(haystack, self.needle.slice(..)),
+        };
+        if self.negative {
+            if pattern_score.is_some() {
+                return None;
+            }
+            Some(0)
+        } else {
+            pattern_score
+        }
+    }
+
+    /// Matches this pattern against `haystack` (using the allocation and
+    /// configuration from `matcher`), calculates a ranking score and the match
+    /// indices. See the [`Matcher`]. Documentation for more
+    /// details.
+    ///
+    /// *Note:*  The `ignore_case` setting is overwritten to match the casing of
+    /// each pattern atom.
+    ///
+    /// *Note:*  The `indices` vector is not cleared by this function.
+    pub fn indices(
+        &self,
+        haystack: Utf32Str<'_>,
+        matcher: &mut Matcher,
+        indices: &mut Vec<u32>,
+    ) -> Option<u16> {
+        matcher.config.ignore_case = self.ignore_case;
+        matcher.config.normalize = self.normalize;
+        if self.negative {
+            let pattern_score = match self.kind {
+                AtomKind::Exact => matcher.exact_match(haystack, self.needle.slice(..)),
+                AtomKind::Fuzzy => matcher.fuzzy_match(haystack, self.needle.slice(..)),
+                AtomKind::Substring => matcher.substring_match(haystack, self.needle.slice(..)),
+                AtomKind::Prefix => matcher.prefix_match(haystack, self.needle.slice(..)),
+                AtomKind::Postfix => matcher.postfix_match(haystack, self.needle.slice(..)),
+            };
+            pattern_score.is_none().then_some(0)
+        } else {
+            match self.kind {
+                AtomKind::Exact => matcher.exact_indices(haystack, self.needle.slice(..), indices),
+                AtomKind::Fuzzy => matcher.fuzzy_indices(haystack, self.needle.slice(..), indices),
+                AtomKind::Substring => {
+                    matcher.substring_indices(haystack, self.needle.slice(..), indices)
+                }
+                AtomKind::Prefix => {
+                    matcher.prefix_indices(haystack, self.needle.slice(..), indices)
+                }
+                AtomKind::Postfix => {
+                    matcher.postfix_indices(haystack, self.needle.slice(..), indices)
+                }
+            }
+        }
+    }
+
+    /// Returns the needle text that is passed to the matcher. All indices
+    /// produced by the `indices` functions produce char indices used to index
+    /// this text
+    pub fn needle_text(&self) -> Utf32Str<'_> {
+        self.needle.slice(..)
+    }
+    /// Convenience function to easily match (and sort) a (relatively small)
+    /// list of inputs.
+    ///
+    /// *Note* This function is not recommended for building a full fuzzy
+    /// matching application that can match large numbers of matches (like all
+    /// files in a directory) as all matching is done on the current thread,
+    /// effectively blocking the UI. For such applications the high level
+    /// `nucleo` crate can be used instead.
+    pub fn match_list<T: AsRef<str>>(
+        &self,
+        items: impl IntoIterator<Item = T>,
+        matcher: &mut Matcher,
+    ) -> Vec<(T, u16)> {
+        if self.needle.is_empty() {
+            return items.into_iter().map(|item| (item, 0)).collect();
+        }
+        let mut buf = Vec::new();
+        let mut items: Vec<_> = items
+            .into_iter()
+            .filter_map(|item| {
+                self.score(Utf32Str::new(item.as_ref(), &mut buf), matcher)
+                    .map(|score| (item, score))
+            })
+            .collect();
+        items.sort_by_key(|(_, score)| Reverse(*score));
+        items
+    }
+}
+
+fn pattern_atoms(pattern: &str) -> impl Iterator<Item = &str> + '_ {
+    let mut saw_backslash = false;
+    pattern.split(move |c| {
+        saw_backslash = match c {
+            c if c.is_whitespace() && !saw_backslash => return true,
+            '\\' => true,
+            _ => false,
+        };
+        false
+    })
+}
+
+#[derive(Debug, Default)]
+/// A text pattern made up of (potentially multiple) [atoms](crate::pattern::Atom).
+#[non_exhaustive]
+pub struct Pattern {
+    /// The individual pattern (words) in this pattern
+    pub atoms: Vec<Atom>,
+}
+
+impl Pattern {
+    /// Creates a pattern where each word is matched individually (whitespaces
+    /// can be escaped with `\`). Otherwise no parsing is performed (so `$`, `!`,
+    /// `'` and `^` don't receive special treatment). If you want to match the entire
+    /// pattern as a single needle use a single [`Atom`] instead.
+    pub fn new(
+        pattern: &str,
+        case_matching: CaseMatching,
+        normalize: Normalization,
+        kind: AtomKind,
+    ) -> Pattern {
+        let atoms = pattern_atoms(pattern)
+            .filter_map(|pat| {
+                let pat = Atom::new(pat, case_matching, normalize, kind, true);
+                (!pat.needle.is_empty()).then_some(pat)
+            })
+            .collect();
+        Pattern { atoms }
+    }
+    /// Creates a pattern where each word is matched individually (whitespaces
+    /// can be escaped with `\`). And `$`, `!`, `'` and `^` at word boundaries will
+    /// cause different matching behaviour (see [`AtomKind`]). These can be
+    /// escaped with backslash.
+    pub fn parse(pattern: &str, case_matching: CaseMatching, normalize: Normalization) -> Pattern {
+        let atoms = pattern_atoms(pattern)
+            .filter_map(|pat| {
+                let pat = Atom::parse(pat, case_matching, normalize);
+                (!pat.needle.is_empty()).then_some(pat)
+            })
+            .collect();
+        Pattern { atoms }
+    }
+
+    /// Convenience function to easily match (and sort) a (relatively small)
+    /// list of inputs.
+    ///
+    /// *Note* This function is not recommended for building a full fuzzy
+    /// matching application that can match large numbers of matches (like all
+    /// files in a directory) as all matching is done on the current thread,
+    /// effectively blocking the UI. For such applications the high level
+    /// `nucleo` crate can be used instead.
+    pub fn match_list<T: AsRef<str>>(
+        &self,
+        items: impl IntoIterator<Item = T>,
+        matcher: &mut Matcher,
+    ) -> Vec<(T, u32)> {
+        if self.atoms.is_empty() {
+            return items.into_iter().map(|item| (item, 0)).collect();
+        }
+        let mut buf = Vec::new();
+        let mut items: Vec<_> = items
+            .into_iter()
+            .filter_map(|item| {
+                self.score(Utf32Str::new(item.as_ref(), &mut buf), matcher)
+                    .map(|score| (item, score))
+            })
+            .collect();
+        items.sort_by_key(|(_, score)| Reverse(*score));
+        items
+    }
+
+    /// Matches this pattern against `haystack` (using the allocation and configuration
+    /// from `matcher`) and calculates a ranking score. See the [`Matcher`]
+    /// documentation for more details.
+    ///
+    /// *Note:*  The `ignore_case` setting is overwritten to match the casing of
+    /// each pattern atom.
+    pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u32> {
+        if self.atoms.is_empty() {
+            return Some(0);
+        }
+        let mut score = 0;
+        for pattern in &self.atoms {
+            score += pattern.score(haystack, matcher)? as u32;
+        }
+        Some(score)
+    }
+
+    /// Matches this pattern against `haystack` (using the allocation and
+    /// configuration from `matcher`), calculates a ranking score and the match
+    /// indices. See the [`Matcher`] documentation for more
+    /// details.
+    ///
+    /// *Note:*  The `ignore_case` setting is overwritten to match the casing of
+    /// each pattern atom.
+    ///
+    /// *Note:*  The indices for each pattern are calculated individually
+    /// and simply appended to the `indices` vector and not deduplicated/sorted.
+    /// This allows associating the match indices to their source pattern. If
+    /// required (like for highlighting) unique/sorted indices can be obtained
+    /// as follows:
+    ///
+    /// ```
+    /// # let mut indices: Vec<u32> = Vec::new();
+    /// indices.sort_unstable();
+    /// indices.dedup();
+    /// ```
+    pub fn indices(
+        &self,
+        haystack: Utf32Str<'_>,
+        matcher: &mut Matcher,
+        indices: &mut Vec<u32>,
+    ) -> Option<u32> {
+        if self.atoms.is_empty() {
+            return Some(0);
+        }
+        let mut score = 0;
+        for pattern in &self.atoms {
+            score += pattern.indices(haystack, matcher, indices)? as u32;
+        }
+        Some(score)
+    }
+
+    /// Refreshes this pattern by reparsing it from a string. This is mostly
+    /// equivalent to just constructing a new pattern using [`Pattern::parse`]
+    /// but is slightly more efficient by reusing some allocations
+    pub fn reparse(
+        &mut self,
+        pattern: &str,
+        case_matching: CaseMatching,
+        normalize: Normalization,
+    ) {
+        self.atoms.clear();
+        let atoms = pattern_atoms(pattern).filter_map(|atom| {
+            let atom = Atom::parse(atom, case_matching, normalize);
+            if atom.needle.is_empty() {
+                return None;
+            }
+            Some(atom)
+        });
+        self.atoms.extend(atoms);
+    }
+}
+
+impl Clone for Pattern {
+    fn clone(&self) -> Self {
+        Self {
+            atoms: self.atoms.clone(),
+        }
+    }
+
+    fn clone_from(&mut self, source: &Self) {
+        self.atoms.clone_from(&source.atoms);
+    }
+}
diff --git a/matcher/src/pattern/tests.rs b/matcher/src/pattern/tests.rs
new file mode 100644
index 00000000..88880ba9
--- /dev/null
+++ b/matcher/src/pattern/tests.rs
@@ -0,0 +1,149 @@
+use crate::pattern::{Atom, AtomKind, CaseMatching, Normalization, Pattern};
+
+#[test]
+fn negative() {
+    let pat = Atom::parse("!foo", CaseMatching::Smart, Normalization::Smart);
+    assert!(pat.negative);
+    assert_eq!(pat.kind, AtomKind::Substring);
+    assert_eq!(pat.needle.to_string(), "foo");
+    let pat = Atom::parse("!^foo", CaseMatching::Smart, Normalization::Smart);
+    assert!(pat.negative);
+    assert_eq!(pat.kind, AtomKind::Prefix);
+    assert_eq!(pat.needle.to_string(), "foo");
+    let pat = Atom::parse("!foo$", CaseMatching::Smart, Normalization::Smart);
+    assert!(pat.negative);
+    assert_eq!(pat.kind, AtomKind::Postfix);
+    assert_eq!(pat.needle.to_string(), "foo");
+    let pat = Atom::parse("!^foo$", CaseMatching::Smart, Normalization::Smart);
+    assert!(pat.negative);
+    assert_eq!(pat.kind, AtomKind::Exact);
+    assert_eq!(pat.needle.to_string(), "foo");
+}
+
+#[test]
+fn pattern_kinds() {
+    let pat = Atom::parse("foo", CaseMatching::Smart, Normalization::Smart);
+    assert!(!pat.negative);
+    assert_eq!(pat.kind, AtomKind::Fuzzy);
+    assert_eq!(pat.needle.to_string(), "foo");
+    let pat = Atom::parse("'foo", CaseMatching::Smart, Normalization::Smart);
+    assert!(!pat.negative);
+    assert_eq!(pat.kind, AtomKind::Substring);
+    assert_eq!(pat.needle.to_string(), "foo");
+    let pat = Atom::parse("^foo", CaseMatching::Smart, Normalization::Smart);
+    assert!(!pat.negative);
+    assert_eq!(pat.kind, AtomKind::Prefix);
+    assert_eq!(pat.needle.to_string(), "foo");
+    let pat = Atom::parse("foo$", CaseMatching::Smart, Normalization::Smart);
+    assert!(!pat.negative);
+    assert_eq!(pat.kind, AtomKind::Postfix);
+    assert_eq!(pat.needle.to_string(), "foo");
+    let pat = Atom::parse("^foo$", CaseMatching::Smart, Normalization::Smart);
+    assert!(!pat.negative);
+    assert_eq!(pat.kind, AtomKind::Exact);
+    assert_eq!(pat.needle.to_string(), "foo");
+}
+
+#[test]
+fn case_matching() {
+    let pat = Atom::parse("foo", CaseMatching::Smart, Normalization::Smart);
+    assert!(pat.ignore_case);
+    assert_eq!(pat.needle.to_string(), "foo");
+    let pat = Atom::parse("Foo", CaseMatching::Smart, Normalization::Smart);
+    assert!(!pat.ignore_case);
+    assert_eq!(pat.needle.to_string(), "Foo");
+    let pat = Atom::parse("Foo", CaseMatching::Ignore, Normalization::Smart);
+    assert!(pat.ignore_case);
+    assert_eq!(pat.needle.to_string(), "foo");
+    let pat = Atom::parse("Foo", CaseMatching::Respect, Normalization::Smart);
+    assert!(!pat.ignore_case);
+    assert_eq!(pat.needle.to_string(), "Foo");
+    let pat = Atom::parse("Foo", CaseMatching::Respect, Normalization::Smart);
+    assert!(!pat.ignore_case);
+    assert_eq!(pat.needle.to_string(), "Foo");
+    let pat = Atom::parse("Äxx", CaseMatching::Ignore, Normalization::Smart);
+    assert!(pat.ignore_case);
+    assert_eq!(pat.needle.to_string(), "äxx");
+    let pat = Atom::parse("Äxx", CaseMatching::Respect, Normalization::Smart);
+    assert!(!pat.ignore_case);
+    let pat = Atom::parse("Axx", CaseMatching::Smart, Normalization::Smart);
+    assert!(!pat.ignore_case);
+    assert_eq!(pat.needle.to_string(), "Axx");
+    let pat = Atom::parse("你xx", CaseMatching::Smart, Normalization::Smart);
+    assert!(pat.ignore_case);
+    assert_eq!(pat.needle.to_string(), "你xx");
+    let pat = Atom::parse("你xx", CaseMatching::Ignore, Normalization::Smart);
+    assert!(pat.ignore_case);
+    assert_eq!(pat.needle.to_string(), "你xx");
+    let pat = Atom::parse("Ⲽxx", CaseMatching::Smart, Normalization::Smart);
+    assert!(!pat.ignore_case);
+    assert_eq!(pat.needle.to_string(), "Ⲽxx");
+    let pat = Atom::parse("Ⲽxx", CaseMatching::Ignore, Normalization::Smart);
+    assert!(pat.ignore_case);
+    assert_eq!(pat.needle.to_string(), "ⲽxx");
+}
+
+#[test]
+fn escape() {
+    let pat = Atom::parse("foo\\ bar", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "foo bar");
+    let pat = Atom::parse("\\!foo", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "!foo");
+    assert_eq!(pat.kind, AtomKind::Fuzzy);
+    let pat = Atom::parse("\\'foo", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "'foo");
+    assert_eq!(pat.kind, AtomKind::Fuzzy);
+    let pat = Atom::parse("\\^foo", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "^foo");
+    assert_eq!(pat.kind, AtomKind::Fuzzy);
+    let pat = Atom::parse("foo\\$", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "foo$");
+    assert_eq!(pat.kind, AtomKind::Fuzzy);
+    let pat = Atom::parse("^foo\\$", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "foo$");
+    assert_eq!(pat.kind, AtomKind::Prefix);
+    let pat = Atom::parse("\\^foo\\$", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "^foo$");
+    assert_eq!(pat.kind, AtomKind::Fuzzy);
+    let pat = Atom::parse("\\!^foo\\$", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "!^foo$");
+    assert_eq!(pat.kind, AtomKind::Fuzzy);
+    let pat = Atom::parse("!\\^foo\\$", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "^foo$");
+    assert_eq!(pat.kind, AtomKind::Substring);
+}
+
+#[test]
+fn pattern_atoms() {
+    assert_eq!(
+        Pattern::parse("a b", CaseMatching::Ignore, Normalization::Smart).atoms,
+        vec![
+            Atom::parse("a", CaseMatching::Ignore, Normalization::Smart),
+            Atom::parse("b", CaseMatching::Ignore, Normalization::Smart),
+        ]
+    );
+
+    assert_eq!(
+        Pattern::parse("a\n b", CaseMatching::Ignore, Normalization::Smart).atoms,
+        vec![
+            Atom::parse("a", CaseMatching::Ignore, Normalization::Smart),
+            Atom::parse("b", CaseMatching::Ignore, Normalization::Smart),
+        ]
+    );
+
+    assert_eq!(
+        Pattern::parse("  a b\r\n", CaseMatching::Ignore, Normalization::Smart).atoms,
+        vec![
+            Atom::parse("a", CaseMatching::Ignore, Normalization::Smart),
+            Atom::parse("b", CaseMatching::Ignore, Normalization::Smart),
+        ]
+    );
+
+    assert_eq!(
+        Pattern::parse("ほ　げ", CaseMatching::Smart, Normalization::Smart).atoms,
+        vec![
+            Atom::parse("ほ", CaseMatching::Smart, Normalization::Smart),
+            Atom::parse("げ", CaseMatching::Smart, Normalization::Smart),
+        ],
+    )
+}
diff --git a/matcher/src/prefilter.rs b/matcher/src/prefilter.rs
new file mode 100644
index 00000000..1e79cc2c
--- /dev/null
+++ b/matcher/src/prefilter.rs
@@ -0,0 +1,98 @@
+use ::memchr::{memchr, memchr2, memrchr, memrchr2};
+
+use crate::chars::Char;
+use crate::utf32_str::Utf32Str;
+use crate::Matcher;
+
+#[inline(always)]
+fn find_ascii_ignore_case(c: u8, haystack: &[u8]) -> Option<usize> {
+    if c >= b'a' && c <= b'z' {
+        memchr2(c, c - 32, haystack)
+    } else {
+        memchr(c, haystack)
+    }
+}
+
+#[inline(always)]
+fn find_ascii_ignore_case_rev(c: u8, haystack: &[u8]) -> Option<usize> {
+    if c >= b'a' && c <= b'z' {
+        memrchr2(c, c - 32, haystack)
+    } else {
+        memrchr(c, haystack)
+    }
+}
+
+impl Matcher {
+    pub(crate) fn prefilter_ascii(
+        &self,
+        mut haystack: &[u8],
+        needle: &[u8],
+        only_greedy: bool,
+    ) -> Option<(usize, usize, usize)> {
+        if self.config.ignore_case {
+            let start =
+                find_ascii_ignore_case(needle[0], &haystack[..haystack.len() - needle.len() + 1])?;
+            let mut greedy_end = start + 1;
+            haystack = &haystack[greedy_end..];
+            for &c in &needle[1..] {
+                let idx = find_ascii_ignore_case(c, haystack)? + 1;
+                greedy_end += idx;
+                haystack = &haystack[idx..];
+            }
+            if only_greedy {
+                Some((start, greedy_end, greedy_end))
+            } else {
+                let end = greedy_end
+                    + find_ascii_ignore_case_rev(*needle.last().unwrap(), haystack)
+                        .map_or(0, |i| i + 1);
+                Some((start, greedy_end, end))
+            }
+        } else {
+            let start = memchr(needle[0], &haystack[..haystack.len() - needle.len() + 1])?;
+            let mut greedy_end = start + 1;
+            haystack = &haystack[greedy_end..];
+            for &c in &needle[1..] {
+                let idx = memchr(c, haystack)? + 1;
+                greedy_end += idx;
+                haystack = &haystack[idx..];
+            }
+            if only_greedy {
+                Some((start, greedy_end, greedy_end))
+            } else {
+                let end =
+                    greedy_end + memrchr(*needle.last().unwrap(), haystack).map_or(0, |i| i + 1);
+                Some((start, greedy_end, end))
+            }
+        }
+    }
+
+    pub(crate) fn prefilter_non_ascii(
+        &self,
+        haystack: &[char],
+        needle: Utf32Str<'_>,
+        only_greedy: bool,
+    ) -> Option<(usize, usize)> {
+        let needle_char = needle.get(0);
+        let start = haystack[..haystack.len() - needle.len() + 1]
+            .iter()
+            .position(|c| c.normalize(&self.config) == needle_char)?;
+        let needle_char = needle.last();
+        if only_greedy {
+            if haystack.len() - start < needle.len() {
+                return None;
+            }
+            Some((start, start + 1))
+        } else {
+            let end = haystack.len()
+                - haystack[start + 1..]
+                    .iter()
+                    .rev()
+                    .position(|c| c.normalize(&self.config) == needle_char)?;
+            if end - start < needle.len() {
+                return None;
+            }
+
+            Some((start, end))
+        }
+    }
+}
diff --git a/matcher/src/score.rs b/matcher/src/score.rs
new file mode 100644
index 00000000..c934a8ef
--- /dev/null
+++ b/matcher/src/score.rs
@@ -0,0 +1,158 @@
+use std::cmp::max;
+
+use crate::chars::{Char, CharClass};
+use crate::{Config, Matcher};
+
+pub(crate) const SCORE_MATCH: u16 = 16;
+pub(crate) const PENALTY_GAP_START: u16 = 3;
+pub(crate) const PENALTY_GAP_EXTENSION: u16 = 1;
+/// If the prefer_prefix option is enabled we want to penalize
+/// the initial gap. The prefix should not be too much  
+pub(crate) const PREFIX_BONUS_SCALE: u16 = 2;
+pub(crate) const MAX_PREFIX_BONUS: u16 = BONUS_BOUNDARY;
+
+// We prefer matches at the beginning of a word, but the bonus should not be
+// too great to prevent the longer acronym matches from always winning over
+// shorter fuzzy matches. The bonus point here was specifically chosen that
+// the bonus is cancelled when the gap between the acronyms grows over
+// 8 characters, which is approximately the average length of the words found
+// in web2 dictionary and my file system.
+pub(crate) const BONUS_BOUNDARY: u16 = SCORE_MATCH / 2;
+
+// Edge-triggered bonus for matches in camelCase words.
+// Their value should be BONUS_BOUNDARY - PENALTY_GAP_EXTENSION = 7.
+// However, this priporitzes camel case over non-camel case.
+// In fzf/skim this is not a problem since they score off the max
+// consecutive bonus. However, we don't do that (because its incorrect)
+// so to avoids prioritizing camel we use a lower bonus. I think that's fine
+// usually camel case is wekaer boundary than actual wourd boundaries anyway
+// This also has the nice sideeffect of perfectly balancing out
+// camel case, snake case and the consecutive version of the word
+pub(crate) const BONUS_CAMEL123: u16 = BONUS_BOUNDARY - PENALTY_GAP_START;
+
+/// Although bonus point for non-word characters is non-contextual, we need it
+/// for computing bonus points for consecutive chunks starting with a non-word
+/// character.
+pub(crate) const BONUS_NON_WORD: u16 = BONUS_BOUNDARY;
+
+// Minimum bonus point given to characters in consecutive chunks.
+// Note that bonus points for consecutive matches shouldn't have needed if we
+// used fixed match score as in the original algorithm.
+pub(crate) const BONUS_CONSECUTIVE: u16 = PENALTY_GAP_START + PENALTY_GAP_EXTENSION;
+
+// The first character in the typed pattern usually has more significance
+// than the rest so it's important that it appears at special positions where
+// bonus points are given, e.g. "to-go" vs. "ongoing" on "og" or on "ogo".
+// The amount of the extra bonus should be limited so that the gap penalty is
+// still respected.
+pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 2;
+
+impl Config {
+    #[inline]
+    pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
+        if class > CharClass::Delimiter {
+            // transition from non word to word
+            match prev_class {
+                CharClass::Whitespace => return self.bonus_boundary_white,
+                CharClass::Delimiter => return self.bonus_boundary_delimiter,
+                CharClass::NonWord => return BONUS_BOUNDARY,
+                _ => (),
+            }
+        }
+        if prev_class == CharClass::Lower && class == CharClass::Upper
+            || prev_class != CharClass::Number && class == CharClass::Number
+        {
+            // camelCase letter123
+            BONUS_CAMEL123
+        } else if class == CharClass::Whitespace {
+            self.bonus_boundary_white
+        } else if class == CharClass::NonWord {
+            return BONUS_NON_WORD;
+        } else {
+            0
+        }
+    }
+}
+impl Matcher {
+    #[inline(always)]
+    pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
+        self.config.bonus_for(prev_class, class)
+    }
+
+    pub(crate) fn calculate_score<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
+        &mut self,
+        haystack: &[H],
+        needle: &[N],
+        start: usize,
+        end: usize,
+        indices: &mut Vec<u32>,
+    ) -> u16 {
+        if INDICES {
+            indices.reserve(needle.len());
+        }
+
+        let mut prev_class = start
+            .checked_sub(1)
+            .map(|i| haystack[i].char_class(&self.config))
+            .unwrap_or(self.config.initial_char_class);
+        let mut needle_iter = needle.iter();
+        let mut needle_char = *needle_iter.next().unwrap();
+
+        let mut in_gap = false;
+        let mut consecutive = 1;
+
+        // unrolled the first iteration to make applying the first char multiplier less awkward
+        if INDICES {
+            indices.push(start as u32)
+        }
+        let class = haystack[start].char_class(&self.config);
+        let mut first_bonus = self.bonus_for(prev_class, class);
+        let mut score = SCORE_MATCH + first_bonus * BONUS_FIRST_CHAR_MULTIPLIER;
+        prev_class = class;
+        needle_char = *needle_iter.next().unwrap_or(&needle_char);
+
+        for (i, c) in haystack[start + 1..end].iter().enumerate() {
+            let (c, class) = c.char_class_and_normalize(&self.config);
+            if c == needle_char {
+                if INDICES {
+                    indices.push(i as u32 + start as u32 + 1)
+                }
+                let mut bonus = self.bonus_for(prev_class, class);
+                if consecutive != 0 {
+                    if bonus >= BONUS_BOUNDARY && bonus > first_bonus {
+                        first_bonus = bonus
+                    }
+                    bonus = max(max(bonus, first_bonus), BONUS_CONSECUTIVE);
+                } else {
+                    first_bonus = bonus;
+                }
+                score += SCORE_MATCH + bonus;
+                in_gap = false;
+                consecutive += 1;
+                if let Some(&next) = needle_iter.next() {
+                    needle_char = next;
+                }
+            } else {
+                let penalty = if in_gap {
+                    PENALTY_GAP_EXTENSION
+                } else {
+                    PENALTY_GAP_START
+                };
+                score = score.saturating_sub(penalty);
+                in_gap = true;
+                consecutive = 0;
+            }
+            prev_class = class;
+        }
+        if self.config.prefer_prefix {
+            if start != 0 {
+                let penalty = PENALTY_GAP_START
+                    + PENALTY_GAP_START * (start - 1).min(u16::MAX as usize) as u16;
+                score += MAX_PREFIX_BONUS.saturating_sub(penalty / PREFIX_BONUS_SCALE);
+            } else {
+                score += MAX_PREFIX_BONUS;
+            }
+        }
+        score
+    }
+}
diff --git a/matcher/src/tests.rs b/matcher/src/tests.rs
new file mode 100644
index 00000000..32a02403
--- /dev/null
+++ b/matcher/src/tests.rs
@@ -0,0 +1,771 @@
+use crate::chars::Char;
+use crate::pattern::{CaseMatching, Normalization, Pattern};
+use crate::score::{
+    BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD,
+    MAX_PREFIX_BONUS, PENALTY_GAP_EXTENSION, PENALTY_GAP_START, SCORE_MATCH,
+};
+use crate::utf32_str::Utf32Str;
+use crate::{Config, Matcher};
+
+use Algorithm::*;
+
+#[derive(Debug)]
+enum Algorithm {
+    FuzzyOptimal,
+    FuzzyGreedy,
+    Substring,
+    Prefix,
+    Postfix,
+    Exact,
+}
+
+fn assert_matches(
+    algorithm: &[Algorithm],
+    normalize: bool,
+    case_sensitive: bool,
+    path: bool,
+    prefer_prefix: bool,
+    cases: &[(&str, &str, &[u32], u16)],
+) {
+    let mut config = Config {
+        normalize,
+        ignore_case: !case_sensitive,
+        prefer_prefix,
+        ..Config::DEFAULT
+    };
+    if path {
+        config.set_match_paths();
+    }
+    let mut matcher = Matcher::new(config);
+    let mut matched_indices = Vec::new();
+    let mut needle_buf = Vec::new();
+    let mut haystack_buf = Vec::new();
+    for &(haystack, needle, indices, mut score) in cases {
+        let needle = if !case_sensitive {
+            needle.to_lowercase()
+        } else {
+            needle.to_owned()
+        };
+        let needle = Utf32Str::new(&needle, &mut needle_buf);
+        let haystack = Utf32Str::new(haystack, &mut haystack_buf);
+        score += needle.len() as u16 * SCORE_MATCH;
+        for algo in algorithm {
+            println!("xx {matched_indices:?} {algo:?}");
+            matched_indices.clear();
+            let res = match algo {
+                FuzzyOptimal => matcher.fuzzy_indices(haystack, needle, &mut matched_indices),
+                FuzzyGreedy => matcher.fuzzy_indices_greedy(haystack, needle, &mut matched_indices),
+                Substring => matcher.substring_indices(haystack, needle, &mut matched_indices),
+                Prefix => matcher.prefix_indices(haystack, needle, &mut matched_indices),
+                Postfix => matcher.postfix_indices(haystack, needle, &mut matched_indices),
+                Exact => matcher.exact_indices(haystack, needle, &mut matched_indices),
+            };
+            println!("{matched_indices:?}");
+            let match_chars: Vec<_> = matched_indices
+                .iter()
+                .map(|&i| haystack.get(i).normalize(&matcher.config))
+                .collect();
+            let needle_chars: Vec<_> = needle.chars().collect();
+
+            assert_eq!(
+                res,
+                Some(score),
+                "{needle:?} did  not match {haystack:?}: matched {match_chars:?} {matched_indices:?} {algo:?}"
+            );
+            assert_eq!(
+                matched_indices, indices,
+                "{needle:?} match {haystack:?} {algo:?}"
+            );
+            assert_eq!(
+                match_chars, needle_chars,
+                "{needle:?} match {haystack:?} indices are incorrect {matched_indices:?} {algo:?}"
+            );
+        }
+    }
+}
+
+fn assert_not_matches_with(
+    normalize: bool,
+    case_sensitive: bool,
+    algorithm: &[Algorithm],
+    cases: &[(&str, &str)],
+) {
+    let config = Config {
+        normalize,
+        ignore_case: !case_sensitive,
+        ..Config::DEFAULT
+    };
+    let mut matcher = Matcher::new(config);
+    let mut needle_buf = Vec::new();
+    let mut haystack_buf = Vec::new();
+    for &(haystack, needle) in cases {
+        let needle = if !case_sensitive {
+            needle.to_lowercase()
+        } else {
+            needle.to_owned()
+        };
+        let needle = Utf32Str::new(&needle, &mut needle_buf);
+        let haystack = Utf32Str::new(haystack, &mut haystack_buf);
+
+        for algo in algorithm {
+            let res = match algo {
+                FuzzyOptimal => matcher.fuzzy_match(haystack, needle),
+                FuzzyGreedy => matcher.fuzzy_match_greedy(haystack, needle),
+                Substring => matcher.substring_match(haystack, needle),
+                Prefix => matcher.prefix_match(haystack, needle),
+                Postfix => matcher.postfix_match(haystack, needle),
+                Exact => matcher.exact_match(haystack, needle),
+            };
+            assert_eq!(
+                res, None,
+                "{needle:?} should not match {haystack:?} {algo:?}"
+            );
+        }
+    }
+}
+
+pub fn assert_not_matches(normalize: bool, case_sensitive: bool, cases: &[(&str, &str)]) {
+    assert_not_matches_with(
+        normalize,
+        case_sensitive,
+        &[FuzzyOptimal, FuzzyGreedy, Substring, Prefix, Postfix, Exact],
+        cases,
+    )
+}
+
+const BONUS_BOUNDARY_WHITE: u16 = Config::DEFAULT.bonus_boundary_white;
+const BONUS_BOUNDARY_DELIMITER: u16 = Config::DEFAULT.bonus_boundary_delimiter;
+
+#[test]
+fn test_fuzzy() {
+    assert_matches(
+        &[FuzzyGreedy, FuzzyOptimal],
+        false,
+        false,
+        false,
+        false,
+        &[
+            (
+                "fooBarbaz1",
+                "obr",
+                &[2, 3, 5],
+                BONUS_CAMEL123 - PENALTY_GAP_START,
+            ),
+            (
+                "/usr/share/doc/at/ChangeLog",
+                "changelog",
+                &[18, 19, 20, 21, 22, 23, 24, 25, 26],
+                (BONUS_FIRST_CHAR_MULTIPLIER + 8) * BONUS_BOUNDARY_DELIMITER,
+            ),
+            (
+                "fooBarbaz1",
+                "br",
+                &[3, 5],
+                BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
+            ),
+            (
+                "foo bar baz",
+                "fbb",
+                &[0, 4, 8],
+                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 2
+                    - 2 * PENALTY_GAP_START
+                    - 4 * PENALTY_GAP_EXTENSION,
+            ),
+            (
+                "/AutomatorDocument.icns",
+                "rdoc",
+                &[9, 10, 11, 12],
+                BONUS_CAMEL123 + 2 * BONUS_CONSECUTIVE,
+            ),
+            (
+                "/man1/zshcompctl.1",
+                "zshc",
+                &[6, 7, 8, 9],
+                BONUS_BOUNDARY_DELIMITER * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
+            ),
+            (
+                "/.oh-my-zsh/cache",
+                "zshc",
+                &[8, 9, 10, 12],
+                BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2) - PENALTY_GAP_START
+                    + BONUS_BOUNDARY_DELIMITER,
+            ),
+            (
+                "ab0123 456",
+                "12356",
+                &[3, 4, 5, 8, 9],
+                BONUS_CONSECUTIVE * 3 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION,
+            ),
+            (
+                "abc123 456",
+                "12356",
+                &[3, 4, 5, 8, 9],
+                BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 2)
+                    - PENALTY_GAP_START
+                    - PENALTY_GAP_EXTENSION
+                    + BONUS_CONSECUTIVE,
+            ),
+            (
+                "foo/bar/baz",
+                "fbb",
+                &[0, 4, 8],
+                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
+                    - 2 * PENALTY_GAP_START
+                    - 4 * PENALTY_GAP_EXTENSION,
+            ),
+            (
+                "fooBarBaz",
+                "fbb",
+                &[0, 3, 6],
+                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
+                    - 2 * PENALTY_GAP_START
+                    - 2 * PENALTY_GAP_EXTENSION,
+            ),
+            (
+                "foo barbaz",
+                "fbb",
+                &[0, 4, 7],
+                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
+                    - PENALTY_GAP_START * 2
+                    - PENALTY_GAP_EXTENSION * 3,
+            ),
+            (
+                "fooBar Baz",
+                "foob",
+                &[0, 1, 2, 3],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
+            ),
+            (
+                "xFoo-Bar Baz",
+                "foo-b",
+                &[1, 2, 3, 4, 5],
+                BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 2) + 2 * BONUS_NON_WORD,
+            ),
+        ],
+    );
+}
+
+#[test]
+fn empty_needle() {
+    assert_matches(
+        &[Substring, Prefix, Postfix, FuzzyGreedy, FuzzyOptimal, Exact],
+        false,
+        false,
+        false,
+        false,
+        &[("foo bar baz", "", &[], 0)],
+    );
+}
+
+#[test]
+fn test_substring() {
+    assert_matches(
+        &[Substring, Prefix],
+        false,
+        false,
+        false,
+        false,
+        &[
+            (
+                "foo bar baz",
+                "foo",
+                &[0, 1, 2],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
+            ),
+            (
+                " foo bar baz",
+                "FOO",
+                &[1, 2, 3],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
+            ),
+            (
+                " foo bar baz",
+                " FOO",
+                &[0, 1, 2, 3],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
+            ),
+        ],
+    );
+    assert_matches(
+        &[Substring, Postfix],
+        false,
+        false,
+        false,
+        false,
+        &[
+            (
+                "foo bar baz",
+                "baz",
+                &[8, 9, 10],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
+            ),
+            (
+                "foo bar baz ",
+                "baz",
+                &[8, 9, 10],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
+            ),
+            (
+                "foo bar baz ",
+                "baz ",
+                &[8, 9, 10, 11],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
+            ),
+        ],
+    );
+    assert_matches(
+        &[Substring, Prefix, Postfix, Exact, FuzzyGreedy, FuzzyOptimal],
+        false,
+        false,
+        false,
+        false,
+        &[
+            (
+                "foo",
+                "foo",
+                &[0, 1, 2],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
+            ),
+            (
+                " foo",
+                "foo",
+                &[1, 2, 3],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
+            ),
+            (
+                " foo",
+                " foo",
+                &[0, 1, 2, 3],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
+            ),
+        ],
+    );
+    assert_matches(
+        &[Substring],
+        false,
+        false,
+        false,
+        false,
+        &[
+            (
+                "fooBarbaz1",
+                "oba",
+                &[2, 3, 4],
+                BONUS_CAMEL123 + BONUS_CONSECUTIVE,
+            ),
+            (
+                "/AutomatorDocument.icns",
+                "rdoc",
+                &[9, 10, 11, 12],
+                BONUS_CAMEL123 + 2 * BONUS_CONSECUTIVE,
+            ),
+            (
+                "/man1/zshcompctl.1",
+                "zshc",
+                &[6, 7, 8, 9],
+                BONUS_BOUNDARY_DELIMITER * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
+            ),
+            (
+                "/.oh-my-zsh/cache",
+                "zsh/c",
+                &[8, 9, 10, 11, 12],
+                BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2)
+                    + BONUS_NON_WORD
+                    + BONUS_BOUNDARY_DELIMITER,
+            ),
+        ],
+    );
+    assert_not_matches_with(
+        true,
+        false,
+        &[Prefix, Substring, Postfix, Exact],
+        &[(
+            "At the Road’s End - Seeming - SOL: A Self-Banishment Ritual",
+            "adi",
+        )],
+    )
+}
+
+#[test]
+fn test_substring_case_sensitive() {
+    assert_matches(
+        &[Substring, Prefix],
+        false,
+        true,
+        false,
+        false,
+        &[
+            (
+                "Foo bar baz",
+                "Foo",
+                &[0, 1, 2],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
+            ),
+            (
+                "Fȫô bar baz",
+                "Fȫô",
+                &[0, 1, 2],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
+            ),
+            (
+                "Foo ฿ar baz",
+                "Foo",
+                &[0, 1, 2],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
+            ),
+        ],
+    );
+    assert_not_matches_with(false, true, &[Substring, Prefix], &[("foo bar baz", "Foo")]);
+}
+
+#[test]
+fn test_fuzzy_case_sensitive() {
+    assert_matches(
+        &[FuzzyGreedy, FuzzyOptimal],
+        false,
+        true,
+        false,
+        false,
+        &[
+            (
+                "fooBarbaz1",
+                "oBr",
+                &[2, 3, 5],
+                BONUS_CAMEL123 - PENALTY_GAP_START,
+            ),
+            (
+                "Foo/Bar/Baz",
+                "FBB",
+                &[0, 4, 8],
+                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
+                    - 2 * PENALTY_GAP_START
+                    - 4 * PENALTY_GAP_EXTENSION,
+            ),
+            (
+                "FooBarBaz",
+                "FBB",
+                &[0, 3, 6],
+                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
+                    - 2 * PENALTY_GAP_START
+                    - 2 * PENALTY_GAP_EXTENSION,
+            ),
+            (
+                "FooBar Baz",
+                "FooB",
+                &[0, 1, 2, 3],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
+            ),
+            ("foo-bar", "o-ba", &[2, 3, 4, 5], BONUS_NON_WORD * 3),
+        ],
+    );
+}
+
+#[test]
+fn test_normalize() {
+    assert_matches(
+        &[FuzzyGreedy, FuzzyOptimal],
+        true,
+        false,
+        false,
+        false,
+        &[
+            (
+                "Só Danço Samba",
+                "So",
+                &[0, 1],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1),
+            ),
+            (
+                "Só Danço Samba",
+                "sodc",
+                &[0, 1, 3, 6],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1) - PENALTY_GAP_START
+                    + BONUS_BOUNDARY_WHITE
+                    - PENALTY_GAP_START
+                    - PENALTY_GAP_EXTENSION,
+            ),
+            (
+                "Danço",
+                "danco",
+                &[0, 1, 2, 3, 4],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
+            ),
+            (
+                "DanÇo",
+                "danco",
+                &[0, 1, 2, 3, 4],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
+            ),
+            (
+                "xÇando",
+                "cando",
+                &[1, 2, 3, 4, 5],
+                BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
+            ),
+            ("ۂ(GCGɴCG", "n", &[5], 0),
+        ],
+    )
+}
+
+#[test]
+fn test_unicode() {
+    assert_matches(
+        &[FuzzyGreedy, FuzzyOptimal, Substring],
+        true,
+        false,
+        false,
+        false,
+        &[
+            (
+                "你好世界",
+                "你好",
+                &[0, 1],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1),
+            ),
+            (
+                " 你好世界",
+                "你好",
+                &[1, 2],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1),
+            ),
+        ],
+    );
+    assert_matches(
+        &[FuzzyGreedy, FuzzyOptimal],
+        true,
+        false,
+        false,
+        false,
+        &[(
+            "你好世界",
+            "你世",
+            &[0, 2],
+            BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
+        )],
+    );
+    assert_not_matches(
+        false,
+        false,
+        &[("Flibbertigibbet / イタズラっ子たち", "lying")],
+    );
+}
+
+#[test]
+fn test_long_str() {
+    assert_matches(
+        &[FuzzyGreedy, FuzzyOptimal],
+        false,
+        false,
+        false,
+        false,
+        &[(
+            &"x".repeat(u16::MAX as usize + 1),
+            "xx",
+            &[0, 1],
+            BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1),
+        )],
+    );
+}
+
+#[test]
+fn test_casing() {
+    assert_matches(
+        &[FuzzyGreedy, FuzzyOptimal],
+        false,
+        false,
+        false,
+        false,
+        &[
+            // these two have the same score
+            (
+                "fooBar",
+                "foobar",
+                &[0, 1, 2, 3, 4, 5],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 5),
+            ),
+            (
+                "foobar",
+                "foobar",
+                &[0, 1, 2, 3, 4, 5],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 5),
+            ),
+            // these two have the same score (slightly lower than the other two: 60 instead of 70)
+            (
+                "foo-bar",
+                "foobar",
+                &[0, 1, 2, 4, 5, 6],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2) - PENALTY_GAP_START
+                    + BONUS_BOUNDARY * 3,
+            ),
+            (
+                "foo_bar",
+                "foobar",
+                &[0, 1, 2, 4, 5, 6],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2) - PENALTY_GAP_START
+                    + BONUS_BOUNDARY * 3,
+            ),
+        ],
+    )
+}
+
+#[test]
+fn test_optimal() {
+    assert_matches(
+        &[FuzzyOptimal],
+        false,
+        false,
+        false,
+        false,
+        &[
+            (
+                "axxx xx ",
+                "xx",
+                &[5, 6],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1),
+            ),
+            (
+                "SS!H",
+                "S!",
+                &[0, 2],
+                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START
+                    + BONUS_NON_WORD,
+            ),
+            // this case is a cool example of why our algorithm is more than fzf
+            // we handle this corretly detect that it's better to match
+            // the second f instead of the third yielding a higher score
+            // (despite using the same scoring function!)
+            (
+                "xf.foo",
+                "xfoo",
+                &[0, 3, 4, 5],
+                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
+                    - PENALTY_GAP_START
+                    - PENALTY_GAP_EXTENSION
+                    + BONUS_BOUNDARY * 3,
+            ),
+            (
+                "xf fo",
+                "xfo",
+                &[0, 3, 4],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2)
+                    - PENALTY_GAP_START
+                    - PENALTY_GAP_EXTENSION,
+            ),
+        ],
+    );
+}
+
+#[test]
+fn test_reject() {
+    assert_not_matches(
+        true,
+        false,
+        &[
+            ("你好界", "abc"),
+            ("你好界", "a"),
+            ("你好世界", "富"),
+            ("Só Danço Samba", "sox"),
+            ("fooBarbaz", "fooBarbazz"),
+            ("fooBarbaz", "c"),
+        ],
+    );
+    assert_not_matches(
+        true,
+        true,
+        &[
+            ("你好界", "abc"),
+            ("abc", "你"),
+            ("abc", "A"),
+            ("abc", "d"),
+            ("你好世界", "富"),
+            ("Só Danço Samba", "sox"),
+            ("fooBarbaz", "oBZ"),
+            ("Foo Bar Baz", "fbb"),
+            ("fooBarbaz", "fooBarbazz"),
+        ],
+    );
+    assert_not_matches(
+        false,
+        true,
+        &[
+            ("Só Danço Samba", "sod"),
+            ("Só Danço Samba", "soc"),
+            ("Só Danç", "So"),
+        ],
+    );
+    assert_not_matches(false, false, &[("ۂۂfoۂۂ", "foo")]);
+}
+
+#[test]
+fn test_prefer_prefix() {
+    assert_matches(
+        &[FuzzyOptimal, FuzzyGreedy],
+        false,
+        false,
+        false,
+        true,
+        &[
+            (
+                "Moby Dick",
+                "md",
+                &[0, 5],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1)  + MAX_PREFIX_BONUS
+                    - PENALTY_GAP_START
+                    - 3 * PENALTY_GAP_EXTENSION,
+            ),
+            (
+                "Though I cannot tell why it was exactly that those stage managers, the Fates, put me down for this shabby part of a whaling voyage",
+                "md",
+                &[82, 85],
+                BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1)
+                    - PENALTY_GAP_START
+                    - PENALTY_GAP_EXTENSION,
+            ),
+        ],
+    );
+}
+
+#[test]
+fn test_single_char_needle() {
+    assert_matches(
+        &[FuzzyOptimal],
+        false,
+        false,
+        false,
+        false,
+        &[(
+            "foO",
+            "o",
+            &[2],
+            BONUS_FIRST_CHAR_MULTIPLIER * BONUS_CAMEL123,
+        )],
+    );
+    assert_matches(
+        &[FuzzyOptimal],
+        false,
+        false,
+        false,
+        false,
+        &[(
+            "föÖ",
+            "ö",
+            &[2],
+            BONUS_FIRST_CHAR_MULTIPLIER * BONUS_CAMEL123,
+        )],
+    );
+}
+
+#[test]
+fn umlaut() {
+    let paths = ["be", "bë"];
+    let mut matcher = Matcher::new(Config::DEFAULT);
+    let matches = Pattern::parse("ë", CaseMatching::Ignore, Normalization::Smart)
+        .match_list(paths, &mut matcher);
+    assert_eq!(matches.len(), 1);
+    let matches = Pattern::parse("e", CaseMatching::Ignore, Normalization::Never)
+        .match_list(paths, &mut matcher);
+    assert_eq!(matches.len(), 1);
+    let matches = Pattern::parse("e", CaseMatching::Ignore, Normalization::Smart)
+        .match_list(paths, &mut matcher);
+    assert_eq!(matches.len(), 2);
+}
diff --git a/matcher/src/utf32_str.rs b/matcher/src/utf32_str.rs
new file mode 100644
index 00000000..664dae7a
--- /dev/null
+++ b/matcher/src/utf32_str.rs
@@ -0,0 +1,428 @@
+#[cfg(test)]
+mod tests;
+
+use std::borrow::Cow;
+use std::ops::{Bound, RangeBounds};
+use std::{fmt, slice};
+
+use memchr::memmem;
+
+use crate::chars;
+
+/// Check if a given string can be represented internally as the `Ascii` variant in a
+/// [`Utf32String`] or a [`Utf32Str`].
+///
+/// This returns true if the string is ASCII and does not contain a windows-style newline
+/// `'\r'`.
+/// The additional carriage return check is required since even for strings consisting only
+/// of ASCII, the windows-style newline `\r\n` is treated as a single grapheme.
+#[inline]
+fn has_ascii_graphemes(string: &str) -> bool {
+    string.is_ascii() && memmem::find(string.as_bytes(), b"\r\n").is_none()
+}
+
+/// A UTF-32 encoded (char array) string that is used as an input to (fuzzy) matching.
+///
+/// This is mostly intended as an internal string type, but some methods are exposed for
+/// convenience. We make the following API guarantees for `Utf32Str(ing)`s produced from a string
+/// using one of its `From<T>` constructors for string types `T` or from the
+/// [`Utf32Str::new`] method.
+///
+/// 1. The `Ascii` variant contains a byte buffer which is guaranteed to be a valid string
+///    slice.
+/// 2. It is guaranteed that the string slice internal to the `Ascii` variant is identical
+///    to the original string.
+/// 3. The length of a `Utf32Str(ing)` is exactly the number of graphemes in the original string.
+///
+/// Since `Utf32Str(ing)`s variants may be constructed directly, you **must not** make these
+/// assumptions when handling `Utf32Str(ing)`s of unknown origin.
+///
+/// ## Caveats
+/// Despite the name, this type is quite far from being a true string type. Here are some
+/// examples demonstrating this.
+///
+/// ### String conversions are not round-trip
+/// In the presence of a multi-codepoint grapheme (e.g. `"u\u{0308}"` which is `u +
+/// COMBINING_DIAERESIS`), the trailing codepoints are truncated.
+/// ```
+/// # use nucleo_matcher::Utf32String;
+/// assert_eq!(Utf32String::from("u\u{0308}").to_string(), "u");
+/// ```
+///
+/// ### Indexing is done by grapheme
+/// Indexing into a string is done by grapheme rather than by codepoint.
+/// ```
+/// # use nucleo_matcher::Utf32String;
+/// assert!(Utf32String::from("au\u{0308}").len() == 2);
+/// ```
+///
+/// ### A `Unicode` variant may be produced by all-ASCII characters.
+/// Since the windows-style newline `\r\n` is ASCII only but considered to be a single grapheme,
+/// strings containing `\r\n` will still result in a `Unicode` variant.
+/// ```
+/// # use nucleo_matcher::Utf32String;
+/// let s = Utf32String::from("\r\n");
+/// assert!(!s.slice(..).is_ascii());
+/// assert!(s.len() == 1);
+/// assert!(s.slice(..).get(0) == '\n');
+/// ```
+///
+/// ## Design rationale
+/// Usually Rust's UTF-8 encoded strings are great. However, since fuzzy matching
+/// operates on codepoints (ideally, it should operate on graphemes but that's too
+/// much hassle to deal with), we want to quickly iterate over codepoints (up to 5
+/// times) during matching.
+///
+/// Doing codepoint segmentation on the fly not only blows trough the cache
+/// (lookup tables and I-cache) but also has nontrivial runtime compared to the
+/// matching itself. Furthermore there are many extra optimizations available
+/// for ASCII only text, but checking each match has too much overhead.
+///
+/// Of course, this comes at extra memory cost as we usually still need the UTF-8
+/// encoded variant for rendering. In the (dominant) case of ASCII-only text
+/// we don't require a copy. Furthermore fuzzy matching usually is applied while
+/// the user is typing on the fly so the same item is potentially matched many
+/// times (making the the up-front cost more worth it). That means that its
+/// basically always worth it to pre-segment the string.
+///
+/// For usecases that only match (a lot of) strings once its possible to keep
+/// char buffer around that is filled with the presegmented chars.
+///
+/// Another advantage of this approach is that the matcher will naturally
+/// produce grapheme indices (instead of utf8 offsets) anyway. With a
+/// codepoint basic representation like this the indices can be used
+/// directly
+#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
+pub enum Utf32Str<'a> {
+    /// A string represented as ASCII encoded bytes.
+    /// Correctness invariant: must only contain valid ASCII (`<= 127`)
+    Ascii(&'a [u8]),
+    /// A string represented as an array of unicode codepoints (basically UTF-32).
+    Unicode(&'a [char]),
+}
+
+impl<'a> Utf32Str<'a> {
+    /// Convenience method to construct a `Utf32Str` from a normal UTF-8 str
+    pub fn new(str: &'a str, buf: &'a mut Vec<char>) -> Self {
+        if has_ascii_graphemes(str) {
+            Utf32Str::Ascii(str.as_bytes())
+        } else {
+            buf.clear();
+            buf.extend(crate::chars::graphemes(str));
+            Utf32Str::Unicode(buf)
+        }
+    }
+
+    /// Returns the number of characters in this string.
+    #[inline]
+    pub fn len(self) -> usize {
+        match self {
+            Utf32Str::Unicode(codepoints) => codepoints.len(),
+            Utf32Str::Ascii(ascii_bytes) => ascii_bytes.len(),
+        }
+    }
+
+    /// Returns whether this string is empty.
+    #[inline]
+    pub fn is_empty(self) -> bool {
+        match self {
+            Utf32Str::Unicode(codepoints) => codepoints.is_empty(),
+            Utf32Str::Ascii(ascii_bytes) => ascii_bytes.is_empty(),
+        }
+    }
+
+    /// Creates a slice with a string that contains the characters in
+    /// the specified **character range**.
+    #[inline]
+    pub fn slice(self, range: impl RangeBounds<usize>) -> Utf32Str<'a> {
+        let start = match range.start_bound() {
+            Bound::Included(&start) => start,
+            Bound::Excluded(&start) => start + 1,
+            Bound::Unbounded => 0,
+        };
+        let end = match range.end_bound() {
+            Bound::Included(&end) => end + 1,
+            Bound::Excluded(&end) => end,
+            Bound::Unbounded => self.len(),
+        };
+        match self {
+            Utf32Str::Ascii(bytes) => Utf32Str::Ascii(&bytes[start..end]),
+            Utf32Str::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
+        }
+    }
+
+    /// Returns the number of leading whitespaces in this string
+    #[inline]
+    pub(crate) fn leading_white_space(self) -> usize {
+        match self {
+            Utf32Str::Ascii(bytes) => bytes
+                .iter()
+                .position(|b| !b.is_ascii_whitespace())
+                .unwrap_or(0),
+            Utf32Str::Unicode(codepoints) => codepoints
+                .iter()
+                .position(|c| !c.is_whitespace())
+                .unwrap_or(0),
+        }
+    }
+
+    /// Returns the number of trailing whitespaces in this string
+    #[inline]
+    pub(crate) fn trailing_white_space(self) -> usize {
+        match self {
+            Utf32Str::Ascii(bytes) => bytes
+                .iter()
+                .rev()
+                .position(|b| !b.is_ascii_whitespace())
+                .unwrap_or(0),
+            Utf32Str::Unicode(codepoints) => codepoints
+                .iter()
+                .rev()
+                .position(|c| !c.is_whitespace())
+                .unwrap_or(0),
+        }
+    }
+
+    /// Same as `slice` but accepts a u32 range for convenience since
+    /// those are the indices returned by the matcher.
+    #[inline]
+    pub fn slice_u32(self, range: impl RangeBounds<u32>) -> Utf32Str<'a> {
+        let start = match range.start_bound() {
+            Bound::Included(&start) => start as usize,
+            Bound::Excluded(&start) => start as usize + 1,
+            Bound::Unbounded => 0,
+        };
+        let end = match range.end_bound() {
+            Bound::Included(&end) => end as usize + 1,
+            Bound::Excluded(&end) => end as usize,
+            Bound::Unbounded => self.len(),
+        };
+        match self {
+            Utf32Str::Ascii(bytes) => Utf32Str::Ascii(&bytes[start..end]),
+            Utf32Str::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
+        }
+    }
+
+    /// Returns whether this string only contains graphemes which are single ASCII chars.
+    ///
+    /// This is almost equivalent to the string being ASCII, except with the additional requirement
+    /// that the string cannot contain a windows-style newline `\r\n` which is treated as a single
+    /// grapheme.
+    pub fn is_ascii(self) -> bool {
+        matches!(self, Utf32Str::Ascii(_))
+    }
+
+    /// Returns the `n`th character in this string, zero-indexed
+    pub fn get(self, n: u32) -> char {
+        match self {
+            Utf32Str::Ascii(bytes) => bytes[n as usize] as char,
+            Utf32Str::Unicode(codepoints) => codepoints[n as usize],
+        }
+    }
+
+    /// Returns the last character in this string.
+    ///
+    /// Panics if the string is empty.
+    pub(crate) fn last(self) -> char {
+        match self {
+            Utf32Str::Ascii(bytes) => bytes[bytes.len() - 1] as char,
+            Utf32Str::Unicode(codepoints) => codepoints[codepoints.len() - 1],
+        }
+    }
+
+    /// Returns the first character in this string.
+    ///
+    /// Panics if the string is empty.
+    pub(crate) fn first(self) -> char {
+        match self {
+            Utf32Str::Ascii(bytes) => bytes[0] as char,
+            Utf32Str::Unicode(codepoints) => codepoints[0],
+        }
+    }
+
+    /// Returns an iterator over the characters in this string
+    pub fn chars(self) -> Chars<'a> {
+        match self {
+            Utf32Str::Ascii(bytes) => Chars::Ascii(bytes.iter()),
+            Utf32Str::Unicode(codepoints) => Chars::Unicode(codepoints.iter()),
+        }
+    }
+}
+
+impl fmt::Debug for Utf32Str<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "\"")?;
+        for c in self.chars() {
+            for c in c.escape_debug() {
+                write!(f, "{c}")?
+            }
+        }
+        write!(f, "\"")
+    }
+}
+
+impl fmt::Display for Utf32Str<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        for c in self.chars() {
+            write!(f, "{c}")?
+        }
+        Ok(())
+    }
+}
+
+pub enum Chars<'a> {
+    Ascii(slice::Iter<'a, u8>),
+    Unicode(slice::Iter<'a, char>),
+}
+
+impl Iterator for Chars<'_> {
+    type Item = char;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self {
+            Chars::Ascii(iter) => iter.next().map(|&c| c as char),
+            Chars::Unicode(iter) => iter.next().copied(),
+        }
+    }
+}
+
+impl DoubleEndedIterator for Chars<'_> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        match self {
+            Chars::Ascii(iter) => iter.next_back().map(|&c| c as char),
+            Chars::Unicode(iter) => iter.next_back().copied(),
+        }
+    }
+}
+
+#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
+/// An owned version of [`Utf32Str`].
+///
+/// See the API documentation for [`Utf32Str`] for more detail.
+pub enum Utf32String {
+    /// A string represented as ASCII encoded bytes.
+    /// Correctness invariant: must only contain valid ASCII (<=127)
+    Ascii(Box<str>),
+    /// A string represented as an array of unicode codepoints (basically UTF-32).
+    Unicode(Box<[char]>),
+}
+
+impl Default for Utf32String {
+    fn default() -> Self {
+        Self::Ascii(String::new().into_boxed_str())
+    }
+}
+
+impl Utf32String {
+    /// Returns the number of characters in this string.
+    #[inline]
+    pub fn len(&self) -> usize {
+        match self {
+            Utf32String::Unicode(codepoints) => codepoints.len(),
+            Utf32String::Ascii(ascii_bytes) => ascii_bytes.len(),
+        }
+    }
+
+    /// Returns whether this string is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        match self {
+            Utf32String::Unicode(codepoints) => codepoints.is_empty(),
+            Utf32String::Ascii(ascii_bytes) => ascii_bytes.is_empty(),
+        }
+    }
+
+    /// Creates a slice with a string that contains the characters in
+    /// the specified **character range**.
+    #[inline]
+    pub fn slice(&self, range: impl RangeBounds<usize>) -> Utf32Str {
+        let start = match range.start_bound() {
+            Bound::Included(&start) => start,
+            Bound::Excluded(&start) => start + 1,
+            Bound::Unbounded => 0,
+        };
+        let end = match range.end_bound() {
+            Bound::Included(&end) => end + 1,
+            Bound::Excluded(&end) => end,
+            Bound::Unbounded => self.len(),
+        };
+        match self {
+            Utf32String::Ascii(bytes) => Utf32Str::Ascii(&bytes.as_bytes()[start..end]),
+            Utf32String::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
+        }
+    }
+
+    /// Same as `slice` but accepts a u32 range for convenience since
+    /// those are the indices returned by the matcher.
+    #[inline]
+    pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str {
+        let start = match range.start_bound() {
+            Bound::Included(&start) => start,
+            Bound::Excluded(&start) => start + 1,
+            Bound::Unbounded => 0,
+        };
+        let end = match range.end_bound() {
+            Bound::Included(&end) => end + 1,
+            Bound::Excluded(&end) => end,
+            Bound::Unbounded => self.len() as u32,
+        };
+        match self {
+            Utf32String::Ascii(bytes) => {
+                Utf32Str::Ascii(&bytes.as_bytes()[start as usize..end as usize])
+            }
+            Utf32String::Unicode(codepoints) => {
+                Utf32Str::Unicode(&codepoints[start as usize..end as usize])
+            }
+        }
+    }
+}
+
+impl From<&str> for Utf32String {
+    #[inline]
+    fn from(value: &str) -> Self {
+        if has_ascii_graphemes(value) {
+            Self::Ascii(value.to_owned().into_boxed_str())
+        } else {
+            Self::Unicode(chars::graphemes(value).collect())
+        }
+    }
+}
+
+impl From<Box<str>> for Utf32String {
+    fn from(value: Box<str>) -> Self {
+        if has_ascii_graphemes(&value) {
+            Self::Ascii(value)
+        } else {
+            Self::Unicode(chars::graphemes(&value).collect())
+        }
+    }
+}
+
+impl From<String> for Utf32String {
+    #[inline]
+    fn from(value: String) -> Self {
+        value.into_boxed_str().into()
+    }
+}
+
+impl<'a> From<Cow<'a, str>> for Utf32String {
+    #[inline]
+    fn from(value: Cow<'a, str>) -> Self {
+        match value {
+            Cow::Borrowed(value) => value.into(),
+            Cow::Owned(value) => value.into(),
+        }
+    }
+}
+
+impl fmt::Debug for Utf32String {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{:?}", self.slice(..))
+    }
+}
+
+impl fmt::Display for Utf32String {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.slice(..))
+    }
+}
diff --git a/matcher/src/utf32_str/tests.rs b/matcher/src/utf32_str/tests.rs
new file mode 100644
index 00000000..a38c8875
--- /dev/null
+++ b/matcher/src/utf32_str/tests.rs
@@ -0,0 +1,44 @@
+use crate::{Utf32Str, Utf32String};
+
+#[test]
+fn test_utf32str_ascii() {
+    /// Helper function for testing
+    fn expect_ascii(src: &str, is_ascii: bool) {
+        let mut buffer = Vec::new();
+        assert!(Utf32Str::new(src, &mut buffer).is_ascii() == is_ascii);
+        assert!(Utf32String::from(src).slice(..).is_ascii() == is_ascii);
+        assert!(Utf32String::from(src.to_owned()).slice(..).is_ascii() == is_ascii);
+    }
+
+    // ascii
+    expect_ascii("", true);
+    expect_ascii("a", true);
+    expect_ascii("a\nb", true);
+    expect_ascii("\n\r", true);
+
+    // not ascii
+    expect_ascii("aü", false);
+    expect_ascii("au\u{0308}", false);
+
+    // windows-style newline
+    expect_ascii("a\r\nb", false);
+    expect_ascii("ü\r\n", false);
+    expect_ascii("\r\n", false);
+}
+
+#[test]
+fn test_grapheme_truncation() {
+    // ascii is preserved
+    let s = Utf32String::from("ab");
+    assert_eq!(s.slice(..).get(0), 'a');
+    assert_eq!(s.slice(..).get(1), 'b');
+
+    // windows-style newline is truncated to '\n'
+    let s = Utf32String::from("\r\n");
+    assert_eq!(s.slice(..).get(0), '\n');
+
+    // normal graphemes are truncated to the first character
+    let s = Utf32String::from("u\u{0308}\r\n");
+    assert_eq!(s.slice(..).get(0), 'u');
+    assert_eq!(s.slice(..).get(1), '\n');
+}
author	Ellie Huxtable <ellie@elliehuxtable.com>	2026-03-16 15:22:49 -0700
committer	Ellie Huxtable <ellie@elliehuxtable.com>	2026-03-16 15:22:49 -0700
commit	8f9777ce7aecfe1a163a915e3245466b9dd9ac2e (patch)
tree	a878a0495e5f84266b7e36918a9e1a9432b0ddd8 /matcher/src
download	atuin-8f9777ce7aecfe1a163a915e3245466b9dd9ac2e.zip