//! Utilities for working with (unicode) characters/codepoints

use std::fmt::{self, Debug, Display};

#[cfg(feature = "unicode-casefold")]
use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
use crate::Config;

//autogenerated by generate-ucd
#[expect(warnings)]
#[rustfmt::skip]
#[cfg(feature = "unicode-casefold")]
mod case_fold;
#[cfg(feature = "unicode-normalization")]
mod normalize;

/// Abstraction over the character representations handled by this module.
///
/// Implemented in this file for [`AsciiChar`] (a single byte) and for `char`
/// (a full unicode codepoint).
pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
    /// `true` for the byte based [`AsciiChar`] implementation and `false` for
    /// the `char` implementation.
    const ASCII: bool;
    /// Classifies the character. `config` supplies the set of bytes that
    /// count as delimiters.
    fn char_class(self, config: &Config) -> CharClass;
    /// Returns the normalized character together with its class.
    ///
    /// In both implementations in this file the class is computed from the
    /// character *before* normalization is applied.
    fn char_class_and_normalize(self, config: &Config) -> (Self, CharClass);
    /// Returns the character normalized according to `config` (for example
    /// lower cased when `config.ignore_case` is set).
    fn normalize(self, config: &Config) -> Self;
}

/// `#[repr(transparent)]` wrapper around a `u8` that is formatted as a
/// character instead of a number and can be compared against a `char`
/// (`char == AsciiChar`).
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub(crate) struct AsciiChar(pub u8);

impl AsciiChar {
    /// Reinterprets a byte slice as a slice of [`AsciiChar`] without copying.
    ///
    /// The bytes are not validated; every byte value is passed through
    /// unchanged.
    pub fn cast(bytes: &[u8]) -> &[AsciiChar] {
        let first = bytes.as_ptr().cast::<AsciiChar>();
        let len = bytes.len();
        // SAFETY: `AsciiChar` is `#[repr(transparent)]` over `u8`, so both
        // types have identical size, alignment and validity. The pointer and
        // length come from a live `&[u8]` and the returned slice is bound to
        // the same lifetime through the function signature.
        unsafe { std::slice::from_raw_parts(first, len) }
    }
}

impl fmt::Display for AsciiChar {
    /// Formats the wrapped byte as the `char` with the same code point by
    /// delegating to the `Display` implementation of `char`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let as_char = char::from(self.0);
        <char as Display>::fmt(&as_char, f)
    }
}

impl PartialEq<AsciiChar> for char {
    /// A `char` equals an [`AsciiChar`] when the wrapped byte, widened to a
    /// `char`, is the same codepoint.
    fn eq(&self, other: &AsciiChar) -> bool {
        let widened = char::from(other.0);
        *self == widened
    }
}

impl Char for AsciiChar {
    const ASCII: bool = true;
    #[inline]
    fn char_class(self, config: &Config) -> CharClass {
        let c = self.0;
        // using manual if conditions instead optimizes better
        if c >= b'a' && c <= b'z' {
            CharClass::Lower
        } else if c >= b'A' && c <= b'Z' {
            CharClass::Upper
        } else if c >= b'0' && c <= b'9' {
            CharClass::Number
        } else if c.is_ascii_whitespace() {
            CharClass::Whitespace
        } else if config.delimiter_chars.contains(&c) {
            CharClass::Delimiter
        } else {
            CharClass::NonWord
        }
    }

    #[inline(always)]
    fn char_class_and_normalize(mut self, config: &Config) -> (Self, CharClass) {
        let char_class = self.char_class(config);
        if config.ignore_case && char_class == CharClass::Upper {
            self.0 += 32
        }
        (self, char_class)
    }

    #[inline(always)]
    fn normalize(mut self, config: &Config) -> Self {
        if config.ignore_case && self.0 >= b'A' && self.0 <= b'Z' {
            self.0 += 32
        }
        self
    }
}
/// Classifies a character using unicode properties.
///
/// The first matching property wins, in this order: lower case, upper case
/// (as decided by [`is_upper_case`]), numeric, alphabetic, whitespace.
/// Anything else is [`CharClass::NonWord`].
fn char_class_non_ascii(c: char) -> CharClass {
    match c {
        _ if c.is_lowercase() => CharClass::Lower,
        _ if is_upper_case(c) => CharClass::Upper,
        _ if c.is_numeric() => CharClass::Number,
        _ if c.is_alphabetic() => CharClass::Letter,
        _ if c.is_whitespace() => CharClass::Whitespace,
        _ => CharClass::NonWord,
    }
}
impl Char for char {
    const ASCII: bool = false;

    /// Classifies a codepoint. ASCII codepoints are delegated to the
    /// [`AsciiChar`] implementation (which is the only one that consults
    /// `config.delimiter_chars`), everything else is classified by unicode
    /// properties.
    #[inline(always)]
    fn char_class(self, config: &Config) -> CharClass {
        if self.is_ascii() {
            return AsciiChar(self as u8).char_class(config);
        }
        char_class_non_ascii(self)
    }

    /// Returns the normalized codepoint together with the class of the
    /// *original* codepoint.
    ///
    /// ASCII codepoints take the [`AsciiChar`] fast path. For all other
    /// codepoints unicode normalization (feature `unicode-normalization`,
    /// `config.normalize`) is applied first and case folding
    /// (`config.ignore_case`) second.
    #[inline(always)]
    fn char_class_and_normalize(mut self, config: &Config) -> (Self, CharClass) {
        if self.is_ascii() {
            let (c, class) = AsciiChar(self as u8).char_class_and_normalize(config);
            return (c.0 as char, class);
        }
        let char_class = char_class_non_ascii(self);
        // the case folding table only needs to be searched for upper case
        // characters ...
        #[cfg(feature = "unicode-casefold")]
        let mut case_fold = char_class == CharClass::Upper;
        #[cfg(feature = "unicode-normalization")]
        if config.normalize {
            self = normalize::normalize(self);
            // ... or for normalized characters: the result of normalization
            // is not classified again, so folding must always be attempted.
            // This assignment is gated separately so that enabling
            // `unicode-normalization` without `unicode-casefold` compiles.
            #[cfg(feature = "unicode-casefold")]
            {
                case_fold = true;
            }
        }
        #[cfg(feature = "unicode-casefold")]
        if case_fold && config.ignore_case {
            self = to_lower_case(self);
        }
        // without the case folding table only ASCII letters (which can be
        // produced by normalization) can be lower cased
        #[cfg(not(feature = "unicode-casefold"))]
        if config.ignore_case && self.is_ascii_uppercase() {
            self = self.to_ascii_lowercase();
        }
        (self, char_class)
    }

    /// Normalizes a codepoint: unicode normalization first (feature
    /// `unicode-normalization`, `config.normalize`), then lower casing if
    /// `config.ignore_case` is set.
    ///
    /// With the `unicode-casefold` feature simple unicode case folding is
    /// used. Without it only ASCII letters are lower cased, which matches
    /// what `char_class_and_normalize` and the [`AsciiChar`] implementation
    /// do for ASCII input.
    #[inline(always)]
    fn normalize(mut self, config: &Config) -> Self {
        #[cfg(feature = "unicode-normalization")]
        if config.normalize {
            self = normalize::normalize(self);
        }
        #[cfg(feature = "unicode-casefold")]
        if config.ignore_case {
            self = to_lower_case(self);
        }
        #[cfg(not(feature = "unicode-casefold"))]
        if config.ignore_case && self.is_ascii_uppercase() {
            self = self.to_ascii_lowercase();
        }
        self
    }
}

#[cfg(feature = "unicode-normalization")]
pub use normalize::normalize;
#[cfg(feature = "unicode-segmentation")]
use unicode_segmentation::UnicodeSegmentation;

/// Converts a character to lower case using simple unicode case folding.
///
/// Characters without an entry in the case folding table are returned
/// unchanged.
#[cfg(feature = "unicode-casefold")]
#[inline(always)]
pub fn to_lower_case(c: char) -> char {
    match CASE_FOLDING_SIMPLE.binary_search_by_key(&c, |entry| entry.0) {
        Ok(idx) => CASE_FOLDING_SIMPLE[idx].1,
        Err(_) => c,
    }
}

/// Checks if a character is upper case according to simple unicode case folding,
/// i.e. whether the case folding table has an entry for it.
/// If the `unicode-casefold` feature is disabled the equivalent std function
/// (`char::is_uppercase`) is used instead.
#[inline(always)]
pub fn is_upper_case(c: char) -> bool {
    #[cfg(feature = "unicode-casefold")]
    {
        CASE_FOLDING_SIMPLE
            .binary_search_by_key(&c, |entry| entry.0)
            .is_ok()
    }
    #[cfg(not(feature = "unicode-casefold"))]
    {
        c.is_uppercase()
    }
}

/// Coarse classification of a character, as produced by `Char::char_class`.
///
/// NOTE: `PartialOrd`/`Ord` are derived, so the declaration order of the
/// variants defines their ordering. Whether other code relies on that order
/// cannot be told from this file - verify before reordering variants.
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
pub(crate) enum CharClass {
    /// ASCII whitespace, or non-ASCII characters for which
    /// `char::is_whitespace` holds.
    Whitespace,
    /// Anything that did not match one of the other classes.
    NonWord,
    /// An ASCII byte contained in `config.delimiter_chars`. Only the ASCII
    /// classification assigns this class.
    Delimiter,
    /// `a..=z`, or non-ASCII characters for which `char::is_lowercase` holds.
    Lower,
    /// `A..=Z`, or non-ASCII characters for which `is_upper_case` holds.
    Upper,
    /// Non-ASCII alphabetic characters that are neither lower case, upper
    /// case nor numeric.
    Letter,
    /// `0..=9`, or non-ASCII characters for which `char::is_numeric` holds.
    Number,
}

/// Returns an iterator over the first character of every unicode grapheme
/// in `text`.
///
/// Nucleo cannot match graphemes as single units, so each grapheme is
/// represented by its first codepoint only. This iterator is used for
/// constructing `Utf32Str(ing)`.
///
/// If the `unicode-segmentation` feature is disabled the codepoints of
/// `text` are returned unchanged.
pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
    #[cfg(feature = "unicode-segmentation")]
    let first_chars = text.graphemes(true).map(|cluster| match cluster {
        // `\r\n` is a single grapheme and the one exception to the rule that
        // a grapheme maps to its first character: it maps to `\n` instead.
        "\r\n" => '\n',
        _ => cluster
            .chars()
            .next()
            .expect("graphemes must be non-empty"),
    });
    #[cfg(not(feature = "unicode-segmentation"))]
    let first_chars = text.chars();
    first_chars
}