diff options
| author | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2026-06-11 00:54:30 +0200 |
|---|---|---|
| committer | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2026-06-11 00:54:30 +0200 |
| commit | 5c39e7cf284a1f6e9a1657f2deb44e359fc47eb8 (patch) | |
| tree | c64baa8d5866c8e339eaf660dd3f94f30a3f7d8a /crates/atuin-history/src | |
| parent | chore: Somewhat simplify sync code (diff) | |
| download | atuin-5c39e7cf284a1f6e9a1657f2deb44e359fc47eb8.zip | |
chore: Move everything into one big crate
That helps remove duplicated code and rustc/cargo will now also show
dead code correctly.
Diffstat (limited to 'crates/atuin-history/src')
| -rw-r--r-- | crates/atuin-history/src/lib.rs | 2 | ||||
| -rw-r--r-- | crates/atuin-history/src/sort.rs | 46 | ||||
| -rw-r--r-- | crates/atuin-history/src/stats.rs | 548 |
3 files changed, 0 insertions, 596 deletions
diff --git a/crates/atuin-history/src/lib.rs b/crates/atuin-history/src/lib.rs deleted file mode 100644 index e7b33916..00000000 --- a/crates/atuin-history/src/lib.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod sort; -pub mod stats; diff --git a/crates/atuin-history/src/sort.rs b/crates/atuin-history/src/sort.rs deleted file mode 100644 index 022865a2..00000000 --- a/crates/atuin-history/src/sort.rs +++ /dev/null @@ -1,46 +0,0 @@ -use atuin_client::history::History; - -type ScoredHistory = (f64, History); - -// Fuzzy search already comes sorted by minspan -// This sorting should be applicable to all search modes, and solve the more "obvious" issues -// first. -// Later on, we can pass in context and do some boosts there too. -pub fn sort(query: &str, input: Vec<History>) -> Vec<History> { - // This can totally be extended. We need to be _careful_ that it's not slow. - // We also need to balance sorting db-side with sorting here. SQLite can do a lot, - // but some things are just much easier/more doable in Rust. - - let mut scored = input - .into_iter() - .map(|h| { - // If history is _prefixed_ with the query, score it more highly - let score = if h.command.starts_with(query) { - 2.0 - } else if h.command.contains(query) { - 1.75 - } else { - 1.0 - }; - - // calculate how long ago the history was, in seconds - let now = time::OffsetDateTime::now_utc().unix_timestamp(); - let time = h.timestamp.unix_timestamp(); - let diff = std::cmp::max(1, now - time); // no /0 please - - // prefer newer history, but not hugely so as to offset the other scoring - // the numbers will get super small over time, but I don't want time to overpower other - // scoring - #[expect(clippy::cast_precision_loss)] - let time_score = 1.0 + (1.0 / diff as f64); - let score = score * time_score; - - (score, h) - }) - .collect::<Vec<ScoredHistory>>(); - - scored.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap().reverse()); - - // Remove the scores and return the history - scored.into_iter().map(|(_, h)| h).collect::<Vec<History>>() -} diff --git a/crates/atuin-history/src/stats.rs b/crates/atuin-history/src/stats.rs deleted file mode 100644 index fedb1487..00000000 --- a/crates/atuin-history/src/stats.rs +++ /dev/null @@ -1,548 +0,0 @@ -use std::collections::{HashMap, HashSet}; - -use crossterm::style::{Color, ResetColor, SetAttribute, SetForegroundColor}; -use serde::{Deserialize, Serialize}; -use unicode_segmentation::UnicodeSegmentation; - -use atuin_client::{history::History, settings::Settings, theme::Meaning, theme::Theme}; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Stats { - pub total_commands: usize, - pub unique_commands: usize, - pub top: Vec<(Vec<String>, usize)>, -} - -fn first_non_whitespace(s: &str) -> Option<usize> { - s.char_indices() - // find the first non whitespace char - .find(|(_, c)| !c.is_ascii_whitespace()) - // return the index of that char - .map(|(i, _)| i) -} - -fn first_whitespace(s: &str) -> usize { - s.char_indices() - // find the first whitespace char - .find(|(_, c)| c.is_ascii_whitespace()) - // return the index of that char, (or the max length of the string) - .map_or(s.len(), |(i, _)| i) -} - -fn interesting_command<'a>(settings: &Settings, mut command: &'a str) -> &'a str { - // Sort by length so that we match the longest prefix first - let mut common_prefix = settings.stats.common_prefix.clone(); - common_prefix.sort_by_key(|b| std::cmp::Reverse(b.len())); - - // Trim off the common prefix, if it exists - for p in &common_prefix { - if command.starts_with(p) { - let i = p.len(); - let prefix = &command[..i]; - command = command[i..].trim_start(); - if command.is_empty() { - // no commands following, just use the prefix - return prefix; - } - break; - } - } - - // Sort the common_subcommands by length so that we match the longest subcommand first - let mut common_subcommands = settings.stats.common_subcommands.clone(); - common_subcommands.sort_by_key(|b| std::cmp::Reverse(b.len())); - - // Check for a common subcommand - for p in &common_subcommands { - if command.starts_with(p) { - // if the subcommand is the same length as the command, then we just use the subcommand - if p.len() == command.len() { - return command; - } - // otherwise we need to use the subcommand + the next word - let non_whitespace = first_non_whitespace(&command[p.len()..]).unwrap_or(0); - let j = - p.len() + non_whitespace + first_whitespace(&command[p.len() + non_whitespace..]); - return &command[..j]; - } - } - // Return the first word if there is no subcommand - &command[..first_whitespace(command)] -} - -fn split_at_pipe(command: &str) -> Vec<&str> { - let mut result = vec![]; - let mut quoted = false; - let mut start = 0; - let mut graphemes = UnicodeSegmentation::grapheme_indices(command, true); - - while let Some((i, c)) = graphemes.next() { - let current = i; - match c { - "\"" if command[start..current] != *"\"" => { - quoted = !quoted; - } - "'" if command[start..current] != *"'" => { - quoted = !quoted; - } - "\\" if graphemes.next().is_some() => {} - "|" if !quoted => { - if current > start && command[start..].starts_with('|') { - start += 1; - } - result.push(&command[start..current]); - start = current; - } - _ => {} - } - } - if command[start..].starts_with('|') { - start += 1; - } - result.push(&command[start..]); - result -} - -fn strip_leading_env_vars(command: &str) -> &str { - // fast path: no equals sign, no environment variable - if !command.contains('=') { - return command; - } - - let mut in_token = false; - let mut token_start_pos = 0; - let mut in_single_quotes = false; - let mut in_double_quotes = false; - let mut escape_next = false; - let mut has_equals_outside_quotes = false; - - for (i, g) in UnicodeSegmentation::grapheme_indices(command, true) { - if escape_next { - escape_next = false; - continue; - } - - if !in_token { - token_start_pos = i; - } - - match g { - "\\" => { - escape_next = true; - in_token = true; - } - "'" if !in_double_quotes => { - in_single_quotes = !in_single_quotes; - in_token = true; - } - "\"" if !in_single_quotes => { - in_double_quotes = !in_double_quotes; - in_token = true; - } - "=" if !in_single_quotes && !in_double_quotes => { - has_equals_outside_quotes = true; - in_token = true; - } - " " | "\t" if !in_single_quotes && !in_double_quotes => { - if in_token { - if !has_equals_outside_quotes { - // if we're not in an env var, we can break early - break; - } - in_token = false; - has_equals_outside_quotes = false; - } - } - _ => { - in_token = true; - } - } - } - - command[token_start_pos..].trim() -} - -pub fn pretty_print(stats: Stats, ngram_size: usize, theme: &Theme) { - let max = stats.top.iter().map(|x| x.1).max().unwrap(); - let num_pad = max.ilog10() as usize + 1; - - // Find the length of the longest command name for each column - let column_widths = stats - .top - .iter() - .map(|(commands, _)| commands.iter().map(|c| c.len()).collect::<Vec<usize>>()) - .fold(vec![0; ngram_size], |acc, item| { - acc.iter() - .zip(item.iter()) - .map(|(a, i)| *std::cmp::max(a, i)) - .collect() - }); - - for (command, count) in stats.top { - let gray = SetForegroundColor(match theme.as_style(Meaning::Muted).foreground_color { - Some(color) => color, - None => Color::Grey, - }); - let bold = SetAttribute(crossterm::style::Attribute::Bold); - - let in_ten = 10 * count / max; - - print!("["); - print!( - "{}", - SetForegroundColor(match theme.get_error().foreground_color { - Some(color) => color, - None => Color::Red, - }) - ); - - for i in 0..in_ten { - if i == 2 { - print!( - "{}", - SetForegroundColor(match theme.get_warning().foreground_color { - Some(color) => color, - None => Color::Yellow, - }) - ); - } - - if i == 5 { - print!( - "{}", - SetForegroundColor(match theme.get_info().foreground_color { - Some(color) => color, - None => Color::Green, - }) - ); - } - - print!("▮"); - } - - for _ in in_ten..10 { - print!(" "); - } - - let formatted_command = command - .iter() - .zip(column_widths.iter()) - .map(|(cmd, width)| format!("{cmd:width$}")) - .collect::<Vec<_>>() - .join(" | "); - - println!( - "{ResetColor}] {gray}{count:num_pad$}{ResetColor} {bold}{formatted_command}{ResetColor}" - ); - } - println!("Total commands: {}", stats.total_commands); - println!("Unique commands: {}", stats.unique_commands); -} - -pub fn compute( - settings: &Settings, - history: &[History], - count: usize, - ngram_size: usize, -) -> Option<Stats> { - let mut commands = HashSet::<&str>::with_capacity(history.len()); - let mut total_unignored = 0; - let mut prefixes = HashMap::<Vec<&str>, usize>::with_capacity(history.len()); - - for i in history { - // just in case it somehow has a leading tab or space or something (legacy atuin didn't ignore space prefixes) - let command = strip_leading_env_vars(i.command.trim()); - let prefix = interesting_command(settings, command); - - if settings.stats.ignored_commands.iter().any(|c| c == prefix) { - continue; - } - - total_unignored += 1; - commands.insert(command); - - split_at_pipe(command) - .iter() - .map(|l| { - let command = l.trim(); - commands.insert(command); - command - }) - .collect::<Vec<_>>() - .windows(ngram_size) - .for_each(|w| { - *prefixes - .entry(w.iter().map(|c| interesting_command(settings, c)).collect()) - .or_default() += 1; - }); - } - - let unique = commands.len(); - let mut top = prefixes.into_iter().collect::<Vec<_>>(); - - top.sort_unstable_by_key(|x| std::cmp::Reverse(x.1)); - top.truncate(count); - - if top.is_empty() { - return None; - } - - Some(Stats { - unique_commands: unique, - total_commands: total_unignored, - top: top - .into_iter() - .map(|t| (t.0.into_iter().map(|s| s.to_string()).collect(), t.1)) - .collect(), - }) -} - -#[cfg(test)] -mod tests { - use atuin_client::history::History; - use atuin_client::settings::Settings; - use time::OffsetDateTime; - - use super::compute; - use super::{interesting_command, split_at_pipe, strip_leading_env_vars}; - - #[test] - fn ignored_env_vars() { - let settings = Settings::utc(); - - let history: History = History::capture() - .timestamp(time::OffsetDateTime::now_utc()) - .command("FOO='BAR=🚀' echo foo") - .cwd("/") - .build() - .into(); - - let stats = compute(&settings, &[history], 10, 1).expect("failed to compute stats"); - assert_eq!(stats.top.first().unwrap().0, vec!["echo"]); - } - - #[test] - fn ignored_commands() { - let mut settings = Settings::utc(); - settings.stats.ignored_commands.push("cd".to_string()); - - let history = [ - History::import() - .timestamp(OffsetDateTime::now_utc()) - .command("cd foo") - .build() - .into(), - History::import() - .timestamp(OffsetDateTime::now_utc()) - .command("cargo build stuff") - .build() - .into(), - ]; - - let stats = compute(&settings, &history, 10, 1).expect("failed to compute stats"); - assert_eq!(stats.total_commands, 1); - assert_eq!(stats.unique_commands, 1); - } - - #[test] - fn interesting_commands() { - let settings = Settings::utc(); - - assert_eq!(interesting_command(&settings, "cargo"), "cargo"); - assert_eq!( - interesting_command(&settings, "cargo build foo bar"), - "cargo build" - ); - assert_eq!( - interesting_command(&settings, "sudo cargo build foo bar"), - "cargo build" - ); - assert_eq!(interesting_command(&settings, "sudo"), "sudo"); - } - - // Test with spaces in the common_prefix - #[test] - fn interesting_commands_spaces() { - let mut settings = Settings::utc(); - settings.stats.common_prefix.push("sudo test".to_string()); - - assert_eq!(interesting_command(&settings, "sudo test"), "sudo test"); - assert_eq!(interesting_command(&settings, "sudo test "), "sudo test"); - assert_eq!(interesting_command(&settings, "sudo test foo bar"), "foo"); - assert_eq!( - interesting_command(&settings, "sudo test foo bar"), - "foo" - ); - - // Works with a common_subcommand as well - assert_eq!( - interesting_command(&settings, "sudo test cargo build foo bar"), - "cargo build" - ); - - // We still match on just the sudo prefix - assert_eq!(interesting_command(&settings, "sudo"), "sudo"); - assert_eq!(interesting_command(&settings, "sudo foo"), "foo"); - } - - // Test with spaces in the common_subcommand - #[test] - fn interesting_commands_spaces_subcommand() { - let mut settings = Settings::utc(); - settings - .stats - .common_subcommands - .push("cargo build".to_string()); - - assert_eq!(interesting_command(&settings, "cargo build"), "cargo build"); - assert_eq!( - interesting_command(&settings, "cargo build "), - "cargo build" - ); - assert_eq!( - interesting_command(&settings, "cargo build foo bar"), - "cargo build foo" - ); - - // Works with a common_prefix as well - assert_eq!( - interesting_command(&settings, "sudo cargo build foo bar"), - "cargo build foo" - ); - - // We still match on just cargo as a subcommand - assert_eq!(interesting_command(&settings, "cargo"), "cargo"); - assert_eq!(interesting_command(&settings, "cargo foo"), "cargo foo"); - } - - // Test with spaces in the common_prefix and common_subcommand - #[test] - fn interesting_commands_spaces_both() { - let mut settings = Settings::utc(); - settings.stats.common_prefix.push("sudo test".to_string()); - settings - .stats - .common_subcommands - .push("cargo build".to_string()); - - assert_eq!( - interesting_command(&settings, "sudo test cargo build"), - "cargo build" - ); - assert_eq!( - interesting_command(&settings, "sudo test cargo build"), - "cargo build" - ); - assert_eq!( - interesting_command(&settings, "sudo test cargo build "), - "cargo build" - ); - assert_eq!( - interesting_command(&settings, "sudo test cargo build foo bar"), - "cargo build foo" - ); - } - - #[test] - fn split_simple() { - assert_eq!(split_at_pipe("fd | rg"), ["fd ", " rg"]); - } - - #[test] - fn split_multi() { - assert_eq!( - split_at_pipe("kubectl | jq | rg"), - ["kubectl ", " jq ", " rg"] - ); - } - - #[test] - fn split_simple_quoted() { - assert_eq!( - split_at_pipe("foo | bar 'baz {} | quux' | xyzzy"), - ["foo ", " bar 'baz {} | quux' ", " xyzzy"] - ); - } - - #[test] - fn split_multi_quoted() { - assert_eq!( - split_at_pipe("foo | bar 'baz \"{}\" | quux' | xyzzy"), - ["foo ", " bar 'baz \"{}\" | quux' ", " xyzzy"] - ); - } - - #[test] - fn escaped_pipes() { - assert_eq!( - split_at_pipe("foo | bar baz \\| quux"), - ["foo ", " bar baz \\| quux"] - ); - } - - #[test] - fn emoji() { - assert_eq!( - split_at_pipe("git commit -m \"🚀\""), - ["git commit -m \"🚀\""] - ); - } - - #[test] - fn starts_with_pipe() { - assert_eq!( - split_at_pipe("| sed 's/[0-9a-f]//g'"), - ["", " sed 's/[0-9a-f]//g'"] - ); - } - - #[test] - fn starts_with_spaces_and_pipe() { - assert_eq!( - split_at_pipe(" | sed 's/[0-9a-f]//g'"), - [" ", " sed 's/[0-9a-f]//g'"] - ); - } - - #[test] - fn strip_leading_env_vars_simple() { - assert_eq!( - strip_leading_env_vars("FOO=bar BAZ=quux echo foo"), - "echo foo" - ); - } - - #[test] - fn strip_leading_env_vars_quoted_single() { - assert_eq!(strip_leading_env_vars("FOO='BAR=baz' echo foo"), "echo foo"); - } - - #[test] - fn strip_leading_env_vars_quoted_double() { - assert_eq!( - strip_leading_env_vars("FOO=\"BAR=baz\" echo foo"), - "echo foo" - ); - } - - #[test] - fn strip_leading_env_vars_quoted_single_and_double() { - assert_eq!( - strip_leading_env_vars("FOO='BAR=\"baz\"' echo foo \"BAR=quux\""), - "echo foo \"BAR=quux\"" - ); - } - - #[test] - fn strip_leading_env_vars_emojis() { - assert_eq!( - strip_leading_env_vars("FOO='BAR=🚀' echo foo \"BAR=quux\" foo"), - "echo foo \"BAR=quux\" foo" - ); - } - - #[test] - fn strip_leading_env_vars_name_same_as_command() { - assert_eq!(strip_leading_env_vars("FOO='bar' bar baz"), "bar baz"); - } -} |
