From 1201caee5c7b3020e1c879e527feb934fd1d8023 Mon Sep 17 00:00:00 2001 From: Ellie Huxtable Date: Wed, 26 Jun 2024 12:40:17 +0100 Subject: perf(search): benchmark smart sort (#2202) --- crates/atuin-history/Cargo.toml | 9 +++++ crates/atuin-history/benches/smart_sort.rs | 35 ++++++++++++++++ crates/atuin-history/src/lib.rs | 1 + crates/atuin-history/src/sort.rs | 46 ++++++++++++++++++++++ crates/atuin/src/command/client/search.rs | 1 - .../atuin/src/command/client/search/interactive.rs | 6 ++- crates/atuin/src/command/client/search/sort.rs | 46 ---------------------- 7 files changed, 95 insertions(+), 49 deletions(-) create mode 100644 crates/atuin-history/benches/smart_sort.rs create mode 100644 crates/atuin-history/src/sort.rs delete mode 100644 crates/atuin/src/command/client/search/sort.rs (limited to 'crates') diff --git a/crates/atuin-history/Cargo.toml b/crates/atuin-history/Cargo.toml index 4836cf74..2ed0e1a5 100644 --- a/crates/atuin-history/Cargo.toml +++ b/crates/atuin-history/Cargo.toml @@ -39,3 +39,12 @@ tracing = "0.1" uuid = { workspace = true } unicode-segmentation = "1.11.0" sysinfo = "0.30.7" + +[dev-dependencies] +tracing-tree = "0.3" +divan = "0.1.14" +rand = { workspace = true } + +[[bench]] +name = "smart_sort" +harness = false diff --git a/crates/atuin-history/benches/smart_sort.rs b/crates/atuin-history/benches/smart_sort.rs new file mode 100644 index 00000000..a78064de --- /dev/null +++ b/crates/atuin-history/benches/smart_sort.rs @@ -0,0 +1,35 @@ +use atuin_client::history::History; +use atuin_history::sort::sort; + +use rand::Rng; + +fn main() { + // Run registered benchmarks. + divan::main(); +} + +// Smart sort usually runs on 200 entries, test on a few sizes +#[divan::bench(args=[100, 200, 400, 800, 1600, 10000])] +fn smart_sort(lines: usize) { + // benchmark a few different sizes of "history" + // first we need to generate some history. This will use a whole bunch of memory, sorry + let mut rng = rand::thread_rng(); + let now = time::OffsetDateTime::now_utc().unix_timestamp(); + + let possible_commands = ["echo", "ls", "cd", "grep", "atuin", "curl"]; + let mut commands = Vec::::with_capacity(lines); + + for _ in 0..lines { + let command = possible_commands[rng.gen_range(0..possible_commands.len())]; + + let command = History::import() + .command(command) + .timestamp(time::OffsetDateTime::from_unix_timestamp(rng.gen_range(0..now)).unwrap()) + .build() + .into(); + + commands.push(command); + } + + let _ = sort("curl", commands); +} diff --git a/crates/atuin-history/src/lib.rs b/crates/atuin-history/src/lib.rs index 9d34677f..e7b33916 100644 --- a/crates/atuin-history/src/lib.rs +++ b/crates/atuin-history/src/lib.rs @@ -1 +1,2 @@ +pub mod sort; pub mod stats; diff --git a/crates/atuin-history/src/sort.rs b/crates/atuin-history/src/sort.rs new file mode 100644 index 00000000..4465a142 --- /dev/null +++ b/crates/atuin-history/src/sort.rs @@ -0,0 +1,46 @@ +use atuin_client::history::History; + +type ScoredHistory = (f64, History); + +// Fuzzy search already comes sorted by minspan +// This sorting should be applicable to all search modes, and solve the more "obvious" issues +// first. +// Later on, we can pass in context and do some boosts there too. +pub fn sort(query: &str, input: Vec) -> Vec { + // This can totally be extended. We need to be _careful_ that it's not slow. + // We also need to balance sorting db-side with sorting here. SQLite can do a lot, + // but some things are just much easier/more doable in Rust. + + let mut scored = input + .into_iter() + .map(|h| { + // If history is _prefixed_ with the query, score it more highly + let score = if h.command.starts_with(query) { + 2.0 + } else if h.command.contains(query) { + 1.75 + } else { + 1.0 + }; + + // calculate how long ago the history was, in seconds + let now = time::OffsetDateTime::now_utc().unix_timestamp(); + let time = h.timestamp.unix_timestamp(); + let diff = std::cmp::max(1, now - time); // no /0 please + + // prefer newer history, but not hugely so as to offset the other scoring + // the numbers will get super small over time, but I don't want time to overpower other + // scoring + #[allow(clippy::cast_precision_loss)] + let time_score = 1.0 + (1.0 / diff as f64); + let score = score * time_score; + + (score, h) + }) + .collect::>(); + + scored.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap().reverse()); + + // Remove the scores and return the history + scored.into_iter().map(|(_, h)| h).collect::>() +} diff --git a/crates/atuin/src/command/client/search.rs b/crates/atuin/src/command/client/search.rs index f645d26b..f3626afe 100644 --- a/crates/atuin/src/command/client/search.rs +++ b/crates/atuin/src/command/client/search.rs @@ -21,7 +21,6 @@ mod engines; mod history_list; mod inspector; mod interactive; -mod sort; pub use duration::format_duration_into; diff --git a/crates/atuin/src/command/client/search/interactive.rs b/crates/atuin/src/command/client/search/interactive.rs index de0f4ea1..1676345b 100644 --- a/crates/atuin/src/command/client/search/interactive.rs +++ b/crates/atuin/src/command/client/search/interactive.rs @@ -31,7 +31,6 @@ use super::{ cursor::Cursor, engines::{SearchEngine, SearchState}, history_list::{HistoryList, ListState, PREFIX_LENGTH}, - sort, }; use crate::{command::client::search::engines, VERSION}; @@ -96,7 +95,10 @@ impl State { self.results_len = results.len(); if smart_sort { - Ok(sort::sort(self.search.input.as_str(), results)) + Ok(atuin_history::sort::sort( + self.search.input.as_str(), + results, + )) } else { Ok(results) } diff --git a/crates/atuin/src/command/client/search/sort.rs b/crates/atuin/src/command/client/search/sort.rs deleted file mode 100644 index 4465a142..00000000 --- a/crates/atuin/src/command/client/search/sort.rs +++ /dev/null @@ -1,46 +0,0 @@ -use atuin_client::history::History; - -type ScoredHistory = (f64, History); - -// Fuzzy search already comes sorted by minspan -// This sorting should be applicable to all search modes, and solve the more "obvious" issues -// first. -// Later on, we can pass in context and do some boosts there too. -pub fn sort(query: &str, input: Vec) -> Vec { - // This can totally be extended. We need to be _careful_ that it's not slow. - // We also need to balance sorting db-side with sorting here. SQLite can do a lot, - // but some things are just much easier/more doable in Rust. - - let mut scored = input - .into_iter() - .map(|h| { - // If history is _prefixed_ with the query, score it more highly - let score = if h.command.starts_with(query) { - 2.0 - } else if h.command.contains(query) { - 1.75 - } else { - 1.0 - }; - - // calculate how long ago the history was, in seconds - let now = time::OffsetDateTime::now_utc().unix_timestamp(); - let time = h.timestamp.unix_timestamp(); - let diff = std::cmp::max(1, now - time); // no /0 please - - // prefer newer history, but not hugely so as to offset the other scoring - // the numbers will get super small over time, but I don't want time to overpower other - // scoring - #[allow(clippy::cast_precision_loss)] - let time_score = 1.0 + (1.0 / diff as f64); - let score = score * time_score; - - (score, h) - }) - .collect::>(); - - scored.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap().reverse()); - - // Remove the scores and return the history - scored.into_iter().map(|(_, h)| h).collect::>() -} -- cgit v1.3.1