about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Ellie Huxtable <ellie@elliehuxtable.com> 2024-04-30 13:16:50 +0100
committer GitHub <noreply@github.com> 2024-04-30 13:16:50 +0100
commit d1ce01679b22b99321fe7407e8ee35de8cf99bd5 (patch)
tree 1d583508eb01a698121e56c33a43af1684022c16
parent feat(ui/dotfiles): add vars (#1989) (diff)
download atuin-d1ce01679b22b99321fe7407e8ee35de8cf99bd5.zip
feat(history): create atuin-history, add stats to it (#1990)
* feat(history): create atuin-history, add stats to it
  I'd like to eventually pull all the history stuff into this crate. Stats are a nice start, as I'd like to use them from the UI anyways.
* lock
* clippy
Diffstat (limited to '')
-rw-r--r-- Cargo.lock 31
-rw-r--r-- crates/atuin-client/src/settings.rs 32
-rw-r--r-- crates/atuin-history/Cargo.toml 40
-rw-r--r-- crates/atuin-history/src/lib.rs 1
-rw-r--r-- crates/atuin-history/src/stats.rs 394
-rw-r--r-- crates/atuin/Cargo.toml 1
-rw-r--r-- crates/atuin/src/command/client/stats.rs 379
7 files changed, 490 insertions, 388 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 4c242691..d12dedf9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -186,6 +186,7 @@ dependencies = [
"atuin-client",
"atuin-common",
"atuin-dotfiles",
+ "atuin-history",
"atuin-server",
"atuin-server-postgres",
"base64 0.21.7",
@@ -307,6 +308,36 @@ dependencies = [
]
[[package]]
+name = "atuin-history"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "atuin-client",
+ "atuin-common",
+ "base64 0.21.7",
+ "crossterm",
+ "directories",
+ "eyre",
+ "fs-err",
+ "futures-util",
+ "indicatif",
+ "interim",
+ "itertools",
+ "log",
+ "semver",
+ "serde",
+ "serde_json",
+ "sysinfo",
+ "time",
+ "tokio",
+ "tracing",
+ "unicode-segmentation",
+ "unicode-width",
+ "uuid",
+ "whoami",
+]
+
+[[package]]
name = "atuin-server"
version = "18.2.0"
dependencies = [
diff --git a/crates/atuin-client/src/settings.rs b/crates/atuin-client/src/settings.rs
index daf8fe34..0497fb64 100644
--- a/crates/atuin-client/src/settings.rs
+++ b/crates/atuin-client/src/settings.rs
@@ -17,7 +17,7 @@ use fs_err::{create_dir_all, File};
use parse_duration::parse;
use regex::RegexSet;
use semver::Version;
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
use serde_with::DeserializeFromStr;
use time::{
format_description::{well_known::Rfc3339, FormatItem},
@@ -35,7 +35,7 @@ static EXAMPLE_CONFIG: &str = include_str!("../config.toml");
mod dotfiles;
-#[derive(Clone, Debug, Deserialize, Copy, ValueEnum, PartialEq)]
+#[derive(Clone, Debug, Deserialize, Copy, ValueEnum, PartialEq, Serialize)]
pub enum SearchMode {
#[serde(rename = "prefix")]
Prefix,
@@ -72,7 +72,7 @@ impl SearchMode {
}
}
-#[derive(Clone, Debug, Deserialize, Copy, PartialEq, Eq, ValueEnum)]
+#[derive(Clone, Debug, Deserialize, Copy, PartialEq, Eq, ValueEnum, Serialize)]
pub enum FilterMode {
#[serde(rename = "global")]
Global = 0,
@@ -102,7 +102,7 @@ impl FilterMode {
}
}
-#[derive(Clone, Debug, Deserialize, Copy)]
+#[derive(Clone, Debug, Deserialize, Copy, Serialize)]
pub enum ExitMode {
#[serde(rename = "return-original")]
ReturnOriginal,
@@ -113,7 +113,7 @@ pub enum ExitMode {
// FIXME: Can use upstream Dialect enum if https://github.com/stevedonovan/chrono-english/pull/16 is merged
// FIXME: Above PR was merged, but dependency was changed to interim (fork of chrono-english) in the ... interim
-#[derive(Clone, Debug, Deserialize, Copy)]
+#[derive(Clone, Debug, Deserialize, Copy, Serialize)]
pub enum Dialect {
#[serde(rename = "us")]
Us,
@@ -137,7 +137,7 @@ impl From<Dialect> for interim::Dialect {
/// multithreaded runtime, otherwise it will fail on most Unix systems.
///
/// See: https://github.com/atuinsh/atuin/pull/1517#discussion_r1447516426
-#[derive(Clone, Copy, Debug, Eq, PartialEq, DeserializeFromStr)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq, DeserializeFromStr, Serialize)]
pub struct Timezone(pub UtcOffset);
impl fmt::Display for Timezone {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -180,7 +180,7 @@ impl FromStr for Timezone {
}
}
-#[derive(Clone, Debug, Deserialize, Copy)]
+#[derive(Clone, Debug, Deserialize, Copy, Serialize)]
pub enum Style {
#[serde(rename = "auto")]
Auto,
@@ -192,7 +192,7 @@ pub enum Style {
Compact,
}
-#[derive(Clone, Debug, Deserialize, Copy)]
+#[derive(Clone, Debug, Deserialize, Copy, Serialize)]
pub enum WordJumpMode {
#[serde(rename = "emacs")]
Emacs,
@@ -201,7 +201,7 @@ pub enum WordJumpMode {
Subl,
}
-#[derive(Clone, Debug, Deserialize, Copy, PartialEq, Eq, ValueEnum)]
+#[derive(Clone, Debug, Deserialize, Copy, PartialEq, Eq, ValueEnum, Serialize)]
pub enum KeymapMode {
#[serde(rename = "emacs")]
Emacs,
@@ -232,7 +232,7 @@ impl KeymapMode {
// It seems impossible to implement Deserialize for external types when it is
// used in HashMap (https://stackoverflow.com/questions/67142663). We instead
// define an adapter type.
-#[derive(Clone, Debug, Deserialize, Copy, PartialEq, Eq, ValueEnum)]
+#[derive(Clone, Debug, Deserialize, Copy, PartialEq, Eq, ValueEnum, Serialize)]
pub enum CursorStyle {
#[serde(rename = "default")]
DefaultUserShape,
@@ -270,7 +270,7 @@ impl CursorStyle {
}
}
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Stats {
#[serde(default = "Stats::common_prefix_default")]
pub common_prefix: Vec<String>, // sudo, etc. commands we want to strip off
@@ -327,17 +327,17 @@ impl Default for Stats {
}
}
-#[derive(Clone, Debug, Deserialize, Default)]
+#[derive(Clone, Debug, Deserialize, Default, Serialize)]
pub struct Sync {
pub records: bool,
}
-#[derive(Clone, Debug, Deserialize, Default)]
+#[derive(Clone, Debug, Deserialize, Default, Serialize)]
pub struct Keys {
pub scroll_exits: bool,
}
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Settings {
pub dialect: Dialect,
pub timezone: Timezone,
@@ -373,10 +373,10 @@ pub struct Settings {
pub prefers_reduced_motion: bool,
pub store_failed: bool,
- #[serde(with = "serde_regex", default = "RegexSet::empty")]
+ #[serde(with = "serde_regex", default = "RegexSet::empty", skip_serializing)]
pub history_filter: RegexSet,
- #[serde(with = "serde_regex", default = "RegexSet::empty")]
+ #[serde(with = "serde_regex", default = "RegexSet::empty", skip_serializing)]
pub cwd_filter: RegexSet,
pub secrets_filter: bool,
diff --git a/crates/atuin-history/Cargo.toml b/crates/atuin-history/Cargo.toml
new file mode 100644
index 00000000..0927230c
--- /dev/null
+++ b/crates/atuin-history/Cargo.toml
@@ -0,0 +1,40 @@
+[package]
+name = "atuin-history"
+edition = "2021"
+version = "0.1.0"
+
+authors.workspace = true
+rust-version.workspace = true
+license.workspace = true
+homepage.workspace = true
+repository.workspace = true
+readme.workspace = true
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+atuin-client = { path = "../atuin-client", version = "18.2.0" }
+atuin-common = { path = "../atuin-common", version = "18.2.0" }
+
+log = { workspace = true }
+time = { workspace = true }
+eyre = { workspace = true }
+directories = { workspace = true }
+indicatif = "0.17.5"
+serde = { workspace = true }
+serde_json = { workspace = true }
+crossterm = { version = "0.27", features = ["use-dev-tty"] }
+unicode-width = "0.1"
+itertools = { workspace = true }
+tokio = { workspace = true }
+async-trait = { workspace = true }
+interim = { workspace = true }
+base64 = { workspace = true }
+fs-err = { workspace = true }
+whoami = { workspace = true }
+semver = { workspace = true }
+futures-util = "0.3"
+tracing = "0.1"
+uuid = { workspace = true }
+unicode-segmentation = "1.11.0"
+sysinfo = "0.30.7"
diff --git a/crates/atuin-history/src/lib.rs b/crates/atuin-history/src/lib.rs
new file mode 100644
index 00000000..9d34677f
--- /dev/null
+++ b/crates/atuin-history/src/lib.rs
@@ -0,0 +1 @@
+pub mod stats;
diff --git a/crates/atuin-history/src/stats.rs b/crates/atuin-history/src/stats.rs
new file mode 100644
index 00000000..fb6781fe
--- /dev/null
+++ b/crates/atuin-history/src/stats.rs
@@ -0,0 +1,394 @@
+use std::collections::{HashMap, HashSet};
+
+use crossterm::style::{Color, ResetColor, SetAttribute, SetForegroundColor};
+
+use atuin_client::{history::History, settings::Settings};
+use unicode_segmentation::UnicodeSegmentation;
+
+pub struct Stats<'a> {
+ pub total_commands: usize,
+ pub unique_commands: usize,
+ pub top: Vec<(Vec<&'a str>, usize)>,
+}
+
+fn first_non_whitespace(s: &str) -> Option<usize> {
+ s.char_indices()
+ // find the first non whitespace char
+ .find(|(_, c)| !c.is_ascii_whitespace())
+ // return the index of that char
+ .map(|(i, _)| i)
+}
+
+fn first_whitespace(s: &str) -> usize {
+ s.char_indices()
+ // find the first whitespace char
+ .find(|(_, c)| c.is_ascii_whitespace())
+ // return the index of that char, (or the max length of the string)
+ .map_or(s.len(), |(i, _)| i)
+}
+
+fn interesting_command<'a>(settings: &Settings, mut command: &'a str) -> &'a str {
+ // Sort by length so that we match the longest prefix first
+ let mut common_prefix = settings.stats.common_prefix.clone();
+ common_prefix.sort_by_key(|b| std::cmp::Reverse(b.len()));
+
+ // Trim off the common prefix, if it exists
+ for p in &common_prefix {
+ if command.starts_with(p) {
+ let i = p.len();
+ let prefix = &command[..i];
+ command = command[i..].trim_start();
+ if command.is_empty() {
+ // no commands following, just use the prefix
+ return prefix;
+ }
+ break;
+ }
+ }
+
+ // Sort the common_subcommands by length so that we match the longest subcommand first
+ let mut common_subcommands = settings.stats.common_subcommands.clone();
+ common_subcommands.sort_by_key(|b| std::cmp::Reverse(b.len()));
+
+ // Check for a common subcommand
+ for p in &common_subcommands {
+ if command.starts_with(p) {
+ // if the subcommand is the same length as the command, then we just use the subcommand
+ if p.len() == command.len() {
+ return command;
+ }
+ // otherwise we need to use the subcommand + the next word
+ let non_whitespace = first_non_whitespace(&command[p.len()..]).unwrap_or(0);
+ let j =
+ p.len() + non_whitespace + first_whitespace(&command[p.len() + non_whitespace..]);
+ return &command[..j];
+ }
+ }
+ // Return the first word if there is no subcommand
+ &command[..first_whitespace(command)]
+}
+
+fn split_at_pipe(command: &str) -> Vec<&str> {
+ let mut result = vec![];
+ let mut quoted = false;
+ let mut start = 0;
+ let mut graphemes = UnicodeSegmentation::grapheme_indices(command, true);
+
+ while let Some((i, c)) = graphemes.next() {
+ let current = i;
+ match c {
+ "\"" => {
+ if command[start..current] != *"\"" {
+ quoted = !quoted;
+ }
+ }
+ "'" => {
+ if command[start..current] != *"'" {
+ quoted = !quoted;
+ }
+ }
+ "\\" => if graphemes.next().is_some() {},
+ "|" => {
+ if !quoted {
+ if command[start..].starts_with('|') {
+ start += 1;
+ }
+ result.push(&command[start..current]);
+ start = current;
+ }
+ }
+ _ => {}
+ }
+ }
+ if command[start..].starts_with('|') {
+ start += 1;
+ }
+ result.push(&command[start..]);
+ result
+}
+
+pub fn pretty_print(stats: Stats, ngram_size: usize) {
+ let max = stats.top.iter().map(|x| x.1).max().unwrap();
+ let num_pad = max.ilog10() as usize + 1;
+
+ // Find the length of the longest command name for each column
+ let column_widths = stats
+ .top
+ .iter()
+ .map(|(commands, _)| commands.iter().map(|c| c.len()).collect::<Vec<usize>>())
+ .fold(vec![0; ngram_size], |acc, item| {
+ acc.iter()
+ .zip(item.iter())
+ .map(|(a, i)| *std::cmp::max(a, i))
+ .collect()
+ });
+
+ for (command, count) in stats.top {
+ let gray = SetForegroundColor(Color::Grey);
+ let bold = SetAttribute(crossterm::style::Attribute::Bold);
+
+ let in_ten = 10 * count / max;
+
+ print!("[");
+ print!("{}", SetForegroundColor(Color::Red));
+
+ for i in 0..in_ten {
+ if i == 2 {
+ print!("{}", SetForegroundColor(Color::Yellow));
+ }
+
+ if i == 5 {
+ print!("{}", SetForegroundColor(Color::Green));
+ }
+
+ print!("▮");
+ }
+
+ for _ in in_ten..10 {
+ print!(" ");
+ }
+
+ let formatted_command = command
+ .iter()
+ .zip(column_widths.iter())
+ .map(|(cmd, width)| format!("{cmd:width$}"))
+ .collect::<Vec<_>>()
+ .join(" | ");
+
+ println!("{ResetColor}] {gray}{count:num_pad$}{ResetColor} {bold}{formatted_command}{ResetColor}");
+ }
+ println!("Total commands: {}", stats.total_commands);
+ println!("Unique commands: {}", stats.unique_commands);
+}
+
+pub fn compute<'a>(
+ settings: &Settings,
+ history: &'a [History],
+ count: usize,
+ ngram_size: usize,
+) -> Option<Stats<'a>> {
+ let mut commands = HashSet::<&str>::with_capacity(history.len());
+ let mut total_unignored = 0;
+ let mut prefixes = HashMap::<Vec<&str>, usize>::with_capacity(history.len());
+
+ for i in history {
+ // just in case it somehow has a leading tab or space or something (legacy atuin didn't ignore space prefixes)
+ let command = i.command.trim();
+ let prefix = interesting_command(settings, command);
+
+ if settings.stats.ignored_commands.iter().any(|c| c == prefix) {
+ continue;
+ }
+
+ total_unignored += 1;
+ commands.insert(command);
+
+ split_at_pipe(i.command.trim())
+ .iter()
+ .map(|l| {
+ let command = l.trim();
+ commands.insert(command);
+ command
+ })
+ .collect::<Vec<_>>()
+ .windows(ngram_size)
+ .for_each(|w| {
+ *prefixes
+ .entry(w.iter().map(|c| interesting_command(settings, c)).collect())
+ .or_default() += 1;
+ });
+ }
+
+ let unique = commands.len();
+ let mut top = prefixes.into_iter().collect::<Vec<_>>();
+
+ top.sort_unstable_by_key(|x| std::cmp::Reverse(x.1));
+ top.truncate(count);
+
+ if top.is_empty() {
+ return None;
+ }
+
+ Some(Stats {
+ unique_commands: unique,
+ total_commands: total_unignored,
+ top,
+ })
+}
+
+#[cfg(test)]
+mod tests {
+ use atuin_client::history::History;
+ use atuin_client::settings::Settings;
+ use time::OffsetDateTime;
+
+ use super::compute;
+ use super::{interesting_command, split_at_pipe};
+
+ #[test]
+ fn ignored_commands() {
+ let mut settings = Settings::utc();
+ settings.stats.ignored_commands.push("cd".to_string());
+
+ let history = [
+ History::import()
+ .timestamp(OffsetDateTime::now_utc())
+ .command("cd foo")
+ .build()
+ .into(),
+ History::import()
+ .timestamp(OffsetDateTime::now_utc())
+ .command("cargo build stuff")
+ .build()
+ .into(),
+ ];
+
+ let stats = compute(&settings, &history, 10, 1).expect("failed to compute stats");
+ assert_eq!(stats.total_commands, 1);
+ assert_eq!(stats.unique_commands, 1);
+ }
+
+ #[test]
+ fn interesting_commands() {
+ let settings = Settings::utc();
+
+ assert_eq!(interesting_command(&settings, "cargo"), "cargo");
+ assert_eq!(
+ interesting_command(&settings, "cargo build foo bar"),
+ "cargo build"
+ );
+ assert_eq!(
+ interesting_command(&settings, "sudo cargo build foo bar"),
+ "cargo build"
+ );
+ assert_eq!(interesting_command(&settings, "sudo"), "sudo");
+ }
+
+ // Test with spaces in the common_prefix
+ #[test]
+ fn interesting_commands_spaces() {
+ let mut settings = Settings::utc();
+ settings.stats.common_prefix.push("sudo test".to_string());
+
+ assert_eq!(interesting_command(&settings, "sudo test"), "sudo test");
+ assert_eq!(interesting_command(&settings, "sudo test "), "sudo test");
+ assert_eq!(interesting_command(&settings, "sudo test foo bar"), "foo");
+ assert_eq!(
+ interesting_command(&settings, "sudo test foo bar"),
+ "foo"
+ );
+
+ // Works with a common_subcommand as well
+ assert_eq!(
+ interesting_command(&settings, "sudo test cargo build foo bar"),
+ "cargo build"
+ );
+
+ // We still match on just the sudo prefix
+ assert_eq!(interesting_command(&settings, "sudo"), "sudo");
+ assert_eq!(interesting_command(&settings, "sudo foo"), "foo");
+ }
+
+ // Test with spaces in the common_subcommand
+ #[test]
+ fn interesting_commands_spaces_subcommand() {
+ let mut settings = Settings::utc();
+ settings
+ .stats
+ .common_subcommands
+ .push("cargo build".to_string());
+
+ assert_eq!(interesting_command(&settings, "cargo build"), "cargo build");
+ assert_eq!(
+ interesting_command(&settings, "cargo build "),
+ "cargo build"
+ );
+ assert_eq!(
+ interesting_command(&settings, "cargo build foo bar"),
+ "cargo build foo"
+ );
+
+ // Works with a common_prefix as well
+ assert_eq!(
+ interesting_command(&settings, "sudo cargo build foo bar"),
+ "cargo build foo"
+ );
+
+ // We still match on just cargo as a subcommand
+ assert_eq!(interesting_command(&settings, "cargo"), "cargo");
+ assert_eq!(interesting_command(&settings, "cargo foo"), "cargo foo");
+ }
+
+ // Test with spaces in the common_prefix and common_subcommand
+ #[test]
+ fn interesting_commands_spaces_both() {
+ let mut settings = Settings::utc();
+ settings.stats.common_prefix.push("sudo test".to_string());
+ settings
+ .stats
+ .common_subcommands
+ .push("cargo build".to_string());
+
+ assert_eq!(
+ interesting_command(&settings, "sudo test cargo build"),
+ "cargo build"
+ );
+ assert_eq!(
+ interesting_command(&settings, "sudo test cargo build"),
+ "cargo build"
+ );
+ assert_eq!(
+ interesting_command(&settings, "sudo test cargo build "),
+ "cargo build"
+ );
+ assert_eq!(
+ interesting_command(&settings, "sudo test cargo build foo bar"),
+ "cargo build foo"
+ );
+ }
+
+ #[test]
+ fn split_simple() {
+ assert_eq!(split_at_pipe("fd | rg"), ["fd ", " rg"]);
+ }
+
+ #[test]
+ fn split_multi() {
+ assert_eq!(
+ split_at_pipe("kubectl | jq | rg"),
+ ["kubectl ", " jq ", " rg"]
+ );
+ }
+
+ #[test]
+ fn split_simple_quoted() {
+ assert_eq!(
+ split_at_pipe("foo | bar 'baz {} | quux' | xyzzy"),
+ ["foo ", " bar 'baz {} | quux' ", " xyzzy"]
+ );
+ }
+
+ #[test]
+ fn split_multi_quoted() {
+ assert_eq!(
+ split_at_pipe("foo | bar 'baz \"{}\" | quux' | xyzzy"),
+ ["foo ", " bar 'baz \"{}\" | quux' ", " xyzzy"]
+ );
+ }
+
+ #[test]
+ fn escaped_pipes() {
+ assert_eq!(
+ split_at_pipe("foo | bar baz \\| quux"),
+ ["foo ", " bar baz \\| quux"]
+ );
+ }
+
+ #[test]
+ fn emoji() {
+ assert_eq!(
+ split_at_pipe("git commit -m \"🚀\""),
+ ["git commit -m \"🚀\""]
+ );
+ }
+}
diff --git a/crates/atuin/Cargo.toml b/crates/atuin/Cargo.toml
index 9ea2a98e..2915317a 100644
--- a/crates/atuin/Cargo.toml
+++ b/crates/atuin/Cargo.toml
@@ -46,6 +46,7 @@ atuin-server = { path = "../atuin-server", version = "18.2.0", optional = true }
atuin-client = { path = "../atuin-client", version = "18.2.0", optional = true, default-features = false }
atuin-common = { path = "../atuin-common", version = "18.2.0" }
atuin-dotfiles = { path = "../atuin-dotfiles", version = "0.2.0" }
+atuin-history = { path = "../atuin-history", version = "0.1.0" }
log = { workspace = true }
env_logger = "0.11.2"
diff --git a/crates/atuin/src/command/client/stats.rs b/crates/atuin/src/command/client/stats.rs
index 7f2e7aa8..fd480438 100644
--- a/crates/atuin/src/command/client/stats.rs
+++ b/crates/atuin/src/command/client/stats.rs
@@ -1,17 +1,14 @@
-use std::collections::{HashMap, HashSet};
-
use clap::Parser;
-use crossterm::style::{Color, ResetColor, SetAttribute, SetForegroundColor};
use eyre::Result;
use interim::parse_date_string;
+use time::{Duration, OffsetDateTime, Time};
use atuin_client::{
database::{current_context, Database},
- history::History,
settings::Settings,
};
-use time::{Duration, OffsetDateTime, Time};
-use unicode_segmentation::UnicodeSegmentation;
+
+use atuin_history::stats::{compute, pretty_print};
#[derive(Parser, Debug)]
#[command(infer_subcommands = true)]
@@ -28,140 +25,6 @@ pub struct Cmd {
ngram_size: usize,
}
-fn split_at_pipe(command: &str) -> Vec<&str> {
- let mut result = vec![];
- let mut quoted = false;
- let mut start = 0;
- let mut graphemes = UnicodeSegmentation::grapheme_indices(command, true);
-
- while let Some((i, c)) = graphemes.next() {
- let current = i;
- match c {
- "\"" => {
- if command[start..current] != *"\"" {
- quoted = !quoted;
- }
- }
- "'" => {
- if command[start..current] != *"'" {
- quoted = !quoted;
- }
- }
- "\\" => if graphemes.next().is_some() {},
- "|" => {
- if !quoted {
- if command[start..].starts_with('|') {
- start += 1;
- }
- result.push(&command[start..current]);
- start = current;
- }
- }
- _ => {}
- }
- }
- if command[start..].starts_with('|') {
- start += 1;
- }
- result.push(&command[start..]);
- result
-}
-
-fn compute_stats(
- settings: &Settings,
- history: &[History],
- count: usize,
- ngram_size: usize,
-) -> (usize, usize) {
- let mut commands = HashSet::<&str>::with_capacity(history.len());
- let mut total_unignored = 0;
- let mut prefixes = HashMap::<Vec<&str>, usize>::with_capacity(history.len());
- for i in history {
- // just in case it somehow has a leading tab or space or something (legacy atuin didn't ignore space prefixes)
- let command = i.command.trim();
- let prefix = interesting_command(settings, command);
-
- if settings.stats.ignored_commands.iter().any(|c| c == prefix) {
- continue;
- }
-
- total_unignored += 1;
- commands.insert(command);
-
- split_at_pipe(i.command.trim())
- .iter()
- .map(|l| {
- let command = l.trim();
- commands.insert(command);
- command
- })
- .collect::<Vec<_>>()
- .windows(ngram_size)
- .for_each(|w| {
- *prefixes
- .entry(w.iter().map(|c| interesting_command(settings, c)).collect())
- .or_default() += 1;
- });
- }
-
- let unique = commands.len();
- let mut top = prefixes.into_iter().collect::<Vec<_>>();
- top.sort_unstable_by_key(|x| std::cmp::Reverse(x.1));
- top.truncate(count);
- if top.is_empty() {
- println!("No commands found");
- return (0, 0);
- }
-
- let max = top.iter().map(|x| x.1).max().unwrap();
- let num_pad = max.ilog10() as usize + 1;
-
- // Find the length of the longest command name for each column
- let column_widths = top
- .iter()
- .map(|(commands, _)| commands.iter().map(|c| c.len()).collect::<Vec<usize>>())
- .fold(vec![0; ngram_size], |acc, item| {
- acc.iter()
- .zip(item.iter())
- .map(|(a, i)| *std::cmp::max(a, i))
- .collect()
- });
-
- for (command, count) in top {
- let gray = SetForegroundColor(Color::Grey);
- let bold = SetAttribute(crossterm::style::Attribute::Bold);
-
- let in_ten = 10 * count / max;
- print!("[");
- print!("{}", SetForegroundColor(Color::Red));
- for i in 0..in_ten {
- if i == 2 {
- print!("{}", SetForegroundColor(Color::Yellow));
- }
- if i == 5 {
- print!("{}", SetForegroundColor(Color::Green));
- }
- print!("▮");
- }
- for _ in in_ten..10 {
- print!(" ");
- }
-
- let formatted_command = command
- .iter()
- .zip(column_widths.iter())
- .map(|(cmd, width)| format!("{cmd:width$}"))
- .collect::<Vec<_>>()
- .join(" | ");
-
- println!("{ResetColor}] {gray}{count:num_pad$}{ResetColor} {bold}{formatted_command}{ResetColor}");
- }
- println!("Total commands: {total_unignored}");
- println!("Unique commands: {unique}");
-
- (total_unignored, unique)
-}
-
impl Cmd {
pub async fn run(&self, db: &impl Database, settings: &Settings) -> Result<()> {
let context = current_context();
@@ -197,241 +60,13 @@ impl Cmd {
let end = start + Duration::days(1);
db.range(start, end).await?
};
- compute_stats(settings, &history, self.count, self.ngram_size);
- Ok(())
- }
-}
-fn first_non_whitespace(s: &str) -> Option<usize> {
- s.char_indices()
- // find the first non whitespace char
- .find(|(_, c)| !c.is_ascii_whitespace())
- // return the index of that char
- .map(|(i, _)| i)
-}
+ let stats = compute(settings, &history, self.count, self.ngram_size);
-fn first_whitespace(s: &str) -> usize {
- s.char_indices()
- // find the first whitespace char
- .find(|(_, c)| c.is_ascii_whitespace())
- // return the index of that char, (or the max length of the string)
- .map_or(s.len(), |(i, _)| i)
-}
-
-fn interesting_command<'a>(settings: &Settings, mut command: &'a str) -> &'a str {
- // Sort by length so that we match the longest prefix first
- let mut common_prefix = settings.stats.common_prefix.clone();
- common_prefix.sort_by_key(|b| std::cmp::Reverse(b.len()));
-
- // Trim off the common prefix, if it exists
- for p in &common_prefix {
- if command.starts_with(p) {
- let i = p.len();
- let prefix = &command[..i];
- command = command[i..].trim_start();
- if command.is_empty() {
- // no commands following, just use the prefix
- return prefix;
- }
- break;
+ if let Some(stats) = stats {
+ pretty_print(stats, self.ngram_size);
}
- }
- // Sort the common_subcommands by length so that we match the longest subcommand first
- let mut common_subcommands = settings.stats.common_subcommands.clone();
- common_subcommands.sort_by_key(|b| std::cmp::Reverse(b.len()));
-
- // Check for a common subcommand
- for p in &common_subcommands {
- if command.starts_with(p) {
- // if the subcommand is the same length as the command, then we just use the subcommand
- if p.len() == command.len() {
- return command;
- }
- // otherwise we need to use the subcommand + the next word
- let non_whitespace = first_non_whitespace(&command[p.len()..]).unwrap_or(0);
- let j =
- p.len() + non_whitespace + first_whitespace(&command[p.len() + non_whitespace..]);
- return &command[..j];
- }
- }
- // Return the first word if there is no subcommand
- &command[..first_whitespace(command)]
-}
-
-#[cfg(test)]
-mod tests {
- use atuin_client::history::History;
- use atuin_client::settings::Settings;
- use time::OffsetDateTime;
-
- use super::compute_stats;
- use super::{interesting_command, split_at_pipe};
-
- #[test]
- fn ignored_commands() {
- let mut settings = Settings::utc();
- settings.stats.ignored_commands.push("cd".to_string());
-
- let history = [
- History::import()
- .timestamp(OffsetDateTime::now_utc())
- .command("cd foo")
- .build()
- .into(),
- History::import()
- .timestamp(OffsetDateTime::now_utc())
- .command("cargo build stuff")
- .build()
- .into(),
- ];
-
- let (total, unique) = compute_stats(&settings, &history, 10, 1);
- assert_eq!(total, 1);
- assert_eq!(unique, 1);
- }
-
- #[test]
- fn interesting_commands() {
- let settings = Settings::utc();
-
- assert_eq!(interesting_command(&settings, "cargo"), "cargo");
- assert_eq!(
- interesting_command(&settings, "cargo build foo bar"),
- "cargo build"
- );
- assert_eq!(
- interesting_command(&settings, "sudo cargo build foo bar"),
- "cargo build"
- );
- assert_eq!(interesting_command(&settings, "sudo"), "sudo");
- }
-
- // Test with spaces in the common_prefix
- #[test]
- fn interesting_commands_spaces() {
- let mut settings = Settings::utc();
- settings.stats.common_prefix.push("sudo test".to_string());
-
- assert_eq!(interesting_command(&settings, "sudo test"), "sudo test");
- assert_eq!(interesting_command(&settings, "sudo test "), "sudo test");
- assert_eq!(interesting_command(&settings, "sudo test foo bar"), "foo");
- assert_eq!(
- interesting_command(&settings, "sudo test foo bar"),
- "foo"
- );
-
- // Works with a common_subcommand as well
- assert_eq!(
- interesting_command(&settings, "sudo test cargo build foo bar"),
- "cargo build"
- );
-
- // We still match on just the sudo prefix
- assert_eq!(interesting_command(&settings, "sudo"), "sudo");
- assert_eq!(interesting_command(&settings, "sudo foo"), "foo");
- }
-
- // Test with spaces in the common_subcommand
- #[test]
- fn interesting_commands_spaces_subcommand() {
- let mut settings = Settings::utc();
- settings
- .stats
- .common_subcommands
- .push("cargo build".to_string());
-
- assert_eq!(interesting_command(&settings, "cargo build"), "cargo build");
- assert_eq!(
- interesting_command(&settings, "cargo build "),
- "cargo build"
- );
- assert_eq!(
- interesting_command(&settings, "cargo build foo bar"),
- "cargo build foo"
- );
-
- // Works with a common_prefix as well
- assert_eq!(
- interesting_command(&settings, "sudo cargo build foo bar"),
- "cargo build foo"
- );
-
- // We still match on just cargo as a subcommand
- assert_eq!(interesting_command(&settings, "cargo"), "cargo");
- assert_eq!(interesting_command(&settings, "cargo foo"), "cargo foo");
- }
-
- // Test with spaces in the common_prefix and common_subcommand
- #[test]
- fn interesting_commands_spaces_both() {
- let mut settings = Settings::utc();
- settings.stats.common_prefix.push("sudo test".to_string());
- settings
- .stats
- .common_subcommands
- .push("cargo build".to_string());
-
- assert_eq!(
- interesting_command(&settings, "sudo test cargo build"),
- "cargo build"
- );
- assert_eq!(
- interesting_command(&settings, "sudo test cargo build"),
- "cargo build"
- );
- assert_eq!(
- interesting_command(&settings, "sudo test cargo build "),
- "cargo build"
- );
- assert_eq!(
- interesting_command(&settings, "sudo test cargo build foo bar"),
- "cargo build foo"
- );
- }
-
- #[test]
- fn split_simple() {
- assert_eq!(split_at_pipe("fd | rg"), ["fd ", " rg"]);
- }
-
- #[test]
- fn split_multi() {
- assert_eq!(
- split_at_pipe("kubectl | jq | rg"),
- ["kubectl ", " jq ", " rg"]
- );
- }
-
- #[test]
- fn split_simple_quoted() {
- assert_eq!(
- split_at_pipe("foo | bar 'baz {} | quux' | xyzzy"),
- ["foo ", " bar 'baz {} | quux' ", " xyzzy"]
- );
- }
-
- #[test]
- fn split_multi_quoted() {
- assert_eq!(
- split_at_pipe("foo | bar 'baz \"{}\" | quux' | xyzzy"),
- ["foo ", " bar 'baz \"{}\" | quux' ", " xyzzy"]
- );
- }
-
- #[test]
- fn escaped_pipes() {
- assert_eq!(
- split_at_pipe("foo | bar baz \\| quux"),
- ["foo ", " bar baz \\| quux"]
- );
- }
-
- #[test]
- fn emoji() {
- assert_eq!(
- split_at_pipe("git commit -m \"🚀\""),
- ["git commit -m \"🚀\""]
- );
+ Ok(())
}
}