aboutsummaryrefslogtreecommitdiffstats
path: root/crates
diff options
context:
space:
mode:
author Michelle Tilley <michelle@michelletilley.net> 2025-04-01 07:39:28 -0700
committer GitHub <noreply@github.com> 2025-04-01 15:39:28 +0100
commit 6072241422f4a8b44e1d9c3e4f8fead408064cee (patch)
tree 4cc05645854bc8bdc105faa779ac7ec6d5825563 /crates
parent feat(kv): Add support for 'atuin kv delete' (#2660) (diff)
download atuin-6072241422f4a8b44e1d9c3e4f8fead408064cee.zip
fix(stats): Ignore leading environment variables when calculating stats (#2659)
* Ignore leading environment variables when calculating stats
* There's always an extra println
* Make clippy happy
* We don't actually need the tokens
Diffstat (limited to 'crates')
-rw-r--r-- crates/atuin-history/src/stats.rs 122
1 files changed, 119 insertions, 3 deletions
diff --git a/crates/atuin-history/src/stats.rs b/crates/atuin-history/src/stats.rs
index bbcf7f9b..5e8d59da 100644
--- a/crates/atuin-history/src/stats.rs
+++ b/crates/atuin-history/src/stats.rs
@@ -109,6 +109,65 @@ fn split_at_pipe(command: &str) -> Vec<&str> {
result
}
+fn strip_leading_env_vars(command: &str) -> &str { // Skips leading whitespace-separated tokens that contain an unquoted `=` (e.g. `FOO=bar`) and returns the remaining sub-slice of `command`, trimmed.
+ // fast path: with no `=` anywhere there is nothing to strip, so return the input unchanged
+ if !command.contains('=') {
+ return command;
+ }
+ // Scan state. `token_start_pos` is the offset where the current token began; it is used to slice `command` at the end.
+ let mut in_token = false;
+ let mut token_start_pos = 0;
+ let mut in_single_quotes = false;
+ let mut in_double_quotes = false;
+ let mut escape_next = false;
+ let mut has_equals_outside_quotes = false;
+ // Walk the command one grapheme at a time; `i` is the offset that `grapheme_indices` reports for `g`.
+ for (i, g) in UnicodeSegmentation::grapheme_indices(command, true) {
+ if escape_next { // the grapheme right after a backslash is skipped without being inspected
+ escape_next = false;
+ continue;
+ }
+
+ if !in_token { // between tokens: the next token (if any) starts at this offset
+ token_start_pos = i;
+ }
+
+ match g {
+ "\\" => { // backslash escapes the next grapheme; this arm has no quote guard, so it also applies inside quotes
+ escape_next = true; // NOTE(review): POSIX shells keep a backslash literal inside single quotes — confirm this simplification is intended
+ in_token = true;
+ }
+ "'" if !in_double_quotes => { // a single quote toggles single-quote state unless we are inside double quotes
+ in_single_quotes = !in_single_quotes;
+ in_token = true;
+ }
+ "\"" if !in_single_quotes => { // a double quote toggles double-quote state unless we are inside single quotes
+ in_double_quotes = !in_double_quotes;
+ in_token = true;
+ }
+ "=" if !in_single_quotes && !in_double_quotes => { // an unquoted `=` marks the current token as an assignment
+ has_equals_outside_quotes = true;
+ in_token = true;
+ }
+ " " | "\t" if !in_single_quotes && !in_double_quotes => { // an unquoted space or tab ends the current token
+ if in_token { // NOTE(review): whitespace inside `$(...)` or backticks is not tracked, so `FOO=$(a b) cmd` would be split inside the substitution — confirm acceptable
+ if !has_equals_outside_quotes {
+ // the token that just ended had no unquoted `=`, so it is not an assignment: stop here, leaving token_start_pos at its start
+ break;
+ }
+ in_token = false; // an assignment token just ended; reset the per-token flags for the next token
+ has_equals_outside_quotes = false;
+ }
+ }
+ _ => { // anything else (including quoted whitespace and quoted `=`, whose guarded arms above do not match) is part of a token
+ in_token = true;
+ }
+ }
+ }
+ // Slice from the last recorded token start. If the loop ended without `break`, that is the start of the last token begun (0 for a single-token input).
+ command[token_start_pos..].trim()
+}
+
pub fn pretty_print(stats: Stats, ngram_size: usize, theme: &Theme) {
let max = stats.top.iter().map(|x| x.1).max().unwrap();
let num_pad = max.ilog10() as usize + 1;
@@ -198,7 +257,7 @@ pub fn compute(
for i in history {
// just in case it somehow has a leading tab or space or something (legacy atuin didn't ignore space prefixes)
- let command = i.command.trim();
+ let command = strip_leading_env_vars(i.command.trim());
let prefix = interesting_command(settings, command);
if settings.stats.ignored_commands.iter().any(|c| c == prefix) {
@@ -208,7 +267,7 @@ pub fn compute(
total_unignored += 1;
commands.insert(command);
- split_at_pipe(i.command.trim())
+ split_at_pipe(command)
.iter()
.map(|l| {
let command = l.trim();
@@ -251,7 +310,22 @@ mod tests {
use time::OffsetDateTime;
use super::compute;
- use super::{interesting_command, split_at_pipe};
+ use super::{interesting_command, split_at_pipe, strip_leading_env_vars};
+
+ #[test]
+ fn ignored_env_vars() { // end-to-end check through `compute`: a leading quoted assignment must not show up as the command
+ let settings = Settings::utc();
+ // One history entry whose command starts with an assignment; the quoted value itself contains `=` and an emoji.
+ let history: History = History::capture()
+ .timestamp(time::OffsetDateTime::now_utc())
+ .command("FOO='BAR=🚀' echo foo")
+ .cwd("/")
+ .build()
+ .into();
+ // The first entry of `stats.top` is expected to be `echo`, not the `FOO=...` prefix.
+ let stats = compute(&settings, &[history], 10, 1).expect("failed to compute stats");
+ assert_eq!(stats.top.get(0).unwrap().0, vec!["echo"]);
+ }
#[test]
fn ignored_commands() {
@@ -435,4 +509,46 @@ mod tests {
[" ", " sed 's/[0-9a-f]//g'"]
);
}
+
+ #[test]
+ fn strip_leading_env_vars_simple() { // two unquoted assignments in a row are both stripped
+ assert_eq!(
+ strip_leading_env_vars("FOO=bar BAZ=quux echo foo"),
+ "echo foo"
+ );
+ }
+
+ #[test]
+ fn strip_leading_env_vars_quoted_single() { // an `=` inside a single-quoted value does not disturb stripping of the assignment
+ assert_eq!(strip_leading_env_vars("FOO='BAR=baz' echo foo"), "echo foo");
+ }
+
+ #[test]
+ fn strip_leading_env_vars_quoted_double() { // same as the single-quote case, with a double-quoted value
+ assert_eq!(
+ strip_leading_env_vars("FOO=\"BAR=baz\" echo foo"),
+ "echo foo"
+ );
+ }
+
+ #[test]
+ fn strip_leading_env_vars_quoted_single_and_double() { // double quotes nested in a single-quoted value; a later quoted `BAR=quux` argument must be kept
+ assert_eq!(
+ strip_leading_env_vars("FOO='BAR=\"baz\"' echo foo \"BAR=quux\""),
+ "echo foo \"BAR=quux\""
+ );
+ }
+
+ #[test]
+ fn strip_leading_env_vars_emojis() { // a multi-byte character in the value must not break the offset used for the final slice
+ assert_eq!(
+ strip_leading_env_vars("FOO='BAR=🚀' echo foo \"BAR=quux\" foo"),
+ "echo foo \"BAR=quux\" foo"
+ );
+ }
+
+ #[test]
+ fn strip_leading_env_vars_name_same_as_command() { // the assigned value (`bar`) equals the command name; only the assignment is removed
+ assert_eq!(strip_leading_env_vars("FOO='bar' bar baz"), "bar baz");
+ }
}