From 95cc472037fcb3207b510e67f1a44af4e2a2cae9 Mon Sep 17 00:00:00 2001 From: Ellie Huxtable Date: Thu, 18 Apr 2024 16:41:28 +0100 Subject: chore: move crates into crates/ dir (#1958) I'd like to tidy up the root a little, and it's nice to have all the rust crates in one place --- crates/atuin-client/src/import/bash.rs | 218 +++++++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 crates/atuin-client/src/import/bash.rs (limited to 'crates/atuin-client/src/import/bash.rs') diff --git a/crates/atuin-client/src/import/bash.rs b/crates/atuin-client/src/import/bash.rs new file mode 100644 index 00000000..ade1f751 --- /dev/null +++ b/crates/atuin-client/src/import/bash.rs @@ -0,0 +1,218 @@ +use std::{path::PathBuf, str}; + +use async_trait::async_trait; +use directories::UserDirs; +use eyre::{eyre, Result}; +use itertools::Itertools; +use time::{Duration, OffsetDateTime}; + +use super::{get_histpath, unix_byte_lines, Importer, Loader}; +use crate::history::History; +use crate::import::read_to_end; + +#[derive(Debug)] +pub struct Bash { + bytes: Vec, +} + +fn default_histpath() -> Result { + let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?; + let home_dir = user_dirs.home_dir(); + + Ok(home_dir.join(".bash_history")) +} + +#[async_trait] +impl Importer for Bash { + const NAME: &'static str = "bash"; + + async fn new() -> Result { + let bytes = read_to_end(get_histpath(default_histpath)?)?; + Ok(Self { bytes }) + } + + async fn entries(&mut self) -> Result { + let count = unix_byte_lines(&self.bytes) + .map(LineType::from) + .filter(|line| matches!(line, LineType::Command(_))) + .count(); + Ok(count) + } + + async fn load(self, h: &mut impl Loader) -> Result<()> { + let lines = unix_byte_lines(&self.bytes) + .map(LineType::from) + .filter(|line| !matches!(line, LineType::NotUtf8)) // invalid utf8 are ignored + .collect_vec(); + + let (commands_before_first_timestamp, first_timestamp) = lines + .iter() + .enumerate() + .find_map(|(i, line)| match line { + LineType::Timestamp(t) => Some((i, *t)), + _ => None, + }) + // if no known timestamps, use now as base + .unwrap_or((lines.len(), OffsetDateTime::now_utc())); + + // if no timestamp is recorded, then use this increment to set an arbitrary timestamp + // to preserve ordering + // this increment is deliberately very small to prevent particularly fast fingers + // causing ordering issues; it also helps in handling the "here document" syntax, + // where several lines are recorded in succession without individual timestamps + let timestamp_increment = Duration::milliseconds(1); + + // make sure there is a minimum amount of time before the first known timestamp + // to fit all commands, given the default increment + let mut next_timestamp = + first_timestamp - timestamp_increment * commands_before_first_timestamp as i32; + + for line in lines.into_iter() { + match line { + LineType::NotUtf8 => unreachable!(), // already filtered + LineType::Empty => {} // do nothing + LineType::Timestamp(t) => { + if t < next_timestamp { + warn!("Time reversal detected in Bash history! Commands may be ordered incorrectly."); + } + next_timestamp = t; + } + LineType::Command(c) => { + let imported = History::import().timestamp(next_timestamp).command(c); + + h.push(imported.build().into()).await?; + next_timestamp += timestamp_increment; + } + } + } + + Ok(()) + } +} + +#[derive(Debug, Clone)] +enum LineType<'a> { + NotUtf8, + /// Can happen when using the "here document" syntax. + Empty, + /// A timestamp line start with a '#', followed immediately by an integer + /// that represents seconds since UNIX epoch. + Timestamp(OffsetDateTime), + /// Anything else. + Command(&'a str), +} +impl<'a> From<&'a [u8]> for LineType<'a> { + fn from(bytes: &'a [u8]) -> Self { + let Ok(line) = str::from_utf8(bytes) else { + return LineType::NotUtf8; + }; + if line.is_empty() { + return LineType::Empty; + } + let parsed = match try_parse_line_as_timestamp(line) { + Some(time) => LineType::Timestamp(time), + None => LineType::Command(line), + }; + parsed + } +} + +fn try_parse_line_as_timestamp(line: &str) -> Option { + let seconds = line.strip_prefix('#')?.parse().ok()?; + OffsetDateTime::from_unix_timestamp(seconds).ok() +} + +#[cfg(test)] +mod test { + use std::cmp::Ordering; + + use itertools::{assert_equal, Itertools}; + + use crate::import::{tests::TestLoader, Importer}; + + use super::Bash; + + #[tokio::test] + async fn parse_no_timestamps() { + let bytes = r"cargo install atuin +cargo update +cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷ +" + .as_bytes() + .to_owned(); + + let mut bash = Bash { bytes }; + assert_eq!(bash.entries().await.unwrap(), 3); + + let mut loader = TestLoader::default(); + bash.load(&mut loader).await.unwrap(); + + assert_equal( + loader.buf.iter().map(|h| h.command.as_str()), + [ + "cargo install atuin", + "cargo update", + "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", + ], + ); + assert!(is_strictly_sorted(loader.buf.iter().map(|h| h.timestamp))) + } + + #[tokio::test] + async fn parse_with_timestamps() { + let bytes = b"#1672918999 +git reset +#1672919006 +git clean -dxf +#1672919020 +cd ../ +" + .to_vec(); + + let mut bash = Bash { bytes }; + assert_eq!(bash.entries().await.unwrap(), 3); + + let mut loader = TestLoader::default(); + bash.load(&mut loader).await.unwrap(); + + assert_equal( + loader.buf.iter().map(|h| h.command.as_str()), + ["git reset", "git clean -dxf", "cd ../"], + ); + assert_equal( + loader.buf.iter().map(|h| h.timestamp.unix_timestamp()), + [1672918999, 1672919006, 1672919020], + ) + } + + #[tokio::test] + async fn parse_with_partial_timestamps() { + let bytes = b"git reset +#1672919006 +git clean -dxf +cd ../ +" + .to_vec(); + + let mut bash = Bash { bytes }; + assert_eq!(bash.entries().await.unwrap(), 3); + + let mut loader = TestLoader::default(); + bash.load(&mut loader).await.unwrap(); + + assert_equal( + loader.buf.iter().map(|h| h.command.as_str()), + ["git reset", "git clean -dxf", "cd ../"], + ); + assert!(is_strictly_sorted(loader.buf.iter().map(|h| h.timestamp))) + } + + fn is_strictly_sorted(iter: impl IntoIterator) -> bool + where + T: Clone + PartialOrd, + { + iter.into_iter() + .tuple_windows() + .all(|(a, b)| matches!(a.partial_cmp(&b), Some(Ordering::Less))) + } +} -- cgit v1.3.1