diff options
Diffstat (limited to 'crates/atuin-client/src/import')
| -rw-r--r-- | crates/atuin-client/src/import/bash.rs | 218 | ||||
| -rw-r--r-- | crates/atuin-client/src/import/fish.rs | 179 | ||||
| -rw-r--r-- | crates/atuin-client/src/import/mod.rs | 111 | ||||
| -rw-r--r-- | crates/atuin-client/src/import/nu.rs | 67 | ||||
| -rw-r--r-- | crates/atuin-client/src/import/nu_histdb.rs | 113 | ||||
| -rw-r--r-- | crates/atuin-client/src/import/resh.rs | 140 | ||||
| -rw-r--r-- | crates/atuin-client/src/import/xonsh.rs | 233 | ||||
| -rw-r--r-- | crates/atuin-client/src/import/xonsh_sqlite.rs | 217 | ||||
| -rw-r--r-- | crates/atuin-client/src/import/zsh.rs | 229 | ||||
| -rw-r--r-- | crates/atuin-client/src/import/zsh_histdb.rs | 247 |
10 files changed, 1754 insertions, 0 deletions
diff --git a/crates/atuin-client/src/import/bash.rs b/crates/atuin-client/src/import/bash.rs new file mode 100644 index 00000000..ade1f751 --- /dev/null +++ b/crates/atuin-client/src/import/bash.rs @@ -0,0 +1,218 @@ +use std::{path::PathBuf, str}; + +use async_trait::async_trait; +use directories::UserDirs; +use eyre::{eyre, Result}; +use itertools::Itertools; +use time::{Duration, OffsetDateTime}; + +use super::{get_histpath, unix_byte_lines, Importer, Loader}; +use crate::history::History; +use crate::import::read_to_end; + +#[derive(Debug)] +pub struct Bash { + bytes: Vec<u8>, +} + +fn default_histpath() -> Result<PathBuf> { + let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?; + let home_dir = user_dirs.home_dir(); + + Ok(home_dir.join(".bash_history")) +} + +#[async_trait] +impl Importer for Bash { + const NAME: &'static str = "bash"; + + async fn new() -> Result<Self> { + let bytes = read_to_end(get_histpath(default_histpath)?)?; + Ok(Self { bytes }) + } + + async fn entries(&mut self) -> Result<usize> { + let count = unix_byte_lines(&self.bytes) + .map(LineType::from) + .filter(|line| matches!(line, LineType::Command(_))) + .count(); + Ok(count) + } + + async fn load(self, h: &mut impl Loader) -> Result<()> { + let lines = unix_byte_lines(&self.bytes) + .map(LineType::from) + .filter(|line| !matches!(line, LineType::NotUtf8)) // invalid utf8 are ignored + .collect_vec(); + + let (commands_before_first_timestamp, first_timestamp) = lines + .iter() + .enumerate() + .find_map(|(i, line)| match line { + LineType::Timestamp(t) => Some((i, *t)), + _ => None, + }) + // if no known timestamps, use now as base + .unwrap_or((lines.len(), OffsetDateTime::now_utc())); + + // if no timestamp is recorded, then use this increment to set an arbitrary timestamp + // to preserve ordering + // this increment is deliberately very small to prevent particularly fast fingers + // causing ordering issues; it also helps in handling the "here 
document" syntax, + // where several lines are recorded in succession without individual timestamps + let timestamp_increment = Duration::milliseconds(1); + + // make sure there is a minimum amount of time before the first known timestamp + // to fit all commands, given the default increment + let mut next_timestamp = + first_timestamp - timestamp_increment * commands_before_first_timestamp as i32; + + for line in lines.into_iter() { + match line { + LineType::NotUtf8 => unreachable!(), // already filtered + LineType::Empty => {} // do nothing + LineType::Timestamp(t) => { + if t < next_timestamp { + warn!("Time reversal detected in Bash history! Commands may be ordered incorrectly."); + } + next_timestamp = t; + } + LineType::Command(c) => { + let imported = History::import().timestamp(next_timestamp).command(c); + + h.push(imported.build().into()).await?; + next_timestamp += timestamp_increment; + } + } + } + + Ok(()) + } +} + +#[derive(Debug, Clone)] +enum LineType<'a> { + NotUtf8, + /// Can happen when using the "here document" syntax. + Empty, + /// A timestamp line start with a '#', followed immediately by an integer + /// that represents seconds since UNIX epoch. + Timestamp(OffsetDateTime), + /// Anything else. 
+ Command(&'a str), +} +impl<'a> From<&'a [u8]> for LineType<'a> { + fn from(bytes: &'a [u8]) -> Self { + let Ok(line) = str::from_utf8(bytes) else { + return LineType::NotUtf8; + }; + if line.is_empty() { + return LineType::Empty; + } + let parsed = match try_parse_line_as_timestamp(line) { + Some(time) => LineType::Timestamp(time), + None => LineType::Command(line), + }; + parsed + } +} + +fn try_parse_line_as_timestamp(line: &str) -> Option<OffsetDateTime> { + let seconds = line.strip_prefix('#')?.parse().ok()?; + OffsetDateTime::from_unix_timestamp(seconds).ok() +} + +#[cfg(test)] +mod test { + use std::cmp::Ordering; + + use itertools::{assert_equal, Itertools}; + + use crate::import::{tests::TestLoader, Importer}; + + use super::Bash; + + #[tokio::test] + async fn parse_no_timestamps() { + let bytes = r"cargo install atuin +cargo update +cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷ +" + .as_bytes() + .to_owned(); + + let mut bash = Bash { bytes }; + assert_eq!(bash.entries().await.unwrap(), 3); + + let mut loader = TestLoader::default(); + bash.load(&mut loader).await.unwrap(); + + assert_equal( + loader.buf.iter().map(|h| h.command.as_str()), + [ + "cargo install atuin", + "cargo update", + "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", + ], + ); + assert!(is_strictly_sorted(loader.buf.iter().map(|h| h.timestamp))) + } + + #[tokio::test] + async fn parse_with_timestamps() { + let bytes = b"#1672918999 +git reset +#1672919006 +git clean -dxf +#1672919020 +cd ../ +" + .to_vec(); + + let mut bash = Bash { bytes }; + assert_eq!(bash.entries().await.unwrap(), 3); + + let mut loader = TestLoader::default(); + bash.load(&mut loader).await.unwrap(); + + assert_equal( + loader.buf.iter().map(|h| h.command.as_str()), + ["git reset", "git clean -dxf", "cd ../"], + ); + assert_equal( + loader.buf.iter().map(|h| h.timestamp.unix_timestamp()), + [1672918999, 1672919006, 1672919020], + ) + } + + #[tokio::test] + async fn parse_with_partial_timestamps() { + let bytes = b"git reset 
+#1672919006 +git clean -dxf +cd ../ +" + .to_vec(); + + let mut bash = Bash { bytes }; + assert_eq!(bash.entries().await.unwrap(), 3); + + let mut loader = TestLoader::default(); + bash.load(&mut loader).await.unwrap(); + + assert_equal( + loader.buf.iter().map(|h| h.command.as_str()), + ["git reset", "git clean -dxf", "cd ../"], + ); + assert!(is_strictly_sorted(loader.buf.iter().map(|h| h.timestamp))) + } + + fn is_strictly_sorted<T>(iter: impl IntoIterator<Item = T>) -> bool + where + T: Clone + PartialOrd, + { + iter.into_iter() + .tuple_windows() + .all(|(a, b)| matches!(a.partial_cmp(&b), Some(Ordering::Less))) + } +} diff --git a/crates/atuin-client/src/import/fish.rs b/crates/atuin-client/src/import/fish.rs new file mode 100644 index 00000000..714b2d01 --- /dev/null +++ b/crates/atuin-client/src/import/fish.rs @@ -0,0 +1,179 @@ +// import old shell history! +// automatically hoover up all that we can find + +use std::path::PathBuf; + +use async_trait::async_trait; +use directories::BaseDirs; +use eyre::{eyre, Result}; +use time::OffsetDateTime; + +use super::{unix_byte_lines, Importer, Loader}; +use crate::history::History; +use crate::import::read_to_end; + +#[derive(Debug)] +pub struct Fish { + bytes: Vec<u8>, +} + +/// see https://fishshell.com/docs/current/interactive.html#searchable-command-history +fn default_histpath() -> Result<PathBuf> { + let base = BaseDirs::new().ok_or_else(|| eyre!("could not determine data directory"))?; + let data = std::env::var("XDG_DATA_HOME").map_or_else( + |_| base.home_dir().join(".local").join("share"), + PathBuf::from, + ); + + // fish supports multiple history sessions + // If `fish_history` var is missing, or set to `default`, use `fish` as the session + let session = std::env::var("fish_history").unwrap_or_else(|_| String::from("fish")); + let session = if session == "default" { + String::from("fish") + } else { + session + }; + + let mut histpath = data.join("fish"); + histpath.push(format!("{session}_history")); 
+ + if histpath.exists() { + Ok(histpath) + } else { + Err(eyre!("Could not find history file.")) + } +} + +#[async_trait] +impl Importer for Fish { + const NAME: &'static str = "fish"; + + async fn new() -> Result<Self> { + let bytes = read_to_end(default_histpath()?)?; + Ok(Self { bytes }) + } + + async fn entries(&mut self) -> Result<usize> { + Ok(super::count_lines(&self.bytes)) + } + + async fn load(self, loader: &mut impl Loader) -> Result<()> { + let now = OffsetDateTime::now_utc(); + let mut time: Option<OffsetDateTime> = None; + let mut cmd: Option<String> = None; + + for b in unix_byte_lines(&self.bytes) { + let s = match std::str::from_utf8(b) { + Ok(s) => s, + Err(_) => continue, // we can skip past things like invalid utf8 + }; + + if let Some(c) = s.strip_prefix("- cmd: ") { + // first, we must deal with the prev cmd + if let Some(cmd) = cmd.take() { + let time = time.unwrap_or(now); + let entry = History::import().timestamp(time).command(cmd); + + loader.push(entry.build().into()).await?; + } + + // using raw strings to avoid needing escaping. + // replaces double backslashes with single backslashes + let c = c.replace(r"\\", r"\"); + // replaces escaped newlines + let c = c.replace(r"\n", "\n"); + // TODO: any other escape characters? + + cmd = Some(c); + } else if let Some(t) = s.strip_prefix(" when: ") { + // if t is not an int, just ignore this line + if let Ok(t) = t.parse::<i64>() { + time = Some(OffsetDateTime::from_unix_timestamp(t)?); + } + } else { + // ... ignore paths lines + } + } + + // we might have a trailing cmd + if let Some(cmd) = cmd.take() { + let time = time.unwrap_or(now); + let entry = History::import().timestamp(time).command(cmd); + + loader.push(entry.build().into()).await?; + } + + Ok(()) + } +} + +#[cfg(test)] +mod test { + + use crate::import::{tests::TestLoader, Importer}; + + use super::Fish; + + #[tokio::test] + async fn parse_complex() { + // complicated input with varying contents and escaped strings. 
+ let bytes = r#"- cmd: history --help + when: 1639162832 +- cmd: cat ~/.bash_history + when: 1639162851 + paths: + - ~/.bash_history +- cmd: ls ~/.local/share/fish/fish_history + when: 1639162890 + paths: + - ~/.local/share/fish/fish_history +- cmd: cat ~/.local/share/fish/fish_history + when: 1639162893 + paths: + - ~/.local/share/fish/fish_history +ERROR +- CORRUPTED: ENTRY + CONTINUE: + - AS + - NORMAL +- cmd: echo "foo" \\\n'bar' baz + when: 1639162933 +- cmd: cat ~/.local/share/fish/fish_history + when: 1639162939 + paths: + - ~/.local/share/fish/fish_history +- cmd: echo "\\"" \\\\ "\\\\" + when: 1639163063 +- cmd: cat ~/.local/share/fish/fish_history + when: 1639163066 + paths: + - ~/.local/share/fish/fish_history +"# + .as_bytes() + .to_owned(); + + let fish = Fish { bytes }; + + let mut loader = TestLoader::default(); + fish.load(&mut loader).await.unwrap(); + let mut history = loader.buf.into_iter(); + + // simple wrapper for fish history entry + macro_rules! fishtory { + ($timestamp:expr, $command:expr) => { + let h = history.next().expect("missing entry in history"); + assert_eq!(h.command.as_str(), $command); + assert_eq!(h.timestamp.unix_timestamp(), $timestamp); + }; + } + + fishtory!(1639162832, "history --help"); + fishtory!(1639162851, "cat ~/.bash_history"); + fishtory!(1639162890, "ls ~/.local/share/fish/fish_history"); + fishtory!(1639162893, "cat ~/.local/share/fish/fish_history"); + fishtory!(1639162933, "echo \"foo\" \\\n'bar' baz"); + fishtory!(1639162939, "cat ~/.local/share/fish/fish_history"); + fishtory!(1639163063, r#"echo "\"" \\ "\\""#); + fishtory!(1639163066, "cat ~/.local/share/fish/fish_history"); + } +} diff --git a/crates/atuin-client/src/import/mod.rs b/crates/atuin-client/src/import/mod.rs new file mode 100644 index 00000000..c9d8c798 --- /dev/null +++ b/crates/atuin-client/src/import/mod.rs @@ -0,0 +1,111 @@ +use std::fs::File; +use std::io::Read; +use std::path::PathBuf; + +use async_trait::async_trait; +use eyre::{bail, 
Result}; +use memchr::Memchr; + +use crate::history::History; + +pub mod bash; +pub mod fish; +pub mod nu; +pub mod nu_histdb; +pub mod resh; +pub mod xonsh; +pub mod xonsh_sqlite; +pub mod zsh; +pub mod zsh_histdb; + +#[async_trait] +pub trait Importer: Sized { + const NAME: &'static str; + async fn new() -> Result<Self>; + async fn entries(&mut self) -> Result<usize>; + async fn load(self, loader: &mut impl Loader) -> Result<()>; +} + +#[async_trait] +pub trait Loader: Sync + Send { + async fn push(&mut self, hist: History) -> eyre::Result<()>; +} + +fn unix_byte_lines(input: &[u8]) -> impl Iterator<Item = &[u8]> { + UnixByteLines { + iter: memchr::memchr_iter(b'\n', input), + bytes: input, + i: 0, + } +} + +struct UnixByteLines<'a> { + iter: Memchr<'a>, + bytes: &'a [u8], + i: usize, +} + +impl<'a> Iterator for UnixByteLines<'a> { + type Item = &'a [u8]; + + fn next(&mut self) -> Option<Self::Item> { + let j = self.iter.next()?; + let out = &self.bytes[self.i..j]; + self.i = j + 1; + Some(out) + } + + fn count(self) -> usize + where + Self: Sized, + { + self.iter.count() + } +} + +fn count_lines(input: &[u8]) -> usize { + unix_byte_lines(input).count() +} + +fn get_histpath<D>(def: D) -> Result<PathBuf> +where + D: FnOnce() -> Result<PathBuf>, +{ + if let Ok(p) = std::env::var("HISTFILE") { + is_file(PathBuf::from(p)) + } else { + is_file(def()?) + } +} + +fn read_to_end(path: PathBuf) -> Result<Vec<u8>> { + let mut bytes = Vec::new(); + let mut f = File::open(path)?; + f.read_to_end(&mut bytes)?; + Ok(bytes) +} +fn is_file(p: PathBuf) -> Result<PathBuf> { + if p.is_file() { + Ok(p) + } else { + bail!("Could not find history file {:?}. 
Try setting $HISTFILE", p) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Default)] + pub struct TestLoader { + pub buf: Vec<History>, + } + + #[async_trait] + impl Loader for TestLoader { + async fn push(&mut self, hist: History) -> Result<()> { + self.buf.push(hist); + Ok(()) + } + } +} diff --git a/crates/atuin-client/src/import/nu.rs b/crates/atuin-client/src/import/nu.rs new file mode 100644 index 00000000..a45d83c5 --- /dev/null +++ b/crates/atuin-client/src/import/nu.rs @@ -0,0 +1,67 @@ +// import old shell history! +// automatically hoover up all that we can find + +use std::path::PathBuf; + +use async_trait::async_trait; +use directories::BaseDirs; +use eyre::{eyre, Result}; +use time::OffsetDateTime; + +use super::{unix_byte_lines, Importer, Loader}; +use crate::history::History; +use crate::import::read_to_end; + +#[derive(Debug)] +pub struct Nu { + bytes: Vec<u8>, +} + +fn get_histpath() -> Result<PathBuf> { + let base = BaseDirs::new().ok_or_else(|| eyre!("could not determine data directory"))?; + let config_dir = base.config_dir().join("nushell"); + + let histpath = config_dir.join("history.txt"); + if histpath.exists() { + Ok(histpath) + } else { + Err(eyre!("Could not find history file.")) + } +} + +#[async_trait] +impl Importer for Nu { + const NAME: &'static str = "nu"; + + async fn new() -> Result<Self> { + let bytes = read_to_end(get_histpath()?)?; + Ok(Self { bytes }) + } + + async fn entries(&mut self) -> Result<usize> { + Ok(super::count_lines(&self.bytes)) + } + + async fn load(self, h: &mut impl Loader) -> Result<()> { + let now = OffsetDateTime::now_utc(); + + let mut counter = 0; + for b in unix_byte_lines(&self.bytes) { + let s = match std::str::from_utf8(b) { + Ok(s) => s, + Err(_) => continue, // we can skip past things like invalid utf8 + }; + + let cmd: String = s.replace("<\\n>", "\n"); + + let offset = time::Duration::nanoseconds(counter); + counter += 1; + + let entry = History::import().timestamp(now - 
offset).command(cmd); + + h.push(entry.build().into()).await?; + } + + Ok(()) + } +} diff --git a/crates/atuin-client/src/import/nu_histdb.rs b/crates/atuin-client/src/import/nu_histdb.rs new file mode 100644 index 00000000..f0e8e95c --- /dev/null +++ b/crates/atuin-client/src/import/nu_histdb.rs @@ -0,0 +1,113 @@ +// import old shell history! +// automatically hoover up all that we can find + +use std::path::PathBuf; + +use async_trait::async_trait; +use directories::BaseDirs; +use eyre::{eyre, Result}; +use sqlx::{sqlite::SqlitePool, Pool}; +use time::{Duration, OffsetDateTime}; + +use super::Importer; +use crate::history::History; +use crate::import::Loader; + +#[derive(sqlx::FromRow, Debug)] +pub struct HistDbEntry { + pub id: i64, + pub command_line: Vec<u8>, + pub start_timestamp: i64, + pub session_id: i64, + pub hostname: Vec<u8>, + pub cwd: Vec<u8>, + pub duration_ms: i64, + pub exit_status: i64, + pub more_info: Vec<u8>, +} + +impl From<HistDbEntry> for History { + fn from(histdb_item: HistDbEntry) -> Self { + let ts_secs = histdb_item.start_timestamp / 1000; + let ts_ns = (histdb_item.start_timestamp % 1000) * 1_000_000; + let imported = History::import() + .timestamp( + OffsetDateTime::from_unix_timestamp(ts_secs).unwrap() + + Duration::nanoseconds(ts_ns), + ) + .command(String::from_utf8(histdb_item.command_line).unwrap()) + .cwd(String::from_utf8(histdb_item.cwd).unwrap()) + .exit(histdb_item.exit_status) + .duration(histdb_item.duration_ms) + .session(format!("{:x}", histdb_item.session_id)) + .hostname(String::from_utf8(histdb_item.hostname).unwrap()); + + imported.build().into() + } +} + +#[derive(Debug)] +pub struct NuHistDb { + histdb: Vec<HistDbEntry>, +} + +/// Read db at given file, return vector of entries. 
+async fn hist_from_db(dbpath: PathBuf) -> Result<Vec<HistDbEntry>> { + let pool = SqlitePool::connect(dbpath.to_str().unwrap()).await?; + hist_from_db_conn(pool).await +} + +async fn hist_from_db_conn(pool: Pool<sqlx::Sqlite>) -> Result<Vec<HistDbEntry>> { + let query = r#" + SELECT + id, command_line, start_timestamp, session_id, hostname, cwd, duration_ms, exit_status, + more_info + FROM history + ORDER BY start_timestamp + "#; + let histdb_vec: Vec<HistDbEntry> = sqlx::query_as::<_, HistDbEntry>(query) + .fetch_all(&pool) + .await?; + Ok(histdb_vec) +} + +impl NuHistDb { + pub fn histpath() -> Result<PathBuf> { + let base = BaseDirs::new().ok_or_else(|| eyre!("could not determine data directory"))?; + let config_dir = base.config_dir().join("nushell"); + + let histdb_path = config_dir.join("history.sqlite3"); + if histdb_path.exists() { + Ok(histdb_path) + } else { + Err(eyre!("Could not find history file.")) + } + } +} + +#[async_trait] +impl Importer for NuHistDb { + // Not sure how this is used + const NAME: &'static str = "nu_histdb"; + + /// Creates a new NuHistDb and populates the history based on the pre-populated data + /// structure. 
+ async fn new() -> Result<Self> { + let dbpath = NuHistDb::histpath()?; + let histdb_entry_vec = hist_from_db(dbpath).await?; + Ok(Self { + histdb: histdb_entry_vec, + }) + } + + async fn entries(&mut self) -> Result<usize> { + Ok(self.histdb.len()) + } + + async fn load(self, h: &mut impl Loader) -> Result<()> { + for i in self.histdb { + h.push(i.into()).await?; + } + Ok(()) + } +} diff --git a/crates/atuin-client/src/import/resh.rs b/crates/atuin-client/src/import/resh.rs new file mode 100644 index 00000000..396d11fd --- /dev/null +++ b/crates/atuin-client/src/import/resh.rs @@ -0,0 +1,140 @@ +use std::path::PathBuf; + +use async_trait::async_trait; +use directories::UserDirs; +use eyre::{eyre, Result}; +use serde::Deserialize; + +use atuin_common::utils::uuid_v7; +use time::OffsetDateTime; + +use super::{get_histpath, unix_byte_lines, Importer, Loader}; +use crate::history::History; +use crate::import::read_to_end; + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct ReshEntry { + pub cmd_line: String, + pub exit_code: i64, + pub shell: String, + pub uname: String, + pub session_id: String, + pub home: String, + pub lang: String, + pub lc_all: String, + pub login: String, + pub pwd: String, + pub pwd_after: String, + pub shell_env: String, + pub term: String, + pub real_pwd: String, + pub real_pwd_after: String, + pub pid: i64, + pub session_pid: i64, + pub host: String, + pub hosttype: String, + pub ostype: String, + pub machtype: String, + pub shlvl: i64, + pub timezone_before: String, + pub timezone_after: String, + pub realtime_before: f64, + pub realtime_after: f64, + pub realtime_before_local: f64, + pub realtime_after_local: f64, + pub realtime_duration: f64, + pub realtime_since_session_start: f64, + pub realtime_since_boot: f64, + pub git_dir: String, + pub git_real_dir: String, + pub git_origin_remote: String, + pub git_dir_after: String, + pub git_real_dir_after: String, + pub git_origin_remote_after: String, + pub 
machine_id: String, + pub os_release_id: String, + pub os_release_version_id: String, + pub os_release_id_like: String, + pub os_release_name: String, + pub os_release_pretty_name: String, + pub resh_uuid: String, + pub resh_version: String, + pub resh_revision: String, + pub parts_merged: bool, + pub recalled: bool, + pub recall_last_cmd_line: String, + pub cols: String, + pub lines: String, +} + +#[derive(Debug)] +pub struct Resh { + bytes: Vec<u8>, +} + +fn default_histpath() -> Result<PathBuf> { + let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?; + let home_dir = user_dirs.home_dir(); + + Ok(home_dir.join(".resh_history.json")) +} + +#[async_trait] +impl Importer for Resh { + const NAME: &'static str = "resh"; + + async fn new() -> Result<Self> { + let bytes = read_to_end(get_histpath(default_histpath)?)?; + Ok(Self { bytes }) + } + + async fn entries(&mut self) -> Result<usize> { + Ok(super::count_lines(&self.bytes)) + } + + async fn load(self, h: &mut impl Loader) -> Result<()> { + for b in unix_byte_lines(&self.bytes) { + let s = match std::str::from_utf8(b) { + Ok(s) => s, + Err(_) => continue, // we can skip past things like invalid utf8 + }; + let entry = match serde_json::from_str::<ReshEntry>(s) { + Ok(e) => e, + Err(_) => continue, // skip invalid json :shrug: + }; + + #[allow(clippy::cast_possible_truncation)] + #[allow(clippy::cast_sign_loss)] + let timestamp = { + let secs = entry.realtime_before.floor() as i64; + let nanosecs = (entry.realtime_before.fract() * 1_000_000_000_f64).round() as i64; + OffsetDateTime::from_unix_timestamp(secs)? + time::Duration::nanoseconds(nanosecs) + }; + #[allow(clippy::cast_possible_truncation)] + #[allow(clippy::cast_sign_loss)] + let duration = { + let secs = entry.realtime_after.floor() as i64; + let nanosecs = (entry.realtime_after.fract() * 1_000_000_000_f64).round() as i64; + let base = OffsetDateTime::from_unix_timestamp(secs)? 
+ + time::Duration::nanoseconds(nanosecs); + let difference = base - timestamp; + difference.whole_nanoseconds() as i64 + }; + + let imported = History::import() + .command(entry.cmd_line) + .timestamp(timestamp) + .duration(duration) + .exit(entry.exit_code) + .cwd(entry.pwd) + .hostname(entry.host) + // CHECK: should we add uuid here? It's not set in the other importers + .session(uuid_v7().as_simple().to_string()); + + h.push(imported.build().into()).await?; + } + + Ok(()) + } +} diff --git a/crates/atuin-client/src/import/xonsh.rs b/crates/atuin-client/src/import/xonsh.rs new file mode 100644 index 00000000..19ce4cf6 --- /dev/null +++ b/crates/atuin-client/src/import/xonsh.rs @@ -0,0 +1,233 @@ +use std::env; +use std::fs::{self, File}; +use std::path::{Path, PathBuf}; + +use async_trait::async_trait; +use directories::BaseDirs; +use eyre::{eyre, Result}; +use serde::Deserialize; +use time::OffsetDateTime; +use uuid::timestamp::{context::NoContext, Timestamp}; +use uuid::Uuid; + +use super::{get_histpath, Importer, Loader}; +use crate::history::History; +use crate::utils::get_host_user; + +// Note: both HistoryFile and HistoryData have other keys present in the JSON, we don't +// care about them so we leave them unspecified so as to avoid deserializing unnecessarily. 
+#[derive(Debug, Deserialize)] +struct HistoryFile { + data: HistoryData, +} + +#[derive(Debug, Deserialize)] +struct HistoryData { + sessionid: String, + cmds: Vec<HistoryCmd>, +} + +#[derive(Debug, Deserialize)] +struct HistoryCmd { + cwd: String, + inp: String, + rtn: Option<i64>, + ts: (f64, f64), +} + +#[derive(Debug)] +pub struct Xonsh { + // history is stored as a bunch of json files, one per session + sessions: Vec<HistoryData>, + hostname: String, +} + +fn xonsh_hist_dir(xonsh_data_dir: Option<String>) -> Result<PathBuf> { + // if running within xonsh, this will be available + if let Some(d) = xonsh_data_dir { + let mut path = PathBuf::from(d); + path.push("history_json"); + return Ok(path); + } + + // otherwise, fall back to default + let base = BaseDirs::new().ok_or_else(|| eyre!("Could not determine home directory"))?; + + let hist_dir = base.data_dir().join("xonsh/history_json"); + if hist_dir.exists() || cfg!(test) { + Ok(hist_dir) + } else { + Err(eyre!("Could not find xonsh history files")) + } +} + +fn load_sessions(hist_dir: &Path) -> Result<Vec<HistoryData>> { + let mut sessions = vec![]; + for entry in fs::read_dir(hist_dir)? { + let p = entry?.path(); + let ext = p.extension().and_then(|e| e.to_str()); + if p.is_file() && ext == Some("json") { + if let Some(data) = load_session(&p)? 
{ + sessions.push(data); + } + } + } + Ok(sessions) +} + +fn load_session(path: &Path) -> Result<Option<HistoryData>> { + let file = File::open(path)?; + // empty files are not valid json, so we can't deserialize them + if file.metadata()?.len() == 0 { + return Ok(None); + } + + let mut hist_file: HistoryFile = serde_json::from_reader(file)?; + + // if there are commands in this session, replace the existing UUIDv4 + // with a UUIDv7 generated from the timestamp of the first command + if let Some(cmd) = hist_file.data.cmds.first() { + let seconds = cmd.ts.0.trunc() as u64; + let nanos = (cmd.ts.0.fract() * 1_000_000_000_f64) as u32; + let ts = Timestamp::from_unix(NoContext, seconds, nanos); + hist_file.data.sessionid = Uuid::new_v7(ts).to_string(); + } + Ok(Some(hist_file.data)) +} + +#[async_trait] +impl Importer for Xonsh { + const NAME: &'static str = "xonsh"; + + async fn new() -> Result<Self> { + // wrap xonsh-specific path resolver in general one so that it respects $HISTPATH + let xonsh_data_dir = env::var("XONSH_DATA_DIR").ok(); + let hist_dir = get_histpath(|| xonsh_hist_dir(xonsh_data_dir))?; + let sessions = load_sessions(&hist_dir)?; + let hostname = get_host_user(); + Ok(Xonsh { sessions, hostname }) + } + + async fn entries(&mut self) -> Result<usize> { + let total = self.sessions.iter().map(|s| s.cmds.len()).sum(); + Ok(total) + } + + async fn load(self, loader: &mut impl Loader) -> Result<()> { + for session in self.sessions { + for cmd in session.cmds { + let (start, end) = cmd.ts; + let ts_nanos = (start * 1_000_000_000_f64) as i128; + let timestamp = OffsetDateTime::from_unix_timestamp_nanos(ts_nanos)?; + + let duration = (end - start) * 1_000_000_000_f64; + + match cmd.rtn { + Some(exit) => { + let entry = History::import() + .timestamp(timestamp) + .duration(duration.trunc() as i64) + .exit(exit) + .command(cmd.inp.trim()) + .cwd(cmd.cwd) + .session(session.sessionid.clone()) + .hostname(self.hostname.clone()); + 
loader.push(entry.build().into()).await?; + } + None => { + let entry = History::import() + .timestamp(timestamp) + .duration(duration.trunc() as i64) + .command(cmd.inp.trim()) + .cwd(cmd.cwd) + .session(session.sessionid.clone()) + .hostname(self.hostname.clone()); + loader.push(entry.build().into()).await?; + } + } + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use time::macros::datetime; + + use super::*; + + use crate::history::History; + use crate::import::tests::TestLoader; + + #[test] + fn test_hist_dir_xonsh() { + let hist_dir = xonsh_hist_dir(Some("/home/user/xonsh_data".to_string())).unwrap(); + assert_eq!( + hist_dir, + PathBuf::from("/home/user/xonsh_data/history_json") + ); + } + + #[tokio::test] + async fn test_import() { + let dir = PathBuf::from("tests/data/xonsh"); + let sessions = load_sessions(&dir).unwrap(); + let hostname = "box:user".to_string(); + let xonsh = Xonsh { sessions, hostname }; + + let mut loader = TestLoader::default(); + xonsh.load(&mut loader).await.unwrap(); + // order in buf will depend on filenames, so sort by timestamp for consistency + loader.buf.sort_by_key(|h| h.timestamp); + for (actual, expected) in loader.buf.iter().zip(expected_hist_entries().iter()) { + assert_eq!(actual.timestamp, expected.timestamp); + assert_eq!(actual.command, expected.command); + assert_eq!(actual.cwd, expected.cwd); + assert_eq!(actual.exit, expected.exit); + assert_eq!(actual.duration, expected.duration); + assert_eq!(actual.hostname, expected.hostname); + } + } + + fn expected_hist_entries() -> [History; 4] { + [ + History::import() + .timestamp(datetime!(2024-02-6 04:17:59.478272256 +00:00:00)) + .command("echo hello world!".to_string()) + .cwd("/home/user/Documents/code/atuin".to_string()) + .exit(0) + .duration(4651069) + .hostname("box:user".to_string()) + .build() + .into(), + History::import() + .timestamp(datetime!(2024-02-06 04:18:01.70632832 +00:00:00)) + .command("ls -l".to_string()) + 
.cwd("/home/user/Documents/code/atuin".to_string()) + .exit(0) + .duration(21288633) + .hostname("box:user".to_string()) + .build() + .into(), + History::import() + .timestamp(datetime!(2024-02-06 17:41:31.142515968 +00:00:00)) + .command("false".to_string()) + .cwd("/home/user/Documents/code/atuin/atuin-client".to_string()) + .exit(1) + .duration(10269403) + .hostname("box:user".to_string()) + .build() + .into(), + History::import() + .timestamp(datetime!(2024-02-06 17:41:32.271584 +00:00:00)) + .command("exit".to_string()) + .cwd("/home/user/Documents/code/atuin/atuin-client".to_string()) + .exit(0) + .duration(4259347) + .hostname("box:user".to_string()) + .build() + .into(), + ] + } +} diff --git a/crates/atuin-client/src/import/xonsh_sqlite.rs b/crates/atuin-client/src/import/xonsh_sqlite.rs new file mode 100644 index 00000000..2817dc63 --- /dev/null +++ b/crates/atuin-client/src/import/xonsh_sqlite.rs @@ -0,0 +1,217 @@ +use std::env; +use std::path::PathBuf; + +use async_trait::async_trait; +use directories::BaseDirs; +use eyre::{eyre, Result}; +use futures::TryStreamExt; +use sqlx::{sqlite::SqlitePool, FromRow, Row}; +use time::OffsetDateTime; +use uuid::timestamp::{context::NoContext, Timestamp}; +use uuid::Uuid; + +use super::{get_histpath, Importer, Loader}; +use crate::history::History; +use crate::utils::get_host_user; + +#[derive(Debug, FromRow)] +struct HistDbEntry { + inp: String, + rtn: Option<i64>, + tsb: f64, + tse: f64, + cwd: String, + session_start: f64, +} + +impl HistDbEntry { + fn into_hist_with_hostname(self, hostname: String) -> History { + let ts_nanos = (self.tsb * 1_000_000_000_f64) as i128; + let timestamp = OffsetDateTime::from_unix_timestamp_nanos(ts_nanos).unwrap(); + + let session_ts_seconds = self.session_start.trunc() as u64; + let session_ts_nanos = (self.session_start.fract() * 1_000_000_000_f64) as u32; + let session_ts = Timestamp::from_unix(NoContext, session_ts_seconds, session_ts_nanos); + let session_id = 
Uuid::new_v7(session_ts).to_string(); + let duration = (self.tse - self.tsb) * 1_000_000_000_f64; + + if let Some(exit) = self.rtn { + let imported = History::import() + .timestamp(timestamp) + .duration(duration.trunc() as i64) + .exit(exit) + .command(self.inp) + .cwd(self.cwd) + .session(session_id) + .hostname(hostname); + imported.build().into() + } else { + let imported = History::import() + .timestamp(timestamp) + .duration(duration.trunc() as i64) + .command(self.inp) + .cwd(self.cwd) + .session(session_id) + .hostname(hostname); + imported.build().into() + } + } +} + +fn xonsh_db_path(xonsh_data_dir: Option<String>) -> Result<PathBuf> { + // if running within xonsh, this will be available + if let Some(d) = xonsh_data_dir { + let mut path = PathBuf::from(d); + path.push("xonsh-history.sqlite"); + return Ok(path); + } + + // otherwise, fall back to default + let base = BaseDirs::new().ok_or_else(|| eyre!("Could not determine home directory"))?; + + let hist_file = base.data_dir().join("xonsh/xonsh-history.sqlite"); + if hist_file.exists() || cfg!(test) { + Ok(hist_file) + } else { + Err(eyre!( + "Could not find xonsh history db at: {}", + hist_file.to_string_lossy() + )) + } +} + +#[derive(Debug)] +pub struct XonshSqlite { + pool: SqlitePool, + hostname: String, +} + +#[async_trait] +impl Importer for XonshSqlite { + const NAME: &'static str = "xonsh_sqlite"; + + async fn new() -> Result<Self> { + // wrap xonsh-specific path resolver in general one so that it respects $HISTPATH + let xonsh_data_dir = env::var("XONSH_DATA_DIR").ok(); + let db_path = get_histpath(|| xonsh_db_path(xonsh_data_dir))?; + let connection_str = db_path.to_str().ok_or_else(|| { + eyre!( + "Invalid path for SQLite database: {}", + db_path.to_string_lossy() + ) + })?; + + let pool = SqlitePool::connect(connection_str).await?; + let hostname = get_host_user(); + Ok(XonshSqlite { pool, hostname }) + } + + async fn entries(&mut self) -> Result<usize> { + let query = "SELECT COUNT(*) FROM 
xonsh_history"; + let row = sqlx::query(query).fetch_one(&self.pool).await?; + let count: u32 = row.get(0); + Ok(count as usize) + } + + async fn load(self, loader: &mut impl Loader) -> Result<()> { + let query = r#" + SELECT inp, rtn, tsb, tse, cwd, + MIN(tsb) OVER (PARTITION BY sessionid) AS session_start + FROM xonsh_history + ORDER BY rowid + "#; + + let mut entries = sqlx::query_as::<_, HistDbEntry>(query).fetch(&self.pool); + + let mut count = 0; + while let Some(entry) = entries.try_next().await? { + let hist = entry.into_hist_with_hostname(self.hostname.clone()); + loader.push(hist).await?; + count += 1; + } + + println!("Loaded: {count}"); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use time::macros::datetime; + + use super::*; + + use crate::history::History; + use crate::import::tests::TestLoader; + + #[test] + fn test_db_path_xonsh() { + let db_path = xonsh_db_path(Some("/home/user/xonsh_data".to_string())).unwrap(); + assert_eq!( + db_path, + PathBuf::from("/home/user/xonsh_data/xonsh-history.sqlite") + ); + } + + #[tokio::test] + async fn test_import() { + let connection_str = "tests/data/xonsh-history.sqlite"; + let xonsh_sqlite = XonshSqlite { + pool: SqlitePool::connect(connection_str).await.unwrap(), + hostname: "box:user".to_string(), + }; + + let mut loader = TestLoader::default(); + xonsh_sqlite.load(&mut loader).await.unwrap(); + + for (actual, expected) in loader.buf.iter().zip(expected_hist_entries().iter()) { + assert_eq!(actual.timestamp, expected.timestamp); + assert_eq!(actual.command, expected.command); + assert_eq!(actual.cwd, expected.cwd); + assert_eq!(actual.exit, expected.exit); + assert_eq!(actual.duration, expected.duration); + assert_eq!(actual.hostname, expected.hostname); + } + } + + fn expected_hist_entries() -> [History; 4] { + [ + History::import() + .timestamp(datetime!(2024-02-6 17:56:21.130956288 +00:00:00)) + .command("echo hello world!".to_string()) + .cwd("/home/user/Documents/code/atuin".to_string()) + .exit(0) + 
.duration(2628564) + .hostname("box:user".to_string()) + .build() + .into(), + History::import() + .timestamp(datetime!(2024-02-06 17:56:28.190406144 +00:00:00)) + .command("ls -l".to_string()) + .cwd("/home/user/Documents/code/atuin".to_string()) + .exit(0) + .duration(9371519) + .hostname("box:user".to_string()) + .build() + .into(), + History::import() + .timestamp(datetime!(2024-02-06 17:56:46.989020928 +00:00:00)) + .command("false".to_string()) + .cwd("/home/user/Documents/code/atuin".to_string()) + .exit(1) + .duration(17337560) + .hostname("box:user".to_string()) + .build() + .into(), + History::import() + .timestamp(datetime!(2024-02-06 17:56:48.218384128 +00:00:00)) + .command("exit".to_string()) + .cwd("/home/user/Documents/code/atuin".to_string()) + .exit(0) + .duration(4599094) + .hostname("box:user".to_string()) + .build() + .into(), + ] + } +} diff --git a/crates/atuin-client/src/import/zsh.rs b/crates/atuin-client/src/import/zsh.rs new file mode 100644 index 00000000..5bc8fc16 --- /dev/null +++ b/crates/atuin-client/src/import/zsh.rs @@ -0,0 +1,229 @@ +// import old shell history! +// automatically hoover up all that we can find + +use std::borrow::Cow; +use std::path::PathBuf; + +use async_trait::async_trait; +use directories::UserDirs; +use eyre::{eyre, Result}; +use time::OffsetDateTime; + +use super::{get_histpath, unix_byte_lines, Importer, Loader}; +use crate::history::History; +use crate::import::read_to_end; + +#[derive(Debug)] +pub struct Zsh { + bytes: Vec<u8>, +} + +fn default_histpath() -> Result<PathBuf> { + // oh-my-zsh sets HISTFILE=~/.zhistory + // zsh has no default value for this var, but uses ~/.zhistory. 
+ // we could maybe be smarter about this in the future :) + let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?; + let home_dir = user_dirs.home_dir(); + + let mut candidates = [".zhistory", ".zsh_history"].iter(); + loop { + match candidates.next() { + Some(candidate) => { + let histpath = home_dir.join(candidate); + if histpath.exists() { + break Ok(histpath); + } + } + None => { + break Err(eyre!( + "Could not find history file. Try setting and exporting $HISTFILE" + )) + } + } + } +} + +#[async_trait] +impl Importer for Zsh { + const NAME: &'static str = "zsh"; + + async fn new() -> Result<Self> { + let bytes = read_to_end(get_histpath(default_histpath)?)?; + Ok(Self { bytes }) + } + + async fn entries(&mut self) -> Result<usize> { + Ok(super::count_lines(&self.bytes)) + } + + async fn load(self, h: &mut impl Loader) -> Result<()> { + let now = OffsetDateTime::now_utc(); + let mut line = String::new(); + + let mut counter = 0; + for b in unix_byte_lines(&self.bytes) { + let s = match unmetafy(b) { + Some(s) => s, + _ => continue, // we can skip past things like invalid utf8 + }; + + if let Some(s) = s.strip_suffix('\\') { + line.push_str(s); + line.push_str("\\\n"); + } else { + line.push_str(&s); + let command = std::mem::take(&mut line); + + if let Some(command) = command.strip_prefix(": ") { + counter += 1; + h.push(parse_extended(command, counter)).await?; + } else { + let offset = time::Duration::seconds(counter); + counter += 1; + + let imported = History::import() + // preserve ordering + .timestamp(now - offset) + .command(command.trim_end().to_string()); + + h.push(imported.build().into()).await?; + } + } + } + + Ok(()) + } +} + +fn parse_extended(line: &str, counter: i64) -> History { + let (time, duration) = line.split_once(':').unwrap(); + let (duration, command) = duration.split_once(';').unwrap(); + + let time = time + .parse::<i64>() + .ok() + .and_then(|t| OffsetDateTime::from_unix_timestamp(t).ok()) + 
/// Decodes a "metafied" history line into UTF-8 text.
///
/// The byte `0x83` acts as an escape marker: it is dropped, and the byte that
/// follows it is XOR-ed with 32. All other bytes are copied unchanged. A
/// marker that directly follows another marker just re-arms the escape, and a
/// marker at the very end of the line is dropped.
///
/// Lines without any marker are borrowed as-is; decoded lines are returned as
/// an owned string. Returns `None` when the (decoded) bytes are not valid
/// UTF-8.
fn unmetafy(line: &[u8]) -> Option<Cow<str>> {
    const META: u8 = 0x83;

    // fast path: nothing to decode, borrow the input
    if !line.contains(&META) {
        return std::str::from_utf8(line).ok().map(Cow::Borrowed);
    }

    let mut decoded = Vec::with_capacity(line.len());
    let mut escaped = false;
    for &byte in line {
        match (byte, escaped) {
            (META, _) => escaped = true,
            (b, true) => {
                escaped = false;
                decoded.push(b ^ 32);
            }
            (b, false) => decoded.push(b),
        }
    }

    String::from_utf8(decoded).ok().map(Cow::Owned)
}
10_000_000_000); + assert_eq!( + parsed.timestamp, + OffsetDateTime::from_unix_timestamp(1_613_322_469).unwrap() + ); + } + + #[tokio::test] + async fn test_parse_file() { + let bytes = r": 1613322469:0;cargo install atuin +: 1613322469:10;cargo install atuin; \ +cargo update +: 1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷ +" + .as_bytes() + .to_owned(); + + let mut zsh = Zsh { bytes }; + assert_eq!(zsh.entries().await.unwrap(), 4); + + let mut loader = TestLoader::default(); + zsh.load(&mut loader).await.unwrap(); + + assert_equal( + loader.buf.iter().map(|h| h.command.as_str()), + [ + "cargo install atuin", + "cargo install atuin; \\\ncargo update", + "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", + ], + ); + } + + #[tokio::test] + async fn test_parse_metafied() { + let bytes = + b"echo \xe4\xbd\x83\x80\xe5\xa5\xbd\nls ~/\xe9\x83\xbf\xb3\xe4\xb9\x83\xb0\n".to_vec(); + + let mut zsh = Zsh { bytes }; + assert_eq!(zsh.entries().await.unwrap(), 2); + + let mut loader = TestLoader::default(); + zsh.load(&mut loader).await.unwrap(); + + assert_equal( + loader.buf.iter().map(|h| h.command.as_str()), + ["echo 你好", "ls ~/音乐"], + ); + } +} diff --git a/crates/atuin-client/src/import/zsh_histdb.rs b/crates/atuin-client/src/import/zsh_histdb.rs new file mode 100644 index 00000000..eb72baa3 --- /dev/null +++ b/crates/atuin-client/src/import/zsh_histdb.rs @@ -0,0 +1,247 @@ +// import old shell history from zsh-histdb! +// automatically hoover up all that we can find + +// As far as i can tell there are no version numbers in the histdb sqlite DB, so we're going based +// on the schema from 2022-05-01 +// +// I have run into some histories that will not import b/c of non UTF-8 characters. 
+// + +// +// An example sqlite query for histdb data: +// +//id|session|command_id|place_id|exit_status|start_time|duration|id|argv|id|host|dir +// +// +// select +// history.id, +// history.start_time, +// places.host, +// places.dir, +// commands.argv +// from history +// left join commands on history.command_id = commands.id +// left join places on history.place_id = places.id ; +// +// CREATE TABLE history (id integer primary key autoincrement, +// session int, +// command_id int references commands (id), +// place_id int references places (id), +// exit_status int, +// start_time int, +// duration int); +// + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use async_trait::async_trait; +use atuin_common::utils::uuid_v7; +use directories::UserDirs; +use eyre::{eyre, Result}; +use sqlx::{sqlite::SqlitePool, Pool}; +use time::PrimitiveDateTime; + +use super::Importer; +use crate::history::History; +use crate::import::Loader; +use crate::utils::{get_hostname, get_username}; + +#[derive(sqlx::FromRow, Debug)] +pub struct HistDbEntryCount { + pub count: usize, +} + +#[derive(sqlx::FromRow, Debug)] +pub struct HistDbEntry { + pub id: i64, + pub start_time: PrimitiveDateTime, + pub host: Vec<u8>, + pub dir: Vec<u8>, + pub argv: Vec<u8>, + pub duration: i64, + pub exit_status: i64, + pub session: i64, +} + +#[derive(Debug)] +pub struct ZshHistDb { + histdb: Vec<HistDbEntry>, + username: String, +} + +/// Read db at given file, return vector of entries. 
+async fn hist_from_db(dbpath: PathBuf) -> Result<Vec<HistDbEntry>> { + let pool = SqlitePool::connect(dbpath.to_str().unwrap()).await?; + hist_from_db_conn(pool).await +} + +async fn hist_from_db_conn(pool: Pool<sqlx::Sqlite>) -> Result<Vec<HistDbEntry>> { + let query = r#" + SELECT + history.id, history.start_time, history.duration, places.host, places.dir, + commands.argv, history.exit_status, history.session + FROM history + LEFT JOIN commands ON history.command_id = commands.id + LEFT JOIN places ON history.place_id = places.id + ORDER BY history.start_time + "#; + let histdb_vec: Vec<HistDbEntry> = sqlx::query_as::<_, HistDbEntry>(query) + .fetch_all(&pool) + .await?; + Ok(histdb_vec) +} + +impl ZshHistDb { + pub fn histpath_candidate() -> PathBuf { + // By default histdb database is `${HOME}/.histdb/zsh-history.db` + // This can be modified by ${HISTDB_FILE} + // + // if [[ -z ${HISTDB_FILE} ]]; then + // typeset -g HISTDB_FILE="${HOME}/.histdb/zsh-history.db" + let user_dirs = UserDirs::new().unwrap(); // should catch error here? + let home_dir = user_dirs.home_dir(); + std::env::var("HISTDB_FILE") + .as_ref() + .map(|x| Path::new(x).to_path_buf()) + .unwrap_or_else(|_err| home_dir.join(".histdb/zsh-history.db")) + } + pub fn histpath() -> Result<PathBuf> { + let histdb_path = ZshHistDb::histpath_candidate(); + if histdb_path.exists() { + Ok(histdb_path) + } else { + Err(eyre!( + "Could not find history file. Try setting $HISTDB_FILE" + )) + } + } +} + +#[async_trait] +impl Importer for ZshHistDb { + // Not sure how this is used + const NAME: &'static str = "zsh_histdb"; + + /// Creates a new ZshHistDb and populates the history based on the pre-populated data + /// structure. 
+ async fn new() -> Result<Self> { + let dbpath = ZshHistDb::histpath()?; + let histdb_entry_vec = hist_from_db(dbpath).await?; + Ok(Self { + histdb: histdb_entry_vec, + username: get_username(), + }) + } + + async fn entries(&mut self) -> Result<usize> { + Ok(self.histdb.len()) + } + + async fn load(self, h: &mut impl Loader) -> Result<()> { + let mut session_map = HashMap::new(); + for entry in self.histdb { + let command = match std::str::from_utf8(&entry.argv) { + Ok(s) => s.trim_end(), + Err(_) => continue, // we can skip past things like invalid utf8 + }; + let cwd = match std::str::from_utf8(&entry.dir) { + Ok(s) => s.trim_end(), + Err(_) => continue, // we can skip past things like invalid utf8 + }; + let hostname = format!( + "{}:{}", + String::from_utf8(entry.host).unwrap_or_else(|_e| get_hostname()), + self.username + ); + let session = session_map.entry(entry.session).or_insert_with(uuid_v7); + + let imported = History::import() + .timestamp(entry.start_time.assume_utc()) + .command(command) + .cwd(cwd) + .duration(entry.duration * 1_000_000_000) + .exit(entry.exit_status) + .session(session.as_simple().to_string()) + .hostname(hostname) + .build(); + h.push(imported.into()).await?; + } + Ok(()) + } +} + +#[cfg(test)] +mod test { + + use super::*; + use sqlx::sqlite::SqlitePoolOptions; + use std::env; + #[tokio::test(flavor = "multi_thread")] + async fn test_env_vars() { + let test_env_db = "nonstd-zsh-history.db"; + let key = "HISTDB_FILE"; + env::set_var(key, test_env_db); + + // test the env got set + assert_eq!(env::var(key).unwrap(), test_env_db.to_string()); + + // test histdb returns the proper db from previous step + let histdb_path = ZshHistDb::histpath_candidate(); + assert_eq!(histdb_path.to_str().unwrap(), test_env_db); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_import() { + let pool: SqlitePool = SqlitePoolOptions::new() + .min_connections(2) + .connect(":memory:") + .await + .unwrap(); + + // sql dump directly from a 
test database. + let db_sql = r#" + PRAGMA foreign_keys=OFF; + BEGIN TRANSACTION; + CREATE TABLE commands (id integer primary key autoincrement, argv text, unique(argv) on conflict ignore); + INSERT INTO commands VALUES(1,'pwd'); + INSERT INTO commands VALUES(2,'curl google.com'); + INSERT INTO commands VALUES(3,'bash'); + CREATE TABLE places (id integer primary key autoincrement, host text, dir text, unique(host, dir) on conflict ignore); + INSERT INTO places VALUES(1,'mbp16.local','/home/noyez'); + CREATE TABLE history (id integer primary key autoincrement, + session int, + command_id int references commands (id), + place_id int references places (id), + exit_status int, + start_time int, + duration int); + INSERT INTO history VALUES(1,0,1,1,0,1651497918,1); + INSERT INTO history VALUES(2,0,2,1,0,1651497923,1); + INSERT INTO history VALUES(3,0,3,1,NULL,1651497930,NULL); + DELETE FROM sqlite_sequence; + INSERT INTO sqlite_sequence VALUES('commands',3); + INSERT INTO sqlite_sequence VALUES('places',3); + INSERT INTO sqlite_sequence VALUES('history',3); + CREATE INDEX hist_time on history(start_time); + CREATE INDEX place_dir on places(dir); + CREATE INDEX place_host on places(host); + CREATE INDEX history_command_place on history(command_id, place_id); + COMMIT; "#; + + sqlx::query(db_sql).execute(&pool).await.unwrap(); + + // test histdb iterator + let histdb_vec = hist_from_db_conn(pool).await.unwrap(); + let histdb = ZshHistDb { + histdb: histdb_vec, + username: get_username(), + }; + + println!("h: {:#?}", histdb.histdb); + println!("counter: {:?}", histdb.histdb.len()); + for i in histdb.histdb { + println!("{i:?}"); + } + } +} |
