diff options
Diffstat (limited to 'crates/turtle/src/atuin_client/import')
| -rw-r--r-- | crates/turtle/src/atuin_client/import/bash.rs | 221 | ||||
| -rw-r--r-- | crates/turtle/src/atuin_client/import/fish.rs | 179 | ||||
| -rw-r--r-- | crates/turtle/src/atuin_client/import/mod.rs | 140 | ||||
| -rw-r--r-- | crates/turtle/src/atuin_client/import/nu.rs | 67 | ||||
| -rw-r--r-- | crates/turtle/src/atuin_client/import/nu_histdb.rs | 113 | ||||
| -rw-r--r-- | crates/turtle/src/atuin_client/import/powershell.rs | 202 | ||||
| -rw-r--r-- | crates/turtle/src/atuin_client/import/replxx.rs | 137 | ||||
| -rw-r--r-- | crates/turtle/src/atuin_client/import/resh.rs | 140 | ||||
| -rw-r--r-- | crates/turtle/src/atuin_client/import/xonsh.rs | 234 | ||||
| -rw-r--r-- | crates/turtle/src/atuin_client/import/xonsh_sqlite.rs | 217 | ||||
| -rw-r--r-- | crates/turtle/src/atuin_client/import/zsh.rs | 230 | ||||
| -rw-r--r-- | crates/turtle/src/atuin_client/import/zsh_histdb.rs | 249 |
12 files changed, 0 insertions, 2129 deletions
diff --git a/crates/turtle/src/atuin_client/import/bash.rs b/crates/turtle/src/atuin_client/import/bash.rs deleted file mode 100644 index e35634e7..00000000 --- a/crates/turtle/src/atuin_client/import/bash.rs +++ /dev/null @@ -1,221 +0,0 @@ -use std::{path::PathBuf, str}; - -use async_trait::async_trait; -use directories::UserDirs; -use eyre::{Result, eyre}; -use itertools::Itertools; -use time::{Duration, OffsetDateTime}; -use tracing::warn; - -use super::{Importer, Loader, get_histfile_path, unix_byte_lines}; -use crate::atuin_client::history::History; -use crate::atuin_client::import::read_to_end; - -#[derive(Debug)] -pub(crate) struct Bash { - bytes: Vec<u8>, -} - -fn default_histpath() -> Result<PathBuf> { - let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?; - let home_dir = user_dirs.home_dir(); - - Ok(home_dir.join(".bash_history")) -} - -#[async_trait] -impl Importer for Bash { - const NAME: &'static str = "bash"; - - async fn new() -> Result<Self> { - let bytes = read_to_end(get_histfile_path(default_histpath)?)?; - Ok(Self { bytes }) - } - - async fn entries(&mut self) -> Result<usize> { - let count = unix_byte_lines(&self.bytes) - .map(LineType::from) - .filter(|line| matches!(line, LineType::Command(_))) - .count(); - Ok(count) - } - - async fn load(self, h: &mut impl Loader) -> Result<()> { - let lines = unix_byte_lines(&self.bytes) - .map(LineType::from) - .filter(|line| !matches!(line, LineType::NotUtf8)) // invalid utf8 are ignored - .collect_vec(); - - let (commands_before_first_timestamp, first_timestamp) = lines - .iter() - .enumerate() - .find_map(|(i, line)| match line { - LineType::Timestamp(t) => Some((i, *t)), - _ => None, - }) - // if no known timestamps, use now as base - .unwrap_or((lines.len(), OffsetDateTime::now_utc())); - - // if no timestamp is recorded, then use this increment to set an arbitrary timestamp - // to preserve ordering - // this increment is deliberately very small to prevent particularly fast fingers - // causing ordering issues; it also helps in handling the "here document" syntax, - // where several lines are recorded in succession without individual timestamps - let timestamp_increment = Duration::milliseconds(1); - - // make sure there is a minimum amount of time before the first known timestamp - // to fit all commands, given the default increment - let mut next_timestamp = - first_timestamp - timestamp_increment * commands_before_first_timestamp as i32; - - for line in lines.into_iter() { - match line { - LineType::NotUtf8 => unreachable!(), // already filtered - LineType::Empty => {} // do nothing - LineType::Timestamp(t) => { - if t < next_timestamp { - warn!( - "Time reversal detected in Bash history! Commands may be ordered incorrectly." - ); - } - next_timestamp = t; - } - LineType::Command(c) => { - let imported = History::import().timestamp(next_timestamp).command(c); - - h.push(imported.build().into()).await?; - next_timestamp += timestamp_increment; - } - } - } - - Ok(()) - } -} - -#[derive(Debug, Clone)] -enum LineType<'a> { - NotUtf8, - /// Can happen when using the "here document" syntax. - Empty, - /// A timestamp line start with a '#', followed immediately by an integer - /// that represents seconds since UNIX epoch. - Timestamp(OffsetDateTime), - /// Anything else. - Command(&'a str), -} -impl<'a> From<&'a [u8]> for LineType<'a> { - fn from(bytes: &'a [u8]) -> Self { - let Ok(line) = str::from_utf8(bytes) else { - return LineType::NotUtf8; - }; - if line.is_empty() { - return LineType::Empty; - } - - match try_parse_line_as_timestamp(line) { - Some(time) => LineType::Timestamp(time), - None => LineType::Command(line), - } - } -} - -fn try_parse_line_as_timestamp(line: &str) -> Option<OffsetDateTime> { - let seconds = line.strip_prefix('#')?.parse().ok()?; - OffsetDateTime::from_unix_timestamp(seconds).ok() -} - -#[cfg(test)] -mod test { - use std::cmp::Ordering; - - use itertools::{Itertools, assert_equal}; - - use crate::atuin_client::import::{Importer, tests::TestLoader}; - - use super::Bash; - - #[tokio::test] - async fn parse_no_timestamps() { - let bytes = r"cargo install atuin -cargo update -cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷ -" - .as_bytes() - .to_owned(); - - let mut bash = Bash { bytes }; - assert_eq!(bash.entries().await.unwrap(), 3); - - let mut loader = TestLoader::default(); - bash.load(&mut loader).await.unwrap(); - - assert_equal( - loader.buf.iter().map(|h| h.command.as_str()), - [ - "cargo install atuin", - "cargo update", - "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", - ], - ); - assert!(is_strictly_sorted(loader.buf.iter().map(|h| h.timestamp))) - } - - #[tokio::test] - async fn parse_with_timestamps() { - let bytes = b"#1672918999 -git reset -#1672919006 -git clean -dxf -#1672919020 -cd ../ -" - .to_vec(); - - let mut bash = Bash { bytes }; - assert_eq!(bash.entries().await.unwrap(), 3); - - let mut loader = TestLoader::default(); - bash.load(&mut loader).await.unwrap(); - - assert_equal( - loader.buf.iter().map(|h| h.command.as_str()), - ["git reset", "git clean -dxf", "cd ../"], - ); - assert_equal( - loader.buf.iter().map(|h| h.timestamp.unix_timestamp()), - [1_672_918_999, 1_672_919_006, 1_672_919_020], - ) - } - - #[tokio::test] - async fn parse_with_partial_timestamps() { - let bytes = b"git reset -#1672919006 -git clean -dxf -cd ../ -" - .to_vec(); - - let mut bash = Bash { bytes }; - assert_eq!(bash.entries().await.unwrap(), 3); - - let mut loader = TestLoader::default(); - bash.load(&mut loader).await.unwrap(); - - assert_equal( - loader.buf.iter().map(|h| h.command.as_str()), - ["git reset", "git clean -dxf", "cd ../"], - ); - assert!(is_strictly_sorted(loader.buf.iter().map(|h| h.timestamp))) - } - - fn is_strictly_sorted<T>(iter: impl IntoIterator<Item = T>) -> bool - where - T: Clone + PartialOrd, - { - iter.into_iter() - .tuple_windows() - .all(|(a, b)| matches!(a.partial_cmp(&b), Some(Ordering::Less))) - } -} diff --git a/crates/turtle/src/atuin_client/import/fish.rs b/crates/turtle/src/atuin_client/import/fish.rs deleted file mode 100644 index edc2d437..00000000 --- a/crates/turtle/src/atuin_client/import/fish.rs +++ /dev/null @@ -1,179 +0,0 @@ -// import old shell history! -// automatically hoover up all that we can find - -use std::path::PathBuf; - -use async_trait::async_trait; -use directories::BaseDirs; -use eyre::{Result, eyre}; -use time::OffsetDateTime; - -use super::{Importer, Loader, unix_byte_lines}; -use crate::atuin_client::history::History; -use crate::atuin_client::import::read_to_end; - -#[derive(Debug)] -pub(crate) struct Fish { - bytes: Vec<u8>, -} - -/// see https://fishshell.com/docs/current/interactive.html#searchable-command-history -fn default_histpath() -> Result<PathBuf> { - let base = BaseDirs::new().ok_or_else(|| eyre!("could not determine data directory"))?; - let data = std::env::var("XDG_DATA_HOME").map_or_else( - |_| base.home_dir().join(".local").join("share"), - PathBuf::from, - ); - - // fish supports multiple history sessions - // If `fish_history` var is missing, or set to `default`, use `fish` as the session - let session = std::env::var("fish_history").unwrap_or_else(|_| String::from("fish")); - let session = if session == "default" { - String::from("fish") - } else { - session - }; - - let mut histpath = data.join("fish"); - histpath.push(format!("{session}_history")); - - if histpath.exists() { - Ok(histpath) - } else { - Err(eyre!("Could not find history file.")) - } -} - -#[async_trait] -impl Importer for Fish { - const NAME: &'static str = "fish"; - - async fn new() -> Result<Self> { - let bytes = read_to_end(default_histpath()?)?; - Ok(Self { bytes }) - } - - async fn entries(&mut self) -> Result<usize> { - Ok(super::count_lines(&self.bytes)) - } - - async fn load(self, loader: &mut impl Loader) -> Result<()> { - let now = OffsetDateTime::now_utc(); - let mut time: Option<OffsetDateTime> = None; - let mut cmd: Option<String> = None; - - for b in unix_byte_lines(&self.bytes) { - let s = match std::str::from_utf8(b) { - Ok(s) => s, - Err(_) => continue, // we can skip past things like invalid utf8 - }; - - if let Some(c) = s.strip_prefix("- cmd: ") { - // first, we must deal with the prev cmd - if let Some(cmd) = cmd.take() { - let time = time.unwrap_or(now); - let entry = History::import().timestamp(time).command(cmd); - - loader.push(entry.build().into()).await?; - } - - // using raw strings to avoid needing escaping. - // replaces double backslashes with single backslashes - let c = c.replace(r"\\", r"\"); - // replaces escaped newlines - let c = c.replace(r"\n", "\n"); - // TODO: any other escape characters? - - cmd = Some(c); - } else if let Some(t) = s.strip_prefix(" when: ") { - // if t is not an int, just ignore this line - if let Ok(t) = t.parse::<i64>() { - time = Some(OffsetDateTime::from_unix_timestamp(t)?); - } - } else { - // ... ignore paths lines - } - } - - // we might have a trailing cmd - if let Some(cmd) = cmd.take() { - let time = time.unwrap_or(now); - let entry = History::import().timestamp(time).command(cmd); - - loader.push(entry.build().into()).await?; - } - - Ok(()) - } -} - -#[cfg(test)] -mod test { - - use crate::import::{Importer, tests::TestLoader}; - - use super::Fish; - - #[tokio::test] - async fn parse_complex() { - // complicated input with varying contents and escaped strings. - let bytes = r#"- cmd: history --help - when: 1639162832 -- cmd: cat ~/.bash_history - when: 1639162851 - paths: - - ~/.bash_history -- cmd: ls ~/.local/share/fish/fish_history - when: 1639162890 - paths: - - ~/.local/share/fish/fish_history -- cmd: cat ~/.local/share/fish/fish_history - when: 1639162893 - paths: - - ~/.local/share/fish/fish_history -ERROR -- CORRUPTED: ENTRY - CONTINUE: - - AS - - NORMAL -- cmd: echo "foo" \\\n'bar' baz - when: 1639162933 -- cmd: cat ~/.local/share/fish/fish_history - when: 1639162939 - paths: - - ~/.local/share/fish/fish_history -- cmd: echo "\\"" \\\\ "\\\\" - when: 1639163063 -- cmd: cat ~/.local/share/fish/fish_history - when: 1639163066 - paths: - - ~/.local/share/fish/fish_history -"# - .as_bytes() - .to_owned(); - - let fish = Fish { bytes }; - - let mut loader = TestLoader::default(); - fish.load(&mut loader).await.unwrap(); - let mut history = loader.buf.into_iter(); - - // simple wrapper for fish history entry - macro_rules! fishtory { - ($timestamp:expr_2021, $command:expr_2021) => { - let h = history.next().expect("missing entry in history"); - assert_eq!(h.command.as_str(), $command); - assert_eq!(h.timestamp.unix_timestamp(), $timestamp); - }; - } - - fishtory!(1639162832, "history --help"); - fishtory!(1639162851, "cat ~/.bash_history"); - fishtory!(1639162890, "ls ~/.local/share/fish/fish_history"); - fishtory!(1639162893, "cat ~/.local/share/fish/fish_history"); - fishtory!(1639162933, "echo \"foo\" \\\n'bar' baz"); - fishtory!(1639162939, "cat ~/.local/share/fish/fish_history"); - fishtory!(1639163063, r#"echo "\"" \\ "\\""#); - fishtory!(1639163066, "cat ~/.local/share/fish/fish_history"); - } -} diff --git a/crates/turtle/src/atuin_client/import/mod.rs b/crates/turtle/src/atuin_client/import/mod.rs deleted file mode 100644 index 81e01991..00000000 --- a/crates/turtle/src/atuin_client/import/mod.rs +++ /dev/null @@ -1,140 +0,0 @@ -use std::fs::File; -use std::io::Read; -use std::path::PathBuf; - -use async_trait::async_trait; -use eyre::{Result, bail}; -use memchr::Memchr; - -use crate::atuin_client::history::History; - -pub(crate) mod bash; -pub(crate) mod fish; -pub(crate) mod nu; -pub(crate) mod nu_histdb; -pub(crate) mod powershell; -pub(crate) mod replxx; -pub(crate) mod resh; -pub(crate) mod xonsh; -pub(crate) mod xonsh_sqlite; -pub(crate) mod zsh; -pub(crate) mod zsh_histdb; - -#[async_trait] -pub(crate) trait Importer: Sized { - const NAME: &'static str; - async fn new() -> Result<Self>; - async fn entries(&mut self) -> Result<usize>; - async fn load(self, loader: &mut impl Loader) -> Result<()>; -} - -#[async_trait] -pub(crate) trait Loader: Sync + Send { - async fn push(&mut self, hist: History) -> eyre::Result<()>; -} - -fn unix_byte_lines(input: &[u8]) -> impl Iterator<Item = &[u8]> { - UnixByteLines { - iter: memchr::memchr_iter(b'\n', input), - bytes: input, - i: 0, - } -} - -struct UnixByteLines<'a> { - iter: Memchr<'a>, - bytes: &'a [u8], - i: usize, -} - -impl<'a> Iterator for UnixByteLines<'a> { - type Item = &'a [u8]; - - fn next(&mut self) -> Option<Self::Item> { - let j = self.iter.next()?; - let out = &self.bytes[self.i..j]; - self.i = j + 1; - Some(out) - } - - fn count(self) -> usize - where - Self: Sized, - { - self.iter.count() - } -} - -fn count_lines(input: &[u8]) -> usize { - unix_byte_lines(input).count() -} - -fn get_histpath<D>(def: D) -> Result<PathBuf> -where - D: FnOnce() -> Result<PathBuf>, -{ - if let Ok(p) = std::env::var("HISTFILE") { - Ok(PathBuf::from(p)) - } else { - def() - } -} - -fn get_histfile_path<D>(def: D) -> Result<PathBuf> -where - D: FnOnce() -> Result<PathBuf>, -{ - get_histpath(def).and_then(is_file) -} - -fn get_histdir_path<D>(def: D) -> Result<PathBuf> -where - D: FnOnce() -> Result<PathBuf>, -{ - get_histpath(def).and_then(is_dir) -} - -fn read_to_end(path: PathBuf) -> Result<Vec<u8>> { - let mut bytes = Vec::new(); - let mut f = File::open(path)?; - f.read_to_end(&mut bytes)?; - Ok(bytes) -} -fn is_file(p: PathBuf) -> Result<PathBuf> { - if p.is_file() { - Ok(p) - } else { - bail!( - "Could not find history file {:?}. Try setting and exporting $HISTFILE", - p - ) - } -} -fn is_dir(p: PathBuf) -> Result<PathBuf> { - if p.is_dir() { - Ok(p) - } else { - bail!( - "Could not find history directory {:?}. Try setting and exporting $HISTFILE", - p - ) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[derive(Default)] - pub(crate) struct TestLoader { - pub(crate) buf: Vec<History>, - } - - #[async_trait] - impl Loader for TestLoader { - async fn push(&mut self, hist: History) -> Result<()> { - self.buf.push(hist); - Ok(()) - } - } -} diff --git a/crates/turtle/src/atuin_client/import/nu.rs b/crates/turtle/src/atuin_client/import/nu.rs deleted file mode 100644 index 1897a969..00000000 --- a/crates/turtle/src/atuin_client/import/nu.rs +++ /dev/null @@ -1,67 +0,0 @@ -// import old shell history! -// automatically hoover up all that we can find - -use std::path::PathBuf; - -use async_trait::async_trait; -use directories::BaseDirs; -use eyre::{Result, eyre}; -use time::OffsetDateTime; - -use super::{Importer, Loader, unix_byte_lines}; -use crate::atuin_client::history::History; -use crate::atuin_client::import::read_to_end; - -#[derive(Debug)] -pub(crate) struct Nu { - bytes: Vec<u8>, -} - -fn get_histpath() -> Result<PathBuf> { - let base = BaseDirs::new().ok_or_else(|| eyre!("could not determine data directory"))?; - let config_dir = base.config_dir().join("nushell"); - - let histpath = config_dir.join("history.txt"); - if histpath.exists() { - Ok(histpath) - } else { - Err(eyre!("Could not find history file.")) - } -} - -#[async_trait] -impl Importer for Nu { - const NAME: &'static str = "nu"; - - async fn new() -> Result<Self> { - let bytes = read_to_end(get_histpath()?)?; - Ok(Self { bytes }) - } - - async fn entries(&mut self) -> Result<usize> { - Ok(super::count_lines(&self.bytes)) - } - - async fn load(self, h: &mut impl Loader) -> Result<()> { - let now = OffsetDateTime::now_utc(); - - let mut counter = 0; - for b in unix_byte_lines(&self.bytes) { - let s = match std::str::from_utf8(b) { - Ok(s) => s, - Err(_) => continue, // we can skip past things like invalid utf8 - }; - - let cmd: String = s.replace("<\\n>", "\n"); - - let offset = time::Duration::nanoseconds(counter); - counter += 1; - - let entry = History::import().timestamp(now - offset).command(cmd); - - h.push(entry.build().into()).await?; - } - - Ok(()) - } -} diff --git a/crates/turtle/src/atuin_client/import/nu_histdb.rs b/crates/turtle/src/atuin_client/import/nu_histdb.rs deleted file mode 100644 index 1f66ea38..00000000 --- a/crates/turtle/src/atuin_client/import/nu_histdb.rs +++ /dev/null @@ -1,113 +0,0 @@ -// import old shell history! -// automatically hoover up all that we can find - -use std::path::PathBuf; - -use async_trait::async_trait; -use directories::BaseDirs; -use eyre::{Result, eyre}; -use sqlx::{Pool, sqlite::SqlitePool}; -use time::{Duration, OffsetDateTime}; - -use super::Importer; -use crate::atuin_client::history::History; -use crate::atuin_client::import::Loader; - -#[derive(sqlx::FromRow, Debug)] -pub(crate) struct HistDbEntry { - pub(crate) id: i64, - pub(crate) command_line: Vec<u8>, - pub(crate) start_timestamp: i64, - pub(crate) session_id: i64, - pub(crate) hostname: Vec<u8>, - pub(crate) cwd: Vec<u8>, - pub(crate) duration_ms: i64, - pub(crate) exit_status: i64, - pub(crate) more_info: Vec<u8>, -} - -impl From<HistDbEntry> for History { - fn from(histdb_item: HistDbEntry) -> Self { - let ts_secs = histdb_item.start_timestamp / 1000; - let ts_ns = (histdb_item.start_timestamp % 1000) * 1_000_000; - let imported = History::import() - .timestamp( - OffsetDateTime::from_unix_timestamp(ts_secs).unwrap() - + Duration::nanoseconds(ts_ns), - ) - .command(String::from_utf8(histdb_item.command_line).unwrap()) - .cwd(String::from_utf8(histdb_item.cwd).unwrap()) - .exit(histdb_item.exit_status) - .duration(histdb_item.duration_ms) - .session(format!("{:x}", histdb_item.session_id)) - .hostname(String::from_utf8(histdb_item.hostname).unwrap()); - - imported.build().into() - } -} - -#[derive(Debug)] -pub(crate) struct NuHistDb { - histdb: Vec<HistDbEntry>, -} - -/// Read db at given file, return vector of entries. -async fn hist_from_db(dbpath: PathBuf) -> Result<Vec<HistDbEntry>> { - let pool = SqlitePool::connect(dbpath.to_str().unwrap()).await?; - hist_from_db_conn(pool).await -} - -async fn hist_from_db_conn(pool: Pool<sqlx::Sqlite>) -> Result<Vec<HistDbEntry>> { - let query = r#" - SELECT - id, command_line, start_timestamp, session_id, hostname, cwd, duration_ms, exit_status, - more_info - FROM history - ORDER BY start_timestamp - "#; - let histdb_vec: Vec<HistDbEntry> = sqlx::query_as::<_, HistDbEntry>(query) - .fetch_all(&pool) - .await?; - Ok(histdb_vec) -} - -impl NuHistDb { - pub(crate) fn histpath() -> Result<PathBuf> { - let base = BaseDirs::new().ok_or_else(|| eyre!("could not determine data directory"))?; - let config_dir = base.config_dir().join("nushell"); - - let histdb_path = config_dir.join("history.sqlite3"); - if histdb_path.exists() { - Ok(histdb_path) - } else { - Err(eyre!("Could not find history file.")) - } - } -} - -#[async_trait] -impl Importer for NuHistDb { - // Not sure how this is used - const NAME: &'static str = "nu_histdb"; - - /// Creates a new NuHistDb and populates the history based on the pre-populated data - /// structure. - async fn new() -> Result<Self> { - let dbpath = NuHistDb::histpath()?; - let histdb_entry_vec = hist_from_db(dbpath).await?; - Ok(Self { - histdb: histdb_entry_vec, - }) - } - - async fn entries(&mut self) -> Result<usize> { - Ok(self.histdb.len()) - } - - async fn load(self, h: &mut impl Loader) -> Result<()> { - for i in self.histdb { - h.push(i.into()).await?; - } - Ok(()) - } -} diff --git a/crates/turtle/src/atuin_client/import/powershell.rs b/crates/turtle/src/atuin_client/import/powershell.rs deleted file mode 100644 index 09da0825..00000000 --- a/crates/turtle/src/atuin_client/import/powershell.rs +++ /dev/null @@ -1,202 +0,0 @@ -use async_trait::async_trait; -use directories::BaseDirs; -use eyre::{Result, eyre}; -use std::path::PathBuf; -use time::{Duration, OffsetDateTime}; - -use super::{Importer, Loader, count_lines, unix_byte_lines}; -use crate::atuin_client::history::History; -use crate::atuin_client::import::read_to_end; - -#[derive(Debug)] -pub(crate) struct PowerShell { - bytes: Vec<u8>, - line_count: Option<usize>, -} - -fn get_history_path() -> Result<PathBuf> { - let base = BaseDirs::new().ok_or_else(|| eyre!("could not determine data directory"))?; - - // The command line history in PowerShell is maintained by the PSReadLine module: - // https://learn.microsoft.com/en-us/powershell/module/psreadline/about/about_psreadline#command-history - // - // > PSReadLine maintains a history file containing all the commands and data you've entered from the command line. - // > The history files are a file named `$($Host.Name)_history.txt`. - // > On Windows systems the history file is stored at `$Env:APPDATA\Microsoft\Windows\PowerShell\PSReadLine`. - // > On non-Windows systems, the history files are stored at `$Env:XDG_DATA_HOME/powershell/PSReadLine` - // > or `$Env:HOME/.local/share/powershell/PSReadLine`. - - let dir = if cfg!(windows) { - base.data_dir() - .join("Microsoft") - .join("Windows") - .join("PowerShell") - .join("PSReadLine") - } else { - std::env::var("XDG_DATA_HOME") - .map_or_else( - |_| base.home_dir().join(".local").join("share"), - PathBuf::from, - ) - .join("powershell") - .join("PSReadLine") - }; - - // The history is stored in a file named `$($Host.Name)_history.txt`. - // For the default console host shipped by Microsoft,`$Host.Name` is `ConsoleHost`: - // https://learn.microsoft.com/en-us/dotnet/api/system.management.automation.host.pshost.name#remarks - - let file = dir.join("ConsoleHost_history.txt"); - - if file.is_file() { - Ok(file) - } else { - Err(eyre!("Could not find history file: {}", file.display())) - } -} - -#[async_trait] -impl Importer for PowerShell { - const NAME: &'static str = "PowerShell"; - - async fn new() -> Result<Self> { - let bytes = read_to_end(get_history_path()?)?; - Ok(Self { - bytes, - line_count: None, - }) - } - - async fn entries(&mut self) -> Result<usize> { - // Commands can be split over multiple lines, - // but this is only used for a progress bar, and multi-line commands - // should be quite rare, so this is not an issue in practice. - if self.line_count.is_none() { - self.line_count = Some(count_lines(&self.bytes)); - } - Ok(self.line_count.unwrap()) - } - - async fn load(mut self, h: &mut impl Loader) -> Result<()> { - let line_count = self.entries().await?; - let start = OffsetDateTime::now_utc() - Duration::milliseconds(line_count as i64); - - let mut counter = 0; - let mut iter = unix_byte_lines(&self.bytes); - - while let Some(s) = iter.next() { - let Ok(s) = read_line(s) else { - continue; // We can skip past things like invalid utf8 - }; - - let mut cmd = s.to_string(); - - // Multi-line commands end with a backtick, append the following lines. - while cmd.ends_with('`') { - cmd.pop(); - - let Some(next) = iter.next() else { - break; - }; - let Ok(next) = read_line(next) else { - break; - }; - - cmd.push('\n'); - cmd.push_str(next); - } - - if cmd.is_empty() { - continue; - } - - let offset = Duration::milliseconds(counter); - counter += 1; - - let entry = History::import().timestamp(start + offset).command(cmd); - h.push(entry.build().into()).await?; - } - - Ok(()) - } -} - -fn read_line(s: &[u8]) -> Result<&str> { - let s = str::from_utf8(s)?; - - // History is stored in CRLF on Windows, normalize the input to LF on all platforms. - let s = s.strip_suffix('\r').unwrap_or(s); - - Ok(s) -} - -#[cfg(test)] -mod test { - use super::*; - use crate::import::tests::TestLoader; - use itertools::assert_equal; - - const INPUT: &str = r#"cargo install atuin -cargo update -echo "first line` -second line` -` -last line" -echo foo - -echo bar -echo baz -"#; - - const EXPECTED: &[&str] = &[ - "cargo install atuin", - "cargo update", - "echo \"first line\nsecond line\n\nlast line\"", - "echo foo", - "echo bar", - "echo baz", - ]; - - #[tokio::test] - async fn test_import() { - let loader = import(INPUT).await; - - let actual = loader.buf.iter().map(|h| h.command.clone()); - let expected = EXPECTED.iter().map(|s| s.to_string()); - - assert_equal(actual, expected); - } - - #[tokio::test] - async fn test_crlf() { - let input = INPUT.replace("\n", "\r\n"); - let loader = import(input.as_str()).await; - - let actual = loader.buf.iter().map(|h| h.command.clone()); - let expected = EXPECTED.iter().map(|s| s.to_string()); - - assert_equal(actual, expected); - } - - #[tokio::test] - async fn test_timestamps() { - let loader = import(INPUT).await; - - let mut prev = loader.buf.first().unwrap().timestamp; - for current in loader.buf.iter().skip(1).map(|h| h.timestamp) { - assert!(current > prev); - prev = current; - } - } - - async fn import(input: &str) -> TestLoader { - let powershell = PowerShell { - bytes: input.as_bytes().to_vec(), - line_count: None, - }; - - let mut loader = TestLoader::default(); - powershell.load(&mut loader).await.unwrap(); - loader - } -} diff --git a/crates/turtle/src/atuin_client/import/replxx.rs b/crates/turtle/src/atuin_client/import/replxx.rs deleted file mode 100644 index fbce2598..00000000 --- a/crates/turtle/src/atuin_client/import/replxx.rs +++ /dev/null @@ -1,137 +0,0 @@ -use std::{path::PathBuf, str}; - -use async_trait::async_trait; -use directories::UserDirs; -use eyre::{Result, eyre}; -use time::{OffsetDateTime, PrimitiveDateTime, macros::format_description}; - -use super::{Importer, Loader, get_histfile_path, unix_byte_lines}; -use crate::atuin_client::history::History; -use crate::atuin_client::import::read_to_end; - -#[derive(Debug)] -pub(crate) struct Replxx { - bytes: Vec<u8>, -} - -fn default_histpath() -> Result<PathBuf> { - let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?; - let home_dir = user_dirs.home_dir(); - - // There is no default histfile for replxx. - // Here we try a couple of common names. - let mut candidates = ["replxx_history.txt", ".histfile"].iter(); - loop { - match candidates.next() { - Some(candidate) => { - let histpath = home_dir.join(candidate); - if histpath.exists() { - break Ok(histpath); - } - } - None => { - break Err(eyre!( - "Could not find history file. Try setting and exporting $HISTFILE" - )); - } - } - } -} - -#[async_trait] -impl Importer for Replxx { - const NAME: &'static str = "replxx"; - - async fn new() -> Result<Self> { - let bytes = read_to_end(get_histfile_path(default_histpath)?)?; - Ok(Self { bytes }) - } - - async fn entries(&mut self) -> Result<usize> { - Ok(super::count_lines(&self.bytes) / 2) - } - - async fn load(self, h: &mut impl Loader) -> Result<()> { - let mut timestamp = OffsetDateTime::UNIX_EPOCH; - - for b in unix_byte_lines(&self.bytes) { - let s = std::str::from_utf8(b)?; - match try_parse_line_as_timestamp(s) { - Some(t) => timestamp = t, - None => { - // replxx uses ETB character (0x17) as line breaker - let cmd = s.replace('\u{0017}', "\n"); - let imported = History::import().timestamp(timestamp).command(cmd); - - h.push(imported.build().into()).await?; - } - } - } - - Ok(()) - } -} - -fn try_parse_line_as_timestamp(line: &str) -> Option<OffsetDateTime> { - // replxx history date time format: ### yyyy-mm-dd hh:mm:ss.xxx - let date_time_str = line.strip_prefix("### ")?; - let format = - format_description!("[year]-[month]-[day] [hour]:[minute]:[second].[subsecond digits:3]"); - - let primitive_date_time = PrimitiveDateTime::parse(date_time_str, format).ok()?; - // There is no safe way to get local time offset. - // For simplicity let's just assume UTC. - Some(primitive_date_time.assume_utc()) -} - -#[cfg(test)] -mod test { - - use crate::import::{Importer, tests::TestLoader}; - - use super::Replxx; - - #[tokio::test] - async fn parse_complex() { - let bytes = r#"### 2024-02-10 22:16:28.302 -select * from remote('127.0.0.1:20222', view(select 1)) -### 2024-02-10 22:16:36.919 -select * from numbers(10) -### 2024-02-10 22:16:41.710 -select * from system.numbers -### 2024-02-10 22:19:28.655 -select 1 -### 2024-02-22 11:15:33.046 -CREATE TABLE test( stamp DateTime('UTC'))ENGINE = MergeTreePARTITION BY toDate(stamp)order by tuple() as select toDateTime('2020-01-01')+number*60 from numbers(80000); -"# - .as_bytes() - .to_owned(); - - let replxx = Replxx { bytes }; - - let mut loader = TestLoader::default(); - replxx.load(&mut loader).await.unwrap(); - let mut history = loader.buf.into_iter(); - - // simple wrapper for replxx history entry - macro_rules! history { - ($timestamp:expr_2021, $command:expr_2021) => { - let h = history.next().expect("missing entry in history"); - assert_eq!(h.command.as_str(), $command); - assert_eq!(h.timestamp.unix_timestamp(), $timestamp); - }; - } - - history!( - 1707603388, - "select * from remote('127.0.0.1:20222', view(select 1))" - ); - history!(1707603396, "select * from numbers(10)"); - history!(1707603401, "select * from system.numbers"); - history!(1707603568, "select 1"); - history!( - 1708600533, - "CREATE TABLE test\n( stamp DateTime('UTC'))\nENGINE = MergeTree\nPARTITION BY toDate(stamp)\norder by tuple() as select toDateTime('2020-01-01')+number*60 from numbers(80000);" - ); - } -} diff --git a/crates/turtle/src/atuin_client/import/resh.rs b/crates/turtle/src/atuin_client/import/resh.rs deleted file mode 100644 index 2c75e387..00000000 --- a/crates/turtle/src/atuin_client/import/resh.rs +++ /dev/null @@ -1,140 +0,0 @@ -use std::path::PathBuf; - -use async_trait::async_trait; -use directories::UserDirs; -use eyre::{Result, eyre}; -use serde::Deserialize; - -use crate::atuin_common::utils::uuid_v7; -use time::OffsetDateTime; - -use super::{Importer, Loader, get_histfile_path, unix_byte_lines}; -use crate::atuin_client::history::History; -use crate::atuin_client::import::read_to_end; - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub(crate) struct ReshEntry { - pub(crate) cmd_line: String, - pub(crate) exit_code: i64, - pub(crate) shell: String, - pub(crate) uname: String, - pub(crate) session_id: String, - pub(crate) home: String, - pub(crate) lang: String, - pub(crate) lc_all: String, - pub(crate) login: String, - pub(crate) pwd: String, - pub(crate) pwd_after: String, - pub(crate) shell_env: String, - pub(crate) term: String, - pub(crate) real_pwd: String, - pub(crate) real_pwd_after: String, - pub(crate) pid: i64, - pub(crate) session_pid: i64, - pub(crate) host: String, - pub(crate) hosttype: String, - pub(crate) ostype: String, - pub(crate) machtype: String, - pub(crate) shlvl: i64, - pub(crate) timezone_before: String, - pub(crate) timezone_after: String, - pub(crate) realtime_before: f64, - pub(crate) realtime_after: f64, - pub(crate) realtime_before_local: f64, - pub(crate) realtime_after_local: f64, - pub(crate) realtime_duration: f64, - pub(crate) realtime_since_session_start: f64, - pub(crate) realtime_since_boot: f64, - pub(crate) git_dir: String, - pub(crate) git_real_dir: String, - pub(crate) git_origin_remote: String, - pub(crate) git_dir_after: String, - pub(crate) git_real_dir_after: String, - pub(crate) git_origin_remote_after: String, - pub(crate) machine_id: String, - pub(crate) os_release_id: String, - pub(crate) os_release_version_id: String, - pub(crate) os_release_id_like: String, - pub(crate) os_release_name: String, - pub(crate) os_release_pretty_name: String, - pub(crate) resh_uuid: String, - pub(crate) resh_version: String, - pub(crate) resh_revision: String, - pub(crate) parts_merged: bool, - pub(crate) recalled: bool, - pub(crate) recall_last_cmd_line: String, - pub(crate) cols: String, - pub(crate) lines: String, -} - -#[derive(Debug)] -pub(crate) struct Resh { - bytes: Vec<u8>, -} - -fn default_histpath() -> Result<PathBuf> { - let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?; - let home_dir = user_dirs.home_dir(); - - Ok(home_dir.join(".resh_history.json")) -} - -#[async_trait] -impl Importer for Resh { - const NAME: &'static str = "resh"; - - async fn new() -> Result<Self> { - let bytes = read_to_end(get_histfile_path(default_histpath)?)?; - Ok(Self { bytes }) - } - - async fn entries(&mut self) -> Result<usize> { - Ok(super::count_lines(&self.bytes)) - } - - async fn load(self, h: &mut impl Loader) -> Result<()> { - for b in unix_byte_lines(&self.bytes) { - let s = match std::str::from_utf8(b) { - Ok(s) => s, - Err(_) => continue, // we can skip past things like invalid utf8 - }; - let entry = match serde_json::from_str::<ReshEntry>(s) { - Ok(e) => e, - Err(_) => continue, // skip invalid json :shrug: - }; - - #[expect(clippy::cast_possible_truncation)] - #[expect(clippy::cast_sign_loss)] - let timestamp = { - let secs = entry.realtime_before.floor() as i64; - let nanosecs = (entry.realtime_before.fract() * 1_000_000_000_f64).round() as i64; - OffsetDateTime::from_unix_timestamp(secs)? + time::Duration::nanoseconds(nanosecs) - }; - #[expect(clippy::cast_possible_truncation)] - #[expect(clippy::cast_sign_loss)] - let duration = { - let secs = entry.realtime_after.floor() as i64; - let nanosecs = (entry.realtime_after.fract() * 1_000_000_000_f64).round() as i64; - let base = OffsetDateTime::from_unix_timestamp(secs)? - + time::Duration::nanoseconds(nanosecs); - let difference = base - timestamp; - difference.whole_nanoseconds() as i64 - }; - - let imported = History::import() - .command(entry.cmd_line) - .timestamp(timestamp) - .duration(duration) - .exit(entry.exit_code) - .cwd(entry.pwd) - .hostname(entry.host) - // CHECK: should we add uuid here? It's not set in the other importers - .session(uuid_v7().as_simple().to_string()); - - h.push(imported.build().into()).await?; - } - - Ok(()) - } -} diff --git a/crates/turtle/src/atuin_client/import/xonsh.rs b/crates/turtle/src/atuin_client/import/xonsh.rs deleted file mode 100644 index 5df24284..00000000 --- a/crates/turtle/src/atuin_client/import/xonsh.rs +++ /dev/null @@ -1,234 +0,0 @@ -use std::env; -use std::fs::{self, File}; -use std::path::{Path, PathBuf}; - -use async_trait::async_trait; -use directories::BaseDirs; -use eyre::{Result, eyre}; -use serde::Deserialize; -use time::OffsetDateTime; -use uuid::Uuid; -use uuid::timestamp::{Timestamp, context::NoContext}; - -use super::{Importer, Loader, get_histdir_path}; -use crate::atuin_client::history::History; -use crate::atuin_client::utils::get_host_user; - -// Note: both HistoryFile and HistoryData have other keys present in the JSON, we don't -// care about them so we leave them unspecified so as to avoid deserializing unnecessarily. -#[derive(Debug, Deserialize)] -struct HistoryFile { - data: HistoryData, -} - -#[derive(Debug, Deserialize)] -struct HistoryData { - sessionid: String, - cmds: Vec<HistoryCmd>, -} - -#[derive(Debug, Deserialize)] -struct HistoryCmd { - cwd: String, - inp: String, - rtn: Option<i64>, - ts: (f64, f64), -} - -#[derive(Debug)] -pub(crate) struct Xonsh { - // history is stored as a bunch of json files, one per session - sessions: Vec<HistoryData>, - hostname: String, -} - -fn xonsh_hist_dir(xonsh_data_dir: Option<String>) -> Result<PathBuf> { - // if running within xonsh, this will be available - if let Some(d) = xonsh_data_dir { - let mut path = PathBuf::from(d); - path.push("history_json"); - return Ok(path); - } - - // otherwise, fall back to default - let base = BaseDirs::new().ok_or_else(|| eyre!("Could not determine home directory"))?; - - let hist_dir = base.data_dir().join("xonsh/history_json"); - if hist_dir.exists() || cfg!(test) { - Ok(hist_dir) - } else { - Err(eyre!("Could not find xonsh history files")) - } -} - -fn load_sessions(hist_dir: &Path) -> Result<Vec<HistoryData>> { - let mut sessions = vec![]; - for entry in fs::read_dir(hist_dir)? { - let p = entry?.path(); - let ext = p.extension().and_then(|e| e.to_str()); - if p.is_file() - && ext == Some("json") - && let Some(data) = load_session(&p)? - { - sessions.push(data); - } - } - Ok(sessions) -} - -fn load_session(path: &Path) -> Result<Option<HistoryData>> { - let file = File::open(path)?; - // empty files are not valid json, so we can't deserialize them - if file.metadata()?.len() == 0 { - return Ok(None); - } - - let mut hist_file: HistoryFile = serde_json::from_reader(file)?; - - // if there are commands in this session, replace the existing UUIDv4 - // with a UUIDv7 generated from the timestamp of the first command - if let Some(cmd) = hist_file.data.cmds.first() { - let seconds = cmd.ts.0.trunc() as u64; - let nanos = (cmd.ts.0.fract() * 1_000_000_000_f64) as u32; - let ts = Timestamp::from_unix(NoContext, seconds, nanos); - hist_file.data.sessionid = Uuid::new_v7(ts).to_string(); - } - Ok(Some(hist_file.data)) -} - -#[async_trait] -impl Importer for Xonsh { - const NAME: &'static str = "xonsh"; - - async fn new() -> Result<Self> { - // wrap xonsh-specific path resolver in general one so that it respects $HISTPATH - let xonsh_data_dir = env::var("XONSH_DATA_DIR").ok(); - let hist_dir = get_histdir_path(|| xonsh_hist_dir(xonsh_data_dir))?; - let sessions = load_sessions(&hist_dir)?; - let hostname = get_host_user(); - Ok(Xonsh { sessions, hostname }) - } - - async fn entries(&mut self) -> Result<usize> { - let total = self.sessions.iter().map(|s| s.cmds.len()).sum(); - Ok(total) - } - - async fn load(self, loader: &mut impl Loader) -> Result<()> { - for session in self.sessions { - for cmd in session.cmds { - let (start, end) = cmd.ts; - let ts_nanos = (start * 1_000_000_000_f64) as i128; - let timestamp = OffsetDateTime::from_unix_timestamp_nanos(ts_nanos)?; - - let duration = (end - start) * 1_000_000_000_f64; - - match cmd.rtn { - Some(exit) => { - let entry = History::import() - .timestamp(timestamp) - .duration(duration.trunc() as i64) - .exit(exit) - .command(cmd.inp.trim()) - .cwd(cmd.cwd) - .session(session.sessionid.clone()) - .hostname(self.hostname.clone()); - loader.push(entry.build().into()).await?; - } - None => { - let entry = History::import() - .timestamp(timestamp) - .duration(duration.trunc() as i64) - .command(cmd.inp.trim()) - .cwd(cmd.cwd) - .session(session.sessionid.clone()) - .hostname(self.hostname.clone()); - loader.push(entry.build().into()).await?; - } - } - } - } - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use time::macros::datetime; - - use super::*; - - use crate::history::History; - use crate::import::tests::TestLoader; - - #[test] - fn test_hist_dir_xonsh() { - let hist_dir = xonsh_hist_dir(Some("/home/user/xonsh_data".to_string())).unwrap(); - assert_eq!( - hist_dir, - PathBuf::from("/home/user/xonsh_data/history_json") - ); - } - - #[tokio::test] - async fn test_import() { - let dir = PathBuf::from("tests/data/xonsh"); - let sessions = load_sessions(&dir).unwrap(); - let hostname = "box:user".to_string(); - let xonsh = Xonsh { sessions, hostname }; - - let mut loader = TestLoader::default(); - xonsh.load(&mut loader).await.unwrap(); - // order in buf will depend on filenames, so sort by timestamp for consistency - loader.buf.sort_by_key(|h| h.timestamp); - for (actual, expected) in loader.buf.iter().zip(expected_hist_entries().iter()) { - assert_eq!(actual.timestamp, expected.timestamp); - assert_eq!(actual.command, expected.command); - assert_eq!(actual.cwd, expected.cwd); - assert_eq!(actual.exit, expected.exit); - assert_eq!(actual.duration, expected.duration); - assert_eq!(actual.hostname, expected.hostname); - } - } - - fn expected_hist_entries() -> [History; 4] { - [ - History::import() - .timestamp(datetime!(2024-02-6 04:17:59.478272256 +00:00:00)) - .command("echo hello world!".to_string()) - .cwd("/home/user/Documents/code/atuin".to_string()) - .exit(0) - .duration(4651069) - .hostname("box:user".to_string()) - .build() - .into(), - History::import() - .timestamp(datetime!(2024-02-06 04:18:01.70632832 +00:00:00)) - .command("ls -l".to_string()) - .cwd("/home/user/Documents/code/atuin".to_string()) - .exit(0) - .duration(21288633) - .hostname("box:user".to_string()) - .build() - .into(), - History::import() - .timestamp(datetime!(2024-02-06 17:41:31.142515968 +00:00:00)) - .command("false".to_string()) - .cwd("/home/user/Documents/code/atuin/atuin-client".to_string()) - .exit(1) - .duration(10269403) - .hostname("box:user".to_string()) - .build() - .into(), - History::import() - .timestamp(datetime!(2024-02-06 17:41:32.271584 +00:00:00)) - .command("exit".to_string()) - .cwd("/home/user/Documents/code/atuin/atuin-client".to_string()) - .exit(0) - .duration(4259347) - .hostname("box:user".to_string()) - .build() - .into(), - ] - } -} diff --git a/crates/turtle/src/atuin_client/import/xonsh_sqlite.rs b/crates/turtle/src/atuin_client/import/xonsh_sqlite.rs deleted file mode 100644 index 326fe74b..00000000 --- a/crates/turtle/src/atuin_client/import/xonsh_sqlite.rs +++ /dev/null @@ -1,217 +0,0 @@ -use std::env; -use std::path::PathBuf; - -use async_trait::async_trait; -use directories::BaseDirs; -use eyre::{Result, eyre}; -use futures::TryStreamExt; -use sqlx::{FromRow, Row, sqlite::SqlitePool}; -use time::OffsetDateTime; -use uuid::Uuid; -use uuid::timestamp::{Timestamp, context::NoContext}; - -use super::{Importer, Loader, get_histfile_path}; -use crate::atuin_client::history::History; -use crate::atuin_client::utils::get_host_user; - -#[derive(Debug, FromRow)] -struct HistDbEntry { - inp: String, - rtn: Option<i64>, - tsb: f64, - tse: f64, - cwd: String, - session_start: f64, -} - -impl HistDbEntry { - fn into_hist_with_hostname(self, hostname: String) -> History { - let ts_nanos = (self.tsb * 1_000_000_000_f64) as i128; - let timestamp = OffsetDateTime::from_unix_timestamp_nanos(ts_nanos).unwrap(); - - let session_ts_seconds = self.session_start.trunc() as u64; - let session_ts_nanos = (self.session_start.fract() * 1_000_000_000_f64) as u32; - let session_ts = Timestamp::from_unix(NoContext, session_ts_seconds, session_ts_nanos); - let session_id = Uuid::new_v7(session_ts).to_string(); - let duration = (self.tse - self.tsb) * 1_000_000_000_f64; - - if let Some(exit) = self.rtn { - let imported = History::import() - .timestamp(timestamp) - .duration(duration.trunc() as i64) - .exit(exit) - .command(self.inp) - .cwd(self.cwd) - .session(session_id) - .hostname(hostname); - imported.build().into() - } else { - let imported = History::import() - .timestamp(timestamp) - .duration(duration.trunc() as i64) - .command(self.inp) - .cwd(self.cwd) - .session(session_id) - .hostname(hostname); - imported.build().into() - } - } -} - -fn xonsh_db_path(xonsh_data_dir: Option<String>) -> Result<PathBuf> { - // if running within xonsh, this will be available - if let Some(d) = xonsh_data_dir { - let mut path = PathBuf::from(d); - path.push("xonsh-history.sqlite"); - return Ok(path); - } - - // otherwise, fall back to default - let base = BaseDirs::new().ok_or_else(|| eyre!("Could not determine home directory"))?; - - let hist_file = base.data_dir().join("xonsh/xonsh-history.sqlite"); - if hist_file.exists() || cfg!(test) { - Ok(hist_file) - } else { - Err(eyre!( - "Could not find xonsh history db at: {}", - hist_file.to_string_lossy() - )) - } -} - -#[derive(Debug)] -pub(crate) struct XonshSqlite { - pool: SqlitePool, - hostname: String, -} - -#[async_trait] -impl Importer for XonshSqlite { - const NAME: &'static str = "xonsh_sqlite"; - - async fn new() -> Result<Self> { - // wrap xonsh-specific path resolver in general one so that it respects $HISTPATH - let xonsh_data_dir = env::var("XONSH_DATA_DIR").ok(); - let db_path = get_histfile_path(|| xonsh_db_path(xonsh_data_dir))?; - let connection_str = db_path.to_str().ok_or_else(|| { - eyre!( - "Invalid path for SQLite database: {}", - db_path.to_string_lossy() - ) - })?; - - let pool = SqlitePool::connect(connection_str).await?; - let hostname = get_host_user(); - Ok(XonshSqlite { pool, hostname }) - } - - async fn entries(&mut self) -> Result<usize> { - let query = "SELECT COUNT(*) FROM xonsh_history"; - let row = sqlx::query(query).fetch_one(&self.pool).await?; - let count: u32 = row.get(0); - Ok(count as usize) - } - - async fn load(self, loader: &mut impl Loader) -> Result<()> { - let query = r#" - SELECT inp, rtn, tsb, tse, cwd, - MIN(tsb) OVER (PARTITION BY sessionid) AS session_start - FROM xonsh_history - ORDER BY rowid - "#; - - let mut entries = sqlx::query_as::<_, HistDbEntry>(query).fetch(&self.pool); - - let mut count = 0; - while let Some(entry) = entries.try_next().await? { - let hist = entry.into_hist_with_hostname(self.hostname.clone()); - loader.push(hist).await?; - count += 1; - } - - println!("Loaded: {count}"); - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use time::macros::datetime; - - use super::*; - - use crate::history::History; - use crate::import::tests::TestLoader; - - #[test] - fn test_db_path_xonsh() { - let db_path = xonsh_db_path(Some("/home/user/xonsh_data".to_string())).unwrap(); - assert_eq!( - db_path, - PathBuf::from("/home/user/xonsh_data/xonsh-history.sqlite") - ); - } - - #[tokio::test] - async fn test_import() { - let connection_str = "tests/data/xonsh-history.sqlite"; - let xonsh_sqlite = XonshSqlite { - pool: SqlitePool::connect(connection_str).await.unwrap(), - hostname: "box:user".to_string(), - }; - - let mut loader = TestLoader::default(); - xonsh_sqlite.load(&mut loader).await.unwrap(); - - for (actual, expected) in loader.buf.iter().zip(expected_hist_entries().iter()) { - assert_eq!(actual.timestamp, expected.timestamp); - assert_eq!(actual.command, expected.command); - assert_eq!(actual.cwd, expected.cwd); - assert_eq!(actual.exit, expected.exit); - assert_eq!(actual.duration, expected.duration); - assert_eq!(actual.hostname, expected.hostname); - } - } - - fn expected_hist_entries() -> [History; 4] { - [ - History::import() - .timestamp(datetime!(2024-02-6 17:56:21.130956288 +00:00:00)) - .command("echo hello world!".to_string()) - .cwd("/home/user/Documents/code/atuin".to_string()) - .exit(0) - .duration(2628564) - .hostname("box:user".to_string()) - .build() - .into(), - History::import() - .timestamp(datetime!(2024-02-06 17:56:28.190406144 +00:00:00)) - .command("ls -l".to_string()) - .cwd("/home/user/Documents/code/atuin".to_string()) - .exit(0) - .duration(9371519) - .hostname("box:user".to_string()) - .build() - .into(), - History::import() - .timestamp(datetime!(2024-02-06 17:56:46.989020928 +00:00:00)) - .command("false".to_string()) - .cwd("/home/user/Documents/code/atuin".to_string()) - .exit(1) - .duration(17337560) - .hostname("box:user".to_string()) - .build() - .into(), - History::import() - .timestamp(datetime!(2024-02-06 17:56:48.218384128 +00:00:00)) - .command("exit".to_string()) - .cwd("/home/user/Documents/code/atuin".to_string()) - .exit(0) - .duration(4599094) - .hostname("box:user".to_string()) - .build() - .into(), - ] - } -} diff --git a/crates/turtle/src/atuin_client/import/zsh.rs b/crates/turtle/src/atuin_client/import/zsh.rs deleted file mode 100644 index 55d082d3..00000000 --- a/crates/turtle/src/atuin_client/import/zsh.rs +++ /dev/null @@ -1,230 +0,0 @@ -// import old shell history! -// automatically hoover up all that we can find - -use std::borrow::Cow; -use std::path::PathBuf; - -use async_trait::async_trait; -use directories::UserDirs; -use eyre::{Result, eyre}; -use time::OffsetDateTime; - -use super::{Importer, Loader, get_histfile_path, unix_byte_lines}; -use crate::atuin_client::history::History; -use crate::atuin_client::import::read_to_end; - -#[derive(Debug)] -pub(crate) struct Zsh { - bytes: Vec<u8>, -} - -fn default_histpath() -> Result<PathBuf> { - // oh-my-zsh sets HISTFILE=~/.zhistory - // zsh has no default value for this var, but uses ~/.zhistory. - // zsh-newuser-install propose as default .histfile https://github.com/zsh-users/zsh/blob/master/Functions/Newuser/zsh-newuser-install#L794 - // we could maybe be smarter about this in the future :) - let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?; - let home_dir = user_dirs.home_dir(); - - let mut candidates = [".zhistory", ".zsh_history", ".histfile"].iter(); - loop { - match candidates.next() { - Some(candidate) => { - let histpath = home_dir.join(candidate); - if histpath.exists() { - break Ok(histpath); - } - } - None => { - break Err(eyre!( - "Could not find history file. Try setting and exporting $HISTFILE" - )); - } - } - } -} - -#[async_trait] -impl Importer for Zsh { - const NAME: &'static str = "zsh"; - - async fn new() -> Result<Self> { - let bytes = read_to_end(get_histfile_path(default_histpath)?)?; - Ok(Self { bytes }) - } - - async fn entries(&mut self) -> Result<usize> { - Ok(super::count_lines(&self.bytes)) - } - - async fn load(self, h: &mut impl Loader) -> Result<()> { - let now = OffsetDateTime::now_utc(); - let mut line = String::new(); - - let mut counter = 0; - for b in unix_byte_lines(&self.bytes) { - let s = match unmetafy(b) { - Some(s) => s, - _ => continue, // we can skip past things like invalid utf8 - }; - - if let Some(s) = s.strip_suffix('\\') { - line.push_str(s); - line.push('\n'); - } else { - line.push_str(&s); - let command = std::mem::take(&mut line); - - if let Some(command) = command.strip_prefix(": ") { - counter += 1; - h.push(parse_extended(command, counter)).await?; - } else { - let offset = time::Duration::seconds(counter); - counter += 1; - - let imported = History::import() - // preserve ordering - .timestamp(now - offset) - .command(command.trim_end().to_string()); - - h.push(imported.build().into()).await?; - } - } - } - - Ok(()) - } -} - -fn parse_extended(line: &str, counter: i64) -> History { - let (time, duration) = line.split_once(':').unwrap(); - let (duration, command) = duration.split_once(';').unwrap(); - - let time = time - .parse::<i64>() - .ok() - .and_then(|t| OffsetDateTime::from_unix_timestamp(t).ok()) - .unwrap_or_else(OffsetDateTime::now_utc) - + time::Duration::milliseconds(counter); - - // use nanos, because why the hell not? we won't display them. - let duration = duration.parse::<i64>().map_or(-1, |t| t * 1_000_000_000); - - let imported = History::import() - .timestamp(time) - .command(command.trim_end().to_string()) - .duration(duration); - - imported.build().into() -} - -fn unmetafy(line: &[u8]) -> Option<Cow<'_, str>> { - if line.contains(&0x83) { - let mut s = Vec::with_capacity(line.len()); - let mut is_meta = false; - for ch in line { - if *ch == 0x83 { - is_meta = true; - } else if is_meta { - is_meta = false; - s.push(*ch ^ 32); - } else { - s.push(*ch) - } - } - String::from_utf8(s).ok().map(Cow::Owned) - } else { - std::str::from_utf8(line).ok().map(Cow::Borrowed) - } -} - -#[cfg(test)] -mod test { - use itertools::assert_equal; - - use crate::import::tests::TestLoader; - - use super::*; - - #[test] - fn test_parse_extended_simple() { - let parsed = parse_extended("1613322469:0;cargo install atuin", 0); - - assert_eq!(parsed.command, "cargo install atuin"); - assert_eq!(parsed.duration, 0); - assert_eq!( - parsed.timestamp, - OffsetDateTime::from_unix_timestamp(1_613_322_469).unwrap() - ); - - let parsed = parse_extended("1613322469:10;cargo install atuin;cargo update", 0); - - assert_eq!(parsed.command, "cargo install atuin;cargo update"); - assert_eq!(parsed.duration, 10_000_000_000); - assert_eq!( - parsed.timestamp, - OffsetDateTime::from_unix_timestamp(1_613_322_469).unwrap() - ); - - let parsed = parse_extended("1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", 0); - - assert_eq!(parsed.command, "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷"); - assert_eq!(parsed.duration, 10_000_000_000); - assert_eq!( - parsed.timestamp, - OffsetDateTime::from_unix_timestamp(1_613_322_469).unwrap() - ); - - let parsed = parse_extended("1613322469:10;cargo install \\n atuin\n", 0); - - assert_eq!(parsed.command, "cargo install \\n atuin"); - assert_eq!(parsed.duration, 10_000_000_000); - assert_eq!( - parsed.timestamp, - OffsetDateTime::from_unix_timestamp(1_613_322_469).unwrap() - ); - } - - #[tokio::test] - async fn test_parse_file() { - let bytes = r": 1613322469:0;cargo install atuin -: 1613322469:10;cargo install atuin; \\ -cargo update -: 1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷ -" - .as_bytes() - .to_owned(); - - let mut zsh = Zsh { bytes }; - assert_eq!(zsh.entries().await.unwrap(), 4); - - let mut loader = TestLoader::default(); - zsh.load(&mut loader).await.unwrap(); - - assert_equal( - loader.buf.iter().map(|h| h.command.as_str()), - [ - "cargo install atuin", - "cargo install atuin; \\\ncargo update", - "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", - ], - ); - } - - #[tokio::test] - async fn test_parse_metafied() { - let bytes = - b"echo \xe4\xbd\x83\x80\xe5\xa5\xbd\nls ~/\xe9\x83\xbf\xb3\xe4\xb9\x83\xb0\n".to_vec(); - - let mut zsh = Zsh { bytes }; - assert_eq!(zsh.entries().await.unwrap(), 2); - - let mut loader = TestLoader::default(); - zsh.load(&mut loader).await.unwrap(); - - assert_equal( - loader.buf.iter().map(|h| h.command.as_str()), - ["echo 你好", "ls ~/音乐"], - ); - } -} diff --git a/crates/turtle/src/atuin_client/import/zsh_histdb.rs b/crates/turtle/src/atuin_client/import/zsh_histdb.rs deleted file mode 100644 index 46622e32..00000000 --- a/crates/turtle/src/atuin_client/import/zsh_histdb.rs +++ /dev/null @@ -1,249 +0,0 @@ -// import old shell history from zsh-histdb! -// automatically hoover up all that we can find - -// As far as i can tell there are no version numbers in the histdb sqlite DB, so we're going based -// on the schema from 2022-05-01 -// -// I have run into some histories that will not import b/c of non UTF-8 characters. -// - -// -// An Example sqlite query for hsitdb data: -// -//id|session|command_id|place_id|exit_status|start_time|duration|id|argv|id|host|dir -// -// -// select -// history.id, -// history.start_time, -// places.host, -// places.dir, -// commands.argv -// from history -// left join commands on history.command_id = commands.id -// left join places on history.place_id = places.id ; -// -// CREATE TABLE history (id integer primary key autoincrement, -// session int, -// command_id int references commands (id), -// place_id int references places (id), -// exit_status int, -// start_time int, -// duration int); -// - -use std::collections::HashMap; -use std::path::{Path, PathBuf}; - -use async_trait::async_trait; -use crate::atuin_common::utils::uuid_v7; -use directories::UserDirs; -use eyre::{Result, eyre}; -use sqlx::{Pool, sqlite::SqlitePool}; -use time::PrimitiveDateTime; - -use super::Importer; -use crate::atuin_client::history::History; -use crate::atuin_client::import::Loader; -use crate::atuin_client::utils::{get_hostname, get_username}; - -#[derive(sqlx::FromRow, Debug)] -pub(crate) struct HistDbEntryCount { - pub(crate) count: usize, -} - -#[derive(sqlx::FromRow, Debug)] -pub(crate) struct HistDbEntry { - pub(crate) id: i64, - pub(crate) start_time: PrimitiveDateTime, - pub(crate) host: Vec<u8>, - pub(crate) dir: Vec<u8>, - pub(crate) argv: Vec<u8>, - pub(crate) duration: i64, - pub(crate) exit_status: i64, - pub(crate) session: i64, -} - -#[derive(Debug)] -pub(crate) struct ZshHistDb { - histdb: Vec<HistDbEntry>, - username: String, -} - -/// Read db at given file, return vector of entries. -async fn hist_from_db(dbpath: PathBuf) -> Result<Vec<HistDbEntry>> { - let pool = SqlitePool::connect(dbpath.to_str().unwrap()).await?; - hist_from_db_conn(pool).await -} - -async fn hist_from_db_conn(pool: Pool<sqlx::Sqlite>) -> Result<Vec<HistDbEntry>> { - let query = r#" - SELECT - history.id, history.start_time, history.duration, places.host, places.dir, - commands.argv, history.exit_status, history.session - FROM history - LEFT JOIN commands ON history.command_id = commands.id - LEFT JOIN places ON history.place_id = places.id - ORDER BY history.start_time - "#; - let histdb_vec: Vec<HistDbEntry> = sqlx::query_as::<_, HistDbEntry>(query) - .fetch_all(&pool) - .await?; - Ok(histdb_vec) -} - -impl ZshHistDb { - pub(crate) fn histpath_candidate() -> PathBuf { - // By default histdb database is `${HOME}/.histdb/zsh-history.db` - // This can be modified by ${HISTDB_FILE} - // - // if [[ -z ${HISTDB_FILE} ]]; then - // typeset -g HISTDB_FILE="${HOME}/.histdb/zsh-history.db" - let user_dirs = UserDirs::new().unwrap(); // should catch error here? - let home_dir = user_dirs.home_dir(); - std::env::var("HISTDB_FILE") - .as_ref() - .map(|x| Path::new(x).to_path_buf()) - .unwrap_or_else(|_err| home_dir.join(".histdb/zsh-history.db")) - } - pub(crate) fn histpath() -> Result<PathBuf> { - let histdb_path = ZshHistDb::histpath_candidate(); - if histdb_path.exists() { - Ok(histdb_path) - } else { - Err(eyre!( - "Could not find history file. Try setting $HISTDB_FILE" - )) - } - } -} - -#[async_trait] -impl Importer for ZshHistDb { - // Not sure how this is used - const NAME: &'static str = "zsh_histdb"; - - /// Creates a new ZshHistDb and populates the history based on the pre-populated data - /// structure. - async fn new() -> Result<Self> { - let dbpath = ZshHistDb::histpath()?; - let histdb_entry_vec = hist_from_db(dbpath).await?; - Ok(Self { - histdb: histdb_entry_vec, - username: get_username(), - }) - } - - async fn entries(&mut self) -> Result<usize> { - Ok(self.histdb.len()) - } - - async fn load(self, h: &mut impl Loader) -> Result<()> { - let mut session_map = HashMap::new(); - for entry in self.histdb { - let command = match std::str::from_utf8(&entry.argv) { - Ok(s) => s.trim_end(), - Err(_) => continue, // we can skip past things like invalid utf8 - }; - let cwd = match std::str::from_utf8(&entry.dir) { - Ok(s) => s.trim_end(), - Err(_) => continue, // we can skip past things like invalid utf8 - }; - let hostname = format!( - "{}:{}", - String::from_utf8(entry.host).unwrap_or_else(|_e| get_hostname()), - self.username - ); - let session = session_map.entry(entry.session).or_insert_with(uuid_v7); - - let imported = History::import() - .timestamp(entry.start_time.assume_utc()) - .command(command) - .cwd(cwd) - .duration(entry.duration * 1_000_000_000) - .exit(entry.exit_status) - .session(session.as_simple().to_string()) - .hostname(hostname) - .build(); - h.push(imported.into()).await?; - } - Ok(()) - } -} - -#[cfg(test)] -mod test { - - use super::*; - use sqlx::sqlite::SqlitePoolOptions; - use std::env; - #[tokio::test(flavor = "multi_thread")] - #[expect(unsafe_code)] - async fn test_env_vars() { - let test_env_db = "nonstd-zsh-history.db"; - let key = "HISTDB_FILE"; - // TODO: Audit that the environment access only happens in single-threaded code. - unsafe { env::set_var(key, test_env_db) }; - - // test the env got set - assert_eq!(env::var(key).unwrap(), test_env_db.to_string()); - - // test histdb returns the proper db from previous step - let histdb_path = ZshHistDb::histpath_candidate(); - assert_eq!(histdb_path.to_str().unwrap(), test_env_db); - } - - #[tokio::test(flavor = "multi_thread")] - async fn test_import() { - let pool: SqlitePool = SqlitePoolOptions::new() - .min_connections(2) - .connect(":memory:") - .await - .unwrap(); - - // sql dump directly from a test database. - let db_sql = r#" - PRAGMA foreign_keys=OFF; - BEGIN TRANSACTION; - CREATE TABLE commands (id integer primary key autoincrement, argv text, unique(argv) on conflict ignore); - INSERT INTO commands VALUES(1,'pwd'); - INSERT INTO commands VALUES(2,'curl google.com'); - INSERT INTO commands VALUES(3,'bash'); - CREATE TABLE places (id integer primary key autoincrement, host text, dir text, unique(host, dir) on conflict ignore); - INSERT INTO places VALUES(1,'mbp16.local','/home/noyez'); - CREATE TABLE history (id integer primary key autoincrement, - session int, - command_id int references commands (id), - place_id int references places (id), - exit_status int, - start_time int, - duration int); - INSERT INTO history VALUES(1,0,1,1,0,1651497918,1); - INSERT INTO history VALUES(2,0,2,1,0,1651497923,1); - INSERT INTO history VALUES(3,0,3,1,NULL,1651497930,NULL); - DELETE FROM sqlite_sequence; - INSERT INTO sqlite_sequence VALUES('commands',3); - INSERT INTO sqlite_sequence VALUES('places',3); - INSERT INTO sqlite_sequence VALUES('history',3); - CREATE INDEX hist_time on history(start_time); - CREATE INDEX place_dir on places(dir); - CREATE INDEX place_host on places(host); - CREATE INDEX history_command_place on history(command_id, place_id); - COMMIT; "#; - - sqlx::query(db_sql).execute(&pool).await.unwrap(); - - // test histdb iterator - let histdb_vec = hist_from_db_conn(pool).await.unwrap(); - let histdb = ZshHistDb { - histdb: histdb_vec, - username: get_username(), - }; - - println!("h: {:#?}", histdb.histdb); - println!("counter: {:?}", histdb.histdb.len()); - for i in histdb.histdb { - println!("{i:?}"); - } - } -} |
