diff options
Diffstat (limited to 'atuin-client/src')
| -rw-r--r-- | atuin-client/src/database.rs | 2 | ||||
| -rw-r--r-- | atuin-client/src/import/bash.rs | 162 | ||||
| -rw-r--r-- | atuin-client/src/import/fish.rs | 190 | ||||
| -rw-r--r-- | atuin-client/src/import/mod.rs | 99 | ||||
| -rw-r--r-- | atuin-client/src/import/resh.rs | 135 | ||||
| -rw-r--r-- | atuin-client/src/import/zsh.rs | 220 |
6 files changed, 394 insertions, 414 deletions
diff --git a/atuin-client/src/database.rs b/atuin-client/src/database.rs index 5f37e8b1..7b3ab3be 100644 --- a/atuin-client/src/database.rs +++ b/atuin-client/src/database.rs @@ -41,7 +41,7 @@ pub fn current_context() -> Context { } #[async_trait] -pub trait Database { +pub trait Database: Send + Sync { async fn save(&mut self, h: &History) -> Result<()>; async fn save_bulk(&mut self, h: &[History]) -> Result<()>; diff --git a/atuin-client/src/import/bash.rs b/atuin-client/src/import/bash.rs index 1a171625..10e8de1e 100644 --- a/atuin-client/src/import/bash.rs +++ b/atuin-client/src/import/bash.rs @@ -1,134 +1,106 @@ -use std::{ - fs::File, - io::{BufRead, BufReader, Read, Seek}, - path::{Path, PathBuf}, -}; +use std::{fs::File, io::Read, path::PathBuf}; +use async_trait::async_trait; use directories::UserDirs; use eyre::{eyre, Result}; -use super::{count_lines, Importer}; +use super::{get_histpath, unix_byte_lines, Importer, Loader}; use crate::history::History; #[derive(Debug)] -pub struct Bash<R> { - file: BufReader<R>, - strbuf: String, - loc: usize, - counter: i64, +pub struct Bash { + bytes: Vec<u8>, } -impl<R: Read + Seek> Bash<R> { - fn new(r: R) -> Result<Self> { - let mut buf = BufReader::new(r); - let loc = count_lines(&mut buf)?; +fn default_histpath() -> Result<PathBuf> { + let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?; + let home_dir = user_dirs.home_dir(); - Ok(Self { - file: buf, - strbuf: String::new(), - loc, - counter: 0, - }) - } + Ok(home_dir.join(".bash_history")) } -impl Importer for Bash<File> { +#[async_trait] +impl Importer for Bash { const NAME: &'static str = "bash"; - fn histpath() -> Result<PathBuf> { - let user_dirs = UserDirs::new().unwrap(); - let home_dir = user_dirs.home_dir(); - - Ok(home_dir.join(".bash_history")) + async fn new() -> Result<Self> { + let mut bytes = Vec::new(); + let path = get_histpath(default_histpath)?; + let mut f = File::open(path)?; + f.read_to_end(&mut bytes)?; + Ok(Self { bytes }) } - fn parse(path: impl AsRef<Path>) -> Result<Self> { - Self::new(File::open(path)?) + async fn entries(&mut self) -> Result<usize> { + Ok(super::count_lines(&self.bytes)) } -} - -impl<R: Read> Iterator for Bash<R> { - type Item = Result<History>; - fn next(&mut self) -> Option<Self::Item> { - self.strbuf.clear(); - match self.file.read_line(&mut self.strbuf) { - Ok(0) => return None, - Ok(_) => (), - Err(e) => return Some(Err(eyre!("failed to read line: {}", e))), // we can skip past things like invalid utf8 - } - - self.loc -= 1; + async fn load(self, h: &mut impl Loader) -> Result<()> { + let now = chrono::Utc::now(); + let mut line = String::new(); - while self.strbuf.ends_with("\\\n") { - if self.file.read_line(&mut self.strbuf).is_err() { - // There's a chance that the last line of a command has invalid - // characters, the only safe thing to do is break :/ - // usually just invalid utf8 or smth - // however, we really need to avoid missing history, so it's - // better to have some items that should have been part of - // something else, than to miss things. So break. - break; + for (i, b) in unix_byte_lines(&self.bytes).enumerate() { + let s = match std::str::from_utf8(b) { + Ok(s) => s, + Err(_) => continue, // we can skip past things like invalid utf8 }; - self.loc -= 1; - } - - let time = chrono::Utc::now(); - let offset = chrono::Duration::seconds(self.counter); - let time = time - offset; + if let Some(s) = s.strip_suffix('\\') { + line.push_str(s); + line.push_str("\\\n"); + } else { + line.push_str(s); + let command = std::mem::take(&mut line); - self.counter += 1; - - Some(Ok(History::new( - time, - self.strbuf.trim_end().to_string(), - String::from("unknown"), - -1, - -1, - None, - None, - ))) - } + let offset = chrono::Duration::seconds(i as i64); + h.push(History::new( + now - offset, // preserve ordering + command, + String::from("unknown"), + -1, + -1, + None, + None, + )) + .await?; + } + } - fn size_hint(&self) -> (usize, Option<usize>) { - (0, Some(self.loc)) + Ok(()) } } #[cfg(test)] mod tests { - use std::io::Cursor; + use itertools::assert_equal; + + use crate::import::{tests::TestLoader, Importer}; use super::Bash; - #[test] - fn test_parse_file() { - let input = r"cargo install atuin + #[tokio::test] + async fn test_parse_file() { + let bytes = r"cargo install atuin cargo install atuin; \ cargo update cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷ -"; +" + .as_bytes() + .to_owned(); - let cursor = Cursor::new(input); - let mut bash = Bash::new(cursor).unwrap(); - assert_eq!(bash.loc, 4); - assert_eq!(bash.size_hint(), (0, Some(4))); + let mut bash = Bash { bytes }; + assert_eq!(bash.entries().await.unwrap(), 4); - assert_eq!( - &bash.next().unwrap().unwrap().command, - "cargo install atuin" - ); - assert_eq!( - &bash.next().unwrap().unwrap().command, - "cargo install atuin; \\\ncargo update" - ); - assert_eq!( - &bash.next().unwrap().unwrap().command, - "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷" - ); - assert!(bash.next().is_none()); + let mut loader = TestLoader::default(); + bash.load(&mut loader).await.unwrap(); - assert_eq!(bash.size_hint(), (0, Some(0))); + assert_equal( + loader.buf.iter().map(|h| h.command.as_str()), + [ + "cargo install atuin", + "cargo install atuin; \\\ncargo update", + "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", + ], + ); } } diff --git a/atuin-client/src/import/fish.rs b/atuin-client/src/import/fish.rs index 7c05d180..af932d74 100644 --- a/atuin-client/src/import/fish.rs +++ b/atuin-client/src/import/fish.rs @@ -1,99 +1,90 @@ // import old shell history! // automatically hoover up all that we can find -use std::{ - fs::File, - io::{self, BufRead, BufReader, Read, Seek}, - path::{Path, PathBuf}, -}; +use std::{fs::File, io::Read, path::PathBuf}; +use async_trait::async_trait; use chrono::{prelude::*, Utc}; use directories::BaseDirs; use eyre::{eyre, Result}; -use super::{count_lines, Importer}; +use super::{get_histpath, unix_byte_lines, Importer, Loader}; use crate::history::History; #[derive(Debug)] -pub struct Fish<R> { - file: BufReader<R>, - strbuf: String, - loc: usize, +pub struct Fish { + bytes: Vec<u8>, } -impl<R: Read + Seek> Fish<R> { - fn new(r: R) -> Result<Self> { - let mut buf = BufReader::new(r); - let loc = count_lines(&mut buf)?; +/// see https://fishshell.com/docs/current/interactive.html#searchable-command-history +fn default_histpath() -> Result<PathBuf> { + let base = BaseDirs::new().ok_or_else(|| eyre!("could not determine data directory"))?; + let data = base.data_local_dir(); - Ok(Self { - file: buf, - strbuf: String::new(), - loc, - }) - } -} + // fish supports multiple history sessions + // If `fish_history` var is missing, or set to `default`, use `fish` as the session + let session = std::env::var("fish_history").unwrap_or_else(|_| String::from("fish")); + let session = if session == "default" { + String::from("fish") + } else { + session + }; + + let mut histpath = data.join("fish"); + histpath.push(format!("{}_history", session)); -impl<R: Read> Fish<R> { - fn new_entry(&mut self) -> io::Result<bool> { - let inner = self.file.fill_buf()?; - Ok(inner.starts_with(b"- ")) + if histpath.exists() { + Ok(histpath) + } else { + Err(eyre!("Could not find history file. Try setting $HISTFILE")) } } -impl Importer for Fish<File> { +#[async_trait] +impl Importer for Fish { const NAME: &'static str = "fish"; - /// see https://fishshell.com/docs/current/interactive.html#searchable-command-history - fn histpath() -> Result<PathBuf> { - let base = BaseDirs::new().ok_or_else(|| eyre!("could not determine data directory"))?; - let data = base.data_local_dir(); - - // fish supports multiple history sessions - // If `fish_history` var is missing, or set to `default`, use `fish` as the session - let session = std::env::var("fish_history").unwrap_or_else(|_| String::from("fish")); - let session = if session == "default" { - String::from("fish") - } else { - session - }; - - let mut histpath = data.join("fish"); - histpath.push(format!("{}_history", session)); - - if histpath.exists() { - Ok(histpath) - } else { - Err(eyre!("Could not find history file. Try setting $HISTFILE")) - } + async fn new() -> Result<Self> { + let mut bytes = Vec::new(); + let path = get_histpath(default_histpath)?; + let mut f = File::open(path)?; + f.read_to_end(&mut bytes)?; + Ok(Self { bytes }) } - fn parse(path: impl AsRef<Path>) -> Result<Self> { - Self::new(File::open(path)?) + async fn entries(&mut self) -> Result<usize> { + Ok(super::count_lines(&self.bytes)) } -} -impl<R: Read> Iterator for Fish<R> { - type Item = Result<History>; - - fn next(&mut self) -> Option<Self::Item> { + async fn load(self, loader: &mut impl Loader) -> Result<()> { + let now = Utc::now(); let mut time: Option<DateTime<Utc>> = None; let mut cmd: Option<String> = None; - loop { - self.strbuf.clear(); - match self.file.read_line(&mut self.strbuf) { - // no more content to read - Ok(0) => break, - // bail on IO error - Err(e) => return Some(Err(e.into())), - _ => (), - } + for b in unix_byte_lines(&self.bytes) { + let s = match std::str::from_utf8(b) { + Ok(s) => s, + Err(_) => continue, // we can skip past things like invalid utf8 + }; + + if let Some(c) = s.strip_prefix("- cmd: ") { + // first, we must deal with the prev cmd + if let Some(cmd) = cmd.take() { + let time = time.unwrap_or(now); - // `read_line` adds the line delimeter to the string. No thanks - self.strbuf.pop(); + loader + .push(History::new( + time, + cmd, + "unknown".into(), + -1, + -1, + None, + None, + )) + .await?; + } - if let Some(c) = self.strbuf.strip_prefix("- cmd: ") { // using raw strings to avoid needing escaping. // replaces double backslashes with single backslashes let c = c.replace(r"\\", r"\"); @@ -102,7 +93,7 @@ impl<R: Read> Iterator for Fish<R> { // TODO: any other escape characters? cmd = Some(c); - } else if let Some(t) = self.strbuf.strip_prefix(" when: ") { + } else if let Some(t) = s.strip_prefix(" when: ") { // if t is not an int, just ignore this line if let Ok(t) = t.parse::<i64>() { time = Some(Utc.timestamp(t, 0)); @@ -110,47 +101,40 @@ impl<R: Read> Iterator for Fish<R> { } else { // ... ignore paths lines } - - match self.new_entry() { - // next line is a new entry, so let's stop here - // only if we have found a command though - Ok(true) if cmd.is_some() => break, - // bail on IO error - Err(e) => return Some(Err(e.into())), - _ => (), - } } - let cmd = cmd?; - let time = time.unwrap_or_else(Utc::now); + // we might have a trailing cmd + if let Some(cmd) = cmd.take() { + let time = time.unwrap_or(now); - Some(Ok(History::new( - time, - cmd, - "unknown".into(), - -1, - -1, - None, - None, - ))) - } + loader + .push(History::new( + time, + cmd, + "unknown".into(), + -1, + -1, + None, + None, + )) + .await?; + } - fn size_hint(&self) -> (usize, Option<usize>) { - // worst case, entry per line - (0, Some(self.loc)) + Ok(()) } } #[cfg(test)] mod test { - use std::io::Cursor; + + use crate::import::{tests::TestLoader, Importer}; use super::Fish; - #[test] - fn parse_complex() { + #[tokio::test] + async fn parse_complex() { // complicated input with varying contents and escaped strings. - let input = r#"- cmd: history --help + let bytes = r#"- cmd: history --help when: 1639162832 - cmd: cat ~/.bash_history when: 1639162851 @@ -181,14 +165,20 @@ ERROR when: 1639163066 paths: - ~/.local/share/fish/fish_history -"#; - let cursor = Cursor::new(input); - let mut fish = Fish::new(cursor).unwrap(); +"# + .as_bytes() + .to_owned(); + + let fish = Fish { bytes }; + + let mut loader = TestLoader::default(); + fish.load(&mut loader).await.unwrap(); + let mut history = loader.buf.into_iter(); // simple wrapper for fish history entry macro_rules! fishtory { ($timestamp:expr, $command:expr) => { - let h = fish.next().expect("missing entry in history").unwrap(); + let h = history.next().expect("missing entry in history"); assert_eq!(h.command.as_str(), $command); assert_eq!(h.timestamp.timestamp(), $timestamp); }; diff --git a/atuin-client/src/import/mod.rs b/atuin-client/src/import/mod.rs index 8d4aa17f..07178d17 100644 --- a/atuin-client/src/import/mod.rs +++ b/atuin-client/src/import/mod.rs @@ -1,9 +1,8 @@ -use std::{ - io::{BufRead, BufReader, Read, Seek, SeekFrom}, - path::{Path, PathBuf}, -}; +use std::path::PathBuf; -use eyre::Result; +use async_trait::async_trait; +use eyre::{bail, Result}; +use memchr::Memchr; use crate::history::History; @@ -12,16 +11,88 @@ pub mod fish; pub mod resh; pub mod zsh; -// this could probably be sped up -fn count_lines(buf: &mut BufReader<impl Read + Seek>) -> Result<usize> { - let lines = buf.lines().count(); - buf.seek(SeekFrom::Start(0))?; +#[async_trait] +pub trait Importer: Sized { + const NAME: &'static str; + async fn new() -> Result<Self>; + async fn entries(&mut self) -> Result<usize>; + async fn load(self, loader: &mut impl Loader) -> Result<()>; +} - Ok(lines) +#[async_trait] +pub trait Loader: Sync + Send { + async fn push(&mut self, hist: History) -> eyre::Result<()>; } -pub trait Importer: IntoIterator<Item = Result<History>> + Sized { - const NAME: &'static str; - fn histpath() -> Result<PathBuf>; - fn parse(path: impl AsRef<Path>) -> Result<Self>; +fn unix_byte_lines(input: &[u8]) -> impl Iterator<Item = &[u8]> { + UnixByteLines { + iter: memchr::memchr_iter(b'\n', input), + bytes: input, + i: 0, + } +} + +struct UnixByteLines<'a> { + iter: Memchr<'a>, + bytes: &'a [u8], + i: usize, +} + +impl<'a> Iterator for UnixByteLines<'a> { + type Item = &'a [u8]; + + fn next(&mut self) -> Option<Self::Item> { + let j = self.iter.next()?; + let out = &self.bytes[self.i..j]; + self.i = j + 1; + Some(out) + } + + fn count(self) -> usize + where + Self: Sized, + { + self.iter.count() + } +} + +fn count_lines(input: &[u8]) -> usize { + unix_byte_lines(input).count() +} + +fn get_histpath<D>(def: D) -> Result<PathBuf> +where + D: FnOnce() -> Result<PathBuf>, +{ + if let Ok(p) = std::env::var("HISTFILE") { + is_file(PathBuf::from(p)) + } else { + is_file(def()?) + } +} + +fn is_file(p: PathBuf) -> Result<PathBuf> { + if p.is_file() { + Ok(p) + } else { + bail!("Could not find history file {:?}. Try setting $HISTFILE", p) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Default)] + pub struct TestLoader { + pub buf: Vec<History>, + } + + #[async_trait] + impl Loader for TestLoader { + async fn push(&mut self, hist: History) -> Result<()> { + self.buf.push(hist); + Ok(()) + } + } } diff --git a/atuin-client/src/import/resh.rs b/atuin-client/src/import/resh.rs index 3eea84d7..75487fee 100644 --- a/atuin-client/src/import/resh.rs +++ b/atuin-client/src/import/resh.rs @@ -1,9 +1,6 @@ -use std::{ - fs::File, - io::{BufRead, BufReader}, - path::{Path, PathBuf}, -}; +use std::{fs::File, io::Read, path::PathBuf}; +use async_trait::async_trait; use chrono::{TimeZone, Utc}; use directories::UserDirs; use eyre::{eyre, Result}; @@ -11,7 +8,7 @@ use serde::Deserialize; use atuin_common::utils::uuid_v4; -use super::{count_lines, Importer}; +use super::{get_histpath, unix_byte_lines, Importer, Loader}; use crate::history::History; #[derive(Deserialize, Debug)] @@ -72,88 +69,72 @@ pub struct ReshEntry { #[derive(Debug)] pub struct Resh { - file: BufReader<File>, - strbuf: String, - loc: usize, + bytes: Vec<u8>, } +fn default_histpath() -> Result<PathBuf> { + let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?; + let home_dir = user_dirs.home_dir(); + + Ok(home_dir.join(".resh_history.json")) +} + +#[async_trait] impl Importer for Resh { const NAME: &'static str = "resh"; - fn histpath() -> Result<PathBuf> { - let user_dirs = UserDirs::new().unwrap(); - let home_dir = user_dirs.home_dir(); - - Ok(home_dir.join(".resh_history.json")) + async fn new() -> Result<Self> { + let mut bytes = Vec::new(); + let path = get_histpath(default_histpath)?; + let mut f = File::open(path)?; + f.read_to_end(&mut bytes)?; + Ok(Self { bytes }) } - fn parse(path: impl AsRef<Path>) -> Result<Self> { - let file = File::open(path)?; - let mut buf = BufReader::new(file); - let loc = count_lines(&mut buf)?; - - Ok(Self { - file: buf, - strbuf: String::new(), - loc, - }) + async fn entries(&mut self) -> Result<usize> { + Ok(super::count_lines(&self.bytes)) } -} -impl Iterator for Resh { - type Item = Result<History>; + async fn load(self, h: &mut impl Loader) -> Result<()> { + for b in unix_byte_lines(&self.bytes) { + let s = match std::str::from_utf8(b) { + Ok(s) => s, + Err(_) => continue, // we can skip past things like invalid utf8 + }; + let entry = match serde_json::from_str::<ReshEntry>(s) { + Ok(e) => e, + Err(_) => continue, // skip invalid json :shrug: + }; - fn next(&mut self) -> Option<Self::Item> { - self.strbuf.clear(); - match self.file.read_line(&mut self.strbuf) { - Ok(0) => return None, - Ok(_) => (), - Err(e) => return Some(Err(eyre!("failed to read line: {}", e))), // we can skip past things like invalid utf8 - } - - // .resh_history.json lies about being a json. It is actually a file containing valid json - // on every line. This means that the last line will throw an error, as it is just an EOF. - // Without the special case here, that will crash the importer. - let entry = match serde_json::from_str::<ReshEntry>(&self.strbuf) { - Ok(e) => e, - Err(e) if e.is_eof() => return None, - Err(e) => { - return Some(Err(eyre!( - "Invalid entry found in resh_history file: {}", - e - ))) - } - }; - - #[allow(clippy::cast_possible_truncation)] - #[allow(clippy::cast_sign_loss)] - let timestamp = { - let secs = entry.realtime_before.floor() as i64; - let nanosecs = (entry.realtime_before.fract() * 1_000_000_000_f64).round() as u32; - Utc.timestamp(secs, nanosecs) - }; - #[allow(clippy::cast_possible_truncation)] - #[allow(clippy::cast_sign_loss)] - let duration = { - let secs = entry.realtime_after.floor() as i64; - let nanosecs = (entry.realtime_after.fract() * 1_000_000_000_f64).round() as u32; - let difference = Utc.timestamp(secs, nanosecs) - timestamp; - difference.num_nanoseconds().unwrap_or(0) - }; + #[allow(clippy::cast_possible_truncation)] + #[allow(clippy::cast_sign_loss)] + let timestamp = { + let secs = entry.realtime_before.floor() as i64; + let nanosecs = (entry.realtime_before.fract() * 1_000_000_000_f64).round() as u32; + Utc.timestamp(secs, nanosecs) + }; + #[allow(clippy::cast_possible_truncation)] + #[allow(clippy::cast_sign_loss)] + let duration = { + let secs = entry.realtime_after.floor() as i64; + let nanosecs = (entry.realtime_after.fract() * 1_000_000_000_f64).round() as u32; + let difference = Utc.timestamp(secs, nanosecs) - timestamp; + difference.num_nanoseconds().unwrap_or(0) + }; - Some(Ok(History { - id: uuid_v4(), - timestamp, - duration, - exit: entry.exit_code, - command: entry.cmd_line, - cwd: entry.pwd, - session: uuid_v4(), - hostname: entry.host, - })) - } + h.push(History { + id: uuid_v4(), + timestamp, + duration, + exit: entry.exit_code, + command: entry.cmd_line, + cwd: entry.pwd, + session: uuid_v4(), + hostname: entry.host, + }) + .await?; + } - fn size_hint(&self) -> (usize, Option<usize>) { - (self.loc, Some(self.loc)) + Ok(()) } } diff --git a/atuin-client/src/import/zsh.rs b/atuin-client/src/import/zsh.rs index 915b3115..62e814d6 100644 --- a/atuin-client/src/import/zsh.rs +++ b/atuin-client/src/import/zsh.rs @@ -1,138 +1,104 @@ // import old shell history! // automatically hoover up all that we can find -use std::{ - fs::File, - io::{BufRead, BufReader, Read, Seek}, - path::{Path, PathBuf}, -}; +use std::{fs::File, io::Read, path::PathBuf}; +use async_trait::async_trait; use chrono::{prelude::*, Utc}; use directories::UserDirs; use eyre::{eyre, Result}; -use itertools::Itertools; -use super::{count_lines, Importer}; +use super::{get_histpath, unix_byte_lines, Importer, Loader}; use crate::history::History; #[derive(Debug)] -pub struct Zsh<R> { - file: BufReader<R>, - strbuf: String, - loc: usize, - counter: i64, +pub struct Zsh { + bytes: Vec<u8>, } -impl<R: Read + Seek> Zsh<R> { - fn new(r: R) -> Result<Self> { - let mut buf = BufReader::new(r); - let loc = count_lines(&mut buf)?; +fn default_histpath() -> Result<PathBuf> { + // oh-my-zsh sets HISTFILE=~/.zhistory + // zsh has no default value for this var, but uses ~/.zhistory. + // we could maybe be smarter about this in the future :) + let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?; + let home_dir = user_dirs.home_dir(); - Ok(Self { - file: buf, - strbuf: String::new(), - loc, - counter: 0, - }) - } -} - -impl Importer for Zsh<File> { - const NAME: &'static str = "zsh"; - - fn histpath() -> Result<PathBuf> { - // oh-my-zsh sets HISTFILE=~/.zhistory - // zsh has no default value for this var, but uses ~/.zhistory. - // we could maybe be smarter about this in the future :) - let user_dirs = UserDirs::new().unwrap(); - let home_dir = user_dirs.home_dir(); - - let mut candidates = [".zhistory", ".zsh_history"].iter(); - loop { - match candidates.next() { - Some(candidate) => { - let histpath = home_dir.join(candidate); - if histpath.exists() { - break Ok(histpath); - } + let mut candidates = [".zhistory", ".zsh_history"].iter(); + loop { + match candidates.next() { + Some(candidate) => { + let histpath = home_dir.join(candidate); + if histpath.exists() { + break Ok(histpath); } - None => break Err(eyre!("Could not find history file. Try setting $HISTFILE")), } + None => break Err(eyre!("Could not find history file. Try setting $HISTFILE")), } } - - fn parse(path: impl AsRef<Path>) -> Result<Self> { - Self::new(File::open(path)?) - } } -impl<R: Read> Iterator for Zsh<R> { - type Item = Result<History>; - - fn next(&mut self) -> Option<Self::Item> { - // ZSH extended history records the timestamp + command duration - // These lines begin with : - // So, if the line begins with :, parse it. Otherwise it's just - // the command - self.strbuf.clear(); - match self.file.read_line(&mut self.strbuf) { - Ok(0) => return None, - Ok(_) => (), - Err(e) => return Some(Err(eyre!("failed to read line: {}", e))), // we can skip past things like invalid utf8 - } - - self.loc -= 1; +#[async_trait] +impl Importer for Zsh { + const NAME: &'static str = "bash"; - while self.strbuf.ends_with("\\\n") { - if self.file.read_line(&mut self.strbuf).is_err() { - // There's a chance that the last line of a command has invalid - // characters, the only safe thing to do is break :/ - // usually just invalid utf8 or smth - // however, we really need to avoid missing history, so it's - // better to have some items that should have been part of - // something else, than to miss things. So break. - break; - }; + async fn new() -> Result<Self> { + let mut bytes = Vec::new(); + let path = get_histpath(default_histpath)?; + let mut f = File::open(path)?; + f.read_to_end(&mut bytes)?; + Ok(Self { bytes }) + } - self.loc -= 1; - } + async fn entries(&mut self) -> Result<usize> { + Ok(super::count_lines(&self.bytes)) + } - // We have to handle the case where a line has escaped newlines. - // Keep reading until we have a non-escaped newline + async fn load(self, h: &mut impl Loader) -> Result<()> { + let now = chrono::Utc::now(); + let mut line = String::new(); - let extended = self.strbuf.starts_with(':'); + let mut counter = 0; + for b in unix_byte_lines(&self.bytes) { + let s = match std::str::from_utf8(b) { + Ok(s) => s, + Err(_) => continue, // we can skip past things like invalid utf8 + }; - if extended { - self.counter += 1; - Some(Ok(parse_extended(&self.strbuf, self.counter))) - } else { - let time = chrono::Utc::now(); - let offset = chrono::Duration::seconds(self.counter); - let time = time - offset; + if let Some(s) = s.strip_suffix('\\') { + line.push_str(s); + line.push_str("\\\n"); + } else { + line.push_str(s); + let command = std::mem::take(&mut line); - self.counter += 1; + if let Some(command) = command.strip_prefix(": ") { + counter += 1; + h.push(parse_extended(command, counter)).await?; + } else { + let offset = chrono::Duration::seconds(counter); + counter += 1; - Some(Ok(History::new( - time, - self.strbuf.trim_end().to_string(), - String::from("unknown"), - -1, - -1, - None, - None, - ))) + h.push(History::new( + now - offset, // preserve ordering + command.trim_end().to_string(), + String::from("unknown"), + -1, + -1, + None, + None, + )) + .await?; + } + } } - } - fn size_hint(&self) -> (usize, Option<usize>) { - (0, Some(self.loc)) + Ok(()) } } fn parse_extended(line: &str, counter: i64) -> History { - let line = line.replacen(": ", "", 2); - let (time, duration) = line.splitn(2, ':').collect_tuple().unwrap(); - let (duration, command) = duration.splitn(2, ';').collect_tuple().unwrap(); + let (time, duration) = line.split_once(':').unwrap(); + let (duration, command) = duration.split_once(';').unwrap(); let time = time .parse::<i64>() @@ -158,64 +124,64 @@ fn parse_extended(line: &str, counter: i64) -> History { #[cfg(test)] mod test { - use std::io::Cursor; - use chrono::prelude::*; use chrono::Utc; + use itertools::assert_equal; + + use crate::import::tests::TestLoader; use super::*; #[test] fn test_parse_extended_simple() { - let parsed = parse_extended(": 1613322469:0;cargo install atuin", 0); + let parsed = parse_extended("1613322469:0;cargo install atuin", 0); assert_eq!(parsed.command, "cargo install atuin"); assert_eq!(parsed.duration, 0); assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0)); - let parsed = parse_extended(": 1613322469:10;cargo install atuin;cargo update", 0); + let parsed = parse_extended("1613322469:10;cargo install atuin;cargo update", 0); assert_eq!(parsed.command, "cargo install atuin;cargo update"); assert_eq!(parsed.duration, 10_000_000_000); assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0)); - let parsed = parse_extended(": 1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", 0); + let parsed = parse_extended("1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", 0); assert_eq!(parsed.command, "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷"); assert_eq!(parsed.duration, 10_000_000_000); assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0)); - let parsed = parse_extended(": 1613322469:10;cargo install \\n atuin\n", 0); + let parsed = parse_extended("1613322469:10;cargo install \\n atuin\n", 0); assert_eq!(parsed.command, "cargo install \\n atuin"); assert_eq!(parsed.duration, 10_000_000_000); assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0)); } - #[test] - fn test_parse_file() { - let input = r": 1613322469:0;cargo install atuin + #[tokio::test] + async fn test_parse_file() { + let bytes = r": 1613322469:0;cargo install atuin : 1613322469:10;cargo install atuin; \ cargo update : 1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷ -"; +" + .as_bytes() + .to_owned(); - let cursor = Cursor::new(input); - let mut zsh = Zsh::new(cursor).unwrap(); - assert_eq!(zsh.loc, 4); - assert_eq!(zsh.size_hint(), (0, Some(4))); + let mut zsh = Zsh { bytes }; + assert_eq!(zsh.entries().await.unwrap(), 4); - assert_eq!(&zsh.next().unwrap().unwrap().command, "cargo install atuin"); - assert_eq!( - &zsh.next().unwrap().unwrap().command, - "cargo install atuin; \\\ncargo update" - ); - assert_eq!( - &zsh.next().unwrap().unwrap().command, - "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷" - ); - assert!(zsh.next().is_none()); + let mut loader = TestLoader::default(); + zsh.load(&mut loader).await.unwrap(); - assert_eq!(zsh.size_hint(), (0, Some(0))); + assert_equal( + loader.buf.iter().map(|h| h.command.as_str()), + [ + "cargo install atuin", + "cargo install atuin; \\\ncargo update", + "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", + ], + ); } } |
