From 1d030b9d32f539fd38f5ff3335234c5111c3303f Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Mon, 9 May 2022 07:46:52 +0100 Subject: Importer V3 (#395) * start of importer refactor * fish * resh * zsh --- atuin-client/src/import/zsh.rs | 232 ++++++++++++++++++----------------------- 1 file changed, 99 insertions(+), 133 deletions(-) (limited to 'atuin-client/src/import/zsh.rs') diff --git a/atuin-client/src/import/zsh.rs b/atuin-client/src/import/zsh.rs index 915b3115..62e814d6 100644 --- a/atuin-client/src/import/zsh.rs +++ b/atuin-client/src/import/zsh.rs @@ -1,138 +1,104 @@ // import old shell history! // automatically hoover up all that we can find -use std::{ - fs::File, - io::{BufRead, BufReader, Read, Seek}, - path::{Path, PathBuf}, -}; +use std::{fs::File, io::Read, path::PathBuf}; +use async_trait::async_trait; use chrono::{prelude::*, Utc}; use directories::UserDirs; use eyre::{eyre, Result}; -use itertools::Itertools; -use super::{count_lines, Importer}; +use super::{get_histpath, unix_byte_lines, Importer, Loader}; use crate::history::History; #[derive(Debug)] -pub struct Zsh { - file: BufReader, - strbuf: String, - loc: usize, - counter: i64, +pub struct Zsh { + bytes: Vec, } -impl Zsh { - fn new(r: R) -> Result { - let mut buf = BufReader::new(r); - let loc = count_lines(&mut buf)?; - - Ok(Self { - file: buf, - strbuf: String::new(), - loc, - counter: 0, - }) - } -} - -impl Importer for Zsh { - const NAME: &'static str = "zsh"; - - fn histpath() -> Result { - // oh-my-zsh sets HISTFILE=~/.zhistory - // zsh has no default value for this var, but uses ~/.zhistory. - // we could maybe be smarter about this in the future :) - let user_dirs = UserDirs::new().unwrap(); - let home_dir = user_dirs.home_dir(); - - let mut candidates = [".zhistory", ".zsh_history"].iter(); - loop { - match candidates.next() { - Some(candidate) => { - let histpath = home_dir.join(candidate); - if histpath.exists() { - break Ok(histpath); - } +fn default_histpath() -> Result { + // oh-my-zsh sets HISTFILE=~/.zhistory + // zsh has no default value for this var, but uses ~/.zhistory. + // we could maybe be smarter about this in the future :) + let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?; + let home_dir = user_dirs.home_dir(); + + let mut candidates = [".zhistory", ".zsh_history"].iter(); + loop { + match candidates.next() { + Some(candidate) => { + let histpath = home_dir.join(candidate); + if histpath.exists() { + break Ok(histpath); } - None => break Err(eyre!("Could not find history file. Try setting $HISTFILE")), } + None => break Err(eyre!("Could not find history file. Try setting $HISTFILE")), } } +} + +#[async_trait] +impl Importer for Zsh { + const NAME: &'static str = "bash"; - fn parse(path: impl AsRef) -> Result { - Self::new(File::open(path)?) + async fn new() -> Result { + let mut bytes = Vec::new(); + let path = get_histpath(default_histpath)?; + let mut f = File::open(path)?; + f.read_to_end(&mut bytes)?; + Ok(Self { bytes }) } -} -impl Iterator for Zsh { - type Item = Result; - - fn next(&mut self) -> Option { - // ZSH extended history records the timestamp + command duration - // These lines begin with : - // So, if the line begins with :, parse it. Otherwise it's just - // the command - self.strbuf.clear(); - match self.file.read_line(&mut self.strbuf) { - Ok(0) => return None, - Ok(_) => (), - Err(e) => return Some(Err(eyre!("failed to read line: {}", e))), // we can skip past things like invalid utf8 - } + async fn entries(&mut self) -> Result { + Ok(super::count_lines(&self.bytes)) + } - self.loc -= 1; - - while self.strbuf.ends_with("\\\n") { - if self.file.read_line(&mut self.strbuf).is_err() { - // There's a chance that the last line of a command has invalid - // characters, the only safe thing to do is break :/ - // usually just invalid utf8 or smth - // however, we really need to avoid missing history, so it's - // better to have some items that should have been part of - // something else, than to miss things. So break. - break; - }; + async fn load(self, h: &mut impl Loader) -> Result<()> { + let now = chrono::Utc::now(); + let mut line = String::new(); - self.loc -= 1; - } + let mut counter = 0; + for b in unix_byte_lines(&self.bytes) { + let s = match std::str::from_utf8(b) { + Ok(s) => s, + Err(_) => continue, // we can skip past things like invalid utf8 + }; - // We have to handle the case where a line has escaped newlines. - // Keep reading until we have a non-escaped newline - - let extended = self.strbuf.starts_with(':'); - - if extended { - self.counter += 1; - Some(Ok(parse_extended(&self.strbuf, self.counter))) - } else { - let time = chrono::Utc::now(); - let offset = chrono::Duration::seconds(self.counter); - let time = time - offset; - - self.counter += 1; - - Some(Ok(History::new( - time, - self.strbuf.trim_end().to_string(), - String::from("unknown"), - -1, - -1, - None, - None, - ))) + if let Some(s) = s.strip_suffix('\\') { + line.push_str(s); + line.push_str("\\\n"); + } else { + line.push_str(s); + let command = std::mem::take(&mut line); + + if let Some(command) = command.strip_prefix(": ") { + counter += 1; + h.push(parse_extended(command, counter)).await?; + } else { + let offset = chrono::Duration::seconds(counter); + counter += 1; + + h.push(History::new( + now - offset, // preserve ordering + command.trim_end().to_string(), + String::from("unknown"), + -1, + -1, + None, + None, + )) + .await?; + } + } } - } - fn size_hint(&self) -> (usize, Option) { - (0, Some(self.loc)) + Ok(()) } } fn parse_extended(line: &str, counter: i64) -> History { - let line = line.replacen(": ", "", 2); - let (time, duration) = line.splitn(2, ':').collect_tuple().unwrap(); - let (duration, command) = duration.splitn(2, ';').collect_tuple().unwrap(); + let (time, duration) = line.split_once(':').unwrap(); + let (duration, command) = duration.split_once(';').unwrap(); let time = time .parse::() @@ -158,64 +124,64 @@ fn parse_extended(line: &str, counter: i64) -> History { #[cfg(test)] mod test { - use std::io::Cursor; - use chrono::prelude::*; use chrono::Utc; + use itertools::assert_equal; + + use crate::import::tests::TestLoader; use super::*; #[test] fn test_parse_extended_simple() { - let parsed = parse_extended(": 1613322469:0;cargo install atuin", 0); + let parsed = parse_extended("1613322469:0;cargo install atuin", 0); assert_eq!(parsed.command, "cargo install atuin"); assert_eq!(parsed.duration, 0); assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0)); - let parsed = parse_extended(": 1613322469:10;cargo install atuin;cargo update", 0); + let parsed = parse_extended("1613322469:10;cargo install atuin;cargo update", 0); assert_eq!(parsed.command, "cargo install atuin;cargo update"); assert_eq!(parsed.duration, 10_000_000_000); assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0)); - let parsed = parse_extended(": 1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", 0); + let parsed = parse_extended("1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", 0); assert_eq!(parsed.command, "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷"); assert_eq!(parsed.duration, 10_000_000_000); assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0)); - let parsed = parse_extended(": 1613322469:10;cargo install \\n atuin\n", 0); + let parsed = parse_extended("1613322469:10;cargo install \\n atuin\n", 0); assert_eq!(parsed.command, "cargo install \\n atuin"); assert_eq!(parsed.duration, 10_000_000_000); assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0)); } - #[test] - fn test_parse_file() { - let input = r": 1613322469:0;cargo install atuin + #[tokio::test] + async fn test_parse_file() { + let bytes = r": 1613322469:0;cargo install atuin : 1613322469:10;cargo install atuin; \ cargo update : 1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷ -"; - - let cursor = Cursor::new(input); - let mut zsh = Zsh::new(cursor).unwrap(); - assert_eq!(zsh.loc, 4); - assert_eq!(zsh.size_hint(), (0, Some(4))); - - assert_eq!(&zsh.next().unwrap().unwrap().command, "cargo install atuin"); - assert_eq!( - &zsh.next().unwrap().unwrap().command, - "cargo install atuin; \\\ncargo update" +" + .as_bytes() + .to_owned(); + + let mut zsh = Zsh { bytes }; + assert_eq!(zsh.entries().await.unwrap(), 4); + + let mut loader = TestLoader::default(); + zsh.load(&mut loader).await.unwrap(); + + assert_equal( + loader.buf.iter().map(|h| h.command.as_str()), + [ + "cargo install atuin", + "cargo install atuin; \\\ncargo update", + "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", + ], ); - assert_eq!( - &zsh.next().unwrap().unwrap().command, - "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷" - ); - assert!(zsh.next().is_none()); - - assert_eq!(zsh.size_hint(), (0, Some(0))); } } -- cgit v1.3.1