diff options
Diffstat (limited to 'crates/atuin-daemon/src/search')
| -rw-r--r-- | crates/atuin-daemon/src/search/index.rs | 292 |
1 files changed, 11 insertions, 281 deletions
diff --git a/crates/atuin-daemon/src/search/index.rs b/crates/atuin-daemon/src/search/index.rs index 90751155..bb155979 100644 --- a/crates/atuin-daemon/src/search/index.rs +++ b/crates/atuin-daemon/src/search/index.rs @@ -12,9 +12,7 @@ use std::{ sync::Arc, }; -use atuin_client::history::{ - AUTHOR_FILTER_ALL_AGENT, AUTHOR_FILTER_ALL_USER, History, KNOWN_AGENTS, -}; +use atuin_client::history::{History, is_known_agent}; use atuin_client::settings::Search; use atuin_nucleo::{Injector, Nucleo, pattern}; use dashmap::DashMap; @@ -116,100 +114,6 @@ pub struct CommandData { hosts: HashSet<Spur>, /// All sessions where this command has been run (as 16-byte UUIDs). sessions: HashSet<[u8; 16]>, - /// All authors who have run this command (interned keys). - authors: HashSet<Spur>, - /// Per-invocation metadata for returning the newest row that matches the active filters. - invocations: Vec<InvocationData>, -} - -struct InvocationData { - id: [u8; 16], - timestamp: i64, - directory: Spur, - host: Spur, - session: [u8; 16], - author: Spur, -} - -#[derive(Default)] -struct CompiledAuthorFilter { - include_all_users: bool, - include_all_agents: bool, - literal_authors: HashSet<Spur>, -} - -impl CompiledAuthorFilter { - fn new(filters: &[String], interner: &ThreadedRodeo) -> Self { - let mut compiled = Self::default(); - - for filter in filters { - match filter.as_str() { - AUTHOR_FILTER_ALL_USER => compiled.include_all_users = true, - AUTHOR_FILTER_ALL_AGENT => compiled.include_all_agents = true, - literal => { - if let Some(author) = interner.get(literal) { - compiled.literal_authors.insert(author); - } - } - } - } - - compiled - } - - fn is_empty(&self) -> bool { - !self.include_all_users && !self.include_all_agents && self.literal_authors.is_empty() - } - - fn matches_author(&self, author: Spur, agent_authors: &HashSet<Spur>) -> bool { - self.is_empty() - || self.literal_authors.contains(&author) - || (self.include_all_users && !agent_authors.contains(&author)) - || (self.include_all_agents && agent_authors.contains(&author)) - } -} - -impl InvocationData { - fn new(history: &History, interner: &ThreadedRodeo) -> Option<Self> { - Some(Self { - id: parse_uuid_bytes(&history.id.0)?, - timestamp: history.timestamp.unix_timestamp(), - directory: interner.get_or_intern(with_trailing_slash(&history.cwd)), - host: interner.get_or_intern(&history.hostname), - session: parse_uuid_bytes(&history.session)?, - author: interner.get_or_intern(&history.author), - }) - } - - fn matches_mode(&self, mode: &IndexFilterMode, interner: &ThreadedRodeo) -> bool { - match mode { - IndexFilterMode::Global => true, - IndexFilterMode::Directory(dir) => { - interner.get(dir).is_some_and(|spur| self.directory == spur) - } - IndexFilterMode::Workspace(prefix) => { - interner.resolve(&self.directory).starts_with(prefix) - } - IndexFilterMode::Host(hostname) => { - interner.get(hostname).is_some_and(|spur| self.host == spur) - } - IndexFilterMode::Session(session) => { - parse_uuid_bytes(session).is_some_and(|bytes| self.session == bytes) - } - } - } - - fn matches_authors( - &self, - filter: &CompiledAuthorFilter, - agent_authors: &HashSet<Spur>, - ) -> bool { - filter.matches_author(self.author, agent_authors) - } - - fn id_string(&self) -> String { - format_uuid_bytes(&self.id) - } } impl CommandData { @@ -222,7 +126,6 @@ impl CommandData { let dir_key = interner.get_or_intern(with_trailing_slash(&history.cwd)); let host_key = interner.get_or_intern(&history.hostname); - let invocation = InvocationData::new(history, interner)?; let mut directories = HashSet::new(); directories.insert(dir_key); @@ -233,9 +136,6 @@ impl CommandData { let mut sessions = HashSet::new(); sessions.insert(session); - let mut authors = HashSet::new(); - authors.insert(interner.get_or_intern(&history.author)); - let mut global_frecency = FrecencyData::default(); global_frecency.record_use(timestamp); @@ -246,8 +146,6 @@ impl CommandData { directories, hosts, sessions, - authors, - invocations: vec![invocation], }) } @@ -262,9 +160,6 @@ impl CommandData { }; let timestamp = history.timestamp.unix_timestamp(); - let Some(invocation) = InvocationData::new(history, interner) else { - return false; - }; // Update global frecency self.global_frecency.record_use(timestamp); @@ -274,8 +169,6 @@ impl CommandData { self.directories.insert(dir_key); self.hosts.insert(interner.get_or_intern(&history.hostname)); self.sessions.insert(session); - self.authors.insert(interner.get_or_intern(&history.author)); - self.invocations.push(invocation); // Update most recent if this invocation is newer if timestamp > self.most_recent_timestamp { @@ -291,27 +184,6 @@ impl CommandData { format_uuid_bytes(&self.most_recent_id) } - fn most_recent_matching_id( - &self, - mode: &IndexFilterMode, - authors: &CompiledAuthorFilter, - interner: &ThreadedRodeo, - agent_authors: &HashSet<Spur>, - ) -> Option<String> { - if matches!(mode, IndexFilterMode::Global) && authors.is_empty() { - return Some(self.most_recent_id()); - } - - self.invocations - .iter() - .filter(|invocation| { - invocation.matches_mode(mode, interner) - && invocation.matches_authors(authors, agent_authors) - }) - .max_by_key(|invocation| invocation.timestamp) - .map(InvocationData::id_string) - } - /// Check if any invocation matches a directory filter (exact match). /// O(1) lookup using pre-computed index. pub fn has_invocation_in_dir(&self, dir: &str, interner: &ThreadedRodeo) -> bool { @@ -341,16 +213,6 @@ impl CommandData { pub fn has_invocation_in_session(&self, session: &str) -> bool { parse_uuid_bytes(session).is_some_and(|bytes| self.sessions.contains(&bytes)) } - - fn matches_authors( - &self, - filter: &CompiledAuthorFilter, - agent_authors: &HashSet<Spur>, - ) -> bool { - self.authors - .iter() - .any(|author| filter.matches_author(*author, agent_authors)) - } } /// Filter mode for search queries. @@ -426,6 +288,10 @@ impl SearchIndex { /// If the command already exists, updates its invocation data. /// If it's a new command, adds it to both the map and Nucleo. pub fn add_history(&self, history: &History) { + if is_known_agent(&history.author) { + return; + } + let command = history.command.as_str(); // DashMap with Arc<str> keys can be looked up with &str via Borrow trait @@ -474,7 +340,6 @@ impl SearchIndex { query: &str, filter_mode: IndexFilterMode, _context: &QueryContext, - authors: &[String], limit: u32, ) -> Vec<String> { let mut nucleo = self.nucleo.write().await; @@ -482,21 +347,8 @@ impl SearchIndex { // Get precomputed frecency map (may be None if not yet computed) let frecency_map = self.frecency_map.read().await.clone(); - // Build filter based on mode + authors - let mode_filter = self.build_filter(&filter_mode); - let compiled_authors = CompiledAuthorFilter::new(authors, &self.interner); - let author_filter = self.build_author_filter(&compiled_authors); - let agent_authors: HashSet<Spur> = KNOWN_AGENTS - .iter() - .filter_map(|author| self.interner.get(author)) - .collect(); - let filter = - match (mode_filter, author_filter) { - (Some(mf), Some(af)) => Some(Arc::new(move |cmd: &String| mf(cmd) && af(cmd)) - as atuin_nucleo::Filter<String>), - (Some(f), None) | (None, Some(f)) => Some(f), - (None, None) => None, - }; + // Build filter based on mode + let filter = self.build_filter(&filter_mode); nucleo.set_filter(filter); // Build scorer from precomputed frecency (or None if not available) @@ -527,14 +379,9 @@ impl SearchIndex { .filter_map(|item| { let cmd = item.data; // DashMap<Arc<str>, _>::get accepts &str via Borrow trait - self.commands.get(cmd.as_str()).and_then(|data| { - data.most_recent_matching_id( - &filter_mode, - &compiled_authors, - &self.interner, - &agent_authors, - ) - }) + self.commands + .get(cmd.as_str()) + .map(|data| data.most_recent_id()) }) .collect() }) @@ -609,36 +456,6 @@ impl SearchIndex { Some(Arc::new(move |cmd: &String| passing_commands.contains(cmd))) } - /// Build author filter predicate. - /// Same pre-computation approach as `build_filter`: find all commands with - /// a matching author, collect into a HashSet, wrap as a Nucleo Filter closure. - fn build_author_filter( - &self, - authors: &CompiledAuthorFilter, - ) -> Option<atuin_nucleo::Filter<String>> { - if authors.is_empty() { - return None; - } - - let agent_authors: HashSet<Spur> = KNOWN_AGENTS - .iter() - .filter_map(|a| self.interner.get(a)) - .collect(); - - let passing_commands: Arc<HashSet<String>> = { - let mut set = HashSet::new(); - for entry in self.commands.iter() { - let passes = entry.matches_authors(authors, &agent_authors); - if passes { - set.insert(entry.key().to_string()); - } - } - Arc::new(set) - }; - - Some(Arc::new(move |cmd: &String| passing_commands.contains(cmd))) - } - /// Build scorer from precomputed frecency map. /// /// Returns None if frecency map is not available (search still works, just without frecency ranking). @@ -821,86 +638,6 @@ mod tests { } #[tokio::test] - async fn search_index_returns_latest_matching_author_invocation() { - let index = SearchIndex::new(); - - let user_history: History = History::import() - .timestamp(datetime!(2024-01-01 10:00 UTC)) - .command("git status") - .cwd("/home/user/project") - .author("ellie") - .build() - .into(); - let agent_history: History = History::import() - .timestamp(datetime!(2024-01-01 11:00 UTC)) - .command("git status") - .cwd("/home/user/project") - .author("codex") - .build() - .into(); - - index.add_history(&user_history); - index.add_history(&agent_history); - - let user_results = index - .search( - "git", - IndexFilterMode::Global, - &QueryContext::default(), - &[AUTHOR_FILTER_ALL_USER.to_string()], - 10, - ) - .await; - assert_eq!( - user_results, - vec![Uuid::parse_str(&user_history.id.0).unwrap().to_string()] - ); - - let agent_results = index - .search( - "git", - IndexFilterMode::Global, - &QueryContext::default(), - &[AUTHOR_FILTER_ALL_AGENT.to_string()], - 10, - ) - .await; - assert_eq!( - agent_results, - vec![Uuid::parse_str(&agent_history.id.0).unwrap().to_string()] - ); - } - - #[tokio::test] - async fn search_index_returns_latest_matching_directory_invocation() { - let index = SearchIndex::new(); - - let dir1 = "/home/user/project"; - let dir2 = "/home/user/other"; - - let project_history = make_history("git status", dir1, datetime!(2024-01-01 10:00 UTC)); - let other_history = make_history("git status", dir2, datetime!(2024-01-01 11:00 UTC)); - - index.add_history(&project_history); - index.add_history(&other_history); - - let results = index - .search( - "git", - IndexFilterMode::Directory(with_trailing_slash(dir1)), - &QueryContext::default(), - &[], - 10, - ) - .await; - - assert_eq!( - results, - vec![Uuid::parse_str(&project_history.id.0).unwrap().to_string()] - ); - } - - #[tokio::test] async fn search_index_add_and_search() { let index = SearchIndex::new(); @@ -928,13 +665,7 @@ mod tests { // Search for "git" - should match 2 commands let results = index - .search( - "git", - IndexFilterMode::Global, - &QueryContext::default(), - &[], - 10, - ) + .search("git", IndexFilterMode::Global, &QueryContext::default(), 10) .await; assert_eq!(results.len(), 2); @@ -944,7 +675,6 @@ mod tests { "", IndexFilterMode::Directory(with_trailing_slash("/home/user/project")), &QueryContext::default(), - &[], 10, ) .await; |
