aboutsummaryrefslogtreecommitdiffstats
path: root/crates/atuin-daemon/src/search/index.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/atuin-daemon/src/search/index.rs')
-rw-r--r--crates/atuin-daemon/src/search/index.rs292
1 files changed, 11 insertions, 281 deletions
diff --git a/crates/atuin-daemon/src/search/index.rs b/crates/atuin-daemon/src/search/index.rs
index 90751155..bb155979 100644
--- a/crates/atuin-daemon/src/search/index.rs
+++ b/crates/atuin-daemon/src/search/index.rs
@@ -12,9 +12,7 @@ use std::{
sync::Arc,
};
-use atuin_client::history::{
- AUTHOR_FILTER_ALL_AGENT, AUTHOR_FILTER_ALL_USER, History, KNOWN_AGENTS,
-};
+use atuin_client::history::{History, is_known_agent};
use atuin_client::settings::Search;
use atuin_nucleo::{Injector, Nucleo, pattern};
use dashmap::DashMap;
@@ -116,100 +114,6 @@ pub struct CommandData {
hosts: HashSet<Spur>,
/// All sessions where this command has been run (as 16-byte UUIDs).
sessions: HashSet<[u8; 16]>,
- /// All authors who have run this command (interned keys).
- authors: HashSet<Spur>,
- /// Per-invocation metadata for returning the newest row that matches the active filters.
- invocations: Vec<InvocationData>,
-}
-
-struct InvocationData {
- id: [u8; 16],
- timestamp: i64,
- directory: Spur,
- host: Spur,
- session: [u8; 16],
- author: Spur,
-}
-
-#[derive(Default)]
-struct CompiledAuthorFilter {
- include_all_users: bool,
- include_all_agents: bool,
- literal_authors: HashSet<Spur>,
-}
-
-impl CompiledAuthorFilter {
- fn new(filters: &[String], interner: &ThreadedRodeo) -> Self {
- let mut compiled = Self::default();
-
- for filter in filters {
- match filter.as_str() {
- AUTHOR_FILTER_ALL_USER => compiled.include_all_users = true,
- AUTHOR_FILTER_ALL_AGENT => compiled.include_all_agents = true,
- literal => {
- if let Some(author) = interner.get(literal) {
- compiled.literal_authors.insert(author);
- }
- }
- }
- }
-
- compiled
- }
-
- fn is_empty(&self) -> bool {
- !self.include_all_users && !self.include_all_agents && self.literal_authors.is_empty()
- }
-
- fn matches_author(&self, author: Spur, agent_authors: &HashSet<Spur>) -> bool {
- self.is_empty()
- || self.literal_authors.contains(&author)
- || (self.include_all_users && !agent_authors.contains(&author))
- || (self.include_all_agents && agent_authors.contains(&author))
- }
-}
-
-impl InvocationData {
- fn new(history: &History, interner: &ThreadedRodeo) -> Option<Self> {
- Some(Self {
- id: parse_uuid_bytes(&history.id.0)?,
- timestamp: history.timestamp.unix_timestamp(),
- directory: interner.get_or_intern(with_trailing_slash(&history.cwd)),
- host: interner.get_or_intern(&history.hostname),
- session: parse_uuid_bytes(&history.session)?,
- author: interner.get_or_intern(&history.author),
- })
- }
-
- fn matches_mode(&self, mode: &IndexFilterMode, interner: &ThreadedRodeo) -> bool {
- match mode {
- IndexFilterMode::Global => true,
- IndexFilterMode::Directory(dir) => {
- interner.get(dir).is_some_and(|spur| self.directory == spur)
- }
- IndexFilterMode::Workspace(prefix) => {
- interner.resolve(&self.directory).starts_with(prefix)
- }
- IndexFilterMode::Host(hostname) => {
- interner.get(hostname).is_some_and(|spur| self.host == spur)
- }
- IndexFilterMode::Session(session) => {
- parse_uuid_bytes(session).is_some_and(|bytes| self.session == bytes)
- }
- }
- }
-
- fn matches_authors(
- &self,
- filter: &CompiledAuthorFilter,
- agent_authors: &HashSet<Spur>,
- ) -> bool {
- filter.matches_author(self.author, agent_authors)
- }
-
- fn id_string(&self) -> String {
- format_uuid_bytes(&self.id)
- }
}
impl CommandData {
@@ -222,7 +126,6 @@ impl CommandData {
let dir_key = interner.get_or_intern(with_trailing_slash(&history.cwd));
let host_key = interner.get_or_intern(&history.hostname);
- let invocation = InvocationData::new(history, interner)?;
let mut directories = HashSet::new();
directories.insert(dir_key);
@@ -233,9 +136,6 @@ impl CommandData {
let mut sessions = HashSet::new();
sessions.insert(session);
- let mut authors = HashSet::new();
- authors.insert(interner.get_or_intern(&history.author));
-
let mut global_frecency = FrecencyData::default();
global_frecency.record_use(timestamp);
@@ -246,8 +146,6 @@ impl CommandData {
directories,
hosts,
sessions,
- authors,
- invocations: vec![invocation],
})
}
@@ -262,9 +160,6 @@ impl CommandData {
};
let timestamp = history.timestamp.unix_timestamp();
- let Some(invocation) = InvocationData::new(history, interner) else {
- return false;
- };
// Update global frecency
self.global_frecency.record_use(timestamp);
@@ -274,8 +169,6 @@ impl CommandData {
self.directories.insert(dir_key);
self.hosts.insert(interner.get_or_intern(&history.hostname));
self.sessions.insert(session);
- self.authors.insert(interner.get_or_intern(&history.author));
- self.invocations.push(invocation);
// Update most recent if this invocation is newer
if timestamp > self.most_recent_timestamp {
@@ -291,27 +184,6 @@ impl CommandData {
format_uuid_bytes(&self.most_recent_id)
}
- fn most_recent_matching_id(
- &self,
- mode: &IndexFilterMode,
- authors: &CompiledAuthorFilter,
- interner: &ThreadedRodeo,
- agent_authors: &HashSet<Spur>,
- ) -> Option<String> {
- if matches!(mode, IndexFilterMode::Global) && authors.is_empty() {
- return Some(self.most_recent_id());
- }
-
- self.invocations
- .iter()
- .filter(|invocation| {
- invocation.matches_mode(mode, interner)
- && invocation.matches_authors(authors, agent_authors)
- })
- .max_by_key(|invocation| invocation.timestamp)
- .map(InvocationData::id_string)
- }
-
/// Check if any invocation matches a directory filter (exact match).
/// O(1) lookup using pre-computed index.
pub fn has_invocation_in_dir(&self, dir: &str, interner: &ThreadedRodeo) -> bool {
@@ -341,16 +213,6 @@ impl CommandData {
pub fn has_invocation_in_session(&self, session: &str) -> bool {
parse_uuid_bytes(session).is_some_and(|bytes| self.sessions.contains(&bytes))
}
-
- fn matches_authors(
- &self,
- filter: &CompiledAuthorFilter,
- agent_authors: &HashSet<Spur>,
- ) -> bool {
- self.authors
- .iter()
- .any(|author| filter.matches_author(*author, agent_authors))
- }
}
/// Filter mode for search queries.
@@ -426,6 +288,10 @@ impl SearchIndex {
/// If the command already exists, updates its invocation data.
/// If it's a new command, adds it to both the map and Nucleo.
pub fn add_history(&self, history: &History) {
+ if is_known_agent(&history.author) {
+ return;
+ }
+
let command = history.command.as_str();
// DashMap with Arc<str> keys can be looked up with &str via Borrow trait
@@ -474,7 +340,6 @@ impl SearchIndex {
query: &str,
filter_mode: IndexFilterMode,
_context: &QueryContext,
- authors: &[String],
limit: u32,
) -> Vec<String> {
let mut nucleo = self.nucleo.write().await;
@@ -482,21 +347,8 @@ impl SearchIndex {
// Get precomputed frecency map (may be None if not yet computed)
let frecency_map = self.frecency_map.read().await.clone();
- // Build filter based on mode + authors
- let mode_filter = self.build_filter(&filter_mode);
- let compiled_authors = CompiledAuthorFilter::new(authors, &self.interner);
- let author_filter = self.build_author_filter(&compiled_authors);
- let agent_authors: HashSet<Spur> = KNOWN_AGENTS
- .iter()
- .filter_map(|author| self.interner.get(author))
- .collect();
- let filter =
- match (mode_filter, author_filter) {
- (Some(mf), Some(af)) => Some(Arc::new(move |cmd: &String| mf(cmd) && af(cmd))
- as atuin_nucleo::Filter<String>),
- (Some(f), None) | (None, Some(f)) => Some(f),
- (None, None) => None,
- };
+ // Build filter based on mode
+ let filter = self.build_filter(&filter_mode);
nucleo.set_filter(filter);
// Build scorer from precomputed frecency (or None if not available)
@@ -527,14 +379,9 @@ impl SearchIndex {
.filter_map(|item| {
let cmd = item.data;
// DashMap<Arc<str>, _>::get accepts &str via Borrow trait
- self.commands.get(cmd.as_str()).and_then(|data| {
- data.most_recent_matching_id(
- &filter_mode,
- &compiled_authors,
- &self.interner,
- &agent_authors,
- )
- })
+ self.commands
+ .get(cmd.as_str())
+ .map(|data| data.most_recent_id())
})
.collect()
})
@@ -609,36 +456,6 @@ impl SearchIndex {
Some(Arc::new(move |cmd: &String| passing_commands.contains(cmd)))
}
- /// Build author filter predicate.
- /// Same pre-computation approach as `build_filter`: find all commands with
- /// a matching author, collect into a HashSet, wrap as a Nucleo Filter closure.
- fn build_author_filter(
- &self,
- authors: &CompiledAuthorFilter,
- ) -> Option<atuin_nucleo::Filter<String>> {
- if authors.is_empty() {
- return None;
- }
-
- let agent_authors: HashSet<Spur> = KNOWN_AGENTS
- .iter()
- .filter_map(|a| self.interner.get(a))
- .collect();
-
- let passing_commands: Arc<HashSet<String>> = {
- let mut set = HashSet::new();
- for entry in self.commands.iter() {
- let passes = entry.matches_authors(authors, &agent_authors);
- if passes {
- set.insert(entry.key().to_string());
- }
- }
- Arc::new(set)
- };
-
- Some(Arc::new(move |cmd: &String| passing_commands.contains(cmd)))
- }
-
/// Build scorer from precomputed frecency map.
///
/// Returns None if frecency map is not available (search still works, just without frecency ranking).
@@ -821,86 +638,6 @@ mod tests {
}
#[tokio::test]
- async fn search_index_returns_latest_matching_author_invocation() {
- let index = SearchIndex::new();
-
- let user_history: History = History::import()
- .timestamp(datetime!(2024-01-01 10:00 UTC))
- .command("git status")
- .cwd("/home/user/project")
- .author("ellie")
- .build()
- .into();
- let agent_history: History = History::import()
- .timestamp(datetime!(2024-01-01 11:00 UTC))
- .command("git status")
- .cwd("/home/user/project")
- .author("codex")
- .build()
- .into();
-
- index.add_history(&user_history);
- index.add_history(&agent_history);
-
- let user_results = index
- .search(
- "git",
- IndexFilterMode::Global,
- &QueryContext::default(),
- &[AUTHOR_FILTER_ALL_USER.to_string()],
- 10,
- )
- .await;
- assert_eq!(
- user_results,
- vec![Uuid::parse_str(&user_history.id.0).unwrap().to_string()]
- );
-
- let agent_results = index
- .search(
- "git",
- IndexFilterMode::Global,
- &QueryContext::default(),
- &[AUTHOR_FILTER_ALL_AGENT.to_string()],
- 10,
- )
- .await;
- assert_eq!(
- agent_results,
- vec![Uuid::parse_str(&agent_history.id.0).unwrap().to_string()]
- );
- }
-
- #[tokio::test]
- async fn search_index_returns_latest_matching_directory_invocation() {
- let index = SearchIndex::new();
-
- let dir1 = "/home/user/project";
- let dir2 = "/home/user/other";
-
- let project_history = make_history("git status", dir1, datetime!(2024-01-01 10:00 UTC));
- let other_history = make_history("git status", dir2, datetime!(2024-01-01 11:00 UTC));
-
- index.add_history(&project_history);
- index.add_history(&other_history);
-
- let results = index
- .search(
- "git",
- IndexFilterMode::Directory(with_trailing_slash(dir1)),
- &QueryContext::default(),
- &[],
- 10,
- )
- .await;
-
- assert_eq!(
- results,
- vec![Uuid::parse_str(&project_history.id.0).unwrap().to_string()]
- );
- }
-
- #[tokio::test]
async fn search_index_add_and_search() {
let index = SearchIndex::new();
@@ -928,13 +665,7 @@ mod tests {
// Search for "git" - should match 2 commands
let results = index
- .search(
- "git",
- IndexFilterMode::Global,
- &QueryContext::default(),
- &[],
- 10,
- )
+ .search("git", IndexFilterMode::Global, &QueryContext::default(), 10)
.await;
assert_eq!(results.len(), 2);
@@ -944,7 +675,6 @@ mod tests {
"",
IndexFilterMode::Directory(with_trailing_slash("/home/user/project")),
&QueryContext::default(),
- &[],
10,
)
.await;