aboutsummaryrefslogtreecommitdiffstats
path: root/crates/atuin-daemon/src/search
diff options
context:
space:
mode:
Diffstat (limited to 'crates/atuin-daemon/src/search')
-rw-r--r--crates/atuin-daemon/src/search/index.rs288
1 files changed, 281 insertions, 7 deletions
diff --git a/crates/atuin-daemon/src/search/index.rs b/crates/atuin-daemon/src/search/index.rs
index 3328c5b5..90751155 100644
--- a/crates/atuin-daemon/src/search/index.rs
+++ b/crates/atuin-daemon/src/search/index.rs
@@ -12,7 +12,9 @@ use std::{
sync::Arc,
};
-use atuin_client::history::History;
+use atuin_client::history::{
+ AUTHOR_FILTER_ALL_AGENT, AUTHOR_FILTER_ALL_USER, History, KNOWN_AGENTS,
+};
use atuin_client::settings::Search;
use atuin_nucleo::{Injector, Nucleo, pattern};
use dashmap::DashMap;
@@ -114,6 +116,100 @@ pub struct CommandData {
hosts: HashSet<Spur>,
/// All sessions where this command has been run (as 16-byte UUIDs).
sessions: HashSet<[u8; 16]>,
+ /// All authors who have run this command (interned keys).
+ authors: HashSet<Spur>,
+ /// Per-invocation metadata for returning the newest row that matches the active filters.
+ invocations: Vec<InvocationData>,
+}
+
+struct InvocationData {
+ id: [u8; 16],
+ timestamp: i64,
+ directory: Spur,
+ host: Spur,
+ session: [u8; 16],
+ author: Spur,
+}
+
+#[derive(Default)]
+struct CompiledAuthorFilter {
+ include_all_users: bool,
+ include_all_agents: bool,
+ literal_authors: HashSet<Spur>,
+}
+
+impl CompiledAuthorFilter {
+ fn new(filters: &[String], interner: &ThreadedRodeo) -> Self {
+ let mut compiled = Self::default();
+
+ for filter in filters {
+ match filter.as_str() {
+ AUTHOR_FILTER_ALL_USER => compiled.include_all_users = true,
+ AUTHOR_FILTER_ALL_AGENT => compiled.include_all_agents = true,
+ literal => {
+ if let Some(author) = interner.get(literal) {
+ compiled.literal_authors.insert(author);
+ }
+ }
+ }
+ }
+
+ compiled
+ }
+
+ fn is_empty(&self) -> bool {
+ !self.include_all_users && !self.include_all_agents && self.literal_authors.is_empty()
+ }
+
+ fn matches_author(&self, author: Spur, agent_authors: &HashSet<Spur>) -> bool {
+ self.is_empty()
+ || self.literal_authors.contains(&author)
+ || (self.include_all_users && !agent_authors.contains(&author))
+ || (self.include_all_agents && agent_authors.contains(&author))
+ }
+}
+
+impl InvocationData {
+ fn new(history: &History, interner: &ThreadedRodeo) -> Option<Self> {
+ Some(Self {
+ id: parse_uuid_bytes(&history.id.0)?,
+ timestamp: history.timestamp.unix_timestamp(),
+ directory: interner.get_or_intern(with_trailing_slash(&history.cwd)),
+ host: interner.get_or_intern(&history.hostname),
+ session: parse_uuid_bytes(&history.session)?,
+ author: interner.get_or_intern(&history.author),
+ })
+ }
+
+ fn matches_mode(&self, mode: &IndexFilterMode, interner: &ThreadedRodeo) -> bool {
+ match mode {
+ IndexFilterMode::Global => true,
+ IndexFilterMode::Directory(dir) => {
+ interner.get(dir).is_some_and(|spur| self.directory == spur)
+ }
+ IndexFilterMode::Workspace(prefix) => {
+ interner.resolve(&self.directory).starts_with(prefix)
+ }
+ IndexFilterMode::Host(hostname) => {
+ interner.get(hostname).is_some_and(|spur| self.host == spur)
+ }
+ IndexFilterMode::Session(session) => {
+ parse_uuid_bytes(session).is_some_and(|bytes| self.session == bytes)
+ }
+ }
+ }
+
+ fn matches_authors(
+ &self,
+ filter: &CompiledAuthorFilter,
+ agent_authors: &HashSet<Spur>,
+ ) -> bool {
+ filter.matches_author(self.author, agent_authors)
+ }
+
+ fn id_string(&self) -> String {
+ format_uuid_bytes(&self.id)
+ }
}
impl CommandData {
@@ -126,6 +222,7 @@ impl CommandData {
let dir_key = interner.get_or_intern(with_trailing_slash(&history.cwd));
let host_key = interner.get_or_intern(&history.hostname);
+ let invocation = InvocationData::new(history, interner)?;
let mut directories = HashSet::new();
directories.insert(dir_key);
@@ -136,6 +233,9 @@ impl CommandData {
let mut sessions = HashSet::new();
sessions.insert(session);
+ let mut authors = HashSet::new();
+ authors.insert(interner.get_or_intern(&history.author));
+
let mut global_frecency = FrecencyData::default();
global_frecency.record_use(timestamp);
@@ -146,6 +246,8 @@ impl CommandData {
directories,
hosts,
sessions,
+ authors,
+ invocations: vec![invocation],
})
}
@@ -160,6 +262,9 @@ impl CommandData {
};
let timestamp = history.timestamp.unix_timestamp();
+ let Some(invocation) = InvocationData::new(history, interner) else {
+ return false;
+ };
// Update global frecency
self.global_frecency.record_use(timestamp);
@@ -169,6 +274,8 @@ impl CommandData {
self.directories.insert(dir_key);
self.hosts.insert(interner.get_or_intern(&history.hostname));
self.sessions.insert(session);
+ self.authors.insert(interner.get_or_intern(&history.author));
+ self.invocations.push(invocation);
// Update most recent if this invocation is newer
if timestamp > self.most_recent_timestamp {
@@ -184,6 +291,27 @@ impl CommandData {
format_uuid_bytes(&self.most_recent_id)
}
+ fn most_recent_matching_id(
+ &self,
+ mode: &IndexFilterMode,
+ authors: &CompiledAuthorFilter,
+ interner: &ThreadedRodeo,
+ agent_authors: &HashSet<Spur>,
+ ) -> Option<String> {
+ if matches!(mode, IndexFilterMode::Global) && authors.is_empty() {
+ return Some(self.most_recent_id());
+ }
+
+ self.invocations
+ .iter()
+ .filter(|invocation| {
+ invocation.matches_mode(mode, interner)
+ && invocation.matches_authors(authors, agent_authors)
+ })
+ .max_by_key(|invocation| invocation.timestamp)
+ .map(InvocationData::id_string)
+ }
+
/// Check if any invocation matches a directory filter (exact match).
/// O(1) lookup using pre-computed index.
pub fn has_invocation_in_dir(&self, dir: &str, interner: &ThreadedRodeo) -> bool {
@@ -213,6 +341,16 @@ impl CommandData {
pub fn has_invocation_in_session(&self, session: &str) -> bool {
parse_uuid_bytes(session).is_some_and(|bytes| self.sessions.contains(&bytes))
}
+
+ fn matches_authors(
+ &self,
+ filter: &CompiledAuthorFilter,
+ agent_authors: &HashSet<Spur>,
+ ) -> bool {
+ self.authors
+ .iter()
+ .any(|author| filter.matches_author(*author, agent_authors))
+ }
}
/// Filter mode for search queries.
@@ -336,6 +474,7 @@ impl SearchIndex {
query: &str,
filter_mode: IndexFilterMode,
_context: &QueryContext,
+ authors: &[String],
limit: u32,
) -> Vec<String> {
let mut nucleo = self.nucleo.write().await;
@@ -343,8 +482,21 @@ impl SearchIndex {
// Get precomputed frecency map (may be None if not yet computed)
let frecency_map = self.frecency_map.read().await.clone();
- // Build filter based on mode
- let filter = self.build_filter(&filter_mode);
+ // Build filter based on mode + authors
+ let mode_filter = self.build_filter(&filter_mode);
+ let compiled_authors = CompiledAuthorFilter::new(authors, &self.interner);
+ let author_filter = self.build_author_filter(&compiled_authors);
+ let agent_authors: HashSet<Spur> = KNOWN_AGENTS
+ .iter()
+ .filter_map(|author| self.interner.get(author))
+ .collect();
+ let filter =
+ match (mode_filter, author_filter) {
+ (Some(mf), Some(af)) => Some(Arc::new(move |cmd: &String| mf(cmd) && af(cmd))
+ as atuin_nucleo::Filter<String>),
+ (Some(f), None) | (None, Some(f)) => Some(f),
+ (None, None) => None,
+ };
nucleo.set_filter(filter);
// Build scorer from precomputed frecency (or None if not available)
@@ -375,9 +527,14 @@ impl SearchIndex {
.filter_map(|item| {
let cmd = item.data;
// DashMap<Arc<str>, _>::get accepts &str via Borrow trait
- self.commands
- .get(cmd.as_str())
- .map(|data| data.most_recent_id())
+ self.commands.get(cmd.as_str()).and_then(|data| {
+ data.most_recent_matching_id(
+ &filter_mode,
+ &compiled_authors,
+ &self.interner,
+ &agent_authors,
+ )
+ })
})
.collect()
})
@@ -452,6 +609,36 @@ impl SearchIndex {
Some(Arc::new(move |cmd: &String| passing_commands.contains(cmd)))
}
+ /// Build author filter predicate.
+ /// Same pre-computation approach as `build_filter`: find all commands with
+ /// a matching author, collect into a HashSet, wrap as a Nucleo Filter closure.
+ fn build_author_filter(
+ &self,
+ authors: &CompiledAuthorFilter,
+ ) -> Option<atuin_nucleo::Filter<String>> {
+ if authors.is_empty() {
+ return None;
+ }
+
+ let agent_authors: HashSet<Spur> = KNOWN_AGENTS
+ .iter()
+ .filter_map(|a| self.interner.get(a))
+ .collect();
+
+ let passing_commands: Arc<HashSet<String>> = {
+ let mut set = HashSet::new();
+ for entry in self.commands.iter() {
+ let passes = entry.matches_authors(authors, &agent_authors);
+ if passes {
+ set.insert(entry.key().to_string());
+ }
+ }
+ Arc::new(set)
+ };
+
+ Some(Arc::new(move |cmd: &String| passing_commands.contains(cmd)))
+ }
+
/// Build scorer from precomputed frecency map.
///
/// Returns None if frecency map is not available (search still works, just without frecency ranking).
@@ -634,6 +821,86 @@ mod tests {
}
#[tokio::test]
+ async fn search_index_returns_latest_matching_author_invocation() {
+ let index = SearchIndex::new();
+
+ let user_history: History = History::import()
+ .timestamp(datetime!(2024-01-01 10:00 UTC))
+ .command("git status")
+ .cwd("/home/user/project")
+ .author("ellie")
+ .build()
+ .into();
+ let agent_history: History = History::import()
+ .timestamp(datetime!(2024-01-01 11:00 UTC))
+ .command("git status")
+ .cwd("/home/user/project")
+ .author("codex")
+ .build()
+ .into();
+
+ index.add_history(&user_history);
+ index.add_history(&agent_history);
+
+ let user_results = index
+ .search(
+ "git",
+ IndexFilterMode::Global,
+ &QueryContext::default(),
+ &[AUTHOR_FILTER_ALL_USER.to_string()],
+ 10,
+ )
+ .await;
+ assert_eq!(
+ user_results,
+ vec![Uuid::parse_str(&user_history.id.0).unwrap().to_string()]
+ );
+
+ let agent_results = index
+ .search(
+ "git",
+ IndexFilterMode::Global,
+ &QueryContext::default(),
+ &[AUTHOR_FILTER_ALL_AGENT.to_string()],
+ 10,
+ )
+ .await;
+ assert_eq!(
+ agent_results,
+ vec![Uuid::parse_str(&agent_history.id.0).unwrap().to_string()]
+ );
+ }
+
+ #[tokio::test]
+ async fn search_index_returns_latest_matching_directory_invocation() {
+ let index = SearchIndex::new();
+
+ let dir1 = "/home/user/project";
+ let dir2 = "/home/user/other";
+
+ let project_history = make_history("git status", dir1, datetime!(2024-01-01 10:00 UTC));
+ let other_history = make_history("git status", dir2, datetime!(2024-01-01 11:00 UTC));
+
+ index.add_history(&project_history);
+ index.add_history(&other_history);
+
+ let results = index
+ .search(
+ "git",
+ IndexFilterMode::Directory(with_trailing_slash(dir1)),
+ &QueryContext::default(),
+ &[],
+ 10,
+ )
+ .await;
+
+ assert_eq!(
+ results,
+ vec![Uuid::parse_str(&project_history.id.0).unwrap().to_string()]
+ );
+ }
+
+ #[tokio::test]
async fn search_index_add_and_search() {
let index = SearchIndex::new();
@@ -661,7 +928,13 @@ mod tests {
// Search for "git" - should match 2 commands
let results = index
- .search("git", IndexFilterMode::Global, &QueryContext::default(), 10)
+ .search(
+ "git",
+ IndexFilterMode::Global,
+ &QueryContext::default(),
+ &[],
+ 10,
+ )
.await;
assert_eq!(results.len(), 2);
@@ -671,6 +944,7 @@ mod tests {
"",
IndexFilterMode::Directory(with_trailing_slash("/home/user/project")),
&QueryContext::default(),
+ &[],
10,
)
.await;