From 3ba47446f06d5b0fbeaeb59d4ffed768b70729d8 Mon Sep 17 00:00:00 2001 From: Michelle Tilley Date: Thu, 26 Feb 2026 14:42:08 -0800 Subject: feat: In-memory search index with atuin daemon (#3201) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary This PR adds a persistent, in-memory search index to the Atuin daemon, enabling fast fuzzy search without the startup delay of building an index each time the TUI opens. ### Key Changes - **Daemon search service**: A new gRPC service that maintains a Nucleo fuzzy search index in memory - **Real-time index updates**: The daemon listens for history events (new commands, synced records) and updates the index immediately - **Filter mode support**: All existing filter modes work (Global, Host, Session, Directory, Workspace) - **New search engine**: `daemon-fuzzy` search mode that queries the daemon instead of building a local index - **Paged history loading**: Database pagination support for efficient initial index loading - **Configurable logging**: New `[logs]` settings section for daemon and search log configuration - **Component-based daemon architecture**: Refactored daemon internals into a modular, event-driven system - **Fallback to DB search for regex**: Since Nucleo doesn't support regex matching ## Daemon Architecture The daemon has been refactored to use a component-based, event-driven architecture that makes it easier to add new functionality and reason about the system. ### Core Concepts ``` ┌─────────────────────────────────────────────────────────────────────────────┐ │ Atuin Daemon │ │ │ │ ┌─────────────┐ ┌──────────────────────────────────────────────────┐ │ │ │ Daemon │ │ Components │ │ │ │ Handle │────▶│ │ │ │ │ │ │ ┌─────────────┐ ┌─────────────┐ ┌────────────┐ │ │ │ │ • emit() │ │ │ History │ │ Search │ │ Sync │ │ │ │ │ • subscribe │ │ │ Component │ │ Component │ │ Component │ │ │ │ │ • settings │ │ │ │ │ │ │ │ │ │ │ │ • databases │ │ │ gRPC service│ │ gRPC service│ │ background │ │ │ │ └─────────────┘ │ │ WIP history │ │ Nucleo index│ │ sync │ │ │ │ │ │ └─────────────┘ └─────────────┘ └────────────┘ │ │ │ │ └──────────────────────────────────────────────────┘ │ │ │ ▲ │ │ ▼ │ │ │ ┌─────────────────────────────────────┴────────────────────────────────┐ │ │ │ Event Bus (broadcast) │ │ │ │ │ │ │ │ HistoryStarted │ HistoryEnded │ RecordsAdded │ SyncCompleted │ ... │ │ │ └──────────────────────────────────────────────────────────────────────┘ │ │ ▲ │ │ ┌───────────────────────────────────┴──────────────────────────────────┐ │ │ │ Control Service (gRPC) │ │ │ │ External event injection from CLI commands │ │ │ └──────────────────────────────────────────────────────────────────────┘ │ └─────────────────────────────────────────────────────────────────────────────┘ ``` ### DaemonHandle A lightweight, cloneable handle that provides access to shared daemon resources: - **Event emission**: `handle.emit(DaemonEvent::...)` broadcasts to all components - **Event subscription**: `handle.subscribe()` returns a receiver for the event bus - **Settings**: `handle.settings()` for configuration access - **Databases**: `handle.history_db()` and `handle.store()` for data access ### Component Trait Components implement a simple lifecycle: ```rust #[async_trait] trait Component: Send + Sync { fn name(&self) -> &'static str; async fn start(&mut self, handle: DaemonHandle) -> Result<()>; async fn handle_event(&mut self, event: &DaemonEvent) -> Result<()>; async fn stop(&mut self) -> Result<()>; } ``` ### Event-Driven Design Components communicate via events rather than direct coupling: | Event | Emitted By | Consumed By | |-------|-----------|-------------| | `HistoryStarted` | History gRPC | Search (logging) | | `HistoryEnded` | History gRPC | Search (index update) | | `RecordsAdded` | Sync | Search (index update) | | `HistoryPruned` | CLI (via Control) | Search (index rebuild) | | `HistoryDeleted` | CLI (via Control) | Search (index rebuild) | | `ForceSync` | CLI (via Control) | Sync | | `ShutdownRequested` | Signal handler | All (graceful shutdown) | ### External Event Injection CLI commands can inject events into a running daemon: ```rust // After `atuin history prune` emit_event(DaemonEvent::HistoryPruned).await?; // After deleting specific items emit_event(DaemonEvent::HistoryDeleted { ids }).await?; // Request immediate sync emit_event(DaemonEvent::ForceSync).await?; ``` This ensures the daemon's search index stays in sync with database changes made by CLI commands. ## Search Architecture The search service uses a [forked version of Nucleo](https://github.com/atuinsh/nucleo-ext) that adds filter and scorer callbacks, enabling efficient filtering and frecency-based ranking. ``` ┌────────────────────────────────────────────────────────────────┐ │ Atuin Daemon │ │ │ │ ┌─────────────────┐ ┌──────────────────────────────────┐ │ │ │ Event System │───▶│ Search Component │ │ │ │ │ │ │ │ │ │ • RecordsAdded │ │ ┌────────────────────────────┐ │ │ │ │ • HistoryEnded │ │ │ Deduplicated Index │ │ │ │ │ • HistoryPruned │ │ │ │ │ │ │ └─────────────────┘ │ │ CommandData per command: │ │ │ │ │ │ • Global frecency │ │ │ │ ┌─────────────────┐ │ │ • Filter indexes (sets) │ │ │ │ │ Background Task │ │ │ • Invocation history │ │ │ │ │ │ │ └────────────────────────────┘ │ │ │ │ Rebuilds │ │ │ │ │ │ │ frecency map │ │ ▼ │ │ │ │ every 60s │───▶│ ┌────────────────────────────┐ │ │ │ └─────────────────┘ │ │ Nucleo (forked) │ │ │ │ │ │ │ │ │ │ │ │ • Filter callback │ │ │ │ │ │ • Scorer callback │ │ │ │ │ │ • Fuzzy matching │ │ │ │ │ └────────────────────────────┘ │ │ │ └──────────────────────────────────┘ │ │ │ │ │ │ gRPC (Unix socket) │ └──────────────────────────────────────│─────────────────────────┘ │ ▼ ┌─────────────────────────────────────────────────────────────────┐ │ Search TUI (Client) │ │ │ │ 1. Send query + filter mode + context to daemon │ │ 2. Receive matching history IDs (ranked by frecency) │ │ 3. Hydrate full records from local SQLite database │ │ 4. Display results in TUI │ └─────────────────────────────────────────────────────────────────┘ ``` ### Nucleo Fork The [nucleo-ext fork](https://github.com/atuinsh/nucleo-ext) adds two key features to Nucleo: 1. **Filter callback**: Pre-filter items before fuzzy matching (used for directory/host/session filtering) 2. **Scorer callback**: Compute custom scores after matching (used for frecency ranking) ```rust // Filter: only include commands run in current directory nucleo.set_filter(Some(Arc::new(|cmd: &String| { passing_commands.contains(cmd) }))); // Scorer: combine fuzzy score with frecency nucleo.set_scorer(Some(Arc::new(|cmd: &String, fuzzy_score: u32| { let frecency = frecency_map.get(cmd).unwrap_or(0); fuzzy_score + (frecency * 10) }))); ``` ### Deduplicated Index Commands are stored once per unique command text, with metadata tracking all invocations: ```rust struct CommandData { command: String, invocations: Vec, // All times this command was run global_frecency: FrecencyData, // Precomputed frecency score // O(1) filter indexes directories: HashSet, // All cwds where command was run hosts: HashSet, // All hostnames sessions: HashSet, // All session IDs } ``` This deduplication means: - **Fewer items to match**: ~13K unique commands vs ~62K history entries - **O(1) filter checks**: HashSet lookups instead of scanning invocations - **Single frecency score**: Global frecency computed once, used for all filter modes ### Frecency Scoring Frecency (frequency + recency) scoring prioritizes recently and frequently used commands: ```rust fn compute_frecency(count: u32, last_used: i64, now: i64) -> u32 { let age_hours = (now - last_used) / 3600; // Recency: decays over time (half-life ~24 hours) let recency = (100.0 * (-age_hours as f64 / 24.0).exp()) as u32; // Frequency: logarithmic scaling let frequency = (count.ln() * 20.0).min(100.0) as u32; recency + frequency } ``` The frecency map is: - **Precomputed by background task** every 60 seconds - **Never computed inline** during search (no latency impact) - **Graceful fallback**: If unavailable, search works without frecency ranking ### Filter Mode Implementation | Filter Mode | Implementation | |-------------|----------------| | Global | No filter (all commands) | | Directory | `command.directories.contains(cwd)` | | Workspace | `command.directories.any(\|d\| d.starts_with(git_root))` | | Host | `command.hosts.contains(hostname)` | | Session | `command.sessions.contains(session_id)` | Filters are pre-computed into a HashSet before the search, making the filter callback O(1). ### Search Flow 1. **Daemon startup**: Loads history from SQLite in pages, builds deduplicated index 2. **Frecency precompute**: Background task builds frecency map after history loads 3. **Search request**: Client sends query with filter mode and context 4. **Filter**: Pre-computed HashSet determines which commands pass the filter 5. **Match**: Nucleo fuzzy matches the query against command text 6. **Score**: Frecency scorer ranks results (fuzzy score + frecency * 10) 7. **Response**: Returns history IDs for the most recent invocation of each matching command 8. **Hydration**: Client fetches full records from local SQLite ### Configuration ```toml # Enable daemon + autostart [daemon] enabled = true autostart = true # Enable daemon-based fuzzy search [search] search_mode = "daemon-fuzzy" ``` ## Performance Performance varies based on several factors, but in most initial testing with the new architecture shows improvement: * **Nucleo performs searches up to 4.5x faster**: direct DB search averages 18.07ms, but the daemon completes the same queries in 3.99ms. * **IPC overhead is significant, but acceptable**: a significant amount of wall-time is taken up by the transfer of data over IPC (via UDS in this case). This averages to about ~7.8ms and accounts for 66% of client-side wall time. * **Tail latency improves at every layer**: p99 times correspond to initial requests, worst-case query patterns, etc. but the average p99 daemon-based response time is 3.6x better than the associated DB-based search p99 time * **Query complexity no longer impacts performance**: the Nucleo-based search shows consistent 2-7ms times regardless of query pattern. The DB-based search had a 17x variance (3.59ms to 62.46ms). Interestingly, @ellie - who has a larger history store than I do - gets even better performance on the IPC layer. This could use a lot more testing in various edge cases and on various hardware, but seems promising. ### Regular DB search ``` Individual calls for: db_search -------------------------------------------------------------------------------------------------------------- # Wall Busy Idle Fields -------------------------------------------------------------------------------------------------------------- 1 32.25ms 32.20ms 47.70µs {"mode":"Fuzzy","query":"^"} 2 19.48ms 19.40ms 84.20µs {"mode":"Fuzzy","query":"^c"} 3 20.40ms 20.10ms 297.00µs {"mode":"Fuzzy","query":"^ca"} 4 13.07ms 13.00ms 69.90µs {"mode":"Fuzzy","query":"^car"} 5 12.17ms 12.10ms 67.10µs {"mode":"Fuzzy","query":"^carg"} 6 20.78ms 20.70ms 76.60µs {"mode":"Fuzzy","query":"^cargo"} 7 9.15ms 9.10ms 53.20µs {"mode":"Fuzzy","query":"^cargo "} 8 10.24ms 10.00ms 237.00µs {"mode":"Fuzzy","query":"^cargo b"} 9 10.01ms 9.68ms 325.00µs {"mode":"Fuzzy","query":"^cargo bu"} 10 5.89ms 5.83ms 57.20µs {"mode":"Fuzzy","query":"^cargo bui"} 11 8.85ms 8.28ms 568.00µs {"mode":"Fuzzy","query":"^cargo buil"} 12 7.70ms 7.49ms 212.00µs {"mode":"Fuzzy","query":"^cargo build"} 13 3.59ms 3.53ms 57.00µs {"mode":"Fuzzy","query":"^cargo build$"} 14 6.50ms 6.44ms 63.60µs {"mode":"Fuzzy","query":"^cargo "} 15 6.48ms 6.38ms 100.00µs {"mode":"Fuzzy","query":"!"} 16 31.68ms 31.60ms 75.90µs {"mode":"Fuzzy","query":"!g"} 17 62.46ms 62.40ms 58.90µs {"mode":"Fuzzy","query":"!gi"} 18 30.35ms 30.30ms 46.90µs {"mode":"Fuzzy","query":"!git"} 19 53.84ms 53.80ms 40.80µs {"mode":"Fuzzy","query":"!git "} 20 19.24ms 19.20ms 39.70µs {"mode":"Fuzzy","query":"!git c"} 21 22.03ms 22.00ms 34.70µs {"mode":"Fuzzy","query":"!git co"} 22 17.13ms 17.00ms 133.00µs {"mode":"Fuzzy","query":"!git com"} 23 16.14ms 15.90ms 242.00µs {"mode":"Fuzzy","query":"!git comm"} 24 5.11ms 5.08ms 28.60µs {"mode":"Fuzzy","query":"!git commi"} 25 7.31ms 7.26ms 52.70µs {"mode":"Fuzzy","query":"!git commit"} Summary: 25 calls Wall: avg=18.07ms, min=3.59ms, max=62.46ms, p50=13.07ms, p99=62.46ms Busy: avg=17.95ms, min=3.53ms, max=62.40ms, p50=13.00ms, p99=62.40ms ``` ### Daemon-based search **Client** ``` Individual calls for: daemon_search -------------------------------------------------------------------------------------------------------------- # Wall Busy Idle Fields -------------------------------------------------------------------------------------------------------------- 1 13.05ms 2.55ms 10.50ms {"query":"^"} 2 10.65ms 1.40ms 9.25ms {"query":"^c"} 3 10.72ms 1.18ms 9.54ms {"query":"^ca"} 4 5.54ms 485.00µs 5.06ms {"query":"^car"} 5 15.02ms 1.02ms 14.00ms {"query":"^carg"} 6 9.49ms 840.00µs 8.65ms {"query":"^cargo"} 7 5.53ms 555.00µs 4.97ms {"query":"^cargo "} 8 8.56ms 717.00µs 7.84ms {"query":"^cargo b"} 9 12.34ms 1.24ms 11.10ms {"query":"^cargo bu"} 10 8.38ms 650.00µs 7.73ms {"query":"^cargo bui"} 11 13.07ms 770.00µs 12.30ms {"query":"^cargo buil"} 12 17.11ms 709.00µs 16.40ms {"query":"^cargo build"} 13 15.41ms 907.00µs 14.50ms {"query":"^cargo build$"} 14 8.19ms 665.00µs 7.52ms {"query":"^cargo "} 15 7.98ms 1.72ms 6.26ms {"query":"!"} 16 13.56ms 856.00µs 12.70ms {"query":"!g"} 17 8.11ms 624.00µs 7.49ms {"query":"!gi"} 18 14.57ms 775.00µs 13.80ms {"query":"!git"} 19 14.18ms 779.00µs 13.40ms {"query":"!git "} 20 9.62ms 802.00µs 8.82ms {"query":"!git c"} 21 15.50ms 1.50ms 14.00ms {"query":"!git co"} 22 11.58ms 1.48ms 10.10ms {"query":"!git com"} 23 13.82ms 2.12ms 11.70ms {"query":"!git comm"} 24 17.48ms 2.18ms 15.30ms {"query":"!git commi"} 25 14.81ms 1.71ms 13.10ms {"query":"!git commit"} Summary: 25 calls Wall: avg=11.77ms, min=5.53ms, max=17.48ms, p50=12.34ms, p99=17.48ms Busy: avg=1.13ms, min=485.00µs, max=2.55ms, p50=856.00µs, p99=2.55ms ``` **Daemon** ``` Individual calls for: daemon_search_query -------------------------------------------------------------------------------------------------------------- # Wall Busy Idle Fields -------------------------------------------------------------------------------------------------------------- 1 1.75ms 250ns 1.75ms {"query":"^","query_id":1} 2 4.58ms 125ns 4.58ms {"query":"^c","query_id":2} 3 4.39ms 250ns 4.39ms {"query":"^ca","query_id":3} 4 2.52ms 125ns 2.52ms {"query":"^car","query_id":4} 5 4.44ms 250ns 4.44ms {"query":"^carg","query_id":5} 6 3.66ms 167ns 3.66ms {"query":"^cargo","query_id":6} 7 2.38ms 84ns 2.38ms {"query":"^cargo ","query_id":7} 8 4.13ms 84ns 4.13ms {"query":"^cargo b","query_id":8} 9 4.40ms 167ns 4.40ms {"query":"^cargo bu","query_id":9} 10 3.87ms 125ns 3.87ms {"query":"^cargo bui","query_id":10} 11 4.36ms 84ns 4.36ms {"query":"^cargo buil","query_id":11} 12 3.96ms 333ns 3.96ms {"query":"^cargo build","query_id":12} 13 4.61ms 167ns 4.61ms {"query":"^cargo build$","query_id":13} 14 4.20ms 209ns 4.20ms {"query":"^cargo ","query_id":14} 15 238.17µs 167ns 238.00µs {"query":"!","query_id":15} 16 4.44ms 125ns 4.44ms {"query":"!g","query_id":16} 17 3.47ms 83ns 3.47ms {"query":"!gi","query_id":17} 18 4.57ms 125ns 4.57ms {"query":"!git","query_id":18} 19 7.15ms 167ns 7.15ms {"query":"!git ","query_id":19} 20 4.27ms 250ns 4.27ms {"query":"!git c","query_id":20} 21 5.19ms 292ns 5.19ms {"query":"!git co","query_id":21} 22 4.29ms 417ns 4.29ms {"query":"!git com","query_id":22} 23 4.08ms 125ns 4.08ms {"query":"!git comm","query_id":23} 24 4.50ms 167ns 4.50ms {"query":"!git commi","query_id":24} 25 4.35ms 208ns 4.35ms {"query":"!git commit","query_id":25} Summary: 25 calls Wall: avg=3.99ms, min=238.17µs, max=7.15ms, p50=4.29ms, p99=7.15ms Busy: avg=182ns, min=83ns, max=417ns, p50=167ns, p99=417ns ``` **Nucleo matching time (in daemon)** ``` Individual calls for: nucleo_match -------------------------------------------------------------------------------------------------------------- # Wall Busy Idle Fields -------------------------------------------------------------------------------------------------------------- 1 1.73ms 125ns 1.73ms {"query":"^","query_id":1} 2 4.57ms 167ns 4.57ms {"query":"^c","query_id":2} 3 4.37ms 125ns 4.37ms {"query":"^ca","query_id":3} 4 2.51ms 84ns 2.51ms {"query":"^car","query_id":4} 5 4.43ms 125ns 4.43ms {"query":"^carg","query_id":5} 6 3.64ms 125ns 3.64ms {"query":"^cargo","query_id":6} 7 2.37ms 84ns 2.37ms {"query":"^cargo ","query_id":7} 8 4.11ms 125ns 4.11ms {"query":"^cargo b","query_id":8} 9 4.36ms 208ns 4.36ms {"query":"^cargo bu","query_id":9} 10 3.85ms 125ns 3.85ms {"query":"^cargo bui","query_id":10} 11 4.35ms 125ns 4.35ms {"query":"^cargo buil","query_id":11} 12 3.94ms 250ns 3.94ms {"query":"^cargo build","query_id":12} 13 4.59ms 125ns 4.59ms {"query":"^cargo build$","query_id":13} 14 4.18ms 84ns 4.18ms {"query":"^cargo ","query_id":14} 15 220.13µs 125ns 220.00µs {"query":"!","query_id":15} 16 4.43ms 125ns 4.43ms {"query":"!g","query_id":16} 17 3.45ms 125ns 3.45ms {"query":"!gi","query_id":17} 18 4.55ms 125ns 4.55ms {"query":"!git","query_id":18} 19 7.12ms 209ns 7.12ms {"query":"!git ","query_id":19} 20 4.25ms 166ns 4.25ms {"query":"!git c","query_id":20} 21 5.18ms 125ns 5.18ms {"query":"!git co","query_id":21} 22 4.27ms 125ns 4.27ms {"query":"!git com","query_id":22} 23 4.06ms 292ns 4.06ms {"query":"!git comm","query_id":23} 24 4.46ms 166ns 4.46ms {"query":"!git commi","query_id":24} 25 4.31ms 208ns 4.31ms {"query":"!git commit","query_id":25} Summary: 25 calls Wall: avg=3.97ms, min=220.13µs, max=7.12ms, p50=4.27ms, p99=7.12ms Busy: avg=147ns, min=84ns, max=292ns, p50=125ns, p99=292ns ``` --- crates/atuin-daemon/src/components/history.rs | 252 ++++++++++++++++ crates/atuin-daemon/src/components/mod.rs | 22 ++ crates/atuin-daemon/src/components/search.rs | 394 ++++++++++++++++++++++++++ crates/atuin-daemon/src/components/sync.rs | 257 +++++++++++++++++ 4 files changed, 925 insertions(+) create mode 100644 crates/atuin-daemon/src/components/history.rs create mode 100644 crates/atuin-daemon/src/components/mod.rs create mode 100644 crates/atuin-daemon/src/components/search.rs create mode 100644 crates/atuin-daemon/src/components/sync.rs (limited to 'crates/atuin-daemon/src/components') diff --git a/crates/atuin-daemon/src/components/history.rs b/crates/atuin-daemon/src/components/history.rs new file mode 100644 index 00000000..23d48c5e --- /dev/null +++ b/crates/atuin-daemon/src/components/history.rs @@ -0,0 +1,252 @@ +//! History component. +//! +//! Handles command history lifecycle (start/end) and provides the History gRPC service. + +use std::sync::Arc; + +use atuin_client::{ + database::Database, + history::{History, HistoryId, store::HistoryStore}, + settings::Settings, +}; +use dashmap::DashMap; +use eyre::Result; +use time::OffsetDateTime; +use tonic::{Request, Response, Status}; +use tracing::{Level, instrument}; + +use crate::{ + daemon::{Component, DaemonHandle}, + events::DaemonEvent, + history::{ + EndHistoryReply, EndHistoryRequest, ShutdownReply, ShutdownRequest, StartHistoryReply, + StartHistoryRequest, StatusReply, StatusRequest, + history_server::{History as HistorySvc, HistoryServer}, + }, +}; + +const DAEMON_PROTOCOL_VERSION: u32 = 1; + +/// History component - manages command history lifecycle. +/// +/// This component: +/// - Tracks currently running commands (stored in memory) +/// - Saves completed commands to the database and record store +/// - Emits history events for other components (e.g., search indexing) +/// - Provides the History gRPC service +pub struct HistoryComponent { + inner: Arc, +} + +struct HistoryComponentInner { + /// Commands currently running (not yet completed). + running: DashMap, + + /// Handle to the daemon (set during start). + handle: tokio::sync::RwLock>, + + /// History store for pushing records (set during start). + history_store: tokio::sync::RwLock>, +} + +impl HistoryComponent { + /// Create a new history component. + pub fn new() -> Self { + Self { + inner: Arc::new(HistoryComponentInner { + running: DashMap::new(), + handle: tokio::sync::RwLock::new(None), + history_store: tokio::sync::RwLock::new(None), + }), + } + } + + /// Get the gRPC service for this component. + /// + /// This returns a tonic service that can be added to a gRPC server. + pub fn grpc_service(&self) -> HistoryServer { + HistoryServer::new(HistoryGrpcService { + inner: self.inner.clone(), + }) + } +} + +impl Default for HistoryComponent { + fn default() -> Self { + Self::new() + } +} + +#[tonic::async_trait] +impl Component for HistoryComponent { + fn name(&self) -> &'static str { + "history" + } + + async fn start(&mut self, handle: DaemonHandle) -> Result<()> { + // Create the history store + let host_id = Settings::host_id().await?; + let history_store = + HistoryStore::new(handle.store().clone(), host_id, *handle.encryption_key()); + + *self.inner.history_store.write().await = Some(history_store); + *self.inner.handle.write().await = Some(handle); + + tracing::info!("history component started"); + Ok(()) + } + + async fn handle_event(&mut self, _event: &DaemonEvent) -> Result<()> { + // History component produces events but doesn't need to react to them + Ok(()) + } + + async fn stop(&mut self) -> Result<()> { + tracing::info!("history component stopped"); + Ok(()) + } +} + +/// The gRPC service implementation. +/// +/// This is a thin wrapper that delegates to the component's shared state. +pub struct HistoryGrpcService { + inner: Arc, +} + +#[tonic::async_trait] +impl HistorySvc for HistoryGrpcService { + #[instrument(skip_all, level = Level::INFO)] + async fn start_history( + &self, + request: Request, + ) -> Result, Status> { + let req = request.into_inner(); + + let timestamp = + OffsetDateTime::from_unix_timestamp_nanos(req.timestamp as i128).map_err(|_| { + Status::invalid_argument( + "failed to parse timestamp as unix time (expected nanos since epoch)", + ) + })?; + + let h: History = History::daemon() + .timestamp(timestamp) + .command(req.command) + .cwd(req.cwd) + .session(req.session) + .hostname(req.hostname) + .build() + .into(); + + // Emit the event + if let Some(handle) = self.inner.handle.read().await.as_ref() { + handle.emit(DaemonEvent::HistoryStarted(h.clone())); + } + + let id = h.id.clone(); + tracing::info!(id = id.to_string(), "start history"); + self.inner.running.insert(id.clone(), h); + + let reply = StartHistoryReply { + id: id.to_string(), + version: env!("CARGO_PKG_VERSION").to_string(), + protocol: DAEMON_PROTOCOL_VERSION, + }; + + Ok(Response::new(reply)) + } + + #[instrument(skip_all, level = Level::INFO)] + async fn end_history( + &self, + request: Request, + ) -> Result, Status> { + let req = request.into_inner(); + let id = HistoryId(req.id); + + if let Some((_, mut history)) = self.inner.running.remove(&id) { + history.exit = req.exit; + history.duration = match req.duration { + 0 => i64::try_from( + (OffsetDateTime::now_utc() - history.timestamp).whole_nanoseconds(), + ) + .expect("failed to convert calculated duration to i64"), + value => i64::try_from(value).expect("failed to get i64 duration"), + }; + + // Get the handle and store to save the history + let handle_guard = self.inner.handle.read().await; + let handle = handle_guard + .as_ref() + .ok_or_else(|| Status::internal("component not initialized"))?; + + let store_guard = self.inner.history_store.read().await; + let history_store = store_guard + .as_ref() + .ok_or_else(|| Status::internal("component not initialized"))?; + + // Save to database + handle + .history_db() + .save(&history) + .await + .map_err(|e| Status::internal(format!("failed to write to db: {e:?}")))?; + + tracing::info!( + id = id.0.to_string(), + duration = history.duration, + "end history" + ); + + // Push to record store + let (record_id, idx) = history_store + .push(history.clone()) + .await + .map_err(|e| Status::internal(format!("failed to push record to store: {e:?}")))?; + + // Emit the event + handle.emit(DaemonEvent::HistoryEnded(history)); + + let reply = EndHistoryReply { + id: record_id.0.to_string(), + idx, + version: env!("CARGO_PKG_VERSION").to_string(), + protocol: DAEMON_PROTOCOL_VERSION, + }; + + return Ok(Response::new(reply)); + } + + Err(Status::not_found(format!( + "could not find history with id: {id}" + ))) + } + + #[instrument(skip_all, level = Level::INFO)] + async fn status( + &self, + _request: Request, + ) -> Result, Status> { + let reply = StatusReply { + healthy: true, + version: env!("CARGO_PKG_VERSION").to_string(), + pid: std::process::id(), + protocol: DAEMON_PROTOCOL_VERSION, + }; + + Ok(Response::new(reply)) + } + + #[instrument(skip_all, level = Level::INFO)] + async fn shutdown( + &self, + _request: Request, + ) -> Result, Status> { + // Use the daemon handle to request shutdown + if let Some(handle) = self.inner.handle.read().await.as_ref() { + handle.shutdown(); + } + Ok(Response::new(ShutdownReply { accepted: true })) + } +} diff --git a/crates/atuin-daemon/src/components/mod.rs b/crates/atuin-daemon/src/components/mod.rs new file mode 100644 index 00000000..5950d5d5 --- /dev/null +++ b/crates/atuin-daemon/src/components/mod.rs @@ -0,0 +1,22 @@ +//! Daemon components. +//! +//! Components are the building blocks of the daemon. Each component handles +//! a specific domain and can: +//! +//! - Expose gRPC services +//! - React to events +//! - Spawn background tasks +//! +//! Available components: +//! +//! - [`history::HistoryComponent`]: Command history lifecycle management +//! - [`search::SearchComponent`]: Fuzzy search over history +//! - [`sync::SyncComponent`]: Cloud sync + +pub mod history; +pub mod search; +pub mod sync; + +pub use history::HistoryComponent; +pub use search::SearchComponent; +pub use sync::SyncComponent; diff --git a/crates/atuin-daemon/src/components/search.rs b/crates/atuin-daemon/src/components/search.rs new file mode 100644 index 00000000..7fb59dea --- /dev/null +++ b/crates/atuin-daemon/src/components/search.rs @@ -0,0 +1,394 @@ +//! Search component. +//! +//! Provides fuzzy search over command history using the Nucleo search library +//! with frecency-based ranking and dynamic filtering. + +use std::{pin::Pin, sync::Arc}; + +use atuin_client::database::Database; +use eyre::Result; +use tokio::sync::RwLock; +use tokio_stream::Stream; +use tonic::{Request, Response, Status, Streaming}; +use tracing::{Level, debug, info, instrument, span, trace}; +use uuid::Uuid; + +use crate::{ + daemon::{Component, DaemonHandle}, + events::DaemonEvent, + search::{ + FilterMode, IndexFilterMode, QueryContext, SearchIndex, SearchRequest, SearchResponse, + search_server::{Search as SearchSvc, SearchServer}, + }, +}; + +const PAGE_SIZE: usize = 5000; +const RESULTS_LIMIT: u32 = 200; +/// How often to rebuild the frecency map (in seconds). +const FRECENCY_REFRESH_INTERVAL_SECS: u64 = 60; + +/// Search component - provides fuzzy search over command history. +/// +/// This component: +/// - Maintains a deduplicated search index with frecency ranking +/// - Loads history from the database on startup +/// - Updates the index when history events occur +/// - Provides the Search gRPC service +pub struct SearchComponent { + index: Arc>, + handle: tokio::sync::RwLock>, + loader_handle: Option>, + frecency_handle: Option>, +} + +impl SearchComponent { + /// Create a new search component. + pub fn new() -> Self { + Self { + index: Arc::new(RwLock::new(SearchIndex::new())), + handle: tokio::sync::RwLock::new(None), + loader_handle: None, + frecency_handle: None, + } + } + + /// Get the gRPC service for this component. + pub fn grpc_service(&self) -> SearchServer { + SearchServer::new(SearchGrpcService { + index: self.index.clone(), + }) + } + + /// Rebuild the entire search index from the database. + async fn rebuild_index(&self) -> Result<()> { + let handle_guard = self.handle.read().await; + let handle = handle_guard + .as_ref() + .ok_or_else(|| eyre::eyre!("component not initialized"))?; + + info!("Rebuilding search index from database"); + + // Create a new index + let new_index = SearchIndex::new(); + + // Load all history into the new index + let db = handle.history_db().clone(); + let mut pager = db.all_paged(PAGE_SIZE, false, true); + loop { + match pager.next().await { + Ok(Some(histories)) => { + info!( + "Loading {} history entries into search index", + histories.len() + ); + new_index.add_histories(&histories); + } + Ok(None) => break, + Err(e) => { + tracing::error!("Failed to load history during rebuild: {}", e); + break; + } + } + } + + info!( + "Search index rebuild complete; {} unique commands", + new_index.command_count() + ); + + // Replace the old index with the new one + *self.index.write().await = new_index; + Ok(()) + } +} + +impl Default for SearchComponent { + fn default() -> Self { + Self::new() + } +} + +#[tonic::async_trait] +impl Component for SearchComponent { + fn name(&self) -> &'static str { + "search" + } + + async fn start(&mut self, handle: DaemonHandle) -> Result<()> { + *self.handle.write().await = Some(handle.clone()); + + // Spawn background task to load history into index + let index = self.index.clone(); + let db = handle.history_db().clone(); + + self.loader_handle = Some(tokio::spawn(async move { + info!( + "Loading history into search index; page size = {}", + PAGE_SIZE + ); + let mut pager = db.all_paged(PAGE_SIZE, false, true); + loop { + match pager.next().await { + Ok(Some(histories)) => { + info!( + "Loading {} history entries into search index", + histories.len() + ); + index.read().await.add_histories(&histories); + } + Ok(None) => { + info!( + "Initial history load complete; {} unique commands indexed", + index.read().await.command_count() + ); + // Build initial frecency map + index.read().await.rebuild_frecency().await; + info!("Initial frecency map built"); + break; + } + Err(e) => { + tracing::error!("Failed to load history: {}", e); + break; + } + } + } + })); + + // Spawn background task to periodically refresh frecency + let index_for_frecency = self.index.clone(); + self.frecency_handle = Some(tokio::spawn(async move { + let mut interval = tokio::time::interval(std::time::Duration::from_secs( + FRECENCY_REFRESH_INTERVAL_SECS, + )); + loop { + interval.tick().await; + trace!("Refreshing frecency map"); + index_for_frecency.read().await.rebuild_frecency().await; + } + })); + + tracing::info!("search component started"); + Ok(()) + } + + async fn handle_event(&mut self, event: &DaemonEvent) -> Result<()> { + match event { + DaemonEvent::RecordsAdded(records) => { + debug!( + count = records.len(), + "Processing added records for search index" + ); + + let handle_guard = self.handle.read().await; + if let Some(handle) = handle_guard.as_ref() { + let histories: Vec<_> = handle + .history_db() + .query_history( + format!( + "select * from history where id in ({})", + records + .iter() + .map(|record| record.0.to_string()) + .collect::>() + .join(",") + ) + .as_str(), + ) + .await + .unwrap_or_default(); + + span!(Level::TRACE, "inject_records", count = histories.len()) + .in_scope(async || { + self.index.read().await.add_histories(&histories); + }) + .await; + } + } + DaemonEvent::HistoryStarted(history) => { + debug!(id = %history.id, command = %history.command, "History started (no index action)"); + } + DaemonEvent::HistoryEnded(history) => { + span!(Level::TRACE, "inject_history_ended") + .in_scope(async || { + self.index.read().await.add_history(history); + }) + .await; + } + DaemonEvent::HistoryPruned | DaemonEvent::HistoryRebuilt => { + info!("History store pruned or rebuilt, rebuilding search index"); + if let Err(e) = self.rebuild_index().await { + tracing::error!("Failed to rebuild search index: {}", e); + } + } + DaemonEvent::HistoryDeleted { ids } => { + info!( + count = ids.len(), + "History deleted, rebuilding search index" + ); + // For now, just rebuild the entire index. A more efficient implementation + // would remove specific items from the index. + if let Err(e) = self.rebuild_index().await { + tracing::error!("Failed to rebuild search index: {}", e); + } + } + // Events we don't care about + DaemonEvent::SyncCompleted { .. } + | DaemonEvent::SyncFailed { .. } + | DaemonEvent::ForceSync + | DaemonEvent::SettingsReloaded + | DaemonEvent::ShutdownRequested => {} + } + Ok(()) + } + + async fn stop(&mut self) -> Result<()> { + if let Some(handle) = self.loader_handle.take() { + handle.abort(); + } + if let Some(handle) = self.frecency_handle.take() { + handle.abort(); + } + tracing::info!("search component stopped"); + Ok(()) + } +} + +/// The gRPC service implementation. +pub struct SearchGrpcService { + index: Arc>, +} + +#[tonic::async_trait] +impl SearchSvc for SearchGrpcService { + type SearchStream = Pin> + Send>>; + + #[instrument(skip_all, level = Level::TRACE, name = "search_rpc")] + async fn search( + &self, + request: Request>, + ) -> Result, Status> { + let mut in_stream = request.into_inner(); + let index = self.index.clone(); + + // Create output channel + let (tx, rx) = tokio::sync::mpsc::channel::>(128); + + // Spawn task to handle incoming requests and send responses + tokio::spawn(async move { + while let Some(req) = in_stream.message().await.transpose() { + match req { + Ok(search_req) => { + let query = search_req.query; + let query_id = search_req.query_id; + let filter_mode: FilterMode = search_req + .filter_mode + .try_into() + .unwrap_or(FilterMode::Global); + let proto_context = search_req.context; + + debug!( + "search request: query = {}, query_id = {}, filter_mode = {}, context = {:?}", + query, + query_id, + filter_mode.as_str_name(), + proto_context + ); + + // Convert proto FilterMode + context to IndexFilterMode + let index_filter = convert_filter_mode(filter_mode, &proto_context); + + // Build QueryContext from proto context + let query_context = proto_context + .map(|ctx| QueryContext { + cwd: Some(with_trailing_slash(&ctx.cwd)), + git_root: ctx.git_root.map(|s| with_trailing_slash(&s)), + hostname: Some(ctx.hostname), + session_id: Some(ctx.session_id), + }) + .unwrap_or_default(); + + // Perform the search + let history_ids = + span!(Level::TRACE, "daemon_search_query", %query, query_id) + .in_scope(|| async { + let index = index.read().await; + index + .search(&query, index_filter, &query_context, RESULTS_LIMIT) + .await + }) + .await; + + // Convert history IDs to bytes + let ids: Vec> = history_ids + .iter() + .filter_map(|id| { + Uuid::parse_str(id) + .ok() + .map(|uuid| uuid.as_bytes().to_vec()) + }) + .collect(); + + if tx.send(Ok(SearchResponse { query_id, ids })).await.is_err() { + break; // Client disconnected + } + } + Err(e) => { + let _ = tx.send(Err(e)).await; + break; + } + } + } + }); + + // Convert receiver to stream + let out_stream = tokio_stream::wrappers::ReceiverStream::new(rx); + Ok(Response::new(Box::pin(out_stream))) + } +} + +/// Convert proto FilterMode and context to IndexFilterMode. +fn convert_filter_mode( + mode: FilterMode, + context: &Option, +) -> IndexFilterMode { + match (mode, context) { + (FilterMode::Global, _) => IndexFilterMode::Global, + (FilterMode::Directory, Some(ctx)) => { + IndexFilterMode::Directory(with_trailing_slash(&ctx.cwd)) + } + (FilterMode::Workspace, Some(ctx)) => { + if let Some(ref git_root) = ctx.git_root { + IndexFilterMode::Workspace(with_trailing_slash(git_root)) + } else { + // Fall back to directory if no git root + IndexFilterMode::Directory(with_trailing_slash(&ctx.cwd)) + } + } + (FilterMode::Host, Some(ctx)) => IndexFilterMode::Host(ctx.hostname.clone()), + (FilterMode::Session, Some(ctx)) => IndexFilterMode::Session(ctx.session_id.clone()), + (FilterMode::SessionPreload, Some(ctx)) => { + // SessionPreload is similar to Session - filter by session + IndexFilterMode::Session(ctx.session_id.clone()) + } + // If no context provided, fall back to global + _ => IndexFilterMode::Global, + } +} + +#[cfg(windows)] +pub fn with_trailing_slash(s: &str) -> String { + if s.ends_with('\\') { + s.to_string() + } else { + format!("{}\\", s) + } +} + +#[cfg(not(windows))] +pub fn with_trailing_slash(s: &str) -> String { + if s.ends_with('/') { + s.to_string() + } else { + format!("{}/", s) + } +} diff --git a/crates/atuin-daemon/src/components/sync.rs b/crates/atuin-daemon/src/components/sync.rs new file mode 100644 index 00000000..6217706a --- /dev/null +++ b/crates/atuin-daemon/src/components/sync.rs @@ -0,0 +1,257 @@ +//! Sync component. +//! +//! Handles periodic synchronization with the Atuin cloud server. + +use eyre::Result; +use rand::Rng; +use tokio::sync::mpsc; +use tokio::time::{self, MissedTickBehavior}; + +use atuin_client::{history::store::HistoryStore, record::sync, settings::Settings}; +use atuin_dotfiles::store::{AliasStore, var::VarStore}; + +use crate::{ + daemon::{Component, DaemonHandle}, + events::DaemonEvent, +}; + +/// Commands that can be sent to the sync task. +enum SyncCommand { + /// Trigger an immediate sync. + ForceSync, + /// Stop the sync loop. + Stop, +} + +/// Sync component - handles periodic cloud synchronization. +/// +/// This component: +/// - Runs a background sync loop on a configurable interval +/// - Implements exponential backoff on sync failures +/// - Responds to ForceSync events for immediate sync +/// - Emits SyncCompleted/SyncFailed events +pub struct SyncComponent { + task_handle: Option>, + command_tx: Option>, +} + +impl SyncComponent { + /// Create a new sync component. + pub fn new() -> Self { + Self { + task_handle: None, + command_tx: None, + } + } +} + +impl Default for SyncComponent { + fn default() -> Self { + Self::new() + } +} + +#[tonic::async_trait] +impl Component for SyncComponent { + fn name(&self) -> &'static str { + "sync" + } + + async fn start(&mut self, handle: DaemonHandle) -> Result<()> { + let (cmd_tx, cmd_rx) = mpsc::channel(16); + self.command_tx = Some(cmd_tx); + + // Spawn the sync loop with its own copy of the handle + self.task_handle = Some(tokio::spawn(sync_loop(handle, cmd_rx))); + + tracing::info!("sync component started"); + Ok(()) + } + + async fn handle_event(&mut self, event: &DaemonEvent) -> Result<()> { + if let DaemonEvent::ForceSync = event { + tracing::info!("force sync requested"); + if let Some(tx) = &self.command_tx { + let _ = tx.send(SyncCommand::ForceSync).await; + } + } + Ok(()) + } + + async fn stop(&mut self) -> Result<()> { + if let Some(tx) = &self.command_tx { + let _ = tx.send(SyncCommand::Stop).await; + } + if let Some(handle) = self.task_handle.take() { + // Give the task a moment to shut down gracefully + let _ = tokio::time::timeout(std::time::Duration::from_secs(5), handle).await; + } + tracing::info!("sync component stopped"); + Ok(()) + } +} + +/// The main sync loop. +/// +/// This runs in a spawned task and handles periodic sync as well as +/// force sync requests. +async fn sync_loop(handle: DaemonHandle, mut cmd_rx: mpsc::Receiver) { + tracing::info!("sync loop starting"); + + // Clone settings since we need them across await points + let settings = handle.settings().await.clone(); + let host_id = match Settings::host_id().await { + Ok(id) => id, + Err(e) => { + tracing::error!("failed to get host id, sync disabled: {e}"); + return; + } + }; + + // Create the stores we need + let encryption_key = *handle.encryption_key(); + let history_store = HistoryStore::new(handle.store().clone(), host_id, encryption_key); + let alias_store = AliasStore::new(handle.store().clone(), host_id, encryption_key); + let var_store = VarStore::new(handle.store().clone(), host_id, encryption_key); + + // Don't backoff by more than 30 mins (with a random jitter of up to 1 min) + let max_interval: f64 = 60.0 * 30.0 + rand::thread_rng().gen_range(0.0..60.0); + + let mut ticker = time::interval(time::Duration::from_secs(settings.daemon.sync_frequency)); + + // IMPORTANT: without this, if we miss ticks because a sync takes ages or is otherwise delayed, + // we may end up running a lot of syncs in a hot loop. + ticker.set_missed_tick_behavior(MissedTickBehavior::Skip); + + loop { + tokio::select! { + _ = ticker.tick() => { + do_sync_tick( + &handle, + &history_store, + &alias_store, + &var_store, + &mut ticker, + max_interval, + ).await; + } + cmd = cmd_rx.recv() => { + match cmd { + Some(SyncCommand::ForceSync) => { + tracing::info!("executing force sync"); + do_sync_tick( + &handle, + &history_store, + &alias_store, + &var_store, + &mut ticker, + max_interval, + ).await; + } + Some(SyncCommand::Stop) | None => { + tracing::info!("sync loop stopping"); + break; + } + } + } + } + } +} + +/// Execute a single sync tick. +async fn do_sync_tick( + handle: &DaemonHandle, + history_store: &HistoryStore, + alias_store: &AliasStore, + var_store: &VarStore, + ticker: &mut time::Interval, + max_interval: f64, +) { + // Clone settings since we need them across await points + let settings = handle.settings().await.clone(); + + tracing::info!("sync tick"); + + // Check if logged in + let logged_in = match settings.logged_in().await { + Ok(v) => v, + Err(e) => { + tracing::warn!("failed to check login status, skipping sync tick: {e}"); + return; + } + }; + + if !logged_in { + tracing::debug!("not logged in, skipping sync tick"); + return; + } + + // Perform the sync + let res = sync::sync(&settings, handle.store()).await; + + match res { + Err(e) => { + tracing::error!("sync tick failed with {e}"); + + // Emit failure event + handle.emit(DaemonEvent::SyncFailed { + error: e.to_string(), + }); + + // Exponential backoff + let mut rng = rand::thread_rng(); + let mut new_interval = ticker.period().as_secs_f64() * rng.gen_range(2.0..2.2); + + if new_interval > max_interval { + new_interval = max_interval; + } + + *ticker = time::interval(time::Duration::from_secs(new_interval as u64)); + ticker.reset_after(time::Duration::from_secs(new_interval as u64)); + + tracing::error!("backing off, next sync tick in {new_interval}"); + } + Ok((uploaded_count, downloaded_records)) => { + tracing::info!( + uploaded = uploaded_count, + downloaded = downloaded_records.len(), + "sync complete" + ); + + // Build history from downloaded records + if let Err(e) = history_store + .incremental_build(handle.history_db(), &downloaded_records) + .await + { + tracing::error!("failed to build history from downloaded records: {e}"); + } + + // Emit the records added event (for search indexing) + handle.emit(DaemonEvent::RecordsAdded(downloaded_records.clone())); + + // Emit sync completed event + handle.emit(DaemonEvent::SyncCompleted { + uploaded: uploaded_count as usize, + downloaded: downloaded_records.len(), + }); + + // Rebuild alias and var stores + if let Err(e) = alias_store.build().await { + tracing::error!("failed to rebuild alias store: {e}"); + } + if let Err(e) = var_store.build().await { + tracing::error!("failed to rebuild var store: {e}"); + } + + // Reset backoff on success + if ticker.period().as_secs() != settings.daemon.sync_frequency { + *ticker = time::interval(time::Duration::from_secs(settings.daemon.sync_frequency)); + } + + // Store sync time + if let Err(e) = Settings::save_sync_time().await { + tracing::error!("failed to save sync time: {e}"); + } + } + } +} -- cgit v1.3.1