aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichelle Tilley <michelle@michelletilley.net>2026-02-26 19:08:08 -0800
committerGitHub <noreply@github.com>2026-02-26 19:08:08 -0800
commit00f9d1cdee372c48b48b1d237dcdb75f65d7f46d (patch)
treef52eebd950efc6a0e21060a73ac3d2053f0935de
parentchore(deps): bump actions/attest-build-provenance from 2 to 3 (#2897) (diff)
downloadatuin-00f9d1cdee372c48b48b1d237dcdb75f65d7f46d.zip
fix: Dramatically decrease daemon memory usage (#3211)
Reduces memory usage of the atuin-daemon search index by ~80% through string interning, compact UUID storage, and eliminating redundant data. ## Changes * **Eliminate Vec\<Invocation\>**: Replaced the per-command `Vec<Invocation>` with just `most_recent_id: [u8; 16]` and `most_recent_timestamp: i64`. We only ever needed the most recent history ID for search results - the full invocation history was never used. * **UUID byte storage**: Store UUIDs as `[u8; 16]` instead of 36-byte strings, saving 40 bytes per UUID. * **String interning with lasso**: Use `ThreadedRodeo` to deduplicate `cwd` and `hostname` strings in the filter sets. These values are highly repetitive (most commands run from a small set of directories on the same host), so interning has an outsized effect. * **DashSet → HashSet**: Since `CommandData` lives inside a `DashMap` (already synchronized), the inner sets don't need their own locks. Switched to `HashSet<Spur>` for directories/hosts and `HashSet<[u8; 16]>` for sessions. * **Arc\<str\> for commands**: Changed the `commands` DashMap key and `frecency_map` keys from `String` to `Arc<str>`, enabling zero-copy sharing between the two maps. * **Remove dead code**: Removed `CommandData.command` field that was duplicating the DashMap key. ## Results With 60k history entries, observed memory usage dropped from ~200MB to ~40MB.
-rw-r--r--Cargo.lock31
-rw-r--r--crates/atuin-daemon/Cargo.toml1
-rw-r--r--crates/atuin-daemon/src/search/index.rs263
3 files changed, 183 insertions, 112 deletions
diff --git a/Cargo.lock b/Cargo.lock
index c7ee0947..248da058 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -375,9 +375,10 @@ dependencies = [
"atuin-common",
"atuin-dotfiles",
"atuin-history",
- "dashmap",
+ "dashmap 5.5.3",
"eyre",
"hyper-util",
+ "lasso",
"listenfd",
"nucleo",
"prost",
@@ -1266,6 +1267,20 @@ dependencies = [
]
[[package]]
+name = "dashmap"
+version = "6.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+ "hashbrown 0.14.5",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core",
+]
+
+[[package]]
name = "deltae"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1923,6 +1938,10 @@ name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+dependencies = [
+ "ahash",
+ "allocator-api2",
+]
[[package]]
name = "hashbrown"
@@ -2470,6 +2489,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf36173d4167ed999940f804952e6b08197cae5ad5d572eb4db150ce8ad5d58f"
[[package]]
+name = "lasso"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e14eda50a3494b3bf7b9ce51c52434a761e383d7238ce1dd5dcec2fbc13e9fb"
+dependencies = [
+ "dashmap 6.1.0",
+ "hashbrown 0.14.5",
+]
+
+[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/crates/atuin-daemon/Cargo.toml b/crates/atuin-daemon/Cargo.toml
index 816d5961..90f875d5 100644
--- a/crates/atuin-daemon/Cargo.toml
+++ b/crates/atuin-daemon/Cargo.toml
@@ -28,6 +28,7 @@ tracing = { workspace = true }
tracing-subscriber = { workspace = true }
dashmap = "5.5.3"
+lasso = { version = "0.7", features = ["multi-threaded"] }
tonic-types = "0.14"
tonic = "0.14"
tonic-prost = "0.14"
diff --git a/crates/atuin-daemon/src/search/index.rs b/crates/atuin-daemon/src/search/index.rs
index b15b057f..ed23f94d 100644
--- a/crates/atuin-daemon/src/search/index.rs
+++ b/crates/atuin-daemon/src/search/index.rs
@@ -7,45 +7,31 @@
//! - Frecency-based ranking (frequency + recency)
//! - Dynamic filtering by directory, host, session, etc.
-use std::{collections::HashMap, sync::Arc};
+use std::{
+ collections::{HashMap, HashSet},
+ sync::Arc,
+};
use atuin_client::history::History;
-use dashmap::{DashMap, DashSet};
+use dashmap::DashMap;
+use lasso::{Spur, ThreadedRodeo};
use nucleo::{Injector, Nucleo, pattern};
use time::OffsetDateTime;
use tokio::sync::RwLock;
use tracing::{Level, instrument};
+use uuid::Uuid;
use crate::components::search::with_trailing_slash;
-/// Data for a single invocation of a command.
-#[derive(Debug, Clone)]
-pub struct Invocation {
- /// When the command was run.
- pub timestamp: i64,
- /// The working directory when the command was run.
- #[allow(dead_code)]
- pub cwd: String,
- /// The hostname where the command was run.
- #[allow(dead_code)]
- pub hostname: String,
- /// The session ID.
- #[allow(dead_code)]
- pub session: String,
- /// The history entry ID (for returning in search results).
- pub history_id: String,
+/// Parse a UUID string into a 16-byte array.
+/// Returns None if the string is not a valid UUID.
+fn parse_uuid_bytes(s: &str) -> Option<[u8; 16]> {
+ Uuid::parse_str(s).ok().map(|u| *u.as_bytes())
}
-impl From<&History> for Invocation {
- fn from(history: &History) -> Self {
- Self {
- timestamp: history.timestamp.unix_timestamp(),
- cwd: history.cwd.clone(),
- hostname: history.hostname.clone(),
- session: history.session.clone(),
- history_id: history.id.0.clone(),
- }
- }
+/// Format a 16-byte array as a UUID string.
+fn format_uuid_bytes(bytes: &[u8; 16]) -> String {
+ Uuid::from_bytes(*bytes).to_string()
}
/// Pre-computed frecency data for O(1) lookup.
@@ -103,90 +89,121 @@ impl FrecencyData {
}
}
-/// Data for a unique command, including all its invocations.
+/// Data for a unique command.
pub struct CommandData {
- /// The command text (stored for debugging/logging purposes).
- #[allow(dead_code)]
- pub command: String,
- /// All invocations of this command, sorted by timestamp (newest first).
- pub invocations: Vec<Invocation>,
+ /// History ID of the most recent invocation (16-byte UUID).
+ most_recent_id: [u8; 16],
+ /// Timestamp of the most recent invocation.
+ most_recent_timestamp: i64,
/// Pre-computed global frecency.
pub global_frecency: FrecencyData,
// Pre-computed indexes for O(1) filter lookups
- /// All directories where this command has been run.
- directories: DashSet<String>,
- /// All hostnames where this command has been run.
- hosts: DashSet<String>,
- /// All sessions where this command has been run.
- sessions: DashSet<String>,
+ // Using HashSet instead of DashSet since CommandData lives inside DashMap (already synchronized)
+ /// All directories where this command has been run (interned keys).
+ directories: HashSet<Spur>,
+ /// All hostnames where this command has been run (interned keys).
+ hosts: HashSet<Spur>,
+ /// All sessions where this command has been run (as 16-byte UUIDs).
+ sessions: HashSet<[u8; 16]>,
}
impl CommandData {
/// Create a new CommandData from a history entry.
- pub fn new(history: &History) -> Self {
- let mut data = Self {
- command: history.command.clone(),
- invocations: Vec::new(),
- global_frecency: FrecencyData::default(),
- directories: DashSet::new(),
- hosts: DashSet::new(),
- sessions: DashSet::new(),
- };
- data.add_invocation(history);
- data
+ /// Returns None if the history entry has invalid UUIDs.
+ pub fn new(history: &History, interner: &ThreadedRodeo) -> Option<Self> {
+ let history_id = parse_uuid_bytes(&history.id.0)?;
+ let session = parse_uuid_bytes(&history.session)?;
+ let timestamp = history.timestamp.unix_timestamp();
+
+ let dir_key = interner.get_or_intern(with_trailing_slash(&history.cwd));
+ let host_key = interner.get_or_intern(&history.hostname);
+
+ let mut directories = HashSet::new();
+ directories.insert(dir_key);
+
+ let mut hosts = HashSet::new();
+ hosts.insert(host_key);
+
+ let mut sessions = HashSet::new();
+ sessions.insert(session);
+
+ let mut global_frecency = FrecencyData::default();
+ global_frecency.record_use(timestamp);
+
+ Some(Self {
+ most_recent_id: history_id,
+ most_recent_timestamp: timestamp,
+ global_frecency,
+ directories,
+ hosts,
+ sessions,
+ })
}
/// Add an invocation from a history entry.
- pub fn add_invocation(&mut self, history: &History) {
+ /// Returns false if the history entry has invalid UUIDs.
+ pub fn add_invocation(&mut self, history: &History, interner: &ThreadedRodeo) -> bool {
+ let Some(history_id) = parse_uuid_bytes(&history.id.0) else {
+ return false;
+ };
+ let Some(session) = parse_uuid_bytes(&history.session) else {
+ return false;
+ };
+
let timestamp = history.timestamp.unix_timestamp();
// Update global frecency
self.global_frecency.record_use(timestamp);
// Update pre-computed indexes for O(1) filter lookups
- self.directories.insert(with_trailing_slash(&history.cwd));
- self.hosts.insert(history.hostname.clone());
- self.sessions.insert(history.session.clone());
+ let dir_key = interner.get_or_intern(with_trailing_slash(&history.cwd));
+ self.directories.insert(dir_key);
+ self.hosts.insert(interner.get_or_intern(&history.hostname));
+ self.sessions.insert(session);
- let invocation = Invocation::from(history);
+ // Update most recent if this invocation is newer
+ if timestamp > self.most_recent_timestamp {
+ self.most_recent_id = history_id;
+ self.most_recent_timestamp = timestamp;
+ }
- // Insert sorted by timestamp (newest first)
- let pos = self
- .invocations
- .iter()
- .position(|inv| inv.timestamp < timestamp)
- .unwrap_or(self.invocations.len());
- self.invocations.insert(pos, invocation);
+ true
}
/// Get the most recent history ID for this command.
- pub fn most_recent_id(&self) -> Option<&str> {
- self.invocations.first().map(|inv| inv.history_id.as_str())
+ pub fn most_recent_id(&self) -> String {
+ format_uuid_bytes(&self.most_recent_id)
}
/// Check if any invocation matches a directory filter (exact match).
/// O(1) lookup using pre-computed index.
- pub fn has_invocation_in_dir(&self, dir: &str) -> bool {
- self.directories.contains(dir)
+ pub fn has_invocation_in_dir(&self, dir: &str, interner: &ThreadedRodeo) -> bool {
+ interner
+ .get(dir)
+ .is_some_and(|spur| self.directories.contains(&spur))
}
/// Check if any invocation matches a directory prefix (workspace/git root).
/// O(n) where n = number of unique directories for this command.
- pub fn has_invocation_in_workspace(&self, prefix: &str) -> bool {
- self.directories.iter().any(|d| d.starts_with(prefix))
+ pub fn has_invocation_in_workspace(&self, prefix: &str, interner: &ThreadedRodeo) -> bool {
+ self.directories
+ .iter()
+ .any(|&spur| interner.resolve(&spur).starts_with(prefix))
}
/// Check if any invocation matches a hostname.
/// O(1) lookup using pre-computed index.
- pub fn has_invocation_on_host(&self, hostname: &str) -> bool {
- self.hosts.contains(hostname)
+ pub fn has_invocation_on_host(&self, hostname: &str, interner: &ThreadedRodeo) -> bool {
+ interner
+ .get(hostname)
+ .is_some_and(|spur| self.hosts.contains(&spur))
}
/// Check if any invocation matches a session.
/// O(1) lookup using pre-computed index.
pub fn has_invocation_in_session(&self, session: &str) -> bool {
- self.sessions.contains(session)
+ parse_uuid_bytes(session).is_some_and(|bytes| self.sessions.contains(&bytes))
}
}
@@ -214,6 +231,10 @@ pub struct QueryContext {
pub session_id: Option<String>,
}
+/// Shareable frecency map: command -> frecency score.
+/// Wrapped in Arc for zero-copy sharing with scorer callbacks.
+type FrecencyMap = Arc<HashMap<Arc<str>, u32>>;
+
/// A deduplicated search index with frecency-based ranking.
///
/// Commands are stored by their text, with metadata about all invocations.
@@ -225,14 +246,16 @@ pub struct QueryContext {
pub struct SearchIndex {
/// Map from command text to command data.
/// Using DashMap for concurrent read/write access, wrapped in Arc for sharing with scorer.
- commands: Arc<DashMap<String, CommandData>>,
+ /// Keys are Arc<str> to enable zero-copy sharing with frecency_map.
+ commands: Arc<DashMap<Arc<str>, CommandData>>,
/// Nucleo fuzzy matcher - items are command strings.
nucleo: RwLock<Nucleo<String>>,
/// Injector for adding new commands to Nucleo.
injector: Injector<String>,
- /// Precomputed global frecency map (command -> frecency score).
- /// Updated by background task. If None, search works without frecency.
- frecency_map: RwLock<Option<Arc<HashMap<String, u32>>>>,
+ /// Precomputed global frecency map. Updated by background task.
+ frecency_map: RwLock<Option<FrecencyMap>>,
+ /// String interner for deduplicating cwd, hostname, and directory paths.
+ interner: Arc<ThreadedRodeo>,
}
impl SearchIndex {
@@ -248,6 +271,7 @@ impl SearchIndex {
nucleo: RwLock::new(nucleo),
injector,
frecency_map: RwLock::new(None),
+ interner: Arc::new(ThreadedRodeo::new()),
}
}
@@ -256,16 +280,21 @@ impl SearchIndex {
/// If the command already exists, updates its invocation data.
/// If it's a new command, adds it to both the map and Nucleo.
pub fn add_history(&self, history: &History) {
- let command = &history.command;
+ let command = history.command.as_str();
+ // DashMap with Arc<str> keys can be looked up with &str via Borrow trait
if let Some(mut entry) = self.commands.get_mut(command) {
// Existing command - just update invocations
- entry.add_invocation(history);
+ entry.add_invocation(history, &self.interner);
} else {
- // New command - add to both map and Nucleo
- let data = CommandData::new(history);
- self.commands.insert(command.clone(), data);
- self.injector.push(command.clone(), |cmd, cols| {
+ // New command - create Arc<str> once and share it
+ let Some(data) = CommandData::new(history, &self.interner) else {
+ return; // Invalid UUIDs, skip this entry
+ };
+ let command_arc: Arc<str> = command.into();
+ self.commands.insert(Arc::clone(&command_arc), data);
+ // Nucleo still needs String (unavoidable copy for fuzzy matching)
+ self.injector.push(command_arc.to_string(), |cmd, cols| {
cols[0] = cmd.clone().into();
});
}
@@ -337,9 +366,10 @@ impl SearchIndex {
.matched_items(..matched_count)
.filter_map(|item| {
let cmd = item.data;
+ // DashMap<Arc<str>, _>::get accepts &str via Borrow trait
self.commands
- .get(cmd)
- .and_then(|data| data.most_recent_id().map(|s| s.to_string()))
+ .get(cmd.as_str())
+ .map(|data| data.most_recent_id())
})
.collect()
})
@@ -352,11 +382,12 @@ impl SearchIndex {
#[instrument(skip_all, level = tracing::Level::DEBUG, name = "rebuild_frecency")]
pub async fn rebuild_frecency(&self) {
let now = OffsetDateTime::now_utc().unix_timestamp();
- let mut frecency_map: HashMap<String, u32> = HashMap::new();
+ let mut frecency_map: HashMap<Arc<str>, u32> = HashMap::new();
for entry in self.commands.iter() {
let frecency = entry.global_frecency.compute(now);
- frecency_map.insert(entry.key().clone(), frecency);
+ // Arc::clone is cheap - just increments reference count
+ frecency_map.insert(Arc::clone(entry.key()), frecency);
}
*self.frecency_map.write().await = Some(Arc::new(frecency_map));
@@ -370,18 +401,26 @@ impl SearchIndex {
}
// Pre-compute which commands pass the filter
- let passing_commands: Arc<std::collections::HashSet<String>> = {
- let mut set = std::collections::HashSet::new();
+ // Use HashSet<String> for the short-lived filter (simpler than Arc lookup)
+ let passing_commands: Arc<HashSet<String>> = {
+ let mut set = HashSet::new();
for entry in self.commands.iter() {
let passes = match mode {
IndexFilterMode::Global => unreachable!(),
- IndexFilterMode::Directory(dir) => entry.has_invocation_in_dir(dir),
- IndexFilterMode::Workspace(prefix) => entry.has_invocation_in_workspace(prefix),
- IndexFilterMode::Host(hostname) => entry.has_invocation_on_host(hostname),
+ IndexFilterMode::Directory(dir) => {
+ entry.has_invocation_in_dir(dir, &self.interner)
+ }
+ IndexFilterMode::Workspace(prefix) => {
+ entry.has_invocation_in_workspace(prefix, &self.interner)
+ }
+ IndexFilterMode::Host(hostname) => {
+ entry.has_invocation_on_host(hostname, &self.interner)
+ }
IndexFilterMode::Session(session) => entry.has_invocation_in_session(session),
};
if passes {
- set.insert(entry.key().clone());
+ // Convert Arc<str> to String for filter lookup
+ set.insert(entry.key().to_string());
}
}
Arc::new(set)
@@ -393,12 +432,11 @@ impl SearchIndex {
/// Build scorer from precomputed frecency map.
///
/// Returns None if frecency map is not available (search still works, just without frecency ranking).
- fn build_scorer(
- frecency_map: Option<Arc<HashMap<String, u32>>>,
- ) -> Option<nucleo::Scorer<String>> {
+ fn build_scorer(frecency_map: Option<FrecencyMap>) -> Option<nucleo::Scorer<String>> {
let map = frecency_map?;
Some(Arc::new(move |cmd: &String, fuzzy_score: u32| {
- let frecency = map.get(cmd).copied().unwrap_or(0);
+ // HashMap<Arc<str>, _>::get accepts &str via Borrow trait
+ let frecency = map.get(cmd.as_str()).copied().unwrap_or(0);
fuzzy_score + frecency
}))
}
@@ -453,6 +491,8 @@ mod tests {
#[test]
fn command_data_add_invocation() {
+ let interner = ThreadedRodeo::new();
+
let (dir1, dir2) = if cfg!(windows) {
("C:\\Users\\User\\project", "C:\\Users\\User\\other")
} else {
@@ -462,21 +502,22 @@ mod tests {
let history1 = make_history("git status", dir1, datetime!(2024-01-01 10:00 UTC));
let history2 = make_history("git status", dir2, datetime!(2024-01-01 12:00 UTC));
- let mut data = CommandData::new(&history1);
- assert_eq!(data.invocations.len(), 1);
+ let mut data = CommandData::new(&history1, &interner).unwrap();
assert_eq!(data.global_frecency.count, 1);
+ let id1 = data.most_recent_id();
- data.add_invocation(&history2);
- assert_eq!(data.invocations.len(), 2);
+ data.add_invocation(&history2, &interner);
assert_eq!(data.global_frecency.count, 2);
- // Most recent should be first
- assert_eq!(data.invocations[0].cwd, dir2);
- assert_eq!(data.invocations[1].cwd, dir1);
+ // Most recent ID should update to history2 (newer timestamp)
+ let id2 = data.most_recent_id();
+ assert_ne!(id1, id2);
}
#[test]
fn command_data_filters() {
+ let interner = ThreadedRodeo::new();
+
let (dir1, dir2) = if cfg!(windows) {
("C:\\Users\\User\\project", "C:\\Users\\User\\other")
} else {
@@ -486,8 +527,8 @@ mod tests {
let h1 = make_history("git status", dir1, datetime!(2024-01-01 10:00 UTC));
let h2 = make_history("git status", dir2, datetime!(2024-01-01 12:00 UTC));
- let mut data = CommandData::new(&h1);
- data.add_invocation(&h2);
+ let mut data = CommandData::new(&h1, &interner).unwrap();
+ data.add_invocation(&h2, &interner);
let (check1, check2, check3) = if cfg!(windows) {
(
@@ -503,9 +544,9 @@ mod tests {
)
};
- assert!(data.has_invocation_in_dir(&check1));
- assert!(data.has_invocation_in_dir(&check2));
- assert!(!data.has_invocation_in_dir(&check3));
+ assert!(data.has_invocation_in_dir(&check1, &interner));
+ assert!(data.has_invocation_in_dir(&check2, &interner));
+ assert!(!data.has_invocation_in_dir(&check3, &interner));
let (check1, check2, check3) = if cfg!(windows) {
(
@@ -521,9 +562,9 @@ mod tests {
)
};
- assert!(data.has_invocation_in_workspace(&check1));
- assert!(data.has_invocation_in_workspace(&check2));
- assert!(!data.has_invocation_in_workspace(&check3));
+ assert!(data.has_invocation_in_workspace(&check1, &interner));
+ assert!(data.has_invocation_in_workspace(&check2, &interner));
+ assert!(!data.has_invocation_in_workspace(&check3, &interner));
}
#[tokio::test]