aboutsummaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
authorMichelle Tilley <michelle@michelletilley.net>2026-02-26 14:42:08 -0800
committerGitHub <noreply@github.com>2026-02-26 14:42:08 -0800
commit3ba47446f06d5b0fbeaeb59d4ffed768b70729d8 (patch)
tree28348bb3d18e9983e9212c26840f691766cad985 /scripts
parentfeat: Add history author/intent metadata and v1 record version (#3205) (diff)
downloadatuin-3ba47446f06d5b0fbeaeb59d4ffed768b70729d8.zip
feat: In-memory search index with atuin daemon (#3201)
## Summary This PR adds a persistent, in-memory search index to the Atuin daemon, enabling fast fuzzy search without the startup delay of building an index each time the TUI opens. ### Key Changes - **Daemon search service**: A new gRPC service that maintains a Nucleo fuzzy search index in memory - **Real-time index updates**: The daemon listens for history events (new commands, synced records) and updates the index immediately - **Filter mode support**: All existing filter modes work (Global, Host, Session, Directory, Workspace) - **New search engine**: `daemon-fuzzy` search mode that queries the daemon instead of building a local index - **Paged history loading**: Database pagination support for efficient initial index loading - **Configurable logging**: New `[logs]` settings section for daemon and search log configuration - **Component-based daemon architecture**: Refactored daemon internals into a modular, event-driven system - **Fallback to DB search for regex**: Since Nucleo doesn't support regex matching ## Daemon Architecture The daemon has been refactored to use a component-based, event-driven architecture that makes it easier to add new functionality and reason about the system. ### Core Concepts ``` ┌─────────────────────────────────────────────────────────────────────────────┐ │ Atuin Daemon │ │ │ │ ┌─────────────┐ ┌──────────────────────────────────────────────────┐ │ │ │ Daemon │ │ Components │ │ │ │ Handle │────▶│ │ │ │ │ │ │ ┌─────────────┐ ┌─────────────┐ ┌────────────┐ │ │ │ │ • emit() │ │ │ History │ │ Search │ │ Sync │ │ │ │ │ • subscribe │ │ │ Component │ │ Component │ │ Component │ │ │ │ │ • settings │ │ │ │ │ │ │ │ │ │ │ │ • databases │ │ │ gRPC service│ │ gRPC service│ │ background │ │ │ │ └─────────────┘ │ │ WIP history │ │ Nucleo index│ │ sync │ │ │ │ │ │ └─────────────┘ └─────────────┘ └────────────┘ │ │ │ │ └──────────────────────────────────────────────────┘ │ │ │ ▲ │ │ ▼ │ │ │ ┌─────────────────────────────────────┴────────────────────────────────┐ │ │ │ Event Bus (broadcast) │ │ │ │ │ │ │ │ HistoryStarted │ HistoryEnded │ RecordsAdded │ SyncCompleted │ ... │ │ │ └──────────────────────────────────────────────────────────────────────┘ │ │ ▲ │ │ ┌───────────────────────────────────┴──────────────────────────────────┐ │ │ │ Control Service (gRPC) │ │ │ │ External event injection from CLI commands │ │ │ └──────────────────────────────────────────────────────────────────────┘ │ └─────────────────────────────────────────────────────────────────────────────┘ ``` ### DaemonHandle A lightweight, cloneable handle that provides access to shared daemon resources: - **Event emission**: `handle.emit(DaemonEvent::...)` broadcasts to all components - **Event subscription**: `handle.subscribe()` returns a receiver for the event bus - **Settings**: `handle.settings()` for configuration access - **Databases**: `handle.history_db()` and `handle.store()` for data access ### Component Trait Components implement a simple lifecycle: ```rust #[async_trait] trait Component: Send + Sync { fn name(&self) -> &'static str; async fn start(&mut self, handle: DaemonHandle) -> Result<()>; async fn handle_event(&mut self, event: &DaemonEvent) -> Result<()>; async fn stop(&mut self) -> Result<()>; } ``` ### Event-Driven Design Components communicate via events rather than direct coupling: | Event | Emitted By | Consumed By | |-------|-----------|-------------| | `HistoryStarted` | History gRPC | Search (logging) | | `HistoryEnded` | History gRPC | Search (index update) | | `RecordsAdded` | Sync | Search (index update) | | `HistoryPruned` | CLI (via Control) | Search (index rebuild) | | `HistoryDeleted` | CLI (via Control) | Search (index rebuild) | | `ForceSync` | CLI (via Control) | Sync | | `ShutdownRequested` | Signal handler | All (graceful shutdown) | ### External Event Injection CLI commands can inject events into a running daemon: ```rust // After `atuin history prune` emit_event(DaemonEvent::HistoryPruned).await?; // After deleting specific items emit_event(DaemonEvent::HistoryDeleted { ids }).await?; // Request immediate sync emit_event(DaemonEvent::ForceSync).await?; ``` This ensures the daemon's search index stays in sync with database changes made by CLI commands. ## Search Architecture The search service uses a [forked version of Nucleo](https://github.com/atuinsh/nucleo-ext) that adds filter and scorer callbacks, enabling efficient filtering and frecency-based ranking. ``` ┌────────────────────────────────────────────────────────────────┐ │ Atuin Daemon │ │ │ │ ┌─────────────────┐ ┌──────────────────────────────────┐ │ │ │ Event System │───▶│ Search Component │ │ │ │ │ │ │ │ │ │ • RecordsAdded │ │ ┌────────────────────────────┐ │ │ │ │ • HistoryEnded │ │ │ Deduplicated Index │ │ │ │ │ • HistoryPruned │ │ │ │ │ │ │ └─────────────────┘ │ │ CommandData per command: │ │ │ │ │ │ • Global frecency │ │ │ │ ┌─────────────────┐ │ │ • Filter indexes (sets) │ │ │ │ │ Background Task │ │ │ • Invocation history │ │ │ │ │ │ │ └────────────────────────────┘ │ │ │ │ Rebuilds │ │ │ │ │ │ │ frecency map │ │ ▼ │ │ │ │ every 60s │───▶│ ┌────────────────────────────┐ │ │ │ └─────────────────┘ │ │ Nucleo (forked) │ │ │ │ │ │ │ │ │ │ │ │ • Filter callback │ │ │ │ │ │ • Scorer callback │ │ │ │ │ │ • Fuzzy matching │ │ │ │ │ └────────────────────────────┘ │ │ │ └──────────────────────────────────┘ │ │ │ │ │ │ gRPC (Unix socket) │ └──────────────────────────────────────│─────────────────────────┘ │ ▼ ┌─────────────────────────────────────────────────────────────────┐ │ Search TUI (Client) │ │ │ │ 1. Send query + filter mode + context to daemon │ │ 2. Receive matching history IDs (ranked by frecency) │ │ 3. Hydrate full records from local SQLite database │ │ 4. Display results in TUI │ └─────────────────────────────────────────────────────────────────┘ ``` ### Nucleo Fork The [nucleo-ext fork](https://github.com/atuinsh/nucleo-ext) adds two key features to Nucleo: 1. **Filter callback**: Pre-filter items before fuzzy matching (used for directory/host/session filtering) 2. **Scorer callback**: Compute custom scores after matching (used for frecency ranking) ```rust // Filter: only include commands run in current directory nucleo.set_filter(Some(Arc::new(|cmd: &String| { passing_commands.contains(cmd) }))); // Scorer: combine fuzzy score with frecency nucleo.set_scorer(Some(Arc::new(|cmd: &String, fuzzy_score: u32| { let frecency = frecency_map.get(cmd).unwrap_or(0); fuzzy_score + (frecency * 10) }))); ``` ### Deduplicated Index Commands are stored once per unique command text, with metadata tracking all invocations: ```rust struct CommandData { command: String, invocations: Vec<Invocation>, // All times this command was run global_frecency: FrecencyData, // Precomputed frecency score // O(1) filter indexes directories: HashSet<String>, // All cwds where command was run hosts: HashSet<String>, // All hostnames sessions: HashSet<String>, // All session IDs } ``` This deduplication means: - **Fewer items to match**: ~13K unique commands vs ~62K history entries - **O(1) filter checks**: HashSet lookups instead of scanning invocations - **Single frecency score**: Global frecency computed once, used for all filter modes ### Frecency Scoring Frecency (frequency + recency) scoring prioritizes recently and frequently used commands: ```rust fn compute_frecency(count: u32, last_used: i64, now: i64) -> u32 { let age_hours = (now - last_used) / 3600; // Recency: decays over time (half-life ~24 hours) let recency = (100.0 * (-age_hours as f64 / 24.0).exp()) as u32; // Frequency: logarithmic scaling let frequency = (count.ln() * 20.0).min(100.0) as u32; recency + frequency } ``` The frecency map is: - **Precomputed by background task** every 60 seconds - **Never computed inline** during search (no latency impact) - **Graceful fallback**: If unavailable, search works without frecency ranking ### Filter Mode Implementation | Filter Mode | Implementation | |-------------|----------------| | Global | No filter (all commands) | | Directory | `command.directories.contains(cwd)` | | Workspace | `command.directories.any(\|d\| d.starts_with(git_root))` | | Host | `command.hosts.contains(hostname)` | | Session | `command.sessions.contains(session_id)` | Filters are pre-computed into a HashSet before the search, making the filter callback O(1). ### Search Flow 1. **Daemon startup**: Loads history from SQLite in pages, builds deduplicated index 2. **Frecency precompute**: Background task builds frecency map after history loads 3. **Search request**: Client sends query with filter mode and context 4. **Filter**: Pre-computed HashSet determines which commands pass the filter 5. **Match**: Nucleo fuzzy matches the query against command text 6. **Score**: Frecency scorer ranks results (fuzzy score + frecency * 10) 7. **Response**: Returns history IDs for the most recent invocation of each matching command 8. **Hydration**: Client fetches full records from local SQLite ### Configuration ```toml # Enable daemon + autostart [daemon] enabled = true autostart = true # Enable daemon-based fuzzy search [search] search_mode = "daemon-fuzzy" ``` ## Performance Performance varies based on several factors, but in most initial testing with the new architecture shows improvement: * **Nucleo performs searches up to 4.5x faster**: direct DB search averages 18.07ms, but the daemon completes the same queries in 3.99ms. * **IPC overhead is significant, but acceptable**: a significant amount of wall-time is taken up by the transfer of data over IPC (via UDS in this case). This averages to about ~7.8ms and accounts for 66% of client-side wall time. * **Tail latency improves at every layer**: p99 times correspond to initial requests, worst-case query patterns, etc. but the average p99 daemon-based response time is 3.6x better than the associated DB-based search p99 time * **Query complexity no longer impacts performance**: the Nucleo-based search shows consistent 2-7ms times regardless of query pattern. The DB-based search had a 17x variance (3.59ms to 62.46ms). Interestingly, @ellie - who has a larger history store than I do - gets even better performance on the IPC layer. This could use a lot more testing in various edge cases and on various hardware, but seems promising. ### Regular DB search ``` Individual calls for: db_search -------------------------------------------------------------------------------------------------------------- # Wall Busy Idle Fields -------------------------------------------------------------------------------------------------------------- 1 32.25ms 32.20ms 47.70µs {"mode":"Fuzzy","query":"^"} 2 19.48ms 19.40ms 84.20µs {"mode":"Fuzzy","query":"^c"} 3 20.40ms 20.10ms 297.00µs {"mode":"Fuzzy","query":"^ca"} 4 13.07ms 13.00ms 69.90µs {"mode":"Fuzzy","query":"^car"} 5 12.17ms 12.10ms 67.10µs {"mode":"Fuzzy","query":"^carg"} 6 20.78ms 20.70ms 76.60µs {"mode":"Fuzzy","query":"^cargo"} 7 9.15ms 9.10ms 53.20µs {"mode":"Fuzzy","query":"^cargo "} 8 10.24ms 10.00ms 237.00µs {"mode":"Fuzzy","query":"^cargo b"} 9 10.01ms 9.68ms 325.00µs {"mode":"Fuzzy","query":"^cargo bu"} 10 5.89ms 5.83ms 57.20µs {"mode":"Fuzzy","query":"^cargo bui"} 11 8.85ms 8.28ms 568.00µs {"mode":"Fuzzy","query":"^cargo buil"} 12 7.70ms 7.49ms 212.00µs {"mode":"Fuzzy","query":"^cargo build"} 13 3.59ms 3.53ms 57.00µs {"mode":"Fuzzy","query":"^cargo build$"} 14 6.50ms 6.44ms 63.60µs {"mode":"Fuzzy","query":"^cargo "} 15 6.48ms 6.38ms 100.00µs {"mode":"Fuzzy","query":"!"} 16 31.68ms 31.60ms 75.90µs {"mode":"Fuzzy","query":"!g"} 17 62.46ms 62.40ms 58.90µs {"mode":"Fuzzy","query":"!gi"} 18 30.35ms 30.30ms 46.90µs {"mode":"Fuzzy","query":"!git"} 19 53.84ms 53.80ms 40.80µs {"mode":"Fuzzy","query":"!git "} 20 19.24ms 19.20ms 39.70µs {"mode":"Fuzzy","query":"!git c"} 21 22.03ms 22.00ms 34.70µs {"mode":"Fuzzy","query":"!git co"} 22 17.13ms 17.00ms 133.00µs {"mode":"Fuzzy","query":"!git com"} 23 16.14ms 15.90ms 242.00µs {"mode":"Fuzzy","query":"!git comm"} 24 5.11ms 5.08ms 28.60µs {"mode":"Fuzzy","query":"!git commi"} 25 7.31ms 7.26ms 52.70µs {"mode":"Fuzzy","query":"!git commit"} Summary: 25 calls Wall: avg=18.07ms, min=3.59ms, max=62.46ms, p50=13.07ms, p99=62.46ms Busy: avg=17.95ms, min=3.53ms, max=62.40ms, p50=13.00ms, p99=62.40ms ``` ### Daemon-based search **Client** ``` Individual calls for: daemon_search -------------------------------------------------------------------------------------------------------------- # Wall Busy Idle Fields -------------------------------------------------------------------------------------------------------------- 1 13.05ms 2.55ms 10.50ms {"query":"^"} 2 10.65ms 1.40ms 9.25ms {"query":"^c"} 3 10.72ms 1.18ms 9.54ms {"query":"^ca"} 4 5.54ms 485.00µs 5.06ms {"query":"^car"} 5 15.02ms 1.02ms 14.00ms {"query":"^carg"} 6 9.49ms 840.00µs 8.65ms {"query":"^cargo"} 7 5.53ms 555.00µs 4.97ms {"query":"^cargo "} 8 8.56ms 717.00µs 7.84ms {"query":"^cargo b"} 9 12.34ms 1.24ms 11.10ms {"query":"^cargo bu"} 10 8.38ms 650.00µs 7.73ms {"query":"^cargo bui"} 11 13.07ms 770.00µs 12.30ms {"query":"^cargo buil"} 12 17.11ms 709.00µs 16.40ms {"query":"^cargo build"} 13 15.41ms 907.00µs 14.50ms {"query":"^cargo build$"} 14 8.19ms 665.00µs 7.52ms {"query":"^cargo "} 15 7.98ms 1.72ms 6.26ms {"query":"!"} 16 13.56ms 856.00µs 12.70ms {"query":"!g"} 17 8.11ms 624.00µs 7.49ms {"query":"!gi"} 18 14.57ms 775.00µs 13.80ms {"query":"!git"} 19 14.18ms 779.00µs 13.40ms {"query":"!git "} 20 9.62ms 802.00µs 8.82ms {"query":"!git c"} 21 15.50ms 1.50ms 14.00ms {"query":"!git co"} 22 11.58ms 1.48ms 10.10ms {"query":"!git com"} 23 13.82ms 2.12ms 11.70ms {"query":"!git comm"} 24 17.48ms 2.18ms 15.30ms {"query":"!git commi"} 25 14.81ms 1.71ms 13.10ms {"query":"!git commit"} Summary: 25 calls Wall: avg=11.77ms, min=5.53ms, max=17.48ms, p50=12.34ms, p99=17.48ms Busy: avg=1.13ms, min=485.00µs, max=2.55ms, p50=856.00µs, p99=2.55ms ``` **Daemon** ``` Individual calls for: daemon_search_query -------------------------------------------------------------------------------------------------------------- # Wall Busy Idle Fields -------------------------------------------------------------------------------------------------------------- 1 1.75ms 250ns 1.75ms {"query":"^","query_id":1} 2 4.58ms 125ns 4.58ms {"query":"^c","query_id":2} 3 4.39ms 250ns 4.39ms {"query":"^ca","query_id":3} 4 2.52ms 125ns 2.52ms {"query":"^car","query_id":4} 5 4.44ms 250ns 4.44ms {"query":"^carg","query_id":5} 6 3.66ms 167ns 3.66ms {"query":"^cargo","query_id":6} 7 2.38ms 84ns 2.38ms {"query":"^cargo ","query_id":7} 8 4.13ms 84ns 4.13ms {"query":"^cargo b","query_id":8} 9 4.40ms 167ns 4.40ms {"query":"^cargo bu","query_id":9} 10 3.87ms 125ns 3.87ms {"query":"^cargo bui","query_id":10} 11 4.36ms 84ns 4.36ms {"query":"^cargo buil","query_id":11} 12 3.96ms 333ns 3.96ms {"query":"^cargo build","query_id":12} 13 4.61ms 167ns 4.61ms {"query":"^cargo build$","query_id":13} 14 4.20ms 209ns 4.20ms {"query":"^cargo ","query_id":14} 15 238.17µs 167ns 238.00µs {"query":"!","query_id":15} 16 4.44ms 125ns 4.44ms {"query":"!g","query_id":16} 17 3.47ms 83ns 3.47ms {"query":"!gi","query_id":17} 18 4.57ms 125ns 4.57ms {"query":"!git","query_id":18} 19 7.15ms 167ns 7.15ms {"query":"!git ","query_id":19} 20 4.27ms 250ns 4.27ms {"query":"!git c","query_id":20} 21 5.19ms 292ns 5.19ms {"query":"!git co","query_id":21} 22 4.29ms 417ns 4.29ms {"query":"!git com","query_id":22} 23 4.08ms 125ns 4.08ms {"query":"!git comm","query_id":23} 24 4.50ms 167ns 4.50ms {"query":"!git commi","query_id":24} 25 4.35ms 208ns 4.35ms {"query":"!git commit","query_id":25} Summary: 25 calls Wall: avg=3.99ms, min=238.17µs, max=7.15ms, p50=4.29ms, p99=7.15ms Busy: avg=182ns, min=83ns, max=417ns, p50=167ns, p99=417ns ``` **Nucleo matching time (in daemon)** ``` Individual calls for: nucleo_match -------------------------------------------------------------------------------------------------------------- # Wall Busy Idle Fields -------------------------------------------------------------------------------------------------------------- 1 1.73ms 125ns 1.73ms {"query":"^","query_id":1} 2 4.57ms 167ns 4.57ms {"query":"^c","query_id":2} 3 4.37ms 125ns 4.37ms {"query":"^ca","query_id":3} 4 2.51ms 84ns 2.51ms {"query":"^car","query_id":4} 5 4.43ms 125ns 4.43ms {"query":"^carg","query_id":5} 6 3.64ms 125ns 3.64ms {"query":"^cargo","query_id":6} 7 2.37ms 84ns 2.37ms {"query":"^cargo ","query_id":7} 8 4.11ms 125ns 4.11ms {"query":"^cargo b","query_id":8} 9 4.36ms 208ns 4.36ms {"query":"^cargo bu","query_id":9} 10 3.85ms 125ns 3.85ms {"query":"^cargo bui","query_id":10} 11 4.35ms 125ns 4.35ms {"query":"^cargo buil","query_id":11} 12 3.94ms 250ns 3.94ms {"query":"^cargo build","query_id":12} 13 4.59ms 125ns 4.59ms {"query":"^cargo build$","query_id":13} 14 4.18ms 84ns 4.18ms {"query":"^cargo ","query_id":14} 15 220.13µs 125ns 220.00µs {"query":"!","query_id":15} 16 4.43ms 125ns 4.43ms {"query":"!g","query_id":16} 17 3.45ms 125ns 3.45ms {"query":"!gi","query_id":17} 18 4.55ms 125ns 4.55ms {"query":"!git","query_id":18} 19 7.12ms 209ns 7.12ms {"query":"!git ","query_id":19} 20 4.25ms 166ns 4.25ms {"query":"!git c","query_id":20} 21 5.18ms 125ns 5.18ms {"query":"!git co","query_id":21} 22 4.27ms 125ns 4.27ms {"query":"!git com","query_id":22} 23 4.06ms 292ns 4.06ms {"query":"!git comm","query_id":23} 24 4.46ms 166ns 4.46ms {"query":"!git commi","query_id":24} 25 4.31ms 208ns 4.31ms {"query":"!git commit","query_id":25} Summary: 25 calls Wall: avg=3.97ms, min=220.13µs, max=7.12ms, p50=4.27ms, p99=7.12ms Busy: avg=147ns, min=84ns, max=292ns, p50=125ns, p99=292ns ```
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/span-table.ts420
1 files changed, 420 insertions, 0 deletions
diff --git a/scripts/span-table.ts b/scripts/span-table.ts
new file mode 100755
index 00000000..3656f129
--- /dev/null
+++ b/scripts/span-table.ts
@@ -0,0 +1,420 @@
+#!/usr/bin/env bun
+/**
+ * Analyze span timing JSON logs generated with ATUIN_SPAN
+ *
+ * Usage: bun scripts/span-table.ts <file.json> [options]
+ * --filter <pattern> Only show spans matching pattern (regex)
+ * --sort <field> Sort by: calls, avg, total, p99 (default: total)
+ * --top <n> Show top N spans (default: 20)
+ * --detail <span> Show individual calls for a specific span
+ * --all Include internal/library spans
+ */
+
+import { readFileSync } from "fs";
+
+interface SpanEvent {
+ timestamp: string;
+ level: string;
+ fields: {
+ message: string;
+ "time.busy"?: string;
+ "time.idle"?: string;
+ };
+ target: string;
+ span?: {
+ name: string;
+ [key: string]: unknown;
+ };
+ spans?: Array<{ name: string; [key: string]: unknown }>;
+}
+
+interface SpanStats {
+ name: string;
+ calls: number;
+ busyTimes: number[]; // in microseconds
+ idleTimes: number[];
+ parentCounts: Map<string, number>; // parent span name -> count
+}
+
+// Parse duration strings like "1.23ms", "456µs", "789ns" to microseconds
+function parseDuration(duration: string): number {
+ const match = duration.match(/^([\d.]+)(ns|µs|us|ms|s)$/);
+ if (!match) return 0;
+
+ const value = parseFloat(match[1]);
+ const unit = match[2];
+
+ switch (unit) {
+ case "ns":
+ return value / 1000;
+ case "µs":
+ case "us":
+ return value;
+ case "ms":
+ return value * 1000;
+ case "s":
+ return value * 1_000_000;
+ default:
+ return 0;
+ }
+}
+
+// Format microseconds for display
+function formatDuration(us: number): string {
+ if (us < 1) {
+ return `${(us * 1000).toFixed(0)}ns`;
+ } else if (us < 1000) {
+ return `${us.toFixed(2)}µs`;
+ } else if (us < 1_000_000) {
+ return `${(us / 1000).toFixed(2)}ms`;
+ } else {
+ return `${(us / 1_000_000).toFixed(2)}s`;
+ }
+}
+
+function percentile(arr: number[], p: number): number {
+ if (arr.length === 0) return 0;
+ const sorted = [...arr].sort((a, b) => a - b);
+ const idx = Math.floor(sorted.length * p);
+ return sorted[Math.min(idx, sorted.length - 1)];
+}
+
+function parseJsonLines(content: string): SpanEvent[] {
+ const events: SpanEvent[] = [];
+ for (const line of content.trim().split("\n")) {
+ if (!line.trim()) continue;
+ try {
+ events.push(JSON.parse(line));
+ } catch {
+ // Skip malformed lines
+ }
+ }
+ return events;
+}
+
+function main() {
+ const args = process.argv.slice(2);
+
+ // Parse arguments
+ let filterPattern: RegExp | null = null;
+ let sortField = "total";
+ let topN = 20;
+ let detailSpan: string | null = null;
+ let showAll = false;
+ const files: string[] = [];
+
+ for (let i = 0; i < args.length; i++) {
+ if (args[i] === "--filter" && args[i + 1]) {
+ filterPattern = new RegExp(args[++i]);
+ } else if (args[i] === "--sort" && args[i + 1]) {
+ sortField = args[++i];
+ } else if (args[i] === "--top" && args[i + 1]) {
+ topN = parseInt(args[++i], 10);
+ } else if (args[i] === "--detail" && args[i + 1]) {
+ detailSpan = args[++i];
+ } else if (args[i] === "--all") {
+ showAll = true;
+ } else if (!args[i].startsWith("-")) {
+ files.push(args[i]);
+ }
+ }
+
+ if (files.length === 0) {
+ console.error("Usage: bun scripts/span-table.ts <file.json> [options]");
+ console.error(" --filter <pattern> Only show spans matching pattern (regex)");
+ console.error(" --sort <field> Sort by: calls, avg, total, p99 (default: total)");
+ console.error(" --top <n> Show top N spans (default: 20)");
+ console.error(" --detail <span> Show individual calls for a specific span");
+ console.error(" --all Include internal/library spans");
+ process.exit(1);
+ }
+
+ // Parse all files
+ const allEvents: SpanEvent[] = [];
+ for (const file of files) {
+ const content = readFileSync(file, "utf-8");
+ for (const event of parseJsonLines(content)) {
+ allEvents.push(event);
+ }
+ }
+
+ // Filter to close events and aggregate by span name
+ const spans = new Map<string, SpanStats>();
+
+ for (const event of allEvents) {
+ if (event.fields?.message !== "close") continue;
+ if (!event.span?.name) continue;
+ if (!event.fields["time.busy"]) continue;
+
+ const name = event.span.name;
+
+ // Apply filter if specified
+ if (filterPattern && !filterPattern.test(name)) continue;
+
+ // Skip noisy internal spans unless explicitly requested
+ if (
+ !showAll &&
+ !filterPattern &&
+ !detailSpan &&
+ (name.startsWith("FramedRead::") ||
+ name.startsWith("FramedWrite::") ||
+ name.startsWith("Prioritize::") ||
+ name === "poll" ||
+ name === "poll_ready" ||
+ name === "Connection" ||
+ name.startsWith("assign_") ||
+ name.startsWith("reserve_") ||
+ name.startsWith("try_") ||
+ name.startsWith("send_") ||
+ name.startsWith("pop_"))
+ ) {
+ continue;
+ }
+
+ if (!spans.has(name)) {
+ spans.set(name, { name, calls: 0, busyTimes: [], idleTimes: [], parentCounts: new Map() });
+ }
+
+ const stats = spans.get(name)!;
+ stats.calls++;
+ stats.busyTimes.push(parseDuration(event.fields["time.busy"]));
+ if (event.fields["time.idle"]) {
+ stats.idleTimes.push(parseDuration(event.fields["time.idle"]));
+ }
+
+ // Track parent relationship (immediate parent is the last element in spans array)
+ const parents = event.spans || [];
+ const parentName = parents.length > 0 ? parents[parents.length - 1].name : "__root__";
+ stats.parentCounts.set(parentName, (stats.parentCounts.get(parentName) || 0) + 1);
+ }
+
+ if (spans.size === 0) {
+ console.error("No matching span close events found");
+ process.exit(1);
+ }
+
+ // Detail mode: show individual calls for a specific span
+ if (detailSpan) {
+ const detailEvents: Array<{
+ timestamp: string;
+ busy: number;
+ idle: number;
+ fields: Record<string, unknown>;
+ parents: string[];
+ }> = [];
+
+ for (const event of allEvents) {
+ if (event.fields?.message !== "close") continue;
+ if (event.span?.name !== detailSpan) continue;
+ if (!event.fields["time.busy"]) continue;
+
+ // Extract span fields (excluding name)
+ const fields: Record<string, unknown> = {};
+ if (event.span) {
+ for (const [k, v] of Object.entries(event.span)) {
+ if (k !== "name") fields[k] = v;
+ }
+ }
+
+ // Get parent span names
+ const parents = (event.spans || []).map((s) => s.name);
+
+ detailEvents.push({
+ timestamp: event.timestamp,
+ busy: parseDuration(event.fields["time.busy"]),
+ idle: event.fields["time.idle"] ? parseDuration(event.fields["time.idle"]) : 0,
+ fields,
+ parents,
+ });
+ }
+
+ if (detailEvents.length === 0) {
+ console.error(`No events found for span "${detailSpan}"`);
+ process.exit(1);
+ }
+
+ console.log("");
+ console.log(`Individual calls for: ${detailSpan}`);
+ console.log("-".repeat(110));
+ console.log(
+ "#".padStart(4) +
+ "Wall".padStart(12) +
+ "Busy".padStart(12) +
+ "Idle".padStart(12) +
+ " Fields"
+ );
+ console.log("-".repeat(110));
+
+ detailEvents.forEach((e, i) => {
+ const fieldsStr = Object.keys(e.fields).length > 0
+ ? JSON.stringify(e.fields)
+ : "";
+
+ console.log(
+ (i + 1).toString().padStart(4) +
+ formatDuration(e.busy + e.idle).padStart(12) +
+ formatDuration(e.busy).padStart(12) +
+ formatDuration(e.idle).padStart(12) +
+ " " +
+ fieldsStr
+ );
+ });
+
+ // Summary stats
+ const busyTimes = detailEvents.map((e) => e.busy);
+ const wallTimes = detailEvents.map((e) => e.busy + e.idle);
+ console.log("");
+ console.log(
+ `Summary: ${detailEvents.length} calls\n` +
+ ` Wall: avg=${formatDuration(wallTimes.reduce((a, b) => a + b, 0) / wallTimes.length)}, ` +
+ `min=${formatDuration(Math.min(...wallTimes))}, ` +
+ `max=${formatDuration(Math.max(...wallTimes))}, ` +
+ `p50=${formatDuration(percentile(wallTimes, 0.5))}, ` +
+ `p99=${formatDuration(percentile(wallTimes, 0.99))}\n` +
+ ` Busy: avg=${formatDuration(busyTimes.reduce((a, b) => a + b, 0) / busyTimes.length)}, ` +
+ `min=${formatDuration(Math.min(...busyTimes))}, ` +
+ `max=${formatDuration(Math.max(...busyTimes))}, ` +
+ `p50=${formatDuration(percentile(busyTimes, 0.5))}, ` +
+ `p99=${formatDuration(percentile(busyTimes, 0.99))}`
+ );
+ return;
+ }
+
+ // Calculate stats
+ const results = [...spans.values()].map((s) => {
+ // Calculate wall times (busy + idle) for each call
+ const wallTimes = s.busyTimes.map((busy, i) => busy + (s.idleTimes[i] || 0));
+
+ // Find most common parent
+ let mostCommonParent = "__root__";
+ let maxCount = 0;
+ for (const [parent, count] of s.parentCounts) {
+ if (count > maxCount) {
+ maxCount = count;
+ mostCommonParent = parent;
+ }
+ }
+
+ return {
+ name: s.name,
+ calls: s.calls,
+ total: s.busyTimes.reduce((a, b) => a + b, 0),
+ avg: s.busyTimes.reduce((a, b) => a + b, 0) / s.calls,
+ min: Math.min(...s.busyTimes),
+ max: Math.max(...s.busyTimes),
+ p50: percentile(s.busyTimes, 0.5),
+ p99: percentile(s.busyTimes, 0.99),
+ avgWall: wallTimes.reduce((a, b) => a + b, 0) / s.calls,
+ p50Wall: percentile(wallTimes, 0.5),
+ p99Wall: percentile(wallTimes, 0.99),
+ parent: mostCommonParent,
+ };
+ });
+
+ // Build tree structure
+ const childrenOf = new Map<string, string[]>();
+ childrenOf.set("__root__", []);
+ for (const r of results) {
+ if (!childrenOf.has(r.name)) {
+ childrenOf.set(r.name, []);
+ }
+ if (!childrenOf.has(r.parent)) {
+ childrenOf.set(r.parent, []);
+ }
+ childrenOf.get(r.parent)!.push(r.name);
+ }
+
+ // Sort children by the specified field
+ const resultMap = new Map(results.map(r => [r.name, r]));
+ const sortChildren = (children: string[]) => {
+ children.sort((a, b) => {
+ const ra = resultMap.get(a);
+ const rb = resultMap.get(b);
+ if (!ra || !rb) return 0;
+ switch (sortField) {
+ case "calls":
+ return rb.calls - ra.calls;
+ case "avg":
+ return rb.avg - ra.avg;
+ case "p99":
+ return rb.p99 - ra.p99;
+ case "total":
+ default:
+ return rb.total - ra.total;
+ }
+ });
+ };
+
+ // Traverse tree to build ordered display list with depths
+ const displayResults: Array<{ result: typeof results[0]; depth: number }> = [];
+ const visited = new Set<string>();
+
+ function traverse(name: string, depth: number) {
+ if (visited.has(name)) return;
+ visited.add(name);
+
+ const result = resultMap.get(name);
+ if (result) {
+ displayResults.push({ result, depth });
+ }
+
+ const children = childrenOf.get(name) || [];
+ sortChildren(children);
+ for (const child of children) {
+ traverse(child, depth + 1);
+ }
+ }
+
+ // Start from roots
+ const roots = childrenOf.get("__root__") || [];
+ sortChildren(roots);
+ for (const root of roots) {
+ traverse(root, 0);
+ }
+
+ // Add any orphaned spans (whose parent wasn't in our span list)
+ for (const r of results) {
+ if (!visited.has(r.name)) {
+ displayResults.push({ result: r, depth: 0 });
+ }
+ }
+
+ // Apply topN limit
+ const limitedResults = displayResults.slice(0, topN);
+
+ console.log("");
+ console.log(
+ "Span Name".padEnd(40) +
+ "Calls".padStart(6) +
+ "Avg(wall)".padStart(11) +
+ "P50(wall)".padStart(11) +
+ "P99(wall)".padStart(11) +
+ "Avg(busy)".padStart(11) +
+ "P50(busy)".padStart(11) +
+ "P99(busy)".padStart(11)
+ );
+ console.log("-".repeat(112));
+
+ for (const { result: r, depth } of limitedResults) {
+ const indent = " ".repeat(depth);
+ const maxNameLen = 38 - indent.length;
+ const truncatedName = r.name.length > maxNameLen ? "..." + r.name.slice(-(maxNameLen - 3)) : r.name;
+ const displayName = indent + truncatedName;
+
+ console.log(
+ displayName.padEnd(40) +
+ r.calls.toString().padStart(6) +
+ formatDuration(r.avgWall).padStart(11) +
+ formatDuration(r.p50Wall).padStart(11) +
+ formatDuration(r.p99Wall).padStart(11) +
+ formatDuration(r.avg).padStart(11) +
+ formatDuration(r.p50).padStart(11) +
+ formatDuration(r.p99).padStart(11)
+ );
+ }
+
+ console.log("");
+ console.log(`Showing ${limitedResults.length} of ${results.length} spans (sorted by ${sortField})`);
+}
+
+main();