diff options
| author | Michelle Tilley <michelle@michelletilley.net> | 2026-02-26 14:42:08 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-02-26 14:42:08 -0800 |
| commit | 3ba47446f06d5b0fbeaeb59d4ffed768b70729d8 (patch) | |
| tree | 28348bb3d18e9983e9212c26840f691766cad985 /scripts | |
| parent | feat: Add history author/intent metadata and v1 record version (#3205) (diff) | |
| download | atuin-3ba47446f06d5b0fbeaeb59d4ffed768b70729d8.zip | |
feat: In-memory search index with atuin daemon (#3201)
## Summary
This PR adds a persistent, in-memory search index to the Atuin daemon,
enabling fast fuzzy search without the startup delay of building an
index each time the TUI opens.
### Key Changes
- **Daemon search service**: A new gRPC service that maintains a Nucleo
fuzzy search index in memory
- **Real-time index updates**: The daemon listens for history events
(new commands, synced records) and updates the index immediately
- **Filter mode support**: All existing filter modes work (Global, Host,
Session, Directory, Workspace)
- **New search engine**: `daemon-fuzzy` search mode that queries the
daemon instead of building a local index
- **Paged history loading**: Database pagination support for efficient
initial index loading
- **Configurable logging**: New `[logs]` settings section for daemon and
search log configuration
- **Component-based daemon architecture**: Refactored daemon internals
into a modular, event-driven system
- **Fallback to DB search for regex**: Since Nucleo doesn't support
regex matching
## Daemon Architecture
The daemon has been refactored to use a component-based, event-driven
architecture that makes it easier to add new functionality and reason
about the system.
### Core Concepts
```
┌─────────────────────────────────────────────────────────────────────────────┐
│ Atuin Daemon │
│ │
│ ┌─────────────┐ ┌──────────────────────────────────────────────────┐ │
│ │ Daemon │ │ Components │ │
│ │ Handle │────▶│ │ │
│ │ │ │ ┌─────────────┐ ┌─────────────┐ ┌────────────┐ │ │
│ │ • emit() │ │ │ History │ │ Search │ │ Sync │ │ │
│ │ • subscribe │ │ │ Component │ │ Component │ │ Component │ │ │
│ │ • settings │ │ │ │ │ │ │ │ │ │
│ │ • databases │ │ │ gRPC service│ │ gRPC service│ │ background │ │ │
│ └─────────────┘ │ │ WIP history │ │ Nucleo index│ │ sync │ │ │
│ │ │ └─────────────┘ └─────────────┘ └────────────┘ │ │
│ │ └──────────────────────────────────────────────────┘ │
│ │ ▲ │
│ ▼ │ │
│ ┌─────────────────────────────────────┴────────────────────────────────┐ │
│ │ Event Bus (broadcast) │ │
│ │ │ │
│ │ HistoryStarted │ HistoryEnded │ RecordsAdded │ SyncCompleted │ ... │ │
│ └──────────────────────────────────────────────────────────────────────┘ │
│ ▲ │
│ ┌───────────────────────────────────┴──────────────────────────────────┐ │
│ │ Control Service (gRPC) │ │
│ │ External event injection from CLI commands │ │
│ └──────────────────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────────────────┘
```
### DaemonHandle
A lightweight, cloneable handle that provides access to shared daemon
resources:
- **Event emission**: `handle.emit(DaemonEvent::...)` broadcasts to all
components
- **Event subscription**: `handle.subscribe()` returns a receiver for
the event bus
- **Settings**: `handle.settings()` for configuration access
- **Databases**: `handle.history_db()` and `handle.store()` for data
access
### Component Trait
Components implement a simple lifecycle:
```rust
#[async_trait]
trait Component: Send + Sync {
fn name(&self) -> &'static str;
async fn start(&mut self, handle: DaemonHandle) -> Result<()>;
async fn handle_event(&mut self, event: &DaemonEvent) -> Result<()>;
async fn stop(&mut self) -> Result<()>;
}
```
### Event-Driven Design
Components communicate via events rather than direct coupling:
| Event | Emitted By | Consumed By |
|-------|-----------|-------------|
| `HistoryStarted` | History gRPC | Search (logging) |
| `HistoryEnded` | History gRPC | Search (index update) |
| `RecordsAdded` | Sync | Search (index update) |
| `HistoryPruned` | CLI (via Control) | Search (index rebuild) |
| `HistoryDeleted` | CLI (via Control) | Search (index rebuild) |
| `ForceSync` | CLI (via Control) | Sync |
| `ShutdownRequested` | Signal handler | All (graceful shutdown) |
### External Event Injection
CLI commands can inject events into a running daemon:
```rust
// After `atuin history prune`
emit_event(DaemonEvent::HistoryPruned).await?;
// After deleting specific items
emit_event(DaemonEvent::HistoryDeleted { ids }).await?;
// Request immediate sync
emit_event(DaemonEvent::ForceSync).await?;
```
This ensures the daemon's search index stays in sync with database
changes made by CLI commands.
## Search Architecture
The search service uses a [forked version of
Nucleo](https://github.com/atuinsh/nucleo-ext) that adds filter and
scorer callbacks, enabling efficient filtering and frecency-based
ranking.
```
┌────────────────────────────────────────────────────────────────┐
│ Atuin Daemon │
│ │
│ ┌─────────────────┐ ┌──────────────────────────────────┐ │
│ │ Event System │───▶│ Search Component │ │
│ │ │ │ │ │
│ │ • RecordsAdded │ │ ┌────────────────────────────┐ │ │
│ │ • HistoryEnded │ │ │ Deduplicated Index │ │ │
│ │ • HistoryPruned │ │ │ │ │ │
│ └─────────────────┘ │ │ CommandData per command: │ │ │
│ │ │ • Global frecency │ │ │
│ ┌─────────────────┐ │ │ • Filter indexes (sets) │ │ │
│ │ Background Task │ │ │ • Invocation history │ │ │
│ │ │ │ └────────────────────────────┘ │ │
│ │ Rebuilds │ │ │ │ │
│ │ frecency map │ │ ▼ │ │
│ │ every 60s │───▶│ ┌────────────────────────────┐ │ │
│ └─────────────────┘ │ │ Nucleo (forked) │ │ │
│ │ │ │ │ │
│ │ │ • Filter callback │ │ │
│ │ │ • Scorer callback │ │ │
│ │ │ • Fuzzy matching │ │ │
│ │ └────────────────────────────┘ │ │
│ └──────────────────────────────────┘ │
│ │ │
│ │ gRPC (Unix socket) │
└──────────────────────────────────────│─────────────────────────┘
│
▼
┌─────────────────────────────────────────────────────────────────┐
│ Search TUI (Client) │
│ │
│ 1. Send query + filter mode + context to daemon │
│ 2. Receive matching history IDs (ranked by frecency) │
│ 3. Hydrate full records from local SQLite database │
│ 4. Display results in TUI │
└─────────────────────────────────────────────────────────────────┘
```
### Nucleo Fork
The [nucleo-ext fork](https://github.com/atuinsh/nucleo-ext) adds two
key features to Nucleo:
1. **Filter callback**: Pre-filter items before fuzzy matching (used for
directory/host/session filtering)
2. **Scorer callback**: Compute custom scores after matching (used for
frecency ranking)
```rust
// Filter: only include commands run in current directory
nucleo.set_filter(Some(Arc::new(|cmd: &String| {
passing_commands.contains(cmd)
})));
// Scorer: combine fuzzy score with frecency
nucleo.set_scorer(Some(Arc::new(|cmd: &String, fuzzy_score: u32| {
let frecency = frecency_map.get(cmd).unwrap_or(0);
fuzzy_score + (frecency * 10)
})));
```
### Deduplicated Index
Commands are stored once per unique command text, with metadata tracking
all invocations:
```rust
struct CommandData {
command: String,
invocations: Vec<Invocation>, // All times this command was run
global_frecency: FrecencyData, // Precomputed frecency score
// O(1) filter indexes
directories: HashSet<String>, // All cwds where command was run
hosts: HashSet<String>, // All hostnames
sessions: HashSet<String>, // All session IDs
}
```
This deduplication means:
- **Fewer items to match**: ~13K unique commands vs ~62K history entries
- **O(1) filter checks**: HashSet lookups instead of scanning
invocations
- **Single frecency score**: Global frecency computed once, used for all
filter modes
### Frecency Scoring
Frecency (frequency + recency) scoring prioritizes recently and
frequently used commands:
```rust
fn compute_frecency(count: u32, last_used: i64, now: i64) -> u32 {
let age_hours = (now - last_used) / 3600;
// Recency: decays over time (half-life ~24 hours)
let recency = (100.0 * (-age_hours as f64 / 24.0).exp()) as u32;
// Frequency: logarithmic scaling
let frequency = (count.ln() * 20.0).min(100.0) as u32;
recency + frequency
}
```
The frecency map is:
- **Precomputed by background task** every 60 seconds
- **Never computed inline** during search (no latency impact)
- **Graceful fallback**: If unavailable, search works without frecency
ranking
### Filter Mode Implementation
| Filter Mode | Implementation |
|-------------|----------------|
| Global | No filter (all commands) |
| Directory | `command.directories.contains(cwd)` |
| Workspace | `command.directories.any(\|d\| d.starts_with(git_root))` |
| Host | `command.hosts.contains(hostname)` |
| Session | `command.sessions.contains(session_id)` |
Filters are pre-computed into a HashSet before the search, making the
filter callback O(1).
### Search Flow
1. **Daemon startup**: Loads history from SQLite in pages, builds
deduplicated index
2. **Frecency precompute**: Background task builds frecency map after
history loads
3. **Search request**: Client sends query with filter mode and context
4. **Filter**: Pre-computed HashSet determines which commands pass the
filter
5. **Match**: Nucleo fuzzy matches the query against command text
6. **Score**: Frecency scorer ranks results (fuzzy score + frecency *
10)
7. **Response**: Returns history IDs for the most recent invocation of
each matching command
8. **Hydration**: Client fetches full records from local SQLite
### Configuration
```toml
# Enable daemon + autostart
[daemon]
enabled = true
autostart = true
# Enable daemon-based fuzzy search
[search]
search_mode = "daemon-fuzzy"
```
## Performance
Performance varies based on several factors, but in most initial testing
with the new architecture shows improvement:
* **Nucleo performs searches up to 4.5x faster**: direct DB search
averages 18.07ms, but the daemon completes the same queries in 3.99ms.
* **IPC overhead is significant, but acceptable**: a significant amount
of wall-time is taken up by the transfer of data over IPC (via UDS in
this case). This averages to about ~7.8ms and accounts for 66% of
client-side wall time.
* **Tail latency improves at every layer**: p99 times correspond to
initial requests, worst-case query patterns, etc. but the average p99
daemon-based response time is 3.6x better than the associated DB-based
search p99 time
* **Query complexity no longer impacts performance**: the Nucleo-based
search shows consistent 2-7ms times regardless of query pattern. The
DB-based search had a 17x variance (3.59ms to 62.46ms).
Interestingly, @ellie - who has a larger history store than I do - gets
even better performance on the IPC layer. This could use a lot more
testing in various edge cases and on various hardware, but seems
promising.
### Regular DB search
```
Individual calls for: db_search
--------------------------------------------------------------------------------------------------------------
# Wall Busy Idle Fields
--------------------------------------------------------------------------------------------------------------
1 32.25ms 32.20ms 47.70µs {"mode":"Fuzzy","query":"^"}
2 19.48ms 19.40ms 84.20µs {"mode":"Fuzzy","query":"^c"}
3 20.40ms 20.10ms 297.00µs {"mode":"Fuzzy","query":"^ca"}
4 13.07ms 13.00ms 69.90µs {"mode":"Fuzzy","query":"^car"}
5 12.17ms 12.10ms 67.10µs {"mode":"Fuzzy","query":"^carg"}
6 20.78ms 20.70ms 76.60µs {"mode":"Fuzzy","query":"^cargo"}
7 9.15ms 9.10ms 53.20µs {"mode":"Fuzzy","query":"^cargo "}
8 10.24ms 10.00ms 237.00µs {"mode":"Fuzzy","query":"^cargo b"}
9 10.01ms 9.68ms 325.00µs {"mode":"Fuzzy","query":"^cargo bu"}
10 5.89ms 5.83ms 57.20µs {"mode":"Fuzzy","query":"^cargo bui"}
11 8.85ms 8.28ms 568.00µs {"mode":"Fuzzy","query":"^cargo buil"}
12 7.70ms 7.49ms 212.00µs {"mode":"Fuzzy","query":"^cargo build"}
13 3.59ms 3.53ms 57.00µs {"mode":"Fuzzy","query":"^cargo build$"}
14 6.50ms 6.44ms 63.60µs {"mode":"Fuzzy","query":"^cargo "}
15 6.48ms 6.38ms 100.00µs {"mode":"Fuzzy","query":"!"}
16 31.68ms 31.60ms 75.90µs {"mode":"Fuzzy","query":"!g"}
17 62.46ms 62.40ms 58.90µs {"mode":"Fuzzy","query":"!gi"}
18 30.35ms 30.30ms 46.90µs {"mode":"Fuzzy","query":"!git"}
19 53.84ms 53.80ms 40.80µs {"mode":"Fuzzy","query":"!git "}
20 19.24ms 19.20ms 39.70µs {"mode":"Fuzzy","query":"!git c"}
21 22.03ms 22.00ms 34.70µs {"mode":"Fuzzy","query":"!git co"}
22 17.13ms 17.00ms 133.00µs {"mode":"Fuzzy","query":"!git com"}
23 16.14ms 15.90ms 242.00µs {"mode":"Fuzzy","query":"!git comm"}
24 5.11ms 5.08ms 28.60µs {"mode":"Fuzzy","query":"!git commi"}
25 7.31ms 7.26ms 52.70µs {"mode":"Fuzzy","query":"!git commit"}
Summary: 25 calls
Wall: avg=18.07ms, min=3.59ms, max=62.46ms, p50=13.07ms, p99=62.46ms
Busy: avg=17.95ms, min=3.53ms, max=62.40ms, p50=13.00ms, p99=62.40ms
```
### Daemon-based search
**Client**
```
Individual calls for: daemon_search
--------------------------------------------------------------------------------------------------------------
# Wall Busy Idle Fields
--------------------------------------------------------------------------------------------------------------
1 13.05ms 2.55ms 10.50ms {"query":"^"}
2 10.65ms 1.40ms 9.25ms {"query":"^c"}
3 10.72ms 1.18ms 9.54ms {"query":"^ca"}
4 5.54ms 485.00µs 5.06ms {"query":"^car"}
5 15.02ms 1.02ms 14.00ms {"query":"^carg"}
6 9.49ms 840.00µs 8.65ms {"query":"^cargo"}
7 5.53ms 555.00µs 4.97ms {"query":"^cargo "}
8 8.56ms 717.00µs 7.84ms {"query":"^cargo b"}
9 12.34ms 1.24ms 11.10ms {"query":"^cargo bu"}
10 8.38ms 650.00µs 7.73ms {"query":"^cargo bui"}
11 13.07ms 770.00µs 12.30ms {"query":"^cargo buil"}
12 17.11ms 709.00µs 16.40ms {"query":"^cargo build"}
13 15.41ms 907.00µs 14.50ms {"query":"^cargo build$"}
14 8.19ms 665.00µs 7.52ms {"query":"^cargo "}
15 7.98ms 1.72ms 6.26ms {"query":"!"}
16 13.56ms 856.00µs 12.70ms {"query":"!g"}
17 8.11ms 624.00µs 7.49ms {"query":"!gi"}
18 14.57ms 775.00µs 13.80ms {"query":"!git"}
19 14.18ms 779.00µs 13.40ms {"query":"!git "}
20 9.62ms 802.00µs 8.82ms {"query":"!git c"}
21 15.50ms 1.50ms 14.00ms {"query":"!git co"}
22 11.58ms 1.48ms 10.10ms {"query":"!git com"}
23 13.82ms 2.12ms 11.70ms {"query":"!git comm"}
24 17.48ms 2.18ms 15.30ms {"query":"!git commi"}
25 14.81ms 1.71ms 13.10ms {"query":"!git commit"}
Summary: 25 calls
Wall: avg=11.77ms, min=5.53ms, max=17.48ms, p50=12.34ms, p99=17.48ms
Busy: avg=1.13ms, min=485.00µs, max=2.55ms, p50=856.00µs, p99=2.55ms
```
**Daemon**
```
Individual calls for: daemon_search_query
--------------------------------------------------------------------------------------------------------------
# Wall Busy Idle Fields
--------------------------------------------------------------------------------------------------------------
1 1.75ms 250ns 1.75ms {"query":"^","query_id":1}
2 4.58ms 125ns 4.58ms {"query":"^c","query_id":2}
3 4.39ms 250ns 4.39ms {"query":"^ca","query_id":3}
4 2.52ms 125ns 2.52ms {"query":"^car","query_id":4}
5 4.44ms 250ns 4.44ms {"query":"^carg","query_id":5}
6 3.66ms 167ns 3.66ms {"query":"^cargo","query_id":6}
7 2.38ms 84ns 2.38ms {"query":"^cargo ","query_id":7}
8 4.13ms 84ns 4.13ms {"query":"^cargo b","query_id":8}
9 4.40ms 167ns 4.40ms {"query":"^cargo bu","query_id":9}
10 3.87ms 125ns 3.87ms {"query":"^cargo bui","query_id":10}
11 4.36ms 84ns 4.36ms {"query":"^cargo buil","query_id":11}
12 3.96ms 333ns 3.96ms {"query":"^cargo build","query_id":12}
13 4.61ms 167ns 4.61ms {"query":"^cargo build$","query_id":13}
14 4.20ms 209ns 4.20ms {"query":"^cargo ","query_id":14}
15 238.17µs 167ns 238.00µs {"query":"!","query_id":15}
16 4.44ms 125ns 4.44ms {"query":"!g","query_id":16}
17 3.47ms 83ns 3.47ms {"query":"!gi","query_id":17}
18 4.57ms 125ns 4.57ms {"query":"!git","query_id":18}
19 7.15ms 167ns 7.15ms {"query":"!git ","query_id":19}
20 4.27ms 250ns 4.27ms {"query":"!git c","query_id":20}
21 5.19ms 292ns 5.19ms {"query":"!git co","query_id":21}
22 4.29ms 417ns 4.29ms {"query":"!git com","query_id":22}
23 4.08ms 125ns 4.08ms {"query":"!git comm","query_id":23}
24 4.50ms 167ns 4.50ms {"query":"!git commi","query_id":24}
25 4.35ms 208ns 4.35ms {"query":"!git commit","query_id":25}
Summary: 25 calls
Wall: avg=3.99ms, min=238.17µs, max=7.15ms, p50=4.29ms, p99=7.15ms
Busy: avg=182ns, min=83ns, max=417ns, p50=167ns, p99=417ns
```
**Nucleo matching time (in daemon)**
```
Individual calls for: nucleo_match
--------------------------------------------------------------------------------------------------------------
# Wall Busy Idle Fields
--------------------------------------------------------------------------------------------------------------
1 1.73ms 125ns 1.73ms {"query":"^","query_id":1}
2 4.57ms 167ns 4.57ms {"query":"^c","query_id":2}
3 4.37ms 125ns 4.37ms {"query":"^ca","query_id":3}
4 2.51ms 84ns 2.51ms {"query":"^car","query_id":4}
5 4.43ms 125ns 4.43ms {"query":"^carg","query_id":5}
6 3.64ms 125ns 3.64ms {"query":"^cargo","query_id":6}
7 2.37ms 84ns 2.37ms {"query":"^cargo ","query_id":7}
8 4.11ms 125ns 4.11ms {"query":"^cargo b","query_id":8}
9 4.36ms 208ns 4.36ms {"query":"^cargo bu","query_id":9}
10 3.85ms 125ns 3.85ms {"query":"^cargo bui","query_id":10}
11 4.35ms 125ns 4.35ms {"query":"^cargo buil","query_id":11}
12 3.94ms 250ns 3.94ms {"query":"^cargo build","query_id":12}
13 4.59ms 125ns 4.59ms {"query":"^cargo build$","query_id":13}
14 4.18ms 84ns 4.18ms {"query":"^cargo ","query_id":14}
15 220.13µs 125ns 220.00µs {"query":"!","query_id":15}
16 4.43ms 125ns 4.43ms {"query":"!g","query_id":16}
17 3.45ms 125ns 3.45ms {"query":"!gi","query_id":17}
18 4.55ms 125ns 4.55ms {"query":"!git","query_id":18}
19 7.12ms 209ns 7.12ms {"query":"!git ","query_id":19}
20 4.25ms 166ns 4.25ms {"query":"!git c","query_id":20}
21 5.18ms 125ns 5.18ms {"query":"!git co","query_id":21}
22 4.27ms 125ns 4.27ms {"query":"!git com","query_id":22}
23 4.06ms 292ns 4.06ms {"query":"!git comm","query_id":23}
24 4.46ms 166ns 4.46ms {"query":"!git commi","query_id":24}
25 4.31ms 208ns 4.31ms {"query":"!git commit","query_id":25}
Summary: 25 calls
Wall: avg=3.97ms, min=220.13µs, max=7.12ms, p50=4.27ms, p99=7.12ms
Busy: avg=147ns, min=84ns, max=292ns, p50=125ns, p99=292ns
```
Diffstat (limited to '')
| -rwxr-xr-x | scripts/span-table.ts | 420 |
1 files changed, 420 insertions, 0 deletions
diff --git a/scripts/span-table.ts b/scripts/span-table.ts new file mode 100755 index 00000000..3656f129 --- /dev/null +++ b/scripts/span-table.ts @@ -0,0 +1,420 @@ +#!/usr/bin/env bun +/** + * Analyze span timing JSON logs generated with ATUIN_SPAN + * + * Usage: bun scripts/span-table.ts <file.json> [options] + * --filter <pattern> Only show spans matching pattern (regex) + * --sort <field> Sort by: calls, avg, total, p99 (default: total) + * --top <n> Show top N spans (default: 20) + * --detail <span> Show individual calls for a specific span + * --all Include internal/library spans + */ + +import { readFileSync } from "fs"; + +interface SpanEvent { + timestamp: string; + level: string; + fields: { + message: string; + "time.busy"?: string; + "time.idle"?: string; + }; + target: string; + span?: { + name: string; + [key: string]: unknown; + }; + spans?: Array<{ name: string; [key: string]: unknown }>; +} + +interface SpanStats { + name: string; + calls: number; + busyTimes: number[]; // in microseconds + idleTimes: number[]; + parentCounts: Map<string, number>; // parent span name -> count +} + +// Parse duration strings like "1.23ms", "456µs", "789ns" to microseconds +function parseDuration(duration: string): number { + const match = duration.match(/^([\d.]+)(ns|µs|us|ms|s)$/); + if (!match) return 0; + + const value = parseFloat(match[1]); + const unit = match[2]; + + switch (unit) { + case "ns": + return value / 1000; + case "µs": + case "us": + return value; + case "ms": + return value * 1000; + case "s": + return value * 1_000_000; + default: + return 0; + } +} + +// Format microseconds for display +function formatDuration(us: number): string { + if (us < 1) { + return `${(us * 1000).toFixed(0)}ns`; + } else if (us < 1000) { + return `${us.toFixed(2)}µs`; + } else if (us < 1_000_000) { + return `${(us / 1000).toFixed(2)}ms`; + } else { + return `${(us / 1_000_000).toFixed(2)}s`; + } +} + +function percentile(arr: number[], p: number): number { + if (arr.length === 0) return 0; + const sorted = [...arr].sort((a, b) => a - b); + const idx = Math.floor(sorted.length * p); + return sorted[Math.min(idx, sorted.length - 1)]; +} + +function parseJsonLines(content: string): SpanEvent[] { + const events: SpanEvent[] = []; + for (const line of content.trim().split("\n")) { + if (!line.trim()) continue; + try { + events.push(JSON.parse(line)); + } catch { + // Skip malformed lines + } + } + return events; +} + +function main() { + const args = process.argv.slice(2); + + // Parse arguments + let filterPattern: RegExp | null = null; + let sortField = "total"; + let topN = 20; + let detailSpan: string | null = null; + let showAll = false; + const files: string[] = []; + + for (let i = 0; i < args.length; i++) { + if (args[i] === "--filter" && args[i + 1]) { + filterPattern = new RegExp(args[++i]); + } else if (args[i] === "--sort" && args[i + 1]) { + sortField = args[++i]; + } else if (args[i] === "--top" && args[i + 1]) { + topN = parseInt(args[++i], 10); + } else if (args[i] === "--detail" && args[i + 1]) { + detailSpan = args[++i]; + } else if (args[i] === "--all") { + showAll = true; + } else if (!args[i].startsWith("-")) { + files.push(args[i]); + } + } + + if (files.length === 0) { + console.error("Usage: bun scripts/span-table.ts <file.json> [options]"); + console.error(" --filter <pattern> Only show spans matching pattern (regex)"); + console.error(" --sort <field> Sort by: calls, avg, total, p99 (default: total)"); + console.error(" --top <n> Show top N spans (default: 20)"); + console.error(" --detail <span> Show individual calls for a specific span"); + console.error(" --all Include internal/library spans"); + process.exit(1); + } + + // Parse all files + const allEvents: SpanEvent[] = []; + for (const file of files) { + const content = readFileSync(file, "utf-8"); + for (const event of parseJsonLines(content)) { + allEvents.push(event); + } + } + + // Filter to close events and aggregate by span name + const spans = new Map<string, SpanStats>(); + + for (const event of allEvents) { + if (event.fields?.message !== "close") continue; + if (!event.span?.name) continue; + if (!event.fields["time.busy"]) continue; + + const name = event.span.name; + + // Apply filter if specified + if (filterPattern && !filterPattern.test(name)) continue; + + // Skip noisy internal spans unless explicitly requested + if ( + !showAll && + !filterPattern && + !detailSpan && + (name.startsWith("FramedRead::") || + name.startsWith("FramedWrite::") || + name.startsWith("Prioritize::") || + name === "poll" || + name === "poll_ready" || + name === "Connection" || + name.startsWith("assign_") || + name.startsWith("reserve_") || + name.startsWith("try_") || + name.startsWith("send_") || + name.startsWith("pop_")) + ) { + continue; + } + + if (!spans.has(name)) { + spans.set(name, { name, calls: 0, busyTimes: [], idleTimes: [], parentCounts: new Map() }); + } + + const stats = spans.get(name)!; + stats.calls++; + stats.busyTimes.push(parseDuration(event.fields["time.busy"])); + if (event.fields["time.idle"]) { + stats.idleTimes.push(parseDuration(event.fields["time.idle"])); + } + + // Track parent relationship (immediate parent is the last element in spans array) + const parents = event.spans || []; + const parentName = parents.length > 0 ? parents[parents.length - 1].name : "__root__"; + stats.parentCounts.set(parentName, (stats.parentCounts.get(parentName) || 0) + 1); + } + + if (spans.size === 0) { + console.error("No matching span close events found"); + process.exit(1); + } + + // Detail mode: show individual calls for a specific span + if (detailSpan) { + const detailEvents: Array<{ + timestamp: string; + busy: number; + idle: number; + fields: Record<string, unknown>; + parents: string[]; + }> = []; + + for (const event of allEvents) { + if (event.fields?.message !== "close") continue; + if (event.span?.name !== detailSpan) continue; + if (!event.fields["time.busy"]) continue; + + // Extract span fields (excluding name) + const fields: Record<string, unknown> = {}; + if (event.span) { + for (const [k, v] of Object.entries(event.span)) { + if (k !== "name") fields[k] = v; + } + } + + // Get parent span names + const parents = (event.spans || []).map((s) => s.name); + + detailEvents.push({ + timestamp: event.timestamp, + busy: parseDuration(event.fields["time.busy"]), + idle: event.fields["time.idle"] ? parseDuration(event.fields["time.idle"]) : 0, + fields, + parents, + }); + } + + if (detailEvents.length === 0) { + console.error(`No events found for span "${detailSpan}"`); + process.exit(1); + } + + console.log(""); + console.log(`Individual calls for: ${detailSpan}`); + console.log("-".repeat(110)); + console.log( + "#".padStart(4) + + "Wall".padStart(12) + + "Busy".padStart(12) + + "Idle".padStart(12) + + " Fields" + ); + console.log("-".repeat(110)); + + detailEvents.forEach((e, i) => { + const fieldsStr = Object.keys(e.fields).length > 0 + ? JSON.stringify(e.fields) + : ""; + + console.log( + (i + 1).toString().padStart(4) + + formatDuration(e.busy + e.idle).padStart(12) + + formatDuration(e.busy).padStart(12) + + formatDuration(e.idle).padStart(12) + + " " + + fieldsStr + ); + }); + + // Summary stats + const busyTimes = detailEvents.map((e) => e.busy); + const wallTimes = detailEvents.map((e) => e.busy + e.idle); + console.log(""); + console.log( + `Summary: ${detailEvents.length} calls\n` + + ` Wall: avg=${formatDuration(wallTimes.reduce((a, b) => a + b, 0) / wallTimes.length)}, ` + + `min=${formatDuration(Math.min(...wallTimes))}, ` + + `max=${formatDuration(Math.max(...wallTimes))}, ` + + `p50=${formatDuration(percentile(wallTimes, 0.5))}, ` + + `p99=${formatDuration(percentile(wallTimes, 0.99))}\n` + + ` Busy: avg=${formatDuration(busyTimes.reduce((a, b) => a + b, 0) / busyTimes.length)}, ` + + `min=${formatDuration(Math.min(...busyTimes))}, ` + + `max=${formatDuration(Math.max(...busyTimes))}, ` + + `p50=${formatDuration(percentile(busyTimes, 0.5))}, ` + + `p99=${formatDuration(percentile(busyTimes, 0.99))}` + ); + return; + } + + // Calculate stats + const results = [...spans.values()].map((s) => { + // Calculate wall times (busy + idle) for each call + const wallTimes = s.busyTimes.map((busy, i) => busy + (s.idleTimes[i] || 0)); + + // Find most common parent + let mostCommonParent = "__root__"; + let maxCount = 0; + for (const [parent, count] of s.parentCounts) { + if (count > maxCount) { + maxCount = count; + mostCommonParent = parent; + } + } + + return { + name: s.name, + calls: s.calls, + total: s.busyTimes.reduce((a, b) => a + b, 0), + avg: s.busyTimes.reduce((a, b) => a + b, 0) / s.calls, + min: Math.min(...s.busyTimes), + max: Math.max(...s.busyTimes), + p50: percentile(s.busyTimes, 0.5), + p99: percentile(s.busyTimes, 0.99), + avgWall: wallTimes.reduce((a, b) => a + b, 0) / s.calls, + p50Wall: percentile(wallTimes, 0.5), + p99Wall: percentile(wallTimes, 0.99), + parent: mostCommonParent, + }; + }); + + // Build tree structure + const childrenOf = new Map<string, string[]>(); + childrenOf.set("__root__", []); + for (const r of results) { + if (!childrenOf.has(r.name)) { + childrenOf.set(r.name, []); + } + if (!childrenOf.has(r.parent)) { + childrenOf.set(r.parent, []); + } + childrenOf.get(r.parent)!.push(r.name); + } + + // Sort children by the specified field + const resultMap = new Map(results.map(r => [r.name, r])); + const sortChildren = (children: string[]) => { + children.sort((a, b) => { + const ra = resultMap.get(a); + const rb = resultMap.get(b); + if (!ra || !rb) return 0; + switch (sortField) { + case "calls": + return rb.calls - ra.calls; + case "avg": + return rb.avg - ra.avg; + case "p99": + return rb.p99 - ra.p99; + case "total": + default: + return rb.total - ra.total; + } + }); + }; + + // Traverse tree to build ordered display list with depths + const displayResults: Array<{ result: typeof results[0]; depth: number }> = []; + const visited = new Set<string>(); + + function traverse(name: string, depth: number) { + if (visited.has(name)) return; + visited.add(name); + + const result = resultMap.get(name); + if (result) { + displayResults.push({ result, depth }); + } + + const children = childrenOf.get(name) || []; + sortChildren(children); + for (const child of children) { + traverse(child, depth + 1); + } + } + + // Start from roots + const roots = childrenOf.get("__root__") || []; + sortChildren(roots); + for (const root of roots) { + traverse(root, 0); + } + + // Add any orphaned spans (whose parent wasn't in our span list) + for (const r of results) { + if (!visited.has(r.name)) { + displayResults.push({ result: r, depth: 0 }); + } + } + + // Apply topN limit + const limitedResults = displayResults.slice(0, topN); + + console.log(""); + console.log( + "Span Name".padEnd(40) + + "Calls".padStart(6) + + "Avg(wall)".padStart(11) + + "P50(wall)".padStart(11) + + "P99(wall)".padStart(11) + + "Avg(busy)".padStart(11) + + "P50(busy)".padStart(11) + + "P99(busy)".padStart(11) + ); + console.log("-".repeat(112)); + + for (const { result: r, depth } of limitedResults) { + const indent = " ".repeat(depth); + const maxNameLen = 38 - indent.length; + const truncatedName = r.name.length > maxNameLen ? "..." + r.name.slice(-(maxNameLen - 3)) : r.name; + const displayName = indent + truncatedName; + + console.log( + displayName.padEnd(40) + + r.calls.toString().padStart(6) + + formatDuration(r.avgWall).padStart(11) + + formatDuration(r.p50Wall).padStart(11) + + formatDuration(r.p99Wall).padStart(11) + + formatDuration(r.avg).padStart(11) + + formatDuration(r.p50).padStart(11) + + formatDuration(r.p99).padStart(11) + ); + } + + console.log(""); + console.log(`Showing ${limitedResults.length} of ${results.length} spans (sorted by ${sortField})`); +} + +main(); |
