diff options
| author | Michelle Tilley <michelle@michelletilley.net> | 2026-06-08 09:12:45 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-06-08 09:12:45 -0700 |
| commit | bcdf8c8cde31e826000f1b2d6eeaebdd865a07c1 (patch) | |
| tree | f62f66e4dede22ce73ea5dafe69881d6af9b3101 /crates/atuin-pty-proxy/src/osc133.rs | |
| parent | chore(deps): bump debian from bookworm-20260421-slim to bookworm-20260518-sli... (diff) | |
| download | atuin-bcdf8c8cde31e826000f1b2d6eeaebdd865a07c1.zip | |
feat: Capture command output + expose to new `atuin_output` tool (#3510)
Diffstat (limited to 'crates/atuin-pty-proxy/src/osc133.rs')
| -rw-r--r-- | crates/atuin-pty-proxy/src/osc133.rs | 313 |
1 files changed, 278 insertions, 35 deletions
diff --git a/crates/atuin-pty-proxy/src/osc133.rs b/crates/atuin-pty-proxy/src/osc133.rs index d6ee1220..51fda848 100644 --- a/crates/atuin-pty-proxy/src/osc133.rs +++ b/crates/atuin-pty-proxy/src/osc133.rs @@ -9,18 +9,19 @@ //! | C | Command submitted — output begins | //! | D[;n] | Command finished with exit code *n* | //! -//! The wire format is `ESC ] 133 ; <cmd> [; <params>] ST` where ST is either -//! BEL (0x07) or ESC \ (0x1B 0x5C). +//! The wire format is `ESC ] 133 ; <cmd> [; <params>] ST` where ST is BEL +//! (0x07), ESC \ (0x1B 0x5C), or C1 ST (0x9C). //! //! # Design goals //! -//! * **Zero-copy** — the parser observes the byte stream without buffering or -//! modifying it. -//! * **Zero-alloc** — after construction no heap allocation occurs. +//! * **Transparent** — the parser observes the byte stream without modifying it; +//! the caller remains responsible for forwarding bytes to their destination. +//! * **Bounded** — OSC parameter buffering is capped so malformed output cannot +//! grow memory without limit. //! * **Non-blocking** — [`Parser::push`] processes whatever bytes are available //! and returns immediately. -//! * **Transparent** — the caller is responsible for forwarding bytes to their -//! destination; the parser only emits [`Event`]s through a callback. +//! * **Extensible** — marker parameters are preserved so Atuin-specific metadata +//! can ride alongside standard OSC 133 markers. /// Events emitted when an OSC 133 marker is detected. #[derive(Debug, Clone, PartialEq, Eq)] @@ -38,6 +39,63 @@ pub enum Event { }, } +/// Parameters attached to an OSC 133 marker. +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct Params { + items: Vec<Param>, +} + +impl Params { + /// Iterate over all marker parameters in order. + #[cfg(test)] + #[inline] + pub fn iter(&self) -> impl Iterator<Item = &Param> { + self.items.iter() + } + + /// Return the value for the first `key=value` parameter with this key. + #[inline] + pub fn get(&self, key: &str) -> Option<&str> { + self.items.iter().find_map(|item| match item { + Param::KeyValue { + key: item_key, + value, + } if item_key == key => Some(value.as_str()), + Param::Value(_) | Param::KeyValue { .. } => None, + }) + } +} + +/// A single OSC 133 marker parameter. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Param { + /// A positional parameter without an equals sign. + Value(String), + /// A `key=value` parameter. + KeyValue { key: String, value: String }, +} + +/// An OSC 133 event with its position in the most recent input chunk. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct LocatedEvent { + /// The OSC 133 event that was parsed. + pub event: Event, + /// Offset where this marker starts in the current chunk. + /// + /// If a marker started in an earlier [`Parser::push_located`] call, this is + /// `0` in the chunk that completed the marker. + pub start_offset: usize, + /// Offset immediately after this marker's terminator in the current chunk. + /// + /// If a marker spans multiple [`Parser::push_located`] calls, this is still + /// the offset in the chunk that completed the marker. + pub offset: usize, + /// The semantic zone after applying this event. + pub zone: Zone, + /// Metadata parameters attached to this marker. + pub params: Params, +} + /// The current semantic zone as determined by the most recent OSC 133 marker. #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] #[allow(dead_code)] @@ -59,14 +117,14 @@ pub enum Zone { const ESC: u8 = 0x1B; const BEL: u8 = 0x07; +const C1_ST: u8 = 0x9C; const BACKSLASH: u8 = b'\\'; const RIGHT_BRACKET: u8 = b']'; -/// Maximum bytes we'll buffer for the OSC parameter string. 32 bytes is far -/// more than any valid OSC 133 payload needs (e.g. `133;D;127` is 9 bytes). -/// Longer (non-133) OSC sequences simply stop accumulating once the buffer is -/// full — the dispatch logic will harmlessly ignore them. -const PARAM_BUF_CAP: usize = 32; +/// Maximum bytes we'll buffer for the OSC parameter string. This is large enough +/// for Atuin metadata such as history/session IDs while still bounding malformed +/// OSC sequences. +const PARAM_BUF_CAP: usize = 512; // --------------------------------------------------------------------------- // State machine @@ -94,6 +152,7 @@ enum State { pub struct Parser { state: State, zone: Zone, + sequence_start: Option<usize>, param_buf: [u8; PARAM_BUF_CAP], param_len: usize, } @@ -111,6 +170,7 @@ impl Parser { Self { state: State::Ground, zone: Zone::Unknown, + sequence_start: None, param_buf: [0u8; PARAM_BUF_CAP], param_len: 0, } @@ -123,18 +183,40 @@ impl Parser { self.zone } + /// Start offset of an incomplete OSC sequence in the most recent chunk. + #[inline] + pub(crate) fn incomplete_osc_sequence_start(&self) -> Option<usize> { + matches!(self.state, State::OscParam | State::OscEsc) + .then(|| self.sequence_start.unwrap_or(0)) + } + /// Process a chunk of bytes, calling `on_event` for every OSC 133 marker /// found. /// /// All bytes in `data` should still be forwarded to the terminal by the /// caller — this method only *observes* the stream. + #[cfg(test)] #[inline] pub fn push(&mut self, data: &[u8], mut on_event: impl FnMut(Event)) { - for &byte in data { + self.push_located(data, |located| on_event(located.event)); + } + + /// Process a chunk of bytes, calling `on_event` for every OSC 133 marker + /// found with its byte offset in this chunk. + /// + /// The offset points to the first byte after the marker terminator, making + /// it suitable for callers that need to split the original chunk at marker + /// boundaries. + #[inline] + pub fn push_located(&mut self, data: &[u8], mut on_event: impl FnMut(LocatedEvent)) { + self.sequence_start = (self.state != State::Ground).then_some(0); + + for (offset, &byte) in data.iter().enumerate() { match self.state { State::Ground => { if byte == ESC { self.state = State::Esc; + self.sequence_start = Some(offset); } } State::Esc => { @@ -143,12 +225,14 @@ impl Parser { self.param_len = 0; } else { self.state = State::Ground; + self.sequence_start = None; } } State::OscParam => { - if byte == BEL { - self.dispatch(&mut on_event); + if byte == BEL || byte == C1_ST { + self.dispatch(offset + 1, &mut on_event); self.state = State::Ground; + self.sequence_start = None; } else if byte == ESC { self.state = State::OscEsc; } else if self.param_len < PARAM_BUF_CAP { @@ -160,12 +244,13 @@ impl Parser { } State::OscEsc => { if byte == BACKSLASH { - self.dispatch(&mut on_event); + self.dispatch(offset + 1, &mut on_event); } // Whether we got a valid ST or not, return to ground. // (A new ESC ] would restart accumulation via the Ground // -> Esc -> OscParam path on the *next* byte.) self.state = State::Ground; + self.sequence_start = None; } } } @@ -174,46 +259,104 @@ impl Parser { /// Inspect the accumulated parameter buffer. If it holds an OSC 133 /// payload, emit the corresponding [`Event`] and update the zone. #[inline] - fn dispatch(&mut self, on_event: &mut impl FnMut(Event)) { - let params = &self.param_buf[..self.param_len]; + fn dispatch(&mut self, offset: usize, on_event: &mut impl FnMut(LocatedEvent)) { + let payload = &self.param_buf[..self.param_len]; + + if payload.len() < 5 || &payload[..4] != b"133;" { + return; + } - // Must start with "133;" - if params.len() < 5 || ¶ms[..4] != b"133;" { + if payload.len() > 5 && payload[5] != b';' { return; } - let cmd = params[4]; - let event = match cmd { + let metadata = payload.get(6..).unwrap_or_default(); + let cmd = payload[4]; + let (event, params) = match cmd { b'A' => { self.zone = Zone::Prompt; - Event::PromptStart + (Event::PromptStart, parse_params(metadata)) } b'B' => { self.zone = Zone::Input; - Event::CommandStart + (Event::CommandStart, parse_params(metadata)) } b'C' => { self.zone = Zone::Output; - Event::CommandExecuted + (Event::CommandExecuted, parse_params(metadata)) } b'D' => { - let exit_code = if params.len() > 6 && params[5] == b';' { - std::str::from_utf8(¶ms[6..]) - .ok() - .and_then(|s| s.parse::<i32>().ok()) - } else { - None - }; + let (exit_code, params) = parse_command_finished_params(metadata); self.zone = Zone::Unknown; - Event::CommandFinished { exit_code } + (Event::CommandFinished { exit_code }, params) } _ => return, }; - on_event(event); + on_event(LocatedEvent { + event, + start_offset: self.sequence_start.unwrap_or(0), + offset, + zone: self.zone, + params, + }); } } +fn parse_command_finished_params(metadata: &[u8]) -> (Option<i32>, Params) { + if metadata.is_empty() { + return (None, Params::default()); + } + + let Some(separator) = metadata.iter().position(|byte| *byte == b';') else { + return parse_exit_code(metadata).map_or_else( + || (None, parse_params(metadata)), + |exit_code| (Some(exit_code), Params::default()), + ); + }; + + let (first, rest) = metadata.split_at(separator); + let rest = &rest[1..]; + + parse_exit_code(first).map_or_else( + || (None, parse_params(metadata)), + |exit_code| (Some(exit_code), parse_params(rest)), + ) +} + +fn parse_exit_code(code: &[u8]) -> Option<i32> { + if code.is_empty() { + return None; + } + + std::str::from_utf8(code) + .ok() + .and_then(|code| code.parse::<i32>().ok()) +} + +fn parse_params(metadata: &[u8]) -> Params { + let items = metadata + .split(|byte| *byte == b';') + .filter(|part| !part.is_empty()) + .map(parse_param) + .collect(); + + Params { items } +} + +fn parse_param(param: &[u8]) -> Param { + let param = String::from_utf8_lossy(param); + + if let Some((key, value)) = param.split_once('=') { + return Param::KeyValue { + key: key.to_string(), + value: value.to_string(), + }; + } + + Param::Value(param.into_owned()) +} + // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- @@ -468,6 +611,12 @@ mod tests { assert!(parse_events(data).is_empty()); } + #[test] + fn marker_with_unexpected_trailing_bytes_ignored() { + let data = b"\x1b]133;ABC\x07"; + assert!(parse_events(data).is_empty()); + } + // -- Malformed sequences -------------------------------------------------- #[test] @@ -509,7 +658,7 @@ mod tests { fn very_long_osc_does_not_panic() { let mut data = Vec::new(); data.extend_from_slice(b"\x1b]"); - data.extend(std::iter::repeat(b'x').take(1000)); + data.extend(std::iter::repeat_n(b'x', 1000)); data.push(BEL); // Should not panic and should produce no event. assert!(parse_events(&data).is_empty()); @@ -589,6 +738,100 @@ mod tests { ); } + #[test] + fn detects_c1_st_terminator() { + let data = b"\x1b]133;A\x9c"; + assert_eq!(parse_events(data), vec![Event::PromptStart]); + } + + // -- Located event offsets ------------------------------------------------ + + #[test] + fn located_event_reports_offset_after_marker() { + let data = b"before\x1b]133;A\x07prompt"; + let mut parser = Parser::new(); + let mut events = Vec::new(); + + parser.push_located(data, |e| events.push(e)); + + assert_eq!( + events, + vec![LocatedEvent { + event: Event::PromptStart, + start_offset: b"before".len(), + offset: b"before\x1b]133;A\x07".len(), + zone: Zone::Prompt, + params: Params::default(), + }] + ); + } + + #[test] + fn located_event_offset_is_relative_to_completing_chunk() { + let mut parser = Parser::new(); + let mut events = Vec::new(); + + parser.push_located(b"\x1b]133;", |e| events.push(e)); + parser.push_located(b"D;42\x07after", |e| events.push(e)); + + assert_eq!( + events, + vec![LocatedEvent { + event: Event::CommandFinished { + exit_code: Some(42) + }, + start_offset: 0, + offset: b"D;42\x07".len(), + zone: Zone::Unknown, + params: Params::default(), + }] + ); + } + + #[test] + fn located_event_preserves_metadata_params() { + let mut parser = Parser::new(); + let mut events = Vec::new(); + + parser.push_located( + b"\x1b]133;D;127;history_id=018f;session_id=abcd;flag\x07", + |event| events.push(event), + ); + + assert_eq!(events.len(), 1); + let event = &events[0]; + assert_eq!( + event.event, + Event::CommandFinished { + exit_code: Some(127) + } + ); + assert_eq!(event.params.get("history_id"), Some("018f")); + assert_eq!(event.params.get("session_id"), Some("abcd")); + assert!( + event + .params + .iter() + .any(|param| param == &Param::Value("flag".to_string())) + ); + } + + #[test] + fn command_finished_metadata_without_exit_code_is_preserved() { + let mut parser = Parser::new(); + let mut events = Vec::new(); + + parser.push_located(b"\x1b]133;D;history_id=018f;session_id=abcd\x07", |event| { + events.push(event); + }); + + assert_eq!(events.len(), 1); + let event = &events[0]; + assert_eq!(event.event, Event::CommandFinished { exit_code: None }); + assert_eq!(event.params.get("history_id"), Some("018f")); + assert_eq!(event.params.get("session_id"), Some("abcd")); + } + // -- Default trait -------------------------------------------------------- #[test] |
