agentmux_srv\agents/
runner.rs

1// Copyright 2026, AgentMux Corp.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Unified agent runner — one-shot Claude Code spawn for the
5//! drone Agent block.
6//!
7//! Spawns `claude --print --output-format=stream-json` as a non-
8//! interactive subprocess, drains its stdout through
9//! `ClaudeTranslator`, forwards each `AgentEvent` on the caller's
10//! `tx`, and resolves the handle's `final_result` with the
11//! structured `AgentRunResult` once the stream emits `Done`.
12//!
13//! Headless and one-shot by design — the drone Agent block's
14//! contract is "send task, wait for done, return result." The
15//! interactive agent pane has its own PTY-based controller in
16//! `blockcontroller/shell.rs`; that path is NOT routed through
17//! this runner (see `docs/specs/SPEC_UNIFIED_AGENT_TYPES_2026_05_13.md`
18//! §4.2 — what's shared is the translator + event shape, not the
19//! spawn function).
20
21use std::path::PathBuf;
22use std::process::Stdio;
23
24use tokio::io::{AsyncBufReadExt, AsyncReadExt, BufReader};
25use tokio::process::Command;
26use tokio::sync::{mpsc, oneshot};
27
28use super::failure::classify;
29use super::translator::claude::ClaudeTranslator;
30use super::translator::Translator as _;
31use super::types::{AgentEvent, AgentRef, AgentRunResult, AgentTask};
32
/// Name of the environment variable that replaces the default
/// `claude` executable. Point it at a full path (or another binary
/// entirely) for tests or installs that are not on `PATH`.
const ENV_CLAUDE_BIN: &str = "AGENTMUX_CLAUDE_BIN";

/// Program name launched when [`ENV_CLAUDE_BIN`] does not supply one.
const DEFAULT_CLAUDE_BIN: &str = "claude";
38
39/// Handle returned by `run_agent`. The caller already holds the
40/// `mpsc::UnboundedReceiver<AgentEvent>` they paired with the `tx`
41/// passed into `run_agent`; this handle adds the structured terminal
42/// value via `final_result` (drone Agent block's downstream
43/// output) and the `instance_id` of the backing `db_agent_instances`
44/// row.
45///
46/// Dropping the caller's receiver implicitly cancels the run only if
47/// the runner observes the send error — Phase 2 adds an explicit
48/// `AbortHandle`.
49pub struct AgentRunHandle {
50    pub instance_id: String,
51    pub final_result: oneshot::Receiver<Result<AgentRunResult, String>>,
52}
53
54/// Error returned by the runner.
55#[derive(Debug, thiserror::Error)]
56pub enum AgentError {
57    #[error("agent runner: invalid AgentRef: {0}")]
58    InvalidRef(String),
59    #[error("agent runner: spawn failed: {0}")]
60    Spawn(String),
61}
62
63/// Spawn `claude --print --output-format=stream-json` per the given
64/// `AgentTask` and `AgentRef`, drain its stdout through the shared
65/// `ClaudeTranslator`, forward each `AgentEvent` on `tx`, and
66/// resolve the returned handle's `final_result` with an
67/// `AgentRunResult` constructed from the terminal Cost + Done events.
68///
69/// Working directory resolution:
70///   - `agent_ref.working_directory` if non-empty
71///   - else the current process working directory
72///
73/// Identity / memory bundle resolution and named-agent continuation
74/// are NOT plumbed in Phase 1.5 PR 2 — the drone Agent block
75/// always spawns fresh (per spec §8 "drone runs always allocate
76/// fresh instance_name"). The bundles can be added in a follow-up
77/// once the drone inspector (PR 3) needs to surface them.
78pub async fn run_agent(
79    agent_ref: AgentRef,
80    task: AgentTask,
81    tx: mpsc::UnboundedSender<AgentEvent>,
82) -> Result<AgentRunHandle, AgentError> {
83    let bin = std::env::var(ENV_CLAUDE_BIN)
84        .unwrap_or_else(|_| DEFAULT_CLAUDE_BIN.to_string());
85    run_agent_with_bin(&bin, agent_ref, task, tx).await
86}
87
88/// Internal entry point — same as `run_agent` but takes the `claude`
89/// binary path explicitly. Lets tests inject a known-nonexistent
90/// path to exercise the spawn-failure path without touching env vars
91/// (Rust 1.81+ flags `std::env::set_var` as unsound under concurrent
92/// test execution). The public `run_agent` is a thin shim that
93/// resolves the binary from `$AGENTMUX_CLAUDE_BIN` or the default.
94pub(crate) async fn run_agent_with_bin(
95    bin: &str,
96    agent_ref: AgentRef,
97    task: AgentTask,
98    tx: mpsc::UnboundedSender<AgentEvent>,
99) -> Result<AgentRunHandle, AgentError> {
100    let working_dir = if agent_ref.working_directory.is_empty() {
101        std::env::current_dir()
102            .map_err(|e| AgentError::Spawn(format!("cwd: {e}")))?
103    } else {
104        PathBuf::from(&agent_ref.working_directory)
105    };
106
107    // `claude --print` runs in non-interactive mode and exits when
108    // done. `--output-format=stream-json` emits one JSON object per
109    // line, the format ClaudeTranslator consumes. `--verbose` is
110    // required alongside stream-json (the CLI rejects stream-json
111    // without it). `--include-partial-messages` gives us the
112    // streaming text_deltas — the translator skips the resulting
113    // `partial: true` snapshots when building the transcript.
114    let mut cmd = Command::new(bin);
115    cmd.arg("--print")
116        .arg("--output-format=stream-json")
117        .arg("--verbose")
118        .arg("--include-partial-messages");
119    // Forward the configured turn cap so the CLI enforces it.
120    // Previously stored on AgentTask but never passed to the
121    // subprocess — silently ignored. Reagent P1 + codex P2 on
122    // PR #834.
123    if let Some(n) = task.max_turns {
124        cmd.arg("--max-turns").arg(n.to_string());
125    }
126    // On Windows: suppress console-window allocation. Spawned from the windowless
127    // srv without CREATE_NO_WINDOW, this one-shot task-agent CLI opens a Windows
128    // Terminal window per run (Win11 default-terminal handler). stdio is piped, so
129    // no console is needed. See docs/retro/retro-windows-terminal-window-leak-2026-06-21.md.
130    #[cfg(windows)]
131    {
132        const CREATE_NO_WINDOW: u32 = 0x0800_0000;
133        cmd.creation_flags(CREATE_NO_WINDOW);
134    }
135    let mut child = cmd
136        .arg(&task.prompt)
137        .current_dir(&working_dir)
138        .stdin(Stdio::null())
139        .stdout(Stdio::piped())
140        .stderr(Stdio::piped())
141        .kill_on_drop(true)
142        .spawn()
143        .map_err(|e| AgentError::Spawn(format!("spawn `{bin}`: {e}")))?;
144
145    let stdout = child
146        .stdout
147        .take()
148        .ok_or_else(|| AgentError::Spawn("claude stdout pipe missing".to_string()))?;
149    let stderr = child
150        .stderr
151        .take()
152        .ok_or_else(|| AgentError::Spawn("claude stderr pipe missing".to_string()))?;
153
154    let instance_id = format!("drone-agent-{}", uuid::Uuid::new_v4());
155    let (result_tx, result_rx) = oneshot::channel();
156    // Hand the captured stderr back to `drain_and_collect` so a failed
157    // run reports the real cause (rate-limit / auth / OOM) instead of a
158    // bare exit code. See
159    // `docs/specs/SPEC_AGENT_FAILURE_DIAGNOSTICS_2026_06_11.md`.
160    let (stderr_tx, stderr_rx) = oneshot::channel::<Vec<u8>>();
161
162    // Drain stderr to EOF in the background so the child's pipe never
163    // fills (a half-drained pipe blocks the child on stderr writes and
164    // can stall the whole run). We keep a rolling *tail* — the last
165    // STDERR_TAIL_CAP bytes — because the CLI's terminal error line
166    // (rate-limit / auth / OOM) lands at the END of stderr; a capped
167    // prefix would drop exactly the line the classifier needs. On EOF
168    // the tail is handed to the collector via `stderr_tx`.
169    // (codex P2 on #1353: keep the tail, not a prefix.)
170    tokio::spawn(async move {
171        const STDERR_TAIL_CAP: usize = 64 * 1024;
172        let mut buf: Vec<u8> = Vec::with_capacity(8192);
173        let mut reader = BufReader::new(stderr);
174        let mut sink = [0u8; 8192];
175        loop {
176            match reader.read(&mut sink).await {
177                Ok(0) => break,
178                Ok(n) => append_capped_tail(&mut buf, &sink[..n], STDERR_TAIL_CAP),
179                Err(_) => break,
180            }
181        }
182        // Trim to exactly the last CAP bytes. If the receiver is gone
183        // (run succeeded — nobody needs stderr), the send just drops.
184        trim_to_tail(&mut buf, STDERR_TAIL_CAP);
185        let _ = stderr_tx.send(buf);
186    });
187
188    tokio::spawn(async move {
189        let result = drain_and_collect(stdout, &tx, &mut child, stderr_rx).await;
190        let _ = result_tx.send(result);
191    });
192
193    Ok(AgentRunHandle {
194        instance_id,
195        final_result: result_rx,
196    })
197}
198
199/// Drain `stdout` line-by-line through `ClaudeTranslator`, forward
200/// every emitted event on `tx`, accumulate the terminal `Cost` /
201/// `Done` event payloads into an `AgentRunResult`, wait for the
202/// child to exit, and return the result.
203///
204/// Split out from `run_agent` so it can be unit-tested against
205/// in-memory readers without spawning a real subprocess. Used by
206/// the integration test below as `drain_async_reader_for_test`.
207async fn drain_and_collect(
208    stdout: tokio::process::ChildStdout,
209    tx: &mpsc::UnboundedSender<AgentEvent>,
210    child: &mut tokio::process::Child,
211    stderr_rx: oneshot::Receiver<Vec<u8>>,
212) -> Result<AgentRunResult, String> {
213    let result = drain_async_reader(BufReader::new(stdout), tx).await;
214
215    // Wait for child to exit so the OS reaps it cleanly.
216    let exit = child.wait().await.map_err(|e| format!("wait: {e}"))?;
217
218    match result {
219        Ok(mut accumulated) => {
220            // A run is a failure if the process exited non-zero OR claude
221            // reported an error on stdout (a terminal error `result`
222            // frame) even while exiting 0 — otherwise downstream blocks
223            // could treat a failed run as successful. (codex P1 #1353.)
224            let reported_error = accumulated.error_frame.take();
225            if exit.success() && reported_error.is_none() {
226                // Genuine success — but a stream that produced nothing is
227                // itself a (classified) failure, not a silent empty result.
228                if accumulated.response.is_empty() && accumulated.transcript.is_empty() {
229                    return Err(
230                        explain_failure(exit.code(), exit_signal(&exit), None, stderr_rx).await,
231                    );
232                }
233                accumulated.transcript.shrink_to_fit();
234                return Ok(accumulated);
235            }
236            // Non-zero exit, or a stdout-reported error on exit 0: classify
237            // from the exit status + result frame + captured stderr so the
238            // caller sees a real cause, not "exited with status N".
239            Err(explain_failure(exit.code(), exit_signal(&exit), reported_error, stderr_rx).await)
240        }
241        // Stream read error: still enrich with the exit/stderr cause.
242        Err(e) => {
243            let cause = explain_failure(exit.code(), exit_signal(&exit), None, stderr_rx).await;
244            Err(format!("{cause}\n(stream read: {e})"))
245        }
246    }
247}
248
249/// Await the captured stderr, classify the exit, log a warning, and
250/// render the human-readable explanation that becomes the run's
251/// terminal error string. See
252/// `docs/specs/SPEC_AGENT_FAILURE_DIAGNOSTICS_2026_06_11.md`.
253async fn explain_failure(
254    exit_code: Option<i32>,
255    signal: Option<i32>,
256    result_frame: Option<serde_json::Value>,
257    stderr_rx: oneshot::Receiver<Vec<u8>>,
258) -> String {
259    let bytes = stderr_rx.await.unwrap_or_default();
260    let stderr = String::from_utf8_lossy(&bytes);
261    let failure = classify(exit_code, signal, &stderr, result_frame.as_ref());
262    tracing::warn!(
263        code = ?failure.code,
264        exit_code = ?exit_code,
265        signal = ?signal,
266        retryable = failure.retryable,
267        "agent run failed: {}",
268        failure.title,
269    );
270    failure.explain()
271}
272
/// Report which signal terminated the process, if any.
///
/// Only Unix exposes a terminating signal; every other platform
/// yields `None` unconditionally.
fn exit_signal(status: &std::process::ExitStatus) -> Option<i32> {
    #[cfg(unix)]
    {
        use std::os::unix::process::ExitStatusExt as _;
        status.signal()
    }
    #[cfg(not(unix))]
    {
        let _ = status;
        None
    }
}
285
/// Push `chunk` onto a rolling tail buffer.
///
/// The buffer is allowed to grow up to `2 * cap` bytes; once it goes
/// past that it is compacted down to its last `cap` bytes, which
/// keeps the cost amortized O(1) per byte. The buffer may therefore
/// hold more than `cap` bytes between calls — the exact final trim is
/// done once at EOF by `trim_to_tail`. The *tail* (not a prefix) is
/// what is kept because the CLI's terminal error line is at the end
/// of stderr.
fn append_capped_tail(buf: &mut Vec<u8>, chunk: &[u8], cap: usize) {
    buf.extend_from_slice(chunk);

    let high_water = cap.saturating_mul(2);
    if buf.len() <= high_water {
        return;
    }

    // Past the high-water mark: discard everything except the newest
    // `cap` bytes. `len > 2 * cap` implies `len > cap`, so the
    // subtraction cannot underflow.
    let surplus = buf.len() - cap;
    buf.drain(..surplus);
}
297
/// Keep only the final `cap` bytes of `buf`, discarding the front.
/// A buffer that is already `cap` bytes or shorter is left untouched.
fn trim_to_tail(buf: &mut Vec<u8>, cap: usize) {
    // Zero when the buffer already fits; draining `..0` is a no-op.
    let surplus = buf.len().saturating_sub(cap);
    buf.drain(..surplus);
}
305
306/// Drain an arbitrary async reader of newline-delimited stream-json
307/// frames, forward every emitted `AgentEvent` on `tx`, and return
308/// an accumulator capturing the terminal `Cost` and `Done` values.
309///
310/// Pure async helper — no subprocess, no broker. Unit-tested with
311/// `tokio::io::duplex` in-memory pipes.
312pub(crate) async fn drain_async_reader<R: tokio::io::AsyncBufRead + Unpin>(
313    mut reader: R,
314    tx: &mpsc::UnboundedSender<AgentEvent>,
315) -> Result<AgentRunResult, String> {
316    let mut translator = ClaudeTranslator::new();
317    let mut accumulated = AgentRunResult::default();
318    let mut line = String::new();
319    loop {
320        line.clear();
321        let n = reader
322            .read_line(&mut line)
323            .await
324            .map_err(|e| format!("stdout read: {e}"))?;
325        if n == 0 {
326            break; // EOF
327        }
328        let trimmed = line.trim_end_matches(['\n', '\r']);
329        if !trimmed.starts_with('{') {
330            continue;
331        }
332        let Ok(frame) = serde_json::from_str::<serde_json::Value>(trimmed) else {
333            continue;
334        };
335        // Capture a terminal *error* result frame so the collector can
336        // fail the run even when claude reports the error on stdout and
337        // exits 0 (the translator still emits a hollow `Done`).
338        // codex P1 on #1353.
339        if frame.get("type").and_then(|v| v.as_str()) == Some("result")
340            && super::failure::is_error_result_frame(&frame)
341        {
342            accumulated.error_frame = Some(frame.clone());
343        }
344        for event in translator.translate(frame) {
345            // Capture terminal values before forwarding so a closed
346            // receiver doesn't lose the accumulated result.
347            match &event {
348                AgentEvent::Cost { cost_usd, tokens } => {
349                    accumulated.cost_usd = *cost_usd;
350                    accumulated.tokens = tokens.clone();
351                }
352                AgentEvent::Done {
353                    response,
354                    transcript,
355                } => {
356                    accumulated.response = response.clone();
357                    accumulated.transcript = transcript.clone();
358                }
359                _ => {}
360            }
361            // Forward — if the receiver is dropped, just stop
362            // sending (the drain still continues to capture the
363            // accumulated result).
364            let _ = tx.send(event);
365        }
366    }
367    Ok(accumulated)
368}
369
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use tokio::io::AsyncWriteExt;

    /// Build a stream-json byte sequence simulating a complete
    /// short claude run: streamed text + cost + result.
    fn synthetic_stream(prompt_reply: &str, cost: f64) -> Vec<u8> {
        let mut s = String::new();
        for ch in prompt_reply.chars() {
            s.push_str(&format!(
                r#"{{"type":"stream_event","event":{{"type":"content_block_delta","delta":{{"type":"text_delta","text":"{ch}"}}}}}}"#,
            ));
            s.push('\n');
        }
        s.push_str(&format!(
            r#"{{"type":"assistant","message":{{"content":[{{"type":"text","text":"{prompt_reply}"}}]}}}}"#
        ));
        s.push('\n');
        s.push_str(&format!(
            r#"{{"type":"result","cost_usd":{cost},"usage":{{"input_tokens":10,"output_tokens":5,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}},"result":"{prompt_reply}"}}"#
        ));
        s.push('\n');
        s.into_bytes()
    }

    /// Write `bytes` into an in-memory duplex pipe, close it, and drain
    /// the read side. Returns the accumulated result together with the
    /// event receiver; the sender is dropped on return, so the
    /// receiver yields the buffered events and then `None`.
    async fn drain_bytes(
        bytes: Vec<u8>,
    ) -> (AgentRunResult, mpsc::UnboundedReceiver<AgentEvent>) {
        let (mut w, r) = tokio::io::duplex(4096);
        tokio::spawn(async move {
            w.write_all(&bytes).await.unwrap();
            w.shutdown().await.unwrap();
        });

        let (tx, rx) = mpsc::unbounded_channel();
        let result = drain_async_reader(BufReader::new(r), &tx)
            .await
            .expect("drain ok");
        (result, rx)
    }

    /// Minimal task used by the tests that only care about process
    /// behaviour, not the prompt.
    fn hi_task() -> AgentTask {
        AgentTask {
            prompt: "hi".to_string(),
            context: serde_json::Map::new(),
            max_turns: None,
        }
    }

    /// Executable `/bin/sh` stub standing in for the `claude` binary.
    /// The file is removed when the value is dropped, so a failing
    /// assertion (which unwinds) no longer leaves scripts behind in
    /// the temp directory.
    #[cfg(unix)]
    struct StubScript {
        path: std::path::PathBuf,
    }

    #[cfg(unix)]
    impl StubScript {
        /// Create `amux-stub-claude-{tag}{uuid}.sh` containing a
        /// `#!/bin/sh` line followed by `lines`, marked executable.
        fn new(tag: &str, lines: &[&str]) -> Self {
            use std::io::Write as _;
            use std::os::unix::fs::PermissionsExt as _;

            let path = std::env::temp_dir().join(format!(
                "amux-stub-claude-{tag}{}.sh",
                uuid::Uuid::new_v4()
            ));
            // Build the guard first so a panic while writing still
            // cleans up the partially written file.
            let stub = Self { path };
            {
                let mut f = std::fs::File::create(&stub.path).unwrap();
                writeln!(f, "#!/bin/sh").unwrap();
                for line in lines {
                    writeln!(f, "{line}").unwrap();
                }
                let mut perms = std::fs::metadata(&stub.path).unwrap().permissions();
                perms.set_mode(0o755);
                std::fs::set_permissions(&stub.path, perms).unwrap();
            }
            stub
        }

        /// Path of the stub as a `&str`, suitable for `run_agent_with_bin`.
        fn bin(&self) -> &str {
            self.path.to_str().expect("temp path is valid UTF-8")
        }
    }

    #[cfg(unix)]
    impl Drop for StubScript {
        fn drop(&mut self) {
            let _ = std::fs::remove_file(&self.path);
        }
    }

    /// Run `stub` as the agent binary and return the failure
    /// explanation; panics with `why` if the run unexpectedly succeeds.
    #[cfg(unix)]
    async fn run_stub_expecting_failure(stub: &StubScript, why: &str) -> String {
        let (tx, _rx) = mpsc::unbounded_channel();
        let handle = run_agent_with_bin(stub.bin(), AgentRef::default(), hi_task(), tx)
            .await
            .expect("spawn ok");

        handle
            .final_result
            .await
            .expect("oneshot ok")
            .expect_err(why)
    }

    #[tokio::test]
    async fn drain_async_reader_accumulates_cost_and_done() {
        let (result, mut rx) = drain_bytes(synthetic_stream("hello", 0.001)).await;

        assert_eq!(result.response, "hello");
        assert_eq!(result.cost_usd, 0.001);
        assert_eq!(result.tokens.input, 10);
        assert_eq!(result.tokens.output, 5);
        // Transcript contains the assistant turn.
        assert_eq!(result.transcript.len(), 1);

        // Events forwarded: 5 AssistantText (one per char) + Cost + Done.
        let mut evs = Vec::new();
        while let Some(e) = rx.recv().await {
            evs.push(e);
        }
        assert_eq!(evs.len(), 7, "got events: {evs:?}");
        match &evs[evs.len() - 1] {
            AgentEvent::Done { .. } => {}
            other => panic!("expected last event Done, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn drain_async_reader_skips_non_json_lines() {
        // claude --verbose sometimes emits informational lines on
        // stdout that aren't stream-json (rare but possible). Those
        // must not break the drain.
        let mut bytes: Vec<u8> = b"Reading config...\n".to_vec();
        bytes.extend_from_slice(&synthetic_stream("ok", 0.0));
        bytes.extend_from_slice(b"\n");

        let (result, _rx) = drain_bytes(bytes).await;
        assert_eq!(result.response, "ok");
    }

    #[tokio::test]
    async fn drain_async_reader_returns_empty_on_no_stream() {
        // Nothing written at all — the pipe is just closed.
        let (result, _rx) = drain_bytes(Vec::new()).await;
        // Default-empty result — the drain itself succeeds; the
        // caller (drain_and_collect) is responsible for surfacing
        // the "no Done event" as an error since it depends on
        // exit status semantics.
        assert!(result.response.is_empty());
        assert_eq!(result.cost_usd, 0.0);
    }

    #[tokio::test]
    async fn drain_async_reader_handles_multi_line_chunks() {
        // Line reads through BufReader are well-defined; this just
        // guards against future regressions where someone might
        // switch to a chunked reader.
        let bytes = synthetic_stream("multi", 0.01);
        let (mut w, r) = tokio::io::duplex(4096);
        tokio::spawn(async move {
            // Write in small chunks to exercise the read path.
            for chunk in bytes.chunks(7) {
                w.write_all(chunk).await.unwrap();
            }
            w.shutdown().await.unwrap();
        });

        let (tx, _rx) = mpsc::unbounded_channel();
        let result = drain_async_reader(BufReader::new(r), &tx)
            .await
            .expect("drain ok");
        assert_eq!(result.response, "multi");
    }

    #[tokio::test]
    async fn drain_handles_malformed_json_gracefully() {
        let mut bytes: Vec<u8> =
            b"{this is not valid json\n{\"type\":\"unknown\"}\n".to_vec();
        bytes.extend_from_slice(&synthetic_stream("recovered", 0.0));

        let (result, _rx) = drain_bytes(bytes).await;
        assert_eq!(result.response, "recovered");
    }

    #[tokio::test]
    #[ignore = "requires `claude` CLI on PATH; run manually for end-to-end"]
    async fn run_agent_end_to_end_with_real_claude() {
        // Manual smoke: AGENTMUX_CLAUDE_BIN=/path/to/claude
        // cargo test -p agentmux-srv -- --ignored
        //     run_agent_end_to_end_with_real_claude
        let (tx, mut rx) = mpsc::unbounded_channel();
        let handle = run_agent(
            AgentRef::default(),
            AgentTask {
                prompt: "What is 2+2? Respond with just the number.".to_string(),
                context: serde_json::Map::new(),
                max_turns: None,
            },
            tx,
        )
        .await
        .expect("spawn ok");

        // Drain events until done.
        while let Some(_ev) = rx.recv().await {}

        let result = handle
            .final_result
            .await
            .expect("oneshot ok")
            .expect("agent run ok");
        assert!(result.response.contains('4'), "got: {}", result.response);
        assert!(result.cost_usd > 0.0);
    }

    #[tokio::test]
    async fn run_agent_with_bin_surfaces_spawn_failure() {
        // Inject a known-nonexistent binary path so the spawn fails
        // deterministically. Verifies the AgentError::Spawn path
        // without touching env vars (set_var is unsound under
        // concurrent test execution in Rust 1.81+).
        let (tx, _rx) = mpsc::unbounded_channel();
        let result = run_agent_with_bin(
            "/definitely/does/not/exist/claude-xyz-test",
            AgentRef::default(),
            hi_task(),
            tx,
        )
        .await;
        match result {
            Err(AgentError::Spawn(msg)) => {
                assert!(
                    msg.contains("spawn") || msg.contains("does/not/exist"),
                    "spawn error message should reference the failure; got: {msg}"
                );
            }
            Err(other) => panic!("expected Spawn error, got: {other}"),
            Ok(_) => panic!("expected Spawn error, got Ok(handle)"),
        }
    }

    /// End-to-end (Unix): a stub binary that writes a rate-limit line to
    /// stderr and exits 1 must produce a *classified* failure naming the
    /// cause and including the stderr tail — not a bare "exit 1".
    /// Exercises G1 (stderr retained) + G2 (classify) of
    /// SPEC_AGENT_FAILURE_DIAGNOSTICS.
    #[cfg(unix)]
    #[tokio::test]
    async fn classified_failure_surfaces_cause_and_stderr() {
        let stub = StubScript::new(
            "",
            &[
                "echo 'API Error: Server is temporarily limiting requests (not your usage limit) · Rate limited' >&2",
                "exit 1",
            ],
        );

        let err = run_stub_expecting_failure(&stub, "run should fail").await;

        assert!(
            err.contains("Rate-limited"),
            "explanation should name the class; got: {err}"
        );
        assert!(
            err.to_lowercase().contains("rate limited"),
            "stderr tail should be included; got: {err}"
        );
        assert!(
            err.contains("retryable"),
            "rate-limit is retryable; got: {err}"
        );
    }

    /// End-to-end (Unix): stderr larger than the tail cap with the real
    /// error on the LAST line must still classify correctly — the
    /// rolling tail keeps the end, not a prefix. Regression test for
    /// codex P2 on #1353.
    #[cfg(unix)]
    #[tokio::test]
    async fn classified_failure_reads_error_past_stderr_cap() {
        let stub = StubScript::new(
            "big-",
            &[
                // ~70 KiB of filler (> 64 KiB tail cap), THEN the real error.
                "head -c 70000 /dev/zero | tr '\\0' x >&2",
                "printf '\\nAPI Error: Server is temporarily limiting requests (not your usage limit) Rate limited\\n' >&2",
                "exit 1",
            ],
        );

        let err = run_stub_expecting_failure(&stub, "run should fail").await;

        assert!(
            err.contains("Rate-limited"),
            "must classify from the tail past the cap; got: {}",
            err.chars().take(160).collect::<String>()
        );
    }

    #[test]
    fn rolling_tail_keeps_last_bytes_past_cap() {
        let cap = 8;
        let mut buf = Vec::new();
        for i in 0..100u8 {
            append_capped_tail(&mut buf, &[i], cap);
            assert!(buf.len() <= cap * 2, "rolling buffer must stay bounded");
        }
        trim_to_tail(&mut buf, cap);
        assert_eq!(buf, vec![92, 93, 94, 95, 96, 97, 98, 99]);
    }

    #[test]
    fn rolling_tail_single_large_chunk() {
        let cap = 4;
        let mut buf = Vec::new();
        append_capped_tail(&mut buf, b"abcdefghij", cap);
        trim_to_tail(&mut buf, cap);
        assert_eq!(&buf, b"ghij");
    }

    #[tokio::test]
    async fn drain_captures_error_result_frame() {
        // An error result frame on stdout must be captured so the
        // collector can fail the run even on exit 0. codex P1 #1353.
        let bytes =
            b"{\"type\":\"result\",\"is_error\":true,\"subtype\":\"error_during_execution\",\"error\":{\"message\":\"overloaded_error\"}}\n"
                .to_vec();
        let (result, _rx) = drain_bytes(bytes).await;
        assert!(
            result.error_frame.is_some(),
            "error result frame should be captured"
        );
    }

    /// End-to-end (Unix): claude can report an error on stdout and still
    /// exit 0; the runner must NOT treat that as success. codex P1 #1353.
    #[cfg(unix)]
    #[tokio::test]
    async fn stdout_error_result_with_exit_zero_is_a_failure() {
        // Emit an error result frame on stdout, then exit 0. The JSON
        // lives in a `let` so its braces are data, not format
        // placeholders.
        let frame = r#"{"type":"result","is_error":true,"subtype":"error_during_execution","error":{"message":"overloaded_error: upstream busy"}}"#;
        let echo_frame = format!("echo '{frame}'");
        let stub = StubScript::new("okerr-", &[echo_frame.as_str(), "exit 0"]);

        let err =
            run_stub_expecting_failure(&stub, "stdout-reported error with exit 0 must fail").await;
        assert!(
            err.to_lowercase().contains("overloaded"),
            "should classify the stdout error; got: {err}"
        );
    }

    #[test]
    fn agent_task_max_turns_field_round_trips() {
        // Reagent P1 + codex P2 on PR #834: the max_turns field is
        // forwarded to the subprocess via `--max-turns N`. We can't
        // assert the actual CLI argument here without spawning, but
        // we can verify the field flows through AgentTask's
        // serde + Clone path without loss — the subprocess wiring
        // is exercised by the end-to-end test.
        let task = AgentTask {
            prompt: "x".into(),
            context: serde_json::Map::new(),
            max_turns: Some(7),
        };
        let v = serde_json::to_value(&task).unwrap();
        assert_eq!(v["maxTurns"], json!(7));
        let back: AgentTask = serde_json::from_value(v).unwrap();
        assert_eq!(back.max_turns, Some(7));
    }

    #[test]
    fn agent_run_result_has_sensible_defaults() {
        let r = AgentRunResult::default();
        assert_eq!(r.response, "");
        assert_eq!(r.cost_usd, 0.0);
        assert!(r.transcript.is_empty());
        let _ = json!(r); // serializes without panic
    }
}
agentmux_srv\agents/runner.rs

agentmux_srv\agents/
runner.rs