agentmux_launcher/
state.rs

1// Copyright 2026, AgentMux Corp.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Phase B.3 launcher state. Held inside Arc<Mutex<...>> by the IPC
5// server; mutated only by the pure reducer (`crate::reducer::update`).
6// Mutex is held for microseconds at a time — never across an .await
7// boundary, never across I/O. Mirrors the Elm/Redux pattern: state
8// is data, transitions are functions, side effects fire after the
9// state mutation commits.
10//
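// What's here as of B.3:
//   * `LifecyclePhase` (re-exported from agentmux-common::ipc so the
//     wire and internal types are the same enum)
//   * `ProcessRecord` — pid, kind, state, spawned_at, version
//   * `ProcessState` — Spawning / Running / Exited
//   * `State` — top-level container: lifecycle + process map +
//     monotonic version counter + monotonic client_id counter
//
// Added to this file by later phases (see the per-item docs below):
//   * `WindowMirror` + `State::windows` — read-only window mirror (B.4)
//   * `State::pool` — pre-warmed pool inventory (B.4 follow-up)
//   * `State::instance_registry`, `State::next_instance_num`,
//     `State::backend_window_ids` (B.5)
//   * `PendingHwnd`, `State::monitors`, `State::pending_hwnds` and the
//     observability fields on `WindowMirror` (B.9.1)
//
// What was intentionally NOT here in B.3:
//   * Window state machine (B.4–B.5)
//   * Warm-pool (B.5)
//   * Event log ring buffer (B.4 — added when events first start
//     accumulating beyond handshake replies)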
24
25use std::collections::{HashMap, HashSet};
26
27use agentmux_common::ipc::{ClientKind, Rect, WindowKind};
28pub use agentmux_common::ipc::LifecyclePhase;
29
/// State of one process that the launcher is tracking.
///
/// The intended progression is strictly forward and one step at a
/// time — `Spawning` → `Running` → `Exited` — and the reducer is the
/// only thing that advances it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ProcessState {
    /// A spawn was issued and a child handle came back, but the
    /// process has not yet confirmed that it is alive.
    ///
    /// B.3 never constructs this: Register is the first
    /// authoritative signal, so a record goes straight to `Running`
    /// at that point. B.4+ is slated to use it for the "spawned but
    /// not yet registered" window. The F.7 cleanup audit kept the
    /// variant behind `allow` instead of deleting it so that the
    /// documented state-machine shape is preserved.
    #[allow(dead_code)]
    Spawning,
    /// The process has registered with the launcher and is doing
    /// its work. This is the healthy state.
    Running,
    /// The process has exited. `code` is 0 after a clean Goodbye and
    /// non-zero after a crash.
    Exited { code: i32 },
}
50
51/// One process in the launcher's canonical view. Updated by the
52/// reducer; read by IPC handlers + the eventual `--diag` printer.
53///
54/// F.7 cleanup audit: `pid` and `spawned_at` are written by
55/// `handle_register` but never read at runtime. They're carried for
56/// the future `--diag launcher` printer (alongside `version` /
57/// `kind`) and for Debug derivations in tests and crash dumps. Keep
58/// with allow rather than delete — losing them now means rebuilding
59/// the diag printer from scratch.
60#[derive(Debug, Clone)]
61pub struct ProcessRecord {
62    #[allow(dead_code)]
63    pub pid: u32,
64    pub kind: ClientKind,
65    pub state: ProcessState,
66    /// RFC3339 timestamp of the spawn (or first-register, whichever
67    /// the launcher learned about first).
68    #[allow(dead_code)]
69    pub spawned_at: String,
70    /// Free-form version string of the registered binary. For log
71    /// correlation across version skew during a Phase B rollout.
72    #[allow(dead_code)]
73    pub version: String,
74}
75
76/// Phase B.4 read-only mirror of one host-owned window. The launcher
77/// learns about windows via `Command::ReportWindowOpened`; the host
78/// remains authoritative until B.5 flips the direction. `opened_at`
79/// is the launcher's clock at the time the report arrived (RFC3339)
80/// — useful for correlating launcher logs with host logs across
81/// version skew.
82///
83/// Phase B.9.1 (WRR) — the observability axis (`hwnd`, `visible`,
84/// `iconic`, `last_rect`, `last_foreground_at_ms`,
85/// `foregrounded_since_open`) is populated by the host's Win32
86/// event hooks. Pre-B.9 these all sit at default values, which is
87/// fine: the WRR drift checks only fire when at least one of the
88/// `ReportHwnd*` Commands arrives, and they don't arrive in
89/// `task dev` mode (no launcher → no IPC) or installed mode until
90/// the host-side hooks land. Existing reducer paths
91/// (`ReportWindowOpened`, `ReportWindowClosed`, etc.) ignore these
92/// fields entirely.
93#[derive(Debug, Clone, PartialEq, Eq)]
94pub struct WindowMirror {
95    pub label: String,
96    pub kind: WindowKind,
97    /// Set only for `Subwindow`; identifies the FullInstance that
98    /// owns this window so the eventual cascade-close logic (B.5)
99    /// has the parent linkage.
100    pub parent_label: Option<String>,
101    pub opened_at: String,
102    /// Milliseconds-since-launcher-start at which the host's
103    /// `ReportWindowOpened` arrived. Used by `apply_hwnd_visibility_changed`
104    /// to suppress `HiddenSinceOpen` drift during the post-create
105    /// placement grace window — CEF creates windows hidden, places
106    /// them, then shows them, and the intermediate `WM_HIDE` events
107    /// would otherwise fire spurious drift on every fresh window.
108    pub opened_at_ms: u64,
109    /// Phase B.9.1 — Win32 HWND linked to this label by the WRR
110    /// reducer arm (via `ReportHwndOpened` with matching
111    /// `label_hint` or via the post-hoc reconciliation against
112    /// `pending_hwnds`). `None` until the host's
113    /// `ReportHwndOpened` for this label arrives.
114    pub hwnd: Option<u64>,
115    /// Phase B.9.1 — last-known `IsWindowVisible` state.
116    pub visible: bool,
117    /// Phase B.9.1 — last-known minimized state.
118    pub iconic: bool,
119    /// Phase B.9.1 — last-known window rect. `None` until the
120    /// first `ReportHwndPositionChanged` arrives for the linked
121    /// HWND.
122    pub last_rect: Option<Rect>,
123    /// Phase B.9.1 — milliseconds-since-launcher-start of the most
124    /// recent `ReportHwndForegroundChanged` for this label's HWND.
125    /// `None` if the window has never been foregrounded.
126    pub last_foreground_at_ms: Option<u64>,
127    /// Phase B.9.1 — has this label been foregrounded at any point
128    /// since its `ReportWindowOpened`? Used to fire `HiddenSinceOpen`
129    /// drift on the first hide event when this is still false.
130    pub foregrounded_since_open: bool,
131    /// Drift-storm fix: `HiddenSinceOpen` / `OffMonitor` /
132    /// `CorrectiveWindowMove` each fire AT MOST ONCE per window per
133    /// session. Without these caps, a fresh top-level window that
134    /// goes through several SetWindowPos transitions during host
135    /// placement re-emits the same event for every intermediate
136    /// snapshot — observed up to 170 events in 1 second, exhausting
137    /// the renderer's V8 stack and crashing it. The cap fires once;
138    /// subscribers still see the signal, no storm.
139    ///
140    /// `OffMonitor` shares the same risk as `HiddenSinceOpen` because
141    /// `apply_hwnd_position_changed` fires per WM_MOVE — dragging an
142    /// already-off-monitor window emits drift on every pixel.
143    /// `CorrectiveWindowMove` rides with it (fires per position
144    /// change while `!foregrounded_since_open`).
145    ///
146    /// All three flags are monotonic for a window's lifetime: once
147    /// true, never reset (preserve via OR-with-prior on duplicate
148    /// `ReportWindowOpened`, codex P2 PR #708 round 3).
149    ///
150    /// See `docs/specs/ANALYSIS_DRIFT_STORM_RENDERER_CRASH_2026-05-06.md`
151    /// for storm context.
152    pub hidden_since_open_emitted: bool,
153    /// Set when `apply_hwnd_visibility_changed` sees `visible=false`
154    /// during the placement grace window. Marks "we suppressed a
155    /// hide; if it persists past the grace, drift fires on the next
156    /// reducer call via `drain_deferred_hidden_since_open`". Cleared
157    /// when the window subsequently becomes visible or is foregrounded.
158    /// Without this, a window that goes hidden during grace and
159    /// receives no further visibility events would permanently lose
160    /// its `HiddenSinceOpen` drift signal (codex P2 PR #725 round 1).
161    pub hidden_since_open_deferred: bool,
162    /// See `hidden_since_open_emitted` doc above.
163    pub off_monitor_drift_emitted: bool,
164    /// See `hidden_since_open_emitted` doc above.
165    pub corrective_window_move_emitted: bool,
166}
167
168/// Top-level launcher state. Single Arc<Mutex<State>> owned by the
169/// IPC server; passed into `update(state, cmd, conn)` for every
170/// incoming command.
171#[derive(Debug)]
172pub struct State {
173    pub lifecycle: LifecyclePhase,
174    /// Keyed by PID. Multiple records per PID would be a bug — the
175    /// reducer enforces unique-pid on insert.
176    pub processes: HashMap<u32, ProcessRecord>,
177    /// Read-only window mirror (Phase B.4). Keyed by label. Source of
178    /// truth still lives in `agentmux-cef::AppState.browsers` /
179    /// `window_meta`; this is a passive copy fed by host
180    /// `ReportWindow*` commands. B.5 inverts the dependency: host
181    /// queries this map instead of maintaining its own.
182    pub windows: HashMap<String, WindowMirror>,
183    /// Phase B.4 follow-up — pre-warmed pool inventory. Tracked
184    /// separately from `windows` because pool entries are not
185    /// user-visible until promote. On promote the host emits
186    /// `ReportPoolWindowRemoved` + `ReportWindowOpened` so the same
187    /// label transitions atomically (from launcher's perspective)
188    /// from `pool` to `windows`. On pre-promote destroy: only
189    /// `ReportPoolWindowRemoved`.
190    pub pool: HashSet<String>,
191    /// Phase B.5 — authoritative window instance registry. Maps
192    /// label → sequential instance number (1 for "main", 2 for the
193    /// second window opened, etc.). Numbers are never reused within
194    /// a launcher run — when a window closes the entry is removed
195    /// but `next_instance_num` keeps advancing. Sole source of truth
196    /// post-B.5e (host's `WindowInstanceRegistry` was deleted in
197    /// PR #584); host holds a passive shadow projection in
198    /// `agentmux-cef::AppState.shadow_instance_registry`. Updated by
199    /// the same reducer paths that mutate `windows`.
200    pub instance_registry: HashMap<String, u32>,
201    /// Next instance number to assign. Starts at 2 — "main" is
202    /// pre-seeded with 1 in `Default` (matching host's
203    /// `WindowInstanceRegistry::new` behavior so a synthetic main
204    /// open wouldn't collide).
205    pub next_instance_num: u32,
206    /// Phase B.5 (window_id_map step a) — authoritative
207    /// label → backend window ID map. Mirrors host's existing
208    /// `agentmux-cef::AppState.window_id_map`. Populated by
209    /// `Command::ReportBackendWindowIdRegistered` (sent from host
210    /// when the frontend calls `register_backend_window` IPC
211    /// after init); drained by `ReportBackendWindowIdUnregistered`
212    /// on close. Will become host-side authoritative through the
213    /// standard a→b→c→d→e ratchet.
214    pub backend_window_ids: HashMap<String, String>,
215    /// Monotonic counter for `Event.version`. Bumped by `bump_version()`.
216    pub event_version: u64,
217    /// Monotonic counter for client_id (returned in Registered events).
218    pub next_client_id: u64,
219    /// Phase B.9.1 (WRR) — current monitor topology, replaced
220    /// wholesale on `ReportMonitorTopologyChanged`. Empty by default
221    /// until the host's `wrr/wndproc.rs` reports the first
222    /// `WM_DISPLAYCHANGE`-equivalent (or its initial topology probe
223    /// at startup). `OffMonitor` drift is suppressed when this is
224    /// empty — we don't know enough to classify yet.
225    pub monitors: Vec<Rect>,
226    /// Phase B.9.1 — HWNDs the reducer has seen via `ReportHwndOpened`
227    /// but couldn't yet associate with a `WindowMirror`. Three
228    /// reasons an entry lives here transiently:
229    ///   1. The OS create event raced ahead of the host's
230    ///      `OnAfterCreated` → `ReportWindowOpened` chain.
231    ///   2. The host couldn't determine `label_hint` at create time.
232    ///   3. The HWND belongs to a pool window not yet promoted.
233    /// Drained on each `ReportWindowOpened` (we try to match a
234    /// pending HWND by `label_hint`/timing). Anything still here
235    /// after a follow-up event is classified as `HwndWithoutBrowser`.
236    pub pending_hwnds: HashMap<u64, PendingHwnd>,
237    /// Drift-storm fix (PR #708 round 3) — labels for which the host
238    /// emitted `ReportPoolWindowPromoted` but the corresponding
239    /// `ReportWindowOpened` hasn't arrived yet. The actual host emit
240    /// order on tear-off is `ReportPoolWindowRemoved` →
241    /// `ReportPoolWindowPromoted` → `ReportWindowOpened`, so at
242    /// promote-time the launcher has NO `WindowMirror` for the label
243    /// — the mirror is created by `ReportWindowOpened` a few ms
244    /// later. Without this set, the post-promote mirror is initialized
245    /// with `foregrounded_since_open: false`, the open-transient drift
246    /// detector then fires `HiddenSinceOpen` on every visible→hidden
247    /// flicker during HWND repositioning, the host fans each event
248    /// out across the bridge and the renderer's V8 isolate crashes.
249    /// `ReportWindowOpened` consumes the entry to initialize the new
250    /// mirror with `foregrounded_since_open: true`. Removed on
251    /// `ReportWindowClosed` if open never arrived (bounded leak).
252    /// See `docs/specs/ANALYSIS_DRIFT_STORM_RENDERER_CRASH_2026-05-06.md`.
253    pub just_promoted_labels: HashSet<String>,
254    // F.7 cleanup audit: removed unused `launcher_start_ms: Option<u64>`
255    // field. It was set in Default but no reducer arm or consumer
256    // ever read it — the WRR observability path uses the OnceLock-
257    // backed `launcher_start_ms()` helper in `ipc::server` directly,
258    // not a state field. Genuine leftover from the early B.9.1 sketch.
259}
260
/// Phase B.9.1 — short-lived record of an HWND that the reducer has
/// seen but cannot yet link to a `WindowMirror`. The records live in
/// `State::pending_hwnds`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PendingHwnd {
    /// Window class name reported with the HWND.
    pub class_name: String,
    /// Window title reported with the HWND.
    pub title: String,
    /// Label the HWND is believed to belong to, when one was
    /// supplied.
    pub label_hint: Option<String>,
    /// Arrival time of the `ReportHwndOpened`, in milliseconds
    /// since launcher start. Used to age out pending entries: if
    /// the entry is still around when a *different* event arrives
    /// that should have triggered reconciliation, it is classified
    /// as `HwndWithoutBrowser`.
    pub arrived_at_ms: u64,
}
275
276impl Default for State {
277    fn default() -> Self {
278        Self {
279            lifecycle: LifecyclePhase::Starting,
280            processes: HashMap::new(),
281            windows: HashMap::new(),
282            pool: HashSet::new(),
283            instance_registry: {
284                let mut m = HashMap::new();
285                m.insert("main".to_string(), 1);
286                m
287            },
288            next_instance_num: 2,
289            backend_window_ids: HashMap::new(),
290            event_version: 0,
291            next_client_id: 1,
292            monitors: Vec::new(),
293            pending_hwnds: HashMap::new(),
294            just_promoted_labels: HashSet::new(),
295        }
296    }
297}
298
299impl State {
300    /// Bump and return the new event version. Always called inside
301    /// the reducer when constructing an Event so version numbers
302    /// stay strictly monotonic.
303    ///
304    /// Strict (non-wrapping) add: Phase D's GetSnapshot resync
305    /// protocol relies on monotonicity (`event.version >
306    /// snapshot.version`), and a wrap to 0 would silently break
307    /// that contract. u64 at one event/ns would take 584 years to
308    /// overflow — never going to happen in practice; if it ever
309    /// does, the panic is the right failure mode.
310    /// (gemini MEDIUM PR #574 round-1.)
311    pub fn bump_version(&mut self) -> u64 {
312        self.event_version += 1;
313        self.event_version
314    }
315
316    /// Bump and return the next client_id. Client IDs are stable
317    /// per launcher run; not persisted across restart. Same strict-
318    /// add reasoning as bump_version.
319    pub fn alloc_client_id(&mut self) -> u64 {
320        let id = self.next_client_id;
321        self.next_client_id += 1;
322        id
323    }
324}