agentmux_launcher/state.rs
1// Copyright 2026, AgentMux Corp.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Phase B.3 launcher state. Held inside Arc<Mutex<...>> by the IPC
5// server; mutated only by the pure reducer (`crate::reducer::update`).
6// Mutex is held for microseconds at a time — never across an .await
7// boundary, never across I/O. Mirrors the Elm/Redux pattern: state
8// is data, transitions are functions, side effects fire after the
9// state mutation commits.
10//
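// What's here in B.3:
//   * `LifecyclePhase` (re-exported from agentmux-common::ipc so the
//     wire and internal types are the same enum)
//   * `ProcessRecord` — pid, kind, state, spawned_at
//   * `ProcessState` — Spawning / Running / Exited
//   * `State` — top-level container: lifecycle + process map +
//     monotonic version counter + monotonic client_id counter
//
// Also defined in this file (see the per-item docs below):
//   * `ProcessRecord::version` — version string of the registered binary
//   * `WindowMirror` — read-only mirror of one host-owned window,
//     including the WRR observability and drift-cap fields
//   * `PendingHwnd` — an HWND seen but not yet linked to a `WindowMirror`
//   * further `State` fields: window mirror map, pool inventory,
//     instance registry, backend window ID map, monitor topology,
//     pending HWNDs, just-promoted labels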
18//
19// What's intentionally NOT here yet:
20// * Window state machine (B.4–B.5)
21// * Warm-pool (B.5)
22// * Event log ring buffer (B.4 — added when events first start
23// accumulating beyond handshake replies)
24
25use std::collections::{HashMap, HashSet};
26
27use agentmux_common::ipc::{ClientKind, Rect, WindowKind};
28pub use agentmux_common::ipc::LifecyclePhase;
29
/// Where a process known to the launcher currently sits in its lifetime.
///
/// The reducer walks a record through the variants in declaration
/// order (`Spawning` → `Running` → `Exited`) and never skips one.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessState {
    /// A spawn was issued and a child handle came back, but the process
    /// has not yet confirmed that it is alive.
    ///
    /// In B.3 a record goes straight to `Running` on Register, because
    /// Register is the first authoritative signal. B.4+ introduces an
    /// intermediate "spawned but not yet registered" state, and this
    /// variant is reserved for it. The F.7 cleanup audit chose to keep
    /// it behind `allow(dead_code)` instead of deleting it, so the
    /// documented shape of the state machine is preserved.
    #[allow(dead_code)]
    Spawning,
    /// The process has registered with the launcher and is doing its
    /// work. This is the healthy state.
    Running,
    /// The process has exited. `code` is 0 after a clean Goodbye and
    /// non-zero after a crash.
    Exited { code: i32 },
}
50
51/// One process in the launcher's canonical view. Updated by the
52/// reducer; read by IPC handlers + the eventual `--diag` printer.
53///
54/// F.7 cleanup audit: `pid` and `spawned_at` are written by
55/// `handle_register` but never read at runtime. They're carried for
56/// the future `--diag launcher` printer (alongside `version` /
57/// `kind`) and for Debug derivations in tests and crash dumps. Keep
58/// with allow rather than delete — losing them now means rebuilding
59/// the diag printer from scratch.
60#[derive(Debug, Clone)]
61pub struct ProcessRecord {
62 #[allow(dead_code)]
63 pub pid: u32,
64 pub kind: ClientKind,
65 pub state: ProcessState,
66 /// RFC3339 timestamp of the spawn (or first-register, whichever
67 /// the launcher learned about first).
68 #[allow(dead_code)]
69 pub spawned_at: String,
70 /// Free-form version string of the registered binary. For log
71 /// correlation across version skew during a Phase B rollout.
72 #[allow(dead_code)]
73 pub version: String,
74}
75
76/// Phase B.4 read-only mirror of one host-owned window. The launcher
77/// learns about windows via `Command::ReportWindowOpened`; the host
78/// remains authoritative until B.5 flips the direction. `opened_at`
79/// is the launcher's clock at the time the report arrived (RFC3339)
80/// — useful for correlating launcher logs with host logs across
81/// version skew.
82///
83/// Phase B.9.1 (WRR) — the observability axis (`hwnd`, `visible`,
84/// `iconic`, `last_rect`, `last_foreground_at_ms`,
85/// `foregrounded_since_open`) is populated by the host's Win32
86/// event hooks. Pre-B.9 these all sit at default values, which is
87/// fine: the WRR drift checks only fire when at least one of the
88/// `ReportHwnd*` Commands arrives, and they don't arrive in
89/// `task dev` mode (no launcher → no IPC) or installed mode until
90/// the host-side hooks land. Existing reducer paths
91/// (`ReportWindowOpened`, `ReportWindowClosed`, etc.) ignore these
92/// fields entirely.
93#[derive(Debug, Clone, PartialEq, Eq)]
94pub struct WindowMirror {
95 pub label: String,
96 pub kind: WindowKind,
97 /// Set only for `Subwindow`; identifies the FullInstance that
98 /// owns this window so the eventual cascade-close logic (B.5)
99 /// has the parent linkage.
100 pub parent_label: Option<String>,
101 pub opened_at: String,
102 /// Milliseconds-since-launcher-start at which the host's
103 /// `ReportWindowOpened` arrived. Used by `apply_hwnd_visibility_changed`
104 /// to suppress `HiddenSinceOpen` drift during the post-create
105 /// placement grace window — CEF creates windows hidden, places
106 /// them, then shows them, and the intermediate `WM_HIDE` events
107 /// would otherwise fire spurious drift on every fresh window.
108 pub opened_at_ms: u64,
109 /// Phase B.9.1 — Win32 HWND linked to this label by the WRR
110 /// reducer arm (via `ReportHwndOpened` with matching
111 /// `label_hint` or via the post-hoc reconciliation against
112 /// `pending_hwnds`). `None` until the host's
113 /// `ReportHwndOpened` for this label arrives.
114 pub hwnd: Option<u64>,
115 /// Phase B.9.1 — last-known `IsWindowVisible` state.
116 pub visible: bool,
117 /// Phase B.9.1 — last-known minimized state.
118 pub iconic: bool,
119 /// Phase B.9.1 — last-known window rect. `None` until the
120 /// first `ReportHwndPositionChanged` arrives for the linked
121 /// HWND.
122 pub last_rect: Option<Rect>,
123 /// Phase B.9.1 — milliseconds-since-launcher-start of the most
124 /// recent `ReportHwndForegroundChanged` for this label's HWND.
125 /// `None` if the window has never been foregrounded.
126 pub last_foreground_at_ms: Option<u64>,
127 /// Phase B.9.1 — has this label been foregrounded at any point
128 /// since its `ReportWindowOpened`? Used to fire `HiddenSinceOpen`
129 /// drift on the first hide event when this is still false.
130 pub foregrounded_since_open: bool,
131 /// Drift-storm fix: `HiddenSinceOpen` / `OffMonitor` /
132 /// `CorrectiveWindowMove` each fire AT MOST ONCE per window per
133 /// session. Without these caps, a fresh top-level window that
134 /// goes through several SetWindowPos transitions during host
135 /// placement re-emits the same event for every intermediate
136 /// snapshot — observed up to 170 events in 1 second, exhausting
137 /// the renderer's V8 stack and crashing it. The cap fires once;
138 /// subscribers still see the signal, no storm.
139 ///
140 /// `OffMonitor` shares the same risk as `HiddenSinceOpen` because
141 /// `apply_hwnd_position_changed` fires per WM_MOVE — dragging an
142 /// already-off-monitor window emits drift on every pixel.
143 /// `CorrectiveWindowMove` rides with it (fires per position
144 /// change while `!foregrounded_since_open`).
145 ///
146 /// All three flags are monotonic for a window's lifetime: once
147 /// true, never reset (preserve via OR-with-prior on duplicate
148 /// `ReportWindowOpened`, codex P2 PR #708 round 3).
149 ///
150 /// See `docs/specs/ANALYSIS_DRIFT_STORM_RENDERER_CRASH_2026-05-06.md`
151 /// for storm context.
152 pub hidden_since_open_emitted: bool,
153 /// Set when `apply_hwnd_visibility_changed` sees `visible=false`
154 /// during the placement grace window. Marks "we suppressed a
155 /// hide; if it persists past the grace, drift fires on the next
156 /// reducer call via `drain_deferred_hidden_since_open`". Cleared
157 /// when the window subsequently becomes visible or is foregrounded.
158 /// Without this, a window that goes hidden during grace and
159 /// receives no further visibility events would permanently lose
160 /// its `HiddenSinceOpen` drift signal (codex P2 PR #725 round 1).
161 pub hidden_since_open_deferred: bool,
162 /// See `hidden_since_open_emitted` doc above.
163 pub off_monitor_drift_emitted: bool,
164 /// See `hidden_since_open_emitted` doc above.
165 pub corrective_window_move_emitted: bool,
166}
167
168/// Top-level launcher state. Single Arc<Mutex<State>> owned by the
169/// IPC server; passed into `update(state, cmd, conn)` for every
170/// incoming command.
171#[derive(Debug)]
172pub struct State {
173 pub lifecycle: LifecyclePhase,
174 /// Keyed by PID. Multiple records per PID would be a bug — the
175 /// reducer enforces unique-pid on insert.
176 pub processes: HashMap<u32, ProcessRecord>,
177 /// Read-only window mirror (Phase B.4). Keyed by label. Source of
178 /// truth still lives in `agentmux-cef::AppState.browsers` /
179 /// `window_meta`; this is a passive copy fed by host
180 /// `ReportWindow*` commands. B.5 inverts the dependency: host
181 /// queries this map instead of maintaining its own.
182 pub windows: HashMap<String, WindowMirror>,
183 /// Phase B.4 follow-up — pre-warmed pool inventory. Tracked
184 /// separately from `windows` because pool entries are not
185 /// user-visible until promote. On promote the host emits
186 /// `ReportPoolWindowRemoved` + `ReportWindowOpened` so the same
187 /// label transitions atomically (from launcher's perspective)
188 /// from `pool` to `windows`. On pre-promote destroy: only
189 /// `ReportPoolWindowRemoved`.
190 pub pool: HashSet<String>,
191 /// Phase B.5 — authoritative window instance registry. Maps
192 /// label → sequential instance number (1 for "main", 2 for the
193 /// second window opened, etc.). Numbers are never reused within
194 /// a launcher run — when a window closes the entry is removed
195 /// but `next_instance_num` keeps advancing. Sole source of truth
196 /// post-B.5e (host's `WindowInstanceRegistry` was deleted in
197 /// PR #584); host holds a passive shadow projection in
198 /// `agentmux-cef::AppState.shadow_instance_registry`. Updated by
199 /// the same reducer paths that mutate `windows`.
200 pub instance_registry: HashMap<String, u32>,
201 /// Next instance number to assign. Starts at 2 — "main" is
202 /// pre-seeded with 1 in `Default` (matching host's
203 /// `WindowInstanceRegistry::new` behavior so a synthetic main
204 /// open wouldn't collide).
205 pub next_instance_num: u32,
206 /// Phase B.5 (window_id_map step a) — authoritative
207 /// label → backend window ID map. Mirrors host's existing
208 /// `agentmux-cef::AppState.window_id_map`. Populated by
209 /// `Command::ReportBackendWindowIdRegistered` (sent from host
210 /// when the frontend calls `register_backend_window` IPC
211 /// after init); drained by `ReportBackendWindowIdUnregistered`
212 /// on close. Will become host-side authoritative through the
213 /// standard a→b→c→d→e ratchet.
214 pub backend_window_ids: HashMap<String, String>,
215 /// Monotonic counter for `Event.version`. Bumped by `bump_version()`.
216 pub event_version: u64,
217 /// Monotonic counter for client_id (returned in Registered events).
218 pub next_client_id: u64,
219 /// Phase B.9.1 (WRR) — current monitor topology, replaced
220 /// wholesale on `ReportMonitorTopologyChanged`. Empty by default
221 /// until the host's `wrr/wndproc.rs` reports the first
222 /// `WM_DISPLAYCHANGE`-equivalent (or its initial topology probe
223 /// at startup). `OffMonitor` drift is suppressed when this is
224 /// empty — we don't know enough to classify yet.
225 pub monitors: Vec<Rect>,
226 /// Phase B.9.1 — HWNDs the reducer has seen via `ReportHwndOpened`
227 /// but couldn't yet associate with a `WindowMirror`. Three
228 /// reasons an entry lives here transiently:
229 /// 1. The OS create event raced ahead of the host's
230 /// `OnAfterCreated` → `ReportWindowOpened` chain.
231 /// 2. The host couldn't determine `label_hint` at create time.
232 /// 3. The HWND belongs to a pool window not yet promoted.
233 /// Drained on each `ReportWindowOpened` (we try to match a
234 /// pending HWND by `label_hint`/timing). Anything still here
235 /// after a follow-up event is classified as `HwndWithoutBrowser`.
236 pub pending_hwnds: HashMap<u64, PendingHwnd>,
237 /// Drift-storm fix (PR #708 round 3) — labels for which the host
238 /// emitted `ReportPoolWindowPromoted` but the corresponding
239 /// `ReportWindowOpened` hasn't arrived yet. The actual host emit
240 /// order on tear-off is `ReportPoolWindowRemoved` →
241 /// `ReportPoolWindowPromoted` → `ReportWindowOpened`, so at
242 /// promote-time the launcher has NO `WindowMirror` for the label
243 /// — the mirror is created by `ReportWindowOpened` a few ms
244 /// later. Without this set, the post-promote mirror is initialized
245 /// with `foregrounded_since_open: false`, the open-transient drift
246 /// detector then fires `HiddenSinceOpen` on every visible→hidden
247 /// flicker during HWND repositioning, the host fans each event
248 /// out across the bridge and the renderer's V8 isolate crashes.
249 /// `ReportWindowOpened` consumes the entry to initialize the new
250 /// mirror with `foregrounded_since_open: true`. Removed on
251 /// `ReportWindowClosed` if open never arrived (bounded leak).
252 /// See `docs/specs/ANALYSIS_DRIFT_STORM_RENDERER_CRASH_2026-05-06.md`.
253 pub just_promoted_labels: HashSet<String>,
254 // F.7 cleanup audit: removed unused `launcher_start_ms: Option<u64>`
255 // field. It was set in Default but no reducer arm or consumer
256 // ever read it — the WRR observability path uses the OnceLock-
257 // backed `launcher_start_ms()` helper in `ipc::server` directly,
258 // not a state field. Genuine leftover from the early B.9.1 sketch.
259}
260
/// Phase B.9.1 — short-lived record of an HWND that the reducer has not
/// yet been able to link to a `WindowMirror`. Stored in
/// `State::pending_hwnds`, which documents why entries end up there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PendingHwnd {
    /// Window class name carried with the pending HWND.
    pub class_name: String,
    /// Window title carried with the pending HWND.
    pub title: String,
    /// Label hint for the HWND, if one is known.
    pub label_hint: Option<String>,
    /// Milliseconds since launcher start at which the
    /// `ReportHwndOpened` arrived.
    ///
    /// Used to age out pending entries: if the entry is still present
    /// when a *different* event arrives that should have caused
    /// reconciliation, it is classified as `HwndWithoutBrowser`.
    pub arrived_at_ms: u64,
}
275
276impl Default for State {
277 fn default() -> Self {
278 Self {
279 lifecycle: LifecyclePhase::Starting,
280 processes: HashMap::new(),
281 windows: HashMap::new(),
282 pool: HashSet::new(),
283 instance_registry: {
284 let mut m = HashMap::new();
285 m.insert("main".to_string(), 1);
286 m
287 },
288 next_instance_num: 2,
289 backend_window_ids: HashMap::new(),
290 event_version: 0,
291 next_client_id: 1,
292 monitors: Vec::new(),
293 pending_hwnds: HashMap::new(),
294 just_promoted_labels: HashSet::new(),
295 }
296 }
297}
298
299impl State {
300 /// Bump and return the new event version. Always called inside
301 /// the reducer when constructing an Event so version numbers
302 /// stay strictly monotonic.
303 ///
304 /// Strict (non-wrapping) add: Phase D's GetSnapshot resync
305 /// protocol relies on monotonicity (`event.version >
306 /// snapshot.version`), and a wrap to 0 would silently break
307 /// that contract. u64 at one event/ns would take 584 years to
308 /// overflow — never going to happen in practice; if it ever
309 /// does, the panic is the right failure mode.
310 /// (gemini MEDIUM PR #574 round-1.)
311 pub fn bump_version(&mut self) -> u64 {
312 self.event_version += 1;
313 self.event_version
314 }
315
316 /// Bump and return the next client_id. Client IDs are stable
317 /// per launcher run; not persisted across restart. Same strict-
318 /// add reasoning as bump_version.
319 pub fn alloc_client_id(&mut self) -> u64 {
320 let id = self.next_client_id;
321 self.next_client_id += 1;
322 id
323 }
324}