agentmux_cef/main.rs
1// Copyright 2026, AgentMux Corp.
2// SPDX-License-Identifier: Apache-2.0
3//
4// AgentMux Host — Entry point.
5//
6// This binary serves as both the browser process and CEF subprocess
7// (renderer, GPU, utility). Subprocess mode is detected via the --type
8// command-line argument injected by CEF.
9//
10// Phase 2: Includes IPC HTTP server, sidecar management, and command routing.
11//
12// Usage:
13// agentmux-cef # Load default URL (http://localhost:5173)
14// agentmux-cef --url=http://host:port # Load custom URL
15// agentmux-cef --use-native # Use native platform window instead of Views
16// agentmux-cef --use-alloy-style # Use Alloy runtime style
17
18// Hide console window in release mode on Windows (not sandbox).
19#![cfg_attr(
20 all(not(debug_assertions), not(feature = "sandbox"), target_os = "windows"),
21 windows_subsystem = "windows"
22)]
23
24mod app;
25mod browser_api;
26mod browser_panes;
27mod client;
28mod commands;
29mod dev_authfile;
30mod events;
31mod ipc;
32mod launcher_event_bridge;
33mod launcher_ipc;
34mod parent_process;
35mod srv_event_bridge;
36mod srv_ipc;
37mod memory_heartbeat;
38mod browser_pane;
39#[cfg(target_os = "windows")]
40mod floating_pane;
41mod reducer;
42mod saga_dispatch;
43mod sidecar;
44mod state;
45mod ui_tasks;
46mod wrr;
47
48use std::sync::Arc;
49
50use cef::*;
51
/// Platform-specific type for OS key events in KeyboardHandler.
///
/// Each alias names the pointee of CEF's `cef_event_handle_t` on that
/// platform, i.e. the type obtained after dereferencing the handle:
/// - Windows: `*mut MSG`    → `MSG`
/// - Linux:   `*mut XEvent` → `_XEvent` (libX11)
/// - macOS:   `*mut c_void` → `c_void` (NSEvent-backed; cef-dll-sys
///   uses an opaque pointer here)
///
/// Only these three targets are covered; exactly one alias is compiled in
/// for any given build.
#[cfg(target_os = "windows")]
pub type OsKeyEvent = cef::sys::MSG;
#[cfg(target_os = "linux")]
pub type OsKeyEvent = cef::sys::_XEvent;
#[cfg(target_os = "macos")]
pub type OsKeyEvent = std::ffi::c_void;
64
/// Keep Windows from parking a faulting process behind a modal dialog.
///
/// An unhandled fault (a Chromium `LOG(FATAL)`, an `abort()`, a breakpoint)
/// normally raises the "Application Error" / WER crash dialog. While that
/// dialog is on screen the process is frozen and cannot be auto-recovered,
/// so the dialog UI is switched off and the process terminates immediately
/// instead. Spec:
/// docs/specs/SPEC_SERVICE_SUPERVISION_AND_RECOVERY_2026_05_20.md.
///
/// The settings are process-wide. `main` calls this first thing in every
/// process started from this binary, which is how the CEF subprocesses get
/// covered as well.
#[cfg(target_os = "windows")]
fn suppress_os_crash_dialogs() {
    use windows_sys::Win32::System::{Diagnostics::Debug as debug, ErrorReporting as wer};

    // Turn off only the WER crash-dialog UI, not WER itself:
    // SEM_NOGPFAULTERRORBOX would also kill WER/LocalDumps crash-dump
    // collection, the postmortem diagnostics this stability work needs.
    // WER_FAULT_REPORTING_NO_UI is the documented "no UI, keep reports" path.
    //
    // SAFETY: plain Win32 call taking a flag constant by value; no pointers
    // or borrowed memory are involved.
    let _ = unsafe { wer::WerSetFlags(wer::WER_FAULT_REPORTING_NO_UI) };

    // SEM_FAILCRITICALERRORS suppresses the critical-error handler
    // (e.g. "no disk in drive" popups) — unrelated to crash reporting.
    //
    // SAFETY: plain Win32 call taking a flag constant by value. The returned
    // previous error mode is intentionally discarded.
    let _previous_mode = unsafe { debug::SetErrorMode(debug::SEM_FAILCRITICALERRORS) };
}
88
/// Non-Windows counterpart of the crash-dialog suppression hook.
///
/// Deliberately does nothing; it exists so `main` can call
/// `suppress_os_crash_dialogs()` unconditionally on every target.
#[cfg(not(target_os = "windows"))]
fn suppress_os_crash_dialogs() {
    // Intentionally empty.
}
91
92fn main() {
93 // Phase 0 (service supervision & recovery): suppress the Windows crash
94 // modal so a fault terminates the process immediately instead of freezing
95 // it behind an "Application Error" dialog. Must be the first statement —
96 // set before anything can fault.
97 suppress_os_crash_dialogs();
98
99 // Set the DLL search path so CEF's runtime LoadLibrary calls (chrome_elf,
100 // libEGL, libGLESv2, d3dcompiler_47, …) resolve against the directory that
101 // actually holds libcef.dll. Two layouts exist:
102 //
103 // Portable / installed: <root>/runtime/host.exe + libcef.dll alongside.
104 // The launcher (agentmux-launcher) already sets
105 // the path to <root>/runtime/ before spawning us;
106 // this block is a no-op safety net for that mode.
107 //
108 // Dev (`task dev`): dist/cef-dev/agentmux-cef.exe + libcef.dll
109 // alongside (flat layout). Taskfile launches the
110 // host directly with no launcher, so nothing else
111 // has set the DLL path. Without it, CEF's internal
112 // LoadLibrary chain can fail and `cef::initialize`
113 // returns 0 — the empty-chrome_debug.log mode.
114 //
115 // Fall back to the host's own directory whenever a runtime/ subdir isn't
116 // present. Idempotent in portable mode (launcher set it first), correct
117 // in dev mode.
118 #[cfg(target_os = "windows")]
119 {
120 if let Ok(exe) = std::env::current_exe() {
121 if let Some(dir) = exe.parent() {
122 let runtime_dir = dir.join("runtime");
123 let dll_dir = if runtime_dir.exists() {
124 runtime_dir
125 } else {
126 dir.to_path_buf()
127 };
128 unsafe {
129 use std::os::windows::ffi::OsStrExt;
130 let wide: Vec<u16> = dll_dir.as_os_str().encode_wide().chain(Some(0)).collect();
131 windows_sys::Win32::System::LibraryLoader::SetDllDirectoryW(wide.as_ptr());
132 }
133 }
134 }
135 }
136
137 // Tracing is initialized after the subprocess check below — browser process
138 // gets dual file+stderr output; subprocesses exit before tracing is needed.
139
140 // macOS: load the CEF framework library explicitly.
141 #[cfg(target_os = "macos")]
142 let _library = {
143 let loader =
144 library_loader::LibraryLoader::new(&std::env::current_exe().unwrap(), false);
145 assert!(loader.load(), "Failed to load CEF framework");
146 loader
147 };
148
149 // Initialize the CEF API hash for version verification.
150 let _ = api_hash(sys::CEF_API_VERSION_LAST, 0);
151
152 // Parse command-line arguments.
153 let args = cef::args::Args::new();
154 let Some(cmd_line) = args.as_cmd_line() else {
155 eprintln!("agentmux-cef: Failed to parse command line arguments");
156 std::process::exit(1);
157 };
158
159 // Detect subprocess mode: CEF injects --type=renderer|gpu-process|utility
160 // for child processes. If --type is present, this is a subprocess.
161 let type_switch = CefString::from("type");
162 let is_browser_process = cmd_line.has_switch(Some(&type_switch)) != 1;
163
164 // Execute subprocess if applicable (exits here for non-browser processes).
165 let ret = execute_process(
166 Some(args.as_main_args()),
167 None, // App can be None for subprocess
168 std::ptr::null_mut(),
169 );
170
171 if is_browser_process {
172 // Browser process: execute_process returns -1, we continue with initialization.
173 assert_eq!(ret, -1, "execute_process should return -1 for browser process");
174 } else {
175 // Subprocess: execute_process returns the exit code.
176 let process_type = CefString::from(&cmd_line.switch_value(Some(&type_switch)));
177 eprintln!("agentmux-cef: subprocess exiting: type={}", process_type);
178 assert!(ret >= 0, "execute_process failed for subprocess");
179 std::process::exit(ret);
180 }
181
182 // -----------------------------------------------------------------------
183 // Browser process initialization
184 // -----------------------------------------------------------------------
185
186 // Set the Application User Model ID before any UI is created. This lets
187 // Windows group our windows under one pinned identity and is required for
188 // the `DeleteTab` + per-HWND AppID treatment used by the full-instance /
189 // sub-window model (see docs/specs/SPEC_MULTIWINDOW_TASKBAR_GROUPING.md).
190 // Use a VERSION-STABLE ID — never embed the patch number or pinning forks.
191 #[cfg(target_os = "windows")]
192 unsafe {
193 use windows_sys::Win32::UI::Shell::SetCurrentProcessExplicitAppUserModelID;
194 let aumid: Vec<u16> = "AgentMuxCorp.AgentMux\0".encode_utf16().collect();
195 let _ = SetCurrentProcessExplicitAppUserModelID(aumid.as_ptr());
196 }
197
198 let version = env!("CARGO_PKG_VERSION");
199
200 // Read paths + mode from the launcher-injected env vars. Two
201 // reachable configurations:
202 // a) Launcher-managed startup → env vars present, from_env()
203 // returns Some.
204 // b) Standalone `task dev` → env absent. We re-derive via
205 // `RuntimeMode::current` + `DataPaths::resolve` (symmetric
206 // with sidecar.rs::spawn_backend's fallback so they agree on
207 // the disk layout).
208 let host_exe_dir = std::env::current_exe()
209 .ok()
210 .and_then(|p| p.parent().map(|d| d.to_path_buf()))
211 .unwrap_or_default();
212 // Dev builds NEVER inherit AGENTMUX_* env vars from a parent process.
213 // `task dev` is routinely launched from inside an AgentMux terminal
214 // pane, which means the child host inherits the parent instance's
215 // AGENTMUX_DATA_DIR pointing at the parent's version-isolated dir.
216 // Without this guard the dev build would resolve its data dir to the
217 // running portable's path and trip CEF's process-singleton lock,
218 // routing every "open" back to the existing window — the user would
219 // never see the dev code run. Path-based detection is authoritative
220 // for dev builds; for installed/portable we still honor the
221 // launcher-provided env (it's the launcher's job to publish them).
222 let common_paths = if agentmux_common::is_dev_build_exe(&host_exe_dir) {
223 let mode = agentmux_common::RuntimeMode::current_path_only(&host_exe_dir);
224 // resolve_path_only mirrors current_path_only's env-isolation:
225 // ignore inherited AGENTMUX_CHANNEL so a dev host launched from
226 // inside a parent agentmux instance doesn't redirect into the
227 // parent's channel (would trip the channel single-instance lock).
228 agentmux_common::DataPaths::resolve_path_only(version, &mode).ok()
229 } else {
230 agentmux_common::DataPaths::from_env().or_else(|| {
231 let mode = agentmux_common::RuntimeMode::current(&host_exe_dir);
232 agentmux_common::DataPaths::resolve(version, &mode).ok()
233 })
234 };
235 let is_dev = match &common_paths {
236 Some(p) => matches!(p.mode, agentmux_common::RuntimeMode::Dev { .. }),
237 None => false,
238 };
239
240 let (data_dir, log_dir) = match &common_paths {
241 Some(p) => (p.cef_cache_dir.clone(), p.logs_dir.clone()),
242 None => {
243 // Both env-read AND fallback resolution failed (no home
244 // dir on disk, or platform unsupported). Use a degraded
245 // path so log init at least works; the runtime-startup
246 // check below will surface the underlying error.
247 (
248 std::path::PathBuf::from("."),
249 dirs::home_dir()
250 .unwrap_or_default()
251 .join(".agentmux")
252 .join("logs"),
253 )
254 }
255 };
256 std::fs::create_dir_all(&data_dir).ok();
257
258 // Initialize dual-output tracing: rolling log file + stderr.
259 // The log file guard must live for the entire process to ensure flushing.
260 let _log_guard = init_logging(&log_dir);
261
262 tracing::info!(
263 version,
264 runtime_mode = ?common_paths.as_ref().map(|p| p.mode.to_env_string()),
265 data_dir = %data_dir.display(),
266 log_dir = %log_dir.display(),
267 "Initializing CEF browser process"
268 );
269
270 // Phase B.6 (post-fix): the named-pipe bind in the launcher is
271 // the AUTHORITATIVE single-instance lock — a second launcher
272 // hits ERROR_ACCESS_DENIED and never reaches the host. We still
273 // publish `<launcher-shared-data-dir>/ipc-port` (port:token) so
274 // the second launcher can FORWARD an `open_new_window` request
275 // to the existing instance over HTTP and exit silently — the
276 // legacy forwarding UX users expect when double-clicking the
277 // exe twice. The pipe-bind-first ordering closes the stale-state
278 // defect (gap #8 in
279 // specs/ANALYSIS_WINDOW_PROCESS_STATE_INVENTORY_2026_04_27.md):
280 // a stale ipc-port file from a hard crash is irrelevant on the
281 // FIRST-instance path because pipe-bind succeeds and the file is
282 // overwritten; on the SECOND-instance path the live first
283 // instance wrote a fresh port:token, so forwarding lands.
284 //
285 // CRITICAL: write the port file at the LAUNCHER-shared data dir
286 // (`AGENTMUX_DATA_DIR`, == `paths.data_dir` in the launcher), NOT
287 // the host-local CEF cache dir (`<portable>/data/cef/`). The two
288 // diverge in portable mode (cef cache is one level deeper) and
289 // the launcher's `forward_open_new_window` reads the launcher-
290 // shared path. Falls back to the cef cache dir only when the env
291 // is unset (`task dev` mode without launcher), where forwarding
292 // wouldn't be wired anyway.
293 // Dev builds inherit AGENTMUX_DATA_DIR from the parent pane they were
294 // launched from. Writing ipc-port there would overwrite the parent
295 // instance's port:token and break its single-instance forwarding.
296 // In dev mode there is no launcher so port forwarding isn't wired
297 // anyway — use the dev data dir directly.
298 let port_file_dir = if agentmux_common::is_dev_build_exe(&host_exe_dir) {
299 data_dir.clone()
300 } else {
301 std::env::var_os("AGENTMUX_DATA_DIR")
302 .map(std::path::PathBuf::from)
303 .unwrap_or_else(|| data_dir.clone())
304 };
305 let _ = std::fs::create_dir_all(&port_file_dir);
306 let port_file = port_file_dir.join("ipc-port");
307
308 // Create shared application state.
309 let app_state = Arc::new(state::AppState::default());
310
311 // Start tokio runtime for async operations (IPC server, sidecar management).
312 let runtime = tokio::runtime::Runtime::new().expect("Failed to create tokio runtime");
313
314 // Install the runtime Handle into browser_pane::auth so the
315 // CEF `get_auth_credentials` callback (which runs on CEF's IO
316 // thread) can spawn the parked-auth TTL timer. A bare
317 // `tokio::spawn` there would panic with "there is no reactor
318 // running" because that thread has no `Handle::current()`.
319 browser_pane::auth::set_runtime_handle(runtime.handle().clone());
320
321 // Start the IPC HTTP server and get the assigned port.
322 let ipc_port = runtime.block_on(ipc::start_ipc_server(app_state.clone()));
323 *app_state.ipc_port.lock() = ipc_port;
324
325 tracing::info!("IPC server started on port {}", ipc_port);
326
327 // Phase B.2: connect to launcher's named-pipe IPC (if launcher
328 // is in the loop) so the launcher can route Commands and Events
329 // through us. The handle is held in main scope for the host's
330 // lifetime — dropping it closes the pipe (logged by launcher).
331 // Failure to connect is non-fatal in B.2 (host can still run);
332 // B.5+ will tighten when the host depends on IPC for state.
333 //
334 // Env-isolation guard: a dev build inheriting
335 // `AGENTMUX_LAUNCHER_PIPE` from a parent AgentMux pane (e.g. a
336 // shell inside an active pane that re-invokes the host directly)
337 // would otherwise connect to that parent's launcher pipe and
338 // route its host events into the parent's launcher state.
339 //
340 // Discriminator: connect when our parent process IS the launcher
341 // (production portable, installed build, OR post-#SPEC_LAUNCHER_DEV_INTEGRATION
342 // `task dev` which spawns the host via the launcher). Skip when
343 // it isn't.
344 //
345 // Older path-only guard (`is_dev_build_exe`) over-fired in dev
346 // mode after launcher integration shipped — see
347 // docs/specs/SPEC_DEV_MODE_LAUNCHER_IPC_2026_05_16.md.
348 let parent_is_launcher = parent_process::parent_is_agentmux_launcher();
349 let should_connect_launcher = match parent_is_launcher {
350 Some(true) => true,
351 Some(false) => false,
352 // Parent detection failed — fall back to the path-based guard
353 // so production builds still connect (they would otherwise
354 // silently lose the launcher IPC) and dev builds still skip.
355 None => !agentmux_common::is_dev_build_exe(&host_exe_dir),
356 };
357 let _launcher_ipc = if should_connect_launcher {
358 runtime.block_on(launcher_ipc::connect_to_launcher(app_state.clone()))
359 } else {
360 None
361 };
362
363 // Phase E.2c.5a — connect to the srv reducer's pipe. Forwards
364 // srv events (workspace / tab / block lifecycle) to every
365 // top-level renderer via the JS bridge. Renderer-side handler
366 // (`window.__agentmux_srv_event`) lands in E.2c.5b. Non-fatal
367 // if absent: `AGENTMUX_SRV_PIPE_PATH` is only set on the srv
368 // child by the launcher (`agentmux-launcher/src/srv_spawner.rs`),
369 // not on the host spawn — so today the host never has the env
370 // var and `connect_to_srv` short-circuits to None at
371 // `srv_ipc.rs:62-68`. Path-based dev guard is the right gate
372 // for this branch; restoring full srv-IPC parity in dev needs
373 // the launcher to propagate the env var to the host first.
374 // See spec §11 of SPEC_DEV_MODE_LAUNCHER_IPC_2026_05_16.md.
375 let _srv_ipc = if agentmux_common::is_dev_build_exe(&host_exe_dir) {
376 None
377 } else {
378 runtime.block_on(srv_ipc::connect_to_srv(app_state.clone()))
379 };
380
381 // Phase B.1: if launcher already spawned srv (the normal portable
382 // / installed path post-PR-#570 + B.1), populate state from the
383 // env vars launcher set — no need to re-spawn srv. Falls back to
384 // spawn_backend() ONLY when env vars are absent (`task dev` mode
385 // where the host runs without the launcher).
386 //
387 // Spawn the backend sidecar SYNCHRONOUSLY — block until it
388 // signals ready (AGENTMUXSRV-ESTART) before creating the browser
389 // window. This eliminates the race condition where CEF loads the
390 // frontend before the backend is available, which causes a "raw
391 // browser" appearance on slow machines or first launch.
392 let backend_ready = runtime.block_on(async {
393 // Dev builds inherit AGENTMUX_BACKEND_WS from the parent pane.
394 // Consuming it would connect to the parent's srv instead of
395 // spawning our own, so the dev frontend runs against the wrong
396 // (parent's) backend and no dev-version srv is ever started.
397 let launcher_provided = if agentmux_common::is_dev_build_exe(&host_exe_dir) {
398 None
399 } else {
400 sidecar::use_launcher_endpoints(&app_state)
401 };
402 let result = match launcher_provided {
403 Some(Ok(r)) => {
404 tracing::info!(
405 "Using launcher-provided backend endpoints: ws={} web={} pid={}",
406 r.ws_endpoint,
407 r.web_endpoint,
408 r.instance_id
409 );
410 Ok(r)
411 }
412 Some(Err(e)) => {
413 tracing::error!(
414 "Launcher set AGENTMUX_BACKEND_WS but env was malformed: {} — refusing to fall back to spawn_backend (would fight launcher's srv)",
415 e
416 );
417 Err(e)
418 }
419 None => {
420 tracing::info!("No launcher-provided backend env (dev mode) — spawning srv ourselves");
421 sidecar::spawn_backend(&app_state).await
422 }
423 };
424 match result {
425 Ok(result) => {
426 {
427 let mut endpoints = app_state.backend_endpoints.lock();
428 endpoints.ws_endpoint = result.ws_endpoint.clone();
429 endpoints.web_endpoint = result.web_endpoint.clone();
430 }
431 tracing::info!(
432 "Backend ready: ws={} web={}",
433 result.ws_endpoint,
434 result.web_endpoint
435 );
436 true
437 }
438 Err(e) => {
439 tracing::error!("Failed to set up backend: {}", e);
440 false
441 }
442 }
443 });
444
445 if !backend_ready {
446 tracing::error!("Backend failed to start — exiting");
447 std::process::exit(1);
448 }
449
450 // Dev-only: write authkey.dev so external test harnesses can call
451 // the service API without polling logs or driving the UI. Gate is
452 // Write authkey.dev for ALL runtime modes (dev, portable, installed).
453 // The file lets bench-term-echo.mjs and the PowerShell test harnesses
454 // discover the running instance without manual --ws-url / --auth-key flags.
455 // Security: the WS server is loopback-only; any same-user process already
456 // has equivalent TCP access. See SPEC_TEST_API_ACCESS.md §3 and
457 // SPEC_BENCHMARK_PORTABLE_DISCOVERY_2026_05_20.md for rationale.
458 {
459 let endpoints = app_state.backend_endpoints.lock().clone();
460 let auth_key = app_state.auth_key.lock().clone();
461 let ipc_token = app_state.ipc_token.clone();
462 let data_dir_str = app_state
463 .version_data_dir
464 .lock()
465 .clone()
466 .unwrap_or_default();
467 let data_dir_path = std::path::PathBuf::from(&data_dir_str);
468 let ipc_endpoint = format!("127.0.0.1:{}", ipc_port);
469 let instance = format!("v{}", env!("CARGO_PKG_VERSION"));
470 let host_pid = std::process::id();
471 match dev_authfile::write_dev_auth_file(
472 &data_dir_path,
473 &auth_key,
474 &endpoints.web_endpoint,
475 &endpoints.ws_endpoint,
476 &ipc_endpoint,
477 &ipc_token,
478 &instance,
479 host_pid,
480 ) {
481 Ok(p) => tracing::info!("Wrote authkey file: {}", p.display()),
482 Err(e) => tracing::warn!("Failed to write authkey file: {}", e),
483 }
484 }
485
486 // Create the App handler with state.
487 let mut cef_app = app::AgentMuxApp::new(app_state.clone(), ipc_port);
488
489 // Resolve resource directories for portable layout. In portable
490 // mode the CEF host is IN runtime/, so resources are flat
491 // alongside it. In dev mode they are also flat in dist/cef-dev/.
492 // Reuses `host_exe_dir` from the startup mode-detection block.
493 let runtime_dir = host_exe_dir.join("runtime");
494 let base_dir = if runtime_dir.exists() {
495 runtime_dir
496 } else {
497 host_exe_dir.clone()
498 };
499 let resources_dir = CefString::from(base_dir.to_str().unwrap_or(""));
500 let locales_dir = CefString::from(base_dir.join("locales").to_str().unwrap_or(""));
501
502 // Reuse data_dir from single-instance check as CEF cache path.
503 // Remove stale lockfile from a previous killed run.
504 let lockfile = data_dir.join("lockfile");
505 if lockfile.exists() {
506 tracing::warn!("Removing stale CEF lockfile: {}", lockfile.display());
507 let _ = std::fs::remove_file(&lockfile);
508 }
509 tracing::info!("CEF cache dir: {}", data_dir.display());
510 let cache_dir = CefString::from(data_dir.to_str().unwrap_or(""));
511
512 // Configure CEF settings.
513 let debug_port: u16 = if is_dev { 9223 } else { 9222 };
514 *app_state.debug_port.lock() = debug_port;
515
516 // Route CEF's internal Chromium logging into our log dir alongside the
517 // tracing-subscriber file. Without this, init failures leave an empty
518 // chrome_debug.log in the cache dir and we have nothing to read. INFO is
519 // verbose enough to expose load-library / resource problems but quiet
520 // enough not to swamp the file in normal operation.
521 let cef_log_path = log_dir.join("cef-debug.log");
522 let cef_log_file = CefString::from(cef_log_path.to_str().unwrap_or(""));
523
524 let settings = Settings {
525 no_sandbox: 1,
526 // ARGB: alpha=0 → SK_AlphaTRANSPARENT → triggers the transparency
527 // cascade in the patched libcef.so (see cef commits b921ffe18 +
528 // 68e0dc668). The CSS layer's rgba(_,_,_,<1) body bg then composites
529 // with the desktop instead of being clamped to opaque white.
530 // Pair: BrowserSettings.background_color must also be 0 (app.rs).
531 // Pair: WindowDelegate must return is_frameless=true (already does
532 // for the main window).
533 // Spec: docs/research/cef-transparency-research-2026-05-10.md.
534 background_color: 0x00000000,
535 remote_debugging_port: debug_port as i32,
536 root_cache_path: cache_dir,
537 resources_dir_path: resources_dir,
538 locales_dir_path: locales_dir,
539 log_file: cef_log_file,
540 log_severity: LogSeverity::INFO,
541 // CEF subprocess (renderer, GPU) uses the same exe
542 browser_subprocess_path: CefString::from(
543 std::env::current_exe().unwrap().to_str().unwrap_or("")
544 ),
545 ..Default::default()
546 };
547
548 // Initialize CEF.
549 //
550 // CefInitialize returns 1 on success and 0 either on real init failure OR
551 // on "normal early exit" (process singleton, command-line forward, etc).
552 // We can only tell the two apart by calling cef_get_exit_code() and
553 // matching against cef_resultcode_t. Treat NORMAL_EXIT* codes as a clean
554 // exit; everything else is a real failure that we surface via panic.
555 //
556 // Common early-exit codes (cef_resultcode_t):
557 // 0 CEF_RESULT_CODE_NORMAL_EXIT
558 // 24 CEF_RESULT_CODE_NORMAL_EXIT_PROCESS_NOTIFIED ← singleton relaunch
559 // 36 CEF_RESULT_CODE_NORMAL_EXIT_PACK_EXTENSION_SUCCESS
560 // 38 CEF_RESULT_CODE_NORMAL_EXIT_AUTO_DE_ELEVATED
561 let init_result = initialize(
562 Some(args.as_main_args()),
563 Some(&settings),
564 Some(&mut cef_app),
565 std::ptr::null_mut(),
566 );
567 if init_result != 1 {
568 let exit_code = get_exit_code();
569 // Sidecar was spawned before cef_initialize(); std::process::exit()
570 // bypasses the normal shutdown block, so kill it here first.
571 {
572 let mut sidecar = app_state.sidecar_child.lock();
573 if let Some(ref mut child) = *sidecar {
574 tracing::info!("CEF early exit: killing backend sidecar before exit");
575 let _ = child.kill();
576 }
577 }
578 match exit_code {
579 0 | 24 | 36 | 38 => {
580 tracing::info!(
581 exit_code,
582 "CEF early exit (process singleton or similar) — exiting cleanly"
583 );
584 std::process::exit(0);
585 }
586 _ => {
587 tracing::error!(
588 exit_code,
589 "CEF initialization failed; see ~/.agentmux/logs/cef-debug.log for details"
590 );
591 std::process::exit(exit_code);
592 }
593 }
594 }
595
596 tracing::info!("CEF initialized, entering message loop");
597
598 // Start memory heartbeat — logs system/process memory stats every 20s.
599 // Provides forensic data if the process later crashes from OOM / VA exhaustion.
600 memory_heartbeat::start();
601
602 // Phase B.6 (post-fix): publish port:token AFTER CEF init so a
603 // second launcher only forwards `open_new_window` when we're
604 // actually ready to handle it. Single-instance enforcement is
605 // the launcher's named-pipe bind — this file is purely a
606 // forwarding hint.
607 let _ = std::fs::write(
608 &port_file,
609 format!("{}:{}", ipc_port, app_state.ipc_token),
610 );
611
612 // Phase B.9.1 (WRR) — install Win32 event hooks. Must come
613 // AFTER `connect_to_launcher` so the report_hwnd_* sync APIs
614 // have a live `COMMAND_TX` to push into; AFTER CEF init so
615 // any HWNDs CEF creates during initialize() are missed
616 // (acceptable — they predate the user's session and are
617 // accounted for by main-window startup paths). Idempotent;
618 // safe to call multiple times. State arg lets the callback
619 // peek `pending_window_creations` for `label_hint`.
620 wrr::install_hooks(app_state.clone());
621
622 // Run the CEF message loop. This blocks until quit_message_loop() is called
623 // (triggered when all browser windows are closed in client.rs).
624 run_message_loop();
625
626 tracing::info!("CEF message loop exited, shutting down");
627
628 // Phase B.9.1 (WRR) — tear down Win32 event hooks before any
629 // further teardown. UnhookWinEvent is cheap; doing it early
630 // prevents stray callbacks during shutdown from racing the
631 // launcher_ipc channel close.
632 wrr::uninstall_hooks();
633
634 // Kill the backend sidecar on shutdown.
635 {
636 let mut sidecar = app_state.sidecar_child.lock();
637 if let Some(ref mut child) = *sidecar {
638 tracing::info!("Killing backend sidecar");
639 let _ = child.kill();
640 }
641 }
642
643 // Clean shutdown.
644 shutdown();
645
646 // Drop the tokio runtime after CEF shutdown.
647 drop(runtime);
648
649 // Phase B.6 (post-fix): clean up the forwarding hint so a stale
650 // file doesn't survive a graceful exit. (Hard crashes will leave
651 // it behind; harmless because pipe-bind on next launch is
652 // authoritative — see comment at the port_file declaration.)
653 let _ = std::fs::remove_file(&port_file);
654
655 tracing::info!("AgentMux host shutdown complete");
656}
657
658/// Initialize tracing with dual output: rolling daily log file + human-readable stderr.
659/// `log_dir` is resolved by the caller: `<portable-root>/data/logs/` in portable mode,
660/// `~/.agentmux/logs/` in installed mode.
661/// Returns a guard that must be held for the lifetime of the process to ensure log flushing.
662fn init_logging(log_dir: &std::path::Path) -> tracing_appender::non_blocking::WorkerGuard {
663 use tracing_subscriber::{fmt, layer::SubscriberExt, EnvFilter};
664
665 let version = env!("CARGO_PKG_VERSION");
666 let _ = std::fs::create_dir_all(log_dir);
667
668 // Delete log files older than 7 days to prevent unbounded growth.
669 cleanup_old_logs(&log_dir, 7);
670
671 let log_prefix = format!("agentmux-host-v{}.log", version);
672 let file_appender = tracing_appender::rolling::daily(&log_dir, &log_prefix);
673 let (non_blocking_file, guard) = tracing_appender::non_blocking(file_appender);
674
675 // Write pointer to current log file for zero-lookup agent discovery.
676 // Version-qualified name so multi-instance doesn't clobber pointers.
677 // Uses UTC to match tracing_appender::rolling::daily's date suffix.
678 let today = chrono::Utc::now().format("%Y-%m-%d").to_string();
679 let current_filename = format!("{}.{}", log_prefix, today);
680 let absolute_path = log_dir.join(¤t_filename);
681 let pointer_name = format!("current-host-v{}.path", version);
682
683 // Pointer #1: local — inside the instance's log dir. The basename
684 // is enough here since the reader is colocated.
685 let _ = std::fs::write(log_dir.join(&pointer_name), ¤t_filename);
686
687 // Pointer #2: global — at `<root>/logs/<pointer_name>`. Writes the
688 // ABSOLUTE PATH so legacy tooling (`muxlog host`) that lives outside
689 // the instance dir can `cat $pointer | xargs tail -f` and reach the
690 // real file. Skipped silently if the global dir can't be derived
691 // (e.g. AGENTMUX_HOME_OVERRIDE unset in some test setups).
692 if let Some(global_logs_dir) = log_dir.parent().and_then(|p| p.parent()).and_then(|p| p.parent()).map(|p| p.join("logs")) {
693 let _ = std::fs::create_dir_all(&global_logs_dir);
694 let _ = std::fs::write(
695 global_logs_dir.join(&pointer_name),
696 absolute_path.to_string_lossy().as_bytes(),
697 );
698 }
699
700 // Synchronous init sentinel: append a single line directly to the
701 // expected log path BEFORE the tracing subscriber is wired up. Without
702 // this, a hang between subscriber-setup and the non-blocking writer's
703 // first flush leaves the pointer file pointing at a never-created log
704 // file (observed 2026-05-02 freeze investigation). The sentinel
705 // guarantees the file exists once init_logging has run past
706 // pointer-write — if the file is missing afterwards, we know
707 // init_logging itself didn't get past this point.
708 let sentinel_path = log_dir.join(¤t_filename);
709 let sentinel_line = format!(
710 "{} INIT-SENTINEL agentmux-host v={} pid={} os={} arch={}\n",
711 chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ"),
712 version,
713 std::process::id(),
714 std::env::consts::OS,
715 std::env::consts::ARCH,
716 );
717 if let Ok(mut f) = std::fs::OpenOptions::new()
718 .create(true)
719 .append(true)
720 .open(&sentinel_path)
721 {
722 use std::io::Write;
723 let _ = f.write_all(sentinel_line.as_bytes());
724 let _ = f.flush();
725 }
726
727 let subscriber = tracing_subscriber::registry()
728 .with(
729 EnvFilter::try_from_default_env()
730 .unwrap_or_else(|_| EnvFilter::new("info")),
731 )
732 .with(
733 fmt::layer()
734 .json()
735 .with_writer(non_blocking_file)
736 .with_target(true)
737 .with_thread_ids(true),
738 )
739 .with(
740 fmt::layer()
741 .with_writer(std::io::stderr)
742 .with_ansi(true),
743 );
744
745 tracing::subscriber::set_global_default(subscriber).ok();
746
747 tracing::info!(
748 version,
749 os = std::env::consts::OS,
750 arch = std::env::consts::ARCH,
751 log_dir = %log_dir.display(),
752 "AgentMux host starting"
753 );
754
755 guard
756}
757
/// Delete rotated log files in `log_dir` that were last modified more than
/// `days` days ago.
///
/// Only directory entries whose *file name* contains `.log.` are candidates
/// (the rolling appender names files `<prefix>.log.<date>`); the name of the
/// containing directory is never taken into account, so unrelated files such
/// as pointer files are left alone even when a parent directory happens to
/// contain `.log.`.
///
/// Best-effort: an unreadable directory, unreadable metadata, or a failed
/// removal is silently skipped. If `days` is so large that the cutoff time
/// cannot be represented, nothing is deleted. The directory is not scanned
/// recursively.
fn cleanup_old_logs(log_dir: &std::path::Path, days: u64) {
    // Saturate rather than overflow on absurd retention windows, and bail out
    // when the cutoff would fall before the earliest representable time —
    // no file can be older than that.
    let max_age = std::time::Duration::from_secs(days.saturating_mul(86_400));
    let Some(cutoff) = std::time::SystemTime::now().checked_sub(max_age) else {
        return;
    };
    let Ok(entries) = std::fs::read_dir(log_dir) else {
        return;
    };

    for entry in entries.flatten() {
        // Match on the entry's own name, not the full path.
        let is_rotated_log = entry.file_name().to_string_lossy().contains(".log.");
        if !is_rotated_log {
            continue;
        }
        let Ok(modified) = entry.metadata().and_then(|meta| meta.modified()) else {
            continue;
        };
        if modified < cutoff {
            let _ = std::fs::remove_file(entry.path());
        }
    }
}