agentmux_cef/
main.rs

1// Copyright 2026, AgentMux Corp.
2// SPDX-License-Identifier: Apache-2.0
3//
4// AgentMux Host — Entry point.
5//
6// This binary serves as both the browser process and CEF subprocess
7// (renderer, GPU, utility). Subprocess mode is detected via the --type
8// command-line argument injected by CEF.
9//
10// Phase 2: Includes IPC HTTP server, sidecar management, and command routing.
11//
12// Usage:
13//   agentmux-cef                         # Load default URL (http://localhost:5173)
14//   agentmux-cef --url=http://host:port  # Load custom URL
15//   agentmux-cef --use-native            # Use native platform window instead of Views
16//   agentmux-cef --use-alloy-style       # Use Alloy runtime style
17
18// Hide console window in release mode on Windows (not sandbox).
19#![cfg_attr(
20    all(not(debug_assertions), not(feature = "sandbox"), target_os = "windows"),
21    windows_subsystem = "windows"
22)]
23
24mod app;
25mod browser_api;
26mod browser_panes;
27mod client;
28mod commands;
29mod dev_authfile;
30mod events;
31mod ipc;
32mod launcher_event_bridge;
33mod launcher_ipc;
34mod parent_process;
35mod srv_event_bridge;
36mod srv_ipc;
37mod memory_heartbeat;
38mod browser_pane;
39#[cfg(target_os = "windows")]
40mod floating_pane;
41mod reducer;
42mod saga_dispatch;
43mod sidecar;
44mod state;
45mod ui_tasks;
46mod wrr;
47
48use std::sync::Arc;
49
50use cef::*;
51
/// Platform-specific type for OS key events in KeyboardHandler.
/// Matches `cef_event_handle_t` after deref:
///   - Windows:  *mut MSG          → MSG
///   - Linux:    *mut XEvent       → _XEvent (libX11)
///   - macOS:    *mut c_void       → c_void  (NSEvent-backed; cef-dll-sys
///                                            uses an opaque pointer here)
///
/// Exactly one of the aliases below is compiled in, selected by `target_os`.
// Windows: the Win32 message structure re-exported by the CEF sys bindings.
#[cfg(target_os = "windows")]
pub type OsKeyEvent = cef::sys::MSG;
// Linux: the Xlib event type re-exported by the CEF sys bindings.
#[cfg(target_os = "linux")]
pub type OsKeyEvent = cef::sys::_XEvent;
// macOS: opaque — the event is only ever handled through a raw pointer.
#[cfg(target_os = "macos")]
pub type OsKeyEvent = std::ffi::c_void;
64
/// Keep Windows from parking a faulting process behind a modal crash dialog.
///
/// An unhandled fault (a Chromium `LOG(FATAL)`, an `abort()`, a breakpoint)
/// would otherwise raise the "Application Error" / WER dialog; while that
/// dialog is open the process is frozen and cannot be auto-recovered. With the
/// UI suppressed the process terminates immediately instead.
///
/// Spec: docs/specs/SPEC_SERVICE_SUPERVISION_AND_RECOVERY_2026_05_20.md.
#[cfg(target_os = "windows")]
fn suppress_os_crash_dialogs() {
    use windows_sys::Win32::System::Diagnostics::Debug as debug_api;
    use windows_sys::Win32::System::ErrorReporting as wer;

    // Both settings are process-wide; per the original design notes they also
    // cover the CEF subprocesses.

    // Turn off only the WER crash-dialog UI. WER itself stays enabled:
    // SEM_NOGPFAULTERRORBOX would also disable WER/LocalDumps crash-dump
    // collection, which the postmortem diagnostics rely on.
    // WER_FAULT_REPORTING_NO_UI is the documented "no UI, keep reports" path.
    // SAFETY: plain Win32 call taking a flag constant; no pointers involved.
    unsafe {
        let _ = wer::WerSetFlags(wer::WER_FAULT_REPORTING_NO_UI);
    }

    // SEM_FAILCRITICALERRORS silences the critical-error handler (e.g. the
    // "no disk in drive" popup) — unrelated to crash reporting.
    // SAFETY: plain Win32 call taking a flag constant; no pointers involved.
    unsafe {
        debug_api::SetErrorMode(debug_api::SEM_FAILCRITICALERRORS);
    }
}

/// Non-Windows builds have no crash dialog to suppress; this is a no-op.
#[cfg(not(target_os = "windows"))]
fn suppress_os_crash_dialogs() {
    // Intentionally empty.
}
91
92fn main() {
93    // Phase 0 (service supervision & recovery): suppress the Windows crash
94    // modal so a fault terminates the process immediately instead of freezing
95    // it behind an "Application Error" dialog. Must be the first statement —
96    // set before anything can fault.
97    suppress_os_crash_dialogs();
98
99    // Set the DLL search path so CEF's runtime LoadLibrary calls (chrome_elf,
100    // libEGL, libGLESv2, d3dcompiler_47, …) resolve against the directory that
101    // actually holds libcef.dll. Two layouts exist:
102    //
103    //   Portable / installed: <root>/runtime/host.exe + libcef.dll alongside.
104    //                         The launcher (agentmux-launcher) already sets
105    //                         the path to <root>/runtime/ before spawning us;
106    //                         this block is a no-op safety net for that mode.
107    //
108    //   Dev (`task dev`):     dist/cef-dev/agentmux-cef.exe + libcef.dll
109    //                         alongside (flat layout). Taskfile launches the
110    //                         host directly with no launcher, so nothing else
111    //                         has set the DLL path. Without it, CEF's internal
112    //                         LoadLibrary chain can fail and `cef::initialize`
113    //                         returns 0 — the empty-chrome_debug.log mode.
114    //
115    // Fall back to the host's own directory whenever a runtime/ subdir isn't
116    // present. Idempotent in portable mode (launcher set it first), correct
117    // in dev mode.
118    #[cfg(target_os = "windows")]
119    {
120        if let Ok(exe) = std::env::current_exe() {
121            if let Some(dir) = exe.parent() {
122                let runtime_dir = dir.join("runtime");
123                let dll_dir = if runtime_dir.exists() {
124                    runtime_dir
125                } else {
126                    dir.to_path_buf()
127                };
128                unsafe {
129                    use std::os::windows::ffi::OsStrExt;
130                    let wide: Vec<u16> = dll_dir.as_os_str().encode_wide().chain(Some(0)).collect();
131                    windows_sys::Win32::System::LibraryLoader::SetDllDirectoryW(wide.as_ptr());
132                }
133            }
134        }
135    }
136
137    // Tracing is initialized after the subprocess check below — browser process
138    // gets dual file+stderr output; subprocesses exit before tracing is needed.
139
140    // macOS: load the CEF framework library explicitly.
141    #[cfg(target_os = "macos")]
142    let _library = {
143        let loader =
144            library_loader::LibraryLoader::new(&std::env::current_exe().unwrap(), false);
145        assert!(loader.load(), "Failed to load CEF framework");
146        loader
147    };
148
149    // Initialize the CEF API hash for version verification.
150    let _ = api_hash(sys::CEF_API_VERSION_LAST, 0);
151
152    // Parse command-line arguments.
153    let args = cef::args::Args::new();
154    let Some(cmd_line) = args.as_cmd_line() else {
155        eprintln!("agentmux-cef: Failed to parse command line arguments");
156        std::process::exit(1);
157    };
158
159    // Detect subprocess mode: CEF injects --type=renderer|gpu-process|utility
160    // for child processes. If --type is present, this is a subprocess.
161    let type_switch = CefString::from("type");
162    let is_browser_process = cmd_line.has_switch(Some(&type_switch)) != 1;
163
164    // Execute subprocess if applicable (exits here for non-browser processes).
165    let ret = execute_process(
166        Some(args.as_main_args()),
167        None, // App can be None for subprocess
168        std::ptr::null_mut(),
169    );
170
171    if is_browser_process {
172        // Browser process: execute_process returns -1, we continue with initialization.
173        assert_eq!(ret, -1, "execute_process should return -1 for browser process");
174    } else {
175        // Subprocess: execute_process returns the exit code.
176        let process_type = CefString::from(&cmd_line.switch_value(Some(&type_switch)));
177        eprintln!("agentmux-cef: subprocess exiting: type={}", process_type);
178        assert!(ret >= 0, "execute_process failed for subprocess");
179        std::process::exit(ret);
180    }
181
182    // -----------------------------------------------------------------------
183    // Browser process initialization
184    // -----------------------------------------------------------------------
185
186    // Set the Application User Model ID before any UI is created. This lets
187    // Windows group our windows under one pinned identity and is required for
188    // the `DeleteTab` + per-HWND AppID treatment used by the full-instance /
189    // sub-window model (see docs/specs/SPEC_MULTIWINDOW_TASKBAR_GROUPING.md).
190    // Use a VERSION-STABLE ID — never embed the patch number or pinning forks.
191    #[cfg(target_os = "windows")]
192    unsafe {
193        use windows_sys::Win32::UI::Shell::SetCurrentProcessExplicitAppUserModelID;
194        let aumid: Vec<u16> = "AgentMuxCorp.AgentMux\0".encode_utf16().collect();
195        let _ = SetCurrentProcessExplicitAppUserModelID(aumid.as_ptr());
196    }
197
198    let version = env!("CARGO_PKG_VERSION");
199
200    // Read paths + mode from the launcher-injected env vars. Two
201    // reachable configurations:
202    //   a) Launcher-managed startup → env vars present, from_env()
203    //      returns Some.
204    //   b) Standalone `task dev` → env absent. We re-derive via
205    //      `RuntimeMode::current` + `DataPaths::resolve` (symmetric
206    //      with sidecar.rs::spawn_backend's fallback so they agree on
207    //      the disk layout).
208    let host_exe_dir = std::env::current_exe()
209        .ok()
210        .and_then(|p| p.parent().map(|d| d.to_path_buf()))
211        .unwrap_or_default();
212    // Dev builds NEVER inherit AGENTMUX_* env vars from a parent process.
213    // `task dev` is routinely launched from inside an AgentMux terminal
214    // pane, which means the child host inherits the parent instance's
215    // AGENTMUX_DATA_DIR pointing at the parent's version-isolated dir.
216    // Without this guard the dev build would resolve its data dir to the
217    // running portable's path and trip CEF's process-singleton lock,
218    // routing every "open" back to the existing window — the user would
219    // never see the dev code run. Path-based detection is authoritative
220    // for dev builds; for installed/portable we still honor the
221    // launcher-provided env (it's the launcher's job to publish them).
222    let common_paths = if agentmux_common::is_dev_build_exe(&host_exe_dir) {
223        let mode = agentmux_common::RuntimeMode::current_path_only(&host_exe_dir);
224        // resolve_path_only mirrors current_path_only's env-isolation:
225        // ignore inherited AGENTMUX_CHANNEL so a dev host launched from
226        // inside a parent agentmux instance doesn't redirect into the
227        // parent's channel (would trip the channel single-instance lock).
228        agentmux_common::DataPaths::resolve_path_only(version, &mode).ok()
229    } else {
230        agentmux_common::DataPaths::from_env().or_else(|| {
231            let mode = agentmux_common::RuntimeMode::current(&host_exe_dir);
232            agentmux_common::DataPaths::resolve(version, &mode).ok()
233        })
234    };
235    let is_dev = match &common_paths {
236        Some(p) => matches!(p.mode, agentmux_common::RuntimeMode::Dev { .. }),
237        None => false,
238    };
239
240    let (data_dir, log_dir) = match &common_paths {
241        Some(p) => (p.cef_cache_dir.clone(), p.logs_dir.clone()),
242        None => {
243            // Both env-read AND fallback resolution failed (no home
244            // dir on disk, or platform unsupported). Use a degraded
245            // path so log init at least works; the runtime-startup
246            // check below will surface the underlying error.
247            (
248                std::path::PathBuf::from("."),
249                dirs::home_dir()
250                    .unwrap_or_default()
251                    .join(".agentmux")
252                    .join("logs"),
253            )
254        }
255    };
256    std::fs::create_dir_all(&data_dir).ok();
257
258    // Initialize dual-output tracing: rolling log file + stderr.
259    // The log file guard must live for the entire process to ensure flushing.
260    let _log_guard = init_logging(&log_dir);
261
262    tracing::info!(
263        version,
264        runtime_mode = ?common_paths.as_ref().map(|p| p.mode.to_env_string()),
265        data_dir = %data_dir.display(),
266        log_dir = %log_dir.display(),
267        "Initializing CEF browser process"
268    );
269
270    // Phase B.6 (post-fix): the named-pipe bind in the launcher is
271    // the AUTHORITATIVE single-instance lock — a second launcher
272    // hits ERROR_ACCESS_DENIED and never reaches the host. We still
273    // publish `<launcher-shared-data-dir>/ipc-port` (port:token) so
274    // the second launcher can FORWARD an `open_new_window` request
275    // to the existing instance over HTTP and exit silently — the
276    // legacy forwarding UX users expect when double-clicking the
277    // exe twice. The pipe-bind-first ordering closes the stale-state
278    // defect (gap #8 in
279    // specs/ANALYSIS_WINDOW_PROCESS_STATE_INVENTORY_2026_04_27.md):
280    // a stale ipc-port file from a hard crash is irrelevant on the
281    // FIRST-instance path because pipe-bind succeeds and the file is
282    // overwritten; on the SECOND-instance path the live first
283    // instance wrote a fresh port:token, so forwarding lands.
284    //
285    // CRITICAL: write the port file at the LAUNCHER-shared data dir
286    // (`AGENTMUX_DATA_DIR`, == `paths.data_dir` in the launcher), NOT
287    // the host-local CEF cache dir (`<portable>/data/cef/`). The two
288    // diverge in portable mode (cef cache is one level deeper) and
289    // the launcher's `forward_open_new_window` reads the launcher-
290    // shared path. Falls back to the cef cache dir only when the env
291    // is unset (`task dev` mode without launcher), where forwarding
292    // wouldn't be wired anyway.
293    // Dev builds inherit AGENTMUX_DATA_DIR from the parent pane they were
294    // launched from. Writing ipc-port there would overwrite the parent
295    // instance's port:token and break its single-instance forwarding.
296    // In dev mode there is no launcher so port forwarding isn't wired
297    // anyway — use the dev data dir directly.
298    let port_file_dir = if agentmux_common::is_dev_build_exe(&host_exe_dir) {
299        data_dir.clone()
300    } else {
301        std::env::var_os("AGENTMUX_DATA_DIR")
302            .map(std::path::PathBuf::from)
303            .unwrap_or_else(|| data_dir.clone())
304    };
305    let _ = std::fs::create_dir_all(&port_file_dir);
306    let port_file = port_file_dir.join("ipc-port");
307
308    // Create shared application state.
309    let app_state = Arc::new(state::AppState::default());
310
311    // Start tokio runtime for async operations (IPC server, sidecar management).
312    let runtime = tokio::runtime::Runtime::new().expect("Failed to create tokio runtime");
313
314    // Install the runtime Handle into browser_pane::auth so the
315    // CEF `get_auth_credentials` callback (which runs on CEF's IO
316    // thread) can spawn the parked-auth TTL timer. A bare
317    // `tokio::spawn` there would panic with "there is no reactor
318    // running" because that thread has no `Handle::current()`.
319    browser_pane::auth::set_runtime_handle(runtime.handle().clone());
320
321    // Start the IPC HTTP server and get the assigned port.
322    let ipc_port = runtime.block_on(ipc::start_ipc_server(app_state.clone()));
323    *app_state.ipc_port.lock() = ipc_port;
324
325    tracing::info!("IPC server started on port {}", ipc_port);
326
327    // Phase B.2: connect to launcher's named-pipe IPC (if launcher
328    // is in the loop) so the launcher can route Commands and Events
329    // through us. The handle is held in main scope for the host's
330    // lifetime — dropping it closes the pipe (logged by launcher).
331    // Failure to connect is non-fatal in B.2 (host can still run);
332    // B.5+ will tighten when the host depends on IPC for state.
333    //
334    // Env-isolation guard: a dev build inheriting
335    // `AGENTMUX_LAUNCHER_PIPE` from a parent AgentMux pane (e.g. a
336    // shell inside an active pane that re-invokes the host directly)
337    // would otherwise connect to that parent's launcher pipe and
338    // route its host events into the parent's launcher state.
339    //
340    // Discriminator: connect when our parent process IS the launcher
341    // (production portable, installed build, OR post-#SPEC_LAUNCHER_DEV_INTEGRATION
342    // `task dev` which spawns the host via the launcher). Skip when
343    // it isn't.
344    //
345    // Older path-only guard (`is_dev_build_exe`) over-fired in dev
346    // mode after launcher integration shipped — see
347    // docs/specs/SPEC_DEV_MODE_LAUNCHER_IPC_2026_05_16.md.
348    let parent_is_launcher = parent_process::parent_is_agentmux_launcher();
349    let should_connect_launcher = match parent_is_launcher {
350        Some(true) => true,
351        Some(false) => false,
352        // Parent detection failed — fall back to the path-based guard
353        // so production builds still connect (they would otherwise
354        // silently lose the launcher IPC) and dev builds still skip.
355        None => !agentmux_common::is_dev_build_exe(&host_exe_dir),
356    };
357    let _launcher_ipc = if should_connect_launcher {
358        runtime.block_on(launcher_ipc::connect_to_launcher(app_state.clone()))
359    } else {
360        None
361    };
362
363    // Phase E.2c.5a — connect to the srv reducer's pipe. Forwards
364    // srv events (workspace / tab / block lifecycle) to every
365    // top-level renderer via the JS bridge. Renderer-side handler
366    // (`window.__agentmux_srv_event`) lands in E.2c.5b. Non-fatal
367    // if absent: `AGENTMUX_SRV_PIPE_PATH` is only set on the srv
368    // child by the launcher (`agentmux-launcher/src/srv_spawner.rs`),
369    // not on the host spawn — so today the host never has the env
370    // var and `connect_to_srv` short-circuits to None at
371    // `srv_ipc.rs:62-68`. Path-based dev guard is the right gate
372    // for this branch; restoring full srv-IPC parity in dev needs
373    // the launcher to propagate the env var to the host first.
374    // See spec §11 of SPEC_DEV_MODE_LAUNCHER_IPC_2026_05_16.md.
375    let _srv_ipc = if agentmux_common::is_dev_build_exe(&host_exe_dir) {
376        None
377    } else {
378        runtime.block_on(srv_ipc::connect_to_srv(app_state.clone()))
379    };
380
381    // Phase B.1: if launcher already spawned srv (the normal portable
382    // / installed path post-PR-#570 + B.1), populate state from the
383    // env vars launcher set — no need to re-spawn srv. Falls back to
384    // spawn_backend() ONLY when env vars are absent (`task dev` mode
385    // where the host runs without the launcher).
386    //
387    // Spawn the backend sidecar SYNCHRONOUSLY — block until it
388    // signals ready (AGENTMUXSRV-ESTART) before creating the browser
389    // window. This eliminates the race condition where CEF loads the
390    // frontend before the backend is available, which causes a "raw
391    // browser" appearance on slow machines or first launch.
392    let backend_ready = runtime.block_on(async {
393        // Dev builds inherit AGENTMUX_BACKEND_WS from the parent pane.
394        // Consuming it would connect to the parent's srv instead of
395        // spawning our own, so the dev frontend runs against the wrong
396        // (parent's) backend and no dev-version srv is ever started.
397        let launcher_provided = if agentmux_common::is_dev_build_exe(&host_exe_dir) {
398            None
399        } else {
400            sidecar::use_launcher_endpoints(&app_state)
401        };
402        let result = match launcher_provided {
403            Some(Ok(r)) => {
404                tracing::info!(
405                    "Using launcher-provided backend endpoints: ws={} web={} pid={}",
406                    r.ws_endpoint,
407                    r.web_endpoint,
408                    r.instance_id
409                );
410                Ok(r)
411            }
412            Some(Err(e)) => {
413                tracing::error!(
414                    "Launcher set AGENTMUX_BACKEND_WS but env was malformed: {} — refusing to fall back to spawn_backend (would fight launcher's srv)",
415                    e
416                );
417                Err(e)
418            }
419            None => {
420                tracing::info!("No launcher-provided backend env (dev mode) — spawning srv ourselves");
421                sidecar::spawn_backend(&app_state).await
422            }
423        };
424        match result {
425            Ok(result) => {
426                {
427                    let mut endpoints = app_state.backend_endpoints.lock();
428                    endpoints.ws_endpoint = result.ws_endpoint.clone();
429                    endpoints.web_endpoint = result.web_endpoint.clone();
430                }
431                tracing::info!(
432                    "Backend ready: ws={} web={}",
433                    result.ws_endpoint,
434                    result.web_endpoint
435                );
436                true
437            }
438            Err(e) => {
439                tracing::error!("Failed to set up backend: {}", e);
440                false
441            }
442        }
443    });
444
445    if !backend_ready {
446        tracing::error!("Backend failed to start — exiting");
447        std::process::exit(1);
448    }
449
450    // Dev-only: write authkey.dev so external test harnesses can call
451    // the service API without polling logs or driving the UI. Gate is
452    // Write authkey.dev for ALL runtime modes (dev, portable, installed).
453    // The file lets bench-term-echo.mjs and the PowerShell test harnesses
454    // discover the running instance without manual --ws-url / --auth-key flags.
455    // Security: the WS server is loopback-only; any same-user process already
456    // has equivalent TCP access. See SPEC_TEST_API_ACCESS.md §3 and
457    // SPEC_BENCHMARK_PORTABLE_DISCOVERY_2026_05_20.md for rationale.
458    {
459        let endpoints = app_state.backend_endpoints.lock().clone();
460        let auth_key = app_state.auth_key.lock().clone();
461        let ipc_token = app_state.ipc_token.clone();
462        let data_dir_str = app_state
463            .version_data_dir
464            .lock()
465            .clone()
466            .unwrap_or_default();
467        let data_dir_path = std::path::PathBuf::from(&data_dir_str);
468        let ipc_endpoint = format!("127.0.0.1:{}", ipc_port);
469        let instance = format!("v{}", env!("CARGO_PKG_VERSION"));
470        let host_pid = std::process::id();
471        match dev_authfile::write_dev_auth_file(
472            &data_dir_path,
473            &auth_key,
474            &endpoints.web_endpoint,
475            &endpoints.ws_endpoint,
476            &ipc_endpoint,
477            &ipc_token,
478            &instance,
479            host_pid,
480        ) {
481            Ok(p) => tracing::info!("Wrote authkey file: {}", p.display()),
482            Err(e) => tracing::warn!("Failed to write authkey file: {}", e),
483        }
484    }
485
486    // Create the App handler with state.
487    let mut cef_app = app::AgentMuxApp::new(app_state.clone(), ipc_port);
488
489    // Resolve resource directories for portable layout. In portable
490    // mode the CEF host is IN runtime/, so resources are flat
491    // alongside it. In dev mode they are also flat in dist/cef-dev/.
492    // Reuses `host_exe_dir` from the startup mode-detection block.
493    let runtime_dir = host_exe_dir.join("runtime");
494    let base_dir = if runtime_dir.exists() {
495        runtime_dir
496    } else {
497        host_exe_dir.clone()
498    };
499    let resources_dir = CefString::from(base_dir.to_str().unwrap_or(""));
500    let locales_dir = CefString::from(base_dir.join("locales").to_str().unwrap_or(""));
501
502    // Reuse data_dir from single-instance check as CEF cache path.
503    // Remove stale lockfile from a previous killed run.
504    let lockfile = data_dir.join("lockfile");
505    if lockfile.exists() {
506        tracing::warn!("Removing stale CEF lockfile: {}", lockfile.display());
507        let _ = std::fs::remove_file(&lockfile);
508    }
509    tracing::info!("CEF cache dir: {}", data_dir.display());
510    let cache_dir = CefString::from(data_dir.to_str().unwrap_or(""));
511
512    // Configure CEF settings.
513    let debug_port: u16 = if is_dev { 9223 } else { 9222 };
514    *app_state.debug_port.lock() = debug_port;
515
516    // Route CEF's internal Chromium logging into our log dir alongside the
517    // tracing-subscriber file. Without this, init failures leave an empty
518    // chrome_debug.log in the cache dir and we have nothing to read. INFO is
519    // verbose enough to expose load-library / resource problems but quiet
520    // enough not to swamp the file in normal operation.
521    let cef_log_path = log_dir.join("cef-debug.log");
522    let cef_log_file = CefString::from(cef_log_path.to_str().unwrap_or(""));
523
524    let settings = Settings {
525        no_sandbox: 1,
526        // ARGB: alpha=0 → SK_AlphaTRANSPARENT → triggers the transparency
527        // cascade in the patched libcef.so (see cef commits b921ffe18 +
528        // 68e0dc668). The CSS layer's rgba(_,_,_,<1) body bg then composites
529        // with the desktop instead of being clamped to opaque white.
530        // Pair: BrowserSettings.background_color must also be 0 (app.rs).
531        // Pair: WindowDelegate must return is_frameless=true (already does
532        // for the main window).
533        // Spec: docs/research/cef-transparency-research-2026-05-10.md.
534        background_color: 0x00000000,
535        remote_debugging_port: debug_port as i32,
536        root_cache_path: cache_dir,
537        resources_dir_path: resources_dir,
538        locales_dir_path: locales_dir,
539        log_file: cef_log_file,
540        log_severity: LogSeverity::INFO,
541        // CEF subprocess (renderer, GPU) uses the same exe
542        browser_subprocess_path: CefString::from(
543            std::env::current_exe().unwrap().to_str().unwrap_or("")
544        ),
545        ..Default::default()
546    };
547
548    // Initialize CEF.
549    //
550    // CefInitialize returns 1 on success and 0 either on real init failure OR
551    // on "normal early exit" (process singleton, command-line forward, etc).
552    // We can only tell the two apart by calling cef_get_exit_code() and
553    // matching against cef_resultcode_t. Treat NORMAL_EXIT* codes as a clean
554    // exit; everything else is a real failure that we surface via panic.
555    //
556    // Common early-exit codes (cef_resultcode_t):
557    //   0  CEF_RESULT_CODE_NORMAL_EXIT
558    //   24 CEF_RESULT_CODE_NORMAL_EXIT_PROCESS_NOTIFIED  ← singleton relaunch
559    //   36 CEF_RESULT_CODE_NORMAL_EXIT_PACK_EXTENSION_SUCCESS
560    //   38 CEF_RESULT_CODE_NORMAL_EXIT_AUTO_DE_ELEVATED
561    let init_result = initialize(
562        Some(args.as_main_args()),
563        Some(&settings),
564        Some(&mut cef_app),
565        std::ptr::null_mut(),
566    );
567    if init_result != 1 {
568        let exit_code = get_exit_code();
569        // Sidecar was spawned before cef_initialize(); std::process::exit()
570        // bypasses the normal shutdown block, so kill it here first.
571        {
572            let mut sidecar = app_state.sidecar_child.lock();
573            if let Some(ref mut child) = *sidecar {
574                tracing::info!("CEF early exit: killing backend sidecar before exit");
575                let _ = child.kill();
576            }
577        }
578        match exit_code {
579            0 | 24 | 36 | 38 => {
580                tracing::info!(
581                    exit_code,
582                    "CEF early exit (process singleton or similar) — exiting cleanly"
583                );
584                std::process::exit(0);
585            }
586            _ => {
587                tracing::error!(
588                    exit_code,
589                    "CEF initialization failed; see ~/.agentmux/logs/cef-debug.log for details"
590                );
591                std::process::exit(exit_code);
592            }
593        }
594    }
595
596    tracing::info!("CEF initialized, entering message loop");
597
598    // Start memory heartbeat — logs system/process memory stats every 20s.
599    // Provides forensic data if the process later crashes from OOM / VA exhaustion.
600    memory_heartbeat::start();
601
602    // Phase B.6 (post-fix): publish port:token AFTER CEF init so a
603    // second launcher only forwards `open_new_window` when we're
604    // actually ready to handle it. Single-instance enforcement is
605    // the launcher's named-pipe bind — this file is purely a
606    // forwarding hint.
607    let _ = std::fs::write(
608        &port_file,
609        format!("{}:{}", ipc_port, app_state.ipc_token),
610    );
611
612    // Phase B.9.1 (WRR) — install Win32 event hooks. Must come
613    // AFTER `connect_to_launcher` so the report_hwnd_* sync APIs
614    // have a live `COMMAND_TX` to push into; AFTER CEF init so
615    // any HWNDs CEF creates during initialize() are missed
616    // (acceptable — they predate the user's session and are
617    // accounted for by main-window startup paths). Idempotent;
618    // safe to call multiple times. State arg lets the callback
619    // peek `pending_window_creations` for `label_hint`.
620    wrr::install_hooks(app_state.clone());
621
622    // Run the CEF message loop. This blocks until quit_message_loop() is called
623    // (triggered when all browser windows are closed in client.rs).
624    run_message_loop();
625
626    tracing::info!("CEF message loop exited, shutting down");
627
628    // Phase B.9.1 (WRR) — tear down Win32 event hooks before any
629    // further teardown. UnhookWinEvent is cheap; doing it early
630    // prevents stray callbacks during shutdown from racing the
631    // launcher_ipc channel close.
632    wrr::uninstall_hooks();
633
634    // Kill the backend sidecar on shutdown.
635    {
636        let mut sidecar = app_state.sidecar_child.lock();
637        if let Some(ref mut child) = *sidecar {
638            tracing::info!("Killing backend sidecar");
639            let _ = child.kill();
640        }
641    }
642
643    // Clean shutdown.
644    shutdown();
645
646    // Drop the tokio runtime after CEF shutdown.
647    drop(runtime);
648
649    // Phase B.6 (post-fix): clean up the forwarding hint so a stale
650    // file doesn't survive a graceful exit. (Hard crashes will leave
651    // it behind; harmless because pipe-bind on next launch is
652    // authoritative — see comment at the port_file declaration.)
653    let _ = std::fs::remove_file(&port_file);
654
655    tracing::info!("AgentMux host shutdown complete");
656}
657
658/// Initialize tracing with dual output: rolling daily log file + human-readable stderr.
659/// `log_dir` is resolved by the caller: `<portable-root>/data/logs/` in portable mode,
660/// `~/.agentmux/logs/` in installed mode.
661/// Returns a guard that must be held for the lifetime of the process to ensure log flushing.
662fn init_logging(log_dir: &std::path::Path) -> tracing_appender::non_blocking::WorkerGuard {
663    use tracing_subscriber::{fmt, layer::SubscriberExt, EnvFilter};
664
665    let version = env!("CARGO_PKG_VERSION");
666    let _ = std::fs::create_dir_all(log_dir);
667
668    // Delete log files older than 7 days to prevent unbounded growth.
669    cleanup_old_logs(&log_dir, 7);
670
671    let log_prefix = format!("agentmux-host-v{}.log", version);
672    let file_appender = tracing_appender::rolling::daily(&log_dir, &log_prefix);
673    let (non_blocking_file, guard) = tracing_appender::non_blocking(file_appender);
674
675    // Write pointer to current log file for zero-lookup agent discovery.
676    // Version-qualified name so multi-instance doesn't clobber pointers.
677    // Uses UTC to match tracing_appender::rolling::daily's date suffix.
678    let today = chrono::Utc::now().format("%Y-%m-%d").to_string();
679    let current_filename = format!("{}.{}", log_prefix, today);
680    let absolute_path = log_dir.join(&current_filename);
681    let pointer_name = format!("current-host-v{}.path", version);
682
683    // Pointer #1: local — inside the instance's log dir. The basename
684    // is enough here since the reader is colocated.
685    let _ = std::fs::write(log_dir.join(&pointer_name), &current_filename);
686
687    // Pointer #2: global — at `<root>/logs/<pointer_name>`. Writes the
688    // ABSOLUTE PATH so legacy tooling (`muxlog host`) that lives outside
689    // the instance dir can `cat $pointer | xargs tail -f` and reach the
690    // real file. Skipped silently if the global dir can't be derived
691    // (e.g. AGENTMUX_HOME_OVERRIDE unset in some test setups).
692    if let Some(global_logs_dir) = log_dir.parent().and_then(|p| p.parent()).and_then(|p| p.parent()).map(|p| p.join("logs")) {
693        let _ = std::fs::create_dir_all(&global_logs_dir);
694        let _ = std::fs::write(
695            global_logs_dir.join(&pointer_name),
696            absolute_path.to_string_lossy().as_bytes(),
697        );
698    }
699
700    // Synchronous init sentinel: append a single line directly to the
701    // expected log path BEFORE the tracing subscriber is wired up. Without
702    // this, a hang between subscriber-setup and the non-blocking writer's
703    // first flush leaves the pointer file pointing at a never-created log
704    // file (observed 2026-05-02 freeze investigation). The sentinel
705    // guarantees the file exists once init_logging has run past
706    // pointer-write — if the file is missing afterwards, we know
707    // init_logging itself didn't get past this point.
708    let sentinel_path = log_dir.join(&current_filename);
709    let sentinel_line = format!(
710        "{} INIT-SENTINEL agentmux-host v={} pid={} os={} arch={}\n",
711        chrono::Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ"),
712        version,
713        std::process::id(),
714        std::env::consts::OS,
715        std::env::consts::ARCH,
716    );
717    if let Ok(mut f) = std::fs::OpenOptions::new()
718        .create(true)
719        .append(true)
720        .open(&sentinel_path)
721    {
722        use std::io::Write;
723        let _ = f.write_all(sentinel_line.as_bytes());
724        let _ = f.flush();
725    }
726
727    let subscriber = tracing_subscriber::registry()
728        .with(
729            EnvFilter::try_from_default_env()
730                .unwrap_or_else(|_| EnvFilter::new("info")),
731        )
732        .with(
733            fmt::layer()
734                .json()
735                .with_writer(non_blocking_file)
736                .with_target(true)
737                .with_thread_ids(true),
738        )
739        .with(
740            fmt::layer()
741                .with_writer(std::io::stderr)
742                .with_ansi(true),
743        );
744
745    tracing::subscriber::set_global_default(subscriber).ok();
746
747    tracing::info!(
748        version,
749        os = std::env::consts::OS,
750        arch = std::env::consts::ARCH,
751        log_dir = %log_dir.display(),
752        "AgentMux host starting"
753    );
754
755    guard
756}
757
/// Best-effort removal of rotated log files in `log_dir` that were last
/// modified more than `days` days ago.
///
/// Only direct entries of `log_dir` whose *file name* contains `.log.` are
/// considered. Matching on the file name rather than the full path matters:
/// if an ancestor directory contains `.log.`, a full-path match would make
/// every old entry in the directory (e.g. pointer files) eligible for
/// deletion.
///
/// All I/O errors (unreadable directory, missing metadata, failed removal)
/// are ignored. A `days` value too large to express as a cutoff time is
/// treated as "nothing is old enough" and the function returns without
/// touching anything.
fn cleanup_old_logs(log_dir: &std::path::Path, days: u64) {
    const SECS_PER_DAY: u64 = 86_400;

    // Saturating/checked arithmetic: a huge `days` must not panic on
    // multiplication overflow or on an unrepresentable SystemTime.
    let max_age = std::time::Duration::from_secs(days.saturating_mul(SECS_PER_DAY));
    let Some(cutoff) = std::time::SystemTime::now().checked_sub(max_age) else {
        return;
    };

    let Ok(entries) = std::fs::read_dir(log_dir) else { return };
    for entry in entries.flatten() {
        // Match on the entry's own name only, never on parent directories.
        if !entry.file_name().to_string_lossy().contains(".log.") {
            continue;
        }
        let last_modified = entry.metadata().and_then(|meta| meta.modified());
        if matches!(last_modified, Ok(modified) if modified < cutoff) {
            let _ = std::fs::remove_file(entry.path());
        }
    }
}