agentmux_cef/
memory_heartbeat.rs

1// Copyright 2026, AgentMux Corp.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Memory heartbeat — logs system and process memory stats every 20 seconds.
5// Designed to provide forensic data for OOM / VA exhaustion crash analysis.
6
7use std::time::Duration;
8
9/// Spawn a background thread that logs memory stats at a fixed interval.
10/// Also refreshes the log pointer file on UTC date rollover.
11/// Runs for the lifetime of the process — no shutdown signal needed.
12pub fn start() {
13    std::thread::Builder::new()
14        .name("mem-heartbeat".into())
15        .spawn(move || {
16            let mut last_date = String::new();
17            loop {
18                std::thread::sleep(Duration::from_secs(20));
19                log_memory_stats();
20                refresh_log_pointer(&mut last_date);
21            }
22        })
23        .expect("Failed to spawn memory heartbeat thread");
24}
25
26/// Update the host log pointer file when the UTC date changes (midnight rollover).
27/// tracing_appender::rolling::daily creates a new file at UTC midnight, so the
28/// pointer must track the new date suffix.
29///
30/// Two pointers are written, matching `main::init_logging`:
31///   1. Local: `<log_dir>/<pointer_name>` with just the basename.
32///   2. Global: `<root>/logs/<pointer_name>` with the absolute path so
33///      legacy tooling (`muxlog host`) can resolve from outside the
34///      instance dir. Works for portable, installed, and dev modes —
35///      log_dir comes from `AGENTMUX_LOG_DIR`, set by data_paths.rs.
36fn refresh_log_pointer(last_date: &mut String) {
37    let today = chrono::Utc::now().format("%Y-%m-%d").to_string();
38    if *last_date == today {
39        return;
40    }
41    *last_date = today.clone();
42    let version = env!("CARGO_PKG_VERSION");
43    // Use the AGENTMUX_LOG_DIR exported by data_paths.rs. Falls back to
44    // the legacy hardcoded location only as a safety net — by the time
45    // memory_heartbeat starts, init_logging has already run with the
46    // resolved log_dir, so this env var should always be present.
47    let log_dir = std::env::var_os("AGENTMUX_LOG_DIR")
48        .map(std::path::PathBuf::from)
49        .unwrap_or_else(|| {
50            dirs::home_dir()
51                .unwrap_or_default()
52                .join(".agentmux")
53                .join("logs")
54        });
55    let current_filename = format!("agentmux-host-v{}.log.{}", version, today);
56    let absolute_path = log_dir.join(&current_filename);
57    let pointer_name = format!("current-host-v{}.path", version);
58
59    let _ = std::fs::write(log_dir.join(&pointer_name), &current_filename);
60
61    if let Some(global_logs_dir) = log_dir
62        .parent()
63        .and_then(|p| p.parent())
64        .and_then(|p| p.parent())
65        .map(|p| p.join("logs"))
66    {
67        let _ = std::fs::create_dir_all(&global_logs_dir);
68        let _ = std::fs::write(
69            global_logs_dir.join(&pointer_name),
70            absolute_path.to_string_lossy().as_bytes(),
71        );
72    }
73}
74
/// Log one system-wide and one per-process memory sample (Windows).
///
/// System figures come from `GlobalMemoryStatusEx` and are logged in GiB with
/// one decimal under the message `"system memory"`. Process figures come from
/// `GetProcessMemoryInfo` for the current process and are logged in MiB with
/// one decimal under the message `"process memory"`. All events use the
/// `mem_heartbeat` target.
///
/// Each query is checked independently: a failed query emits a
/// `"Failed to query memory stats"` warning carrying which query failed and
/// the OS error, while the other query's sample is still logged.
#[cfg(target_os = "windows")]
fn log_memory_stats() {
    use windows_sys::Win32::System::ProcessStatus::{
        GetProcessMemoryInfo, PROCESS_MEMORY_COUNTERS,
    };
    use windows_sys::Win32::System::SystemInformation::{
        GlobalMemoryStatusEx, MEMORYSTATUSEX,
    };
    use windows_sys::Win32::System::Threading::GetCurrentProcess;

    const MIB: f64 = 1024.0 * 1024.0;
    const GIB: f64 = 1024.0 * 1024.0 * 1024.0;

    // ── System-wide stats ──
    // SAFETY: MEMORYSTATUSEX is a plain C struct of integer fields, for which
    // the all-zero bit pattern is a valid value.
    let mut mem: MEMORYSTATUSEX = unsafe { std::mem::zeroed() };
    mem.dwLength = std::mem::size_of::<MEMORYSTATUSEX>() as u32;
    // SAFETY: `mem` is a live, writable MEMORYSTATUSEX whose `dwLength` has
    // been set to the struct size, as the API requires.
    let sys_ok = unsafe { GlobalMemoryStatusEx(&mut mem) } != 0;
    // Capture the error code right away, before another API call replaces it.
    let sys: std::io::Result<MEMORYSTATUSEX> = if sys_ok {
        Ok(mem)
    } else {
        Err(std::io::Error::last_os_error())
    };

    // ── Per-process stats ──
    // SAFETY: PROCESS_MEMORY_COUNTERS is a plain C struct of integer fields,
    // for which the all-zero bit pattern is a valid value.
    let mut pmc: PROCESS_MEMORY_COUNTERS = unsafe { std::mem::zeroed() };
    let pmc_size = std::mem::size_of::<PROCESS_MEMORY_COUNTERS>() as u32;
    pmc.cb = pmc_size;
    // SAFETY: GetCurrentProcess returns a pseudo-handle that needs no
    // closing; `pmc` is a live, writable buffer of exactly `pmc_size` bytes.
    let proc_ok = unsafe { GetProcessMemoryInfo(GetCurrentProcess(), &mut pmc, pmc_size) } != 0;
    let proc: std::io::Result<PROCESS_MEMORY_COUNTERS> = if proc_ok {
        Ok(pmc)
    } else {
        Err(std::io::Error::last_os_error())
    };

    match sys {
        Ok(mem) => {
            // Bytes → GiB, rendered with one decimal.
            let gb = |bytes: f64| format!("{:.1}", bytes / GIB);
            let load_pct = mem.dwMemoryLoad;

            tracing::info!(
                target: "mem_heartbeat",
                load_pct,
                total_phys_gb = gb(mem.ullTotalPhys as f64),
                avail_phys_gb = gb(mem.ullAvailPhys as f64),
                total_page_gb = gb(mem.ullTotalPageFile as f64),
                avail_page_gb = gb(mem.ullAvailPageFile as f64),
                total_virt_gb = gb(mem.ullTotalVirtual as f64),
                avail_virt_gb = gb(mem.ullAvailVirtual as f64),
                "system memory"
            );
        }
        Err(err) => {
            tracing::warn!(
                target: "mem_heartbeat",
                query = "system",
                error = %err,
                "Failed to query memory stats"
            );
        }
    }

    match proc {
        Ok(pmc) => {
            // Bytes → MiB, rendered with one decimal.
            let mb = |bytes: f64| format!("{:.1}", bytes / MIB);
            let page_faults = pmc.PageFaultCount;

            tracing::info!(
                target: "mem_heartbeat",
                ws_mb = mb(pmc.WorkingSetSize as f64),
                peak_ws_mb = mb(pmc.PeakWorkingSetSize as f64),
                // PagefileUsage is reported under the `commit_mb` field name.
                commit_mb = mb(pmc.PagefileUsage as f64),
                peak_commit_mb = mb(pmc.PeakPagefileUsage as f64),
                page_faults,
                "process memory"
            );
        }
        Err(err) => {
            tracing::warn!(
                target: "mem_heartbeat",
                query = "process",
                error = %err,
                "Failed to query memory stats"
            );
        }
    }
}
141
142#[cfg(not(target_os = "windows"))]
143fn log_memory_stats() {
144    // On non-Windows, read /proc/self/status and /proc/meminfo.
145    if let Ok(status) = std::fs::read_to_string("/proc/self/status") {
146        let vm_rss = extract_proc_field(&status, "VmRSS:");
147        let vm_size = extract_proc_field(&status, "VmSize:");
148        let vm_peak = extract_proc_field(&status, "VmPeak:");
149        tracing::info!(
150            target: "mem_heartbeat",
151            vm_rss, vm_size, vm_peak,
152            "process memory"
153        );
154    }
155    if let Ok(meminfo) = std::fs::read_to_string("/proc/meminfo") {
156        let total = extract_proc_field(&meminfo, "MemTotal:");
157        let avail = extract_proc_field(&meminfo, "MemAvailable:");
158        tracing::info!(
159            target: "mem_heartbeat",
160            total, avail,
161            "system memory"
162        );
163    }
164}
165
/// Return the value of the first line in `content` that begins with `field`.
///
/// The value is everything after the `field` prefix with surrounding
/// whitespace trimmed (e.g. `"VmRSS:\t  123 kB"` → `"123 kB"`). The prefix
/// must sit at the very start of the line. Returns `"?"` when no line matches.
#[cfg(not(target_os = "windows"))]
fn extract_proc_field(content: &str, field: &str) -> String {
    for line in content.lines() {
        if let Some(value) = line.strip_prefix(field) {
            return value.trim().to_string();
        }
    }
    String::from("?")
}