agentmux_srv\backend\process_tracker/
windows.rs

1// Copyright 2026, AgentMux Corp.
2// SPDX-License-Identifier: Apache-2.0
3#![cfg(windows)]
4
5//! Windows `ProcessTreeTracker` implementation backed by Job Objects.
6//!
7//! Flow:
8//! 1. `JobObjectTracker::new(block_id)` creates an anonymous job and sets
9//!    `KILL_ON_JOB_CLOSE`, so anything in the job dies automatically if
10//!    AgentMux itself crashes without calling `kill_tree`.
11//! 2. The caller gets the tracker handle and, when spawning the agent
12//!    CLI, calls `assign_process(child_pid)` immediately after spawn.
13//!    Every `CreateProcess` descendant of that PID inherits the job
14//!    automatically — no per-process tagging.
//! 3. `list_members` queries the job for its current PID set and
//!    enriches each PID with the executable image path + RSS, opening the
//!    process with `PROCESS_QUERY_LIMITED_INFORMATION` and calling
//!    `QueryFullProcessImageNameW` / `GetProcessMemoryInfo`.
18//! 4. `kill_tree` → `TerminateJobObject`. One call nukes everything.
19//!
20//! The only non-trivial thing: there's a ~1ms race window between
21//! `CreateProcess` and our `AssignProcessToJobObject`. A child the CLI
22//! creates in that window escapes the job. In practice the CLI doesn't
23//! spawn anything before it reads stdin, so this is a theoretical
24//! concern — but worth a future move to `CREATE_SUSPENDED` + assign +
25//! `ResumeThread` if we see escapes.
26
27use std::ffi::OsString;
28use std::mem::{size_of, zeroed};
29use std::os::windows::ffi::OsStringExt;
30use std::sync::Mutex;
31
32use windows_sys::Win32::Foundation::{CloseHandle, HANDLE, INVALID_HANDLE_VALUE};
33use windows_sys::Win32::System::JobObjects::{
34    AssignProcessToJobObject, CreateJobObjectW, JobObjectBasicProcessIdList,
35    JobObjectExtendedLimitInformation, QueryInformationJobObject,
36    SetInformationJobObject, TerminateJobObject, JOBOBJECT_BASIC_PROCESS_ID_LIST,
37    JOBOBJECT_EXTENDED_LIMIT_INFORMATION, JOB_OBJECT_LIMIT_BREAKAWAY_OK,
38    JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE,
39};
40use windows_sys::Win32::System::ProcessStatus::{GetProcessMemoryInfo, PROCESS_MEMORY_COUNTERS};
41use windows_sys::Win32::System::Threading::{
42    OpenProcess, TerminateProcess, PROCESS_QUERY_LIMITED_INFORMATION, PROCESS_TERMINATE,
43};
44
45use super::{TrackedProcess, TrackerHandle, TrackingConfidence};
46
/// Process-tree tracker for a single block, backed by one Windows Job
/// Object whose handle is created in `JobObjectTracker::new`.
pub struct JobObjectTracker {
    /// Block identifier; in this file it is only attached to log events.
    block_id: String,
    /// Job handle plus its "already closed" flag. The mutex lets methods
    /// that take `&self` (such as `kill_tree`) flip the flag and close the
    /// handle while other threads may be calling into the same tracker.
    inner: Mutex<Inner>,
}
54
/// Mutable state guarded by `JobObjectTracker::inner`.
struct Inner {
    /// Handle returned by `CreateJobObjectW`.
    job: HANDLE,
    /// Set once `CloseHandle` has been called on `job`. Both `kill_tree`
    /// and `Drop` close the handle; whichever runs second must see this
    /// flag and do nothing.
    closed: bool,
}
61
62impl JobObjectTracker {
63    /// Inherent convenience wrapper — the canonical way callers should
64    /// use `assign_process` is via the `TrackerHandle` trait, which
65    /// dispatches across platforms. This stays inherent for internal
66    /// callers that hold a concrete `JobObjectTracker`.
67    #[allow(dead_code)]
68    pub fn assign_inherent(&self, pid: u32) -> Result<(), String> {
69        <Self as TrackerHandle>::assign_process(self, pid)
70    }
71
72    pub fn new(block_id: &str) -> Result<Self, String> {
73        unsafe {
74            let job = CreateJobObjectW(std::ptr::null(), std::ptr::null());
75            if job.is_null() {
76                return Err(format!("CreateJobObjectW failed: {}", std::io::Error::last_os_error()));
77            }
78
79            // Configure the job:
80            //  - KILL_ON_JOB_CLOSE: everything in the job dies if the host
81            //    process exits without explicit cleanup. No leaks across
82            //    AgentMux crashes.
83            //  - BREAKAWAY_OK is NOT set: descendants can't opt out of the
84            //    job. (Some CLIs try CREATE_BREAKAWAY_FROM_JOB; we want
85            //    those attempts to fail so the child stays tracked.)
86            let mut info: JOBOBJECT_EXTENDED_LIMIT_INFORMATION = zeroed();
87            info.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE;
88            let ok = SetInformationJobObject(
89                job,
90                JobObjectExtendedLimitInformation,
91                &info as *const _ as *const _,
92                size_of::<JOBOBJECT_EXTENDED_LIMIT_INFORMATION>() as u32,
93            );
94            if ok == 0 {
95                let err = std::io::Error::last_os_error();
96                CloseHandle(job);
97                return Err(format!("SetInformationJobObject failed: {err}"));
98            }
99            let _ = JOB_OBJECT_LIMIT_BREAKAWAY_OK; // referenced for docs
100
101            tracing::info!(
102                block_id = %block_id,
103                job = ?job,
104                "[process-tracker] created Windows Job Object"
105            );
106
107            Ok(Self {
108                block_id: block_id.to_string(),
109                inner: Mutex::new(Inner { job, closed: false }),
110            })
111        }
112    }
113
114    /// Internal assignment primitive — called via the `TrackerHandle`
115    /// trait impl below. See the trait doc for semantics + race
116    /// window caveat.
117    fn assign_process_impl(&self, pid: u32) -> Result<(), String> {
118        unsafe {
119            let h_process = OpenProcess(
120                PROCESS_QUERY_LIMITED_INFORMATION | PROCESS_TERMINATE | 0x0200, /* PROCESS_SET_QUOTA */
121                0,
122                pid,
123            );
124            if h_process.is_null() {
125                return Err(format!("OpenProcess({pid}) failed: {}", std::io::Error::last_os_error()));
126            }
127            let inner = self.inner.lock().unwrap();
128            let ok = AssignProcessToJobObject(inner.job, h_process);
129            CloseHandle(h_process);
130            if ok == 0 {
131                return Err(format!(
132                    "AssignProcessToJobObject({pid}) failed: {}",
133                    std::io::Error::last_os_error()
134                ));
135            }
136            tracing::info!(
137                block_id = %self.block_id,
138                pid = pid,
139                "[process-tracker] assigned process to job"
140            );
141            Ok(())
142        }
143    }
144
145    fn query_pids(&self) -> Vec<u32> {
146        // Buffer sized for up to 256 PIDs. If we overflow we'll log and
147        // truncate — an agent with >256 descendants is an edge case we
148        // can widen later.
149        const MAX_PIDS: usize = 256;
150        #[repr(C)]
151        struct Buf {
152            header: JOBOBJECT_BASIC_PROCESS_ID_LIST,
153            rest: [usize; MAX_PIDS - 1],
154        }
155        let mut buf: Buf = unsafe { zeroed() };
156        let mut returned: u32 = 0;
157        let ok = unsafe {
158            QueryInformationJobObject(
159                self.inner.lock().unwrap().job,
160                JobObjectBasicProcessIdList,
161                &mut buf as *mut _ as *mut _,
162                size_of::<Buf>() as u32,
163                &mut returned,
164            )
165        };
166        if ok == 0 {
167            return Vec::new();
168        }
169        let count = buf.header.NumberOfProcessIdsInList as usize;
170        let count = count.min(MAX_PIDS);
171        let mut pids = Vec::with_capacity(count);
172        // NumberOfAssignedProcesses is the total in the job; the first
173        // entry is inlined in the header, then `rest` continues. Treat
174        // as a flat array of `usize` starting at `header.ProcessIdList[0]`.
175        let first_slot = &buf.header.ProcessIdList[0] as *const usize;
176        for i in 0..count {
177            let p = unsafe { *first_slot.add(i) } as u32;
178            if p != 0 {
179                pids.push(p);
180            }
181        }
182        pids
183    }
184}
185
186impl TrackerHandle for JobObjectTracker {
187    fn assign_process(&self, pid: u32) -> Result<(), String> {
188        self.assign_process_impl(pid)
189    }
190
191    fn list_members(&self) -> Vec<TrackedProcess> {
192        self.query_pids()
193            .into_iter()
194            .map(|pid| TrackedProcess {
195                pid,
196                command: query_command_line(pid),
197                rss_bytes: query_rss(pid),
198                started_at_ms: 0, // deferred; uses NtQueryInformationProcess — skip for v1
199            })
200            .collect()
201    }
202
203    fn kill_tree(&self) {
204        unsafe {
205            let mut inner = self.inner.lock().unwrap();
206            if inner.closed {
207                return;
208            }
209            if TerminateJobObject(inner.job, 1) == 0 {
210                tracing::warn!(
211                    block_id = %self.block_id,
212                    err = %std::io::Error::last_os_error(),
213                    "[process-tracker] TerminateJobObject failed"
214                );
215            } else {
216                tracing::info!(block_id = %self.block_id, "[process-tracker] killed job tree");
217            }
218            CloseHandle(inner.job);
219            inner.closed = true;
220        }
221    }
222
223    fn kill_pid(&self, pid: u32) -> bool {
224        if !self.query_pids().iter().any(|&p| p == pid) {
225            return false;
226        }
227        unsafe {
228            let h = OpenProcess(PROCESS_TERMINATE, 0, pid);
229            if h.is_null() {
230                return false;
231            }
232            let ok = TerminateProcess(h, 1);
233            CloseHandle(h);
234            ok != 0
235        }
236    }
237
    /// Always reports `TrackingConfidence::High` for this Job Object
    /// backed tracker.
    fn confidence(&self) -> TrackingConfidence {
        TrackingConfidence::High
    }
241}
242
243impl Drop for JobObjectTracker {
244    fn drop(&mut self) {
245        unsafe {
246            let mut inner = self.inner.get_mut().unwrap();
247            if !inner.closed && !inner.job.is_null() && inner.job != INVALID_HANDLE_VALUE {
248                // KILL_ON_JOB_CLOSE will fire when we close the handle,
249                // so we don't need an explicit TerminateJobObject here.
250                CloseHandle(inner.job);
251                inner.closed = true;
252            }
253        }
254    }
255}
256
// SAFETY: the manual impls exist because of the raw job `HANDLE` stored in
// `Inner` (this file treats it as a pointer, see the `is_null` checks).
// Every method here reaches the handle through `self.inner`: via the
// mutex, or via `get_mut` in `Drop`. Soundness additionally relies on
// job-object handles being usable from any thread.
// NOTE(review): confirm that guarantee against the Win32 Job Objects docs.
unsafe impl Send for JobObjectTracker {}
unsafe impl Sync for JobObjectTracker {}
261
262// ── Helpers ────────────────────────────────────────────────────────────
263
264/// Read a process's command line via `GetCommandLineW` is not an option
265/// for foreign processes — that's the calling process's cmdline.
266/// Instead we use `QueryFullProcessImageNameW` for the executable path
267/// and treat cmdline as "unavailable" for v1. WMI can fill this in
268/// later if the user asks for full cmdline.
269fn query_command_line(pid: u32) -> String {
270    use windows_sys::Win32::System::Threading::{
271        QueryFullProcessImageNameW,
272    };
273    unsafe {
274        let h = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid);
275        if h.is_null() {
276            return String::new();
277        }
278        let mut buf = [0u16; 1024];
279        let mut len = buf.len() as u32;
280        let ok = QueryFullProcessImageNameW(h, 0, buf.as_mut_ptr(), &mut len);
281        CloseHandle(h);
282        if ok == 0 {
283            return String::new();
284        }
285        OsString::from_wide(&buf[..len as usize])
286            .to_string_lossy()
287            .into_owned()
288    }
289}
290
291fn query_rss(pid: u32) -> u64 {
292    unsafe {
293        let h = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid);
294        if h.is_null() {
295            return 0;
296        }
297        let mut counters: PROCESS_MEMORY_COUNTERS = zeroed();
298        let ok = GetProcessMemoryInfo(
299            h,
300            &mut counters,
301            size_of::<PROCESS_MEMORY_COUNTERS>() as u32,
302        );
303        CloseHandle(h);
304        if ok == 0 {
305            0
306        } else {
307            counters.WorkingSetSize as u64
308        }
309    }
310}