Skip to content

Commit 97e123e

Browse files
snomiaoclaude
andcommitted
fix(windows): eliminate three kernel-wedge deadlock vectors
A prior clx-rust.exe instance (PID 7580) ran for 3 days and could not be killed by taskkill or elevated Stop-Process, requiring a reboot. Analysis of the Windows adapter found three places where a user-space lock or a slow Win32/Tauri call could stall a thread inside an unabortable kernel wait and leave the process unkillable. 1. core/audio_capture.rs: replace Arc<Mutex<Vec<f32>>> with a lock-free SPSC HeapRb ring. The cpal input callback thread no longer blocks on a user-space Mutex, so a consumer holding the lock across slow I/O can't stall the audio thread into an unjoinable state when the stream is later paused or dropped. 2. adapters/windows/src/hook.rs: spawn a dedicated clx-tray-worker thread owning all update_tray_icon / cursor_visibility::enable|disable calls, fed by an mpsc::Sender<bool>. The WH_KEYBOARD_LL callback now only does a non-blocking tx.send on mode edges - no Tauri calls, no SystemParametersInfoW, no GUI locks touched from the hook. Also moves cursor_visibility::nudge out of the hook callback into tick_timer_proc (16 ms SetTimer) so the hook stays well under the ~300 ms budget. 3. adapters/windows/src/main.rs: the quit-watch thread now polls WaitForSingleObject with a 500 ms timeout and checks a SHUTDOWN AtomicBool set at end of main, instead of blocking INFINITE. This lets the thread exit cleanly during normal shutdown rather than sitting in a kernel wait forever. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7124806 commit 97e123e

5 files changed

Lines changed: 136 additions & 35 deletions

File tree

rs/Cargo.lock

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rs/adapters/windows/src/hook.rs

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/// Windows WH_KEYBOARD_LL hook – bridges Win32 key events to ClxEngine.
22
use std::sync::{Arc, OnceLock};
33
use std::sync::atomic::{AtomicU32, AtomicUsize, Ordering};
4+
use std::sync::mpsc;
45

56
use windows::Win32::Foundation::{HWND, LPARAM, LRESULT, WPARAM};
67
use windows::Win32::System::LibraryLoader::GetModuleHandleW;
@@ -30,6 +31,39 @@ static SHM: OnceLock<SharedState> = OnceLock::new();
3031
/// Last tray-active state for edge detection (0 = off, 1 = on, u32::MAX = uninitialised).
3132
static LAST_TRAY_ACTIVE: AtomicU32 = AtomicU32::new(u32::MAX);
3233

34+
/// Channel sender to the tray/cursor worker thread. `None` until `init_tray_worker`
35+
/// is called. The hook callback must NEVER call Tauri or SystemParametersInfoW
36+
/// directly — those can block on GUI locks held by threads waiting on the hook,
37+
/// creating a 3-way deadlock that leaves the process unkillable.
38+
static TRAY_TX: OnceLock<mpsc::Sender<bool>> = OnceLock::new();
39+
40+
/// Spawn the tray/cursor-visibility worker thread. Must be called once before
41+
/// `install_hook()`. The worker coalesces bursts of edge events so a rapid
42+
/// on/off/on flicker only triggers the most recent state.
43+
pub fn init_tray_worker() {
44+
let (tx, rx) = mpsc::channel::<bool>();
45+
if TRAY_TX.set(tx).is_err() {
46+
return;
47+
}
48+
let _ = std::thread::Builder::new()
49+
.name("clx-tray-worker".into())
50+
.spawn(move || {
51+
while let Ok(first) = rx.recv() {
52+
// Coalesce any backlog so we only apply the latest state.
53+
let mut latest = first;
54+
while let Ok(next) = rx.try_recv() {
55+
latest = next;
56+
}
57+
crate::update_tray_icon(latest);
58+
if latest {
59+
crate::cursor_visibility::enable();
60+
} else {
61+
crate::cursor_visibility::disable();
62+
}
63+
}
64+
});
65+
}
66+
3367
/// Store the shared memory handle so the hook callback can publish mode changes.
3468
pub fn init_shared_state(shm: SharedState) {
3569
let _ = SHM.set(shm);
@@ -86,11 +120,18 @@ pub fn install_hook() {
86120
}
87121
}
88122

89-
/// Timer callback — drives AccModel physics on the main/hook thread.
123+
/// Timer callback — drives AccModel physics on the main/hook thread, and
124+
/// periodically nudges cursor visibility while CLX mode is held. Nudging used
125+
/// to happen inside the hook callback; moving it here keeps the hook callback
126+
/// fast and keeps us well under the WH_KEYBOARD_LL ~300 ms budget.
90127
unsafe extern "system" fn tick_timer_proc(_hwnd: HWND, _msg: u32, _id: usize, _time: u32) {
91128
if let Some(engine) = ENGINE.get() {
92129
engine.tick();
93130
}
131+
let active = LAST_TRAY_ACTIVE.load(Ordering::Relaxed);
132+
if active != 0 && active != u32::MAX {
133+
crate::cursor_visibility::nudge();
134+
}
94135
}
95136

96137
pub fn uninstall_hook() {
@@ -144,19 +185,16 @@ unsafe extern "system" fn keyboard_proc(
144185
shm.write_mode(mode);
145186
}
146187

147-
// Update tray icon on mode edge transitions.
188+
// Dispatch mode-edge transitions to the tray worker. The hook callback
189+
// must never call Tauri or SystemParametersInfoW directly — sending on an
190+
// mpsc channel is wait-free (one atomic CAS + a malloc) and safe here.
191+
// Cursor-visibility nudges are handled by the SetTimer tick instead.
148192
let active = u32::from(mode != 0);
149193
let prev = LAST_TRAY_ACTIVE.swap(active, Ordering::Relaxed);
150194
if prev != active {
151-
crate::update_tray_icon(active != 0);
152-
if active != 0 {
153-
crate::cursor_visibility::enable();
154-
} else {
155-
crate::cursor_visibility::disable();
195+
if let Some(tx) = TRAY_TX.get() {
196+
let _ = tx.send(active != 0);
156197
}
157-
} else if active != 0 {
158-
// Periodic nudge while CLX mode held — defeats touch cursor suppression.
159-
crate::cursor_visibility::nudge();
160198
}
161199

162200
match resp {

rs/adapters/windows/src/main.rs

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ mod vk;
1616

1717
use std::path::Path;
1818
use std::process::{Child, Command};
19+
use std::sync::atomic::{AtomicBool, Ordering};
1920
use std::sync::OnceLock;
2021

2122
use windows::Win32::Foundation::HANDLE;
@@ -35,6 +36,11 @@ static ICON_ON: &[u8] = include_bytes!("../../../../Data/XIconBlue.ico");
3536

3637
static APP_HANDLE: OnceLock<AppHandle> = OnceLock::new();
3738

39+
/// Set when main() is about to return so background helper threads
40+
/// (e.g. the quit-watch thread) can exit their poll loops cleanly instead
41+
/// of blocking forever on kernel waits.
42+
static SHUTDOWN: AtomicBool = AtomicBool::new(false);
43+
3844
const TRAY_ID: &str = "main";
3945

4046
/// Switch tray icon between on (blue) and off (white).
@@ -142,6 +148,13 @@ fn main() {
142148
if let Some(ev) = ahk_ready_event {
143149
unsafe { let _ = windows::Win32::Foundation::CloseHandle(ev); }
144150
}
151+
// Spawn the tray/cursor worker BEFORE install_hook so the first hook
152+
// callback already has a channel to send edge events into. The worker
153+
// absorbs all Tauri and SystemParametersInfoW work that used to run
154+
// inside the WH_KEYBOARD_LL callback — keeping the hook callback fast
155+
// and avoiding deadlocks that could leave the process unkillable.
156+
hook::init_tray_worker();
157+
145158
// Install hook on the main thread BEFORE Tauri init.
146159
// Tauri's setup takes ~15s, during which the hook would be starved of
147160
// message pumping. We install here and run a brief PeekMessage pump
@@ -160,19 +173,34 @@ fn main() {
160173
let _ = APP_HANDLE.set(app.handle().clone());
161174

162175
// Watch the quit event so a new instance can ask us to exit cleanly.
176+
// Uses a 500 ms polling wait instead of INFINITE so the thread
177+
// observes the SHUTDOWN flag during normal shutdown and exits
178+
// cleanly, rather than sitting in a kernel wait forever.
163179
if let Some(evt) = shm::SharedState::create_quit_event() {
164180
let app_handle = app.handle().clone();
165181
let raw = evt.0 as usize; // extract raw ptr for Send
166-
std::thread::spawn(move || {
167-
use windows::Win32::Foundation::{CloseHandle, HANDLE};
168-
use windows::Win32::System::Threading::WaitForSingleObject;
169-
unsafe {
170-
let h = HANDLE(raw as *mut _);
171-
WaitForSingleObject(h, u32::MAX); // INFINITE
172-
let _ = CloseHandle(h);
173-
}
174-
app_handle.exit(0);
175-
});
182+
let _ = std::thread::Builder::new()
183+
.name("clx-quit-watch".into())
184+
.spawn(move || {
185+
use windows::Win32::Foundation::{CloseHandle, HANDLE, WAIT_OBJECT_0};
186+
use windows::Win32::System::Threading::WaitForSingleObject;
187+
unsafe {
188+
let h = HANDLE(raw as *mut _);
189+
loop {
190+
let r = WaitForSingleObject(h, 500);
191+
if r == WAIT_OBJECT_0 {
192+
let _ = CloseHandle(h);
193+
app_handle.exit(0);
194+
return;
195+
}
196+
if SHUTDOWN.load(Ordering::Relaxed) {
197+
let _ = CloseHandle(h);
198+
return;
199+
}
200+
// WAIT_TIMEOUT or WAIT_FAILED — retry.
201+
}
202+
}
203+
});
176204
}
177205

178206
let prefs_item = MenuItemBuilder::with_id("prefs", "Preferences…").build(app)?;
@@ -239,6 +267,9 @@ fn main() {
239267
}
240268
});
241269

270+
// Signal helper threads to wind down before we tear down Win32 state.
271+
SHUTDOWN.store(true, Ordering::Relaxed);
272+
242273
hook::uninstall_hook();
243274
cursor_visibility::disable();
244275

rs/core/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ path = "src/lib.rs"
1313
once_cell = "1.19"
1414
parking_lot = "0.12"
1515
cpal = "0.15"
16+
ringbuf = "0.4"
1617
dirs = "5"
1718
whisper-rs = { version = "0.13", optional = true }
1819
sherpa-rs = { version = "0.6", optional = true }

rs/core/src/audio_capture.rs

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22
//!
33
//! Captures microphone audio as mono f32 samples, targeting 16 kHz for Whisper.
44
//! Falls back to the device's default config if 16 kHz is unavailable.
5+
//!
6+
//! Uses a lock-free SPSC ring buffer between the cpal callback thread and
7+
//! consumers. The audio callback must never block on a user-space lock — if a
8+
//! consumer holds one across slow I/O while the callback is also trying to take
9+
//! it, the callback thread stalls and a subsequent `stream.pause()/drop()` can
10+
//! wedge waiting to join the callback thread. A lock-free ring avoids that
11+
//! entire failure mode.
512
613
use std::sync::{
714
atomic::{AtomicBool, Ordering},
@@ -10,10 +17,11 @@ use std::sync::{
1017

1118
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
1219
use cpal::{SampleFormat, Stream, StreamConfig};
20+
use ringbuf::{traits::*, HeapCons, HeapRb};
1321

1422
pub struct AudioCapture {
1523
recording: Arc<AtomicBool>,
16-
buffer: Arc<Mutex<Vec<f32>>>,
24+
cons: Arc<Mutex<HeapCons<f32>>>,
1725
sample_rate: u32,
1826
stream: Option<Stream>,
1927
}
@@ -64,11 +72,17 @@ impl AudioCapture {
6472
};
6573

6674
let recording = Arc::new(AtomicBool::new(false));
67-
let buffer: Arc<Mutex<Vec<f32>>> = Arc::new(Mutex::new(Vec::new()));
75+
76+
// Ring buffer sized for ~10 s of mono audio at the target rate. Consumers
77+
// typically drain every 50–200 ms, so this is generous head-room.
78+
let rb_capacity = (sample_rate as usize).saturating_mul(10).max(16000);
79+
let rb: HeapRb<f32> = HeapRb::new(rb_capacity);
80+
let (mut prod, cons) = rb.split();
81+
let cons = Arc::new(Mutex::new(cons));
6882

6983
let rec_flag = Arc::clone(&recording);
70-
let buf_handle = Arc::clone(&buffer);
7184
let channels = config.channels as usize;
85+
let mut downmix: Vec<f32> = Vec::with_capacity(4096);
7286

7387
let stream = device
7488
.build_input_stream(
@@ -77,15 +91,18 @@ impl AudioCapture {
7791
if !rec_flag.load(Ordering::Relaxed) {
7892
return;
7993
}
80-
let mut buf = buf_handle.lock().unwrap();
8194
if channels == 1 {
82-
buf.extend_from_slice(data);
95+
let _ = prod.push_slice(data);
8396
} else {
84-
// Down-mix to mono by averaging channels.
97+
// Down-mix to mono by averaging channels. Reuse the
98+
// scratch buffer to avoid per-callback allocation.
99+
downmix.clear();
100+
downmix.reserve(data.len() / channels);
85101
for chunk in data.chunks(channels) {
86102
let sum: f32 = chunk.iter().sum();
87-
buf.push(sum / channels as f32);
103+
downmix.push(sum / channels as f32);
88104
}
105+
let _ = prod.push_slice(&downmix);
89106
}
90107
},
91108
move |err| {
@@ -95,12 +112,9 @@ impl AudioCapture {
95112
)
96113
.map_err(|e| format!("Failed to build input stream: {e}"))?;
97114

98-
// eprintln!("[CLX] audio capture: device ready, {}Hz {}ch",
99-
// sample_rate, config.channels);
100-
101115
Ok(Self {
102116
recording,
103-
buffer,
117+
cons,
104118
sample_rate,
105119
stream: Some(stream),
106120
})
@@ -113,7 +127,6 @@ impl AudioCapture {
113127
.play()
114128
.map_err(|e| format!("Failed to start audio stream: {e}"))?;
115129
self.recording.store(true, Ordering::Relaxed);
116-
// eprintln!("[CLX] audio capture: started");
117130
Ok(())
118131
} else {
119132
Err("No audio stream available".to_string())
@@ -126,13 +139,19 @@ impl AudioCapture {
126139
if let Some(ref stream) = self.stream {
127140
let _ = stream.pause();
128141
}
129-
// eprintln!("[CLX] audio capture: stopped");
130142
}
131143

132144
/// Drain and return all buffered samples collected so far.
133145
pub fn take_samples(&self) -> Vec<f32> {
134-
let mut buf = self.buffer.lock().unwrap();
135-
std::mem::take(&mut *buf)
146+
let mut cons = self.cons.lock().unwrap();
147+
let n = cons.occupied_len();
148+
if n == 0 {
149+
return Vec::new();
150+
}
151+
let mut out = vec![0.0f32; n];
152+
let got = cons.pop_slice(&mut out);
153+
out.truncate(got);
154+
out
136155
}
137156

138157
/// Whether the capture is currently recording.

0 commit comments

Comments
 (0)