Skip to content

Commit 10807bd

Browse files
snomiao and claude committed
fix(core): commit PttTrayState + PttTranslated plumbing
These were referenced by already-committed voice_ptt.rs / voice.rs code, but the enum, trait method, and event variant lived only in the local working tree, breaking every CI build for v2.0.0-beta.2.

- platform.rs: add PttTrayState {Idle, Recording, Processing, NoteMode} and a default Platform::set_ptt_tray_state() trait method.
- voice_otoji.rs: parse ptt_translated events and forward them to PttSession::on_ptt_translated. Enable translation args (--ptt-translate-to / --ptt-tts-source) when the CLX_TRANSLATE_* env vars are set. Wire polish=openai (→ Cloudflare) + tts=gemini.
- voice_ptt.rs: instrumentation logs in start_spinner and replace_displayed to diagnose the reported "spinner slows with long utterances" bug.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 31c5e72 commit 10807bd

3 files changed

Lines changed: 93 additions & 14 deletions

File tree

rs/core/src/modules/voice_otoji.rs

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ enum AsrEvent {
2424
PttPartial { text: String },
2525
PttFinal { text: String },
2626
PttUpgrade { text: String },
27+
PttTranslated { text: String, lang: String },
2728
Other,
2829
}
2930

@@ -45,9 +46,13 @@ fn parse_event(line: &str) -> AsrEvent {
4546
"final" => AsrEvent::Final { text: get("text").unwrap_or_default() },
4647
"status" => AsrEvent::Status { message: get("message").unwrap_or_default() },
4748
"error" => AsrEvent::Error { message: get("message").unwrap_or_default() },
48-
"ptt_partial" => AsrEvent::PttPartial { text: get("text").unwrap_or_default() },
49-
"ptt_final" => AsrEvent::PttFinal { text: get("text").unwrap_or_default() },
50-
"ptt_upgrade" => AsrEvent::PttUpgrade { text: get("text").unwrap_or_default() },
49+
"ptt_partial" => AsrEvent::PttPartial { text: get("text").unwrap_or_default() },
50+
"ptt_final" => AsrEvent::PttFinal { text: get("text").unwrap_or_default() },
51+
"ptt_upgrade" => AsrEvent::PttUpgrade { text: get("text").unwrap_or_default() },
52+
"ptt_translated" => AsrEvent::PttTranslated {
53+
text: get("text").unwrap_or_default(),
54+
lang: get("lang").unwrap_or_default(),
55+
},
5156
_ => AsrEvent::Other,
5257
}
5358
}
@@ -127,17 +132,41 @@ impl OtojiBackend {
127132
// opening the mic itself. CLX has mic permission; otoji may not.
128133
let mut cmd = Command::new("otoji");
129134
let ctx_path = super::voice_ptt::ptt_context_file_path();
130-
cmd.args([
131-
"listen", "--plain", "-",
132-
"--ptt-polish", "auto", // fix punctuation (e.g. `.` → `?` for questions)
133-
"--ptt-tts", "auto", // speak the polished text back for pronunciation feedback
134-
"--ptt-context-file", &ctx_path,
135-
])
136-
.stdout(Stdio::piped())
137-
.stderr(Stdio::piped())
138-
.stdin(Stdio::piped())
139-
.env("OTOJI_RELAUNCHED", "1")
140-
.env("OTOJI_REBUILDING", "1"); // prevent auto-rebuild + exec which breaks pipes
135+
let mut args: Vec<String> = vec![
136+
"listen".into(), "--plain".into(), "-".into(),
137+
// "openai" route goes through OpenAiPolisher which honors the
138+
// OTOJI_POLISH_BASE_URL / _API_KEY / _MODEL env vars. Default
139+
// in .env.local points to Cloudflare Workers AI (edge inference,
140+
// ~200-500ms TTFB). Falls back to Gemini if those env vars are
141+
// unset thanks to `resolve_polisher`'s "auto" chain.
142+
"--ptt-polish".into(), "openai".into(),
143+
// Gemini handles multilingual (en/zh/ja) — "auto" would pick Piper
144+
// which is English-only and mangles CJK text.
145+
"--ptt-tts".into(), "gemini".into(),
146+
"--ptt-context-file".into(), ctx_path,
147+
];
148+
// Translation (Phase 1: env-driven).
149+
// CLX_TRANSLATE_TO: target language BCP-47 code (e.g. "en"). Empty = off.
150+
// CLX_TRANSLATE_TTS_SOURCE: "original" or "translated" (default original).
151+
if let Ok(to) = std::env::var("CLX_TRANSLATE_TO") {
152+
if !to.is_empty() {
153+
args.push("--ptt-translate-to".into());
154+
args.push(to);
155+
}
156+
}
157+
if let Ok(src) = std::env::var("CLX_TRANSLATE_TTS_SOURCE") {
158+
if !src.is_empty() {
159+
args.push("--ptt-tts-source".into());
160+
args.push(src);
161+
}
162+
}
163+
164+
cmd.args(&args)
165+
.stdout(Stdio::piped())
166+
.stderr(Stdio::piped())
167+
.stdin(Stdio::piped())
168+
.env("OTOJI_RELAUNCHED", "1")
169+
.env("OTOJI_REBUILDING", "1"); // prevent auto-rebuild + exec which breaks pipes
141170

142171
// NOTE: process_group(0) was disabled — it may interfere with signal
143172
// delivery from parent to child on macOS. We kill otoji explicitly
@@ -362,6 +391,11 @@ impl OtojiBackend {
362391
p.on_ptt_upgrade(&text);
363392
}
364393
}
394+
AsrEvent::PttTranslated { text, lang } => {
395+
if let Some(ref p) = ptt {
396+
p.on_ptt_translated(&text, &lang);
397+
}
398+
}
365399
AsrEvent::Status { message } => {
366400
platform.update_voice_subtitle(&format!("[{}]", message));
367401
}

rs/core/src/modules/voice_ptt.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,9 @@ impl PttSession {
416416
.name("ptt-spinner".into())
417417
.spawn(move || {
418418
let mut i = 0usize;
419+
let spin_start = Instant::now();
420+
let mut iter_no = 0u64;
421+
let mut last_iter_end = spin_start;
419422
// Type the initial frame under the lock.
420423
{
421424
let mut s = this.spinner.lock().unwrap();
@@ -426,19 +429,36 @@ impl PttSession {
426429
}
427430
loop {
428431
std::thread::sleep(Duration::from_millis(SPINNER_INTERVAL_MS));
432+
let loop_top = Instant::now();
429433
let mut s = this.spinner.lock().unwrap();
434+
let got_lock = Instant::now();
430435
if !s.active { return; }
431436
i = (i + 1) % SPINNER_FRAMES.len();
432437
// Backspace old frame, type new. Under lock so stop_spinner
433438
// can't race during the swap.
439+
let inject_start = Instant::now();
434440
if let Some(old) = s.current.take() {
435441
for _ in old.chars() {
436442
this.platform.key_tap(KeyCode::Backspace);
437443
}
438444
}
439445
let frame = SPINNER_FRAMES[i];
440446
this.platform.type_text(frame);
447+
let inject_end = Instant::now();
441448
s.current = Some(frame.to_string());
449+
drop(s);
450+
451+
iter_no += 1;
452+
let gap = loop_top.duration_since(last_iter_end).as_millis();
453+
let lock_wait = got_lock.duration_since(loop_top).as_millis();
454+
let inject_ms = inject_end.duration_since(inject_start).as_millis();
455+
let since_start = loop_top.duration_since(spin_start).as_millis();
456+
// Log every iteration for now (we can throttle later once
457+
// the cause is clear).
458+
eprintln!(
459+
"[CLX] ptt-spin #{iter_no} t+{since_start}ms gap={gap}ms lock_wait={lock_wait}ms inject={inject_ms}ms"
460+
);
461+
last_iter_end = inject_end;
442462
}
443463
})
444464
.ok();
@@ -504,7 +524,9 @@ impl PttSession {
504524

505525
/// Replace displayed text at cursor using diff (common prefix optimization).
506526
fn replace_displayed(&self, new_text: &str) {
527+
let t0 = Instant::now();
507528
let mut displayed = self.displayed.lock().unwrap();
529+
let t_lock = Instant::now();
508530
let old = &**displayed;
509531

510532
let common: usize = old.chars().zip(new_text.chars())
@@ -513,13 +535,25 @@ impl PttSession {
513535

514536
let old_tail_chars = old.chars().count() - common;
515537
let new_tail: String = new_text.chars().skip(common).collect();
538+
let new_tail_chars = new_tail.chars().count();
516539

540+
let t_inject_start = Instant::now();
517541
for _ in 0..old_tail_chars {
518542
self.platform.key_tap(KeyCode::Backspace);
519543
}
520544
if !new_tail.is_empty() {
521545
self.platform.type_text(&new_tail);
522546
}
547+
let t_end = Instant::now();
548+
549+
eprintln!(
550+
"[CLX] ptt-partial old_len={} new_len={} common={} bs={} type={} | lock={}ms inject={}ms total={}ms",
551+
old.chars().count(), new_text.chars().count(), common,
552+
old_tail_chars, new_tail_chars,
553+
t_lock.duration_since(t0).as_millis(),
554+
t_end.duration_since(t_inject_start).as_millis(),
555+
t_end.duration_since(t0).as_millis(),
556+
);
523557

524558
*displayed = new_text.to_string();
525559
}

rs/core/src/platform.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,15 @@ pub trait SystemAudioStream: Send {
2727
fn sample_rate(&self) -> u32;
2828
}
2929

30+
/// Menu bar / tray icon state for voice features.
31+
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
32+
pub enum PttTrayState {
33+
Idle,
34+
Recording,
35+
Processing,
36+
NoteMode,
37+
}
38+
3039
pub trait Platform: Send + Sync + 'static {
3140
// ── Keyboard output ───────────────────────────────────────────────────────
3241

@@ -191,6 +200,8 @@ pub trait Platform: Send + Sync + 'static {
191200
fn hide_voice_overlay(&self) {}
192201
fn update_voice_overlay(&self, _mic_levels: &[f32], _mic_vad: bool, _sys_levels: &[f32], _sys_vad: bool) {}
193202
fn update_voice_subtitle(&self, _text: &str) {}
203+
/// Update the menu bar tray icon to reflect PTT / voice state.
204+
fn set_ptt_tray_state(&self, _state: PttTrayState) {}
194205

195206
// ── Brainstorm overlay (optional, default = no-op) ─────────────────────
196207

0 commit comments

Comments
 (0)