Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 70 additions & 15 deletions backends/opencode/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,67 @@ const PREVIEW_TOOLS = new Set(["edit", "write", "multiedit", "bash", "apply_patc

// ── Shim invocation ──────────────────────────────────────────────

function runHook(event: "pre" | "post", payload: object): void {
const HOOK_TIMEOUT_MS = 15000
// A genuine timeout takes ~HOOK_TIMEOUT_MS; the spurious libuv timeout below
// returns almost instantly. Anything faster than this is treated as spurious.
const SPURIOUS_TIMEOUT_MS = 2000
const MAX_HOOK_ATTEMPTS = 3

// Run `run` (a synchronous, throwing operation — here a spawnSync), retrying a
// SPURIOUS timeout.
//
// Why this exists: Node's spawnSync (used by execSync) derives its timeout
// deadline from libuv's *cached* loop time, which is only refreshed once per
// loop iteration. The first spawnSync that runs right after async work — here,
// the awaited enqueueHook in the tool hooks — sees a stale "now", so
// `now + timeout` is already in the past and libuv SIGTERMs the child the
// instant it spawns: a spurious ETIMEDOUT that returns in ~15ms instead of 15s.
// On Windows this drops the FIRST hook of a concurrent burst, so that file gets
// no diff/marker (issue #46; Unix is unaffected — it execs the .sh directly and
// doesn't hit this the same way).
//
// The fix: retry, but first `await` a turn of the event loop so libuv refreshes
// its cached time — a synchronous retry would re-read the same stale value and
// fail again (which is exactly why the *next* hook in a burst always succeeds).
// A genuine timeout takes ~HOOK_TIMEOUT_MS, far above SPURIOUS_TIMEOUT_MS, so it
// is never retried. `label` is used only for the diagnostic log. Exported so the
// retry behaviour can be exercised by the test harness (it's a Windows libuv
// quirk that can't otherwise be reproduced on CI).
export async function runWithSpuriousRetry(
run: () => void,
label = "hook-entry",
): Promise<void> {
for (let attempt = 1; attempt <= MAX_HOOK_ATTEMPTS; attempt++) {
const start = Date.now()
try {
run()
return
} catch (err: any) {
const elapsed = Date.now() - start
// A timeout-kill surfaces as code ETIMEDOUT and/or signal SIGTERM — Node
// has historically reported one or the other depending on platform — so
// match both, or the spurious-timeout retry could be missed where only
// SIGTERM is set. Still gated on elapsed < SPURIOUS_TIMEOUT_MS below, so a
// genuine ~15s hang (also SIGTERM'd) is never mistaken for spurious.
const timedOut =
!!err && (err.code === "ETIMEDOUT" || err.signal === "SIGTERM")
if (timedOut && elapsed < SPURIOUS_TIMEOUT_MS && attempt < MAX_HOOK_ATTEMPTS) {
// Yield so libuv refreshes its cached loop time before retrying.
await new Promise<void>((resolve) => setImmediate(resolve))
continue
}
// Abstain on any failure. Log a genuine timeout with elapsed ms so a real
// hang (~15s) is distinguishable from exhausted spurious retries.
if (timedOut) {
// eslint-disable-next-line no-console
console.debug(`[code-preview] ${label} timed out after ${elapsed}ms`)
}
return
}
}
}

async function runHook(event: "pre" | "post", payload: object): Promise<void> {
const shim = resolveHookEntry()
if (!shim) {
// Surface enough breadcrumb that a misconfigured bin-path.txt isn't a
Expand All @@ -69,22 +129,15 @@ function runHook(event: "pre" | "post", payload: object): void {
const cmd = IS_WIN
? `powershell -NoProfile -ExecutionPolicy Bypass -File "${shim}" opencode ${event}`
: `"${shim}" opencode ${event}`
try {

await runWithSpuriousRetry(() => {
execSync(cmd, {
input: JSON.stringify(payload),
env: { ...process.env, CODE_PREVIEW_BACKEND: "opencode" },
timeout: 15000,
timeout: HOOK_TIMEOUT_MS,
stdio: ["pipe", "pipe", "pipe"],
})
} catch (err: any) {
// Abstain on any failure. Log timeouts at debug-equivalent because
// a silent 15s hang is otherwise hard to diagnose; everything else
// is treated as best-effort and swallowed.
if (err && (err.code === "ETIMEDOUT" || err.signal === "SIGTERM")) {
// eslint-disable-next-line no-console
console.debug(`[code-preview] hook-entry ${event} timed out after 15s`)
}
}
}, `hook-entry ${event}`)
}

// ── Hook serialisation ───────────────────────────────────────────
Expand All @@ -96,9 +149,9 @@ function runHook(event: "pre" | "post", payload: object): void {

let hookQueue: Promise<void> = Promise.resolve()

function enqueueHook(fn: () => void): Promise<void> {
hookQueue = hookQueue.then(() => {
try { fn() } catch { /* non-fatal */ }
function enqueueHook(fn: () => void | Promise<void>): Promise<void> {
hookQueue = hookQueue.then(async () => {
try { await fn() } catch { /* non-fatal */ }
})
return hookQueue
}
Expand All @@ -116,6 +169,8 @@ const plugin: Plugin = async ({ directory }) => {

"tool.execute.after": async (input, _output) => {
if (!PREVIEW_TOOLS.has(input.tool)) return
// OpenCode's after-hook carries the tool args on `input` (the before-hook
// carries them on `output`), confirmed on the live API (issue #46).
const args = ((input as any).args as Record<string, any>) ?? {}
const payload = { tool: input.tool, args, cwd: directory }
await enqueueHook(() => runHook("post", payload))
Expand Down
92 changes: 92 additions & 0 deletions tests/backends/opencode/retry_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// retry_test.ts — Unit guard for runWithSpuriousRetry (issue #46).
//
// The Windows libuv quirk — the first spawnSync after async work spuriously
// times out because libuv's cached loop time is stale — can't be reproduced on
// CI. So we test the retry logic directly by injecting a `run` that throws a
// fast ETIMEDOUT / SIGTERM. Cheap insurance against someone later "simplifying"
// the retry (or the SIGTERM match) away. No nvim required.
//
// Run via: bun retry_test.ts (or) npx tsx retry_test.ts
// Prints "ALL OK" and exits 0 on success; exits 1 on any failed check.

import { resolve, dirname } from "path"
import { fileURLToPath, pathToFileURL } from "url"

const __dirname = dirname(fileURLToPath(import.meta.url))

type RunWithSpuriousRetry = (run: () => void, label?: string) => Promise<void>

// Build an error shaped like Node's timeout-kill: ETIMEDOUT via `code`, or a
// SIGTERM kill via `signal` (platform-dependent — see the retry's `timedOut`).
function timeoutErr(kind: "code" | "signal"): Error {
const e = new Error("simulated timeout") as any
if (kind === "code") e.code = "ETIMEDOUT"
else e.signal = "SIGTERM"
return e
}

let failures = 0
function check(name: string, cond: boolean): void {
console.log(`${cond ? "ok " : "FAIL"} - ${name}`)
if (!cond) failures++
}

async function main(): Promise<void> {
// pathToFileURL so the dynamic import works with Windows absolute paths too
// (the bare `D:\…` path is rejected by the ESM loader as an unsupported scheme).
const indexPath = resolve(__dirname, "../../../backends/opencode/index.ts")
const mod = await import(pathToFileURL(indexPath).href)
const runWithSpuriousRetry = mod.runWithSpuriousRetry as RunWithSpuriousRetry

// Success on the first attempt → run called exactly once.
{
let calls = 0
await runWithSpuriousRetry(() => { calls++ })
check("success on first attempt runs once", calls === 1)
}

// The core case: a fast ETIMEDOUT recovers on retry (the libuv quirk).
{
let calls = 0
await runWithSpuriousRetry(() => { calls++; if (calls === 1) throw timeoutErr("code") })
check("fast ETIMEDOUT recovers on retry (runs twice)", calls === 2)
}

// Platform variance: a SIGTERM-only fast kill must also recover (review fix).
{
let calls = 0
await runWithSpuriousRetry(() => { calls++; if (calls === 1) throw timeoutErr("signal") })
check("fast SIGTERM recovers on retry (runs twice)", calls === 2)
}

// A non-timeout error must NOT be retried — abstain after one attempt.
{
let calls = 0
await runWithSpuriousRetry(() => {
calls++
const e = new Error("nope") as any
e.code = "ENOENT"
throw e
})
check("non-timeout error is not retried (runs once)", calls === 1)
}

// A persistent fast timeout must be bounded at MAX_HOOK_ATTEMPTS — no infinite
// loop if every attempt spuriously times out.
{
let calls = 0
await runWithSpuriousRetry(() => { calls++; throw timeoutErr("code") })
check("persistent fast timeout is bounded (runs 3x)", calls === 3)
}

if (failures > 0) {
console.log(`RETRY GUARD FAILED (${failures})`)
process.exit(1)
}
console.log("ALL OK")
}

main().catch((err) => {
console.error(err)
process.exit(1)
})
36 changes: 36 additions & 0 deletions tests/backends/opencode/test_retry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# test_retry.sh — Unit guard for the OpenCode plugin's spurious-timeout retry.
#
# The Windows libuv quirk (the first spawnSync after async work spuriously times
# out; issue #46) can't be reproduced on CI, so this drives retry_test.ts, which
# exercises runWithSpuriousRetry directly with an injected `run` that throws a
# fast ETIMEDOUT/SIGTERM. No nvim/socket needed — it's a pure unit guard against
# the retry being accidentally removed or narrowed.

# ── Check for tsx/bun ────────────────────────────────────────────

_OPENCODE_RUNNER=""
if command -v bun >/dev/null 2>&1; then
_OPENCODE_RUNNER="bun"
elif command -v npx >/dev/null 2>&1; then
_OPENCODE_RUNNER="npx tsx"
else
echo -e "${YELLOW} ⊘ Skipping OpenCode retry guard (neither bun nor npx found)${NC}"
return 0 2>/dev/null || exit 0
fi

RETRY_TEST="$SCRIPT_DIR/backends/opencode/retry_test.ts"

# ── Test: retry helper recovers / is bounded ─────────────────────

test_opencode_retry_guard() {
local output
output="$($_OPENCODE_RUNNER "$RETRY_TEST" 2>&1)"
if [[ "$output" != *"ALL OK"* ]]; then
echo -e " ${RED}retry guard output:${NC}" >&2
echo "$output" >&2
return 1
fi
}

run_test "OpenCode retry recovers fast ETIMEDOUT/SIGTERM, bounded, no over-retry" test_opencode_retry_guard
Loading