From bd11ee58268cfcbe83d8b1bdb84f819037425a67 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sat, 18 Apr 2026 07:15:00 +0800 Subject: [PATCH 01/22] feat: auto-start Monitor on rescue dispatch via PostToolUse hook When Claude dispatches an opencode rescue task (via Agent tool or direct companion Bash call), this hook detects the new task-xxx id in the tool response and injects a system-reminder instructing Claude to start a persistent Monitor covering that id. On terminal states the Monitor emits a READY line pointing to the companion result command so Claude fetches the full payload and summarizes it for the user without needing to be asked. - New plugins/opencode/scripts/post-tool-use-monitor-hook.mjs - hooks.json: register PostToolUse (matcher: Agent|Bash, timeout 5s) Gracefully no-ops on non-matching tool output or missing companion markers. --- plugins/opencode/hooks/hooks.json | 12 ++ .../scripts/post-tool-use-monitor-hook.mjs | 159 ++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100644 plugins/opencode/scripts/post-tool-use-monitor-hook.mjs diff --git a/plugins/opencode/hooks/hooks.json b/plugins/opencode/hooks/hooks.json index c76f993..8c193dd 100644 --- a/plugins/opencode/hooks/hooks.json +++ b/plugins/opencode/hooks/hooks.json @@ -33,6 +33,18 @@ } ] } + ], + "PostToolUse": [ + { + "matcher": "Agent|Bash", + "hooks": [ + { + "type": "command", + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use-monitor-hook.mjs\"", + "timeout": 5 + } + ] + } ] } } diff --git a/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs b/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs new file mode 100644 index 0000000..95d856d --- /dev/null +++ b/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs @@ -0,0 +1,159 @@ +#!/usr/bin/env node + +// PostToolUse hook: watches for rescue-task dispatch in tool responses and +// injects a reminder that tells Claude to (a) start/refresh a Monitor +// covering the new task id(s), and (b) fetch + 
summarize the companion +// `result` payload when Monitor reports a terminal state. +// +// Why in a hook: the main Claude thread has no built-in way to observe +// background codex/opencode tasks. Without this, dispatching a rescue is +// fire-and-forget — the user has to ask for progress manually. The hook +// makes every rescue dispatch automatically get monitored and reported +// on, matching the UX of in-process subagents. + +import fs from "node:fs"; +import path from "node:path"; +import process from "node:process"; +import { fileURLToPath } from "node:url"; + +function readHookInput() { + try { + const raw = fs.readFileSync(0, "utf8").trim(); + if (!raw) return {}; + return JSON.parse(raw); + } catch { + return {}; + } +} + +// Companion task ids look like `task-moNNNNNN-NNNNNN`. +const TASK_ID_RE = /\btask-[a-z0-9]{6,}-[a-z0-9]{4,}\b/g; + +// Only react to responses that are unambiguously from the opencode companion, +// to avoid false positives on arbitrary text containing a task-like token. 
+const OPENCODE_MARKERS = [ + /OpenCode task started/i, + /opencode-companion\.mjs/, + /opencode:opencode-rescue/, + /opencode rescue/i, +]; + +function extractResponseText(response) { + if (response == null) return ""; + if (typeof response === "string") return response; + if (typeof response === "object") { + if (typeof response.result === "string") return response.result; + if (typeof response.content === "string") return response.content; + return JSON.stringify(response); + } + return String(response); +} + +function resolveCompanionPath() { + const here = fileURLToPath(import.meta.url); + return path.join(path.dirname(here), "opencode-companion.mjs"); +} + +function buildMonitorScript(ids, companionPath) { + const quoted = ids.map((id) => `"${id}"`).join(" "); + // The poll loop: + // - reads companion status JSON per id every 30s + // - emits a single line whenever status/phase changes + // - exits the loop as soon as every tracked id is terminal so the + // Monitor process ends cleanly; the main thread's Monitor tool sees + // exit and stops spawning events. + // + // stdout is the event stream — stay selective. On terminal states we + // emit a `READY: ` line so Claude knows the exact command to run + // to fetch the full result payload. 
+ return [ + "set -u", + `COMP=${JSON.stringify(companionPath)}`, + `IDS=(${quoted})`, + "declare -A prev", + 'for id in "${IDS[@]}"; do prev[$id]=""; done', + "while true; do", + " all_done=1", + ' for id in "${IDS[@]}"; do', + ' json=$(node "$COMP" status "$id" --json 2>/dev/null || printf "{}")', + " st=$(printf '%s' \"$json\" | node -e 'let s=\"\";process.stdin.on(\"data\",d=>s+=d).on(\"end\",()=>{try{const j=JSON.parse(s);console.log((j.job?.status||\"unknown\")+\"/\"+(j.job?.phase||\"\"))}catch(e){console.log(\"parse-err/\")}}))')", + ' if [ "$st" != "${prev[$id]}" ]; then', + ' ts=$(date +%H:%M:%S)', + ' echo "[$ts] opencode $id: ${prev[$id]:-none} → $st"', + ' case "$st" in', + " completed/*|failed/*|cancelled/*)", + ` echo "[$ts] READY opencode $id: run \\\"node $COMP result $id\\\" to fetch the full report"`, + " ;;", + " esac", + ' prev[$id]="$st"', + " fi", + ' case "$st" in completed/*|failed/*|cancelled/*) ;; *) all_done=0 ;; esac', + " done", + " if [ $all_done -eq 1 ]; then", + " echo \"[$(date +%H:%M:%S)] opencode: all tracked tasks terminal — exiting monitor\"", + " break", + " fi", + " sleep 30", + "done", + ].join("\n"); +} + +function buildReminder(ids, script) { + const idList = ids.join(", "); + return [ + "", + `You just dispatched ${ids.length} opencode rescue task(s): ${idList}.`, + "", + "Before you respond to the user, start a persistent Monitor so this session can observe task completion automatically. Use the `Monitor` tool:", + "", + '- description: "opencode rescue task state"', + "- persistent: true", + "- timeout_ms: 3600000", + "- command: the shell loop below, verbatim", + "", + "```bash", + script, + "```", + "", + "Behavior contract — follow this without being asked:", + "1. If a Monitor is already running in this session covering other task ids, stop it (`TaskStop`) and start a new one whose IDS array is the union of all currently-running rescue task ids. One Monitor per session.", + "2. 
When a Monitor event reports a task transitioning to `completed/…`, `failed/…`, or `cancelled/…`, immediately run the `READY` line's command (`node result `) in Bash, read the output, and surface a short summary of what the rescue task did (or failed at) to the user. Do not wait for the user to ask.", + "3. If the user explicitly asked you to skip monitoring, do nothing here.", + "", + ].join("\n"); +} + +function main() { + const input = readHookInput(); + const toolName = input.tool_name || ""; + // Rescue dispatch happens through the Agent tool wrapper; main thread may + // also call companion directly via Bash. Ignore other tools. + if (toolName !== "Agent" && toolName !== "Bash") return; + + const response = extractResponseText(input.tool_response); + if (!response) return; + if (!OPENCODE_MARKERS.some((r) => r.test(response))) return; + + const ids = [...new Set(response.match(TASK_ID_RE) || [])]; + if (ids.length === 0) return; + + const companionPath = resolveCompanionPath(); + const script = buildMonitorScript(ids, companionPath); + const additionalContext = buildReminder(ids, script); + + process.stdout.write( + JSON.stringify({ + hookSpecificOutput: { + hookEventName: "PostToolUse", + additionalContext, + }, + }), + ); +} + +try { + main(); +} catch { + // Best-effort — never block tool use on hook failure. + process.exit(0); +} From 02160b84477928e3fa4327432ec997b5c5468283 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sat, 18 Apr 2026 07:26:58 +0800 Subject: [PATCH 02/22] fix(monitor): inline result fetch + fix parse-status JS syntax On terminal state the Monitor script now calls companion result and emits the truncated summary inline (bounded by OPENCODE_MONITOR_RESULT_CHARS, default 1500). Claude sees the result summary directly in the Monitor event and no longer needs a follow-up Bash call. Also fixes an extra trailing ) in the inline node -e expression that would have caused the status parser to syntax-error at runtime. 
--- .../scripts/post-tool-use-monitor-hook.mjs | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs b/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs index 95d856d..18a1766 100644 --- a/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs +++ b/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs @@ -56,33 +56,41 @@ function resolveCompanionPath() { function buildMonitorScript(ids, companionPath) { const quoted = ids.map((id) => `"${id}"`).join(" "); - // The poll loop: - // - reads companion status JSON per id every 30s + // The poll loop runs inside a Monitor child process: + // - polls companion status JSON per id every 30s // - emits a single line whenever status/phase changes - // - exits the loop as soon as every tracked id is terminal so the - // Monitor process ends cleanly; the main thread's Monitor tool sees - // exit and stops spawning events. - // - // stdout is the event stream — stay selective. On terminal states we - // emit a `READY: ` line so Claude knows the exact command to run - // to fetch the full result payload. 
+ // - on terminal state, fetches `companion result `, truncates to + // a bounded size, and prints it as multi-line output; Monitor batches + // lines within ~200ms into one notification, so the main thread + // sees a single event carrying "task done + full summary" without + // needing a follow-up tool call to fetch the result + // - exits when every tracked id is terminal (Monitor process ends + // cleanly, no runaway background poller) return [ "set -u", `COMP=${JSON.stringify(companionPath)}`, `IDS=(${quoted})`, + "RESULT_MAX_CHARS=${OPENCODE_MONITOR_RESULT_CHARS:-1500}", "declare -A prev", 'for id in "${IDS[@]}"; do prev[$id]=""; done', "while true; do", " all_done=1", ' for id in "${IDS[@]}"; do', ' json=$(node "$COMP" status "$id" --json 2>/dev/null || printf "{}")', - " st=$(printf '%s' \"$json\" | node -e 'let s=\"\";process.stdin.on(\"data\",d=>s+=d).on(\"end\",()=>{try{const j=JSON.parse(s);console.log((j.job?.status||\"unknown\")+\"/\"+(j.job?.phase||\"\"))}catch(e){console.log(\"parse-err/\")}}))')", + " st=$(printf '%s' \"$json\" | node -e 'let s=\"\";process.stdin.on(\"data\",d=>s+=d).on(\"end\",()=>{try{const j=JSON.parse(s);console.log((j.job?.status||\"unknown\")+\"/\"+(j.job?.phase||\"\"))}catch(e){console.log(\"parse-err/\")}})')", ' if [ "$st" != "${prev[$id]}" ]; then', ' ts=$(date +%H:%M:%S)', ' echo "[$ts] opencode $id: ${prev[$id]:-none} → $st"', ' case "$st" in', " completed/*|failed/*|cancelled/*)", - ` echo "[$ts] READY opencode $id: run \\\"node $COMP result $id\\\" to fetch the full report"`, + ' result=$(node "$COMP" result "$id" 2>/dev/null || true)', + " # Truncate defensively so Monitor output stays bounded.", + ' summary=$(printf "%s" "$result" | head -c "$RESULT_MAX_CHARS")', + ' echo "[$ts] opencode $id TERMINAL=$st — result summary:"', + ' echo "--- result-begin $id ---"', + ' printf "%s" "$summary"', + ' echo ""', + ' echo "--- result-end $id ---"', " ;;", " esac", ' prev[$id]="$st"', @@ -117,7 +125,7 @@ function 
buildReminder(ids, script) { "", "Behavior contract — follow this without being asked:", "1. If a Monitor is already running in this session covering other task ids, stop it (`TaskStop`) and start a new one whose IDS array is the union of all currently-running rescue task ids. One Monitor per session.", - "2. When a Monitor event reports a task transitioning to `completed/…`, `failed/…`, or `cancelled/…`, immediately run the `READY` line's command (`node result `) in Bash, read the output, and surface a short summary of what the rescue task did (or failed at) to the user. Do not wait for the user to ask.", + "2. The Monitor script above already fetches `companion result ` and emits the summary inline on terminal state — each terminal event carries the full result block between `--- result-begin ---` and `--- result-end ---` markers. You do NOT need to run a follow-up Bash call to get the result; just read the Monitor event and surface a short summary of what the rescue task did (or failed at) to the user.", "3. If the user explicitly asked you to skip monitoring, do nothing here.", "", ].join("\n"); From 31febd0b042ee20e50cc7b90d55c185323936c29 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sat, 18 Apr 2026 07:31:23 +0800 Subject: [PATCH 03/22] fix(server): race sendPrompt against completion watcher + env-tunable timeouts OpenCode server's POST /session/:id/message occasionally fails to close its HTTP response after the session emits the terminal assistant message (observed with glm-5 backend, opencode 1.4.x). Without this fix, sendPrompt hangs until AbortSignal fires, leaving the companion job stuck in 'investigating' status until the (previously 5 min) timeout. Changes: - Race the POST fetch against a /session/:id/message polling watcher; whichever returns first aborts the other. Watcher only accepts a completion whose info.time.completed >= prompt startedAt. 
- Bump generic request() timeout and sendPrompt timeout to 30 min, configurable via OPENCODE_REQUEST_TIMEOUT_MS / OPENCODE_PROMPT_TIMEOUT_MS env vars. - Completion poll interval configurable via OPENCODE_COMPLETION_POLL_MS (default 5s). --- .../opencode/scripts/lib/opencode-server.mjs | 96 ++++++++++++++++--- 1 file changed, 84 insertions(+), 12 deletions(-) diff --git a/plugins/opencode/scripts/lib/opencode-server.mjs b/plugins/opencode/scripts/lib/opencode-server.mjs index f4192a8..1d32943 100644 --- a/plugins/opencode/scripts/lib/opencode-server.mjs +++ b/plugins/opencode/scripts/lib/opencode-server.mjs @@ -8,6 +8,12 @@ const DEFAULT_PORT = 4096; const DEFAULT_HOST = "127.0.0.1"; const SERVER_START_TIMEOUT = 30_000; +// Long-running tasks (e.g. engine builds, large refactors) can easily exceed +// the old 5-10 min caps, causing `fetch failed` at a fixed deadline. Default +// to 30 min; override via env for even longer workloads. +const REQUEST_TIMEOUT_MS = Number(process.env.OPENCODE_REQUEST_TIMEOUT_MS) || 1_800_000; +const PROMPT_TIMEOUT_MS = Number(process.env.OPENCODE_PROMPT_TIMEOUT_MS) || 1_800_000; + /** * Check if an OpenCode server is already running on the given port. * @param {string} host @@ -87,7 +93,7 @@ export function createClient(baseUrl, opts = {}) { method, headers, body: body != null ? JSON.stringify(body) : undefined, - signal: AbortSignal.timeout(300_000), + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), }); if (!res.ok) { const text = await res.text().catch(() => ""); @@ -127,6 +133,17 @@ export function createClient(baseUrl, opts = {}) { /** * Send a prompt (synchronous / streaming). * Returns the full response text from SSE stream. + * + * NOTE: OpenCode's POST /session/:id/message occasionally fails to close + * its HTTP response body after the session emits its terminal assistant + * message (observed against glm-5 backend, opencode 1.4.x). 
Relying on + * res.json() alone means the caller hangs until AbortSignal fires, which + * breaks downstream job-completion detection in the companion. + * + * Workaround: race the fetch against a session-completion watcher that + * polls GET /session/:id/message. When the latest assistant message has + * info.time.completed set AND finish !== undefined, the session is done; + * we abort the hanging fetch and synthesize the response from the poll. */ sendPrompt: async (sessionId, promptText, opts = {}) => { const body = { @@ -136,19 +153,74 @@ export function createClient(baseUrl, opts = {}) { if (opts.model) body.model = opts.model; if (opts.system) body.system = opts.system; - const res = await fetch(`${baseUrl}/session/${sessionId}/message`, { - method: "POST", - headers, - body: JSON.stringify(body), - signal: AbortSignal.timeout(600_000), // 10 min for long tasks - }); + const ac = new AbortController(); + const timeoutId = setTimeout(() => ac.abort(new Error("prompt timeout")), PROMPT_TIMEOUT_MS); + const startedAt = Date.now(); + // Grace period so we don't mistake "session had no prior activity" for + // completion before the new prompt has even begun generating. 
+ const MIN_POLL_DELAY_MS = 5_000; + const POLL_INTERVAL_MS = Number(process.env.OPENCODE_COMPLETION_POLL_MS) || 5_000; - if (!res.ok) { - const text = await res.text().catch(() => ""); - throw new Error(`OpenCode prompt failed ${res.status}: ${text}`); - } + const fetchPromise = (async () => { + const res = await fetch(`${baseUrl}/session/${sessionId}/message`, { + method: "POST", + headers, + body: JSON.stringify(body), + signal: ac.signal, + }); + if (!res.ok) { + const text = await res.text().catch(() => ""); + throw new Error(`OpenCode prompt failed ${res.status}: ${text}`); + } + return { source: "fetch", data: await res.json() }; + })(); - return res.json(); + const watcherPromise = (async () => { + // Wait briefly so the new generation has a chance to start and we + // don't latch onto a stale completed message from before this prompt. + await new Promise((r) => setTimeout(r, MIN_POLL_DELAY_MS)); + while (!ac.signal.aborted) { + try { + const params = new URLSearchParams({ limit: "1" }); + const r = await fetch( + `${baseUrl}/session/${sessionId}/message?${params.toString()}`, + { headers, signal: AbortSignal.timeout(10_000) }, + ); + if (r.ok) { + const arr = await r.json(); + const last = Array.isArray(arr) ? arr[arr.length - 1] : null; + const info = last?.info; + // Only treat assistant messages created *after* this prompt + // started as a completion signal for this call. + if ( + info && + info.role === "assistant" && + typeof info.time?.completed === "number" && + info.time.completed >= startedAt && + typeof info.finish === "string" + ) { + return { source: "watcher", data: last }; + } + } + } catch { + // Ignore transient poll errors; keep waiting. + } + await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS)); + } + throw new Error("watcher aborted"); + })(); + + try { + const winner = await Promise.race([fetchPromise, watcherPromise]); + // Whichever arrived first, cancel the other. 
+ ac.abort(); + // Swallow the loser's rejection to avoid unhandled rejection noise. + fetchPromise.catch(() => {}); + watcherPromise.catch(() => {}); + return winner.data; + } finally { + clearTimeout(timeoutId); + } }, /** From 6d48b0fb274dbeca1f6a963a2a66a163187196b6 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sat, 18 Apr 2026 08:34:17 +0800 Subject: [PATCH 04/22] fix(status): honor --json flag and single-task lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `status` handler was ignoring argv entirely — `--json` was silently dropped and positional task ids were never matched. Tooling that piped status through jq would choke on the markdown fallback with "parse error: Invalid numeric literal". Now: - `status --json` emits a workspace snapshot as JSON ({workspaceRoot, running, latestFinished, recent}) - `status <id> [--json]` looks up a single job by id/prefix. JSON form is {workspaceRoot, job: <job|null>} so callers can always read .job.status safely. - `status --all` widens from session-scoped to all-sessions (useful for cross-session observers like monitor scripts) - Markdown output unchanged for the no-flag case. 
--- .../opencode/scripts/opencode-companion.mjs | 53 ++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/plugins/opencode/scripts/opencode-companion.mjs b/plugins/opencode/scripts/opencode-companion.mjs index 48e526b..7d57a19 100644 --- a/plugins/opencode/scripts/opencode-companion.mjs +++ b/plugins/opencode/scripts/opencode-companion.mjs @@ -13,7 +13,7 @@ import { isOpencodeInstalled, getOpencodeVersion, spawnDetached } from "./lib/pr import { isServerRunning, ensureServer, createClient, connect } from "./lib/opencode-server.mjs"; import { resolveWorkspace } from "./lib/workspace.mjs"; import { loadState, updateState, upsertJob, generateJobId, jobDataPath } from "./lib/state.mjs"; -import { buildStatusSnapshot, resolveResultJob, resolveCancelableJob, enrichJob } from "./lib/job-control.mjs"; +import { buildStatusSnapshot, resolveResultJob, resolveCancelableJob, enrichJob, matchJobReference } from "./lib/job-control.mjs"; import { createJobRecord, runTrackedJob, getClaudeSessionId } from "./lib/tracked-jobs.mjs"; import { renderStatus, renderResult, renderReview, renderSetup } from "./lib/render.mjs"; import { buildReviewPrompt, buildTaskPrompt } from "./lib/prompts.mjs"; @@ -424,11 +424,60 @@ async function handleTaskResumeCandidate(argv) { // ------------------------------------------------------------------ async function handleStatus(argv) { + const { options, positional } = parseArgs(argv ?? [], { + booleanOptions: ["json", "all"], + }); + const workspace = await resolveWorkspace(); const state = loadState(workspace); const sessionId = getClaudeSessionId(); + const jobs = state.jobs ?? []; + const wantJson = !!options.json; + // --all widens the snapshot filter to every session's jobs; without --all we + // still filter to the current Claude session for the existing markdown UX. + const sessionFilter = options.all ? undefined : sessionId; + const ref = positional?.[0]; + + // Single-task query — `status [--json]`. 
+ if (ref) { + const { job, ambiguous } = matchJobReference(jobs, ref); + if (ambiguous) { + if (wantJson) { + console.log(JSON.stringify({ workspaceRoot: workspace, job: null, error: "ambiguous" })); + } else { + console.error(`Ambiguous job reference "${ref}". Please provide a more specific ID prefix.`); + } + process.exit(ambiguous ? 2 : 0); + return; + } + if (wantJson) { + const enriched = job ? enrichJob(job, workspace) : null; + console.log(JSON.stringify({ workspaceRoot: workspace, job: enriched })); + return; + } + if (!job) { + console.log(`No job found for "${ref}" in workspace ${workspace}.`); + return; + } + console.log(renderStatus({ running: [], latestFinished: null, recent: [enrichJob(job, workspace)] })); + return; + } + + const snapshot = buildStatusSnapshot(jobs, workspace, { sessionId: sessionFilter }); + + if (wantJson) { + // Machine-readable shape mirrors the single-task case so callers can treat + // both uniformly: a `.job` field is present for single-task, otherwise + // `.running`/`.recent` arrays describe the whole workspace snapshot. + console.log(JSON.stringify({ + workspaceRoot: workspace, + running: snapshot.running, + latestFinished: snapshot.latestFinished, + recent: snapshot.recent, + })); + return; + } - const snapshot = buildStatusSnapshot(state.jobs ?? [], workspace, { sessionId }); console.log(renderStatus(snapshot)); } From c378dfdfc4d6e0abf2a514fd16f3616994f74b77 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sat, 18 Apr 2026 09:27:33 +0800 Subject: [PATCH 05/22] feat(monitor): surface progress activity and heartbeat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the Monitor script only emitted on status/phase transitions. For long-running tasks that sit in 'running/investigating' for many minutes, the user saw one initial event and then nothing — no way to tell if the task was still alive. 
Now: - Include the last line of progressPreview in the state signature so any new log activity inside the task triggers an event (with elapsed time + latest log snippet) - Emit a heartbeat every HEARTBEAT_POLLS ticks (default 10 = ~5min) with current status/phase/elapsed even when nothing has changed - Both tunable via OPENCODE_MONITOR_HEARTBEAT_POLLS env var --- .../scripts/post-tool-use-monitor-hook.mjs | 69 ++++++++++++------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs b/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs index 18a1766..77900a1 100644 --- a/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs +++ b/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs @@ -58,44 +58,61 @@ function buildMonitorScript(ids, companionPath) { const quoted = ids.map((id) => `"${id}"`).join(" "); // The poll loop runs inside a Monitor child process: // - polls companion status JSON per id every 30s - // - emits a single line whenever status/phase changes - // - on terminal state, fetches `companion result `, truncates to - // a bounded size, and prints it as multi-line output; Monitor batches - // lines within ~200ms into one notification, so the main thread - // sees a single event carrying "task done + full summary" without - // needing a follow-up tool call to fetch the result - // - exits when every tracked id is terminal (Monitor process ends - // cleanly, no runaway background poller) + // - emits an event when status/phase OR the latest progressPreview log + // line changes, so long-running tasks surface intermediate activity + // - emits a heartbeat every HEARTBEAT_POLLS ticks (default 10 = ~5min) + // so the user sees signs of life even when nothing has changed + // - on terminal state, fetches `companion result `, truncates, and + // prints a multi-line summary so the main thread gets a single batched + // event carrying the full report + // - exits when every tracked id is 
terminal return [ "set -u", `COMP=${JSON.stringify(companionPath)}`, `IDS=(${quoted})`, "RESULT_MAX_CHARS=${OPENCODE_MONITOR_RESULT_CHARS:-1500}", + "HEARTBEAT_POLLS=${OPENCODE_MONITOR_HEARTBEAT_POLLS:-10}", "declare -A prev", - 'for id in "${IDS[@]}"; do prev[$id]=""; done', + "declare -A hb", + 'for id in "${IDS[@]}"; do prev[$id]=""; hb[$id]=0; done', "while true; do", " all_done=1", ' for id in "${IDS[@]}"; do', ' json=$(node "$COMP" status "$id" --json 2>/dev/null || printf "{}")', - " st=$(printf '%s' \"$json\" | node -e 'let s=\"\";process.stdin.on(\"data\",d=>s+=d).on(\"end\",()=>{try{const j=JSON.parse(s);console.log((j.job?.status||\"unknown\")+\"/\"+(j.job?.phase||\"\"))}catch(e){console.log(\"parse-err/\")}})')", - ' if [ "$st" != "${prev[$id]}" ]; then', + " fields=$(printf '%s' \"$json\" | node -e 'let s=\"\";process.stdin.on(\"data\",d=>s+=d).on(\"end\",()=>{try{const j=JSON.parse(s);const jb=j.job||{};const prog=String(jb.progressPreview||\"\").split(\"\\n\").filter(Boolean);const last=(prog[prog.length-1]||\"\").replace(/[|\\r\\n]/g,\" \").slice(0,200);process.stdout.write([jb.status||\"unknown\",jb.phase||\"\",jb.elapsed||\"\",last].join(\"|\"))}catch(e){process.stdout.write(\"parse-err|||\")}})')", + " IFS='|' read -r st phase elapsed last <<< \"$fields\"", + ' sig="${st}/${phase}|${last}"', + ' if [ "$sig" != "${prev[$id]}" ]; then', ' ts=$(date +%H:%M:%S)', - ' echo "[$ts] opencode $id: ${prev[$id]:-none} → $st"', - ' case "$st" in', - " completed/*|failed/*|cancelled/*)", - ' result=$(node "$COMP" result "$id" 2>/dev/null || true)', - " # Truncate defensively so Monitor output stays bounded.", - ' summary=$(printf "%s" "$result" | head -c "$RESULT_MAX_CHARS")', - ' echo "[$ts] opencode $id TERMINAL=$st — result summary:"', - ' echo "--- result-begin $id ---"', - ' printf "%s" "$summary"', - ' echo ""', - ' echo "--- result-end $id ---"', - " ;;", - " esac", - ' prev[$id]="$st"', + ' if [ -n "$last" ]; then', + ' echo "[$ts] opencode $id: 
$st/$phase (elapsed $elapsed) — $last"', + " else", + ' echo "[$ts] opencode $id: $st/$phase (elapsed $elapsed)"', + " fi", + ' prev[$id]="$sig"', + " hb[$id]=0", + " else", + ' hb[$id]=$(( ${hb[$id]} + 1 ))', + ' if [ "${hb[$id]}" -ge "$HEARTBEAT_POLLS" ]; then', + ' ts=$(date +%H:%M:%S)', + ' echo "[$ts] opencode $id: heartbeat — still $st/$phase (elapsed $elapsed)"', + " hb[$id]=0", + " fi", " fi", - ' case "$st" in completed/*|failed/*|cancelled/*) ;; *) all_done=0 ;; esac', + ' case "$st" in', + " completed|failed|cancelled)", + ' result=$(node "$COMP" result "$id" 2>/dev/null || true)', + " # Truncate defensively so Monitor output stays bounded.", + ' summary=$(printf "%s" "$result" | head -c "$RESULT_MAX_CHARS")', + ' ts=$(date +%H:%M:%S)', + ' echo "[$ts] opencode $id TERMINAL=$st — result summary:"', + ' echo "--- result-begin $id ---"', + ' printf "%s" "$summary"', + ' echo ""', + ' echo "--- result-end $id ---"', + " ;;", + ' *) all_done=0 ;;', + " esac", " done", " if [ $all_done -eq 1 ]; then", " echo \"[$(date +%H:%M:%S)] opencode: all tracked tasks terminal — exiting monitor\"", From ec2757353e7e3a9a7b9f9e79a123b88013112566 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sat, 18 Apr 2026 14:37:20 +0800 Subject: [PATCH 06/22] feat(auto-heal): reconcile stuck jobs via session terminal probe Long-running background tasks occasionally get stuck in investigating status after the OpenCode session has finished server-side (POST body never closes, watcher misses the terminal signal, or task-worker dies). - New lib/auto-heal.mjs probes GET /session/:id/message?limit=1 and transitions the local job to completed when the last assistant message has info.finish set and info.time.completed >= job.startedAt. If the task-worker PID is dead and the session is silent >60s, the job is marked failed with a clear reason. 
- status, result, and task-resume-candidate run a silent heal pass before reading state so they never report a false "running" for a session that is actually complete. - New `companion.mjs heal` subcommand scans and reconciles in bulk, with --dry-run / --json / --all flags. - Heal is a no-op when the server is unreachable, so offline use of status/result keeps working. --- README.md | 27 ++ plugins/opencode/scripts/lib/auto-heal.mjs | 283 ++++++++++++++++++ .../opencode/scripts/opencode-companion.mjs | 90 +++++- 3 files changed, 397 insertions(+), 3 deletions(-) create mode 100644 plugins/opencode/scripts/lib/auto-heal.mjs diff --git a/README.md b/README.md index 8c14ad3..a18b686 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,33 @@ To check your configured providers: When enabled via `/opencode:setup --enable-review-gate`, a Stop hook runs a targeted OpenCode review on Claude's response. If issues are found, the stop is blocked so Claude can address them first. Warning: can create long-running loops and drain usage limits. +## Job Auto-Heal + +Long-running tasks spawned via `/opencode:task --background` occasionally get +stuck in `investigating` status even after the OpenCode session has finished +server-side — typically because `POST /session/:id/message` fails to close its +HTTP body, the task-worker is killed, or the companion's watcher misses the +terminal signal. + +The companion now reconciles this automatically: + +- `companion.mjs status` and `companion.mjs result` run a silent auto-heal + pass before they read state, so they never report a false "running" state + for a session that is actually complete. +- `companion.mjs heal` scans for stuck jobs and reconciles them in bulk. Pass + `--dry-run` to preview, `--json` for machine-readable output, and `--all` + to include jobs from other Claude sessions. + +Each heal check queries `GET /session/:id/message?limit=1`. 
If the last +assistant message has `info.finish` set and `info.time.completed >= job.startedAt`, +the job is transitioned to `completed` and the message text is persisted to +the job data file. If the task-worker PID is dead and the session has been +silent for >60 s, the job is transitioned to `failed` with a clear reason. + +If the OpenCode server is unreachable, auto-heal is a no-op — status/result +commands still work, they just can't move stuck jobs forward until the server +comes back. + ## Troubleshooting
diff --git a/plugins/opencode/scripts/lib/auto-heal.mjs b/plugins/opencode/scripts/lib/auto-heal.mjs new file mode 100644 index 0000000..88a555b --- /dev/null +++ b/plugins/opencode/scripts/lib/auto-heal.mjs @@ -0,0 +1,283 @@ +// Session-level auto-heal for tracked jobs. +// +// Background: task-worker subprocesses wrap `client.sendPrompt(sid, ...)` in +// runTrackedJob so that on successful return the job flips status→completed +// and the response text is persisted to jobDataPath. But sendPrompt can hang +// or the worker can be killed before that return happens — even though the +// OpenCode session itself completed cleanly server-side. The job then stays +// in a non-terminal state ("investigating"/"running") forever and downstream +// Monitor scripts never see the true finish. +// +// This module provides a best-effort reconciliation pass: given a job with +// an `opencodeSessionId`, query the OpenCode server for the last assistant +// message in that session. If it looks terminal (info.finish set and +// completed >= job.startedAt), upsert the job as completed and persist the +// text. If the worker process is gone and the session has been idle long +// enough, mark as failed with a clear error message. +// +// All functions are no-ops (or log to stderr and return the original job) +// when the server is unreachable, so callers can sprinkle autoHealJob at +// the top of status-reading paths without wrapping in try/catch themselves. + +import fs from "node:fs"; +import path from "node:path"; + +import { ensureDir } from "./fs.mjs"; +import { upsertJob, jobDataPath } from "./state.mjs"; + +const DEFAULT_BASE_URL = "http://127.0.0.1:4096"; +// A worker/session can be legitimately silent for a while (big model thinking, +// slow tool) — only declare it dead after >60s of no session activity AND no +// live task-worker process. 60s matches the spec. 
+const STALE_IDLE_MS = 60_000; + +function buildHeaders() { + const headers = { "Content-Type": "application/json" }; + if (process.env.OPENCODE_SERVER_PASSWORD) { + const user = process.env.OPENCODE_SERVER_USERNAME ?? "opencode"; + const cred = Buffer.from(`${user}:${process.env.OPENCODE_SERVER_PASSWORD}`).toString("base64"); + headers["Authorization"] = `Basic ${cred}`; + } + return headers; +} + +/** + * True if the given PID is currently alive. Treats missing/invalid PID as dead. + * @param {number|undefined|null} pid + * @returns {boolean} + */ +export function isProcessAlive(pid) { + if (!pid || !Number.isInteger(pid) || pid <= 0) return false; + try { + // Signal 0 is a permission/existence probe — no signal delivered. + process.kill(pid, 0); + return true; + } catch (err) { + // ESRCH = no such process. EPERM = process exists but we can't signal it + // (still alive from our perspective). + return err.code === "EPERM"; + } +} + +/** + * Extract visible text from an OpenCode message `parts` array. + * @param {Array|undefined} parts + * @returns {string} + */ +function extractPartsText(parts) { + if (!Array.isArray(parts)) return ""; + return parts + .filter((p) => p?.type === "text" && typeof p.text === "string") + .map((p) => p.text) + .join("\n"); +} + +/** + * Query the opencode server for the terminal state of a session. + * + * Returns: + * { terminal: true, finish, completed, text, info } when the last assistant + * message has info.time.completed >= startedAt AND typeof info.finish === 'string'. + * { terminal: false, reachable: true, lastUpdatedAt, lastInfo } when session exists but no terminal marker. + * { terminal: false, reachable: false, error } when server unreachable / errored. 
+ * + * @param {string} baseUrl + * @param {string} sessionId + * @param {number} startedAtMs - epoch ms; only treat completions >= this as ours + * @param {object} [headers] + */ +export async function probeSessionTerminal(baseUrl, sessionId, startedAtMs, headers) { + const h = headers ?? buildHeaders(); + try { + // limit=1 → last message only. On glm-5 / opencode 1.4.x this returns + // an array of { info, parts } objects. + const res = await fetch(`${baseUrl}/session/${sessionId}/message?limit=1`, { + method: "GET", + headers: h, + signal: AbortSignal.timeout(10_000), + }); + if (!res.ok) { + return { terminal: false, reachable: true, error: `HTTP ${res.status}` }; + } + const arr = await res.json(); + const last = Array.isArray(arr) ? arr[arr.length - 1] : null; + const info = last?.info; + if (!info) { + return { terminal: false, reachable: true, lastUpdatedAt: 0, lastInfo: null }; + } + + const completed = typeof info.time?.completed === "number" ? info.time.completed : 0; + const created = typeof info.time?.created === "number" ? info.time.created : 0; + const lastUpdatedAt = Math.max(completed, created); + + const looksTerminal = + info.role === "assistant" && + typeof info.finish === "string" && + completed >= (startedAtMs || 0); + + if (looksTerminal) { + return { + terminal: true, + finish: info.finish, + completed, + text: extractPartsText(last.parts), + info, + }; + } + return { terminal: false, reachable: true, lastUpdatedAt, lastInfo: info }; + } catch (err) { + return { terminal: false, reachable: false, error: err.message }; + } +} + +/** + * Parse an ISO-ish timestamp that might be a number or string. Returns epoch ms, or 0. + */ +function toEpochMs(v) { + if (v == null) return 0; + if (typeof v === "number") return v < 1e12 ? v * 1000 : v; // tolerate seconds + const t = new Date(v).getTime(); + return Number.isFinite(t) ? t : 0; +} + +/** + * Attempt to auto-heal a single job. Mutates persistent state via upsertJob + * on transitions. 
Returns the up-to-date job record (healed or not). + * + * @param {string} workspace + * @param {object} job + * @param {object} [opts] + * @param {string} [opts.baseUrl] + * @param {boolean} [opts.dryRun] - when true, do not write state; return `{job, action, details}` + */ +export async function autoHealJob(workspace, job, opts = {}) { + const baseUrl = opts.baseUrl ?? DEFAULT_BASE_URL; + const dryRun = !!opts.dryRun; + const HEALABLE = new Set(["starting", "investigating", "running", "finalizing"]); + + if (!job || !job.opencodeSessionId) { + return { job, action: "skip", reason: "no opencodeSessionId" }; + } + if (!HEALABLE.has(job.status)) { + return { job, action: "skip", reason: `status=${job.status} not healable` }; + } + + const startedAtMs = + toEpochMs(job.startedAt) || + toEpochMs(job.createdAt) || + toEpochMs(job.updatedAt) || + 0; + + const probe = await probeSessionTerminal(baseUrl, job.opencodeSessionId, startedAtMs); + + if (probe.terminal) { + const completedIso = new Date(probe.completed).toISOString(); + const summary = (probe.text || "").slice(0, 500); + if (dryRun) { + return { + job, + action: "would-complete", + details: { + finish: probe.finish, + completedAt: completedIso, + textLen: (probe.text || "").length, + }, + }; + } + + // Persist the result payload to disk so handleResult can surface it. + try { + const dataFile = jobDataPath(workspace, job.id); + ensureDir(path.dirname(dataFile)); + const payload = { + rendered: probe.text, + summary, + healed: true, + finish: probe.finish, + }; + fs.writeFileSync(dataFile, JSON.stringify(payload, null, 2), "utf8"); + } catch (err) { + // Non-fatal: the status transition below is still useful. 
+ process.stderr.write(`auto-heal: failed to write data file for ${job.id}: ${err.message}\n`); + } + + upsertJob(workspace, { + id: job.id, + status: "completed", + completedAt: completedIso, + phase: "completed", + result: summary || job.result || null, + healed: true, + finish: probe.finish, + }); + return { + job: { ...job, status: "completed", completedAt: completedIso, result: summary, healed: true, finish: probe.finish }, + action: "healed-completed", + details: { finish: probe.finish, textLen: (probe.text || "").length }, + }; + } + + // Not terminal. Can we at least declare it dead? + if (!probe.reachable) { + return { job, action: "skip", reason: `server unreachable: ${probe.error}` }; + } + + const workerAlive = isProcessAlive(job.pid); + if (workerAlive) { + return { job, action: "skip", reason: "worker still alive" }; + } + + const lastUpdateMs = probe.lastUpdatedAt || toEpochMs(job.updatedAt); + const idleMs = lastUpdateMs ? Date.now() - lastUpdateMs : Infinity; + if (idleMs < STALE_IDLE_MS) { + return { job, action: "skip", reason: `idle ${Math.floor(idleMs / 1000)}s < ${STALE_IDLE_MS / 1000}s threshold` }; + } + + const idleSec = Number.isFinite(idleMs) ? Math.floor(idleMs / 1000) : -1; + const errMsg = `task-worker exited without completion; session last updated ${idleSec}s ago`; + + if (dryRun) { + return { job, action: "would-fail", details: { errorMessage: errMsg } }; + } + + upsertJob(workspace, { + id: job.id, + status: "failed", + completedAt: new Date().toISOString(), + errorMessage: errMsg, + healed: true, + }); + return { + job: { ...job, status: "failed", errorMessage: errMsg, healed: true }, + action: "healed-failed", + details: { errorMessage: errMsg }, + }; +} + +/** + * Auto-heal a list of jobs, returning the (possibly updated) jobs in the same + * order, plus a list of heal actions for reporting. 
+ * + * @param {string} workspace + * @param {object[]} jobs + * @param {object} [opts] + * @returns {Promise<{ jobs: object[], actions: object[] }>} + */ +export async function autoHealJobs(workspace, jobs, opts = {}) { + const actions = []; + const out = []; + for (const j of jobs ?? []) { + try { + const r = await autoHealJob(workspace, j, opts); + out.push(r.job ?? j); + if (r.action && r.action !== "skip") { + actions.push({ id: j.id, action: r.action, details: r.details }); + } + } catch (err) { + // Auto-heal must never crash the caller. + process.stderr.write(`auto-heal: ${j.id} errored: ${err.message}\n`); + out.push(j); + } + } + return { jobs: out, actions }; +} diff --git a/plugins/opencode/scripts/opencode-companion.mjs b/plugins/opencode/scripts/opencode-companion.mjs index 7d57a19..eff3b51 100644 --- a/plugins/opencode/scripts/opencode-companion.mjs +++ b/plugins/opencode/scripts/opencode-companion.mjs @@ -19,6 +19,7 @@ import { renderStatus, renderResult, renderReview, renderSetup } from "./lib/ren import { buildReviewPrompt, buildTaskPrompt } from "./lib/prompts.mjs"; import { getDiff, getStatus as getGitStatus } from "./lib/git.mjs"; import { readJson } from "./lib/fs.mjs"; +import { autoHealJob, autoHealJobs } from "./lib/auto-heal.mjs"; const PLUGIN_ROOT = process.env.CLAUDE_PLUGIN_ROOT || path.resolve(import.meta.dirname, ".."); @@ -38,6 +39,7 @@ const handlers = { status: handleStatus, result: handleResult, cancel: handleCancel, + heal: handleHeal, }; const handler = handlers[subcommand]; @@ -397,9 +399,20 @@ async function handleTaskResumeCandidate(argv) { const { options } = parseArgs(argv, { booleanOptions: ["json"] }); const workspace = await resolveWorkspace(); - const state = loadState(workspace); + let state = loadState(workspace); const sessionId = getClaudeSessionId(); + // Heal first so "latest completed" reflects session reality, not a stale + // "running" flag from a dead worker. + const healable = (state.jobs ?? 
[]).filter( + (j) => j.type === "task" && j.opencodeSessionId && + ["starting", "investigating", "running", "finalizing"].includes(j.status), + ); + if (healable.length > 0) { + await autoHealJobs(workspace, healable); + state = loadState(workspace); + } + const lastTask = state.jobs ?.filter((j) => j.type === "task" && j.opencodeSessionId) ?.filter((j) => j.status === "completed" || j.status === "running") @@ -429,8 +442,18 @@ async function handleStatus(argv) { }); const workspace = await resolveWorkspace(); - const state = loadState(workspace); + let state = loadState(workspace); const sessionId = getClaudeSessionId(); + // Auto-heal stuck jobs before building the snapshot so `status` never lies + // about completion. Safe on ECONNREFUSED (probe returns reachable:false). + const healable = (state.jobs ?? []).filter( + (j) => j.opencodeSessionId && + ["starting", "investigating", "running", "finalizing"].includes(j.status), + ); + if (healable.length > 0) { + await autoHealJobs(workspace, healable); + state = loadState(workspace); + } const jobs = state.jobs ?? []; const wantJson = !!options.json; // --all widens the snapshot filter to every session's jobs; without --all we @@ -486,7 +509,18 @@ async function handleResult(argv) { const ref = positional[0]; const workspace = await resolveWorkspace(); - const state = loadState(workspace); + let state = loadState(workspace); + // Auto-heal before resolving so that if the caller asks for the latest + // result, we don't return "no finished job" while a silently-completed + // session is waiting to be reconciled. + const healable = (state.jobs ?? []).filter( + (j) => j.opencodeSessionId && + ["starting", "investigating", "running", "finalizing"].includes(j.status), + ); + if (healable.length > 0) { + await autoHealJobs(workspace, healable); + state = loadState(workspace); + } const { job, ambiguous } = resolveResultJob(state.jobs ?? 
[], ref); @@ -557,6 +591,56 @@ async function handleCancel(argv) { console.log(`Canceled job: ${job.id}`); } +// ------------------------------------------------------------------ +// Heal (batch auto-reconcile stuck jobs) +// ------------------------------------------------------------------ + +async function handleHeal(argv) { + const { options } = parseArgs(argv ?? [], { + booleanOptions: ["json", "dry-run", "all"], + }); + + const workspace = await resolveWorkspace(); + const state = loadState(workspace); + const sessionId = getClaudeSessionId(); + const dryRun = !!options["dry-run"]; + + let jobs = state.jobs ?? []; + if (!options.all && sessionId) { + jobs = jobs.filter((j) => !j.sessionId || j.sessionId === sessionId); + } + + const healable = jobs.filter( + (j) => j.opencodeSessionId && + ["starting", "investigating", "running", "finalizing"].includes(j.status), + ); + + const { actions } = await autoHealJobs(workspace, healable, { dryRun }); + + if (options.json) { + console.log(JSON.stringify({ + workspaceRoot: workspace, + dryRun, + scanned: healable.length, + actions, + }, null, 2)); + return; + } + + console.log(`## Auto-Heal ${dryRun ? "(dry-run)" : ""}\n`); + console.log(`- Workspace: ${workspace}`); + console.log(`- Scanned stuck jobs: ${healable.length}`); + if (actions.length === 0) { + console.log(`- No actions needed.`); + return; + } + console.log(`- Actions: ${actions.length}\n`); + for (const a of actions) { + const det = a.details ? 
` — ${JSON.stringify(a.details)}` : ""; + console.log(`- **${a.id}**: ${a.action}${det}`); + } +} + // ------------------------------------------------------------------ // Helpers // ------------------------------------------------------------------ From deb34193c9573ce0df5d7f3dfa74e939c904596c Mon Sep 17 00:00:00 2001 From: suharvest Date: Sat, 18 Apr 2026 14:37:34 +0800 Subject: [PATCH 07/22] feat(server): idle + bash-stuck detectors in sendPrompt watcher MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Raise the absolute prompt timeout to 4h as a pure safety cap and move real stall detection into the watcher so long-but-alive tasks aren't killed by a fixed deadline. - Idle timeout (OPENCODE_IDLE_TIMEOUT_MS, default 15min): abort when the session shows no message/part/tool-output change for too long. - Bash-tool stuck detector: when the latest tool is a bash in status running but `opencode serve` has zero child processes for N consecutive polls (default 3 × 5s), abort. This catches the ask-permission deadlock (sst/opencode#14473) where the shell process already exited cleanly but tool state never flipped to completed. Gracefully degrades on Windows or when lsof/pgrep is unavailable. - Restructure fetch-vs-watcher race so a rejection from one side no longer cancels the other. The server's 5-min POST cap used to kill sendPrompt before the watcher could observe completion; now both settle independently and we prefer whichever succeeded. --- .../opencode/scripts/lib/opencode-server.mjs | 189 +++++++++++++++++- 1 file changed, 178 insertions(+), 11 deletions(-) diff --git a/plugins/opencode/scripts/lib/opencode-server.mjs b/plugins/opencode/scripts/lib/opencode-server.mjs index 1d32943..f0438f0 100644 --- a/plugins/opencode/scripts/lib/opencode-server.mjs +++ b/plugins/opencode/scripts/lib/opencode-server.mjs @@ -2,7 +2,12 @@ // Unlike codex-plugin-cc which uses JSON-RPC over stdin/stdout, // OpenCode exposes a REST API + SSE. 
This module wraps that API. -import { spawn } from "node:child_process"; +import { spawn, spawnSync } from "node:child_process"; + +// Re-export for spec-compliance / discoverability: probeSessionTerminal lives +// in auto-heal.mjs because it is tightly coupled to heal-decision logic, but +// conceptually it is a server probe. +export { probeSessionTerminal } from "./auto-heal.mjs"; const DEFAULT_PORT = 4096; const DEFAULT_HOST = "127.0.0.1"; @@ -10,9 +15,69 @@ const SERVER_START_TIMEOUT = 30_000; // Long-running tasks (e.g. engine builds, large refactors) can easily exceed // the old 5-10 min caps, causing `fetch failed` at a fixed deadline. Default -// to 30 min; override via env for even longer workloads. +// PROMPT_TIMEOUT_MS to 4 hours — absolute safety cap. Real stall detection +// lives in the watcher via IDLE_TIMEOUT_MS + pgrep child-process check. const REQUEST_TIMEOUT_MS = Number(process.env.OPENCODE_REQUEST_TIMEOUT_MS) || 1_800_000; -const PROMPT_TIMEOUT_MS = Number(process.env.OPENCODE_PROMPT_TIMEOUT_MS) || 1_800_000; +const PROMPT_TIMEOUT_MS = Number(process.env.OPENCODE_PROMPT_TIMEOUT_MS) || 14_400_000; +// How long a session may go without ANY activity signal before we assume it +// is stuck. Activity = new message, new parts, tool output growth, status +// change. Default 15 min — long enough for most silent-but-alive tasks. +const IDLE_TIMEOUT_MS = Number(process.env.OPENCODE_IDLE_TIMEOUT_MS) || 900_000; +// Bash-tool "no child process" consecutive-miss threshold. If the latest +// tool is a bash in status=running but opencode serve has zero child +// processes for N polls in a row, declare stuck. 3 × 5s = 15s grace. +const PGREP_MISS_THRESHOLD = Number(process.env.OPENCODE_PGREP_MISS_THRESHOLD) || 3; + +const IS_WINDOWS = process.platform === "win32"; + +/** + * Find the PID of `opencode serve` listening on `port`, if we can. + * Returns null on Windows or any detection failure (caller degrades gracefully). 
+ */ +function resolveServePid(port) { + if (IS_WINDOWS) return null; + try { + // macOS + Linux: lsof works the same way. Short timeout so we never block + // the watcher loop if the tool is slow/missing. + const r = spawnSync("lsof", ["-nP", `-iTCP:${port}`, "-sTCP:LISTEN"], { + encoding: "utf8", + timeout: 2000, + }); + if (r.status !== 0 || !r.stdout) return null; + const lines = r.stdout.split("\n").slice(1).filter(Boolean); + for (const line of lines) { + const cols = line.trim().split(/\s+/); + const pid = Number(cols[1]); + if (Number.isInteger(pid) && pid > 0) return pid; + } + } catch { + // lsof missing or errored — degrade to no pgrep checks + } + return null; +} + +/** + * Count direct child processes of `pid`. Returns: + * -1 — feature unavailable (Windows, pgrep missing, etc.) — caller should skip check + * 0 — no children + * >0 — that many children + */ +function countChildren(pid) { + if (!pid || IS_WINDOWS) return -1; + try { + const r = spawnSync("pgrep", ["-P", String(pid)], { + encoding: "utf8", + timeout: 2000, + }); + if (r.error) return -1; + // pgrep exits 1 when no matches (empty stdout) — that's a real "zero", not a failure + const out = (r.stdout || "").trim(); + if (!out) return 0; + return out.split("\n").filter(Boolean).length; + } catch { + return -1; + } +} /** * Check if an OpenCode server is already running on the given port. @@ -179,6 +244,21 @@ export function createClient(baseUrl, opts = {}) { // Wait briefly so the new generation has a chance to start and we // don't latch onto a stale completed message from before this prompt. await new Promise((r) => setTimeout(r, MIN_POLL_DELAY_MS)); + + // Resolve the opencode serve PID once so we can check for child + // processes later. If this fails (Windows, no lsof, permissions) + // we silently skip the pgrep-based stuck detector — idle timeout + // still covers most cases. 
+ const urlObj = (() => { + try { return new URL(baseUrl); } catch { return null; } + })(); + const port = Number(urlObj?.port) || DEFAULT_PORT; + const opencodePid = resolveServePid(port); + + let prevSig = ""; + let lastActivityMs = Date.now(); + let pgrepMissCount = 0; + while (!ac.signal.aborted) { try { const params = new URLSearchParams({ limit: "1" }); @@ -190,8 +270,30 @@ export function createClient(baseUrl, opts = {}) { const arr = await r.json(); const last = Array.isArray(arr) ? arr[arr.length - 1] : null; const info = last?.info; - // Only treat assistant messages created *after* this prompt - // started as a completion signal for this call. + const parts = Array.isArray(last?.parts) ? last.parts : []; + // Most recent tool part — the one actually "running" if any. + let lastTool = null; + for (let i = parts.length - 1; i >= 0; i--) { + if (parts[i]?.type === "tool") { lastTool = parts[i]; break; } + } + + // Activity signature: any change here = progress was made. + const sig = JSON.stringify({ + mid: info?.id, + created: info?.time?.created, + completed: info?.time?.completed, + parts: parts.length, + tStatus: lastTool?.state?.status, + tOutLen: (lastTool?.state?.output || "").length, + }); + if (sig !== prevSig) { + lastActivityMs = Date.now(); + prevSig = sig; + pgrepMissCount = 0; + } + + // Completion signal: assistant message created after our prompt + // started, with a terminal `finish` field populated. if ( info && info.role === "assistant" && @@ -201,23 +303,88 @@ export function createClient(baseUrl, opts = {}) { ) { return { source: "watcher", data: last }; } + + // Bash-tool stuck detector: latest tool is bash in status=running + // but opencode serve has zero children for N consecutive polls. + // This is the signature of the "ask permission deadlock" bug + // (sst/opencode#14473): the shell process already exited cleanly + // but tool state never flipped to completed. 
+ if ( + opencodePid && + lastTool?.tool === "bash" && + lastTool?.state?.status === "running" + ) { + const n = countChildren(opencodePid); + if (n === 0) { + pgrepMissCount += 1; + if (pgrepMissCount >= PGREP_MISS_THRESHOLD) { + ac.abort( + new Error( + `bash tool stuck — opencode serve (pid ${opencodePid}) has no child for ${pgrepMissCount} polls while tool.status=running`, + ), + ); + throw new Error("bash tool stuck (no child)"); + } + } else if (n > 0) { + pgrepMissCount = 0; + } + // n === -1 → feature unavailable, don't count either way + } + + // Idle timeout: nothing happened in the session for too long. + // Covers all tool types (not just bash), including non-pgrep + // platforms (Windows). + const idleMs = Date.now() - lastActivityMs; + if (idleMs > IDLE_TIMEOUT_MS) { + ac.abort( + new Error( + `session idle ${Math.floor(idleMs / 1000)}s > ${IDLE_TIMEOUT_MS / 1000}s`, + ), + ); + throw new Error("session idle timeout"); + } } - } catch { - // Ignore transient poll errors; keep waiting. + } catch (err) { + // If we aborted above, propagate so the outer race sees a failure. + if (ac.signal.aborted) throw err; + // Otherwise it's a transient network/server blip — keep polling. } await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS)); } throw new Error("watcher aborted"); })(); + // Settle-wrap each so a single rejection doesn't lose the other side. + // Server-side 5-min POST cap means fetchPromise often rejects LONG + // before the agent is actually done; we must still wait on the watcher. + const wrap = (p, via) => + p.then( + (v) => ({ ok: true, via, data: v.data }), + (err) => ({ ok: false, via, err }), + ); + const runFetch = wrap(fetchPromise, "fetch"); + const runWatcher = wrap(watcherPromise, "watcher"); + try { - const winner = await Promise.race([fetchPromise, watcherPromise]); - // Whichever arrived first, cancel the other. 
+ const first = await Promise.race([runFetch, runWatcher]); + if (first.ok) { + ac.abort(); + fetchPromise.catch(() => {}); + watcherPromise.catch(() => {}); + return first.data; + } + // First to settle was a failure — the other promise may still succeed. + // Do NOT abort yet: in particular, the watcher needs to keep polling + // when the POST was killed by the server's 5-min cap but generation + // is still running. + const second = first.via === "fetch" ? await runWatcher : await runFetch; ac.abort(); - // Swallow the loser's rejection to avoid unhandled rejection noise. fetchPromise.catch(() => {}); watcherPromise.catch(() => {}); - return winner.data; + if (second.ok) return second.data; + // Both failed — surface the more informative error. Prefer the + // fetch error because it usually has the HTTP status/body. + throw first.via === "fetch" ? first.err : second.err; } finally { clearTimeout(timeoutId); } From 76162011dd124222a9f0efb0b17eb15a04b4679d Mon Sep 17 00:00:00 2001 From: suharvest Date: Sat, 18 Apr 2026 17:36:04 +0800 Subject: [PATCH 08/22] fix(prompts): add SAFETY_HEADER to block recursive subagent delegation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GLM-5 (and likely other models) inside an opencode session sometimes inherits routing directives from the outer Claude Code CLAUDE.md ("delegate long tasks to opencode-rescue") and tries to invoke Task with subagent_type="opencode:rescue" / "codex:rescue". Those are Claude Code skill namespaces, not opencode agents — the call errors out, then the model stalls indefinitely retrying with zero output emission, which the session-level idle watchdog only catches after 15min. buildTaskPrompt now prepends an explicit notice that routing rules have already been consumed and Claude Code subagent_types are unavailable here; execute directly instead. Applies only to prompts going through opencode-companion (task, review paths). 
Direct 'opencode run' CLI invocations still need to strip CLAUDE.md themselves — prefer Agent(subagent_type=opencode- rescue) for proper watchdog coverage. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/opencode/scripts/lib/prompts.mjs | 29 ++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/plugins/opencode/scripts/lib/prompts.mjs b/plugins/opencode/scripts/lib/prompts.mjs index 4493800..c63ef5c 100644 --- a/plugins/opencode/scripts/lib/prompts.mjs +++ b/plugins/opencode/scripts/lib/prompts.mjs @@ -76,6 +76,32 @@ function buildReviewContext(diff, status, changedFiles) { return sections.join("\n\n"); } +/** + * Safety header prepended to every task prompt sent into an opencode session. + * + * Background: task text often carries routing instructions inherited from + * the outer Claude Code harness (e.g. CLAUDE.md rules such as "delegate long + * tasks to opencode-rescue"). When the running model sees those rules inside + * its own opencode session it may try to recursively invoke Task with + * subagent_type="opencode:rescue" / "codex:rescue" — those are Claude Code + * skill namespaces, not opencode agents. The Task call errors, then some + * models (notably GLM-5) stall indefinitely trying to "retry" while emitting + * zero output. Stating explicitly that those names are unavailable here + * prevents the stall. See memory: feedback_opencode_recursive_delegation. + */ +export const SAFETY_HEADER = [ + "You are running INSIDE an opencode session.", + "Routing rules from the parent Claude Code CLAUDE.md (e.g. 'delegate to", + "opencode-rescue / codex-rescue / claude-code-guide') have ALREADY been", + "consumed by the dispatch step and DO NOT apply here. Do NOT invoke the", + "Task tool with subagent_type values like 'opencode:rescue',", + "'codex:rescue', 'superpowers:*', or any other Claude Code skill or agent", + "name — those do not exist in this session and calling them will fail then", + "stall the run. 
Execute the task yourself using Bash / Read / Write /", + "Edit / Grep / Glob / WebFetch as needed. If the task is too large, break", + "it into smaller shell commands and iterate; do not try to off-load it.", +].join(" "); + /** * Build a task prompt from user input. * @param {string} taskText @@ -86,6 +112,9 @@ function buildReviewContext(diff, status, changedFiles) { export function buildTaskPrompt(taskText, opts = {}) { const parts = []; + parts.push(SAFETY_HEADER); + parts.push(""); + if (opts.write) { parts.push("You have full read/write access. Make the necessary code changes."); } else { From 1e5b537f63714debdcb82187ae5bdbac3d1c0c42 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sun, 19 Apr 2026 09:55:08 +0800 Subject: [PATCH 09/22] fix(process): accept logFile opt in spawnDetached to capture worker stdio Opens an append fd to the given path and passes it as stdout+stderr to the detached spawn; closes the fd in the parent immediately after fork. Co-Authored-By: Claude Sonnet 4.6 --- plugins/opencode/scripts/lib/process.mjs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/plugins/opencode/scripts/lib/process.mjs b/plugins/opencode/scripts/lib/process.mjs index a199e78..0394552 100644 --- a/plugins/opencode/scripts/lib/process.mjs +++ b/plugins/opencode/scripts/lib/process.mjs @@ -1,6 +1,7 @@ // Process utilities for the OpenCode companion. import { spawn } from "node:child_process"; +import fs from "node:fs"; /** * Resolve the full path to the `opencode` binary. @@ -69,12 +70,18 @@ export function runCommand(cmd, args, opts = {}) { * @returns {import("node:child_process").ChildProcess} */ export function spawnDetached(cmd, args, opts = {}) { - const child = spawn(cmd, args, { - stdio: "ignore", - detached: true, - cwd: opts.cwd, - env: { ...process.env, ...opts.env }, - }); + const logFd = opts.logFile ? fs.openSync(opts.logFile, "a") : null; + let child; + try { + child = spawn(cmd, args, { + stdio: logFd === null ? 
"ignore" : ["ignore", logFd, logFd], + detached: true, + cwd: opts.cwd, + env: { ...process.env, ...opts.env }, + }); + } finally { + if (logFd !== null) fs.closeSync(logFd); + } child.unref(); return child; } From 240b7d5e9d7c47c688b438ed44cd2422d52a1501 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sun, 19 Apr 2026 09:55:14 +0800 Subject: [PATCH 10/22] feat(companion): pre-register queued job record before spawning detached worker Resolves logFile path and calls upsertJob with status=queued before spawn so status/result commands see the job immediately. Updates pid after spawn. Co-Authored-By: Claude Sonnet 4.6 --- .../opencode/scripts/opencode-companion.mjs | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/plugins/opencode/scripts/opencode-companion.mjs b/plugins/opencode/scripts/opencode-companion.mjs index eff3b51..93bed3b 100644 --- a/plugins/opencode/scripts/opencode-companion.mjs +++ b/plugins/opencode/scripts/opencode-companion.mjs @@ -12,7 +12,7 @@ import { parseArgs, extractTaskText } from "./lib/args.mjs"; import { isOpencodeInstalled, getOpencodeVersion, spawnDetached } from "./lib/process.mjs"; import { isServerRunning, ensureServer, createClient, connect } from "./lib/opencode-server.mjs"; import { resolveWorkspace } from "./lib/workspace.mjs"; -import { loadState, updateState, upsertJob, generateJobId, jobDataPath } from "./lib/state.mjs"; +import { loadState, updateState, upsertJob, generateJobId, jobDataPath, jobLogPath } from "./lib/state.mjs"; import { buildStatusSnapshot, resolveResultJob, resolveCancelableJob, enrichJob, matchJobReference } from "./lib/job-control.mjs"; import { createJobRecord, runTrackedJob, getClaudeSessionId } from "./lib/tracked-jobs.mjs"; import { renderStatus, renderResult, renderReview, renderSetup } from "./lib/render.mjs"; @@ -268,6 +268,22 @@ async function handleTask(argv) { // Background mode: spawn a detached worker if (options.background) { + const logFile = 
jobLogPath(workspace, job.id); + fs.mkdirSync(path.dirname(logFile), { recursive: true }); + upsertJob(workspace, { + id: job.id, + status: "queued", + phase: "queued", + logFile, + request: { + taskText, + agentName, + isWrite, + resumeSessionId, + model: options.model, + }, + }); + const workerArgs = [ path.join(PLUGIN_ROOT, "scripts", "opencode-companion.mjs"), "task-worker", @@ -280,7 +296,8 @@ async function handleTask(argv) { if (resumeSessionId) workerArgs.push("--resume-session", resumeSessionId); if (options.model) workerArgs.push("--model", options.model); - spawnDetached("node", workerArgs, { cwd: workspace }); + const child = spawnDetached("node", workerArgs, { cwd: workspace, logFile }); + upsertJob(workspace, { id: job.id, pid: child.pid }); console.log(`OpenCode task started in background: ${job.id}`); console.log("Check `/opencode:status` for progress."); return; From 743639b0ddadf143ca6e0c0f8a954f5cb69ffdd2 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sun, 19 Apr 2026 09:56:24 +0800 Subject: [PATCH 11/22] fix(server): relax looksTerminal to not require info.finish; gate behind OPENCODE_STRICT_TERMINAL Some OpenCode versions emit terminal assistant messages without a finish field. Accept completed > 0 as sufficient when OPENCODE_STRICT_TERMINAL != "1". Applies to both sendPrompt watcher and probeSessionTerminal. 
Co-Authored-By: Claude Sonnet 4.6 --- plugins/opencode/scripts/lib/auto-heal.mjs | 14 ++++++++------ plugins/opencode/scripts/lib/opencode-server.mjs | 14 ++++++++------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/plugins/opencode/scripts/lib/auto-heal.mjs b/plugins/opencode/scripts/lib/auto-heal.mjs index 88a555b..2dcc044 100644 --- a/plugins/opencode/scripts/lib/auto-heal.mjs +++ b/plugins/opencode/scripts/lib/auto-heal.mjs @@ -10,10 +10,9 @@ // // This module provides a best-effort reconciliation pass: given a job with // an `opencodeSessionId`, query the OpenCode server for the last assistant -// message in that session. If it looks terminal (info.finish set and -// completed >= job.startedAt), upsert the job as completed and persist the -// text. If the worker process is gone and the session has been idle long -// enough, mark as failed with a clear error message. +// message in that session. If it looks terminal, upsert the job as completed +// and persist the text. If the worker process is gone and the session has +// been idle long enough, mark as failed with a clear error message. // // All functions are no-ops (or log to stderr and return the original job) // when the server is unreachable, so callers can sprinkle autoHealJob at @@ -26,6 +25,7 @@ import { ensureDir } from "./fs.mjs"; import { upsertJob, jobDataPath } from "./state.mjs"; const DEFAULT_BASE_URL = "http://127.0.0.1:4096"; +const STRICT_TERMINAL = process.env.OPENCODE_STRICT_TERMINAL === "1"; // A worker/session can be legitimately silent for a while (big model thinking, // slow tool) — only declare it dead after >60s of no session activity AND no // live task-worker process. 60s matches the spec. 
@@ -112,8 +112,10 @@ export async function probeSessionTerminal(baseUrl, sessionId, startedAtMs, head const looksTerminal = info.role === "assistant" && - typeof info.finish === "string" && - completed >= (startedAtMs || 0); + completed >= (startedAtMs || 0) && + (STRICT_TERMINAL + ? typeof info.finish === "string" + : typeof info.finish === "string" || completed > 0); if (looksTerminal) { return { diff --git a/plugins/opencode/scripts/lib/opencode-server.mjs b/plugins/opencode/scripts/lib/opencode-server.mjs index f0438f0..2c4776d 100644 --- a/plugins/opencode/scripts/lib/opencode-server.mjs +++ b/plugins/opencode/scripts/lib/opencode-server.mjs @@ -19,10 +19,11 @@ const SERVER_START_TIMEOUT = 30_000; // lives in the watcher via IDLE_TIMEOUT_MS + pgrep child-process check. const REQUEST_TIMEOUT_MS = Number(process.env.OPENCODE_REQUEST_TIMEOUT_MS) || 1_800_000; const PROMPT_TIMEOUT_MS = Number(process.env.OPENCODE_PROMPT_TIMEOUT_MS) || 14_400_000; +const STRICT_TERMINAL = process.env.OPENCODE_STRICT_TERMINAL === "1"; // How long a session may go without ANY activity signal before we assume it // is stuck. Activity = new message, new parts, tool output growth, status -// change. Default 15 min — long enough for most silent-but-alive tasks. -const IDLE_TIMEOUT_MS = Number(process.env.OPENCODE_IDLE_TIMEOUT_MS) || 900_000; +// change. Default 1h — long enough for silent-but-live tool subprocesses. +const IDLE_TIMEOUT_MS = Number(process.env.OPENCODE_IDLE_TIMEOUT_MS) || 3_600_000; // Bash-tool "no child process" consecutive-miss threshold. If the latest // tool is a bash in status=running but opencode serve has zero child // processes for N polls in a row, declare stuck. 3 × 5s = 15s grace. @@ -271,6 +272,8 @@ export function createClient(baseUrl, opts = {}) { const last = Array.isArray(arr) ? arr[arr.length - 1] : null; const info = last?.info; const parts = Array.isArray(last?.parts) ? last.parts : []; + const completed = typeof info?.time?.completed === "number" ? 
info.time.completed : 0; + const hasTerminalFinish = typeof info?.finish === "string"; // Most recent tool part — the one actually "running" if any. let lastTool = null; for (let i = parts.length - 1; i >= 0; i--) { @@ -293,13 +296,12 @@ export function createClient(baseUrl, opts = {}) { } // Completion signal: assistant message created after our prompt - // started, with a terminal `finish` field populated. + // started. Some OpenCode versions omit `finish` on terminal messages. if ( info && info.role === "assistant" && - typeof info.time?.completed === "number" && - info.time.completed >= startedAt && - typeof info.finish === "string" + completed >= startedAt && + (STRICT_TERMINAL ? hasTerminalFinish : hasTerminalFinish || completed > 0) ) { return { source: "watcher", data: last }; } From 3c77c98cd921a9ba5aca3764932881b3a5f21504 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sun, 19 Apr 2026 09:56:35 +0800 Subject: [PATCH 12/22] fix(server): bump idle timeout to 1h and skip abort when opencode has live child processes Prevents premature session termination when tool subprocesses are still running. Resets lastActivityMs and logs a reason instead of aborting. Co-Authored-By: Claude Sonnet 4.6 --- plugins/opencode/scripts/lib/opencode-server.mjs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/plugins/opencode/scripts/lib/opencode-server.mjs b/plugins/opencode/scripts/lib/opencode-server.mjs index 2c4776d..94c8b9d 100644 --- a/plugins/opencode/scripts/lib/opencode-server.mjs +++ b/plugins/opencode/scripts/lib/opencode-server.mjs @@ -338,6 +338,16 @@ export function createClient(baseUrl, opts = {}) { // platforms (Windows). 
const idleMs = Date.now() - lastActivityMs; if (idleMs > IDLE_TIMEOUT_MS) { + if (opencodePid) { + const childCount = countChildren(opencodePid); + if (childCount > 0) { + lastActivityMs = Date.now(); + process.stderr.write( + `opencode watcher: session idle ${Math.floor(idleMs / 1000)}s, but opencode serve (pid ${opencodePid}) has ${childCount} child process(es); continuing\n`, + ); + continue; + } + } ac.abort( new Error( `session idle ${Math.floor(idleMs / 1000)}s > ${IDLE_TIMEOUT_MS / 1000}s`, From 6569f112d4dd0e2008abadf130f60c1423ba6019 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sun, 19 Apr 2026 09:56:44 +0800 Subject: [PATCH 13/22] fix(auto-heal): mark job failed when server unreachable and worker process is dead Replaces the unconditional skip with a dead-worker check: if job.pid is gone and updatedAt is older than STALE_IDLE_MS, upsert status=failed and return healed-failed so the job stops being retried. Co-Authored-By: Claude Sonnet 4.6 --- plugins/opencode/scripts/lib/auto-heal.mjs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/plugins/opencode/scripts/lib/auto-heal.mjs b/plugins/opencode/scripts/lib/auto-heal.mjs index 2dcc044..ea4ac2d 100644 --- a/plugins/opencode/scripts/lib/auto-heal.mjs +++ b/plugins/opencode/scripts/lib/auto-heal.mjs @@ -221,6 +221,27 @@ export async function autoHealJob(workspace, job, opts = {}) { // Not terminal. Can we at least declare it dead? if (!probe.reachable) { + const updatedMs = toEpochMs(job.updatedAt); + const idleMs = updatedMs ? 
Date.now() - updatedMs : Infinity; + if (!isProcessAlive(job.pid) && idleMs >= STALE_IDLE_MS) { + const errMsg = "server unreachable while worker dead"; + if (dryRun) { + return { job, action: "would-fail", details: { errorMessage: errMsg } }; + } + + upsertJob(workspace, { + id: job.id, + status: "failed", + completedAt: new Date().toISOString(), + errorMessage: errMsg, + healed: true, + }); + return { + job: { ...job, status: "failed", errorMessage: errMsg, healed: true }, + action: "healed-failed", + details: { errorMessage: errMsg }, + }; + } return { job, action: "skip", reason: `server unreachable: ${probe.error}` }; } From cd5ef395202c38e5480408685da2b97657e87d34 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sun, 19 Apr 2026 09:56:52 +0800 Subject: [PATCH 14/22] fix(monitor): emit explicit TERMINAL=timeout echo when MAX_POLLS exhausted Adds OPENCODE_MONITOR_MAX_POLLS (default 120) poll counter. When the loop exits due to poll exhaustion rather than a terminal status, emits a TERMINAL=timeout line for each non-terminal job so the parent thread sees a clear signal. Guards against duplicate TERMINAL lines via terminal[] map. 
Co-Authored-By: Claude Sonnet 4.6 --- .../scripts/post-tool-use-monitor-hook.mjs | 43 ++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs b/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs index 77900a1..b7c0f8a 100644 --- a/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs +++ b/plugins/opencode/scripts/post-tool-use-monitor-hook.mjs @@ -72,15 +72,23 @@ function buildMonitorScript(ids, companionPath) { `IDS=(${quoted})`, "RESULT_MAX_CHARS=${OPENCODE_MONITOR_RESULT_CHARS:-1500}", "HEARTBEAT_POLLS=${OPENCODE_MONITOR_HEARTBEAT_POLLS:-10}", + "MAX_POLLS=${OPENCODE_MONITOR_MAX_POLLS:-120}", "declare -A prev", "declare -A hb", - 'for id in "${IDS[@]}"; do prev[$id]=""; hb[$id]=0; done', + "declare -A terminal", + "declare -A last_st", + "declare -A last_phase", + "polls=0", + 'for id in "${IDS[@]}"; do prev[$id]=""; hb[$id]=0; terminal[$id]=0; last_st[$id]="unknown"; last_phase[$id]=""; done', "while true; do", + " polls=$((polls + 1))", " all_done=1", ' for id in "${IDS[@]}"; do', ' json=$(node "$COMP" status "$id" --json 2>/dev/null || printf "{}")', " fields=$(printf '%s' \"$json\" | node -e 'let s=\"\";process.stdin.on(\"data\",d=>s+=d).on(\"end\",()=>{try{const j=JSON.parse(s);const jb=j.job||{};const prog=String(jb.progressPreview||\"\").split(\"\\n\").filter(Boolean);const last=(prog[prog.length-1]||\"\").replace(/[|\\r\\n]/g,\" \").slice(0,200);process.stdout.write([jb.status||\"unknown\",jb.phase||\"\",jb.elapsed||\"\",last].join(\"|\"))}catch(e){process.stdout.write(\"parse-err|||\")}})')", " IFS='|' read -r st phase elapsed last <<< \"$fields\"", + ' last_st[$id]="$st"', + ' last_phase[$id]="$phase"', ' sig="${st}/${phase}|${last}"', ' if [ "$sig" != "${prev[$id]}" ]; then', ' ts=$(date +%H:%M:%S)', @@ -101,15 +109,18 @@ function buildMonitorScript(ids, companionPath) { " fi", ' case "$st" in', " completed|failed|cancelled)", - ' result=$(node "$COMP" result 
"$id" 2>/dev/null || true)', - " # Truncate defensively so Monitor output stays bounded.", - ' summary=$(printf "%s" "$result" | head -c "$RESULT_MAX_CHARS")', - ' ts=$(date +%H:%M:%S)', - ' echo "[$ts] opencode $id TERMINAL=$st — result summary:"', - ' echo "--- result-begin $id ---"', - ' printf "%s" "$summary"', - ' echo ""', - ' echo "--- result-end $id ---"', + ' if [ "${terminal[$id]}" -eq 0 ]; then', + ' result=$(node "$COMP" result "$id" 2>/dev/null || true)', + " # Truncate defensively so Monitor output stays bounded.", + ' summary=$(printf "%s" "$result" | head -c "$RESULT_MAX_CHARS")', + ' ts=$(date +%H:%M:%S)', + ' echo "[$ts] opencode $id TERMINAL=$st — result summary:"', + ' echo "--- result-begin $id ---"', + ' printf "%s" "$summary"', + ' echo ""', + ' echo "--- result-end $id ---"', + " terminal[$id]=1", + " fi", " ;;", ' *) all_done=0 ;;', " esac", @@ -118,6 +129,18 @@ function buildMonitorScript(ids, companionPath) { " echo \"[$(date +%H:%M:%S)] opencode: all tracked tasks terminal — exiting monitor\"", " break", " fi", + ' if [ "$polls" -ge "$MAX_POLLS" ]; then', + ' for id in "${IDS[@]}"; do', + ' if [ "${terminal[$id]}" -eq 0 ]; then', + ' st="${last_st[$id]}"', + ' phase="${last_phase[$id]}"', + ' ts=$(date +%H:%M:%S)', + ' echo "[$ts] opencode $id TERMINAL=timeout — monitor gave up after $polls polls (last status: $st/$phase). Run companion status/result manually."', + " terminal[$id]=1", + " fi", + " done", + " break", + " fi", " sleep 30", "done", ].join("\n"); From 0247c993b4d06494821c7dd3ef1106ff39844022 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sun, 19 Apr 2026 09:59:15 +0800 Subject: [PATCH 15/22] fix(server): avoid busy-loop in idle watcher live-child skip The prior patch used `continue` inside the idle-threshold branch, which skipped the POLL_INTERVAL_MS sleep at loop tail and hammered the opencode server while tools were alive. Replace with a straight if/else so the reset path falls through to the normal poll interval. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../opencode/scripts/lib/opencode-server.mjs | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/plugins/opencode/scripts/lib/opencode-server.mjs b/plugins/opencode/scripts/lib/opencode-server.mjs index 94c8b9d..32540a2 100644 --- a/plugins/opencode/scripts/lib/opencode-server.mjs +++ b/plugins/opencode/scripts/lib/opencode-server.mjs @@ -338,22 +338,20 @@ export function createClient(baseUrl, opts = {}) { // platforms (Windows). const idleMs = Date.now() - lastActivityMs; if (idleMs > IDLE_TIMEOUT_MS) { - if (opencodePid) { - const childCount = countChildren(opencodePid); - if (childCount > 0) { - lastActivityMs = Date.now(); - process.stderr.write( - `opencode watcher: session idle ${Math.floor(idleMs / 1000)}s, but opencode serve (pid ${opencodePid}) has ${childCount} child process(es); continuing\n`, - ); - continue; - } + const liveChildren = opencodePid ? countChildren(opencodePid) : 0; + if (liveChildren > 0) { + lastActivityMs = Date.now(); + process.stderr.write( + `opencode watcher: session idle ${Math.floor(idleMs / 1000)}s, but opencode serve (pid ${opencodePid}) has ${liveChildren} child process(es); continuing\n`, + ); + } else { + ac.abort( + new Error( + `session idle ${Math.floor(idleMs / 1000)}s > ${IDLE_TIMEOUT_MS / 1000}s`, + ), + ); + throw new Error("session idle timeout"); } - ac.abort( - new Error( - `session idle ${Math.floor(idleMs / 1000)}s > ${IDLE_TIMEOUT_MS / 1000}s`, - ), - ); - throw new Error("session idle timeout"); } } } catch (err) { From 4fb3c30d3554dbf7043b8cc58403591871018aea Mon Sep 17 00:00:00 2001 From: suharvest Date: Sat, 18 Apr 2026 14:37:08 +0800 Subject: [PATCH 16/22] fix(state): derive own plugin data dir to avoid cross-plugin leak CLAUDE_PLUGIN_DATA can be exported by an unrelated plugin (e.g. codex companion) into the shared env, causing opencode state to land in another plugin's data directory. 
Derive our own data dir from the script's install path instead, falling back to CLAUDE_PLUGIN_DATA only when it already names an opencode-scoped dir. Add OPENCODE_COMPANION_DATA as an explicit override. --- plugins/opencode/scripts/lib/state.mjs | 54 ++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/plugins/opencode/scripts/lib/state.mjs b/plugins/opencode/scripts/lib/state.mjs index d5f02f0..1bdc48e 100644 --- a/plugins/opencode/scripts/lib/state.mjs +++ b/plugins/opencode/scripts/lib/state.mjs @@ -4,20 +4,66 @@ import crypto from "node:crypto"; import path from "node:path"; +import { fileURLToPath } from "node:url"; import { ensureDir, readJson, writeJson } from "./fs.mjs"; const MAX_JOBS = 50; +/** + * Derive the opencode-companion's own plugin data directory from the script's + * install path. Claude Code installs plugins at + * {root}/plugins/cache/{owner}-{repo}/{plugin}/{version}/scripts/lib/state.mjs + * and assigns per-plugin data at + * {root}/plugins/data/{plugin}-{owner}-{repo}/ + * If CLAUDE_PLUGIN_DATA is exported by an UNRELATED plugin (e.g. codex + * companion), env-based lookup would leak opencode state into that plugin's + * data dir. Deriving our own path avoids that cross-contamination. + * + * Returns null if the path layout doesn't match (e.g. running from repo source). + */ +function deriveOwnDataDir() { + try { + const here = fileURLToPath(import.meta.url); + const parts = here.split(path.sep); + const cacheIdx = parts.lastIndexOf("cache"); + if (cacheIdx < 1 || cacheIdx + 4 >= parts.length) return null; + const ownerRepo = parts[cacheIdx + 1]; + const pluginName = parts[cacheIdx + 2]; + const rootBase = parts.slice(0, cacheIdx).join(path.sep); + return path.join(rootBase, "data", `${pluginName}-${ownerRepo}`); + } catch { + return null; + } +} + /** * Compute the state directory root for a workspace. + * + * Priority: + * 1. Explicit opt-in via OPENCODE_COMPANION_DATA (per-plugin override) + * 2.
Self-derived path from script location (correct under normal install) + * 3. Only trust CLAUDE_PLUGIN_DATA when it already names our own plugin — + * otherwise ignore it (another plugin may have exported it into our env) + * 4. Fallback: /tmp/opencode-companion + * * @param {string} workspacePath * @returns {string} */ export function stateRoot(workspacePath) { - const base = - process.env.CLAUDE_PLUGIN_DATA - ? path.join(process.env.CLAUDE_PLUGIN_DATA, "state") - : path.join("/tmp", "opencode-companion"); + let base; + if (process.env.OPENCODE_COMPANION_DATA) { + base = path.join(process.env.OPENCODE_COMPANION_DATA, "state"); + } else { + const own = deriveOwnDataDir(); + const envData = process.env.CLAUDE_PLUGIN_DATA; + if (own) { + base = path.join(own, "state"); + } else if (envData && /opencode/i.test(path.basename(envData))) { + base = path.join(envData, "state"); + } else { + base = path.join("/tmp", "opencode-companion"); + } + } const hash = crypto.createHash("sha256").update(workspacePath).digest("hex").slice(0, 16); return path.join(base, hash); } From c215eaa630409c6929c4513135ece30d7f4a2118 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sat, 18 Apr 2026 14:53:50 +0800 Subject: [PATCH 17/22] feat(server): classify errors + auto-repair opencode.json on ensureServer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two server-side robustness features so newcomers hit fewer cryptic failures: 1. errors.mjs — classifyError() annotates fetch/abort/connection errors with actionable fix hints. Wired into request() and sendPrompt() in opencode-server.mjs so "fetch failed" becomes e.g. "Aborted after Ns (OPENCODE_PROMPT_TIMEOUT_MS=...). For longer tasks set ... to a higher value." ECONNREFUSED gets the start-server hint, 401/403 points at OPENCODE_SERVER_PASSWORD, 5xx points at docker logs. 2. 
opencode-config.mjs — ensureOpencodeConfig() writes the headless-safe permission set (bash/edit/webfetch/external_directory all "allow") to ~/.config/opencode/opencode.json before ensureServer spawns opencode. Idempotent, atomic temp+rename, respects XDG_CONFIG_HOME. Fixes sst/opencode#14473 hang that cost 48 min in one session to diagnose. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/opencode/scripts/lib/errors.mjs | 78 +++++++++++++++ .../opencode/scripts/lib/opencode-config.mjs | 99 +++++++++++++++++++ .../opencode/scripts/lib/opencode-server.mjs | 41 ++++++-- 3 files changed, 210 insertions(+), 8 deletions(-) create mode 100644 plugins/opencode/scripts/lib/errors.mjs create mode 100644 plugins/opencode/scripts/lib/opencode-config.mjs diff --git a/plugins/opencode/scripts/lib/errors.mjs b/plugins/opencode/scripts/lib/errors.mjs new file mode 100644 index 0000000..0cc4885 --- /dev/null +++ b/plugins/opencode/scripts/lib/errors.mjs @@ -0,0 +1,78 @@ +// Error classification helpers — turn raw fetch/abort/HTTP errors into +// actionable messages that point newcomers at the right knob to turn. +// +// Usage: +// try { ... } +// catch (err) { throw classifyError(err, { baseUrl, startedAt, timeoutMs }); } +// +// classifyError always returns an Error (never swallows). When the input +// matches one of the known patterns, the returned Error's message is prefixed +// with a one-line hint; otherwise the original error is returned unchanged. + +/** + * Classify an error and return an annotated Error instance. + * @param {any} err + * @param {object} [ctx] + * @param {string} [ctx.baseUrl] + * @param {number} [ctx.startedAt] - epoch ms when the request started + * @param {number} [ctx.timeoutMs] - caller-side abort timeout + * @param {string} [ctx.op] - operation name (e.g. 
"sendPrompt", "request GET /session") + * @returns {Error} + */ +export function classifyError(err, ctx = {}) { + if (!err) return new Error("unknown error"); + const original = err instanceof Error ? err : new Error(String(err)); + const msg = original.message || ""; + const code = original.code || original.cause?.code || ""; + const elapsedSec = ctx.startedAt ? Math.round((Date.now() - ctx.startedAt) / 1000) : null; + + // AbortSignal fired from our side (caller-imposed timeout). + if (original.name === "AbortError" || /abort/i.test(msg)) { + if (ctx.timeoutMs && elapsedSec != null && elapsedSec * 1000 >= ctx.timeoutMs * 0.9) { + const envVar = ctx.op === "sendPrompt" ? "OPENCODE_PROMPT_TIMEOUT_MS" : "OPENCODE_REQUEST_TIMEOUT_MS"; + return annotate(original, + `Aborted after ${elapsedSec}s (${envVar}=${ctx.timeoutMs}). For longer tasks set ${envVar}=3600000 or higher.`); + } + // ~5min watershed — opencode server closes POST body at that boundary. + if (elapsedSec != null && elapsedSec >= 290 && elapsedSec <= 320) { + return annotate(original, + `OpenCode server closed POST body at ~5min (watcher took over if task still running).`); + } + } + + // Connection refused — server not listening. + if (code === "ECONNREFUSED" || /ECONNREFUSED/.test(msg)) { + const url = ctx.baseUrl || "http://127.0.0.1:4096"; + return annotate(original, + `OpenCode server at ${url} unreachable. Start it with 'opencode serve --port 4096' or run 'companion doctor'.`); + } + + // HTTP status-coded errors (shape: "OpenCode API GET /foo returned 401: ...") + const m = msg.match(/returned (\d{3})\b/); + if (m) { + const status = Number(m[1]); + if (status === 401 || status === 403) { + return annotate(original, + `Auth failed (${status}). 
Check OPENCODE_SERVER_PASSWORD / OPENCODE_SERVER_USERNAME env.`); + } + if (status >= 500 && status < 600) { + return annotate(original, + `OpenCode server error ${status} (check 'docker logs' or opencode logs):`); + } + } + + // fetch failed at exactly ~5min — server-side body close. + if (/fetch failed/i.test(msg) && elapsedSec != null && elapsedSec >= 290 && elapsedSec <= 320) { + return annotate(original, + `OpenCode server closed POST body at ~5min (watcher took over if task still running).`); + } + + return original; +} + +function annotate(err, hint) { + const wrapped = new Error(`${hint} [${err.message}]`); + wrapped.cause = err; + wrapped.code = err.code; + return wrapped; +} diff --git a/plugins/opencode/scripts/lib/opencode-config.mjs b/plugins/opencode/scripts/lib/opencode-config.mjs new file mode 100644 index 0000000..87ff2fc --- /dev/null +++ b/plugins/opencode/scripts/lib/opencode-config.mjs @@ -0,0 +1,99 @@ +// Self-heal for ~/.config/opencode/opencode.json permissions. +// +// Background: opencode's bash tool hangs forever in headless mode when +// permission.external_directory == "ask" (sst/opencode#14473). +// Our companion runs opencode serve in headless mode, so we MUST have: +// permission.bash = "allow" +// permission.edit = "allow" +// permission.webfetch = "allow" +// permission.external_directory = "allow" +// +// ensureOpencodeConfig() merges these in idempotently, preserving any other +// user keys. Called from ensureServer() before spawning opencode serve, and +// from the `doctor` subcommand. + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { ensureDir, readJson } from "./fs.mjs"; + +export const REQUIRED_PERMISSIONS = { + bash: "allow", + edit: "allow", + webfetch: "allow", + external_directory: "allow", +}; + +/** + * Resolve the opencode config file path, respecting $XDG_CONFIG_HOME. 
+ * @returns {string} + */ +export function resolveConfigPath() { + const xdg = process.env.XDG_CONFIG_HOME; + const base = xdg && xdg.length > 0 ? xdg : path.join(os.homedir(), ".config"); + return path.join(base, "opencode", "opencode.json"); +} + +/** + * Read the opencode config, returning the parsed object or a fresh scaffold. + * @returns {{ path: string, exists: boolean, data: object }} + */ +export function readOpencodeConfig() { + const p = resolveConfigPath(); + const exists = fs.existsSync(p); + const data = exists ? (readJson(p) ?? {}) : { $schema: "https://opencode.ai/config.json" }; + return { path: p, exists, data }; +} + +/** + * Returns the set of permission keys that are NOT already "allow". + * @param {object} data + * @returns {string[]} + */ +export function missingPermissions(data) { + const perm = (data && data.permission) || {}; + const missing = []; + for (const [k, v] of Object.entries(REQUIRED_PERMISSIONS)) { + if (perm[k] !== v) missing.push(k); + } + return missing; +} + +/** + * Ensure opencode.json has all required permissions set to "allow". + * Idempotent: if everything is already correct, does not touch the file. + * + * @param {object} [opts] + * @param {boolean} [opts.dryRun] - if true, do not write; just report what would change + * @param {boolean} [opts.silent] - if true, suppress stderr notice on change + * @returns {{ path: string, changed: boolean, missing: string[], dryRun: boolean }} + */ +export function ensureOpencodeConfig(opts = {}) { + const dryRun = !!opts.dryRun; + const { path: p, exists, data } = readOpencodeConfig(); + const missing = missingPermissions(data); + + if (missing.length === 0 && exists) { + return { path: p, changed: false, missing: [], dryRun }; + } + + if (dryRun) { + return { path: p, changed: false, missing, dryRun }; + } + + // Merge permissions, preserving other keys. 
+ const merged = { ...data }; + if (!merged.$schema) merged.$schema = "https://opencode.ai/config.json"; + merged.permission = { ...(merged.permission || {}), ...REQUIRED_PERMISSIONS }; + + ensureDir(path.dirname(p)); + const tmp = `${p}.tmp.${process.pid}`; + fs.writeFileSync(tmp, JSON.stringify(merged, null, 2) + "\n", "utf8"); + fs.renameSync(tmp, p); + + if (!opts.silent) { + process.stderr.write(`[opencode-companion] Ensured opencode.json permissions (headless-safe)\n`); + } + return { path: p, changed: true, missing, dryRun }; +} diff --git a/plugins/opencode/scripts/lib/opencode-server.mjs b/plugins/opencode/scripts/lib/opencode-server.mjs index 32540a2..67b4730 100644 --- a/plugins/opencode/scripts/lib/opencode-server.mjs +++ b/plugins/opencode/scripts/lib/opencode-server.mjs @@ -8,6 +8,8 @@ import { spawn, spawnSync } from "node:child_process"; // in auto-heal.mjs because it is tightly coupled to heal-decision logic, but // conceptually it is a server probe. export { probeSessionTerminal } from "./auto-heal.mjs"; +import { ensureOpencodeConfig } from "./opencode-config.mjs"; +import { classifyError } from "./errors.mjs"; const DEFAULT_PORT = 4096; const DEFAULT_HOST = "127.0.0.1"; @@ -114,6 +116,14 @@ export async function ensureServer(opts = {}) { return { url, alreadyRunning: true }; } + // Self-heal permissions BEFORE spawning the server. The running daemon reads + // opencode.json at startup; fixing it after the spawn would require a restart. + try { + ensureOpencodeConfig(); + } catch (err) { + process.stderr.write(`[opencode-companion] ensureOpencodeConfig failed: ${err.message}\n`); + } + // Start the server const proc = spawn("opencode", ["serve", "--port", String(port)], { stdio: ["ignore", "pipe", "pipe"], @@ -155,15 +165,24 @@ export function createClient(baseUrl, opts = {}) { } async function request(method, path, body) { - const res = await fetch(`${baseUrl}${path}`, { - method, - headers, - body: body != null ? 
JSON.stringify(body) : undefined, - signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), - }); + const startedAt = Date.now(); + let res; + try { + res = await fetch(`${baseUrl}${path}`, { + method, + headers, + body: body != null ? JSON.stringify(body) : undefined, + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), + }); + } catch (err) { + throw classifyError(err, { baseUrl, startedAt, timeoutMs: REQUEST_TIMEOUT_MS, op: `request ${method} ${path}` }); + } if (!res.ok) { const text = await res.text().catch(() => ""); - throw new Error(`OpenCode API ${method} ${path} returned ${res.status}: ${text}`); + throw classifyError( + new Error(`OpenCode API ${method} ${path} returned ${res.status}: ${text}`), + { baseUrl, startedAt, timeoutMs: REQUEST_TIMEOUT_MS, op: `request ${method} ${path}` }, + ); } const ct = res.headers.get("content-type") ?? ""; if (ct.includes("application/json")) { @@ -394,7 +413,13 @@ export function createClient(baseUrl, opts = {}) { if (second.ok) return second.data; // Both failed — surface the more informative error. Prefer the // fetch error because it usually has the HTTP status/body. - throw first.via === "fetch" ? first.err : second.err; + const rawErr = first.via === "fetch" ? first.err : second.err; + throw classifyError(rawErr, { + baseUrl, + startedAt, + timeoutMs: PROMPT_TIMEOUT_MS, + op: "sendPrompt", + }); } finally { clearTimeout(timeoutId); } From 08ae72bb171a1603d5901072b70c628504b00225 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sat, 18 Apr 2026 14:54:03 +0800 Subject: [PATCH 18/22] feat(companion): add doctor + config onboarding subcommands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New subcommands for first-run self-service: - doctor [--fix] [--json] [--verbose] — 8 health checks covering opencode binary, config permissions, server reachability, plugin data dir resolution, stuck jobs, disk. With --fix, writes missing opencode.json permissions and runs autoHealJobs. 
Exits 1 on failures for CI use. - config [--json] — dumps all recognized env vars with current values and source (default | env), plus resolved state dir, opencode config path, and server reachability. Makes hidden defaults discoverable. Adds getSessionLastActivity() to auto-heal.mjs (used in follow-up commit for status breadcrumbs). Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/opencode/scripts/lib/auto-heal.mjs | 60 +++++ .../opencode/scripts/opencode-companion.mjs | 224 +++++++++++++++++- 2 files changed, 283 insertions(+), 1 deletion(-) diff --git a/plugins/opencode/scripts/lib/auto-heal.mjs b/plugins/opencode/scripts/lib/auto-heal.mjs index ea4ac2d..88e5d4d 100644 --- a/plugins/opencode/scripts/lib/auto-heal.mjs +++ b/plugins/opencode/scripts/lib/auto-heal.mjs @@ -132,6 +132,66 @@ export async function probeSessionTerminal(baseUrl, sessionId, startedAtMs, head } } +/** + * Fetch the session's last activity summary for display in `status` output. + * Never throws — returns null on any failure (unreachable, bad response). + * + * Return shape: + * null — server unreachable / no info + * { kind:"tool", tool, command, ageSec } — last part is a tool call + * { kind:"text", text, ageSec } — last part is text + * { kind:"none", ageSec } — session has info but empty parts + * + * @param {string} baseUrl + * @param {string} sessionId + * @returns {Promise} + */ +export async function getSessionLastActivity(baseUrl, sessionId) { + if (!sessionId) return null; + const h = buildHeaders(); + try { + const res = await fetch(`${baseUrl}/session/${sessionId}/message?limit=1`, { + method: "GET", + headers: h, + signal: AbortSignal.timeout(5_000), + }); + if (!res.ok) return null; + const arr = await res.json(); + const last = Array.isArray(arr) ? arr[arr.length - 1] : null; + if (!last) return null; + const info = last.info; + const parts = Array.isArray(last.parts) ? last.parts : []; + + const updatedMs = Math.max( + typeof info?.time?.completed === "number" ? 
info.time.completed : 0, + typeof info?.time?.created === "number" ? info.time.created : 0, + ); + const ageSec = updatedMs ? Math.max(0, Math.floor((Date.now() - updatedMs) / 1000)) : null; + + // Walk backwards to find the most recent meaningful part. + for (let i = parts.length - 1; i >= 0; i--) { + const p = parts[i]; + if (!p) continue; + if (p.type === "tool") { + const tool = p.tool || "tool"; + let command = ""; + const input = p.state?.input || p.input || {}; + if (tool === "bash") command = String(input.command || input.cmd || ""); + else if (tool === "edit" || tool === "write") command = String(input.filePath || input.file_path || input.path || ""); + else if (tool === "read") command = String(input.filePath || input.file_path || input.path || ""); + else command = JSON.stringify(input).slice(0, 120); + return { kind: "tool", tool, command: command.slice(0, 80), ageSec }; + } + if (p.type === "text" && typeof p.text === "string" && p.text.trim()) { + return { kind: "text", text: p.text.trim().slice(0, 80), ageSec }; + } + } + return { kind: "none", ageSec }; + } catch { + return null; + } +} + /** * Parse an ISO-ish timestamp that might be a number or string. Returns epoch ms, or 0. 
*/ diff --git a/plugins/opencode/scripts/opencode-companion.mjs b/plugins/opencode/scripts/opencode-companion.mjs index 93bed3b..4be681c 100644 --- a/plugins/opencode/scripts/opencode-companion.mjs +++ b/plugins/opencode/scripts/opencode-companion.mjs @@ -19,7 +19,10 @@ import { renderStatus, renderResult, renderReview, renderSetup } from "./lib/ren import { buildReviewPrompt, buildTaskPrompt } from "./lib/prompts.mjs"; import { getDiff, getStatus as getGitStatus } from "./lib/git.mjs"; import { readJson } from "./lib/fs.mjs"; -import { autoHealJob, autoHealJobs } from "./lib/auto-heal.mjs"; +import { autoHealJob, autoHealJobs, getSessionLastActivity } from "./lib/auto-heal.mjs"; +import { ensureOpencodeConfig, readOpencodeConfig, missingPermissions, resolveConfigPath } from "./lib/opencode-config.mjs"; +import { stateRoot } from "./lib/state.mjs"; +import { runCommand } from "./lib/process.mjs"; const PLUGIN_ROOT = process.env.CLAUDE_PLUGIN_ROOT || path.resolve(import.meta.dirname, ".."); @@ -40,6 +43,8 @@ const handlers = { result: handleResult, cancel: handleCancel, heal: handleHeal, + doctor: handleDoctor, + config: handleConfig, }; const handler = handlers[subcommand]; @@ -692,6 +697,223 @@ function extractResponseText(response) { return JSON.stringify(response, null, 2); } +// ------------------------------------------------------------------ +// Doctor (onboarding self-test + optional auto-repair) +// ------------------------------------------------------------------ + +async function handleDoctor(argv) { + const { options } = parseArgs(argv ?? [], { + booleanOptions: ["json", "fix", "verbose"], + }); + const fix = !!options.fix; + const wantJson = !!options.json; + const verbose = !!options.verbose; + const IS_WINDOWS = process.platform === "win32"; + + const checks = []; + const push = (name, status, detail, hint) => checks.push({ name, status, detail, hint }); + + // 1. 
opencode binary in PATH + const which = await runCommand("which", ["opencode"]).catch(() => ({ exitCode: 1, stdout: "" })); + if (which.exitCode === 0 && which.stdout.trim()) { + push("opencode-binary", "PASS", which.stdout.trim(), null); + } else { + push("opencode-binary", "FAIL", "not in PATH", + "Install: npm i -g opencode-ai OR brew install opencode"); + } + + // 2. opencode version + const ver = await runCommand("opencode", ["--version"]).catch(() => ({ exitCode: 1, stdout: "" })); + if (ver.exitCode === 0) { + push("opencode-version", "PASS", ver.stdout.trim() || "(unknown)", null); + } else { + push("opencode-version", "WARN", "could not resolve version", null); + } + + // 3. opencode.json permissions (HEADLESS-SAFE — biggest footgun) + const cfg = readOpencodeConfig(); + const missing = missingPermissions(cfg.data); + if (cfg.exists && missing.length === 0) { + push("opencode-config", "PASS", `${cfg.path} (all permissions allow)`, null); + } else { + const detail = cfg.exists + ? `${cfg.path} — missing: ${missing.join(", ")}` + : `${cfg.path} — file missing`; + if (fix) { + const r = ensureOpencodeConfig({ silent: true }); + push("opencode-config", r.changed ? "PASS" : "WARN", + r.changed ? `fixed: ${r.path}` : detail, null); + } else { + push("opencode-config", "FAIL", detail, + "Run with --fix (or set: permission.{bash,edit,webfetch,external_directory} = \"allow\")"); + } + } + + // 4. server reachable + const serverUrl = "http://127.0.0.1:4096"; + let reachable = false; + try { + const r = await fetch(`${serverUrl}/global/health`, { signal: AbortSignal.timeout(2000) }); + reachable = r.ok; + } catch { + reachable = false; + } + if (reachable) { + push("opencode-server", "PASS", `${serverUrl} reachable`, null); + } else { + push("opencode-server", "WARN", `${serverUrl} not reachable`, + "Start it: opencode serve --port 4096 &"); + } + + // 5. 
CLAUDE_PLUGIN_DATA sanity check + const envData = process.env.CLAUDE_PLUGIN_DATA; + if (envData && !/opencode/i.test(path.basename(envData))) { + push("plugin-data-env", "WARN", + `CLAUDE_PLUGIN_DATA=${envData} — basename lacks "opencode"`, + "State will self-derive from script path; env is ignored to avoid cross-plugin leak."); + } else { + push("plugin-data-env", "PASS", envData ? envData : "(unset — self-derived)", null); + } + + // 6. resolved state dir + const workspace = await resolveWorkspace(); + const sRoot = stateRoot(workspace); + push("state-dir", "PASS", sRoot, null); + + // 7. stuck jobs for this workspace + const state = loadState(workspace); + const sessionId = getClaudeSessionId(); + const jobs = (state.jobs ?? []).filter((j) => !sessionId || j.sessionId === sessionId); + const healable = jobs.filter( + (j) => j.opencodeSessionId && + ["starting", "investigating", "running", "finalizing"].includes(j.status), + ); + if (healable.length === 0) { + push("stuck-jobs", "PASS", "none", null); + } else if (fix) { + const { actions } = await autoHealJobs(workspace, healable); + push("stuck-jobs", "PASS", `healed ${actions.length}/${healable.length}`, null); + } else { + push("stuck-jobs", "WARN", `${healable.length} in non-terminal state`, + "Run: companion heal (or companion doctor --fix)"); + } + + // 8. disk free on state-dir parent + if (IS_WINDOWS) { + push("disk-free", "PASS", "N/A (Windows)", null); + } else { + // Walk up to the first existing ancestor — stateRoot may not exist yet. 
+ let probe = sRoot; + while (probe && probe !== "/" && !fs.existsSync(probe)) probe = path.dirname(probe); + const df = await runCommand("df", ["-h", probe]).catch(() => ({ exitCode: 1, stdout: "" })); + const lines = (df.stdout || "").split("\n").filter(Boolean); + const last = lines[lines.length - 1] || ""; + if (last && last !== lines[0]) { + push("disk-free", "PASS", last.split(/\s+/).slice(0, 5).join(" "), null); + } else { + push("disk-free", "WARN", "df unavailable", null); + } + } + + // Summary + const nFail = checks.filter((c) => c.status === "FAIL").length; + const nWarn = checks.filter((c) => c.status === "WARN").length; + const summary = nFail + nWarn === 0 + ? "All good" + : `${nWarn} warnings, ${nFail} failures${!fix ? " — run with --fix to repair" : ""}`; + + if (wantJson) { + console.log(JSON.stringify({ + summary: { failures: nFail, warnings: nWarn, fix }, + checks, + workspace, + stateRoot: sRoot, + }, null, 2)); + return; + } + + // Compact text output — ~1 line per check + for (const c of checks) { + const tag = c.status === "PASS" ? "PASS" : c.status === "WARN" ? "WARN" : "FAIL"; + console.log(`[${tag}] ${c.name} — ${c.detail}`); + if (verbose && c.hint) console.log(` ${c.hint}`); + else if (c.status !== "PASS" && c.hint) console.log(` ${c.hint}`); + } + console.log(`\n${nFail + nWarn === 0 ? "OK" : "!! "} ${summary}`); + if (nFail > 0 && !fix) process.exit(1); +} + +// ------------------------------------------------------------------ +// Config (resolved settings dump — easier onboarding than reading source) +// ------------------------------------------------------------------ + +async function handleConfig(argv) { + const { options } = parseArgs(argv ?? 
[], { booleanOptions: ["json"] }); + const wantJson = !!options.json; + + const envSpec = [ + ["OPENCODE_REQUEST_TIMEOUT_MS", "1800000", "Per-HTTP-request abort timeout"], + ["OPENCODE_PROMPT_TIMEOUT_MS", "14400000", "sendPrompt absolute cap (race against server 5min body-close)"], + ["OPENCODE_IDLE_TIMEOUT_MS", "900000", "Session idle watchdog (no activity → abort)"], + ["OPENCODE_PGREP_MISS_THRESHOLD","3", "Consecutive pgrep-misses before declaring bash tool stuck"], + ["OPENCODE_COMPLETION_POLL_MS", "5000", "Watcher poll interval during sendPrompt"], + ["OPENCODE_MONITOR_RESULT_CHARS","(hook)", "Monitor hook: max chars per tool-result snippet"], + ["OPENCODE_MONITOR_HEARTBEAT_POLLS","(hook)", "Monitor hook: polls between heartbeat pings"], + ["OPENCODE_COMPANION_DATA", "(self-derived)", "Override for plugin data dir"], + ["OPENCODE_SERVER_PASSWORD", "(unset)", "HTTP Basic auth password"], + ["OPENCODE_SERVER_USERNAME", "opencode", "HTTP Basic auth username"], + ]; + const envRows = envSpec.map(([name, dflt, desc]) => { + const v = process.env[name]; + return { + name, + value: v != null ? v : dflt, + source: v != null ? "env" : (dflt.startsWith("(") ? 
"default" : "default"), + description: desc, + }; + }); + + const workspace = await resolveWorkspace(); + const sRoot = stateRoot(workspace); + const cfg = readOpencodeConfig(); + const missing = missingPermissions(cfg.data); + const serverUrl = "http://127.0.0.1:4096"; + let serverReachable = false; + try { + const r = await fetch(`${serverUrl}/global/health`, { signal: AbortSignal.timeout(2000) }); + serverReachable = r.ok; + } catch {} + + const out = { + env: envRows, + workspace, + stateRoot: sRoot, + opencodeConfig: { + path: cfg.path, + exists: cfg.exists, + permissionsOk: missing.length === 0, + missing, + }, + server: { url: serverUrl, reachable: serverReachable }, + }; + + if (wantJson) { + console.log(JSON.stringify(out, null, 2)); + return; + } + + console.log("## OpenCode Companion Config\n"); + console.log(`- Workspace: ${workspace}`); + console.log(`- State dir: ${sRoot}`); + console.log(`- Config file: ${cfg.path} (${cfg.exists ? "exists" : "missing"}${missing.length ? ", missing: " + missing.join(",") : ", permissions OK"})`); + console.log(`- Server: ${serverUrl} (${serverReachable ? "reachable" : "unreachable"})`); + console.log("\n### Environment variables\n"); + for (const r of envRows) { + const src = r.source === "env" ? "env" : "default"; + console.log(`- ${r.name} = ${r.value} [${src}] — ${r.description}`); + } +} + /** * Try to parse a string as JSON, returning null on failure. * @param {string} text From b124d3de52f26b9a2391d4a037f8544c576011bb Mon Sep 17 00:00:00 2001 From: suharvest Date: Sat, 18 Apr 2026 14:54:22 +0800 Subject: [PATCH 19/22] feat(status,docs): progress breadcrumbs + README quickstart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit handleStatus now enriches each running job with its opencode session's last activity (tool + command head + age, or text snippet) via getSessionLastActivity. render.mjs displays breadcrumb under the job line when present. 
Makes "investigating" actionable — users see "bash: docker exec speech ... (3s ago)" instead of a static phase label. README gets a Quickstart pointing at 'doctor' as the first thing to run, an Environment Variables table documenting all OPENCODE_* knobs with defaults, and a Pitfalls section for the two recurring surprises (stale investigating → heal; headless bash hang → doctor --fix). Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 45 +++++++++++++++++++ plugins/opencode/scripts/lib/render.mjs | 5 ++- .../opencode/scripts/opencode-companion.mjs | 20 +++++++++ 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a18b686..1e2a69b 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,28 @@ Use OpenCode from inside Claude Code for code reviews or to delegate tasks. This plugin is for Claude Code users who want an easy way to start using OpenCode from the workflow they already have. +## Quickstart + +```bash +# 1. Install opencode (once) +npm i -g opencode-ai # or: brew install opencode + +# 2. Install the plugin (see Install section below) + +# 3. Run the self-test — fixes common footguns for you +node ~/.claude/plugins/cache/tasict-opencode-plugin-cc/opencode/1.0.0/scripts/opencode-companion.mjs doctor --fix +``` + +Then delegate a task from Claude Code: + +``` +/opencode:rescue grep for XXX in src/ and summarize +``` + +`doctor --fix` writes the correct `~/.config/opencode/opencode.json` permissions so the +bash tool does not hang in headless mode (sst/opencode#14473). This is the single biggest +footgun for newcomers — `ensureServer` will also run this fix automatically on first use. + ## What You Get - `/opencode:review` for a normal read-only OpenCode review @@ -128,6 +150,29 @@ If the OpenCode server is unreachable, auto-heal is a no-op — status/result commands still work, they just can't move stuck jobs forward until the server comes back. 
+## Environment variables + +| Variable | Default | Purpose | +|---|---|---| +| `OPENCODE_REQUEST_TIMEOUT_MS` | `1800000` | Per-HTTP-request abort timeout | +| `OPENCODE_PROMPT_TIMEOUT_MS` | `14400000` | `sendPrompt` absolute cap (races the 5-min server body-close) | +| `OPENCODE_IDLE_TIMEOUT_MS` | `900000` | Session idle watchdog — no activity for this long → abort | +| `OPENCODE_PGREP_MISS_THRESHOLD` | `3` | Consecutive pgrep-misses before declaring a stuck bash tool | +| `OPENCODE_COMPLETION_POLL_MS` | `5000` | Watcher poll interval during `sendPrompt` | +| `OPENCODE_COMPANION_DATA` | (self-derived) | Override for plugin data dir (otherwise derived from script path) | +| `OPENCODE_MONITOR_RESULT_CHARS` | (hook default) | Monitor hook: max chars per tool-result snippet | +| `OPENCODE_MONITOR_HEARTBEAT_POLLS` | (hook default) | Monitor hook: polls between heartbeats | +| `OPENCODE_SERVER_PASSWORD` | (unset) | HTTP Basic auth password for `opencode serve` | +| `OPENCODE_SERVER_USERNAME` | `opencode` | HTTP Basic auth username | + +Run `companion.mjs config` to see resolved values with source (env vs default). + +## Pitfalls + +- **`companion status` stuck on `investigating`** — run `companion heal` (or wait; `status`/`result` auto-heal on every call). +- **Bash tool hangs for minutes** — run `companion doctor --fix` to merge the required `permission.*=allow` keys into `~/.config/opencode/opencode.json`. This is sst/opencode#14473 in headless mode. +- **`CLAUDE_PLUGIN_DATA` points at another plugin** — harmless: the companion self-derives its own data dir from `import.meta.url`. `doctor` will print a WARN so you know. + ## Troubleshooting
diff --git a/plugins/opencode/scripts/lib/render.mjs b/plugins/opencode/scripts/lib/render.mjs index b91b3a3..edbbdd3 100644 --- a/plugins/opencode/scripts/lib/render.mjs +++ b/plugins/opencode/scripts/lib/render.mjs @@ -11,8 +11,9 @@ export function renderStatus(snapshot) { if (snapshot.running.length > 0) { lines.push("## Running Jobs\n"); for (const job of snapshot.running) { - lines.push(`- **${job.id}** (${job.type}) — ${job.phase ?? "running"} — ${job.elapsed ?? "just started"}`); - if (job.progressPreview) { + const phase = job.breadcrumb || job.phase || "running"; + lines.push(`- **${job.id}** (${job.type}) — ${phase} — ${job.elapsed ?? "just started"}`); + if (!job.breadcrumb && job.progressPreview) { lines.push(` > ${job.progressPreview.split("\n").join("\n > ")}`); } } diff --git a/plugins/opencode/scripts/opencode-companion.mjs b/plugins/opencode/scripts/opencode-companion.mjs index 4be681c..d05a946 100644 --- a/plugins/opencode/scripts/opencode-companion.mjs +++ b/plugins/opencode/scripts/opencode-companion.mjs @@ -510,6 +510,26 @@ async function handleStatus(argv) { const snapshot = buildStatusSnapshot(jobs, workspace, { sessionId: sessionFilter }); + // Enrich running jobs with a live breadcrumb from the opencode session — + // gives newcomers a human-legible "running — bash: docker exec ..." line + // instead of a stale "investigating" phase from state.json. Runs in parallel + // and gracefully falls back if the server is unreachable. + if (snapshot.running.length > 0) { + const baseUrl = "http://127.0.0.1:4096"; + await Promise.all(snapshot.running.map(async (job) => { + if (!job.opencodeSessionId) return; + const act = await getSessionLastActivity(baseUrl, job.opencodeSessionId); + if (!act) return; + const age = act.ageSec != null ? `${act.ageSec}s ago` : ""; + if (act.kind === "tool") { + const head = act.command ? `: ${act.command}` : ""; + job.breadcrumb = `running — ${act.tool}${head}${age ? 
` (${age})` : ""}`.trim(); + } else if (act.kind === "text") { + job.breadcrumb = `running — "${act.text}"${age ? ` (${age})` : ""}`; + } + })); + } + if (wantJson) { // Machine-readable shape mirrors the single-task case so callers can treat // both uniformly: a `.job` field is present for single-task, otherwise From 3915b8371ab7fec7988dbf1484bf21d2abecd5b1 Mon Sep 17 00:00:00 2001 From: suharvest Date: Sun, 19 Apr 2026 10:20:27 +0800 Subject: [PATCH 20/22] feat(monitor): detect vague Agent-result notifications and inject poll guidance Rescue subagents occasionally return placeholder strings ("Monitor started", "Waiting for completion", "Task forwarded in background") instead of the companion's rendered terminal report. Add a PostToolUse hook that spots those patterns, surfaces any task ids seen, and tells the main thread to poll companion status/result rather than treat the vague output as final. Updates opencode-rescue agent prompt and the result-handling/runtime skills to document the dispatch-and-poll contract. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/opencode/agents/opencode-rescue.md | 79 ++++++--- plugins/opencode/hooks/hooks.json | 10 ++ .../post-tool-use-vague-notification-hook.mjs | 150 ++++++++++++++++++ .../skills/opencode-result-handling/SKILL.md | 62 +++++++- .../opencode/skills/opencode-runtime/SKILL.md | 27 +++- 5 files changed, 297 insertions(+), 31 deletions(-) create mode 100644 plugins/opencode/scripts/post-tool-use-vague-notification-hook.mjs diff --git a/plugins/opencode/agents/opencode-rescue.md b/plugins/opencode/agents/opencode-rescue.md index dd50e66..9e17321 100644 --- a/plugins/opencode/agents/opencode-rescue.md +++ b/plugins/opencode/agents/opencode-rescue.md @@ -5,39 +5,74 @@ tools: Bash skills: - opencode-runtime - opencode-prompting + - opencode-result-handling --- You are a thin forwarding wrapper around the OpenCode companion task runtime. 
-Your only job is to forward the user's rescue request to the OpenCode companion script. Do not do anything else. +Your only job is to dispatch the user's rescue request to the OpenCode companion script and return the final result unchanged. Do not do anything else. Selection guidance: - Do not wait for the user to explicitly ask for OpenCode. Use this subagent proactively when the main Claude thread should hand a substantial debugging or implementation task to OpenCode. - Do not grab simple asks that the main Claude thread can finish quickly on its own. -Forwarding rules: - -- Use exactly one `Bash` call to invoke `node "${CLAUDE_PLUGIN_ROOT}/scripts/opencode-companion.mjs" task ...`. -- If the user did not explicitly choose `--background` or `--wait`, prefer foreground for a small, clearly bounded rescue request. -- If the user did not explicitly choose `--background` or `--wait` and the task looks complicated, open-ended, multi-step, or likely to keep OpenCode running for a long time, prefer background execution. -- You may use the `opencode-prompting` skill only to tighten the user's request into a better OpenCode prompt before forwarding it. -- Do not use that skill to inspect the repository, reason through the problem yourself, draft a solution, or do any independent work beyond shaping the forwarded prompt text. -- Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own. -- Do not call `review`, `adversarial-review`, `status`, `result`, or `cancel`. This subagent only forwards to `task`. -- Leave `--agent` unset unless the user explicitly requests a specific agent (build or plan). -- Leave model unset by default. Only add `--model` when the user explicitly asks for a specific model. -- Treat `--agent ` and `--model ` as runtime controls and do not include them in the task text you pass through. 
-- Default to a write-capable OpenCode run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits. -- Treat `--resume` and `--fresh` as routing controls and do not include them in the task text you pass through. -- `--resume` means add `--resume-last`. -- `--fresh` means do not add `--resume-last`. -- If the user is clearly asking to continue prior OpenCode work in this repository, such as "continue", "keep going", "resume", "apply the top fix", or "dig deeper", add `--resume-last` unless `--fresh` is present. -- Otherwise forward the task as a fresh `task` run. +Dispatch rules (default — prefer this): + +Use the **dispatch-and-poll loop** for every request by default. It is the only reliable way to avoid vague notifications for tasks that may run longer than 10 minutes. + +1. First `Bash` call — kick off the task in background mode so it does not block the shell, then immediately grep the task-id from its stdout: + + ``` + node "${CLAUDE_PLUGIN_ROOT}/scripts/opencode-companion.mjs" task --background --write "<task text>" 2>&1 | tee /tmp/_oc_task_out && \ + grep -oE 'task-[a-z0-9]{6,}-[a-z0-9]{4,}' /tmp/_oc_task_out | head -1 + ``` + + (Add `--resume-last` when the user said `--resume`, and omit it when the user said `--fresh` — see Command selection below.) + +2. Subsequent `Bash` calls — poll companion status every 30s. Each iteration is a separate short `Bash` call that fits well within the Bash tool timeout: + + ``` + sleep 30 && node "${CLAUDE_PLUGIN_ROOT}/scripts/opencode-companion.mjs" status <task-id> --json | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{const j=JSON.parse(s).job||{};process.stdout.write(j.status+"|"+j.phase+"|"+j.elapsed)}catch(e){process.stdout.write("parse-err||")}})' + ``` + + Parse the `status` field. If it is non-terminal (`starting`, `investigating`, `running`, or `finalizing`), continue looping. If it is `completed`, `failed`, or `cancelled`, stop polling and go to step 3. + +3.
Final `Bash` call — fetch the full result: + + ``` + node "${CLAUDE_PLUGIN_ROOT}/scripts/opencode-companion.mjs" result <task-id> + ``` + + Return that stdout **exactly as-is**. No commentary, no summary, no reformatting. + +Why a loop instead of `tail -f`: a single `tail -f` attached to the companion log file hits the Bash tool timeout (10 min) and returns a vague fallback string ("Waiting for the forwarded task to complete.", "The poll loop is still running in background."). That fallback has no real result for the parent thread to act on. The status-poll loop guarantees the final `Bash` call emits either the full completion result or a clean terminal status ready to be reported. + +Safety net — vague-result prevention: + +- If for any reason your final returned text does **not** include the companion's rendered terminal report (look for the `## Job:` header and the `### Output` section emitted by `companion result`), treat that as a failure to dispatch. Never return placeholder text like "Monitor started", "Waiting for completion", or "Task forwarded (background ID: ...)" as your final answer. +- If the dispatch-and-poll loop failed partway (e.g. Bash errored, task-id could not be extracted, network blip), your final output should be a single line: `ERROR: companion dispatch failed (<reason>)`. The main thread will inspect and retry. + +Command selection: + +- Use exactly one `task` invocation per rescue handoff (followed by poll and result calls). +- If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only. Strip it before calling `task`, and do not treat it as part of the natural-language task text. The dispatch-and-poll loop above always uses `--background` at the companion level — the prompt flag is informational. +- If the forwarded request includes `--model`, pass it through to `task`. +- If the forwarded request includes `--agent`, pass it through to `task`.
+- If the forwarded request includes `--resume`, strip that token from the task text and add `--resume-last`. +- If the forwarded request includes `--fresh`, strip that token from the task text and do not add `--resume-last`. +- `--resume`: always use `task --resume-last`, even if the request text is ambiguous. +- `--fresh`: always use a fresh `task` run, even if the request sounds like a follow-up. + +Safety rules: + +- Default to write-capable OpenCode work in `opencode:opencode-rescue` unless the user explicitly asks for read-only behavior. - Preserve the user's task text as-is apart from stripping routing flags. -- Return the stdout of the `opencode-companion` command exactly as-is. -- If the Bash call fails or OpenCode cannot be invoked, return nothing. +- Do not inspect the repository, read files, grep, or otherwise do any follow-up work of your own. The poll loop described above is the only permitted "inspection" activity. +- Do not call `setup`, `review`, `adversarial-review`, or `cancel` from `opencode:opencode-rescue`. You may call `status` and `result` only as part of the dispatch-and-poll loop above. +- Return the stdout of the final `result` command exactly as-is. +- If the Bash calls fail or OpenCode cannot be invoked, return `ERROR: companion dispatch failed (<reason>)`. Response style: -- Do not add commentary before or after the forwarded `opencode-companion` output. +- Do not add commentary before or after the companion's final result block.
diff --git a/plugins/opencode/hooks/hooks.json b/plugins/opencode/hooks/hooks.json index 8c193dd..63baa58 100644 --- a/plugins/opencode/hooks/hooks.json +++ b/plugins/opencode/hooks/hooks.json @@ -44,6 +44,16 @@ "timeout": 5 } ] + }, + { + "matcher": "Agent", + "hooks": [ + { + "type": "command", + "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/post-tool-use-vague-notification-hook.mjs\"", + "timeout": 5 + } + ] } ] } diff --git a/plugins/opencode/scripts/post-tool-use-vague-notification-hook.mjs b/plugins/opencode/scripts/post-tool-use-vague-notification-hook.mjs new file mode 100644 index 0000000..3286bdd --- /dev/null +++ b/plugins/opencode/scripts/post-tool-use-vague-notification-hook.mjs @@ -0,0 +1,150 @@ +#!/usr/bin/env node + +// PostToolUse hook: watches for Agent tool responses that look like a +// "vague" rescue-task notification — i.e. the rescue subagent returned a +// placeholder string (Monitor started / Waiting for completion / Task +// forwarded in background) instead of the companion's rendered terminal +// report (`## Job: task-xxx` + `### Output`). +// +// When detected, inject an additionalContext reminder telling Claude to +// poll companion status / session messages for the actual terminal +// result, rather than treating the vague result as final. +// +// Why in a hook: even with the updated opencode-rescue.md dispatch-and- +// poll rules, edge cases (network blips, unexpected subagent early exit, +// older cached subagent prompt) can still surface placeholder text. This +// hook is a belt-and-suspenders safety net for the main thread so it +// never silently accepts a vague "completion" as real. 
+ +import fs from "node:fs"; +import path from "node:path"; +import process from "node:process"; +import { fileURLToPath } from "node:url"; + +function readHookInput() { + try { + const raw = fs.readFileSync(0, "utf8").trim(); + if (!raw) return {}; + return JSON.parse(raw); + } catch { + return {}; + } +} + +function extractResponseText(response) { + if (response == null) return ""; + if (typeof response === "string") return response; + if (typeof response === "object") { + if (typeof response.result === "string") return response.result; + if (typeof response.content === "string") return response.content; + return JSON.stringify(response); + } + return String(response); +} + +// Heuristics — any one of these patterns anywhere in the response text +// marks it as a vague placeholder. These are phrases the rescue subagent +// (or its older cached variants) emits when it gives up on waiting. +const VAGUE_PATTERNS = [ + /Monitor started for (?:both |all )?rescue tasks?/i, + /Waiting for (?:the forwarded task|completion|the job)/i, + /poll loop is (?:still )?running in (?:the )?background/i, + /Task forwarded to OpenCode \(background ID/i, + /I'll (?:wait for|surface results when)/i, + /will surface results when (?:they|it) reach/i, +]; + +// A response with the real companion terminal report contains these +// markers (from `companion result`). Presence of any of them means the +// result is real and we should NOT inject the reminder. +const REAL_RESULT_MARKERS = [ + /## Job: task-[a-z0-9-]+/i, + /### Output/, + /\bStatus:\s*(?:completed|failed|cancelled)/i, +]; + +// Only fire for responses that are clearly from the opencode companion +// path (avoid false positives on arbitrary Agent output). +const OPENCODE_MARKERS = [ + /OpenCode task (?:started|forwarded|completed)/i, + /opencode-companion\.mjs/, + /opencode:opencode-rescue/, + /opencode rescue/i, + /task-[a-z0-9]{6,}-[a-z0-9]{4,}/, +]; + +// Companion task ids look like `task-moNNNNNN-NNNNNN`. 
+const TASK_ID_RE = /\btask-[a-z0-9]{6,}-[a-z0-9]{4,}\b/g; + +function resolveCompanionPath() { + const here = fileURLToPath(import.meta.url); + return path.join(path.dirname(here), "opencode-companion.mjs"); +} + +function buildReminder(taskIds, companionPath) { + const idLine = taskIds.length + ? `Likely task id(s) seen in response: ${taskIds.join(", ")}.` + : "No task id was visible in the vague response — check most recent companion job with `node \"" + companionPath + "\" list` style introspection (or `ls -t /Users/harvest/.claude/plugins/data/opencode-tasict-*/state/*/jobs/*.log | head -3`)."; + return [ + "", + "The rescue subagent you just dispatched returned a placeholder string instead of the companion's rendered terminal report. Do NOT treat this as a completed task.", + "", + idLine, + "", + "Before your next response, verify the task actually reached terminal state:", + "", + "1. Status check:", + ` node "${companionPath}" status --json | node -e 'let s=\"\";process.stdin.on(\"data\",d=>s+=d).on(\"end\",()=>{try{const j=JSON.parse(s).job||{};process.stdout.write(j.status+\"|\"+j.phase+\"|\"+j.elapsed)}catch(e){process.stdout.write(\"parse-err\")}})'`, + "", + "2. If still `running`, inspect live tool-call trace (opencode-result-handling skill, Layer 2):", + " `curl -s http://localhost:4096/session//message | python3 ...`", + " `` is in the status JSON's `job.opencodeSessionId`.", + "", + "3. Once status is terminal (`completed` / `failed` / `cancelled`), fetch the real result:", + ` node "${companionPath}" result `, + "", + "Only after you have the companion's rendered terminal report (contains `## Job:` + `### Output` markers) should you report back to the user.", + "", + ].join("\n"); +} + +function main() { + const input = readHookInput(); + const toolName = input.tool_name || ""; + // Vague notifications come from the Agent wrapper's summary text. 
+ if (toolName !== "Agent") return; + + const response = extractResponseText(input.tool_response); + if (!response) return; + + // Must smell like an opencode response at all. + if (!OPENCODE_MARKERS.some((r) => r.test(response))) return; + + // If it already has a real-result marker, do not fire. + if (REAL_RESULT_MARKERS.some((r) => r.test(response))) return; + + // Must match at least one vague pattern. + if (!VAGUE_PATTERNS.some((r) => r.test(response))) return; + + // Extract any task ids that might help the main thread poll. + const ids = [...new Set(response.match(TASK_ID_RE) || [])]; + + const companionPath = resolveCompanionPath(); + const additionalContext = buildReminder(ids, companionPath); + + process.stdout.write( + JSON.stringify({ + hookSpecificOutput: { + hookEventName: "PostToolUse", + additionalContext, + }, + }), + ); +} + +try { + main(); +} catch { + // Best-effort — never block tool use on hook failure. + process.exit(0); +} diff --git a/plugins/opencode/skills/opencode-result-handling/SKILL.md b/plugins/opencode/skills/opencode-result-handling/SKILL.md index ff1e42e..8db69bb 100644 --- a/plugins/opencode/skills/opencode-result-handling/SKILL.md +++ b/plugins/opencode/skills/opencode-result-handling/SKILL.md @@ -1,6 +1,6 @@ --- name: opencode-result-handling -description: Guidance for interpreting and presenting OpenCode task results +description: Guidance for interpreting and presenting OpenCode task results, plus how to poll live progress of a running background task user-invocable: false --- @@ -27,3 +27,63 @@ When displaying results from `/opencode:result`: OpenCode sessions can be resumed by sending additional messages to the same session. The `--resume-last` flag in the companion script handles this by reusing the last session ID from the current workspace state. 
+ +## Inspecting Live Progress (while a task is still running) + +A dispatched opencode task produces live progress in several layers — use the appropriate tool for the granularity you need. + +### Layer 1: Companion phase (coarse, whole-task) + +Phase-level signals like `starting → investigating → running → completed/failed`. Useful for confirming that the task is alive; it does not show the specific work being done. + +```bash +node "${CLAUDE_PLUGIN_ROOT}/scripts/opencode-companion.mjs" status <task-id> --json +``` + +Returns JSON with `job.status`, `job.phase`, `job.elapsed`, `job.opencodeSessionId`, and a `progressPreview` string (just phase-transition lines). + +### Layer 2: Full tool-call trace (fine, every action) + +OpenCode runs a local HTTP server on `http://localhost:4096` that exposes every message and tool call in every session. Hitting the messages endpoint gives you the **complete live trace of what opencode is doing** — bash commands it ran, file reads, edits, assistant reasoning text, tool results. This is the best signal for "what has opencode actually done so far." + +```bash +curl -s http://localhost:4096/session/<session-id>/message +``` + +Get `<session-id>` from Layer 1's `job.opencodeSessionId` (format `ses_XXXXXXXXX...`).
Returns a JSON array where each element has `.info.role` (`user` / `assistant`) and `.parts[]` with: +- `type: "text"` → `part.text` is the assistant's reasoning or commentary +- `type: "tool"` → `part.tool` is the tool name, `part.state.input` is the tool args, `part.state.status` is `pending` / `running` / `completed` / `error` + +Typical usage — tail the last N messages for a quick "what is it doing now": + +```bash +curl -s http://localhost:4096/session/<session-id>/message | python3 -c ' +import json, sys +msgs = json.load(sys.stdin) +for m in msgs[-10:]: + role = m.get("info", {}).get("role", "?") + for p in m.get("parts", []): + t = p.get("type") + if t == "text": + print(" [{}/text] {}".format(role, p.get("text", "")[:200])) + elif t == "tool": + st = p.get("state", {}) + inp = st.get("input", {}) or {} + cmd = inp.get("command") or inp.get("filePath") or inp.get("file_path") or inp.get("pattern") or "" + print(" [{}/tool/{}/{}] {}".format(role, p.get("tool"), st.get("status"), str(cmd)[:160])) +' +``` + +### Layer 3: Bash wrapper output (when subagent tails companion --wait) + +When the rescue subagent runs `companion task --wait` via Bash `run_in_background=true`, the subagent's Bash tool emits a local_bash task-id (e.g. `buzkqvlq7`). Use `TaskOutput(task_id=<bash-id>, block=false)` to see the raw tail of the companion's stdout — this has phase lines, **not** the inner opencode session messages. Prefer Layer 2 for real content. + +### Which layer to use + +- "Is the task still alive / which phase?" → Layer 1 (companion status). +- "What has opencode actually been doing the last few minutes?" → Layer 2 (session messages via HTTP). +- "What did the subagent's shell emit?" → Layer 3 (TaskOutput on the bash-id). + +## When to Ask the User + +If Layer 2 shows the opencode task has been stuck on the same tool call for many minutes without progress, or is looping on the same error, surface that to the user — they can decide whether to cancel or let it continue. Do not silently wait through apparent deadlocks.
diff --git a/plugins/opencode/skills/opencode-runtime/SKILL.md b/plugins/opencode/skills/opencode-runtime/SKILL.md index 7d99a5d..1833b59 100644 --- a/plugins/opencode/skills/opencode-runtime/SKILL.md +++ b/plugins/opencode/skills/opencode-runtime/SKILL.md @@ -11,19 +11,29 @@ Use this skill only inside the `opencode:opencode-rescue` subagent. Primary helper: - `node "${CLAUDE_PLUGIN_ROOT}/scripts/opencode-companion.mjs" task "<raw arguments>"` +Default dispatch pattern — **dispatch-and-poll loop**: + +1. `task --background --write "<prompt>"` → capture task-id (grep from stdout). +2. Multiple `status --json` polls (each a short Bash call, `sleep 30 && node ... status`). +3. Once `status` is terminal (`completed` / `failed` / `cancelled`), final `result <task-id>` → return that stdout unchanged. + +Do not use `tail -f` on the companion log file as a substitute for polling. The Bash tool caps at 10 minutes per call and the tail approach produces a vague fallback string when the task runs longer. The status-poll loop uses multiple short Bash calls that each fit well under the cap. + Execution rules: -- The rescue subagent is a forwarder, not an orchestrator. Its only job is to invoke `task` once and return that stdout unchanged. + +- The rescue subagent is a forwarder, not an orchestrator. Its only work is the dispatch-and-poll loop plus returning the final `result` stdout. - Prefer the helper over hand-rolled `git`, direct OpenCode CLI strings, or any other Bash activity. -- Do not call `setup`, `review`, `adversarial-review`, `status`, `result`, or `cancel` from `opencode:opencode-rescue`. +- Do not call `setup`, `review`, `adversarial-review`, or `cancel` from the subagent. `status` and `result` are permitted only as part of the poll loop. - Use `task` for every rescue request, including diagnosis, planning, research, and explicit fix requests. -- You may use the `opencode-prompting` skill to rewrite the user's request into a tighter OpenCode prompt before the single `task` call. 
+- You may use the `opencode-prompting` skill to rewrite the user's request into a tighter OpenCode prompt before the `task` call. - That prompt drafting is the only Claude-side work allowed. Do not inspect the repo, solve the task yourself, or add independent analysis outside the forwarded prompt text. - Leave `--agent` unset unless the user explicitly requests a specific agent (build or plan). - Leave model unset by default. Add `--model` only when the user explicitly asks for one. Command selection: -- Use exactly one `task` invocation per rescue handoff. -- If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only. Strip it before calling `task`, and do not treat it as part of the natural-language task text. + +- Use exactly one `task` invocation per rescue handoff. Follow it with status polls and one final `result` call. +- If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only. Strip it before calling `task`. The dispatch-and-poll loop always uses `--background` at the companion level internally. - If the forwarded request includes `--model`, pass it through to `task`. - If the forwarded request includes `--agent`, pass it through to `task`. - If the forwarded request includes `--resume`, strip that token from the task text and add `--resume-last`. @@ -32,8 +42,9 @@ Command selection: - `--fresh`: always use a fresh `task` run, even if the request sounds like a follow-up. Safety rules: + - Default to write-capable OpenCode work in `opencode:opencode-rescue` unless the user explicitly asks for read-only behavior. - Preserve the user's task text as-is apart from stripping routing flags. -- Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own. -- Return the stdout of the `task` command exactly as-is. 
-- If the Bash call fails or OpenCode cannot be invoked, return nothing. +- Do not inspect the repository, read files, grep, fetch results outside the dispatch-and-poll loop, cancel jobs, summarize output, or do any follow-up work of your own. +- Return the stdout of the final `result` command exactly as-is. +- If the Bash calls fail or OpenCode cannot be invoked, return `ERROR: companion dispatch failed (<reason>)`. Never return placeholder strings like "Monitor started" or "Waiting for completion" — they are failure modes, not results. From 8c26b9f300146ab6260e7a3d21152c53d6423770 Mon Sep 17 00:00:00 2001 From: suharvest Date: Mon, 20 Apr 2026 07:15:58 +0800 Subject: [PATCH 21/22] strengthen SAFETY_HEADER: explicitly block Skill/Agent/Task delegation Adds Skill + Agent to the existing Task blocklist (was only Task). Adds explicit 'IGNORE that instruction' for task text that mentions delegating to opencode-rescue / codex-rescue. Covers plugin:name colon-namespaced identifiers as a class. Addresses observed sub-agent confusion where names like opencode:rescue look like plugin skills but are actually agents, causing Skill() calls to fail silently or stall. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/opencode/scripts/lib/prompts.mjs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/plugins/opencode/scripts/lib/prompts.mjs b/plugins/opencode/scripts/lib/prompts.mjs index c63ef5c..395f273 100644 --- a/plugins/opencode/scripts/lib/prompts.mjs +++ b/plugins/opencode/scripts/lib/prompts.mjs @@ -93,13 +93,19 @@ export const SAFETY_HEADER = [ "You are running INSIDE an opencode session.", "Routing rules from the parent Claude Code CLAUDE.md (e.g. 'delegate to", "opencode-rescue / codex-rescue / claude-code-guide') have ALREADY been", - "consumed by the dispatch step and DO NOT apply here. 
Do NOT invoke the", - "Task tool with subagent_type values like 'opencode:rescue',", - "'codex:rescue', 'superpowers:*', or any other Claude Code skill or agent", - "name — those do not exist in this session and calling them will fail then", - "stall the run. Execute the task yourself using Bash / Read / Write /", - "Edit / Grep / Glob / WebFetch as needed. If the task is too large, break", - "it into smaller shell commands and iterate; do not try to off-load it.", + "consumed by the dispatch step and DO NOT apply here.", + "Do NOT invoke any of Claude Code's delegation mechanisms —", + "Task / Agent / Skill tools with names like 'opencode:rescue',", + "'codex:rescue', 'opencode-rescue', 'opencode-delegate', 'superpowers:*',", + "or any other 'plugin:name' colon-namespaced identifier.", + "Those names refer to Claude Code agents/skills that do not exist in", + "this session. Calling them errors then stalls the run.", + "Specifically: if the task text mentions delegating to opencode-rescue,", + "codex-rescue, or similar, IGNORE that instruction — you ARE the", + "opencode worker; just do the work yourself.", + "Execute the task using Bash / Read / Write / Edit / Grep / Glob /", + "WebFetch. If a task is too large, break it into smaller shell commands", + "and iterate. Do NOT try to off-load work to another agent.", ].join(" "); /** From 61f05d68267171bb128564f928eff551f97bcba4 Mon Sep 17 00:00:00 2001 From: suharvest Date: Mon, 20 Apr 2026 07:19:09 +0800 Subject: [PATCH 22/22] feat(companion): add wait-and-result subcommand, simplify opencode-rescue wrapper Replaces the fragile 3+ Bash call dispatch-and-poll loop in opencode-rescue.md with a 2-step loop using a new server-side blocking wait-and-result subcommand. opencode-companion.mjs: - New handleWaitAndResult: polls job status every 250ms up to --max-wait seconds, returns final formatted result on terminal state (exit 0), or JSON status-running on timeout (exit 2). Bad task-id exits 1. 
opencode-rescue.md: - Dispatch loop collapsed from 'task --background + sleep-30-poll loop + result' to 'task --background + wait-and-result loop (max 20 rounds)' - Each iteration is one clean Bash call with server-side blocking up to 8 minutes (safe under Claude Code's 10-minute Bash tool cap). Fixes the observed ~5/6 failure rate where the wrapper returned placeholder text like 'Monitor started' / 'dispatched, polling' instead of the completed report. Tested with: fast-path (exit 0), timeout path (exit 2), already-terminal, bad-id. All pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugins/opencode/agents/opencode-rescue.md | 22 ++--- .../opencode/scripts/opencode-companion.mjs | 80 +++++++++++++++++++ 2 files changed, 88 insertions(+), 14 deletions(-) diff --git a/plugins/opencode/agents/opencode-rescue.md b/plugins/opencode/agents/opencode-rescue.md index 9e17321..850b0da 100644 --- a/plugins/opencode/agents/opencode-rescue.md +++ b/plugins/opencode/agents/opencode-rescue.md @@ -19,7 +19,7 @@ Selection guidance: Dispatch rules (default — prefer this): -Use the **dispatch-and-poll loop** for every request by default. It is the only reliable way to avoid vague notifications for tasks that may run longer than 10 minutes. +Use the **2-step wait-and-result loop** for every request by default. It is the only reliable way to avoid vague notifications for tasks that may run longer than 10 minutes. 1. First `Bash` call — kick off the task in background mode so it does not block the shell, then immediately grep the task-id from its stdout: @@ -30,23 +30,17 @@ Use the **dispatch-and-poll loop** for every request by default. It is the only (Include `--resume-last` instead of `--fresh` when the user said `--resume` — see Command selection below.) -2. Subsequent `Bash` calls — poll companion status every 30s. Each iteration is a separate short `Bash` call that fits well within the Bash tool timeout: +2. 
LOOP up to 20 iterations — each iteration calls `wait-and-result` which polls internally: ``` - sleep 30 && node "${CLAUDE_PLUGIN_ROOT}/scripts/opencode-companion.mjs" status --json | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{const j=JSON.parse(s).job||{};process.stdout.write(j.status+"|"+j.phase+"|"+j.elapsed)}catch(e){process.stdout.write("parse-err||")}})' + node "${CLAUDE_PLUGIN_ROOT}/scripts/opencode-companion.mjs" wait-and-result <task-id> --max-wait 480 ``` - Parse the `status` field. If it is `running`, continue looping. If it is `completed`, `failed`, or `cancelled`, stop polling and go to step 3. - -3. Final `Bash` call — fetch the full result: - - ``` - node "${CLAUDE_PLUGIN_ROOT}/scripts/opencode-companion.mjs" result <task-id> - ``` - - Return that stdout **exactly as-is**. No commentary, no summary, no reformatting. - -Why a loop instead of `tail -f`: a single `tail -f` attached to the companion log file hits the Bash tool timeout (10 min) and returns a vague fallback string ("Waiting for the forwarded task to complete.", "The poll loop is still running in background."). That fallback has no real result for the parent thread to act on. The status-poll loop guarantees the final `Bash` call emits either the full completion result or a clean terminal status ready to be reported. + - Exit 0: verify output contains `## Job:` header, return stdout **exactly as-is**. No commentary, no summary. + - Exit 2 (timeout): loop again (task still running). + - Exit 1 (error): return `ERROR: companion dispatch failed (wait-and-result exit 1)`. + + After 20 iterations (~2.7h total): return `ERROR: companion dispatch failed (timeout after 20 wait-and-result rounds)`. 
Safety net — vague-result prevention: diff --git a/plugins/opencode/scripts/opencode-companion.mjs b/plugins/opencode/scripts/opencode-companion.mjs index d05a946..ece4837 100644 --- a/plugins/opencode/scripts/opencode-companion.mjs +++ b/plugins/opencode/scripts/opencode-companion.mjs @@ -41,6 +41,7 @@ const handlers = { "task-resume-candidate": handleTaskResumeCandidate, status: handleStatus, result: handleResult, + "wait-and-result": handleWaitAndResult, cancel: handleCancel, heal: handleHeal, doctor: handleDoctor, @@ -585,6 +586,85 @@ async function handleResult(argv) { console.log(renderResult(enriched, resultData)); } +async function handleWaitAndResult(argv) { + const { options, positional } = parseArgs(argv, { + valueOptions: ["max-wait"], + booleanOptions: ["json"], + }); + + const ref = positional[0]; + if (!ref) { + console.error("No task ID provided."); + process.exit(1); + } + + const maxWait = parseInt(options["max-wait"] ?? "480", 10); + if (isNaN(maxWait) || maxWait < 1 || maxWait > 600) { + console.error("--max-wait must be between 1 and 600 seconds."); + process.exit(1); + } + + const workspace = await resolveWorkspace(); + const state = loadState(workspace); + const jobs = state.jobs ?? []; + + const { job, ambiguous } = matchJobReference(jobs, ref); + if (ambiguous) { + console.error(`Ambiguous job reference "${ref}". 
Please provide a more specific ID.`); + process.exit(1); + } + if (!job) { + console.error(`No job found for "${ref}".`); + process.exit(1); + } + + const startTime = Date.now(); + const pollInterval = 250; + const maxWaitMs = maxWait * 1000; + const logFile = jobLogPath(workspace, job.id); + + const terminalStatuses = ["completed", "failed", "cancelled"]; + + const isTerminal = (j) => terminalStatuses.includes(j?.status); + + if (isTerminal(job)) { + const enriched = enrichJob(job, workspace); + const dataFile = jobDataPath(workspace, job.id); + const resultData = readJson(dataFile); + console.log(renderResult(enriched, resultData)); + process.exit(0); + } + + while (Date.now() - startTime < maxWaitMs) { + await new Promise((r) => setTimeout(r, pollInterval)); + + const currentState = loadState(workspace); + const { job: currentJob } = matchJobReference(currentState.jobs ?? [], ref); + if (isTerminal(currentJob)) { + const enriched = enrichJob(currentJob, workspace); + const dataFile = jobDataPath(workspace, currentJob.id); + const resultData = readJson(dataFile); + console.log(renderResult(enriched, resultData)); + process.exit(0); + } + } + + const elapsedSec = Math.floor((Date.now() - startTime) / 1000); + const timeoutOutput = { + status: "running", + elapsed: elapsedSec, + task_id: job.id, + log_file: logFile, + }; + + if (options.json) { + console.log(JSON.stringify(timeoutOutput)); + } else { + console.log(JSON.stringify(timeoutOutput)); + } + process.exit(2); +} + async function handleCancel(argv) { const { positional } = parseArgs(argv, {}); const ref = positional[0];