diff --git a/packages/cli/src/commands/transcribe.test.ts b/packages/cli/src/commands/transcribe.test.ts new file mode 100644 index 0000000000..b485cb6a74 --- /dev/null +++ b/packages/cli/src/commands/transcribe.test.ts @@ -0,0 +1,69 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { writeFileSync, mkdtempSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { WhisperUnavailableError } from "../whisper/manager.js"; + +// Make the whisper core report "unavailable" so we exercise the soft-skip path. +const transcribeMock = vi.fn(); +vi.mock("../whisper/transcribe.js", () => ({ transcribe: transcribeMock })); + +const trackTranscribeUnavailable = vi.fn(); +const trackCommandFailure = vi.fn(); +vi.mock("../telemetry/events.js", () => ({ + trackTranscribeUnavailable: (...a: unknown[]) => trackTranscribeUnavailable(...a), + trackCommandFailure: (...a: unknown[]) => trackCommandFailure(...a), +})); + +import transcribeCmd from "./transcribe.js"; + +function dummyAudio(): { dir: string; input: string } { + const dir = mkdtempSync(join(tmpdir(), "hf-transcribe-test-")); + const input = join(dir, "narration.wav"); + writeFileSync(input, "not-real-audio"); + return { dir, input }; +} + +describe("transcribe — whisper unavailable", () => { + let dirs: string[] = []; + let priorExitCode: typeof process.exitCode; + + beforeEach(() => { + dirs = []; + priorExitCode = process.exitCode; + process.exitCode = undefined; + transcribeMock.mockReset(); + trackTranscribeUnavailable.mockReset(); + trackCommandFailure.mockReset(); + transcribeMock.mockRejectedValue( + new WhisperUnavailableError("whisper-cpp not found. Install: brew install whisper-cpp"), + ); + vi.spyOn(console, "log").mockImplementation(() => {}); + }); + + afterEach(() => { + process.exitCode = priorExitCode; + for (const d of dirs) rmSync(d, { recursive: true, force: true }); + vi.restoreAllMocks(); + }); + + it("explicit run exits non-zero and is NOT reported as a command failure", async () => { + const { dir, input } = dummyAudio(); + dirs.push(dir); + await transcribeCmd.run!({ args: { input, json: true, optional: false } } as never); + + expect(process.exitCode).toBe(1); + expect(trackTranscribeUnavailable).toHaveBeenCalledWith({ optional: false }); + expect(trackCommandFailure).not.toHaveBeenCalled(); + }); + + it("--optional skips cleanly with exit 0", async () => { + const { dir, input } = dummyAudio(); + dirs.push(dir); + await transcribeCmd.run!({ args: { input, json: true, optional: true } } as never); + + expect(process.exitCode).toBe(0); + expect(trackTranscribeUnavailable).toHaveBeenCalledWith({ optional: true }); + expect(trackCommandFailure).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/cli/src/commands/transcribe.ts b/packages/cli/src/commands/transcribe.ts index 7a283b736e..7a14844029 100644 --- a/packages/cli/src/commands/transcribe.ts +++ b/packages/cli/src/commands/transcribe.ts @@ -13,8 +13,8 @@ export const examples: Example[] = [ import { resolve, join, extname, dirname } from "node:path"; import * as clack from "@clack/prompts"; import { c } from "../ui/colors.js"; -import { DEFAULT_MODEL } from "../whisper/manager.js"; -import { trackCommandFailure } from "../telemetry/events.js"; +import { DEFAULT_MODEL, isWhisperUnavailable } from "../whisper/manager.js"; +import { trackCommandFailure, trackTranscribeUnavailable } from "../telemetry/events.js"; export default defineCommand({ meta: { @@ -49,6 +49,12 @@ export default defineCommand({ description: "Output result as JSON", default: false, }, + optional: { + type: "boolean", + description: + "Treat captions as optional: if whisper-cpp is unavailable, skip and exit 0 instead of failing. For pipelines that continue without captions.", + default: false, + }, }, async run({ args }) { const inputPath = resolve(args.input); @@ -77,6 +83,7 @@ export default defineCommand({ model: args.model, language: args.language, json: args.json, + optional: args.optional, }); }, }); @@ -118,7 +125,7 @@ async function importTranscript(inputPath: string, dir: string, json: boolean): async function transcribeAudio( inputPath: string, dir: string, - opts: { model?: string; language?: string; json?: boolean }, + opts: { model?: string; language?: string; json?: boolean; optional?: boolean }, ): Promise { const { transcribe } = await import("../whisper/transcribe.js"); const { loadTranscript, patchCaptionHtml, stripBeforeOnset } = @@ -175,6 +182,25 @@ async function transcribeAudio( } } catch (err) { const message = err instanceof Error ? err.message : String(err); + + // whisper-cpp is an optional prerequisite, not part of the CLI. When it is + // simply unavailable (no binary, no toolchain to build one), that is a setup + // condition, not a command crash — report it on its own metric so it does + // not inflate the cli_error budget, and let `--optional` callers continue. + if (isWhisperUnavailable(err)) { + trackTranscribeUnavailable({ optional: opts.optional === true }); + if (opts.json) { + console.log(JSON.stringify({ ok: false, skipped: true, reason: "whisper_unavailable" })); + } else { + spin?.stop(c.warn(`Captions skipped — ${message}`)); + } + // Optional callers (pipelines) treat a missing prerequisite as a clean + // skip; explicit runs still surface non-zero. Set the status and return + // rather than guarding a process.exit() on the flag. + process.exitCode = opts.optional ? 0 : 1; + return; + } + trackCommandFailure("transcribe", err); if (opts.json) { console.log(JSON.stringify({ ok: false, error: message })); diff --git a/packages/cli/src/telemetry/events.ts b/packages/cli/src/telemetry/events.ts index 17262dba53..7762e4d8f2 100644 --- a/packages/cli/src/telemetry/events.ts +++ b/packages/cli/src/telemetry/events.ts @@ -330,6 +330,14 @@ export function trackCommandFailure(command: string, err: unknown): void { }); } +// Whisper being absent/uninstallable is an environment prerequisite gap, not a +// command crash — track it on its own low-severity metric instead of cli_error +// so the command-failure budget reflects real bugs. `optional` records whether +// the caller (init / skill pipeline) treated captions as skippable. +export function trackTranscribeUnavailable(props: { optional: boolean }): void { + trackEvent("transcribe_unavailable", { optional: props.optional }); +} + export function trackRenderFeedback(props: { rating: number; renderDurationMs: number; diff --git a/packages/cli/src/whisper/manager.test.ts b/packages/cli/src/whisper/manager.test.ts new file mode 100644 index 0000000000..763ccdc0e5 --- /dev/null +++ b/packages/cli/src/whisper/manager.test.ts @@ -0,0 +1,30 @@ +import { describe, it, expect } from "vitest"; +import { WhisperUnavailableError, isWhisperUnavailable } from "./manager.js"; + +describe("isWhisperUnavailable", () => { + it("recognizes WhisperUnavailableError instances", () => { + const err = new WhisperUnavailableError( + "whisper-cpp not found. Install: brew install whisper-cpp", + ); + expect(isWhisperUnavailable(err)).toBe(true); + expect(err.code).toBe("WHISPER_UNAVAILABLE"); + expect(err.name).toBe("WhisperUnavailableError"); + }); + + it("recognizes a plain Error carrying the WHISPER_UNAVAILABLE code (cross-bundle safety)", () => { + const err = Object.assign(new Error("nope"), { code: "WHISPER_UNAVAILABLE" }); + expect(isWhisperUnavailable(err)).toBe(true); + }); + + it("does NOT classify a genuine transcription failure as unavailable", () => { + // whisper present but the run crashed — must stay a real command failure. + expect(isWhisperUnavailable(new Error("Command failed: whisper-cli exited with code 1"))).toBe( + false, + ); + expect(isWhisperUnavailable(new Error("whisper-cpp build failed. Ensure cmake..."))).toBe( + false, + ); + expect(isWhisperUnavailable("whisper-cpp not found")).toBe(false); + expect(isWhisperUnavailable(undefined)).toBe(false); + }); +}); diff --git a/packages/cli/src/whisper/manager.ts b/packages/cli/src/whisper/manager.ts index e2e0af1e69..05a7c74489 100644 --- a/packages/cli/src/whisper/manager.ts +++ b/packages/cli/src/whisper/manager.ts @@ -16,6 +16,24 @@ export interface WhisperResult { source: WhisperSource; } +// A missing/uninstallable whisper-cpp is an environment prerequisite gap (no +// binary, no Homebrew, no compiler toolchain), not a transcription bug. Callers +// that treat captions as optional (init, skill pipelines) skip on this and keep +// going; explicit `transcribe` still fails, but it is reported as a setup +// condition rather than a command crash. +export class WhisperUnavailableError extends Error { + readonly code = "WHISPER_UNAVAILABLE" as const; + constructor(message: string) { + super(message); + this.name = "WhisperUnavailableError"; + } +} + +export function isWhisperUnavailable(err: unknown): err is WhisperUnavailableError { + if (err instanceof WhisperUnavailableError) return true; + return err instanceof Error && "code" in err && err.code === "WHISPER_UNAVAILABLE"; +} + function getModelUrl(model: string): string { return `https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-${model}.bin`; } @@ -184,7 +202,7 @@ export async function ensureWhisper(options?: { } // 4. Give up — tell the user how - throw new Error(`whisper-cpp not found. Install: ${getInstallInstructions()}`); + throw new WhisperUnavailableError(`whisper-cpp not found. Install: ${getInstallInstructions()}`); } export async function ensureModel( diff --git a/skills/website-to-video/references/step-4-vo.md b/skills/website-to-video/references/step-4-vo.md index 37e6d9e8ee..59b84c98d0 100644 --- a/skills/website-to-video/references/step-4-vo.md +++ b/skills/website-to-video/references/step-4-vo.md @@ -144,7 +144,7 @@ Generate the full script as `narration.wav` (or `.mp3`) in the project directory 1. **Try again** — kill the process, run the same command again (transient failures are common) 2. **Try different flags** — smaller model (`--model tiny.en`), different voice, shorter test sentence first -3. **Try a different tool for the same task** — if `hyperframes transcribe` hangs, try `whisper-cli` directly, or Groq API (`npx hyperframes transcribe --provider groq`), or OpenAI API +3. **Try a different tool for the same task** — if `hyperframes transcribe` hangs, run `whisper-cli` directly on the audio 4. **Switch provider entirely** — if ElevenLabs is down, try HeyGen or Kokoro. If Kokoro hangs, try ElevenLabs. Never sit idle for 10 minutes hoping a stuck process will finish.