refactor: 使用 @picovoice/web-voice-processor 替换手写音频采集管线

- 引入 WebVoiceProcessor 处理 getUserMedia、AudioContext 生命周期和 WASM 重采样
- 删除自定义 AudioWorklet (audio-processor.ts) 和线性插值重采样器 (resample.ts)
- 改善音频采集稳定性:自动检测 AudioContext suspended/closed 状态并重建
- 更精确的错误提示:区分权限拒绝、设备未找到、设备异常
This commit is contained in:
2026-03-02 07:42:45 +08:00
parent 677ef35ff7
commit 669bfac722
6 changed files with 61 additions and 181 deletions

View File

@@ -1,8 +1,13 @@
import { WebVoiceProcessor } from "@picovoice/web-voice-processor";
import { useCallback, useRef } from "react";
import { toast } from "sonner";
import { resampleTo16kInt16 } from "../lib/resample";
import { useAppStore } from "../stores/app-store";
import audioProcessorUrl from "../workers/audio-processor.ts?worker&url";
/**
* ~200ms frames at 16kHz = 3200 samples.
* Doubao bigmodel_async recommends 200ms packets for optimal performance.
*/
const FRAME_LENGTH = 3200;
interface UseRecorderOptions {
sendJSON: (obj: Record<string, unknown>) => void;
@@ -10,10 +15,10 @@ interface UseRecorderOptions {
}
export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
const audioCtxRef = useRef<AudioContext | null>(null);
const workletRef = useRef<AudioWorkletNode | null>(null);
const streamRef = useRef<MediaStream | null>(null);
const abortRef = useRef<AbortController | null>(null);
const engineRef = useRef<{ onmessage: (e: MessageEvent) => void } | null>(
null,
);
// Keep stable refs so callbacks never go stale
const sendJSONRef = useRef(sendJSON);
@@ -21,16 +26,6 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
sendJSONRef.current = sendJSON;
sendBinaryRef.current = sendBinary;
const initAudio = useCallback(async () => {
if (audioCtxRef.current) return;
// Use device native sample rate — we resample to 16kHz in software
const ctx = new AudioContext();
// Chrome requires resume() after user gesture
if (ctx.state === "suspended") await ctx.resume();
await ctx.audioWorklet.addModule(audioProcessorUrl);
audioCtxRef.current = ctx;
}, []);
const startRecording = useCallback(async () => {
const store = useAppStore.getState();
if (store.recording || store.pendingStart) return;
@@ -40,48 +35,32 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
abortRef.current = abort;
try {
await initAudio();
if (abort.signal.aborted) {
store.setPendingStart(false);
return;
}
const ctx = audioCtxRef.current as AudioContext;
if (ctx.state === "suspended") await ctx.resume();
if (abort.signal.aborted) {
store.setPendingStart(false);
return;
}
const stream = await navigator.mediaDevices.getUserMedia({
audio: {
echoCancellation: true,
noiseSuppression: true,
channelCount: 1,
// Create an engine that receives Int16Array @ 16kHz from WebVoiceProcessor
const engine = {
onmessage: (e: MessageEvent) => {
if (e.data.command === "process") {
sendBinaryRef.current(e.data.inputFrame as Int16Array);
}
},
};
engineRef.current = engine;
WebVoiceProcessor.setOptions({
frameLength: FRAME_LENGTH,
outputSampleRate: 16000,
});
// subscribe() handles getUserMedia + AudioContext lifecycle internally.
// It checks for closed/suspended AudioContext and re-creates as needed.
await WebVoiceProcessor.subscribe(engine);
if (abort.signal.aborted) {
stream.getTracks().forEach((t) => {
t.stop();
});
await WebVoiceProcessor.unsubscribe(engine);
engineRef.current = null;
store.setPendingStart(false);
return;
}
streamRef.current = stream;
const source = ctx.createMediaStreamSource(stream);
const worklet = new AudioWorkletNode(ctx, "audio-processor");
worklet.port.onmessage = (e: MessageEvent) => {
if (e.data.type === "audio") {
sendBinaryRef.current(
resampleTo16kInt16(e.data.samples, e.data.sampleRate),
);
}
};
source.connect(worklet);
worklet.port.postMessage({ command: "start" });
workletRef.current = worklet;
store.setPendingStart(false);
abortRef.current = null;
store.setRecording(true);
@@ -90,9 +69,24 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
} catch (err) {
useAppStore.getState().setPendingStart(false);
abortRef.current = null;
toast.error(`麦克风错误: ${(err as Error).message}`);
engineRef.current = null;
const error = err as Error;
switch (error.name) {
case "PermissionError":
toast.error("麦克风权限被拒绝");
break;
case "DeviceMissingError":
toast.error("未找到麦克风设备");
break;
case "DeviceReadError":
toast.error("麦克风设备异常,请检查连接");
break;
default:
toast.error(`麦克风错误: ${error.message}`);
}
}
}, [initAudio]);
}, []);
const stopRecording = useCallback(() => {
const store = useAppStore.getState();
@@ -107,16 +101,10 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
if (!store.recording) return;
store.setRecording(false);
if (workletRef.current) {
workletRef.current.port.postMessage({ command: "stop" });
workletRef.current.disconnect();
workletRef.current = null;
}
if (streamRef.current) {
streamRef.current.getTracks().forEach((t) => {
t.stop();
});
streamRef.current = null;
if (engineRef.current) {
// Fire-and-forget: state is already updated, cleanup is async
WebVoiceProcessor.unsubscribe(engineRef.current);
engineRef.current = null;
}
sendJSONRef.current({ type: "stop" });

View File

@@ -1,23 +0,0 @@
/**
 * Downmix-free linear-interpolation resampler.
 *
 * Converts mono Float32 PCM captured at the device's native rate into
 * 16 kHz signed 16-bit PCM, the format the ASR backend expects.
 */
const TARGET_SAMPLE_RATE = 16000;

/**
 * Resample `float32` from `srcRate` Hz to 16 kHz and quantize to Int16.
 *
 * @param float32 - mono PCM samples, nominally in [-1, 1]
 * @param srcRate - sample rate (Hz) the input was captured at
 * @returns 16 kHz Int16 PCM; length is floor(input length / rate ratio)
 */
export function resampleTo16kInt16(
  float32: Float32Array,
  srcRate: number,
): Int16Array {
  const step = srcRate / TARGET_SAMPLE_RATE;
  const outputLength = Math.floor(float32.length / step);
  const pcm = new Int16Array(outputLength);
  const lastIndex = float32.length - 1;

  for (let outIdx = 0; outIdx < outputLength; outIdx++) {
    // Fractional read position in the source buffer for this output sample.
    const position = outIdx * step;
    const left = Math.floor(position);
    const right = Math.min(left + 1, lastIndex);
    const t = position - left;
    // Linear interpolation between the two neighbouring source samples.
    const value = float32[left] + t * (float32[right] - float32[left]);
    // Scale to Int16 range, then clamp so out-of-range floats can't wrap.
    pcm[outIdx] = Math.min(32767, Math.max(-32768, Math.round(value * 32767)));
  }
  return pcm;
}

View File

@@ -1,88 +0,0 @@
/**
* AudioWorklet processor for VoicePaste.
*
* Captures raw Float32 PCM from the microphone, accumulates samples into
* ~200ms frames, and posts them to the main thread for resampling + WS send.
*
* Communication:
* Main → Processor: { command: "start" | "stop" }
* Processor → Main: { type: "audio", samples: Float32Array, sampleRate: number }
*/
// AudioWorkletGlobalScope globals (not in standard lib).
// TypeScript's default lib does not include the worklet global scope, so we
// declare the runtime-provided names ourselves. These are type-only ambient
// declarations — they emit no JavaScript.

// Sample rate (Hz) of the AudioContext this worklet belongs to.
declare const sampleRate: number;

// Minimal typing of the runtime base class for worklet processors.
declare class AudioWorkletProcessor {
readonly port: MessagePort;
constructor();
process(
inputs: Float32Array[][],
outputs: Float32Array[][],
parameters: Record<string, Float32Array>,
): boolean;
}

// Registers a processor class under the name AudioWorkletNode looks up.
declare function registerProcessor(
name: string,
ctor: new () => AudioWorkletProcessor,
): void;
/**
 * Microphone capture processor: batches raw Float32 PCM into ~200ms frames
 * and forwards them to the main thread for resampling + WebSocket send.
 *
 * Protocol (unchanged):
 *   Main → Processor: { command: "start" | "stop" }
 *   Processor → Main: { type: "audio", samples: Float32Array, sampleRate: number }
 */
class VoicePasteProcessor extends AudioWorkletProcessor {
  private capturing = false;
  private chunks: Float32Array[] = [];
  private queuedSamples = 0;
  // Number of samples that make up one ~200ms frame at the native rate.
  private readonly samplesPerFrame: number;

  constructor() {
    super();
    this.samplesPerFrame = Math.floor(sampleRate * 0.2);
    this.port.onmessage = (e: MessageEvent) => {
      const { command } = e.data;
      if (command === "start") {
        this.capturing = true;
        this.chunks = [];
        this.queuedSamples = 0;
      } else if (command === "stop") {
        // Push out any partial frame before going idle.
        if (this.queuedSamples > 0) {
          this.emitFrame();
        }
        this.capturing = false;
      }
    };
  }

  process(inputs: Float32Array[][]): boolean {
    if (!this.capturing) return true;
    const mono = inputs[0]?.[0];
    if (!mono) return true;
    // Copy: the engine reuses the underlying buffer between render quanta.
    this.chunks.push(new Float32Array(mono));
    this.queuedSamples += mono.length;
    if (this.queuedSamples >= this.samplesPerFrame) {
      this.emitFrame();
    }
    return true;
  }

  // Concatenate queued chunks into one frame and transfer it to main thread.
  private emitFrame(): void {
    const frame = new Float32Array(this.queuedSamples);
    let cursor = 0;
    for (const part of this.chunks) {
      frame.set(part, cursor);
      cursor += part.length;
    }
    // Transfer list avoids a structured-clone copy of the frame buffer.
    this.port.postMessage(
      { type: "audio", samples: frame, sampleRate: sampleRate },
      [frame.buffer],
    );
    this.chunks = [];
    this.queuedSamples = 0;
  }
}

registerProcessor("audio-processor", VoicePasteProcessor);