refactor: 使用 @picovoice/web-voice-processor 替换手写音频采集管线
- 引入 WebVoiceProcessor 处理 getUserMedia、AudioContext 生命周期和 WASM 重采样
- 删除自定义 AudioWorklet (audio-processor.ts) 和线性插值重采样器 (resample.ts)
- 改善音频采集稳定性:自动检测 AudioContext suspended/closed 状态并重建
- 更精确的错误提示:区分权限拒绝、设备未找到、设备异常
This commit is contained in:
@@ -1,8 +1,13 @@
|
||||
import { WebVoiceProcessor } from "@picovoice/web-voice-processor";
|
||||
import { useCallback, useRef } from "react";
|
||||
import { toast } from "sonner";
|
||||
import { resampleTo16kInt16 } from "../lib/resample";
|
||||
import { useAppStore } from "../stores/app-store";
|
||||
import audioProcessorUrl from "../workers/audio-processor.ts?worker&url";
|
||||
|
||||
/**
|
||||
* ~200ms frames at 16kHz = 3200 samples.
|
||||
* Doubao bigmodel_async recommends 200ms packets for optimal performance.
|
||||
*/
|
||||
const FRAME_LENGTH = 3200;
|
||||
|
||||
interface UseRecorderOptions {
|
||||
sendJSON: (obj: Record<string, unknown>) => void;
|
||||
@@ -10,10 +15,10 @@ interface UseRecorderOptions {
|
||||
}
|
||||
|
||||
export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
|
||||
const audioCtxRef = useRef<AudioContext | null>(null);
|
||||
const workletRef = useRef<AudioWorkletNode | null>(null);
|
||||
const streamRef = useRef<MediaStream | null>(null);
|
||||
const abortRef = useRef<AbortController | null>(null);
|
||||
const engineRef = useRef<{ onmessage: (e: MessageEvent) => void } | null>(
|
||||
null,
|
||||
);
|
||||
|
||||
// Keep stable refs so callbacks never go stale
|
||||
const sendJSONRef = useRef(sendJSON);
|
||||
@@ -21,16 +26,6 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
|
||||
sendJSONRef.current = sendJSON;
|
||||
sendBinaryRef.current = sendBinary;
|
||||
|
||||
const initAudio = useCallback(async () => {
|
||||
if (audioCtxRef.current) return;
|
||||
// Use device native sample rate — we resample to 16kHz in software
|
||||
const ctx = new AudioContext();
|
||||
// Chrome requires resume() after user gesture
|
||||
if (ctx.state === "suspended") await ctx.resume();
|
||||
await ctx.audioWorklet.addModule(audioProcessorUrl);
|
||||
audioCtxRef.current = ctx;
|
||||
}, []);
|
||||
|
||||
const startRecording = useCallback(async () => {
|
||||
const store = useAppStore.getState();
|
||||
if (store.recording || store.pendingStart) return;
|
||||
@@ -40,48 +35,32 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
|
||||
abortRef.current = abort;
|
||||
|
||||
try {
|
||||
await initAudio();
|
||||
if (abort.signal.aborted) {
|
||||
store.setPendingStart(false);
|
||||
return;
|
||||
}
|
||||
|
||||
const ctx = audioCtxRef.current as AudioContext;
|
||||
if (ctx.state === "suspended") await ctx.resume();
|
||||
if (abort.signal.aborted) {
|
||||
store.setPendingStart(false);
|
||||
return;
|
||||
}
|
||||
|
||||
const stream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: {
|
||||
echoCancellation: true,
|
||||
noiseSuppression: true,
|
||||
channelCount: 1,
|
||||
// Create an engine that receives Int16Array @ 16kHz from WebVoiceProcessor
|
||||
const engine = {
|
||||
onmessage: (e: MessageEvent) => {
|
||||
if (e.data.command === "process") {
|
||||
sendBinaryRef.current(e.data.inputFrame as Int16Array);
|
||||
}
|
||||
},
|
||||
};
|
||||
engineRef.current = engine;
|
||||
|
||||
WebVoiceProcessor.setOptions({
|
||||
frameLength: FRAME_LENGTH,
|
||||
outputSampleRate: 16000,
|
||||
});
|
||||
|
||||
// subscribe() handles getUserMedia + AudioContext lifecycle internally.
|
||||
// It checks for closed/suspended AudioContext and re-creates as needed.
|
||||
await WebVoiceProcessor.subscribe(engine);
|
||||
|
||||
if (abort.signal.aborted) {
|
||||
stream.getTracks().forEach((t) => {
|
||||
t.stop();
|
||||
});
|
||||
await WebVoiceProcessor.unsubscribe(engine);
|
||||
engineRef.current = null;
|
||||
store.setPendingStart(false);
|
||||
return;
|
||||
}
|
||||
|
||||
streamRef.current = stream;
|
||||
const source = ctx.createMediaStreamSource(stream);
|
||||
const worklet = new AudioWorkletNode(ctx, "audio-processor");
|
||||
worklet.port.onmessage = (e: MessageEvent) => {
|
||||
if (e.data.type === "audio") {
|
||||
sendBinaryRef.current(
|
||||
resampleTo16kInt16(e.data.samples, e.data.sampleRate),
|
||||
);
|
||||
}
|
||||
};
|
||||
source.connect(worklet);
|
||||
worklet.port.postMessage({ command: "start" });
|
||||
workletRef.current = worklet;
|
||||
|
||||
store.setPendingStart(false);
|
||||
abortRef.current = null;
|
||||
store.setRecording(true);
|
||||
@@ -90,9 +69,24 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
|
||||
} catch (err) {
|
||||
useAppStore.getState().setPendingStart(false);
|
||||
abortRef.current = null;
|
||||
toast.error(`麦克风错误: ${(err as Error).message}`);
|
||||
engineRef.current = null;
|
||||
|
||||
const error = err as Error;
|
||||
switch (error.name) {
|
||||
case "PermissionError":
|
||||
toast.error("麦克风权限被拒绝");
|
||||
break;
|
||||
case "DeviceMissingError":
|
||||
toast.error("未找到麦克风设备");
|
||||
break;
|
||||
case "DeviceReadError":
|
||||
toast.error("麦克风设备异常,请检查连接");
|
||||
break;
|
||||
default:
|
||||
toast.error(`麦克风错误: ${error.message}`);
|
||||
}
|
||||
}
|
||||
}, [initAudio]);
|
||||
}, []);
|
||||
|
||||
const stopRecording = useCallback(() => {
|
||||
const store = useAppStore.getState();
|
||||
@@ -107,16 +101,10 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
|
||||
if (!store.recording) return;
|
||||
store.setRecording(false);
|
||||
|
||||
if (workletRef.current) {
|
||||
workletRef.current.port.postMessage({ command: "stop" });
|
||||
workletRef.current.disconnect();
|
||||
workletRef.current = null;
|
||||
}
|
||||
if (streamRef.current) {
|
||||
streamRef.current.getTracks().forEach((t) => {
|
||||
t.stop();
|
||||
});
|
||||
streamRef.current = null;
|
||||
if (engineRef.current) {
|
||||
// Fire-and-forget: state is already updated, cleanup is async
|
||||
WebVoiceProcessor.unsubscribe(engineRef.current);
|
||||
engineRef.current = null;
|
||||
}
|
||||
|
||||
sendJSONRef.current({ type: "stop" });
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
/**
|
||||
* Linear interpolation resampler: native sample rate -> 16kHz 16-bit mono PCM.
|
||||
*/
|
||||
const TARGET_SAMPLE_RATE = 16000;
|
||||
|
||||
export function resampleTo16kInt16(
|
||||
float32: Float32Array,
|
||||
srcRate: number,
|
||||
): Int16Array {
|
||||
const ratio = srcRate / TARGET_SAMPLE_RATE;
|
||||
const outLen = Math.floor(float32.length / ratio);
|
||||
const out = new Int16Array(outLen);
|
||||
for (let i = 0; i < outLen; i++) {
|
||||
const srcIdx = i * ratio;
|
||||
const lo = Math.floor(srcIdx);
|
||||
const hi = Math.min(lo + 1, float32.length - 1);
|
||||
const frac = srcIdx - lo;
|
||||
const sample = float32[lo] + frac * (float32[hi] - float32[lo]);
|
||||
// Clamp to [-1, 1] then scale to Int16
|
||||
out[i] = Math.max(-32768, Math.min(32767, Math.round(sample * 32767)));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
@@ -1,88 +0,0 @@
|
||||
/**
|
||||
* AudioWorklet processor for VoicePaste.
|
||||
*
|
||||
* Captures raw Float32 PCM from the microphone, accumulates samples into
|
||||
* ~200ms frames, and posts them to the main thread for resampling + WS send.
|
||||
*
|
||||
* Communication:
|
||||
* Main → Processor: { command: "start" | "stop" }
|
||||
* Processor → Main: { type: "audio", samples: Float32Array, sampleRate: number }
|
||||
*/
|
||||
|
||||
// AudioWorkletGlobalScope globals (not in standard lib)
|
||||
declare const sampleRate: number;
|
||||
declare class AudioWorkletProcessor {
|
||||
readonly port: MessagePort;
|
||||
constructor();
|
||||
process(
|
||||
inputs: Float32Array[][],
|
||||
outputs: Float32Array[][],
|
||||
parameters: Record<string, Float32Array>,
|
||||
): boolean;
|
||||
}
|
||||
declare function registerProcessor(
|
||||
name: string,
|
||||
ctor: new () => AudioWorkletProcessor,
|
||||
): void;
|
||||
|
||||
class VoicePasteProcessor extends AudioWorkletProcessor {
|
||||
private recording = false;
|
||||
private buffer: Float32Array[] = [];
|
||||
private bufferLen = 0;
|
||||
private readonly frameSize: number;
|
||||
|
||||
constructor() {
|
||||
super();
|
||||
// ~200ms worth of samples at current sample rate
|
||||
this.frameSize = Math.floor(sampleRate * 0.2);
|
||||
|
||||
this.port.onmessage = (e: MessageEvent) => {
|
||||
if (e.data.command === "start") {
|
||||
this.recording = true;
|
||||
this.buffer = [];
|
||||
this.bufferLen = 0;
|
||||
} else if (e.data.command === "stop") {
|
||||
if (this.bufferLen > 0) {
|
||||
this.flush();
|
||||
}
|
||||
this.recording = false;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
process(inputs: Float32Array[][]): boolean {
|
||||
if (!this.recording) return true;
|
||||
|
||||
const input = inputs[0];
|
||||
if (!input || !input[0]) return true;
|
||||
|
||||
const channelData = input[0];
|
||||
this.buffer.push(new Float32Array(channelData));
|
||||
this.bufferLen += channelData.length;
|
||||
|
||||
if (this.bufferLen >= this.frameSize) {
|
||||
this.flush();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private flush(): void {
|
||||
const merged = new Float32Array(this.bufferLen);
|
||||
let offset = 0;
|
||||
for (const chunk of this.buffer) {
|
||||
merged.set(chunk, offset);
|
||||
offset += chunk.length;
|
||||
}
|
||||
|
||||
this.port.postMessage(
|
||||
{ type: "audio", samples: merged, sampleRate: sampleRate },
|
||||
[merged.buffer],
|
||||
);
|
||||
|
||||
this.buffer = [];
|
||||
this.bufferLen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
registerProcessor("audio-processor", VoicePasteProcessor);
|
||||
Reference in New Issue
Block a user