diff --git a/web/src/App.tsx b/web/src/App.tsx
index 066389f..8a55eb3 100644
--- a/web/src/App.tsx
+++ b/web/src/App.tsx
@@ -1,3 +1,4 @@
+import { useEffect } from "react";
import { Toaster } from "sonner";
import { HistoryList } from "./components/HistoryList";
import { MicButton } from "./components/MicButton";
@@ -5,13 +6,36 @@ import { PreviewBox } from "./components/PreviewBox";
import { StatusBadge } from "./components/StatusBadge";
import { useRecorder } from "./hooks/useRecorder";
import { useWebSocket } from "./hooks/useWebSocket";
+import { useAppStore } from "./stores/app-store";
export function App() {
- const { sendJSON, sendBinary } = useWebSocket();
- const { startRecording, stopRecording } = useRecorder({
- sendJSON,
- sendBinary,
+ const { requestStart, sendStop, sendPaste, sendAudioFrame } = useWebSocket();
+ const { prewarm, startRecording, stopRecording } = useRecorder({
+ requestStart,
+ sendStop,
+ sendAudioFrame,
});
+ const micReady = useAppStore((s) => s.micReady);
+
+ useEffect(() => {
+ const forceStopOnBackground = () => {
+ const state = useAppStore.getState();
+ if (state.recording || state.pendingStart) {
+ stopRecording();
+ }
+ };
+
+ const onVisibility = () => {
+ if (document.hidden) forceStopOnBackground();
+ };
+
+ document.addEventListener("visibilitychange", onVisibility);
+ window.addEventListener("pagehide", forceStopOnBackground);
+ return () => {
+ document.removeEventListener("visibilitychange", onVisibility);
+ window.removeEventListener("pagehide", forceStopOnBackground);
+ };
+ }, [stopRecording]);
return (
<>
@@ -24,8 +48,17 @@ export function App() {
+ {!micReady ? (
+
+ ) : null}
-
+
>
diff --git a/web/src/hooks/useRecorder.ts b/web/src/hooks/useRecorder.ts
index dc32767..a7c1bd4 100644
--- a/web/src/hooks/useRecorder.ts
+++ b/web/src/hooks/useRecorder.ts
@@ -3,71 +3,116 @@ import { useCallback, useRef } from "react";
import { toast } from "sonner";
import { useAppStore } from "../stores/app-store";
-/**
- * ~200ms frames at 16kHz = 3200 samples.
- * Doubao bigmodel_async recommends 200ms packets for optimal performance.
- */
const FRAME_LENGTH = 3200;
interface UseRecorderOptions {
- sendJSON: (obj: Record) => void;
- sendBinary: (data: Int16Array) => void;
+ requestStart: () => Promise;
+ sendStop: (sessionId: string | null) => void;
+ sendAudioFrame: (sessionId: string, seq: number, data: Int16Array) => boolean;
}
-export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
+let optionsInitialized = false;
+
+export function useRecorder({
+ requestStart,
+ sendStop,
+ sendAudioFrame,
+}: UseRecorderOptions) {
const abortRef = useRef(null);
const engineRef = useRef<{ onmessage: (e: MessageEvent) => void } | null>(
null,
);
+ const warmedRef = useRef(false);
+ const audioSeqRef = useRef(1);
- // Keep stable refs so callbacks never go stale
- const sendJSONRef = useRef(sendJSON);
- const sendBinaryRef = useRef(sendBinary);
- sendJSONRef.current = sendJSON;
- sendBinaryRef.current = sendBinary;
+ const initOptions = useCallback(() => {
+ if (optionsInitialized) return;
+ WebVoiceProcessor.setOptions({
+ frameLength: FRAME_LENGTH,
+ outputSampleRate: 16000,
+ });
+ optionsInitialized = true;
+ }, []);
+
+ const prewarm = useCallback(async (): Promise => {
+ if (warmedRef.current) return true;
+ initOptions();
+
+ const warmEngine = {
+ onmessage: () => {},
+ };
+
+ try {
+ await WebVoiceProcessor.subscribe(warmEngine);
+ await WebVoiceProcessor.unsubscribe(warmEngine);
+ warmedRef.current = true;
+ useAppStore.getState().setMicReady(true);
+ return true;
+ } catch (err) {
+ const error = err as Error;
+ toast.error(`麦克风准备失败: ${error.message}`);
+ return false;
+ }
+ }, [initOptions]);
const startRecording = useCallback(async () => {
const store = useAppStore.getState();
- if (store.recording || store.pendingStart) return;
+ if (store.recording || store.pendingStart || store.stopping) return;
store.setPendingStart(true);
+ store.setStopping(false);
+
const abort = new AbortController();
abortRef.current = abort;
try {
- // Create an engine that receives Int16Array @ 16kHz from WebVoiceProcessor
+ const warmed = await prewarm();
+ if (!warmed) {
+ store.setPendingStart(false);
+ abortRef.current = null;
+ return;
+ }
+
+ const sessionId = await requestStart();
+ if (!sessionId) {
+ store.setPendingStart(false);
+ abortRef.current = null;
+ toast.error("启动会话失败,请重试");
+ return;
+ }
+
const engine = {
onmessage: (e: MessageEvent) => {
- if (e.data.command === "process") {
- sendBinaryRef.current(e.data.inputFrame as Int16Array);
- }
+ if (e.data.command !== "process") return;
+ const seq = audioSeqRef.current;
+ audioSeqRef.current += 1;
+ sendAudioFrame(sessionId, seq, e.data.inputFrame as Int16Array);
},
};
engineRef.current = engine;
+ audioSeqRef.current = 1;
- WebVoiceProcessor.setOptions({
- frameLength: FRAME_LENGTH,
- outputSampleRate: 16000,
- });
-
- // subscribe() handles getUserMedia + AudioContext lifecycle internally.
- // It checks for closed/suspended AudioContext and re-creates as needed.
await WebVoiceProcessor.subscribe(engine);
if (abort.signal.aborted) {
await WebVoiceProcessor.unsubscribe(engine);
engineRef.current = null;
+ sendStop(sessionId);
store.setPendingStart(false);
+ abortRef.current = null;
return;
}
+ store.setActiveSessionId(sessionId);
store.setPendingStart(false);
- abortRef.current = null;
store.setRecording(true);
- sendJSONRef.current({ type: "start" });
+ store.setStopping(false);
store.clearPreview();
+ abortRef.current = null;
} catch (err) {
- useAppStore.getState().setPendingStart(false);
+ store.setPendingStart(false);
+ store.setRecording(false);
+ store.setStopping(false);
abortRef.current = null;
engineRef.current = null;
@@ -86,7 +131,7 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
toast.error(`麦克风错误: ${error.message}`);
}
}
- }, []);
+ }, [prewarm, requestStart, sendAudioFrame, sendStop]);
const stopRecording = useCallback(() => {
const store = useAppStore.getState();
@@ -100,15 +145,16 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
if (!store.recording) return;
store.setRecording(false);
+ store.setStopping(true);
+ audioSeqRef.current = 1;
if (engineRef.current) {
- // Fire-and-forget: state is already updated, cleanup is async
WebVoiceProcessor.unsubscribe(engineRef.current);
engineRef.current = null;
}
- sendJSONRef.current({ type: "stop" });
- }, []);
+ sendStop(store.activeSessionId);
+ }, [sendStop]);
- return { startRecording, stopRecording };
+ return { prewarm, startRecording, stopRecording };
}