diff --git a/web/src/App.tsx b/web/src/App.tsx index 066389f..8a55eb3 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -1,3 +1,4 @@ +import { useEffect } from "react"; import { Toaster } from "sonner"; import { HistoryList } from "./components/HistoryList"; import { MicButton } from "./components/MicButton"; @@ -5,13 +6,36 @@ import { PreviewBox } from "./components/PreviewBox"; import { StatusBadge } from "./components/StatusBadge"; import { useRecorder } from "./hooks/useRecorder"; import { useWebSocket } from "./hooks/useWebSocket"; +import { useAppStore } from "./stores/app-store"; export function App() { - const { sendJSON, sendBinary } = useWebSocket(); - const { startRecording, stopRecording } = useRecorder({ - sendJSON, - sendBinary, + const { requestStart, sendStop, sendPaste, sendAudioFrame } = useWebSocket(); + const { prewarm, startRecording, stopRecording } = useRecorder({ + requestStart, + sendStop, + sendAudioFrame, }); + const micReady = useAppStore((s) => s.micReady); + + useEffect(() => { + const forceStopOnBackground = () => { + const state = useAppStore.getState(); + if (state.recording || state.pendingStart) { + stopRecording(); + } + }; + + const onVisibility = () => { + if (document.hidden) forceStopOnBackground(); + }; + + document.addEventListener("visibilitychange", onVisibility); + window.addEventListener("pagehide", forceStopOnBackground); + return () => { + document.removeEventListener("visibilitychange", onVisibility); + window.removeEventListener("pagehide", forceStopOnBackground); + }; + }, [stopRecording]); return ( <> @@ -24,8 +48,17 @@ export function App() { + {!micReady ? ( + + ) : null} - + diff --git a/web/src/hooks/useRecorder.ts b/web/src/hooks/useRecorder.ts index dc32767..a7c1bd4 100644 --- a/web/src/hooks/useRecorder.ts +++ b/web/src/hooks/useRecorder.ts @@ -3,71 +3,116 @@ import { useCallback, useRef } from "react"; import { toast } from "sonner"; import { useAppStore } from "../stores/app-store"; -/** - * ~200ms frames at 16kHz = 3200 samples. - * Doubao bigmodel_async recommends 200ms packets for optimal performance. - */ const FRAME_LENGTH = 3200; interface UseRecorderOptions { - sendJSON: (obj: Record) => void; - sendBinary: (data: Int16Array) => void; + requestStart: () => Promise; + sendStop: (sessionId: string | null) => void; + sendAudioFrame: (sessionId: string, seq: number, data: Int16Array) => boolean; } -export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) { +let optionsInitialized = false; + +export function useRecorder({ + requestStart, + sendStop, + sendAudioFrame, +}: UseRecorderOptions) { const abortRef = useRef(null); const engineRef = useRef<{ onmessage: (e: MessageEvent) => void } | null>( null, ); + const warmedRef = useRef(false); + const audioSeqRef = useRef(1); - // Keep stable refs so callbacks never go stale - const sendJSONRef = useRef(sendJSON); - const sendBinaryRef = useRef(sendBinary); - sendJSONRef.current = sendJSON; - sendBinaryRef.current = sendBinary; + const initOptions = useCallback(() => { + if (optionsInitialized) return; + WebVoiceProcessor.setOptions({ + frameLength: FRAME_LENGTH, + outputSampleRate: 16000, + }); + optionsInitialized = true; + }, []); + + const prewarm = useCallback(async (): Promise => { + if (warmedRef.current) return true; + initOptions(); + + const warmEngine = { + onmessage: () => {}, + }; + + try { + await WebVoiceProcessor.subscribe(warmEngine); + await WebVoiceProcessor.unsubscribe(warmEngine); + warmedRef.current = true; + useAppStore.getState().setMicReady(true); + return true; + } catch (err) { + const error = err as Error; + toast.error(`麦克风准备失败: ${error.message}`); + return false; + } + }, [initOptions]); const startRecording = useCallback(async () => { const store = useAppStore.getState(); - if (store.recording || store.pendingStart) return; + if (store.recording || store.pendingStart || store.stopping) return; store.setPendingStart(true); + store.setStopping(false); + const abort = new AbortController(); abortRef.current = abort; try { - // Create an engine that receives Int16Array @ 16kHz from WebVoiceProcessor + const warmed = await prewarm(); + if (!warmed) { + store.setPendingStart(false); + abortRef.current = null; + return; + } + + const sessionId = await requestStart(); + if (!sessionId) { + store.setPendingStart(false); + abortRef.current = null; + toast.error("启动会话失败,请重试"); + return; + } + const engine = { onmessage: (e: MessageEvent) => { - if (e.data.command === "process") { - sendBinaryRef.current(e.data.inputFrame as Int16Array); - } + if (e.data.command !== "process") return; + const seq = audioSeqRef.current; + audioSeqRef.current += 1; + sendAudioFrame(sessionId, seq, e.data.inputFrame as Int16Array); }, }; engineRef.current = engine; + audioSeqRef.current = 1; - WebVoiceProcessor.setOptions({ - frameLength: FRAME_LENGTH, - outputSampleRate: 16000, - }); - - // subscribe() handles getUserMedia + AudioContext lifecycle internally. - // It checks for closed/suspended AudioContext and re-creates as needed. await WebVoiceProcessor.subscribe(engine); if (abort.signal.aborted) { await WebVoiceProcessor.unsubscribe(engine); engineRef.current = null; + sendStop(sessionId); store.setPendingStart(false); + abortRef.current = null; return; } + store.setActiveSessionId(sessionId); store.setPendingStart(false); - abortRef.current = null; store.setRecording(true); - sendJSONRef.current({ type: "start" }); + store.setStopping(false); store.clearPreview(); + abortRef.current = null; } catch (err) { - useAppStore.getState().setPendingStart(false); + store.setPendingStart(false); + store.setRecording(false); + store.setStopping(false); abortRef.current = null; engineRef.current = null; @@ -86,7 +131,7 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) { toast.error(`麦克风错误: ${error.message}`); } } - }, []); + }, [prewarm, requestStart, sendAudioFrame, sendStop]); const stopRecording = useCallback(() => { const store = useAppStore.getState(); @@ -100,15 +145,16 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) { if (!store.recording) return; store.setRecording(false); + store.setStopping(true); + audioSeqRef.current = 1; if (engineRef.current) { - // Fire-and-forget: state is already updated, cleanup is async WebVoiceProcessor.unsubscribe(engineRef.current); engineRef.current = null; } - sendJSONRef.current({ type: "stop" }); - }, []); + sendStop(store.activeSessionId); + }, [sendStop]); - return { startRecording, stopRecording }; + return { prewarm, startRecording, stopRecording }; }