Compare commits
6 Commits
669bfac722
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 393e26e8de | |||
| b309dca688 | |||
| 5a817e6646 | |||
| f78c022f75 | |||
| 7cf48246f2 | |||
| df2cf549c9 |
2
go.mod
2
go.mod
@@ -54,6 +54,8 @@ require (
|
||||
github.com/vcaesar/keycode v0.10.1 // indirect
|
||||
github.com/vcaesar/screenshot v0.11.1 // indirect
|
||||
github.com/vcaesar/tt v0.20.1 // indirect
|
||||
github.com/vmihailenco/msgpack/v5 v5.4.1 // indirect
|
||||
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
|
||||
github.com/yusufpapurcu/wmi v1.2.4 // indirect
|
||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||
golang.org/x/crypto v0.48.0 // indirect
|
||||
|
||||
4
go.sum
4
go.sum
@@ -120,6 +120,10 @@ github.com/vcaesar/screenshot v0.11.1 h1:GgPuN89XC4Yh38dLx4quPlSo3YiWWhwIria/j3L
|
||||
github.com/vcaesar/screenshot v0.11.1/go.mod h1:gJNwHBiP1v1v7i8TQ4yV1XJtcyn2I/OJL7OziVQkwjs=
|
||||
github.com/vcaesar/tt v0.20.1 h1:D/jUeeVCNbq3ad8M7hhtB3J9x5RZ6I1n1eZ0BJp7M+4=
|
||||
github.com/vcaesar/tt v0.20.1/go.mod h1:cH2+AwGAJm19Wa6xvEa+0r+sXDJBT0QgNQey6mwqLeU=
|
||||
github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8=
|
||||
github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok=
|
||||
github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g=
|
||||
github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
|
||||
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
|
||||
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
|
||||
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
|
||||
|
||||
@@ -2,40 +2,46 @@ package ws
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/gofiber/contrib/v3/websocket"
|
||||
"github.com/gofiber/fiber/v3"
|
||||
"github.com/google/uuid"
|
||||
"github.com/vmihailenco/msgpack/v5"
|
||||
)
|
||||
|
||||
// session holds the state for a single WebSocket connection.
|
||||
const wsReadTimeout = 75 * time.Second
|
||||
|
||||
type session struct {
|
||||
conn *websocket.Conn
|
||||
log *slog.Logger
|
||||
resultCh chan ServerMsg
|
||||
|
||||
stateMu sync.Mutex
|
||||
writeMu sync.Mutex
|
||||
previewMu sync.Mutex
|
||||
state string
|
||||
sessionID string
|
||||
seq int64
|
||||
lastAudioSeq uint64
|
||||
previewText string
|
||||
sendAudio func([]byte)
|
||||
cleanup func()
|
||||
active bool
|
||||
}
|
||||
// PasteFunc is called when the server should paste text into the focused app.
|
||||
|
||||
type PasteFunc func(text string) error
|
||||
|
||||
// ASRFactory creates an ASR session. It returns a channel that receives
|
||||
// partial/final results, and a function to send audio frames.
|
||||
// The cleanup function must be called when the session ends.
|
||||
type ASRFactory func(resultCh chan<- ServerMsg) (sendAudio func(pcm []byte), cleanup func(), err error)
|
||||
|
||||
// Handler holds dependencies for WebSocket connections.
|
||||
type Handler struct {
|
||||
token string
|
||||
pasteFunc PasteFunc
|
||||
asrFactory ASRFactory
|
||||
}
|
||||
|
||||
// NewHandler creates a WS handler with the given dependencies.
|
||||
func NewHandler(token string, pasteFn PasteFunc, asrFn ASRFactory) *Handler {
|
||||
return &Handler{
|
||||
token: token,
|
||||
@@ -44,9 +50,7 @@ func NewHandler(token string, pasteFn PasteFunc, asrFn ASRFactory) *Handler {
|
||||
}
|
||||
}
|
||||
|
||||
// Register adds the /ws route to the Fiber app.
|
||||
func (h *Handler) Register(app *fiber.App) {
|
||||
// Token check middleware (before upgrade)
|
||||
app.Use("/ws", func(c fiber.Ctx) error {
|
||||
if !websocket.IsWebSocketUpgrade(c) {
|
||||
return fiber.ErrUpgradeRequired
|
||||
@@ -67,127 +71,332 @@ func (h *Handler) handleConn(c *websocket.Conn) {
|
||||
sess := &session{
|
||||
conn: c,
|
||||
log: slog.With("remote", c.RemoteAddr().String()),
|
||||
resultCh: make(chan ServerMsg, 32),
|
||||
resultCh: make(chan ServerMsg, 64),
|
||||
state: StateIdle,
|
||||
sessionID: "",
|
||||
seq: 1,
|
||||
lastAudioSeq: 0,
|
||||
}
|
||||
|
||||
sess.log.Info("ws connected")
|
||||
defer sess.log.Info("ws disconnected")
|
||||
defer close(sess.resultCh)
|
||||
defer sess.cleanupASR()
|
||||
defer sess.forceStop()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
go sess.writerLoop(&wg)
|
||||
defer wg.Wait()
|
||||
|
||||
_ = sess.writeJSON(ServerMsg{Type: MsgReady, Message: "ok"})
|
||||
_ = sess.writeJSON(ServerMsg{Type: MsgState, State: StateIdle})
|
||||
|
||||
for {
|
||||
_ = c.SetReadDeadline(time.Now().Add(wsReadTimeout))
|
||||
mt, data, err := c.ReadMessage()
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
if mt == websocket.BinaryMessage {
|
||||
switch mt {
|
||||
case websocket.BinaryMessage:
|
||||
sess.handleAudioFrame(data)
|
||||
} else if mt == websocket.TextMessage {
|
||||
case websocket.TextMessage:
|
||||
h.handleTextMessage(sess, data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *session) writerLoop(wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
for msg := range s.resultCh {
|
||||
if msg.Type == MsgPartial || msg.Type == MsgFinal {
|
||||
s.previewMu.Lock()
|
||||
s.previewText = msg.Text
|
||||
preview := ServerMsg{Type: msg.Type, Text: s.previewText}
|
||||
s.previewMu.Unlock()
|
||||
if err := s.conn.WriteMessage(websocket.TextMessage, preview.Bytes()); err != nil {
|
||||
s.log.Warn("ws write error", "err", err)
|
||||
return
|
||||
|
||||
s.stateMu.Lock()
|
||||
msg.SessionID = s.sessionID
|
||||
msg.Seq = s.seq
|
||||
s.seq++
|
||||
s.stateMu.Unlock()
|
||||
}
|
||||
continue
|
||||
}
|
||||
if err := s.conn.WriteMessage(websocket.TextMessage, msg.Bytes()); err != nil {
|
||||
|
||||
if err := s.writeJSON(msg); err != nil {
|
||||
s.log.Warn("ws write error", "err", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *session) writeJSON(msg ServerMsg) error {
|
||||
s.writeMu.Lock()
|
||||
defer s.writeMu.Unlock()
|
||||
return s.conn.WriteMessage(websocket.TextMessage, msg.Bytes())
|
||||
}
|
||||
|
||||
func (s *session) handleAudioFrame(data []byte) {
|
||||
if s.active && s.sendAudio != nil {
|
||||
s.sendAudio(data)
|
||||
packet, err := decodeAudioPacket(data)
|
||||
if err != nil {
|
||||
s.log.Warn("invalid audio packet", "err", err)
|
||||
return
|
||||
}
|
||||
|
||||
s.stateMu.Lock()
|
||||
if s.state != StateRecording || s.sendAudio == nil {
|
||||
s.stateMu.Unlock()
|
||||
return
|
||||
}
|
||||
if packet.SessionID != s.sessionID {
|
||||
s.stateMu.Unlock()
|
||||
return
|
||||
}
|
||||
if packet.Seq <= s.lastAudioSeq {
|
||||
s.stateMu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
lastSeq := s.lastAudioSeq
|
||||
if lastSeq > 0 && packet.Seq > lastSeq+1 {
|
||||
s.log.Warn(
|
||||
"audio seq gap",
|
||||
"session_id",
|
||||
s.sessionID,
|
||||
"last",
|
||||
lastSeq,
|
||||
"curr",
|
||||
packet.Seq,
|
||||
)
|
||||
}
|
||||
s.lastAudioSeq = packet.Seq
|
||||
sendAudio := s.sendAudio
|
||||
s.stateMu.Unlock()
|
||||
|
||||
sendAudio(packet.PCM)
|
||||
}
|
||||
|
||||
type audioPacket struct {
|
||||
Version int `msgpack:"v"`
|
||||
SessionID string `msgpack:"sessionId"`
|
||||
Seq uint64 `msgpack:"seq"`
|
||||
PCM []byte `msgpack:"pcm"`
|
||||
}
|
||||
|
||||
func decodeAudioPacket(data []byte) (audioPacket, error) {
|
||||
var packet audioPacket
|
||||
if err := msgpack.Unmarshal(data, &packet); err != nil {
|
||||
return audioPacket{}, fmt.Errorf("msgpack decode failed: %w", err)
|
||||
}
|
||||
if packet.Version != 1 {
|
||||
return audioPacket{}, fmt.Errorf("unsupported version: %d", packet.Version)
|
||||
}
|
||||
if packet.SessionID == "" || len(packet.SessionID) > 96 {
|
||||
return audioPacket{}, fmt.Errorf("invalid session id")
|
||||
}
|
||||
if packet.Seq == 0 {
|
||||
return audioPacket{}, fmt.Errorf("invalid seq")
|
||||
}
|
||||
if len(packet.PCM) == 0 || len(packet.PCM)%2 != 0 {
|
||||
return audioPacket{}, fmt.Errorf("invalid pcm size")
|
||||
}
|
||||
|
||||
return packet, nil
|
||||
}
|
||||
|
||||
func (h *Handler) handleTextMessage(s *session, data []byte) {
|
||||
var msg ClientMsg
|
||||
if err := json.Unmarshal(data, &msg); err != nil {
|
||||
s.log.Warn("invalid json", "err", err)
|
||||
return
|
||||
}
|
||||
|
||||
switch msg.Type {
|
||||
case MsgHello:
|
||||
_ = s.writeJSON(ServerMsg{Type: MsgReady, Message: "ok"})
|
||||
s.stateMu.Lock()
|
||||
state := s.state
|
||||
sid := s.sessionID
|
||||
s.stateMu.Unlock()
|
||||
_ = s.writeJSON(ServerMsg{Type: MsgState, State: state, SessionID: sid})
|
||||
case MsgPing:
|
||||
_ = s.writeJSON(ServerMsg{Type: MsgPong, TS: msg.TS})
|
||||
case MsgStart:
|
||||
h.handleStart(s)
|
||||
h.handleStart(s, msg)
|
||||
case MsgStop:
|
||||
h.handleStop(s)
|
||||
h.handleStop(s, msg)
|
||||
case MsgPaste:
|
||||
h.handlePaste(s, msg.Text)
|
||||
h.handlePaste(s, msg.Text, msg.SessionID)
|
||||
default:
|
||||
h.sendError(s, "bad_message", "unsupported message type", true, "")
|
||||
}
|
||||
}
|
||||
func (h *Handler) handleStart(s *session) {
|
||||
if s.active {
|
||||
|
||||
func (h *Handler) handleStart(s *session, msg ClientMsg) {
|
||||
if msg.SessionID == "" {
|
||||
msg.SessionID = uuid.NewString()
|
||||
}
|
||||
if len(msg.SessionID) > 96 {
|
||||
h.sendError(s, "invalid_session", "invalid sessionId", false, "")
|
||||
return
|
||||
}
|
||||
// Future extension: support runtime dynamic hotwords (Phase 2)
|
||||
// if len(msg.Hotwords) > 0 {
|
||||
// // Priority: runtime hotwords > config.yaml hotwords
|
||||
// // Need to modify asrFactory signature to pass msg.Hotwords
|
||||
// }
|
||||
|
||||
s.stateMu.Lock()
|
||||
if s.state != StateIdle {
|
||||
current := s.sessionID
|
||||
s.stateMu.Unlock()
|
||||
h.sendError(s, "busy", "session is not idle", true, current)
|
||||
return
|
||||
}
|
||||
s.state = StateRecording
|
||||
s.sessionID = msg.SessionID
|
||||
s.seq = 1
|
||||
s.lastAudioSeq = 0
|
||||
s.sendAudio = nil
|
||||
s.cleanup = nil
|
||||
s.stateMu.Unlock()
|
||||
|
||||
s.previewMu.Lock()
|
||||
s.previewText = ""
|
||||
s.previewMu.Unlock()
|
||||
|
||||
sa, cl, err := h.asrFactory(s.resultCh)
|
||||
if err != nil {
|
||||
s.log.Error("asr start failed", "err", err)
|
||||
s.resultCh <- ServerMsg{Type: MsgError, Message: "ASR start failed"}
|
||||
s.stateMu.Lock()
|
||||
s.state = StateIdle
|
||||
s.sessionID = ""
|
||||
s.sendAudio = nil
|
||||
s.cleanup = nil
|
||||
s.stateMu.Unlock()
|
||||
h.sendError(s, "start_failed", "ASR start failed", true, "")
|
||||
_ = s.writeJSON(ServerMsg{Type: MsgState, State: StateIdle})
|
||||
return
|
||||
}
|
||||
|
||||
s.stateMu.Lock()
|
||||
if s.sessionID != msg.SessionID || s.state != StateRecording {
|
||||
s.stateMu.Unlock()
|
||||
cl()
|
||||
return
|
||||
}
|
||||
s.sendAudio = sa
|
||||
s.cleanup = cl
|
||||
s.active = true
|
||||
s.log.Info("recording started")
|
||||
s.stateMu.Unlock()
|
||||
|
||||
_ = s.writeJSON(ServerMsg{Type: MsgStartAck, SessionID: msg.SessionID})
|
||||
_ = s.writeJSON(ServerMsg{Type: MsgState, State: StateRecording, SessionID: msg.SessionID})
|
||||
s.log.Info("recording started", "session_id", msg.SessionID)
|
||||
}
|
||||
func (h *Handler) handleStop(s *session) {
|
||||
if !s.active {
|
||||
|
||||
func (h *Handler) handleStop(s *session, msg ClientMsg) {
|
||||
s.stateMu.Lock()
|
||||
if s.state == StateIdle {
|
||||
s.stateMu.Unlock()
|
||||
return
|
||||
}
|
||||
s.cleanupASR()
|
||||
s.sendAudio = nil
|
||||
s.active = false
|
||||
if s.state == StateStopping {
|
||||
sid := s.sessionID
|
||||
s.stateMu.Unlock()
|
||||
_ = s.writeJSON(ServerMsg{Type: MsgStopAck, SessionID: sid})
|
||||
return
|
||||
}
|
||||
|
||||
if msg.SessionID != "" && msg.SessionID != s.sessionID {
|
||||
current := s.sessionID
|
||||
s.stateMu.Unlock()
|
||||
h.sendError(s, "session_mismatch", "stop sessionId mismatch", false, current)
|
||||
return
|
||||
}
|
||||
|
||||
s.state = StateStopping
|
||||
sid := s.sessionID
|
||||
s.stateMu.Unlock()
|
||||
|
||||
_ = s.writeJSON(ServerMsg{Type: MsgStopAck, SessionID: sid})
|
||||
_ = s.writeJSON(ServerMsg{Type: MsgState, State: StateStopping, SessionID: sid})
|
||||
|
||||
go h.finalizeStop(s, sid)
|
||||
}
|
||||
|
||||
func (h *Handler) finalizeStop(s *session, sid string) {
|
||||
cleanup := s.detachCleanup(sid)
|
||||
if cleanup != nil {
|
||||
cleanup()
|
||||
}
|
||||
|
||||
s.previewMu.Lock()
|
||||
finalText := s.previewText
|
||||
s.previewText = ""
|
||||
s.previewMu.Unlock()
|
||||
|
||||
if finalText != "" && h.pasteFunc != nil {
|
||||
if err := h.pasteFunc(finalText); err != nil {
|
||||
s.log.Error("auto-paste failed", "err", err)
|
||||
h.sendError(s, "paste_failed", "auto-paste failed", true, sid)
|
||||
} else {
|
||||
s.resultCh <- ServerMsg{Type: MsgPasted}
|
||||
_ = s.writeJSON(ServerMsg{Type: MsgPasted, SessionID: sid})
|
||||
}
|
||||
}
|
||||
s.log.Info("recording stopped")
|
||||
|
||||
s.stateMu.Lock()
|
||||
if s.sessionID == sid {
|
||||
s.state = StateIdle
|
||||
s.sessionID = ""
|
||||
s.seq = 1
|
||||
s.lastAudioSeq = 0
|
||||
s.sendAudio = nil
|
||||
s.cleanup = nil
|
||||
}
|
||||
func (h *Handler) handlePaste(s *session, text string) {
|
||||
s.stateMu.Unlock()
|
||||
|
||||
_ = s.writeJSON(ServerMsg{Type: MsgState, State: StateIdle})
|
||||
s.log.Info("recording stopped", "session_id", sid)
|
||||
}
|
||||
|
||||
func (h *Handler) handlePaste(s *session, text, sid string) {
|
||||
if text == "" {
|
||||
return
|
||||
}
|
||||
if h.pasteFunc != nil {
|
||||
if err := h.pasteFunc(text); err != nil {
|
||||
s.log.Error("paste failed", "err", err)
|
||||
s.resultCh <- ServerMsg{Type: MsgError, Message: "paste failed"}
|
||||
h.sendError(s, "paste_failed", "paste failed", true, sid)
|
||||
} else {
|
||||
s.resultCh <- ServerMsg{Type: MsgPasted}
|
||||
_ = s.writeJSON(ServerMsg{Type: MsgPasted, SessionID: sid})
|
||||
}
|
||||
}
|
||||
}
|
||||
func (s *session) cleanupASR() {
|
||||
if s.cleanup != nil {
|
||||
s.cleanup()
|
||||
|
||||
func (h *Handler) sendError(s *session, code, message string, retryable bool, sid string) {
|
||||
err := s.writeJSON(ServerMsg{
|
||||
Type: MsgError,
|
||||
Code: code,
|
||||
Message: message,
|
||||
Retryable: retryable,
|
||||
SessionID: sid,
|
||||
})
|
||||
if err != nil {
|
||||
s.log.Warn("send error message failed", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *session) detachCleanup(sid string) func() {
|
||||
s.stateMu.Lock()
|
||||
defer s.stateMu.Unlock()
|
||||
if sid != "" && s.sessionID != sid {
|
||||
return nil
|
||||
}
|
||||
cleanup := s.cleanup
|
||||
s.cleanup = nil
|
||||
s.sendAudio = nil
|
||||
return cleanup
|
||||
}
|
||||
|
||||
func (s *session) forceStop() {
|
||||
cleanup := s.detachCleanup("")
|
||||
if cleanup != nil {
|
||||
cleanup()
|
||||
}
|
||||
s.stateMu.Lock()
|
||||
s.state = StateIdle
|
||||
s.sessionID = ""
|
||||
s.seq = 1
|
||||
s.lastAudioSeq = 0
|
||||
s.stateMu.Unlock()
|
||||
}
|
||||
@@ -8,15 +8,22 @@ import "encoding/json"
|
||||
type MsgType string
|
||||
|
||||
const (
|
||||
MsgStart MsgType = "start" // Begin recording session
|
||||
MsgStop MsgType = "stop" // End recording session
|
||||
MsgPaste MsgType = "paste" // Re-paste a history item
|
||||
MsgHello MsgType = "hello"
|
||||
MsgStart MsgType = "start"
|
||||
MsgStop MsgType = "stop"
|
||||
MsgPaste MsgType = "paste"
|
||||
MsgPing MsgType = "ping"
|
||||
MsgPong MsgType = "pong"
|
||||
)
|
||||
|
||||
// ClientMsg is a JSON control message from the phone.
|
||||
type ClientMsg struct {
|
||||
Type MsgType `json:"type"`
|
||||
SessionID string `json:"sessionId,omitempty"`
|
||||
Seq int64 `json:"seq,omitempty"`
|
||||
Text string `json:"text,omitempty"` // Only for "paste"
|
||||
Version int `json:"version,omitempty"`
|
||||
TS int64 `json:"ts,omitempty"`
|
||||
// Future extension: dynamic hotwords (Phase 2)
|
||||
// Hotwords []string `json:"hotwords,omitempty"`
|
||||
}
|
||||
@@ -24,17 +31,33 @@ type ClientMsg struct {
|
||||
// ── Server → Client messages ──
|
||||
|
||||
const (
|
||||
MsgPartial MsgType = "partial" // Interim ASR result
|
||||
MsgFinal MsgType = "final" // Final ASR result
|
||||
MsgPasted MsgType = "pasted" // Paste confirmed
|
||||
MsgError MsgType = "error" // Error notification
|
||||
MsgReady MsgType = "ready"
|
||||
MsgState MsgType = "state"
|
||||
MsgStartAck MsgType = "start_ack"
|
||||
MsgStopAck MsgType = "stop_ack"
|
||||
MsgPartial MsgType = "partial"
|
||||
MsgFinal MsgType = "final"
|
||||
MsgPasted MsgType = "pasted"
|
||||
MsgError MsgType = "error"
|
||||
)
|
||||
|
||||
const (
|
||||
StateIdle = "idle"
|
||||
StateRecording = "recording"
|
||||
StateStopping = "stopping"
|
||||
)
|
||||
|
||||
// ServerMsg is a JSON message sent to the phone.
|
||||
type ServerMsg struct {
|
||||
Type MsgType `json:"type"`
|
||||
State string `json:"state,omitempty"`
|
||||
SessionID string `json:"sessionId,omitempty"`
|
||||
Seq int64 `json:"seq,omitempty"`
|
||||
Text string `json:"text,omitempty"`
|
||||
Message string `json:"message,omitempty"` // For errors
|
||||
Code string `json:"code,omitempty"`
|
||||
Retryable bool `json:"retryable,omitempty"`
|
||||
TS int64 `json:"ts,omitempty"`
|
||||
}
|
||||
|
||||
func (m ServerMsg) Bytes() []byte {
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
"": {
|
||||
"name": "web",
|
||||
"dependencies": {
|
||||
"@msgpack/msgpack": "^3.1.3",
|
||||
"@picovoice/web-voice-processor": "^4.0.9",
|
||||
"partysocket": "^1.1.16",
|
||||
"react": "^19.2.4",
|
||||
@@ -297,6 +298,8 @@
|
||||
|
||||
"@jridgewell/trace-mapping": ["@jridgewell/trace-mapping@0.3.31", "", { "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" } }, "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw=="],
|
||||
|
||||
"@msgpack/msgpack": ["@msgpack/msgpack@3.1.3", "", {}, "sha512-47XIizs9XZXvuJgoaJUIE2lFoID8ugvc0jzSHP+Ptfk8nTbnR8g788wv48N03Kx0UkAv559HWRQ3yzOgzlRNUA=="],
|
||||
|
||||
"@picovoice/web-utils": ["@picovoice/web-utils@1.3.1", "", { "dependencies": { "commander": "^9.2.0" }, "bin": { "pvbase64": "scripts/base64.js" } }, "sha512-jcDqdULtTm+yJrnHDjg64hARup+Z4wNkYuXHNx6EM8+qZkweBq9UA6XJrHAlUkPnlkso4JWjaIKhz3x8vZcd3g=="],
|
||||
|
||||
"@picovoice/web-voice-processor": ["@picovoice/web-voice-processor@4.0.9", "", { "dependencies": { "@picovoice/web-utils": "=1.3.1" } }, "sha512-20pdkFjtuiojAdLIkNHXt4YgpRnlUePFW+gfkeCb+J+2XTRDGOI50+aJzL95p6QjDzGXsO7PZhlz7yDofOvZtg=="],
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
"vite-plugin-pwa": "^1.2.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@msgpack/msgpack": "^3.1.3",
|
||||
"@picovoice/web-voice-processor": "^4.0.9",
|
||||
"partysocket": "^1.1.16",
|
||||
"react": "^19.2.4",
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { useEffect } from "react";
|
||||
import { Toaster } from "sonner";
|
||||
import { HistoryList } from "./components/HistoryList";
|
||||
import { MicButton } from "./components/MicButton";
|
||||
@@ -5,13 +6,36 @@ import { PreviewBox } from "./components/PreviewBox";
|
||||
import { StatusBadge } from "./components/StatusBadge";
|
||||
import { useRecorder } from "./hooks/useRecorder";
|
||||
import { useWebSocket } from "./hooks/useWebSocket";
|
||||
import { useAppStore } from "./stores/app-store";
|
||||
|
||||
export function App() {
|
||||
const { sendJSON, sendBinary } = useWebSocket();
|
||||
const { startRecording, stopRecording } = useRecorder({
|
||||
sendJSON,
|
||||
sendBinary,
|
||||
const { requestStart, sendStop, sendPaste, sendAudioFrame } = useWebSocket();
|
||||
const { prewarm, startRecording, stopRecording } = useRecorder({
|
||||
requestStart,
|
||||
sendStop,
|
||||
sendAudioFrame,
|
||||
});
|
||||
const micReady = useAppStore((s) => s.micReady);
|
||||
|
||||
useEffect(() => {
|
||||
const forceStopOnBackground = () => {
|
||||
const state = useAppStore.getState();
|
||||
if (state.recording || state.pendingStart) {
|
||||
stopRecording();
|
||||
}
|
||||
};
|
||||
|
||||
const onVisibility = () => {
|
||||
if (document.hidden) forceStopOnBackground();
|
||||
};
|
||||
|
||||
document.addEventListener("visibilitychange", onVisibility);
|
||||
window.addEventListener("pagehide", forceStopOnBackground);
|
||||
return () => {
|
||||
document.removeEventListener("visibilitychange", onVisibility);
|
||||
window.removeEventListener("pagehide", forceStopOnBackground);
|
||||
};
|
||||
}, [stopRecording]);
|
||||
|
||||
return (
|
||||
<>
|
||||
@@ -24,8 +48,17 @@ export function App() {
|
||||
</header>
|
||||
|
||||
<PreviewBox />
|
||||
{!micReady ? (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => void prewarm()}
|
||||
className="mb-3 rounded-xl border border-edge bg-surface px-4 py-3 font-medium text-fg text-sm"
|
||||
>
|
||||
{"先点我准备麦克风(首次会弹权限)"}
|
||||
</button>
|
||||
) : null}
|
||||
<MicButton onStart={startRecording} onStop={stopRecording} />
|
||||
<HistoryList sendJSON={sendJSON} />
|
||||
<HistoryList sendPaste={sendPaste} />
|
||||
</div>
|
||||
<Toaster theme="dark" position="bottom-center" />
|
||||
</>
|
||||
|
||||
@@ -8,19 +8,19 @@ function formatTime(ts: number): string {
|
||||
}
|
||||
|
||||
interface HistoryListProps {
|
||||
sendJSON: (obj: Record<string, unknown>) => void;
|
||||
sendPaste: (text: string) => void;
|
||||
}
|
||||
|
||||
export function HistoryList({ sendJSON }: HistoryListProps) {
|
||||
export function HistoryList({ sendPaste }: HistoryListProps) {
|
||||
const history = useAppStore((s) => s.history);
|
||||
const clearHistory = useAppStore((s) => s.clearHistory);
|
||||
|
||||
const handleItemClick = useCallback(
|
||||
(text: string) => {
|
||||
sendJSON({ type: "paste", text });
|
||||
sendPaste(text);
|
||||
toast.info("发送粘贴…");
|
||||
},
|
||||
[sendJSON],
|
||||
[sendPaste],
|
||||
);
|
||||
|
||||
return (
|
||||
|
||||
@@ -8,9 +8,13 @@ interface MicButtonProps {
|
||||
|
||||
export function MicButton({ onStart, onStop }: MicButtonProps) {
|
||||
const connected = useAppStore((s) => s.connectionStatus === "connected");
|
||||
const micReady = useAppStore((s) => s.micReady);
|
||||
const recording = useAppStore((s) => s.recording);
|
||||
const pendingStart = useAppStore((s) => s.pendingStart);
|
||||
const isActive = recording || pendingStart;
|
||||
const stopping = useAppStore((s) => s.stopping);
|
||||
const weakNetwork = useAppStore((s) => s.weakNetwork);
|
||||
const isActive = recording || pendingStart || stopping;
|
||||
const disabled = !connected || !micReady || stopping;
|
||||
|
||||
const handlePointerDown = useCallback(
|
||||
(e: React.PointerEvent<HTMLButtonElement>) => {
|
||||
@@ -46,6 +50,8 @@ export function MicButton({ onStart, onStop }: MicButtonProps) {
|
||||
"cursor-not-allowed border-edge bg-linear-to-br from-surface-hover to-surface text-fg-secondary opacity-30 shadow-[0_2px_12px_rgba(0,0,0,0.3),inset_0_1px_0_rgba(255,255,255,0.04)]";
|
||||
const activeClasses =
|
||||
"animate-mic-breathe scale-[1.06] border-accent-hover bg-accent text-white shadow-[0_0_32px_rgba(99,102,241,0.35),0_0_80px_rgba(99,102,241,0.2)]";
|
||||
const weakClasses =
|
||||
"border-amber-400 bg-linear-to-br from-amber-400/15 to-surface text-amber-200 shadow-[0_0_22px_rgba(251,191,36,0.28)]";
|
||||
const idleClasses =
|
||||
"border-edge bg-linear-to-br from-surface-hover to-surface text-fg-secondary shadow-[0_2px_12px_rgba(0,0,0,0.3),inset_0_1px_0_rgba(255,255,255,0.04)]";
|
||||
|
||||
@@ -54,10 +60,12 @@ export function MicButton({ onStart, onStop }: MicButtonProps) {
|
||||
<div className="relative flex touch-none items-center justify-center">
|
||||
<button
|
||||
type="button"
|
||||
disabled={!connected}
|
||||
disabled={disabled}
|
||||
className={`relative z-1 flex size-24 cursor-pointer touch-none select-none items-center justify-center rounded-full border-2 transition-all duration-[250ms] ease-[cubic-bezier(0.4,0,0.2,1)] ${
|
||||
!connected
|
||||
disabled
|
||||
? disabledClasses
|
||||
: weakNetwork
|
||||
? weakClasses
|
||||
: isActive
|
||||
? activeClasses
|
||||
: idleClasses
|
||||
@@ -98,7 +106,17 @@ export function MicButton({ onStart, onStop }: MicButtonProps) {
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
<p className="font-medium text-fg-dim text-sm">{"按住说话"}</p>
|
||||
<p className="font-medium text-fg-dim text-sm">
|
||||
{!micReady
|
||||
? "请先准备麦克风"
|
||||
: !connected
|
||||
? "连接中断,等待重连"
|
||||
: stopping
|
||||
? "收尾中…"
|
||||
: weakNetwork
|
||||
? "网络波动,已启用缓冲"
|
||||
: "按住说话"}
|
||||
</p>
|
||||
</section>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -3,7 +3,11 @@ import { useAppStore } from "../stores/app-store";
|
||||
export function PreviewBox() {
|
||||
const text = useAppStore((s) => s.previewText);
|
||||
const active = useAppStore((s) => s.previewActive);
|
||||
const weakNetwork = useAppStore((s) => s.weakNetwork);
|
||||
const recording = useAppStore((s) => s.recording);
|
||||
const hasText = text.length > 0;
|
||||
const placeholder =
|
||||
weakNetwork && recording ? "网络波动中,音频缓冲后发送…" : "按住说话…";
|
||||
|
||||
return (
|
||||
<section className="shrink-0 pb-3">
|
||||
@@ -17,7 +21,7 @@ export function PreviewBox() {
|
||||
<p
|
||||
className={`break-words text-base leading-relaxed ${hasText ? "" : "text-fg-dim"}`}
|
||||
>
|
||||
{hasText ? text : "按住说话…"}
|
||||
{hasText ? text : placeholder}
|
||||
</p>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
@@ -20,7 +20,12 @@ const statusConfig = {
|
||||
|
||||
export function StatusBadge() {
|
||||
const status = useAppStore((s) => s.connectionStatus);
|
||||
const { text, dotClass, borderClass } = statusConfig[status];
|
||||
const weakNetwork = useAppStore((s) => s.weakNetwork);
|
||||
const { dotClass, borderClass } = statusConfig[status];
|
||||
const text =
|
||||
status === "connected" && weakNetwork
|
||||
? "网络波动"
|
||||
: statusConfig[status].text;
|
||||
|
||||
return (
|
||||
<div
|
||||
|
||||
@@ -3,71 +3,116 @@ import { useCallback, useRef } from "react";
|
||||
import { toast } from "sonner";
|
||||
import { useAppStore } from "../stores/app-store";
|
||||
|
||||
/**
|
||||
* ~200ms frames at 16kHz = 3200 samples.
|
||||
* Doubao bigmodel_async recommends 200ms packets for optimal performance.
|
||||
*/
|
||||
const FRAME_LENGTH = 3200;
|
||||
|
||||
interface UseRecorderOptions {
|
||||
sendJSON: (obj: Record<string, unknown>) => void;
|
||||
sendBinary: (data: Int16Array) => void;
|
||||
requestStart: () => Promise<string | null>;
|
||||
sendStop: (sessionId: string | null) => void;
|
||||
sendAudioFrame: (sessionId: string, seq: number, data: Int16Array) => boolean;
|
||||
}
|
||||
|
||||
export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
|
||||
let optionsInitialized = false;
|
||||
|
||||
export function useRecorder({
|
||||
requestStart,
|
||||
sendStop,
|
||||
sendAudioFrame,
|
||||
}: UseRecorderOptions) {
|
||||
const abortRef = useRef<AbortController | null>(null);
|
||||
const engineRef = useRef<{ onmessage: (e: MessageEvent) => void } | null>(
|
||||
null,
|
||||
);
|
||||
const warmedRef = useRef(false);
|
||||
const audioSeqRef = useRef(1);
|
||||
|
||||
// Keep stable refs so callbacks never go stale
|
||||
const sendJSONRef = useRef(sendJSON);
|
||||
const sendBinaryRef = useRef(sendBinary);
|
||||
sendJSONRef.current = sendJSON;
|
||||
sendBinaryRef.current = sendBinary;
|
||||
|
||||
const startRecording = useCallback(async () => {
|
||||
const store = useAppStore.getState();
|
||||
if (store.recording || store.pendingStart) return;
|
||||
|
||||
store.setPendingStart(true);
|
||||
const abort = new AbortController();
|
||||
abortRef.current = abort;
|
||||
|
||||
try {
|
||||
// Create an engine that receives Int16Array @ 16kHz from WebVoiceProcessor
|
||||
const engine = {
|
||||
onmessage: (e: MessageEvent) => {
|
||||
if (e.data.command === "process") {
|
||||
sendBinaryRef.current(e.data.inputFrame as Int16Array);
|
||||
}
|
||||
},
|
||||
};
|
||||
engineRef.current = engine;
|
||||
|
||||
const initOptions = useCallback(() => {
|
||||
if (optionsInitialized) return;
|
||||
WebVoiceProcessor.setOptions({
|
||||
frameLength: FRAME_LENGTH,
|
||||
outputSampleRate: 16000,
|
||||
});
|
||||
optionsInitialized = true;
|
||||
}, []);
|
||||
|
||||
const prewarm = useCallback(async (): Promise<boolean> => {
|
||||
if (warmedRef.current) return true;
|
||||
initOptions();
|
||||
|
||||
const warmEngine = {
|
||||
onmessage: () => {},
|
||||
};
|
||||
|
||||
try {
|
||||
await WebVoiceProcessor.subscribe(warmEngine);
|
||||
await WebVoiceProcessor.unsubscribe(warmEngine);
|
||||
warmedRef.current = true;
|
||||
useAppStore.getState().setMicReady(true);
|
||||
return true;
|
||||
} catch (err) {
|
||||
const error = err as Error;
|
||||
toast.error(`麦克风准备失败: ${error.message}`);
|
||||
return false;
|
||||
}
|
||||
}, [initOptions]);
|
||||
|
||||
const startRecording = useCallback(async () => {
|
||||
const store = useAppStore.getState();
|
||||
if (store.recording || store.pendingStart || store.stopping) return;
|
||||
|
||||
store.setPendingStart(true);
|
||||
store.setStopping(false);
|
||||
|
||||
const abort = new AbortController();
|
||||
abortRef.current = abort;
|
||||
|
||||
try {
|
||||
const warmed = await prewarm();
|
||||
if (!warmed) {
|
||||
store.setPendingStart(false);
|
||||
abortRef.current = null;
|
||||
return;
|
||||
}
|
||||
|
||||
const sessionId = await requestStart();
|
||||
if (!sessionId) {
|
||||
store.setPendingStart(false);
|
||||
abortRef.current = null;
|
||||
toast.error("启动会话失败,请重试");
|
||||
return;
|
||||
}
|
||||
|
||||
const engine = {
|
||||
onmessage: (e: MessageEvent) => {
|
||||
if (e.data.command !== "process") return;
|
||||
const seq = audioSeqRef.current;
|
||||
audioSeqRef.current += 1;
|
||||
sendAudioFrame(sessionId, seq, e.data.inputFrame as Int16Array);
|
||||
},
|
||||
};
|
||||
engineRef.current = engine;
|
||||
audioSeqRef.current = 1;
|
||||
|
||||
// subscribe() handles getUserMedia + AudioContext lifecycle internally.
|
||||
// It checks for closed/suspended AudioContext and re-creates as needed.
|
||||
await WebVoiceProcessor.subscribe(engine);
|
||||
|
||||
if (abort.signal.aborted) {
|
||||
await WebVoiceProcessor.unsubscribe(engine);
|
||||
engineRef.current = null;
|
||||
sendStop(sessionId);
|
||||
store.setPendingStart(false);
|
||||
abortRef.current = null;
|
||||
return;
|
||||
}
|
||||
|
||||
store.setActiveSessionId(sessionId);
|
||||
store.setPendingStart(false);
|
||||
abortRef.current = null;
|
||||
store.setRecording(true);
|
||||
sendJSONRef.current({ type: "start" });
|
||||
store.setStopping(false);
|
||||
store.clearPreview();
|
||||
abortRef.current = null;
|
||||
} catch (err) {
|
||||
useAppStore.getState().setPendingStart(false);
|
||||
store.setPendingStart(false);
|
||||
store.setRecording(false);
|
||||
store.setStopping(false);
|
||||
abortRef.current = null;
|
||||
engineRef.current = null;
|
||||
|
||||
@@ -86,7 +131,7 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
|
||||
toast.error(`麦克风错误: ${error.message}`);
|
||||
}
|
||||
}
|
||||
}, []);
|
||||
}, [prewarm, requestStart, sendAudioFrame, sendStop]);
|
||||
|
||||
const stopRecording = useCallback(() => {
|
||||
const store = useAppStore.getState();
|
||||
@@ -100,15 +145,16 @@ export function useRecorder({ sendJSON, sendBinary }: UseRecorderOptions) {
|
||||
|
||||
if (!store.recording) return;
|
||||
store.setRecording(false);
|
||||
store.setStopping(true);
|
||||
audioSeqRef.current = 1;
|
||||
|
||||
if (engineRef.current) {
|
||||
// Fire-and-forget: state is already updated, cleanup is async
|
||||
WebVoiceProcessor.unsubscribe(engineRef.current);
|
||||
engineRef.current = null;
|
||||
}
|
||||
|
||||
sendJSONRef.current({ type: "stop" });
|
||||
}, []);
|
||||
sendStop(store.activeSessionId);
|
||||
}, [sendStop]);
|
||||
|
||||
return { startRecording, stopRecording };
|
||||
return { prewarm, startRecording, stopRecording };
|
||||
}
|
||||
|
||||
@@ -1,8 +1,31 @@
|
||||
import { encode } from "@msgpack/msgpack";
|
||||
import { WebSocket as ReconnectingWebSocket } from "partysocket";
|
||||
import { useCallback, useEffect, useRef } from "react";
|
||||
import { toast } from "sonner";
|
||||
import type { ClientMsg, ServerMsg } from "../protocol";
|
||||
import { useAppStore } from "../stores/app-store";
|
||||
|
||||
const HEARTBEAT_INTERVAL_MS = 15000;
|
||||
const HEARTBEAT_TIMEOUT_MS = 10000;
|
||||
const START_TIMEOUT_MS = 5000;
|
||||
const AUDIO_PACKET_VERSION = 1;
|
||||
const MAX_AUDIO_QUEUE = 6;
|
||||
const WEAK_NETWORK_TOAST_INTERVAL_MS = 4000;
|
||||
const WS_BACKPRESSURE_BYTES = 128 * 1024;
|
||||
|
||||
interface QueuedAudioPacket {
|
||||
sessionId: string;
|
||||
seq: number;
|
||||
buffer: ArrayBuffer;
|
||||
}
|
||||
|
||||
interface AudioPacketPayload {
|
||||
v: number;
|
||||
sessionId: string;
|
||||
seq: number;
|
||||
pcm: Uint8Array;
|
||||
}
|
||||
|
||||
function getWsUrl(): string {
|
||||
const proto = location.protocol === "https:" ? "wss:" : "ws:";
|
||||
const params = new URLSearchParams(location.search);
|
||||
@@ -11,75 +34,355 @@ function getWsUrl(): string {
|
||||
return `${proto}//${location.host}/ws${q}`;
|
||||
}
|
||||
|
||||
function createSessionId(): string {
|
||||
if (typeof crypto !== "undefined" && "randomUUID" in crypto) {
|
||||
return crypto.randomUUID();
|
||||
}
|
||||
return `${Date.now()}-${Math.random().toString(16).slice(2)}`;
|
||||
}
|
||||
|
||||
function encodeAudioPacket(
|
||||
sessionId: string,
|
||||
seq: number,
|
||||
pcm: Int16Array,
|
||||
): ArrayBuffer {
|
||||
const sidBytes = new TextEncoder().encode(sessionId);
|
||||
if (sidBytes.length === 0 || sidBytes.length > 96) {
|
||||
throw new Error("invalid sessionId length");
|
||||
}
|
||||
|
||||
const pcmBytes = new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength);
|
||||
const payload: AudioPacketPayload = {
|
||||
v: AUDIO_PACKET_VERSION,
|
||||
sessionId,
|
||||
seq,
|
||||
pcm: pcmBytes,
|
||||
};
|
||||
const packed = encode(payload);
|
||||
return packed.buffer.slice(
|
||||
packed.byteOffset,
|
||||
packed.byteOffset + packed.byteLength,
|
||||
);
|
||||
}
|
||||
|
||||
export function useWebSocket() {
|
||||
const wsRef = useRef<ReconnectingWebSocket | null>(null);
|
||||
const heartbeatTimerRef = useRef<number | null>(null);
|
||||
const heartbeatTimeoutRef = useRef<number | null>(null);
|
||||
const pendingStartRef = useRef<{
|
||||
sessionId: string;
|
||||
resolve: (sessionId: string | null) => void;
|
||||
timer: number;
|
||||
} | null>(null);
|
||||
const audioQueueRef = useRef<QueuedAudioPacket[]>([]);
|
||||
const lastWeakToastAtRef = useRef(0);
|
||||
|
||||
const sendJSON = useCallback((obj: Record<string, unknown>) => {
|
||||
const ws = wsRef.current;
|
||||
if (ws?.readyState === WebSocket.OPEN) {
|
||||
ws.send(JSON.stringify(obj));
|
||||
const clearHeartbeat = useCallback(() => {
|
||||
if (heartbeatTimerRef.current !== null) {
|
||||
window.clearInterval(heartbeatTimerRef.current);
|
||||
heartbeatTimerRef.current = null;
|
||||
}
|
||||
if (heartbeatTimeoutRef.current !== null) {
|
||||
window.clearTimeout(heartbeatTimeoutRef.current);
|
||||
heartbeatTimeoutRef.current = null;
|
||||
}
|
||||
}, []);
|
||||
|
||||
const sendBinary = useCallback((data: Int16Array) => {
|
||||
const sendControl = useCallback((msg: ClientMsg): boolean => {
|
||||
const ws = wsRef.current;
|
||||
if (ws?.readyState === WebSocket.OPEN) {
|
||||
ws.send(data.buffer as ArrayBuffer);
|
||||
ws.send(JSON.stringify(msg));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}, []);
|
||||
|
||||
const notifyWeakNetwork = useCallback(() => {
|
||||
const now = Date.now();
|
||||
if (now - lastWeakToastAtRef.current < WEAK_NETWORK_TOAST_INTERVAL_MS) {
|
||||
return;
|
||||
}
|
||||
lastWeakToastAtRef.current = now;
|
||||
toast.warning("网络波动,正在缓冲音频…");
|
||||
}, []);
|
||||
|
||||
const flushAudioQueue = useCallback(() => {
|
||||
const ws = wsRef.current;
|
||||
if (ws?.readyState !== WebSocket.OPEN) return;
|
||||
|
||||
const activeSessionId = useAppStore.getState().activeSessionId;
|
||||
if (!activeSessionId) {
|
||||
audioQueueRef.current = [];
|
||||
useAppStore.getState().setWeakNetwork(false);
|
||||
return;
|
||||
}
|
||||
|
||||
const remaining: QueuedAudioPacket[] = [];
|
||||
for (const packet of audioQueueRef.current) {
|
||||
if (packet.sessionId !== activeSessionId) {
|
||||
continue;
|
||||
}
|
||||
if (
|
||||
ws.readyState !== WebSocket.OPEN ||
|
||||
ws.bufferedAmount > WS_BACKPRESSURE_BYTES
|
||||
) {
|
||||
remaining.push(packet);
|
||||
continue;
|
||||
}
|
||||
ws.send(packet.buffer);
|
||||
}
|
||||
|
||||
audioQueueRef.current = remaining;
|
||||
if (audioQueueRef.current.length === 0) {
|
||||
useAppStore.getState().setWeakNetwork(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
useAppStore.getState().setConnectionStatus("connecting");
|
||||
const sendAudioFrame = useCallback(
|
||||
(sessionId: string, seq: number, pcm: Int16Array): boolean => {
|
||||
const store = useAppStore.getState();
|
||||
if (!store.recording || store.activeSessionId !== sessionId) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const ws = new ReconnectingWebSocket(getWsUrl(), undefined, {
|
||||
minReconnectionDelay: 1000,
|
||||
maxReconnectionDelay: 16000,
|
||||
});
|
||||
ws.binaryType = "arraybuffer";
|
||||
|
||||
ws.onopen = () => {
|
||||
useAppStore.getState().setConnectionStatus("connected");
|
||||
};
|
||||
|
||||
ws.onmessage = (e: MessageEvent) => {
|
||||
if (typeof e.data !== "string") return;
|
||||
let buffer: ArrayBuffer;
|
||||
try {
|
||||
const msg = JSON.parse(e.data);
|
||||
buffer = encodeAudioPacket(sessionId, seq, pcm);
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
|
||||
const ws = wsRef.current;
|
||||
if (
|
||||
ws?.readyState === WebSocket.OPEN &&
|
||||
ws.bufferedAmount <= WS_BACKPRESSURE_BYTES
|
||||
) {
|
||||
ws.send(buffer);
|
||||
if (audioQueueRef.current.length === 0) {
|
||||
useAppStore.getState().setWeakNetwork(false);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (audioQueueRef.current.length >= MAX_AUDIO_QUEUE) {
|
||||
audioQueueRef.current.shift();
|
||||
}
|
||||
audioQueueRef.current.push({ sessionId, seq, buffer });
|
||||
useAppStore.getState().setWeakNetwork(true);
|
||||
notifyWeakNetwork();
|
||||
return false;
|
||||
},
|
||||
[notifyWeakNetwork],
|
||||
);
|
||||
|
||||
const resolvePendingStart = useCallback((sessionId: string | null) => {
|
||||
const pending = pendingStartRef.current;
|
||||
if (!pending) return;
|
||||
window.clearTimeout(pending.timer);
|
||||
pending.resolve(sessionId);
|
||||
pendingStartRef.current = null;
|
||||
}, []);
|
||||
|
||||
const handleServerMessage = useCallback(
|
||||
(raw: string) => {
|
||||
let msg: ServerMsg;
|
||||
try {
|
||||
msg = JSON.parse(raw) as ServerMsg;
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
|
||||
const store = useAppStore.getState();
|
||||
switch (msg.type) {
|
||||
case "partial":
|
||||
case "ready":
|
||||
break;
|
||||
case "state":
|
||||
if (msg.state === "idle") {
|
||||
store.setRecording(false);
|
||||
store.setStopping(false);
|
||||
store.setPendingStart(false);
|
||||
store.setActiveSessionId(null);
|
||||
}
|
||||
if (msg.state === "stopping") {
|
||||
store.setStopping(true);
|
||||
}
|
||||
break;
|
||||
case "start_ack":
|
||||
if (
|
||||
pendingStartRef.current &&
|
||||
msg.sessionId === pendingStartRef.current.sessionId
|
||||
) {
|
||||
resolvePendingStart(msg.sessionId ?? null);
|
||||
}
|
||||
break;
|
||||
case "stop_ack":
|
||||
store.setStopping(true);
|
||||
break;
|
||||
case "partial": {
|
||||
const activeSessionId = store.activeSessionId;
|
||||
if (!activeSessionId || msg.sessionId !== activeSessionId) break;
|
||||
store.setPreview(msg.text || "", false);
|
||||
break;
|
||||
case "final":
|
||||
}
|
||||
case "final": {
|
||||
const activeSessionId = store.activeSessionId;
|
||||
if (!activeSessionId || msg.sessionId !== activeSessionId) break;
|
||||
store.setPreview(msg.text || "", true);
|
||||
if (msg.text) store.addHistory(msg.text);
|
||||
break;
|
||||
}
|
||||
case "pasted":
|
||||
toast.success("已粘贴");
|
||||
break;
|
||||
case "error":
|
||||
toast.error(msg.message || "错误");
|
||||
store.setRecording(false);
|
||||
store.setPendingStart(false);
|
||||
store.setStopping(false);
|
||||
resolvePendingStart(null);
|
||||
toast.error(msg.message || "服务错误");
|
||||
break;
|
||||
case "pong":
|
||||
if (heartbeatTimeoutRef.current !== null) {
|
||||
window.clearTimeout(heartbeatTimeoutRef.current);
|
||||
heartbeatTimeoutRef.current = null;
|
||||
}
|
||||
break;
|
||||
}
|
||||
} catch {
|
||||
// Ignore malformed messages
|
||||
},
|
||||
[resolvePendingStart],
|
||||
);
|
||||
|
||||
const startHeartbeat = useCallback(() => {
|
||||
clearHeartbeat();
|
||||
heartbeatTimerRef.current = window.setInterval(() => {
|
||||
if (document.hidden) return;
|
||||
const now = Date.now();
|
||||
if (!sendControl({ type: "ping", ts: now })) return;
|
||||
if (heartbeatTimeoutRef.current !== null) {
|
||||
window.clearTimeout(heartbeatTimeoutRef.current);
|
||||
}
|
||||
heartbeatTimeoutRef.current = window.setTimeout(() => {
|
||||
wsRef.current?.close();
|
||||
}, HEARTBEAT_TIMEOUT_MS);
|
||||
}, HEARTBEAT_INTERVAL_MS);
|
||||
}, [clearHeartbeat, sendControl]);
|
||||
|
||||
useEffect(() => {
|
||||
const ws = new ReconnectingWebSocket(getWsUrl(), undefined, {
|
||||
minReconnectionDelay: 400,
|
||||
maxReconnectionDelay: 6000,
|
||||
});
|
||||
ws.binaryType = "arraybuffer";
|
||||
wsRef.current = ws;
|
||||
useAppStore.getState().setConnectionStatus("connecting");
|
||||
|
||||
ws.onopen = () => {
|
||||
const store = useAppStore.getState();
|
||||
store.setConnectionStatus("connected");
|
||||
store.setWeakNetwork(false);
|
||||
sendControl({ type: "hello", version: 2 });
|
||||
startHeartbeat();
|
||||
flushAudioQueue();
|
||||
};
|
||||
|
||||
ws.onmessage = (e: MessageEvent) => {
|
||||
if (typeof e.data === "string") {
|
||||
handleServerMessage(e.data);
|
||||
}
|
||||
};
|
||||
|
||||
ws.onclose = () => {
|
||||
clearHeartbeat();
|
||||
resolvePendingStart(null);
|
||||
const store = useAppStore.getState();
|
||||
if (store.recording || store.pendingStart) {
|
||||
toast.warning("连接中断,本次录音已结束");
|
||||
}
|
||||
store.setConnectionStatus("disconnected");
|
||||
if (store.recording) store.setRecording(false);
|
||||
if (store.pendingStart) store.setPendingStart(false);
|
||||
store.setWeakNetwork(store.recording || audioQueueRef.current.length > 0);
|
||||
store.setRecording(false);
|
||||
store.setPendingStart(false);
|
||||
store.setStopping(false);
|
||||
store.setActiveSessionId(null);
|
||||
audioQueueRef.current = [];
|
||||
};
|
||||
|
||||
wsRef.current = ws;
|
||||
ws.onerror = () => {
|
||||
ws.close();
|
||||
};
|
||||
|
||||
const onVisibilityChange = () => {
|
||||
if (!document.hidden && wsRef.current?.readyState !== WebSocket.OPEN) {
|
||||
wsRef.current?.close();
|
||||
}
|
||||
};
|
||||
document.addEventListener("visibilitychange", onVisibilityChange);
|
||||
|
||||
return () => {
|
||||
ws.close();
|
||||
document.removeEventListener("visibilitychange", onVisibilityChange);
|
||||
clearHeartbeat();
|
||||
resolvePendingStart(null);
|
||||
wsRef.current?.close();
|
||||
wsRef.current = null;
|
||||
};
|
||||
}, []);
|
||||
}, [
|
||||
clearHeartbeat,
|
||||
flushAudioQueue,
|
||||
handleServerMessage,
|
||||
resolvePendingStart,
|
||||
sendControl,
|
||||
startHeartbeat,
|
||||
]);
|
||||
|
||||
return { sendJSON, sendBinary };
|
||||
const requestStart = useCallback((): Promise<string | null> => {
|
||||
const sessionId = createSessionId();
|
||||
return new Promise<string | null>((resolve) => {
|
||||
if (pendingStartRef.current) {
|
||||
resolve(null);
|
||||
return;
|
||||
}
|
||||
|
||||
const timer = window.setTimeout(() => {
|
||||
if (pendingStartRef.current?.sessionId === sessionId) {
|
||||
pendingStartRef.current = null;
|
||||
resolve(null);
|
||||
}
|
||||
}, START_TIMEOUT_MS);
|
||||
|
||||
pendingStartRef.current = { sessionId, resolve, timer };
|
||||
const ok = sendControl({ type: "start", sessionId, version: 2 });
|
||||
if (!ok) {
|
||||
resolvePendingStart(null);
|
||||
}
|
||||
});
|
||||
}, [resolvePendingStart, sendControl]);
|
||||
|
||||
const sendStop = useCallback(
|
||||
(sessionId: string | null) => {
|
||||
if (!sessionId) return;
|
||||
audioQueueRef.current = audioQueueRef.current.filter(
|
||||
(packet) => packet.sessionId !== sessionId,
|
||||
);
|
||||
if (audioQueueRef.current.length === 0) {
|
||||
useAppStore.getState().setWeakNetwork(false);
|
||||
}
|
||||
sendControl({ type: "stop", sessionId });
|
||||
},
|
||||
[sendControl],
|
||||
);
|
||||
|
||||
const sendPaste = useCallback(
|
||||
(text: string) => {
|
||||
const store = useAppStore.getState();
|
||||
if (store.connectionStatus !== "connected") {
|
||||
toast.warning("当前未连接,无法发送粘贴");
|
||||
return;
|
||||
}
|
||||
const sessionId = useAppStore.getState().activeSessionId ?? undefined;
|
||||
sendControl({ type: "paste", text, sessionId });
|
||||
},
|
||||
[sendControl],
|
||||
);
|
||||
|
||||
return { requestStart, sendStop, sendPaste, sendAudioFrame };
|
||||
}
|
||||
|
||||
35
web/src/protocol.ts
Normal file
35
web/src/protocol.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
export type ClientMsgType = "hello" | "start" | "stop" | "paste" | "ping";
|
||||
|
||||
export type ServerMsgType =
|
||||
| "ready"
|
||||
| "state"
|
||||
| "start_ack"
|
||||
| "stop_ack"
|
||||
| "partial"
|
||||
| "final"
|
||||
| "pasted"
|
||||
| "error"
|
||||
| "pong";
|
||||
|
||||
export type SessionState = "idle" | "recording" | "stopping";
|
||||
|
||||
export interface ClientMsg {
|
||||
type: ClientMsgType;
|
||||
sessionId?: string;
|
||||
seq?: number;
|
||||
text?: string;
|
||||
version?: number;
|
||||
ts?: number;
|
||||
}
|
||||
|
||||
export interface ServerMsg {
|
||||
type: ServerMsgType;
|
||||
state?: SessionState;
|
||||
sessionId?: string;
|
||||
seq?: number;
|
||||
text?: string;
|
||||
message?: string;
|
||||
code?: string;
|
||||
retryable?: boolean;
|
||||
ts?: number;
|
||||
}
|
||||
@@ -13,9 +13,13 @@ type ConnectionStatus = "connected" | "disconnected" | "connecting";
|
||||
interface AppState {
|
||||
// Connection
|
||||
connectionStatus: ConnectionStatus;
|
||||
weakNetwork: boolean;
|
||||
// Recording
|
||||
recording: boolean;
|
||||
pendingStart: boolean;
|
||||
stopping: boolean;
|
||||
micReady: boolean;
|
||||
activeSessionId: string | null;
|
||||
// Preview
|
||||
previewText: string;
|
||||
previewActive: boolean;
|
||||
@@ -24,8 +28,12 @@ interface AppState {
|
||||
|
||||
// Actions
|
||||
setConnectionStatus: (status: ConnectionStatus) => void;
|
||||
setWeakNetwork: (weak: boolean) => void;
|
||||
setRecording: (recording: boolean) => void;
|
||||
setPendingStart: (pending: boolean) => void;
|
||||
setStopping: (stopping: boolean) => void;
|
||||
setMicReady: (ready: boolean) => void;
|
||||
setActiveSessionId: (sessionId: string | null) => void;
|
||||
setPreview: (text: string, isFinal: boolean) => void;
|
||||
clearPreview: () => void;
|
||||
addHistory: (text: string) => void;
|
||||
@@ -36,15 +44,23 @@ export const useAppStore = create<AppState>()(
|
||||
persist(
|
||||
(set, get) => ({
|
||||
connectionStatus: "connecting",
|
||||
weakNetwork: false,
|
||||
recording: false,
|
||||
pendingStart: false,
|
||||
stopping: false,
|
||||
micReady: false,
|
||||
activeSessionId: null,
|
||||
previewText: "",
|
||||
previewActive: false,
|
||||
history: [],
|
||||
|
||||
setConnectionStatus: (connectionStatus) => set({ connectionStatus }),
|
||||
setWeakNetwork: (weakNetwork) => set({ weakNetwork }),
|
||||
setRecording: (recording) => set({ recording }),
|
||||
setPendingStart: (pendingStart) => set({ pendingStart }),
|
||||
setStopping: (stopping) => set({ stopping }),
|
||||
setMicReady: (micReady) => set({ micReady }),
|
||||
setActiveSessionId: (activeSessionId) => set({ activeSessionId }),
|
||||
|
||||
setPreview: (text, isFinal) =>
|
||||
set({
|
||||
|
||||
Reference in New Issue
Block a user