From bfaa792760c903441dbfb04184896d56ba61793d Mon Sep 17 00:00:00 2001 From: imbytecat Date: Sun, 1 Mar 2026 06:34:55 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E8=AF=AD=E9=9F=B3?= =?UTF-8?q?=E5=81=9C=E9=A1=BF=E5=AF=BC=E8=87=B4=E6=8F=90=E5=89=8D=E7=B2=98?= =?UTF-8?q?=E8=B4=B4=E7=9A=84=E9=97=AE=E9=A2=98=EF=BC=88=E7=B4=AF=E7=A7=AF?= =?UTF-8?q?=E6=96=87=E6=9C=AC=EF=BC=8C=E6=9D=BE=E5=BC=80=E6=8C=89=E9=92=AE?= =?UTF-8?q?=E6=89=8D=E7=B2=98=E8=B4=B4=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/asr/client.go | 11 ++++++++--- internal/ws/handler.go | 42 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/internal/asr/client.go b/internal/asr/client.go index 13e26dc..3f0f553 100644 --- a/internal/asr/client.go +++ b/internal/asr/client.go @@ -74,7 +74,7 @@ func Dial(cfg Config, resultCh chan<- wsMsg.ServerMsg) (*Client, error) { EnableDDC: true, ShowUtterances: false, ResultType: "single", - EndWindowSize: 400, + EndWindowSize: 2000, }, } data, err := EncodeFullClientRequest(req) @@ -132,10 +132,15 @@ func (c *Client) readLoop(resultCh chan<- wsMsg.ServerMsg) { resultCh <- wsMsg.ServerMsg{Type: wsMsg.MsgError, Message: resp.ErrMsg} return } - // nostream mode: result comes after last audio packet or >15s + // nostream mode: may return intermediate results every ~15s text := resp.Text if text != "" { - resultCh <- wsMsg.ServerMsg{Type: wsMsg.MsgFinal, Text: text} + if resp.IsLast { + resultCh <- wsMsg.ServerMsg{Type: wsMsg.MsgFinal, Text: text} + } else { + // Intermediate result (>15s audio) — preview only, don't paste + resultCh <- wsMsg.ServerMsg{Type: wsMsg.MsgPartial, Text: text} + } } if resp.IsLast { return diff --git a/internal/ws/handler.go b/internal/ws/handler.go index 3130c70..b0d707f 100644 --- a/internal/ws/handler.go +++ b/internal/ws/handler.go @@ -62,23 +62,32 @@ func (h *Handler) handleConn(c *websocket.Conn) { defer close(resultCh) // Writer goroutine: single writer to avoid concurrent writes + // Accumulates all result texts; paste is triggered by stop, not by ASR final. var wg sync.WaitGroup + var accMu sync.Mutex + var accText string wg.Add(1) go func() { defer wg.Done() for msg := range resultCh { + // Accumulate text from both partial and final results + if msg.Type == MsgPartial || msg.Type == MsgFinal { + accMu.Lock() + accText += msg.Text + // Send accumulated preview to phone + preview := ServerMsg{Type: MsgPartial, Text: accText} + accMu.Unlock() + if err := c.WriteMessage(websocket.TextMessage, preview.Bytes()); err != nil { + log.Warn("ws write error", "err", err) + return + } + continue + } + // Forward other messages (error, pasted) as-is if err := c.WriteMessage(websocket.TextMessage, msg.Bytes()); err != nil { log.Warn("ws write error", "err", err) return } - // Auto-paste on final result - if msg.Type == MsgFinal && msg.Text != "" && h.pasteFunc != nil { - if err := h.pasteFunc(msg.Text); err != nil { - log.Error("auto-paste failed", "err", err) - } else { - _ = c.WriteMessage(websocket.TextMessage, ServerMsg{Type: MsgPasted}.Bytes()) - } - } } }() @@ -119,6 +128,10 @@ func (h *Handler) handleConn(c *websocket.Conn) { if active { continue } + // Reset accumulated text for new session + accMu.Lock() + accText = "" + accMu.Unlock() sa, cl, err := h.asrFactory(resultCh) if err != nil { log.Error("asr start failed", "err", err) @@ -134,12 +147,25 @@ func (h *Handler) handleConn(c *websocket.Conn) { if !active { continue } + // Finish ASR session — waits for final result from readLoop if cleanup != nil { cleanup() cleanup = nil } sendAudio = nil active = false + // Now paste the accumulated text + accMu.Lock() + finalText := accText + accText = "" + accMu.Unlock() + if finalText != "" && h.pasteFunc != nil { + if err := h.pasteFunc(finalText); err != nil { + log.Error("auto-paste failed", "err", err) + } else { + resultCh <- ServerMsg{Type: MsgPasted} + } + } log.Info("recording stopped") case MsgPaste: