feat: ASR 从双向流式切换为流式输入模式(bigmodel_nostream)

- endpoint 从 bigmodel_async 改为 bigmodel_nostream
- 二进制协议去掉 sequence 字段,初始请求和音频帧均不带序号
- 最后一帧使用 FlagLastNoSeq 标志
- RequestMeta 新增 result_type=single、end_window_size=400
- ShowUtterances 关闭(nostream 模式不需要)
- readLoop 简化:去掉基于 utterances 的 definite 判断,nostream 模式下非空文本直接作为 final 结果返回
This commit is contained in:
2026-03-01 06:12:58 +08:00
parent ce1ff2d04d
commit 350e405fac
2 changed files with 18 additions and 30 deletions

View File

@@ -5,7 +5,6 @@ import (
"log/slog"
"net/http"
"sync"
"sync/atomic"
"time"
"github.com/fasthttp/websocket"
@@ -14,7 +13,7 @@ import (
)
const (
doubaoEndpoint = "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel_async"
doubaoEndpoint = "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel_nostream"
writeTimeout = 10 * time.Second
readTimeout = 30 * time.Second
)
@@ -30,7 +29,6 @@ type Config struct {
type Client struct {
cfg Config
conn *websocket.Conn
seq atomic.Int32
mu sync.Mutex
closed bool
closeCh chan struct{}
@@ -74,11 +72,12 @@ func Dial(cfg Config, resultCh chan<- wsMsg.ServerMsg) (*Client, error) {
EnableITN: true,
EnablePUNC: true,
EnableDDC: true,
ShowUtterances: true,
ShowUtterances: false,
ResultType: "single",
EndWindowSize: 400,
},
}
c.seq.Store(1)
data, err := EncodeFullClientRequest(req, c.seq.Load())
data, err := EncodeFullClientRequest(req)
if err != nil {
conn.Close()
return nil, fmt.Errorf("encode full request: %w", err)
@@ -100,8 +99,7 @@ func (c *Client) SendAudio(pcm []byte, last bool) error {
if c.closed {
return fmt.Errorf("client closed")
}
seq := c.seq.Add(1)
data, err := EncodeAudioFrame(seq, pcm, last)
data, err := EncodeAudioFrame(pcm, last)
if err != nil {
return fmt.Errorf("encode audio: %w", err)
}
@@ -134,20 +132,10 @@ func (c *Client) readLoop(resultCh chan<- wsMsg.ServerMsg) {
resultCh <- wsMsg.ServerMsg{Type: wsMsg.MsgError, Message: resp.ErrMsg}
return
}
// Determine if this is a final result by checking utterances
isFinal := false
// nostream mode: the server returns the recognition result only after the last audio packet is sent, or once the audio exceeds 15s
text := resp.Text
for _, u := range resp.Utterances {
if u.Definite {
isFinal = true
text = u.Text
break
}
}
if isFinal {
if text != "" {
resultCh <- wsMsg.ServerMsg{Type: wsMsg.MsgFinal, Text: text}
} else if text != "" {
resultCh <- wsMsg.ServerMsg{Type: wsMsg.MsgPartial, Text: text}
}
if resp.IsLast {
return