feat: ASR 从双向流式切换为流式输入模式(bigmodel_nostream)

- endpoint 从 bigmodel_async 改为 bigmodel_nostream
- 二进制协议去掉 sequence 字段,初始请求和音频帧均不带序号
- 最后一帧使用 FlagLastNoSeq 标志
- RequestMeta 新增 result_type=single、end_window_size=400
- ShowUtterances 关闭(nostream 模式不需要)
- readLoop 简化:nostream 模式下直接返回 final 结果
This commit is contained in:
2026-03-01 06:12:58 +08:00
parent ce1ff2d04d
commit 350e405fac
2 changed files with 18 additions and 30 deletions

View File

@@ -109,9 +109,12 @@ type RequestMeta struct {
EnablePUNC bool `json:"enable_punc"`
EnableDDC bool `json:"enable_ddc"`
ShowUtterances bool `json:"show_utterances"`
ResultType string `json:"result_type,omitempty"`
EndWindowSize int `json:"end_window_size,omitempty"`
}
// EncodeFullClientRequest builds the binary message for the initial handshake.
func EncodeFullClientRequest(req *FullClientRequest, seq int32) ([]byte, error) {
// nostream mode: header(4) + payload_size(4) + gzip(json)
func EncodeFullClientRequest(req *FullClientRequest) ([]byte, error) {
payloadJSON, err := json.Marshal(req)
if err != nil {
return nil, fmt.Errorf("marshal request: %w", err)
@@ -121,20 +124,18 @@ func EncodeFullClientRequest(req *FullClientRequest, seq int32) ([]byte, error)
return nil, fmt.Errorf("gzip compress: %w", err)
}
var buf bytes.Buffer
buf.Write(encodeHeader(MsgFullClientRequest, FlagPosSeq, SerJSON, CompGzip))
_ = binary.Write(&buf, binary.BigEndian, seq)
buf.Write(encodeHeader(MsgFullClientRequest, FlagNoSeq, SerJSON, CompGzip))
_ = binary.Write(&buf, binary.BigEndian, int32(len(compressed)))
buf.Write(compressed)
return buf.Bytes(), nil
}
// EncodeAudioFrame builds a binary audio-only request.
// If last is true, seq is sent as negative to signal end of stream.
func EncodeAudioFrame(seq int32, pcm []byte, last bool) ([]byte, error) {
flags := FlagPosSeq
wireSeq := seq
// nostream mode: header(4) + payload_size(4) + gzip(pcm)
// last=true sets FlagLastNoSeq to signal end of stream.
func EncodeAudioFrame(pcm []byte, last bool) ([]byte, error) {
flags := FlagNoSeq
if last {
flags = FlagNegSeq
wireSeq = -seq
flags = FlagLastNoSeq
}
compressed, err := gzipCompress(pcm)
if err != nil {
@@ -142,7 +143,6 @@ func EncodeAudioFrame(seq int32, pcm []byte, last bool) ([]byte, error) {
}
var buf bytes.Buffer
buf.Write(encodeHeader(MsgAudioOnlyRequest, flags, SerNone, CompGzip))
_ = binary.Write(&buf, binary.BigEndian, wireSeq)
_ = binary.Write(&buf, binary.BigEndian, int32(len(compressed)))
buf.Write(compressed)
return buf.Bytes(), nil