feat: ASR 从双向流式切换为流式输入模式(bigmodel_nostream)
- endpoint 从 bigmodel_async 改为 bigmodel_nostream
- 二进制协议去掉 sequence 字段,初始请求和音频帧均不带序号
- 最后一帧使用 FlagLastNoSeq 标志
- RequestMeta 新增 result_type=single、end_window_size=400
- ShowUtterances 关闭(nostream 模式不需要)
- readLoop 简化:nostream 模式下直接返回 final 结果
This commit is contained in:
@@ -109,9 +109,12 @@ type RequestMeta struct {
|
||||
EnablePUNC bool `json:"enable_punc"`
|
||||
EnableDDC bool `json:"enable_ddc"`
|
||||
ShowUtterances bool `json:"show_utterances"`
|
||||
ResultType string `json:"result_type,omitempty"`
|
||||
EndWindowSize int `json:"end_window_size,omitempty"`
|
||||
}
|
||||
// EncodeFullClientRequest builds the binary message for the initial handshake.
|
||||
func EncodeFullClientRequest(req *FullClientRequest, seq int32) ([]byte, error) {
|
||||
// nostream mode: header(4) + payload_size(4) + gzip(json)
|
||||
func EncodeFullClientRequest(req *FullClientRequest) ([]byte, error) {
|
||||
payloadJSON, err := json.Marshal(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal request: %w", err)
|
||||
@@ -121,20 +124,18 @@ func EncodeFullClientRequest(req *FullClientRequest, seq int32) ([]byte, error)
|
||||
return nil, fmt.Errorf("gzip compress: %w", err)
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
buf.Write(encodeHeader(MsgFullClientRequest, FlagPosSeq, SerJSON, CompGzip))
|
||||
_ = binary.Write(&buf, binary.BigEndian, seq)
|
||||
buf.Write(encodeHeader(MsgFullClientRequest, FlagNoSeq, SerJSON, CompGzip))
|
||||
_ = binary.Write(&buf, binary.BigEndian, int32(len(compressed)))
|
||||
buf.Write(compressed)
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
// EncodeAudioFrame builds a binary audio-only request.
|
||||
// If last is true, seq is sent as negative to signal end of stream.
|
||||
func EncodeAudioFrame(seq int32, pcm []byte, last bool) ([]byte, error) {
|
||||
flags := FlagPosSeq
|
||||
wireSeq := seq
|
||||
// nostream mode: header(4) + payload_size(4) + gzip(pcm)
|
||||
// last=true sets FlagLastNoSeq to signal end of stream.
|
||||
func EncodeAudioFrame(pcm []byte, last bool) ([]byte, error) {
|
||||
flags := FlagNoSeq
|
||||
if last {
|
||||
flags = FlagNegSeq
|
||||
wireSeq = -seq
|
||||
flags = FlagLastNoSeq
|
||||
}
|
||||
compressed, err := gzipCompress(pcm)
|
||||
if err != nil {
|
||||
@@ -142,7 +143,6 @@ func EncodeAudioFrame(seq int32, pcm []byte, last bool) ([]byte, error) {
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
buf.Write(encodeHeader(MsgAudioOnlyRequest, flags, SerNone, CompGzip))
|
||||
_ = binary.Write(&buf, binary.BigEndian, wireSeq)
|
||||
_ = binary.Write(&buf, binary.BigEndian, int32(len(compressed)))
|
||||
buf.Write(compressed)
|
||||
return buf.Bytes(), nil
|
||||
|
||||
Reference in New Issue
Block a user