- 在 config.yaml 中添加 hotwords 配置项,支持本地管理热词列表
- 实现热词解析、格式化和表名生成工具(internal/asr/hotwords.go)
- 在 ASR 连接建立时自动将热词发送给豆包(boosting_table_name 参数)
- 支持热词权重配置(1-10,默认 4),格式:"词|权重" 或 "词"
- 支持配置热重载,修改热词后新连接自动生效
- 为未来动态热词功能预留扩展接口
热词格式示例:
hotwords:
- 张三|8
- VoicePaste|10
- 人工智能|6
108 lines
2.6 KiB
Go
108 lines
2.6 KiB
Go
package asr
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// Limits and defaults applied when validating hotword entries.
const (
	// DefaultHotwordWeight is the weight given to a hotword whose entry does
	// not specify one.
	DefaultHotwordWeight = 4
	// MaxHotwordWeight is the largest weight accepted for a hotword.
	MaxHotwordWeight = 10
	// MinHotwordWeight is the smallest weight accepted for a hotword.
	MinHotwordWeight = 1
	// MaxHotwordLength is the maximum number of characters (runes, not
	// bytes) allowed in a single hotword.
	MaxHotwordLength = 10
)
|
|
|
|
// HotwordEntry is a single parsed hotword together with its boosting weight.
type HotwordEntry struct {
	// Word is the hotword text with surrounding whitespace removed.
	Word string
	// Weight is the boosting weight in the range 1-10; entries that do not
	// specify a weight use the default of 4.
	Weight int
}
|
|
|
|
// ParseHotwords parses raw hotword strings from config.
|
|
// Format: "word|weight" or "word" (default weight 4).
|
|
// Returns error if any hotword is invalid.
|
|
func ParseHotwords(raw []string) ([]HotwordEntry, error) {
|
|
if len(raw) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
entries := make([]HotwordEntry, 0, len(raw))
|
|
for i, line := range raw {
|
|
line = strings.TrimSpace(line)
|
|
if line == "" {
|
|
continue
|
|
}
|
|
|
|
parts := strings.Split(line, "|")
|
|
word := strings.TrimSpace(parts[0])
|
|
|
|
// Validate word length
|
|
if utf8.RuneCountInString(word) > MaxHotwordLength {
|
|
return nil, fmt.Errorf("hotword %d: exceeds %d characters: %q", i+1, MaxHotwordLength, word)
|
|
}
|
|
if word == "" {
|
|
return nil, fmt.Errorf("hotword %d: empty word", i+1)
|
|
}
|
|
|
|
// Parse weight
|
|
weight := DefaultHotwordWeight
|
|
if len(parts) > 1 {
|
|
weightStr := strings.TrimSpace(parts[1])
|
|
if weightStr != "" {
|
|
w, err := strconv.Atoi(weightStr)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("hotword %d: invalid weight %q: %w", i+1, weightStr, err)
|
|
}
|
|
if w < MinHotwordWeight || w > MaxHotwordWeight {
|
|
return nil, fmt.Errorf("hotword %d: weight %d out of range [%d, %d]", i+1, w, MinHotwordWeight, MaxHotwordWeight)
|
|
}
|
|
weight = w
|
|
}
|
|
}
|
|
|
|
entries = append(entries, HotwordEntry{
|
|
Word: word,
|
|
Weight: weight,
|
|
})
|
|
}
|
|
|
|
return entries, nil
|
|
}
|
|
|
|
// FormatHotwordsTable generates Doubao-compatible hotword table content.
|
|
// Format: each line "word|weight\n".
|
|
func FormatHotwordsTable(entries []HotwordEntry) string {
|
|
if len(entries) == 0 {
|
|
return ""
|
|
}
|
|
|
|
var sb strings.Builder
|
|
for _, e := range entries {
|
|
sb.WriteString(e.Word)
|
|
sb.WriteString("|")
|
|
sb.WriteString(strconv.Itoa(e.Weight))
|
|
sb.WriteString("\n")
|
|
}
|
|
return sb.String()
|
|
}
|
|
|
|
// GenerateTableName generates a unique table name based on hotword content.
|
|
// Uses SHA256 hash of the formatted table content (first 16 hex chars).
|
|
func GenerateTableName(entries []HotwordEntry) string {
|
|
if len(entries) == 0 {
|
|
return ""
|
|
}
|
|
|
|
content := FormatHotwordsTable(entries)
|
|
hash := sha256.Sum256([]byte(content))
|
|
return "voicepaste_" + hex.EncodeToString(hash[:])[:16]
|
|
}
|