mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-06 01:15:29 +08:00
Replace hardcoded DSML typo variant lists in Go/Node tool call parsers with generalized prefix consumption that tolerates repeated leading <, repeated DSML prefix noise, and trailing pipe terminators. Split tiktoken-dependent token counting into a build-tagged file for non-cgo platform compatibility. Add /data directory to Dockerfile for bind-mount permissions. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
45 lines
1.1 KiB
Go
45 lines
1.1 KiB
Go
//go:build !386 && !arm && !mips && !mipsle && !wasm
|
|
|
|
package util
|
|
|
|
import (
|
|
"strings"
|
|
|
|
tiktoken "github.com/hupe1980/go-tiktoken"
|
|
)
|
|
|
|
func countWithTokenizer(text, model string) int {
|
|
text = strings.TrimSpace(text)
|
|
if text == "" {
|
|
return 0
|
|
}
|
|
encoding, err := tiktoken.NewEncodingForModel(tokenizerModelForCount(model))
|
|
if err != nil {
|
|
return 0
|
|
}
|
|
ids, _, err := encoding.Encode(text, nil, nil)
|
|
if err != nil {
|
|
return 0
|
|
}
|
|
return len(ids)
|
|
}
|
|
|
|
func tokenizerModelForCount(model string) string {
|
|
model = strings.ToLower(strings.TrimSpace(model))
|
|
if model == "" {
|
|
return defaultTokenizerModel
|
|
}
|
|
switch {
|
|
case strings.HasPrefix(model, "claude"):
|
|
return claudeTokenizerModel
|
|
case strings.HasPrefix(model, "gpt-4"), strings.HasPrefix(model, "gpt-5"), strings.HasPrefix(model, "o1"), strings.HasPrefix(model, "o3"), strings.HasPrefix(model, "o4"):
|
|
return defaultTokenizerModel
|
|
case strings.HasPrefix(model, "deepseek-v4"):
|
|
return defaultTokenizerModel
|
|
case strings.HasPrefix(model, "deepseek"):
|
|
return defaultTokenizerModel
|
|
default:
|
|
return defaultTokenizerModel
|
|
}
|
|
}
|