mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-01 23:15:27 +08:00
Replace hardcoded DSML typo variant lists in Go/Node tool call parsers with generalized prefix consumption that tolerates repeated leading <, repeated DSML prefix noise, and trailing pipe terminators. Split tiktoken-dependent token counting into a build-tagged file for non-cgo platform compatibility. Add /data directory to Dockerfile for bind-mount permissions. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
47 lines
741 B
Go
47 lines
741 B
Go
package util
|
|
|
|
// Tokenizer model identifiers.
// NOTE(review): neither constant is referenced in the visible part of this
// file; they are presumably consumed by the tokenizer-backed counting code
// elsewhere in the package — confirm against callers.
const (
	// defaultTokenizerModel is the model name "gpt-4o".
	defaultTokenizerModel = "gpt-4o"
	// claudeTokenizerModel is the model name "claude".
	claudeTokenizerModel = "claude"
)
|
|
|
|
func CountPromptTokens(text, model string) int {
|
|
base := maxTokenCount(
|
|
EstimateTokens(text),
|
|
countWithTokenizer(text, model),
|
|
)
|
|
if base <= 0 {
|
|
return 0
|
|
}
|
|
return base + conservativePromptPadding(base)
|
|
}
|
|
|
|
func CountOutputTokens(text, model string) int {
|
|
base := maxTokenCount(
|
|
EstimateTokens(text),
|
|
countWithTokenizer(text, model),
|
|
)
|
|
if base <= 0 {
|
|
return 0
|
|
}
|
|
return base
|
|
}
|
|
|
|
// conservativePromptPadding returns the safety margin to add to a prompt
// token count: base/50 using integer division, but never less than 4.
func conservativePromptPadding(base int) int {
	const minPadding = 4

	if scaled := base / 50; scaled >= minPadding {
		return scaled
	}
	return minPadding
}
|
|
|
|
// maxTokenCount returns the largest of values, floored at 0: with no
// arguments, or when every argument is zero or negative, the result is 0.
func maxTokenCount(values ...int) int {
	var highest int
	for i := range values {
		if values[i] <= highest {
			continue
		}
		highest = values[i]
	}
	return highest
}
|