Files
ds2api/internal/util/token_count.go
CJACK fd0ec29991 refactor: generalize DSML tag parsing to tolerate model noise; split tiktoken by build tags
Replace hardcoded DSML typo variant lists in Go/Node tool call parsers with
generalized prefix consumption that tolerates repeated leading <, repeated DSML
prefix noise, and trailing pipe terminators. Split tiktoken-dependent token
counting into a build-tagged file for non-cgo platform compatibility. Add /data
directory to Dockerfile for bind-mount permissions.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-01 15:17:11 +08:00

47 lines
741 B
Go

package util
// Tokenizer model identifiers declared for this package.
const (
	// defaultTokenizerModel is the "gpt-4o" model identifier.
	// NOTE(review): presumably the fallback tokenizer when a requested model
	// is not recognized; its consumer is not in this file — confirm.
	defaultTokenizerModel = "gpt-4o"

	// claudeTokenizerModel is the "claude" model identifier.
	// NOTE(review): presumably selects Claude-specific counting; its consumer
	// is not in this file — confirm.
	claudeTokenizerModel = "claude"
)
func CountPromptTokens(text, model string) int {
base := maxTokenCount(
EstimateTokens(text),
countWithTokenizer(text, model),
)
if base <= 0 {
return 0
}
return base + conservativePromptPadding(base)
}
func CountOutputTokens(text, model string) int {
base := maxTokenCount(
EstimateTokens(text),
countWithTokenizer(text, model),
)
if base <= 0 {
return 0
}
return base
}
// conservativePromptPadding returns the safety margin to add to a prompt
// token count: one fiftieth of base (integer division), but never less
// than 4 tokens.
func conservativePromptPadding(base int) int {
	const (
		divisor    = 50 // base/50 is a 2% margin
		minPadding = 4  // floor applied to small (or non-positive) bases
	)

	if scaled := base / divisor; scaled >= minPadding {
		return scaled
	}
	return minPadding
}
// maxTokenCount returns the largest of values, with a floor of 0: it
// returns 0 when called with no arguments or when every value is zero or
// negative.
func maxTokenCount(values ...int) int {
	largest := 0
	for i := range values {
		if candidate := values[i]; candidate > largest {
			largest = candidate
		}
	}
	return largest
}