From bd41c8a90c3cdd1e4ea30c81cac6d252304c653e Mon Sep 17 00:00:00 2001
From: shern-point
Date: Thu, 30 Apr 2026 00:44:11 +0800
Subject: [PATCH] feat: add tokenizer-based token counting utilities

Use go-tiktoken with embedded vocabularies for accurate BPE token
counting. CountPromptTokens applies conservative padding so returned
context token counts stay slightly above the real value instead of
undercounting.
---
 go.mod                       | 5 +++
 go.sum                       | 4 ++
 internal/util/token_count.go | 87 ++++++++++++++++++++++++++++++++++++
 3 files changed, 96 insertions(+)
 create mode 100644 internal/util/token_count.go

diff --git a/go.mod b/go.mod
index 2613f89..87cabfa 100644
--- a/go.mod
+++ b/go.mod
@@ -10,6 +10,11 @@ require (
 	github.com/router-for-me/CLIProxyAPI/v6 v6.9.14
 )
 
+require (
+	github.com/dlclark/regexp2 v1.11.5 // indirect
+	github.com/hupe1980/go-tiktoken v0.0.10
+)
+
 require (
 	github.com/klauspost/compress v1.18.5 // indirect
 	github.com/sirupsen/logrus v1.9.4 // indirect
diff --git a/go.sum b/go.sum
index 4b47fb0..811e782 100644
--- a/go.sum
+++ b/go.sum
@@ -2,10 +2,14 @@ github.com/andybalholm/brotli v1.2.1 h1:R+f5xP285VArJDRgowrfb9DqL18yVK0gKAW/F+eT
 github.com/andybalholm/brotli v1.2.1/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
+github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/go-chi/chi/v5 v5.2.5 h1:Eg4myHZBjyvJmAFjFvWgrqDTXFyOzjj7YIm3L3mu6Ug=
 github.com/go-chi/chi/v5 v5.2.5/go.mod h1:X7Gx4mteadT3eDOMTsXzmI4/rwUpOwBHLpAfupzFJP0=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hupe1980/go-tiktoken v0.0.10 
h1:m6phOJaGyctqWdGIgwn9X8AfJvaG74tnQoDL+ntOUEQ=
+github.com/hupe1980/go-tiktoken v0.0.10/go.mod h1:NME6d8hrE+Jo+kLUZHhXShYV8e40hYkm4BbSLQKtvAo=
 github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE=
 github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
diff --git a/internal/util/token_count.go b/internal/util/token_count.go
new file mode 100644
index 0000000..7ed75d8
--- /dev/null
+++ b/internal/util/token_count.go
@@ -0,0 +1,120 @@
+package util
+
+import (
+	"strings"
+	"sync"
+
+	tiktoken "github.com/hupe1980/go-tiktoken"
+)
+
+const (
+	// defaultTokenizerModel is the vocabulary used for every non-Claude
+	// model; "gpt-4o" selects the o200k_base encoding.
+	defaultTokenizerModel = "gpt-4o"
+	// claudeTokenizerModel selects the embedded Claude vocabulary.
+	claudeTokenizerModel = "claude"
+)
+
+// Encodings are expensive to build (the embedded vocabulary is parsed
+// on every construction), so they are cached per tokenizer model.
+// NOTE(review): assumes an Encoding is read-only after construction and
+// safe to share between goroutines — confirm against go-tiktoken.
+var (
+	encodingMu    sync.Mutex
+	encodingCache = map[string]*tiktoken.Encoding{}
+)
+
+// CountPromptTokens returns a conservative token count for prompt
+// text: the larger of the heuristic estimate and the BPE tokenizer
+// count, plus a small padding so the result stays slightly above the
+// real value instead of undercounting context usage.
+func CountPromptTokens(text, model string) int {
+	base := maxTokenCount(
+		EstimateTokens(text),
+		countWithTokenizer(text, model),
+	)
+	if base <= 0 {
+		return 0
+	}
+	return base + conservativePromptPadding(base)
+}
+
+// CountOutputTokens returns the larger of the heuristic estimate and
+// the BPE tokenizer count for generated text, without padding.
+func CountOutputTokens(text, model string) int {
+	base := maxTokenCount(
+		EstimateTokens(text),
+		countWithTokenizer(text, model),
+	)
+	if base <= 0 {
+		return 0
+	}
+	return base
+}
+
+// countWithTokenizer encodes text with the vocabulary matching model
+// and returns the token count. It returns 0 on any tokenizer failure
+// so callers fall back to the heuristic estimate.
+func countWithTokenizer(text, model string) int {
+	text = strings.TrimSpace(text)
+	if text == "" {
+		return 0
+	}
+	encoding, err := cachedEncoding(tokenizerModelForCount(model))
+	if err != nil {
+		return 0
+	}
+	ids, _, err := encoding.Encode(text, nil, nil)
+	if err != nil {
+		return 0
+	}
+	return len(ids)
+}
+
+// cachedEncoding returns the encoding for the given tokenizer model,
+// constructing and memoizing it on first use.
+func cachedEncoding(name string) (*tiktoken.Encoding, error) {
+	encodingMu.Lock()
+	defer encodingMu.Unlock()
+	if encoding, ok := encodingCache[name]; ok {
+		return encoding, nil
+	}
+	encoding, err := tiktoken.NewEncodingForModel(name)
+	if err != nil {
+		return nil, err
+	}
+	encodingCache[name] = encoding
+	return encoding, nil
+}
+
+// tokenizerModelForCount maps a request model name to the tokenizer
+// vocabulary to load. Claude models use the Claude vocabulary; every
+// other model (GPT, o-series, DeepSeek, unknown) uses the default.
+func tokenizerModelForCount(model string) string {
+	model = strings.ToLower(strings.TrimSpace(model))
+	if strings.HasPrefix(model, "claude") {
+		return claudeTokenizerModel
+	}
+	return defaultTokenizerModel
+}
+
+// conservativePromptPadding returns roughly 2% of base with a floor
+// of four tokens, keeping prompt counts slightly above the true value.
+func conservativePromptPadding(base int) int {
+	padding := base / 50
+	if padding < 4 {
+		padding = 4
+	}
+	return padding
+}
+
+// maxTokenCount returns the largest non-negative value among values.
+func maxTokenCount(values ...int) int {
+	best := 0
+	for _, v := range values {
+		if v > best {
+			best = v
+		}
+	}
+	return best
+}