From 90817cb9e2ec49990c88b9c0bef3ba96bbcc3eca Mon Sep 17 00:00:00 2001 From: shern-point Date: Thu, 30 Apr 2026 00:45:29 +0800 Subject: [PATCH] feat: apply tokenizer-based counting in OpenAI usage builders Move OpenAI chat and responses usage accounting onto the shared tokenizer-aware counters so prompt and output usage stay model-aware and conservatively sized. --- internal/format/openai/render_chat.go | 2 +- internal/format/openai/render_responses.go | 2 +- internal/format/openai/render_test.go | 15 ++++++++++++++ internal/format/openai/render_usage.go | 24 ++++++++++++++-------- 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/internal/format/openai/render_chat.go b/internal/format/openai/render_chat.go index f88ba41..14a9d1f 100644 --- a/internal/format/openai/render_chat.go +++ b/internal/format/openai/render_chat.go @@ -29,7 +29,7 @@ func BuildChatCompletionWithToolCalls(completionID, model, finalPrompt, finalThi "created": time.Now().Unix(), "model": model, "choices": []map[string]any{{"index": 0, "message": messageObj, "finish_reason": finishReason}}, - "usage": BuildChatUsage(finalPrompt, finalThinking, finalText), + "usage": BuildChatUsageForModel(model, finalPrompt, finalThinking, finalText), } } diff --git a/internal/format/openai/render_responses.go b/internal/format/openai/render_responses.go index 6148fdc..2d3c9dd 100644 --- a/internal/format/openai/render_responses.go +++ b/internal/format/openai/render_responses.go @@ -70,7 +70,7 @@ func BuildResponseObjectFromItems(responseID, model, finalPrompt, finalThinking, "model": model, "output": output, "output_text": outputText, - "usage": BuildResponsesUsage(finalPrompt, finalThinking, finalText), + "usage": BuildResponsesUsageForModel(model, finalPrompt, finalThinking, finalText), } } diff --git a/internal/format/openai/render_test.go b/internal/format/openai/render_test.go index c1dc540..61cdb3f 100644 --- a/internal/format/openai/render_test.go +++ b/internal/format/openai/render_test.go @@ -6,6 +6,7 @@ import ( "testing" "ds2api/internal/toolcall" + "ds2api/internal/util" ) func TestBuildResponseObjectKeepsFencedToolPayloadAsText(t *testing.T) { @@ -177,3 +178,17 @@ func TestBuildResponseObjectWithToolCallsCoercesSchemaDeclaredStringArguments(t t.Fatalf("expected response content stringified by schema, got %#v", args["content"]) } } + +func TestBuildChatUsageForModelUsesConservativePromptCount(t *testing.T) { + prompt := strings.Repeat("上下文token ", 40) + usage := BuildChatUsageForModel("deepseek-v4-flash", prompt, "", "ok") + promptTokens, _ := usage["prompt_tokens"].(int) + if promptTokens <= util.EstimateTokens(prompt) { + t.Fatalf("expected conservative prompt token count > rough estimate, got=%d estimate=%d", promptTokens, util.EstimateTokens(prompt)) + } + totalTokens, _ := usage["total_tokens"].(int) + completionTokens, _ := usage["completion_tokens"].(int) + if totalTokens != promptTokens+completionTokens { + t.Fatalf("expected total tokens to add up, got usage=%#v", usage) + } +} diff --git a/internal/format/openai/render_usage.go b/internal/format/openai/render_usage.go index b328d20..ad1f380 100644 --- a/internal/format/openai/render_usage.go +++ b/internal/format/openai/render_usage.go @@ -2,10 +2,10 @@ package openai import "ds2api/internal/util" -func BuildChatUsage(finalPrompt, finalThinking, finalText string) map[string]any { - promptTokens := util.EstimateTokens(finalPrompt) - reasoningTokens := util.EstimateTokens(finalThinking) - completionTokens := util.EstimateTokens(finalText) +func BuildChatUsageForModel(model, finalPrompt, finalThinking, finalText string) map[string]any { + promptTokens := util.CountPromptTokens(finalPrompt, model) + reasoningTokens := util.CountOutputTokens(finalThinking, model) + completionTokens := util.CountOutputTokens(finalText, model) return map[string]any{ "prompt_tokens": promptTokens, "completion_tokens": reasoningTokens + completionTokens, @@ -16,13 +16,21 @@ func BuildChatUsage(finalPrompt, finalThinking, finalText string) map[string]any } } -func BuildResponsesUsage(finalPrompt, finalThinking, finalText string) map[string]any { - promptTokens := util.EstimateTokens(finalPrompt) - reasoningTokens := util.EstimateTokens(finalThinking) - completionTokens := util.EstimateTokens(finalText) +func BuildChatUsage(finalPrompt, finalThinking, finalText string) map[string]any { + return BuildChatUsageForModel("", finalPrompt, finalThinking, finalText) +} + +func BuildResponsesUsageForModel(model, finalPrompt, finalThinking, finalText string) map[string]any { + promptTokens := util.CountPromptTokens(finalPrompt, model) + reasoningTokens := util.CountOutputTokens(finalThinking, model) + completionTokens := util.CountOutputTokens(finalText, model) return map[string]any{ "input_tokens": promptTokens, "output_tokens": reasoningTokens + completionTokens, "total_tokens": promptTokens + reasoningTokens + completionTokens, } } + +func BuildResponsesUsage(finalPrompt, finalThinking, finalText string) map[string]any { + return BuildResponsesUsageForModel("", finalPrompt, finalThinking, finalText) +}