From 5bcea3d727c5f70cb290be10e9722edbb62bf86c Mon Sep 17 00:00:00 2001
From: "CJACK."
Date: Tue, 7 Apr 2026 10:16:00 +0800
Subject: [PATCH] Propagate upstream token usage across Gemini usage metadata

---
 API.en.md                                    |  2 ++
 API.md                                       |  2 ++
 internal/adapter/gemini/handler_generate.go  | 11 +++++---
 .../adapter/gemini/handler_stream_runtime.go |  6 ++++-
 internal/adapter/gemini/handler_test.go      | 26 +++++++++++++++++++
 5 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/API.en.md b/API.en.md
index 7276c86..2b83245 100644
--- a/API.en.md
+++ b/API.en.md
@@ -267,6 +267,7 @@ data: [DONE]
 - `deepseek-reasoner` / `deepseek-reasoner-search` models emit `delta.reasoning_content`
 - Text emits `delta.content`
 - Last chunk includes `finish_reason` and `usage`
+- Token counting prefers pass-through from upstream DeepSeek SSE (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent
 
 #### Tool Calls
 
@@ -535,6 +536,7 @@ Returns SSE (`text/event-stream`), each chunk as `data: `:
 - regular text: incremental text chunks
 - `tools` mode: buffered and emitted as `functionCall` at finalize phase
 - final chunk: includes `finishReason: "STOP"` and `usageMetadata`
+- Token counting prefers pass-through from upstream DeepSeek SSE (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent
 
 ---
 
diff --git a/API.md b/API.md
index 8552052..1caa984 100644
--- a/API.md
+++ b/API.md
@@ -267,6 +267,7 @@ data: [DONE]
 - `deepseek-reasoner` / `deepseek-reasoner-search` 模型输出 `delta.reasoning_content`
 - 普通文本输出 `delta.content`
 - 最后一段包含 `finish_reason` 和 `usage`
+- token 计数优先透传上游 DeepSeek SSE(如 `accumulated_token_usage` / `token_usage`);仅在上游缺失时回退本地估算
 
 #### Tool Calls
 
@@ -541,6 +542,7 @@ data: {"type":"message_stop"}
 - 常规文本:持续返回增量文本 chunk
 - `tools` 场景:会缓冲并在结束时输出 `functionCall` 结构
 - 结束 chunk:包含 `finishReason: "STOP"` 与 `usageMetadata`
+- token 计数优先透传上游 DeepSeek SSE(如 `accumulated_token_usage` / `token_usage`);仅在上游缺失时回退本地估算
 
 ---
 
diff --git a/internal/adapter/gemini/handler_generate.go b/internal/adapter/gemini/handler_generate.go
index b03b3ea..56cc0e6 100644
--- a/internal/adapter/gemini/handler_generate.go
+++ b/internal/adapter/gemini/handler_generate.go
@@ -149,14 +149,15 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
 		cleanVisibleOutput(result.Thinking, stripReferenceMarkers),
 		cleanVisibleOutput(result.Text, stripReferenceMarkers),
 		toolNames,
+		result.PromptTokens,
 		result.OutputTokens,
 	))
 }
 
 //nolint:unused // retained for native Gemini non-stream handling path.
-func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, outputTokens int) map[string]any {
+func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, promptTokens, outputTokens int) map[string]any {
 	parts := buildGeminiPartsFromFinal(finalText, finalThinking, toolNames)
-	usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, outputTokens)
+	usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, promptTokens, outputTokens)
 	return map[string]any{
 		"candidates": []map[string]any{
 			{
@@ -174,8 +175,10 @@ func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, final
 }
 
 //nolint:unused // retained for native Gemini non-stream handling path.
-func buildGeminiUsage(finalPrompt, finalThinking, finalText string, outputTokens int) map[string]any {
-	promptTokens := util.EstimateTokens(finalPrompt)
+func buildGeminiUsage(finalPrompt, finalThinking, finalText string, promptTokens, outputTokens int) map[string]any {
+	if promptTokens <= 0 {
+		promptTokens = util.EstimateTokens(finalPrompt)
+	}
 	reasoningTokens := util.EstimateTokens(finalThinking)
 	completionTokens := util.EstimateTokens(finalText)
 	if outputTokens > 0 {
diff --git a/internal/adapter/gemini/handler_stream_runtime.go b/internal/adapter/gemini/handler_stream_runtime.go
index e7c9b87..b8d2701 100644
--- a/internal/adapter/gemini/handler_stream_runtime.go
+++ b/internal/adapter/gemini/handler_stream_runtime.go
@@ -67,6 +67,7 @@ type geminiStreamRuntime struct {
 	thinking strings.Builder
 	text     strings.Builder
 
+	promptTokens int
 	outputTokens int
 }
 
@@ -112,6 +113,9 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
 	if !parsed.Parsed {
 		return streamengine.ParsedDecision{}
 	}
+	if parsed.PromptTokens > 0 {
+		s.promptTokens = parsed.PromptTokens
+	}
 	if parsed.OutputTokens > 0 {
 		s.outputTokens = parsed.OutputTokens
 	}
@@ -198,6 +202,6 @@ func (s *geminiStreamRuntime) finalize() {
 			},
 		},
 		"modelVersion": s.model,
-		"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.outputTokens),
+		"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.promptTokens, s.outputTokens),
 	})
 }
diff --git a/internal/adapter/gemini/handler_test.go b/internal/adapter/gemini/handler_test.go
index b7aea1b..aa3ae46 100644
--- a/internal/adapter/gemini/handler_test.go
+++ b/internal/adapter/gemini/handler_test.go
@@ -296,6 +296,32 @@ func TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope(t *testing.T) {
 	}
 }
 
+func TestBuildGeminiUsageOverridesPromptAndOutputTokensWhenProvided(t *testing.T) {
+	usage := buildGeminiUsage("prompt", "thinking", "answer", 11, 29)
+	if got, _ := usage["promptTokenCount"].(int); got != 11 {
+		t.Fatalf("expected promptTokenCount=11, got %#v", usage["promptTokenCount"])
+	}
+	if got, _ := usage["candidatesTokenCount"].(int); got != 29 {
+		t.Fatalf("expected candidatesTokenCount=29, got %#v", usage["candidatesTokenCount"])
+	}
+	if got, _ := usage["totalTokenCount"].(int); got != 40 {
+		t.Fatalf("expected totalTokenCount=40, got %#v", usage["totalTokenCount"])
+	}
+}
+
+func TestBuildGeminiUsageFallsBackToEstimateWhenNoUpstreamUsage(t *testing.T) {
+	usage := buildGeminiUsage("abcdef", "", "ghijkl", 0, 0)
+	if got, _ := usage["promptTokenCount"].(int); got <= 0 {
+		t.Fatalf("expected positive promptTokenCount estimate, got %#v", usage["promptTokenCount"])
+	}
+	if got, _ := usage["candidatesTokenCount"].(int); got <= 0 {
+		t.Fatalf("expected positive candidatesTokenCount estimate, got %#v", usage["candidatesTokenCount"])
+	}
+	if got, _ := usage["totalTokenCount"].(int); got <= 0 {
+		t.Fatalf("expected positive totalTokenCount estimate, got %#v", usage["totalTokenCount"])
+	}
+}
+
 func extractGeminiSSEFrames(t *testing.T, body string) []map[string]any {
 	t.Helper()
 	scanner := bufio.NewScanner(strings.NewReader(body))
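
Note (illustration only, not part of the patch): a minimal sketch of the precedence
buildGeminiUsage now follows, mirroring the two tests above. Upstream counts are used
as-is when positive; util.EstimateTokens is consulted only when they are absent (<= 0).
The caller and literal values below are hypothetical and taken from the tests.

	// Upstream reported 11 prompt tokens and 29 output tokens: both pass through.
	usage := buildGeminiUsage("prompt", "thinking", "answer", 11, 29)
	// usage["promptTokenCount"] == 11
	// usage["candidatesTokenCount"] == 29
	// usage["totalTokenCount"] == 40

	// No upstream usage: counts fall back to local estimation over the prompt
	// and output text, so the fields are positive but approximate.
	fallback := buildGeminiUsage("abcdef", "", "ghijkl", 0, 0)
	_ = fallback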