Propagate upstream token usage into Gemini usage metadata

CJACK.
2026-04-07 10:16:00 +08:00
parent 96b8587c5b
commit 5bcea3d727
5 changed files with 42 additions and 5 deletions


@@ -267,6 +267,7 @@ data: [DONE]
 - `deepseek-reasoner` / `deepseek-reasoner-search` models emit `delta.reasoning_content`
 - Text emits `delta.content`
 - Last chunk includes `finish_reason` and `usage`
+- Token counting prefers pass-through from the upstream DeepSeek SSE stream (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent (see the sketch after this excerpt)
 #### Tool Calls
@@ -535,6 +536,7 @@ Returns SSE (`text/event-stream`), each chunk as `data: <json>`:
 - regular text: incremental text chunks
 - `tools` mode: buffered and emitted as `functionCall` at finalize phase
 - final chunk: includes `finishReason: "STOP"` and `usageMetadata`
+- Token counting prefers pass-through from the upstream DeepSeek SSE stream (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent
 ---

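For context on what "prefer pass-through, fall back to estimation" means on the consuming side, here is a minimal Go sketch. Only the `accumulated_token_usage` / `token_usage` key names come from the docs above; the struct shapes, the inner field names, and the `pickUsage` helper are illustrative assumptions, not this repository's code:

```go
package sketch

import "encoding/json"

// upstreamUsage models the token counters a DeepSeek SSE chunk may carry.
// The inner field names here are assumptions, not confirmed by this commit.
type upstreamUsage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
}

// chunkUsage holds the two pass-through containers named in the docs above.
type chunkUsage struct {
	AccumulatedTokenUsage *upstreamUsage `json:"accumulated_token_usage"`
	TokenUsage            *upstreamUsage `json:"token_usage"`
}

// pickUsage prefers the cumulative counter, then the per-chunk one; zero
// values signal the caller to fall back to local estimation.
func pickUsage(raw []byte) (promptTokens, outputTokens int) {
	var c chunkUsage
	if err := json.Unmarshal(raw, &c); err != nil {
		return 0, 0
	}
	u := c.AccumulatedTokenUsage
	if u == nil {
		u = c.TokenUsage
	}
	if u == nil {
		return 0, 0
	}
	return u.PromptTokens, u.CompletionTokens
}
```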
API.md

@@ -267,6 +267,7 @@ data: [DONE]
 - `deepseek-reasoner` / `deepseek-reasoner-search` models emit `delta.reasoning_content`
 - Plain text emits `delta.content`
 - The last chunk includes `finish_reason` and `usage`
+- Token counting prefers pass-through from the upstream DeepSeek SSE stream (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent
 #### Tool Calls
@@ -541,6 +542,7 @@ data: {"type":"message_stop"}
 - Regular text: incremental text chunks are returned continuously
 - `tools` mode: buffered, then emitted as a `functionCall` structure at finalize
 - Final chunk: includes `finishReason: "STOP"` and `usageMetadata`
+- Token counting prefers pass-through from the upstream DeepSeek SSE stream (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent
 ---


@@ -149,14 +149,15 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
 		cleanVisibleOutput(result.Thinking, stripReferenceMarkers),
 		cleanVisibleOutput(result.Text, stripReferenceMarkers),
 		toolNames,
+		result.PromptTokens,
 		result.OutputTokens,
 	))
 }
 
 //nolint:unused // retained for native Gemini non-stream handling path.
-func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, outputTokens int) map[string]any {
+func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, promptTokens, outputTokens int) map[string]any {
 	parts := buildGeminiPartsFromFinal(finalText, finalThinking, toolNames)
-	usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, outputTokens)
+	usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, promptTokens, outputTokens)
 	return map[string]any{
 		"candidates": []map[string]any{
 			{
@@ -174,8 +175,10 @@ func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, final
 }
 
 //nolint:unused // retained for native Gemini non-stream handling path.
-func buildGeminiUsage(finalPrompt, finalThinking, finalText string, outputTokens int) map[string]any {
-	promptTokens := util.EstimateTokens(finalPrompt)
+func buildGeminiUsage(finalPrompt, finalThinking, finalText string, promptTokens, outputTokens int) map[string]any {
+	if promptTokens <= 0 {
+		promptTokens = util.EstimateTokens(finalPrompt)
+	}
 	reasoningTokens := util.EstimateTokens(finalThinking)
 	completionTokens := util.EstimateTokens(finalText)
 	if outputTokens > 0 {

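The hunk above cuts off inside `buildGeminiUsage` at the `if outputTokens > 0 {` guard. A sketch of how the tail plausibly resolves, inferred only from the test assertions at the bottom of this commit; the three map keys are pinned by those tests, while everything else (including what happens to `reasoningTokens`) is an assumption:

```go
	// Sketch of the truncated tail, not the repository's actual code.
	if outputTokens > 0 {
		completionTokens = outputTokens // prefer the upstream output count over the estimate
	}
	// reasoningTokens, estimated above, presumably feeds a separate
	// thoughts-related field; omitted here since no test pins it.
	return map[string]any{
		"promptTokenCount":     promptTokens,
		"candidatesTokenCount": completionTokens,
		"totalTokenCount":      promptTokens + completionTokens, // 11 + 29 = 40 in the test below
	}
```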

@@ -67,6 +67,7 @@ type geminiStreamRuntime struct {
 	thinking     strings.Builder
 	text         strings.Builder
+	promptTokens int
 	outputTokens int
 }
@@ -112,6 +113,9 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
 	if !parsed.Parsed {
 		return streamengine.ParsedDecision{}
 	}
+	if parsed.PromptTokens > 0 {
+		s.promptTokens = parsed.PromptTokens
+	}
 	if parsed.OutputTokens > 0 {
 		s.outputTokens = parsed.OutputTokens
 	}
@@ -198,6 +202,6 @@ func (s *geminiStreamRuntime) finalize() {
 			},
 		},
 		"modelVersion": s.model,
-		"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.outputTokens),
+		"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.promptTokens, s.outputTokens),
 	})
 }

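The `onParsed` hook keeps overwriting `promptTokens` / `outputTokens` with whatever positive values the latest chunk reports, so a cumulative counter carried on the final chunk wins. A standalone sketch of that last-write-wins pattern (the names here are illustrative, not the repository's):

```go
package main

import "fmt"

// usageCapture mirrors the capture in onParsed above: any positive value
// overwrites the previous one, so the final chunk's cumulative counts are
// what finalize() ends up reporting.
type usageCapture struct {
	promptTokens, outputTokens int
}

func (u *usageCapture) observe(promptTokens, outputTokens int) {
	if promptTokens > 0 {
		u.promptTokens = promptTokens
	}
	if outputTokens > 0 {
		u.outputTokens = outputTokens
	}
}

func main() {
	var u usageCapture
	u.observe(0, 3)   // early chunk: some output, no prompt usage yet
	u.observe(11, 29) // final chunk carries cumulative usage
	fmt.Println(u.promptTokens, u.outputTokens) // prints: 11 29
}
```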

@@ -296,6 +296,32 @@ func TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope(t *testing.T) {
 	}
 }
 
+func TestBuildGeminiUsageOverridesPromptAndOutputTokensWhenProvided(t *testing.T) {
+	usage := buildGeminiUsage("prompt", "thinking", "answer", 11, 29)
+	if got, _ := usage["promptTokenCount"].(int); got != 11 {
+		t.Fatalf("expected promptTokenCount=11, got %#v", usage["promptTokenCount"])
+	}
+	if got, _ := usage["candidatesTokenCount"].(int); got != 29 {
+		t.Fatalf("expected candidatesTokenCount=29, got %#v", usage["candidatesTokenCount"])
+	}
+	if got, _ := usage["totalTokenCount"].(int); got != 40 {
+		t.Fatalf("expected totalTokenCount=40, got %#v", usage["totalTokenCount"])
+	}
+}
+
+func TestBuildGeminiUsageFallsBackToEstimateWhenNoUpstreamUsage(t *testing.T) {
+	usage := buildGeminiUsage("abcdef", "", "ghijkl", 0, 0)
+	if got, _ := usage["promptTokenCount"].(int); got <= 0 {
+		t.Fatalf("expected positive promptTokenCount estimate, got %#v", usage["promptTokenCount"])
+	}
+	if got, _ := usage["candidatesTokenCount"].(int); got <= 0 {
+		t.Fatalf("expected positive candidatesTokenCount estimate, got %#v", usage["candidatesTokenCount"])
+	}
+	if got, _ := usage["totalTokenCount"].(int); got <= 0 {
+		t.Fatalf("expected positive totalTokenCount estimate, got %#v", usage["totalTokenCount"])
+	}
+}
+
 func extractGeminiSSEFrames(t *testing.T, body string) []map[string]any {
 	t.Helper()
 	scanner := bufio.NewScanner(strings.NewReader(body))
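Assuming a standard Go test layout, the two new tests can be exercised in isolation with `go test -run TestBuildGeminiUsage ./...` from the repository root.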