mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-04 16:35:27 +08:00
Propagate upstream token usage across Gemini usage metadata
This commit is contained in:
@@ -267,6 +267,7 @@ data: [DONE]
|
||||
- `deepseek-reasoner` / `deepseek-reasoner-search` models emit `delta.reasoning_content`
|
||||
- Text emits `delta.content`
|
||||
- Last chunk includes `finish_reason` and `usage`
|
||||
- Token counts are passed through from the upstream DeepSeek SSE stream when available (e.g. `accumulated_token_usage` / `token_usage`); local estimation is used only as a fallback when upstream usage is absent
|
||||
|
||||
#### Tool Calls
|
||||
|
||||
@@ -535,6 +536,7 @@ Returns SSE (`text/event-stream`), each chunk as `data: <json>`:
|
||||
- regular text: incremental text chunks
|
||||
- `tools` mode: buffered and emitted as `functionCall` at finalize phase
|
||||
- final chunk: includes `finishReason: "STOP"` and `usageMetadata`
|
||||
- Token counts are passed through from the upstream DeepSeek SSE stream when available (e.g. `accumulated_token_usage` / `token_usage`); local estimation is used only as a fallback when upstream usage is absent
|
||||
|
||||
---
|
||||
|
||||
|
||||
2
API.md
2
API.md
@@ -267,6 +267,7 @@ data: [DONE]
|
||||
- `deepseek-reasoner` / `deepseek-reasoner-search` 模型输出 `delta.reasoning_content`
|
||||
- 普通文本输出 `delta.content`
|
||||
- 最后一段包含 `finish_reason` 和 `usage`
|
||||
- token 计数优先透传上游 DeepSeek SSE(如 `accumulated_token_usage` / `token_usage`);仅在上游缺失时回退本地估算
|
||||
|
||||
#### Tool Calls
|
||||
|
||||
@@ -541,6 +542,7 @@ data: {"type":"message_stop"}
|
||||
- 常规文本:持续返回增量文本 chunk
|
||||
- `tools` 场景:会缓冲并在结束时输出 `functionCall` 结构
|
||||
- 结束 chunk:包含 `finishReason: "STOP"` 与 `usageMetadata`
|
||||
- token 计数优先透传上游 DeepSeek SSE(如 `accumulated_token_usage` / `token_usage`);仅在上游缺失时回退本地估算
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -149,14 +149,15 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
|
||||
cleanVisibleOutput(result.Thinking, stripReferenceMarkers),
|
||||
cleanVisibleOutput(result.Text, stripReferenceMarkers),
|
||||
toolNames,
|
||||
result.PromptTokens,
|
||||
result.OutputTokens,
|
||||
))
|
||||
}
|
||||
|
||||
//nolint:unused // retained for native Gemini non-stream handling path.
|
||||
func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, outputTokens int) map[string]any {
|
||||
func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, promptTokens, outputTokens int) map[string]any {
|
||||
parts := buildGeminiPartsFromFinal(finalText, finalThinking, toolNames)
|
||||
usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, outputTokens)
|
||||
usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, promptTokens, outputTokens)
|
||||
return map[string]any{
|
||||
"candidates": []map[string]any{
|
||||
{
|
||||
@@ -174,8 +175,10 @@ func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, final
|
||||
}
|
||||
|
||||
//nolint:unused // retained for native Gemini non-stream handling path.
|
||||
func buildGeminiUsage(finalPrompt, finalThinking, finalText string, outputTokens int) map[string]any {
|
||||
promptTokens := util.EstimateTokens(finalPrompt)
|
||||
func buildGeminiUsage(finalPrompt, finalThinking, finalText string, promptTokens, outputTokens int) map[string]any {
|
||||
if promptTokens <= 0 {
|
||||
promptTokens = util.EstimateTokens(finalPrompt)
|
||||
}
|
||||
reasoningTokens := util.EstimateTokens(finalThinking)
|
||||
completionTokens := util.EstimateTokens(finalText)
|
||||
if outputTokens > 0 {
|
||||
|
||||
@@ -67,6 +67,7 @@ type geminiStreamRuntime struct {
|
||||
|
||||
thinking strings.Builder
|
||||
text strings.Builder
|
||||
promptTokens int
|
||||
outputTokens int
|
||||
}
|
||||
|
||||
@@ -112,6 +113,9 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
|
||||
if !parsed.Parsed {
|
||||
return streamengine.ParsedDecision{}
|
||||
}
|
||||
if parsed.PromptTokens > 0 {
|
||||
s.promptTokens = parsed.PromptTokens
|
||||
}
|
||||
if parsed.OutputTokens > 0 {
|
||||
s.outputTokens = parsed.OutputTokens
|
||||
}
|
||||
@@ -198,6 +202,6 @@ func (s *geminiStreamRuntime) finalize() {
|
||||
},
|
||||
},
|
||||
"modelVersion": s.model,
|
||||
"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.outputTokens),
|
||||
"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.promptTokens, s.outputTokens),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -296,6 +296,32 @@ func TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildGeminiUsageOverridesPromptAndOutputTokensWhenProvided(t *testing.T) {
|
||||
usage := buildGeminiUsage("prompt", "thinking", "answer", 11, 29)
|
||||
if got, _ := usage["promptTokenCount"].(int); got != 11 {
|
||||
t.Fatalf("expected promptTokenCount=11, got %#v", usage["promptTokenCount"])
|
||||
}
|
||||
if got, _ := usage["candidatesTokenCount"].(int); got != 29 {
|
||||
t.Fatalf("expected candidatesTokenCount=29, got %#v", usage["candidatesTokenCount"])
|
||||
}
|
||||
if got, _ := usage["totalTokenCount"].(int); got != 40 {
|
||||
t.Fatalf("expected totalTokenCount=40, got %#v", usage["totalTokenCount"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildGeminiUsageFallsBackToEstimateWhenNoUpstreamUsage(t *testing.T) {
|
||||
usage := buildGeminiUsage("abcdef", "", "ghijkl", 0, 0)
|
||||
if got, _ := usage["promptTokenCount"].(int); got <= 0 {
|
||||
t.Fatalf("expected positive promptTokenCount estimate, got %#v", usage["promptTokenCount"])
|
||||
}
|
||||
if got, _ := usage["candidatesTokenCount"].(int); got <= 0 {
|
||||
t.Fatalf("expected positive candidatesTokenCount estimate, got %#v", usage["candidatesTokenCount"])
|
||||
}
|
||||
if got, _ := usage["totalTokenCount"].(int); got <= 0 {
|
||||
t.Fatalf("expected positive totalTokenCount estimate, got %#v", usage["totalTokenCount"])
|
||||
}
|
||||
}
|
||||
|
||||
func extractGeminiSSEFrames(t *testing.T, body string) []map[string]any {
|
||||
t.Helper()
|
||||
scanner := bufio.NewScanner(strings.NewReader(body))
|
||||
|
||||
Reference in New Issue
Block a user