From 4b4f0970062400ac4aa4e6d993ba7b886155ffc0 Mon Sep 17 00:00:00 2001
From: shern-point
Date: Thu, 30 Apr 2026 00:46:05 +0800
Subject: [PATCH] feat: use model-aware prompt counting in Gemini paths

Preserve Gemini prompt token text during normalization and remove the
hardcoded DeepSeek model from native Gemini usage helpers.
---
 internal/httpapi/gemini/convert_request.go     | 23 ++++++++++---------
 internal/httpapi/gemini/handler_generate.go    | 10 ++++----
 .../httpapi/gemini/handler_stream_runtime.go   |  2 +-
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/internal/httpapi/gemini/convert_request.go b/internal/httpapi/gemini/convert_request.go
index ca1497a..43697e7 100644
--- a/internal/httpapi/gemini/convert_request.go
+++ b/internal/httpapi/gemini/convert_request.go
@@ -36,16 +36,17 @@ func normalizeGeminiRequest(store ConfigReader, routeModel string, req map[strin
 	passThrough := collectGeminiPassThrough(req)
 
 	return promptcompat.StandardRequest{
-		Surface:        "google_gemini",
-		RequestedModel: requestedModel,
-		ResolvedModel:  resolvedModel,
-		ResponseModel:  requestedModel,
-		Messages:       messagesRaw,
-		FinalPrompt:    finalPrompt,
-		ToolNames:      toolNames,
-		Stream:         stream,
-		Thinking:       thinkingEnabled,
-		Search:         searchEnabled,
-		PassThrough:    passThrough,
+		Surface:         "google_gemini",
+		RequestedModel:  requestedModel,
+		ResolvedModel:   resolvedModel,
+		ResponseModel:   requestedModel,
+		Messages:        messagesRaw,
+		PromptTokenText: finalPrompt,
+		FinalPrompt:     finalPrompt,
+		ToolNames:       toolNames,
+		Stream:          stream,
+		Thinking:        thinkingEnabled,
+		Search:          searchEnabled,
+		PassThrough:     passThrough,
 	}, nil
 }
diff --git a/internal/httpapi/gemini/handler_generate.go b/internal/httpapi/gemini/handler_generate.go
index c6a08eb..00c4655 100644
--- a/internal/httpapi/gemini/handler_generate.go
+++ b/internal/httpapi/gemini/handler_generate.go
@@ -227,7 +227,7 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
 //nolint:unused // retained for native Gemini non-stream handling path.
 func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
 	parts := buildGeminiPartsFromFinal(finalText, finalThinking, toolNames)
-	usage := buildGeminiUsage(finalPrompt, finalThinking, finalText)
+	usage := buildGeminiUsage(model, finalPrompt, finalThinking, finalText)
 	return map[string]any{
 		"candidates": []map[string]any{
 			{
@@ -245,10 +245,10 @@
 }
 
 //nolint:unused // retained for native Gemini non-stream handling path.
-func buildGeminiUsage(finalPrompt, finalThinking, finalText string) map[string]any {
-	promptTokens := util.EstimateTokens(finalPrompt)
-	reasoningTokens := util.EstimateTokens(finalThinking)
-	completionTokens := util.EstimateTokens(finalText)
+func buildGeminiUsage(model, finalPrompt, finalThinking, finalText string) map[string]any {
+	promptTokens := util.CountPromptTokens(finalPrompt, model)
+	reasoningTokens := util.CountOutputTokens(finalThinking, model)
+	completionTokens := util.CountOutputTokens(finalText, model)
 	return map[string]any{
 		"promptTokenCount":     promptTokens,
 		"candidatesTokenCount": reasoningTokens + completionTokens,
diff --git a/internal/httpapi/gemini/handler_stream_runtime.go b/internal/httpapi/gemini/handler_stream_runtime.go
index 13729fb..fb72981 100644
--- a/internal/httpapi/gemini/handler_stream_runtime.go
+++ b/internal/httpapi/gemini/handler_stream_runtime.go
@@ -194,6 +194,6 @@ func (s *geminiStreamRuntime) finalize() {
 			},
 		},
 		"modelVersion":  s.model,
-		"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText),
+		"usageMetadata": buildGeminiUsage(s.model, s.finalPrompt, finalThinking, finalText),
 	})
 }
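
Note for reviewers (illustrative, not part of the patch): the diff assumes
util.CountPromptTokens and util.CountOutputTokens select a tokenizer per
model instead of the single DeepSeek-hardcoded estimator the commit message
mentions. Those helpers' actual implementations are not shown in this patch;
the sketch below is one plausible shape for them. The Tokenizer interface,
the registry map, and the 4-characters-per-token fallback are assumptions
introduced purely for illustration.

package util

import "unicode/utf8"

// Tokenizer counts tokens for one model family. Hypothetical interface;
// the real util package may be organized differently.
type Tokenizer interface {
	Count(text string) int
}

// tokenizers is a hypothetical registry mapping model names to tokenizers.
var tokenizers = map[string]Tokenizer{}

// CountPromptTokens counts prompt tokens with the tokenizer registered for
// model, falling back to a rough character heuristic when the model is
// unknown (roughly what a single fixed estimator would have done).
func CountPromptTokens(text, model string) int {
	if t, ok := tokenizers[model]; ok {
		return t.Count(text)
	}
	// Assumed fallback: ~4 characters per token. The repository's actual
	// heuristic is not shown in this patch.
	return (utf8.RuneCountInString(text) + 3) / 4
}

// CountOutputTokens applies the same model-aware counting to output text.
// Kept separate so output counting can diverge from prompt counting later.
func CountOutputTokens(text, model string) int {
	return CountPromptTokens(text, model)
}

With helpers shaped like this, threading model through buildGeminiUsage (and
carrying PromptTokenText on StandardRequest) lets usage metadata be computed
with a tokenizer matched to the serving model rather than one fixed default,
which is the behavior change the commit message describes.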