mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-23 01:17:44 +08:00
remove upstream token-usage plumbing and always estimate from content
This commit is contained in:
@@ -24,10 +24,9 @@ type claudeStreamRuntime struct {
|
||||
bufferToolContent bool
|
||||
stripReferenceMarkers bool
|
||||
|
||||
messageID string
|
||||
thinking strings.Builder
|
||||
text strings.Builder
|
||||
outputTokens int
|
||||
messageID string
|
||||
thinking strings.Builder
|
||||
text strings.Builder
|
||||
|
||||
nextBlockIndex int
|
||||
thinkingBlockOpen bool
|
||||
@@ -70,9 +69,6 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
|
||||
if !parsed.Parsed {
|
||||
return streamengine.ParsedDecision{}
|
||||
}
|
||||
if parsed.OutputTokens > 0 {
|
||||
s.outputTokens = parsed.OutputTokens
|
||||
}
|
||||
if parsed.ErrorMessage != "" {
|
||||
s.upstreamErr = parsed.ErrorMessage
|
||||
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("upstream_error")}
|
||||
|
||||
@@ -109,9 +109,6 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
|
||||
}
|
||||
|
||||
outputTokens := util.EstimateTokens(finalThinking) + util.EstimateTokens(finalText)
|
||||
if s.outputTokens > 0 {
|
||||
outputTokens = s.outputTokens
|
||||
}
|
||||
s.send("message_delta", map[string]any{
|
||||
"type": "message_delta",
|
||||
"delta": map[string]any{
|
||||
|
||||
@@ -149,14 +149,13 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
|
||||
cleanVisibleOutput(result.Thinking, stripReferenceMarkers),
|
||||
cleanVisibleOutput(result.Text, stripReferenceMarkers),
|
||||
toolNames,
|
||||
result.OutputTokens,
|
||||
))
|
||||
}
|
||||
|
||||
//nolint:unused // retained for native Gemini non-stream handling path.
|
||||
func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, outputTokens int) map[string]any {
|
||||
func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
|
||||
parts := buildGeminiPartsFromFinal(finalText, finalThinking, toolNames)
|
||||
usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, outputTokens)
|
||||
usage := buildGeminiUsage(finalPrompt, finalThinking, finalText)
|
||||
return map[string]any{
|
||||
"candidates": []map[string]any{
|
||||
{
|
||||
@@ -174,14 +173,10 @@ func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, final
|
||||
}
|
||||
|
||||
//nolint:unused // retained for native Gemini non-stream handling path.
|
||||
func buildGeminiUsage(finalPrompt, finalThinking, finalText string, outputTokens int) map[string]any {
|
||||
func buildGeminiUsage(finalPrompt, finalThinking, finalText string) map[string]any {
|
||||
promptTokens := util.EstimateTokens(finalPrompt)
|
||||
reasoningTokens := util.EstimateTokens(finalThinking)
|
||||
completionTokens := util.EstimateTokens(finalText)
|
||||
if outputTokens > 0 {
|
||||
completionTokens = outputTokens
|
||||
reasoningTokens = 0
|
||||
}
|
||||
return map[string]any{
|
||||
"promptTokenCount": promptTokens,
|
||||
"candidatesTokenCount": reasoningTokens + completionTokens,
|
||||
|
||||
@@ -65,9 +65,8 @@ type geminiStreamRuntime struct {
|
||||
stripReferenceMarkers bool
|
||||
toolNames []string
|
||||
|
||||
thinking strings.Builder
|
||||
text strings.Builder
|
||||
outputTokens int
|
||||
thinking strings.Builder
|
||||
text strings.Builder
|
||||
}
|
||||
|
||||
//nolint:unused // retained for native Gemini stream handling path.
|
||||
@@ -112,9 +111,6 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
|
||||
if !parsed.Parsed {
|
||||
return streamengine.ParsedDecision{}
|
||||
}
|
||||
if parsed.OutputTokens > 0 {
|
||||
s.outputTokens = parsed.OutputTokens
|
||||
}
|
||||
if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
|
||||
return streamengine.ParsedDecision{Stop: true}
|
||||
}
|
||||
@@ -198,6 +194,6 @@ func (s *geminiStreamRuntime) finalize() {
|
||||
},
|
||||
},
|
||||
"modelVersion": s.model,
|
||||
"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.outputTokens),
|
||||
"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -37,8 +37,6 @@ type chatStreamRuntime struct {
|
||||
streamToolNames map[int]string
|
||||
thinking strings.Builder
|
||||
text strings.Builder
|
||||
promptTokens int
|
||||
outputTokens int
|
||||
}
|
||||
|
||||
func newChatStreamRuntime(
|
||||
@@ -171,17 +169,6 @@ func (s *chatStreamRuntime) finalize(finishReason string) {
|
||||
finishReason = "tool_calls"
|
||||
}
|
||||
usage := openaifmt.BuildChatUsage(s.finalPrompt, finalThinking, finalText)
|
||||
if s.promptTokens > 0 {
|
||||
usage["prompt_tokens"] = s.promptTokens
|
||||
}
|
||||
if s.outputTokens > 0 {
|
||||
usage["completion_tokens"] = s.outputTokens
|
||||
}
|
||||
if s.promptTokens > 0 || s.outputTokens > 0 {
|
||||
p := usage["prompt_tokens"].(int)
|
||||
c := usage["completion_tokens"].(int)
|
||||
usage["total_tokens"] = p + c
|
||||
}
|
||||
s.sendChunk(openaifmt.BuildChatStreamChunk(
|
||||
s.completionID,
|
||||
s.created,
|
||||
@@ -196,12 +183,6 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
|
||||
if !parsed.Parsed {
|
||||
return streamengine.ParsedDecision{}
|
||||
}
|
||||
if parsed.PromptTokens > 0 {
|
||||
s.promptTokens = parsed.PromptTokens
|
||||
}
|
||||
if parsed.OutputTokens > 0 {
|
||||
s.outputTokens = parsed.OutputTokens
|
||||
}
|
||||
if parsed.ContentFilter {
|
||||
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReasonHandlerRequested}
|
||||
}
|
||||
|
||||
@@ -131,19 +131,6 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, re
|
||||
return
|
||||
}
|
||||
respBody := openaifmt.BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText, toolNames)
|
||||
if result.PromptTokens > 0 || result.OutputTokens > 0 {
|
||||
if usage, ok := respBody["usage"].(map[string]any); ok {
|
||||
if result.PromptTokens > 0 {
|
||||
usage["prompt_tokens"] = result.PromptTokens
|
||||
}
|
||||
if result.OutputTokens > 0 {
|
||||
usage["completion_tokens"] = result.OutputTokens
|
||||
}
|
||||
p, _ := usage["prompt_tokens"].(int)
|
||||
c, _ := usage["completion_tokens"].(int)
|
||||
usage["total_tokens"] = p + c
|
||||
}
|
||||
}
|
||||
writeJSON(w, http.StatusOK, respBody)
|
||||
}
|
||||
|
||||
|
||||
@@ -130,19 +130,6 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
|
||||
}
|
||||
|
||||
responseObj := openaifmt.BuildResponseObject(responseID, model, finalPrompt, sanitizedThinking, sanitizedText, toolNames)
|
||||
if result.PromptTokens > 0 || result.OutputTokens > 0 {
|
||||
if usage, ok := responseObj["usage"].(map[string]any); ok {
|
||||
if result.PromptTokens > 0 {
|
||||
usage["input_tokens"] = result.PromptTokens
|
||||
}
|
||||
if result.OutputTokens > 0 {
|
||||
usage["output_tokens"] = result.OutputTokens
|
||||
}
|
||||
input, _ := usage["input_tokens"].(int)
|
||||
output, _ := usage["output_tokens"].(int)
|
||||
usage["total_tokens"] = input + output
|
||||
}
|
||||
}
|
||||
h.getResponseStore().put(owner, responseID, responseObj)
|
||||
writeJSON(w, http.StatusOK, responseObj)
|
||||
}
|
||||
|
||||
@@ -51,8 +51,6 @@ type responsesStreamRuntime struct {
|
||||
messagePartAdded bool
|
||||
sequence int
|
||||
failed bool
|
||||
promptTokens int
|
||||
outputTokens int
|
||||
|
||||
persistResponse func(obj map[string]any)
|
||||
}
|
||||
@@ -150,24 +148,6 @@ func (s *responsesStreamRuntime) finalize() {
|
||||
s.closeIncompleteFunctionItems()
|
||||
|
||||
obj := s.buildCompletedResponseObject(finalThinking, finalText, detected)
|
||||
if s.outputTokens > 0 {
|
||||
if usage, ok := obj["usage"].(map[string]any); ok {
|
||||
usage["output_tokens"] = s.outputTokens
|
||||
}
|
||||
}
|
||||
if s.promptTokens > 0 || s.outputTokens > 0 {
|
||||
if usage, ok := obj["usage"].(map[string]any); ok {
|
||||
if s.promptTokens > 0 {
|
||||
usage["input_tokens"] = s.promptTokens
|
||||
}
|
||||
if s.outputTokens > 0 {
|
||||
usage["output_tokens"] = s.outputTokens
|
||||
}
|
||||
input, _ := usage["input_tokens"].(int)
|
||||
output, _ := usage["output_tokens"].(int)
|
||||
usage["total_tokens"] = input + output
|
||||
}
|
||||
}
|
||||
if s.persistResponse != nil {
|
||||
s.persistResponse(obj)
|
||||
}
|
||||
@@ -196,12 +176,6 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
|
||||
if !parsed.Parsed {
|
||||
return streamengine.ParsedDecision{}
|
||||
}
|
||||
if parsed.PromptTokens > 0 {
|
||||
s.promptTokens = parsed.PromptTokens
|
||||
}
|
||||
if parsed.OutputTokens > 0 {
|
||||
s.outputTokens = parsed.OutputTokens
|
||||
}
|
||||
if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
|
||||
return streamengine.ParsedDecision{Stop: true}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user