temporarily ignore upstream token usage fields globally

2026-05-02 07:25:26 +08:00 · 2026-04-07 19:40:47 +08:00
parent b59e991ad5
commit a14e5b0847
8 changed files with 44 additions and 182 deletions
--- a/internal/adapter/openai/stream_status_test.go
+++ b/internal/adapter/openai/stream_status_test.go
@@ -239,7 +239,7 @@ func TestChatCompletionsStreamContentFilterStopsNormallyWithoutLeak(t *testing.T
 	}
 }

-func TestResponsesStreamUsageOverridesFromBatchAccumulatedTokenUsage(t *testing.T) {
+func TestResponsesStreamUsageIgnoresBatchAccumulatedTokenUsage(t *testing.T) {
 	statuses := make([]int, 0, 1)
 	h := &Handler{
 		Store: mockOpenAIConfig{wideInput: true},
@@ -282,12 +282,12 @@ func TestResponsesStreamUsageOverridesFromBatchAccumulatedTokenUsage(t *testing.
 	if usage == nil {
 		t.Fatalf("expected usage in response payload, got %#v", resp)
 	}
-	if got, _ := usage["output_tokens"].(float64); int(got) != 190 {
-		t.Fatalf("expected output_tokens=190, got %#v", usage["output_tokens"])
+	if got, _ := usage["output_tokens"].(float64); int(got) == 190 {
+		t.Fatalf("expected upstream accumulated token usage to be ignored, got %#v", usage["output_tokens"])
 	}
 }

-func TestResponsesNonStreamUsageOverridesPromptAndOutputTokenUsage(t *testing.T) {
+func TestResponsesNonStreamUsageIgnoresPromptAndOutputTokenUsage(t *testing.T) {
 	statuses := make([]int, 0, 1)
 	h := &Handler{
 		Store: mockOpenAIConfig{wideInput: true},
@@ -322,13 +322,13 @@ func TestResponsesNonStreamUsageOverridesPromptAndOutputTokenUsage(t *testing.T)
 	if usage == nil {
 		t.Fatalf("expected usage object, got %#v", out)
 	}
-	if got, _ := usage["input_tokens"].(float64); int(got) != 11 {
-		t.Fatalf("expected input_tokens=11, got %#v", usage["input_tokens"])
+	input, _ := usage["input_tokens"].(float64)
+	output, _ := usage["output_tokens"].(float64)
+	total, _ := usage["total_tokens"].(float64)
+	if int(output) == 29 {
+		t.Fatalf("expected upstream completion token usage to be ignored, got %#v", usage["output_tokens"])
 	}
-	if got, _ := usage["output_tokens"].(float64); int(got) != 29 {
-		t.Fatalf("expected output_tokens=29, got %#v", usage["output_tokens"])
-	}
-	if got, _ := usage["total_tokens"].(float64); int(got) != 40 {
-		t.Fatalf("expected total_tokens=40, got %#v", usage["total_tokens"])
+	if int(total) != int(input)+int(output) {
+		t.Fatalf("expected total_tokens=input_tokens+output_tokens, usage=%#v", usage)
 	}
 }
--- a/internal/js/chat-stream/sse_parse_impl.js
+++ b/internal/js/chat-stream/sse_parse_impl.js
@@ -443,70 +443,10 @@ function hasContentFilterStatusValue(v) {
 }

 function extractAccumulatedTokenUsage(chunk) {
-  const usage = findAccumulatedTokenUsage(chunk);
-  return usage || { prompt: 0, output: 0 };
-}
-
-function findAccumulatedTokenUsage(v) {
-  if (Array.isArray(v)) {
-    for (const item of v) {
-      const u = findAccumulatedTokenUsage(item);
-      if (u) return u;
-    }
-    return null;
-  }
-  if (!v || typeof v !== 'object') {
-    return null;
-  }
-  const pathValue = asString(v.p);
-  if (pathValue && pathValue.toLowerCase().includes('accumulated_token_usage')) {
-    const n = toInt(v.v);
-    if (n > 0) {
-      return { prompt: 0, output: n };
-    }
-  }
-  if (pathValue && pathValue.toLowerCase().includes('token_usage')) {
-    const u = v.v;
-    if (u && typeof u === 'object') {
-      const p = toInt(u.prompt_tokens);
-      const c = toInt(u.completion_tokens);
-      if (p > 0 || c > 0) {
-        return { prompt: p, output: c };
-      }
-    }
-  }
-  const direct = toInt(v.accumulated_token_usage);
-  if (direct > 0) {
-    return { prompt: 0, output: direct };
-  }
-  if (v.token_usage && typeof v.token_usage === 'object') {
-    const p = toInt(v.token_usage.prompt_tokens);
-    const c = toInt(v.token_usage.completion_tokens);
-    if (p > 0 || c > 0) {
-      return { prompt: p, output: c };
-    }
-  }
-  for (const value of Object.values(v)) {
-    const u = findAccumulatedTokenUsage(value);
-    if (u) return u;
-  }
-  return null;
-}
-
-function toInt(v) {
-  if (typeof v === 'number' && Number.isFinite(v)) {
-    return Math.trunc(v);
-  }
-  if (typeof v === 'string' && v.trim() !== '') {
-    const n = Number(v);
-    if (Number.isFinite(n)) {
-      return Math.trunc(n);
-    }
-  }
-  if (typeof v !== 'number') {
-    return 0;
-  }
-  return Number.isFinite(v) ? Math.trunc(v) : 0;
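+  // Temporary policy: ignore the upstream usage fields (accumulated_token_usage / token_usage)
+  // and rely on the internal token estimate everywhere, avoiding errors from the context-accumulated counting basis.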
+  void chunk;
+  return { prompt: 0, output: 0 };
 }

 function formatErrorMessage(v) {
--- a/internal/sse/line_test.go
+++ b/internal/sse/line_test.go
@@ -26,7 +26,7 @@ func TestParseDeepSeekContentLineContentFilter(t *testing.T) {
 	}
 }

-func TestParseDeepSeekContentLineContentFilterCodeIncludesOutputTokens(t *testing.T) {
+func TestParseDeepSeekContentLineContentFilterCodeIgnoresUpstreamOutputTokens(t *testing.T) {
 	res := ParseDeepSeekContentLine(
 		[]byte(`data: {"code":"content_filter","accumulated_token_usage":99}`),
 		false, "text",
@@ -34,8 +34,8 @@ func TestParseDeepSeekContentLineContentFilterCodeIncludesOutputTokens(t *testin
 	if !res.Parsed || !res.Stop || !res.ContentFilter {
 		t.Fatalf("expected content-filter stop result: %#v", res)
 	}
-	if res.OutputTokens != 99 {
-		t.Fatalf("expected output token usage 99, got %d", res.OutputTokens)
+	if res.OutputTokens != 0 {
+		t.Fatalf("expected upstream output token usage to be ignored, got %d", res.OutputTokens)
 	}
 }

@@ -46,27 +46,27 @@ func TestParseDeepSeekContentLineContentFilterStatus(t *testing.T) {
 	}
 }

-func TestParseDeepSeekContentLineCapturesAccumulatedTokenUsage(t *testing.T) {
+func TestParseDeepSeekContentLineIgnoresAccumulatedTokenUsage(t *testing.T) {
 	res := ParseDeepSeekContentLine([]byte(`data: {"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":1383},{"p":"quasi_status","v":"FINISHED"}]}`), false, "text")
-	if res.OutputTokens != 1383 {
-		t.Fatalf("expected output token usage 1383, got %d", res.OutputTokens)
+	if res.OutputTokens != 0 {
+		t.Fatalf("expected accumulated token usage ignored, got %d", res.OutputTokens)
 	}
 }

-func TestParseDeepSeekContentLineCapturesAccumulatedTokenUsageString(t *testing.T) {
+func TestParseDeepSeekContentLineIgnoresAccumulatedTokenUsageString(t *testing.T) {
 	res := ParseDeepSeekContentLine([]byte(`data: {"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":"190"},{"p":"quasi_status","v":"FINISHED"}]}`), false, "text")
-	if res.OutputTokens != 190 {
-		t.Fatalf("expected output token usage 190, got %d", res.OutputTokens)
+	if res.OutputTokens != 0 {
+		t.Fatalf("expected accumulated token usage string ignored, got %d", res.OutputTokens)
 	}
 }

-func TestParseDeepSeekContentLineErrorIncludesOutputTokens(t *testing.T) {
+func TestParseDeepSeekContentLineErrorIgnoresUpstreamOutputTokens(t *testing.T) {
 	res := ParseDeepSeekContentLine([]byte(`data: {"error":"boom","accumulated_token_usage":123}`), false, "text")
 	if !res.Parsed || !res.Stop {
 		t.Fatalf("expected stop on error: %#v", res)
 	}
-	if res.OutputTokens != 123 {
-		t.Fatalf("expected output token usage 123 on error, got %d", res.OutputTokens)
+	if res.OutputTokens != 0 {
+		t.Fatalf("expected output token usage ignored on error, got %d", res.OutputTokens)
 	}
 }

--- a/internal/sse/parser.go
+++ b/internal/sse/parser.go
@@ -3,8 +3,6 @@ package sse
 import (
 	"bytes"
 	"encoding/json"
-	"math"
-	"strconv"
 	"strings"

 	"ds2api/internal/deepseek"
@@ -365,84 +363,8 @@ func hasContentFilterStatusValue(v any) bool {
 }

 func extractAccumulatedTokenUsage(chunk map[string]any) (int, int) {
-	return findAccumulatedTokenUsage(chunk)
-}
-
-func findAccumulatedTokenUsage(v any) (int, int) {
-	switch x := v.(type) {
-	case map[string]any:
-		if p, _ := x["p"].(string); strings.Contains(strings.ToLower(p), "accumulated_token_usage") {
-			if n, ok := toInt(x["v"]); ok && n > 0 {
-				return 0, n
-			}
-		}
-		if p, _ := x["p"].(string); strings.Contains(strings.ToLower(p), "token_usage") {
-			if m, ok := x["v"].(map[string]any); ok {
-				p, _ := toInt(m["prompt_tokens"])
-				c, _ := toInt(m["completion_tokens"])
-				if p > 0 || c > 0 {
-					return p, c
-				}
-			}
-		}
-		if n, ok := toInt(x["accumulated_token_usage"]); ok && n > 0 {
-			return 0, n
-		}
-		if usage, ok := x["token_usage"].(map[string]any); ok {
-			p, _ := toInt(usage["prompt_tokens"])
-			c, _ := toInt(usage["completion_tokens"])
-			if p > 0 || c > 0 {
-				return p, c
-			}
-		}
-		for _, vv := range x {
-			if p, c := findAccumulatedTokenUsage(vv); p > 0 || c > 0 {
-				return p, c
-			}
-		}
-	case []any:
-		for _, item := range x {
-			if p, c := findAccumulatedTokenUsage(item); p > 0 || c > 0 {
-				return p, c
-			}
-		}
-	}
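+	// Temporary policy: ignore the upstream usage fields (accumulated_token_usage / token_usage);
+	// downstream code uniformly uses the internal token estimate, so the context-accumulated counting basis cannot badly skew a single response's output count.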
+	_ = chunk
 	return 0, 0
 }
-
-func toInt(v any) (int, bool) {
-	switch x := v.(type) {
-	case int:
-		return x, true
-	case int32:
-		return int(x), true
-	case int64:
-		return int(x), true
-	case float64:
-		if math.IsNaN(x) || math.IsInf(x, 0) {
-			return 0, false
-		}
-		return int(x), true
-	case json.Number:
-		i, err := x.Int64()
-		if err != nil {
-			return 0, false
-		}
-		return int(i), true
-	case string:
-		s := strings.TrimSpace(x)
-		if s == "" {
-			return 0, false
-		}
-		if i, err := strconv.Atoi(s); err == nil {
-			return i, true
-		}
-		f, err := strconv.ParseFloat(s, 64)
-		if err != nil || math.IsNaN(f) || math.IsInf(f, 0) {
-			return 0, false
-		}
-		return int(f), true
-	default:
-		return 0, false
-	}
-}
--- a/internal/sse/parser_test.go
+++ b/internal/sse/parser_test.go
@@ -28,8 +28,8 @@ func TestExtractTokenUsage(t *testing.T) {
 		},
 	}
 	p, c := extractAccumulatedTokenUsage(chunk)
-	if p != 123 || c != 456 {
-		t.Fatalf("expected 123/456, got %d/%d", p, c)
+	if p != 0 || c != 0 {
+		t.Fatalf("expected upstream usage ignored as 0/0, got %d/%d", p, c)
 	}
 }

--- a/internal/sse/raw_stream_token_replay_test.go
+++ b/internal/sse/raw_stream_token_replay_test.go
@@ -40,8 +40,8 @@ func TestRawStreamSamplesTokenReplay(t *testing.T) {
 			if expectedTokens <= 0 {
 				t.Fatalf("expected positive token usage from raw stream, got %d", expectedTokens)
 			}
-			if parsedTokens != expectedTokens {
-				t.Fatalf("token mismatch parsed=%d expected=%d", parsedTokens, expectedTokens)
+			if parsedTokens != 0 {
+				t.Fatalf("expected parser to ignore upstream token usage, got parsed=%d expectedRaw=%d", parsedTokens, expectedTokens)
 			}
 		})
 	}
--- a/tests/compat/expected/sse_content_filter_status.json
+++ b/tests/compat/expected/sse_content_filter_status.json
@@ -3,6 +3,6 @@
  "finished": true,
  "new_type": "text",
  "content_filter": true,
-  "output_tokens": 77,
+  "output_tokens": 0,
  "error_message": ""
 }
--- a/tests/node/chat-stream.test.js
+++ b/tests/node/chat-stream.test.js
@@ -248,7 +248,7 @@ test('parseChunkForContent strips reference markers from fragment content', () =
  assert.deepEqual(parsed.parts, [{ text: '广州天气  多云', type: 'text' }]);
 });

-test('parseChunkForContent detects content_filter status and carries output tokens', () => {
+test('parseChunkForContent detects content_filter status and ignores upstream output tokens', () => {
  const chunk = {
    p: 'response',
    v: [
@@ -260,7 +260,7 @@ test('parseChunkForContent detects content_filter status and carries output toke
  assert.equal(parsed.parsed, true);
  assert.equal(parsed.finished, true);
  assert.equal(parsed.contentFilter, true);
-  assert.equal(parsed.outputTokens, 77);
+  assert.equal(parsed.outputTokens, 0);
  assert.deepEqual(parsed.parts, []);
 });

@@ -275,11 +275,11 @@ test('parseChunkForContent keeps error branches distinct from content_filter sta
  assert.equal(parsed.finished, true);
  assert.equal(parsed.contentFilter, false);
  assert.equal(parsed.errorMessage.length > 0, true);
-  assert.equal(parsed.outputTokens, 88);
+  assert.equal(parsed.outputTokens, 0);
  assert.deepEqual(parsed.parts, []);
 });

-test('parseChunkForContent preserves output tokens on FINISHED lines', () => {
+test('parseChunkForContent ignores output tokens on FINISHED lines', () => {
  const parsed = parseChunkForContent(
    { p: 'response/status', v: 'FINISHED', accumulated_token_usage: 190 },
    false,
@@ -288,11 +288,11 @@ test('parseChunkForContent preserves output tokens on FINISHED lines', () => {
  assert.equal(parsed.parsed, true);
  assert.equal(parsed.finished, true);
  assert.equal(parsed.contentFilter, false);
-  assert.equal(parsed.outputTokens, 190);
+  assert.equal(parsed.outputTokens, 0);
  assert.deepEqual(parsed.parts, []);
 });

-test('parseChunkForContent captures output tokens from response BATCH status snapshots', () => {
+test('parseChunkForContent ignores output tokens from response BATCH status snapshots', () => {
  const parsed = parseChunkForContent(
    {
      p: 'response',
@@ -308,7 +308,7 @@ test('parseChunkForContent captures output tokens from response BATCH status sna
  assert.equal(parsed.parsed, true);
  assert.equal(parsed.finished, false);
  assert.equal(parsed.contentFilter, false);
-  assert.equal(parsed.outputTokens, 190);
+  assert.equal(parsed.outputTokens, 0);
  assert.deepEqual(parsed.parts, []);
 });

@@ -321,7 +321,7 @@ test('parseChunkForContent matches FINISHED case-insensitively on status paths',
  assert.equal(parsed.parsed, true);
  assert.equal(parsed.finished, true);
  assert.equal(parsed.contentFilter, false);
-  assert.equal(parsed.outputTokens, 190);
+  assert.equal(parsed.outputTokens, 0);
  assert.deepEqual(parsed.parts, []);
 });

@@ -334,7 +334,7 @@ test('parseChunkForContent filters INCOMPLETE status text without stopping strea
  assert.equal(parsed.parsed, true);
  assert.equal(parsed.finished, false);
  assert.equal(parsed.contentFilter, false);
-  assert.equal(parsed.outputTokens, 190);
+  assert.equal(parsed.outputTokens, 0);
  assert.deepEqual(parsed.parts, []);
 });