mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-02 07:25:26 +08:00
temporarily ignore upstream token usage fields globally
This commit is contained in:
@@ -239,7 +239,7 @@ func TestChatCompletionsStreamContentFilterStopsNormallyWithoutLeak(t *testing.T
|
||||
}
|
||||
}
|
||||
|
||||
func TestResponsesStreamUsageOverridesFromBatchAccumulatedTokenUsage(t *testing.T) {
|
||||
func TestResponsesStreamUsageIgnoresBatchAccumulatedTokenUsage(t *testing.T) {
|
||||
statuses := make([]int, 0, 1)
|
||||
h := &Handler{
|
||||
Store: mockOpenAIConfig{wideInput: true},
|
||||
@@ -282,12 +282,12 @@ func TestResponsesStreamUsageOverridesFromBatchAccumulatedTokenUsage(t *testing.
|
||||
if usage == nil {
|
||||
t.Fatalf("expected usage in response payload, got %#v", resp)
|
||||
}
|
||||
if got, _ := usage["output_tokens"].(float64); int(got) != 190 {
|
||||
t.Fatalf("expected output_tokens=190, got %#v", usage["output_tokens"])
|
||||
if got, _ := usage["output_tokens"].(float64); int(got) == 190 {
|
||||
t.Fatalf("expected upstream accumulated token usage to be ignored, got %#v", usage["output_tokens"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestResponsesNonStreamUsageOverridesPromptAndOutputTokenUsage(t *testing.T) {
|
||||
func TestResponsesNonStreamUsageIgnoresPromptAndOutputTokenUsage(t *testing.T) {
|
||||
statuses := make([]int, 0, 1)
|
||||
h := &Handler{
|
||||
Store: mockOpenAIConfig{wideInput: true},
|
||||
@@ -322,13 +322,13 @@ func TestResponsesNonStreamUsageOverridesPromptAndOutputTokenUsage(t *testing.T)
|
||||
if usage == nil {
|
||||
t.Fatalf("expected usage object, got %#v", out)
|
||||
}
|
||||
if got, _ := usage["input_tokens"].(float64); int(got) != 11 {
|
||||
t.Fatalf("expected input_tokens=11, got %#v", usage["input_tokens"])
|
||||
input, _ := usage["input_tokens"].(float64)
|
||||
output, _ := usage["output_tokens"].(float64)
|
||||
total, _ := usage["total_tokens"].(float64)
|
||||
if int(output) == 29 {
|
||||
t.Fatalf("expected upstream completion token usage to be ignored, got %#v", usage["output_tokens"])
|
||||
}
|
||||
if got, _ := usage["output_tokens"].(float64); int(got) != 29 {
|
||||
t.Fatalf("expected output_tokens=29, got %#v", usage["output_tokens"])
|
||||
}
|
||||
if got, _ := usage["total_tokens"].(float64); int(got) != 40 {
|
||||
t.Fatalf("expected total_tokens=40, got %#v", usage["total_tokens"])
|
||||
if int(total) != int(input)+int(output) {
|
||||
t.Fatalf("expected total_tokens=input_tokens+output_tokens, usage=%#v", usage)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -443,70 +443,10 @@ function hasContentFilterStatusValue(v) {
|
||||
}
|
||||
|
||||
function extractAccumulatedTokenUsage(chunk) {
|
||||
const usage = findAccumulatedTokenUsage(chunk);
|
||||
return usage || { prompt: 0, output: 0 };
|
||||
}
|
||||
|
||||
function findAccumulatedTokenUsage(v) {
|
||||
if (Array.isArray(v)) {
|
||||
for (const item of v) {
|
||||
const u = findAccumulatedTokenUsage(item);
|
||||
if (u) return u;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (!v || typeof v !== 'object') {
|
||||
return null;
|
||||
}
|
||||
const pathValue = asString(v.p);
|
||||
if (pathValue && pathValue.toLowerCase().includes('accumulated_token_usage')) {
|
||||
const n = toInt(v.v);
|
||||
if (n > 0) {
|
||||
return { prompt: 0, output: n };
|
||||
}
|
||||
}
|
||||
if (pathValue && pathValue.toLowerCase().includes('token_usage')) {
|
||||
const u = v.v;
|
||||
if (u && typeof u === 'object') {
|
||||
const p = toInt(u.prompt_tokens);
|
||||
const c = toInt(u.completion_tokens);
|
||||
if (p > 0 || c > 0) {
|
||||
return { prompt: p, output: c };
|
||||
}
|
||||
}
|
||||
}
|
||||
const direct = toInt(v.accumulated_token_usage);
|
||||
if (direct > 0) {
|
||||
return { prompt: 0, output: direct };
|
||||
}
|
||||
if (v.token_usage && typeof v.token_usage === 'object') {
|
||||
const p = toInt(v.token_usage.prompt_tokens);
|
||||
const c = toInt(v.token_usage.completion_tokens);
|
||||
if (p > 0 || c > 0) {
|
||||
return { prompt: p, output: c };
|
||||
}
|
||||
}
|
||||
for (const value of Object.values(v)) {
|
||||
const u = findAccumulatedTokenUsage(value);
|
||||
if (u) return u;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function toInt(v) {
|
||||
if (typeof v === 'number' && Number.isFinite(v)) {
|
||||
return Math.trunc(v);
|
||||
}
|
||||
if (typeof v === 'string' && v.trim() !== '') {
|
||||
const n = Number(v);
|
||||
if (Number.isFinite(n)) {
|
||||
return Math.trunc(n);
|
||||
}
|
||||
}
|
||||
if (typeof v !== 'number') {
|
||||
return 0;
|
||||
}
|
||||
return Number.isFinite(v) ? Math.trunc(v) : 0;
|
||||
// 临时策略:忽略上游 usage 字段(accumulated_token_usage / token_usage),
|
||||
// 统一使用内部估算计数,避免上下文累计口径误差。
|
||||
void chunk;
|
||||
return { prompt: 0, output: 0 };
|
||||
}
|
||||
|
||||
function formatErrorMessage(v) {
|
||||
|
||||
@@ -26,7 +26,7 @@ func TestParseDeepSeekContentLineContentFilter(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseDeepSeekContentLineContentFilterCodeIncludesOutputTokens(t *testing.T) {
|
||||
func TestParseDeepSeekContentLineContentFilterCodeIgnoresUpstreamOutputTokens(t *testing.T) {
|
||||
res := ParseDeepSeekContentLine(
|
||||
[]byte(`data: {"code":"content_filter","accumulated_token_usage":99}`),
|
||||
false, "text",
|
||||
@@ -34,8 +34,8 @@ func TestParseDeepSeekContentLineContentFilterCodeIncludesOutputTokens(t *testin
|
||||
if !res.Parsed || !res.Stop || !res.ContentFilter {
|
||||
t.Fatalf("expected content-filter stop result: %#v", res)
|
||||
}
|
||||
if res.OutputTokens != 99 {
|
||||
t.Fatalf("expected output token usage 99, got %d", res.OutputTokens)
|
||||
if res.OutputTokens != 0 {
|
||||
t.Fatalf("expected upstream output token usage to be ignored, got %d", res.OutputTokens)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,27 +46,27 @@ func TestParseDeepSeekContentLineContentFilterStatus(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseDeepSeekContentLineCapturesAccumulatedTokenUsage(t *testing.T) {
|
||||
func TestParseDeepSeekContentLineIgnoresAccumulatedTokenUsage(t *testing.T) {
|
||||
res := ParseDeepSeekContentLine([]byte(`data: {"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":1383},{"p":"quasi_status","v":"FINISHED"}]}`), false, "text")
|
||||
if res.OutputTokens != 1383 {
|
||||
t.Fatalf("expected output token usage 1383, got %d", res.OutputTokens)
|
||||
if res.OutputTokens != 0 {
|
||||
t.Fatalf("expected accumulated token usage ignored, got %d", res.OutputTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseDeepSeekContentLineCapturesAccumulatedTokenUsageString(t *testing.T) {
|
||||
func TestParseDeepSeekContentLineIgnoresAccumulatedTokenUsageString(t *testing.T) {
|
||||
res := ParseDeepSeekContentLine([]byte(`data: {"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":"190"},{"p":"quasi_status","v":"FINISHED"}]}`), false, "text")
|
||||
if res.OutputTokens != 190 {
|
||||
t.Fatalf("expected output token usage 190, got %d", res.OutputTokens)
|
||||
if res.OutputTokens != 0 {
|
||||
t.Fatalf("expected accumulated token usage string ignored, got %d", res.OutputTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseDeepSeekContentLineErrorIncludesOutputTokens(t *testing.T) {
|
||||
func TestParseDeepSeekContentLineErrorIgnoresUpstreamOutputTokens(t *testing.T) {
|
||||
res := ParseDeepSeekContentLine([]byte(`data: {"error":"boom","accumulated_token_usage":123}`), false, "text")
|
||||
if !res.Parsed || !res.Stop {
|
||||
t.Fatalf("expected stop on error: %#v", res)
|
||||
}
|
||||
if res.OutputTokens != 123 {
|
||||
t.Fatalf("expected output token usage 123 on error, got %d", res.OutputTokens)
|
||||
if res.OutputTokens != 0 {
|
||||
t.Fatalf("expected output token usage ignored on error, got %d", res.OutputTokens)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,8 +3,6 @@ package sse
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"ds2api/internal/deepseek"
|
||||
@@ -365,84 +363,8 @@ func hasContentFilterStatusValue(v any) bool {
|
||||
}
|
||||
|
||||
func extractAccumulatedTokenUsage(chunk map[string]any) (int, int) {
|
||||
return findAccumulatedTokenUsage(chunk)
|
||||
}
|
||||
|
||||
func findAccumulatedTokenUsage(v any) (int, int) {
|
||||
switch x := v.(type) {
|
||||
case map[string]any:
|
||||
if p, _ := x["p"].(string); strings.Contains(strings.ToLower(p), "accumulated_token_usage") {
|
||||
if n, ok := toInt(x["v"]); ok && n > 0 {
|
||||
return 0, n
|
||||
}
|
||||
}
|
||||
if p, _ := x["p"].(string); strings.Contains(strings.ToLower(p), "token_usage") {
|
||||
if m, ok := x["v"].(map[string]any); ok {
|
||||
p, _ := toInt(m["prompt_tokens"])
|
||||
c, _ := toInt(m["completion_tokens"])
|
||||
if p > 0 || c > 0 {
|
||||
return p, c
|
||||
}
|
||||
}
|
||||
}
|
||||
if n, ok := toInt(x["accumulated_token_usage"]); ok && n > 0 {
|
||||
return 0, n
|
||||
}
|
||||
if usage, ok := x["token_usage"].(map[string]any); ok {
|
||||
p, _ := toInt(usage["prompt_tokens"])
|
||||
c, _ := toInt(usage["completion_tokens"])
|
||||
if p > 0 || c > 0 {
|
||||
return p, c
|
||||
}
|
||||
}
|
||||
for _, vv := range x {
|
||||
if p, c := findAccumulatedTokenUsage(vv); p > 0 || c > 0 {
|
||||
return p, c
|
||||
}
|
||||
}
|
||||
case []any:
|
||||
for _, item := range x {
|
||||
if p, c := findAccumulatedTokenUsage(item); p > 0 || c > 0 {
|
||||
return p, c
|
||||
}
|
||||
}
|
||||
}
|
||||
// 临时策略:忽略上游 usage 字段(accumulated_token_usage / token_usage),
|
||||
// 由下游统一使用内部估算 token 计数,避免上下文累计口径导致单次输出偏差过大。
|
||||
_ = chunk
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
func toInt(v any) (int, bool) {
|
||||
switch x := v.(type) {
|
||||
case int:
|
||||
return x, true
|
||||
case int32:
|
||||
return int(x), true
|
||||
case int64:
|
||||
return int(x), true
|
||||
case float64:
|
||||
if math.IsNaN(x) || math.IsInf(x, 0) {
|
||||
return 0, false
|
||||
}
|
||||
return int(x), true
|
||||
case json.Number:
|
||||
i, err := x.Int64()
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
return int(i), true
|
||||
case string:
|
||||
s := strings.TrimSpace(x)
|
||||
if s == "" {
|
||||
return 0, false
|
||||
}
|
||||
if i, err := strconv.Atoi(s); err == nil {
|
||||
return i, true
|
||||
}
|
||||
f, err := strconv.ParseFloat(s, 64)
|
||||
if err != nil || math.IsNaN(f) || math.IsInf(f, 0) {
|
||||
return 0, false
|
||||
}
|
||||
return int(f), true
|
||||
default:
|
||||
return 0, false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,8 +28,8 @@ func TestExtractTokenUsage(t *testing.T) {
|
||||
},
|
||||
}
|
||||
p, c := extractAccumulatedTokenUsage(chunk)
|
||||
if p != 123 || c != 456 {
|
||||
t.Fatalf("expected 123/456, got %d/%d", p, c)
|
||||
if p != 0 || c != 0 {
|
||||
t.Fatalf("expected upstream usage ignored as 0/0, got %d/%d", p, c)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -40,8 +40,8 @@ func TestRawStreamSamplesTokenReplay(t *testing.T) {
|
||||
if expectedTokens <= 0 {
|
||||
t.Fatalf("expected positive token usage from raw stream, got %d", expectedTokens)
|
||||
}
|
||||
if parsedTokens != expectedTokens {
|
||||
t.Fatalf("token mismatch parsed=%d expected=%d", parsedTokens, expectedTokens)
|
||||
if parsedTokens != 0 {
|
||||
t.Fatalf("expected parser to ignore upstream token usage, got parsed=%d expectedRaw=%d", parsedTokens, expectedTokens)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -3,6 +3,6 @@
|
||||
"finished": true,
|
||||
"new_type": "text",
|
||||
"content_filter": true,
|
||||
"output_tokens": 77,
|
||||
"output_tokens": 0,
|
||||
"error_message": ""
|
||||
}
|
||||
|
||||
@@ -248,7 +248,7 @@ test('parseChunkForContent strips reference markers from fragment content', () =
|
||||
assert.deepEqual(parsed.parts, [{ text: '广州天气 多云', type: 'text' }]);
|
||||
});
|
||||
|
||||
test('parseChunkForContent detects content_filter status and carries output tokens', () => {
|
||||
test('parseChunkForContent detects content_filter status and ignores upstream output tokens', () => {
|
||||
const chunk = {
|
||||
p: 'response',
|
||||
v: [
|
||||
@@ -260,7 +260,7 @@ test('parseChunkForContent detects content_filter status and carries output toke
|
||||
assert.equal(parsed.parsed, true);
|
||||
assert.equal(parsed.finished, true);
|
||||
assert.equal(parsed.contentFilter, true);
|
||||
assert.equal(parsed.outputTokens, 77);
|
||||
assert.equal(parsed.outputTokens, 0);
|
||||
assert.deepEqual(parsed.parts, []);
|
||||
});
|
||||
|
||||
@@ -275,11 +275,11 @@ test('parseChunkForContent keeps error branches distinct from content_filter sta
|
||||
assert.equal(parsed.finished, true);
|
||||
assert.equal(parsed.contentFilter, false);
|
||||
assert.equal(parsed.errorMessage.length > 0, true);
|
||||
assert.equal(parsed.outputTokens, 88);
|
||||
assert.equal(parsed.outputTokens, 0);
|
||||
assert.deepEqual(parsed.parts, []);
|
||||
});
|
||||
|
||||
test('parseChunkForContent preserves output tokens on FINISHED lines', () => {
|
||||
test('parseChunkForContent ignores output tokens on FINISHED lines', () => {
|
||||
const parsed = parseChunkForContent(
|
||||
{ p: 'response/status', v: 'FINISHED', accumulated_token_usage: 190 },
|
||||
false,
|
||||
@@ -288,11 +288,11 @@ test('parseChunkForContent preserves output tokens on FINISHED lines', () => {
|
||||
assert.equal(parsed.parsed, true);
|
||||
assert.equal(parsed.finished, true);
|
||||
assert.equal(parsed.contentFilter, false);
|
||||
assert.equal(parsed.outputTokens, 190);
|
||||
assert.equal(parsed.outputTokens, 0);
|
||||
assert.deepEqual(parsed.parts, []);
|
||||
});
|
||||
|
||||
test('parseChunkForContent captures output tokens from response BATCH status snapshots', () => {
|
||||
test('parseChunkForContent ignores output tokens from response BATCH status snapshots', () => {
|
||||
const parsed = parseChunkForContent(
|
||||
{
|
||||
p: 'response',
|
||||
@@ -308,7 +308,7 @@ test('parseChunkForContent captures output tokens from response BATCH status sna
|
||||
assert.equal(parsed.parsed, true);
|
||||
assert.equal(parsed.finished, false);
|
||||
assert.equal(parsed.contentFilter, false);
|
||||
assert.equal(parsed.outputTokens, 190);
|
||||
assert.equal(parsed.outputTokens, 0);
|
||||
assert.deepEqual(parsed.parts, []);
|
||||
});
|
||||
|
||||
@@ -321,7 +321,7 @@ test('parseChunkForContent matches FINISHED case-insensitively on status paths',
|
||||
assert.equal(parsed.parsed, true);
|
||||
assert.equal(parsed.finished, true);
|
||||
assert.equal(parsed.contentFilter, false);
|
||||
assert.equal(parsed.outputTokens, 190);
|
||||
assert.equal(parsed.outputTokens, 0);
|
||||
assert.deepEqual(parsed.parts, []);
|
||||
});
|
||||
|
||||
@@ -334,7 +334,7 @@ test('parseChunkForContent filters INCOMPLETE status text without stopping strea
|
||||
assert.equal(parsed.parsed, true);
|
||||
assert.equal(parsed.finished, false);
|
||||
assert.equal(parsed.contentFilter, false);
|
||||
assert.equal(parsed.outputTokens, 190);
|
||||
assert.equal(parsed.outputTokens, 0);
|
||||
assert.deepEqual(parsed.parts, []);
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user