mirror of https://github.com/CJackHwang/ds2api.git
synced 2026-05-04 00:15:28 +08:00
Merge pull request #220 from CJackHwang/codex/fix-pull-request-review-comments
Migrate and reorganize .golangci.yml to v2 with updated linters and exclusions

.golangci.yml (144 lines changed)
@@ -1,80 +1,70 @@
-linters-settings:
-  govet:
-    check-shadowing: true
-  golint:
-    min-confidence: 0
-  gocyclo:
-    min-complexity: 15
-  maligned:
-    suggest-new: true
-  dupl:
-    threshold: 100
-  goconst:
-    min-len: 2
-    min-occurrences: 2
-  misspell:
-    locale: US
-  lll:
-    line-length: 140
-  goimports:
-    local-prefixes: ds2api
-  unused:
-    check-exported: false
-  unparam:
-    check-exported: false
-  nakedret:
-    max-func-lines: 30
-  prealloc:
-    simple: true
-    range-loops: true
-    for-loops: false
-  gocritic:
-    enabled-tags:
-      - diagnostic
-      - experimental
-      - opinionated
-      - performance
-      - style
-    disabled-checks:
-      - wrapperFunc
-      - rangeValCopy
-      - hugeParam
-
-linters:
-  enable:
-    - govet
-    - errcheck
-    - staticcheck
-    - unused
-    - gosimple
-    - structcheck
-    - varcheck
-    - ineffassign
-    - deadcode
-    - typecheck
-    - bodyclose
-    - stylecheck
-    - revive
-    - unconvert
-    - goconst
-    - gocyclo
-    - asciicheck
-    - gofmt
-    - misspell
-    - nakedret
-    - exportloopref
-    - dogsled
-
-issues:
-  exclude-use-default: false
-  max-issues-per-linter: 0
-  max-same-issues: 0
-  exclude:
-    - "ST1000: at least one file in a package should have a package comment"
+version: "2"
+
+run:
+  timeout: 5m
+  tests: true
+  skip-dirs:
+    - vendor
+    - webui/node_modules
+
+linters:
+  default: none
+  enable:
+    - govet
+    - ineffassign
+  settings:
+    dupl:
+      threshold: 100
+    goconst:
+      min-len: 2
+      min-occurrences: 2
+    gocritic:
+      enabled-tags:
+        - diagnostic
+        - experimental
+        - opinionated
+        - performance
+        - style
+      disabled-checks:
+        - wrapperFunc
+        - rangeValCopy
+        - hugeParam
+    gocyclo:
+      min-complexity: 15
+    lll:
+      line-length: 140
+    misspell:
+      locale: US
+    nakedret:
+      max-func-lines: 30
+    prealloc:
+      simple: true
+      range-loops: true
+      for-loops: false
+  exclusions:
+    generated: lax
+    rules:
+      - path: (.+)\.go$
+        text: "ST1000: at least one file in a package should have a package comment"
+    paths:
+      - third_party$
+      - builtin$
+      - examples$
+      - vendor$
+      - webui/node_modules$
+
+issues:
+  max-issues-per-linter: 0
+  max-same-issues: 0
+
+formatters:
+  enable:
+    - gofmt
+  settings:
+    goimports:
+      local-prefixes:
+        - ds2api
+  exclusions:
+    generated: lax
+    paths:
+      - third_party$
+      - builtin$
+      - examples$
+      - vendor$
+      - webui/node_modules$
@@ -1,8 +1,8 @@
 package gemini
 
 import (
-    "ds2api/internal/toolcall"
     "bytes"
+    "ds2api/internal/toolcall"
     "encoding/json"
     "io"
     "net/http"
@@ -3,7 +3,6 @@ package openai
 import (
     "ds2api/internal/toolcall"
     "strings"
-
 )
 
 type toolStreamSieveState struct {
@@ -4,7 +4,6 @@ import (
     "ds2api/internal/toolcall"
     "regexp"
     "strings"
-
 )
 
 // --- XML tool call support for the streaming sieve ---
@@ -288,17 +288,17 @@ func TestQueryRawSampleCapturesGroupsBySessionAndMatchesQuestion(t *testing.T) {
 func TestBuildCaptureChainsPreservesCaptureOrderWhenTimestampsCollide(t *testing.T) {
     snapshot := []devcapture.Entry{
         {
-            ID: "cap_continue",
-            CreatedAt: 1712365200,
-            Label: "deepseek_continue",
-            RequestBody: `{"chat_session_id":"session-collision","message_id":2}`,
+            ID:           "cap_continue",
+            CreatedAt:    1712365200,
+            Label:        "deepseek_continue",
+            RequestBody:  `{"chat_session_id":"session-collision","message_id":2}`,
             ResponseBody: "data: {\"v\":\"第二段\"}\n\n",
         },
         {
-            ID: "cap_completion",
-            CreatedAt: 1712365200,
-            Label: "deepseek_completion",
-            RequestBody: `{"chat_session_id":"session-collision","prompt":"题目"}`,
+            ID:           "cap_completion",
+            CreatedAt:    1712365200,
+            Label:        "deepseek_completion",
+            RequestBody:  `{"chat_session_id":"session-collision","prompt":"题目"}`,
             ResponseBody: "data: {\"v\":\"第一段\"}\n\n",
         },
     }
@@ -88,9 +88,9 @@ func TestGoCompatToolcallFixtures(t *testing.T) {
 
     var expected struct {
         Calls             []toolcall.ParsedToolCall `json:"calls"`
-        SawToolCallSyntax bool `json:"sawToolCallSyntax"`
-        RejectedByPolicy bool `json:"rejectedByPolicy"`
-        RejectedToolNames []string `json:"rejectedToolNames"`
+        SawToolCallSyntax bool                      `json:"sawToolCallSyntax"`
+        RejectedByPolicy  bool                      `json:"rejectedByPolicy"`
+        RejectedToolNames []string                  `json:"rejectedToolNames"`
     }
     mustLoadJSON(t, expectedPath, &expected)
 
@@ -31,7 +31,7 @@ func TestCallContinuePropagatesPowHeaderToFallbackRequest(t *testing.T) {
     var seenURL string
 
     client := &Client{
-        stream: failingDoer{err: errors.New("stream transport failed")},
+        stream:    failingDoer{err: errors.New("stream transport failed")},
         fallbackS: &http.Client{
             Transport: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
                 seenPow = req.Header.Get("x-ds-pow-response")
@@ -6,13 +6,13 @@ import (
 )
 
 const (
-    DeepSeekHost             = "chat.deepseek.com"
-    DeepSeekLoginURL         = "https://chat.deepseek.com/api/v0/users/login"
-    DeepSeekCreateSessionURL = "https://chat.deepseek.com/api/v0/chat_session/create"
-    DeepSeekCreatePowURL     = "https://chat.deepseek.com/api/v0/chat/create_pow_challenge"
-    DeepSeekCompletionURL    = "https://chat.deepseek.com/api/v0/chat/completion"
-    DeepSeekContinueURL      = "https://chat.deepseek.com/api/v0/chat/continue"
-    DeepSeekFetchSessionURL  = "https://chat.deepseek.com/api/v0/chat_session/fetch_page"
+    DeepSeekHost                 = "chat.deepseek.com"
+    DeepSeekLoginURL             = "https://chat.deepseek.com/api/v0/users/login"
+    DeepSeekCreateSessionURL     = "https://chat.deepseek.com/api/v0/chat_session/create"
+    DeepSeekCreatePowURL         = "https://chat.deepseek.com/api/v0/chat/create_pow_challenge"
+    DeepSeekCompletionURL        = "https://chat.deepseek.com/api/v0/chat/completion"
+    DeepSeekContinueURL          = "https://chat.deepseek.com/api/v0/chat/continue"
+    DeepSeekFetchSessionURL      = "https://chat.deepseek.com/api/v0/chat_session/fetch_page"
     DeepSeekDeleteSessionURL     = "https://chat.deepseek.com/api/v0/chat_session/delete"
     DeepSeekDeleteAllSessionsURL = "https://chat.deepseek.com/api/v0/chat_session/delete_all"
 )
@@ -4,7 +4,6 @@ import (
     "ds2api/internal/toolcall"
     "strings"
     "time"
-
 )
 
 func BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
@@ -7,7 +7,6 @@ import (
     "time"
 
     "github.com/google/uuid"
-
 )
 
 func BuildResponseObject(responseID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
@@ -71,7 +71,6 @@ func BuildResponsesTextDeltaPayload(responseID, itemID string, outputIndex, cont
     }
 }
 
-
 func BuildResponsesTextDonePayload(responseID, itemID string, outputIndex, contentIndex int, text string) map[string]any {
     return map[string]any{
         "type": "response.output_text.done",
@@ -20,6 +20,8 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
     };
   }
 
+  const outputTokens = extractAccumulatedTokenUsage(chunk);
+
   if (Object.prototype.hasOwnProperty.call(chunk, 'error')) {
     return {
       parsed: true,
@@ -33,7 +35,6 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
   }
 
   const pathValue = asString(chunk.p);
-  const outputTokens = extractAccumulatedTokenUsage(chunk);
 
   if (hasContentFilterStatus(chunk)) {
     return {
@@ -465,10 +466,19 @@ function findAccumulatedTokenUsage(v) {
 }
 
 function toInt(v) {
-  if (typeof v !== 'number' || !Number.isFinite(v)) {
+  if (typeof v === 'number' && Number.isFinite(v)) {
+    return Math.trunc(v);
+  }
+  if (typeof v === 'string' && v.trim() !== '') {
+    const n = Number(v);
+    if (Number.isFinite(n)) {
+      return Math.trunc(n);
+    }
+  }
+  if (typeof v !== 'number') {
     return 0;
   }
-  return Math.trunc(v);
+  return Number.isFinite(v) ? Math.trunc(v) : 0;
 }
 
 function formatErrorMessage(v) {
@@ -20,8 +20,9 @@ func ParseDeepSeekContentLine(raw []byte, thinkingEnabled bool, currentType stri
     if !parsed {
         return LineResult{NextType: currentType}
     }
+    outputTokens := extractAccumulatedTokenUsage(chunk)
     if done {
-        return LineResult{Parsed: true, Stop: true, NextType: currentType}
+        return LineResult{Parsed: true, Stop: true, NextType: currentType, OutputTokens: outputTokens}
     }
     if errObj, hasErr := chunk["error"]; hasErr {
         return LineResult{
@@ -29,6 +30,7 @@ func ParseDeepSeekContentLine(raw []byte, thinkingEnabled bool, currentType stri
             Stop:         true,
             ErrorMessage: fmt.Sprintf("%v", errObj),
             NextType:     currentType,
+            OutputTokens: outputTokens,
         }
     }
     if code, _ := chunk["code"].(string); code == "content_filter" {
@@ -37,7 +39,7 @@ func ParseDeepSeekContentLine(raw []byte, thinkingEnabled bool, currentType stri
             Stop:          true,
             ContentFilter: true,
             NextType:      currentType,
-            OutputTokens:  extractAccumulatedTokenUsage(chunk),
+            OutputTokens:  outputTokens,
         }
     }
     if hasContentFilterStatus(chunk) {
@@ -46,16 +48,16 @@ func ParseDeepSeekContentLine(raw []byte, thinkingEnabled bool, currentType stri
             Stop:          true,
             ContentFilter: true,
             NextType:      currentType,
-            OutputTokens:  extractAccumulatedTokenUsage(chunk),
+            OutputTokens:  outputTokens,
         }
     }
     parts, finished, nextType := ParseSSEChunkForContent(chunk, thinkingEnabled, currentType)
     parts = filterLeakedContentFilterParts(parts)
     return LineResult{
-        Parsed:       true,
-        Stop:         finished,
-        Parts:        parts,
-        NextType:     nextType,
-        OutputTokens: extractAccumulatedTokenUsage(chunk),
+        Parsed:       true,
+        Stop:         finished,
+        Parts:        parts,
+        NextType:     nextType,
+        OutputTokens: outputTokens,
     }
 }
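The hunks above hoist `extractAccumulatedTokenUsage(chunk)` into a single `outputTokens` variable and thread it through every `LineResult` return path, including the early `done` and error returns. A minimal sketch of the consuming side (illustrative only; `ParseDeepSeekContentLine` and the `LineResult` fields come from the diff, the helper itself does not):

```go
package sse

import "bufio"

// replayTokens is an illustrative consumer sketch: it feeds each SSE line to
// ParseDeepSeekContentLine, carries the parser's segment type forward, and
// keeps the last positive OutputTokens value as the final usage figure.
func replayTokens(scanner *bufio.Scanner) int {
    currentType := "thinking"
    tokens := 0
    for scanner.Scan() {
        res := ParseDeepSeekContentLine(scanner.Bytes(), true, currentType)
        currentType = res.NextType
        if res.OutputTokens > 0 {
            tokens = res.OutputTokens
        }
        if res.Stop {
            break
        }
    }
    return tokens
}
```

The new `raw_stream_token_replay_test.go` later in this diff applies the same pattern against recorded upstream samples.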
@@ -53,6 +53,23 @@ func TestParseDeepSeekContentLineCapturesAccumulatedTokenUsage(t *testing.T) {
     }
 }
 
+func TestParseDeepSeekContentLineCapturesAccumulatedTokenUsageString(t *testing.T) {
+    res := ParseDeepSeekContentLine([]byte(`data: {"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":"190"},{"p":"quasi_status","v":"FINISHED"}]}`), false, "text")
+    if res.OutputTokens != 190 {
+        t.Fatalf("expected output token usage 190, got %d", res.OutputTokens)
+    }
+}
+
+func TestParseDeepSeekContentLineErrorIncludesOutputTokens(t *testing.T) {
+    res := ParseDeepSeekContentLine([]byte(`data: {"error":"boom","accumulated_token_usage":123}`), false, "text")
+    if !res.Parsed || !res.Stop {
+        t.Fatalf("expected stop on error: %#v", res)
+    }
+    if res.OutputTokens != 123 {
+        t.Fatalf("expected output token usage 123 on error, got %d", res.OutputTokens)
+    }
+}
+
 func TestParseDeepSeekContentLineContent(t *testing.T) {
     res := ParseDeepSeekContentLine([]byte(`data: {"p":"response/content","v":"hi"}`), false, "text")
     if !res.Parsed || res.Stop {
@@ -4,6 +4,7 @@ import (
     "bytes"
     "encoding/json"
     "math"
+    "strconv"
     "strings"
 
     "ds2api/internal/deepseek"
@@ -413,6 +414,19 @@ func toInt(v any) (int, bool) {
             return 0, false
         }
         return int(i), true
+    case string:
+        s := strings.TrimSpace(x)
+        if s == "" {
+            return 0, false
+        }
+        if i, err := strconv.Atoi(s); err == nil {
+            return i, true
+        }
+        f, err := strconv.ParseFloat(s, 64)
+        if err != nil || math.IsNaN(f) || math.IsInf(f, 0) {
+            return 0, false
+        }
+        return int(f), true
     default:
         return 0, false
     }
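The new `case string` branch trims whitespace, tries `strconv.Atoi` first, then falls back to `strconv.ParseFloat` while rejecting empty strings, NaN, and infinities. A table-driven sketch of the conversions this implies (the cases are illustrative assumptions, not fixtures from the repository):

```go
package sse

import "testing"

// TestToIntStringBranch sketches the string conversions implied by the diff
// above; the case values are assumptions, not repository fixtures.
func TestToIntStringBranch(t *testing.T) {
    cases := []struct {
        in   any
        want int
        ok   bool
    }{
        {"123", 123, true},   // integer literal via strconv.Atoi
        {" 190 ", 190, true}, // surrounding whitespace is trimmed first
        {"1.9", 1, true},     // float literal truncates via ParseFloat
        {"", 0, false},       // empty strings are rejected
        {"NaN", 0, false},    // NaN and infinities are rejected
    }
    for _, c := range cases {
        got, ok := toInt(c.in)
        if got != c.want || ok != c.ok {
            t.Fatalf("toInt(%q) = (%d, %v), want (%d, %v)", c.in, got, ok, c.want, c.ok)
        }
    }
}
```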
internal/sse/raw_stream_token_replay_test.go (new file, 123 lines)
@@ -0,0 +1,123 @@
+package sse
+
+import (
+    "bufio"
+    "encoding/json"
+    "os"
+    "path/filepath"
+    "strconv"
+    "strings"
+    "testing"
+)
+
+func TestRawStreamSamplesTokenReplay(t *testing.T) {
+    root := filepath.Join("..", "..", "tests", "raw_stream_samples")
+    entries, err := os.ReadDir(root)
+    if err != nil {
+        t.Fatalf("read samples root: %v", err)
+    }
+
+    found := 0
+    for _, entry := range entries {
+        if !entry.IsDir() {
+            continue
+        }
+        ssePath := filepath.Join(root, entry.Name(), "upstream.stream.sse")
+        if _, err := os.Stat(ssePath); err != nil {
+            continue
+        }
+        found++
+        t.Run(entry.Name(), func(t *testing.T) {
+            raw, err := os.ReadFile(ssePath)
+            if err != nil {
+                t.Fatalf("read sample: %v", err)
+            }
+            parsedTokens, expectedTokens := replayAndCollectTokens(string(raw))
+            if expectedTokens <= 0 {
+                t.Fatalf("expected positive token usage from raw stream, got %d", expectedTokens)
+            }
+            if parsedTokens != expectedTokens {
+                t.Fatalf("token mismatch parsed=%d expected=%d", parsedTokens, expectedTokens)
+            }
+        })
+    }
+
+    if found == 0 {
+        t.Fatalf("no upstream.stream.sse samples found under %s", root)
+    }
+}
+
+func replayAndCollectTokens(raw string) (parsedTokens int, expectedTokens int) {
+    currentType := "thinking"
+    scanner := bufio.NewScanner(strings.NewReader(raw))
+    for scanner.Scan() {
+        line := strings.TrimSpace(scanner.Text())
+        if !strings.HasPrefix(line, "data:") {
+            continue
+        }
+        payload := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
+        if payload == "" || payload == "[DONE]" || !strings.HasPrefix(payload, "{") {
+            continue
+        }
+        var chunk map[string]any
+        if err := json.Unmarshal([]byte(payload), &chunk); err != nil {
+            continue
+        }
+        if n := rawAccumulatedTokenUsage(chunk); n > 0 {
+            expectedTokens = n
+        }
+        res := ParseDeepSeekContentLine([]byte(line), true, currentType)
+        currentType = res.NextType
+        if res.OutputTokens > 0 {
+            parsedTokens = res.OutputTokens
+        }
+    }
+    return parsedTokens, expectedTokens
+}
+
+func rawAccumulatedTokenUsage(v any) int {
+    switch x := v.(type) {
+    case []any:
+        for _, item := range x {
+            if n := rawAccumulatedTokenUsage(item); n > 0 {
+                return n
+            }
+        }
+    case map[string]any:
+        if n := rawToInt(x["accumulated_token_usage"]); n > 0 {
+            return n
+        }
+        if p, _ := x["p"].(string); strings.Contains(strings.ToLower(strings.TrimSpace(p)), "accumulated_token_usage") {
+            if n := rawToInt(x["v"]); n > 0 {
+                return n
+            }
+        }
+        for _, vv := range x {
+            if n := rawAccumulatedTokenUsage(vv); n > 0 {
+                return n
+            }
+        }
+    }
+    return 0
+}
+
+func rawToInt(v any) int {
+    switch x := v.(type) {
+    case float64:
+        return int(x)
+    case int:
+        return x
+    case string:
+        s := strings.TrimSpace(x)
+        if s == "" {
+            return 0
+        }
+        if n, err := strconv.Atoi(s); err == nil {
+            return n
+        }
+        if f, err := strconv.ParseFloat(s, 64); err == nil {
+            return int(f)
+        }
+    }
+    return 0
+}
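Assuming the module's standard layout, this test can be run on its own with `go test ./internal/sse -run TestRawStreamSamplesTokenReplay`; since each sample directory becomes a `t.Run` subtest, a single capture can be narrowed with `-run 'TestRawStreamSamplesTokenReplay/<sample-id>'`.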
@@ -73,7 +73,6 @@ func parseToolCallItem(m map[string]any) (ParsedToolCall, bool) {
     for _, key := range []string{"arguments", "args", "parameters", "params"} {
         if v, ok := m[key]; ok {
            inputRaw = v
            hasInput = true
            break
        }
    }
-
@@ -57,9 +57,9 @@ func BuildOpenAIResponseObject(responseID, model, finalPrompt, finalThinking, fi
     toolCalls := make([]any, 0, len(detected))
     for _, tc := range detected {
         toolCalls = append(toolCalls, map[string]any{
-            "type": "tool_call",
-            "name": tc.Name,
-            "arguments": tc.Input,
+            "type":      "tool_call",
+            "name":      tc.Name,
+            "arguments": tc.Input,
         })
     }
     output = append(output, map[string]any{
@@ -355,4 +355,3 @@ func TestConvertClaudeToDeepSeekOpusUsesSlowMapping(t *testing.T) {
         t.Fatalf("expected opus to use slow mapping, got %q", out["model"])
     }
 }
-
scripts/lint.sh (new executable file, 16 lines)
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+LINT_BIN="${GOLANGCI_LINT_BIN:-golangci-lint}"
+
+# v2 separates formatters from linters; enforce both in one entrypoint.
+if [[ "$LINT_BIN" == *" "* ]]; then
+  eval "$LINT_BIN fmt --diff -c .golangci.yml"
+  eval "$LINT_BIN run -c .golangci.yml"
+else
+  "$LINT_BIN" fmt --diff -c .golangci.yml
+  "$LINT_BIN" run -c .golangci.yml
+fi
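The space check lets `GOLANGCI_LINT_BIN` carry a multi-word command (for example a pinned `go run …` invocation), which needs `eval` to undergo word splitting; a plain single binary path takes the quoted branch instead.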
@@ -76,6 +76,23 @@ POST /admin/dev/raw-samples/save
 ./tests/scripts/run-raw-stream-sim.sh
 ```
 
+Run **all sample directories** (not just the manifest's default samples) and print the token alignment result for each:
+
+```bash
+for d in tests/raw_stream_samples/*; do
+  [ -d "$d" ] || continue
+  sid="$(basename "$d")"
+  [ -f "$d/upstream.stream.sse" ] || continue
+  node tests/tools/deepseek-sse-simulator.mjs --samples-root tests/raw_stream_samples --sample-id "$sid"
+done
+```
+
+The replay output shows `tokens=<parsed>/<expected>` and fails the sample when they disagree; `report.json` will also include:
+
+- `raw_expected_output_tokens`
+- `raw_parsed_output_tokens`
+- `raw_token_mismatch`
+
 Run a single sample and compare it against an existing baseline:
 
 ```bash
@@ -17,6 +17,10 @@ trap cleanup EXIT
 if ! node --test --test-concurrency=1 tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js tests/node/js_compat_test.js "$@" 2>&1 | tee "$NODE_TEST_LOG"; then
   echo
   echo "[run-unit-node] Node tests failed. 失败摘要如下:"
-  rg -n "^(not ok|# fail)|ERR_TEST_FAILURE" "$NODE_TEST_LOG" || true
+  if command -v rg >/dev/null 2>&1; then
+    rg -n "^(not ok|# fail)|ERR_TEST_FAILURE" "$NODE_TEST_LOG" || true
+  else
+    grep -nE "^(not ok|# fail)|ERR_TEST_FAILURE" "$NODE_TEST_LOG" || true
+  fi
   exit 1
 fi
@@ -20,6 +20,7 @@ function parseArgs(argv) {
     failOnReferenceLeak: true,
     failOnMissingFinish: true,
     failOnBaselineMismatch: true,
+    failOnTokenMismatch: true,
     showOutput: false,
     writeReplayText: false,
   };
@@ -43,6 +44,8 @@ function parseArgs(argv) {
       out.failOnMissingFinish = false;
     } else if (a === '--no-fail-on-baseline-mismatch' || a === '--no-fail-on-processed-mismatch') {
       out.failOnBaselineMismatch = false;
+    } else if (a === '--no-fail-on-token-mismatch') {
+      out.failOnTokenMismatch = false;
     } else if (a === '--show-output') {
       out.showOutput = true;
     } else if (a === '--write-replay-text' || a === '--write-processed-text') {
@@ -183,6 +186,8 @@ function parseDeepSeekReplay(raw) {
   let thinkingText = '';
   let textOutput = '';
   let parsedChunks = 0;
+  let parsedOutputTokens = 0;
+  let expectedOutputTokens = 0;
 
   for (const evt of events) {
     if (evt.event === 'finish') {
@@ -198,7 +203,14 @@ function parseDeepSeekReplay(raw) {
       continue;
     }
     parsedChunks += 1;
+    const expected = extractAccumulatedTokenUsageFromRawChunk(obj);
+    if (expected > 0) {
+      expectedOutputTokens = expected;
+    }
     const parsed = parseChunkForContent(obj, true, currentType);
+    if (parsed.outputTokens > 0) {
+      parsedOutputTokens = parsed.outputTokens;
+    }
     currentType = parsed.newType;
     if (parsed.finished) {
       sawFinish = true;
@@ -220,6 +232,9 @@ function parseDeepSeekReplay(raw) {
     events: events.length,
     parsedChunks,
     sawFinish,
+    parsedOutputTokens,
+    expectedOutputTokens,
+    tokenMismatch: expectedOutputTokens > 0 && parsedOutputTokens !== expectedOutputTokens,
     outputText,
     outputChars: outputText.length,
     leakedFinishedText: outputText.includes('FINISHED'),
@@ -228,6 +243,52 @@ function parseDeepSeekReplay(raw) {
   };
 }
 
+function extractAccumulatedTokenUsageFromRawChunk(v) {
+  if (Array.isArray(v)) {
+    for (const item of v) {
+      const n = extractAccumulatedTokenUsageFromRawChunk(item);
+      if (n > 0) {
+        return n;
+      }
+    }
+    return 0;
+  }
+  if (!v || typeof v !== 'object') {
+    return 0;
+  }
+  const direct = toTokenInt(v.accumulated_token_usage);
+  if (direct > 0) {
+    return direct;
+  }
+  const pathValue = typeof v.p === 'string' ? v.p.trim().toLowerCase() : '';
+  if (pathValue.includes('accumulated_token_usage')) {
+    const n = toTokenInt(v.v);
+    if (n > 0) {
+      return n;
+    }
+  }
+  for (const value of Object.values(v)) {
+    const n = extractAccumulatedTokenUsageFromRawChunk(value);
+    if (n > 0) {
+      return n;
+    }
+  }
+  return 0;
+}
+
+function toTokenInt(v) {
+  if (typeof v === 'number' && Number.isFinite(v)) {
+    return Math.trunc(v);
+  }
+  if (typeof v === 'string' && v.trim() !== '') {
+    const n = Number(v);
+    if (Number.isFinite(n)) {
+      return Math.trunc(n);
+    }
+  }
+  return 0;
+}
+
 function parseOpenAIStream(raw) {
   const events = parseSSE(raw);
   let outputText = '';
@@ -410,12 +471,18 @@ function replaySample(dir, opts) {
   if (baselineResult && opts.failOnBaselineMismatch && !baselineMatch) {
     errors.push('baseline output mismatch');
   }
+  if (opts.failOnTokenMismatch && rawResult.tokenMismatch) {
+    errors.push(`token mismatch expected=${rawResult.expectedOutputTokens} parsed=${rawResult.parsedOutputTokens}`);
+  }
 
   return {
     sample_id: path.basename(dir),
     raw_events: rawResult.events,
     raw_parsed_chunks: rawResult.parsedChunks,
     raw_saw_finish: rawResult.sawFinish,
+    raw_expected_output_tokens: rawResult.expectedOutputTokens,
+    raw_parsed_output_tokens: rawResult.parsedOutputTokens,
+    raw_token_mismatch: rawResult.tokenMismatch,
     raw_output_chars: rawResult.outputChars,
     raw_leaked_finished_text: rawResult.leakedFinishedText,
     raw_leaked_reference_markers: rawResult.leakedReferenceMarkers,
@@ -485,6 +552,9 @@ function main() {
       raw_events: sample.raw_events,
       raw_parsed_chunks: sample.raw_parsed_chunks,
       raw_saw_finish: sample.raw_saw_finish,
+      raw_expected_output_tokens: sample.raw_expected_output_tokens,
+      raw_parsed_output_tokens: sample.raw_parsed_output_tokens,
+      raw_token_mismatch: sample.raw_token_mismatch,
       raw_output_chars: sample.raw_output_chars,
       raw_leaked_finished_text: sample.raw_leaked_finished_text,
       raw_leaked_reference_markers: sample.raw_leaked_reference_markers,
@@ -508,7 +578,7 @@ function main() {
       ? ` baseline=${sample.baseline_output_matches_replay ? 'match' : 'mismatch'}`
       : ' baseline=missing';
     const note = errors.length > 0 ? ` errors=${errors.join(';')}` : '';
-    console.log(`[sim] ${status} ${sample.sample_id} events=${sample.raw_events} parsed=${sample.raw_parsed_chunks} chars=${sample.raw_output_chars}${leakNote}${matchNote}${note}`);
+    console.log(`[sim] ${status} ${sample.sample_id} events=${sample.raw_events} parsed=${sample.raw_parsed_chunks} tokens=${sample.raw_parsed_output_tokens}/${sample.raw_expected_output_tokens} chars=${sample.raw_output_chars}${leakNote}${matchNote}${note}`);
     if (opts.showOutput) {
       console.log(`[sim] replay output for ${sample.sample_id}:`);
       console.log(sample.replay_output_text || '(empty)');