test: validate raw stream token replay and enforce gofmt in lint script

CJACK.
2026-04-06 11:15:08 +08:00
parent a8c160b05d
commit 9e0fd83a76
3 changed files with 210 additions and 1 deletion


@@ -0,0 +1,123 @@
package sse

import (
	"bufio"
	"encoding/json"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"testing"
)

// TestRawStreamSamplesTokenReplay replays every recorded upstream SSE sample and
// checks that the parser reports the same output-token count the raw stream carries.
func TestRawStreamSamplesTokenReplay(t *testing.T) {
	root := filepath.Join("..", "..", "tests", "raw_stream_samples")
	entries, err := os.ReadDir(root)
	if err != nil {
		t.Fatalf("read samples root: %v", err)
	}
	found := 0
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		ssePath := filepath.Join(root, entry.Name(), "upstream.stream.sse")
		if _, err := os.Stat(ssePath); err != nil {
			continue
		}
		found++
		t.Run(entry.Name(), func(t *testing.T) {
			raw, err := os.ReadFile(ssePath)
			if err != nil {
				t.Fatalf("read sample: %v", err)
			}
			parsedTokens, expectedTokens := replayAndCollectTokens(string(raw))
			if expectedTokens <= 0 {
				t.Fatalf("expected positive token usage from raw stream, got %d", expectedTokens)
			}
			if parsedTokens != expectedTokens {
				t.Fatalf("token mismatch parsed=%d expected=%d", parsedTokens, expectedTokens)
			}
		})
	}
	if found == 0 {
		t.Fatalf("no upstream.stream.sse samples found under %s", root)
	}
}

// replayAndCollectTokens feeds each data: line through the production parser and,
// independently, pulls the accumulated_token_usage value straight out of the raw JSON.
func replayAndCollectTokens(raw string) (parsedTokens int, expectedTokens int) {
	currentType := "thinking"
	scanner := bufio.NewScanner(strings.NewReader(raw))
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if !strings.HasPrefix(line, "data:") {
			continue
		}
		payload := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
		if payload == "" || payload == "[DONE]" || !strings.HasPrefix(payload, "{") {
			continue
		}
		var chunk map[string]any
		if err := json.Unmarshal([]byte(payload), &chunk); err != nil {
			continue
		}
		if n := rawAccumulatedTokenUsage(chunk); n > 0 {
			expectedTokens = n
		}
		res := ParseDeepSeekContentLine([]byte(line), true, currentType)
		currentType = res.NextType
		if res.OutputTokens > 0 {
			parsedTokens = res.OutputTokens
		}
	}
	return parsedTokens, expectedTokens
}

// rawAccumulatedTokenUsage walks an arbitrary decoded JSON value and returns the first
// positive accumulated_token_usage it finds, either as a direct field or as a p/v pair.
func rawAccumulatedTokenUsage(v any) int {
	switch x := v.(type) {
	case []any:
		for _, item := range x {
			if n := rawAccumulatedTokenUsage(item); n > 0 {
				return n
			}
		}
	case map[string]any:
		if n := rawToInt(x["accumulated_token_usage"]); n > 0 {
			return n
		}
		if p, _ := x["p"].(string); strings.Contains(strings.ToLower(strings.TrimSpace(p)), "accumulated_token_usage") {
			if n := rawToInt(x["v"]); n > 0 {
				return n
			}
		}
		for _, vv := range x {
			if n := rawAccumulatedTokenUsage(vv); n > 0 {
				return n
			}
		}
	}
	return 0
}

// rawToInt coerces loosely typed JSON values (number or numeric string) to an int.
func rawToInt(v any) int {
	switch x := v.(type) {
	case float64:
		return int(x)
	case int:
		return x
	case string:
		s := strings.TrimSpace(x)
		if s == "" {
			return 0
		}
		if n, err := strconv.Atoi(s); err == nil {
			return n
		}
		if f, err := strconv.ParseFloat(s, 64); err == nil {
			return int(f)
		}
	}
	return 0
}
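
For reference, the walker above accepts two encodings of the usage counter. The snippet below is a synthetic sanity check, not part of the commit and not drawn from the recorded samples: both payload shapes (a nested accumulated_token_usage field, and a p/v patch pair whose p path names the field) are illustrative assumptions about how the raw stream can carry the value.

package sse

import (
	"encoding/json"
	"testing"
)

// TestRawAccumulatedTokenUsageShapes uses synthetic chunks only; real DeepSeek
// payloads may nest these fields differently.
func TestRawAccumulatedTokenUsageShapes(t *testing.T) {
	cases := map[string]string{
		// direct field anywhere in the decoded object tree
		"direct": `{"response": {"accumulated_token_usage": 42}}`,
		// patch-style pair: "p" names the path, "v" carries the value
		"patch": `{"p": "response/accumulated_token_usage", "v": "42"}`,
	}
	for name, payload := range cases {
		var chunk map[string]any
		if err := json.Unmarshal([]byte(payload), &chunk); err != nil {
			t.Fatalf("%s: unmarshal: %v", name, err)
		}
		if got := rawAccumulatedTokenUsage(chunk); got != 42 {
			t.Fatalf("%s: want 42, got %d", name, got)
		}
	}
}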

scripts/lint.sh (executable file, 16 additions)

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT_DIR"
LINT_BIN="${GOLANGCI_LINT_BIN:-golangci-lint}"
# v2 separates formatters from linters; enforce both in one entrypoint.
if [[ "$LINT_BIN" == *" "* ]]; then
eval "$LINT_BIN fmt --diff -c .golangci.yml"
eval "$LINT_BIN run -c .golangci.yml"
else
"$LINT_BIN" fmt --diff -c .golangci.yml
"$LINT_BIN" run -c .golangci.yml
fi
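
A note on the branching above: the eval path appears to exist so that GOLANGCI_LINT_BIN can be overridden with a multi-word command (for example, a wrapper that runs a pinned linter version), while a plain binary path is invoked directly with safe quoting. That reading is inferred from the script itself; the variable is not documented elsewhere in this commit.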


@@ -20,6 +20,7 @@ function parseArgs(argv) {
    failOnReferenceLeak: true,
    failOnMissingFinish: true,
    failOnBaselineMismatch: true,
    failOnTokenMismatch: true,
    showOutput: false,
    writeReplayText: false,
  };
@@ -43,6 +44,8 @@ function parseArgs(argv) {
      out.failOnMissingFinish = false;
    } else if (a === '--no-fail-on-baseline-mismatch' || a === '--no-fail-on-processed-mismatch') {
      out.failOnBaselineMismatch = false;
    } else if (a === '--no-fail-on-token-mismatch') {
      out.failOnTokenMismatch = false;
    } else if (a === '--show-output') {
      out.showOutput = true;
    } else if (a === '--write-replay-text' || a === '--write-processed-text') {
@@ -183,6 +186,8 @@ function parseDeepSeekReplay(raw) {
  let thinkingText = '';
  let textOutput = '';
  let parsedChunks = 0;
  let parsedOutputTokens = 0;
  let expectedOutputTokens = 0;
  for (const evt of events) {
    if (evt.event === 'finish') {
@@ -198,7 +203,14 @@ function parseDeepSeekReplay(raw) {
      continue;
    }
    parsedChunks += 1;
    const expected = extractAccumulatedTokenUsageFromRawChunk(obj);
    if (expected > 0) {
      expectedOutputTokens = expected;
    }
    const parsed = parseChunkForContent(obj, true, currentType);
    if (parsed.outputTokens > 0) {
      parsedOutputTokens = parsed.outputTokens;
    }
    currentType = parsed.newType;
    if (parsed.finished) {
      sawFinish = true;
@@ -220,6 +232,9 @@ function parseDeepSeekReplay(raw) {
    events: events.length,
    parsedChunks,
    sawFinish,
    parsedOutputTokens,
    expectedOutputTokens,
    tokenMismatch: expectedOutputTokens > 0 && parsedOutputTokens !== expectedOutputTokens,
    outputText,
    outputChars: outputText.length,
    leakedFinishedText: outputText.includes('FINISHED'),
@@ -228,6 +243,52 @@ function parseDeepSeekReplay(raw) {
  };
}

// Walk any decoded chunk (object or array) and return the first positive
// accumulated_token_usage, whether it appears as a direct field or as a p/v patch pair.
function extractAccumulatedTokenUsageFromRawChunk(v) {
  if (Array.isArray(v)) {
    for (const item of v) {
      const n = extractAccumulatedTokenUsageFromRawChunk(item);
      if (n > 0) {
        return n;
      }
    }
    return 0;
  }
  if (!v || typeof v !== 'object') {
    return 0;
  }
  const direct = toTokenInt(v.accumulated_token_usage);
  if (direct > 0) {
    return direct;
  }
  const pathValue = typeof v.p === 'string' ? v.p.trim().toLowerCase() : '';
  if (pathValue.includes('accumulated_token_usage')) {
    const n = toTokenInt(v.v);
    if (n > 0) {
      return n;
    }
  }
  for (const value of Object.values(v)) {
    const n = extractAccumulatedTokenUsageFromRawChunk(value);
    if (n > 0) {
      return n;
    }
  }
  return 0;
}

// Coerce a finite number or numeric string to a truncated integer; everything else is 0.
function toTokenInt(v) {
  if (typeof v === 'number' && Number.isFinite(v)) {
    return Math.trunc(v);
  }
  if (typeof v === 'string' && v.trim() !== '') {
    const n = Number(v);
    if (Number.isFinite(n)) {
      return Math.trunc(n);
    }
  }
  return 0;
}

function parseOpenAIStream(raw) {
  const events = parseSSE(raw);
  let outputText = '';
@@ -410,12 +471,18 @@ function replaySample(dir, opts) {
  if (baselineResult && opts.failOnBaselineMismatch && !baselineMatch) {
    errors.push('baseline output mismatch');
  }
  if (opts.failOnTokenMismatch && rawResult.tokenMismatch) {
    errors.push(`token mismatch expected=${rawResult.expectedOutputTokens} parsed=${rawResult.parsedOutputTokens}`);
  }
  return {
    sample_id: path.basename(dir),
    raw_events: rawResult.events,
    raw_parsed_chunks: rawResult.parsedChunks,
    raw_saw_finish: rawResult.sawFinish,
    raw_expected_output_tokens: rawResult.expectedOutputTokens,
    raw_parsed_output_tokens: rawResult.parsedOutputTokens,
    raw_token_mismatch: rawResult.tokenMismatch,
    raw_output_chars: rawResult.outputChars,
    raw_leaked_finished_text: rawResult.leakedFinishedText,
    raw_leaked_reference_markers: rawResult.leakedReferenceMarkers,
@@ -485,6 +552,9 @@ function main() {
      raw_events: sample.raw_events,
      raw_parsed_chunks: sample.raw_parsed_chunks,
      raw_saw_finish: sample.raw_saw_finish,
      raw_expected_output_tokens: sample.raw_expected_output_tokens,
      raw_parsed_output_tokens: sample.raw_parsed_output_tokens,
      raw_token_mismatch: sample.raw_token_mismatch,
      raw_output_chars: sample.raw_output_chars,
      raw_leaked_finished_text: sample.raw_leaked_finished_text,
      raw_leaked_reference_markers: sample.raw_leaked_reference_markers,
@@ -508,7 +578,7 @@ function main() {
      ? ` baseline=${sample.baseline_output_matches_replay ? 'match' : 'mismatch'}`
      : ' baseline=missing';
    const note = errors.length > 0 ? ` errors=${errors.join(';')}` : '';
    console.log(`[sim] ${status} ${sample.sample_id} events=${sample.raw_events} parsed=${sample.raw_parsed_chunks} chars=${sample.raw_output_chars}${leakNote}${matchNote}${note}`);
    console.log(`[sim] ${status} ${sample.sample_id} events=${sample.raw_events} parsed=${sample.raw_parsed_chunks} tokens=${sample.raw_parsed_output_tokens}/${sample.raw_expected_output_tokens} chars=${sample.raw_output_chars}${leakNote}${matchNote}${note}`);
    if (opts.showOutput) {
      console.log(`[sim] replay output for ${sample.sample_id}:`);
      console.log(sample.replay_output_text || '(empty)');