diff --git a/internal/sse/raw_stream_token_replay_test.go b/internal/sse/raw_stream_token_replay_test.go new file mode 100644 index 0000000..3ba0c29 --- /dev/null +++ b/internal/sse/raw_stream_token_replay_test.go @@ -0,0 +1,123 @@ +package sse + +import ( + "bufio" + "encoding/json" + "os" + "path/filepath" + "strconv" + "strings" + "testing" +) + +func TestRawStreamSamplesTokenReplay(t *testing.T) { + root := filepath.Join("..", "..", "tests", "raw_stream_samples") + entries, err := os.ReadDir(root) + if err != nil { + t.Fatalf("read samples root: %v", err) + } + + found := 0 + for _, entry := range entries { + if !entry.IsDir() { + continue + } + ssePath := filepath.Join(root, entry.Name(), "upstream.stream.sse") + if _, err := os.Stat(ssePath); err != nil { + continue + } + found++ + t.Run(entry.Name(), func(t *testing.T) { + raw, err := os.ReadFile(ssePath) + if err != nil { + t.Fatalf("read sample: %v", err) + } + parsedTokens, expectedTokens := replayAndCollectTokens(string(raw)) + if expectedTokens <= 0 { + t.Fatalf("expected positive token usage from raw stream, got %d", expectedTokens) + } + if parsedTokens != expectedTokens { + t.Fatalf("token mismatch parsed=%d expected=%d", parsedTokens, expectedTokens) + } + }) + } + + if found == 0 { + t.Fatalf("no upstream.stream.sse samples found under %s", root) + } +} + +func replayAndCollectTokens(raw string) (parsedTokens int, expectedTokens int) { + currentType := "thinking" + scanner := bufio.NewScanner(strings.NewReader(raw)) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if !strings.HasPrefix(line, "data:") { + continue + } + payload := strings.TrimSpace(strings.TrimPrefix(line, "data:")) + if payload == "" || payload == "[DONE]" || !strings.HasPrefix(payload, "{") { + continue + } + var chunk map[string]any + if err := json.Unmarshal([]byte(payload), &chunk); err != nil { + continue + } + if n := rawAccumulatedTokenUsage(chunk); n > 0 { + expectedTokens = n + } + res := 
// rawAccumulatedTokenUsage searches a decoded JSON value (as produced by
// json.Unmarshal into any) for an accumulated_token_usage figure and returns
// the first positive one it finds, or 0 when there is none.
//
// For a JSON object the lookup order is:
//  1. a direct "accumulated_token_usage" member;
//  2. a pair where the string member "p" contains "accumulated_token_usage"
//     (case-insensitively) and the member "v" holds the number;
//  3. the nested members, preferring the one with the smallest key.
//
// Arrays are searched front to back. Any other value yields 0.
func rawAccumulatedTokenUsage(v any) int {
	switch x := v.(type) {
	case []any:
		for _, item := range x {
			if n := rawAccumulatedTokenUsage(item); n > 0 {
				return n
			}
		}
	case map[string]any:
		if n := rawToInt(x["accumulated_token_usage"]); n > 0 {
			return n
		}
		if p, _ := x["p"].(string); strings.Contains(strings.ToLower(p), "accumulated_token_usage") {
			if n := rawToInt(x["v"]); n > 0 {
				return n
			}
		}
		// Map iteration order is unspecified in Go, so returning on the first
		// hit would make the result vary between runs whenever several nested
		// members carry a figure. Keep the hit under the smallest key instead;
		// members whose key cannot beat the current winner are not descended.
		winner, winnerKey := 0, ""
		for key, child := range x {
			if winner > 0 && key > winnerKey {
				continue
			}
			if n := rawAccumulatedTokenUsage(child); n > 0 {
				winner, winnerKey = n, key
			}
		}
		return winner
	}
	return 0
}

// rawToInt converts a loosely typed JSON scalar to an int.
//
// float64 values are truncated toward zero, int values are returned as is, and
// strings are trimmed and parsed first as an integer, then as a float. Every
// other input — including empty or unparsable strings, and floats that are
// NaN, infinite, or outside the int range — yields 0.
func rawToInt(v any) int {
	switch x := v.(type) {
	case float64:
		return rawFloatToInt(x)
	case int:
		return x
	case string:
		s := strings.TrimSpace(x)
		if s == "" {
			return 0
		}
		if n, err := strconv.Atoi(s); err == nil {
			return n
		}
		// ParseFloat also accepts "NaN" and "Inf" without error, hence the
		// guarded conversion.
		if f, err := strconv.ParseFloat(s, 64); err == nil {
			return rawFloatToInt(f)
		}
	}
	return 0
}

// rawFloatToInt truncates f toward zero, returning 0 when f is NaN, infinite,
// or too large in magnitude for int. Converting such values with int(f)
// directly is implementation-dependent.
func rawFloatToInt(f float64) int {
	const maxInt = int(^uint(0) >> 1)
	limit := float64(maxInt)
	// NaN fails both comparisons and therefore falls through to 0.
	if f > -limit && f < limit {
		return int(f)
	}
	return 0
}
# Invoke the configured linter with the given arguments.
# When LINT_BIN contains a space it is treated as a full command line (for
# example a wrapper command followed by the binary) and is expanded via eval;
# otherwise it is executed directly as a single program name.
run_lint() {
  if [[ "$LINT_BIN" == *" "* ]]; then
    eval "$LINT_BIN $*"
  else
    "$LINT_BIN" "$@"
  fi
}

# Formatter check first, then the linters; `set -e` stops at the first failure.
run_lint fmt --diff -c .golangci.yml
run_lint run -c .golangci.yml
/**
 * Searches a parsed raw chunk for an accumulated_token_usage figure.
 *
 * Arrays are scanned in order. For an object, a direct
 * `accumulated_token_usage` property is tried first, then a `p`/`v` pair whose
 * `p` string contains "accumulated_token_usage" (case-insensitive), and
 * finally every property value is searched recursively.
 *
 * @param {*} v Parsed JSON value of any shape.
 * @returns {number} The first positive integer found, or 0 when there is none.
 */
function extractAccumulatedTokenUsageFromRawChunk(v) {
  // Returns the first positive result among the given candidates, else 0.
  const firstPositive = (candidates) => {
    for (const candidate of candidates) {
      const count = extractAccumulatedTokenUsageFromRawChunk(candidate);
      if (count > 0) {
        return count;
      }
    }
    return 0;
  };

  if (Array.isArray(v)) {
    return firstPositive(v);
  }
  if (v === null || typeof v !== 'object') {
    return 0;
  }

  const directCount = toTokenInt(v.accumulated_token_usage);
  if (directCount > 0) {
    return directCount;
  }

  if (typeof v.p === 'string' && v.p.trim().toLowerCase().includes('accumulated_token_usage')) {
    const patchedCount = toTokenInt(v.v);
    if (patchedCount > 0) {
      return patchedCount;
    }
  }

  return firstPositive(Object.values(v));
}

/**
 * Coerces a number or numeric string to an integer by truncating toward zero.
 *
 * @param {*} v Candidate value.
 * @returns {number} The truncated value, or 0 for non-finite numbers, blank or
 *   non-numeric strings, and every other type.
 */
function toTokenInt(v) {
  let numeric;
  if (typeof v === 'number') {
    numeric = v;
  } else if (typeof v === 'string' && v.trim() !== '') {
    numeric = Number(v);
  } else {
    return 0;
  }
  return Number.isFinite(numeric) ? Math.trunc(numeric) : 0;
}
rawResult.leakedReferenceMarkers, @@ -485,6 +552,9 @@ function main() { raw_events: sample.raw_events, raw_parsed_chunks: sample.raw_parsed_chunks, raw_saw_finish: sample.raw_saw_finish, + raw_expected_output_tokens: sample.raw_expected_output_tokens, + raw_parsed_output_tokens: sample.raw_parsed_output_tokens, + raw_token_mismatch: sample.raw_token_mismatch, raw_output_chars: sample.raw_output_chars, raw_leaked_finished_text: sample.raw_leaked_finished_text, raw_leaked_reference_markers: sample.raw_leaked_reference_markers, @@ -508,7 +578,7 @@ function main() { ? ` baseline=${sample.baseline_output_matches_replay ? 'match' : 'mismatch'}` : ' baseline=missing'; const note = errors.length > 0 ? ` errors=${errors.join(';')}` : ''; - console.log(`[sim] ${status} ${sample.sample_id} events=${sample.raw_events} parsed=${sample.raw_parsed_chunks} chars=${sample.raw_output_chars}${leakNote}${matchNote}${note}`); + console.log(`[sim] ${status} ${sample.sample_id} events=${sample.raw_events} parsed=${sample.raw_parsed_chunks} tokens=${sample.raw_parsed_output_tokens}/${sample.raw_expected_output_tokens} chars=${sample.raw_output_chars}${leakNote}${matchNote}${note}`); if (opts.showOutput) { console.log(`[sim] replay output for ${sample.sample_id}:`); console.log(sample.replay_output_text || '(empty)');