test: validate raw stream token replay and enforce gofmt in lint script

CJACK.
2026-04-06 11:15:08 +08:00
parent a8c160b05d
commit 9e0fd83a76
3 changed files with 210 additions and 1 deletion


@@ -0,0 +1,123 @@
package sse

import (
	"bufio"
	"encoding/json"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"testing"
)

// TestRawStreamSamplesTokenReplay replays every recorded upstream SSE sample and
// checks that the parser reports the same output-token count the raw stream carries.
func TestRawStreamSamplesTokenReplay(t *testing.T) {
	root := filepath.Join("..", "..", "tests", "raw_stream_samples")
	entries, err := os.ReadDir(root)
	if err != nil {
		t.Fatalf("read samples root: %v", err)
	}
	found := 0
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		ssePath := filepath.Join(root, entry.Name(), "upstream.stream.sse")
		if _, err := os.Stat(ssePath); err != nil {
			continue
		}
		found++
		t.Run(entry.Name(), func(t *testing.T) {
			raw, err := os.ReadFile(ssePath)
			if err != nil {
				t.Fatalf("read sample: %v", err)
			}
			parsedTokens, expectedTokens := replayAndCollectTokens(string(raw))
			if expectedTokens <= 0 {
				t.Fatalf("expected positive token usage from raw stream, got %d", expectedTokens)
			}
			if parsedTokens != expectedTokens {
				t.Fatalf("token mismatch parsed=%d expected=%d", parsedTokens, expectedTokens)
			}
		})
	}
	if found == 0 {
		t.Fatalf("no upstream.stream.sse samples found under %s", root)
	}
}

// replayAndCollectTokens feeds each data: line through the production parser and,
// independently, pulls the accumulated_token_usage value straight out of the raw JSON.
func replayAndCollectTokens(raw string) (parsedTokens int, expectedTokens int) {
	currentType := "thinking"
	scanner := bufio.NewScanner(strings.NewReader(raw))
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if !strings.HasPrefix(line, "data:") {
			continue
		}
		payload := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
		if payload == "" || payload == "[DONE]" || !strings.HasPrefix(payload, "{") {
			continue
		}
		var chunk map[string]any
		if err := json.Unmarshal([]byte(payload), &chunk); err != nil {
			continue
		}
		if n := rawAccumulatedTokenUsage(chunk); n > 0 {
			expectedTokens = n
		}
		res := ParseDeepSeekContentLine([]byte(line), true, currentType)
		currentType = res.NextType
		if res.OutputTokens > 0 {
			parsedTokens = res.OutputTokens
		}
	}
	return parsedTokens, expectedTokens
}

// rawAccumulatedTokenUsage walks an arbitrary decoded JSON value and returns the first
// positive accumulated_token_usage it finds, either as a direct field or as a p/v pair.
func rawAccumulatedTokenUsage(v any) int {
	switch x := v.(type) {
	case []any:
		for _, item := range x {
			if n := rawAccumulatedTokenUsage(item); n > 0 {
				return n
			}
		}
	case map[string]any:
		if n := rawToInt(x["accumulated_token_usage"]); n > 0 {
			return n
		}
		if p, _ := x["p"].(string); strings.Contains(strings.ToLower(strings.TrimSpace(p)), "accumulated_token_usage") {
			if n := rawToInt(x["v"]); n > 0 {
				return n
			}
		}
		for _, vv := range x {
			if n := rawAccumulatedTokenUsage(vv); n > 0 {
				return n
			}
		}
	}
	return 0
}

// rawToInt coerces loosely typed JSON values (number or numeric string) to an int.
func rawToInt(v any) int {
	switch x := v.(type) {
	case float64:
		return int(x)
	case int:
		return x
	case string:
		s := strings.TrimSpace(x)
		if s == "" {
			return 0
		}
		if n, err := strconv.Atoi(s); err == nil {
			return n
		}
		if f, err := strconv.ParseFloat(s, 64); err == nil {
			return int(f)
		}
	}
	return 0
}
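
For reference, the walker above accepts two encodings of the usage counter. The snippet below is a synthetic sanity check, not part of the commit and not drawn from the recorded samples: both payload shapes (a nested accumulated_token_usage field, and a p/v patch pair whose p path names the field) are illustrative assumptions about how the raw stream can carry the value.

package sse

import (
	"encoding/json"
	"testing"
)

// TestRawAccumulatedTokenUsageShapes uses synthetic chunks only; real DeepSeek
// payloads may nest these fields differently.
func TestRawAccumulatedTokenUsageShapes(t *testing.T) {
	cases := map[string]string{
		// direct field anywhere in the decoded object tree
		"direct": `{"response": {"accumulated_token_usage": 42}}`,
		// patch-style pair: "p" names the path, "v" carries the value
		"patch": `{"p": "response/accumulated_token_usage", "v": "42"}`,
	}
	for name, payload := range cases {
		var chunk map[string]any
		if err := json.Unmarshal([]byte(payload), &chunk); err != nil {
			t.Fatalf("%s: unmarshal: %v", name, err)
		}
		if got := rawAccumulatedTokenUsage(chunk); got != 42 {
			t.Fatalf("%s: want 42, got %d", name, got)
		}
	}
}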

scripts/lint.sh (executable file, 16 additions)

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT_DIR"
LINT_BIN="${GOLANGCI_LINT_BIN:-golangci-lint}"
# v2 separates formatters from linters; enforce both in one entrypoint.
if [[ "$LINT_BIN" == *" "* ]]; then
eval "$LINT_BIN fmt --diff -c .golangci.yml"
eval "$LINT_BIN run -c .golangci.yml"
else
"$LINT_BIN" fmt --diff -c .golangci.yml
"$LINT_BIN" run -c .golangci.yml
fi
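
A note on the branching above: the eval path appears to exist so that GOLANGCI_LINT_BIN can be overridden with a multi-word command (for example, a wrapper that runs a pinned linter version), while a plain binary path is invoked directly with safe quoting. That reading is inferred from the script itself; the variable is not documented elsewhere in this commit.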


@@ -20,6 +20,7 @@ function parseArgs(argv) {
    failOnReferenceLeak: true,
    failOnMissingFinish: true,
    failOnBaselineMismatch: true,
    failOnTokenMismatch: true,
    showOutput: false,
    writeReplayText: false,
  };
@@ -43,6 +44,8 @@ function parseArgs(argv) {
      out.failOnMissingFinish = false;
    } else if (a === '--no-fail-on-baseline-mismatch' || a === '--no-fail-on-processed-mismatch') {
      out.failOnBaselineMismatch = false;
    } else if (a === '--no-fail-on-token-mismatch') {
      out.failOnTokenMismatch = false;
    } else if (a === '--show-output') {
      out.showOutput = true;
    } else if (a === '--write-replay-text' || a === '--write-processed-text') {
@@ -183,6 +186,8 @@ function parseDeepSeekReplay(raw) {
  let thinkingText = '';
  let textOutput = '';
  let parsedChunks = 0;
  let parsedOutputTokens = 0;
  let expectedOutputTokens = 0;
  for (const evt of events) {
    if (evt.event === 'finish') {
@@ -198,7 +203,14 @@ function parseDeepSeekReplay(raw) {
      continue;
    }
    parsedChunks += 1;
    const expected = extractAccumulatedTokenUsageFromRawChunk(obj);
    if (expected > 0) {
      expectedOutputTokens = expected;
    }
    const parsed = parseChunkForContent(obj, true, currentType);
    if (parsed.outputTokens > 0) {
      parsedOutputTokens = parsed.outputTokens;
    }
    currentType = parsed.newType;
    if (parsed.finished) {
      sawFinish = true;
@@ -220,6 +232,9 @@ function parseDeepSeekReplay(raw) {
    events: events.length,
    parsedChunks,
    sawFinish,
    parsedOutputTokens,
    expectedOutputTokens,
    tokenMismatch: expectedOutputTokens > 0 && parsedOutputTokens !== expectedOutputTokens,
    outputText,
    outputChars: outputText.length,
    leakedFinishedText: outputText.includes('FINISHED'),
@@ -228,6 +243,52 @@ function parseDeepSeekReplay(raw) {
  };
}

// Walk any decoded chunk (object or array) and return the first positive
// accumulated_token_usage, whether it appears as a direct field or as a p/v patch pair.
function extractAccumulatedTokenUsageFromRawChunk(v) {
  if (Array.isArray(v)) {
    for (const item of v) {
      const n = extractAccumulatedTokenUsageFromRawChunk(item);
      if (n > 0) {
        return n;
      }
    }
    return 0;
  }
  if (!v || typeof v !== 'object') {
    return 0;
  }
  const direct = toTokenInt(v.accumulated_token_usage);
  if (direct > 0) {
    return direct;
  }
  const pathValue = typeof v.p === 'string' ? v.p.trim().toLowerCase() : '';
  if (pathValue.includes('accumulated_token_usage')) {
    const n = toTokenInt(v.v);
    if (n > 0) {
      return n;
    }
  }
  for (const value of Object.values(v)) {
    const n = extractAccumulatedTokenUsageFromRawChunk(value);
    if (n > 0) {
      return n;
    }
  }
  return 0;
}

// Coerce a finite number or numeric string to a truncated integer; everything else is 0.
function toTokenInt(v) {
  if (typeof v === 'number' && Number.isFinite(v)) {
    return Math.trunc(v);
  }
  if (typeof v === 'string' && v.trim() !== '') {
    const n = Number(v);
    if (Number.isFinite(n)) {
      return Math.trunc(n);
    }
  }
  return 0;
}

function parseOpenAIStream(raw) {
  const events = parseSSE(raw);
  let outputText = '';
@@ -410,12 +471,18 @@ function replaySample(dir, opts) {
  if (baselineResult && opts.failOnBaselineMismatch && !baselineMatch) {
    errors.push('baseline output mismatch');
  }
  if (opts.failOnTokenMismatch && rawResult.tokenMismatch) {
    errors.push(`token mismatch expected=${rawResult.expectedOutputTokens} parsed=${rawResult.parsedOutputTokens}`);
  }
  return {
    sample_id: path.basename(dir),
    raw_events: rawResult.events,
    raw_parsed_chunks: rawResult.parsedChunks,
    raw_saw_finish: rawResult.sawFinish,
    raw_expected_output_tokens: rawResult.expectedOutputTokens,
    raw_parsed_output_tokens: rawResult.parsedOutputTokens,
    raw_token_mismatch: rawResult.tokenMismatch,
    raw_output_chars: rawResult.outputChars,
    raw_leaked_finished_text: rawResult.leakedFinishedText,
    raw_leaked_reference_markers: rawResult.leakedReferenceMarkers,
@@ -485,6 +552,9 @@ function main() {
      raw_events: sample.raw_events,
      raw_parsed_chunks: sample.raw_parsed_chunks,
      raw_saw_finish: sample.raw_saw_finish,
      raw_expected_output_tokens: sample.raw_expected_output_tokens,
      raw_parsed_output_tokens: sample.raw_parsed_output_tokens,
      raw_token_mismatch: sample.raw_token_mismatch,
      raw_output_chars: sample.raw_output_chars,
      raw_leaked_finished_text: sample.raw_leaked_finished_text,
      raw_leaked_reference_markers: sample.raw_leaked_reference_markers,
@@ -508,7 +578,7 @@ function main() {
      ? ` baseline=${sample.baseline_output_matches_replay ? 'match' : 'mismatch'}`
      : ' baseline=missing';
    const note = errors.length > 0 ? ` errors=${errors.join(';')}` : '';
    console.log(`[sim] ${status} ${sample.sample_id} events=${sample.raw_events} parsed=${sample.raw_parsed_chunks} chars=${sample.raw_output_chars}${leakNote}${matchNote}${note}`);
    console.log(`[sim] ${status} ${sample.sample_id} events=${sample.raw_events} parsed=${sample.raw_parsed_chunks} tokens=${sample.raw_parsed_output_tokens}/${sample.raw_expected_output_tokens} chars=${sample.raw_output_chars}${leakNote}${matchNote}${note}`);
    if (opts.showOutput) {
      console.log(`[sim] replay output for ${sample.sample_id}:`);
      console.log(sample.replay_output_text || '(empty)');