Files
ds2api/tests/tools/deepseek-sse-simulator.mjs

602 lines
18 KiB
JavaScript

#!/usr/bin/env node
import fs from 'node:fs';
import path from 'node:path';
import process from 'node:process';
import { createRequire } from 'node:module';
const require = createRequire(import.meta.url);
const chatStream = require('../../api/chat-stream.js');
const { parseChunkForContent } = chatStream.__test;
const { trimContinuationOverlap } = chatStream.__test;
function parseArgs(argv) {
const out = {
samplesRoot: 'tests/raw_stream_samples',
reportPath: '',
outputRoot: '',
baselineRoot: '',
sampleId: '',
failOnLeak: true,
failOnReferenceLeak: true,
failOnMissingFinish: true,
failOnBaselineMismatch: true,
failOnTokenMismatch: false,
showOutput: false,
writeReplayText: false,
};
for (let i = 2; i < argv.length; i += 1) {
const a = argv[i];
if (a === '--samples-root' && argv[i + 1]) {
out.samplesRoot = argv[++i];
} else if (a === '--report' && argv[i + 1]) {
out.reportPath = argv[++i];
} else if (a === '--output-root' && argv[i + 1]) {
out.outputRoot = argv[++i];
} else if (a === '--baseline-root' && argv[i + 1]) {
out.baselineRoot = argv[++i];
} else if (a === '--sample-id' && argv[i + 1]) {
out.sampleId = argv[++i];
} else if (a === '--no-fail-on-leak') {
out.failOnLeak = false;
} else if (a === '--no-fail-on-reference-leak') {
out.failOnReferenceLeak = false;
} else if (a === '--no-fail-on-missing-finish') {
out.failOnMissingFinish = false;
} else if (a === '--no-fail-on-baseline-mismatch' || a === '--no-fail-on-processed-mismatch') {
out.failOnBaselineMismatch = false;
} else if (a === '--fail-on-token-mismatch') {
out.failOnTokenMismatch = true;
} else if (a === '--no-fail-on-token-mismatch') {
out.failOnTokenMismatch = false;
} else if (a === '--show-output') {
out.showOutput = true;
} else if (a === '--write-replay-text' || a === '--write-processed-text') {
out.writeReplayText = true;
}
}
return out;
}
function loadManifest(root) {
const manifestPath = path.join(root, 'manifest.json');
if (!fs.existsSync(manifestPath)) {
return null;
}
try {
const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
const defaultSamples = Array.isArray(manifest.default_samples)
? manifest.default_samples.map((v) => String(v).trim()).filter(Boolean)
: [];
if (defaultSamples.length === 0) {
return null;
}
return { manifestPath, defaultSamples };
} catch (err) {
throw new Error(`[sim] failed to parse ${manifestPath}: ${err.message}`);
}
}
function resolveSampleDirs(root, sampleID) {
if (!fs.existsSync(root)) {
return { dirs: [], manifestPath: '' };
}
if (sampleID) {
const dir = path.join(root, sampleID);
const ssePath = path.join(dir, 'upstream.stream.sse');
if (!fs.existsSync(dir) || !fs.statSync(dir).isDirectory() || !fs.existsSync(ssePath)) {
throw new Error(`[sim] sample missing: ${sampleID}`);
}
return { dirs: [dir], manifestPath: '' };
}
const manifest = loadManifest(root);
if (manifest) {
const dirs = [];
const missing = [];
for (const sampleID of manifest.defaultSamples) {
const dir = path.join(root, sampleID);
const ssePath = path.join(dir, 'upstream.stream.sse');
if (!fs.existsSync(dir) || !fs.statSync(dir).isDirectory() || !fs.existsSync(ssePath)) {
missing.push(sampleID);
continue;
}
dirs.push(dir);
}
if (missing.length > 0) {
throw new Error(`[sim] manifest sample(s) missing: ${missing.join(', ')}`);
}
return { dirs, manifestPath: manifest.manifestPath };
}
const dirs = fs.readdirSync(root)
.map((name) => path.join(root, name))
.filter((p) => fs.statSync(p).isDirectory())
.filter((p) => fs.existsSync(path.join(p, 'upstream.stream.sse')))
.sort();
return { dirs, manifestPath: '' };
}
function parseSSE(raw) {
const events = [];
for (const block of raw.split(/\r?\n\r?\n/)) {
if (!block.trim()) {
continue;
}
let eventType = 'message';
const dataLines = [];
for (const line of block.split(/\r?\n/)) {
if (line.startsWith('event:')) {
eventType = line.slice(6).trim() || 'message';
} else if (line.startsWith('data:')) {
dataLines.push(line.slice(5).trimStart());
}
}
if (dataLines.length === 0) {
continue;
}
const payload = dataLines.join('\n').trim();
events.push({ event: eventType, payload });
}
return events;
}
function collectVisibleText(value) {
if (value == null) {
return '';
}
if (typeof value === 'string') {
return value;
}
if (Array.isArray(value)) {
let out = '';
for (const item of value) {
out += collectVisibleText(item);
}
return out;
}
if (typeof value !== 'object') {
return '';
}
let out = '';
if (typeof value.reasoning_content === 'string') {
out += value.reasoning_content;
}
if (Object.prototype.hasOwnProperty.call(value, 'text')) {
out += collectVisibleText(value.text);
}
if (Object.prototype.hasOwnProperty.call(value, 'content')) {
out += collectVisibleText(value.content);
}
if (Object.prototype.hasOwnProperty.call(value, 'output_text')) {
out += collectVisibleText(value.output_text);
}
if (Object.prototype.hasOwnProperty.call(value, 'message')) {
out += collectVisibleText(value.message);
}
if (Object.prototype.hasOwnProperty.call(value, 'delta')) {
out += collectVisibleText(value.delta);
}
return out;
}
function parseDeepSeekReplay(raw) {
const events = parseSSE(raw);
let currentType = 'thinking';
let sawFinish = false;
let outputText = '';
let thinkingText = '';
let textOutput = '';
let parsedChunks = 0;
let parsedOutputTokens = 0;
let expectedOutputTokens = 0;
for (const evt of events) {
if (evt.event === 'finish') {
sawFinish = true;
}
if (!evt.payload || evt.payload === '[DONE]' || evt.payload[0] !== '{') {
continue;
}
let obj;
try {
obj = JSON.parse(evt.payload);
} catch {
continue;
}
parsedChunks += 1;
const expected = extractAccumulatedTokenUsageFromRawChunk(obj);
if (expected > 0) {
expectedOutputTokens = expected;
}
const parsed = parseChunkForContent(obj, true, currentType);
if (parsed.outputTokens > 0) {
parsedOutputTokens = parsed.outputTokens;
}
currentType = parsed.newType;
if (parsed.finished) {
sawFinish = true;
}
for (const part of parsed.parts) {
if (part.type === 'thinking') {
const trimmed = trimContinuationOverlap(thinkingText, part.text);
thinkingText += trimmed;
outputText += trimmed;
} else {
const trimmed = trimContinuationOverlap(textOutput, part.text);
textOutput += trimmed;
outputText += trimmed;
}
}
}
return {
events: events.length,
parsedChunks,
sawFinish,
parsedOutputTokens,
expectedOutputTokens,
tokenMismatch: expectedOutputTokens > 0 && parsedOutputTokens !== expectedOutputTokens,
outputText,
outputChars: outputText.length,
leakedFinishedText: outputText.includes('FINISHED'),
leakedReferenceMarkers: /\[reference:/i.test(outputText),
referenceLeakCount: (outputText.match(/\[reference:/gi) || []).length,
};
}
function extractAccumulatedTokenUsageFromRawChunk(v) {
if (Array.isArray(v)) {
for (const item of v) {
const n = extractAccumulatedTokenUsageFromRawChunk(item);
if (n > 0) {
return n;
}
}
return 0;
}
if (!v || typeof v !== 'object') {
return 0;
}
const direct = toTokenInt(v.accumulated_token_usage);
if (direct > 0) {
return direct;
}
const pathValue = typeof v.p === 'string' ? v.p.trim().toLowerCase() : '';
if (pathValue.includes('accumulated_token_usage')) {
const n = toTokenInt(v.v);
if (n > 0) {
return n;
}
}
for (const value of Object.values(v)) {
const n = extractAccumulatedTokenUsageFromRawChunk(value);
if (n > 0) {
return n;
}
}
return 0;
}
function toTokenInt(v) {
if (typeof v === 'number' && Number.isFinite(v)) {
return Math.trunc(v);
}
if (typeof v === 'string' && v.trim() !== '') {
const n = Number(v);
if (Number.isFinite(n)) {
return Math.trunc(n);
}
}
return 0;
}
function parseOpenAIStream(raw) {
const events = parseSSE(raw);
let outputText = '';
let parsedChunks = 0;
let sawFinish = false;
for (const evt of events) {
if (evt.event === 'finish') {
sawFinish = true;
}
if (!evt.payload || evt.payload === '[DONE]' || evt.payload[0] !== '{') {
continue;
}
let obj;
try {
obj = JSON.parse(evt.payload);
} catch {
continue;
}
parsedChunks += 1;
if (Array.isArray(obj.choices)) {
for (const choice of obj.choices) {
if (!choice || typeof choice !== 'object') {
continue;
}
if (choice.finish_reason) {
sawFinish = true;
}
if (choice.delta) {
outputText += collectVisibleText(choice.delta);
}
if (choice.message) {
outputText += collectVisibleText(choice.message);
}
}
} else {
outputText += collectVisibleText(obj);
}
}
return {
events: events.length,
parsedChunks,
sawFinish,
outputText,
outputChars: outputText.length,
};
}
function parseOpenAIJSON(raw) {
let obj;
try {
obj = JSON.parse(raw);
} catch {
return {
parsedChunks: 0,
sawFinish: false,
outputText: '',
outputChars: 0,
};
}
let outputText = '';
let sawFinish = false;
if (typeof obj.output_text === 'string') {
outputText += obj.output_text;
}
if (Array.isArray(obj.output)) {
for (const item of obj.output) {
outputText += collectVisibleText(item);
}
}
if (Array.isArray(obj.choices)) {
for (const choice of obj.choices) {
if (!choice || typeof choice !== 'object') {
continue;
}
if (choice.finish_reason) {
sawFinish = true;
}
if (choice.message) {
outputText += collectVisibleText(choice.message);
}
if (choice.delta) {
outputText += collectVisibleText(choice.delta);
}
}
}
return {
parsedChunks: 1,
sawFinish,
outputText,
outputChars: outputText.length,
};
}
function loadBaselineSample(dir, baselineRoot) {
const sampleID = path.basename(dir);
const roots = [];
if (baselineRoot) {
roots.push(path.join(baselineRoot, sampleID));
}
roots.push(dir);
for (const root of roots) {
const textPath = path.join(root, 'replay.output.txt');
if (fs.existsSync(textPath)) {
return {
path: textPath,
kind: 'text',
raw: fs.readFileSync(textPath, 'utf8'),
};
}
const legacyTextPath = path.join(root, 'openai.output.txt');
if (fs.existsSync(legacyTextPath)) {
return {
path: legacyTextPath,
kind: 'text',
raw: fs.readFileSync(legacyTextPath, 'utf8'),
};
}
const streamPath = path.join(root, 'openai.stream.sse');
if (fs.existsSync(streamPath)) {
return {
path: streamPath,
kind: 'stream',
raw: fs.readFileSync(streamPath, 'utf8'),
};
}
const jsonPath = path.join(root, 'openai.response.json');
if (fs.existsSync(jsonPath)) {
return {
path: jsonPath,
kind: 'json',
raw: fs.readFileSync(jsonPath, 'utf8'),
};
}
}
return null;
}
function replaySample(dir, opts) {
const raw = fs.readFileSync(path.join(dir, 'upstream.stream.sse'), 'utf8');
const rawResult = parseDeepSeekReplay(raw);
let replayOutputPath = '';
if (opts.outputRoot) {
const sampleOutputDir = path.join(opts.outputRoot, path.basename(dir));
fs.mkdirSync(sampleOutputDir, { recursive: true });
replayOutputPath = path.join(sampleOutputDir, 'replay.output.txt');
fs.writeFileSync(replayOutputPath, rawResult.outputText);
}
const baseline = loadBaselineSample(dir, opts.baselineRoot);
const baselineResult = baseline
? (baseline.kind === 'text'
? {
events: 0,
parsedChunks: 0,
sawFinish: false,
outputText: baseline.raw,
outputChars: baseline.raw.length,
}
: baseline.kind === 'stream'
? parseOpenAIStream(baseline.raw)
: parseOpenAIJSON(baseline.raw))
: null;
const baselineMatch = baselineResult ? baselineResult.outputText === rawResult.outputText : null;
const baselinePreview = baselineResult ? previewText(baselineResult.outputText, 280) : '';
const errors = [];
if (opts.failOnMissingFinish && !rawResult.sawFinish) {
errors.push('missing finish signal');
}
if (opts.failOnLeak && rawResult.leakedFinishedText) {
errors.push('FINISHED leaked into output text');
}
if (opts.failOnReferenceLeak && rawResult.leakedReferenceMarkers) {
errors.push('reference markers leaked into output text');
}
if (baselineResult && opts.failOnBaselineMismatch && !baselineMatch) {
errors.push('baseline output mismatch');
}
if (opts.failOnTokenMismatch && rawResult.tokenMismatch) {
errors.push(`token mismatch expected=${rawResult.expectedOutputTokens} parsed=${rawResult.parsedOutputTokens}`);
}
return {
sample_id: path.basename(dir),
raw_events: rawResult.events,
raw_parsed_chunks: rawResult.parsedChunks,
raw_saw_finish: rawResult.sawFinish,
raw_expected_output_tokens: rawResult.expectedOutputTokens,
raw_parsed_output_tokens: rawResult.parsedOutputTokens,
raw_token_mismatch: rawResult.tokenMismatch,
raw_output_chars: rawResult.outputChars,
raw_leaked_finished_text: rawResult.leakedFinishedText,
raw_leaked_reference_markers: rawResult.leakedReferenceMarkers,
raw_reference_leak_count: rawResult.referenceLeakCount,
baseline_available: Boolean(baselineResult),
baseline_path: baseline ? baseline.path : '',
baseline_kind: baseline ? baseline.kind : '',
baseline_parsed_chunks: baselineResult ? baselineResult.parsedChunks : 0,
baseline_saw_finish: baselineResult ? baselineResult.sawFinish : false,
baseline_output_chars: baselineResult ? baselineResult.outputChars : 0,
baseline_output_matches_replay: baselineResult ? baselineMatch : null,
baseline_output_preview: baselinePreview,
ok: errors.length === 0,
errors,
replay_output_text: rawResult.outputText,
replay_output_path: replayOutputPath,
baseline_output_text: baselineResult ? baselineResult.outputText : '',
};
}
function previewText(text, limit) {
if (!text) {
return '';
}
if (text.length <= limit) {
return text;
}
return `${text.slice(0, limit)}...`;
}
function main() {
const opts = parseArgs(process.argv);
if (!opts.outputRoot && opts.writeReplayText) {
const stamp = new Date().toISOString().replace(/[:.]/g, '-');
opts.outputRoot = path.join('artifacts/raw-stream-sim', `adhoc-${stamp}`);
}
const { dirs, manifestPath } = resolveSampleDirs(opts.samplesRoot, opts.sampleId);
if (dirs.length === 0) {
console.error(`[sim] no samples found: ${opts.samplesRoot}`);
process.exit(1);
}
const report = {
generated_at: new Date().toISOString(),
samples_root: opts.samplesRoot,
manifest_path: manifestPath,
output_root: opts.outputRoot,
baseline_root: opts.baselineRoot,
sample_id: opts.sampleId,
total: dirs.length,
failed: 0,
samples: [],
};
if (manifestPath) {
console.log(`[sim] using manifest ${manifestPath} samples=${dirs.length}`);
}
for (const dir of dirs) {
const sample = replaySample(dir, opts);
const errors = [...sample.errors];
if (errors.length > 0) {
report.failed += 1;
}
report.samples.push({
sample_id: sample.sample_id,
raw_events: sample.raw_events,
raw_parsed_chunks: sample.raw_parsed_chunks,
raw_saw_finish: sample.raw_saw_finish,
raw_expected_output_tokens: sample.raw_expected_output_tokens,
raw_parsed_output_tokens: sample.raw_parsed_output_tokens,
raw_token_mismatch: sample.raw_token_mismatch,
raw_output_chars: sample.raw_output_chars,
raw_leaked_finished_text: sample.raw_leaked_finished_text,
raw_leaked_reference_markers: sample.raw_leaked_reference_markers,
raw_reference_leak_count: sample.raw_reference_leak_count,
baseline_available: sample.baseline_available,
baseline_path: sample.baseline_path,
baseline_kind: sample.baseline_kind,
baseline_parsed_chunks: sample.baseline_parsed_chunks,
baseline_saw_finish: sample.baseline_saw_finish,
baseline_output_chars: sample.baseline_output_chars,
baseline_output_matches_replay: sample.baseline_output_matches_replay,
baseline_output_preview: sample.baseline_output_preview,
replay_output_path: sample.replay_output_path,
ok: errors.length === 0,
errors,
});
const status = sample.ok ? 'OK' : 'FAIL';
const leakNote = sample.raw_leaked_reference_markers ? ` refLeaks=${sample.raw_reference_leak_count}` : '';
const matchNote = sample.baseline_available
? ` baseline=${sample.baseline_output_matches_replay ? 'match' : 'mismatch'}`
: ' baseline=missing';
const note = errors.length > 0 ? ` errors=${errors.join(';')}` : '';
console.log(`[sim] ${status} ${sample.sample_id} events=${sample.raw_events} parsed=${sample.raw_parsed_chunks} tokens=${sample.raw_parsed_output_tokens}/${sample.raw_expected_output_tokens} chars=${sample.raw_output_chars}${leakNote}${matchNote}${note}`);
if (opts.showOutput) {
console.log(`[sim] replay output for ${sample.sample_id}:`);
console.log(sample.replay_output_text || '(empty)');
}
}
if (opts.reportPath) {
fs.writeFileSync(opts.reportPath, JSON.stringify(report, null, 2));
}
if (report.failed > 0) {
console.error(`[sim] ${report.failed}/${report.total} samples failed`);
process.exit(2);
}
console.log(`[sim] all ${report.total} samples passed`);
}
main();