mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-04 08:25:26 +08:00
602 lines
18 KiB
JavaScript
602 lines
18 KiB
JavaScript
#!/usr/bin/env node
|
|
import fs from 'node:fs';
|
|
import path from 'node:path';
|
|
import process from 'node:process';
|
|
import { createRequire } from 'node:module';
|
|
|
|
// The chat-stream handler is loaded with require() rather than `import`, so a
// require function bound to this module's URL is created first.
const require = createRequire(import.meta.url);
const chatStream = require('../../api/chat-stream.js');

// Helpers the handler exposes under `__test`; the replay below runs captured
// streams through them.
const { parseChunkForContent, trimContinuationOverlap } = chatStream.__test;
/**
 * Parse the simulator's command-line arguments into an options object.
 *
 * Value options accept both `--name value` and `--name=value`. A value option
 * with a missing or empty value keeps its default, and unrecognised arguments
 * are ignored.
 *
 * @param {string[]} argv - Full argument vector; parsing starts at index 2.
 * @returns {object} Option values, pre-populated with defaults.
 */
function parseArgs(argv) {
  const out = {
    samplesRoot: 'tests/raw_stream_samples',
    reportPath: '',
    outputRoot: '',
    baselineRoot: '',
    sampleId: '',
    failOnLeak: true,
    failOnReferenceLeak: true,
    failOnMissingFinish: true,
    failOnBaselineMismatch: true,
    failOnTokenMismatch: false,
    showOutput: false,
    writeReplayText: false,
  };

  // Options that consume a value, mapped to the key they populate.
  const valueOptions = new Map([
    ['--samples-root', 'samplesRoot'],
    ['--report', 'reportPath'],
    ['--output-root', 'outputRoot'],
    ['--baseline-root', 'baselineRoot'],
    ['--sample-id', 'sampleId'],
  ]);

  // Boolean switches, mapped to [key, value]. Two switches have an alternate
  // "processed" spelling that sets the same key.
  const switches = new Map([
    ['--no-fail-on-leak', ['failOnLeak', false]],
    ['--no-fail-on-reference-leak', ['failOnReferenceLeak', false]],
    ['--no-fail-on-missing-finish', ['failOnMissingFinish', false]],
    ['--no-fail-on-baseline-mismatch', ['failOnBaselineMismatch', false]],
    ['--no-fail-on-processed-mismatch', ['failOnBaselineMismatch', false]],
    ['--fail-on-token-mismatch', ['failOnTokenMismatch', true]],
    ['--no-fail-on-token-mismatch', ['failOnTokenMismatch', false]],
    ['--show-output', ['showOutput', true]],
    ['--write-replay-text', ['writeReplayText', true]],
    ['--write-processed-text', ['writeReplayText', true]],
  ]);

  for (let i = 2; i < argv.length; i += 1) {
    const arg = argv[i];

    if (switches.has(arg)) {
      const [key, value] = switches.get(arg);
      out[key] = value;
      continue;
    }

    if (valueOptions.has(arg)) {
      // Only consume the next argument when there is a non-empty one.
      if (argv[i + 1]) {
        i += 1;
        out[valueOptions.get(arg)] = argv[i];
      }
      continue;
    }

    // Inline form: `--name=value`.
    const eq = typeof arg === 'string' ? arg.indexOf('=') : -1;
    if (eq > 0) {
      const key = valueOptions.get(arg.slice(0, eq));
      const value = arg.slice(eq + 1);
      if (key && value) {
        out[key] = value;
      }
    }
  }

  return out;
}
/**
 * Read `<root>/manifest.json` and return its list of default sample ids.
 *
 * @param {string} root - Samples root directory.
 * @returns {{manifestPath: string, defaultSamples: string[]} | null} The
 *   manifest path and its trimmed, non-empty `default_samples` entries, or
 *   `null` when there is no manifest file or the list is absent or empty.
 * @throws {Error} When the manifest cannot be read or parsed; the underlying
 *   error is attached as `cause`.
 */
function loadManifest(root) {
  const manifestPath = path.join(root, 'manifest.json');
  if (!fs.existsSync(manifestPath)) {
    return null;
  }

  try {
    const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
    // Entries are stringified and trimmed; blank ones are dropped.
    const defaultSamples = Array.isArray(manifest.default_samples)
      ? manifest.default_samples.map((entry) => String(entry).trim()).filter(Boolean)
      : [];
    if (defaultSamples.length === 0) {
      return null;
    }
    return { manifestPath, defaultSamples };
  } catch (err) {
    // Keep the original error reachable instead of flattening it to a message.
    throw new Error(`[sim] failed to parse ${manifestPath}: ${err.message}`, { cause: err });
  }
}
/**
 * Report whether `dir` is a directory that contains `upstream.stream.sse`.
 * Any stat failure (missing path, dangling symlink, permission error) counts
 * as "not a sample" instead of propagating.
 */
function isReplayableSampleDir(dir) {
  let stat;
  try {
    stat = fs.statSync(dir);
  } catch {
    return false;
  }
  return stat.isDirectory() && fs.existsSync(path.join(dir, 'upstream.stream.sse'));
}

/**
 * Decide which sample directories to replay.
 *
 * Precedence: an explicit `sampleID`, then the manifest's default samples,
 * then every replayable sub-directory of `root` in sorted order.
 *
 * @param {string} root - Samples root directory.
 * @param {string} sampleID - Single sample to replay; empty to select by
 *   manifest or directory scan.
 * @returns {{dirs: string[], manifestPath: string}} Selected directories and
 *   the manifest path when the manifest drove the selection (otherwise '').
 *   A missing `root` yields an empty selection.
 * @throws {Error} When the requested sample, or any manifest-listed sample,
 *   is not a replayable directory.
 */
function resolveSampleDirs(root, sampleID) {
  if (!fs.existsSync(root)) {
    return { dirs: [], manifestPath: '' };
  }

  if (sampleID) {
    const dir = path.join(root, sampleID);
    if (!isReplayableSampleDir(dir)) {
      throw new Error(`[sim] sample missing: ${sampleID}`);
    }
    return { dirs: [dir], manifestPath: '' };
  }

  const manifest = loadManifest(root);
  if (manifest) {
    const dirs = [];
    const missing = [];
    for (const id of manifest.defaultSamples) {
      const dir = path.join(root, id);
      if (isReplayableSampleDir(dir)) {
        dirs.push(dir);
      } else {
        missing.push(id);
      }
    }
    // Report every missing sample at once rather than only the first.
    if (missing.length > 0) {
      throw new Error(`[sim] manifest sample(s) missing: ${missing.join(', ')}`);
    }
    return { dirs, manifestPath: manifest.manifestPath };
  }

  const dirs = fs.readdirSync(root)
    .map((name) => path.join(root, name))
    .filter((candidate) => isReplayableSampleDir(candidate))
    .sort();
  return { dirs, manifestPath: '' };
}
/**
 * Split raw server-sent-events text into `{ event, payload }` records.
 *
 * Blocks are separated by a blank line (LF or CRLF). Within a block the last
 * `event:` line names the event (default `message`) and all `data:` lines are
 * joined with `\n` and trimmed. Blocks without any `data:` line are dropped,
 * as are lines with any other prefix.
 *
 * @param {string} raw - Raw SSE capture.
 * @returns {{event: string, payload: string}[]} Parsed events in file order.
 */
function parseSSE(raw) {
  // A leading byte-order mark would otherwise hide the first field name.
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  const events = [];

  for (const block of text.split(/\r?\n\r?\n/)) {
    if (!block.trim()) {
      continue;
    }

    let eventType = 'message';
    const dataLines = [];
    for (const line of block.split(/\r?\n/)) {
      if (line.startsWith('event:')) {
        // An empty event name falls back to the default type.
        eventType = line.slice('event:'.length).trim() || 'message';
      } else if (line.startsWith('data:')) {
        dataLines.push(line.slice('data:'.length).trimStart());
      }
    }

    if (dataLines.length > 0) {
      events.push({ event: eventType, payload: dataLines.join('\n').trim() });
    }
  }

  return events;
}
/**
 * Recursively concatenate the text carried by a response fragment.
 *
 * Strings are returned as-is, arrays are flattened in order, and for objects
 * a string `reasoning_content` comes first, followed by the own properties
 * `text`, `content`, `output_text`, `message` and `delta` in that order.
 * Every other value (null, numbers, booleans, ...) contributes nothing.
 *
 * @param {*} value - Parsed JSON fragment.
 * @returns {string} Concatenated text, possibly empty.
 */
function collectVisibleText(value) {
  if (value == null) {
    return '';
  }
  if (typeof value === 'string') {
    return value;
  }
  if (Array.isArray(value)) {
    return value.map((item) => collectVisibleText(item)).join('');
  }
  if (typeof value !== 'object') {
    return '';
  }

  let out = typeof value.reasoning_content === 'string' ? value.reasoning_content : '';
  // Order matters: it fixes the order of the concatenated text.
  for (const key of ['text', 'content', 'output_text', 'message', 'delta']) {
    if (Object.prototype.hasOwnProperty.call(value, key)) {
      out += collectVisibleText(value[key]);
    }
  }
  return out;
}
/**
 * Replay a captured upstream SSE stream through the chat-stream parser and
 * summarise what it produced.
 *
 * Every event whose payload is a JSON object is passed to
 * `parseChunkForContent`, threading the returned `newType` into the next
 * call. Thinking parts and text parts are overlap-trimmed against their own
 * running text, then appended to one combined `outputText` in arrival order.
 *
 * @param {string} raw - Contents of `upstream.stream.sse`.
 * @returns {object} Event and chunk counts, finish flag, parsed vs. expected
 *   token totals, the combined output text, and leak indicators.
 */
function parseDeepSeekReplay(raw) {
  const events = parseSSE(raw);
  let currentType = 'thinking';
  let sawFinish = false;
  let outputText = '';
  let thinkingText = '';
  let textOutput = '';
  let parsedChunks = 0;
  let parsedOutputTokens = 0;
  let expectedOutputTokens = 0;

  for (const evt of events) {
    // A `finish` event counts even when its payload is not JSON.
    if (evt.event === 'finish') {
      sawFinish = true;
    }

    const { payload } = evt;
    if (!payload || payload === '[DONE]' || payload[0] !== '{') {
      continue;
    }

    let chunk;
    try {
      chunk = JSON.parse(payload);
    } catch {
      // Malformed payloads are skipped and not counted as parsed chunks.
      continue;
    }
    parsedChunks += 1;

    // Both token counters keep the most recent positive value seen.
    const expected = extractAccumulatedTokenUsageFromRawChunk(chunk);
    if (expected > 0) {
      expectedOutputTokens = expected;
    }

    // NOTE(review): the meaning of the `true` argument is defined by
    // parseChunkForContent in api/chat-stream.js and is not visible here.
    const parsed = parseChunkForContent(chunk, true, currentType);
    if (parsed.outputTokens > 0) {
      parsedOutputTokens = parsed.outputTokens;
    }
    currentType = parsed.newType;
    if (parsed.finished) {
      sawFinish = true;
    }

    for (const part of parsed.parts) {
      const isThinking = part.type === 'thinking';
      // Each kind is trimmed against its own history, not the combined text.
      const fresh = trimContinuationOverlap(isThinking ? thinkingText : textOutput, part.text);
      if (isThinking) {
        thinkingText += fresh;
      } else {
        textOutput += fresh;
      }
      outputText += fresh;
    }
  }

  const referenceLeakCount = (outputText.match(/\[reference:/gi) || []).length;

  return {
    events: events.length,
    parsedChunks,
    sawFinish,
    parsedOutputTokens,
    expectedOutputTokens,
    // Only a mismatch when the stream actually reported an expected total.
    tokenMismatch: expectedOutputTokens > 0 && parsedOutputTokens !== expectedOutputTokens,
    outputText,
    outputChars: outputText.length,
    leakedFinishedText: outputText.includes('FINISHED'),
    leakedReferenceMarkers: referenceLeakCount > 0,
    referenceLeakCount,
  };
}
/**
 * Search a raw chunk depth-first for the first positive accumulated token
 * usage value.
 *
 * On each object the checks run in this order: its own
 * `accumulated_token_usage`; then, when its string `p` contains
 * `accumulated_token_usage` (case-insensitive), its `v`; then its nested
 * values. Arrays are searched element by element.
 *
 * @param {*} v - Parsed JSON value.
 * @returns {number} The first positive integer found, or 0 when there is none.
 */
function extractAccumulatedTokenUsageFromRawChunk(v) {
  if (!v || typeof v !== 'object') {
    return 0;
  }

  const isList = Array.isArray(v);
  if (!isList) {
    const direct = toTokenInt(v.accumulated_token_usage);
    if (direct > 0) {
      return direct;
    }

    // Path/value form: `p` names the field and `v` carries its value.
    const pathValue = typeof v.p === 'string' ? v.p.trim().toLowerCase() : '';
    if (pathValue.includes('accumulated_token_usage')) {
      const patched = toTokenInt(v.v);
      if (patched > 0) {
        return patched;
      }
    }
  }

  for (const child of isList ? v : Object.values(v)) {
    const found = extractAccumulatedTokenUsageFromRawChunk(child);
    if (found > 0) {
      return found;
    }
  }
  return 0;
}
/**
 * Coerce a token count to an integer.
 *
 * Accepts finite numbers and non-blank numeric strings, truncating toward
 * zero. Everything else (blank strings, non-numeric text, NaN, Infinity,
 * other types) yields 0.
 *
 * @param {*} v - Candidate token count.
 * @returns {number} Truncated integer, or 0 when `v` is not usable.
 */
function toTokenInt(v) {
  let candidate;
  if (typeof v === 'number') {
    candidate = v;
  } else if (typeof v === 'string' && v.trim() !== '') {
    candidate = Number(v);
  } else {
    return 0;
  }
  return Number.isFinite(candidate) ? Math.trunc(candidate) : 0;
}
/**
 * Extract the text from an OpenAI-style SSE baseline stream.
 *
 * For chunks with a `choices` array, each choice's `delta` and `message`
 * text is appended and a truthy `finish_reason` marks the stream finished.
 * Chunks without `choices` are passed whole to `collectVisibleText`.
 *
 * @param {string} raw - Raw SSE text.
 * @returns {{events: number, parsedChunks: number, sawFinish: boolean,
 *   outputText: string, outputChars: number}} Replay summary.
 */
function parseOpenAIStream(raw) {
  const events = parseSSE(raw);
  let outputText = '';
  let parsedChunks = 0;
  let sawFinish = false;

  for (const { event, payload } of events) {
    // A `finish` event counts even when its payload is not JSON.
    if (event === 'finish') {
      sawFinish = true;
    }
    if (!payload || payload === '[DONE]' || payload[0] !== '{') {
      continue;
    }

    let chunk;
    try {
      chunk = JSON.parse(payload);
    } catch {
      // Malformed payloads are skipped and not counted as parsed chunks.
      continue;
    }
    parsedChunks += 1;

    if (!Array.isArray(chunk.choices)) {
      outputText += collectVisibleText(chunk);
      continue;
    }

    for (const choice of chunk.choices) {
      if (!choice || typeof choice !== 'object') {
        continue;
      }
      if (choice.finish_reason) {
        sawFinish = true;
      }
      if (choice.delta) {
        outputText += collectVisibleText(choice.delta);
      }
      if (choice.message) {
        outputText += collectVisibleText(choice.message);
      }
    }
  }

  return {
    events: events.length,
    parsedChunks,
    sawFinish,
    outputText,
    outputChars: outputText.length,
  };
}
/**
 * Extract the text from a non-streaming OpenAI-style JSON baseline.
 *
 * Text is gathered, in this order, from a string `output_text`, each item of
 * an `output` array, and each choice's `message` then `delta`. A truthy
 * `finish_reason` on any choice marks the response finished.
 *
 * @param {string} raw - JSON document text.
 * @returns {{parsedChunks: number, sawFinish: boolean, outputText: string,
 *   outputChars: number}} Summary; the all-empty result with
 *   `parsedChunks: 0` is returned when `raw` is not valid JSON or is `null`.
 */
function parseOpenAIJSON(raw) {
  const unparsed = {
    parsedChunks: 0,
    sawFinish: false,
    outputText: '',
    outputChars: 0,
  };

  let doc;
  try {
    doc = JSON.parse(raw);
  } catch {
    return unparsed;
  }
  // `null` is valid JSON but has no properties to read; treat it like an
  // unparseable document instead of crashing on the first property access.
  if (doc === null) {
    return unparsed;
  }

  let outputText = '';
  let sawFinish = false;

  if (typeof doc.output_text === 'string') {
    outputText += doc.output_text;
  }

  if (Array.isArray(doc.output)) {
    for (const item of doc.output) {
      outputText += collectVisibleText(item);
    }
  }

  if (Array.isArray(doc.choices)) {
    for (const choice of doc.choices) {
      if (!choice || typeof choice !== 'object') {
        continue;
      }
      if (choice.finish_reason) {
        sawFinish = true;
      }
      if (choice.message) {
        outputText += collectVisibleText(choice.message);
      }
      if (choice.delta) {
        outputText += collectVisibleText(choice.delta);
      }
    }
  }

  return {
    parsedChunks: 1,
    sawFinish,
    outputText,
    outputChars: outputText.length,
  };
}
/**
 * Find and read the baseline output recorded for a sample.
 *
 * `<baselineRoot>/<sample id>` is searched first when `baselineRoot` is set,
 * then the sample directory itself. Within each location the first existing
 * file wins, in this order: `replay.output.txt`, `openai.output.txt`,
 * `openai.stream.sse`, `openai.response.json`.
 *
 * @param {string} dir - Sample directory; its base name is the sample id.
 * @param {string} baselineRoot - Optional separate baseline root ('' for none).
 * @returns {{path: string, kind: 'text'|'stream'|'json', raw: string} | null}
 *   The baseline file's path, kind and contents, or `null` when none exists.
 */
function loadBaselineSample(dir, baselineRoot) {
  // Candidate file names in priority order, with the kind each one maps to.
  const candidates = [
    ['replay.output.txt', 'text'],
    ['openai.output.txt', 'text'],
    ['openai.stream.sse', 'stream'],
    ['openai.response.json', 'json'],
  ];

  const searchRoots = baselineRoot ? [path.join(baselineRoot, path.basename(dir)), dir] : [dir];

  for (const searchRoot of searchRoots) {
    for (const [fileName, kind] of candidates) {
      const candidatePath = path.join(searchRoot, fileName);
      if (fs.existsSync(candidatePath)) {
        return {
          path: candidatePath,
          kind,
          raw: fs.readFileSync(candidatePath, 'utf8'),
        };
      }
    }
  }
  return null;
}
/**
 * Replay one sample, compare it with its baseline (if any) and collect
 * failures according to the enabled `failOn*` options.
 *
 * @param {string} dir - Sample directory containing `upstream.stream.sse`.
 * @param {object} opts - Options as produced by `parseArgs`; reads
 *   `outputRoot`, `baselineRoot` and the `failOn*` flags.
 * @returns {object} Per-sample result: `raw_*` replay statistics,
 *   `baseline_*` comparison data, `ok`, `errors`, and the full replay and
 *   baseline texts.
 */
function replaySample(dir, opts) {
  const sampleID = path.basename(dir);
  const raw = fs.readFileSync(path.join(dir, 'upstream.stream.sse'), 'utf8');
  const rawResult = parseDeepSeekReplay(raw);

  // Read the baseline BEFORE writing any replay output. The baseline lookup
  // includes `replay.output.txt`, the very file written below, so when the
  // output root overlaps the sample or baseline directory, writing first
  // would overwrite the baseline and make the comparison always match.
  const baseline = loadBaselineSample(dir, opts.baselineRoot);
  let baselineResult = null;
  if (baseline) {
    if (baseline.kind === 'text') {
      // Plain-text baselines are compared verbatim.
      baselineResult = {
        events: 0,
        parsedChunks: 0,
        sawFinish: false,
        outputText: baseline.raw,
        outputChars: baseline.raw.length,
      };
    } else if (baseline.kind === 'stream') {
      baselineResult = parseOpenAIStream(baseline.raw);
    } else {
      baselineResult = parseOpenAIJSON(baseline.raw);
    }
  }

  let replayOutputPath = '';
  if (opts.outputRoot) {
    const sampleOutputDir = path.join(opts.outputRoot, sampleID);
    fs.mkdirSync(sampleOutputDir, { recursive: true });
    replayOutputPath = path.join(sampleOutputDir, 'replay.output.txt');
    fs.writeFileSync(replayOutputPath, rawResult.outputText);
  }

  const baselineMatch = baselineResult ? baselineResult.outputText === rawResult.outputText : null;
  const baselinePreview = baselineResult ? previewText(baselineResult.outputText, 280) : '';

  const errors = [];
  if (opts.failOnMissingFinish && !rawResult.sawFinish) {
    errors.push('missing finish signal');
  }
  if (opts.failOnLeak && rawResult.leakedFinishedText) {
    errors.push('FINISHED leaked into output text');
  }
  if (opts.failOnReferenceLeak && rawResult.leakedReferenceMarkers) {
    errors.push('reference markers leaked into output text');
  }
  // A missing baseline is never an error; only a present, differing one is.
  if (baselineResult && opts.failOnBaselineMismatch && !baselineMatch) {
    errors.push('baseline output mismatch');
  }
  if (opts.failOnTokenMismatch && rawResult.tokenMismatch) {
    errors.push(`token mismatch expected=${rawResult.expectedOutputTokens} parsed=${rawResult.parsedOutputTokens}`);
  }

  return {
    sample_id: sampleID,
    raw_events: rawResult.events,
    raw_parsed_chunks: rawResult.parsedChunks,
    raw_saw_finish: rawResult.sawFinish,
    raw_expected_output_tokens: rawResult.expectedOutputTokens,
    raw_parsed_output_tokens: rawResult.parsedOutputTokens,
    raw_token_mismatch: rawResult.tokenMismatch,
    raw_output_chars: rawResult.outputChars,
    raw_leaked_finished_text: rawResult.leakedFinishedText,
    raw_leaked_reference_markers: rawResult.leakedReferenceMarkers,
    raw_reference_leak_count: rawResult.referenceLeakCount,
    baseline_available: Boolean(baselineResult),
    baseline_path: baseline ? baseline.path : '',
    baseline_kind: baseline ? baseline.kind : '',
    baseline_parsed_chunks: baselineResult ? baselineResult.parsedChunks : 0,
    baseline_saw_finish: baselineResult ? baselineResult.sawFinish : false,
    baseline_output_chars: baselineResult ? baselineResult.outputChars : 0,
    baseline_output_matches_replay: baselineResult ? baselineMatch : null,
    baseline_output_preview: baselinePreview,
    ok: errors.length === 0,
    errors,
    replay_output_text: rawResult.outputText,
    replay_output_path: replayOutputPath,
    baseline_output_text: baselineResult ? baselineResult.outputText : '',
  };
}
/**
 * Shorten `text` to at most `limit` UTF-16 code units for display.
 *
 * @param {string} text - Text to preview; falsy values yield ''.
 * @param {number} limit - Maximum number of code units kept.
 * @returns {string} `text` unchanged when it fits, otherwise the truncated
 *   prefix followed by `...`.
 */
function previewText(text, limit) {
  if (!text) {
    return '';
  }
  if (text.length <= limit) {
    return text;
  }

  // Do not cut a surrogate pair in half: if the last kept unit is a high
  // surrogate whose low surrogate would be dropped, drop the high one too.
  let end = limit;
  const lastKept = text.charCodeAt(end - 1);
  const firstDropped = text.charCodeAt(end);
  const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff
    && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  if (splitsPair) {
    end -= 1;
  }
  return `${text.slice(0, end)}...`;
}
/**
 * CLI entry point: replay every selected sample, print one status line per
 * sample, optionally write a JSON report, and set the exit status.
 *
 * Exits with status 1 when no samples are found and 2 when at least one
 * sample failed; otherwise returns normally after printing a summary.
 */
function main() {
  const opts = parseArgs(process.argv);
  // Writing replay text needs somewhere to write; default to a timestamped
  // ad-hoc directory (':' and '.' in the timestamp are replaced with '-').
  if (!opts.outputRoot && opts.writeReplayText) {
    const stamp = new Date().toISOString().replace(/[:.]/g, '-');
    opts.outputRoot = path.join('artifacts/raw-stream-sim', `adhoc-${stamp}`);
  }

  const { dirs, manifestPath } = resolveSampleDirs(opts.samplesRoot, opts.sampleId);
  if (dirs.length === 0) {
    console.error(`[sim] no samples found: ${opts.samplesRoot}`);
    process.exit(1);
  }

  const report = {
    generated_at: new Date().toISOString(),
    samples_root: opts.samplesRoot,
    manifest_path: manifestPath,
    output_root: opts.outputRoot,
    baseline_root: opts.baselineRoot,
    sample_id: opts.sampleId,
    total: dirs.length,
    failed: 0,
    samples: [],
  };

  if (manifestPath) {
    console.log(`[sim] using manifest ${manifestPath} samples=${dirs.length}`);
  }

  // Fields copied into the report, in output order. The full replay and
  // baseline texts are left out; only the baseline preview is reported.
  const reportedFields = [
    'sample_id',
    'raw_events',
    'raw_parsed_chunks',
    'raw_saw_finish',
    'raw_expected_output_tokens',
    'raw_parsed_output_tokens',
    'raw_token_mismatch',
    'raw_output_chars',
    'raw_leaked_finished_text',
    'raw_leaked_reference_markers',
    'raw_reference_leak_count',
    'baseline_available',
    'baseline_path',
    'baseline_kind',
    'baseline_parsed_chunks',
    'baseline_saw_finish',
    'baseline_output_chars',
    'baseline_output_matches_replay',
    'baseline_output_preview',
    'replay_output_path',
  ];

  for (const dir of dirs) {
    const sample = replaySample(dir, opts);
    const errors = [...sample.errors];
    if (errors.length > 0) {
      report.failed += 1;
    }

    const entry = Object.fromEntries(reportedFields.map((field) => [field, sample[field]]));
    entry.ok = errors.length === 0;
    entry.errors = errors;
    report.samples.push(entry);

    const status = sample.ok ? 'OK' : 'FAIL';
    const leakNote = sample.raw_leaked_reference_markers ? ` refLeaks=${sample.raw_reference_leak_count}` : '';
    const matchNote = sample.baseline_available
      ? ` baseline=${sample.baseline_output_matches_replay ? 'match' : 'mismatch'}`
      : ' baseline=missing';
    const note = errors.length > 0 ? ` errors=${errors.join(';')}` : '';
    console.log(`[sim] ${status} ${sample.sample_id} events=${sample.raw_events} parsed=${sample.raw_parsed_chunks} tokens=${sample.raw_parsed_output_tokens}/${sample.raw_expected_output_tokens} chars=${sample.raw_output_chars}${leakNote}${matchNote}${note}`);
    if (opts.showOutput) {
      console.log(`[sim] replay output for ${sample.sample_id}:`);
      console.log(sample.replay_output_text || '(empty)');
    }
  }

  if (opts.reportPath) {
    // Create the report's directory first, as is already done for the
    // replay output root, so a fresh path does not fail the write.
    fs.mkdirSync(path.dirname(opts.reportPath), { recursive: true });
    fs.writeFileSync(opts.reportPath, JSON.stringify(report, null, 2));
  }

  if (report.failed > 0) {
    console.error(`[sim] ${report.failed}/${report.total} samples failed`);
    process.exit(2);
  }
  console.log(`[sim] all ${report.total} samples passed`);
}
// Run the simulator when this file is executed.
main();