From 05422b244976d2f39a19020955a44351f473c834 Mon Sep 17 00:00:00 2001 From: CJACK Date: Tue, 17 Feb 2026 14:35:24 +0800 Subject: [PATCH] refactor: Improve chat stream content and tool call parsing with a new recursive extraction function and dedicated tests. --- TESTING.md | 5 + api/chat-stream.js | 110 +++++++++++++++++----- api/chat-stream.test.js | 128 +++++++++++++++++++++++++ api/helpers/stream-tool-sieve.test.js | 130 ++++++++++++++++++++++++++ internal/testsuite/runner.go | 1 + 5 files changed, 349 insertions(+), 25 deletions(-) create mode 100644 api/chat-stream.test.js create mode 100644 api/helpers/stream-tool-sieve.test.js diff --git a/TESTING.md b/TESTING.md index 52cbaf3..5540592 100644 --- a/TESTING.md +++ b/TESTING.md @@ -23,6 +23,10 @@ DS2API 提供两个层级的测试: go test ./... ``` +```bash +node --test api/helpers/stream-tool-sieve.test.js api/chat-stream.test.js +``` + ### 端到端测试 | End-to-End Tests ```bash @@ -35,6 +39,7 @@ go test ./... - `go test ./... -count=1`(单元测试) - `node --check api/chat-stream.js`(语法检查) - `node --check api/helpers/stream-tool-sieve.js`(语法检查) + - `node --test api/helpers/stream-tool-sieve.test.js api/chat-stream.test.js`(Node 流式拦截单测) - `npm run build --prefix webui`(WebUI 构建检查) 2. **隔离启动**:复制 `config.json` 到临时目录,启动独立服务进程 diff --git a/api/chat-stream.js b/api/chat-stream.js index 566fa57..aa92b17 100644 --- a/api/chat-stream.js +++ b/api/chat-stream.js @@ -517,32 +517,11 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType) { } if (Array.isArray(val)) { - for (const entry of val) { - if (typeof entry === 'string') { - if (entry) { - parts.push({ text: entry, type: partType }); - } - continue; - } - if (!entry || typeof entry !== 'object') { - continue; - } - if (asString(entry.p) === 'status' && asString(entry.v) === 'FINISHED') { - return { parts: [], finished: true, newType }; - } - const content = asString(entry.content); - if (!content) { - continue; - } - const t = asString(entry.type).toUpperCase(); - if (t === 'THINK' || t === 'THINKING') { - parts.push({ text: content, type: 'thinking' }); - } else if (t === 'RESPONSE') { - parts.push({ text: content, type: 'text' }); - } else { - parts.push({ text: content, type: partType }); - } + const extracted = extractContentRecursive(val, partType); + if (extracted.finished) { + return { parts: [], finished: true, newType }; } + parts.push(...extracted.parts); return { parts, finished: false, newType }; } @@ -573,6 +552,80 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType) { return { parts, finished: false, newType }; } +function extractContentRecursive(items, defaultType) { + const parts = []; + for (const it of items) { + if (!it || typeof it !== 'object') { + continue; + } + if (!Object.prototype.hasOwnProperty.call(it, 'v')) { + continue; + } + const itemPath = asString(it.p); + const itemV = it.v; + if (itemPath === 'status' && asString(itemV) === 'FINISHED') { + return { parts: [], finished: true }; + } + if (shouldSkipPath(itemPath)) { + continue; + } + const content = asString(it.content); + if (content) { + const typeName = asString(it.type).toUpperCase(); + if (typeName === 'THINK' || typeName === 'THINKING') { + parts.push({ text: content, type: 'thinking' }); + } else if (typeName === 'RESPONSE') { + parts.push({ text: content, type: 'text' }); + } else { + parts.push({ text: content, type: defaultType }); + } + continue; + } + + let partType = defaultType; + if (itemPath.includes('thinking')) { + partType = 'thinking'; + } else if (itemPath.includes('content') || itemPath === 'response' || itemPath === 'fragments') { + partType = 'text'; + } + + if (typeof itemV === 'string') { + if (itemV && itemV !== 'FINISHED') { + parts.push({ text: itemV, type: partType }); + } + continue; + } + + if (!Array.isArray(itemV)) { + continue; + } + for (const inner of itemV) { + if (typeof inner === 'string') { + if (inner) { + parts.push({ text: inner, type: partType }); + } + continue; + } + if (!inner || typeof inner !== 'object') { + continue; + } + const ct = asString(inner.content); + if (!ct) { + continue; + } + const typeName = asString(inner.type).toUpperCase(); + if (typeName === 'THINK' || typeName === 'THINKING') { + parts.push({ text: ct, type: 'thinking' }); + } else if (typeName === 'RESPONSE') { + parts.push({ text: ct, type: 'text' }); + } else { + parts.push({ text: ct, type: partType }); + } + } + } + return { parts, finished: false }; +} + function shouldSkipPath(pathValue) { if (pathValue === 'response/search_status') { return true; @@ -708,3 +761,10 @@ function asString(v) { } return String(v).trim(); } + +module.exports.__test = { + parseChunkForContent, + extractContentRecursive, + shouldSkipPath, + asString, +}; diff --git a/api/chat-stream.test.js b/api/chat-stream.test.js new file mode 100644 index 0000000..b347342 --- /dev/null +++ b/api/chat-stream.test.js @@ -0,0 +1,128 @@ +'use strict'; + +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const handler = require('./chat-stream'); +const { + createToolSieveState, + processToolSieveChunk, + flushToolSieve, +} = require('./helpers/stream-tool-sieve'); + +const { parseChunkForContent } = handler.__test; + +test('chat-stream exposes parser test hooks', () => { + assert.equal(typeof parseChunkForContent, 'function'); +}); + +test('parseChunkForContent keeps split response/content fragments inside response array', () => { + const chunk = { + p: 'response', + v: [ + { p: 'response/content', v: '{"' }, + { p: 'response/content', v: 'tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}' }, + ], + }; + const parsed = parseChunkForContent(chunk, false, 'text'); + assert.equal(parsed.finished, false); + assert.equal(parsed.newType, 'text'); + assert.equal(parsed.parts.length, 2); + const combined = parsed.parts.map((p) => p.text).join(''); + assert.equal(combined, '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}'); +}); + +test('parseChunkForContent + sieve does not leak suspicious prefix in split tool json case', () => { + const chunk = { + p: 'response', + v: [ + { p: 'response/content', v: '{"' }, + { p: 'response/content', v: 'tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}' }, + ], + }; + const parsed = parseChunkForContent(chunk, false, 'text'); + const state = createToolSieveState(); + const events = []; + for (const part of parsed.parts) { + events.push(...processToolSieveChunk(state, part.text, ['read_file'])); + } + events.push(...flushToolSieve(state, ['read_file'])); + + const hasToolCalls = events.some((evt) => evt.type === 'tool_calls' && evt.calls && evt.calls.length > 0); + const leakedText = events + .filter((evt) => evt.type === 'text' && evt.text) + .map((evt) => evt.text) + .join(''); + + assert.equal(hasToolCalls, true); + assert.equal(leakedText.includes('{'), false); + assert.equal(leakedText.toLowerCase().includes('tool_calls'), false); +}); + +test('parseChunkForContent consumes nested item.v array payloads', () => { + const chunk = { + p: 'response', + v: [ + { p: 'response/content', v: ['A', 'B'] }, + { p: 'response/content', v: [{ content: 'C', type: 'RESPONSE' }] }, + ], + }; + const parsed = parseChunkForContent(chunk, false, 'text'); + assert.equal(parsed.finished, false); + assert.equal(parsed.parts.map((p) => p.text).join(''), 'ABC'); +}); + +test('parseChunkForContent detects nested status FINISHED in array payload', () => { + const chunk = { + p: 'response', + v: [{ p: 'status', v: 'FINISHED' }], + }; + const parsed = parseChunkForContent(chunk, false, 'text'); + assert.equal(parsed.finished, true); + assert.deepEqual(parsed.parts, []); +}); + +test('parseChunkForContent ignores items without v to match Go parser behavior', () => { + const chunk = { + p: 'response', + v: [{ type: 'RESPONSE', content: 'no-v-content' }], + }; + const parsed = parseChunkForContent(chunk, false, 'text'); + assert.equal(parsed.finished, false); + assert.deepEqual(parsed.parts, []); +}); + +test('parseChunkForContent handles response/fragments APPEND with thinking and response transitions', () => { + const chunk = { + p: 'response/fragments', + o: 'APPEND', + v: [ + { type: 'THINK', content: '思考中' }, + { type: 'RESPONSE', content: '结论' }, + ], + }; + const parsed = parseChunkForContent(chunk, true, 'thinking'); + assert.equal(parsed.finished, false); + assert.equal(parsed.newType, 'text'); + assert.deepEqual(parsed.parts, [ + { text: '思考中', type: 'thinking' }, + { text: '结论', type: 'text' }, + ]); +}); + +test('parseChunkForContent supports wrapped response.fragments object shape', () => { + const chunk = { + p: 'response', + v: { + response: { + fragments: [ + { type: 'RESPONSE', content: 'A' }, + { type: 'RESPONSE', content: 'B' }, + ], + }, + }, + }; + const parsed = parseChunkForContent(chunk, false, 'text'); + assert.equal(parsed.finished, false); + assert.equal(parsed.parts.map((p) => p.text).join(''), 'AB'); +}); diff --git a/api/helpers/stream-tool-sieve.test.js b/api/helpers/stream-tool-sieve.test.js new file mode 100644 index 0000000..47b3100 --- /dev/null +++ b/api/helpers/stream-tool-sieve.test.js @@ -0,0 +1,130 @@ +'use strict'; + +const test = require('node:test'); +const assert = require('node:assert/strict'); + +const { + extractToolNames, + createToolSieveState, + processToolSieveChunk, + flushToolSieve, + parseToolCalls, +} = require('./stream-tool-sieve'); + +function runSieve(chunks, toolNames) { + const state = createToolSieveState(); + const events = []; + for (const chunk of chunks) { + events.push(...processToolSieveChunk(state, chunk, toolNames)); + } + events.push(...flushToolSieve(state, toolNames)); + return events; +} + +function collectText(events) { + return events + .filter((evt) => evt.type === 'text' && evt.text) + .map((evt) => evt.text) + .join(''); +} + +test('extractToolNames keeps tool mode enabled with unknown fallback', () => { + const names = extractToolNames([ + { function: { description: 'no name tool' } }, + { function: { name: ' read_file ' } }, + {}, + ]); + assert.deepEqual(names, ['unknown', 'read_file', 'unknown']); +}); + +test('parseToolCalls keeps non-object argument strings as _raw (Go parity)', () => { + const payload = JSON.stringify({ + tool_calls: [ + { name: 'read_file', input: '123' }, + { name: 'list_dir', input: '[1,2,3]' }, + ], + }); + const calls = parseToolCalls(payload, ['read_file', 'list_dir']); + assert.deepEqual(calls, [ + { name: 'read_file', input: { _raw: '123' } }, + { name: 'list_dir', input: { _raw: '[1,2,3]' } }, + ]); +}); + +test('parseToolCalls still intercepts unknown schema names to avoid leaks', () => { + const payload = JSON.stringify({ + tool_calls: [{ name: 'not_in_schema', input: { q: 'go' } }], + }); + const calls = parseToolCalls(payload, ['search']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'not_in_schema'); +}); + +test('parseToolCalls supports fenced json and function.arguments string payload', () => { + const text = [ + 'I will call a tool now.', + '```json', + '{"tool_calls":[{"function":{"name":"read_file","arguments":"{\\"path\\":\\"README.md\\"}"}}]}', + '```', + ].join('\n'); + const calls = parseToolCalls(text, ['read_file']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'read_file'); + assert.deepEqual(calls[0].input, { path: 'README.md' }); +}); + +test('sieve emits tool_calls and does not leak suspicious prefix on late key convergence', () => { + const events = runSieve( + [ + '{"', + 'tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}', + '后置正文C。', + ], + ['read_file'], + ); + const leakedText = collectText(events); + const hasToolCall = events.some((evt) => evt.type === 'tool_calls' && Array.isArray(evt.calls) && evt.calls.length > 0); + assert.equal(hasToolCall, true); + assert.equal(leakedText.includes('{'), false); + assert.equal(leakedText.toLowerCase().includes('tool_calls'), false); + assert.equal(leakedText.includes('后置正文C。'), true); +}); + +test('sieve drops invalid tool json body while preserving surrounding text', () => { + const events = runSieve( + [ + '前置正文D。', + "{'tool_calls':[{'name':'read_file','input':{'path':'README.MD'}}]}", + '后置正文E。', + ], + ['read_file'], + ); + const leakedText = collectText(events); + const hasToolCall = events.some((evt) => evt.type === 'tool_calls'); + assert.equal(hasToolCall, false); + assert.equal(leakedText.includes('前置正文D。'), true); + assert.equal(leakedText.includes('后置正文E。'), true); + assert.equal(leakedText.toLowerCase().includes('tool_calls'), false); +}); + +test('sieve suppresses incomplete captured tool json on stream finalize', () => { + const events = runSieve( + ['前置正文F。', '{"tool_calls":[{"name":"read_file"'], + ['read_file'], + ); + const leakedText = collectText(events); + assert.equal(leakedText.includes('前置正文F。'), true); + assert.equal(leakedText.toLowerCase().includes('tool_calls'), false); + assert.equal(leakedText.includes('{'), false); +}); + +test('sieve keeps plain text intact in tool mode when no tool call appears', () => { + const events = runSieve( + ['你好,', '这是普通文本回复。', '请继续。'], + ['read_file'], + ); + const leakedText = collectText(events); + const hasToolCall = events.some((evt) => evt.type === 'tool_calls'); + assert.equal(hasToolCall, false); + assert.equal(leakedText, '你好,这是普通文本回复。请继续。'); +}); diff --git a/internal/testsuite/runner.go b/internal/testsuite/runner.go index 8af1b7d..b48bce5 100644 --- a/internal/testsuite/runner.go +++ b/internal/testsuite/runner.go @@ -327,6 +327,7 @@ func (r *Runner) runPreflight(ctx context.Context) error { {"go", "test", "./...", "-count=1"}, {"node", "--check", "api/chat-stream.js"}, {"node", "--check", "api/helpers/stream-tool-sieve.js"}, + {"node", "--test", "api/helpers/stream-tool-sieve.test.js", "api/chat-stream.test.js"}, {"npm", "run", "build", "--prefix", "webui"}, } f, err := os.OpenFile(r.preflightLog, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)