refactor: Improve chat stream content and tool call parsing with a new recursive extraction function and dedicated tests.

This commit is contained in:
CJACK
2026-02-17 14:35:24 +08:00
parent 5106773573
commit 05422b2449
5 changed files with 349 additions and 25 deletions

View File

@@ -23,6 +23,10 @@ DS2API 提供两个层级的测试:
go test ./...
```
```bash
node --test api/helpers/stream-tool-sieve.test.js api/chat-stream.test.js
```
### 端到端测试 | End-to-End Tests
```bash
@@ -35,6 +39,7 @@ go test ./...
- `go test ./... -count=1`(单元测试)
- `node --check api/chat-stream.js`(语法检查)
- `node --check api/helpers/stream-tool-sieve.js`(语法检查)
- `node --test api/helpers/stream-tool-sieve.test.js api/chat-stream.test.js`(Node 流式拦截单测)
- `npm run build --prefix webui`(WebUI 构建检查)
2. **隔离启动**:复制 `config.json` 到临时目录,启动独立服务进程

View File

@@ -517,32 +517,11 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType) {
}
if (Array.isArray(val)) {
for (const entry of val) {
if (typeof entry === 'string') {
if (entry) {
parts.push({ text: entry, type: partType });
}
continue;
}
if (!entry || typeof entry !== 'object') {
continue;
}
if (asString(entry.p) === 'status' && asString(entry.v) === 'FINISHED') {
return { parts: [], finished: true, newType };
}
const content = asString(entry.content);
if (!content) {
continue;
}
const t = asString(entry.type).toUpperCase();
if (t === 'THINK' || t === 'THINKING') {
parts.push({ text: content, type: 'thinking' });
} else if (t === 'RESPONSE') {
parts.push({ text: content, type: 'text' });
} else {
parts.push({ text: content, type: partType });
}
const extracted = extractContentRecursive(val, partType);
if (extracted.finished) {
return { parts: [], finished: true, newType };
}
parts.push(...extracted.parts);
return { parts, finished: false, newType };
}
@@ -573,6 +552,80 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType) {
return { parts, finished: false, newType };
}
// Walk an array of stream entries and collect displayable parts.
// Mirrors the Go parser: entries lacking an own `v` key are ignored,
// a `status: FINISHED` entry aborts the whole extraction, and content
// type is resolved from the entry's explicit `type` first, then its path.
// Returns { parts: [{ text, type }], finished: boolean }.
function extractContentRecursive(items, defaultType) {
  // Map an explicit entry type (THINK/THINKING/RESPONSE) to a part type,
  // falling back to the caller-supplied default for unknown values.
  const classify = (rawType, fallback) => {
    const t = asString(rawType).toUpperCase();
    if (t === 'THINK' || t === 'THINKING') {
      return 'thinking';
    }
    return t === 'RESPONSE' ? 'text' : fallback;
  };
  const parts = [];
  for (const item of items) {
    if (item === null || typeof item !== 'object') {
      continue;
    }
    // Go-parity: only entries that actually carry a `v` field participate.
    if (!Object.prototype.hasOwnProperty.call(item, 'v')) {
      continue;
    }
    const path = asString(item.p);
    const value = item.v;
    // A nested status update terminates extraction; drop buffered parts.
    if (path === 'status' && asString(value) === 'FINISHED') {
      return { parts: [], finished: true };
    }
    if (shouldSkipPath(path)) {
      continue;
    }
    // Entries with inline `content` take their type from the `type` field.
    const directText = asString(item.content);
    if (directText) {
      parts.push({ text: directText, type: classify(item.type, defaultType) });
      continue;
    }
    // No inline content: infer a part type from the entry's path.
    let inferredType = defaultType;
    if (path.includes('thinking')) {
      inferredType = 'thinking';
    } else if (path.includes('content') || path === 'response' || path === 'fragments') {
      inferredType = 'text';
    }
    if (typeof value === 'string') {
      // Plain string payload; a bare 'FINISHED' marker is never emitted as text.
      if (value && value !== 'FINISHED') {
        parts.push({ text: value, type: inferredType });
      }
      continue;
    }
    if (!Array.isArray(value)) {
      continue;
    }
    // One level of nesting: arrays may mix raw strings and content objects.
    for (const nested of value) {
      if (typeof nested === 'string') {
        if (nested) {
          parts.push({ text: nested, type: inferredType });
        }
        continue;
      }
      if (!nested || typeof nested !== 'object') {
        continue;
      }
      const nestedText = asString(nested.content);
      if (nestedText) {
        parts.push({ text: nestedText, type: classify(nested.type, inferredType) });
      }
    }
  }
  return { parts, finished: false };
}
function shouldSkipPath(pathValue) {
if (pathValue === 'response/search_status') {
return true;
@@ -708,3 +761,10 @@ function asString(v) {
}
return String(v).trim();
}
// Test-only hooks: expose internal parser helpers for the unit tests in
// api/chat-stream.test.js without widening the module's public API.
module.exports.__test = {
  parseChunkForContent,
  extractContentRecursive,
  shouldSkipPath,
  asString,
};

128
api/chat-stream.test.js Normal file
View File

@@ -0,0 +1,128 @@
'use strict';
// Unit tests for the internal chunk parser of api/chat-stream.js, including
// its interaction with the tool-call sieve. Run via `node --test`.
const test = require('node:test');
const assert = require('node:assert/strict');
const handler = require('./chat-stream');
const {
  createToolSieveState,
  processToolSieveChunk,
  flushToolSieve,
} = require('./helpers/stream-tool-sieve');
// Internal parser entry point, exported for tests only via __test hooks.
const { parseChunkForContent } = handler.__test;
test('chat-stream exposes parser test hooks', () => {
  assert.equal(typeof parseChunkForContent, 'function');
});
// Tool-call JSON split across two response/content fragments must survive as
// two ordered text parts so the sieve can reassemble and intercept it.
test('parseChunkForContent keeps split response/content fragments inside response array', () => {
  const chunk = {
    p: 'response',
    v: [
      { p: 'response/content', v: '{"' },
      { p: 'response/content', v: 'tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}' },
    ],
  };
  const parsed = parseChunkForContent(chunk, false, 'text');
  assert.equal(parsed.finished, false);
  assert.equal(parsed.newType, 'text');
  assert.equal(parsed.parts.length, 2);
  const combined = parsed.parts.map((p) => p.text).join('');
  assert.equal(combined, '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}');
});
// End-to-end through the sieve: the split tool JSON must become a tool_calls
// event and no fragment of the JSON may leak into plain-text output.
test('parseChunkForContent + sieve does not leak suspicious prefix in split tool json case', () => {
  const chunk = {
    p: 'response',
    v: [
      { p: 'response/content', v: '{"' },
      { p: 'response/content', v: 'tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}' },
    ],
  };
  const parsed = parseChunkForContent(chunk, false, 'text');
  const state = createToolSieveState();
  const events = [];
  for (const part of parsed.parts) {
    events.push(...processToolSieveChunk(state, part.text, ['read_file']));
  }
  events.push(...flushToolSieve(state, ['read_file']));
  const hasToolCalls = events.some((evt) => evt.type === 'tool_calls' && evt.calls && evt.calls.length > 0);
  const leakedText = events
    .filter((evt) => evt.type === 'text' && evt.text)
    .map((evt) => evt.text)
    .join('');
  assert.equal(hasToolCalls, true);
  assert.equal(leakedText.includes('{'), false);
  assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
});
// Nested array payloads (raw strings and typed content objects) are flattened.
test('parseChunkForContent consumes nested item.v array payloads', () => {
  const chunk = {
    p: 'response',
    v: [
      { p: 'response/content', v: ['A', 'B'] },
      { p: 'response/content', v: [{ content: 'C', type: 'RESPONSE' }] },
    ],
  };
  const parsed = parseChunkForContent(chunk, false, 'text');
  assert.equal(parsed.finished, false);
  assert.equal(parsed.parts.map((p) => p.text).join(''), 'ABC');
});
// A status FINISHED entry inside the array terminates the chunk with no parts.
test('parseChunkForContent detects nested status FINISHED in array payload', () => {
  const chunk = {
    p: 'response',
    v: [{ p: 'status', v: 'FINISHED' }],
  };
  const parsed = parseChunkForContent(chunk, false, 'text');
  assert.equal(parsed.finished, true);
  assert.deepEqual(parsed.parts, []);
});
// Go-parity rule: entries without a `v` field are ignored entirely.
test('parseChunkForContent ignores items without v to match Go parser behavior', () => {
  const chunk = {
    p: 'response',
    v: [{ type: 'RESPONSE', content: 'no-v-content' }],
  };
  const parsed = parseChunkForContent(chunk, false, 'text');
  assert.equal(parsed.finished, false);
  assert.deepEqual(parsed.parts, []);
});
// Fragment entries carry explicit THINK/RESPONSE types; the parser must emit
// the transition (newType flips from thinking to text).
test('parseChunkForContent handles response/fragments APPEND with thinking and response transitions', () => {
  const chunk = {
    p: 'response/fragments',
    o: 'APPEND',
    v: [
      { type: 'THINK', content: '思考中' },
      { type: 'RESPONSE', content: '结论' },
    ],
  };
  const parsed = parseChunkForContent(chunk, true, 'thinking');
  assert.equal(parsed.finished, false);
  assert.equal(parsed.newType, 'text');
  assert.deepEqual(parsed.parts, [
    { text: '思考中', type: 'thinking' },
    { text: '结论', type: 'text' },
  ]);
});
// Some upstream chunks wrap fragments inside a response object; both shapes parse.
test('parseChunkForContent supports wrapped response.fragments object shape', () => {
  const chunk = {
    p: 'response',
    v: {
      response: {
        fragments: [
          { type: 'RESPONSE', content: 'A' },
          { type: 'RESPONSE', content: 'B' },
        ],
      },
    },
  };
  const parsed = parseChunkForContent(chunk, false, 'text');
  assert.equal(parsed.finished, false);
  assert.equal(parsed.parts.map((p) => p.text).join(''), 'AB');
});

View File

@@ -0,0 +1,130 @@
'use strict';
const test = require('node:test');
const assert = require('node:assert/strict');
const {
extractToolNames,
createToolSieveState,
processToolSieveChunk,
flushToolSieve,
parseToolCalls,
} = require('./stream-tool-sieve');
// Drive a fresh sieve over every text chunk in order, then flush it,
// returning the full ordered event stream the sieve produced.
function runSieve(chunks, toolNames) {
  const sieveState = createToolSieveState();
  const collected = [];
  for (const piece of chunks) {
    for (const evt of processToolSieveChunk(sieveState, piece, toolNames)) {
      collected.push(evt);
    }
  }
  // Finalize: any buffered tail is resolved into its last events.
  for (const evt of flushToolSieve(sieveState, toolNames)) {
    collected.push(evt);
  }
  return collected;
}
// Concatenate the text of every non-empty text event, preserving order.
function collectText(events) {
  let combined = '';
  for (const evt of events) {
    if (evt.type === 'text' && evt.text) {
      combined += evt.text;
    }
  }
  return combined;
}
// Tools without a usable name fall back to 'unknown' so tool mode stays on.
test('extractToolNames keeps tool mode enabled with unknown fallback', () => {
  const names = extractToolNames([
    { function: { description: 'no name tool' } },
    { function: { name: ' read_file ' } },
    {},
  ]);
  assert.deepEqual(names, ['unknown', 'read_file', 'unknown']);
});
// Argument strings that parse to non-objects are wrapped as { _raw } — Go parity.
test('parseToolCalls keeps non-object argument strings as _raw (Go parity)', () => {
  const payload = JSON.stringify({
    tool_calls: [
      { name: 'read_file', input: '123' },
      { name: 'list_dir', input: '[1,2,3]' },
    ],
  });
  const calls = parseToolCalls(payload, ['read_file', 'list_dir']);
  assert.deepEqual(calls, [
    { name: 'read_file', input: { _raw: '123' } },
    { name: 'list_dir', input: { _raw: '[1,2,3]' } },
  ]);
});
// Names outside the declared schema are still intercepted rather than leaked as text.
test('parseToolCalls still intercepts unknown schema names to avoid leaks', () => {
  const payload = JSON.stringify({
    tool_calls: [{ name: 'not_in_schema', input: { q: 'go' } }],
  });
  const calls = parseToolCalls(payload, ['search']);
  assert.equal(calls.length, 1);
  assert.equal(calls[0].name, 'not_in_schema');
});
// Fenced ```json blocks and stringified function.arguments payloads both parse.
test('parseToolCalls supports fenced json and function.arguments string payload', () => {
  const text = [
    'I will call a tool now.',
    '```json',
    '{"tool_calls":[{"function":{"name":"read_file","arguments":"{\\"path\\":\\"README.md\\"}"}}]}',
    '```',
  ].join('\n');
  const calls = parseToolCalls(text, ['read_file']);
  assert.equal(calls.length, 1);
  assert.equal(calls[0].name, 'read_file');
  assert.deepEqual(calls[0].input, { path: 'README.md' });
});
// The telltale '{"' prefix arrives alone; the key only converges in chunk two.
// The sieve must still emit tool_calls and withhold the JSON from text output.
test('sieve emits tool_calls and does not leak suspicious prefix on late key convergence', () => {
  const events = runSieve(
    [
      '{"',
      'tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}',
      '后置正文C。',
    ],
    ['read_file'],
  );
  const leakedText = collectText(events);
  const hasToolCall = events.some((evt) => evt.type === 'tool_calls' && Array.isArray(evt.calls) && evt.calls.length > 0);
  assert.equal(hasToolCall, true);
  assert.equal(leakedText.includes('{'), false);
  assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
  assert.equal(leakedText.includes('后置正文C。'), true);
});
// Single-quoted pseudo-JSON is invalid: no tool call, body dropped, text kept.
test('sieve drops invalid tool json body while preserving surrounding text', () => {
  const events = runSieve(
    [
      '前置正文D。',
      "{'tool_calls':[{'name':'read_file','input':{'path':'README.MD'}}]}",
      '后置正文E。',
    ],
    ['read_file'],
  );
  const leakedText = collectText(events);
  const hasToolCall = events.some((evt) => evt.type === 'tool_calls');
  assert.equal(hasToolCall, false);
  assert.equal(leakedText.includes('前置正文D。'), true);
  assert.equal(leakedText.includes('后置正文E。'), true);
  assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
});
// A stream ending mid-capture must not flush the partial JSON into text.
test('sieve suppresses incomplete captured tool json on stream finalize', () => {
  const events = runSieve(
    ['前置正文F。', '{"tool_calls":[{"name":"read_file"'],
    ['read_file'],
  );
  const leakedText = collectText(events);
  assert.equal(leakedText.includes('前置正文F。'), true);
  assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
  assert.equal(leakedText.includes('{'), false);
});
// Plain prose in tool mode passes through untouched.
test('sieve keeps plain text intact in tool mode when no tool call appears', () => {
  const events = runSieve(
    ['你好,', '这是普通文本回复。', '请继续。'],
    ['read_file'],
  );
  const leakedText = collectText(events);
  const hasToolCall = events.some((evt) => evt.type === 'tool_calls');
  assert.equal(hasToolCall, false);
  assert.equal(leakedText, '你好,这是普通文本回复。请继续。');
});

View File

@@ -327,6 +327,7 @@ func (r *Runner) runPreflight(ctx context.Context) error {
{"go", "test", "./...", "-count=1"},
{"node", "--check", "api/chat-stream.js"},
{"node", "--check", "api/helpers/stream-tool-sieve.js"},
{"node", "--test", "api/helpers/stream-tool-sieve.test.js", "api/chat-stream.test.js"},
{"npm", "run", "build", "--prefix", "webui"},
}
f, err := os.OpenFile(r.preflightLog, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)