refactor: Improve chat stream content and tool call parsing with a new recursive extraction function and dedicated tests.

2026-05-19 15:37:44 +08:00 · 2026-02-17 14:35:24 +08:00
parent 5106773573
commit 05422b2449
5 changed files with 349 additions and 25 deletions
--- a/api/helpers/stream-tool-sieve.test.js
+++ b/api/helpers/stream-tool-sieve.test.js
@@ -0,0 +1,130 @@
+'use strict';
+
+const test = require('node:test');
+const assert = require('node:assert/strict');
+
+const {
+  extractToolNames,
+  createToolSieveState,
+  processToolSieveChunk,
+  flushToolSieve,
+  parseToolCalls,
+} = require('./stream-tool-sieve');
+
+// Test helper: feeds each chunk to a fresh sieve state, flushes, and returns all events in order.
+function runSieve(chunks, toolNames) {
+  const sieve = createToolSieveState();
+  const emitted = [];
+  for (const piece of chunks) {
+    emitted.push(...processToolSieveChunk(sieve, piece, toolNames));
+  }
+  return [...emitted, ...flushToolSieve(sieve, toolNames)];
+}
+
+// Test helper: concatenates, in order, the text of every event whose type is
+// 'text' and whose text is truthy; all other events are ignored.
+function collectText(events) {
+  const keep = (evt) => evt.type === 'text' && evt.text;
+  return events.reduce((joined, evt) => (keep(evt) ? `${joined}${evt.text}` : joined), '');
+}
+
+// Entries without a function name resolve to 'unknown'; a padded name comes back trimmed.
+test('extractToolNames keeps tool mode enabled with unknown fallback', () => {
+  const nameless = { function: { description: 'no name tool' } };
+  const padded = { function: { name: ' read_file ' } };
+  const empty = {};
+  const resolved = extractToolNames([nameless, padded, empty]);
+  assert.deepEqual(resolved, ['unknown', 'read_file', 'unknown']);
+});
+
+// String inputs that hold a number or an array rather than an object are
+// expected back wrapped as { _raw: <original string> }.
+test('parseToolCalls keeps non-object argument strings as _raw (Go parity)', () => {
+  const rawInputs = [
+    ['read_file', '123'],
+    ['list_dir', '[1,2,3]'],
+  ];
+  const requested = rawInputs.map(([name, input]) => ({ name, input }));
+  const toolNames = rawInputs.map(([name]) => name);
+  const expected = rawInputs.map(([name, input]) => ({ name, input: { _raw: input } }));
+  const parsed = parseToolCalls(JSON.stringify({ tool_calls: requested }), toolNames);
+  assert.deepEqual(parsed, expected);
+});
+
+// A call whose name is missing from the allowed list must still be parsed and returned.
+test('parseToolCalls still intercepts unknown schema names to avoid leaks', () => {
+  const unknownCall = { name: 'not_in_schema', input: { q: 'go' } };
+  const serialized = JSON.stringify({ tool_calls: [unknownCall] });
+  const parsed = parseToolCalls(serialized, ['search']);
+  assert.equal(parsed.length, 1);
+  assert.equal(parsed[0].name, 'not_in_schema');
+});
+
+// The tool-call JSON follows a prose line inside a markdown json fence, and
+// function.arguments is itself a JSON-encoded string; the parsed call must
+// expose those arguments decoded into an object.
+test('parseToolCalls supports fenced json and function.arguments string payload', () => {
+  const fencedBody = '{"tool_calls":[{"function":{"name":"read_file","arguments":"{\\"path\\":\\"README.md\\"}"}}]}';
+  const reply = ['I will call a tool now.', '```json', fencedBody, '```'].join('\n');
+  const parsed = parseToolCalls(reply, ['read_file']);
+  assert.equal(parsed.length, 1);
+  const onlyCall = parsed[0];
+  assert.equal(onlyCall.name, 'read_file');
+  assert.deepEqual(onlyCall.input, { path: 'README.md' });
+});
+
+// The opening `{"` arrives in its own chunk before the `tool_calls` key; the
+// sieve must still emit the call, hide the JSON, and pass the trailing text.
+test('sieve emits tool_calls and does not leak suspicious prefix on late key convergence', () => {
+  const chunks = [
+    '{"',
+    'tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}',
+    '后置正文C。',
+  ];
+  const emitted = runSieve(chunks, ['read_file']);
+  const isFilledToolCall = (evt) => evt.type === 'tool_calls' && Array.isArray(evt.calls) && evt.calls.length > 0;
+  const visible = collectText(emitted);
+  assert.equal(emitted.some(isFilledToolCall), true);
+  assert.equal(visible.includes('{'), false);
+  assert.equal(visible.toLowerCase().includes('tool_calls'), false);
+  assert.equal(visible.includes('后置正文C。'), true);
+});
+
+// The middle chunk is single-quoted and therefore not valid JSON: no
+// tool_calls event may be emitted, the body must not surface as text, and
+// the text on either side of it must survive.
+test('sieve drops invalid tool json body while preserving surrounding text', () => {
+  const before = '前置正文D。';
+  const invalidBody = "{'tool_calls':[{'name':'read_file','input':{'path':'README.MD'}}]}";
+  const after = '后置正文E。';
+  const emitted = runSieve([before, invalidBody, after], ['read_file']);
+  const visible = collectText(emitted);
+  const sawToolCall = emitted.some(({ type }) => type === 'tool_calls');
+
+  assert.equal(sawToolCall, false);
+  assert.equal(visible.includes(before), true);
+  assert.equal(visible.includes(after), true);
+  assert.equal(visible.toLowerCase().includes('tool_calls'), false);
+});
+
+// The stream ends in the middle of the tool JSON; after the final flush the
+// partial capture must not appear as text, while the earlier text remains.
+test('sieve suppresses incomplete captured tool json on stream finalize', () => {
+  const leadText = '前置正文F。';
+  const truncatedJson = '{"tool_calls":[{"name":"read_file"';
+  const visible = collectText(runSieve([leadText, truncatedJson], ['read_file']));
+  assert.equal(visible.includes(leadText), true);
+  assert.equal(visible.toLowerCase().includes('tool_calls'), false);
+  assert.equal(visible.includes('{'), false);
+});
+
+// Ordinary prose with no tool-call JSON must come out of the sieve unchanged
+// and must not produce any tool_calls event.
+test('sieve keeps plain text intact in tool mode when no tool call appears', () => {
+  const chunks = ['你好，', '这是普通文本回复。', '请继续。'];
+  const emitted = runSieve(chunks, ['read_file']);
+  const visible = collectText(emitted);
+  const sawToolCall = emitted.some(({ type }) => type === 'tool_calls');
+  assert.equal(sawToolCall, false);
+  assert.equal(visible, '你好，这是普通文本回复。请继续。');
+});