Tighten XML tool call parsing and upstream empty handling

This commit is contained in:
CJACK
2026-04-26 01:17:16 +08:00
parent a44afb335a
commit 1b0e8cbadb
37 changed files with 273 additions and 991 deletions

View File

@@ -42,7 +42,7 @@ test('extractToolNames keeps only declared tool names (Go parity)', () => {
});
test('parseToolCalls parses XML markup tool call', () => {
const payload = '<tool_call><tool_name>read_file</tool_name><parameters>{"path":"README.MD"}</parameters></tool_call>';
const payload = '<tools><tool_call><tool_name>read_file</tool_name><param>{"path":"README.MD"}</param></tool_call></tools>';
const calls = parseToolCalls(payload, ['read_file']);
assert.equal(calls.length, 1);
assert.equal(calls[0].name, 'read_file');
@@ -61,7 +61,7 @@ test('parseToolCalls ignores tool_call payloads that exist only inside fenced co
const text = [
'I will call a tool now.',
'```xml',
'<tool_call><tool_name>read_file</tool_name><parameters>{"path":"README.md"}</parameters></tool_call>',
'<tools><tool_call><tool_name>read_file</tool_name><param>{"path":"README.md"}</param></tool_call></tools>',
'```',
].join('\n');
const calls = parseToolCalls(text, ['read_file']);
@@ -69,7 +69,7 @@ test('parseToolCalls ignores tool_call payloads that exist only inside fenced co
});
test('parseToolCalls keeps unknown schema names when toolNames is provided', () => {
const payload = '<tool_call><tool_name>not_in_schema</tool_name><parameters>{"q":"go"}</parameters></tool_call>';
const payload = '<tools><tool_call><tool_name>not_in_schema</tool_name><param>{"q":"go"}</param></tool_call></tools>';
const calls = parseToolCalls(payload, ['search']);
assert.equal(calls.length, 1);
assert.equal(calls[0].name, 'not_in_schema');
@@ -77,7 +77,7 @@ test('parseToolCalls keeps unknown schema names when toolNames is provided', ()
test('sieve emits tool_calls for XML tool call payload', () => {
const events = runSieve(
['<tool_call><tool_name>read_file</tool_name><parameters>{"path":"README.MD"}</parameters></tool_call>'],
['<tools><tool_call><tool_name>read_file</tool_name><param>{"path":"README.MD"}</param></tool_call></tools>'],
['read_file'],
);
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
@@ -88,8 +88,8 @@ test('sieve emits tool_calls for XML tool call payload', () => {
test('sieve emits tool_calls when XML tag spans multiple chunks', () => {
const events = runSieve(
[
'<tool_call><tool_name>read_file</tool_name>',
'<parameters>{"path":"README.MD"}</parameters></tool_call>',
'<tools><tool_call><tool_name>read_file</tool_name>',
'<param>{"path":"README.MD"}</param></tool_call></tools>',
],
['read_file'],
);
@@ -103,10 +103,10 @@ test('sieve keeps long XML tool calls buffered until the closing tag arrives', (
const splitAt = longContent.length / 2;
const events = runSieve(
[
'<tool_calls>\n <tool_call>\n <tool_name>write_to_file</tool_name>\n <parameters>\n <content><![CDATA[',
'<tools>\n <tool_call>\n <tool_name>write_to_file</tool_name>\n <param>\n <content><![CDATA[',
longContent.slice(0, splitAt),
longContent.slice(splitAt),
']]></content>\n </parameters>\n </tool_call>\n</tool_calls>',
']]></content>\n </param>\n </tool_call>\n</tools>',
],
['write_to_file'],
);
@@ -147,7 +147,7 @@ test('sieve keeps embedded invalid tool-like json as normal text to avoid stream
});
test('sieve passes malformed executable-looking XML through as text', () => {
const chunk = '<tool_call><parameters>{"path":"README.MD"}</parameters></tool_call>';
const chunk = '<tools><tool_call><param>{"path":"README.MD"}</param></tool_call></tools>';
const events = runSieve([chunk], ['read_file']);
const leakedText = collectText(events);
const hasToolCalls = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0);
@@ -159,14 +159,14 @@ test('sieve flushes incomplete captured XML tool blocks by falling back to raw t
const events = runSieve(
[
'前置正文G。',
'<tool_calls>\n',
'<tools>\n',
' <tool_call>\n',
' <tool_name>read_file</tool_name>\n',
],
['read_file'],
);
const leakedText = collectText(events);
const expected = ['前置正文G。', '<tool_calls>\n', ' <tool_call>\n', ' <tool_name>read_file</tool_name>\n'].join('');
const expected = ['前置正文G。', '<tools>\n', ' <tool_call>\n', ' <tool_name>read_file</tool_name>\n'].join('');
const hasToolCalls = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0);
assert.equal(hasToolCalls, false);
assert.equal(leakedText, expected);
@@ -176,7 +176,7 @@ test('sieve captures XML wrapper tags with attributes without leaking wrapper te
const events = runSieve(
[
'前置正文H。',
'<tool_calls id="x"><tool_call><tool_name>read_file</tool_name><parameters>{"path":"README.MD"}</parameters></tool_call></tool_calls>',
'<tools id="x"><tool_call><tool_name>read_file</tool_name><param>{"path":"README.MD"}</param></tool_call></tools>',
'后置正文I。',
],
['read_file'],
@@ -186,8 +186,8 @@ test('sieve captures XML wrapper tags with attributes without leaking wrapper te
assert.equal(hasToolCall, true);
assert.equal(leakedText.includes('前置正文H。'), true);
assert.equal(leakedText.includes('后置正文I。'), true);
assert.equal(leakedText.includes('<tool_calls id=\"x\">'), false);
assert.equal(leakedText.includes('</tool_calls>'), false);
assert.equal(leakedText.includes('<tools id=\"x\">'), false);
assert.equal(leakedText.includes('</tools>'), false);
});
test('sieve keeps plain text intact in tool mode when no tool call appears', () => {
@@ -270,7 +270,7 @@ test('formatOpenAIStreamToolCalls reuses ids with the same idStore', () => {
});
test('parseToolCalls rejects mismatched markup tags', () => {
const payload = '<tool_call><name>read_file</function><arguments>{"path":"README.md"}</arguments></tool_call>';
const payload = '<tools><tool_call><tool_name>read_file</function><param>{"path":"README.md"}</param></tool_call></tools>';
const calls = parseToolCalls(payload, ['read_file']);
assert.equal(calls.length, 0);
});