mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-11 03:37:40 +08:00
feat(toolcall): harden confusable candidate spans
This commit is contained in:
@@ -643,6 +643,16 @@ test('parseChunkForContent strips citation and reference markers from fragment c
|
||||
assert.deepEqual(parsed.parts, [{ text: '广州天气 多云', type: 'text' }]);
|
||||
});
|
||||
|
||||
test('parseChunkForContent strips leaked thought control markers from content', () => {
|
||||
const chunk = {
|
||||
p: 'response/content',
|
||||
v: '<|▁of▁thought|>A<| of_thought |>B<| end_of_thought |>C',
|
||||
};
|
||||
const parsed = parseChunkForContent(chunk, false, 'text');
|
||||
assert.equal(parsed.finished, false);
|
||||
assert.deepEqual(parsed.parts, [{ text: 'ABC', type: 'text' }]);
|
||||
});
|
||||
|
||||
test('parseChunkForContent detects content_filter status and ignores upstream output tokens', () => {
|
||||
const chunk = {
|
||||
p: 'response',
|
||||
|
||||
@@ -57,6 +57,38 @@ test('parseToolCalls parses DSML shell as XML-compatible tool call', () => {
|
||||
assert.deepEqual(calls[0].input, { path: 'README.MD' });
|
||||
});
|
||||
|
||||
test('parseToolCalls tolerates fullwidth closing slash in DSML wrapper', () => {
|
||||
const payload = '<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke></DSML|tool_calls>';
|
||||
const calls = parseToolCalls(payload, ['execute_code']);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].name, 'execute_code');
|
||||
assert.deepEqual(calls[0].input, { code: 'print("hi")' });
|
||||
});
|
||||
|
||||
test('parseToolCalls tolerates sentencepiece separator and fullwidth terminator', () => {
|
||||
const payload = '<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls>';
|
||||
const calls = parseToolCalls(payload, ['execute_code']);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].name, 'execute_code');
|
||||
assert.deepEqual(calls[0].input, { code: 'print("hi")' });
|
||||
});
|
||||
|
||||
test('parseToolCalls tolerates fullwidth opening delimiter and Unicode attribute confusables', () => {
|
||||
const payload = '<|DSML tool_calls><|DSML invoke name=“execute_code”><|DSML parameter name=“code”><![CDATA[print("hi")]]></DSML|parameter></DSML|invoke></DSML|tool_calls>';
|
||||
const calls = parseToolCalls(payload, ['execute_code']);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].name, 'execute_code');
|
||||
assert.deepEqual(calls[0].input, { code: 'print("hi")' });
|
||||
});
|
||||
|
||||
test('parseToolCalls canonicalizes confusable candidate shell only', () => {
|
||||
const payload = '<|\u200b\uff24\u0405\u039cL|to\u03bfl\uff3fcalls><|\ufeffDSML|inv\u03bfk\u0435 n\u0430me\uff1d\u201cexecute_code\u201d><|\u200bDSML|par\u0430meter n\u0430me\uff1d\u201ccode\u201d><![\ufeff\u0421D\u0410T\u0410[print("hi")]]></|\u200bDSML|par\u0430meter></|\u200bDSML|inv\u03bfk\u0435></|\u200b\uff24\u0405\u039cL|to\u03bfl\uff3fcalls>';
|
||||
const calls = parseToolCalls(payload, ['execute_code']);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].name, 'execute_code');
|
||||
assert.deepEqual(calls[0].input, { code: 'print("hi")' });
|
||||
});
|
||||
|
||||
test('parseToolCalls parses hyphenated DSML shell with here-doc CDATA', () => {
|
||||
const payload = `<dsml-tool-calls>
|
||||
<dsml-invoke name="Bash">
|
||||
@@ -344,6 +376,12 @@ test('parseToolCalls ignores collapsed DSML lookalike tag names', () => {
|
||||
assert.equal(calls.length, 0);
|
||||
});
|
||||
|
||||
test('parseToolCalls rejects confusable near-miss tag names', () => {
|
||||
const payload = '<tool_calls><inv\u03bfker name="execute_code"><parameter name="code">pwd</parameter></inv\u03bfker></tool_calls>';
|
||||
const calls = parseToolCalls(payload, ['execute_code']);
|
||||
assert.equal(calls.length, 0);
|
||||
});
|
||||
|
||||
test('parseToolCalls keeps canonical XML examples inside DSML CDATA', () => {
|
||||
const content = '<tool_calls><invoke name="demo"><parameter name="value">x</parameter></invoke></tool_calls>';
|
||||
const payload = `<|DSML|tool_calls><|DSML|invoke name="write_file"><|DSML|parameter name="path">notes.md</|DSML|parameter><|DSML|parameter name="content"><![CDATA[${content}]]></|DSML|parameter></|DSML|invoke></|DSML|tool_calls>`;
|
||||
@@ -360,6 +398,14 @@ test('parseToolCalls preserves simple inline markup inside CDATA as text', () =>
|
||||
assert.equal(calls[0].input.description, '<b>urgent</b>');
|
||||
});
|
||||
|
||||
test('parseToolCalls keeps confusable markup examples inside CDATA as text', () => {
|
||||
const value = '<inv\u03bfke>literal</inv\u03bfke>';
|
||||
const payload = `<tool_calls><invoke name="Write"><parameter name="description"><![\u200b\u0421D\u0410T\u0410[${value}]]></parameter></invoke></tool_calls>`;
|
||||
const calls = parseToolCalls(payload, ['Write']);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].input.description, value);
|
||||
});
|
||||
|
||||
test('parseToolCalls recovers when CDATA never closes inside a valid wrapper', () => {
|
||||
const payload = '<tool_calls><invoke name="Write"><parameter name="content"><![CDATA[hello world</parameter></invoke></tool_calls>';
|
||||
const calls = parseToolCalls(payload, ['Write']);
|
||||
@@ -556,6 +602,65 @@ test('sieve emits tool_calls for DSML space-separator typo', () => {
|
||||
assert.equal(text.includes('<|DSML invoke'), false);
|
||||
});
|
||||
|
||||
test('sieve emits tool_calls for fullwidth closing slash and preserves suffix text', () => {
|
||||
const input = '<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke></DSML|tool_calls> sao cụm này lại đc trả là 1 message';
|
||||
const events = runSieve([input], ['execute_code']);
|
||||
const text = collectText(events);
|
||||
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
assert.equal(finalCalls.length, 1);
|
||||
assert.equal(finalCalls[0].name, 'execute_code');
|
||||
assert.deepEqual(finalCalls[0].input, { code: 'print("hi")' });
|
||||
assert.equal(text, ' sao cụm này lại đc trả là 1 message');
|
||||
});
|
||||
|
||||
test('sieve emits tool_calls for sentencepiece separator and fullwidth terminator', () => {
|
||||
const input = '<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls> suffix';
|
||||
const events = runSieve([input], ['execute_code']);
|
||||
const text = collectText(events);
|
||||
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
assert.equal(finalCalls.length, 1);
|
||||
assert.equal(finalCalls[0].name, 'execute_code');
|
||||
assert.deepEqual(finalCalls[0].input, { code: 'print("hi")' });
|
||||
assert.equal(text, ' suffix');
|
||||
});
|
||||
|
||||
test('sieve emits tool_calls for fullwidth opening delimiter and Unicode attribute confusables', () => {
|
||||
const input = '<|DSML tool_calls><|DSML invoke name=“execute_code”><|DSML parameter name=“code”><![CDATA[print("hi")]]></DSML|parameter></DSML|invoke></DSML|tool_calls> suffix';
|
||||
const events = runSieve([input], ['execute_code']);
|
||||
const text = collectText(events);
|
||||
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
assert.equal(finalCalls.length, 1);
|
||||
assert.equal(finalCalls[0].name, 'execute_code');
|
||||
assert.deepEqual(finalCalls[0].input, { code: 'print("hi")' });
|
||||
assert.equal(text, ' suffix');
|
||||
});
|
||||
|
||||
test('sieve emits tool_calls for confusable candidate shell and preserves suffix text', () => {
|
||||
const input = '<|\u200b\uff24\u0405\u039cL|to\u03bfl\uff3fcalls><|\ufeffDSML|inv\u03bfk\u0435 n\u0430me\uff1d\u201cexecute_code\u201d><|\u200bDSML|par\u0430meter n\u0430me\uff1d\u201ccode\u201d><![\ufeff\u0421D\u0410T\u0410[print("hi")]]></|\u200bDSML|par\u0430meter></|\u200bDSML|inv\u03bfk\u0435></|\u200b\uff24\u0405\u039cL|to\u03bfl\uff3fcalls> suffix';
|
||||
const events = runSieve([input], ['execute_code']);
|
||||
const text = collectText(events);
|
||||
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
assert.equal(finalCalls.length, 1);
|
||||
assert.equal(finalCalls[0].name, 'execute_code');
|
||||
assert.deepEqual(finalCalls[0].input, { code: 'print("hi")' });
|
||||
assert.equal(text, ' suffix');
|
||||
});
|
||||
|
||||
test('sieve repairs confusable missing opening wrapper and preserves suffix text', () => {
|
||||
const events = runSieve([
|
||||
'<inv\u03bfk\u0435 n\u0430me="read_file">\n',
|
||||
' <par\u0430meter n\u0430me="path"><![\u200b\u0421D\u0410T\u0410[README.md]]></par\u0430meter>\n',
|
||||
'</inv\u03bfk\u0435>\n',
|
||||
'</to\u03bfl_calls> trailing prose',
|
||||
], ['read_file']);
|
||||
const text = collectText(events);
|
||||
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
assert.equal(finalCalls.length, 1);
|
||||
assert.equal(finalCalls[0].name, 'read_file');
|
||||
assert.deepEqual(finalCalls[0].input, { path: 'README.md' });
|
||||
assert.equal(text, ' trailing prose');
|
||||
});
|
||||
|
||||
test('sieve emits tool_calls for DSML trailing pipe tag terminator', () => {
|
||||
const events = runSieve([
|
||||
'<|DSML|tool_calls| \n',
|
||||
@@ -742,6 +847,14 @@ test('sieve keeps collapsed DSML lookalike tag names as text', () => {
|
||||
assert.equal(collectText(events), input);
|
||||
});
|
||||
|
||||
test('sieve keeps confusable near-miss wrappers as text', () => {
|
||||
const input = '<to\u03bfl_callz><inv\u03bfke name="read_file"><parameter name="path">README.md</parameter></inv\u03bfke></to\u03bfl_callz>';
|
||||
const events = runSieve([input], ['read_file']);
|
||||
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
assert.equal(finalCalls.length, 0);
|
||||
assert.equal(collectText(events), input);
|
||||
});
|
||||
|
||||
test('sieve preserves review body with alias mentions before real DSML tool calls', () => {
|
||||
const events = runSieve([
|
||||
"Done reviewing the diff. Here's my analysis before we commit:\n\n",
|
||||
|
||||
Reference in New Issue
Block a user