mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-05 00:45:29 +08:00
refactor: generalize DSML tag parsing to tolerate model noise; split tiktoken by build tags
Replace hardcoded DSML typo variant lists in Go/Node tool call parsers with generalized prefix consumption that tolerates repeated leading <, repeated DSML prefix noise, and trailing pipe terminators. Split tiktoken-dependent token counting into a build-tagged file for non-cgo platform compatibility. Add /data directory to Dockerfile for bind-mount permissions. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -57,6 +57,49 @@ test('parseToolCalls parses DSML shell as XML-compatible tool call', () => {
|
||||
assert.deepEqual(calls[0].input, { path: 'README.MD' });
|
||||
});
|
||||
|
||||
test('parseToolCalls tolerates DSML trailing pipe tag terminator', () => {
|
||||
const payload = [
|
||||
'<|DSML|tool_calls| ',
|
||||
' <|DSML|invoke name="terminal">',
|
||||
' <|DSML|parameter name="command"><![CDATA[find "/home" -type d]]></|DSML|parameter>',
|
||||
' <|DSML|parameter name="timeout"><![CDATA[10]]></|DSML|parameter>',
|
||||
' </|DSML|invoke>',
|
||||
'</|DSML|tool_calls>',
|
||||
].join('\n');
|
||||
const calls = parseToolCalls(payload, ['terminal']);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].name, 'terminal');
|
||||
assert.deepEqual(calls[0].input, { command: 'find "/home" -type d', timeout: 10 });
|
||||
});
|
||||
|
||||
test('parseToolCalls tolerates extra leading less-than before DSML tags', () => {
|
||||
const payload = [
|
||||
'<<|DSML|tool_calls>',
|
||||
' <<|DSML|invoke name="Bash">',
|
||||
' <<|DSML|parameter name="command"><![CDATA[pwd]]></|DSML|parameter>',
|
||||
' </|DSML|invoke>',
|
||||
'</|DSML|tool_calls>',
|
||||
].join('\n');
|
||||
const calls = parseToolCalls(payload, ['Bash']);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].name, 'Bash');
|
||||
assert.deepEqual(calls[0].input, { command: 'pwd' });
|
||||
});
|
||||
|
||||
test('parseToolCalls tolerates repeated DSML prefix noise', () => {
|
||||
const payload = [
|
||||
'<<DSML|DSML|tool_calls>',
|
||||
' <<DSML|DSML|invoke name="Bash">',
|
||||
' <<DSML|DSML|parameter name="command"><![CDATA[git status]]></DSML|DSML|parameter>',
|
||||
' </DSML|DSML|invoke>',
|
||||
'</DSML|DSML|tool_calls>',
|
||||
].join('\n');
|
||||
const calls = parseToolCalls(payload, ['Bash']);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].name, 'Bash');
|
||||
assert.deepEqual(calls[0].input, { command: 'git status' });
|
||||
});
|
||||
|
||||
test('parseToolCalls tolerates DSML space-separator typo', () => {
|
||||
const payload = '<|DSML tool_calls><|DSML invoke name="Read"><|DSML parameter name="file_path"><![CDATA[/tmp/input.txt]]></|DSML parameter></|DSML invoke></|DSML tool_calls>';
|
||||
const calls = parseToolCalls(payload, ['Read']);
|
||||
@@ -285,6 +328,39 @@ test('sieve emits tool_calls for DSML space-separator typo', () => {
|
||||
assert.equal(text.includes('<|DSML invoke'), false);
|
||||
});
|
||||
|
||||
test('sieve emits tool_calls for DSML trailing pipe tag terminator', () => {
|
||||
const events = runSieve([
|
||||
'<|DSML|tool_calls| \n',
|
||||
'<|DSML|invoke name="terminal">\n',
|
||||
'<|DSML|parameter name="command"><![CDATA[find "/home" -type d]]></|DSML|parameter>\n',
|
||||
'<|DSML|parameter name="timeout"><![CDATA[10]]></|DSML|parameter>\n',
|
||||
'</|DSML|invoke>\n',
|
||||
'</|DSML|tool_calls>',
|
||||
], ['terminal']);
|
||||
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
const text = collectText(events);
|
||||
assert.equal(finalCalls.length, 1);
|
||||
assert.equal(finalCalls[0].name, 'terminal');
|
||||
assert.deepEqual(finalCalls[0].input, { command: 'find "/home" -type d', timeout: 10 });
|
||||
assert.equal(text.toLowerCase().includes('dsml'), false);
|
||||
});
|
||||
|
||||
test('sieve emits tool_calls for extra leading less-than DSML tags without leaking prefix', () => {
|
||||
const events = runSieve([
|
||||
'<<|DSML|tool_calls>\n',
|
||||
'<<|DSML|invoke name="Bash">\n',
|
||||
'<<|DSML|parameter name="command"><![CDATA[pwd]]></|DSML|parameter>\n',
|
||||
'</|DSML|invoke>\n',
|
||||
'</|DSML|tool_calls>',
|
||||
], ['Bash']);
|
||||
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
const text = collectText(events);
|
||||
assert.equal(finalCalls.length, 1);
|
||||
assert.equal(finalCalls[0].name, 'Bash');
|
||||
assert.deepEqual(finalCalls[0].input, { command: 'pwd' });
|
||||
assert.equal(text.includes('<'), false);
|
||||
});
|
||||
|
||||
test('sieve keeps DSML space lookalike tag names as text', () => {
|
||||
const input = '<|DSML tool_calls_extra><|DSML invoke name="Read"><|DSML parameter name="file_path">/tmp/input.txt</|DSML parameter></|DSML invoke></|DSML tool_calls_extra>';
|
||||
const events = runSieve([input], ['Read']);
|
||||
|
||||
Reference in New Issue
Block a user