feat: add support for CJK angle bracket and trailing attribute separator drift in DSML tool parsing

This commit is contained in:
CJACK
2026-05-10 01:54:31 +08:00
parent 77b6d83266
commit 61d42f8b72
12 changed files with 222 additions and 35 deletions

View File

@@ -129,6 +129,29 @@ test('parseToolCalls parses fullwidth DSML shell drift', () => {
assert.deepEqual(calls[1].input, { file_path: '/Users/aq/Desktop/myproject/Personal_Blog/index.html' });
});
// The payload opens tags with ASCII '<' ... '>' but wraps every CDATA section
// and closing tag in CJK angle brackets (〈 ... 〉). All three Bash invocations
// are expected to come back with their parameters intact.
test('parseToolCalls parses CJK-angle DSM drift', () => {
  // Built line by line and joined with '\n' so the payload text does not
  // depend on how this file is indented.
  const payloadLines = [
    '<DSMtool_calls>',
    '<DSMinvoke name="Bash">',
    '<DSMparameter name="description">〈![CDATA[Show commits on local dev not on origin/dev]]〉〈/DSMparameter〉',
    '<DSMparameter name="command">〈![CDATA[git log --oneline origin/dev..dev]]〉〈/DSMparameter〉',
    '〈/DSMinvoke〉',
    '<DSMinvoke name="Bash">',
    '<DSMparameter name="description">〈![CDATA[Show commits on origin/dev not on local dev]]〉〈/DSMparameter〉',
    '<DSMparameter name="command">〈![CDATA[git log --oneline dev..origin/dev]]〉〈/DSMparameter〉',
    '〈/DSMinvoke〉',
    '<DSMinvoke name="Bash">',
    '<DSMparameter name="description">〈![CDATA[Check tracking branch status]]〉〈/DSMparameter〉',
    '<DSMparameter name="command">〈![CDATA[git status -b --short]]〉〈/DSMparameter〉',
    '〈/DSMinvoke〉',
    '〈/DSMtool_calls〉',
  ];
  const parsed = parseToolCalls(payloadLines.join('\n'), ['Bash']);

  // Check the count first so a short result fails here rather than on a
  // property access of a missing entry below.
  assert.equal(parsed.length, 3);

  const firstCall = parsed[0];
  assert.equal(firstCall.name, 'Bash');
  assert.equal(firstCall.input.command, 'git log --oneline origin/dev..dev');

  assert.equal(parsed[1].input.description, 'Show commits on origin/dev not on local dev');
  assert.equal(parsed[2].input.command, 'git status -b --short');
});
test('parseToolCalls parses DSML control separator drift', () => {
for (const sep of ['␂', '\x02']) {
const payload = `<DSML${sep}tool_calls>
@@ -523,6 +546,22 @@ test('sieve emits tool_calls for arbitrary-prefixed tool tags', () => {
assert.equal(text.includes('💥'), false);
});
// Feeds the CJK-angle-bracket payload to runSieve as six separate chunks and
// expects exactly one Bash call from the 'tool_calls' events, with
// collectText(events) returning an empty string.
test('sieve emits tool_calls for CJK-angle DSM drift', () => {
  // Chunk boundaries are part of the scenario: one chunk per markup line,
  // each ending in '\n' except the last.
  const chunks = [
    '<DSMtool_calls>\n',
    '<DSMinvoke name="Bash">\n',
    '<DSMparameter name="description">〈![CDATA[Check tracking branch status]]〉〈/DSMparameter〉\n',
    '<DSMparameter name="command">〈![CDATA[git status -b --short]]〉〈/DSMparameter〉\n',
    '〈/DSMinvoke〉\n',
    '〈/DSMtool_calls〉',
  ];
  const events = runSieve(chunks, ['Bash']);

  // Gather the calls carried by every 'tool_calls' event, in emission order.
  const finalCalls = events
    .filter((evt) => evt.type === 'tool_calls')
    .flatMap((evt) => evt.calls);

  assert.equal(finalCalls.length, 1);

  const [onlyCall] = finalCalls;
  assert.equal(onlyCall.name, 'Bash');
  assert.equal(onlyCall.input.command, 'git status -b --short');

  // Presumably guards against tool markup leaking into the text output;
  // confirm against collectText's definition.
  assert.equal(collectText(events), '');
});
test('sieve emits all-empty arbitrary-prefixed tool tags without leaking text', () => {
const payload = [
'<TDSMLtool_calls>\n',