feat: improve CDATA and DSML tag parsing robustness with support for fullwidth-bang, ideographic-comma, and extended quote/separator normalization.

This commit is contained in:
CJACK
2026-05-10 03:41:55 +08:00
parent 61d42f8b72
commit 7a28b9e265
16 changed files with 460 additions and 32 deletions

View File

@@ -152,6 +152,40 @@ test('parseToolCalls parses CJK-angle DSM drift', () => {
assert.equal(calls[2].input.command, 'git status -b --short');
});
test('parseToolCalls parses fullwidth-bang DSML drift', () => {
  // Drifted DSML block: attribute values use curly quotes (“ ”) and the
  // CDATA opener is written as `<[CDATA[`.
  // NOTE(review): the payload below contains no U+FF01 (fullwidth bang)
  // characters even though the test name refers to them — confirm they
  // were not dropped when this text was copied or extracted.
  const dsmlText = `<DSMLtool_calls>
<DSMLinvoke name=“Bash”>
<DSMLparameter name=“command”><[CDATA[lsof -i :4321 -t]]></DSMLparameter>
<DSMLparameter name=“description”><[CDATA[Verify port 4321 is free]]></DSMLparameter>
</DSMLinvoke>
</DSMLtool_calls>`;
  const allowedTools = ['Bash'];

  const parsed = parseToolCalls(dsmlText, allowedTools);

  // Exactly one Bash invocation is expected, with both CDATA bodies unwrapped.
  assert.equal(parsed.length, 1);
  const { name, input } = parsed[0];
  assert.equal(name, 'Bash');
  assert.equal(input.command, 'lsof -i :4321 -t');
  assert.equal(input.description, 'Verify port 4321 is free');
});
test('parseToolCalls parses ideographic-comma DSML drift', () => {
  // Every DSML separator is replaced with the ideographic comma (、),
  // including the CDATA opener (`<、[CDATA[`) and the closing tags
  // (`<、/DSML、…>`). The command parameter carries a multi-line heredoc.
  const dsmlText = `<、DSML、tool_calls>
<、DSML、invoke name="Bash">
<、DSML、parameter name="command"><、[CDATA[git commit -m "$(cat <<'EOF'
feat: expand fullwidth bang separator and curly quote tolerance in DSML tool parsing
Co-Authored-By: Claude Opus 4.6 noreply@anthropic.com
EOF
)"]]><、/DSML、parameter>
<、DSML、parameter name="description"><、[CDATA[Create commit with staged changes]]><、/DSML、parameter>
<、/DSML、invoke>
<、/DSML、tool_calls>`;
  const allowedTools = ['Bash'];

  const parsed = parseToolCalls(dsmlText, allowedTools);

  assert.equal(parsed.length, 1);
  const { name, input } = parsed[0];
  assert.equal(name, 'Bash');

  // The heredoc is only checked by substring: its first line and its
  // trailer line must both be present in the parsed command.
  const hasHeredocOpener = input.command.includes('git commit -m "$(cat <<\'EOF\'');
  assert.equal(hasHeredocOpener, true);
  const hasTrailer = input.command.includes('Co-Authored-By: Claude Opus 4.6 noreply@anthropic.com');
  assert.equal(hasTrailer, true);

  assert.equal(input.description, 'Create commit with staged changes');
});
test('parseToolCalls parses DSML control separator drift', () => {
for (const sep of ['␂', '\x02']) {
const payload = `<DSML${sep}tool_calls>
@@ -562,6 +596,42 @@ test('sieve emits tool_calls for CJK-angle DSM drift', () => {
assert.equal(collectText(events), '');
});
test('sieve emits tool_calls for fullwidth-bang DSML drift', () => {
  // The same drifted DSML block as the parseToolCalls variant, split into
  // one chunk per line and handed to runSieve.
  // NOTE(review): these chunks contain no U+FF01 (fullwidth bang)
  // characters even though the test name refers to them — confirm they
  // were not dropped when this text was copied or extracted.
  const chunks = [
    '<DSMLtool_calls>\n',
    ' <DSMLinvoke name=“Bash”>\n',
    ' <DSMLparameter name=“command”><[CDATA[lsof -i :4321 -t]]></DSMLparameter>\n',
    ' <DSMLparameter name=“description”><[CDATA[Verify port 4321 is free]]></DSMLparameter>\n',
    ' </DSMLinvoke>\n',
    ' </DSMLtool_calls>',
  ];
  const events = runSieve(chunks, ['Bash']);

  // Gather the calls carried by every 'tool_calls' event.
  const toolCallEvents = events.filter((evt) => evt.type === 'tool_calls');
  const finalCalls = toolCallEvents.flatMap((evt) => evt.calls);

  assert.equal(finalCalls.length, 1);
  const { name, input } = finalCalls[0];
  assert.equal(name, 'Bash');
  assert.equal(input.command, 'lsof -i :4321 -t');

  // collectText over the events must yield an empty string.
  assert.equal(collectText(events), '');
});
test('sieve emits tool_calls for ideographic-comma DSML drift', () => {
  // DSML block whose separators are ideographic commas (、), split into
  // chunks so that the heredoc inside the command CDATA spans several of them.
  const chunks = [
    '<、DSML、tool_calls>\n',
    ' <、DSML、invoke name="Bash">\n',
    " <、DSML、parameter name=\"command\"><、[CDATA[git commit -m \"$(cat <<'EOF'\n",
    'feat: expand fullwidth bang separator and curly quote tolerance in DSML tool parsing\n\n',
    'Co-Authored-By: Claude Opus 4.6 noreply@anthropic.com\n',
    'EOF\n',
    ')"]]><、/DSML、parameter>\n',
    ' <、DSML、parameter name="description"><、[CDATA[Create commit with staged changes]]><、/DSML、parameter>\n',
    ' <、/DSML、invoke>\n',
    '<、/DSML、tool_calls>',
  ];
  const events = runSieve(chunks, ['Bash']);

  // Gather the calls carried by every 'tool_calls' event.
  const toolCallEvents = events.filter((evt) => evt.type === 'tool_calls');
  const finalCalls = toolCallEvents.flatMap((evt) => evt.calls);

  assert.equal(finalCalls.length, 1);
  const { name, input } = finalCalls[0];
  assert.equal(name, 'Bash');

  // Only the start of the command is checked here, by substring.
  const mentionsCommit = input.command.includes('git commit -m');
  assert.equal(mentionsCommit, true);

  // collectText over the events must yield an empty string.
  assert.equal(collectText(events), '');
});
test('sieve emits all-empty arbitrary-prefixed tool tags without leaking text', () => {
const payload = [
'<TDSMLtool_calls>\n',