refactor: unify tool markup pipe and CDATA separator into general-purpose separator detector

Replace the hardcoded isToolMarkupPipe (matching |, |, ␂, \x02, !) and
isToolCDATAOpenSeparator (exclusion-based) with a single isToolMarkupSeparator
that treats any Unicode punctuation outside structural characters as a valid
DSML separator. This eliminates the need for a per-character allowlist — novel
separators like ※ are automatically supported without code changes. Also
removes the unused cdataPattern regexp and updates docs to use "non-structural
separator" terminology.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-05-10 04:24:10 +08:00
parent 7a28b9e265
commit 247fc7c788
10 changed files with 78 additions and 57 deletions

View File

@@ -247,6 +247,20 @@ test('parseToolCalls tolerates DSML trailing pipe tag terminator', () => {
assert.deepEqual(calls[0].input, { command: 'find "/home" -type d', timeout: 10 });
});
// Every tag in this payload carries a ※ right before its closing '>'.
// The parser is expected to still yield exactly one Bash call whose
// only input is the CDATA-wrapped command.
test('parseToolCalls tolerates DSML trailing novel separator tag terminator', () => {
  const markupLines = [
    '<DSMLtool_calls※>',
    ' <DSMLinvoke name="Bash"※>',
    ' <DSMLparameter name="command"※><![CDATA[pwd]]></DSMLparameter※>',
    ' </DSMLinvoke※>',
    '</DSMLtool_calls※>',
  ];
  const allowedTools = ['Bash'];

  const parsed = parseToolCalls(markupLines.join('\n'), allowedTools);

  // Check the count first so a failure here points at the parse itself.
  assert.equal(parsed.length, 1);

  const firstCall = parsed[0];
  assert.equal(firstCall.name, 'Bash');
  assert.deepEqual(firstCall.input, { command: 'pwd' });
});
test('parseToolCalls tolerates extra leading less-than before DSML tags', () => {
const payload = [
'<<|DSML|tool_calls>',