feat: expand DSML tool-call alias and fence handling

Add support for DSML wrapper aliases (<dsml|tool_calls>, <|tool_calls>,
<|tool_calls>) alongside canonical XML. Normalize mixed DSML/canonical
tags instead of rejecting them. Add tilde fence (~~~) support, fix
nested fence and unclosed fence handling, support CDATA-protected fence
content, and skip prose mentions when scanning for real tool blocks.
Mirror all changes between Go and Node.js runtimes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-04-27 13:39:50 +08:00
parent 90ce595325
commit a13293e113
19 changed files with 1524 additions and 125 deletions

View File

@@ -8,7 +8,7 @@ const {
stripFencedCodeBlocks,
} = require('./parse_payload');
const TOOL_MARKUP_PREFIXES = ['<tool_calls', '<|dsml|tool_calls'];
const TOOL_MARKUP_PREFIXES = ['<tool_calls', '<|dsml|tool_calls', '<dsml|tool_calls', '<tool_calls', '<|tool_calls'];
function extractToolNames(tools) {
if (!Array.isArray(tools) || tools.length === 0) {

View File

@@ -13,7 +13,102 @@ function stripFencedCodeBlocks(text) {
if (!t) {
return '';
}
return t.replace(/```[\s\S]*?```/g, ' ');
const lines = t.split('\n');
const out = [];
let inFence = false;
let fenceChar = '';
let fenceLen = 0;
let inCDATA = false;
let beforeFenceIdx = 0;
for (let li = 0; li < lines.length; li += 1) {
const line = lines[li];
const lineWithNL = li < lines.length - 1 ? line + '\n' : line;
// CDATA protection
if (inCDATA || cdataStartsBeforeFence(line)) {
out.push(lineWithNL);
inCDATA = updateCDATAStateLine(inCDATA, line);
continue;
}
const trimmed = line.replace(/^[ \t]+/, '');
if (!inFence) {
const fence = parseFenceOpenLine(trimmed);
if (fence) {
inFence = true;
fenceChar = fence.ch;
fenceLen = fence.count;
beforeFenceIdx = out.length;
continue;
}
out.push(lineWithNL);
continue;
}
if (isFenceCloseLine(trimmed, fenceChar, fenceLen)) {
inFence = false;
fenceChar = '';
fenceLen = 0;
}
}
if (inFence) {
// Unclosed fence: keep content before the fence started.
if (beforeFenceIdx > 0) {
return out.slice(0, beforeFenceIdx).join('');
}
return '';
}
return out.join('');
}
/**
 * Recognise a fenced-code opener at the start of a left-trimmed line.
 *
 * A line qualifies when it begins with a run of at least three identical
 * backticks or tildes. Anything after the run (an info string, for example)
 * is ignored here.
 *
 * @param {string} trimmed - Line with leading spaces/tabs already removed.
 * @returns {{ch: string, count: number} | null} The fence character and the
 *   length of the leading run, or null when the line does not open a fence.
 */
function parseFenceOpenLine(trimmed) {
  // Lines shorter than three characters can never hold a fence marker.
  const marker = trimmed.length >= 3 ? trimmed[0] : '';
  if (marker !== '`' && marker !== '~') {
    return null;
  }
  // Index 0 is already known to be the marker; measure the rest of the run.
  let run = 1;
  for (; run < trimmed.length; run += 1) {
    if (trimmed[run] !== marker) {
      break;
    }
  }
  return run >= 3 ? { ch: marker, count: run } : null;
}
/**
 * Decide whether a left-trimmed line closes the currently open fence.
 *
 * The line must start with the same fence character, carry a run at least as
 * long as the opener, and contain nothing but whitespace after that run.
 *
 * @param {string} trimmed - Line with leading spaces/tabs already removed.
 * @param {string} fenceChar - Character of the open fence ('`' or '~').
 * @param {number} fenceLen - Length of the opening fence run.
 * @returns {boolean} True when the line terminates the open fence.
 */
function isFenceCloseLine(trimmed, fenceChar, fenceLen) {
  if (!fenceChar || !trimmed) {
    return false;
  }
  if (trimmed[0] !== fenceChar) {
    return false;
  }
  // First character already matched; count the remainder of the run.
  let run = 1;
  while (run < trimmed.length && trimmed[run] === fenceChar) {
    run += 1;
  }
  if (run < fenceLen) {
    return false;
  }
  const tail = trimmed.slice(run);
  return tail.trim() === '';
}
/**
 * Report whether a line opens a CDATA section before any code-fence marker.
 *
 * The CDATA opener is matched case-insensitively. Both the CDATA offset and
 * the fence offsets are taken from the same lowered copy of the line:
 * `toLowerCase()` can change a string's length (U+0130 expands to two code
 * units), so an offset from the lowered copy must not be compared with an
 * offset from the original. Backticks and tildes are unaffected by case
 * folding, so searching the lowered copy finds the same fences.
 *
 * @param {string} line - A single line of text (no trailing newline needed).
 * @returns {boolean} True when `<![CDATA[` occurs and no "```" or "~~~"
 *   appears earlier on the line; false when there is no CDATA opener or a
 *   fence marker comes first.
 */
function cdataStartsBeforeFence(line) {
  const lower = line.toLowerCase();
  const cdataIdx = lower.indexOf('<![cdata[');
  if (cdataIdx < 0) {
    return false;
  }
  const fencePositions = ['```', '~~~']
    .map((marker) => lower.indexOf(marker))
    .filter((idx) => idx >= 0);
  if (fencePositions.length === 0) {
    return true;
  }
  return cdataIdx < Math.min(...fencePositions);
}
/**
 * Advance the "inside CDATA" flag across one line of text.
 *
 * The line is scanned left to right, alternately looking for `]]>` (while
 * inside a section) and `<![CDATA[` (while outside), matched
 * case-insensitively. The state after the last delimiter on the line is
 * returned.
 *
 * @param {boolean} inCDATA - Whether a CDATA section is open at line start.
 * @param {string} line - The line to scan.
 * @returns {boolean} Whether a CDATA section is still open at line end. An
 *   empty line returns `inCDATA` unchanged.
 */
function updateCDATAStateLine(inCDATA, line) {
  const OPENER = '<![cdata[';
  const CLOSER = ']]>';
  const haystack = line.toLowerCase();
  let inside = inCDATA;
  let cursor = 0;
  while (cursor < haystack.length) {
    // Look for whichever delimiter would flip the current state.
    const token = inside ? CLOSER : OPENER;
    const hit = haystack.indexOf(token, cursor);
    if (hit < 0) {
      // No further delimiter: the current state persists to end of line.
      return Boolean(inside);
    }
    cursor = hit + token.length;
    inside = !inside;
  }
  return inside;
}
function parseMarkupToolCalls(text) {
@@ -44,12 +139,12 @@ function normalizeDSMLToolCallMarkup(text) {
return { text: '', ok: true };
}
const styles = toolMarkupStylesOutsideIgnored(raw);
if (styles.dsml && styles.canonical) {
return { text: raw, ok: false };
}
if (!styles.dsml) {
return { text: raw, ok: true };
}
// Always normalize DSML aliases to canonical form, even when canonical
// tags coexist. Models frequently mix DSML wrapper tags with canonical
// inner tags (e.g., <tool_calls><invoke name="...">).
return {
text: replaceDSMLToolMarkupOutsideIgnored(raw),
ok: true,
@@ -71,6 +166,24 @@ const DSML_TOOL_MARKUP_ALIASES = [
{ from: '</|dsml|invoke>', to: '</invoke>' },
{ from: '<|dsml|parameter', to: '<parameter' },
{ from: '</|dsml|parameter>', to: '</parameter>' },
{ from: '<dsml|tool_calls', to: '<tool_calls' },
{ from: '</dsml|tool_calls>', to: '</tool_calls>' },
{ from: '<dsml|invoke', to: '<invoke' },
{ from: '</dsml|invoke>', to: '</invoke>' },
{ from: '<dsml|parameter', to: '<parameter' },
{ from: '</dsml|parameter>', to: '</parameter>' },
{ from: '<|tool_calls', to: '<tool_calls' },
{ from: '</|tool_calls>', to: '</tool_calls>' },
{ from: '<|invoke', to: '<invoke' },
{ from: '</|invoke>', to: '</invoke>' },
{ from: '<|parameter', to: '<parameter' },
{ from: '</|parameter>', to: '</parameter>' },
{ from: '<tool_calls', to: '<tool_calls' },
{ from: '</tool_calls>', to: '</tool_calls>' },
{ from: '<invoke', to: '<invoke' },
{ from: '</invoke>', to: '</invoke>' },
{ from: '<parameter', to: '<parameter' },
{ from: '</parameter>', to: '</parameter>' },
];
const CANONICAL_TOOL_MARKUP_PREFIXES = [
@@ -190,7 +303,8 @@ function findXmlElementBlocks(text, tag) {
}
const end = findMatchingXmlEndTagOutsideCDATA(source, name, start.bodyStart);
if (!end) {
break;
pos = start.bodyStart;
continue;
}
out.push({
attrs: start.attrs,

View File

@@ -4,43 +4,77 @@ const { parseToolCalls } = require('./parse');
// XML wrapper tag pair used by the streaming sieve.
const XML_TOOL_TAG_PAIRS = [
{ open: '<|dsml|tool_calls', close: '</|dsml|tool_calls>' },
{ open: '<dsml|tool_calls', close: '</dsml|tool_calls>' },
{ open: '<tool_calls', close: '</tool_calls>' },
{ open: '<|tool_calls', close: '</|tool_calls>' },
{ open: '<tool_calls', close: '</tool_calls>' },
];
const XML_TOOL_OPENING_TAGS = XML_TOOL_TAG_PAIRS.map(p => p.open);
const XML_TOOL_OPENING_TAGS = [
...XML_TOOL_TAG_PAIRS.map(p => p.open),
'<|dsml|invoke', '<dsml|invoke', '<invoke', '<|invoke', '<invoke',
];
function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
const lower = captured.toLowerCase();
// Find the FIRST matching open/close pair for the canonical wrapper.
let anyOpenFound = false;
let best = null;
let rejected = null;
// Scan every wrapper occurrence. Prose can mention a wrapper tag before the
// actual tool block, including the same variant as the real block.
for (const pair of XML_TOOL_TAG_PAIRS) {
const openIdx = lower.indexOf(pair.open);
if (openIdx < 0) {
continue;
}
// Ignore closing tags that appear inside CDATA payloads, such as
// write-file content containing tool-call documentation examples.
const closeIdx = findXMLCloseOutsideCDATA(captured, pair.close, openIdx + pair.open.length);
if (closeIdx < 0) {
// Opening tag present but specific closing tag hasn't arrived.
// Return not-ready so buffering continues until the wrapper closes.
return { ready: false, prefix: '', calls: [], suffix: '' };
}
const closeEnd = closeIdx + pair.close.length;
const xmlBlock = captured.slice(openIdx, closeEnd);
let prefixPart = captured.slice(0, openIdx);
let suffixPart = captured.slice(closeEnd);
const parsed = parseToolCalls(xmlBlock, toolNames);
if (Array.isArray(parsed) && parsed.length > 0) {
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
return {
ready: true,
prefix: trimmedFence.prefix,
calls: parsed,
suffix: trimmedFence.suffix,
};
let searchFrom = 0;
while (searchFrom < lower.length) {
const openIdx = findXMLOpenOutsideCDATA(captured, pair.open, searchFrom);
if (openIdx < 0) {
break;
}
// Ignore closing tags that appear inside CDATA payloads, such as
// write-file content containing tool-call documentation examples.
const closeIdx = findMatchingXMLToolWrapperClose(captured, pair.open, pair.close, openIdx);
if (closeIdx < 0) {
anyOpenFound = true;
searchFrom = openIdx + pair.open.length;
continue;
}
const closeEnd = closeIdx + pair.close.length;
const xmlBlock = captured.slice(openIdx, closeEnd);
let prefixPart = captured.slice(0, openIdx);
let suffixPart = captured.slice(closeEnd);
const parsed = parseToolCalls(xmlBlock, toolNames);
if (Array.isArray(parsed) && parsed.length > 0) {
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
if (!best || openIdx < best.start) {
best = {
start: openIdx,
prefix: trimmedFence.prefix,
calls: parsed,
suffix: trimmedFence.suffix,
};
}
break;
}
if (!rejected || openIdx < rejected.start) {
rejected = {
start: openIdx,
prefix: prefixPart + xmlBlock,
suffix: suffixPart,
};
}
searchFrom = openIdx + pair.open.length;
}
}
if (best) {
return { ready: true, prefix: best.prefix, calls: best.calls, suffix: best.suffix };
}
if (anyOpenFound) {
// At least one opening tag was found but none had a matching close tag.
return { ready: false, prefix: '', calls: [], suffix: '' };
}
if (rejected) {
// If this block failed to become a tool call, pass it through as text.
return { ready: true, prefix: prefixPart + xmlBlock, calls: [], suffix: suffixPart };
return { ready: true, prefix: rejected.prefix, calls: [], suffix: rejected.suffix };
}
if (!containsAnyToolCallWrapper(lower)) {
const found = firstInvokeIndex(lower);
@@ -70,6 +104,89 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
/**
 * Find the closing wrapper tag that balances the opener at `openIdx`.
 *
 * Scanning starts just past the opening tag name and tracks nesting depth of
 * the same wrapper. CDATA sections and XML comments are skipped wholesale so
 * tags quoted inside them are never counted. Matching is case-insensitive.
 *
 * The haystack is folded with an ASCII-only, length-preserving lowercase
 * rather than `toLowerCase()`: the latter can lengthen the string (U+0130
 * becomes two code units), which would shift every offset relative to the
 * original text and return an index that does not point at the close tag.
 *
 * @param {string} s - Text to search; non-strings are treated as empty.
 * @param {string} openTag - Opening tag prefix, e.g. `<tool_calls`.
 * @param {string} closeTag - Full closing tag, e.g. `</tool_calls>`.
 * @param {number} openIdx - Index in `s` where the opening tag begins.
 * @returns {number} Index in `s` of the matching close tag, or -1 when the
 *   arguments are unusable, the wrapper is unclosed, or a CDATA section or
 *   comment is left unterminated.
 */
function findMatchingXMLToolWrapperClose(s, openTag, closeTag, openIdx) {
  const text = typeof s === 'string' ? s : '';
  const openTarget = String(openTag || '').toLowerCase();
  const closeTarget = String(closeTag || '').toLowerCase();
  if (!text || !openTarget || !closeTarget || openIdx < 0) {
    return -1;
  }
  // ASCII-only fold keeps lower.length === text.length, so offsets line up.
  const lower = text.replace(/[A-Z]/g, (c) => c.toLowerCase());
  let depth = 1;
  for (let i = openIdx + openTarget.length; i < text.length;) {
    if (lower.startsWith('<![cdata[', i)) {
      const end = lower.indexOf(']]>', i + '<![cdata['.length);
      if (end < 0) {
        // Unterminated CDATA: the close tag cannot be trusted yet.
        return -1;
      }
      i = end + ']]>'.length;
      continue;
    }
    if (lower.startsWith('<!--', i)) {
      const end = lower.indexOf('-->', i + '<!--'.length);
      if (end < 0) {
        return -1;
      }
      i = end + '-->'.length;
      continue;
    }
    if (lower.startsWith(closeTarget, i)) {
      depth -= 1;
      if (depth === 0) {
        return i;
      }
      i += closeTarget.length;
      continue;
    }
    // Only count a nested opener when the tag name ends at a real boundary.
    if (lower.startsWith(openTarget, i) && hasXMLToolTagBoundary(text, i + openTarget.length)) {
      depth += 1;
      i += openTarget.length;
      continue;
    }
    i += 1;
  }
  return -1;
}
/**
 * Locate the next real occurrence of an opening tag, ignoring any that sit
 * inside CDATA sections or XML comments.
 *
 * Matching is case-insensitive and requires a tag boundary (whitespace, `>`,
 * `/`, or end of text) right after the tag name, so `<tool_callsX` does not
 * match `<tool_calls`.
 *
 * The haystack is folded with an ASCII-only, length-preserving lowercase
 * rather than `toLowerCase()`: the latter can lengthen the string (U+0130
 * becomes two code units), which would misalign the returned index and the
 * boundary check against the original text.
 *
 * @param {string} s - Text to search; non-strings are treated as empty.
 * @param {string} openTag - Opening tag prefix, e.g. `<tool_calls`.
 * @param {number} [start] - Index to begin scanning from; negative or
 *   missing values are treated as 0.
 * @returns {number} Index in `s` of the opening tag, or -1 when none is
 *   found or a CDATA section or comment is left unterminated.
 */
function findXMLOpenOutsideCDATA(s, openTag, start) {
  const text = typeof s === 'string' ? s : '';
  const target = String(openTag || '').toLowerCase();
  if (!text || !target) {
    return -1;
  }
  // ASCII-only fold keeps lower.length === text.length, so offsets line up.
  const lower = text.replace(/[A-Z]/g, (c) => c.toLowerCase());
  for (let i = Math.max(0, start || 0); i < text.length;) {
    if (lower.startsWith('<![cdata[', i)) {
      const end = lower.indexOf(']]>', i + '<![cdata['.length);
      if (end < 0) {
        // Unterminated CDATA swallows the rest of the text.
        return -1;
      }
      i = end + ']]>'.length;
      continue;
    }
    if (lower.startsWith('<!--', i)) {
      const end = lower.indexOf('-->', i + '<!--'.length);
      if (end < 0) {
        return -1;
      }
      i = end + '-->'.length;
      continue;
    }
    if (lower.startsWith(target, i) && hasXMLToolTagBoundary(text, i + target.length)) {
      return i;
    }
    i += 1;
  }
  return -1;
}
/**
 * Check that a tag name ends at position `idx`, i.e. the next character
 * cannot be a continuation of a longer tag name.
 *
 * @param {string} text - Text being scanned.
 * @param {number} idx - Index of the character immediately after the name.
 * @returns {boolean} True at or past end of text, or when the character is a
 *   space, tab, newline, carriage return, `>` or `/`; false otherwise.
 */
function hasXMLToolTagBoundary(text, idx) {
  // Running off the end counts as a boundary (e.g. a partially streamed tag).
  if (idx >= text.length) {
    return true;
  }
  switch (text[idx]) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '>':
    case '/':
      return true;
    default:
      return false;
  }
}
function hasOpenXMLToolTag(captured) {
const lower = captured.toLowerCase();
for (const pair of XML_TOOL_TAG_PAIRS) {
@@ -84,12 +201,24 @@ function hasOpenXMLToolTag(captured) {
}
function containsAnyToolCallWrapper(lower) {
return lower.includes('<tool_calls') || lower.includes('<|dsml|tool_calls');
return lower.includes('<tool_calls') ||
lower.includes('<|dsml|tool_calls') ||
lower.includes('<dsml|tool_calls') ||
lower.includes('<tool_calls') ||
lower.includes('<|tool_calls');
}
function firstInvokeIndex(lower) {
const xmlIdx = lower.indexOf('<invoke');
const dsmlIdx = lower.indexOf('<|dsml|invoke');
// Check all DSML-like invoke prefixes.
const dsmlPrefixes = ['<|dsml|invoke', '<dsml|invoke', '<invoke', '<|invoke'];
let dsmlIdx = -1;
for (const prefix of dsmlPrefixes) {
const idx = lower.indexOf(prefix);
if (idx >= 0 && (dsmlIdx < 0 || idx < dsmlIdx)) {
dsmlIdx = idx;
}
}
if (xmlIdx < 0) {
return { index: dsmlIdx, dsml: dsmlIdx >= 0 };
}

View File

@@ -43,6 +43,10 @@ function processToolSieveChunk(state, chunk, toolNames) {
resetIncrementalToolState(state);
if (Array.isArray(consumed.calls) && consumed.calls.length > 0) {
if (consumed.prefix) {
noteText(state, consumed.prefix);
events.push({ type: 'text', text: consumed.prefix });
}
state.pendingToolRaw = captured;
state.pendingToolCalls = consumed.calls;
if (consumed.suffix) {

View File

@@ -7,6 +7,7 @@ function createToolSieveState() {
capturing: false,
codeFenceStack: [],
codeFencePendingTicks: 0,
codeFencePendingTildes: 0,
codeFenceLineStart: true,
pendingToolRaw: '',
pendingToolCalls: [],
@@ -46,8 +47,7 @@ function insideCodeFence(text) {
if (!t) {
return false;
}
const ticks = (t.match(/```/g) || []).length;
return ticks % 2 === 1;
return simulateCodeFenceState([], 0, 0, true, t).stack.length > 0;
}
function insideCodeFenceWithState(state, text) {
@@ -57,6 +57,7 @@ function insideCodeFenceWithState(state, text) {
const simulated = simulateCodeFenceState(
Array.isArray(state.codeFenceStack) ? state.codeFenceStack : [],
Number.isInteger(state.codeFencePendingTicks) ? state.codeFencePendingTicks : 0,
Number.isInteger(state.codeFencePendingTildes) ? state.codeFencePendingTildes : 0,
state.codeFenceLineStart !== false,
text,
);
@@ -70,37 +71,57 @@ function updateCodeFenceState(state, text) {
const next = simulateCodeFenceState(
Array.isArray(state.codeFenceStack) ? state.codeFenceStack : [],
Number.isInteger(state.codeFencePendingTicks) ? state.codeFencePendingTicks : 0,
Number.isInteger(state.codeFencePendingTildes) ? state.codeFencePendingTildes : 0,
state.codeFenceLineStart !== false,
text,
);
state.codeFenceStack = next.stack;
state.codeFencePendingTicks = next.pendingTicks;
state.codeFencePendingTildes = next.pendingTildes;
state.codeFenceLineStart = next.lineStart;
}
function simulateCodeFenceState(stack, pendingTicks, lineStart, text) {
function simulateCodeFenceState(stack, pendingTicks, pendingTildes, lineStart, text) {
const chunk = typeof text === 'string' ? text : '';
const nextStack = Array.isArray(stack) ? [...stack] : [];
let ticks = Number.isInteger(pendingTicks) ? pendingTicks : 0;
let tildes = Number.isInteger(pendingTildes) ? pendingTildes : 0;
let atLineStart = lineStart !== false;
const flushTicks = () => {
const flushPending = () => {
if (ticks > 0) {
if (atLineStart && ticks >= 3) {
applyFenceMarker(nextStack, ticks);
applyFenceMarker(nextStack, ticks); // positive = backtick
}
atLineStart = false;
ticks = 0;
}
if (tildes > 0) {
if (atLineStart && tildes >= 3) {
applyFenceMarker(nextStack, -tildes); // negative = tilde
}
atLineStart = false;
tildes = 0;
}
};
for (let i = 0; i < chunk.length; i += 1) {
const ch = chunk[i];
if (ch === '`') {
if (tildes > 0) {
flushPending();
}
ticks += 1;
continue;
}
flushTicks();
if (ch === '~') {
if (ticks > 0) {
flushPending();
}
tildes += 1;
continue;
}
flushPending();
if (ch === '\n' || ch === '\r') {
atLineStart = true;
continue;
@@ -110,29 +131,37 @@ function simulateCodeFenceState(stack, pendingTicks, lineStart, text) {
}
atLineStart = false;
}
// keep ticks for cross-chunk continuation.
return {
stack: nextStack,
pendingTicks: ticks,
pendingTildes: tildes,
lineStart: atLineStart,
};
}
function applyFenceMarker(stack, ticks) {
// Positive values = backtick fences, negative = tilde fences.
// Closing must match fence type.
function applyFenceMarker(stack, marker) {
if (!Array.isArray(stack)) {
return;
}
if (stack.length === 0) {
stack.push(ticks);
stack.push(marker);
return;
}
const top = stack[stack.length - 1];
if (ticks >= top) {
const sameType = (top > 0 && marker > 0) || (top < 0 && marker < 0);
if (!sameType) {
stack.push(marker);
return;
}
const absMarker = Math.abs(marker);
const absTop = Math.abs(top);
if (absMarker >= absTop) {
stack.pop();
return;
}
// nested/open inner fence using longer marker for robustness.
stack.push(ticks);
stack.push(marker);
}
function hasMeaningfulText(text) {

View File

@@ -2,16 +2,30 @@
const XML_TOOL_SEGMENT_TAGS = [
'<|dsml|tool_calls>', '<|dsml|tool_calls\n', '<|dsml|tool_calls ',
'<|dsml|invoke ', '<|dsml|invoke\n', '<|dsml|invoke\t', '<|dsml|invoke\r',
'<dsml|tool_calls>', '<dsml|tool_calls\n', '<dsml|tool_calls ',
'<dsml|invoke ', '<dsml|invoke\n', '<dsml|invoke\t', '<dsml|invoke\r',
'<tool_calls>', '<tool_calls\n', '<tool_calls ',
'<invoke ', '<invoke\n', '<invoke\t', '<invoke\r',
'<|tool_calls>', '<|tool_calls\n', '<|tool_calls ',
'<|invoke ', '<|invoke\n', '<|invoke\t', '<|invoke\r',
'<tool_calls>', '<tool_calls\n', '<tool_calls ',
'<invoke ', '<invoke\n', '<invoke\t', '<invoke\r',
];
const XML_TOOL_OPENING_TAGS = [
'<|dsml|tool_calls',
'<dsml|tool_calls',
'<tool_calls',
'<|tool_calls',
'<tool_calls',
];
const XML_TOOL_CLOSING_TAGS = [
'</|dsml|tool_calls>',
'</dsml|tool_calls>',
'</tool_calls>',
'</|tool_calls>',
'</tool_calls>',
];