refactor: unify Go/Node XML tool markup scanning and expand DSML alias support

- Add shared ToolMarkupTag scanner (toolcalls_scan.go) replacing hardcoded alias tables
- Support DSML collapsed tag names (<DSMLtool_calls>, <DSMLinvoke>, <DSMLparameter>)
- Parse JSON literal values from parameter bodies (123→number, true→bool, null)
- Recover unclosed CDATA in final parse/flush via SanitizeLooseCDATA
- Align Go and Node implementations (scanToolMarkupTagAt, findMatchingToolMarkupClose)
- Reject bare <invoke> as unsupported syntax, only tool_calls wrapper triggers tool path
- Update API.md and toolcall-semantics.md documentation

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-04-27 17:53:59 +08:00
parent 70467054c3
commit 2d5d211a7a
21 changed files with 1132 additions and 777 deletions

View File

@@ -6,10 +6,10 @@ const {
const {
parseMarkupToolCalls,
stripFencedCodeBlocks,
containsToolCallWrapperSyntaxOutsideIgnored,
sanitizeLooseCDATA,
} = require('./parse_payload');
const TOOL_MARKUP_PREFIXES = ['<tool_calls', '<|dsml|tool_calls', '<|dsml tool_calls', '<dsml|tool_calls', '<dsml tool_calls', '<tool_calls', '<|tool_calls'];
function extractToolNames(tools) {
if (!Array.isArray(tools) || tools.length === 0) {
return [];
@@ -46,7 +46,13 @@ function parseToolCallsDetailed(text, toolNames) {
return result;
}
// XML markup parsing only.
const parsed = parseMarkupToolCalls(normalized);
let parsed = parseMarkupToolCalls(normalized);
if (parsed.length === 0 && normalized.toLowerCase().includes('<![cdata[')) {
const recovered = sanitizeLooseCDATA(normalized);
if (recovered !== normalized) {
parsed = parseMarkupToolCalls(recovered);
}
}
if (parsed.length === 0) {
return result;
}
@@ -73,7 +79,13 @@ function parseStandaloneToolCallsDetailed(text, toolNames) {
return result;
}
// XML markup parsing only.
const parsed = parseMarkupToolCalls(trimmed);
let parsed = parseMarkupToolCalls(trimmed);
if (parsed.length === 0 && trimmed.toLowerCase().includes('<![cdata[')) {
const recovered = sanitizeLooseCDATA(trimmed);
if (recovered !== trimmed) {
parsed = parseMarkupToolCalls(recovered);
}
}
if (parsed.length === 0) {
return result;
}
@@ -110,8 +122,8 @@ function filterToolCallsDetailed(parsed, toolNames) {
}
function looksLikeToolCallSyntax(text) {
const lower = toStringSafe(text).toLowerCase();
return TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
const styles = containsToolCallWrapperSyntaxOutsideIgnored(text);
return styles.dsml || styles.canonical;
}
function shouldSkipToolCallParsingForCodeFenceExample(text) {

View File

@@ -3,6 +3,7 @@
const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
const CDATA_PATTERN = /^<!\[CDATA\[([\s\S]*?)]]>$/i;
const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi;
const TOOL_MARKUP_NAMES = ['tool_calls', 'invoke', 'parameter'];
const {
toStringSafe,
@@ -138,13 +139,10 @@ function normalizeDSMLToolCallMarkup(text) {
if (!raw) {
return { text: '', ok: true };
}
const styles = toolMarkupStylesOutsideIgnored(raw);
const styles = containsToolMarkupSyntaxOutsideIgnored(raw);
if (!styles.dsml) {
return { text: raw, ok: true };
}
// Always normalize DSML aliases to canonical form, even when canonical
// tags coexist. Models frequently mix DSML wrapper tags with canonical
// inner tags (e.g., <tool_calls><invoke name="...">).
return {
text: replaceDSMLToolMarkupOutsideIgnored(raw),
ok: true,
@@ -152,65 +150,21 @@ function normalizeDSMLToolCallMarkup(text) {
}
function containsDSMLToolMarkup(text) {
return toolMarkupStylesOutsideIgnored(text).dsml;
return containsToolMarkupSyntaxOutsideIgnored(text).dsml;
}
function containsCanonicalToolMarkup(text) {
return toolMarkupStylesOutsideIgnored(text).canonical;
return containsToolMarkupSyntaxOutsideIgnored(text).canonical;
}
const DSML_TOOL_MARKUP_ALIASES = [
{ from: '<|dsml|tool_calls', to: '<tool_calls' },
{ from: '</|dsml|tool_calls>', to: '</tool_calls>' },
{ from: '<|dsml|invoke', to: '<invoke' },
{ from: '</|dsml|invoke>', to: '</invoke>' },
{ from: '<|dsml|parameter', to: '<parameter' },
{ from: '</|dsml|parameter>', to: '</parameter>' },
{ from: '<|dsml tool_calls', to: '<tool_calls' },
{ from: '</|dsml tool_calls>', to: '</tool_calls>' },
{ from: '<|dsml invoke', to: '<invoke' },
{ from: '</|dsml invoke>', to: '</invoke>' },
{ from: '<|dsml parameter', to: '<parameter' },
{ from: '</|dsml parameter>', to: '</parameter>' },
{ from: '<dsml tool_calls', to: '<tool_calls' },
{ from: '</dsml tool_calls>', to: '</tool_calls>' },
{ from: '<dsml invoke', to: '<invoke' },
{ from: '</dsml invoke>', to: '</invoke>' },
{ from: '<dsml parameter', to: '<parameter' },
{ from: '</dsml parameter>', to: '</parameter>' },
{ from: '<dsml|tool_calls', to: '<tool_calls' },
{ from: '</dsml|tool_calls>', to: '</tool_calls>' },
{ from: '<dsml|invoke', to: '<invoke' },
{ from: '</dsml|invoke>', to: '</invoke>' },
{ from: '<dsml|parameter', to: '<parameter' },
{ from: '</dsml|parameter>', to: '</parameter>' },
{ from: '<|tool_calls', to: '<tool_calls' },
{ from: '</|tool_calls>', to: '</tool_calls>' },
{ from: '<|invoke', to: '<invoke' },
{ from: '</|invoke>', to: '</invoke>' },
{ from: '<|parameter', to: '<parameter' },
{ from: '</|parameter>', to: '</parameter>' },
{ from: '<tool_calls', to: '<tool_calls' },
{ from: '</tool_calls>', to: '</tool_calls>' },
{ from: '<invoke', to: '<invoke' },
{ from: '</invoke>', to: '</invoke>' },
{ from: '<parameter', to: '<parameter' },
{ from: '</parameter>', to: '</parameter>' },
];
const CANONICAL_TOOL_MARKUP_PREFIXES = [
'<tool_calls',
'</tool_calls>',
'<invoke',
'</invoke>',
'<parameter',
'</parameter>',
];
function toolMarkupStylesOutsideIgnored(text) {
const lower = toStringSafe(text).toLowerCase();
function containsToolCallWrapperSyntaxOutsideIgnored(text) {
const raw = toStringSafe(text);
const styles = { dsml: false, canonical: false };
for (let i = 0; i < lower.length;) {
if (!raw) {
return styles;
}
const lower = raw.toLowerCase();
for (let i = 0; i < raw.length;) {
const skipped = skipXmlIgnoredSection(lower, i);
if (skipped.blocked) {
return styles;
@@ -219,15 +173,55 @@ function toolMarkupStylesOutsideIgnored(text) {
i = skipped.next;
continue;
}
if (CANONICAL_TOOL_MARKUP_PREFIXES.some(prefix => lower.startsWith(prefix, i))) {
styles.canonical = true;
const tag = scanToolMarkupTagAt(raw, i);
if (tag) {
if (tag.name !== 'tool_calls') {
i = tag.end + 1;
continue;
}
if (tag.dsmlLike) {
styles.dsml = true;
} else {
styles.canonical = true;
}
if (styles.dsml && styles.canonical) {
return styles;
}
i = tag.end + 1;
continue;
}
if (DSML_TOOL_MARKUP_ALIASES.some(alias => lower.startsWith(alias.from, i))) {
styles.dsml = true;
}
if (styles.dsml && styles.canonical) {
i += 1;
}
return styles;
}
function containsToolMarkupSyntaxOutsideIgnored(text) {
const raw = toStringSafe(text);
const styles = { dsml: false, canonical: false };
if (!raw) {
return styles;
}
for (let i = 0; i < raw.length;) {
const skipped = skipXmlIgnoredSection(raw.toLowerCase(), i);
if (skipped.blocked) {
return styles;
}
if (skipped.advanced) {
i = skipped.next;
continue;
}
const tag = scanToolMarkupTagAt(raw, i);
if (tag) {
if (tag.dsmlLike) {
styles.dsml = true;
} else {
styles.canonical = true;
}
if (styles.dsml && styles.canonical) {
return styles;
}
i = tag.end + 1;
continue;
}
i += 1;
}
return styles;
@@ -235,6 +229,9 @@ function toolMarkupStylesOutsideIgnored(text) {
function replaceDSMLToolMarkupOutsideIgnored(text) {
const raw = toStringSafe(text);
if (!raw) {
return '';
}
const lower = raw.toLowerCase();
let out = '';
for (let i = 0; i < raw.length;) {
@@ -248,10 +245,14 @@ function replaceDSMLToolMarkupOutsideIgnored(text) {
i = skipped.next;
continue;
}
const alias = DSML_TOOL_MARKUP_ALIASES.find(item => lower.startsWith(item.from, i));
if (alias) {
out += alias.to;
i += alias.from.length;
const tag = scanToolMarkupTagAt(raw, i);
if (tag) {
if (tag.dsmlLike) {
out += `<${tag.closing ? '/' : ''}${tag.name}${raw.slice(tag.nameEnd, tag.end + 1)}`;
} else {
out += raw.slice(tag.start, tag.end + 1);
}
i = tag.end + 1;
continue;
}
out += raw[i];
@@ -417,6 +418,150 @@ function skipXmlIgnoredSection(lower, i) {
return { advanced: false, blocked: false, next: i };
}
/**
 * Try to read one tool-markup tag (`tool_calls`, `invoke` or `parameter`,
 * optionally DSML-decorated) that begins exactly at `start`.
 *
 * Accepted shape: `<`, optional `/`, optional pipe, optional `dsml` followed by
 * any run of separators, then one of the known tag names, an XML tag boundary,
 * and finally the tag end located by `findXmlTagEnd`.
 *
 * Case folding is applied only to the few characters being compared instead of
 * to the whole input. Lowercasing the full text on every call is O(n) per tag,
 * and `String.prototype.toLowerCase` can change the string length (for example
 * U+0130), which would make offsets into the lowered copy disagree with the
 * offsets into `raw` that every returned field is expressed in.
 *
 * @param {*} text - Source text; coerced with `toStringSafe`.
 * @param {number} start - Index of the candidate `<`.
 * @returns {null | {
 *   start: number, end: number, nameStart: number, nameEnd: number,
 *   name: string, closing: boolean, selfClosing: boolean,
 *   dsmlLike: boolean, canonical: boolean
 * }} Tag descriptor (`end` is the index of the tag's last character), or
 *   `null` when no complete tool-markup tag starts at `start`.
 */
function scanToolMarkupTagAt(text, start) {
  const raw = toStringSafe(text);
  if (!raw || start < 0 || start >= raw.length || raw[start] !== '<') {
    return null;
  }

  let i = start + 1;
  const closing = raw[i] === '/';
  if (closing) {
    i += 1;
  }

  // A leading pipe and/or a `dsml` marker flags the tag as a DSML alias.
  let dsmlLike = false;
  if (i < raw.length && isToolMarkupPipe(raw[i])) {
    dsmlLike = true;
    i += 1;
  }
  const dsmlMarker = 'dsml';
  if (raw.slice(i, i + dsmlMarker.length).toLowerCase() === dsmlMarker) {
    dsmlLike = true;
    i += dsmlMarker.length;
    while (i < raw.length && isToolMarkupSeparator(raw[i])) {
      i += 1;
    }
  }

  // Only a window as long as the longest known name has to be case-folded.
  const longestName = Math.max(0, ...TOOL_MARKUP_NAMES.map((candidate) => candidate.length));
  const nameWindow = raw.slice(i, i + longestName).toLowerCase();
  const { name, len } = matchToolMarkupName(nameWindow, 0);
  if (!name) {
    return null;
  }

  const nameEnd = i + len;
  if (!hasXmlTagBoundary(raw, nameEnd)) {
    return null;
  }
  const end = findXmlTagEnd(raw, nameEnd);
  if (end < 0) {
    return null;
  }

  return {
    start,
    end,
    nameStart: i,
    nameEnd,
    name,
    closing,
    selfClosing: raw.slice(start, end + 1).trim().endsWith('/>'),
    dsmlLike,
    canonical: !dsmlLike,
  };
}
/**
 * Walk forward from `from` and return the first tool-markup tag that
 * `scanToolMarkupTagAt` recognizes, stepping over any span that
 * `skipXmlIgnoredSection` reports as skippable.
 *
 * @param {*} text - Source text; coerced with `toStringSafe`.
 * @param {number} [from] - Offset to start at; missing or negative means 0.
 * @returns {object|null} The tag descriptor, or `null` when the end of the
 *   text is reached or `skipXmlIgnoredSection` reports `blocked`.
 */
function findToolMarkupTagOutsideIgnored(text, from) {
  const source = toStringSafe(text);
  const folded = source.toLowerCase();
  let cursor = Math.max(0, from || 0);

  while (cursor < source.length) {
    const ignored = skipXmlIgnoredSection(folded, cursor);
    if (ignored.blocked) {
      return null;
    }

    if (ignored.advanced) {
      cursor = ignored.next;
    } else {
      const found = scanToolMarkupTagAt(source, cursor);
      if (found) {
        return found;
      }
      cursor += 1;
    }
  }

  return null;
}
/**
 * Locate the closing tag that balances `openTag`, counting nested tags of the
 * same name. Tags with a different name are stepped over, and self-closing
 * tags of the same name do not change the nesting depth.
 *
 * @param {*} text - Source text; coerced with `toStringSafe`.
 * @param {{name: string, end: number, closing: boolean}} openTag - Descriptor
 *   of the opening tag whose partner is wanted.
 * @returns {object|null} Descriptor of the matching closing tag, or `null`
 *   when the input is unusable or no balancing close tag is found.
 */
function findMatchingToolMarkupClose(text, openTag) {
  const source = toStringSafe(text);
  const usable = Boolean(source && openTag && openTag.name && !openTag.closing);
  if (!usable) {
    return null;
  }

  let unclosed = 1;
  let cursor = openTag.end + 1;
  while (cursor < source.length) {
    const candidate = findToolMarkupTagOutsideIgnored(source, cursor);
    if (!candidate) {
      return null;
    }
    cursor = candidate.end + 1;

    if (candidate.name !== openTag.name) {
      continue;
    }
    if (candidate.closing) {
      unclosed -= 1;
      if (unclosed === 0) {
        return candidate;
      }
      continue;
    }
    if (!candidate.selfClosing) {
      unclosed += 1;
    }
  }

  return null;
}
/**
 * Detect an unfinished tool-markup opening tag at the very end of `text`.
 *
 * The text after the last `<` is compared, case-insensitively, against the
 * known opening spellings; it counts as partial markup when it is a prefix of
 * one of them and contains no `>` yet.
 *
 * The original table carried one row that merely repeated the plain
 * `<tool_calls` / `<invoke` / `<parameter` entries. That row is restored here
 * as the fullwidth-pipe (U+FF5C) spellings, and a fullwidth DSML row is added
 * to mirror the ASCII `<|dsml|...` row.
 * NOTE(review): the fullwidth rows assume the pipe check used by the tag
 * scanner also accepts U+FF5C — confirm against `isToolMarkupPipe`.
 *
 * @param {*} text - Buffered output; coerced with `toStringSafe`.
 * @returns {number} Index of the `<` that starts the partial tag, or -1.
 */
function findPartialToolMarkupStart(text) {
  const raw = toStringSafe(text);
  const lastLT = raw.lastIndexOf('<');
  if (lastLT < 0) {
    return -1;
  }
  const tail = raw.slice(lastLT);
  if (tail.includes('>')) {
    // The last tag is already terminated, so nothing is pending.
    return -1;
  }
  const lowerTail = tail.toLowerCase();
  const candidates = [
    '<tool_calls', '<invoke', '<parameter',
    '<|tool_calls', '<|invoke', '<|parameter',
    '<\uFF5Ctool_calls', '<\uFF5Cinvoke', '<\uFF5Cparameter',
    '<|dsml|tool_calls', '<|dsml|invoke', '<|dsml|parameter',
    '<\uFF5Cdsml\uFF5Ctool_calls', '<\uFF5Cdsml\uFF5Cinvoke', '<\uFF5Cdsml\uFF5Cparameter',
    '<dsmltool_calls', '<dsmlinvoke', '<dsmlparameter',
    '<dsml tool_calls', '<dsml invoke', '<dsml parameter',
    '<dsml|tool_calls', '<dsml|invoke', '<dsml|parameter',
    '<|dsmltool_calls', '<|dsmlinvoke', '<|dsmlparameter',
    '<|dsml tool_calls', '<|dsml invoke', '<|dsml parameter',
  ];
  return candidates.some((candidate) => candidate.startsWith(lowerTail)) ? lastLT : -1;
}
/**
 * Report whether `ch` is a pipe character used by DSML-style tag decoration.
 *
 * Accepts the ASCII vertical bar and the fullwidth vertical line (U+FF5C).
 * The second operand was previously an empty string, which can never equal a
 * character read from a string, so only the ASCII bar was ever recognized.
 * NOTE(review): U+FF5C is reconstructed from the duplicated alias entries
 * elsewhere in this module, where the same character appears to have been
 * lost — confirm against the upstream file.
 *
 * @param {string} ch - A single character.
 * @returns {boolean} `true` for `|` or U+FF5C.
 */
function isToolMarkupPipe(ch) {
  return ch === '|' || ch === '\uFF5C';
}
/**
 * Report whether `ch` may sit between a `dsml` marker and the tag name:
 * a space, tab, carriage return, line feed, or anything `isToolMarkupPipe`
 * accepts.
 *
 * @param {string} ch - A single character.
 * @returns {boolean} `true` when `ch` is a separator.
 */
function isToolMarkupSeparator(ch) {
  switch (ch) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      return true;
    default:
      return isToolMarkupPipe(ch);
  }
}
/**
 * Find the first entry of `TOOL_MARKUP_NAMES` that occurs in `lower` exactly
 * at offset `start`.
 *
 * @param {string} lower - Text to probe; the name is compared as-is, so the
 *   caller is expected to pass lowercased text.
 * @param {number} start - Offset at which the name must begin.
 * @returns {{name: string, len: number}} The matched name and its length, or
 *   `{ name: '', len: 0 }` when none matches.
 */
function matchToolMarkupName(lower, start) {
  const matched = TOOL_MARKUP_NAMES.find((candidate) => lower.startsWith(candidate, start));
  return matched ? { name: matched, len: matched.length } : { name: '', len: 0 };
}
function findXmlTagEnd(text, from) {
let quote = '';
for (let i = Math.max(0, from || 0); i < text.length; i += 1) {
@@ -494,7 +639,8 @@ function parseMarkupKVObject(text) {
function parseMarkupValue(raw) {
const cdata = extractStandaloneCDATA(raw);
if (cdata.ok) {
return cdata.value;
const literal = parseJSONLiteralValue(cdata.value);
return literal.ok ? literal.value : cdata.value;
}
const s = toStringSafe(extractRawTagValue(raw)).trim();
if (!s) {
@@ -511,12 +657,9 @@ function parseMarkupValue(raw) {
}
}
if (s.startsWith('{') || s.startsWith('[')) {
try {
return JSON.parse(s);
} catch (_err) {
return s;
}
const literal = parseJSONLiteralValue(s);
if (literal.ok) {
return literal.value;
}
return s;
}
@@ -554,9 +697,65 @@ function extractStandaloneCDATA(inner) {
if (cdataMatch && cdataMatch[1] !== undefined) {
return { ok: true, value: cdataMatch[1] };
}
if (s.toLowerCase().startsWith('<![cdata[')) {
return { ok: true, value: s.slice('<![CDATA['.length) };
}
return { ok: false, value: '' };
}
/**
 * Attempt to read `raw` as a JSON value.
 *
 * The trimmed text is handed to `JSON.parse` only when its first character
 * could begin a JSON value (`{`, `[`, `"`, `-`, a digit, or the first letter
 * of `true`, `false`, `null`).
 *
 * @param {*} raw - Candidate text; coerced with `toStringSafe`.
 * @returns {{ok: boolean, value: *}} `ok: true` with the parsed value on
 *   success, otherwise `{ ok: false, value: null }`.
 */
function parseJSONLiteralValue(raw) {
  const candidate = toStringSafe(raw).trim();
  const plausible = candidate !== '' && '{["-0123456789tfn'.includes(candidate[0]);

  if (plausible) {
    try {
      return { ok: true, value: JSON.parse(candidate) };
    } catch (_err) {
      // Not valid JSON after all; report a miss below.
    }
  }

  return { ok: false, value: null };
}
/**
 * Repair text in which a CDATA section was opened but never closed.
 *
 * Properly closed `<![CDATA[ ... ]]>` sections are kept verbatim. At the first
 * opener with no `]]>` after it, the opener itself is dropped and the rest of
 * the text is kept as-is. The opener is matched case-insensitively.
 *
 * @param {*} text - Source text; coerced with `toStringSafe`.
 * @returns {string} The repaired text, the coerced input when nothing needed
 *   repair, or `''` for empty input.
 */
function sanitizeLooseCDATA(text) {
  const source = toStringSafe(text);
  if (!source) {
    return '';
  }

  const opener = '<![cdata[';
  const closer = ']]>';
  const folded = source.toLowerCase();
  const kept = [];
  let cursor = 0;

  while (cursor < source.length) {
    const openAt = folded.indexOf(opener, cursor);
    if (openAt < 0) {
      // No further CDATA sections: every section seen so far was closed.
      return source;
    }
    kept.push(source.slice(cursor, openAt));

    const bodyAt = openAt + opener.length;
    const closeAt = folded.indexOf(closer, bodyAt);
    if (closeAt < 0) {
      // Dangling opener: drop it and keep everything that followed.
      kept.push(source.slice(bodyAt));
      return kept.join('');
    }

    cursor = closeAt + closer.length;
    kept.push(source.slice(openAt, cursor));
  }

  return source;
}
function parseTagAttributes(raw) {
const source = toStringSafe(raw);
const out = {};
@@ -631,4 +830,10 @@ module.exports = {
stripFencedCodeBlocks,
parseMarkupToolCalls,
normalizeDSMLToolCallMarkup,
containsToolMarkupSyntaxOutsideIgnored,
containsToolCallWrapperSyntaxOutsideIgnored,
findToolMarkupTagOutsideIgnored,
findMatchingToolMarkupClose,
findPartialToolMarkupStart,
sanitizeLooseCDATA,
};

View File

@@ -1,71 +1,53 @@
'use strict';
const { parseToolCalls } = require('./parse');
// XML wrapper tag pair used by the streaming sieve.
const XML_TOOL_TAG_PAIRS = [
{ open: '<|dsml|tool_calls', close: '</|dsml|tool_calls>' },
{ open: '<|dsml tool_calls', close: '</|dsml tool_calls>' },
{ open: '<dsml|tool_calls', close: '</dsml|tool_calls>' },
{ open: '<dsml tool_calls', close: '</dsml tool_calls>' },
{ open: '<tool_calls', close: '</tool_calls>' },
{ open: '<|tool_calls', close: '</|tool_calls>' },
{ open: '<tool_calls', close: '</tool_calls>' },
];
const XML_TOOL_OPENING_TAGS = [
...XML_TOOL_TAG_PAIRS.map(p => p.open),
'<|dsml|invoke', '<|dsml invoke', '<dsml|invoke', '<dsml invoke', '<invoke', '<|invoke', '<invoke',
];
const {
findToolMarkupTagOutsideIgnored,
findMatchingToolMarkupClose,
findPartialToolMarkupStart,
} = require('./parse_payload');
function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
const lower = captured.toLowerCase();
let anyOpenFound = false;
let best = null;
let rejected = null;
// Scan every wrapper occurrence. Prose can mention a wrapper tag before the
// actual tool block, including the same variant as the real block.
for (const pair of XML_TOOL_TAG_PAIRS) {
let searchFrom = 0;
while (searchFrom < lower.length) {
const openIdx = findXMLOpenOutsideCDATA(captured, pair.open, searchFrom);
if (openIdx < 0) {
break;
}
// Ignore closing tags that appear inside CDATA payloads, such as
// write-file content containing tool-call documentation examples.
const closeIdx = findMatchingXMLToolWrapperClose(captured, pair.open, pair.close, openIdx);
if (closeIdx < 0) {
anyOpenFound = true;
searchFrom = openIdx + pair.open.length;
continue;
}
const closeEnd = closeIdx + pair.close.length;
const xmlBlock = captured.slice(openIdx, closeEnd);
let prefixPart = captured.slice(0, openIdx);
let suffixPart = captured.slice(closeEnd);
const parsed = parseToolCalls(xmlBlock, toolNames);
if (Array.isArray(parsed) && parsed.length > 0) {
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
if (!best || openIdx < best.start) {
best = {
start: openIdx,
prefix: trimmedFence.prefix,
calls: parsed,
suffix: trimmedFence.suffix,
};
}
break;
}
if (!rejected || openIdx < rejected.start) {
rejected = {
start: openIdx,
prefix: prefixPart + xmlBlock,
suffix: suffixPart,
// Scan every recognized wrapper occurrence. Prose can mention a wrapper tag
// before the actual tool block, including the same variant as the real block.
for (let searchFrom = 0; searchFrom < captured.length;) {
const openTag = findFirstToolTag(captured, searchFrom, 'tool_calls', false);
if (!openTag) {
break;
}
const closeTag = findMatchingToolMarkupClose(captured, openTag);
if (!closeTag) {
anyOpenFound = true;
searchFrom = openTag.end + 1;
continue;
}
const xmlBlock = captured.slice(openTag.start, closeTag.end + 1);
const prefixPart = captured.slice(0, openTag.start);
const suffixPart = captured.slice(closeTag.end + 1);
const parsed = parseToolCalls(xmlBlock, toolNames);
if (Array.isArray(parsed) && parsed.length > 0) {
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
if (!best || openTag.start < best.start) {
best = {
start: openTag.start,
prefix: trimmedFence.prefix,
calls: parsed,
suffix: trimmedFence.suffix,
};
}
searchFrom = openIdx + pair.open.length;
break;
}
if (!rejected || openTag.start < rejected.start) {
rejected = {
start: openTag.start,
prefix: prefixPart + xmlBlock,
suffix: suffixPart,
};
}
searchFrom = openTag.end + 1;
}
if (best) {
return { ready: true, prefix: best.prefix, calls: best.calls, suffix: best.suffix };
@@ -78,17 +60,15 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
// If this block failed to become a tool call, pass it through as text.
return { ready: true, prefix: rejected.prefix, calls: [], suffix: rejected.suffix };
}
if (!containsAnyToolCallWrapper(lower)) {
const found = firstInvokeIndex(lower);
if (found.index >= 0) {
const closeTag = found.dsml ? '</|dsml|tool_calls>' : '</tool_calls>';
const openWrapper = found.dsml ? '<|DSML|tool_calls>' : '<tool_calls>';
const closeIdx = findXMLCloseOutsideCDATA(captured, closeTag, found.index);
if (closeIdx > found.index) {
const closeEnd = closeIdx + closeTag.length;
const xmlBlock = openWrapper + captured.slice(found.index, closeIdx) + closeTag;
let prefixPart = captured.slice(0, found.index);
let suffixPart = captured.slice(closeEnd);
const invokeTag = findFirstToolTag(captured, 0, 'invoke', false);
if (invokeTag) {
const wrapperOpen = findFirstToolTag(captured, 0, 'tool_calls', false);
if (!wrapperOpen || wrapperOpen.start > invokeTag.start) {
const closeTag = findFirstToolTag(captured, invokeTag.start + 1, 'tool_calls', true);
if (closeTag && closeTag.start > invokeTag.start) {
const xmlBlock = '<tool_calls>' + captured.slice(invokeTag.start, closeTag.end + 1);
const prefixPart = captured.slice(0, invokeTag.start);
const suffixPart = captured.slice(closeTag.end + 1);
const parsed = parseToolCalls(xmlBlock, toolNames);
if (Array.isArray(parsed) && parsed.length > 0) {
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
@@ -99,194 +79,43 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
suffix: trimmedFence.suffix,
};
}
return { ready: true, prefix: prefixPart + captured.slice(found.index, closeEnd), calls: [], suffix: suffixPart };
return { ready: true, prefix: prefixPart + captured.slice(invokeTag.start, closeTag.end + 1), calls: [], suffix: suffixPart };
}
}
}
return { ready: false, prefix: '', calls: [], suffix: '' };
}
function findMatchingXMLToolWrapperClose(s, openTag, closeTag, openIdx) {
const text = typeof s === 'string' ? s : '';
const openTarget = String(openTag || '').toLowerCase();
const closeTarget = String(closeTag || '').toLowerCase();
if (!text || !openTarget || !closeTarget || openIdx < 0) {
return -1;
}
const lower = text.toLowerCase();
let depth = 1;
for (let i = openIdx + openTarget.length; i < text.length;) {
if (lower.startsWith('<![cdata[', i)) {
const end = lower.indexOf(']]>', i + '<![cdata['.length);
if (end < 0) {
return -1;
}
i = end + ']]>'.length;
continue;
}
if (lower.startsWith('<!--', i)) {
const end = lower.indexOf('-->', i + '<!--'.length);
if (end < 0) {
return -1;
}
i = end + '-->'.length;
continue;
}
if (lower.startsWith(closeTarget, i)) {
depth -= 1;
if (depth === 0) {
return i;
}
i += closeTarget.length;
continue;
}
if (lower.startsWith(openTarget, i) && hasXMLToolTagBoundary(text, i + openTarget.length)) {
depth += 1;
i += openTarget.length;
continue;
}
i += 1;
}
return -1;
}
function findXMLOpenOutsideCDATA(s, openTag, start) {
const text = typeof s === 'string' ? s : '';
const target = String(openTag || '').toLowerCase();
if (!text || !target) {
return -1;
}
const lower = text.toLowerCase();
for (let i = Math.max(0, start || 0); i < text.length;) {
if (lower.startsWith('<![cdata[', i)) {
const end = lower.indexOf(']]>', i + '<![cdata['.length);
if (end < 0) {
return -1;
}
i = end + ']]>'.length;
continue;
}
if (lower.startsWith('<!--', i)) {
const end = lower.indexOf('-->', i + '<!--'.length);
if (end < 0) {
return -1;
}
i = end + '-->'.length;
continue;
}
if (lower.startsWith(target, i) && hasXMLToolTagBoundary(text, i + target.length)) {
return i;
}
i += 1;
}
return -1;
}
function hasXMLToolTagBoundary(text, idx) {
if (idx >= text.length) {
return true;
}
return [' ', '\t', '\n', '\r', '>', '/'].includes(text[idx]);
}
function hasOpenXMLToolTag(captured) {
for (const pair of XML_TOOL_TAG_PAIRS) {
const openIdx = findXMLOpenOutsideCDATA(captured, pair.open, 0);
if (openIdx >= 0) {
if (findMatchingXMLToolWrapperClose(captured, pair.open, pair.close, openIdx) < 0) {
return true;
}
for (let pos = 0; pos < captured.length;) {
const tag = findFirstToolTag(captured, pos, 'tool_calls', false);
if (!tag) {
return false;
}
if (!findMatchingToolMarkupClose(captured, tag)) {
return true;
}
pos = tag.end + 1;
}
return false;
}
function containsAnyToolCallWrapper(lower) {
return lower.includes('<tool_calls') ||
lower.includes('<|dsml|tool_calls') ||
lower.includes('<|dsml tool_calls') ||
lower.includes('<dsml|tool_calls') ||
lower.includes('<dsml tool_calls') ||
lower.includes('<tool_calls') ||
lower.includes('<|tool_calls');
}
function firstInvokeIndex(lower) {
const xmlIdx = lower.indexOf('<invoke');
// Check all DSML-like invoke prefixes.
const dsmlPrefixes = ['<|dsml|invoke', '<|dsml invoke', '<dsml|invoke', '<dsml invoke', '<invoke', '<|invoke'];
let dsmlIdx = -1;
for (const prefix of dsmlPrefixes) {
const idx = lower.indexOf(prefix);
if (idx >= 0 && (dsmlIdx < 0 || idx < dsmlIdx)) {
dsmlIdx = idx;
function findFirstToolTag(text, from, name, closing) {
for (let pos = Math.max(0, from || 0); pos < text.length;) {
const tag = findToolMarkupTagOutsideIgnored(text, pos);
if (!tag) {
return null;
}
}
if (xmlIdx < 0) {
return { index: dsmlIdx, dsml: dsmlIdx >= 0 };
}
if (dsmlIdx < 0) {
return { index: xmlIdx, dsml: false };
}
if (dsmlIdx < xmlIdx) {
return { index: dsmlIdx, dsml: true };
}
return { index: xmlIdx, dsml: false };
}
function findPartialXMLToolTagStart(s) {
const lastLT = s.lastIndexOf('<');
if (lastLT < 0) {
return -1;
}
const tail = s.slice(lastLT);
if (tail.includes('>')) {
return -1;
}
const lowerTail = tail.toLowerCase();
for (const tag of XML_TOOL_OPENING_TAGS) {
const tagWithLT = tag.startsWith('<') ? tag : '<' + tag;
if (tagWithLT.startsWith(lowerTail)) {
return lastLT;
if (tag.name === name && tag.closing === closing) {
return tag;
}
pos = tag.end + 1;
}
return -1;
}
function findXMLCloseOutsideCDATA(s, closeTag, start) {
const text = typeof s === 'string' ? s : '';
const target = String(closeTag || '').toLowerCase();
if (!text || !target) {
return -1;
}
const lower = text.toLowerCase();
for (let i = Math.max(0, start || 0); i < text.length;) {
if (lower.startsWith('<![cdata[', i)) {
const end = lower.indexOf(']]>', i + '<![cdata['.length);
if (end < 0) {
return -1;
}
i = end + ']]>'.length;
continue;
}
if (lower.startsWith('<!--', i)) {
const end = lower.indexOf('-->', i + '<!--'.length);
if (end < 0) {
return -1;
}
i = end + '-->'.length;
continue;
}
if (lower.startsWith(target, i)) {
return i;
}
i += 1;
}
return -1;
return null;
}
module.exports = {
consumeXMLToolCapture,
hasOpenXMLToolTag,
findPartialXMLToolTagStart,
findPartialXMLToolTagStart: findPartialToolMarkupStart,
};

View File

@@ -6,8 +6,9 @@ const {
} = require('./state');
const { trimWrappingJSONFence } = require('./jsonscan');
const {
XML_TOOL_SEGMENT_TAGS,
} = require('./tool-keywords');
findToolMarkupTagOutsideIgnored,
sanitizeLooseCDATA,
} = require('./parse_payload');
const {
consumeXMLToolCapture: consumeXMLToolCaptureImpl,
hasOpenXMLToolTag,
@@ -117,8 +118,27 @@ function flushToolSieve(state, toolNames) {
}
} else if (state.capture) {
const content = state.capture;
noteText(state, content);
events.push({ type: 'text', text: content });
const recovered = sanitizeLooseCDATA(content);
if (recovered !== content) {
const recoveredResult = consumeXMLToolCaptureImpl(recovered, toolNames, trimWrappingJSONFence);
if (recoveredResult.ready && Array.isArray(recoveredResult.calls) && recoveredResult.calls.length > 0) {
if (recoveredResult.prefix) {
noteText(state, recoveredResult.prefix);
events.push({ type: 'text', text: recoveredResult.prefix });
}
events.push({ type: 'tool_calls', calls: recoveredResult.calls });
if (recoveredResult.suffix) {
noteText(state, recoveredResult.suffix);
events.push({ type: 'text', text: recoveredResult.suffix });
}
} else {
noteText(state, content);
events.push({ type: 'text', text: content });
}
} else {
noteText(state, content);
events.push({ type: 'text', text: content });
}
}
state.capture = '';
state.capturing = false;
@@ -155,26 +175,16 @@ function findToolSegmentStart(state, s) {
if (!s) {
return -1;
}
const lower = s.toLowerCase();
let offset = 0;
while (true) {
// Only check XML tool tags.
let bestIdx = -1;
let matchedTag = '';
for (const tag of XML_TOOL_SEGMENT_TAGS) {
const idx = lower.indexOf(tag, offset);
if (idx >= 0 && (bestIdx < 0 || idx < bestIdx)) {
bestIdx = idx;
matchedTag = tag;
}
}
if (bestIdx < 0) {
const tag = findToolMarkupTagOutsideIgnored(s, offset);
if (!tag) {
return -1;
}
if (!insideCodeFenceWithState(state, s.slice(0, bestIdx))) {
return bestIdx;
if (!insideCodeFenceWithState(state, s.slice(0, tag.start))) {
return tag.start;
}
offset = bestIdx + matchedTag.length;
offset = tag.end + 1;
}
}

View File

@@ -3,10 +3,14 @@
const XML_TOOL_SEGMENT_TAGS = [
'<|dsml|tool_calls>', '<|dsml|tool_calls\n', '<|dsml|tool_calls ',
'<|dsml|invoke ', '<|dsml|invoke\n', '<|dsml|invoke\t', '<|dsml|invoke\r',
'<|dsmltool_calls>', '<|dsmltool_calls\n', '<|dsmltool_calls ',
'<|dsmlinvoke ', '<|dsmlinvoke\n', '<|dsmlinvoke\t', '<|dsmlinvoke\r',
'<|dsml tool_calls>', '<|dsml tool_calls\n', '<|dsml tool_calls ',
'<|dsml invoke ', '<|dsml invoke\n', '<|dsml invoke\t', '<|dsml invoke\r',
'<dsml|tool_calls>', '<dsml|tool_calls\n', '<dsml|tool_calls ',
'<dsml|invoke ', '<dsml|invoke\n', '<dsml|invoke\t', '<dsml|invoke\r',
'<dsmltool_calls>', '<dsmltool_calls\n', '<dsmltool_calls ',
'<dsmlinvoke ', '<dsmlinvoke\n', '<dsmlinvoke\t', '<dsmlinvoke\r',
'<dsml tool_calls>', '<dsml tool_calls\n', '<dsml tool_calls ',
'<dsml invoke ', '<dsml invoke\n', '<dsml invoke\t', '<dsml invoke\r',
'<tool_calls>', '<tool_calls\n', '<tool_calls ',
@@ -19,8 +23,10 @@ const XML_TOOL_SEGMENT_TAGS = [
const XML_TOOL_OPENING_TAGS = [
'<|dsml|tool_calls',
'<|dsmltool_calls',
'<|dsml tool_calls',
'<dsml|tool_calls',
'<dsmltool_calls',
'<dsml tool_calls',
'<tool_calls',
'<|tool_calls',
@@ -29,8 +35,10 @@ const XML_TOOL_OPENING_TAGS = [
const XML_TOOL_CLOSING_TAGS = [
'</|dsml|tool_calls>',
'</|dsmltool_calls>',
'</|dsml tool_calls>',
'</dsml|tool_calls>',
'</dsmltool_calls>',
'</dsml tool_calls>',
'</tool_calls>',
'</|tool_calls>',