mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-19 23:47:45 +08:00
refactor: unify Go/Node XML tool markup scanning and expand DSML alias support
- Add shared ToolMarkupTag scanner (toolcalls_scan.go) replacing hardcoded alias tables
- Support DSML collapsed tag names (<DSMLtool_calls>, <DSMLinvoke>, <DSMLparameter>)
- Parse JSON literal values from parameter bodies (123→number, true→bool, null)
- Recover unclosed CDATA in final parse/flush via SanitizeLooseCDATA
- Align Go and Node implementations (scanToolMarkupTagAt, findMatchingToolMarkupClose)
- Reject bare <invoke> as unsupported syntax; only the tool_calls wrapper triggers the tool path
- Update API.md and toolcall-semantics.md documentation

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -6,10 +6,10 @@ const {
|
||||
const {
|
||||
parseMarkupToolCalls,
|
||||
stripFencedCodeBlocks,
|
||||
containsToolCallWrapperSyntaxOutsideIgnored,
|
||||
sanitizeLooseCDATA,
|
||||
} = require('./parse_payload');
|
||||
|
||||
const TOOL_MARKUP_PREFIXES = ['<tool_calls', '<|dsml|tool_calls', '<|dsml tool_calls', '<dsml|tool_calls', '<dsml tool_calls', '<|tool_calls', '<|tool_calls'];
|
||||
|
||||
function extractToolNames(tools) {
|
||||
if (!Array.isArray(tools) || tools.length === 0) {
|
||||
return [];
|
||||
@@ -46,7 +46,13 @@ function parseToolCallsDetailed(text, toolNames) {
|
||||
return result;
|
||||
}
|
||||
// XML markup parsing only.
|
||||
const parsed = parseMarkupToolCalls(normalized);
|
||||
let parsed = parseMarkupToolCalls(normalized);
|
||||
if (parsed.length === 0 && normalized.toLowerCase().includes('<![cdata[')) {
|
||||
const recovered = sanitizeLooseCDATA(normalized);
|
||||
if (recovered !== normalized) {
|
||||
parsed = parseMarkupToolCalls(recovered);
|
||||
}
|
||||
}
|
||||
if (parsed.length === 0) {
|
||||
return result;
|
||||
}
|
||||
@@ -73,7 +79,13 @@ function parseStandaloneToolCallsDetailed(text, toolNames) {
|
||||
return result;
|
||||
}
|
||||
// XML markup parsing only.
|
||||
const parsed = parseMarkupToolCalls(trimmed);
|
||||
let parsed = parseMarkupToolCalls(trimmed);
|
||||
if (parsed.length === 0 && trimmed.toLowerCase().includes('<![cdata[')) {
|
||||
const recovered = sanitizeLooseCDATA(trimmed);
|
||||
if (recovered !== trimmed) {
|
||||
parsed = parseMarkupToolCalls(recovered);
|
||||
}
|
||||
}
|
||||
if (parsed.length === 0) {
|
||||
return result;
|
||||
}
|
||||
@@ -110,8 +122,8 @@ function filterToolCallsDetailed(parsed, toolNames) {
|
||||
}
|
||||
|
||||
function looksLikeToolCallSyntax(text) {
|
||||
const lower = toStringSafe(text).toLowerCase();
|
||||
return TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
|
||||
const styles = containsToolCallWrapperSyntaxOutsideIgnored(text);
|
||||
return styles.dsml || styles.canonical;
|
||||
}
|
||||
|
||||
function shouldSkipToolCallParsingForCodeFenceExample(text) {
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
|
||||
const CDATA_PATTERN = /^<!\[CDATA\[([\s\S]*?)]]>$/i;
|
||||
const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi;
|
||||
const TOOL_MARKUP_NAMES = ['tool_calls', 'invoke', 'parameter'];
|
||||
|
||||
const {
|
||||
toStringSafe,
|
||||
@@ -138,13 +139,10 @@ function normalizeDSMLToolCallMarkup(text) {
|
||||
if (!raw) {
|
||||
return { text: '', ok: true };
|
||||
}
|
||||
const styles = toolMarkupStylesOutsideIgnored(raw);
|
||||
const styles = containsToolMarkupSyntaxOutsideIgnored(raw);
|
||||
if (!styles.dsml) {
|
||||
return { text: raw, ok: true };
|
||||
}
|
||||
// Always normalize DSML aliases to canonical form, even when canonical
|
||||
// tags coexist. Models frequently mix DSML wrapper tags with canonical
|
||||
// inner tags (e.g., <|tool_calls><invoke name="...">).
|
||||
return {
|
||||
text: replaceDSMLToolMarkupOutsideIgnored(raw),
|
||||
ok: true,
|
||||
@@ -152,65 +150,21 @@ function normalizeDSMLToolCallMarkup(text) {
|
||||
}
|
||||
|
||||
function containsDSMLToolMarkup(text) {
|
||||
return toolMarkupStylesOutsideIgnored(text).dsml;
|
||||
return containsToolMarkupSyntaxOutsideIgnored(text).dsml;
|
||||
}
|
||||
|
||||
function containsCanonicalToolMarkup(text) {
|
||||
return toolMarkupStylesOutsideIgnored(text).canonical;
|
||||
return containsToolMarkupSyntaxOutsideIgnored(text).canonical;
|
||||
}
|
||||
|
||||
const DSML_TOOL_MARKUP_ALIASES = [
|
||||
{ from: '<|dsml|tool_calls', to: '<tool_calls' },
|
||||
{ from: '</|dsml|tool_calls>', to: '</tool_calls>' },
|
||||
{ from: '<|dsml|invoke', to: '<invoke' },
|
||||
{ from: '</|dsml|invoke>', to: '</invoke>' },
|
||||
{ from: '<|dsml|parameter', to: '<parameter' },
|
||||
{ from: '</|dsml|parameter>', to: '</parameter>' },
|
||||
{ from: '<|dsml tool_calls', to: '<tool_calls' },
|
||||
{ from: '</|dsml tool_calls>', to: '</tool_calls>' },
|
||||
{ from: '<|dsml invoke', to: '<invoke' },
|
||||
{ from: '</|dsml invoke>', to: '</invoke>' },
|
||||
{ from: '<|dsml parameter', to: '<parameter' },
|
||||
{ from: '</|dsml parameter>', to: '</parameter>' },
|
||||
{ from: '<dsml tool_calls', to: '<tool_calls' },
|
||||
{ from: '</dsml tool_calls>', to: '</tool_calls>' },
|
||||
{ from: '<dsml invoke', to: '<invoke' },
|
||||
{ from: '</dsml invoke>', to: '</invoke>' },
|
||||
{ from: '<dsml parameter', to: '<parameter' },
|
||||
{ from: '</dsml parameter>', to: '</parameter>' },
|
||||
{ from: '<dsml|tool_calls', to: '<tool_calls' },
|
||||
{ from: '</dsml|tool_calls>', to: '</tool_calls>' },
|
||||
{ from: '<dsml|invoke', to: '<invoke' },
|
||||
{ from: '</dsml|invoke>', to: '</invoke>' },
|
||||
{ from: '<dsml|parameter', to: '<parameter' },
|
||||
{ from: '</dsml|parameter>', to: '</parameter>' },
|
||||
{ from: '<|tool_calls', to: '<tool_calls' },
|
||||
{ from: '</|tool_calls>', to: '</tool_calls>' },
|
||||
{ from: '<|invoke', to: '<invoke' },
|
||||
{ from: '</|invoke>', to: '</invoke>' },
|
||||
{ from: '<|parameter', to: '<parameter' },
|
||||
{ from: '</|parameter>', to: '</parameter>' },
|
||||
{ from: '<|tool_calls', to: '<tool_calls' },
|
||||
{ from: '</|tool_calls>', to: '</tool_calls>' },
|
||||
{ from: '<|invoke', to: '<invoke' },
|
||||
{ from: '</|invoke>', to: '</invoke>' },
|
||||
{ from: '<|parameter', to: '<parameter' },
|
||||
{ from: '</|parameter>', to: '</parameter>' },
|
||||
];
|
||||
|
||||
const CANONICAL_TOOL_MARKUP_PREFIXES = [
|
||||
'<tool_calls',
|
||||
'</tool_calls>',
|
||||
'<invoke',
|
||||
'</invoke>',
|
||||
'<parameter',
|
||||
'</parameter>',
|
||||
];
|
||||
|
||||
function toolMarkupStylesOutsideIgnored(text) {
|
||||
const lower = toStringSafe(text).toLowerCase();
|
||||
function containsToolCallWrapperSyntaxOutsideIgnored(text) {
|
||||
const raw = toStringSafe(text);
|
||||
const styles = { dsml: false, canonical: false };
|
||||
for (let i = 0; i < lower.length;) {
|
||||
if (!raw) {
|
||||
return styles;
|
||||
}
|
||||
const lower = raw.toLowerCase();
|
||||
for (let i = 0; i < raw.length;) {
|
||||
const skipped = skipXmlIgnoredSection(lower, i);
|
||||
if (skipped.blocked) {
|
||||
return styles;
|
||||
@@ -219,15 +173,55 @@ function toolMarkupStylesOutsideIgnored(text) {
|
||||
i = skipped.next;
|
||||
continue;
|
||||
}
|
||||
if (CANONICAL_TOOL_MARKUP_PREFIXES.some(prefix => lower.startsWith(prefix, i))) {
|
||||
styles.canonical = true;
|
||||
const tag = scanToolMarkupTagAt(raw, i);
|
||||
if (tag) {
|
||||
if (tag.name !== 'tool_calls') {
|
||||
i = tag.end + 1;
|
||||
continue;
|
||||
}
|
||||
if (tag.dsmlLike) {
|
||||
styles.dsml = true;
|
||||
} else {
|
||||
styles.canonical = true;
|
||||
}
|
||||
if (styles.dsml && styles.canonical) {
|
||||
return styles;
|
||||
}
|
||||
i = tag.end + 1;
|
||||
continue;
|
||||
}
|
||||
if (DSML_TOOL_MARKUP_ALIASES.some(alias => lower.startsWith(alias.from, i))) {
|
||||
styles.dsml = true;
|
||||
}
|
||||
if (styles.dsml && styles.canonical) {
|
||||
i += 1;
|
||||
}
|
||||
return styles;
|
||||
}
|
||||
function containsToolMarkupSyntaxOutsideIgnored(text) {
|
||||
const raw = toStringSafe(text);
|
||||
const styles = { dsml: false, canonical: false };
|
||||
if (!raw) {
|
||||
return styles;
|
||||
}
|
||||
for (let i = 0; i < raw.length;) {
|
||||
const skipped = skipXmlIgnoredSection(raw.toLowerCase(), i);
|
||||
if (skipped.blocked) {
|
||||
return styles;
|
||||
}
|
||||
if (skipped.advanced) {
|
||||
i = skipped.next;
|
||||
continue;
|
||||
}
|
||||
const tag = scanToolMarkupTagAt(raw, i);
|
||||
if (tag) {
|
||||
if (tag.dsmlLike) {
|
||||
styles.dsml = true;
|
||||
} else {
|
||||
styles.canonical = true;
|
||||
}
|
||||
if (styles.dsml && styles.canonical) {
|
||||
return styles;
|
||||
}
|
||||
i = tag.end + 1;
|
||||
continue;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
return styles;
|
||||
@@ -235,6 +229,9 @@ function toolMarkupStylesOutsideIgnored(text) {
|
||||
|
||||
function replaceDSMLToolMarkupOutsideIgnored(text) {
|
||||
const raw = toStringSafe(text);
|
||||
if (!raw) {
|
||||
return '';
|
||||
}
|
||||
const lower = raw.toLowerCase();
|
||||
let out = '';
|
||||
for (let i = 0; i < raw.length;) {
|
||||
@@ -248,10 +245,14 @@ function replaceDSMLToolMarkupOutsideIgnored(text) {
|
||||
i = skipped.next;
|
||||
continue;
|
||||
}
|
||||
const alias = DSML_TOOL_MARKUP_ALIASES.find(item => lower.startsWith(item.from, i));
|
||||
if (alias) {
|
||||
out += alias.to;
|
||||
i += alias.from.length;
|
||||
const tag = scanToolMarkupTagAt(raw, i);
|
||||
if (tag) {
|
||||
if (tag.dsmlLike) {
|
||||
out += `<${tag.closing ? '/' : ''}${tag.name}${raw.slice(tag.nameEnd, tag.end + 1)}`;
|
||||
} else {
|
||||
out += raw.slice(tag.start, tag.end + 1);
|
||||
}
|
||||
i = tag.end + 1;
|
||||
continue;
|
||||
}
|
||||
out += raw[i];
|
||||
@@ -417,6 +418,150 @@ function skipXmlIgnoredSection(lower, i) {
|
||||
return { advanced: false, blocked: false, next: i };
|
||||
}
|
||||
|
||||
/**
 * Recognizes a tool markup tag that begins exactly at `start`.
 *
 * Accepted shape: `<`, an optional `/`, an optional pipe character, an
 * optional case-insensitive `dsml` prefix followed by any run of separator
 * characters, then one of TOOL_MARKUP_NAMES (case-insensitive), a tag
 * boundary, and finally the end of the tag as located by findXmlTagEnd.
 *
 * @param {*} text - Input; coerced with toStringSafe.
 * @param {number} start - Index of the candidate `<` in the coerced string.
 * @returns {?{start: number, end: number, nameStart: number, nameEnd: number,
 *   name: string, closing: boolean, selfClosing: boolean, dsmlLike: boolean,
 *   canonical: boolean}} Tag descriptor (`end` is the index reported by
 *   findXmlTagEnd), or null when no supported tag starts at `start`.
 */
function scanToolMarkupTagAt(text, start) {
  const raw = toStringSafe(text);
  if (!raw || start < 0 || start >= raw.length || raw[start] !== '<') {
    return null;
  }

  let i = start + 1;
  const closing = raw[i] === '/';
  if (closing) {
    i += 1;
  }

  let dsmlLike = false;
  if (i < raw.length && isToolMarkupPipe(raw[i])) {
    dsmlLike = true;
    i += 1;
  }

  // Case-fold only the handful of characters being compared. Lowercasing the
  // whole input and indexing it with offsets taken from `raw` is unsafe:
  // toLowerCase() can change the string length (U+0130 becomes two code
  // units), which shifts every later offset and hides tags that follow such a
  // character. It also repeated O(n) work for every '<' in the input.
  const dsmlPrefix = 'dsml';
  if (raw.slice(i, i + dsmlPrefix.length).toLowerCase() === dsmlPrefix) {
    dsmlLike = true;
    i += dsmlPrefix.length;
    while (i < raw.length && isToolMarkupSeparator(raw[i])) {
      i += 1;
    }
  }

  const longestName = TOOL_MARKUP_NAMES.reduce((max, candidate) => Math.max(max, candidate.length), 0);
  const nameWindow = raw.slice(i, i + longestName).toLowerCase();
  const { name, len } = matchToolMarkupName(nameWindow, 0);
  if (!name) {
    return null;
  }

  const nameEnd = i + len;
  // Reject longer identifiers that merely start with a supported name.
  if (!hasXmlTagBoundary(raw, nameEnd)) {
    return null;
  }

  const end = findXmlTagEnd(raw, nameEnd);
  if (end < 0) {
    return null;
  }

  return {
    start,
    end,
    nameStart: i,
    nameEnd,
    name,
    closing,
    selfClosing: raw.slice(start, end + 1).trim().endsWith('/>'),
    dsmlLike,
    canonical: !dsmlLike,
  };
}
|
||||
|
||||
/**
 * Returns the first tool markup tag at or after `from`, stepping over the
 * sections that skipXmlIgnoredSection reports as ignorable.
 *
 * @param {*} text - Input; coerced with toStringSafe.
 * @param {number} [from] - Index to start at; falsy or negative values start at 0.
 * @returns {?Object} The descriptor produced by scanToolMarkupTagAt, or null
 *   when no tag is found or skipXmlIgnoredSection reports `blocked`.
 */
function findToolMarkupTagOutsideIgnored(text, from) {
  const source = toStringSafe(text);
  const lowered = source.toLowerCase();
  let cursor = Math.max(0, from || 0);

  while (cursor < source.length) {
    const { blocked, advanced, next } = skipXmlIgnoredSection(lowered, cursor);
    if (blocked) {
      return null;
    }
    if (advanced) {
      cursor = next;
      continue;
    }

    const found = scanToolMarkupTagAt(source, cursor);
    if (found) {
      return found;
    }
    cursor += 1;
  }

  return null;
}
|
||||
|
||||
/**
 * Finds the closing tag that balances `openTag`, counting nested tags of the
 * same name. Tags with a different name are stepped over, and self-closing
 * tags of the same name do not change the nesting depth.
 *
 * @param {*} text - Input; coerced with toStringSafe.
 * @param {?Object} openTag - Opening tag descriptor; its `name`, `end` and
 *   `closing` fields are read.
 * @returns {?Object} Descriptor of the matching closing tag, or null when the
 *   arguments are unusable or no balancing close exists.
 */
function findMatchingToolMarkupClose(text, openTag) {
  const source = toStringSafe(text);
  const usable = source && openTag && openTag.name && !openTag.closing;
  if (!usable) {
    return null;
  }

  let nesting = 1;
  let cursor = openTag.end + 1;
  while (cursor < source.length) {
    const candidate = findToolMarkupTagOutsideIgnored(source, cursor);
    if (!candidate) {
      return null;
    }
    // Whatever the tag turns out to be, scanning resumes right after it.
    cursor = candidate.end + 1;

    if (candidate.name !== openTag.name) {
      continue;
    }
    if (candidate.closing) {
      nesting -= 1;
      if (nesting === 0) {
        return candidate;
      }
    } else if (!candidate.selfClosing) {
      nesting += 1;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Detects a trailing, still-incomplete tool markup opening tag so that a
 * caller can hold it back until more input arrives.
 *
 * Only the text from the last `<` onward is examined. It counts as a partial
 * tag when it contains no `>` and is a case-insensitive prefix of one of the
 * known opening-tag spellings. Fullwidth pipes (U+FF5C) are treated like
 * ASCII `|`, so every DSML spelling is covered with either pipe form.
 *
 * @param {*} text - Input; coerced with toStringSafe.
 * @returns {number} Index of the `<` that starts the partial tag, or -1.
 */
function findPartialToolMarkupStart(text) {
  const raw = toStringSafe(text);
  const lastLT = raw.lastIndexOf('<');
  if (lastLT < 0) {
    return -1;
  }

  const tail = raw.slice(lastLT);
  if (tail.includes('>')) {
    return -1;
  }

  // Fold fullwidth pipes to ASCII once instead of listing each spelling twice.
  // The replacement is one code unit for one code unit.
  const normalizedTail = tail.toLowerCase().replace(/\uFF5C/g, '|');
  const candidates = [
    '<tool_calls', '<invoke', '<parameter',
    '<|tool_calls', '<|invoke', '<|parameter',
    '<|dsml|tool_calls', '<|dsml|invoke', '<|dsml|parameter',
    '<dsmltool_calls', '<dsmlinvoke', '<dsmlparameter',
    '<dsml tool_calls', '<dsml invoke', '<dsml parameter',
    '<dsml|tool_calls', '<dsml|invoke', '<dsml|parameter',
    '<|dsmltool_calls', '<|dsmlinvoke', '<|dsmlparameter',
    '<|dsml tool_calls', '<|dsml invoke', '<|dsml parameter',
  ];

  return candidates.some((candidate) => candidate.startsWith(normalizedTail)) ? lastLT : -1;
}
|
||||
|
||||
/**
 * Tells whether `ch` is a pipe character used in DSML-style tag names.
 *
 * Both the ASCII vertical bar and the fullwidth vertical line (U+FF5C) are
 * accepted. The fullwidth form is written as an escape so that it cannot be
 * silently folded into the ASCII one by an editor or a text normalizer.
 *
 * @param {string} ch - A single character.
 * @returns {boolean} True for `|` or U+FF5C.
 */
function isToolMarkupPipe(ch) {
  return ch === '|' || ch === '\uFF5C';
}
|
||||
|
||||
/**
 * Tells whether `ch` may sit between a `dsml` prefix and the tag name:
 * a space, tab, carriage return, newline, or a pipe character.
 *
 * @param {string} ch - A single character.
 * @returns {boolean} True when `ch` is one of the separator characters.
 */
function isToolMarkupSeparator(ch) {
  switch (ch) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      return true;
    default:
      return isToolMarkupPipe(ch);
  }
}
|
||||
|
||||
/**
 * Looks for one of TOOL_MARKUP_NAMES at position `start` of an already
 * lowercased string. Names are tried in list order and the first hit wins.
 *
 * @param {string} lower - Lowercased text to inspect.
 * @param {number} start - Index at which the name must begin.
 * @returns {{name: string, len: number}} The matched name and its length, or
 *   `{ name: '', len: 0 }` when none matches.
 */
function matchToolMarkupName(lower, start) {
  const hit = TOOL_MARKUP_NAMES.find((candidate) => lower.startsWith(candidate, start));
  return hit ? { name: hit, len: hit.length } : { name: '', len: 0 };
}
|
||||
|
||||
function findXmlTagEnd(text, from) {
|
||||
let quote = '';
|
||||
for (let i = Math.max(0, from || 0); i < text.length; i += 1) {
|
||||
@@ -494,7 +639,8 @@ function parseMarkupKVObject(text) {
|
||||
function parseMarkupValue(raw) {
|
||||
const cdata = extractStandaloneCDATA(raw);
|
||||
if (cdata.ok) {
|
||||
return cdata.value;
|
||||
const literal = parseJSONLiteralValue(cdata.value);
|
||||
return literal.ok ? literal.value : cdata.value;
|
||||
}
|
||||
const s = toStringSafe(extractRawTagValue(raw)).trim();
|
||||
if (!s) {
|
||||
@@ -511,12 +657,9 @@ function parseMarkupValue(raw) {
|
||||
}
|
||||
}
|
||||
|
||||
if (s.startsWith('{') || s.startsWith('[')) {
|
||||
try {
|
||||
return JSON.parse(s);
|
||||
} catch (_err) {
|
||||
return s;
|
||||
}
|
||||
const literal = parseJSONLiteralValue(s);
|
||||
if (literal.ok) {
|
||||
return literal.value;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
@@ -554,9 +697,65 @@ function extractStandaloneCDATA(inner) {
|
||||
if (cdataMatch && cdataMatch[1] !== undefined) {
|
||||
return { ok: true, value: cdataMatch[1] };
|
||||
}
|
||||
if (s.toLowerCase().startsWith('<![cdata[')) {
|
||||
return { ok: true, value: s.slice('<![CDATA['.length) };
|
||||
}
|
||||
return { ok: false, value: '' };
|
||||
}
|
||||
|
||||
/**
 * Tries to read a parameter body as a JSON value (object, array, string,
 * number, `true`, `false` or `null`).
 *
 * The trimmed text is handed to JSON.parse only when its first character
 * could begin a JSON value; a parse failure is reported as "not a literal"
 * rather than thrown.
 *
 * @param {*} raw - Candidate text; coerced with toStringSafe and trimmed.
 * @returns {{ok: boolean, value: *}} `ok: true` with the parsed value on
 *   success, otherwise `{ ok: false, value: null }`.
 */
function parseJSONLiteralValue(raw) {
  const candidate = toStringSafe(raw).trim();
  // First characters that can open an object, array, string, number,
  // true, false or null.
  const jsonLeadChars = '{["-0123456789tfn';
  const plausible = candidate !== '' && jsonLeadChars.includes(candidate[0]);
  if (!plausible) {
    return { ok: false, value: null };
  }

  let parsed;
  try {
    parsed = JSON.parse(candidate);
  } catch (_err) {
    return { ok: false, value: null };
  }
  return { ok: true, value: parsed };
}
|
||||
|
||||
/**
 * Repairs text whose last CDATA section was never closed.
 *
 * Closed `<![CDATA[ ... ]]>` sections are left untouched. If an opening
 * marker has no matching `]]>`, the marker itself is dropped and everything
 * after it is kept as plain text. Input without an unclosed section is
 * returned unchanged, so callers can detect a repair with `result !== input`.
 *
 * @param {*} text - Input; coerced with toStringSafe.
 * @returns {string} The repaired text, the coerced input when nothing needed
 *   repair, or '' for empty input.
 */
function sanitizeLooseCDATA(text) {
  const raw = toStringSafe(text);
  if (!raw) {
    return '';
  }

  // Search `raw` directly with a case-insensitive pattern. Searching a
  // lowercased copy and reusing its offsets on `raw` corrupts the output when
  // toLowerCase() changes the string length (e.g. U+0130 becomes two code
  // units). The regex is created per call, so its lastIndex is never shared.
  const openPattern = /<!\[CDATA\[/gi;
  const closeMarker = ']]>';

  let out = '';
  let pos = 0;
  while (pos < raw.length) {
    openPattern.lastIndex = pos;
    const open = openPattern.exec(raw);
    if (!open) {
      break;
    }

    const start = open.index;
    const contentStart = start + open[0].length;
    const closeIdx = raw.indexOf(closeMarker, contentStart);
    if (closeIdx < 0) {
      // Unclosed section: keep the text before the marker and the payload
      // after it, dropping only the marker.
      return out + raw.slice(pos, start) + raw.slice(contentStart);
    }

    const end = closeIdx + closeMarker.length;
    out += raw.slice(pos, end);
    pos = end;
  }

  // Every section was closed (or there was none): nothing to repair.
  return raw;
}
|
||||
|
||||
function parseTagAttributes(raw) {
|
||||
const source = toStringSafe(raw);
|
||||
const out = {};
|
||||
@@ -631,4 +830,10 @@ module.exports = {
|
||||
stripFencedCodeBlocks,
|
||||
parseMarkupToolCalls,
|
||||
normalizeDSMLToolCallMarkup,
|
||||
containsToolMarkupSyntaxOutsideIgnored,
|
||||
containsToolCallWrapperSyntaxOutsideIgnored,
|
||||
findToolMarkupTagOutsideIgnored,
|
||||
findMatchingToolMarkupClose,
|
||||
findPartialToolMarkupStart,
|
||||
sanitizeLooseCDATA,
|
||||
};
|
||||
|
||||
@@ -1,71 +1,53 @@
|
||||
'use strict';
|
||||
const { parseToolCalls } = require('./parse');
|
||||
|
||||
// XML wrapper tag pair used by the streaming sieve.
|
||||
const XML_TOOL_TAG_PAIRS = [
|
||||
{ open: '<|dsml|tool_calls', close: '</|dsml|tool_calls>' },
|
||||
{ open: '<|dsml tool_calls', close: '</|dsml tool_calls>' },
|
||||
{ open: '<dsml|tool_calls', close: '</dsml|tool_calls>' },
|
||||
{ open: '<dsml tool_calls', close: '</dsml tool_calls>' },
|
||||
{ open: '<|tool_calls', close: '</|tool_calls>' },
|
||||
{ open: '<|tool_calls', close: '</|tool_calls>' },
|
||||
{ open: '<tool_calls', close: '</tool_calls>' },
|
||||
];
|
||||
|
||||
const XML_TOOL_OPENING_TAGS = [
|
||||
...XML_TOOL_TAG_PAIRS.map(p => p.open),
|
||||
'<|dsml|invoke', '<|dsml invoke', '<dsml|invoke', '<dsml invoke', '<|invoke', '<|invoke', '<invoke',
|
||||
];
|
||||
const {
|
||||
findToolMarkupTagOutsideIgnored,
|
||||
findMatchingToolMarkupClose,
|
||||
findPartialToolMarkupStart,
|
||||
} = require('./parse_payload');
|
||||
|
||||
function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
|
||||
const lower = captured.toLowerCase();
|
||||
let anyOpenFound = false;
|
||||
let best = null;
|
||||
let rejected = null;
|
||||
|
||||
// Scan every wrapper occurrence. Prose can mention a wrapper tag before the
|
||||
// actual tool block, including the same variant as the real block.
|
||||
for (const pair of XML_TOOL_TAG_PAIRS) {
|
||||
let searchFrom = 0;
|
||||
while (searchFrom < lower.length) {
|
||||
const openIdx = findXMLOpenOutsideCDATA(captured, pair.open, searchFrom);
|
||||
if (openIdx < 0) {
|
||||
break;
|
||||
}
|
||||
// Ignore closing tags that appear inside CDATA payloads, such as
|
||||
// write-file content containing tool-call documentation examples.
|
||||
const closeIdx = findMatchingXMLToolWrapperClose(captured, pair.open, pair.close, openIdx);
|
||||
if (closeIdx < 0) {
|
||||
anyOpenFound = true;
|
||||
searchFrom = openIdx + pair.open.length;
|
||||
continue;
|
||||
}
|
||||
const closeEnd = closeIdx + pair.close.length;
|
||||
const xmlBlock = captured.slice(openIdx, closeEnd);
|
||||
let prefixPart = captured.slice(0, openIdx);
|
||||
let suffixPart = captured.slice(closeEnd);
|
||||
const parsed = parseToolCalls(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed) && parsed.length > 0) {
|
||||
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
||||
if (!best || openIdx < best.start) {
|
||||
best = {
|
||||
start: openIdx,
|
||||
prefix: trimmedFence.prefix,
|
||||
calls: parsed,
|
||||
suffix: trimmedFence.suffix,
|
||||
};
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (!rejected || openIdx < rejected.start) {
|
||||
rejected = {
|
||||
start: openIdx,
|
||||
prefix: prefixPart + xmlBlock,
|
||||
suffix: suffixPart,
|
||||
// Scan every recognized wrapper occurrence. Prose can mention a wrapper tag
|
||||
// before the actual tool block, including the same variant as the real block.
|
||||
for (let searchFrom = 0; searchFrom < captured.length;) {
|
||||
const openTag = findFirstToolTag(captured, searchFrom, 'tool_calls', false);
|
||||
if (!openTag) {
|
||||
break;
|
||||
}
|
||||
const closeTag = findMatchingToolMarkupClose(captured, openTag);
|
||||
if (!closeTag) {
|
||||
anyOpenFound = true;
|
||||
searchFrom = openTag.end + 1;
|
||||
continue;
|
||||
}
|
||||
const xmlBlock = captured.slice(openTag.start, closeTag.end + 1);
|
||||
const prefixPart = captured.slice(0, openTag.start);
|
||||
const suffixPart = captured.slice(closeTag.end + 1);
|
||||
const parsed = parseToolCalls(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed) && parsed.length > 0) {
|
||||
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
||||
if (!best || openTag.start < best.start) {
|
||||
best = {
|
||||
start: openTag.start,
|
||||
prefix: trimmedFence.prefix,
|
||||
calls: parsed,
|
||||
suffix: trimmedFence.suffix,
|
||||
};
|
||||
}
|
||||
searchFrom = openIdx + pair.open.length;
|
||||
break;
|
||||
}
|
||||
if (!rejected || openTag.start < rejected.start) {
|
||||
rejected = {
|
||||
start: openTag.start,
|
||||
prefix: prefixPart + xmlBlock,
|
||||
suffix: suffixPart,
|
||||
};
|
||||
}
|
||||
searchFrom = openTag.end + 1;
|
||||
}
|
||||
if (best) {
|
||||
return { ready: true, prefix: best.prefix, calls: best.calls, suffix: best.suffix };
|
||||
@@ -78,17 +60,15 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
|
||||
// If this block failed to become a tool call, pass it through as text.
|
||||
return { ready: true, prefix: rejected.prefix, calls: [], suffix: rejected.suffix };
|
||||
}
|
||||
if (!containsAnyToolCallWrapper(lower)) {
|
||||
const found = firstInvokeIndex(lower);
|
||||
if (found.index >= 0) {
|
||||
const closeTag = found.dsml ? '</|dsml|tool_calls>' : '</tool_calls>';
|
||||
const openWrapper = found.dsml ? '<|DSML|tool_calls>' : '<tool_calls>';
|
||||
const closeIdx = findXMLCloseOutsideCDATA(captured, closeTag, found.index);
|
||||
if (closeIdx > found.index) {
|
||||
const closeEnd = closeIdx + closeTag.length;
|
||||
const xmlBlock = openWrapper + captured.slice(found.index, closeIdx) + closeTag;
|
||||
let prefixPart = captured.slice(0, found.index);
|
||||
let suffixPart = captured.slice(closeEnd);
|
||||
const invokeTag = findFirstToolTag(captured, 0, 'invoke', false);
|
||||
if (invokeTag) {
|
||||
const wrapperOpen = findFirstToolTag(captured, 0, 'tool_calls', false);
|
||||
if (!wrapperOpen || wrapperOpen.start > invokeTag.start) {
|
||||
const closeTag = findFirstToolTag(captured, invokeTag.start + 1, 'tool_calls', true);
|
||||
if (closeTag && closeTag.start > invokeTag.start) {
|
||||
const xmlBlock = '<tool_calls>' + captured.slice(invokeTag.start, closeTag.end + 1);
|
||||
const prefixPart = captured.slice(0, invokeTag.start);
|
||||
const suffixPart = captured.slice(closeTag.end + 1);
|
||||
const parsed = parseToolCalls(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed) && parsed.length > 0) {
|
||||
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
||||
@@ -99,194 +79,43 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
|
||||
suffix: trimmedFence.suffix,
|
||||
};
|
||||
}
|
||||
return { ready: true, prefix: prefixPart + captured.slice(found.index, closeEnd), calls: [], suffix: suffixPart };
|
||||
return { ready: true, prefix: prefixPart + captured.slice(invokeTag.start, closeTag.end + 1), calls: [], suffix: suffixPart };
|
||||
}
|
||||
}
|
||||
}
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
|
||||
/**
 * Finds the closing wrapper tag that balances the opening wrapper located at
 * `openIdx`, counting nested wrappers of the same spelling and ignoring
 * anything inside CDATA sections and XML comments.
 *
 * @param {*} s - Text to scan; non-strings are treated as empty.
 * @param {string} openTag - Opening tag prefix, e.g. `<tool_calls`.
 * @param {string} closeTag - Full closing tag, e.g. `</tool_calls>`.
 * @param {number} openIdx - Index of the opening tag within `s`.
 * @returns {number} Index of the balancing closing tag, or -1 when the
 *   arguments are unusable, a CDATA section or comment is unterminated, or no
 *   balancing close exists.
 */
function findMatchingXMLToolWrapperClose(s, openTag, closeTag, openIdx) {
  const text = typeof s === 'string' ? s : '';
  const opener = String(openTag || '').toLowerCase();
  const closer = String(closeTag || '').toLowerCase();
  if (!text || !opener || !closer || openIdx < 0) {
    return -1;
  }

  const lowered = text.toLowerCase();
  // Sections whose content must never be interpreted as markup.
  const ignoredSections = [
    ['<![cdata[', ']]>'],
    ['<!--', '-->'],
  ];

  let nesting = 1;
  let cursor = openIdx + opener.length;
  while (cursor < text.length) {
    const section = ignoredSections.find(([sectionOpen]) => lowered.startsWith(sectionOpen, cursor));
    if (section) {
      const [sectionOpen, sectionClose] = section;
      const sectionEnd = lowered.indexOf(sectionClose, cursor + sectionOpen.length);
      if (sectionEnd < 0) {
        return -1;
      }
      cursor = sectionEnd + sectionClose.length;
      continue;
    }

    if (lowered.startsWith(closer, cursor)) {
      nesting -= 1;
      if (nesting === 0) {
        return cursor;
      }
      cursor += closer.length;
      continue;
    }

    const nestedOpen = lowered.startsWith(opener, cursor)
      && hasXMLToolTagBoundary(text, cursor + opener.length);
    if (nestedOpen) {
      nesting += 1;
      cursor += opener.length;
      continue;
    }

    cursor += 1;
  }

  return -1;
}
|
||||
|
||||
/**
 * Finds the first occurrence of an opening tag prefix at or after `start`
 * that is not inside a CDATA section or an XML comment and that is followed
 * by a tag boundary.
 *
 * @param {*} s - Text to scan; non-strings are treated as empty.
 * @param {string} openTag - Opening tag prefix, matched case-insensitively.
 * @param {number} [start] - Index to start at; falsy or negative values start at 0.
 * @returns {number} Index of the match, or -1 when there is none or when a
 *   CDATA section or comment is unterminated.
 */
function findXMLOpenOutsideCDATA(s, openTag, start) {
  const text = typeof s === 'string' ? s : '';
  const needle = String(openTag || '').toLowerCase();
  if (!text || !needle) {
    return -1;
  }

  const lowered = text.toLowerCase();
  // Sections whose content must never be interpreted as markup.
  const ignoredSections = [
    ['<![cdata[', ']]>'],
    ['<!--', '-->'],
  ];

  let cursor = Math.max(0, start || 0);
  while (cursor < text.length) {
    const section = ignoredSections.find(([sectionOpen]) => lowered.startsWith(sectionOpen, cursor));
    if (section) {
      const [sectionOpen, sectionClose] = section;
      const sectionEnd = lowered.indexOf(sectionClose, cursor + sectionOpen.length);
      if (sectionEnd < 0) {
        return -1;
      }
      cursor = sectionEnd + sectionClose.length;
      continue;
    }

    if (lowered.startsWith(needle, cursor) && hasXMLToolTagBoundary(text, cursor + needle.length)) {
      return cursor;
    }
    cursor += 1;
  }

  return -1;
}
|
||||
|
||||
/**
 * Tells whether position `idx` ends a tag name: either the text stops there,
 * or the character is whitespace, `>` or `/`.
 *
 * @param {string} text - Text being scanned.
 * @param {number} idx - Index immediately after the tag name.
 * @returns {boolean} True when `idx` is a valid tag-name boundary.
 */
function hasXMLToolTagBoundary(text, idx) {
  if (idx >= text.length) {
    return true;
  }
  switch (text[idx]) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '>':
    case '/':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
function hasOpenXMLToolTag(captured) {
|
||||
for (const pair of XML_TOOL_TAG_PAIRS) {
|
||||
const openIdx = findXMLOpenOutsideCDATA(captured, pair.open, 0);
|
||||
if (openIdx >= 0) {
|
||||
if (findMatchingXMLToolWrapperClose(captured, pair.open, pair.close, openIdx) < 0) {
|
||||
return true;
|
||||
}
|
||||
for (let pos = 0; pos < captured.length;) {
|
||||
const tag = findFirstToolTag(captured, pos, 'tool_calls', false);
|
||||
if (!tag) {
|
||||
return false;
|
||||
}
|
||||
if (!findMatchingToolMarkupClose(captured, tag)) {
|
||||
return true;
|
||||
}
|
||||
pos = tag.end + 1;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Tells whether already-lowercased text mentions any supported spelling of
 * the `tool_calls` wrapper opening tag.
 *
 * The last two spellings differ only in the pipe character: ASCII `|` and
 * the fullwidth vertical line (U+FF5C). The fullwidth one is written as an
 * escape so that the two entries cannot collapse into identical checks.
 *
 * @param {string} lower - Lowercased text to inspect.
 * @returns {boolean} True when any wrapper prefix occurs in `lower`.
 */
function containsAnyToolCallWrapper(lower) {
  const wrapperPrefixes = [
    '<tool_calls',
    '<|dsml|tool_calls',
    '<|dsml tool_calls',
    '<dsml|tool_calls',
    '<dsml tool_calls',
    '<|tool_calls',
    '<\uFF5Ctool_calls',
  ];
  return wrapperPrefixes.some((prefix) => lower.includes(prefix));
}
|
||||
|
||||
function firstInvokeIndex(lower) {
|
||||
const xmlIdx = lower.indexOf('<invoke');
|
||||
// Check all DSML-like invoke prefixes.
|
||||
const dsmlPrefixes = ['<|dsml|invoke', '<|dsml invoke', '<dsml|invoke', '<dsml invoke', '<|invoke', '<|invoke'];
|
||||
let dsmlIdx = -1;
|
||||
for (const prefix of dsmlPrefixes) {
|
||||
const idx = lower.indexOf(prefix);
|
||||
if (idx >= 0 && (dsmlIdx < 0 || idx < dsmlIdx)) {
|
||||
dsmlIdx = idx;
|
||||
function findFirstToolTag(text, from, name, closing) {
|
||||
for (let pos = Math.max(0, from || 0); pos < text.length;) {
|
||||
const tag = findToolMarkupTagOutsideIgnored(text, pos);
|
||||
if (!tag) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
if (xmlIdx < 0) {
|
||||
return { index: dsmlIdx, dsml: dsmlIdx >= 0 };
|
||||
}
|
||||
if (dsmlIdx < 0) {
|
||||
return { index: xmlIdx, dsml: false };
|
||||
}
|
||||
if (dsmlIdx < xmlIdx) {
|
||||
return { index: dsmlIdx, dsml: true };
|
||||
}
|
||||
return { index: xmlIdx, dsml: false };
|
||||
}
|
||||
|
||||
function findPartialXMLToolTagStart(s) {
|
||||
const lastLT = s.lastIndexOf('<');
|
||||
if (lastLT < 0) {
|
||||
return -1;
|
||||
}
|
||||
const tail = s.slice(lastLT);
|
||||
if (tail.includes('>')) {
|
||||
return -1;
|
||||
}
|
||||
const lowerTail = tail.toLowerCase();
|
||||
for (const tag of XML_TOOL_OPENING_TAGS) {
|
||||
const tagWithLT = tag.startsWith('<') ? tag : '<' + tag;
|
||||
if (tagWithLT.startsWith(lowerTail)) {
|
||||
return lastLT;
|
||||
if (tag.name === name && tag.closing === closing) {
|
||||
return tag;
|
||||
}
|
||||
pos = tag.end + 1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
function findXMLCloseOutsideCDATA(s, closeTag, start) {
|
||||
const text = typeof s === 'string' ? s : '';
|
||||
const target = String(closeTag || '').toLowerCase();
|
||||
if (!text || !target) {
|
||||
return -1;
|
||||
}
|
||||
const lower = text.toLowerCase();
|
||||
for (let i = Math.max(0, start || 0); i < text.length;) {
|
||||
if (lower.startsWith('<![cdata[', i)) {
|
||||
const end = lower.indexOf(']]>', i + '<![cdata['.length);
|
||||
if (end < 0) {
|
||||
return -1;
|
||||
}
|
||||
i = end + ']]>'.length;
|
||||
continue;
|
||||
}
|
||||
if (lower.startsWith('<!--', i)) {
|
||||
const end = lower.indexOf('-->', i + '<!--'.length);
|
||||
if (end < 0) {
|
||||
return -1;
|
||||
}
|
||||
i = end + '-->'.length;
|
||||
continue;
|
||||
}
|
||||
if (lower.startsWith(target, i)) {
|
||||
return i;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
return -1;
|
||||
return null;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
consumeXMLToolCapture,
|
||||
hasOpenXMLToolTag,
|
||||
findPartialXMLToolTagStart,
|
||||
findPartialXMLToolTagStart: findPartialToolMarkupStart,
|
||||
};
|
||||
|
||||
@@ -6,8 +6,9 @@ const {
|
||||
} = require('./state');
|
||||
const { trimWrappingJSONFence } = require('./jsonscan');
|
||||
const {
|
||||
XML_TOOL_SEGMENT_TAGS,
|
||||
} = require('./tool-keywords');
|
||||
findToolMarkupTagOutsideIgnored,
|
||||
sanitizeLooseCDATA,
|
||||
} = require('./parse_payload');
|
||||
const {
|
||||
consumeXMLToolCapture: consumeXMLToolCaptureImpl,
|
||||
hasOpenXMLToolTag,
|
||||
@@ -117,8 +118,27 @@ function flushToolSieve(state, toolNames) {
|
||||
}
|
||||
} else if (state.capture) {
|
||||
const content = state.capture;
|
||||
noteText(state, content);
|
||||
events.push({ type: 'text', text: content });
|
||||
const recovered = sanitizeLooseCDATA(content);
|
||||
if (recovered !== content) {
|
||||
const recoveredResult = consumeXMLToolCaptureImpl(recovered, toolNames, trimWrappingJSONFence);
|
||||
if (recoveredResult.ready && Array.isArray(recoveredResult.calls) && recoveredResult.calls.length > 0) {
|
||||
if (recoveredResult.prefix) {
|
||||
noteText(state, recoveredResult.prefix);
|
||||
events.push({ type: 'text', text: recoveredResult.prefix });
|
||||
}
|
||||
events.push({ type: 'tool_calls', calls: recoveredResult.calls });
|
||||
if (recoveredResult.suffix) {
|
||||
noteText(state, recoveredResult.suffix);
|
||||
events.push({ type: 'text', text: recoveredResult.suffix });
|
||||
}
|
||||
} else {
|
||||
noteText(state, content);
|
||||
events.push({ type: 'text', text: content });
|
||||
}
|
||||
} else {
|
||||
noteText(state, content);
|
||||
events.push({ type: 'text', text: content });
|
||||
}
|
||||
}
|
||||
state.capture = '';
|
||||
state.capturing = false;
|
||||
@@ -155,26 +175,16 @@ function findToolSegmentStart(state, s) {
|
||||
if (!s) {
|
||||
return -1;
|
||||
}
|
||||
const lower = s.toLowerCase();
|
||||
let offset = 0;
|
||||
while (true) {
|
||||
// Only check XML tool tags.
|
||||
let bestIdx = -1;
|
||||
let matchedTag = '';
|
||||
for (const tag of XML_TOOL_SEGMENT_TAGS) {
|
||||
const idx = lower.indexOf(tag, offset);
|
||||
if (idx >= 0 && (bestIdx < 0 || idx < bestIdx)) {
|
||||
bestIdx = idx;
|
||||
matchedTag = tag;
|
||||
}
|
||||
}
|
||||
if (bestIdx < 0) {
|
||||
const tag = findToolMarkupTagOutsideIgnored(s, offset);
|
||||
if (!tag) {
|
||||
return -1;
|
||||
}
|
||||
if (!insideCodeFenceWithState(state, s.slice(0, bestIdx))) {
|
||||
return bestIdx;
|
||||
if (!insideCodeFenceWithState(state, s.slice(0, tag.start))) {
|
||||
return tag.start;
|
||||
}
|
||||
offset = bestIdx + matchedTag.length;
|
||||
offset = tag.end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,10 +3,14 @@
|
||||
const XML_TOOL_SEGMENT_TAGS = [
|
||||
'<|dsml|tool_calls>', '<|dsml|tool_calls\n', '<|dsml|tool_calls ',
|
||||
'<|dsml|invoke ', '<|dsml|invoke\n', '<|dsml|invoke\t', '<|dsml|invoke\r',
|
||||
'<|dsmltool_calls>', '<|dsmltool_calls\n', '<|dsmltool_calls ',
|
||||
'<|dsmlinvoke ', '<|dsmlinvoke\n', '<|dsmlinvoke\t', '<|dsmlinvoke\r',
|
||||
'<|dsml tool_calls>', '<|dsml tool_calls\n', '<|dsml tool_calls ',
|
||||
'<|dsml invoke ', '<|dsml invoke\n', '<|dsml invoke\t', '<|dsml invoke\r',
|
||||
'<dsml|tool_calls>', '<dsml|tool_calls\n', '<dsml|tool_calls ',
|
||||
'<dsml|invoke ', '<dsml|invoke\n', '<dsml|invoke\t', '<dsml|invoke\r',
|
||||
'<dsmltool_calls>', '<dsmltool_calls\n', '<dsmltool_calls ',
|
||||
'<dsmlinvoke ', '<dsmlinvoke\n', '<dsmlinvoke\t', '<dsmlinvoke\r',
|
||||
'<dsml tool_calls>', '<dsml tool_calls\n', '<dsml tool_calls ',
|
||||
'<dsml invoke ', '<dsml invoke\n', '<dsml invoke\t', '<dsml invoke\r',
|
||||
'<|tool_calls>', '<|tool_calls\n', '<|tool_calls ',
|
||||
@@ -19,8 +23,10 @@ const XML_TOOL_SEGMENT_TAGS = [
|
||||
|
||||
const XML_TOOL_OPENING_TAGS = [
|
||||
'<|dsml|tool_calls',
|
||||
'<|dsmltool_calls',
|
||||
'<|dsml tool_calls',
|
||||
'<dsml|tool_calls',
|
||||
'<dsmltool_calls',
|
||||
'<dsml tool_calls',
|
||||
'<|tool_calls',
|
||||
'<|tool_calls',
|
||||
@@ -29,8 +35,10 @@ const XML_TOOL_OPENING_TAGS = [
|
||||
|
||||
const XML_TOOL_CLOSING_TAGS = [
|
||||
'</|dsml|tool_calls>',
|
||||
'</|dsmltool_calls>',
|
||||
'</|dsml tool_calls>',
|
||||
'</dsml|tool_calls>',
|
||||
'</dsmltool_calls>',
|
||||
'</dsml tool_calls>',
|
||||
'</|tool_calls>',
|
||||
'</|tool_calls>',
|
||||
|
||||
Reference in New Issue
Block a user