Tighten XML tool call parsing and upstream empty handling

This commit is contained in:
CJACK
2026-04-26 01:17:16 +08:00
parent a44afb335a
commit 1b0e8cbadb
37 changed files with 273 additions and 991 deletions

View File

@@ -8,7 +8,7 @@ const {
stripFencedCodeBlocks,
} = require('./parse_payload');
const TOOL_MARKUP_PREFIXES = ['<tool_call', '<function_call', '<invoke'];
const TOOL_MARKUP_PREFIXES = ['<tools', '<tool_call'];
function extractToolNames(tools) {
if (!Array.isArray(tools) || tools.length === 0) {
@@ -45,7 +45,6 @@ function parseToolCallsDetailed(text, toolNames) {
if (shouldSkipToolCallParsingForCodeFenceExample(normalized)) {
return result;
}
// XML markup parsing only.
const parsed = parseMarkupToolCalls(normalized);
if (parsed.length === 0) {
@@ -73,7 +72,6 @@ function parseStandaloneToolCallsDetailed(text, toolNames) {
if (shouldSkipToolCallParsingForCodeFenceExample(trimmed)) {
return result;
}
// XML markup parsing only.
const parsed = parseMarkupToolCalls(trimmed);
if (parsed.length === 0) {

View File

@@ -1,26 +1,10 @@
'use strict';
const TOOL_CALL_MARKUP_BLOCK_PATTERN = /<(?:[a-z0-9_:-]+:)?(tool_call|function_call|invoke)\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
const TOOL_CALL_MARKUP_SELFCLOSE_PATTERN = /<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)\/>/gi;
const TOOLS_WRAPPER_PATTERN = /<tools\b[^>]*>([\s\S]*?)<\/tools>/gi;
const TOOL_CALL_MARKUP_BLOCK_PATTERN = /<(?:[a-z0-9_:-]+:)?tool_call\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?tool_call>/gi;
const TOOL_CALL_CANONICAL_BODY_PATTERN = /^\s*<(?:[a-z0-9_:-]+:)?tool_name\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?tool_name>\s*<(?:[a-z0-9_:-]+:)?param\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?param>\s*$/i;
const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
const TOOL_CALL_MARKUP_ATTR_PATTERN = /(name|function|tool)\s*=\s*"([^"]+)"/i;
const TOOL_CALL_MARKUP_NAME_PATTERNS = [
/<(?:[a-z0-9_:-]+:)?tool_name\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?tool_name>/i,
/<(?:[a-z0-9_:-]+:)?function_name\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?function_name>/i,
/<(?:[a-z0-9_:-]+:)?name\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?name>/i,
/<(?:[a-z0-9_:-]+:)?function\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?function>/i,
];
const TOOL_CALL_MARKUP_ARGS_PATTERNS = [
/<(?:[a-z0-9_:-]+:)?input\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?input>/i,
/<(?:[a-z0-9_:-]+:)?arguments\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?arguments>/i,
/<(?:[a-z0-9_:-]+:)?argument\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?argument>/i,
/<(?:[a-z0-9_:-]+:)?parameters\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?parameters>/i,
/<(?:[a-z0-9_:-]+:)?parameter\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?parameter>/i,
/<(?:[a-z0-9_:-]+:)?args\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?args>/i,
/<(?:[a-z0-9_:-]+:)?params\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?params>/i,
];
const CDATA_PATTERN = /^<!\[CDATA\[([\s\S]*?)]]>$/i;
const HTML_ENTITIES_PATTERN = /&[a-z0-9#]+;/gi;
const {
toStringSafe,
@@ -40,22 +24,19 @@ function parseMarkupToolCalls(text) {
return [];
}
const out = [];
for (const m of raw.matchAll(TOOL_CALL_MARKUP_BLOCK_PATTERN)) {
const parsed = parseMarkupSingleToolCall(toStringSafe(m[2]).trim(), toStringSafe(m[3]).trim());
if (parsed) {
out.push(parsed);
}
}
for (const m of raw.matchAll(TOOL_CALL_MARKUP_SELFCLOSE_PATTERN)) {
const parsed = parseMarkupSingleToolCall(toStringSafe(m[1]).trim(), '');
if (parsed) {
out.push(parsed);
for (const wrapper of raw.matchAll(TOOLS_WRAPPER_PATTERN)) {
const body = toStringSafe(wrapper[1]);
for (const block of body.matchAll(TOOL_CALL_MARKUP_BLOCK_PATTERN)) {
const parsed = parseMarkupSingleToolCall(toStringSafe(block[1]).trim());
if (parsed) {
out.push(parsed);
}
}
}
return out;
}
function parseMarkupSingleToolCall(attrs, inner) {
function parseMarkupSingleToolCall(inner) {
// Try inline JSON parse for the inner content.
if (inner) {
try {
@@ -70,28 +51,18 @@ function parseMarkupSingleToolCall(attrs, inner) {
// Not JSON, continue with markup parsing.
}
}
let name = '';
const attrMatch = attrs.match(TOOL_CALL_MARKUP_ATTR_PATTERN);
if (attrMatch && attrMatch[2]) {
name = toStringSafe(attrMatch[2]).trim();
}
if (!name) {
name = extractRawTagValue(findMarkupTagValue(inner, TOOL_CALL_MARKUP_NAME_PATTERNS));
const match = inner.match(TOOL_CALL_CANONICAL_BODY_PATTERN);
if (!match || match.length < 3) {
return null;
}
const name = extractRawTagValue(match[1]).trim();
if (!name) {
return null;
}
let input = {};
const argsRaw = findMarkupTagValue(inner, TOOL_CALL_MARKUP_ARGS_PATTERNS);
if (argsRaw) {
input = parseMarkupInput(argsRaw);
} else {
const kv = parseMarkupKVObject(inner);
if (Object.keys(kv).length > 0) {
input = kv;
}
}
const input = parseMarkupInput(match[2]);
return { name, input };
}
@@ -187,21 +158,6 @@ function unescapeHtml(safe) {
.replace(/&#x27;/g, "'");
}
/**
 * Remove markup tags from a value and return the remaining text.
 *
 * The input is first normalised with `toStringSafe`; every `<...>` run is then
 * replaced by a single space (so adjacent words do not fuse together) and the
 * result is trimmed.
 *
 * @param {*} text - Value whose tags should be stripped.
 * @returns {string} Text with tags replaced by spaces and outer whitespace removed.
 */
function stripTagText(text) {
  const source = toStringSafe(text);
  const withoutTags = source.replace(/<[^>]+>/g, ' ');
  return withoutTags.trim();
}
/**
 * Return the first capture group of the first pattern that yields one.
 *
 * Patterns are tried in iteration order. A pattern that does not match, or
 * that matches without defining capture group 1, is skipped.
 *
 * @param {*} text - Value to search; normalised with `toStringSafe` first.
 * @param {Iterable<RegExp>} patterns - Candidate patterns, highest priority first.
 * @returns {string} The captured value (via `toStringSafe`), or '' when no pattern produces one.
 */
function findMarkupTagValue(text, patterns) {
  const haystack = toStringSafe(text);
  for (const pattern of patterns) {
    const hit = haystack.match(pattern);
    // Skip misses and matches that have no first capture group.
    if (!hit || hit[1] === undefined) {
      continue;
    }
    return toStringSafe(hit[1]);
  }
  return '';
}
function parseToolCallInput(v) {
if (v == null) {
return {};

View File

@@ -3,12 +3,8 @@ const { parseToolCalls } = require('./parse');
// Tag pairs ordered longest-first: wrapper tags checked before inner tags.
const XML_TOOL_TAG_PAIRS = [
{ open: '<tool_calls', close: '</tool_calls>' },
{ open: '<tools', close: '</tools>' },
{ open: '<tool_call', close: '</tool_call>' },
{ open: '<function_calls', close: '</function_calls>' },
{ open: '<function_call', close: '</function_call>' },
{ open: '<invoke', close: '</invoke>' },
{ open: '<tool_use', close: '</tool_use>' },
];
const XML_TOOL_OPENING_TAGS = XML_TOOL_TAG_PAIRS.map(p => p.open);

View File

@@ -1,16 +1,15 @@
'use strict';
const XML_TOOL_SEGMENT_TAGS = [
'<tool_calls>', '<tool_calls\n', '<tool_calls ', '<tool_call>', '<tool_call\n', '<tool_call ',
'<invoke ', '<invoke>', '<function_call', '<function_calls', '<tool_use>',
'<tools>', '<tools\n', '<tools ', '<tool_call>', '<tool_call\n', '<tool_call ',
];
const XML_TOOL_OPENING_TAGS = [
'<tool_calls', '<tool_call', '<invoke', '<function_call', '<function_calls', '<tool_use',
'<tools', '<tool_call',
];
const XML_TOOL_CLOSING_TAGS = [
'</tool_calls>', '</tool_call>', '</invoke>', '</function_call>', '</function_calls>', '</tool_use>',
'</tools>', '</tool_call>',
];
module.exports = {
@@ -18,4 +17,3 @@ module.exports = {
XML_TOOL_OPENING_TAGS,
XML_TOOL_CLOSING_TAGS,
};