refactor: remove JSON-based tool call parsing from sieve and delete associated compatibility tests

This commit is contained in:
CJACK
2026-04-19 13:39:47 +08:00
parent 0c644d1f4d
commit c945f49fc4
26 changed files with 91 additions and 934 deletions

View File

@@ -4,15 +4,10 @@ const {
toStringSafe,
} = require('./state');
const {
buildToolCallCandidates,
parseToolCallsPayload,
parseMarkupToolCalls,
parseTextKVToolCalls,
stripFencedCodeBlocks,
} = require('./parse_payload');
const { TOOL_SEGMENT_KEYWORDS } = require('./tool-keywords');
const TOOL_NAME_LOOSE_PATTERN = /[^a-z0-9]+/g;
const TOOL_MARKUP_PREFIXES = ['<tool_call', '<function_call', '<invoke'];
function extractToolNames(tools) {
@@ -51,47 +46,12 @@ function parseToolCallsDetailed(text, toolNames) {
return result;
}
const candidates = buildToolCallCandidates(normalized);
for (const c of candidates) {
if (!isLikelyJSONToolPayloadCandidate(c)) {
continue;
}
const jsonParsed = parseToolCallsPayload(c);
if (jsonParsed.length === 0) {
continue;
}
result.sawToolCallSyntax = true;
const filteredJSON = filterToolCallsDetailed(jsonParsed, toolNames);
result.calls = filteredJSON.calls;
result.rejectedToolNames = filteredJSON.rejectedToolNames;
result.rejectedByPolicy = filteredJSON.rejectedToolNames.length > 0 && filteredJSON.calls.length === 0;
// XML markup parsing only.
const parsed = parseMarkupToolCalls(normalized);
if (parsed.length === 0) {
return result;
}
let parsed = [];
for (const c of candidates) {
parsed = parseMarkupToolCalls(c);
if (parsed.length === 0) {
parsed = parseToolCallsPayload(c);
}
if (parsed.length === 0) {
parsed = parseTextKVToolCalls(c);
}
if (parsed.length > 0) {
result.sawToolCallSyntax = true;
break;
}
}
if (parsed.length === 0) {
parsed = parseMarkupToolCalls(normalized);
if (parsed.length === 0) {
parsed = parseTextKVToolCalls(normalized);
if (parsed.length === 0) {
return result;
}
}
result.sawToolCallSyntax = true;
}
result.sawToolCallSyntax = true;
const filtered = filterToolCallsDetailed(parsed, toolNames);
result.calls = filtered.calls;
result.rejectedToolNames = filtered.rejectedToolNames;
@@ -113,43 +73,11 @@ function parseStandaloneToolCallsDetailed(text, toolNames) {
if (shouldSkipToolCallParsingForCodeFenceExample(trimmed)) {
return result;
}
const candidates = buildToolCallCandidates(trimmed);
let parsed = [];
for (const c of candidates) {
if (!isLikelyJSONToolPayloadCandidate(c)) {
continue;
}
parsed = parseToolCallsPayload(c);
if (parsed.length === 0) {
continue;
}
result.sawToolCallSyntax = true;
const filteredJSON = filterToolCallsDetailed(parsed, toolNames);
result.calls = filteredJSON.calls;
result.rejectedToolNames = filteredJSON.rejectedToolNames;
result.rejectedByPolicy = filteredJSON.rejectedToolNames.length > 0 && filteredJSON.calls.length === 0;
return result;
}
for (const c of candidates) {
parsed = parseMarkupToolCalls(c);
if (parsed.length === 0) {
parsed = parseToolCallsPayload(c);
}
if (parsed.length === 0) {
parsed = parseTextKVToolCalls(c);
}
if (parsed.length > 0) {
break;
}
}
// XML markup parsing only.
const parsed = parseMarkupToolCalls(trimmed);
if (parsed.length === 0) {
parsed = parseMarkupToolCalls(trimmed);
if (parsed.length === 0) {
parsed = parseTextKVToolCalls(trimmed);
if (parsed.length === 0) {
return result;
}
}
return result;
}
result.sawToolCallSyntax = true;
@@ -183,41 +111,9 @@ function filterToolCallsDetailed(parsed, toolNames) {
return { calls, rejectedToolNames: [] };
}
function resolveAllowedToolName(name, allowed, allowedCanonical) {
const normalizedName = toStringSafe(name).trim();
if (!normalizedName) {
return '';
}
if (allowed.has(normalizedName)) {
return normalizedName;
}
const lower = normalizedName.toLowerCase();
if (allowedCanonical.has(lower)) {
return allowedCanonical.get(lower);
}
const idx = lower.lastIndexOf('.');
if (idx >= 0 && idx < lower.length - 1) {
const tail = lower.slice(idx + 1);
if (allowedCanonical.has(tail)) {
return allowedCanonical.get(tail);
}
}
const loose = lower.replace(TOOL_NAME_LOOSE_PATTERN, '');
if (!loose) {
return '';
}
for (const [candidateLower, canonical] of allowedCanonical.entries()) {
if (candidateLower.replace(TOOL_NAME_LOOSE_PATTERN, '') === loose) {
return canonical;
}
}
return '';
}
function looksLikeToolCallSyntax(text) {
const lower = toStringSafe(text).toLowerCase();
return TOOL_SEGMENT_KEYWORDS.some((kw) => lower.includes(kw))
|| TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
return TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
}
function shouldSkipToolCallParsingForCodeFenceExample(text) {
@@ -228,21 +124,6 @@ function shouldSkipToolCallParsingForCodeFenceExample(text) {
return !looksLikeToolCallSyntax(stripped);
}
function isLikelyJSONToolPayloadCandidate(text) {
const trimmed = toStringSafe(text).trim();
if (!trimmed) {
return false;
}
if (!(trimmed.startsWith('{') || trimmed.startsWith('['))) {
return false;
}
const lower = trimmed.toLowerCase();
return lower.includes('tool_calls')
|| lower.includes('"function"')
|| lower.includes('functioncall')
|| lower.includes('"tool_use"');
}
module.exports = {
extractToolNames,
parseToolCalls,

View File

@@ -1,6 +1,5 @@
'use strict';
const TOOL_CALL_PATTERN = /\{\s*["']tool_calls["']\s*:\s*\[(.*?)\]\s*\}/s;
const TOOL_CALL_MARKUP_BLOCK_PATTERN = /<(?:[a-z0-9_:-]+:)?(tool_call|function_call|invoke)\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
const TOOL_CALL_MARKUP_SELFCLOSE_PATTERN = /<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)\/>/gi;
const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
@@ -20,14 +19,10 @@ const TOOL_CALL_MARKUP_ARGS_PATTERNS = [
/<(?:[a-z0-9_:-]+:)?args\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?args>/i,
/<(?:[a-z0-9_:-]+:)?params\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?params>/i,
];
const TEXT_KV_NAME_PATTERN = /function\.name:\s*([a-zA-Z0-9_.-]+)/gi;
const {
toStringSafe,
} = require('./state');
const {
extractJSONObjectFrom,
} = require('./jsonscan');
function stripFencedCodeBlocks(text) {
const t = typeof text === 'string' ? text : '';
@@ -37,138 +32,6 @@ function stripFencedCodeBlocks(text) {
return t.replace(/```[\s\S]*?```/g, ' ');
}
function buildToolCallCandidates(text) {
const trimmed = toStringSafe(text);
const candidates = [trimmed];
const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/gi) || [];
for (const block of fenced) {
const m = block.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
if (m && m[1]) {
candidates.push(toStringSafe(m[1]));
}
}
for (const candidate of extractToolCallObjects(trimmed)) {
candidates.push(toStringSafe(candidate));
}
const first = trimmed.indexOf('{');
const last = trimmed.lastIndexOf('}');
if (first >= 0 && last > first) {
candidates.push(toStringSafe(trimmed.slice(first, last + 1)));
}
const firstArr = trimmed.indexOf('[');
const lastArr = trimmed.lastIndexOf(']');
if (firstArr >= 0 && lastArr > firstArr) {
candidates.push(toStringSafe(trimmed.slice(firstArr, lastArr + 1)));
}
const m = trimmed.match(TOOL_CALL_PATTERN);
if (m && m[1]) {
candidates.push(`{"tool_calls":[${m[1]}]}`);
}
return [...new Set(candidates.filter(Boolean))];
}
function extractToolCallObjects(text) {
const raw = toStringSafe(text);
if (!raw) {
return [];
}
const lower = raw.toLowerCase();
const out = [];
let offset = 0;
// eslint-disable-next-line no-constant-condition
while (true) {
const idxToolCalls = lower.indexOf('tool_calls', offset);
const idxFunction = lower.indexOf('"function"', offset);
const idxFunctionCall = lower.indexOf('functioncall', offset);
const idxToolUse = lower.indexOf('"tool_use"', offset);
let idx = -1;
let matched = '';
if (idxToolCalls >= 0 && (idxFunction < 0 || idxToolCalls <= idxFunction)) {
idx = idxToolCalls;
matched = 'tool_calls';
} else if (idxFunction >= 0) {
idx = idxFunction;
matched = '"function"';
}
if (idxFunctionCall >= 0 && (idx < 0 || idxFunctionCall < idx)) {
idx = idxFunctionCall;
matched = 'functioncall';
}
if (idxToolUse >= 0 && (idx < 0 || idxToolUse < idx)) {
idx = idxToolUse;
matched = '"tool_use"';
}
if (idx < 0) {
break;
}
let start = raw.slice(0, idx).lastIndexOf('{');
while (start >= 0) {
const obj = extractJSONObjectFrom(raw, start);
if (obj.ok) {
out.push(raw.slice(start, obj.end).trim());
// Ensure forward progress even when the matched keyword is outside
// the extracted JSON object (e.g. closing XML wrapper tags containing
// "tool_calls" after an earlier JSON arguments object).
offset = Math.max(obj.end, idx + matched.length);
idx = -1;
break;
}
start = raw.slice(0, start).lastIndexOf('{');
}
if (idx >= 0) {
offset = idx + matched.length;
}
}
return out;
}
function parseToolCallsPayload(payload) {
let decoded;
try {
decoded = JSON.parse(payload);
} catch (_err) {
return [];
}
if (Array.isArray(decoded)) {
return parseToolCallList(decoded);
}
if (!decoded || typeof decoded !== 'object') {
return [];
}
if (decoded.tool_calls) {
if (isLikelyChatMessageEnvelope(decoded)) {
return [];
}
return parseToolCallList(decoded.tool_calls);
}
const one = parseToolCallItem(decoded);
return one ? [one] : [];
}
function isLikelyChatMessageEnvelope(value) {
if (!value || typeof value !== 'object' || Array.isArray(value)) {
return false;
}
if (!Object.prototype.hasOwnProperty.call(value, 'tool_calls')) {
return false;
}
const role = toStringSafe(value.role).trim().toLowerCase();
if (role === 'assistant' || role === 'tool' || role === 'user' || role === 'system') {
return true;
}
return Object.prototype.hasOwnProperty.call(value, 'tool_call_id')
|| Object.prototype.hasOwnProperty.call(value, 'content');
}
function parseMarkupToolCalls(text) {
const raw = toStringSafe(text).trim();
if (!raw) {
@@ -190,51 +53,20 @@ function parseMarkupToolCalls(text) {
return out;
}
function parseTextKVToolCalls(text) {
const raw = toStringSafe(text);
if (!raw) {
return [];
}
const out = [];
const matches = [...raw.matchAll(TEXT_KV_NAME_PATTERN)];
if (matches.length === 0) {
return out;
}
for (let i = 0; i < matches.length; i += 1) {
const match = matches[i];
const name = toStringSafe(match[1]).trim();
if (!name) {
continue;
}
const nameEnd = match.index + toStringSafe(match[0]).length;
const searchEnd = i + 1 < matches.length ? matches[i + 1].index : raw.length;
const searchArea = raw.slice(nameEnd, searchEnd);
const argIdx = searchArea.indexOf('function.arguments:');
if (argIdx < 0) {
continue;
}
const argStart = nameEnd + argIdx + 'function.arguments:'.length;
const bracePos = raw.slice(argStart, searchEnd).indexOf('{');
if (bracePos < 0) {
continue;
}
const objStart = argStart + bracePos;
const obj = extractJSONObjectFrom(raw, objStart);
if (!obj.ok) {
continue;
}
out.push({
name,
input: parseToolCallInput(raw.slice(objStart, obj.end)),
});
}
return out;
}
function parseMarkupSingleToolCall(attrs, inner) {
const embedded = parseToolCallsPayload(inner);
if (embedded.length > 0) {
return embedded[0];
// Try inline JSON parse for the inner content.
if (inner) {
try {
const decoded = JSON.parse(inner);
if (decoded && typeof decoded === 'object' && !Array.isArray(decoded) && decoded.name) {
return {
name: toStringSafe(decoded.name),
input: decoded.input && typeof decoded.input === 'object' && !Array.isArray(decoded.input) ? decoded.input : {},
};
}
} catch (_err) {
// Not JSON, continue with markup parsing.
}
}
let name = '';
const attrMatch = attrs.match(TOOL_CALL_MARKUP_ATTR_PATTERN);
@@ -316,73 +148,6 @@ function findMarkupTagValue(text, patterns) {
return '';
}
function parseToolCallList(v) {
if (!Array.isArray(v)) {
return [];
}
const out = [];
for (const item of v) {
if (!item || typeof item !== 'object') {
continue;
}
const one = parseToolCallItem(item);
if (one) {
out.push(one);
}
}
return out;
}
function parseToolCallItem(m) {
let name = toStringSafe(m.name);
let inputRaw = m.input;
let hasInput = Object.prototype.hasOwnProperty.call(m, 'input');
const fnCall = m.functionCall && typeof m.functionCall === 'object' ? m.functionCall : null;
if (fnCall) {
if (!name) {
name = toStringSafe(fnCall.name);
}
if (!hasInput && Object.prototype.hasOwnProperty.call(fnCall, 'args')) {
inputRaw = fnCall.args;
hasInput = true;
}
if (!hasInput && Object.prototype.hasOwnProperty.call(fnCall, 'arguments')) {
inputRaw = fnCall.arguments;
hasInput = true;
}
}
const fn = m.function && typeof m.function === 'object' ? m.function : null;
if (fn) {
if (!name) {
name = toStringSafe(fn.name);
}
if (!hasInput && Object.prototype.hasOwnProperty.call(fn, 'arguments')) {
inputRaw = fn.arguments;
hasInput = true;
}
}
if (!hasInput) {
for (const k of ['arguments', 'args', 'parameters', 'params']) {
if (Object.prototype.hasOwnProperty.call(m, k)) {
inputRaw = m[k];
hasInput = true;
break;
}
}
}
if (!name) {
return null;
}
return {
name,
input: parseToolCallInput(inputRaw),
};
}
function parseToolCallInput(v) {
if (v == null) {
return {};
@@ -418,8 +183,5 @@ function parseToolCallInput(v) {
module.exports = {
stripFencedCodeBlocks,
buildToolCallCandidates,
parseToolCallsPayload,
parseMarkupToolCalls,
parseTextKVToolCalls,
};

View File

@@ -4,12 +4,9 @@ const {
noteText,
insideCodeFenceWithState,
} = require('./state');
const { parseStandaloneToolCallsDetailed } = require('./parse');
const { extractJSONObjectFrom, trimWrappingJSONFence } = require('./jsonscan');
const { trimWrappingJSONFence } = require('./jsonscan');
const {
TOOL_SEGMENT_KEYWORDS,
XML_TOOL_SEGMENT_TAGS,
earliestKeywordIndex,
} = require('./tool-keywords');
const {
consumeXMLToolCapture: consumeXMLToolCaptureImpl,
@@ -141,30 +138,15 @@ function splitSafeContentForToolDetection(s) {
if (!text) {
return ['', ''];
}
const suspiciousStart = findSuspiciousPrefixStart(text);
if (suspiciousStart < 0) {
return [text, ''];
}
if (suspiciousStart > 0) {
return [text.slice(0, suspiciousStart), text.slice(suspiciousStart)];
}
return ['', text];
}
function findSuspiciousPrefixStart(s) {
let start = -1;
for (const needle of ['{', '[', '```']) {
const idx = s.lastIndexOf(needle);
if (idx > start) {
start = idx;
// Only hold back partial XML tool tags.
const xmlIdx = findPartialXMLToolTagStart(text);
if (xmlIdx >= 0) {
if (xmlIdx > 0) {
return [text.slice(0, xmlIdx), text.slice(xmlIdx)];
}
return ['', text];
}
// Also check for partial XML tool tag at end of string.
const xmlIdx = findPartialXMLToolTagStart(s);
if (xmlIdx >= 0 && xmlIdx > start) {
start = xmlIdx;
}
return start;
return [text, ''];
}
function findToolSegmentStart(state, s) {
@@ -174,39 +156,23 @@ function findToolSegmentStart(state, s) {
const lower = s.toLowerCase();
let offset = 0;
while (true) {
// Check JSON keywords.
let { index: bestKeyIdx, keyword: matchedKeyword } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS, offset);
// Also check XML tool tags.
// Only check XML tool tags.
let bestIdx = -1;
let matchedTag = '';
for (const tag of XML_TOOL_SEGMENT_TAGS) {
const idx = lower.indexOf(tag, offset);
if (idx >= 0 && (bestKeyIdx < 0 || idx < bestKeyIdx)) {
bestKeyIdx = idx;
matchedKeyword = tag;
if (idx >= 0 && (bestIdx < 0 || idx < bestIdx)) {
bestIdx = idx;
matchedTag = tag;
}
}
if (bestKeyIdx < 0) {
if (bestIdx < 0) {
return -1;
}
// For XML tags, the '<' is itself the segment start.
if (s[bestKeyIdx] === '<') {
if (!insideCodeFenceWithState(state, s.slice(0, bestKeyIdx))) {
return bestKeyIdx;
}
offset = bestKeyIdx + matchedKeyword.length;
continue;
if (!insideCodeFenceWithState(state, s.slice(0, bestIdx))) {
return bestIdx;
}
const keyIdx = bestKeyIdx;
const start = s.slice(0, keyIdx).lastIndexOf('{');
let candidateStart = start >= 0 ? start : keyIdx;
// If the keyword matched inside an XML tag (e.g. "tool_calls" in "<tool_calls>"),
// back up past the '<' to capture the full tag.
if (candidateStart > 0 && s[candidateStart - 1] === '<') {
candidateStart--;
}
if (!insideCodeFenceWithState(state, s.slice(0, candidateStart))) {
return candidateStart;
}
offset = keyIdx + matchedKeyword.length;
offset = bestIdx + matchedTag.length;
}
}
@@ -216,7 +182,7 @@ function consumeToolCapture(state, toolNames) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
// Try XML tool call extraction first.
// XML-only tool call extraction.
const xmlResult = consumeXMLToolCaptureImpl(captured, toolNames, trimWrappingJSONFence);
if (xmlResult.ready) {
return xmlResult;
@@ -226,50 +192,12 @@ function consumeToolCapture(state, toolNames) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
const lower = captured.toLowerCase();
const { index: keyIdx } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS);
if (keyIdx < 0) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
const start = captured.slice(0, keyIdx).lastIndexOf('{');
const actualStart = start >= 0 ? start : keyIdx;
const obj = extractJSONObjectFrom(captured, actualStart);
if (!obj.ok) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
const prefixPart = captured.slice(0, actualStart);
const suffixPart = captured.slice(obj.end);
if (insideCodeFenceWithState(state, prefixPart)) {
return {
ready: true,
prefix: captured,
calls: [],
suffix: '',
};
}
const parsed = parseStandaloneToolCallsDetailed(captured.slice(actualStart, obj.end), toolNames);
if (!Array.isArray(parsed.calls) || parsed.calls.length === 0) {
if (parsed.sawToolCallSyntax && parsed.rejectedByPolicy) {
return {
ready: true,
prefix: prefixPart,
calls: [],
suffix: suffixPart,
};
}
return {
ready: true,
prefix: captured,
calls: [],
suffix: '',
};
}
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
// No XML tool tags detected — release captured content as text.
return {
ready: true,
prefix: trimmedFence.prefix,
calls: parsed.calls,
suffix: trimmedFence.suffix,
prefix: captured,
calls: [],
suffix: '',
};
}

View File

@@ -1,15 +1,7 @@
'use strict';
const TOOL_SEGMENT_KEYWORDS = [
'tool_calls',
'"function"',
'function.name:',
'functioncall',
'"tool_use"',
];
const XML_TOOL_SEGMENT_TAGS = [
'<tool_calls>', '<tool_calls\n', '<tool_call>', '<tool_call\n',
'<tool_calls>', '<tool_calls\n', '<tool_calls ', '<tool_call>', '<tool_call\n', '<tool_call ',
'<invoke ', '<invoke>', '<function_call', '<function_calls', '<tool_use>',
];
@@ -21,26 +13,9 @@ const XML_TOOL_CLOSING_TAGS = [
'</tool_calls>', '</tool_call>', '</invoke>', '</function_call>', '</function_calls>', '</tool_use>',
];
function earliestKeywordIndex(text, keywords = TOOL_SEGMENT_KEYWORDS, offset = 0) {
if (!text) {
return { index: -1, keyword: '' };
}
let index = -1;
let keyword = '';
for (const kw of keywords) {
const candidate = text.indexOf(kw, offset);
if (candidate >= 0 && (index < 0 || candidate < index)) {
index = candidate;
keyword = kw;
}
}
return { index, keyword };
}
module.exports = {
TOOL_SEGMENT_KEYWORDS,
XML_TOOL_SEGMENT_TAGS,
XML_TOOL_OPENING_TAGS,
XML_TOOL_CLOSING_TAGS,
earliestKeywordIndex,
};