mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-07 01:45:27 +08:00
refactor: remove JSON-based tool call parsing from sieve and delete associated compatibility tests
This commit is contained in:
@@ -4,15 +4,10 @@ const {
|
||||
toStringSafe,
|
||||
} = require('./state');
|
||||
const {
|
||||
buildToolCallCandidates,
|
||||
parseToolCallsPayload,
|
||||
parseMarkupToolCalls,
|
||||
parseTextKVToolCalls,
|
||||
stripFencedCodeBlocks,
|
||||
} = require('./parse_payload');
|
||||
const { TOOL_SEGMENT_KEYWORDS } = require('./tool-keywords');
|
||||
|
||||
const TOOL_NAME_LOOSE_PATTERN = /[^a-z0-9]+/g;
|
||||
const TOOL_MARKUP_PREFIXES = ['<tool_call', '<function_call', '<invoke'];
|
||||
|
||||
function extractToolNames(tools) {
|
||||
@@ -51,47 +46,12 @@ function parseToolCallsDetailed(text, toolNames) {
|
||||
return result;
|
||||
}
|
||||
|
||||
const candidates = buildToolCallCandidates(normalized);
|
||||
for (const c of candidates) {
|
||||
if (!isLikelyJSONToolPayloadCandidate(c)) {
|
||||
continue;
|
||||
}
|
||||
const jsonParsed = parseToolCallsPayload(c);
|
||||
if (jsonParsed.length === 0) {
|
||||
continue;
|
||||
}
|
||||
result.sawToolCallSyntax = true;
|
||||
const filteredJSON = filterToolCallsDetailed(jsonParsed, toolNames);
|
||||
result.calls = filteredJSON.calls;
|
||||
result.rejectedToolNames = filteredJSON.rejectedToolNames;
|
||||
result.rejectedByPolicy = filteredJSON.rejectedToolNames.length > 0 && filteredJSON.calls.length === 0;
|
||||
// XML markup parsing only.
|
||||
const parsed = parseMarkupToolCalls(normalized);
|
||||
if (parsed.length === 0) {
|
||||
return result;
|
||||
}
|
||||
let parsed = [];
|
||||
for (const c of candidates) {
|
||||
parsed = parseMarkupToolCalls(c);
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseToolCallsPayload(c);
|
||||
}
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseTextKVToolCalls(c);
|
||||
}
|
||||
if (parsed.length > 0) {
|
||||
result.sawToolCallSyntax = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseMarkupToolCalls(normalized);
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseTextKVToolCalls(normalized);
|
||||
if (parsed.length === 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
result.sawToolCallSyntax = true;
|
||||
}
|
||||
|
||||
result.sawToolCallSyntax = true;
|
||||
const filtered = filterToolCallsDetailed(parsed, toolNames);
|
||||
result.calls = filtered.calls;
|
||||
result.rejectedToolNames = filtered.rejectedToolNames;
|
||||
@@ -113,43 +73,11 @@ function parseStandaloneToolCallsDetailed(text, toolNames) {
|
||||
if (shouldSkipToolCallParsingForCodeFenceExample(trimmed)) {
|
||||
return result;
|
||||
}
|
||||
const candidates = buildToolCallCandidates(trimmed);
|
||||
let parsed = [];
|
||||
for (const c of candidates) {
|
||||
if (!isLikelyJSONToolPayloadCandidate(c)) {
|
||||
continue;
|
||||
}
|
||||
parsed = parseToolCallsPayload(c);
|
||||
if (parsed.length === 0) {
|
||||
continue;
|
||||
}
|
||||
result.sawToolCallSyntax = true;
|
||||
const filteredJSON = filterToolCallsDetailed(parsed, toolNames);
|
||||
result.calls = filteredJSON.calls;
|
||||
result.rejectedToolNames = filteredJSON.rejectedToolNames;
|
||||
result.rejectedByPolicy = filteredJSON.rejectedToolNames.length > 0 && filteredJSON.calls.length === 0;
|
||||
return result;
|
||||
}
|
||||
for (const c of candidates) {
|
||||
parsed = parseMarkupToolCalls(c);
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseToolCallsPayload(c);
|
||||
}
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseTextKVToolCalls(c);
|
||||
}
|
||||
if (parsed.length > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// XML markup parsing only.
|
||||
const parsed = parseMarkupToolCalls(trimmed);
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseMarkupToolCalls(trimmed);
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseTextKVToolCalls(trimmed);
|
||||
if (parsed.length === 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
result.sawToolCallSyntax = true;
|
||||
@@ -183,41 +111,9 @@ function filterToolCallsDetailed(parsed, toolNames) {
|
||||
return { calls, rejectedToolNames: [] };
|
||||
}
|
||||
|
||||
/**
 * Resolve a tool name emitted by the model to one of the allowed tool names.
 *
 * Matching goes from strictest to loosest and stops at the first hit:
 *   1. the trimmed name is a member of `allowed`;
 *   2. the lower-cased name is a key of `allowedCanonical`;
 *   3. the lower-cased text after the last '.' is a key of `allowedCanonical`
 *      (so a namespaced "ns.tool" can resolve to "tool");
 *   4. the name and a key of `allowedCanonical` are equal once everything
 *      matched by TOOL_NAME_LOOSE_PATTERN has been stripped from both.
 *
 * @param {*} name - Raw tool name; coerced with toStringSafe and trimmed.
 * @param {Set<string>} allowed - Names accepted verbatim (queried with `has`).
 * @param {Map<string, string>} allowedCanonical - Looked up with lower-cased
 *   keys; the mapped value is what gets returned.
 * @returns {string} The resolved tool name, or '' when nothing matches.
 */
function resolveAllowedToolName(name, allowed, allowedCanonical) {
  const normalizedName = toStringSafe(name).trim();
  if (!normalizedName) {
    return '';
  }
  if (allowed.has(normalizedName)) {
    return normalizedName;
  }

  const lower = normalizedName.toLowerCase();
  if (allowedCanonical.has(lower)) {
    return allowedCanonical.get(lower);
  }

  // Namespaced form: only the segment after the last dot is considered, and
  // only when that segment is non-empty.
  const idx = lower.lastIndexOf('.');
  if (idx >= 0 && idx < lower.length - 1) {
    const tail = lower.slice(idx + 1);
    if (allowedCanonical.has(tail)) {
      return allowedCanonical.get(tail);
    }
  }

  // Loose form: compare with separators/punctuation removed on both sides.
  const loose = lower.replace(TOOL_NAME_LOOSE_PATTERN, '');
  if (!loose) {
    return '';
  }
  for (const [candidateLower, canonical] of allowedCanonical.entries()) {
    if (candidateLower.replace(TOOL_NAME_LOOSE_PATTERN, '') === loose) {
      return canonical;
    }
  }
  return '';
}
|
||||
|
||||
function looksLikeToolCallSyntax(text) {
|
||||
const lower = toStringSafe(text).toLowerCase();
|
||||
return TOOL_SEGMENT_KEYWORDS.some((kw) => lower.includes(kw))
|
||||
|| TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
|
||||
return TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
|
||||
}
|
||||
|
||||
function shouldSkipToolCallParsingForCodeFenceExample(text) {
|
||||
@@ -228,21 +124,6 @@ function shouldSkipToolCallParsingForCodeFenceExample(text) {
|
||||
return !looksLikeToolCallSyntax(stripped);
|
||||
}
|
||||
|
||||
function isLikelyJSONToolPayloadCandidate(text) {
|
||||
const trimmed = toStringSafe(text).trim();
|
||||
if (!trimmed) {
|
||||
return false;
|
||||
}
|
||||
if (!(trimmed.startsWith('{') || trimmed.startsWith('['))) {
|
||||
return false;
|
||||
}
|
||||
const lower = trimmed.toLowerCase();
|
||||
return lower.includes('tool_calls')
|
||||
|| lower.includes('"function"')
|
||||
|| lower.includes('functioncall')
|
||||
|| lower.includes('"tool_use"');
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
extractToolNames,
|
||||
parseToolCalls,
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
'use strict';
|
||||
|
||||
const TOOL_CALL_PATTERN = /\{\s*["']tool_calls["']\s*:\s*\[(.*?)\]\s*\}/s;
|
||||
const TOOL_CALL_MARKUP_BLOCK_PATTERN = /<(?:[a-z0-9_:-]+:)?(tool_call|function_call|invoke)\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
|
||||
const TOOL_CALL_MARKUP_SELFCLOSE_PATTERN = /<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)\/>/gi;
|
||||
const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
|
||||
@@ -20,14 +19,10 @@ const TOOL_CALL_MARKUP_ARGS_PATTERNS = [
|
||||
/<(?:[a-z0-9_:-]+:)?args\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?args>/i,
|
||||
/<(?:[a-z0-9_:-]+:)?params\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?params>/i,
|
||||
];
|
||||
const TEXT_KV_NAME_PATTERN = /function\.name:\s*([a-zA-Z0-9_.-]+)/gi;
|
||||
|
||||
const {
|
||||
toStringSafe,
|
||||
} = require('./state');
|
||||
const {
|
||||
extractJSONObjectFrom,
|
||||
} = require('./jsonscan');
|
||||
|
||||
function stripFencedCodeBlocks(text) {
|
||||
const t = typeof text === 'string' ? text : '';
|
||||
@@ -37,138 +32,6 @@ function stripFencedCodeBlocks(text) {
|
||||
return t.replace(/```[\s\S]*?```/g, ' ');
|
||||
}
|
||||
|
||||
/**
 * Collect every substring of `text` that might hold a tool-call payload.
 *
 * Candidates are gathered in this order and de-duplicated (first occurrence
 * wins, empty strings dropped):
 *   1. the full text;
 *   2. the contents of each fenced code block (``` or ```json);
 *   3. every object returned by extractToolCallObjects;
 *   4. the span from the first '{' to the last '}';
 *   5. the span from the first '[' to the last ']';
 *   6. a rebuilt `{"tool_calls":[...]}` object when TOOL_CALL_PATTERN matches.
 *
 * @param {*} text - Model output; coerced with toStringSafe.
 * @returns {string[]} Unique, non-empty candidate strings.
 */
function buildToolCallCandidates(text) {
  const source = toStringSafe(text);
  const candidates = [source];

  // One pass over the fenced blocks; the capture group is the fence body
  // without surrounding whitespace.
  for (const fence of source.matchAll(/```(?:json)?\s*([\s\S]*?)\s*```/gi)) {
    if (fence[1]) {
      candidates.push(toStringSafe(fence[1]));
    }
  }

  for (const candidate of extractToolCallObjects(source)) {
    candidates.push(toStringSafe(candidate));
  }

  // Widest brace-delimited and bracket-delimited spans.
  const first = source.indexOf('{');
  const last = source.lastIndexOf('}');
  if (first >= 0 && last > first) {
    candidates.push(toStringSafe(source.slice(first, last + 1)));
  }
  const firstArr = source.indexOf('[');
  const lastArr = source.lastIndexOf(']');
  if (firstArr >= 0 && lastArr > firstArr) {
    candidates.push(toStringSafe(source.slice(firstArr, lastArr + 1)));
  }

  const m = source.match(TOOL_CALL_PATTERN);
  if (m && m[1]) {
    candidates.push(`{"tool_calls":[${m[1]}]}`);
  }

  return [...new Set(candidates.filter(Boolean))];
}
|
||||
|
||||
/**
 * Scan `text` for JSON objects that enclose a tool-call marker.
 *
 * For each marker occurrence (earliest first) the scanner walks backwards
 * over the '{' characters that precede it and asks extractJSONObjectFrom to
 * parse from each one; the first brace that yields a complete object wins and
 * that object's text (trimmed) is collected.
 *
 * @param {*} text - Model output; coerced with toStringSafe.
 * @returns {string[]} Extracted object texts, in order of discovery.
 */
function extractToolCallObjects(text) {
  const raw = toStringSafe(text);
  if (!raw) {
    return [];
  }
  // NOTE(review): marker positions are found in `lower` but applied to `raw`;
  // this assumes toLowerCase() does not change string length — confirm for
  // non-ASCII input.
  const lower = raw.toLowerCase();
  const markers = ['tool_calls', '"function"', 'functioncall', '"tool_use"'];
  const out = [];
  let offset = 0;

  // eslint-disable-next-line no-constant-condition
  while (true) {
    // Pick the marker that occurs earliest at or after `offset`.
    let idx = -1;
    let matched = '';
    for (const marker of markers) {
      const at = lower.indexOf(marker, offset);
      if (at >= 0 && (idx < 0 || at < idx)) {
        idx = at;
        matched = marker;
      }
    }
    if (idx < 0) {
      break;
    }

    // Default: skip past the marker so the scan always moves forward.
    let nextOffset = idx + matched.length;

    // Walk the opening braces before the marker, nearest first. The explicit
    // `> 0` guard matters: lastIndexOf clamps a negative position to 0.
    let start = idx > 0 ? raw.lastIndexOf('{', idx - 1) : -1;
    while (start >= 0) {
      const obj = extractJSONObjectFrom(raw, start);
      if (obj.ok) {
        out.push(raw.slice(start, obj.end).trim());
        // Ensure forward progress even when the matched keyword is outside
        // the extracted JSON object (e.g. closing XML wrapper tags containing
        // "tool_calls" after an earlier JSON arguments object).
        nextOffset = Math.max(obj.end, nextOffset);
        break;
      }
      start = start > 0 ? raw.lastIndexOf('{', start - 1) : -1;
    }
    offset = nextOffset;
  }

  return out;
}
|
||||
|
||||
/**
 * Parse a JSON string into a list of tool calls.
 *
 * Accepted shapes:
 *   - a top-level array, handed to parseToolCallList;
 *   - an object with a truthy `tool_calls`, whose value is handed to
 *     parseToolCallList unless the object looks like a chat message envelope
 *     (see isLikelyChatMessageEnvelope), in which case nothing is returned;
 *   - any other object, treated as a single call via parseToolCallItem.
 *
 * @param {string} payload - Text expected to contain JSON.
 * @returns {Array<object>} Parsed calls; empty when the text is not valid
 *   JSON, decodes to a primitive/null, or yields no call.
 */
function parseToolCallsPayload(payload) {
  let decoded;
  try {
    decoded = JSON.parse(payload);
  } catch (_err) {
    // Not JSON: this parser simply has nothing to report.
    return [];
  }

  if (Array.isArray(decoded)) {
    return parseToolCallList(decoded);
  }
  if (!decoded || typeof decoded !== 'object') {
    return [];
  }
  if (decoded.tool_calls) {
    return isLikelyChatMessageEnvelope(decoded)
      ? []
      : parseToolCallList(decoded.tool_calls);
  }

  const one = parseToolCallItem(decoded);
  return one ? [one] : [];
}
|
||||
|
||||
/**
 * Tell whether an object carrying `tool_calls` looks like a whole chat
 * message rather than a bare tool-call payload.
 *
 * The object must own a `tool_calls` property and additionally either have a
 * `role` of assistant/tool/user/system (trimmed, case-insensitive) or own a
 * `tool_call_id` or `content` property.
 *
 * @param {*} value - Decoded JSON value.
 * @returns {boolean} True when the value looks like a chat message envelope.
 */
function isLikelyChatMessageEnvelope(value) {
  if (!value || typeof value !== 'object' || Array.isArray(value)) {
    return false;
  }
  const owns = (key) => Object.prototype.hasOwnProperty.call(value, key);
  if (!owns('tool_calls')) {
    return false;
  }
  const role = toStringSafe(value.role).trim().toLowerCase();
  if (['assistant', 'tool', 'user', 'system'].includes(role)) {
    return true;
  }
  return owns('tool_call_id') || owns('content');
}
|
||||
|
||||
function parseMarkupToolCalls(text) {
|
||||
const raw = toStringSafe(text).trim();
|
||||
if (!raw) {
|
||||
@@ -190,51 +53,20 @@ function parseMarkupToolCalls(text) {
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * Parse tool calls written as plain-text key/value pairs, e.g.
 *
 *   function.name: search
 *   function.arguments: {"q": "x"}
 *
 * Each name matched by TEXT_KV_NAME_PATTERN owns the text up to the next
 * match (or end of input). Inside that window the first
 * "function.arguments:" marker followed by a '{' starts the arguments object,
 * which is delimited by extractJSONObjectFrom and decoded by
 * parseToolCallInput. Entries with no marker, no brace, or an object that
 * cannot be extracted are skipped.
 *
 * @param {*} text - Model output; coerced with toStringSafe.
 * @returns {Array<{name: string, input: *}>} Parsed calls in document order.
 */
function parseTextKVToolCalls(text) {
  const raw = toStringSafe(text);
  if (!raw) {
    return [];
  }

  const argsMarker = 'function.arguments:';
  const matches = [...raw.matchAll(TEXT_KV_NAME_PATTERN)];
  const out = [];

  for (let i = 0; i < matches.length; i += 1) {
    const match = matches[i];
    const name = toStringSafe(match[1]).trim();
    if (!name) {
      continue;
    }

    // Window owned by this name: from the end of its match up to the start
    // of the next name (or the end of the text).
    const nameEnd = match.index + match[0].length;
    const searchEnd = i + 1 < matches.length ? matches[i + 1].index : raw.length;

    const argIdx = raw.slice(nameEnd, searchEnd).indexOf(argsMarker);
    if (argIdx < 0) {
      continue;
    }
    const argStart = nameEnd + argIdx + argsMarker.length;

    const bracePos = raw.slice(argStart, searchEnd).indexOf('{');
    if (bracePos < 0) {
      continue;
    }
    const objStart = argStart + bracePos;

    const obj = extractJSONObjectFrom(raw, objStart);
    if (!obj.ok) {
      continue;
    }

    out.push({
      name,
      input: parseToolCallInput(raw.slice(objStart, obj.end)),
    });
  }
  return out;
}
|
||||
|
||||
function parseMarkupSingleToolCall(attrs, inner) {
|
||||
const embedded = parseToolCallsPayload(inner);
|
||||
if (embedded.length > 0) {
|
||||
return embedded[0];
|
||||
// Try inline JSON parse for the inner content.
|
||||
if (inner) {
|
||||
try {
|
||||
const decoded = JSON.parse(inner);
|
||||
if (decoded && typeof decoded === 'object' && !Array.isArray(decoded) && decoded.name) {
|
||||
return {
|
||||
name: toStringSafe(decoded.name),
|
||||
input: decoded.input && typeof decoded.input === 'object' && !Array.isArray(decoded.input) ? decoded.input : {},
|
||||
};
|
||||
}
|
||||
} catch (_err) {
|
||||
// Not JSON, continue with markup parsing.
|
||||
}
|
||||
}
|
||||
let name = '';
|
||||
const attrMatch = attrs.match(TOOL_CALL_MARKUP_ATTR_PATTERN);
|
||||
@@ -316,73 +148,6 @@ function findMarkupTagValue(text, patterns) {
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
 * Convert an array of raw tool-call entries into parsed calls.
 *
 * Non-object entries are ignored, and so are entries for which
 * parseToolCallItem returns a falsy value.
 *
 * @param {*} v - Expected to be an array; anything else yields [].
 * @returns {Array<object>} Parsed calls, in input order.
 */
function parseToolCallList(v) {
  if (!Array.isArray(v)) {
    return [];
  }
  const out = [];
  for (const item of v) {
    if (!item || typeof item !== 'object') {
      continue;
    }
    const one = parseToolCallItem(item);
    if (one) {
      out.push(one);
    }
  }
  return out;
}
|
||||
|
||||
/**
 * Normalise one raw tool-call object into `{ name, input }`.
 *
 * Name precedence: `m.name`, then `m.functionCall.name`, then
 * `m.function.name`.
 *
 * Input precedence (first property that exists wins, even if its value is
 * null/undefined): `m.input`, `m.functionCall.args`,
 * `m.functionCall.arguments`, `m.function.arguments`, then the top-level
 * `arguments`, `args`, `parameters`, `params`.
 *
 * @param {object} m - Raw entry; must be a non-null object.
 * @returns {{name: string, input: *}|null} The call with its input passed
 *   through parseToolCallInput, or null when no name could be found.
 */
function parseToolCallItem(m) {
  const owns = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  let name = toStringSafe(m.name);
  let inputRaw = m.input;
  let hasInput = owns(m, 'input');

  // Nested carriers, in precedence order, with the keys each may hold the
  // arguments under.
  const carriers = [
    [m.functionCall, ['args', 'arguments']],
    [m.function, ['arguments']],
  ];
  for (const [carrier, keys] of carriers) {
    if (!carrier || typeof carrier !== 'object') {
      continue;
    }
    if (!name) {
      name = toStringSafe(carrier.name);
    }
    for (const key of keys) {
      if (!hasInput && owns(carrier, key)) {
        inputRaw = carrier[key];
        hasInput = true;
      }
    }
  }

  // Last resort: arguments stored directly on the entry.
  if (!hasInput) {
    for (const k of ['arguments', 'args', 'parameters', 'params']) {
      if (owns(m, k)) {
        inputRaw = m[k];
        hasInput = true;
        break;
      }
    }
  }

  if (!name) {
    return null;
  }

  return {
    name,
    input: parseToolCallInput(inputRaw),
  };
}
|
||||
|
||||
function parseToolCallInput(v) {
|
||||
if (v == null) {
|
||||
return {};
|
||||
@@ -418,8 +183,5 @@ function parseToolCallInput(v) {
|
||||
|
||||
module.exports = {
|
||||
stripFencedCodeBlocks,
|
||||
buildToolCallCandidates,
|
||||
parseToolCallsPayload,
|
||||
parseMarkupToolCalls,
|
||||
parseTextKVToolCalls,
|
||||
};
|
||||
|
||||
@@ -4,12 +4,9 @@ const {
|
||||
noteText,
|
||||
insideCodeFenceWithState,
|
||||
} = require('./state');
|
||||
const { parseStandaloneToolCallsDetailed } = require('./parse');
|
||||
const { extractJSONObjectFrom, trimWrappingJSONFence } = require('./jsonscan');
|
||||
const { trimWrappingJSONFence } = require('./jsonscan');
|
||||
const {
|
||||
TOOL_SEGMENT_KEYWORDS,
|
||||
XML_TOOL_SEGMENT_TAGS,
|
||||
earliestKeywordIndex,
|
||||
} = require('./tool-keywords');
|
||||
const {
|
||||
consumeXMLToolCapture: consumeXMLToolCaptureImpl,
|
||||
@@ -141,30 +138,15 @@ function splitSafeContentForToolDetection(s) {
|
||||
if (!text) {
|
||||
return ['', ''];
|
||||
}
|
||||
const suspiciousStart = findSuspiciousPrefixStart(text);
|
||||
if (suspiciousStart < 0) {
|
||||
return [text, ''];
|
||||
}
|
||||
if (suspiciousStart > 0) {
|
||||
return [text.slice(0, suspiciousStart), text.slice(suspiciousStart)];
|
||||
}
|
||||
return ['', text];
|
||||
}
|
||||
|
||||
function findSuspiciousPrefixStart(s) {
|
||||
let start = -1;
|
||||
for (const needle of ['{', '[', '```']) {
|
||||
const idx = s.lastIndexOf(needle);
|
||||
if (idx > start) {
|
||||
start = idx;
|
||||
// Only hold back partial XML tool tags.
|
||||
const xmlIdx = findPartialXMLToolTagStart(text);
|
||||
if (xmlIdx >= 0) {
|
||||
if (xmlIdx > 0) {
|
||||
return [text.slice(0, xmlIdx), text.slice(xmlIdx)];
|
||||
}
|
||||
return ['', text];
|
||||
}
|
||||
// Also check for partial XML tool tag at end of string.
|
||||
const xmlIdx = findPartialXMLToolTagStart(s);
|
||||
if (xmlIdx >= 0 && xmlIdx > start) {
|
||||
start = xmlIdx;
|
||||
}
|
||||
return start;
|
||||
return [text, ''];
|
||||
}
|
||||
|
||||
function findToolSegmentStart(state, s) {
|
||||
@@ -174,39 +156,23 @@ function findToolSegmentStart(state, s) {
|
||||
const lower = s.toLowerCase();
|
||||
let offset = 0;
|
||||
while (true) {
|
||||
// Check JSON keywords.
|
||||
let { index: bestKeyIdx, keyword: matchedKeyword } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS, offset);
|
||||
// Also check XML tool tags.
|
||||
// Only check XML tool tags.
|
||||
let bestIdx = -1;
|
||||
let matchedTag = '';
|
||||
for (const tag of XML_TOOL_SEGMENT_TAGS) {
|
||||
const idx = lower.indexOf(tag, offset);
|
||||
if (idx >= 0 && (bestKeyIdx < 0 || idx < bestKeyIdx)) {
|
||||
bestKeyIdx = idx;
|
||||
matchedKeyword = tag;
|
||||
if (idx >= 0 && (bestIdx < 0 || idx < bestIdx)) {
|
||||
bestIdx = idx;
|
||||
matchedTag = tag;
|
||||
}
|
||||
}
|
||||
if (bestKeyIdx < 0) {
|
||||
if (bestIdx < 0) {
|
||||
return -1;
|
||||
}
|
||||
// For XML tags, the '<' is itself the segment start.
|
||||
if (s[bestKeyIdx] === '<') {
|
||||
if (!insideCodeFenceWithState(state, s.slice(0, bestKeyIdx))) {
|
||||
return bestKeyIdx;
|
||||
}
|
||||
offset = bestKeyIdx + matchedKeyword.length;
|
||||
continue;
|
||||
if (!insideCodeFenceWithState(state, s.slice(0, bestIdx))) {
|
||||
return bestIdx;
|
||||
}
|
||||
const keyIdx = bestKeyIdx;
|
||||
const start = s.slice(0, keyIdx).lastIndexOf('{');
|
||||
let candidateStart = start >= 0 ? start : keyIdx;
|
||||
// If the keyword matched inside an XML tag (e.g. "tool_calls" in "<tool_calls>"),
|
||||
// back up past the '<' to capture the full tag.
|
||||
if (candidateStart > 0 && s[candidateStart - 1] === '<') {
|
||||
candidateStart--;
|
||||
}
|
||||
if (!insideCodeFenceWithState(state, s.slice(0, candidateStart))) {
|
||||
return candidateStart;
|
||||
}
|
||||
offset = keyIdx + matchedKeyword.length;
|
||||
offset = bestIdx + matchedTag.length;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -216,7 +182,7 @@ function consumeToolCapture(state, toolNames) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
|
||||
// Try XML tool call extraction first.
|
||||
// XML-only tool call extraction.
|
||||
const xmlResult = consumeXMLToolCaptureImpl(captured, toolNames, trimWrappingJSONFence);
|
||||
if (xmlResult.ready) {
|
||||
return xmlResult;
|
||||
@@ -226,50 +192,12 @@ function consumeToolCapture(state, toolNames) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
|
||||
const lower = captured.toLowerCase();
|
||||
const { index: keyIdx } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS);
|
||||
if (keyIdx < 0) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
const start = captured.slice(0, keyIdx).lastIndexOf('{');
|
||||
const actualStart = start >= 0 ? start : keyIdx;
|
||||
const obj = extractJSONObjectFrom(captured, actualStart);
|
||||
if (!obj.ok) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
const prefixPart = captured.slice(0, actualStart);
|
||||
const suffixPart = captured.slice(obj.end);
|
||||
if (insideCodeFenceWithState(state, prefixPart)) {
|
||||
return {
|
||||
ready: true,
|
||||
prefix: captured,
|
||||
calls: [],
|
||||
suffix: '',
|
||||
};
|
||||
}
|
||||
const parsed = parseStandaloneToolCallsDetailed(captured.slice(actualStart, obj.end), toolNames);
|
||||
if (!Array.isArray(parsed.calls) || parsed.calls.length === 0) {
|
||||
if (parsed.sawToolCallSyntax && parsed.rejectedByPolicy) {
|
||||
return {
|
||||
ready: true,
|
||||
prefix: prefixPart,
|
||||
calls: [],
|
||||
suffix: suffixPart,
|
||||
};
|
||||
}
|
||||
return {
|
||||
ready: true,
|
||||
prefix: captured,
|
||||
calls: [],
|
||||
suffix: '',
|
||||
};
|
||||
}
|
||||
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
||||
// No XML tool tags detected — release captured content as text.
|
||||
return {
|
||||
ready: true,
|
||||
prefix: trimmedFence.prefix,
|
||||
calls: parsed.calls,
|
||||
suffix: trimmedFence.suffix,
|
||||
prefix: captured,
|
||||
calls: [],
|
||||
suffix: '',
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1,15 +1,7 @@
|
||||
'use strict';
|
||||
|
||||
// Lower-case substrings whose presence marks the possible start of a
// non-XML (JSON or text key/value) tool-call segment. Callers in this
// project search lower-cased text for them.
const TOOL_SEGMENT_KEYWORDS = [
  'tool_calls',
  '"function"',
  'function.name:',
  'functioncall',
  '"tool_use"',
];
|
||||
|
||||
const XML_TOOL_SEGMENT_TAGS = [
|
||||
'<tool_calls>', '<tool_calls\n', '<tool_call>', '<tool_call\n',
|
||||
'<tool_calls>', '<tool_calls\n', '<tool_calls ', '<tool_call>', '<tool_call\n', '<tool_call ',
|
||||
'<invoke ', '<invoke>', '<function_call', '<function_calls', '<tool_use>',
|
||||
];
|
||||
|
||||
@@ -21,26 +13,9 @@ const XML_TOOL_CLOSING_TAGS = [
|
||||
'</tool_calls>', '</tool_call>', '</invoke>', '</function_call>', '</function_calls>', '</tool_use>',
|
||||
];
|
||||
|
||||
/**
 * Find the keyword that occurs earliest in `text` at or after `offset`.
 *
 * The search is case-sensitive; when two keywords start at the same index
 * the one listed first is kept.
 *
 * @param {string} text - Text to search.
 * @param {string[]} [keywords=TOOL_SEGMENT_KEYWORDS] - Keywords to look for.
 * @param {number} [offset=0] - Index at which the search starts.
 * @returns {{index: number, keyword: string}} Position and text of the
 *   earliest keyword, or `{ index: -1, keyword: '' }` when none is found or
 *   `text` is empty.
 */
function earliestKeywordIndex(text, keywords = TOOL_SEGMENT_KEYWORDS, offset = 0) {
  if (!text) {
    return { index: -1, keyword: '' };
  }
  let index = -1;
  let keyword = '';
  for (const kw of keywords) {
    const candidate = text.indexOf(kw, offset);
    if (candidate >= 0 && (index < 0 || candidate < index)) {
      index = candidate;
      keyword = kw;
    }
  }
  return { index, keyword };
}
|
||||
|
||||
module.exports = {
|
||||
TOOL_SEGMENT_KEYWORDS,
|
||||
XML_TOOL_SEGMENT_TAGS,
|
||||
XML_TOOL_OPENING_TAGS,
|
||||
XML_TOOL_CLOSING_TAGS,
|
||||
earliestKeywordIndex,
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user