mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-04 08:25:26 +08:00
refactor: remove JSON-based tool call parsing from sieve and delete associated compatibility tests
This commit is contained in:
@@ -4,15 +4,10 @@ const {
|
||||
toStringSafe,
|
||||
} = require('./state');
|
||||
const {
|
||||
buildToolCallCandidates,
|
||||
parseToolCallsPayload,
|
||||
parseMarkupToolCalls,
|
||||
parseTextKVToolCalls,
|
||||
stripFencedCodeBlocks,
|
||||
} = require('./parse_payload');
|
||||
const { TOOL_SEGMENT_KEYWORDS } = require('./tool-keywords');
|
||||
|
||||
const TOOL_NAME_LOOSE_PATTERN = /[^a-z0-9]+/g;
|
||||
const TOOL_MARKUP_PREFIXES = ['<tool_call', '<function_call', '<invoke'];
|
||||
|
||||
function extractToolNames(tools) {
|
||||
@@ -51,47 +46,12 @@ function parseToolCallsDetailed(text, toolNames) {
|
||||
return result;
|
||||
}
|
||||
|
||||
const candidates = buildToolCallCandidates(normalized);
|
||||
for (const c of candidates) {
|
||||
if (!isLikelyJSONToolPayloadCandidate(c)) {
|
||||
continue;
|
||||
}
|
||||
const jsonParsed = parseToolCallsPayload(c);
|
||||
if (jsonParsed.length === 0) {
|
||||
continue;
|
||||
}
|
||||
result.sawToolCallSyntax = true;
|
||||
const filteredJSON = filterToolCallsDetailed(jsonParsed, toolNames);
|
||||
result.calls = filteredJSON.calls;
|
||||
result.rejectedToolNames = filteredJSON.rejectedToolNames;
|
||||
result.rejectedByPolicy = filteredJSON.rejectedToolNames.length > 0 && filteredJSON.calls.length === 0;
|
||||
// XML markup parsing only.
|
||||
const parsed = parseMarkupToolCalls(normalized);
|
||||
if (parsed.length === 0) {
|
||||
return result;
|
||||
}
|
||||
let parsed = [];
|
||||
for (const c of candidates) {
|
||||
parsed = parseMarkupToolCalls(c);
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseToolCallsPayload(c);
|
||||
}
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseTextKVToolCalls(c);
|
||||
}
|
||||
if (parsed.length > 0) {
|
||||
result.sawToolCallSyntax = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseMarkupToolCalls(normalized);
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseTextKVToolCalls(normalized);
|
||||
if (parsed.length === 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
result.sawToolCallSyntax = true;
|
||||
}
|
||||
|
||||
result.sawToolCallSyntax = true;
|
||||
const filtered = filterToolCallsDetailed(parsed, toolNames);
|
||||
result.calls = filtered.calls;
|
||||
result.rejectedToolNames = filtered.rejectedToolNames;
|
||||
@@ -113,43 +73,11 @@ function parseStandaloneToolCallsDetailed(text, toolNames) {
|
||||
if (shouldSkipToolCallParsingForCodeFenceExample(trimmed)) {
|
||||
return result;
|
||||
}
|
||||
const candidates = buildToolCallCandidates(trimmed);
|
||||
let parsed = [];
|
||||
for (const c of candidates) {
|
||||
if (!isLikelyJSONToolPayloadCandidate(c)) {
|
||||
continue;
|
||||
}
|
||||
parsed = parseToolCallsPayload(c);
|
||||
if (parsed.length === 0) {
|
||||
continue;
|
||||
}
|
||||
result.sawToolCallSyntax = true;
|
||||
const filteredJSON = filterToolCallsDetailed(parsed, toolNames);
|
||||
result.calls = filteredJSON.calls;
|
||||
result.rejectedToolNames = filteredJSON.rejectedToolNames;
|
||||
result.rejectedByPolicy = filteredJSON.rejectedToolNames.length > 0 && filteredJSON.calls.length === 0;
|
||||
return result;
|
||||
}
|
||||
for (const c of candidates) {
|
||||
parsed = parseMarkupToolCalls(c);
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseToolCallsPayload(c);
|
||||
}
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseTextKVToolCalls(c);
|
||||
}
|
||||
if (parsed.length > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// XML markup parsing only.
|
||||
const parsed = parseMarkupToolCalls(trimmed);
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseMarkupToolCalls(trimmed);
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseTextKVToolCalls(trimmed);
|
||||
if (parsed.length === 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
result.sawToolCallSyntax = true;
|
||||
@@ -183,41 +111,9 @@ function filterToolCallsDetailed(parsed, toolNames) {
|
||||
return { calls, rejectedToolNames: [] };
|
||||
}
|
||||
|
||||
/**
 * Map a tool name produced by the model onto one of the declared tool names.
 *
 * Matching is attempted in this order, first hit wins:
 *   1. exact match of the trimmed name against `allowed`;
 *   2. lowercase match against the keys of `allowedCanonical`;
 *   3. lowercase match of the segment after the last '.' (namespaced names
 *      such as "company.fs.read_file");
 *   4. "loose" match: both sides lowercased with every run of characters
 *      outside [a-z0-9] removed (so "read-file" matches "read_file").
 *
 * @param {*} name - Candidate tool name; coerced with toStringSafe and trimmed.
 * @param {{has: Function}} allowed - Set-like collection of declared names.
 * @param {{has: Function, get: Function, entries: Function}} allowedCanonical -
 *   Map-like lookup whose keys are compared against the lowercased candidate
 *   and whose values are returned as the canonical name.
 *   NOTE(review): keys are presumably already lowercase — confirm at the call site.
 * @returns {string} The canonical tool name, or '' when nothing matches.
 */
function resolveAllowedToolName(name, allowed, allowedCanonical) {
  const normalizedName = toStringSafe(name).trim();
  if (!normalizedName) {
    return '';
  }
  if (allowed.has(normalizedName)) {
    return normalizedName;
  }

  const lower = normalizedName.toLowerCase();
  if (allowedCanonical.has(lower)) {
    return allowedCanonical.get(lower);
  }

  // Namespaced form: only the last dotted segment is considered, and a
  // trailing '.' (empty segment) is ignored.
  const dotAt = lower.lastIndexOf('.');
  if (dotAt >= 0 && dotAt < lower.length - 1) {
    const tail = lower.slice(dotAt + 1);
    if (allowedCanonical.has(tail)) {
      return allowedCanonical.get(tail);
    }
  }

  // Loose form: compare with all separators/punctuation stripped.
  const loose = lower.replace(TOOL_NAME_LOOSE_PATTERN, '');
  if (!loose) {
    return '';
  }
  for (const [candidateLower, canonical] of allowedCanonical.entries()) {
    if (candidateLower.replace(TOOL_NAME_LOOSE_PATTERN, '') === loose) {
      return canonical;
    }
  }
  return '';
}
|
||||
|
||||
// Reports whether the lowercased text contains any entry of
// TOOL_SEGMENT_KEYWORDS or any entry of TOOL_MARKUP_PREFIXES.
// NOTE(review): this block holds two consecutive `return` statements, which
// looks like the old and new lines of a diff merged together. As written only
// the first one can execute; confirm which variant is intended.
function looksLikeToolCallSyntax(text) {
|
||||
const lower = toStringSafe(text).toLowerCase();
|
||||
return TOOL_SEGMENT_KEYWORDS.some((kw) => lower.includes(kw))
|
||||
|| TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
|
||||
// NOTE(review): unreachable after the return above (markup-prefix-only check).
return TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
|
||||
}
|
||||
|
||||
function shouldSkipToolCallParsingForCodeFenceExample(text) {
|
||||
@@ -228,21 +124,6 @@ function shouldSkipToolCallParsingForCodeFenceExample(text) {
|
||||
return !looksLikeToolCallSyntax(stripped);
|
||||
}
|
||||
|
||||
/**
 * Cheap pre-filter deciding whether a candidate string is worth handing to
 * the JSON tool-call parser.
 *
 * The trimmed text must start with '{' or '[' and, case-insensitively,
 * contain at least one of: `tool_calls`, `"function"`, `functioncall`,
 * `"tool_use"`.
 *
 * @param {*} text - Candidate payload; coerced with toStringSafe and trimmed.
 * @returns {boolean} true when the text looks like a JSON tool-call payload.
 */
function isLikelyJSONToolPayloadCandidate(text) {
  const trimmed = toStringSafe(text).trim();
  if (!trimmed) {
    return false;
  }
  const opensLikeJSON = trimmed.startsWith('{') || trimmed.startsWith('[');
  if (!opensLikeJSON) {
    return false;
  }
  const lower = trimmed.toLowerCase();
  const markers = ['tool_calls', '"function"', 'functioncall', '"tool_use"'];
  return markers.some((marker) => lower.includes(marker));
}
|
||||
|
||||
module.exports = {
|
||||
extractToolNames,
|
||||
parseToolCalls,
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
'use strict';
|
||||
|
||||
const TOOL_CALL_PATTERN = /\{\s*["']tool_calls["']\s*:\s*\[(.*?)\]\s*\}/s;
|
||||
const TOOL_CALL_MARKUP_BLOCK_PATTERN = /<(?:[a-z0-9_:-]+:)?(tool_call|function_call|invoke)\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
|
||||
const TOOL_CALL_MARKUP_SELFCLOSE_PATTERN = /<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)\/>/gi;
|
||||
const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
|
||||
@@ -20,14 +19,10 @@ const TOOL_CALL_MARKUP_ARGS_PATTERNS = [
|
||||
/<(?:[a-z0-9_:-]+:)?args\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?args>/i,
|
||||
/<(?:[a-z0-9_:-]+:)?params\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?params>/i,
|
||||
];
|
||||
const TEXT_KV_NAME_PATTERN = /function\.name:\s*([a-zA-Z0-9_.-]+)/gi;
|
||||
|
||||
const {
|
||||
toStringSafe,
|
||||
} = require('./state');
|
||||
const {
|
||||
extractJSONObjectFrom,
|
||||
} = require('./jsonscan');
|
||||
|
||||
function stripFencedCodeBlocks(text) {
|
||||
const t = typeof text === 'string' ? text : '';
|
||||
@@ -37,138 +32,6 @@ function stripFencedCodeBlocks(text) {
|
||||
return t.replace(/```[\s\S]*?```/g, ' ');
|
||||
}
|
||||
|
||||
/**
 * Collect every substring of `text` that might be a tool-call payload.
 *
 * Candidates are gathered in this order and de-duplicated (first occurrence
 * kept, empty strings dropped):
 *   1. the whole text (not trimmed);
 *   2. the body of each ``` / ```json fenced block;
 *   3. every object returned by extractToolCallObjects;
 *   4. the span from the first '{' to the last '}';
 *   5. the span from the first '[' to the last ']';
 *   6. a re-wrapped `{"tool_calls":[...]}` built from the first
 *      TOOL_CALL_PATTERN match.
 *
 * @param {*} text - Raw model output; coerced with toStringSafe.
 * @returns {string[]} Unique, non-empty candidate strings.
 */
function buildToolCallCandidates(text) {
  const source = toStringSafe(text);
  const candidates = [source];

  // Fenced code blocks: capture group 1 is the block body without the fence
  // and without surrounding whitespace.
  for (const fence of source.matchAll(/```(?:json)?\s*([\s\S]*?)\s*```/gi)) {
    if (fence[1]) {
      candidates.push(toStringSafe(fence[1]));
    }
  }

  for (const candidate of extractToolCallObjects(source)) {
    candidates.push(toStringSafe(candidate));
  }

  // Widest object-looking span, then widest array-looking span.
  for (const [open, close] of [['{', '}'], ['[', ']']]) {
    const first = source.indexOf(open);
    const last = source.lastIndexOf(close);
    if (first >= 0 && last > first) {
      candidates.push(toStringSafe(source.slice(first, last + 1)));
    }
  }

  const m = source.match(TOOL_CALL_PATTERN);
  if (m && m[1]) {
    candidates.push(`{"tool_calls":[${m[1]}]}`);
  }

  return [...new Set(candidates.filter(Boolean))];
}
|
||||
|
||||
/**
 * Scan `text` for JSON objects that surround a tool-call marker.
 *
 * For each occurrence of `tool_calls`, `"function"`, `functioncall` or
 * `"tool_use"` (case-insensitive, earliest first), walk backwards over the
 * preceding '{' characters until extractJSONObjectFrom reports a complete
 * object starting there, and collect that object's trimmed text.
 *
 * @param {*} text - Raw model output; coerced with toStringSafe.
 * @returns {string[]} Extracted object substrings, in scan order.
 */
function extractToolCallObjects(text) {
  const raw = toStringSafe(text);
  if (!raw) {
    return [];
  }
  const lower = raw.toLowerCase();
  // Order matters only for ties: an earlier entry wins at equal index.
  const markers = ['tool_calls', '"function"', 'functioncall', '"tool_use"'];
  const out = [];
  let offset = 0;

  // eslint-disable-next-line no-constant-condition
  while (true) {
    let idx = -1;
    let matched = '';
    for (const marker of markers) {
      const at = lower.indexOf(marker, offset);
      if (at >= 0 && (idx < 0 || at < idx)) {
        idx = at;
        matched = marker;
      }
    }
    if (idx < 0) {
      break;
    }

    let extracted = false;
    let start = raw.slice(0, idx).lastIndexOf('{');
    while (start >= 0) {
      const obj = extractJSONObjectFrom(raw, start);
      if (obj.ok) {
        out.push(raw.slice(start, obj.end).trim());
        // Ensure forward progress even when the matched keyword is outside
        // the extracted JSON object (e.g. closing XML wrapper tags containing
        // "tool_calls" after an earlier JSON arguments object).
        offset = Math.max(obj.end, idx + matched.length);
        extracted = true;
        break;
      }
      // This '{' did not open a complete object; try the previous one.
      start = raw.slice(0, start).lastIndexOf('{');
    }

    if (!extracted) {
      // No enclosing object found: skip past this marker occurrence.
      offset = idx + matched.length;
    }
  }

  return out;
}
|
||||
|
||||
/**
 * Parse a JSON string into a list of tool calls.
 *
 * Accepted shapes:
 *   - a top-level array            -> each element parsed via parseToolCallList;
 *   - an object with `tool_calls`  -> that list is parsed, unless the object
 *     looks like a chat message envelope, in which case nothing is returned;
 *   - any other object             -> treated as a single tool-call item.
 *
 * @param {string} payload - Text expected to be valid JSON.
 * @returns {Array} Parsed tool calls; [] when the payload is not JSON, is not
 *   an object/array, or yields no call.
 */
function parseToolCallsPayload(payload) {
  let decoded;
  try {
    decoded = JSON.parse(payload);
  } catch (_err) {
    // Not JSON: by contract this is "no tool calls", not an error.
    return [];
  }

  if (Array.isArray(decoded)) {
    return parseToolCallList(decoded);
  }
  if (!decoded || typeof decoded !== 'object') {
    return [];
  }

  if (decoded.tool_calls) {
    return isLikelyChatMessageEnvelope(decoded)
      ? []
      : parseToolCallList(decoded.tool_calls);
  }

  const single = parseToolCallItem(decoded);
  return single ? [single] : [];
}
|
||||
|
||||
/**
 * Detect an object that looks like a whole chat message (which happens to
 * carry a `tool_calls` field) rather than a bare tool-call payload.
 *
 * The value must be a non-array object with its own `tool_calls` property,
 * and additionally either have a `role` of assistant/tool/user/system
 * (trimmed, case-insensitive) or own a `tool_call_id` or `content` property.
 *
 * @param {*} value - Decoded JSON value to inspect.
 * @returns {boolean} true when the value resembles a chat message envelope.
 */
function isLikelyChatMessageEnvelope(value) {
  if (!value || typeof value !== 'object' || Array.isArray(value)) {
    return false;
  }
  const owns = (key) => Object.prototype.hasOwnProperty.call(value, key);
  if (!owns('tool_calls')) {
    return false;
  }

  const role = toStringSafe(value.role).trim().toLowerCase();
  if (['assistant', 'tool', 'user', 'system'].includes(role)) {
    return true;
  }
  return owns('tool_call_id') || owns('content');
}
|
||||
|
||||
function parseMarkupToolCalls(text) {
|
||||
const raw = toStringSafe(text).trim();
|
||||
if (!raw) {
|
||||
@@ -190,51 +53,20 @@ function parseMarkupToolCalls(text) {
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * Parse the plain-text "key: value" tool-call form:
 *
 *   function.name: <name>
 *   function.arguments: { ...json... }
 *
 * Each `function.name:` match (TEXT_KV_NAME_PATTERN) owns the text up to the
 * next `function.name:` match (or end of input). Inside that window the first
 * literal `function.arguments:` label is located, then the first '{' after
 * it, and the JSON object starting there is extracted with
 * extractJSONObjectFrom. Entries missing any of these pieces are skipped.
 *
 * Note the label lookup is a case-sensitive indexOf, while the name pattern
 * is matched with whatever flags TEXT_KV_NAME_PATTERN carries.
 *
 * @param {*} text - Raw model output; coerced with toStringSafe.
 * @returns {Array<{name: string, input: *}>} Parsed calls in source order;
 *   `input` is whatever parseToolCallInput returns for the object text.
 */
function parseTextKVToolCalls(text) {
  const raw = toStringSafe(text);
  if (!raw) {
    return [];
  }
  const argsLabel = 'function.arguments:';
  const matches = [...raw.matchAll(TEXT_KV_NAME_PATTERN)];
  const out = [];

  for (let i = 0; i < matches.length; i += 1) {
    const match = matches[i];
    const name = toStringSafe(match[1]).trim();
    if (!name) {
      continue;
    }

    // Window belonging to this entry: after the name, before the next name.
    const nameEnd = match.index + toStringSafe(match[0]).length;
    const searchEnd = i + 1 < matches.length ? matches[i + 1].index : raw.length;

    const argIdx = raw.slice(nameEnd, searchEnd).indexOf(argsLabel);
    if (argIdx < 0) {
      continue;
    }
    const argStart = nameEnd + argIdx + argsLabel.length;

    const bracePos = raw.slice(argStart, searchEnd).indexOf('{');
    if (bracePos < 0) {
      continue;
    }
    const objStart = argStart + bracePos;

    // Extraction runs on the full text, so the object may extend past the
    // window boundary if its braces do.
    const obj = extractJSONObjectFrom(raw, objStart);
    if (!obj.ok) {
      continue;
    }

    out.push({
      name,
      input: parseToolCallInput(raw.slice(objStart, obj.end)),
    });
  }

  return out;
}
|
||||
|
||||
function parseMarkupSingleToolCall(attrs, inner) {
|
||||
const embedded = parseToolCallsPayload(inner);
|
||||
if (embedded.length > 0) {
|
||||
return embedded[0];
|
||||
// Try inline JSON parse for the inner content.
|
||||
if (inner) {
|
||||
try {
|
||||
const decoded = JSON.parse(inner);
|
||||
if (decoded && typeof decoded === 'object' && !Array.isArray(decoded) && decoded.name) {
|
||||
return {
|
||||
name: toStringSafe(decoded.name),
|
||||
input: decoded.input && typeof decoded.input === 'object' && !Array.isArray(decoded.input) ? decoded.input : {},
|
||||
};
|
||||
}
|
||||
} catch (_err) {
|
||||
// Not JSON, continue with markup parsing.
|
||||
}
|
||||
}
|
||||
let name = '';
|
||||
const attrMatch = attrs.match(TOOL_CALL_MARKUP_ATTR_PATTERN);
|
||||
@@ -316,73 +148,6 @@ function findMarkupTagValue(text, patterns) {
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
 * Parse an array of raw tool-call entries.
 *
 * Entries that are falsy or not of type 'object' are skipped, as are entries
 * for which parseToolCallItem returns a falsy value.
 *
 * @param {*} v - Expected to be an array of tool-call objects.
 * @returns {Array} Parsed tool calls in input order; [] when `v` is not an array.
 */
function parseToolCallList(v) {
  if (!Array.isArray(v)) {
    return [];
  }
  const out = [];
  for (const item of v) {
    if (!item || typeof item !== 'object') {
      continue;
    }
    const parsed = parseToolCallItem(item);
    if (parsed) {
      out.push(parsed);
    }
  }
  return out;
}
|
||||
|
||||
/**
 * Normalise one raw tool-call object into `{ name, input }`.
 *
 * Name precedence:  m.name, then m.functionCall.name, then m.function.name.
 * Input precedence (first own property found wins):
 *   1. m.input
 *   2. m.functionCall.args, then m.functionCall.arguments
 *   3. m.function.arguments
 *   4. m.arguments, m.args, m.parameters, m.params
 *
 * `functionCall` / `function` are only consulted when they are truthy values
 * of type 'object'.
 *
 * @param {object} m - Raw tool-call entry (must be non-null; properties are
 *   read from it directly).
 * @returns {{name: string, input: *}|null} The normalised call, with `input`
 *   passed through parseToolCallInput, or null when no name could be found.
 */
function parseToolCallItem(m) {
  const owns = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const asObject = (value) => (value && typeof value === 'object' ? value : null);

  let name = toStringSafe(m.name);
  let inputRaw = m.input;
  let hasInput = owns(m, 'input');

  // Take the first of `keys` that `source` owns, unless input is already set.
  const adoptInput = (source, keys) => {
    if (hasInput) {
      return;
    }
    const key = keys.find((k) => owns(source, k));
    if (key !== undefined) {
      inputRaw = source[key];
      hasInput = true;
    }
  };

  const fnCall = asObject(m.functionCall);
  if (fnCall) {
    if (!name) {
      name = toStringSafe(fnCall.name);
    }
    adoptInput(fnCall, ['args', 'arguments']);
  }

  const fn = asObject(m.function);
  if (fn) {
    if (!name) {
      name = toStringSafe(fn.name);
    }
    adoptInput(fn, ['arguments']);
  }

  adoptInput(m, ['arguments', 'args', 'parameters', 'params']);

  if (!name) {
    return null;
  }
  return {
    name,
    input: parseToolCallInput(inputRaw),
  };
}
|
||||
|
||||
function parseToolCallInput(v) {
|
||||
if (v == null) {
|
||||
return {};
|
||||
@@ -418,8 +183,5 @@ function parseToolCallInput(v) {
|
||||
|
||||
module.exports = {
|
||||
stripFencedCodeBlocks,
|
||||
buildToolCallCandidates,
|
||||
parseToolCallsPayload,
|
||||
parseMarkupToolCalls,
|
||||
parseTextKVToolCalls,
|
||||
};
|
||||
|
||||
@@ -4,12 +4,9 @@ const {
|
||||
noteText,
|
||||
insideCodeFenceWithState,
|
||||
} = require('./state');
|
||||
const { parseStandaloneToolCallsDetailed } = require('./parse');
|
||||
const { extractJSONObjectFrom, trimWrappingJSONFence } = require('./jsonscan');
|
||||
const { trimWrappingJSONFence } = require('./jsonscan');
|
||||
const {
|
||||
TOOL_SEGMENT_KEYWORDS,
|
||||
XML_TOOL_SEGMENT_TAGS,
|
||||
earliestKeywordIndex,
|
||||
} = require('./tool-keywords');
|
||||
const {
|
||||
consumeXMLToolCapture: consumeXMLToolCaptureImpl,
|
||||
@@ -141,30 +138,15 @@ function splitSafeContentForToolDetection(s) {
|
||||
if (!text) {
|
||||
return ['', ''];
|
||||
}
|
||||
const suspiciousStart = findSuspiciousPrefixStart(text);
|
||||
if (suspiciousStart < 0) {
|
||||
return [text, ''];
|
||||
}
|
||||
if (suspiciousStart > 0) {
|
||||
return [text.slice(0, suspiciousStart), text.slice(suspiciousStart)];
|
||||
}
|
||||
return ['', text];
|
||||
}
|
||||
|
||||
function findSuspiciousPrefixStart(s) {
|
||||
let start = -1;
|
||||
for (const needle of ['{', '[', '```']) {
|
||||
const idx = s.lastIndexOf(needle);
|
||||
if (idx > start) {
|
||||
start = idx;
|
||||
// Only hold back partial XML tool tags.
|
||||
const xmlIdx = findPartialXMLToolTagStart(text);
|
||||
if (xmlIdx >= 0) {
|
||||
if (xmlIdx > 0) {
|
||||
return [text.slice(0, xmlIdx), text.slice(xmlIdx)];
|
||||
}
|
||||
return ['', text];
|
||||
}
|
||||
// Also check for partial XML tool tag at end of string.
|
||||
const xmlIdx = findPartialXMLToolTagStart(s);
|
||||
if (xmlIdx >= 0 && xmlIdx > start) {
|
||||
start = xmlIdx;
|
||||
}
|
||||
return start;
|
||||
return [text, ''];
|
||||
}
|
||||
|
||||
function findToolSegmentStart(state, s) {
|
||||
@@ -174,39 +156,23 @@ function findToolSegmentStart(state, s) {
|
||||
const lower = s.toLowerCase();
|
||||
let offset = 0;
|
||||
while (true) {
|
||||
// Check JSON keywords.
|
||||
let { index: bestKeyIdx, keyword: matchedKeyword } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS, offset);
|
||||
// Also check XML tool tags.
|
||||
// Only check XML tool tags.
|
||||
let bestIdx = -1;
|
||||
let matchedTag = '';
|
||||
for (const tag of XML_TOOL_SEGMENT_TAGS) {
|
||||
const idx = lower.indexOf(tag, offset);
|
||||
if (idx >= 0 && (bestKeyIdx < 0 || idx < bestKeyIdx)) {
|
||||
bestKeyIdx = idx;
|
||||
matchedKeyword = tag;
|
||||
if (idx >= 0 && (bestIdx < 0 || idx < bestIdx)) {
|
||||
bestIdx = idx;
|
||||
matchedTag = tag;
|
||||
}
|
||||
}
|
||||
if (bestKeyIdx < 0) {
|
||||
if (bestIdx < 0) {
|
||||
return -1;
|
||||
}
|
||||
// For XML tags, the '<' is itself the segment start.
|
||||
if (s[bestKeyIdx] === '<') {
|
||||
if (!insideCodeFenceWithState(state, s.slice(0, bestKeyIdx))) {
|
||||
return bestKeyIdx;
|
||||
}
|
||||
offset = bestKeyIdx + matchedKeyword.length;
|
||||
continue;
|
||||
if (!insideCodeFenceWithState(state, s.slice(0, bestIdx))) {
|
||||
return bestIdx;
|
||||
}
|
||||
const keyIdx = bestKeyIdx;
|
||||
const start = s.slice(0, keyIdx).lastIndexOf('{');
|
||||
let candidateStart = start >= 0 ? start : keyIdx;
|
||||
// If the keyword matched inside an XML tag (e.g. "tool_calls" in "<tool_calls>"),
|
||||
// back up past the '<' to capture the full tag.
|
||||
if (candidateStart > 0 && s[candidateStart - 1] === '<') {
|
||||
candidateStart--;
|
||||
}
|
||||
if (!insideCodeFenceWithState(state, s.slice(0, candidateStart))) {
|
||||
return candidateStart;
|
||||
}
|
||||
offset = keyIdx + matchedKeyword.length;
|
||||
offset = bestIdx + matchedTag.length;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -216,7 +182,7 @@ function consumeToolCapture(state, toolNames) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
|
||||
// Try XML tool call extraction first.
|
||||
// XML-only tool call extraction.
|
||||
const xmlResult = consumeXMLToolCaptureImpl(captured, toolNames, trimWrappingJSONFence);
|
||||
if (xmlResult.ready) {
|
||||
return xmlResult;
|
||||
@@ -226,50 +192,12 @@ function consumeToolCapture(state, toolNames) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
|
||||
const lower = captured.toLowerCase();
|
||||
const { index: keyIdx } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS);
|
||||
if (keyIdx < 0) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
const start = captured.slice(0, keyIdx).lastIndexOf('{');
|
||||
const actualStart = start >= 0 ? start : keyIdx;
|
||||
const obj = extractJSONObjectFrom(captured, actualStart);
|
||||
if (!obj.ok) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
const prefixPart = captured.slice(0, actualStart);
|
||||
const suffixPart = captured.slice(obj.end);
|
||||
if (insideCodeFenceWithState(state, prefixPart)) {
|
||||
return {
|
||||
ready: true,
|
||||
prefix: captured,
|
||||
calls: [],
|
||||
suffix: '',
|
||||
};
|
||||
}
|
||||
const parsed = parseStandaloneToolCallsDetailed(captured.slice(actualStart, obj.end), toolNames);
|
||||
if (!Array.isArray(parsed.calls) || parsed.calls.length === 0) {
|
||||
if (parsed.sawToolCallSyntax && parsed.rejectedByPolicy) {
|
||||
return {
|
||||
ready: true,
|
||||
prefix: prefixPart,
|
||||
calls: [],
|
||||
suffix: suffixPart,
|
||||
};
|
||||
}
|
||||
return {
|
||||
ready: true,
|
||||
prefix: captured,
|
||||
calls: [],
|
||||
suffix: '',
|
||||
};
|
||||
}
|
||||
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
||||
// No XML tool tags detected — release captured content as text.
|
||||
return {
|
||||
ready: true,
|
||||
prefix: trimmedFence.prefix,
|
||||
calls: parsed.calls,
|
||||
suffix: trimmedFence.suffix,
|
||||
prefix: captured,
|
||||
calls: [],
|
||||
suffix: '',
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1,15 +1,7 @@
|
||||
'use strict';
|
||||
|
||||
const TOOL_SEGMENT_KEYWORDS = [
|
||||
'tool_calls',
|
||||
'"function"',
|
||||
'function.name:',
|
||||
'functioncall',
|
||||
'"tool_use"',
|
||||
];
|
||||
|
||||
const XML_TOOL_SEGMENT_TAGS = [
|
||||
'<tool_calls>', '<tool_calls\n', '<tool_call>', '<tool_call\n',
|
||||
'<tool_calls>', '<tool_calls\n', '<tool_calls ', '<tool_call>', '<tool_call\n', '<tool_call ',
|
||||
'<invoke ', '<invoke>', '<function_call', '<function_calls', '<tool_use>',
|
||||
];
|
||||
|
||||
@@ -21,26 +13,9 @@ const XML_TOOL_CLOSING_TAGS = [
|
||||
'</tool_calls>', '</tool_call>', '</invoke>', '</function_call>', '</function_calls>', '</tool_use>',
|
||||
];
|
||||
|
||||
/**
 * Find which keyword occurs first in `text` at or after `offset`.
 *
 * The search is a plain, case-sensitive indexOf per keyword. When two
 * keywords start at the same index, the one listed first wins.
 *
 * @param {string} text - Text to search; a falsy value yields "not found".
 * @param {string[]} [keywords=TOOL_SEGMENT_KEYWORDS] - Keywords to look for.
 * @param {number} [offset=0] - Index at which each search starts.
 * @returns {{index: number, keyword: string}} Position and keyword of the
 *   earliest hit, or `{ index: -1, keyword: '' }` when none is found.
 */
function earliestKeywordIndex(text, keywords = TOOL_SEGMENT_KEYWORDS, offset = 0) {
  let index = -1;
  let keyword = '';
  if (!text) {
    return { index, keyword };
  }
  for (const kw of keywords) {
    const at = text.indexOf(kw, offset);
    const isEarlier = index < 0 || at < index;
    if (at >= 0 && isEarlier) {
      index = at;
      keyword = kw;
    }
  }
  return { index, keyword };
}
|
||||
|
||||
module.exports = {
|
||||
TOOL_SEGMENT_KEYWORDS,
|
||||
XML_TOOL_SEGMENT_TAGS,
|
||||
XML_TOOL_OPENING_TAGS,
|
||||
XML_TOOL_CLOSING_TAGS,
|
||||
earliestKeywordIndex,
|
||||
};
|
||||
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "unknown_tool",
|
||||
"input": {
|
||||
"x": 1
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "Read_File",
|
||||
"input": {
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
{
|
||||
"calls": [],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "search",
|
||||
"input": {
|
||||
"q": "latest <tool_call><tool_name>wrong</tool_name><parameters>{\"x\":1}</parameters></tool_call>"
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "read-file",
|
||||
"input": {
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "company.fs.read_file",
|
||||
"input": {
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
{
|
||||
"calls": [],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "read_file",
|
||||
"input": {
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "read_file",
|
||||
"input": {
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "unknown_tool",
|
||||
"input": {
|
||||
"x": 1
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
{
|
||||
"text": "{\"tool_calls\":[{\"name\":\"unknown_tool\",\"input\":{\"x\":1}}]}",
|
||||
"tool_names": []
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
{
|
||||
"text": "{\"tool_calls\":[{\"name\":\"Read_File\",\"input\":{\"path\":\"README.MD\"}}]}",
|
||||
"tool_names": ["read_file"]
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
{
|
||||
"text": "```json\n{\"tool_calls\":[{\"name\":\"read_file\",\"input\":{\"path\":\"README.MD\"}}]}\n```",
|
||||
"tool_names": ["read_file"]
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
{
|
||||
"text": "{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"latest <tool_call><tool_name>wrong</tool_name><parameters>{\\\"x\\\":1}</parameters></tool_call>\"}}]}",
|
||||
"tool_names": [
|
||||
"search"
|
||||
]
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
{
|
||||
"text": "{\"tool_calls\":[{\"name\":\"read-file\",\"input\":{\"path\":\"README.MD\"}}]}",
|
||||
"tool_names": [
|
||||
"read_file"
|
||||
]
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
{
|
||||
"text": "{\"tool_calls\":[{\"name\":\"company.fs.read_file\",\"input\":{\"path\":\"README.MD\"}}]}",
|
||||
"tool_names": [
|
||||
"read_file"
|
||||
]
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
{
|
||||
"mode": "standalone",
|
||||
"text": "```json\n{\"tool_calls\":[{\"name\":\"read_file\",\"input\":{\"path\":\"README.MD\"}}]}\n```",
|
||||
"tool_names": ["read_file"]
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
{
|
||||
"mode": "standalone",
|
||||
"text": "下面是示例:{\"tool_calls\":[{\"name\":\"read_file\",\"input\":{\"path\":\"README.MD\"}}]}请勿执行。",
|
||||
"tool_names": ["read_file"]
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
{
|
||||
"mode": "standalone",
|
||||
"text": "{\"tool_calls\":[{\"name\":\"read_file\",\"input\":{\"path\":\"README.MD\"}}]}",
|
||||
"tool_names": ["read_file"]
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
{
|
||||
"text": "{\"tool_calls\":[{\"name\":\"unknown_tool\",\"input\":{\"x\":1}}]}",
|
||||
"tool_names": ["read_file"]
|
||||
}
|
||||
@@ -129,7 +129,7 @@ test('parseChunkForContent keeps split response/content fragments inside respons
|
||||
assert.equal(combined, '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}');
|
||||
});
|
||||
|
||||
test('parseChunkForContent + sieve does not leak suspicious prefix in split tool json case', () => {
|
||||
test('parseChunkForContent + sieve passes JSON tool payload through as text (XML-only)', () => {
|
||||
const chunk = {
|
||||
p: 'response',
|
||||
v: [
|
||||
@@ -146,15 +146,14 @@ test('parseChunkForContent + sieve does not leak suspicious prefix in split tool
|
||||
events.push(...flushToolSieve(state, ['read_file']));
|
||||
|
||||
const hasToolCalls = events.some((evt) => evt.type === 'tool_calls' && evt.calls && evt.calls.length > 0);
|
||||
const hasToolDeltas = events.some((evt) => evt.type === 'tool_call_deltas' && evt.deltas && evt.deltas.length > 0);
|
||||
const leakedText = events
|
||||
.filter((evt) => evt.type === 'text' && evt.text)
|
||||
.map((evt) => evt.text)
|
||||
.join('');
|
||||
|
||||
assert.equal(hasToolCalls || hasToolDeltas, true);
|
||||
assert.equal(leakedText.includes('{'), false);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
|
||||
// JSON payloads are no longer intercepted — they pass through as text.
|
||||
assert.equal(hasToolCalls, false);
|
||||
assert.equal(leakedText.includes('tool_calls'), true);
|
||||
});
|
||||
|
||||
test('parseChunkForContent consumes nested item.v array payloads', () => {
|
||||
|
||||
@@ -41,164 +41,72 @@ test('extractToolNames keeps only declared tool names (Go parity)', () => {
|
||||
assert.deepEqual(names, ['read_file']);
|
||||
});
|
||||
|
||||
test('parseToolCalls keeps non-object argument strings as _raw (Go parity)', () => {
|
||||
const payload = JSON.stringify({
|
||||
tool_calls: [
|
||||
{ name: 'read_file', input: '123' },
|
||||
{ name: 'list_dir', input: '[1,2,3]' },
|
||||
],
|
||||
});
|
||||
const calls = parseToolCalls(payload, ['read_file', 'list_dir']);
|
||||
assert.deepEqual(calls, [
|
||||
{ name: 'read_file', input: { _raw: '123' } },
|
||||
{ name: 'list_dir', input: { _raw: '[1,2,3]' } },
|
||||
]);
|
||||
});
|
||||
|
||||
test('parseToolCalls keeps unknown schema names when toolNames is provided', () => {
|
||||
const payload = JSON.stringify({
|
||||
tool_calls: [{ name: 'not_in_schema', input: { q: 'go' } }],
|
||||
});
|
||||
const calls = parseToolCalls(payload, ['search']);
|
||||
test('parseToolCalls parses XML markup tool call', () => {
|
||||
const payload = '<tool_call><tool_name>read_file</tool_name><parameters>{"path":"README.MD"}</parameters></tool_call>';
|
||||
const calls = parseToolCalls(payload, ['read_file']);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].name, 'not_in_schema');
|
||||
assert.equal(calls[0].name, 'read_file');
|
||||
assert.deepEqual(calls[0].input, { path: 'README.MD' });
|
||||
});
|
||||
|
||||
test('parseToolCalls keeps original tool name casing', () => {
|
||||
test('parseToolCalls ignores JSON tool_calls payload (XML-only)', () => {
|
||||
const payload = JSON.stringify({
|
||||
tool_calls: [{ name: 'Read_File', input: { path: 'README.MD' } }],
|
||||
tool_calls: [{ name: 'read_file', input: { path: 'README.MD' } }],
|
||||
});
|
||||
const calls = parseToolCalls(payload, ['read_file']);
|
||||
assert.deepEqual(calls, [{ name: 'Read_File', input: { path: 'README.MD' } }]);
|
||||
});
|
||||
|
||||
test('parseToolCalls accepts all names when toolNames is empty', () => {
|
||||
const payload = JSON.stringify({
|
||||
tool_calls: [{ name: 'not_in_schema', input: { q: 'go' } }],
|
||||
});
|
||||
const calls = parseToolCalls(payload, []);
|
||||
assert.equal(calls.length, 1);
|
||||
|
||||
const detailed = parseToolCallsDetailed(payload, []);
|
||||
assert.equal(detailed.sawToolCallSyntax, true);
|
||||
assert.equal(detailed.rejectedByPolicy, false);
|
||||
assert.deepEqual(detailed.rejectedToolNames, []);
|
||||
assert.equal(calls.length, 0);
|
||||
});
|
||||
|
||||
test('parseToolCalls ignores tool_call payloads that exist only inside fenced code blocks', () => {
|
||||
const text = [
|
||||
'I will call a tool now.',
|
||||
'```json',
|
||||
'{"tool_calls":[{"function":{"name":"read_file","arguments":"{\\"path\\":\\"README.md\\"}"}}]}',
|
||||
'```xml',
|
||||
'<tool_call><tool_name>read_file</tool_name><parameters>{"path":"README.md"}</parameters></tool_call>',
|
||||
'```',
|
||||
].join('\n');
|
||||
const calls = parseToolCalls(text, ['read_file']);
|
||||
assert.equal(calls.length, 0);
|
||||
});
|
||||
|
||||
test('parseToolCalls parses text-kv fallback payload', () => {
|
||||
const text = [
|
||||
'function.name: execute_command',
|
||||
'function.arguments: {"command":"cd scripts && python check_syntax.py example.py","cwd":null,"timeout":30}',
|
||||
'Some other text thinking...',
|
||||
].join('\n');
|
||||
const calls = parseToolCalls(text, ['execute_command']);
|
||||
test('parseToolCalls keeps unknown schema names when toolNames is provided', () => {
|
||||
const payload = '<tool_call><tool_name>not_in_schema</tool_name><parameters>{"q":"go"}</parameters></tool_call>';
|
||||
const calls = parseToolCalls(payload, ['search']);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].name, 'execute_command');
|
||||
assert.equal(calls[0].input.command, 'cd scripts && python check_syntax.py example.py');
|
||||
assert.equal(calls[0].name, 'not_in_schema');
|
||||
});
|
||||
|
||||
test('parseToolCalls supports Gemini functionCall JSON payload', () => {
|
||||
const payload = JSON.stringify({
|
||||
functionCall: { name: 'search_web', args: { query: 'latest' } },
|
||||
});
|
||||
const calls = parseToolCalls(payload, ['search_web']);
|
||||
assert.deepEqual(calls, [{ name: 'search_web', input: { query: 'latest' } }]);
|
||||
});
|
||||
|
||||
test('parseToolCalls supports Claude tool_use JSON payload', () => {
|
||||
const payload = JSON.stringify({
|
||||
type: 'tool_use',
|
||||
name: 'read_file',
|
||||
input: { path: 'README.md' },
|
||||
});
|
||||
const calls = parseToolCalls(payload, ['read_file']);
|
||||
assert.deepEqual(calls, [{ name: 'read_file', input: { path: 'README.md' } }]);
|
||||
});
|
||||
|
||||
test('parseToolCalls parses multiple text-kv fallback payloads', () => {
|
||||
const text = [
|
||||
'function.name: read_file',
|
||||
'function.arguments: {"path":"abc.txt"}',
|
||||
'',
|
||||
'function.name: bash',
|
||||
'function.arguments: {"command":"ls"}',
|
||||
].join('\n');
|
||||
const calls = parseToolCalls(text, ['read_file', 'bash']);
|
||||
assert.equal(calls.length, 2);
|
||||
assert.equal(calls[0].name, 'read_file');
|
||||
assert.equal(calls[1].name, 'bash');
|
||||
});
|
||||
|
||||
test('parseStandaloneToolCalls parses mixed prose payload', () => {
|
||||
const mixed = '这里是示例:{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]},请勿执行。';
|
||||
const standalone = '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}';
|
||||
const mixedCalls = parseStandaloneToolCalls(mixed, ['read_file']);
|
||||
const standaloneCalls = parseStandaloneToolCalls(standalone, ['read_file']);
|
||||
assert.equal(mixedCalls.length, 1);
|
||||
assert.equal(standaloneCalls.length, 1);
|
||||
});
|
||||
|
||||
test('parseStandaloneToolCalls ignores fenced code block tool_call payload', () => {
|
||||
const fenced = ['```json', '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}', '```'].join('\n');
|
||||
const calls = parseStandaloneToolCalls(fenced, ['read_file']);
|
||||
assert.equal(calls.length, 0);
|
||||
});
|
||||
|
||||
test('parseStandaloneToolCalls ignores chat transcript message envelope with tool_calls', () => {
|
||||
const transcript = JSON.stringify([
|
||||
{ role: 'user', content: '请展示完整会话' },
|
||||
{
|
||||
role: 'assistant',
|
||||
content: null,
|
||||
tool_calls: [{ function: { name: 'read_file', arguments: '{"path":"README.MD"}' } }],
|
||||
},
|
||||
]);
|
||||
const calls = parseStandaloneToolCalls(transcript, ['read_file']);
|
||||
assert.equal(calls.length, 0);
|
||||
});
|
||||
|
||||
|
||||
test('sieve emits tool_calls in the same chunk processing tick once payload is complete', () => {
|
||||
const state = createToolSieveState();
|
||||
const first = processToolSieveChunk(state, '{"', ['read_file']);
|
||||
const second = processToolSieveChunk(
|
||||
state,
|
||||
'tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}',
|
||||
['read_file'],
|
||||
);
|
||||
const firstCalls = first.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
const secondCalls = second.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
assert.equal(firstCalls.length, 0);
|
||||
assert.equal(secondCalls.length, 1);
|
||||
assert.equal(secondCalls[0].name, 'read_file');
|
||||
});
|
||||
|
||||
test('sieve emits tool_calls when late key convergence forms a complete payload', () => {
|
||||
test('sieve emits tool_calls for XML tool call payload', () => {
|
||||
const events = runSieve(
|
||||
[
|
||||
'{"',
|
||||
'tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}',
|
||||
'后置正文C。',
|
||||
],
|
||||
['<tool_call><tool_name>read_file</tool_name><parameters>{"path":"README.MD"}</parameters></tool_call>'],
|
||||
['read_file'],
|
||||
);
|
||||
const leakedText = collectText(events);
|
||||
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
assert.equal(finalCalls.length, 1);
|
||||
assert.equal(finalCalls[0].name, 'read_file');
|
||||
assert.equal(leakedText.includes('后置正文C。'), true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
|
||||
});
|
||||
|
||||
test('sieve emits tool_calls when XML tag spans multiple chunks', () => {
|
||||
const events = runSieve(
|
||||
[
|
||||
'<tool_call><tool_name>read_file</tool_name>',
|
||||
'<parameters>{"path":"README.MD"}</parameters></tool_call>',
|
||||
],
|
||||
['read_file'],
|
||||
);
|
||||
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
assert.equal(finalCalls.length, 1);
|
||||
assert.equal(finalCalls[0].name, 'read_file');
|
||||
});
|
||||
|
||||
test('sieve passes JSON tool_calls payload through as text (XML-only)', () => {
|
||||
const events = runSieve(
|
||||
['{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}'],
|
||||
['read_file'],
|
||||
);
|
||||
const leakedText = collectText(events);
|
||||
const hasToolCall = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0);
|
||||
assert.equal(hasToolCall, false);
|
||||
assert.equal(leakedText.includes('tool_calls'), true);
|
||||
});
|
||||
|
||||
test('sieve keeps embedded invalid tool-like json as normal text to avoid stream stalls', () => {
|
||||
@@ -218,17 +126,6 @@ test('sieve keeps embedded invalid tool-like json as normal text to avoid stream
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), true);
|
||||
});
|
||||
|
||||
test('sieve flushes incomplete captured tool json as text on stream finalize', () => {
|
||||
const events = runSieve(
|
||||
['前置正文F。', '{"tool_calls":[{"name":"read_file"'],
|
||||
['read_file'],
|
||||
);
|
||||
const leakedText = collectText(events);
|
||||
assert.equal(leakedText.includes('前置正文F。'), true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), true);
|
||||
assert.equal(leakedText.includes('{'), true);
|
||||
});
|
||||
|
||||
test('sieve flushes incomplete captured XML tool blocks without leaking raw tags', () => {
|
||||
const events = runSieve(
|
||||
[
|
||||
@@ -263,20 +160,6 @@ test('sieve captures XML wrapper tags with attributes without leaking wrapper te
|
||||
assert.equal(leakedText.includes('</tool_calls>'), false);
|
||||
});
|
||||
|
||||
test('sieve still intercepts large tool json payloads over previous capture limit', () => {
|
||||
const large = 'a'.repeat(9000);
|
||||
const payload = `{"tool_calls":[{"name":"read_file","input":{"path":"${large}"}}]}`;
|
||||
const events = runSieve(
|
||||
[payload.slice(0, 3000), payload.slice(3000, 7000), payload.slice(7000)],
|
||||
['read_file'],
|
||||
);
|
||||
const leakedText = collectText(events);
|
||||
const hasToolCall = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0);
|
||||
const hasToolDelta = events.some((evt) => evt.type === 'tool_call_deltas' && evt.deltas?.length > 0);
|
||||
assert.equal(hasToolCall || hasToolDelta, true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
|
||||
});
|
||||
|
||||
test('sieve keeps plain text intact in tool mode when no tool call appears', () => {
|
||||
const events = runSieve(
|
||||
['你好,', '这是普通文本回复。', '请继续。'],
|
||||
@@ -300,23 +183,6 @@ test('sieve keeps plain "tool_calls" prose as text when no valid payload follows
|
||||
assert.equal(leakedText, '前置。这里提到 tool_calls 只是解释,不是调用。后置。');
|
||||
});
|
||||
|
||||
test('sieve keeps numbered planning prose before a real tool payload (mobile-chat style)', () => {
|
||||
const events = runSieve(
|
||||
[
|
||||
'好的,我会依次测试每个工具,先把所有工具都调用一遍,然后汇总结果给你看。\n\n1. 获取当前时间\n',
|
||||
'{"tool_calls":[{"name":"get_current_time","input":{}}]}',
|
||||
],
|
||||
['get_current_time'],
|
||||
);
|
||||
const leakedText = collectText(events);
|
||||
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
assert.equal(finalCalls.length, 1);
|
||||
assert.equal(finalCalls[0].name, 'get_current_time');
|
||||
assert.equal(leakedText.includes('先把所有工具都调用一遍'), true);
|
||||
assert.equal(leakedText.includes('1. 获取当前时间'), true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
|
||||
});
|
||||
|
||||
test('sieve keeps numbered planning prose when no tool payload follows', () => {
|
||||
const events = runSieve(
|
||||
['好的,我会依次测试每个工具。\n\n1. 获取当前时间'],
|
||||
@@ -328,77 +194,6 @@ test('sieve keeps numbered planning prose when no tool payload follows', () => {
|
||||
assert.equal(leakedText, '好的,我会依次测试每个工具。\n\n1. 获取当前时间');
|
||||
});
|
||||
|
||||
test('sieve emits unknown tool payload (no args) as executable tool call', () => {
|
||||
const events = runSieve(
|
||||
['{"tool_calls":[{"name":"not_in_schema"}]}', '后置正文G。'],
|
||||
['read_file'],
|
||||
);
|
||||
const leakedText = collectText(events);
|
||||
const hasToolCall = events.some((evt) => evt.type === 'tool_calls' && Array.isArray(evt.calls) && evt.calls.length > 0);
|
||||
const hasToolDelta = events.some((evt) => evt.type === 'tool_call_deltas' && Array.isArray(evt.deltas) && evt.deltas.length > 0);
|
||||
assert.equal(hasToolCall || hasToolDelta, true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
|
||||
assert.equal(leakedText.includes('后置正文G。'), true);
|
||||
});
|
||||
|
||||
test('sieve emits final tool_calls for split arguments payload without incremental deltas', () => {
|
||||
const state = createToolSieveState();
|
||||
const first = processToolSieveChunk(
|
||||
state,
|
||||
'{"tool_calls":[{"name":"read_file","input":{"path":"READ',
|
||||
['read_file'],
|
||||
);
|
||||
const second = processToolSieveChunk(
|
||||
state,
|
||||
'ME.MD","mode":"head"}}]}',
|
||||
['read_file'],
|
||||
);
|
||||
const tail = flushToolSieve(state, ['read_file']);
|
||||
const events = [...first, ...second, ...tail];
|
||||
const deltaEvents = events.filter((evt) => evt.type === 'tool_call_deltas');
|
||||
assert.equal(deltaEvents.length, 0);
|
||||
const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
assert.equal(finalCalls.length, 1);
|
||||
assert.equal(finalCalls[0].name, 'read_file');
|
||||
assert.deepEqual(finalCalls[0].input, { path: 'README.MD', mode: 'head' });
|
||||
});
|
||||
|
||||
test('sieve still emits tool_calls when leading prose exists before tool json', () => {
|
||||
const events = runSieve(
|
||||
['我将调用工具。', '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}'],
|
||||
['read_file'],
|
||||
);
|
||||
const hasTool = events.some((evt) => (evt.type === 'tool_calls' && evt.calls?.length > 0) || (evt.type === 'tool_call_deltas' && evt.deltas?.length > 0));
|
||||
const leakedText = collectText(events);
|
||||
assert.equal(hasTool, true);
|
||||
assert.equal(leakedText.includes('我将调用工具。'), true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
|
||||
});
|
||||
|
||||
test('sieve emits tool_calls and keeps trailing prose when payload and prose share a chunk', () => {
|
||||
const events = runSieve(
|
||||
['{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}然后继续解释。'],
|
||||
['read_file'],
|
||||
);
|
||||
const hasTool = events.some((evt) => (evt.type === 'tool_calls' && evt.calls?.length > 0) || (evt.type === 'tool_call_deltas' && evt.deltas?.length > 0));
|
||||
const leakedText = collectText(events);
|
||||
assert.equal(hasTool, true);
|
||||
assert.equal(leakedText.includes('然后继续解释。'), true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
|
||||
});
|
||||
|
||||
test('sieve preserves closed fence before standalone tool payload', () => {
|
||||
const events = runSieve(
|
||||
['先给一个代码示例:\n```text\nhello\n```\n{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}'],
|
||||
['read_file'],
|
||||
);
|
||||
const hasTool = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0);
|
||||
const leakedText = collectText(events);
|
||||
assert.equal(hasTool, true);
|
||||
assert.equal(leakedText.includes('```'), true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
|
||||
});
|
||||
|
||||
test('sieve does not trigger tool calls for long fenced examples beyond legacy tail window', () => {
|
||||
const longPadding = 'x'.repeat(700);
|
||||
const events = runSieve(
|
||||
@@ -434,24 +229,6 @@ test('sieve keeps fence state when triple-backticks are split across chunks', ()
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), true);
|
||||
});
|
||||
|
||||
test('sieve ignores tool-like payload inside nested fences and resumes detection after close', () => {
|
||||
const events = runSieve(
|
||||
[
|
||||
'外层示例开始\n````markdown\n',
|
||||
'```json\n{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}\n```\n',
|
||||
'````\n',
|
||||
'{"tool_calls":[{"name":"read_file","input":{"path":"README2.MD"}}]}',
|
||||
],
|
||||
['read_file'],
|
||||
);
|
||||
const calls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
const leakedText = collectText(events);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].input.path, 'README2.MD');
|
||||
assert.equal(leakedText.includes('README.MD'), true);
|
||||
assert.equal(leakedText.includes('README2.MD'), false);
|
||||
});
|
||||
|
||||
test('formatOpenAIStreamToolCalls reuses ids with the same idStore', () => {
|
||||
const idStore = new Map();
|
||||
const calls = [{ name: 'read_file', input: { path: 'README.MD' } }];
|
||||
|
||||
Reference in New Issue
Block a user