From 93e9fb531df524672e89800d715095f3f5671cec Mon Sep 17 00:00:00 2001 From: CJACK Date: Sun, 1 Mar 2026 07:15:35 +0800 Subject: [PATCH] =?UTF-8?q?js=E5=AF=B9=E9=BD=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/compat/go_compat_test.go | 10 +- internal/js/chat-stream/index.js | 4 + internal/js/chat-stream/toolcall_policy.js | 42 ++++ internal/js/chat-stream/vercel_stream.js | 23 +- .../js/helpers/stream-tool-sieve/format.js | 18 +- .../helpers/stream-tool-sieve/incremental.js | 226 ------------------ .../js/helpers/stream-tool-sieve/index.js | 4 + .../js/helpers/stream-tool-sieve/parse.js | 133 ++++++++--- .../js/helpers/stream-tool-sieve/sieve.js | 107 +++++---- .../js/helpers/stream-tool-sieve/state.js | 4 + plans/node-syntax-gate-targets.txt | 1 - .../expected/toolcalls_allowlist_empty.json | 3 + .../toolcalls_case_insensitive_canonical.json | 10 + .../toolcalls_standalone_fenced_example.json | 3 + .../toolcalls_standalone_mixed_prose.json | 3 + .../expected/toolcalls_standalone_pure.json | 10 + .../fixtures/toolcalls/allowlist_empty.json | 4 + .../toolcalls/case_insensitive_canonical.json | 4 + .../toolcalls/standalone_fenced_example.json | 5 + .../toolcalls/standalone_mixed_prose.json | 5 + .../fixtures/toolcalls/standalone_pure.json | 5 + tests/node/chat-stream.test.js | 42 ++++ tests/node/js_compat_test.js | 16 +- tests/node/stream-tool-sieve.test.js | 66 +++-- 24 files changed, 391 insertions(+), 357 deletions(-) delete mode 100644 internal/js/helpers/stream-tool-sieve/incremental.js create mode 100644 tests/compat/expected/toolcalls_allowlist_empty.json create mode 100644 tests/compat/expected/toolcalls_case_insensitive_canonical.json create mode 100644 tests/compat/expected/toolcalls_standalone_fenced_example.json create mode 100644 tests/compat/expected/toolcalls_standalone_mixed_prose.json create mode 100644 tests/compat/expected/toolcalls_standalone_pure.json create mode 100644 tests/compat/fixtures/toolcalls/allowlist_empty.json create mode 100644 tests/compat/fixtures/toolcalls/case_insensitive_canonical.json create mode 100644 tests/compat/fixtures/toolcalls/standalone_fenced_example.json create mode 100644 tests/compat/fixtures/toolcalls/standalone_mixed_prose.json create mode 100644 tests/compat/fixtures/toolcalls/standalone_pure.json diff --git a/internal/compat/go_compat_test.go b/internal/compat/go_compat_test.go index 024e7ba..fa68eb2 100644 --- a/internal/compat/go_compat_test.go +++ b/internal/compat/go_compat_test.go @@ -5,6 +5,7 @@ import ( "os" "path/filepath" "reflect" + "strings" "testing" "ds2api/internal/sse" @@ -67,6 +68,7 @@ func TestGoCompatToolcallFixtures(t *testing.T) { var fixture struct { Text string `json:"text"` ToolNames []string `json:"tool_names"` + Mode string `json:"mode"` } mustLoadJSON(t, fixturePath, &fixture) @@ -75,7 +77,13 @@ func TestGoCompatToolcallFixtures(t *testing.T) { } mustLoadJSON(t, expectedPath, &expected) - got := util.ParseToolCalls(fixture.Text, fixture.ToolNames) + var got []util.ParsedToolCall + switch strings.ToLower(strings.TrimSpace(fixture.Mode)) { + case "standalone": + got = util.ParseStandaloneToolCalls(fixture.Text, fixture.ToolNames) + default: + got = util.ParseToolCalls(fixture.Text, fixture.ToolNames) + } if len(got) == 0 && len(expected.Calls) == 0 { continue } diff --git a/internal/js/chat-stream/index.js b/internal/js/chat-stream/index.js index 4528924..ce0587b 100644 --- a/internal/js/chat-stream/index.js +++ b/internal/js/chat-stream/index.js @@ -10,8 +10,10 @@ const { } = require('./sse_parse'); const { resolveToolcallPolicy, + formatIncrementalToolCallDeltas, normalizePreparedToolNames, boolDefaultTrue, + filterIncrementalToolCallDeltasByAllowed, } = require('./toolcall_policy'); const { estimateTokens, @@ -82,7 +84,9 @@ module.exports.__test = { shouldSkipPath, asString, resolveToolcallPolicy, + formatIncrementalToolCallDeltas, normalizePreparedToolNames, boolDefaultTrue, + filterIncrementalToolCallDeltasByAllowed, estimateTokens, }; diff --git a/internal/js/chat-stream/toolcall_policy.js b/internal/js/chat-stream/toolcall_policy.js index 4f4b37c..4523b89 100644 --- a/internal/js/chat-stream/toolcall_policy.js +++ b/internal/js/chat-stream/toolcall_policy.js @@ -68,6 +68,47 @@ function formatIncrementalToolCallDeltas(deltas, idStore) { return out; } +function filterIncrementalToolCallDeltasByAllowed(deltas, allowedNames, seenNames) { + if (!Array.isArray(deltas) || deltas.length === 0) { + return []; + } + const seen = seenNames instanceof Map ? seenNames : new Map(); + const allowed = new Set((allowedNames || []).filter((name) => asString(name) !== '')); + if (allowed.size === 0) { + for (const d of deltas) { + if (d && typeof d === 'object' && asString(d.name)) { + const index = Number.isInteger(d.index) ? d.index : 0; + seen.set(index, '__blocked__'); + } + } + return []; + } + + const out = []; + for (const d of deltas) { + if (!d || typeof d !== 'object') { + continue; + } + const index = Number.isInteger(d.index) ? d.index : 0; + const name = asString(d.name); + if (name) { + if (!allowed.has(name)) { + seen.set(index, '__blocked__'); + continue; + } + seen.set(index, name); + out.push(d); + continue; + } + const existing = asString(seen.get(index)); + if (!existing || existing === '__blocked__') { + continue; + } + out.push(d); + } + return out; +} + function ensureStreamToolCallID(idStore, index) { const key = Number.isInteger(index) ? index : 0; const existing = idStore.get(key); @@ -104,4 +145,5 @@ module.exports = { normalizePreparedToolNames, boolDefaultTrue, formatIncrementalToolCallDeltas, + filterIncrementalToolCallDeltasByAllowed, }; diff --git a/internal/js/chat-stream/vercel_stream.js b/internal/js/chat-stream/vercel_stream.js index 324a3d8..439edb8 100644 --- a/internal/js/chat-stream/vercel_stream.js +++ b/internal/js/chat-stream/vercel_stream.js @@ -5,7 +5,7 @@ const { createToolSieveState, processToolSieveChunk, flushToolSieve, - parseToolCalls, + parseStandaloneToolCalls, formatOpenAIStreamToolCalls, } = require('../helpers/stream-tool-sieve'); const { @@ -24,7 +24,6 @@ const { } = require('./token_usage'); const { resolveToolcallPolicy, - formatIncrementalToolCallDeltas, } = require('./toolcall_policy'); const { createChatCompletionEmitter, @@ -130,7 +129,6 @@ async function handleVercelStream(req, res, rawBody, payload) { let thinkingText = ''; let outputText = ''; const toolSieveEnabled = toolPolicy.toolSieveEnabled; - const emitEarlyToolDeltas = toolPolicy.emitEarlyToolDeltas; const toolSieveState = createToolSieveState(); let toolCallsEmitted = false; const streamToolCallIDs = new Map(); @@ -155,13 +153,18 @@ async function handleVercelStream(req, res, rawBody, payload) { await releaseLease(); return; } - const detected = parseToolCalls(outputText, toolNames); + const detected = parseStandaloneToolCalls(outputText, toolNames); if (detected.length > 0 && !toolCallsEmitted) { toolCallsEmitted = true; - sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(detected) }); + sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(detected, streamToolCallIDs) }); } else if (toolSieveEnabled) { const tailEvents = flushToolSieve(toolSieveState, toolNames); for (const evt of tailEvents) { + if (evt.type === 'tool_calls' && Array.isArray(evt.calls) && evt.calls.length > 0) { + toolCallsEmitted = true; + sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls, streamToolCallIDs) }); + continue; + } if (evt.text) { sendDeltaFrame({ content: evt.text }); } @@ -252,17 +255,9 @@ async function handleVercelStream(req, res, rawBody, payload) { } const events = processToolSieveChunk(toolSieveState, p.text, toolNames); for (const evt of events) { - if (evt.type === 'tool_call_deltas' && Array.isArray(evt.deltas) && evt.deltas.length > 0) { - if (!emitEarlyToolDeltas) { - continue; - } - toolCallsEmitted = true; - sendDeltaFrame({ tool_calls: formatIncrementalToolCallDeltas(evt.deltas, streamToolCallIDs) }); - continue; - } if (evt.type === 'tool_calls') { toolCallsEmitted = true; - sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls) }); + sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls, streamToolCallIDs) }); continue; } if (evt.text) { diff --git a/internal/js/helpers/stream-tool-sieve/format.js b/internal/js/helpers/stream-tool-sieve/format.js index ff1dcef..74da078 100644 --- a/internal/js/helpers/stream-tool-sieve/format.js +++ b/internal/js/helpers/stream-tool-sieve/format.js @@ -2,13 +2,13 @@ const crypto = require('crypto'); -function formatOpenAIStreamToolCalls(calls) { +function formatOpenAIStreamToolCalls(calls, idStore) { if (!Array.isArray(calls) || calls.length === 0) { return []; } return calls.map((c, idx) => ({ index: idx, - id: `call_${newCallID()}`, + id: ensureStreamToolCallID(idStore, idx), type: 'function', function: { name: c.name, @@ -17,6 +17,20 @@ function formatOpenAIStreamToolCalls(calls) { })); } +function ensureStreamToolCallID(idStore, index) { + if (!(idStore instanceof Map)) { + return `call_${newCallID()}`; + } + const key = Number.isInteger(index) ? index : 0; + const existing = idStore.get(key); + if (existing) { + return existing; + } + const next = `call_${newCallID()}`; + idStore.set(key, next); + return next; +} + function newCallID() { if (typeof crypto.randomUUID === 'function') { return crypto.randomUUID().replace(/-/g, ''); diff --git a/internal/js/helpers/stream-tool-sieve/incremental.js b/internal/js/helpers/stream-tool-sieve/incremental.js deleted file mode 100644 index 1895075..0000000 --- a/internal/js/helpers/stream-tool-sieve/incremental.js +++ /dev/null @@ -1,226 +0,0 @@ -'use strict'; - -const { - looksLikeToolExampleContext, - insideCodeFence, -} = require('./state'); -const { - findObjectFieldValueStart, - parseJSONStringLiteral, - skipSpaces, -} = require('./jsonscan'); - -function buildIncrementalToolDeltas(state) { - const captured = state.capture || ''; - if (!captured) { - return []; - } - if (looksLikeToolExampleContext(state.recentTextTail)) { - return []; - } - const lower = captured.toLowerCase(); - const keyIdx = lower.indexOf('tool_calls'); - if (keyIdx < 0) { - return []; - } - const start = captured.slice(0, keyIdx).lastIndexOf('{'); - if (start < 0) { - return []; - } - if (insideCodeFence((state.recentTextTail || '') + captured.slice(0, start))) { - return []; - } - const callStart = findFirstToolCallObjectStart(captured, keyIdx); - if (callStart < 0) { - return []; - } - - const deltas = []; - if (!state.toolName) { - const name = extractToolCallName(captured, callStart); - if (!name) { - return []; - } - state.toolName = name; - } - - if (state.toolArgsStart < 0) { - const args = findToolCallArgsStart(captured, callStart); - if (args) { - state.toolArgsString = Boolean(args.stringMode); - state.toolArgsStart = state.toolArgsString ? args.start + 1 : args.start; - state.toolArgsSent = state.toolArgsStart; - } - } - if (!state.toolNameSent) { - if (state.toolArgsStart < 0) { - return []; - } - state.toolNameSent = true; - deltas.push({ index: 0, name: state.toolName }); - } - if (state.toolArgsStart < 0 || state.toolArgsDone) { - return deltas; - } - const progress = scanToolCallArgsProgress(captured, state.toolArgsStart, state.toolArgsString); - if (!progress) { - return deltas; - } - if (progress.end > state.toolArgsSent) { - deltas.push({ - index: 0, - arguments: captured.slice(state.toolArgsSent, progress.end), - }); - state.toolArgsSent = progress.end; - } - if (progress.complete) { - state.toolArgsDone = true; - } - return deltas; -} - -function findFirstToolCallObjectStart(text, keyIdx) { - const arrStart = findToolCallsArrayStart(text, keyIdx); - if (arrStart < 0) { - return -1; - } - const i = skipSpaces(text, arrStart + 1); - if (i >= text.length || text[i] !== '{') { - return -1; - } - return i; -} - -function findToolCallsArrayStart(text, keyIdx) { - let i = keyIdx + 'tool_calls'.length; - while (i < text.length && text[i] !== ':') { - i += 1; - } - if (i >= text.length) { - return -1; - } - i = skipSpaces(text, i + 1); - if (i >= text.length || text[i] !== '[') { - return -1; - } - return i; -} - -function extractToolCallName(text, callStart) { - let valueStart = findObjectFieldValueStart(text, callStart, ['name']); - if (valueStart < 0 || text[valueStart] !== '"') { - const fnStart = findFunctionObjectStart(text, callStart); - if (fnStart < 0) { - return ''; - } - valueStart = findObjectFieldValueStart(text, fnStart, ['name']); - if (valueStart < 0 || text[valueStart] !== '"') { - return ''; - } - } - const parsed = parseJSONStringLiteral(text, valueStart); - if (!parsed) { - return ''; - } - return parsed.value; -} - -function findToolCallArgsStart(text, callStart) { - const keys = ['input', 'arguments', 'args', 'parameters', 'params']; - let valueStart = findObjectFieldValueStart(text, callStart, keys); - if (valueStart < 0) { - const fnStart = findFunctionObjectStart(text, callStart); - if (fnStart < 0) { - return null; - } - valueStart = findObjectFieldValueStart(text, fnStart, keys); - if (valueStart < 0) { - return null; - } - } - if (valueStart >= text.length) { - return null; - } - const ch = text[valueStart]; - if (ch === '{' || ch === '[') { - return { start: valueStart, stringMode: false }; - } - if (ch === '"') { - return { start: valueStart, stringMode: true }; - } - return null; -} - -function scanToolCallArgsProgress(text, start, stringMode) { - if (start < 0 || start > text.length) { - return null; - } - if (stringMode) { - let escaped = false; - for (let i = start; i < text.length; i += 1) { - const ch = text[i]; - if (escaped) { - escaped = false; - continue; - } - if (ch === '\\') { - escaped = true; - continue; - } - if (ch === '"') { - return { end: i, complete: true }; - } - } - return { end: text.length, complete: false }; - } - if (start >= text.length || (text[start] !== '{' && text[start] !== '[')) { - return null; - } - let depth = 0; - let quote = ''; - let escaped = false; - for (let i = start; i < text.length; i += 1) { - const ch = text[i]; - if (quote) { - if (escaped) { - escaped = false; - continue; - } - if (ch === '\\') { - escaped = true; - continue; - } - if (ch === quote) { - quote = ''; - } - continue; - } - if (ch === '"' || ch === "'") { - quote = ch; - continue; - } - if (ch === '{' || ch === '[') { - depth += 1; - continue; - } - if (ch === '}' || ch === ']') { - depth -= 1; - if (depth === 0) { - return { end: i + 1, complete: true }; - } - } - } - return { end: text.length, complete: false }; -} - -function findFunctionObjectStart(text, callStart) { - const valueStart = findObjectFieldValueStart(text, callStart, ['function']); - if (valueStart < 0 || valueStart >= text.length || text[valueStart] !== '{') { - return -1; - } - return valueStart; -} - -module.exports = { - buildIncrementalToolDeltas, -}; diff --git a/internal/js/helpers/stream-tool-sieve/index.js b/internal/js/helpers/stream-tool-sieve/index.js index f218b52..6333d8c 100644 --- a/internal/js/helpers/stream-tool-sieve/index.js +++ b/internal/js/helpers/stream-tool-sieve/index.js @@ -10,7 +10,9 @@ const { const { extractToolNames, parseToolCalls, + parseToolCallsDetailed, parseStandaloneToolCalls, + parseStandaloneToolCallsDetailed, } = require('./parse'); const { formatOpenAIStreamToolCalls, @@ -22,6 +24,8 @@ module.exports = { processToolSieveChunk, flushToolSieve, parseToolCalls, + parseToolCallsDetailed, parseStandaloneToolCalls, + parseStandaloneToolCallsDetailed, formatOpenAIStreamToolCalls, }; diff --git a/internal/js/helpers/stream-tool-sieve/parse.js b/internal/js/helpers/stream-tool-sieve/parse.js index f1efdda..92fabc3 100644 --- a/internal/js/helpers/stream-tool-sieve/parse.js +++ b/internal/js/helpers/stream-tool-sieve/parse.js @@ -29,25 +29,38 @@ function extractToolNames(tools) { } function parseToolCalls(text, toolNames) { + return parseToolCallsDetailed(text, toolNames).calls; +} + +function parseToolCallsDetailed(text, toolNames) { + const result = emptyParseResult(); if (!toStringSafe(text)) { - return []; + return result; } const sanitized = stripFencedCodeBlocks(text); if (!toStringSafe(sanitized)) { - return []; + return result; } + result.sawToolCallSyntax = sanitized.toLowerCase().includes('tool_calls'); + const candidates = buildToolCallCandidates(sanitized); let parsed = []; for (const c of candidates) { parsed = parseToolCallsPayload(c); if (parsed.length > 0) { + result.sawToolCallSyntax = true; break; } } if (parsed.length === 0) { - return []; + return result; } - return filterToolCalls(parsed, toolNames); + + const filtered = filterToolCallsDetailed(parsed, toolNames); + result.calls = filtered.calls; + result.rejectedToolNames = filtered.rejectedToolNames; + result.rejectedByPolicy = filtered.rejectedToolNames.length > 0 && filtered.calls.length === 0; + return result; } function stripFencedCodeBlocks(text) { @@ -59,37 +72,43 @@ function stripFencedCodeBlocks(text) { } function parseStandaloneToolCalls(text, toolNames) { + return parseStandaloneToolCallsDetailed(text, toolNames).calls; +} + +function parseStandaloneToolCallsDetailed(text, toolNames) { + const result = emptyParseResult(); const trimmed = toStringSafe(text); if (!trimmed) { - return []; - } - if ((trimmed.startsWith('```') && trimmed.endsWith('```')) || trimmed.includes('```')) { - return []; + return result; } if (looksLikeToolExampleContext(trimmed)) { - return []; + return result; } - const candidates = [trimmed]; - if (trimmed.startsWith('```') && trimmed.endsWith('```')) { - const m = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/i); - if (m && m[1]) { - candidates.push(toStringSafe(m[1])); - } + result.sawToolCallSyntax = trimmed.toLowerCase().includes('tool_calls'); + if (!trimmed.startsWith('{') && !trimmed.startsWith('[')) { + return result; } - for (const candidate of candidates) { - const c = toStringSafe(candidate); - if (!c) { - continue; - } - if (!c.startsWith('{') && !c.startsWith('[')) { - continue; - } - const parsed = parseToolCallsPayload(c); - if (parsed.length > 0) { - return filterToolCalls(parsed, toolNames); - } + + const parsed = parseToolCallsPayload(trimmed); + if (parsed.length === 0) { + return result; } - return []; + + result.sawToolCallSyntax = true; + const filtered = filterToolCallsDetailed(parsed, toolNames); + result.calls = filtered.calls; + result.rejectedToolNames = filtered.rejectedToolNames; + result.rejectedByPolicy = filtered.rejectedToolNames.length > 0 && filtered.calls.length === 0; + return result; +} + +function emptyParseResult() { + return { + calls: [], + sawToolCallSyntax: false, + rejectedByPolicy: false, + rejectedToolNames: [], + }; } function buildToolCallCandidates(text) { @@ -251,23 +270,69 @@ function parseToolCallInput(v) { return {}; } -function filterToolCalls(parsed, toolNames) { - const allowed = new Set((toolNames || []).filter(Boolean)); - const out = []; +function filterToolCallsDetailed(parsed, toolNames) { + const allowed = new Set(); + const allowedCanonical = new Map(); + for (const item of toolNames || []) { + const name = toStringSafe(item); + if (!name) { + continue; + } + allowed.add(name); + const lower = name.toLowerCase(); + if (!allowedCanonical.has(lower)) { + allowedCanonical.set(lower, name); + } + } + + if (allowed.size === 0) { + const rejected = []; + const seen = new Set(); + for (const tc of parsed) { + if (!tc || !tc.name) { + continue; + } + if (seen.has(tc.name)) { + continue; + } + seen.add(tc.name); + rejected.push(tc.name); + } + return { calls: [], rejectedToolNames: rejected }; + } + + const calls = []; + const rejected = []; + const seenRejected = new Set(); for (const tc of parsed) { if (!tc || !tc.name) { continue; } - if (allowed.size > 0 && !allowed.has(tc.name)) { + let matchedName = ''; + if (allowed.has(tc.name)) { + matchedName = tc.name; + } else { + matchedName = allowedCanonical.get(tc.name.toLowerCase()) || ''; + } + if (!matchedName) { + if (!seenRejected.has(tc.name)) { + seenRejected.add(tc.name); + rejected.push(tc.name); + } continue; } - out.push({ name: tc.name, input: tc.input || {} }); + calls.push({ + name: matchedName, + input: tc.input && typeof tc.input === 'object' && !Array.isArray(tc.input) ? tc.input : {}, + }); } - return out; + return { calls, rejectedToolNames: rejected }; } module.exports = { extractToolNames, parseToolCalls, + parseToolCallsDetailed, parseStandaloneToolCalls, + parseStandaloneToolCallsDetailed, }; diff --git a/internal/js/helpers/stream-tool-sieve/sieve.js b/internal/js/helpers/stream-tool-sieve/sieve.js index 0abe507..1f1fc59 100644 --- a/internal/js/helpers/stream-tool-sieve/sieve.js +++ b/internal/js/helpers/stream-tool-sieve/sieve.js @@ -6,10 +6,7 @@ const { insideCodeFence, } = require('./state'); const { - buildIncrementalToolDeltas, -} = require('./incremental'); -const { - parseStandaloneToolCalls, + parseStandaloneToolCallsDetailed, } = require('./parse'); const { extractJSONObjectFrom, @@ -23,6 +20,21 @@ function processToolSieveChunk(state, chunk, toolNames) { state.pending += chunk; } const events = []; + + if (Array.isArray(state.pendingToolCalls) && state.pendingToolCalls.length > 0) { + const pending = state.pending || ''; + if (pending.trim() !== '') { + const content = (state.pendingToolRaw || '') + pending; + state.pending = ''; + state.pendingToolRaw = ''; + state.pendingToolCalls = []; + noteText(state, content); + events.push({ type: 'text', text: content }); + } else { + return events; + } + } + // eslint-disable-next-line no-constant-condition while (true) { if (state.capturing) { @@ -30,49 +42,50 @@ function processToolSieveChunk(state, chunk, toolNames) { state.capture += state.pending; state.pending = ''; } - const deltas = buildIncrementalToolDeltas(state); - if (deltas.length > 0) { - events.push({ type: 'tool_call_deltas', deltas }); - } const consumed = consumeToolCapture(state, toolNames); if (!consumed.ready) { break; } + const captured = state.capture; state.capture = ''; state.capturing = false; resetIncrementalToolState(state); + + if (Array.isArray(consumed.calls) && consumed.calls.length > 0) { + state.pendingToolRaw = captured; + state.pendingToolCalls = consumed.calls; + continue; + } if (consumed.prefix) { noteText(state, consumed.prefix); events.push({ type: 'text', text: consumed.prefix }); } - if (Array.isArray(consumed.calls) && consumed.calls.length > 0) { - events.push({ type: 'tool_calls', calls: consumed.calls }); - } if (consumed.suffix) { state.pending += consumed.suffix; } continue; } - if (!state.pending) { + const pending = state.pending || ''; + if (!pending) { break; } - const start = findToolSegmentStart(state.pending); + const start = findToolSegmentStart(pending); if (start >= 0) { - const prefix = state.pending.slice(0, start); + const prefix = pending.slice(0, start); if (prefix) { noteText(state, prefix); events.push({ type: 'text', text: prefix }); } - state.capture = state.pending.slice(start); state.pending = ''; + state.capture += pending.slice(start); state.capturing = true; resetIncrementalToolState(state); continue; } - const [safe, hold] = splitSafeContentForToolDetection(state.pending); + const [safe, hold] = splitSafeContentForToolDetection(pending); if (!safe) { break; } @@ -88,6 +101,13 @@ function flushToolSieve(state, toolNames) { return []; } const events = processToolSieveChunk(state, '', toolNames); + + if (Array.isArray(state.pendingToolCalls) && state.pendingToolCalls.length > 0) { + events.push({ type: 'tool_calls', calls: state.pendingToolCalls }); + state.pendingToolRaw = ''; + state.pendingToolCalls = []; + } + if (state.capturing) { const consumed = consumeToolCapture(state, toolNames); if (consumed.ready) { @@ -110,11 +130,13 @@ function flushToolSieve(state, toolNames) { state.capturing = false; resetIncrementalToolState(state); } + if (state.pending) { noteText(state, state.pending); events.push({ type: 'text', text: state.pending }); state.pending = ''; } + return events; } @@ -154,11 +176,10 @@ function findToolSegmentStart(s) { let offset = 0; // eslint-disable-next-line no-constant-condition while (true) { - const keyRel = lower.indexOf('tool_calls', offset); - if (keyRel < 0) { + const keyIdx = lower.indexOf('tool_calls', offset); + if (keyIdx < 0) { return -1; } - const keyIdx = keyRel; const start = s.slice(0, keyIdx).lastIndexOf('{'); const candidateStart = start >= 0 ? start : keyIdx; if (!insideCodeFence(s.slice(0, candidateStart))) { @@ -169,7 +190,7 @@ function findToolSegmentStart(s) { } function consumeToolCapture(state, toolNames) { - const captured = state.capture; + const captured = state.capture || ''; if (!captured) { return { ready: false, prefix: '', calls: [], suffix: '' }; } @@ -186,8 +207,10 @@ function consumeToolCapture(state, toolNames) { if (!obj.ok) { return { ready: false, prefix: '', calls: [], suffix: '' }; } + const prefixPart = captured.slice(0, start); const suffixPart = captured.slice(obj.end); + if (insideCodeFence((state.recentTextTail || '') + prefixPart)) { return { ready: true, @@ -196,18 +219,19 @@ function consumeToolCapture(state, toolNames) { suffix: '', }; } - const rawParsed = parseStandaloneToolCalls(captured.slice(start, obj.end), []); - const parsed = parseStandaloneToolCalls(captured.slice(start, obj.end), toolNames); - if (parsed.length === 0) { - if (rawParsed.length > 0 && Array.isArray(toolNames) && toolNames.length > 0) { - return { - ready: true, - prefix: prefixPart, - calls: [], - suffix: suffixPart, - }; - } - if (state.toolNameSent) { + + if ((state.recentTextTail || '').trim() !== '' || prefixPart.trim() !== '' || suffixPart.trim() !== '') { + return { + ready: true, + prefix: captured, + calls: [], + suffix: '', + }; + } + + const parsed = parseStandaloneToolCallsDetailed(captured.slice(start, obj.end), toolNames); + if (!Array.isArray(parsed.calls) || parsed.calls.length === 0) { + if (parsed.sawToolCallSyntax && parsed.rejectedByPolicy) { return { ready: true, prefix: prefixPart, @@ -222,26 +246,11 @@ function consumeToolCapture(state, toolNames) { suffix: '', }; } - if (state.toolNameSent) { - if (parsed.length > 1) { - return { - ready: true, - prefix: prefixPart, - calls: parsed.slice(1), - suffix: suffixPart, - }; - } - return { - ready: true, - prefix: prefixPart, - calls: [], - suffix: suffixPart, - }; - } + return { ready: true, prefix: prefixPart, - calls: parsed, + calls: parsed.calls, suffix: suffixPart, }; } diff --git a/internal/js/helpers/stream-tool-sieve/state.js b/internal/js/helpers/stream-tool-sieve/state.js index ff588e2..d013727 100644 --- a/internal/js/helpers/stream-tool-sieve/state.js +++ b/internal/js/helpers/stream-tool-sieve/state.js @@ -8,6 +8,9 @@ function createToolSieveState() { capture: '', capturing: false, recentTextTail: '', + pendingToolRaw: '', + pendingToolCalls: [], + disableDeltas: false, toolNameSent: false, toolName: '', toolArgsStart: -1, @@ -18,6 +21,7 @@ function createToolSieveState() { } function resetIncrementalToolState(state) { + state.disableDeltas = false; state.toolNameSent = false; state.toolName = ''; state.toolArgsStart = -1; diff --git a/plans/node-syntax-gate-targets.txt b/plans/node-syntax-gate-targets.txt index 7b268a8..8f97f83 100644 --- a/plans/node-syntax-gate-targets.txt +++ b/plans/node-syntax-gate-targets.txt @@ -16,7 +16,6 @@ internal/js/helpers/stream-tool-sieve.js internal/js/helpers/stream-tool-sieve/index.js internal/js/helpers/stream-tool-sieve/state.js internal/js/helpers/stream-tool-sieve/sieve.js -internal/js/helpers/stream-tool-sieve/incremental.js internal/js/helpers/stream-tool-sieve/jsonscan.js internal/js/helpers/stream-tool-sieve/parse.js internal/js/helpers/stream-tool-sieve/format.js diff --git a/tests/compat/expected/toolcalls_allowlist_empty.json b/tests/compat/expected/toolcalls_allowlist_empty.json new file mode 100644 index 0000000..97646bf --- /dev/null +++ b/tests/compat/expected/toolcalls_allowlist_empty.json @@ -0,0 +1,3 @@ +{ + "calls": [] +} diff --git a/tests/compat/expected/toolcalls_case_insensitive_canonical.json b/tests/compat/expected/toolcalls_case_insensitive_canonical.json new file mode 100644 index 0000000..39a11e6 --- /dev/null +++ b/tests/compat/expected/toolcalls_case_insensitive_canonical.json @@ -0,0 +1,10 @@ +{ + "calls": [ + { + "name": "read_file", + "input": { + "path": "README.MD" + } + } + ] +} diff --git a/tests/compat/expected/toolcalls_standalone_fenced_example.json b/tests/compat/expected/toolcalls_standalone_fenced_example.json new file mode 100644 index 0000000..97646bf --- /dev/null +++ b/tests/compat/expected/toolcalls_standalone_fenced_example.json @@ -0,0 +1,3 @@ +{ + "calls": [] +} diff --git a/tests/compat/expected/toolcalls_standalone_mixed_prose.json b/tests/compat/expected/toolcalls_standalone_mixed_prose.json new file mode 100644 index 0000000..97646bf --- /dev/null +++ b/tests/compat/expected/toolcalls_standalone_mixed_prose.json @@ -0,0 +1,3 @@ +{ + "calls": [] +} diff --git a/tests/compat/expected/toolcalls_standalone_pure.json b/tests/compat/expected/toolcalls_standalone_pure.json new file mode 100644 index 0000000..39a11e6 --- /dev/null +++ b/tests/compat/expected/toolcalls_standalone_pure.json @@ -0,0 +1,10 @@ +{ + "calls": [ + { + "name": "read_file", + "input": { + "path": "README.MD" + } + } + ] +} diff --git a/tests/compat/fixtures/toolcalls/allowlist_empty.json b/tests/compat/fixtures/toolcalls/allowlist_empty.json new file mode 100644 index 0000000..8855804 --- /dev/null +++ b/tests/compat/fixtures/toolcalls/allowlist_empty.json @@ -0,0 +1,4 @@ +{ + "text": "{\"tool_calls\":[{\"name\":\"unknown_tool\",\"input\":{\"x\":1}}]}", + "tool_names": [] +} diff --git a/tests/compat/fixtures/toolcalls/case_insensitive_canonical.json b/tests/compat/fixtures/toolcalls/case_insensitive_canonical.json new file mode 100644 index 0000000..7d575aa --- /dev/null +++ b/tests/compat/fixtures/toolcalls/case_insensitive_canonical.json @@ -0,0 +1,4 @@ +{ + "text": "{\"tool_calls\":[{\"name\":\"Read_File\",\"input\":{\"path\":\"README.MD\"}}]}", + "tool_names": ["read_file"] +} diff --git a/tests/compat/fixtures/toolcalls/standalone_fenced_example.json b/tests/compat/fixtures/toolcalls/standalone_fenced_example.json new file mode 100644 index 0000000..8a6c6bf --- /dev/null +++ b/tests/compat/fixtures/toolcalls/standalone_fenced_example.json @@ -0,0 +1,5 @@ +{ + "mode": "standalone", + "text": "```json\n{\"tool_calls\":[{\"name\":\"read_file\",\"input\":{\"path\":\"README.MD\"}}]}\n```", + "tool_names": ["read_file"] +} diff --git a/tests/compat/fixtures/toolcalls/standalone_mixed_prose.json b/tests/compat/fixtures/toolcalls/standalone_mixed_prose.json new file mode 100644 index 0000000..f991417 --- /dev/null +++ b/tests/compat/fixtures/toolcalls/standalone_mixed_prose.json @@ -0,0 +1,5 @@ +{ + "mode": "standalone", + "text": "下面是示例:{\"tool_calls\":[{\"name\":\"read_file\",\"input\":{\"path\":\"README.MD\"}}]}请勿执行。", + "tool_names": ["read_file"] +} diff --git a/tests/compat/fixtures/toolcalls/standalone_pure.json b/tests/compat/fixtures/toolcalls/standalone_pure.json new file mode 100644 index 0000000..aecee75 --- /dev/null +++ b/tests/compat/fixtures/toolcalls/standalone_pure.json @@ -0,0 +1,5 @@ +{ + "mode": "standalone", + "text": "{\"tool_calls\":[{\"name\":\"read_file\",\"input\":{\"path\":\"README.MD\"}}]}", + "tool_names": ["read_file"] +} diff --git a/tests/node/chat-stream.test.js b/tests/node/chat-stream.test.js index e31afbe..48be6ff 100644 --- a/tests/node/chat-stream.test.js +++ b/tests/node/chat-stream.test.js @@ -13,8 +13,10 @@ const { const { parseChunkForContent, resolveToolcallPolicy, + formatIncrementalToolCallDeltas, normalizePreparedToolNames, boolDefaultTrue, + filterIncrementalToolCallDeltasByAllowed, } = handler.__test; test('chat-stream exposes parser test hooks', () => { @@ -56,6 +58,46 @@ test('boolDefaultTrue keeps false only when explicitly false', () => { assert.equal(boolDefaultTrue(undefined), true); }); +test('filterIncrementalToolCallDeltasByAllowed blocks unknown name and follow-up args', () => { + const seen = new Map(); + const filtered = filterIncrementalToolCallDeltasByAllowed( + [ + { index: 0, name: 'not_in_schema' }, + { index: 0, arguments: '{"x":1}' }, + ], + ['read_file'], + seen, + ); + assert.deepEqual(filtered, []); + assert.equal(seen.get(0), '__blocked__'); +}); + +test('filterIncrementalToolCallDeltasByAllowed keeps allowed name and args', () => { + const seen = new Map(); + const filtered = filterIncrementalToolCallDeltasByAllowed( + [ + { index: 0, name: 'read_file' }, + { index: 0, arguments: '{"path":"README.MD"}' }, + ], + ['read_file'], + seen, + ); + assert.deepEqual(filtered, [ + { index: 0, name: 'read_file' }, + { index: 0, arguments: '{"path":"README.MD"}' }, + ]); +}); + +test('incremental and final tool formatting share stable id via idStore', () => { + const idStore = new Map(); + const incremental = formatIncrementalToolCallDeltas([{ index: 0, name: 'read_file' }], idStore); + const { formatOpenAIStreamToolCalls } = require('../../internal/js/helpers/stream-tool-sieve.js'); + const finalCalls = formatOpenAIStreamToolCalls([{ name: 'read_file', input: { path: 'README.MD' } }], idStore); + assert.equal(incremental.length, 1); + assert.equal(finalCalls.length, 1); + assert.equal(incremental[0].id, finalCalls[0].id); +}); + test('parseChunkForContent keeps split response/content fragments inside response array', () => { const chunk = { p: 'response', diff --git a/tests/node/js_compat_test.js b/tests/node/js_compat_test.js index 0029abe..74b3fd1 100644 --- a/tests/node/js_compat_test.js +++ b/tests/node/js_compat_test.js @@ -6,7 +6,7 @@ const fs = require('node:fs'); const path = require('node:path'); const chatStream = require('../../api/chat-stream.js'); -const { parseToolCalls } = require('../../internal/js/helpers/stream-tool-sieve.js'); +const { parseToolCalls, parseStandaloneToolCalls } = require('../../internal/js/helpers/stream-tool-sieve.js'); const { parseChunkForContent, estimateTokens } = chatStream.__test; @@ -41,12 +41,14 @@ test('js compat: toolcall fixtures', () => { for (const file of files) { const name = file.replace(/\.json$/i, ''); - const fixture = readJSON(path.join(fixtureDir, file)); - const expected = readJSON(path.join(expectedDir, `toolcalls_${name}.json`)); - const got = parseToolCalls(fixture.text, fixture.tool_names || []); - assert.deepEqual(got, expected.calls, `${name}: calls mismatch`); - } -}); + const fixture = readJSON(path.join(fixtureDir, file)); + const expected = readJSON(path.join(expectedDir, `toolcalls_${name}.json`)); + const mode = typeof fixture.mode === 'string' ? fixture.mode.trim().toLowerCase() : ''; + const parser = mode === 'standalone' ? parseStandaloneToolCalls : parseToolCalls; + const got = parser(fixture.text, fixture.tool_names || []); + assert.deepEqual(got, expected.calls, `${name}: calls mismatch`); + } + }); test('js compat: token fixtures', () => { const fixture = readJSON(path.join(compatRoot, 'fixtures', 'token_cases.json')); diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index ccbd160..e68f2ff 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -9,7 +9,9 @@ const { processToolSieveChunk, flushToolSieve, parseToolCalls, + parseToolCallsDetailed, parseStandaloneToolCalls, + formatOpenAIStreamToolCalls, } = require('../../internal/js/helpers/stream-tool-sieve.js'); function runSieve(chunks, toolNames) { @@ -60,13 +62,25 @@ test('parseToolCalls drops unknown schema names when toolNames is provided', () assert.equal(calls.length, 0); }); -test('parseToolCalls keeps unknown names when toolNames is empty', () => { +test('parseToolCalls matches tool name case-insensitively and canonicalizes', () => { + const payload = JSON.stringify({ + tool_calls: [{ name: 'Read_File', input: { path: 'README.MD' } }], + }); + const calls = parseToolCalls(payload, ['read_file']); + assert.deepEqual(calls, [{ name: 'read_file', input: { path: 'README.MD' } }]); +}); + +test('parseToolCalls rejects all names when toolNames is empty (Go strict parity)', () => { const payload = JSON.stringify({ tool_calls: [{ name: 'not_in_schema', input: { q: 'go' } }], }); const calls = parseToolCalls(payload, []); - assert.equal(calls.length, 1); - assert.equal(calls[0].name, 'not_in_schema'); + assert.equal(calls.length, 0); + + const detailed = parseToolCallsDetailed(payload, []); + assert.equal(detailed.sawToolCallSyntax, true); + assert.equal(detailed.rejectedByPolicy, true); + assert.deepEqual(detailed.rejectedToolNames, ['not_in_schema']); }); test('parseToolCalls supports fenced json and function.arguments string payload', () => { @@ -95,7 +109,7 @@ test('parseStandaloneToolCalls ignores fenced code block tool_call examples', () assert.equal(calls.length, 0); }); -test('sieve emits tool_calls and does not leak suspicious prefix on late key convergence', () => { +test('sieve keeps late key convergence payload as plain text in strict mode', () => { const events = runSieve( [ '{"', @@ -107,9 +121,9 @@ test('sieve emits tool_calls and does not leak suspicious prefix on late key con const leakedText = collectText(events); const hasToolCall = events.some((evt) => evt.type === 'tool_calls' && Array.isArray(evt.calls) && evt.calls.length > 0); const hasToolDelta = events.some((evt) => evt.type === 'tool_call_deltas' && Array.isArray(evt.deltas) && evt.deltas.length > 0); - assert.equal(hasToolCall || hasToolDelta, true); - assert.equal(leakedText.includes('{'), false); - assert.equal(leakedText.toLowerCase().includes('tool_calls'), false); + assert.equal(hasToolCall || hasToolDelta, false); + assert.equal(leakedText.includes('{'), true); + assert.equal(leakedText.toLowerCase().includes('tool_calls'), true); assert.equal(leakedText.includes('后置正文C。'), true); }); @@ -180,7 +194,7 @@ test('sieve intercepts rejected unknown tool payload (no args) without raw leak' assert.equal(leakedText.includes('后置正文G。'), true); }); -test('sieve emits incremental tool_call_deltas for split arguments payload', () => { +test('sieve emits final tool_calls for split arguments payload without incremental deltas', () => { const state = createToolSieveState(); const first = processToolSieveChunk( state, @@ -195,37 +209,43 @@ test('sieve emits incremental tool_call_deltas for split arguments payload', () const tail = flushToolSieve(state, ['read_file']); const events = [...first, ...second, ...tail]; const deltaEvents = events.filter((evt) => evt.type === 'tool_call_deltas'); - assert.equal(deltaEvents.length > 0, true); - const merged = deltaEvents.flatMap((evt) => evt.deltas || []); - const hasName = merged.some((d) => d.name === 'read_file'); - const argsJoined = merged - .map((d) => d.arguments || '') - .join(''); - assert.equal(hasName, true); - assert.equal(argsJoined.includes('"path":"README.MD"'), true); - assert.equal(argsJoined.includes('"mode":"head"'), true); + assert.equal(deltaEvents.length, 0); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].name, 'read_file'); + assert.deepEqual(finalCalls[0].input, { path: 'README.MD', mode: 'head' }); }); -test('sieve still intercepts tool call after leading plain text without suffix', () => { +test('sieve keeps tool json as text when leading prose exists (strict mode)', () => { const events = runSieve( ['我将调用工具。', '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}'], ['read_file'], ); const hasTool = events.some((evt) => (evt.type === 'tool_calls' && evt.calls?.length > 0) || (evt.type === 'tool_call_deltas' && evt.deltas?.length > 0)); const leakedText = collectText(events); - assert.equal(hasTool, true); + assert.equal(hasTool, false); assert.equal(leakedText.includes('我将调用工具。'), true); - assert.equal(leakedText.toLowerCase().includes('tool_calls'), false); + assert.equal(leakedText.toLowerCase().includes('tool_calls'), true); }); -test('sieve intercepts tool call and preserves trailing same-chunk text', () => { +test('sieve keeps same-chunk trailing prose payload as text in strict mode', () => { const events = runSieve( ['{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}然后继续解释。'], ['read_file'], ); const hasTool = events.some((evt) => (evt.type === 'tool_calls' && evt.calls?.length > 0) || (evt.type === 'tool_call_deltas' && evt.deltas?.length > 0)); const leakedText = collectText(events); - assert.equal(hasTool, true); + assert.equal(hasTool, false); assert.equal(leakedText.includes('然后继续解释。'), true); - assert.equal(leakedText.toLowerCase().includes('tool_calls'), false); + assert.equal(leakedText.toLowerCase().includes('tool_calls'), true); +}); + +test('formatOpenAIStreamToolCalls reuses ids with the same idStore', () => { + const idStore = new Map(); + const calls = [{ name: 'read_file', input: { path: 'README.MD' } }]; + const first = formatOpenAIStreamToolCalls(calls, idStore); + const second = formatOpenAIStreamToolCalls(calls, idStore); + assert.equal(first.length, 1); + assert.equal(second.length, 1); + assert.equal(first[0].id, second[0].id); });