diff --git a/internal/js/helpers/stream-tool-sieve/parse.js b/internal/js/helpers/stream-tool-sieve/parse.js index 92fabc3..bf21046 100644 --- a/internal/js/helpers/stream-tool-sieve/parse.js +++ b/internal/js/helpers/stream-tool-sieve/parse.js @@ -1,14 +1,14 @@ 'use strict'; -const TOOL_CALL_PATTERN = /\{\s*["']tool_calls["']\s*:\s*\[(.*?)\]\s*\}/s; - const { toStringSafe, looksLikeToolExampleContext, } = require('./state'); const { - extractJSONObjectFrom, -} = require('./jsonscan'); + stripFencedCodeBlocks, + buildToolCallCandidates, + parseToolCallsPayload, +} = require('./parse_payload'); function extractToolNames(tools) { if (!Array.isArray(tools) || tools.length === 0) { @@ -63,14 +63,6 @@ function parseToolCallsDetailed(text, toolNames) { return result; } -function stripFencedCodeBlocks(text) { - const t = typeof text === 'string' ? text : ''; - if (!t) { - return ''; - } - return t.replace(/```[\s\S]*?```/g, ' '); -} - function parseStandaloneToolCalls(text, toolNames) { return parseStandaloneToolCallsDetailed(text, toolNames).calls; } @@ -111,169 +103,11 @@ function emptyParseResult() { }; } -function buildToolCallCandidates(text) { - const trimmed = toStringSafe(text); - const candidates = [trimmed]; - const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/gi) || []; - for (const block of fenced) { - const m = block.match(/```(?:json)?\s*([\s\S]*?)\s*```/i); - if (m && m[1]) { - candidates.push(toStringSafe(m[1])); - } - } - for (const candidate of extractToolCallObjects(trimmed)) { - candidates.push(toStringSafe(candidate)); - } - const first = trimmed.indexOf('{'); - const last = trimmed.lastIndexOf('}'); - if (first >= 0 && last > first) { - candidates.push(toStringSafe(trimmed.slice(first, last + 1))); - } - const m = trimmed.match(TOOL_CALL_PATTERN); - if (m && m[1]) { - candidates.push(`{"tool_calls":[${m[1]}]}`); - } - return [...new Set(candidates.filter(Boolean))]; -} - -function extractToolCallObjects(text) { - const raw = toStringSafe(text); - if (!raw) { - return []; - } - const lower = raw.toLowerCase(); - const out = []; - let offset = 0; - // eslint-disable-next-line no-constant-condition - while (true) { - let idx = lower.indexOf('tool_calls', offset); - if (idx < 0) { - break; - } - let start = raw.slice(0, idx).lastIndexOf('{'); - while (start >= 0) { - const obj = extractJSONObjectFrom(raw, start); - if (obj.ok) { - out.push(raw.slice(start, obj.end).trim()); - offset = obj.end; - idx = -1; - break; - } - start = raw.slice(0, start).lastIndexOf('{'); - } - if (idx >= 0) { - offset = idx + 'tool_calls'.length; - } - } - return out; -} - -function parseToolCallsPayload(payload) { - let decoded; - try { - decoded = JSON.parse(payload); - } catch (_err) { - return []; - } - if (Array.isArray(decoded)) { - return parseToolCallList(decoded); - } - if (!decoded || typeof decoded !== 'object') { - return []; - } - if (decoded.tool_calls) { - return parseToolCallList(decoded.tool_calls); - } - const one = parseToolCallItem(decoded); - return one ? [one] : []; -} - -function parseToolCallList(v) { - if (!Array.isArray(v)) { - return []; - } - const out = []; - for (const item of v) { - if (!item || typeof item !== 'object') { - continue; - } - const one = parseToolCallItem(item); - if (one) { - out.push(one); - } - } - return out; -} - -function parseToolCallItem(m) { - let name = toStringSafe(m.name); - let inputRaw = m.input; - let hasInput = Object.prototype.hasOwnProperty.call(m, 'input'); - const fn = m.function && typeof m.function === 'object' ? m.function : null; - if (fn) { - if (!name) { - name = toStringSafe(fn.name); - } - if (!hasInput && Object.prototype.hasOwnProperty.call(fn, 'arguments')) { - inputRaw = fn.arguments; - hasInput = true; - } - } - if (!hasInput) { - for (const k of ['arguments', 'args', 'parameters', 'params']) { - if (Object.prototype.hasOwnProperty.call(m, k)) { - inputRaw = m[k]; - hasInput = true; - break; - } - } - } - if (!name) { - return null; - } - return { - name, - input: parseToolCallInput(inputRaw), - }; -} - -function parseToolCallInput(v) { - if (v == null) { - return {}; - } - if (typeof v === 'string') { - const raw = toStringSafe(v); - if (!raw) { - return {}; - } - try { - const parsed = JSON.parse(raw); - if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { - return parsed; - } - return { _raw: raw }; - } catch (_err) { - return { _raw: raw }; - } - } - if (typeof v === 'object' && !Array.isArray(v)) { - return v; - } - try { - const parsed = JSON.parse(JSON.stringify(v)); - if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { - return parsed; - } - } catch (_err) { - return {}; - } - return {}; -} - function filterToolCallsDetailed(parsed, toolNames) { + const sourceNames = Array.isArray(toolNames) ? toolNames : []; const allowed = new Set(); const allowedCanonical = new Map(); - for (const item of toolNames || []) { + for (const item of sourceNames) { const name = toStringSafe(item); if (!name) { continue; diff --git a/internal/js/helpers/stream-tool-sieve/parse_payload.js b/internal/js/helpers/stream-tool-sieve/parse_payload.js new file mode 100644 index 0000000..4be82c4 --- /dev/null +++ b/internal/js/helpers/stream-tool-sieve/parse_payload.js @@ -0,0 +1,196 @@ +'use strict'; + +const TOOL_CALL_PATTERN = /\{\s*["']tool_calls["']\s*:\s*\[(.*?)\]\s*\}/s; + +const { + toStringSafe, +} = require('./state'); +const { + extractJSONObjectFrom, +} = require('./jsonscan'); + +function stripFencedCodeBlocks(text) { + const t = typeof text === 'string' ? text : ''; + if (!t) { + return ''; + } + return t.replace(/```[\s\S]*?```/g, ' '); +} + +function buildToolCallCandidates(text) { + const trimmed = toStringSafe(text); + const candidates = [trimmed]; + + const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/gi) || []; + for (const block of fenced) { + const m = block.match(/```(?:json)?\s*([\s\S]*?)\s*```/i); + if (m && m[1]) { + candidates.push(toStringSafe(m[1])); + } + } + + for (const candidate of extractToolCallObjects(trimmed)) { + candidates.push(toStringSafe(candidate)); + } + + const first = trimmed.indexOf('{'); + const last = trimmed.lastIndexOf('}'); + if (first >= 0 && last > first) { + candidates.push(toStringSafe(trimmed.slice(first, last + 1))); + } + + const m = trimmed.match(TOOL_CALL_PATTERN); + if (m && m[1]) { + candidates.push(`{"tool_calls":[${m[1]}]}`); + } + + return [...new Set(candidates.filter(Boolean))]; +} + +function extractToolCallObjects(text) { + const raw = toStringSafe(text); + if (!raw) { + return []; + } + const lower = raw.toLowerCase(); + const out = []; + let offset = 0; + + // eslint-disable-next-line no-constant-condition + while (true) { + let idx = lower.indexOf('tool_calls', offset); + if (idx < 0) { + break; + } + let start = raw.slice(0, idx).lastIndexOf('{'); + while (start >= 0) { + const obj = extractJSONObjectFrom(raw, start); + if (obj.ok) { + out.push(raw.slice(start, obj.end).trim()); + offset = obj.end; + idx = -1; + break; + } + start = raw.slice(0, start).lastIndexOf('{'); + } + if (idx >= 0) { + offset = idx + 'tool_calls'.length; + } + } + + return out; +} + +function parseToolCallsPayload(payload) { + let decoded; + try { + decoded = JSON.parse(payload); + } catch (_err) { + return []; + } + + if (Array.isArray(decoded)) { + return parseToolCallList(decoded); + } + if (!decoded || typeof decoded !== 'object') { + return []; + } + if (decoded.tool_calls) { + return parseToolCallList(decoded.tool_calls); + } + + const one = parseToolCallItem(decoded); + return one ? [one] : []; +} + +function parseToolCallList(v) { + if (!Array.isArray(v)) { + return []; + } + const out = []; + for (const item of v) { + if (!item || typeof item !== 'object') { + continue; + } + const one = parseToolCallItem(item); + if (one) { + out.push(one); + } + } + return out; +} + +function parseToolCallItem(m) { + let name = toStringSafe(m.name); + let inputRaw = m.input; + let hasInput = Object.prototype.hasOwnProperty.call(m, 'input'); + const fn = m.function && typeof m.function === 'object' ? m.function : null; + + if (fn) { + if (!name) { + name = toStringSafe(fn.name); + } + if (!hasInput && Object.prototype.hasOwnProperty.call(fn, 'arguments')) { + inputRaw = fn.arguments; + hasInput = true; + } + } + + if (!hasInput) { + for (const k of ['arguments', 'args', 'parameters', 'params']) { + if (Object.prototype.hasOwnProperty.call(m, k)) { + inputRaw = m[k]; + hasInput = true; + break; + } + } + } + + if (!name) { + return null; + } + + return { + name, + input: parseToolCallInput(inputRaw), + }; +} + +function parseToolCallInput(v) { + if (v == null) { + return {}; + } + if (typeof v === 'string') { + const raw = toStringSafe(v); + if (!raw) { + return {}; + } + try { + const parsed = JSON.parse(raw); + if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { + return parsed; + } + return { _raw: raw }; + } catch (_err) { + return { _raw: raw }; + } + } + if (typeof v === 'object' && !Array.isArray(v)) { + return v; + } + try { + const parsed = JSON.parse(JSON.stringify(v)); + if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { + return parsed; + } + } catch (_err) { + return {}; + } + return {}; +} + +module.exports = { + stripFencedCodeBlocks, + buildToolCallCandidates, + parseToolCallsPayload, +}; diff --git a/plans/refactor-line-gate-targets.txt b/plans/refactor-line-gate-targets.txt index c9839b2..645b7e1 100644 --- a/plans/refactor-line-gate-targets.txt +++ b/plans/refactor-line-gate-targets.txt @@ -105,7 +105,6 @@ internal/js/helpers/stream-tool-sieve.js internal/js/helpers/stream-tool-sieve/index.js internal/js/helpers/stream-tool-sieve/state.js internal/js/helpers/stream-tool-sieve/sieve.js -internal/js/helpers/stream-tool-sieve/incremental.js internal/js/helpers/stream-tool-sieve/jsonscan.js internal/js/helpers/stream-tool-sieve/parse.js internal/js/helpers/stream-tool-sieve/format.js