mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-14 13:15:07 +08:00
280 lines
8.0 KiB
JavaScript
280 lines
8.0 KiB
JavaScript
'use strict';
|
|
const {
|
|
resetIncrementalToolState,
|
|
noteText,
|
|
insideCodeFenceWithState,
|
|
} = require('./state');
|
|
const { parseStandaloneToolCallsDetailed } = require('./parse');
|
|
const { extractJSONObjectFrom, trimWrappingJSONFence } = require('./jsonscan');
|
|
const {
|
|
TOOL_SEGMENT_KEYWORDS,
|
|
XML_TOOL_SEGMENT_TAGS,
|
|
earliestKeywordIndex,
|
|
} = require('./tool-keywords');
|
|
const {
|
|
consumeXMLToolCapture: consumeXMLToolCaptureImpl,
|
|
hasOpenXMLToolTag,
|
|
findPartialXMLToolTagStart,
|
|
looksLikeXMLToolTagFragment,
|
|
} = require('./sieve-xml');
|
|
function processToolSieveChunk(state, chunk, toolNames) {
|
|
if (!state) {
|
|
return [];
|
|
}
|
|
if (chunk) {
|
|
state.pending += chunk;
|
|
}
|
|
const events = [];
|
|
while (true) {
|
|
if (Array.isArray(state.pendingToolCalls) && state.pendingToolCalls.length > 0) {
|
|
events.push({ type: 'tool_calls', calls: state.pendingToolCalls });
|
|
state.pendingToolRaw = '';
|
|
state.pendingToolCalls = [];
|
|
continue;
|
|
}
|
|
if (state.capturing) {
|
|
if (state.pending) {
|
|
state.capture += state.pending;
|
|
state.pending = '';
|
|
}
|
|
const consumed = consumeToolCapture(state, toolNames);
|
|
if (!consumed.ready) {
|
|
break;
|
|
}
|
|
const captured = state.capture;
|
|
state.capture = '';
|
|
state.capturing = false;
|
|
resetIncrementalToolState(state);
|
|
|
|
if (Array.isArray(consumed.calls) && consumed.calls.length > 0) {
|
|
state.pendingToolRaw = captured;
|
|
state.pendingToolCalls = consumed.calls;
|
|
if (consumed.suffix) {
|
|
state.pending = consumed.suffix + state.pending;
|
|
}
|
|
continue;
|
|
}
|
|
if (consumed.prefix) {
|
|
noteText(state, consumed.prefix);
|
|
events.push({ type: 'text', text: consumed.prefix });
|
|
}
|
|
if (consumed.suffix) {
|
|
state.pending += consumed.suffix;
|
|
}
|
|
continue;
|
|
}
|
|
const pending = state.pending || '';
|
|
if (!pending) {
|
|
break;
|
|
}
|
|
const start = findToolSegmentStart(state, pending);
|
|
if (start >= 0) {
|
|
const prefix = pending.slice(0, start);
|
|
if (prefix) {
|
|
noteText(state, prefix);
|
|
events.push({ type: 'text', text: prefix });
|
|
}
|
|
state.pending = '';
|
|
state.capture += pending.slice(start);
|
|
state.capturing = true;
|
|
resetIncrementalToolState(state);
|
|
continue;
|
|
}
|
|
const [safe, hold] = splitSafeContentForToolDetection(pending);
|
|
if (!safe) {
|
|
break;
|
|
}
|
|
state.pending = hold;
|
|
noteText(state, safe);
|
|
events.push({ type: 'text', text: safe });
|
|
}
|
|
return events;
|
|
}
|
|
|
|
function flushToolSieve(state, toolNames) {
|
|
if (!state) {
|
|
return [];
|
|
}
|
|
const events = processToolSieveChunk(state, '', toolNames);
|
|
if (Array.isArray(state.pendingToolCalls) && state.pendingToolCalls.length > 0) {
|
|
events.push({ type: 'tool_calls', calls: state.pendingToolCalls });
|
|
state.pendingToolRaw = '';
|
|
state.pendingToolCalls = [];
|
|
}
|
|
if (state.capturing) {
|
|
const consumed = consumeToolCapture(state, toolNames);
|
|
if (consumed.ready) {
|
|
if (consumed.prefix) {
|
|
noteText(state, consumed.prefix);
|
|
events.push({ type: 'text', text: consumed.prefix });
|
|
}
|
|
if (Array.isArray(consumed.calls) && consumed.calls.length > 0) {
|
|
events.push({ type: 'tool_calls', calls: consumed.calls });
|
|
}
|
|
if (consumed.suffix) {
|
|
noteText(state, consumed.suffix);
|
|
events.push({ type: 'text', text: consumed.suffix });
|
|
}
|
|
} else if (state.capture) {
|
|
const content = state.capture;
|
|
if (!hasOpenXMLToolTag(content) && !looksLikeXMLToolTagFragment(content)) {
|
|
noteText(state, content);
|
|
events.push({ type: 'text', text: content });
|
|
}
|
|
}
|
|
state.capture = '';
|
|
state.capturing = false;
|
|
resetIncrementalToolState(state);
|
|
}
|
|
if (state.pending) {
|
|
if (!hasOpenXMLToolTag(state.pending) && !looksLikeXMLToolTagFragment(state.pending)) {
|
|
noteText(state, state.pending);
|
|
events.push({ type: 'text', text: state.pending });
|
|
}
|
|
state.pending = '';
|
|
}
|
|
return events;
|
|
}
|
|
|
|
function splitSafeContentForToolDetection(s) {
|
|
const text = s || '';
|
|
if (!text) {
|
|
return ['', ''];
|
|
}
|
|
const suspiciousStart = findSuspiciousPrefixStart(text);
|
|
if (suspiciousStart < 0) {
|
|
return [text, ''];
|
|
}
|
|
if (suspiciousStart > 0) {
|
|
return [text.slice(0, suspiciousStart), text.slice(suspiciousStart)];
|
|
}
|
|
return ['', text];
|
|
}
|
|
|
|
function findSuspiciousPrefixStart(s) {
|
|
let start = -1;
|
|
for (const needle of ['{', '[', '```']) {
|
|
const idx = s.lastIndexOf(needle);
|
|
if (idx > start) {
|
|
start = idx;
|
|
}
|
|
}
|
|
// Also check for partial XML tool tag at end of string.
|
|
const xmlIdx = findPartialXMLToolTagStart(s);
|
|
if (xmlIdx >= 0 && xmlIdx > start) {
|
|
start = xmlIdx;
|
|
}
|
|
return start;
|
|
}
|
|
|
|
function findToolSegmentStart(state, s) {
|
|
if (!s) {
|
|
return -1;
|
|
}
|
|
const lower = s.toLowerCase();
|
|
let offset = 0;
|
|
while (true) {
|
|
// Check JSON keywords.
|
|
let { index: bestKeyIdx, keyword: matchedKeyword } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS, offset);
|
|
// Also check XML tool tags.
|
|
for (const tag of XML_TOOL_SEGMENT_TAGS) {
|
|
const idx = lower.indexOf(tag, offset);
|
|
if (idx >= 0 && (bestKeyIdx < 0 || idx < bestKeyIdx)) {
|
|
bestKeyIdx = idx;
|
|
matchedKeyword = tag;
|
|
}
|
|
}
|
|
if (bestKeyIdx < 0) {
|
|
return -1;
|
|
}
|
|
// For XML tags, the '<' is itself the segment start.
|
|
if (s[bestKeyIdx] === '<') {
|
|
if (!insideCodeFenceWithState(state, s.slice(0, bestKeyIdx))) {
|
|
return bestKeyIdx;
|
|
}
|
|
offset = bestKeyIdx + matchedKeyword.length;
|
|
continue;
|
|
}
|
|
const keyIdx = bestKeyIdx;
|
|
const start = s.slice(0, keyIdx).lastIndexOf('{');
|
|
let candidateStart = start >= 0 ? start : keyIdx;
|
|
// If the keyword matched inside an XML tag (e.g. "tool_calls" in "<tool_calls>"),
|
|
// back up past the '<' to capture the full tag.
|
|
if (candidateStart > 0 && s[candidateStart - 1] === '<') {
|
|
candidateStart--;
|
|
}
|
|
if (!insideCodeFenceWithState(state, s.slice(0, candidateStart))) {
|
|
return candidateStart;
|
|
}
|
|
offset = keyIdx + matchedKeyword.length;
|
|
}
|
|
}
|
|
|
|
function consumeToolCapture(state, toolNames) {
|
|
const captured = state.capture || '';
|
|
if (!captured) {
|
|
return { ready: false, prefix: '', calls: [], suffix: '' };
|
|
}
|
|
|
|
// Try XML tool call extraction first.
|
|
const xmlResult = consumeXMLToolCaptureImpl(captured, toolNames, trimWrappingJSONFence);
|
|
if (xmlResult.ready) {
|
|
return xmlResult;
|
|
}
|
|
// If XML tags are present but block is incomplete, keep buffering.
|
|
if (hasOpenXMLToolTag(captured)) {
|
|
return { ready: false, prefix: '', calls: [], suffix: '' };
|
|
}
|
|
|
|
const lower = captured.toLowerCase();
|
|
const { index: keyIdx } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS);
|
|
if (keyIdx < 0) {
|
|
return { ready: false, prefix: '', calls: [], suffix: '' };
|
|
}
|
|
const start = captured.slice(0, keyIdx).lastIndexOf('{');
|
|
const actualStart = start >= 0 ? start : keyIdx;
|
|
const obj = extractJSONObjectFrom(captured, actualStart);
|
|
if (!obj.ok) {
|
|
return { ready: false, prefix: '', calls: [], suffix: '' };
|
|
}
|
|
const prefixPart = captured.slice(0, actualStart);
|
|
const suffixPart = captured.slice(obj.end);
|
|
if (insideCodeFenceWithState(state, prefixPart)) {
|
|
return {
|
|
ready: true,
|
|
prefix: captured,
|
|
calls: [],
|
|
suffix: '',
|
|
};
|
|
}
|
|
const parsed = parseStandaloneToolCallsDetailed(captured.slice(actualStart, obj.end), toolNames);
|
|
if (!Array.isArray(parsed.calls) || parsed.calls.length === 0) {
|
|
if (parsed.sawToolCallSyntax && parsed.rejectedByPolicy) {
|
|
return {
|
|
ready: true,
|
|
prefix: prefixPart,
|
|
calls: [],
|
|
suffix: suffixPart,
|
|
};
|
|
}
|
|
return {
|
|
ready: true,
|
|
prefix: captured,
|
|
calls: [],
|
|
suffix: '',
|
|
};
|
|
}
|
|
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
|
return {
|
|
ready: true,
|
|
prefix: trimmedFence.prefix,
|
|
calls: parsed.calls,
|
|
suffix: trimmedFence.suffix,
|
|
};
|
|
}
|
|
|
|
module.exports = {
|
|
processToolSieveChunk,
|
|
flushToolSieve,
|
|
};
|