mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-13 12:47:41 +08:00
feat: implement XML-based tool call extraction and refactor sieve utilities into dedicated modules
This commit is contained in:
@@ -140,9 +140,58 @@ function extractJSONObjectFrom(text, start) {
|
||||
return { ok: false, end: 0 };
|
||||
}
|
||||
|
||||
function extractToolHistoryBlock(captured, keyIdx) {
|
||||
if (typeof captured !== 'string' || keyIdx < 0 || keyIdx >= captured.length) {
|
||||
return { ok: false, start: 0, end: 0 };
|
||||
}
|
||||
const rest = captured.slice(keyIdx).toLowerCase();
|
||||
if (rest.startsWith('[tool_call_history]')) {
|
||||
const closeTag = '[/tool_call_history]';
|
||||
const closeIdx = rest.indexOf(closeTag);
|
||||
if (closeIdx < 0) {
|
||||
return { ok: false, start: 0, end: 0 };
|
||||
}
|
||||
return { ok: true, start: keyIdx, end: keyIdx + closeIdx + closeTag.length };
|
||||
}
|
||||
if (rest.startsWith('[tool_result_history]')) {
|
||||
const closeTag = '[/tool_result_history]';
|
||||
const closeIdx = rest.indexOf(closeTag);
|
||||
if (closeIdx < 0) {
|
||||
return { ok: false, start: 0, end: 0 };
|
||||
}
|
||||
return { ok: true, start: keyIdx, end: keyIdx + closeIdx + closeTag.length };
|
||||
}
|
||||
return { ok: false, start: 0, end: 0 };
|
||||
}
|
||||
|
||||
function trimWrappingJSONFence(prefix, suffix) {
|
||||
const rightTrimmedPrefix = (prefix || '').replace(/[ \t\r\n]+$/g, '');
|
||||
const fenceIdx = rightTrimmedPrefix.lastIndexOf('```');
|
||||
if (fenceIdx < 0) return { prefix, suffix };
|
||||
const fenceCount = (rightTrimmedPrefix.slice(0, fenceIdx + 3).match(/```/g) || []).length;
|
||||
if (fenceCount % 2 === 0) {
|
||||
return { prefix, suffix };
|
||||
}
|
||||
const header = rightTrimmedPrefix.slice(fenceIdx + 3).trim().toLowerCase();
|
||||
if (header && header !== 'json') {
|
||||
return { prefix, suffix };
|
||||
}
|
||||
const leftTrimmedSuffix = (suffix || '').replace(/^[ \t\r\n]+/g, '');
|
||||
if (!leftTrimmedSuffix.startsWith('```')) {
|
||||
return { prefix, suffix };
|
||||
}
|
||||
const consumed = (suffix || '').length - leftTrimmedSuffix.length;
|
||||
return {
|
||||
prefix: rightTrimmedPrefix.slice(0, fenceIdx),
|
||||
suffix: (suffix || '').slice(consumed + 3),
|
||||
};
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
findObjectFieldValueStart,
|
||||
parseJSONStringLiteral,
|
||||
skipSpaces,
|
||||
extractJSONObjectFrom,
|
||||
extractToolHistoryBlock,
|
||||
trimWrappingJSONFence,
|
||||
};
|
||||
|
||||
91
internal/js/helpers/stream-tool-sieve/sieve-xml.js
Normal file
91
internal/js/helpers/stream-tool-sieve/sieve-xml.js
Normal file
@@ -0,0 +1,91 @@
|
||||
'use strict';
|
||||
const { parseToolCalls } = require('./parse');
|
||||
const {
|
||||
XML_TOOL_OPENING_TAGS,
|
||||
XML_TOOL_CLOSING_TAGS,
|
||||
} = require('./tool-keywords');
|
||||
|
||||
function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
|
||||
const lower = captured.toLowerCase();
|
||||
let openIdx = -1;
|
||||
for (const tag of XML_TOOL_OPENING_TAGS) {
|
||||
const idx = lower.indexOf(tag);
|
||||
if (idx >= 0 && (openIdx < 0 || idx < openIdx)) {
|
||||
openIdx = idx;
|
||||
}
|
||||
}
|
||||
if (openIdx < 0) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
let closeIdx = -1;
|
||||
for (const tag of XML_TOOL_CLOSING_TAGS) {
|
||||
const idx = lower.indexOf(tag, openIdx);
|
||||
if (idx >= 0) {
|
||||
const absEnd = idx + tag.length;
|
||||
if (closeIdx < 0 || absEnd > closeIdx) {
|
||||
closeIdx = absEnd;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (closeIdx <= 0) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
const xmlBlock = captured.slice(openIdx, closeIdx);
|
||||
let prefixPart = captured.slice(0, openIdx);
|
||||
let suffixPart = captured.slice(closeIdx);
|
||||
const parsed = parseToolCalls(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed) && parsed.length > 0) {
|
||||
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
||||
return {
|
||||
ready: true,
|
||||
prefix: trimmedFence.prefix,
|
||||
calls: parsed,
|
||||
suffix: trimmedFence.suffix,
|
||||
};
|
||||
}
|
||||
return { ready: true, prefix: prefixPart, calls: [], suffix: suffixPart };
|
||||
}
|
||||
|
||||
function hasOpenXMLToolTag(captured) {
|
||||
const lower = captured.toLowerCase();
|
||||
for (const tag of XML_TOOL_OPENING_TAGS) {
|
||||
if (lower.includes(tag)) {
|
||||
let hasClosed = false;
|
||||
for (const ct of XML_TOOL_CLOSING_TAGS) {
|
||||
if (lower.includes(ct)) {
|
||||
hasClosed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!hasClosed) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
function findPartialXMLToolTagStart(s) {
|
||||
const lastLT = s.lastIndexOf('<');
|
||||
if (lastLT < 0) {
|
||||
return -1;
|
||||
}
|
||||
const tail = s.slice(lastLT);
|
||||
if (tail.includes('>')) {
|
||||
return -1;
|
||||
}
|
||||
const lowerTail = tail.toLowerCase();
|
||||
for (const tag of XML_TOOL_OPENING_TAGS) {
|
||||
const tagWithLT = tag.startsWith('<') ? tag : '<' + tag;
|
||||
if (tagWithLT.startsWith(lowerTail)) {
|
||||
return lastLT;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
consumeXMLToolCapture,
|
||||
hasOpenXMLToolTag,
|
||||
findPartialXMLToolTagStart,
|
||||
};
|
||||
@@ -5,8 +5,17 @@ const {
|
||||
insideCodeFenceWithState,
|
||||
} = require('./state');
|
||||
const { parseStandaloneToolCallsDetailed } = require('./parse');
|
||||
const { extractJSONObjectFrom } = require('./jsonscan');
|
||||
const { TOOL_SEGMENT_KEYWORDS, earliestKeywordIndex } = require('./tool-keywords');
|
||||
const { extractJSONObjectFrom, extractToolHistoryBlock, trimWrappingJSONFence } = require('./jsonscan');
|
||||
const {
|
||||
TOOL_SEGMENT_KEYWORDS,
|
||||
XML_TOOL_SEGMENT_TAGS,
|
||||
earliestKeywordIndex,
|
||||
} = require('./tool-keywords');
|
||||
const {
|
||||
consumeXMLToolCapture: consumeXMLToolCaptureImpl,
|
||||
hasOpenXMLToolTag,
|
||||
findPartialXMLToolTagStart,
|
||||
} = require('./sieve-xml');
|
||||
function processToolSieveChunk(state, chunk, toolNames) {
|
||||
if (!state) {
|
||||
return [];
|
||||
@@ -144,6 +153,11 @@ function findSuspiciousPrefixStart(s) {
|
||||
start = idx;
|
||||
}
|
||||
}
|
||||
// Also check for partial XML tool tag at end of string.
|
||||
const xmlIdx = findPartialXMLToolTagStart(s);
|
||||
if (xmlIdx >= 0 && xmlIdx > start) {
|
||||
start = xmlIdx;
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
@@ -154,10 +168,27 @@ function findToolSegmentStart(state, s) {
|
||||
const lower = s.toLowerCase();
|
||||
let offset = 0;
|
||||
while (true) {
|
||||
const { index: bestKeyIdx, keyword: matchedKeyword } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS, offset);
|
||||
// Check JSON keywords.
|
||||
let { index: bestKeyIdx, keyword: matchedKeyword } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS, offset);
|
||||
// Also check XML tool tags.
|
||||
for (const tag of XML_TOOL_SEGMENT_TAGS) {
|
||||
const idx = lower.indexOf(tag, offset);
|
||||
if (idx >= 0 && (bestKeyIdx < 0 || idx < bestKeyIdx)) {
|
||||
bestKeyIdx = idx;
|
||||
matchedKeyword = tag;
|
||||
}
|
||||
}
|
||||
if (bestKeyIdx < 0) {
|
||||
return -1;
|
||||
}
|
||||
// For XML tags, the '<' is itself the segment start.
|
||||
if (s[bestKeyIdx] === '<') {
|
||||
if (!insideCodeFenceWithState(state, s.slice(0, bestKeyIdx))) {
|
||||
return bestKeyIdx;
|
||||
}
|
||||
offset = bestKeyIdx + matchedKeyword.length;
|
||||
continue;
|
||||
}
|
||||
const keyIdx = bestKeyIdx;
|
||||
const start = s.slice(0, keyIdx).lastIndexOf('{');
|
||||
const candidateStart = start >= 0 ? start : keyIdx;
|
||||
@@ -173,6 +204,17 @@ function consumeToolCapture(state, toolNames) {
|
||||
if (!captured) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
|
||||
// Try XML tool call extraction first.
|
||||
const xmlResult = consumeXMLToolCaptureImpl(captured, toolNames, trimWrappingJSONFence);
|
||||
if (xmlResult.ready) {
|
||||
return xmlResult;
|
||||
}
|
||||
// If XML tags are present but block is incomplete, keep buffering.
|
||||
if (hasOpenXMLToolTag(captured)) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
|
||||
const lower = captured.toLowerCase();
|
||||
const { index: keyIdx } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS);
|
||||
if (keyIdx < 0) {
|
||||
@@ -231,52 +273,6 @@ function consumeToolCapture(state, toolNames) {
|
||||
};
|
||||
}
|
||||
|
||||
function extractToolHistoryBlock(captured, keyIdx) {
|
||||
if (typeof captured !== 'string' || keyIdx < 0 || keyIdx >= captured.length) {
|
||||
return { ok: false, start: 0, end: 0 };
|
||||
}
|
||||
const rest = captured.slice(keyIdx).toLowerCase();
|
||||
if (rest.startsWith('[tool_call_history]')) {
|
||||
const closeTag = '[/tool_call_history]';
|
||||
const closeIdx = rest.indexOf(closeTag);
|
||||
if (closeIdx < 0) {
|
||||
return { ok: false, start: 0, end: 0 };
|
||||
}
|
||||
return { ok: true, start: keyIdx, end: keyIdx + closeIdx + closeTag.length };
|
||||
}
|
||||
if (rest.startsWith('[tool_result_history]')) {
|
||||
const closeTag = '[/tool_result_history]';
|
||||
const closeIdx = rest.indexOf(closeTag);
|
||||
if (closeIdx < 0) {
|
||||
return { ok: false, start: 0, end: 0 };
|
||||
}
|
||||
return { ok: true, start: keyIdx, end: keyIdx + closeIdx + closeTag.length };
|
||||
}
|
||||
return { ok: false, start: 0, end: 0 };
|
||||
}
|
||||
|
||||
function trimWrappingJSONFence(prefix, suffix) {
|
||||
const rightTrimmedPrefix = (prefix || '').replace(/[ \t\r\n]+$/g, '');
|
||||
const fenceIdx = rightTrimmedPrefix.lastIndexOf('```');
|
||||
if (fenceIdx < 0) return { prefix, suffix };
|
||||
const fenceCount = (rightTrimmedPrefix.slice(0, fenceIdx + 3).match(/```/g) || []).length;
|
||||
if (fenceCount % 2 === 0) {
|
||||
return { prefix, suffix };
|
||||
}
|
||||
const header = rightTrimmedPrefix.slice(fenceIdx + 3).trim().toLowerCase();
|
||||
if (header && header !== 'json') {
|
||||
return { prefix, suffix };
|
||||
}
|
||||
const leftTrimmedSuffix = (suffix || '').replace(/^[ \t\r\n]+/g, '');
|
||||
if (!leftTrimmedSuffix.startsWith('```')) {
|
||||
return { prefix, suffix };
|
||||
}
|
||||
const consumed = (suffix || '').length - leftTrimmedSuffix.length;
|
||||
return {
|
||||
prefix: rightTrimmedPrefix.slice(0, fenceIdx),
|
||||
suffix: (suffix || '').slice(consumed + 3),
|
||||
};
|
||||
}
|
||||
module.exports = {
|
||||
processToolSieveChunk,
|
||||
flushToolSieve,
|
||||
|
||||
@@ -8,6 +8,19 @@ const TOOL_SEGMENT_KEYWORDS = [
|
||||
'[tool_result_history]',
|
||||
];
|
||||
|
||||
const XML_TOOL_SEGMENT_TAGS = [
|
||||
'<tool_calls>', '<tool_calls\n', '<tool_call>', '<tool_call\n',
|
||||
'<invoke ', '<invoke>', '<function_call', '<function_calls', '<tool_use>',
|
||||
];
|
||||
|
||||
const XML_TOOL_OPENING_TAGS = [
|
||||
'<tool_calls', '<tool_call', '<invoke', '<function_call', '<function_calls', '<tool_use',
|
||||
];
|
||||
|
||||
const XML_TOOL_CLOSING_TAGS = [
|
||||
'</tool_calls>', '</tool_call>', '</invoke>', '</function_call>', '</function_calls>', '</tool_use>',
|
||||
];
|
||||
|
||||
function earliestKeywordIndex(text, keywords = TOOL_SEGMENT_KEYWORDS, offset = 0) {
|
||||
if (!text) {
|
||||
return { index: -1, keyword: '' };
|
||||
@@ -26,5 +39,8 @@ function earliestKeywordIndex(text, keywords = TOOL_SEGMENT_KEYWORDS, offset = 0
|
||||
|
||||
module.exports = {
|
||||
TOOL_SEGMENT_KEYWORDS,
|
||||
XML_TOOL_SEGMENT_TAGS,
|
||||
XML_TOOL_OPENING_TAGS,
|
||||
XML_TOOL_CLOSING_TAGS,
|
||||
earliestKeywordIndex,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user