feat(toolcall): harden confusable candidate spans

This commit is contained in:
Your Name
2026-05-10 09:27:30 +07:00
parent 6a8edf96c3
commit 196e3c46f6
20 changed files with 2257 additions and 363 deletions

View File

@@ -7,6 +7,10 @@ const {
SKIP_EXACT_PATHS,
} = require('../shared/deepseek-constants');
// Special-token marker patterns removed from text by stripReferenceMarkersText.
// All three are case-insensitive and global, tolerate whitespace just inside
// the `<|` / `|>` delimiters, and accept either `_` or `▁` between words.

// Matches the begin-of-sentence marker, e.g. `<|begin_of_sentence|>`.
const LEAKED_BOS_MARKER_PATTERN = /<[|]\s*begin[_▁]of[_▁]sentence\s*[|]>/gi;
// Matches `of_thought` markers with an optional `begin_` prefix and any number
// of leading separators, e.g. `<|begin_of_thought|>` or `<|_of_thought|>`.
const LEAKED_THOUGHT_MARKER_PATTERN = /<[|]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[|]>/gi;
// Matches the `assistant` / `tool` markers and the `end_of_*` family
// (sentence, thinking, thought, toolresults, instructions).
const LEAKED_META_MARKER_PATTERN = /<[|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|]>/gi;
function stripThinkTags(text) {
@@ -621,7 +625,11 @@ function stripReferenceMarkersText(text) {
if (!text) {
return text;
}
return text.replace(/\[(?:citation|reference):\s*\d+\]/gi, '');
return text
.replace(/\[(?:citation|reference):\s*\d+\]/gi, '')
.replace(LEAKED_BOS_MARKER_PATTERN, '')
.replace(LEAKED_THOUGHT_MARKER_PATTERN, '')
.replace(LEAKED_META_MARKER_PATTERN, '');
}
function asString(v) {

View File

@@ -7,6 +7,9 @@ const {
parseMarkupToolCalls,
stripFencedCodeBlocks,
containsToolCallWrapperSyntaxOutsideIgnored,
normalizeDSMLToolCallMarkup,
hasRepairableXMLToolCallsWrapper,
indexToolCDATAOpen,
sanitizeLooseCDATA,
} = require('./parse_payload');
@@ -37,19 +40,23 @@ function parseToolCalls(text, toolNames) {
function parseToolCallsDetailed(text, toolNames) {
const result = emptyParseResult();
const normalized = toStringSafe(text);
if (!normalized) {
const raw = toStringSafe(text);
if (!raw) {
return result;
}
result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized);
if (shouldSkipToolCallParsingForCodeFenceExample(normalized)) {
if (shouldSkipToolCallParsingForCodeFenceExample(raw)) {
return result;
}
const normalized = normalizeDSMLToolCallMarkup(stripFencedCodeBlocks(raw).trim());
if (!normalized.ok || !normalized.text) {
return result;
}
result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized.text) || hasRepairableXMLToolCallsWrapper(normalized.text);
// XML markup parsing only.
let parsed = parseMarkupToolCalls(normalized);
if (parsed.length === 0 && normalized.toLowerCase().includes('<![cdata[')) {
const recovered = sanitizeLooseCDATA(normalized);
if (recovered !== normalized) {
let parsed = parseMarkupToolCalls(normalized.text);
if (parsed.length === 0 && indexToolCDATAOpen(normalized.text, 0) >= 0) {
const recovered = sanitizeLooseCDATA(normalized.text);
if (recovered !== normalized.text) {
parsed = parseMarkupToolCalls(recovered);
}
}
@@ -70,19 +77,23 @@ function parseStandaloneToolCalls(text, toolNames) {
function parseStandaloneToolCallsDetailed(text, toolNames) {
const result = emptyParseResult();
const trimmed = toStringSafe(text);
if (!trimmed) {
const raw = toStringSafe(text);
if (!raw) {
return result;
}
result.sawToolCallSyntax = looksLikeToolCallSyntax(trimmed);
if (shouldSkipToolCallParsingForCodeFenceExample(trimmed)) {
if (shouldSkipToolCallParsingForCodeFenceExample(raw)) {
return result;
}
const normalized = normalizeDSMLToolCallMarkup(stripFencedCodeBlocks(raw).trim());
if (!normalized.ok || !normalized.text) {
return result;
}
result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized.text) || hasRepairableXMLToolCallsWrapper(normalized.text);
// XML markup parsing only.
let parsed = parseMarkupToolCalls(trimmed);
if (parsed.length === 0 && trimmed.toLowerCase().includes('<![cdata[')) {
const recovered = sanitizeLooseCDATA(trimmed);
if (recovered !== trimmed) {
let parsed = parseMarkupToolCalls(normalized.text);
if (parsed.length === 0 && indexToolCDATAOpen(normalized.text, 0) >= 0) {
const recovered = sanitizeLooseCDATA(normalized.text);
if (recovered !== normalized.text) {
parsed = parseMarkupToolCalls(recovered);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -114,6 +114,39 @@ function hasOpenXMLToolTag(captured) {
return false;
}
/**
 * Decides whether a captured span that starts with a bare `invoke` tag (one
 * not preceded by a `tool_calls` opening tag) should stay buffered instead of
 * being released as plain text.
 *
 * Tags are located with `findFirstToolTag`; the returned objects are read via
 * their `start` and `end` properties (`end` is presumably the index of the
 * tag's last character — confirm against `findFirstToolTag`).
 *
 * @param {string} captured - Text captured so far.
 * @returns {boolean} `true` to keep holding the capture, `false` otherwise.
 */
function shouldKeepBareInvokeCapture(captured) {
  const opener = findFirstToolTag(captured, 0, 'invoke', false);
  if (!opener) {
    // Nothing invoke-like in the capture at all.
    return false;
  }

  // A wrapper that opens at or before the invoke means this is not "bare".
  const wrapper = findFirstToolTag(captured, 0, 'tool_calls', false);
  const wrapperComesFirst = Boolean(wrapper) && wrapper.start <= opener.start;
  if (wrapperComesFirst) {
    return false;
  }

  // A closing wrapper after the invoke: keep the capture.
  const wrapperClose = findFirstToolTag(captured, opener.start + 1, 'tool_calls', true);
  const wrapperClosesLater = Boolean(wrapperClose) && wrapperClose.start > opener.start;
  if (wrapperClosesLater) {
    return true;
  }

  // A negative end index is treated as "opening tag not finished yet".
  const openerEnd = opener.end;
  if (openerEnd < 0) {
    return true;
  }

  const afterOpener = captured.slice(openerEnd + 1);
  const afterOpenerTrimmed = afterOpener.replace(/^[ \t\r\n]+/, '');
  if (afterOpenerTrimmed === '') {
    // Only whitespace (or nothing) follows the opening tag so far.
    return true;
  }

  // Once the invoke is closed, keep it only when nothing but whitespace trails.
  const closer = findFirstToolTag(captured, openerEnd + 1, 'invoke', true);
  if (closer) {
    const trailing = captured.slice(closer.end + 1);
    return trailing.trim() === '';
  }

  // Still open: keep it when the body begins with a parameter tag...
  const firstParam = findFirstToolTag(afterOpener, 0, 'parameter', false);
  if (firstParam) {
    const beforeParam = afterOpener.slice(0, firstParam.start);
    if (beforeParam.trim() === '') {
      return true;
    }
  }

  // ...or with what looks like the start of a JSON object or array.
  const leadingChar = afterOpenerTrimmed[0];
  return leadingChar === '{' || leadingChar === '[';
}
function findFirstToolTag(text, from, name, closing) {
for (let pos = Math.max(0, from || 0); pos < text.length;) {
const tag = findToolMarkupTagOutsideIgnored(text, pos);
@@ -131,5 +164,6 @@ function findFirstToolTag(text, from, name, closing) {
// Public surface of this module.
module.exports = {
consumeXMLToolCapture,
hasOpenXMLToolTag,
shouldKeepBareInvokeCapture,
// Exported under an XML-specific name; the implementation is findPartialToolMarkupStart.
findPartialXMLToolTagStart: findPartialToolMarkupStart,
};

View File

@@ -12,6 +12,7 @@ const {
const {
consumeXMLToolCapture: consumeXMLToolCaptureImpl,
hasOpenXMLToolTag,
shouldKeepBareInvokeCapture,
findPartialXMLToolTagStart,
} = require('./sieve-xml');
function processToolSieveChunk(state, chunk, toolNames) {
@@ -203,6 +204,9 @@ function consumeToolCapture(state, toolNames) {
if (hasOpenXMLToolTag(captured)) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
if (shouldKeepBareInvokeCapture(captured)) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
// No XML tool tags detected — release captured content as text.
return {