测试DSML

This commit is contained in:
CJACK
2026-04-27 00:21:26 +08:00
parent 645fce41c8
commit 40d5e3ebb5
50 changed files with 1112 additions and 265 deletions

View File

@@ -8,7 +8,7 @@ const {
stripFencedCodeBlocks,
} = require('./parse_payload');
const TOOL_MARKUP_PREFIXES = ['<tool_calls'];
const TOOL_MARKUP_PREFIXES = ['<tool_calls', '<|dsml|tool_calls'];
function extractToolNames(tools) {
if (!Array.isArray(tools) || tools.length === 0) {

View File

@@ -17,7 +17,11 @@ function stripFencedCodeBlocks(text) {
}
function parseMarkupToolCalls(text) {
const raw = toStringSafe(text).trim();
const normalized = normalizeDSMLToolCallMarkup(toStringSafe(text));
if (!normalized.ok) {
return [];
}
const raw = normalized.text.trim();
if (!raw) {
return [];
}
@@ -34,6 +38,103 @@ function parseMarkupToolCalls(text) {
return out;
}
function normalizeDSMLToolCallMarkup(text) {
const raw = toStringSafe(text);
if (!raw) {
return { text: '', ok: true };
}
const styles = toolMarkupStylesOutsideIgnored(raw);
if (styles.dsml && styles.canonical) {
return { text: raw, ok: false };
}
if (!styles.dsml) {
return { text: raw, ok: true };
}
return {
text: replaceDSMLToolMarkupOutsideIgnored(raw),
ok: true,
};
}
function containsDSMLToolMarkup(text) {
return toolMarkupStylesOutsideIgnored(text).dsml;
}
function containsCanonicalToolMarkup(text) {
return toolMarkupStylesOutsideIgnored(text).canonical;
}
const DSML_TOOL_MARKUP_ALIASES = [
{ from: '<|dsml|tool_calls', to: '<tool_calls' },
{ from: '</|dsml|tool_calls>', to: '</tool_calls>' },
{ from: '<|dsml|invoke', to: '<invoke' },
{ from: '</|dsml|invoke>', to: '</invoke>' },
{ from: '<|dsml|parameter', to: '<parameter' },
{ from: '</|dsml|parameter>', to: '</parameter>' },
];
const CANONICAL_TOOL_MARKUP_PREFIXES = [
'<tool_calls',
'</tool_calls>',
'<invoke',
'</invoke>',
'<parameter',
'</parameter>',
];
function toolMarkupStylesOutsideIgnored(text) {
const lower = toStringSafe(text).toLowerCase();
const styles = { dsml: false, canonical: false };
for (let i = 0; i < lower.length;) {
const skipped = skipXmlIgnoredSection(lower, i);
if (skipped.blocked) {
return styles;
}
if (skipped.advanced) {
i = skipped.next;
continue;
}
if (CANONICAL_TOOL_MARKUP_PREFIXES.some(prefix => lower.startsWith(prefix, i))) {
styles.canonical = true;
}
if (DSML_TOOL_MARKUP_ALIASES.some(alias => lower.startsWith(alias.from, i))) {
styles.dsml = true;
}
if (styles.dsml && styles.canonical) {
return styles;
}
i += 1;
}
return styles;
}
function replaceDSMLToolMarkupOutsideIgnored(text) {
const raw = toStringSafe(text);
const lower = raw.toLowerCase();
let out = '';
for (let i = 0; i < raw.length;) {
const skipped = skipXmlIgnoredSection(lower, i);
if (skipped.blocked) {
out += raw.slice(i);
break;
}
if (skipped.advanced) {
out += raw.slice(i, skipped.next);
i = skipped.next;
continue;
}
const alias = DSML_TOOL_MARKUP_ALIASES.find(item => lower.startsWith(item.from, i));
if (alias) {
out += alias.to;
i += alias.from.length;
continue;
}
out += raw[i];
i += 1;
}
return out;
}
function parseMarkupSingleToolCall(block) {
const attrs = parseTagAttributes(block.attrs);
const name = toStringSafe(attrs.name).trim();
@@ -403,4 +504,5 @@ function isOnlyRawValue(obj) {
module.exports = {
stripFencedCodeBlocks,
parseMarkupToolCalls,
normalizeDSMLToolCallMarkup,
};

View File

@@ -3,6 +3,7 @@ const { parseToolCalls } = require('./parse');
// XML wrapper tag pair used by the streaming sieve.
const XML_TOOL_TAG_PAIRS = [
{ open: '<|dsml|tool_calls', close: '</|dsml|tool_calls>' },
{ open: '<tool_calls', close: '</tool_calls>' },
];
@@ -41,6 +42,31 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
// If this block failed to become a tool call, pass it through as text.
return { ready: true, prefix: prefixPart + xmlBlock, calls: [], suffix: suffixPart };
}
if (!containsAnyToolCallWrapper(lower)) {
const found = firstInvokeIndex(lower);
if (found.index >= 0) {
const closeTag = found.dsml ? '</|dsml|tool_calls>' : '</tool_calls>';
const openWrapper = found.dsml ? '<|DSML|tool_calls>' : '<tool_calls>';
const closeIdx = findXMLCloseOutsideCDATA(captured, closeTag, found.index);
if (closeIdx > found.index) {
const closeEnd = closeIdx + closeTag.length;
const xmlBlock = openWrapper + captured.slice(found.index, closeIdx) + closeTag;
let prefixPart = captured.slice(0, found.index);
let suffixPart = captured.slice(closeEnd);
const parsed = parseToolCalls(xmlBlock, toolNames);
if (Array.isArray(parsed) && parsed.length > 0) {
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
return {
ready: true,
prefix: trimmedFence.prefix,
calls: parsed,
suffix: trimmedFence.suffix,
};
}
return { ready: true, prefix: prefixPart + captured.slice(found.index, closeEnd), calls: [], suffix: suffixPart };
}
}
}
return { ready: false, prefix: '', calls: [], suffix: '' };
}
@@ -57,6 +83,25 @@ function hasOpenXMLToolTag(captured) {
return false;
}
function containsAnyToolCallWrapper(lower) {
return lower.includes('<tool_calls') || lower.includes('<|dsml|tool_calls');
}
function firstInvokeIndex(lower) {
const xmlIdx = lower.indexOf('<invoke');
const dsmlIdx = lower.indexOf('<|dsml|invoke');
if (xmlIdx < 0) {
return { index: dsmlIdx, dsml: dsmlIdx >= 0 };
}
if (dsmlIdx < 0) {
return { index: xmlIdx, dsml: false };
}
if (dsmlIdx < xmlIdx) {
return { index: dsmlIdx, dsml: true };
}
return { index: xmlIdx, dsml: false };
}
function findPartialXMLToolTagStart(s) {
const lastLT = s.lastIndexOf('<');
if (lastLT < 0) {

View File

@@ -1,14 +1,17 @@
'use strict';
const XML_TOOL_SEGMENT_TAGS = [
'<|dsml|tool_calls>', '<|dsml|tool_calls\n', '<|dsml|tool_calls ',
'<tool_calls>', '<tool_calls\n', '<tool_calls ',
];
const XML_TOOL_OPENING_TAGS = [
'<|dsml|tool_calls',
'<tool_calls',
];
const XML_TOOL_CLOSING_TAGS = [
'</|dsml|tool_calls>',
'</tool_calls>',
];