This commit is contained in:
CJACK
2026-04-27 22:57:48 +08:00
parent b82bc1311a
commit 1e9170e385
7 changed files with 443 additions and 14 deletions

View File

@@ -153,6 +153,7 @@ OpenAI Chat / Responses 在标准化后、history split / current input file 之
工具调用正例现在优先示范官方 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。
兼容层仍接受旧式纯 `<tool_calls>` wrapper，但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意：这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现；DSML 标签会在解析入口归一化回现有 XML 标签后继续走同一套 parser。
数组参数使用 `<item>...</item>` 子节点表示;当某个参数体只包含 item 子节点时，Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... }` 对象。若模型把完整结构化 XML fragment 误包进 CDATA，兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。
正例中的工具名只会来自当前请求实际声明的工具;如果当前请求没有足够的已知工具形态,就省略对应的单工具、多工具或嵌套示例,避免把不可用工具名写进 prompt。
对执行类工具,脚本内容必须进入执行参数本身:`Bash` / `execute_command` 使用 `command`，`exec_command` 使用 `cmd`;不要把脚本示范成 `path` / `content` 文件写入参数。

View File

@@ -63,6 +63,8 @@
- 当文本中 mention 了某种标签名(如 `<dsml|tool_calls>` 或 Markdown inline code 里的 `<|DSML|tool_calls>`)，而后面紧跟真正工具调用时，sieve 会跳过不可解析的 mention 候选并继续匹配后续真实工具块,不会因 mention 导致工具调用丢失,也不会截断 mention 后的正文。
另外,`<parameter>` 的值如果本身是合法 JSON 字面量,也会按结构化值解析,而不是一律保留为字符串。例如 `123`、`true`、`null`、`[1,2]`、`{"a":1}` 都会还原成对应的 number / boolean / null / array / object。
结构化 XML 参数也会还原为 JSON 结构:如果参数体只包含一个或多个 `<item>...</item>` 子节点,会输出数组;嵌套对象里的 item-only 字段也同样按数组处理。例如 `<parameter name="questions"><item><question>...</question></item></parameter>` 会输出 `{"questions":[{"question":"..."}]}`,而不是 `{"questions":{"item":...}}`。
如果模型误把完整结构化 XML fragment 放进 CDATA，Go / Node 会先保护明显的原文字段(如 `content` / `command` / `prompt` / `old_string` / `new_string`),其余参数会尝试把 CDATA 内的完整 XML fragment 还原成 object / array；常见的 `<br>` 分隔符会按换行归一化后再解析。
## 4) 输出结构

View File

@@ -1,6 +1,5 @@
'use strict';
const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
const CDATA_PATTERN = /^<!\[CDATA\[([\s\S]*?)]]>$/i;
const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi;
const TOOL_MARKUP_NAMES = ['tool_calls', 'invoke', 'parameter'];
@@ -293,7 +292,7 @@ function parseMarkupSingleToolCall(block) {
if (!paramName) {
continue;
}
appendMarkupValue(input, paramName, parseMarkupValue(match.body));
appendMarkupValue(input, paramName, parseMarkupValue(match.body, paramName));
}
if (Object.keys(input).length === 0 && inner.trim() !== '') {
return null;
@@ -600,8 +599,11 @@ function parseMarkupInput(raw) {
return {};
}
// Prioritize XML-style KV tags (e.g., <arg>val</arg>)
const kv = parseMarkupKVObject(s);
if (Object.keys(kv).length > 0) {
const kv = unwrapItemOnlyMarkupValue(parseMarkupKVObject(s));
if (Array.isArray(kv)) {
return kv;
}
if (kv && typeof kv === 'object' && Object.keys(kv).length > 0) {
return kv;
}
@@ -622,12 +624,12 @@ function parseMarkupKVObject(text) {
return {};
}
const out = {};
for (const m of raw.matchAll(TOOL_CALL_MARKUP_KV_PATTERN)) {
const key = toStringSafe(m[1]).trim();
for (const block of findGenericXmlElementBlocks(raw)) {
const key = toStringSafe(block.localName).trim();
if (!key) {
continue;
}
const value = parseMarkupValue(m[2]);
const value = parseMarkupValue(block.body, key);
if (value === undefined || value === null) {
continue;
}
@@ -636,11 +638,146 @@ function parseMarkupKVObject(text) {
return out;
}
function parseMarkupValue(raw) {
/**
 * Scans `text` left to right and collects every top-level element block.
 *
 * Each entry has the shape `{ name, localName, attrs, body, start, end }`:
 * `body` is the text between the start tag and its matching end tag (empty
 * for self-closing tags) and `start`/`end` delimit the whole element in the
 * stringified input (`end` is exclusive).
 *
 * A start tag with no matching end tag is not reported; scanning resumes
 * right after that start tag so elements inside it can still be found.
 *
 * @param {*} text - Anything `toStringSafe` can turn into a string.
 * @returns {Array<object>} Element blocks in document order (possibly empty).
 */
function findGenericXmlElementBlocks(text) {
  const xml = toStringSafe(text);
  const blocks = [];
  if (!xml) {
    return blocks;
  }
  let cursor = 0;
  while (cursor < xml.length) {
    const open = findGenericXmlStartTagOutsideCDATA(xml, cursor);
    if (!open) {
      break;
    }
    const { name, localName, attrs } = open;
    if (open.selfClosing) {
      // Self-closing element: no body, and the block ends just past the tag.
      cursor = open.end + 1;
      blocks.push({ name, localName, attrs, body: '', start: open.start, end: cursor });
      continue;
    }
    const close = findMatchingGenericXmlEndTagOutsideCDATA(xml, name, open.bodyStart);
    if (close) {
      blocks.push({
        name,
        localName,
        attrs,
        body: xml.slice(open.bodyStart, close.closeStart),
        start: open.start,
        end: close.closeEnd,
      });
      cursor = close.closeEnd;
    } else {
      // Unbalanced start tag: skip only the tag itself and keep scanning.
      cursor = open.bodyStart;
    }
  }
  return blocks;
}
/**
 * Finds the next XML-style start tag at or after `from`, ignoring whatever
 * `skipXmlIgnoredSection` reports as an ignored section (presumably CDATA and
 * similar spans; that helper lives elsewhere in this file).
 *
 * End tags (`</`), declarations (`<!`) and processing instructions (`<?`) are
 * never treated as start tags.
 *
 * @param {string} text - Source markup.
 * @param {number} [from=0] - Index at which to begin scanning; negative or
 *   falsy values are treated as 0.
 * @returns {?{start: number, end: number, bodyStart: number, name: string,
 *   localName: string, attrs: string, selfClosing: boolean}} Tag descriptor,
 *   or `null` when no start tag is found, the scan is blocked by an ignored
 *   section, or the candidate tag has no terminator. `end` is the index
 *   returned by `findXmlTagEnd` and `bodyStart` is the index right after it.
 */
function findGenericXmlStartTagOutsideCDATA(text, from) {
  // Fold only ASCII letters. String.prototype.toLowerCase() can change the
  // string length (e.g. U+0130 becomes two code units), which would shift every
  // index computed on `lower` relative to `text` and make the scan miss tags.
  const lower = text.replace(/[A-Z]/g, (ch) => ch.toLowerCase());
  // Sticky pattern: matches a tag name exactly at `lastIndex`, so no substring
  // has to be created for every '<' candidate.
  const namePattern = /[A-Za-z_][A-Za-z0-9_.:-]*/y;
  let i = Math.max(0, from || 0);
  while (i < text.length) {
    const skipped = skipXmlIgnoredSection(lower, i);
    if (skipped.blocked) {
      return null;
    }
    if (skipped.advanced) {
      i = skipped.next;
      continue;
    }
    const following = text[i + 1];
    if (text[i] !== '<' || following === '/' || following === '!' || following === '?') {
      i += 1;
      continue;
    }
    namePattern.lastIndex = i + 1;
    const match = namePattern.exec(text);
    if (!match) {
      i += 1;
      continue;
    }
    const name = match[0];
    const nameEnd = i + 1 + name.length;
    if (!hasXmlTagBoundary(text, nameEnd)) {
      i += 1;
      continue;
    }
    const tagEnd = findXmlTagEnd(text, nameEnd);
    if (tagEnd < 0) {
      // The tag never terminates, so nothing after it can be a complete tag.
      return null;
    }
    const prefixEnd = name.lastIndexOf(':');
    return {
      start: i,
      end: tagEnd,
      bodyStart: tagEnd + 1,
      name,
      // Strip a namespace prefix such as "ns:" from the reported local name.
      localName: prefixEnd >= 0 ? name.slice(prefixEnd + 1) : name,
      attrs: text.slice(nameEnd, tagEnd),
      selfClosing: isSelfClosingXmlTag(text.slice(i, tagEnd)),
    };
  }
  return null;
}
/**
 * Finds the end tag that balances an already-consumed `<name ...>` start tag.
 *
 * Scanning begins at `from` (normally the element's body start) with a
 * nesting depth of 1. Nested non-self-closing start tags of the same name
 * raise the depth and end tags lower it. Tag names are compared
 * case-insensitively for ASCII letters. Sections reported by
 * `skipXmlIgnoredSection` (presumably CDATA and similar spans; that helper
 * lives elsewhere in this file) are skipped.
 *
 * @param {string} text - Source markup.
 * @param {*} name - Tag name to balance (stringified via `toStringSafe`).
 * @param {number} [from=0] - Index at which to begin scanning.
 * @returns {?{closeStart: number, closeEnd: number}} Index of the `<` of the
 *   matching end tag and the index just past its terminator, or `null` when
 *   the name is empty, the scan is blocked, a tag is unterminated, or no
 *   balancing end tag exists.
 */
function findMatchingGenericXmlEndTagOutsideCDATA(text, name, from) {
  // Fold only ASCII letters. String.prototype.toLowerCase() can change the
  // string length (e.g. U+0130 becomes two code units), which would misalign
  // positions found in `lower` with the positions later used on `text`.
  const foldAscii = (value) => value.replace(/[A-Z]/g, (ch) => ch.toLowerCase());
  const lower = foldAscii(text);
  const needle = foldAscii(toStringSafe(name));
  if (!needle) {
    return null;
  }
  const openTarget = `<${needle}`;
  const closeTarget = `</${needle}`;
  let depth = 1;
  let i = Math.max(0, from || 0);
  while (i < text.length) {
    const skipped = skipXmlIgnoredSection(lower, i);
    if (skipped.blocked) {
      return null;
    }
    if (skipped.advanced) {
      i = skipped.next;
      continue;
    }
    if (lower.startsWith(closeTarget, i) && hasXmlTagBoundary(text, i + closeTarget.length)) {
      const tagEnd = findXmlTagEnd(text, i + closeTarget.length);
      if (tagEnd < 0) {
        return null;
      }
      depth -= 1;
      if (depth === 0) {
        return { closeStart: i, closeEnd: tagEnd + 1 };
      }
      i = tagEnd + 1;
      continue;
    }
    if (lower.startsWith(openTarget, i) && hasXmlTagBoundary(text, i + openTarget.length)) {
      const tagEnd = findXmlTagEnd(text, i + openTarget.length);
      if (tagEnd < 0) {
        return null;
      }
      // A nested self-closing tag opens nothing that needs to be closed.
      if (!isSelfClosingXmlTag(text.slice(i, tagEnd))) {
        depth += 1;
      }
      i = tagEnd + 1;
      continue;
    }
    i += 1;
  }
  return null;
}
function parseMarkupValue(raw, paramName = '') {
const cdata = extractStandaloneCDATA(raw);
if (cdata.ok) {
const literal = parseJSONLiteralValue(cdata.value);
return literal.ok ? literal.value : cdata.value;
if (literal.ok) {
return literal.value;
}
const structured = parseStructuredCDATAParameterValue(paramName, cdata.value);
return structured.ok ? structured.value : cdata.value;
}
const s = toStringSafe(extractRawTagValue(raw)).trim();
if (!s) {
@@ -648,8 +785,11 @@ function parseMarkupValue(raw) {
}
if (s.includes('<') && s.includes('>')) {
const nested = parseMarkupInput(s);
if (nested && typeof nested === 'object' && !Array.isArray(nested)) {
const nested = unwrapItemOnlyMarkupValue(parseMarkupInput(s));
if (Array.isArray(nested)) {
return nested;
}
if (nested && typeof nested === 'object') {
if (isOnlyRawValue(nested)) {
return toStringSafe(nested._raw);
}
@@ -664,6 +804,66 @@ function parseMarkupValue(raw) {
return s;
}
/**
 * Tries to interpret the text of a CDATA parameter as a structured markup
 * fragment.
 *
 * Parameters whose names `preservesCDATAStringParameter` accepts are never
 * reinterpreted. Otherwise the text is normalized and, when it contains both
 * `<` and `>`, handed to `parseMarkupInput`.
 *
 * @param {*} paramName - Name of the parameter being parsed.
 * @param {*} raw - Text extracted from the CDATA section.
 * @returns {{ok: boolean, value: *}} `ok: true` with an array or a non-empty
 *   object that is not a raw-only wrapper; `{ ok: false, value: null }`
 *   otherwise.
 */
function parseStructuredCDATAParameterValue(paramName, raw) {
  const notStructured = () => ({ ok: false, value: null });
  if (preservesCDATAStringParameter(paramName)) {
    return notStructured();
  }
  const fragment = normalizeCDATAForStructuredParse(raw);
  const looksLikeMarkup = fragment.includes('<') && fragment.includes('>');
  if (!looksLikeMarkup) {
    return notStructured();
  }
  const parsed = parseMarkupInput(fragment);
  if (Array.isArray(parsed)) {
    return { ok: true, value: parsed };
  }
  const isObject = Boolean(parsed) && typeof parsed === 'object';
  if (isObject && !isOnlyRawValue(parsed) && Object.keys(parsed).length > 0) {
    return { ok: true, value: parsed };
  }
  return notStructured();
}
/**
 * Prepares CDATA text for structured parsing: every `<br>` / `<br/>` /
 * `<br />` (any letter case) becomes a newline, surrounding whitespace is
 * trimmed, and the result is passed through `unescapeHtml`.
 *
 * @param {*} raw - CDATA text (stringified via `toStringSafe`).
 * @returns {string} Normalized text.
 */
function normalizeCDATAForStructuredParse(raw) {
  const source = toStringSafe(raw);
  const withNewlines = source.replace(/<br\s*\/?>/gi, '\n');
  const trimmed = withNewlines.trim();
  return unescapeHtml(trimmed);
}
/**
 * Reports whether a parameter's CDATA payload must be kept as a plain string
 * instead of being reinterpreted as structured markup.
 *
 * The name is stringified, trimmed and lowercased before comparison. A
 * `switch` is used so that no lookup structure is allocated on each call.
 *
 * @param {*} name - Parameter name.
 * @returns {boolean} `true` for the raw-text parameter names listed below.
 */
function preservesCDATAStringParameter(name) {
  switch (toStringSafe(name).trim().toLowerCase()) {
    case 'content':
    case 'file_content':
    case 'text':
    case 'prompt':
    case 'query':
    case 'command':
    case 'cmd':
    case 'script':
    case 'code':
    case 'old_string':
    case 'new_string':
    case 'pattern':
    case 'path':
    case 'file_path':
      return true;
    default:
      return false;
  }
}
/**
 * Recursively converts "item-only" objects into arrays.
 *
 * An object whose single key is `item` is replaced by its (recursively
 * unwrapped) `item` value, wrapped in an array unless it already is one.
 * Arrays are processed element by element, other objects are copied key by
 * key with unwrapped values, and primitives and `null` are returned as-is.
 *
 * @param {*} value - Parsed markup value.
 * @returns {*} A new structure with every item-only object turned into an array.
 */
function unwrapItemOnlyMarkupValue(value) {
  if (Array.isArray(value)) {
    return value.map((entry) => unwrapItemOnlyMarkupValue(entry));
  }
  const isObject = Boolean(value) && typeof value === 'object';
  if (!isObject) {
    return value;
  }
  const names = Object.keys(value);
  const isItemOnly = names.length === 1 && names[0] === 'item';
  if (isItemOnly) {
    const unwrapped = unwrapItemOnlyMarkupValue(value.item);
    if (Array.isArray(unwrapped)) {
      return unwrapped;
    }
    return [unwrapped];
  }
  const result = {};
  names.forEach((name) => {
    result[name] = unwrapItemOnlyMarkupValue(value[name]);
  });
  return result;
}
function extractRawTagValue(inner) {
const s = toStringSafe(inner).trim();
if (!s) {

View File

@@ -10,6 +10,7 @@ import (
var xmlAttrPattern = regexp.MustCompile(`(?is)\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')`)
var xmlToolCallsClosePattern = regexp.MustCompile(`(?is)</tool_calls>`)
var xmlInvokeStartPattern = regexp.MustCompile(`(?is)<invoke\b[^>]*\bname\s*=\s*("([^"]*)"|'([^']*)')`)
var cdataBRSeparatorPattern = regexp.MustCompile(`(?i)<br\s*/?>`)
func parseXMLToolCalls(text string) []ParsedToolCall {
wrappers := findXMLElementBlocks(text, "tool_calls")
@@ -91,7 +92,7 @@ func parseSingleXMLToolCall(block xmlElementBlock) (ParsedToolCall, bool) {
if paramName == "" {
continue
}
value := parseInvokeParameterValue(paramMatch.Body)
value := parseInvokeParameterValue(paramName, paramMatch.Body)
appendMarkupValue(input, paramName, value)
}
@@ -289,7 +290,7 @@ func parseXMLTagAttributes(raw string) map[string]string {
return out
}
func parseInvokeParameterValue(raw string) any {
func parseInvokeParameterValue(paramName, raw string) any {
trimmed := strings.TrimSpace(raw)
if trimmed == "" {
return ""
@@ -298,10 +299,34 @@ func parseInvokeParameterValue(raw string) any {
if parsed, ok := parseJSONLiteralValue(value); ok {
return parsed
}
if parsed, ok := parseStructuredCDATAParameterValue(paramName, value); ok {
return parsed
}
return value
}
decoded := html.UnescapeString(extractRawTagValue(trimmed))
if strings.Contains(decoded, "<") && strings.Contains(decoded, ">") {
if parsedValue, ok := parseXMLFragmentValue(decoded); ok {
switch v := parsedValue.(type) {
case map[string]any:
if len(v) > 0 {
return v
}
case []any:
return v
case string:
text := strings.TrimSpace(v)
if text == "" {
return ""
}
if parsedText, ok := parseJSONLiteralValue(text); ok {
return parsedText
}
return v
default:
return v
}
}
if parsed := parseStructuredToolCallInput(decoded); len(parsed) > 0 {
if len(parsed) == 1 {
if rawValue, ok := parsed["_raw"].(string); ok {
@@ -316,3 +341,45 @@ func parseInvokeParameterValue(raw string) any {
}
return decoded
}
// parseStructuredCDATAParameterValue tries to interpret the text of a CDATA
// parameter as a structured XML fragment.
//
// Parameters accepted by preservesCDATAStringParameter are never
// reinterpreted. Otherwise the text is normalized and, when it contains both
// '<' and '>', handed to parseXMLFragmentValue. The second result is true only
// when that parse yields a slice or a non-empty map.
func parseStructuredCDATAParameterValue(paramName, raw string) (any, bool) {
	if preservesCDATAStringParameter(paramName) {
		return nil, false
	}
	fragment := normalizeCDATAForStructuredParse(raw)
	looksLikeMarkup := strings.Contains(fragment, "<") && strings.Contains(fragment, ">")
	if !looksLikeMarkup {
		return nil, false
	}
	value, ok := parseXMLFragmentValue(fragment)
	if !ok {
		return nil, false
	}
	if items, isList := value.([]any); isList {
		return items, true
	}
	if fields, isMap := value.(map[string]any); isMap && len(fields) > 0 {
		return fields, true
	}
	// Plain strings, empty maps and anything else stay unstructured.
	return nil, false
}
// normalizeCDATAForStructuredParse prepares CDATA text for structured parsing:
// every match of cdataBRSeparatorPattern is replaced with a newline,
// surrounding whitespace is trimmed, and HTML entities are unescaped.
func normalizeCDATAForStructuredParse(raw string) string {
	if len(raw) == 0 {
		return ""
	}
	withNewlines := cdataBRSeparatorPattern.ReplaceAllString(raw, "\n")
	trimmed := strings.TrimSpace(withNewlines)
	return html.UnescapeString(trimmed)
}
// preservesCDATAStringParameter reports whether a parameter's CDATA payload
// must be kept as a plain string rather than reinterpreted as structured XML.
// The name is trimmed and lowercased before comparison.
func preservesCDATAStringParameter(name string) bool {
	normalized := strings.ToLower(strings.TrimSpace(name))
	switch normalized {
	case "content", "file_content", "text", "prompt", "query":
		return true
	case "command", "cmd", "script", "code":
		return true
	case "old_string", "new_string", "pattern":
		return true
	case "path", "file_path":
		return true
	}
	return false
}

View File

@@ -159,6 +159,82 @@ func TestParseToolCallsSupportsJSONScalarParameters(t *testing.T) {
}
}
// TestParseToolCallsTreatsItemOnlyParameterBodyAsArray checks that a parameter
// body consisting solely of <item> children is parsed as an array, including
// an item-only field nested inside an item.
func TestParseToolCallsTreatsItemOnlyParameterBodyAsArray(t *testing.T) {
	lines := []string{
		`<|DSML|tool_calls>`,
		`<|DSML|invoke name="AskUserQuestion">`,
		`<|DSML|parameter name="questions">`,
		`<item>`,
		`<question><![CDATA[What would you like to do next?]]></question>`,
		`<header><![CDATA[Next step]]></header>`,
		`<options>`,
		`<item><label><![CDATA[Run tests]]></label><description><![CDATA[Run the test suite]]></description></item>`,
		`<item><label><![CDATA[Other task]]></label><description><![CDATA[Something else entirely]]></description></item>`,
		`</options>`,
		`<multiSelect>false</multiSelect>`,
		`</item>`,
		`</|DSML|parameter>`,
		`</|DSML|invoke>`,
		`</|DSML|tool_calls>`,
	}
	calls := ParseToolCalls(strings.Join(lines, "\n"), []string{"AskUserQuestion"})
	if got := len(calls); got != 1 {
		t.Fatalf("expected one AskUserQuestion call, got %#v", calls)
	}

	rawQuestions := calls[0].Input["questions"]
	questions, isList := rawQuestions.([]any)
	if !isList || len(questions) != 1 {
		t.Fatalf("expected questions to parse as array, got %#v", rawQuestions)
	}

	first, isObject := questions[0].(map[string]any)
	if !isObject {
		t.Fatalf("expected first question object, got %#v", questions[0])
	}
	wantFields := map[string]any{
		"question":    "What would you like to do next?",
		"header":      "Next step",
		"multiSelect": false,
	}
	for key, want := range wantFields {
		if first[key] != want {
			t.Fatalf("unexpected question payload: %#v", first)
		}
	}

	// The nested <options> body is item-only as well and must become a slice.
	options, isList := first["options"].([]any)
	if !isList || len(options) != 2 {
		t.Fatalf("expected options to parse as array, got %#v", first["options"])
	}
}
// TestParseToolCallsTreatsCDATAItemOnlyBodyAsArray checks that an item-only
// XML fragment wrapped in CDATA, with <br> used as a separator, is parsed as
// an array of objects.
func TestParseToolCallsTreatsCDATAItemOnlyBodyAsArray(t *testing.T) {
	const (
		prefix = `<|DSML|tool_calls><|DSML|invoke name="TodoWrite"><|DSML|parameter name="todos"><![CDATA[`
		todos  = `<br> <item><br> <activeForm>Testing EnterWorktree tool</activeForm><br> <content>Test EnterWorktree tool</content><br> <status>in_progress</status><br> </item><br> <item><br> <activeForm>Testing TodoWrite tool</activeForm><br> <content>Test TodoWrite tool</content><br> <status>completed</status><br> </item><br>`
		suffix = `]]></|DSML|parameter></|DSML|invoke></|DSML|tool_calls>`
	)
	calls := ParseToolCalls(prefix+todos+suffix, []string{"TodoWrite"})
	if got := len(calls); got != 1 {
		t.Fatalf("expected one TodoWrite call, got %#v", calls)
	}

	rawTodos := calls[0].Input["todos"]
	items, isList := rawTodos.([]any)
	if !isList || len(items) != 2 {
		t.Fatalf("expected todos CDATA item body to parse as array, got %#v", rawTodos)
	}

	first, isObject := items[0].(map[string]any)
	if !isObject {
		t.Fatalf("expected first todo object, got %#v", items[0])
	}
	wantFields := map[string]any{
		"activeForm": "Testing EnterWorktree tool",
		"content":    "Test EnterWorktree tool",
		"status":     "in_progress",
	}
	for key, want := range wantFields {
		if first[key] != want {
			t.Fatalf("unexpected first todo: %#v", first)
		}
	}
}
// TestParseToolCallsTreatsCDATAObjectFragmentAsObject checks that a CDATA
// payload holding several differently named elements is parsed as an object
// whose item-only child becomes an array.
func TestParseToolCallsTreatsCDATAObjectFragmentAsObject(t *testing.T) {
	const (
		prefix  = `<tool_calls><invoke name="AskUserQuestion"><parameter name="questions"><![CDATA[`
		payload = `<question><![CDATA[Pick one]]></question><options><item><label><![CDATA[A]]></label></item><item><label><![CDATA[B]]></label></item></options>`
		suffix  = `]]></parameter></invoke></tool_calls>`
	)
	calls := ParseToolCalls(prefix+payload+suffix, []string{"AskUserQuestion"})
	if got := len(calls); got != 1 {
		t.Fatalf("expected one AskUserQuestion call, got %#v", calls)
	}

	rawQuestions := calls[0].Input["questions"]
	question, isObject := rawQuestions.(map[string]any)
	if !isObject {
		t.Fatalf("expected CDATA XML object fragment to parse as object, got %#v", rawQuestions)
	}

	options, isList := question["options"].([]any)
	questionMatches := question["question"] == "Pick one"
	if !(questionMatches && isList && len(options) == 2) {
		t.Fatalf("unexpected parsed question: %#v", question)
	}
}
func TestParseToolCallsPreservesRawMalformedParams(t *testing.T) {
text := `<tool_calls><invoke name="execute_command"><parameter name="command">cd /root && git status</parameter></invoke></tool_calls>`
calls := ParseToolCalls(text, []string{"execute_command"})

View File

@@ -107,10 +107,27 @@ func parseXMLNodeValue(dec *xml.Decoder, start xml.StartElement) (any, error) {
return nil, errXMLMismatch(start.Name.Local, t.Name.Local)
}
if len(children) == 0 {
if parsed, ok := parseJSONLiteralValue(text.String()); ok {
return parsed, nil
}
return text.String(), nil
}
if txt := text.String(); strings.TrimSpace(txt) != "" {
children["_text"] = txt
if parsed, ok := parseJSONLiteralValue(txt); ok {
children["_text"] = parsed
} else {
children["_text"] = txt
}
}
if len(children) == 1 {
if items, ok := children["item"]; ok {
switch v := items.(type) {
case []any:
return v, nil
default:
return []any{v}, nil
}
}
}
return children, nil
}

View File

@@ -122,6 +122,72 @@ test('parseToolCalls supports JSON scalar parameters', () => {
assert.equal(calls[0].input.enabled, true);
});
// A parameter body made only of <item> children must come back as an array,
// including the item-only <options> field nested inside the item.
test('parseToolCalls treats item-only parameter body as array', () => {
  const expectedQuestions = [
    {
      question: 'What would you like to do next?',
      header: 'Next step',
      options: [
        { label: 'Run tests', description: 'Run the test suite' },
        { label: 'Other task', description: 'Something else entirely' },
      ],
      multiSelect: false,
    },
  ];
  const lines = [
    '<|DSML|tool_calls>',
    '<|DSML|invoke name="AskUserQuestion">',
    '<|DSML|parameter name="questions">',
    '<item>',
    '<question><![CDATA[What would you like to do next?]]></question>',
    '<header><![CDATA[Next step]]></header>',
    '<options>',
    '<item><label><![CDATA[Run tests]]></label><description><![CDATA[Run the test suite]]></description></item>',
    '<item><label><![CDATA[Other task]]></label><description><![CDATA[Something else entirely]]></description></item>',
    '</options>',
    '<multiSelect>false</multiSelect>',
    '</item>',
    '</|DSML|parameter>',
    '</|DSML|invoke>',
    '</|DSML|tool_calls>',
  ];

  const calls = parseToolCalls(lines.join('\n'), ['AskUserQuestion']);

  assert.equal(calls.length, 1);
  const [call] = calls;
  assert.deepEqual(call.input.questions, expectedQuestions);
});
// An item-only XML fragment wrapped in CDATA, with <br> used as a separator,
// must be recovered as an array of objects.
test('parseToolCalls treats CDATA item-only body as array', () => {
  const expectedTodos = [
    {
      activeForm: 'Testing EnterWorktree tool',
      content: 'Test EnterWorktree tool',
      status: 'in_progress',
    },
    {
      activeForm: 'Testing TodoWrite tool',
      content: 'Test TodoWrite tool',
      status: 'completed',
    },
  ];
  const cdataBody = '<br> <item><br> <activeForm>Testing EnterWorktree tool</activeForm><br> <content>Test EnterWorktree tool</content><br> <status>in_progress</status><br> </item><br> <item><br> <activeForm>Testing TodoWrite tool</activeForm><br> <content>Test TodoWrite tool</content><br> <status>completed</status><br> </item><br>';
  const markup = [
    '<|DSML|tool_calls><|DSML|invoke name="TodoWrite"><|DSML|parameter name="todos"><![CDATA[',
    cdataBody,
    ']]></|DSML|parameter></|DSML|invoke></|DSML|tool_calls>',
  ].join('');

  const calls = parseToolCalls(markup, ['TodoWrite']);

  assert.equal(calls.length, 1);
  const [call] = calls;
  assert.deepEqual(call.input.todos, expectedTodos);
});
// A CDATA payload holding several differently named elements must be recovered
// as an object whose item-only child becomes an array.
test('parseToolCalls treats CDATA object fragment as object', () => {
  const expectedQuestions = {
    question: 'Pick one',
    options: [
      { label: 'A' },
      { label: 'B' },
    ],
  };
  const cdataBody = '<question><![CDATA[Pick one]]></question><options><item><label><![CDATA[A]]></label></item><item><label><![CDATA[B]]></label></item></options>';
  const markup = [
    '<tool_calls><invoke name="AskUserQuestion"><parameter name="questions"><![CDATA[',
    cdataBody,
    ']]></parameter></invoke></tool_calls>',
  ].join('');

  const calls = parseToolCalls(markup, ['AskUserQuestion']);

  assert.equal(calls.length, 1);
  const [call] = calls;
  assert.deepEqual(call.input.questions, expectedQuestions);
});
test('parseToolCalls normalizes mixed DSML and XML tool tags', () => {
// Models commonly mix DSML wrapper tags with canonical inner tags.
const payload = '<|DSML|tool_calls><invoke name="read_file"><|DSML|parameter name="path">README.MD</|DSML|parameter></invoke></|DSML|tool_calls>';