diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index 6039e34..c7a6ad4 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -153,6 +153,7 @@ OpenAI Chat / Responses 在标准化后、history split / current input file 之 工具调用正例现在优先示范官方 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。 兼容层仍接受旧式纯 `<tool_calls>` wrapper,但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意:这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现;DSML 标签会在解析入口归一化回现有 XML 标签后继续走同一套 parser。 +数组参数使用 `<item>...</item>` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... }` 对象。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。 正例中的工具名只会来自当前请求实际声明的工具;如果当前请求没有足够的已知工具形态,就省略对应的单工具、多工具或嵌套示例,避免把不可用工具名写进 prompt。 对执行类工具,脚本内容必须进入执行参数本身:`Bash` / `execute_command` 使用 `command`,`exec_command` 使用 `cmd`;不要把脚本示范成 `path` / `content` 文件写入参数。 diff --git a/docs/toolcall-semantics.md b/docs/toolcall-semantics.md index 5529a4b..bb7b924 100644 --- a/docs/toolcall-semantics.md +++ b/docs/toolcall-semantics.md @@ -63,6 +63,8 @@ - 当文本中 mention 了某种标签名(如 `<tool_calls>` 或 Markdown inline code 里的 `<|DSML|tool_calls>`)而后面紧跟真正工具调用时,sieve 会跳过不可解析的 mention 候选并继续匹配后续真实工具块,不会因 mention 导致工具调用丢失,也不会截断 mention 后的正文 另外,`<parameter>` 的值如果本身是合法 JSON 字面量,也会按结构化值解析,而不是一律保留为字符串。例如 `123`、`true`、`null`、`[1,2]`、`{"a":1}` 都会还原成对应的 number / boolean / null / array / object。 +结构化 XML 参数也会还原为 JSON 结构:如果参数体只包含一个或多个 `<item>...</item>` 子节点,会输出数组;嵌套对象里的 item-only 字段也同样按数组处理。例如 `<parameter name="questions"><item><question>...</question></item></parameter>` 会输出 `{"questions":[{"question":"..."}]}`,而不是 `{"questions":{"item":...}}`。 +如果模型误把完整结构化 XML fragment 放进 CDATA,Go / Node 会先保护明显的原文字段(如 `content` / `command` / `prompt` / `old_string` / `new_string`),其余参数会尝试把 CDATA 内的完整 XML fragment 还原成 object / array;常见的 `<br>
` 分隔符会按换行归一化后再解析。 ## 4) 输出结构 diff --git a/internal/js/helpers/stream-tool-sieve/parse_payload.js b/internal/js/helpers/stream-tool-sieve/parse_payload.js index 185ed4d..a935f00 100644 --- a/internal/js/helpers/stream-tool-sieve/parse_payload.js +++ b/internal/js/helpers/stream-tool-sieve/parse_payload.js @@ -1,6 +1,5 @@ 'use strict'; -const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi; const CDATA_PATTERN = /^$/i; const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi; const TOOL_MARKUP_NAMES = ['tool_calls', 'invoke', 'parameter']; @@ -293,7 +292,7 @@ function parseMarkupSingleToolCall(block) { if (!paramName) { continue; } - appendMarkupValue(input, paramName, parseMarkupValue(match.body)); + appendMarkupValue(input, paramName, parseMarkupValue(match.body, paramName)); } if (Object.keys(input).length === 0 && inner.trim() !== '') { return null; @@ -600,8 +599,11 @@ function parseMarkupInput(raw) { return {}; } // Prioritize XML-style KV tags (e.g., val) - const kv = parseMarkupKVObject(s); - if (Object.keys(kv).length > 0) { + const kv = unwrapItemOnlyMarkupValue(parseMarkupKVObject(s)); + if (Array.isArray(kv)) { + return kv; + } + if (kv && typeof kv === 'object' && Object.keys(kv).length > 0) { return kv; } @@ -622,12 +624,12 @@ function parseMarkupKVObject(text) { return {}; } const out = {}; - for (const m of raw.matchAll(TOOL_CALL_MARKUP_KV_PATTERN)) { - const key = toStringSafe(m[1]).trim(); + for (const block of findGenericXmlElementBlocks(raw)) { + const key = toStringSafe(block.localName).trim(); if (!key) { continue; } - const value = parseMarkupValue(m[2]); + const value = parseMarkupValue(block.body, key); if (value === undefined || value === null) { continue; } @@ -636,11 +638,146 @@ function parseMarkupKVObject(text) { return out; } -function parseMarkupValue(raw) { +function findGenericXmlElementBlocks(text) { + const source = toStringSafe(text); + if (!source) { + 
return []; + } + const out = []; + let pos = 0; + while (pos < source.length) { + const start = findGenericXmlStartTagOutsideCDATA(source, pos); + if (!start) { + break; + } + if (start.selfClosing) { + out.push({ + name: start.name, + localName: start.localName, + attrs: start.attrs, + body: '', + start: start.start, + end: start.end + 1, + }); + pos = start.end + 1; + continue; + } + const end = findMatchingGenericXmlEndTagOutsideCDATA(source, start.name, start.bodyStart); + if (!end) { + pos = start.bodyStart; + continue; + } + out.push({ + name: start.name, + localName: start.localName, + attrs: start.attrs, + body: source.slice(start.bodyStart, end.closeStart), + start: start.start, + end: end.closeEnd, + }); + pos = end.closeEnd; + } + return out; +} + +function findGenericXmlStartTagOutsideCDATA(text, from) { + const lower = text.toLowerCase(); + for (let i = Math.max(0, from || 0); i < text.length;) { + const skipped = skipXmlIgnoredSection(lower, i); + if (skipped.blocked) { + return null; + } + if (skipped.advanced) { + i = skipped.next; + continue; + } + if (text[i] !== '<' || text[i + 1] === '/' || text[i + 1] === '!' || text[i + 1] === '?') { + i += 1; + continue; + } + const match = text.slice(i + 1).match(/^([A-Za-z_][A-Za-z0-9_.:-]*)/); + if (!match) { + i += 1; + continue; + } + const name = match[1]; + const nameEnd = i + 1 + name.length; + if (!hasXmlTagBoundary(text, nameEnd)) { + i += 1; + continue; + } + const tagEnd = findXmlTagEnd(text, nameEnd); + if (tagEnd < 0) { + return null; + } + return { + start: i, + end: tagEnd, + bodyStart: tagEnd + 1, + name, + localName: name.includes(':') ? 
name.slice(name.lastIndexOf(':') + 1) : name, + attrs: text.slice(nameEnd, tagEnd), + selfClosing: isSelfClosingXmlTag(text.slice(i, tagEnd)), + }; + } + return null; +} + +function findMatchingGenericXmlEndTagOutsideCDATA(text, name, from) { + const lower = text.toLowerCase(); + const needle = toStringSafe(name).toLowerCase(); + if (!needle) { + return null; + } + const openTarget = `<${needle}`; + const closeTarget = `')) { - const nested = parseMarkupInput(s); - if (nested && typeof nested === 'object' && !Array.isArray(nested)) { + const nested = unwrapItemOnlyMarkupValue(parseMarkupInput(s)); + if (Array.isArray(nested)) { + return nested; + } + if (nested && typeof nested === 'object') { if (isOnlyRawValue(nested)) { return toStringSafe(nested._raw); } @@ -664,6 +804,66 @@ function parseMarkupValue(raw) { return s; } +function parseStructuredCDATAParameterValue(paramName, raw) { + if (preservesCDATAStringParameter(paramName)) { + return { ok: false, value: null }; + } + const normalized = normalizeCDATAForStructuredParse(raw); + if (!normalized.includes('<') || !normalized.includes('>')) { + return { ok: false, value: null }; + } + const parsed = parseMarkupInput(normalized); + if (Array.isArray(parsed)) { + return { ok: true, value: parsed }; + } + if (parsed && typeof parsed === 'object' && !isOnlyRawValue(parsed) && Object.keys(parsed).length > 0) { + return { ok: true, value: parsed }; + } + return { ok: false, value: null }; +} + +function normalizeCDATAForStructuredParse(raw) { + return unescapeHtml(toStringSafe(raw).replace(//gi, '\n').trim()); +} + +function preservesCDATAStringParameter(name) { + return new Set([ + 'content', + 'file_content', + 'text', + 'prompt', + 'query', + 'command', + 'cmd', + 'script', + 'code', + 'old_string', + 'new_string', + 'pattern', + 'path', + 'file_path', + ]).has(toStringSafe(name).trim().toLowerCase()); +} + +function unwrapItemOnlyMarkupValue(value) { + if (Array.isArray(value)) { + return 
value.map(unwrapItemOnlyMarkupValue); + } + if (!value || typeof value !== 'object') { + return value; + } + const keys = Object.keys(value); + if (keys.length === 1 && keys[0] === 'item') { + const items = unwrapItemOnlyMarkupValue(value.item); + return Array.isArray(items) ? items : [items]; + } + const out = {}; + for (const key of keys) { + out[key] = unwrapItemOnlyMarkupValue(value[key]); + } + return out; +} + function extractRawTagValue(inner) { const s = toStringSafe(inner).trim(); if (!s) { diff --git a/internal/toolcall/toolcalls_parse_markup.go b/internal/toolcall/toolcalls_parse_markup.go index 8633ad0..d16f5e1 100644 --- a/internal/toolcall/toolcalls_parse_markup.go +++ b/internal/toolcall/toolcalls_parse_markup.go @@ -10,6 +10,7 @@ import ( var xmlAttrPattern = regexp.MustCompile(`(?is)\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')`) var xmlToolCallsClosePattern = regexp.MustCompile(`(?is)
`) var xmlInvokeStartPattern = regexp.MustCompile(`(?is)]*\bname\s*=\s*("([^"]*)"|'([^']*)')`) +var cdataBRSeparatorPattern = regexp.MustCompile(`(?i)`) func parseXMLToolCalls(text string) []ParsedToolCall { wrappers := findXMLElementBlocks(text, "tool_calls") @@ -91,7 +92,7 @@ func parseSingleXMLToolCall(block xmlElementBlock) (ParsedToolCall, bool) { if paramName == "" { continue } - value := parseInvokeParameterValue(paramMatch.Body) + value := parseInvokeParameterValue(paramName, paramMatch.Body) appendMarkupValue(input, paramName, value) } @@ -289,7 +290,7 @@ func parseXMLTagAttributes(raw string) map[string]string { return out } -func parseInvokeParameterValue(raw string) any { +func parseInvokeParameterValue(paramName, raw string) any { trimmed := strings.TrimSpace(raw) if trimmed == "" { return "" @@ -298,10 +299,34 @@ func parseInvokeParameterValue(raw string) any { if parsed, ok := parseJSONLiteralValue(value); ok { return parsed } + if parsed, ok := parseStructuredCDATAParameterValue(paramName, value); ok { + return parsed + } return value } decoded := html.UnescapeString(extractRawTagValue(trimmed)) if strings.Contains(decoded, "<") && strings.Contains(decoded, ">") { + if parsedValue, ok := parseXMLFragmentValue(decoded); ok { + switch v := parsedValue.(type) { + case map[string]any: + if len(v) > 0 { + return v + } + case []any: + return v + case string: + text := strings.TrimSpace(v) + if text == "" { + return "" + } + if parsedText, ok := parseJSONLiteralValue(text); ok { + return parsedText + } + return v + default: + return v + } + } if parsed := parseStructuredToolCallInput(decoded); len(parsed) > 0 { if len(parsed) == 1 { if rawValue, ok := parsed["_raw"].(string); ok { @@ -316,3 +341,45 @@ func parseInvokeParameterValue(raw string) any { } return decoded } + +func parseStructuredCDATAParameterValue(paramName, raw string) (any, bool) { + if preservesCDATAStringParameter(paramName) { + return nil, false + } + normalized := 
normalizeCDATAForStructuredParse(raw) + if !strings.Contains(normalized, "<") || !strings.Contains(normalized, ">") { + return nil, false + } + parsed, ok := parseXMLFragmentValue(normalized) + if !ok { + return nil, false + } + switch v := parsed.(type) { + case []any: + return v, true + case map[string]any: + if len(v) == 0 { + return nil, false + } + return v, true + default: + return nil, false + } +} + +func normalizeCDATAForStructuredParse(raw string) string { + if raw == "" { + return "" + } + normalized := cdataBRSeparatorPattern.ReplaceAllString(raw, "\n") + return html.UnescapeString(strings.TrimSpace(normalized)) +} + +func preservesCDATAStringParameter(name string) bool { + switch strings.ToLower(strings.TrimSpace(name)) { + case "content", "file_content", "text", "prompt", "query", "command", "cmd", "script", "code", "old_string", "new_string", "pattern", "path", "file_path": + return true + default: + return false + } +} diff --git a/internal/toolcall/toolcalls_test.go b/internal/toolcall/toolcalls_test.go index b48f88c..30a7b3a 100644 --- a/internal/toolcall/toolcalls_test.go +++ b/internal/toolcall/toolcalls_test.go @@ -159,6 +159,82 @@ func TestParseToolCallsSupportsJSONScalarParameters(t *testing.T) { } } +func TestParseToolCallsTreatsItemOnlyParameterBodyAsArray(t *testing.T) { + text := strings.Join([]string{ + `<|DSML|tool_calls>`, + `<|DSML|invoke name="AskUserQuestion">`, + `<|DSML|parameter name="questions">`, + ``, + ``, + `
`, + ``, + ``, + ``, + ``, + `false`, + `
`, + ``, + ``, + ``, + }, "\n") + calls := ParseToolCalls(text, []string{"AskUserQuestion"}) + if len(calls) != 1 { + t.Fatalf("expected one AskUserQuestion call, got %#v", calls) + } + questions, ok := calls[0].Input["questions"].([]any) + if !ok || len(questions) != 1 { + t.Fatalf("expected questions to parse as array, got %#v", calls[0].Input["questions"]) + } + first, ok := questions[0].(map[string]any) + if !ok { + t.Fatalf("expected first question object, got %#v", questions[0]) + } + if first["question"] != "What would you like to do next?" || first["header"] != "Next step" || first["multiSelect"] != false { + t.Fatalf("unexpected question payload: %#v", first) + } + options, ok := first["options"].([]any) + if !ok || len(options) != 2 { + t.Fatalf("expected options to parse as array, got %#v", first["options"]) + } +} + +func TestParseToolCallsTreatsCDATAItemOnlyBodyAsArray(t *testing.T) { + todos := `

Testing EnterWorktree tool
Test EnterWorktree tool
in_progress


Testing TodoWrite tool
Test TodoWrite tool
completed

` + text := `<|DSML|tool_calls><|DSML|invoke name="TodoWrite"><|DSML|parameter name="todos">` + calls := ParseToolCalls(text, []string{"TodoWrite"}) + if len(calls) != 1 { + t.Fatalf("expected one TodoWrite call, got %#v", calls) + } + items, ok := calls[0].Input["todos"].([]any) + if !ok || len(items) != 2 { + t.Fatalf("expected todos CDATA item body to parse as array, got %#v", calls[0].Input["todos"]) + } + first, ok := items[0].(map[string]any) + if !ok { + t.Fatalf("expected first todo object, got %#v", items[0]) + } + if first["activeForm"] != "Testing EnterWorktree tool" || first["content"] != "Test EnterWorktree tool" || first["status"] != "in_progress" { + t.Fatalf("unexpected first todo: %#v", first) + } +} + +func TestParseToolCallsTreatsCDATAObjectFragmentAsObject(t *testing.T) { + payload := `` + text := `` + calls := ParseToolCalls(text, []string{"AskUserQuestion"}) + if len(calls) != 1 { + t.Fatalf("expected one AskUserQuestion call, got %#v", calls) + } + question, ok := calls[0].Input["questions"].(map[string]any) + if !ok { + t.Fatalf("expected CDATA XML object fragment to parse as object, got %#v", calls[0].Input["questions"]) + } + options, ok := question["options"].([]any) + if question["question"] != "Pick one" || !ok || len(options) != 2 { + t.Fatalf("unexpected parsed question: %#v", question) + } +} + func TestParseToolCallsPreservesRawMalformedParams(t *testing.T) { text := `cd /root && git status` calls := ParseToolCalls(text, []string{"execute_command"}) diff --git a/internal/toolcall/toolcalls_xml.go b/internal/toolcall/toolcalls_xml.go index b375c48..c29dec0 100644 --- a/internal/toolcall/toolcalls_xml.go +++ b/internal/toolcall/toolcalls_xml.go @@ -107,10 +107,27 @@ func parseXMLNodeValue(dec *xml.Decoder, start xml.StartElement) (any, error) { return nil, errXMLMismatch(start.Name.Local, t.Name.Local) } if len(children) == 0 { + if parsed, ok := parseJSONLiteralValue(text.String()); ok { + return parsed, nil + } return text.String(), 
nil } if txt := text.String(); strings.TrimSpace(txt) != "" { - children["_text"] = txt + if parsed, ok := parseJSONLiteralValue(txt); ok { + children["_text"] = parsed + } else { + children["_text"] = txt + } + } + if len(children) == 1 { + if items, ok := children["item"]; ok { + switch v := items.(type) { + case []any: + return v, nil + default: + return []any{v}, nil + } + } } return children, nil } diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index 1938984..5ab11aa 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -122,6 +122,72 @@ test('parseToolCalls supports JSON scalar parameters', () => { assert.equal(calls[0].input.enabled, true); }); +test('parseToolCalls treats item-only parameter body as array', () => { + const payload = [ + '<|DSML|tool_calls>', + '<|DSML|invoke name="AskUserQuestion">', + '<|DSML|parameter name="questions">', + '', + '', + '
', + '', + '', + '', + '', + 'false', + '
', + '', + '', + '', + ].join('\n'); + const calls = parseToolCalls(payload, ['AskUserQuestion']); + assert.equal(calls.length, 1); + assert.deepEqual(calls[0].input.questions, [ + { + question: 'What would you like to do next?', + header: 'Next step', + options: [ + { label: 'Run tests', description: 'Run the test suite' }, + { label: 'Other task', description: 'Something else entirely' }, + ], + multiSelect: false, + }, + ]); +}); + +test('parseToolCalls treats CDATA item-only body as array', () => { + const todos = '

Testing EnterWorktree tool
Test EnterWorktree tool
in_progress


Testing TodoWrite tool
Test TodoWrite tool
completed

'; + const payload = `<|DSML|tool_calls><|DSML|invoke name="TodoWrite"><|DSML|parameter name="todos">`; + const calls = parseToolCalls(payload, ['TodoWrite']); + assert.equal(calls.length, 1); + assert.deepEqual(calls[0].input.todos, [ + { + activeForm: 'Testing EnterWorktree tool', + content: 'Test EnterWorktree tool', + status: 'in_progress', + }, + { + activeForm: 'Testing TodoWrite tool', + content: 'Test TodoWrite tool', + status: 'completed', + }, + ]); +}); + +test('parseToolCalls treats CDATA object fragment as object', () => { + const fragment = ''; + const payload = ``; + const calls = parseToolCalls(payload, ['AskUserQuestion']); + assert.equal(calls.length, 1); + assert.deepEqual(calls[0].input.questions, { + question: 'Pick one', + options: [ + { label: 'A' }, + { label: 'B' }, + ], + }); +}); + test('parseToolCalls normalizes mixed DSML and XML tool tags', () => { // Models commonly mix DSML wrapper tags with canonical inner tags. const payload = '<|DSML|tool_calls><|DSML|parameter name="path">README.MD';