From 63271aea8c22af25c178e5dcb6ad8dc91898dc52 Mon Sep 17 00:00:00 2001 From: CJACK Date: Tue, 28 Apr 2026 01:39:32 +0800 Subject: [PATCH] refactor: update tool call parsing and stream tool sieve logic Co-Authored-By: Claude Opus 4.7 --- docs/prompt-compatibility.md | 2 +- docs/toolcall-semantics.md | 6 +- .../stream-tool-sieve/parse_payload.js | 19 ++++++ .../stream-tool-sieve/tool-keywords.js | 3 + internal/toolcall/toolcalls_parse_markup.go | 63 +++++++++++++++++++ internal/toolcall/toolcalls_test.go | 30 +++++++++ internal/toolstream/tool_sieve_xml.go | 1 + internal/toolstream/tool_sieve_xml_tags.go | 1 + internal/toolstream/tool_sieve_xml_test.go | 45 +++++++++++++ tests/node/stream-tool-sieve.test.js | 39 ++++++++++++ 10 files changed, 205 insertions(+), 4 deletions(-) diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index c10ba8d..f69ee74 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -152,7 +152,7 @@ OpenAI Chat / Responses 在标准化后、current input file 之前,会默认 工具调用正例现在优先示范官方 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。 兼容层仍接受旧式纯 `` wrapper,但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意:这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现;DSML 标签会在解析入口归一化回现有 XML 标签后继续走同一套 parser。 -数组参数使用 `...` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... }` 对象。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。 +数组参数使用 `...` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... 
}` 对象。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过,如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `urgent` 这种行内标记,兼容层会保留原始字符串,不会强行升成 object / array;只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。 正例中的工具名只会来自当前请求实际声明的工具;如果当前请求没有足够的已知工具形态,就省略对应的单工具、多工具或嵌套示例,避免把不可用工具名写进 prompt。 对执行类工具,脚本内容必须进入执行参数本身:`Bash` / `execute_command` 使用 `command`,`exec_command` 使用 `cmd`;不要把脚本示范成 `path` / `content` 文件写入参数。 diff --git a/docs/toolcall-semantics.md b/docs/toolcall-semantics.md index bb7b924..5a3480b 100644 --- a/docs/toolcall-semantics.md +++ b/docs/toolcall-semantics.md @@ -39,7 +39,7 @@ 兼容修复: - 如果模型漏掉 opening wrapper,但后面仍输出了一个或多个 invoke 并以 closing wrapper 收尾,Go 解析链路会在解析前补回缺失的 opening wrapper。 -- 如果模型把 DSML 标签里的分隔符 `|` 写漏成空格(例如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`,或无 leading pipe 的 `` 形态),或把 `DSML` 与工具标签名直接黏连(例如 `` / `` / ``),Go / Node 会在固定工具标签名范围内归一化;相似但非工具标签名(如 `tool_calls_extra`)仍按普通文本处理。 +- 如果模型把 DSML 标签里的分隔符 `|` 写漏成空格(例如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`,或无 leading pipe 的 `` 形态),或把 `DSML` 与工具标签名直接黏连(例如 `` / `` / ``),或把最前面的 pipe 误写成全宽竖线(例如 `<|DSML|tool_calls>` / `<|DSML|invoke>` / `<|DSML|parameter>`),Go / Node 会在固定工具标签名范围内归一化;相似但非工具标签名(如 `tool_calls_extra`)仍按普通文本处理。 - 这是一个针对常见模型失误的窄修复,不改变推荐输出格式;prompt 仍要求模型直接输出完整 DSML 外壳。 - 裸 `` / `` 不会被当成“已支持的工具语法”;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 才会进入工具调用路径。 @@ -53,7 +53,7 @@ 在流式链路中(Go / Node 一致): -- DSML `<|DSML|tool_calls>` wrapper、兼容变体(``、`<|tool_calls>`、`<|tool_calls>`)、窄容错空格分隔形态(如 `<|DSML tool_calls>`)、黏连形态(如 ``)和 canonical `` wrapper 都会进入结构化捕获 +- DSML `<|DSML|tool_calls>` wrapper、兼容变体(``、`<|tool_calls>`、`<|tool_calls>`、`<|DSML|tool_calls>`)、窄容错空格分隔形态(如 `<|DSML tool_calls>`)、黏连形态(如 ``)和 canonical `` wrapper 都会进入结构化捕获 - 如果流里直接从 invoke 开始,但后面补上了 closing wrapper,Go 流式筛分也会按缺失 opening wrapper 的修复路径尝试恢复 - 已识别成功的工具调用不会再次回流到普通文本 - 不符合新格式的块不会执行,并继续按原样文本透传 @@ -64,7 +64,7 @@ 另外,`` 的值如果本身是合法 JSON 
字面量,也会按结构化值解析,而不是一律保留为字符串。例如 `123`、`true`、`null`、`[1,2]`、`{"a":1}` 都会还原成对应的 number / boolean / null / array / object。 结构化 XML 参数也会还原为 JSON 结构:如果参数体只包含一个或多个 `...` 子节点,会输出数组;嵌套对象里的 item-only 字段也同样按数组处理。例如 `...` 会输出 `{"questions":[{"question":"..."}]}`,而不是 `{"questions":{"item":...}}`。 -如果模型误把完整结构化 XML fragment 放进 CDATA,Go / Node 会先保护明显的原文字段(如 `content` / `command` / `prompt` / `old_string` / `new_string`),其余参数会尝试把 CDATA 内的完整 XML fragment 还原成 object / array;常见的 `
<br>` 分隔符会按换行归一化后再解析。 +如果模型误把完整结构化 XML fragment 放进 CDATA,Go / Node 会先保护明显的原文字段(如 `content` / `command` / `prompt` / `old_string` / `new_string`),其余参数会尝试把 CDATA 内的完整 XML fragment 还原成 object / array;常见的 `<br>
` 分隔符会按换行归一化后再解析。但如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `urgent` 这种行内标记,兼容层会把它保留为原始字符串,而不会强行升成 object / array;只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。 ## 4) 输出结构 diff --git a/internal/js/helpers/stream-tool-sieve/parse_payload.js b/internal/js/helpers/stream-tool-sieve/parse_payload.js index a935f00..090cc77 100644 --- a/internal/js/helpers/stream-tool-sieve/parse_payload.js +++ b/internal/js/helpers/stream-tool-sieve/parse_payload.js @@ -530,6 +530,7 @@ function findPartialToolMarkupStart(text) { '<|tool_calls', '<|invoke', '<|parameter', '<|tool_calls', '<|invoke', '<|parameter', '<|dsml|tool_calls', '<|dsml|invoke', '<|dsml|parameter', + '<|dsml|tool_calls', '<|dsml|invoke', '<|dsml|parameter', '')) { return { ok: false, value: null }; } + if (!cdataFragmentLooksExplicitlyStructured(normalized)) { + return { ok: false, value: null }; + } const parsed = parseMarkupInput(normalized); if (Array.isArray(parsed)) { return { ok: true, value: parsed }; @@ -826,6 +830,21 @@ function normalizeCDATAForStructuredParse(raw) { return unescapeHtml(toStringSafe(raw).replace(//gi, '\n').trim()); } +function cdataFragmentLooksExplicitlyStructured(raw) { + const blocks = findGenericXmlElementBlocks(raw); + if (blocks.length === 0) { + return false; + } + if (blocks.length > 1) { + return true; + } + const block = blocks[0]; + if (toStringSafe(block.localName).trim().toLowerCase() === 'item') { + return true; + } + return findGenericXmlElementBlocks(block.body).length > 0; +} + function preservesCDATAStringParameter(name) { return new Set([ 'content', diff --git a/internal/js/helpers/stream-tool-sieve/tool-keywords.js b/internal/js/helpers/stream-tool-sieve/tool-keywords.js index 382e5a2..ac47e4e 100644 --- a/internal/js/helpers/stream-tool-sieve/tool-keywords.js +++ b/internal/js/helpers/stream-tool-sieve/tool-keywords.js @@ -2,6 +2,7 @@ const XML_TOOL_SEGMENT_TAGS = [ '<|dsml|tool_calls>', '<|dsml|tool_calls\n', '<|dsml|tool_calls ', + '<|dsml|tool_calls>', 
'<|dsml|tool_calls\n', '<|dsml|tool_calls ', '<|dsml|invoke ', '<|dsml|invoke\n', '<|dsml|invoke\t', '<|dsml|invoke\r', '<|dsmltool_calls>', '<|dsmltool_calls\n', '<|dsmltool_calls ', '<|dsmlinvoke ', '<|dsmlinvoke\n', '<|dsmlinvoke\t', '<|dsmlinvoke\r', @@ -23,6 +24,7 @@ const XML_TOOL_SEGMENT_TAGS = [ const XML_TOOL_OPENING_TAGS = [ '<|dsml|tool_calls', + '<|dsml|tool_calls', '<|dsmltool_calls', '<|dsml tool_calls', '', + '', '', '', '
', diff --git a/internal/toolcall/toolcalls_parse_markup.go b/internal/toolcall/toolcalls_parse_markup.go index d16f5e1..d137f99 100644 --- a/internal/toolcall/toolcalls_parse_markup.go +++ b/internal/toolcall/toolcalls_parse_markup.go @@ -2,6 +2,7 @@ package toolcall import ( "encoding/json" + "encoding/xml" "html" "regexp" "strings" @@ -350,6 +351,9 @@ func parseStructuredCDATAParameterValue(paramName, raw string) (any, bool) { if !strings.Contains(normalized, "<") || !strings.Contains(normalized, ">") { return nil, false } + if !cdataFragmentLooksExplicitlyStructured(normalized) { + return nil, false + } parsed, ok := parseXMLFragmentValue(normalized) if !ok { return nil, false @@ -375,6 +379,65 @@ func normalizeCDATAForStructuredParse(raw string) string { return html.UnescapeString(strings.TrimSpace(normalized)) } +// Preserve flat CDATA fragments as strings. Only recover structure when the +// fragment clearly encodes a data shape: multiple sibling elements, nested +// child elements, or an explicit item list. 
+func cdataFragmentLooksExplicitlyStructured(raw string) bool { + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + return false + } + + dec := xml.NewDecoder(strings.NewReader("" + trimmed + "")) + tok, err := dec.Token() + if err != nil { + return false + } + start, ok := tok.(xml.StartElement) + if !ok || !strings.EqualFold(start.Name.Local, "root") { + return false + } + + depth := 0 + directChildren := 0 + firstChildName := "" + firstChildHasNested := false + + for { + tok, err := dec.Token() + if err != nil { + return false + } + switch t := tok.(type) { + case xml.StartElement: + if depth == 0 { + directChildren++ + if directChildren == 1 { + firstChildName = strings.ToLower(strings.TrimSpace(t.Name.Local)) + } else { + return true + } + } else if directChildren == 1 && depth == 1 { + firstChildHasNested = true + } + depth++ + case xml.EndElement: + if strings.EqualFold(t.Name.Local, "root") { + if directChildren != 1 { + return false + } + if firstChildName == "item" { + return true + } + return firstChildHasNested + } + if depth > 0 { + depth-- + } + } + } +} + func preservesCDATAStringParameter(name string) bool { switch strings.ToLower(strings.TrimSpace(name)) { case "content", "file_content", "text", "prompt", "query", "command", "cmd", "script", "code", "old_string", "new_string", "pattern", "path", "file_path": diff --git a/internal/toolcall/toolcalls_test.go b/internal/toolcall/toolcalls_test.go index 30a7b3a..62d800b 100644 --- a/internal/toolcall/toolcalls_test.go +++ b/internal/toolcall/toolcalls_test.go @@ -53,6 +53,21 @@ func TestParseToolCallsSupportsDSMLShellWithCanonicalExampleInCDATA(t *testing.T } } +func TestParseToolCallsPreservesSimpleCDATAInlineMarkupAsText(t *testing.T) { + text := `urgent]]>` + calls := ParseToolCalls(text, []string{"Write"}) + if len(calls) != 1 { + t.Fatalf("expected 1 call, got %#v", calls) + } + got, ok := calls[0].Input["description"].(string) + if !ok { + t.Fatalf("expected description to remain a string, 
got %#v", calls[0].Input["description"]) + } + if got != "urgent" { + t.Fatalf("expected inline markup CDATA to stay raw, got %q", got) + } +} + func TestParseToolCallsTreatsUnclosedCDATAAsText(t *testing.T) { text := `` res := ParseToolCallsDetailed(text, []string{"Write"}) @@ -218,6 +233,21 @@ func TestParseToolCallsTreatsCDATAItemOnlyBodyAsArray(t *testing.T) { } } +func TestParseToolCallsTreatsSingleItemCDATAAsArray(t *testing.T) { + text := `one]]>` + calls := ParseToolCalls(text, []string{"TodoWrite"}) + if len(calls) != 1 { + t.Fatalf("expected one TodoWrite call, got %#v", calls) + } + items, ok := calls[0].Input["todos"].([]any) + if !ok || len(items) != 1 { + t.Fatalf("expected single-item CDATA body to parse as array, got %#v", calls[0].Input["todos"]) + } + if got, ok := items[0].(string); !ok || got != "one" { + t.Fatalf("expected single item value to stay intact, got %#v", items[0]) + } +} + func TestParseToolCallsTreatsCDATAObjectFragmentAsObject(t *testing.T) { payload := `` text := `` diff --git a/internal/toolstream/tool_sieve_xml.go b/internal/toolstream/tool_sieve_xml.go index 9a6789e..a95bc7e 100644 --- a/internal/toolstream/tool_sieve_xml.go +++ b/internal/toolstream/tool_sieve_xml.go @@ -154,6 +154,7 @@ func findPartialXMLToolTagStart(s string) int { "<|tool_calls", "<|invoke", "<|parameter", "<|tool_calls", "<|invoke", "<|parameter", "<|dsml|tool_calls", "<|dsml|invoke", "<|dsml|parameter", + "<|dsml|tool_calls", "<|dsml|invoke", "<|dsml|parameter", "", "<|dsml|tool_calls\n", "<|dsml|tool_calls ", + "<|dsml|tool_calls>", "<|dsml|tool_calls\n", "<|dsml|tool_calls ", "<|dsml|invoke ", "<|dsml|invoke\n", "<|dsml|invoke\t", "<|dsml|invoke\r", "<|dsmltool_calls>", "<|dsmltool_calls\n", "<|dsmltool_calls ", "<|dsmlinvoke ", "<|dsmlinvoke\n", "<|dsmlinvoke\t", "<|dsmlinvoke\r", diff --git a/internal/toolstream/tool_sieve_xml_test.go b/internal/toolstream/tool_sieve_xml_test.go index efcf56d..c05e6cb 100644 --- 
a/internal/toolstream/tool_sieve_xml_test.go +++ b/internal/toolstream/tool_sieve_xml_test.go @@ -745,6 +745,51 @@ func TestProcessToolSieveFullwidthPipeVariantDoesNotLeak(t *testing.T) { } } +// Test <|DSML|tool_calls> with DSML invoke/parameter tags should buffer the +// wrapper instead of leaking it before the block is complete. +func TestProcessToolSieveFullwidthDSMLPrefixVariantDoesNotLeak(t *testing.T) { + var state State + chunks := []string{ + "<|DSML|tool", + "_calls>\n", + "<|DSML|invoke name=\"Bash\">\n", + "<|DSML|parameter name=\"command\">\n", + "<|DSML|parameter name=\"description\">\n", + "\n", + "<|DSML|invoke name=\"Bash\">\n", + "<|DSML|parameter name=\"command\">/dev/null || echo \"No package.json found\"]]>\n", + "<|DSML|parameter name=\"description\">\n", + "\n", + "", + } + var events []Event + for _, c := range chunks { + events = append(events, ProcessChunk(&state, c, []string{"Bash"})...) + } + events = append(events, Flush(&state, []string{"Bash"})...) + + var textContent strings.Builder + var toolCalls int + var names []string + for _, evt := range events { + textContent.WriteString(evt.Content) + for _, call := range evt.ToolCalls { + toolCalls++ + names = append(names, call.Name) + } + } + + if toolCalls != 2 { + t.Fatalf("expected two tool calls from fullwidth DSML prefix variant, got %d events=%#v", toolCalls, events) + } + if len(names) != 2 || names[0] != "Bash" || names[1] != "Bash" { + t.Fatalf("expected two Bash tool calls, got %v", names) + } + if textContent.Len() != 0 { + t.Fatalf("expected fullwidth DSML prefix variant not to leak text, got %q", textContent.String()) + } +} + // Test with <|DSML|invoke> (DSML prefix without leading pipe on wrapper). 
func TestProcessToolSieveDSMLPrefixVariantDoesNotLeak(t *testing.T) { var state State diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index 5ab11aa..d26b8ca 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -104,6 +104,13 @@ test('parseToolCalls keeps canonical XML examples inside DSML CDATA', () => { assert.deepEqual(calls[0].input, { path: 'notes.md', content }); }); +test('parseToolCalls preserves simple inline markup inside CDATA as text', () => { + const payload = 'urgent]]>'; + const calls = parseToolCalls(payload, ['Write']); + assert.equal(calls.length, 1); + assert.equal(calls[0].input.description, 'urgent'); +}); + test('parseToolCalls recovers when CDATA never closes inside a valid wrapper', () => { const payload = ''; const calls = parseToolCalls(payload, ['Write']); @@ -174,6 +181,13 @@ test('parseToolCalls treats CDATA item-only body as array', () => { ]); }); +test('parseToolCalls treats single-item CDATA body as array', () => { + const payload = 'one]]>'; + const calls = parseToolCalls(payload, ['TodoWrite']); + assert.equal(calls.length, 1); + assert.deepEqual(calls[0].input.todos, ['one']); +}); + test('parseToolCalls treats CDATA object fragment as object', () => { const fragment = ''; const payload = ``; @@ -400,6 +414,31 @@ test('sieve emits tool_calls when DSML tag spans multiple chunks', () => { assert.equal(finalCalls[0].name, 'read_file'); }); +test('sieve emits tool_calls when fullwidth DSML prefix variant spans multiple chunks', () => { + const events = runSieve( + [ + '<|DSML|tool', + '_calls>\n', + '<|DSML|invoke name="Bash">\n', + '<|DSML|parameter name="command">\n', + '<|DSML|parameter name="description">\n', + '\n', + '<|DSML|invoke name="Bash">\n', + '<|DSML|parameter name="command">/dev/null || echo "No package.json found"]]>\n', + '<|DSML|parameter name="description">\n', + '\n', + '', + ], + ['Bash'], + ); + const leakedText = 
collectText(events); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(leakedText, ''); + assert.equal(finalCalls.length, 2); + assert.equal(finalCalls[0].name, 'Bash'); + assert.equal(finalCalls[1].name, 'Bash'); +}); + test('sieve keeps long XML tool calls buffered until the closing tag arrives', () => { const longContent = 'x'.repeat(4096); const splitAt = longContent.length / 2;