fix: improve CDATA parsing resilience by ignoring structural markers inside markdown fences within tool calls

This commit is contained in:
CJACK
2026-05-03 06:40:29 +08:00
parent 545ab0802f
commit 072ec57acd
8 changed files with 223 additions and 24 deletions

View File

@@ -168,7 +168,7 @@ OpenAI Chat / Responses 在标准化后、current input file 之前,会默认
工具调用正例现在优先示范官方 DSML 风格:`<|DSML|tool_calls>`、`<|DSML|invoke name="...">`、`<|DSML|parameter name="...">`。
兼容层仍接受旧式纯 `<tool_calls>` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `<dsml-tool-calls>` / `<dsml-invoke>` / `<dsml-parameter>`;但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现;DSML 标签会在解析入口归一化回现有 XML 标签后继续走同一套 parser。
数组参数使用 `<item>...</item>` 子节点表示;当某个参数体只包含 item 子节点时Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... }` 对象。除此之外,解析器还会回收一些更松散的列表写法,例如 JSON array 字面量或逗号分隔的 JSON 项序列,只要它们足够明确;但 `<item>` 仍然是首选形态。若模型把完整结构化 XML fragment 误包进 CDATA兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `<b>urgent</b>` 这种行内标记,兼容层会保留原始字符串,不会强行升成 object / array只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。
数组参数使用 `<item>...</item>` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... }` 对象。除此之外,解析器还会回收一些更松散的列表写法,例如 JSON array 字面量或逗号分隔的 JSON 项序列,只要它们足够明确;但 `<item>` 仍然是首选形态。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `<b>urgent</b>` 这种行内标记,兼容层会保留原始字符串,不会强行升成 object / array;只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。对 `command` / `content` 等长文本参数,CDATA 内部的 Markdown fenced DSML / XML 示例会作为原文保护;示例里的 `]]></parameter>`、`</tool_calls>` 不会截断外层工具调用,解析器会继续等待围栏外真正的参数 / wrapper 结束标签。
Go 侧读取 DeepSeek SSE 时不再依赖 `bufio.Scanner` 的固定 2MiB 单行上限;当写文件类工具把很长的 `content` 放在单个 `data:` 行里返回时,非流式收集、流式解析和 auto-continue 透传都会保留完整行,再进入同一套工具解析与序列化流程。
在 assistant 最终回包阶段,如果某个 tool 参数在声明 schema 中明确是 `string`,兼容层会在把解析后的 `tool_calls` / `function_call` 重新序列化成 OpenAI / Responses / Claude 可见参数前,递归把该路径上的 number / bool / object / array 统一转成字符串;其中 object / array 会压成紧凑 JSON 字符串。这个保护只对 schema 明确声明为 string 的路径生效,不会改写本来就是 `number` / `boolean` / `object` / `array` 的参数。这样可以兼容 DeepSeek 输出了结构化片段、但上游客户端工具 schema 又严格要求字符串参数的场景(例如 `content`、`prompt`、`path`、`taskId` 等)。
工具 schema 的权威来源始终是**当前请求实际携带的 schema**,而不是同名工具在其他 runtime(Claude Code / OpenCode / Codex 等)里的默认印象。兼容层现在会同时兼容 OpenAI 风格 `function.parameters`、直接工具对象上的 `parameters` / `input_schema`、以及 camelCase 的 `inputSchema` / `schema`,并在最终输出阶段按这份请求内 schema 决定是保留 array/object,还是仅对明确声明为 `string` 的路径做字符串化。该规则同样适用于 Claude 的流式收尾和 Vercel Node 流式 tool-call formatter,避免不同 runtime 因 schema shape 差异而出现同名工具参数类型漂移。

View File

@@ -60,6 +60,7 @@
- 不符合新格式的块不会执行,并继续按原样文本透传
- fenced code block(反引号 `` ``` `` 和波浪线 `~~~`)中的 XML 示例始终按普通文本处理
- 支持嵌套围栏(如 4 反引号嵌套 3 反引号)和 CDATA 内围栏保护
- 对 `command` / `content` 等长文本参数,CDATA 内部如果包含 Markdown fenced DSML / XML 示例,即使示例里出现 `]]></parameter>` / `</tool_calls>` 这类看起来像外层结束标签的片段,也会继续按参数原文保留,直到真正位于围栏外的外层结束标签
- 如果模型把 `<![CDATA[` 打开后却没有闭合,流式扫描阶段仍会保守地继续缓冲,不会误把 CDATA 里的示例 XML 当成真实工具调用;在最终 parse / flush 恢复阶段,会对这类 loose CDATA 做窄修复,尽量保住外层已完整包裹的真实工具调用
- 当文本中 mention 了某种标签名(如 `<dsml|tool_calls>` 或 Markdown inline code 里的 `<|DSML|tool_calls>`),而后面紧跟真正工具调用时,sieve 会跳过不可解析的 mention 候选并继续匹配后续真实工具块,不会因 mention 导致工具调用丢失,也不会截断 mention 后的正文
- Go 侧 SSE 读取不再使用 `bufio.Scanner` 的固定 token 上限;单个 `data:` 行中包含很长的写文件参数时,非流式收集、流式解析与 auto-continue 透传都应保留完整行,再交给 tool parser 处理

View File

@@ -21,7 +21,7 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
var b strings.Builder
b.Grow(len(text))
for i := 0; i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
if blocked {
b.WriteString(text[i:])
break

View File

@@ -147,13 +147,14 @@ func stripFencedCodeBlocks(text string) string {
inFence := false
fenceMarker := ""
inCDATA := false
cdataFenceMarker := ""
// Track builder length when a fence opens so we can preserve content
// collected before the unclosed fence.
beforeFenceLen := 0
for _, line := range lines {
if inCDATA || cdataStartsBeforeFence(line) {
b.WriteString(line)
inCDATA = updateCDATAState(inCDATA, line)
inCDATA, cdataFenceMarker = updateCDATAStateForStrip(inCDATA, cdataFenceMarker, line)
continue
}
trimmed := strings.TrimLeft(line, " \t")
@@ -210,28 +211,63 @@ func firstFenceMarkerIndex(line string) int {
}
}
func updateCDATAState(inCDATA bool, line string) bool {
func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool, string) {
lower := strings.ToLower(line)
pos := 0
state := inCDATA
for pos < len(lower) {
if state {
end := strings.Index(lower[pos:], "]]>")
if end < 0 {
return true
}
pos += end + len("]]>")
state = false
continue
}
fenceMarker := cdataFenceMarker
if !state {
start := strings.Index(lower[pos:], "<![cdata[")
if start < 0 {
return false
return false, ""
}
pos += start + len("<![cdata[")
state = true
}
return state
if !state {
return false, ""
}
trimmed := strings.TrimLeft(line, " \t")
if fenceMarker == "" {
if marker, ok := parseFenceOpen(trimmed); ok {
fenceMarker = marker
}
} else if isFenceClose(trimmed, fenceMarker) {
fenceMarker = ""
}
for pos < len(lower) {
end := strings.Index(lower[pos:], "]]>")
if end < 0 {
return true, fenceMarker
}
endPos := pos + end
pos = endPos + len("]]>")
if fenceMarker != "" {
continue
}
if cdataEndLooksStructural(lower, pos) || strings.TrimSpace(lower[pos:]) == "" {
state = false
for pos < len(lower) {
start := strings.Index(lower[pos:], "<![cdata[")
if start < 0 {
return false, ""
}
pos += start + len("<![cdata[")
state = true
trimmedTail := strings.TrimLeft(line[pos:], " \t")
if marker, ok := parseFenceOpen(trimmedTail); ok {
fenceMarker = marker
} else {
fenceMarker = ""
}
break
}
continue
}
}
return state, fenceMarker
}
func parseFenceOpen(line string) (string, bool) {

View File

@@ -144,7 +144,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
lower := strings.ToLower(text)
target := "<" + strings.ToLower(tag)
for i := maxInt(from, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
if blocked {
return -1, -1, "", false
}
@@ -170,7 +170,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
closeTarget := "</" + strings.ToLower(tag)
depth := 1
for i := maxInt(from, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
if blocked {
return -1, -1, false
}
@@ -206,14 +206,14 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
return -1, -1, false
}
func skipXMLIgnoredSection(lower string, i int) (next int, advanced bool, blocked bool) {
func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool, blocked bool) {
switch {
case strings.HasPrefix(lower[i:], "<![cdata["):
end := strings.Index(lower[i+len("<![cdata["):], "]]>")
end := findToolCDATAEnd(text, lower, i+len("<![cdata["))
if end < 0 {
return 0, false, true
}
return i + len("<![cdata[") + end + len("]]>"), true, false
return end + len("]]>"), true, false
case strings.HasPrefix(lower[i:], "<!--"):
end := strings.Index(lower[i+len("<!--"):], "-->")
if end < 0 {
@@ -225,6 +225,69 @@ func skipXMLIgnoredSection(lower string, i int) (next int, advanced bool, blocke
}
}
// findToolCDATAEnd returns the absolute offset of the "]]>" marker that closes
// a CDATA section whose payload begins at from, or -1 when none qualifies.
//
// Candidates are examined left to right. A candidate is skipped when the
// payload preceding it (text[from:candidate]) ends inside an open Markdown
// fence. Among the remaining candidates, the first one that is followed by
// optional whitespace and "</" wins. If no candidate has that shape, the first
// candidate outside a fence is returned as a fallback.
//
// NOTE(review): lower is indexed with offsets that are bounds-checked against
// text, so this presumably expects lower to be a lowercased copy of text with
// identical byte offsets — confirm against callers.
func findToolCDATAEnd(text, lower string, from int) int {
	if from < 0 || from > len(text) {
		return -1
	}
	const closeMarker = "]]>"
	fallback := -1
	cursor := from
	for cursor < len(text) {
		offset := strings.Index(lower[cursor:], closeMarker)
		if offset < 0 {
			return fallback
		}
		candidate := cursor + offset
		cursor = candidate + len(closeMarker)
		// A marker that sits inside a fenced example is payload text, not a terminator.
		if cdataOffsetIsInsideMarkdownFence(text[from:candidate]) {
			continue
		}
		if cdataEndLooksStructural(lower, cursor) {
			return candidate
		}
		if fallback < 0 {
			fallback = candidate
		}
	}
	return fallback
}
// cdataEndLooksStructural reports whether lower[after:], once any leading
// spaces, tabs, carriage returns and newlines are skipped, starts with "</".
func cdataEndLooksStructural(lower string, after int) bool {
	rest := strings.TrimLeft(lower[after:], " \t\r\n")
	return strings.HasPrefix(rest, "</")
}
func cdataOffsetIsInsideMarkdownFence(fragment string) bool {
if fragment == "" {
return false
}
lines := strings.SplitAfter(fragment, "\n")
inFence := false
fenceMarker := ""
for _, line := range lines {
trimmed := strings.TrimLeft(line, " \t")
if !inFence {
if marker, ok := parseFenceOpen(trimmed); ok {
inFence = true
fenceMarker = marker
}
continue
}
if isFenceClose(trimmed, fenceMarker) {
inFence = false
fenceMarker = ""
}
}
return inFence
}
func findXMLTagEnd(text string, from int) int {
quote := byte(0)
for i := maxInt(from, 0); i < len(text); i++ {

View File

@@ -30,7 +30,7 @@ type ToolMarkupTag struct {
func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
lower := strings.ToLower(text)
for i := 0; i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
if blocked {
return hasDSML, hasCanonical
}
@@ -58,7 +58,7 @@ func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical
func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
lower := strings.ToLower(text)
for i := 0; i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
if blocked {
return hasDSML, hasCanonical
}
@@ -90,7 +90,7 @@ func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanon
func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) {
lower := strings.ToLower(text)
for i := maxInt(start, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
if blocked {
return ToolMarkupTag{}, false
}

View File

@@ -138,6 +138,39 @@ func TestParseToolCallsSupportsDSMLShellWithCanonicalExampleInCDATA(t *testing.T
}
}
// TestParseToolCallsKeepsHereDocCDATAWithFencedDSMLAndLiteralCDATAEnd checks
// that a heredoc command wrapped in CDATA is returned verbatim even though it
// embeds a fenced DSML example containing its own "]]>" and closing tags, and
// that the sibling "description" parameter following it is still parsed.
func TestParseToolCallsKeepsHereDocCDATAWithFencedDSMLAndLiteralCDATAEnd(t *testing.T) {
	heredocLines := []string{
		"cat > docs/project-value.md << 'ENDOFFILE'",
		"# DS2API project value",
		"",
		"```xml",
		`<|DSML|tool_calls>`,
		` <|DSML|invoke name="Bash">`,
		` <|DSML|parameter name="command"><![CDATA[grep -E "error|fail" < input.log 2>&1]]></|DSML|parameter>`,
		` </|DSML|invoke>`,
		`</|DSML|tool_calls>`,
		"```",
		"",
		"Only the literal `]]>` needs special handling.",
		"",
		"ENDOFFILE",
		`echo "Done. Lines: $(wc -l < docs/project-value.md)"`,
	}
	command := strings.Join(heredocLines, "\n")

	const (
		beforeCommand = `<|DSML|tool_calls><|DSML|invoke name="Bash"><|DSML|parameter name="command"><![CDATA[`
		afterCommand  = `]]></|DSML|parameter><|DSML|parameter name="description"><![CDATA[Write project value doc]]></|DSML|parameter></|DSML|invoke></|DSML|tool_calls>`
	)
	calls := ParseToolCalls(beforeCommand+command+afterCommand, []string{"Bash"})
	if len(calls) != 1 {
		t.Fatalf("expected one DSML call with extreme heredoc CDATA, got %#v", calls)
	}

	// The embedded example's "]]>" must not have truncated the command.
	if got, _ := calls[0].Input["command"].(string); got != command {
		t.Fatalf("expected full heredoc command to survive, got:\n%q\nwant:\n%q", got, command)
	}
	if calls[0].Input["description"] != "Write project value doc" {
		t.Fatalf("expected sibling parameter after command, got %#v", calls[0].Input)
	}
}
func TestParseToolCallsPreservesSimpleCDATAInlineMarkupAsText(t *testing.T) {
text := `<tool_calls><invoke name="Write"><parameter name="description"><![CDATA[<b>urgent</b>]]></parameter></invoke></tool_calls>`
calls := ParseToolCalls(text, []string{"Write"})

View File

@@ -265,6 +265,72 @@ func TestProcessToolSieveKeepsCDATAEmbeddedToolClosingBuffered(t *testing.T) {
}
}
// TestProcessToolSieveKeepsExtremeHereDocCDATAUntilOuterClose streams a DSML
// tool call whose CDATA command embeds a fenced DSML example. The command is
// split right after the example's inner closing wrapper. The test expects no
// content or tool-call events for the first three chunks, exactly one tool
// call overall, no leaked text, and the command to arrive unmodified.
func TestProcessToolSieveKeepsExtremeHereDocCDATAUntilOuterClose(t *testing.T) {
	var state State
	heredocLines := []string{
		"cat > docs/project-value.md << 'ENDOFFILE'",
		"# DS2API project value",
		"",
		"```xml",
		`<|DSML|tool_calls>`,
		` <|DSML|invoke name="Bash">`,
		` <|DSML|parameter name="command"><![CDATA[grep -E "error|fail" < input.log 2>&1]]></|DSML|parameter>`,
		` </|DSML|invoke>`,
		`</|DSML|tool_calls>`,
		"```",
		"",
		"Only the literal `]]>` needs special handling.",
		"",
		"ENDOFFILE",
		`echo "Done. Lines: $(wc -l < docs/project-value.md)"`,
	}
	command := strings.Join(heredocLines, "\n")

	// Split the command immediately after the example's inner closing wrapper.
	const innerWrapperClose = `</|DSML|tool_calls>`
	innerClose := strings.Index(command, innerWrapperClose) + len(innerWrapperClose)
	head, tail := command[:innerClose], command[innerClose:]

	chunks := []string{
		`<|DSML|tool_calls>` + "\n",
		`<|DSML|invoke name="Bash">` + "\n",
		`<|DSML|parameter name="command"><![CDATA[` + head,
		tail,
		`]]></|DSML|parameter>` + "\n",
		`<|DSML|parameter name="description"><![CDATA[Write project value doc]]></|DSML|parameter>` + "\n",
		`</|DSML|invoke>` + "\n",
		`</|DSML|tool_calls>`,
	}

	var events []Event
	for idx, chunk := range chunks {
		emitted := ProcessChunk(&state, chunk, []string{"Bash"})
		if idx <= 2 {
			for _, evt := range emitted {
				if evt.Content != "" || len(evt.ToolCalls) > 0 {
					t.Fatalf("expected no events before outer close, chunk=%d events=%#v", idx, emitted)
				}
			}
		}
		events = append(events, emitted...)
	}
	events = append(events, Flush(&state, []string{"Bash"})...)

	var (
		leaked     strings.Builder
		gotCommand string
		toolCalls  int
	)
	for _, evt := range events {
		leaked.WriteString(evt.Content)
		if n := len(evt.ToolCalls); n > 0 {
			toolCalls += n
			gotCommand, _ = evt.ToolCalls[0].Input["command"].(string)
		}
	}

	if toolCalls != 1 {
		t.Fatalf("expected one parsed tool call, got %d events=%#v", toolCalls, events)
	}
	if leaked.Len() != 0 {
		t.Fatalf("expected no leaked text, got %q", leaked.String())
	}
	if gotCommand != command {
		t.Fatalf("expected full heredoc command to survive, got len=%d want=%d", len(gotCommand), len(command))
	}
}
func TestProcessToolSieveFallsBackWhenCDATANeverCloses(t *testing.T) {
var state State
chunks := []string{