mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-11 11:47:43 +08:00
fix: improve CDATA parsing resilience by ignoring structural markers inside markdown fences within tool calls
This commit is contained in:
@@ -168,7 +168,7 @@ OpenAI Chat / Responses 在标准化后、current input file 之前,会默认
|
||||
|
||||
工具调用正例现在优先示范官方 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。
|
||||
兼容层仍接受旧式纯 `<tool_calls>` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `<dsml-tool-calls>` / `<dsml-invoke>` / `<dsml-parameter>`;但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意:这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现;DSML 标签会在解析入口归一化回现有 XML 标签后继续走同一套 parser。
|
||||
数组参数使用 `<item>...</item>` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... }` 对象。除此之外,解析器还会回收一些更松散的列表写法,例如 JSON array 字面量或逗号分隔的 JSON 项序列,只要它们足够明确;但 `<item>` 仍然是首选形态。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过,如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `<b>urgent</b>` 这种行内标记,兼容层会保留原始字符串,不会强行升成 object / array;只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。
|
||||
数组参数使用 `<item>...</item>` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... }` 对象。除此之外,解析器还会回收一些更松散的列表写法,例如 JSON array 字面量或逗号分隔的 JSON 项序列,只要它们足够明确;但 `<item>` 仍然是首选形态。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过,如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `<b>urgent</b>` 这种行内标记,兼容层会保留原始字符串,不会强行升成 object / array;只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。对 `command` / `content` 等长文本参数,CDATA 内部的 Markdown fenced DSML / XML 示例会作为原文保护;示例里的 `]]></parameter>` 或 `</tool_calls>` 不会截断外层工具调用,解析器会继续等待围栏外真正的参数 / wrapper 结束标签。
|
||||
Go 侧读取 DeepSeek SSE 时不再依赖 `bufio.Scanner` 的固定 2MiB 单行上限;当写文件类工具把很长的 `content` 放在单个 `data:` 行里返回时,非流式收集、流式解析和 auto-continue 透传都会保留完整行,再进入同一套工具解析与序列化流程。
|
||||
在 assistant 最终回包阶段,如果某个 tool 参数在声明 schema 中明确是 `string`,兼容层会在把解析后的 `tool_calls` / `function_call` 重新序列化成 OpenAI / Responses / Claude 可见参数前,递归把该路径上的 number / bool / object / array 统一转成字符串;其中 object / array 会压成紧凑 JSON 字符串。这个保护只对 schema 明确声明为 string 的路径生效,不会改写本来就是 `number` / `boolean` / `object` / `array` 的参数。这样可以兼容 DeepSeek 输出了结构化片段、但上游客户端工具 schema 又严格要求字符串参数的场景(例如 `content`、`prompt`、`path`、`taskId` 等)。
|
||||
工具 schema 的权威来源始终是**当前请求实际携带的 schema**,而不是同名工具在其他 runtime(Claude Code / OpenCode / Codex 等)里的默认印象。兼容层现在会同时兼容 OpenAI 风格 `function.parameters`、直接工具对象上的 `parameters` / `input_schema`、以及 camelCase 的 `inputSchema` / `schema`,并在最终输出阶段按这份请求内 schema 决定是保留 array/object,还是仅对明确声明为 `string` 的路径做字符串化。该规则同样适用于 Claude 的流式收尾和 Vercel Node 流式 tool-call formatter,避免不同 runtime 因 schema shape 差异而出现同名工具参数类型漂移。
|
||||
|
||||
@@ -60,6 +60,7 @@
|
||||
- 不符合新格式的块不会执行,并继续按原样文本透传
|
||||
- fenced code block(反引号 `` ``` `` 和波浪线 `~~~`)中的 XML 示例始终按普通文本处理
|
||||
- 支持嵌套围栏(如 4 反引号嵌套 3 反引号)和 CDATA 内围栏保护
|
||||
- 对 `command` / `content` 等长文本参数,CDATA 内部如果包含 Markdown fenced DSML / XML 示例,即使示例里出现 `]]></parameter>` / `</tool_calls>` 这类看起来像外层结束标签的片段,也会继续按参数原文保留,直到真正位于围栏外的外层结束标签
|
||||
- 如果模型把 `<![CDATA[` 打开后却没有闭合,流式扫描阶段仍会保守地继续缓冲,不会误把 CDATA 里的示例 XML 当成真实工具调用;在最终 parse / flush 恢复阶段,会对这类 loose CDATA 做窄修复,尽量保住外层已完整包裹的真实工具调用
|
||||
- 当文本中 mention 了某种标签名(如 `<dsml|tool_calls>` 或 Markdown inline code 里的 `<|DSML|tool_calls>`)而后面紧跟真正工具调用时,sieve 会跳过不可解析的 mention 候选并继续匹配后续真实工具块,不会因 mention 导致工具调用丢失,也不会截断 mention 后的正文
|
||||
- Go 侧 SSE 读取不再使用 `bufio.Scanner` 的固定 token 上限;单个 `data:` 行中包含很长的写文件参数时,非流式收集、流式解析与 auto-continue 透传都应保留完整行,再交给 tool parser 处理
|
||||
|
||||
@@ -21,7 +21,7 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
||||
var b strings.Builder
|
||||
b.Grow(len(text))
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
if blocked {
|
||||
b.WriteString(text[i:])
|
||||
break
|
||||
|
||||
@@ -147,13 +147,14 @@ func stripFencedCodeBlocks(text string) string {
|
||||
inFence := false
|
||||
fenceMarker := ""
|
||||
inCDATA := false
|
||||
cdataFenceMarker := ""
|
||||
// Track builder length when a fence opens so we can preserve content
|
||||
// collected before the unclosed fence.
|
||||
beforeFenceLen := 0
|
||||
for _, line := range lines {
|
||||
if inCDATA || cdataStartsBeforeFence(line) {
|
||||
b.WriteString(line)
|
||||
inCDATA = updateCDATAState(inCDATA, line)
|
||||
inCDATA, cdataFenceMarker = updateCDATAStateForStrip(inCDATA, cdataFenceMarker, line)
|
||||
continue
|
||||
}
|
||||
trimmed := strings.TrimLeft(line, " \t")
|
||||
@@ -210,28 +211,63 @@ func firstFenceMarkerIndex(line string) int {
|
||||
}
|
||||
}
|
||||
|
||||
func updateCDATAState(inCDATA bool, line string) bool {
|
||||
func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool, string) {
|
||||
lower := strings.ToLower(line)
|
||||
pos := 0
|
||||
state := inCDATA
|
||||
for pos < len(lower) {
|
||||
if state {
|
||||
end := strings.Index(lower[pos:], "]]>")
|
||||
if end < 0 {
|
||||
return true
|
||||
}
|
||||
pos += end + len("]]>")
|
||||
state = false
|
||||
continue
|
||||
}
|
||||
fenceMarker := cdataFenceMarker
|
||||
if !state {
|
||||
start := strings.Index(lower[pos:], "<![cdata[")
|
||||
if start < 0 {
|
||||
return false
|
||||
return false, ""
|
||||
}
|
||||
pos += start + len("<![cdata[")
|
||||
state = true
|
||||
}
|
||||
return state
|
||||
if !state {
|
||||
return false, ""
|
||||
}
|
||||
|
||||
trimmed := strings.TrimLeft(line, " \t")
|
||||
if fenceMarker == "" {
|
||||
if marker, ok := parseFenceOpen(trimmed); ok {
|
||||
fenceMarker = marker
|
||||
}
|
||||
} else if isFenceClose(trimmed, fenceMarker) {
|
||||
fenceMarker = ""
|
||||
}
|
||||
|
||||
for pos < len(lower) {
|
||||
end := strings.Index(lower[pos:], "]]>")
|
||||
if end < 0 {
|
||||
return true, fenceMarker
|
||||
}
|
||||
endPos := pos + end
|
||||
pos = endPos + len("]]>")
|
||||
if fenceMarker != "" {
|
||||
continue
|
||||
}
|
||||
if cdataEndLooksStructural(lower, pos) || strings.TrimSpace(lower[pos:]) == "" {
|
||||
state = false
|
||||
for pos < len(lower) {
|
||||
start := strings.Index(lower[pos:], "<![cdata[")
|
||||
if start < 0 {
|
||||
return false, ""
|
||||
}
|
||||
pos += start + len("<![cdata[")
|
||||
state = true
|
||||
trimmedTail := strings.TrimLeft(line[pos:], " \t")
|
||||
if marker, ok := parseFenceOpen(trimmedTail); ok {
|
||||
fenceMarker = marker
|
||||
} else {
|
||||
fenceMarker = ""
|
||||
}
|
||||
break
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
return state, fenceMarker
|
||||
}
|
||||
|
||||
func parseFenceOpen(line string) (string, bool) {
|
||||
|
||||
@@ -144,7 +144,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
|
||||
lower := strings.ToLower(text)
|
||||
target := "<" + strings.ToLower(tag)
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
if blocked {
|
||||
return -1, -1, "", false
|
||||
}
|
||||
@@ -170,7 +170,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
||||
closeTarget := "</" + strings.ToLower(tag)
|
||||
depth := 1
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
if blocked {
|
||||
return -1, -1, false
|
||||
}
|
||||
@@ -206,14 +206,14 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
||||
return -1, -1, false
|
||||
}
|
||||
|
||||
func skipXMLIgnoredSection(lower string, i int) (next int, advanced bool, blocked bool) {
|
||||
func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool, blocked bool) {
|
||||
switch {
|
||||
case strings.HasPrefix(lower[i:], "<![cdata["):
|
||||
end := strings.Index(lower[i+len("<![cdata["):], "]]>")
|
||||
end := findToolCDATAEnd(text, lower, i+len("<![cdata["))
|
||||
if end < 0 {
|
||||
return 0, false, true
|
||||
}
|
||||
return i + len("<![cdata[") + end + len("]]>"), true, false
|
||||
return end + len("]]>"), true, false
|
||||
case strings.HasPrefix(lower[i:], "<!--"):
|
||||
end := strings.Index(lower[i+len("<!--"):], "-->")
|
||||
if end < 0 {
|
||||
@@ -225,6 +225,69 @@ func skipXMLIgnoredSection(lower string, i int) (next int, advanced bool, blocke
|
||||
}
|
||||
}
|
||||
|
||||
func findToolCDATAEnd(text, lower string, from int) int {
|
||||
if from < 0 || from > len(text) {
|
||||
return -1
|
||||
}
|
||||
const closeMarker = "]]>"
|
||||
firstNonFenceEnd := -1
|
||||
for searchFrom := from; searchFrom < len(text); {
|
||||
rel := strings.Index(lower[searchFrom:], closeMarker)
|
||||
if rel < 0 {
|
||||
break
|
||||
}
|
||||
end := searchFrom + rel
|
||||
searchFrom = end + len(closeMarker)
|
||||
if cdataOffsetIsInsideMarkdownFence(text[from:end]) {
|
||||
continue
|
||||
}
|
||||
if firstNonFenceEnd < 0 {
|
||||
firstNonFenceEnd = end
|
||||
}
|
||||
if cdataEndLooksStructural(lower, searchFrom) {
|
||||
return end
|
||||
}
|
||||
}
|
||||
return firstNonFenceEnd
|
||||
}
|
||||
|
||||
// cdataEndLooksStructural reports whether the text at byte offset after in
// lower, once leading spaces, tabs, carriage returns and newlines are skipped,
// starts with "</" — i.e. whether a closing tag immediately follows.
//
// An offset outside [0, len(lower)] yields false instead of panicking, which
// mirrors the range guard findToolCDATAEnd applies to its own offset.
func cdataEndLooksStructural(lower string, after int) bool {
	if after < 0 || after > len(lower) {
		return false
	}
	rest := strings.TrimLeft(lower[after:], " \t\r\n")
	return strings.HasPrefix(rest, "</")
}
|
||||
|
||||
func cdataOffsetIsInsideMarkdownFence(fragment string) bool {
|
||||
if fragment == "" {
|
||||
return false
|
||||
}
|
||||
lines := strings.SplitAfter(fragment, "\n")
|
||||
inFence := false
|
||||
fenceMarker := ""
|
||||
for _, line := range lines {
|
||||
trimmed := strings.TrimLeft(line, " \t")
|
||||
if !inFence {
|
||||
if marker, ok := parseFenceOpen(trimmed); ok {
|
||||
inFence = true
|
||||
fenceMarker = marker
|
||||
}
|
||||
continue
|
||||
}
|
||||
if isFenceClose(trimmed, fenceMarker) {
|
||||
inFence = false
|
||||
fenceMarker = ""
|
||||
}
|
||||
}
|
||||
return inFence
|
||||
}
|
||||
|
||||
func findXMLTagEnd(text string, from int) int {
|
||||
quote := byte(0)
|
||||
for i := maxInt(from, 0); i < len(text); i++ {
|
||||
|
||||
@@ -30,7 +30,7 @@ type ToolMarkupTag struct {
|
||||
func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
||||
lower := strings.ToLower(text)
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
if blocked {
|
||||
return hasDSML, hasCanonical
|
||||
}
|
||||
@@ -58,7 +58,7 @@ func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical
|
||||
func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
||||
lower := strings.ToLower(text)
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
if blocked {
|
||||
return hasDSML, hasCanonical
|
||||
}
|
||||
@@ -90,7 +90,7 @@ func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanon
|
||||
func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) {
|
||||
lower := strings.ToLower(text)
|
||||
for i := maxInt(start, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
if blocked {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
|
||||
@@ -138,6 +138,39 @@ func TestParseToolCallsSupportsDSMLShellWithCanonicalExampleInCDATA(t *testing.T
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsKeepsHereDocCDATAWithFencedDSMLAndLiteralCDATAEnd(t *testing.T) {
|
||||
command := strings.Join([]string{
|
||||
"cat > docs/project-value.md << 'ENDOFFILE'",
|
||||
"# DS2API project value",
|
||||
"",
|
||||
"```xml",
|
||||
`<|DSML|tool_calls>`,
|
||||
` <|DSML|invoke name="Bash">`,
|
||||
` <|DSML|parameter name="command"><![CDATA[grep -E "error|fail" < input.log 2>&1]]></|DSML|parameter>`,
|
||||
` </|DSML|invoke>`,
|
||||
`</|DSML|tool_calls>`,
|
||||
"```",
|
||||
"",
|
||||
"Only the literal `]]>` needs special handling.",
|
||||
"",
|
||||
"ENDOFFILE",
|
||||
`echo "Done. Lines: $(wc -l < docs/project-value.md)"`,
|
||||
}, "\n")
|
||||
text := `<|DSML|tool_calls><|DSML|invoke name="Bash"><|DSML|parameter name="command"><![CDATA[` + command + `]]></|DSML|parameter><|DSML|parameter name="description"><![CDATA[Write project value doc]]></|DSML|parameter></|DSML|invoke></|DSML|tool_calls>`
|
||||
|
||||
calls := ParseToolCalls(text, []string{"Bash"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected one DSML call with extreme heredoc CDATA, got %#v", calls)
|
||||
}
|
||||
got, _ := calls[0].Input["command"].(string)
|
||||
if got != command {
|
||||
t.Fatalf("expected full heredoc command to survive, got:\n%q\nwant:\n%q", got, command)
|
||||
}
|
||||
if calls[0].Input["description"] != "Write project value doc" {
|
||||
t.Fatalf("expected sibling parameter after command, got %#v", calls[0].Input)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsPreservesSimpleCDATAInlineMarkupAsText(t *testing.T) {
|
||||
text := `<tool_calls><invoke name="Write"><parameter name="description"><![CDATA[<b>urgent</b>]]></parameter></invoke></tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"Write"})
|
||||
|
||||
@@ -265,6 +265,72 @@ func TestProcessToolSieveKeepsCDATAEmbeddedToolClosingBuffered(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveKeepsExtremeHereDocCDATAUntilOuterClose(t *testing.T) {
|
||||
var state State
|
||||
command := strings.Join([]string{
|
||||
"cat > docs/project-value.md << 'ENDOFFILE'",
|
||||
"# DS2API project value",
|
||||
"",
|
||||
"```xml",
|
||||
`<|DSML|tool_calls>`,
|
||||
` <|DSML|invoke name="Bash">`,
|
||||
` <|DSML|parameter name="command"><![CDATA[grep -E "error|fail" < input.log 2>&1]]></|DSML|parameter>`,
|
||||
` </|DSML|invoke>`,
|
||||
`</|DSML|tool_calls>`,
|
||||
"```",
|
||||
"",
|
||||
"Only the literal `]]>` needs special handling.",
|
||||
"",
|
||||
"ENDOFFILE",
|
||||
`echo "Done. Lines: $(wc -l < docs/project-value.md)"`,
|
||||
}, "\n")
|
||||
innerClose := strings.Index(command, `</|DSML|tool_calls>`) + len(`</|DSML|tool_calls>`)
|
||||
chunks := []string{
|
||||
`<|DSML|tool_calls>` + "\n",
|
||||
`<|DSML|invoke name="Bash">` + "\n",
|
||||
`<|DSML|parameter name="command"><![CDATA[` + command[:innerClose],
|
||||
command[innerClose:],
|
||||
`]]></|DSML|parameter>` + "\n",
|
||||
`<|DSML|parameter name="description"><![CDATA[Write project value doc]]></|DSML|parameter>` + "\n",
|
||||
`</|DSML|invoke>` + "\n",
|
||||
`</|DSML|tool_calls>`,
|
||||
}
|
||||
|
||||
var events []Event
|
||||
for i, c := range chunks {
|
||||
next := ProcessChunk(&state, c, []string{"Bash"})
|
||||
if i <= 2 {
|
||||
for _, evt := range next {
|
||||
if evt.Content != "" || len(evt.ToolCalls) > 0 {
|
||||
t.Fatalf("expected no events before outer close, chunk=%d events=%#v", i, next)
|
||||
}
|
||||
}
|
||||
}
|
||||
events = append(events, next...)
|
||||
}
|
||||
events = append(events, Flush(&state, []string{"Bash"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
var gotCommand string
|
||||
toolCalls := 0
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
gotCommand, _ = evt.ToolCalls[0].Input["command"].(string)
|
||||
}
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected one parsed tool call, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if textContent.Len() != 0 {
|
||||
t.Fatalf("expected no leaked text, got %q", textContent.String())
|
||||
}
|
||||
if gotCommand != command {
|
||||
t.Fatalf("expected full heredoc command to survive, got len=%d want=%d", len(gotCommand), len(command))
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveFallsBackWhenCDATANeverCloses(t *testing.T) {
|
||||
var state State
|
||||
chunks := []string{
|
||||
|
||||
Reference in New Issue
Block a user