fix: ensure CDATA parsing correctly tracks line offsets to preserve compact tool call content

This commit is contained in:
CJACK
2026-05-03 06:49:22 +08:00
parent 072ec57acd
commit 51d3578465
3 changed files with 70 additions and 1 deletions

View File

@@ -216,6 +216,7 @@ func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool
pos := 0
state := inCDATA
fenceMarker := cdataFenceMarker
lineForFence := line
if !state {
start := strings.Index(lower[pos:], "<![cdata[")
if start < 0 {
@@ -223,12 +224,13 @@ func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool
}
pos += start + len("<![cdata[")
state = true
lineForFence = line[pos:]
}
if !state {
return false, ""
}
trimmed := strings.TrimLeft(line, " \t")
trimmed := strings.TrimLeft(lineForFence, " \t")
if fenceMarker == "" {
if marker, ok := parseFenceOpen(trimmed); ok {
fenceMarker = marker

View File

@@ -171,6 +171,28 @@ func TestParseToolCallsKeepsHereDocCDATAWithFencedDSMLAndLiteralCDATAEnd(t *test
}
}
// TestParseToolCallsKeepsCompactCDATAWithImmediateFencedDSML wraps a payload
// that begins directly with a fenced DSML tool-call snippet (itself carrying a
// nested CDATA section) inside a single-line Write invocation, and asserts
// that ParseToolCalls yields exactly one call whose "content" input equals the
// payload verbatim.
func TestParseToolCallsKeepsCompactCDATAWithImmediateFencedDSML(t *testing.T) {
	const (
		envelopeOpen  = `<tool_calls><invoke name="Write"><parameter name="content"><![CDATA[`
		envelopeClose = `]]></parameter></invoke></tool_calls>`
	)

	// The fence opens on the very first payload line, immediately after the
	// outer CDATA opener once the envelope is attached.
	payloadLines := []string{
		"```xml",
		`<|DSML|tool_calls>`,
		` <|DSML|invoke name="Bash">`,
		` <|DSML|parameter name="command"><![CDATA[echo compact]]></|DSML|parameter>`,
		` </|DSML|invoke>`,
		`</|DSML|tool_calls>`,
		"```",
		"tail",
	}
	payload := strings.Join(payloadLines, "\n")

	parsed := ParseToolCalls(envelopeOpen+payload+envelopeClose, []string{"Write"})
	if len(parsed) != 1 {
		t.Fatalf("expected one compact CDATA call, got %#v", parsed)
	}

	if got := parsed[0].Input["content"]; got != payload {
		t.Fatalf("expected compact CDATA content to survive, got %#v", got)
	}
}
func TestParseToolCallsPreservesSimpleCDATAInlineMarkupAsText(t *testing.T) {
text := `<tool_calls><invoke name="Write"><parameter name="description"><![CDATA[<b>urgent</b>]]></parameter></invoke></tool_calls>`
calls := ParseToolCalls(text, []string{"Write"})

View File

@@ -331,6 +331,51 @@ func TestProcessToolSieveKeepsExtremeHereDocCDATAUntilOuterClose(t *testing.T) {
}
}
// TestProcessToolSieveKeepsCompactCDATAWithImmediateFencedDSML streams a Write
// invocation whose CDATA payload starts directly with a fenced DSML snippet.
// The input is delivered in three chunks: the envelope opener plus the opening
// fence line, the remainder of the payload, and the envelope closer. After
// flushing, the test asserts that exactly one tool call was emitted, that no
// event carried text content, and that the call's "content" input equals the
// payload verbatim.
func TestProcessToolSieveKeepsCompactCDATAWithImmediateFencedDSML(t *testing.T) {
	const (
		envelopeOpen  = `<tool_calls><invoke name="Write"><parameter name="content"><![CDATA[`
		envelopeClose = `]]></parameter></invoke></tool_calls>`
	)

	payload := strings.Join([]string{
		"```xml",
		`<|DSML|tool_calls>`,
		` <|DSML|invoke name="Bash">`,
		` <|DSML|parameter name="command"><![CDATA[echo compact]]></|DSML|parameter>`,
		` </|DSML|invoke>`,
		`</|DSML|tool_calls>`,
		"```",
		"tail",
	}, "\n")

	// Split right after the opening fence line so the first chunk ends with
	// the CDATA opener followed immediately by the fence.
	splitAt := len("```xml\n")
	pieces := []string{
		envelopeOpen + payload[:splitAt],
		payload[splitAt:],
		envelopeClose,
	}

	var (
		sieve    State
		produced []Event
	)
	for i := range pieces {
		produced = append(produced, ProcessChunk(&sieve, pieces[i], []string{"Write"})...)
	}
	produced = append(produced, Flush(&sieve, []string{"Write"})...)

	var (
		leaked    strings.Builder
		captured  string
		callCount int
	)
	for _, ev := range produced {
		leaked.WriteString(ev.Content)
		n := len(ev.ToolCalls)
		if n == 0 {
			continue
		}
		callCount += n
		// A non-string value leaves captured empty, which fails the final
		// comparison below.
		captured, _ = ev.ToolCalls[0].Input["content"].(string)
	}

	if callCount != 1 {
		t.Fatalf("expected one compact CDATA tool call, got %d events=%#v", callCount, produced)
	}
	if leaked.Len() != 0 {
		t.Fatalf("expected no leaked text, got %q", leaked.String())
	}
	if captured != payload {
		t.Fatalf("expected compact CDATA content to survive, got len=%d want=%d", len(captured), len(payload))
	}
}
func TestProcessToolSieveFallsBackWhenCDATANeverCloses(t *testing.T) {
var state State
chunks := []string{