refactor: unify Go/Node XML tool markup scanning and expand DSML alias support

- Add shared ToolMarkupTag scanner (toolcalls_scan.go) replacing hardcoded alias tables
- Support DSML collapsed tag names (<DSMLtool_calls>, <DSMLinvoke>, <DSMLparameter>)
- Parse JSON literal values from parameter bodies (123→number, true→bool, null)
- Recover unclosed CDATA in final parse/flush via SanitizeLooseCDATA
- Align Go and Node implementations (scanToolMarkupTagAt, findMatchingToolMarkupClose)
- Reject bare <invoke> as unsupported syntax; only a tool_calls wrapper triggers the tool path
- Update API.md and toolcall-semantics.md documentation

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-04-27 17:53:59 +08:00
parent 70467054c3
commit 2d5d211a7a
21 changed files with 1132 additions and 777 deletions

View File

@@ -615,3 +615,68 @@ func TestSieve_DSMLSpaceLookalikeTagNameStaysText(t *testing.T) {
t.Fatalf("相似标签名应作为正文透传, got %q", text.String())
}
}
// TestSieve_DSMLCollapsedTagNamesWithPrefixText streams a tool block written
// with collapsed DSML tag names (<DSMLtool_calls>, <DSMLinvoke>,
// <DSMLparameter>) after a short text prefix. It expects exactly one tool
// call whose CDATA "todos" payload is unchanged, and only the prefix "[]\n"
// emitted as text.
func TestSieve_DSMLCollapsedTagNamesWithPrefixText(t *testing.T) {
	todos := `[x] 检查 toolcalls_format.go 格式化逻辑
[x] 检查 toolcalls_parse.go 解析逻辑
[x] 检查 toolcalls_xml.go 和 toolcalls_dsml.go
[x] 检查 toolcalls_markup.go 和 toolcalls_json_repair.go
[x] 检查 prompt/tool_calls.go 注入逻辑
[x] 检查 toolstream 流式解析
[x] 查看测试文件确认预期行为
[x] 给出调查结论`
	allowed := []string{"update_todo_list"}

	var (
		state  State
		events []Event
	)
	// Feed the block one line per chunk, then flush whatever is still buffered.
	for _, chunk := range []string{
		"[]\n",
		"<DSMLtool_calls>\n",
		"<DSMLinvoke name=\"update_todo_list\">\n",
		"<DSMLparameter name=\"todos\"><![CDATA[" + todos + "]]></DSMLparameter>\n",
		"</DSMLinvoke>\n",
		"</DSMLtool_calls>",
	} {
		events = append(events, ProcessChunk(&state, chunk, allowed)...)
	}
	events = append(events, Flush(&state, allowed)...)

	// Collect all emitted text and the "todos" argument of the last tool call.
	var (
		text     strings.Builder
		gotTodos string
		calls    int
	)
	for i := range events {
		text.WriteString(events[i].Content)
		for j := range events[i].ToolCalls {
			calls++
			gotTodos, _ = events[i].ToolCalls[j].Input["todos"].(string)
		}
	}

	switch emitted := text.String(); {
	case calls != 1:
		t.Fatalf("应解析出 1 个工具调用got %d, text=%q", calls, emitted)
	case gotTodos != todos:
		t.Fatalf("todos 应完整保留got %q", gotTodos)
	case emitted != "[]\n":
		t.Fatalf("前置正文应完整保留且不泄漏工具块, got %q", emitted)
	}
}
// TestSieve_DSMLCollapsedLookalikeTagNameStaysText checks that a wrapper whose
// name merely starts like a collapsed DSML wrapper (<DSMLtool_calls_extra>)
// produces no tool calls and is passed through verbatim as text.
func TestSieve_DSMLCollapsedLookalikeTagNameStaysText(t *testing.T) {
	const input = "<DSMLtool_calls_extra><DSMLinvoke name=\"update_todo_list\"><DSMLparameter name=\"todos\">x</DSMLparameter></DSMLinvoke></DSMLtool_calls_extra>"
	allowed := []string{"update_todo_list"}

	var state State
	events := ProcessChunk(&state, input, allowed)
	events = append(events, Flush(&state, allowed)...)

	// Tally tool calls and concatenate all emitted text.
	var text strings.Builder
	calls := 0
	for i := range events {
		text.WriteString(events[i].Content)
		calls += len(events[i].ToolCalls)
	}

	if calls != 0 {
		t.Fatalf("相似 collapsed 标签名不应触发工具调用got %d", calls)
	}
	if got := text.String(); got != input {
		t.Fatalf("相似 collapsed 标签名应作为正文透传, got %q", got)
	}
}

View File

@@ -114,10 +114,30 @@ func Flush(state *State, toolNames []string) []Event {
} else {
content := state.capture.String()
if content != "" {
// If capture never resolved into a real tool call, release the
// buffered text instead of swallowing it.
state.noteText(content)
events = append(events, Event{Content: content})
recovered := toolcall.SanitizeLooseCDATA(content)
if recovered != content {
if prefix, calls, suffix, recoveredReady := consumeXMLToolCapture(recovered, toolNames); recoveredReady && len(calls) > 0 {
if prefix != "" {
state.noteText(prefix)
events = append(events, Event{Content: prefix})
}
events = append(events, Event{ToolCalls: calls})
if suffix != "" {
state.noteText(suffix)
events = append(events, Event{Content: suffix})
}
} else {
// If capture never resolved into a real tool call, release
// the buffered text instead of swallowing it.
state.noteText(content)
events = append(events, Event{Content: content})
}
} else {
// If capture never resolved into a real tool call, release the
// buffered text instead of swallowing it.
state.noteText(content)
events = append(events, Event{Content: content})
}
}
}
state.capture.Reset()

View File

@@ -7,7 +7,6 @@ import (
// consumeXMLToolCapture tries to extract complete XML tool call blocks from captured text.
func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, calls []toolcall.ParsedToolCall, suffix string, ready bool) {
lower := strings.ToLower(captured)
anyOpenFound := false
type candidate struct {
start int
@@ -23,41 +22,40 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
var best *candidate
var rejected *rejectedBlock
// Scan every wrapper occurrence. Prose can mention a wrapper tag before the
// actual tool block, including the same variant as the real block.
for _, pair := range xmlToolCallTagPairs {
searchFrom := 0
for searchFrom < len(lower) {
openIdx := findXMLOpenOutsideCDATA(captured, pair.open, searchFrom)
if openIdx < 0 {
break
}
// Find the matching closing tag outside CDATA. Long write-file tool
// calls often contain XML examples in CDATA, including </tool_calls>.
closeIdx := findMatchingXMLToolWrapperClose(captured, pair.open, pair.close, openIdx)
if closeIdx < 0 {
anyOpenFound = true
searchFrom = openIdx + len(pair.open)
continue
}
closeEnd := closeIdx + len(pair.close)
xmlBlock := captured[openIdx:closeEnd]
prefixPart := captured[:openIdx]
suffixPart := captured[closeEnd:]
parsed := toolcall.ParseToolCalls(xmlBlock, toolNames)
if len(parsed) > 0 {
prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
if best == nil || openIdx < best.start {
best = &candidate{start: openIdx, prefix: prefixPart, calls: parsed, suffix: suffixPart}
}
break
}
if rejected == nil || openIdx < rejected.start {
rejected = &rejectedBlock{start: openIdx, prefix: prefixPart + xmlBlock, suffix: suffixPart}
}
searchFrom = openIdx + len(pair.open)
// Scan every recognized tool tag occurrence. Prose can mention a wrapper
// tag before the actual tool block, including the same variant as the real
// block. We only accept complete tool_calls wrappers that parse cleanly.
for searchFrom := 0; searchFrom < len(captured); {
tag, ok := toolcall.FindToolMarkupTagOutsideIgnored(captured, searchFrom)
if !ok {
break
}
if tag.Closing || tag.Name != "tool_calls" {
searchFrom = tag.End + 1
continue
}
closeTag, ok := toolcall.FindMatchingToolMarkupClose(captured, tag)
if !ok {
anyOpenFound = true
searchFrom = tag.End + 1
continue
}
xmlBlock := captured[tag.Start : closeTag.End+1]
prefixPart := captured[:tag.Start]
suffixPart := captured[closeTag.End+1:]
parsed := toolcall.ParseToolCalls(xmlBlock, toolNames)
if len(parsed) > 0 {
prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
if best == nil || tag.Start < best.start {
best = &candidate{start: tag.Start, prefix: prefixPart, calls: parsed, suffix: suffixPart}
}
break
}
if rejected == nil || tag.Start < rejected.start {
rejected = &rejectedBlock{start: tag.Start, prefix: prefixPart + xmlBlock, suffix: suffixPart}
}
searchFrom = tag.End + 1
}
if best != nil {
return best.prefix, best.calls, best.suffix, true
@@ -71,26 +69,19 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
// If this block failed to become a tool call, pass it through as text.
return rejected.prefix, nil, rejected.suffix, true
}
if !containsAnyToolCallWrapper(lower) {
invokeIdx, dsml := firstInvokeIndex(lower)
closeTag := "</tool_calls>"
openWrapper := "<tool_calls>"
if dsml {
closeTag = "</|dsml|tool_calls>"
openWrapper = "<|DSML|tool_calls>"
}
closeIdx := findXMLCloseOutsideCDATA(captured, closeTag, invokeIdx)
if invokeIdx >= 0 && closeIdx > invokeIdx {
closeEnd := closeIdx + len(closeTag)
xmlBlock := openWrapper + captured[invokeIdx:closeIdx] + closeTag
prefixPart := captured[:invokeIdx]
suffixPart := captured[closeEnd:]
parsed := toolcall.ParseToolCalls(xmlBlock, toolNames)
if len(parsed) > 0 {
prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
return prefixPart, parsed, suffixPart, true
if invokeTag, ok := findFirstToolMarkupTagByName(captured, 0, "invoke"); ok {
if wrapperOpen, ok := findFirstToolMarkupTagByName(captured, 0, "tool_calls"); !ok || wrapperOpen.Start > invokeTag.Start {
if closeTag, ok := findFirstToolMarkupTagByNameFrom(captured, invokeTag.Start+1, "tool_calls", true); ok && closeTag.Start > invokeTag.Start {
xmlBlock := "<tool_calls>" + captured[invokeTag.Start:closeTag.End+1]
prefixPart := captured[:invokeTag.Start]
suffixPart := captured[closeTag.End+1:]
parsed := toolcall.ParseToolCalls(xmlBlock, toolNames)
if len(parsed) > 0 {
prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
return prefixPart, parsed, suffixPart, true
}
return prefixPart + captured[invokeTag.Start:closeTag.End+1], nil, suffixPart, true
}
return prefixPart + captured[invokeIdx:closeEnd], nil, suffixPart, true
}
}
return "", nil, "", false
@@ -99,46 +90,35 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
// hasOpenXMLToolTag returns true if captured text contains an XML tool opening tag
// whose SPECIFIC closing tag has not appeared yet.
func hasOpenXMLToolTag(captured string) bool {
for _, pair := range xmlToolCallTagPairs {
openIdx := findXMLOpenOutsideCDATA(captured, pair.open, 0)
if openIdx >= 0 {
if findMatchingXMLToolWrapperClose(captured, pair.open, pair.close, openIdx) < 0 {
return true
}
for searchFrom := 0; searchFrom < len(captured); {
tag, ok := toolcall.FindToolMarkupTagOutsideIgnored(captured, searchFrom)
if !ok {
return false
}
if tag.Closing || tag.Name != "tool_calls" {
searchFrom = tag.End + 1
continue
}
if _, ok := toolcall.FindMatchingToolMarkupClose(captured, tag); !ok {
return true
}
searchFrom = tag.End + 1
}
return false
}
func shouldKeepBareInvokeCapture(captured string) bool {
lower := strings.ToLower(captured)
invokeIdx, dsml := firstInvokeIndex(lower)
if invokeIdx < 0 || containsAnyToolCallWrapper(lower) {
invokeTag, ok := findFirstToolMarkupTagByName(captured, 0, "invoke")
if !ok {
return false
}
invokeOpenLen := len("<invoke")
parameterOpen := "<parameter"
if dsml {
invokeOpenLen = len("<|dsml|invoke")
parameterOpen = "<|dsml|parameter"
if wrapperOpen, ok := findFirstToolMarkupTagByName(captured, 0, "tool_calls"); ok && wrapperOpen.Start <= invokeTag.Start {
return false
}
if dsml && strings.HasPrefix(lower[invokeIdx:], "<|dsml invoke") {
invokeOpenLen = len("<|dsml invoke")
parameterOpen = "<|dsml parameter"
}
if dsml && strings.HasPrefix(lower[invokeIdx:], "<dsml|invoke") {
invokeOpenLen = len("<dsml|invoke")
parameterOpen = "<dsml|parameter"
}
if dsml && strings.HasPrefix(lower[invokeIdx:], "<dsml invoke") {
invokeOpenLen = len("<dsml invoke")
parameterOpen = "<dsml parameter"
}
if findAnyXMLCloseOutsideCDATA(captured, possibleWrapperCloseTags(dsml), invokeIdx) > invokeIdx {
if closeTag, ok := findFirstToolMarkupTagByNameFrom(captured, invokeTag.Start+1, "tool_calls", true); ok && closeTag.Start > invokeTag.Start {
return true
}
startEnd := findXMLTagEnd(captured, invokeIdx+invokeOpenLen)
startEnd := invokeTag.End
if startEnd < 0 {
return true
}
@@ -148,84 +128,16 @@ func shouldKeepBareInvokeCapture(captured string) bool {
return true
}
invokeCloseIdx := findAnyXMLCloseOutsideCDATA(captured, possibleInvokeCloseTags(dsml), startEnd+1)
if invokeCloseIdx >= 0 {
afterClose := captured[invokeCloseIdx:]
for _, closeTag := range possibleInvokeCloseTags(dsml) {
if strings.HasPrefix(strings.ToLower(afterClose), closeTag) {
afterClose = afterClose[len(closeTag):]
break
}
}
return strings.TrimSpace(afterClose) == ""
if invokeCloseTag, ok := findFirstToolMarkupTagByNameFrom(captured, startEnd+1, "invoke", true); ok {
return strings.TrimSpace(captured[invokeCloseTag.End+1:]) == ""
}
trimmedLower := strings.ToLower(trimmedBody)
return strings.HasPrefix(trimmedLower, parameterOpen) ||
return strings.HasPrefix(trimmedLower, "<parameter") ||
strings.HasPrefix(trimmedLower, "{") ||
strings.HasPrefix(trimmedLower, "[")
}
// containsAnyToolCallWrapper reports whether lower contains the start of any
// recognized tool_calls wrapper opening tag (plain or DSML-prefixed). The
// comparison is case-sensitive against lowercase markers.
func containsAnyToolCallWrapper(lower string) bool {
	for _, marker := range [...]string{
		"<tool_calls",
		"<|dsml|tool_calls",
		"<|dsml tool_calls",
		"<dsml|tool_calls",
		"<dsml tool_calls",
		"<|tool_calls",
	} {
		if strings.Contains(lower, marker) {
			return true
		}
	}
	return false
}
// possibleWrapperCloseTags lists the closing tags that may terminate a
// tool_calls wrapper. With dsml set, the DSML-prefixed spellings are returned
// followed by the plain and pipe-prefixed forms; otherwise only the plain
// closing tag is returned.
func possibleWrapperCloseTags(dsml bool) []string {
	if dsml {
		return []string{
			"</|dsml|tool_calls>",
			"</|dsml tool_calls>",
			"</dsml|tool_calls>",
			"</dsml tool_calls>",
			"</tool_calls>",
			"</|tool_calls>",
		}
	}
	return []string{"</tool_calls>"}
}
// possibleInvokeCloseTags lists the closing tags that may terminate an invoke
// element. With dsml set, the DSML-prefixed spellings are returned followed by
// the plain and pipe-prefixed forms; otherwise only the plain closing tag is
// returned.
func possibleInvokeCloseTags(dsml bool) []string {
	if dsml {
		return []string{
			"</|dsml|invoke>",
			"</|dsml invoke>",
			"</dsml|invoke>",
			"</dsml invoke>",
			"</invoke>",
			"</|invoke>",
		}
	}
	return []string{"</invoke>"}
}
// findAnyXMLCloseOutsideCDATA runs findXMLCloseOutsideCDATA for every tag in
// closeTags, searching s from start, and returns the smallest non-negative
// index found. It returns -1 when none of the tags is found.
func findAnyXMLCloseOutsideCDATA(s string, closeTags []string, start int) int {
	earliest := -1
	for i := range closeTags {
		pos := findXMLCloseOutsideCDATA(s, closeTags[i], start)
		if pos < 0 {
			continue
		}
		if earliest < 0 || pos < earliest {
			earliest = pos
		}
	}
	return earliest
}
// firstInvokeIndex locates the earliest invoke opening prefix in lower.
//
// It returns the byte offset of that prefix and whether it is a DSML-style
// variant. A plain "<invoke" at the earliest position is reported as non-DSML
// (false). When nothing matches it returns (-1, false). Matching is
// case-sensitive against lowercase literals, so callers presumably pass
// already-lowercased text.
func firstInvokeIndex(lower string) (int, bool) {
	plainPos := strings.Index(lower, "<invoke")

	// Earliest hit across every DSML-like spelling. The list also contains the
	// plain form, so this is never later than plainPos when plainPos exists.
	variantPos := -1
	for _, candidate := range [...]string{
		"<|dsml|invoke", "<|dsml invoke", "<dsml|invoke", "<dsml invoke", "<invoke", "<|invoke",
	} {
		if pos := strings.Index(lower, candidate); pos >= 0 && (variantPos < 0 || pos < variantPos) {
			variantPos = pos
		}
	}

	if plainPos < 0 {
		return variantPos, variantPos >= 0
	}
	if variantPos >= 0 && variantPos < plainPos {
		return variantPos, true
	}
	return plainPos, false
}
// findPartialXMLToolTagStart checks if the string ends with a partial canonical
// XML wrapper tag (e.g., "<too") and returns the position of the '<'.
func findPartialXMLToolTagStart(s string) int {
lastLT := strings.LastIndex(s, "<")
if lastLT < 0 {
@@ -237,13 +149,18 @@ func findPartialXMLToolTagStart(s string) int {
return -1
}
lowerTail := strings.ToLower(tail)
// Check if the tail is a prefix of any known XML tool tag.
for _, tag := range xmlToolCallOpeningTags {
tagWithLT := tag
if !strings.HasPrefix(tagWithLT, "<") {
tagWithLT = "<" + tagWithLT
}
if strings.HasPrefix(tagWithLT, lowerTail) {
for _, tag := range []string{
"<tool_calls", "<invoke", "<parameter",
"<|tool_calls", "<|invoke", "<|parameter",
"<tool_calls", "<invoke", "<parameter",
"<|dsml|tool_calls", "<|dsml|invoke", "<|dsml|parameter",
"<dsmltool_calls", "<dsmlinvoke", "<dsmlparameter",
"<dsml tool_calls", "<dsml invoke", "<dsml parameter",
"<dsml|tool_calls", "<dsml|invoke", "<dsml|parameter",
"<|dsmltool_calls", "<|dsmlinvoke", "<|dsmlparameter",
"<|dsml tool_calls", "<|dsml invoke", "<|dsml parameter",
} {
if strings.HasPrefix(tag, lowerTail) {
return lastLT
}
}

View File

@@ -1,138 +1,28 @@
package toolstream
import "strings"
import "ds2api/internal/toolcall"
func findMatchingXMLToolWrapperClose(s, openTag, closeTag string, openIdx int) int {
if s == "" || openTag == "" || closeTag == "" || openIdx < 0 {
return -1
}
lower := strings.ToLower(s)
openTarget := strings.ToLower(openTag)
closeTarget := strings.ToLower(closeTag)
depth := 1
for i := openIdx + len(openTarget); i < len(s); {
switch {
case strings.HasPrefix(lower[i:], "<![cdata["):
end := strings.Index(lower[i+len("<![cdata["):], "]]>")
if end < 0 {
return -1
}
i += len("<![cdata[") + end + len("]]>")
case strings.HasPrefix(lower[i:], "<!--"):
end := strings.Index(lower[i+len("<!--"):], "-->")
if end < 0 {
return -1
}
i += len("<!--") + end + len("-->")
case strings.HasPrefix(lower[i:], closeTarget):
depth--
if depth == 0 {
return i
}
i += len(closeTarget)
case strings.HasPrefix(lower[i:], openTarget) && hasXMLToolTagBoundary(s, i+len(openTarget)):
depth++
i += len(openTarget)
default:
i++
}
}
return -1
// findFirstToolMarkupTagByName returns the first opening (non-closing) tool
// markup tag called name found in s at or after start. It delegates to
// findFirstToolMarkupTagByNameFrom with the closing flag fixed to false.
func findFirstToolMarkupTagByName(s string, start int, name string) (toolcall.ToolMarkupTag, bool) {
	const wantClosing = false
	tag, found := findFirstToolMarkupTagByNameFrom(s, start, name, wantClosing)
	return tag, found
}
func findXMLOpenOutsideCDATA(s, openTag string, start int) int {
if s == "" || openTag == "" {
return -1
}
if start < 0 {
start = 0
}
lower := strings.ToLower(s)
target := strings.ToLower(openTag)
for i := start; i < len(s); {
switch {
case strings.HasPrefix(lower[i:], "<![cdata["):
end := strings.Index(lower[i+len("<![cdata["):], "]]>")
if end < 0 {
return -1
}
i += len("<![cdata[") + end + len("]]>")
case strings.HasPrefix(lower[i:], "<!--"):
end := strings.Index(lower[i+len("<!--"):], "-->")
if end < 0 {
return -1
}
i += len("<!--") + end + len("-->")
case strings.HasPrefix(lower[i:], target) && hasXMLToolTagBoundary(s, i+len(target)):
return i
default:
i++
func findFirstToolMarkupTagByNameFrom(s string, start int, name string, closing bool) (toolcall.ToolMarkupTag, bool) {
for pos := maxInt(start, 0); pos < len(s); {
tag, ok := toolcall.FindToolMarkupTagOutsideIgnored(s, pos)
if !ok {
return toolcall.ToolMarkupTag{}, false
}
if tag.Name == name && tag.Closing == closing {
return tag, true
}
pos = tag.End + 1
}
return -1
return toolcall.ToolMarkupTag{}, false
}
func findXMLCloseOutsideCDATA(s, closeTag string, start int) int {
if s == "" || closeTag == "" {
return -1
func maxInt(a, b int) int {
if a > b {
return a
}
if start < 0 {
start = 0
}
lower := strings.ToLower(s)
target := strings.ToLower(closeTag)
for i := start; i < len(s); {
switch {
case strings.HasPrefix(lower[i:], "<![cdata["):
end := strings.Index(lower[i+len("<![cdata["):], "]]>")
if end < 0 {
return -1
}
i += len("<![cdata[") + end + len("]]>")
case strings.HasPrefix(lower[i:], "<!--"):
end := strings.Index(lower[i+len("<!--"):], "-->")
if end < 0 {
return -1
}
i += len("<!--") + end + len("-->")
case strings.HasPrefix(lower[i:], target):
return i
default:
i++
}
}
return -1
}
// hasXMLToolTagBoundary reports whether the byte at text[idx] can end a tag
// name: ASCII whitespace (space, tab, LF, CR), '>' or '/'. An idx at or past
// the end of text also counts as a boundary.
func hasXMLToolTagBoundary(text string, idx int) bool {
	if idx >= len(text) {
		return true
	}
	c := text[idx]
	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '>' || c == '/'
}
func findXMLTagEnd(s string, start int) int {
quote := byte(0)
for i := start; i < len(s); i++ {
ch := s[i]
if quote != 0 {
if ch == quote {
quote = 0
}
continue
}
if ch == '"' || ch == '\'' {
quote = ch
continue
}
if ch == '>' {
return i
}
}
return -1
return b
}

View File

@@ -5,28 +5,7 @@ import "regexp"
// --- XML tool call support for the streaming sieve ---
//nolint:unused // kept as explicit tag inventory for future XML sieve refinements.
var xmlToolCallClosingTags = []string{"</tool_calls>", "</|dsml|tool_calls>", "</|dsml tool_calls>", "</dsml|tool_calls>", "</dsml tool_calls>", "</tool_calls>", "</|tool_calls>"}
var xmlToolCallOpeningTags = []string{
"<tool_calls", "<invoke",
"<|dsml|tool_calls", "<|dsml|invoke",
"<|dsml tool_calls", "<|dsml invoke",
"<dsml|tool_calls", "<dsml|invoke",
"<dsml tool_calls", "<dsml invoke",
"<tool_calls", "<invoke",
"<|tool_calls", "<|invoke",
}
// xmlToolCallTagPairs maps each opening tag to its expected closing tag.
// Order matters: longer/wrapper tags must be checked first.
var xmlToolCallTagPairs = []struct{ open, close string }{
{"<|dsml|tool_calls", "</|dsml|tool_calls>"},
{"<|dsml tool_calls", "</|dsml tool_calls>"},
{"<dsml|tool_calls", "</dsml|tool_calls>"},
{"<dsml tool_calls", "</dsml tool_calls>"},
{"<tool_calls", "</tool_calls>"},
{"<|tool_calls", "</|tool_calls>"},
{"<tool_calls", "</tool_calls>"},
}
var xmlToolCallClosingTags = []string{"</tool_calls>", "</|dsml|tool_calls>", "</|dsmltool_calls>", "</|dsml tool_calls>", "</dsml|tool_calls>", "</dsmltool_calls>", "</dsml tool_calls>", "</tool_calls>", "</|tool_calls>"}
// xmlToolCallBlockPattern matches a complete canonical XML tool call block.
//
@@ -37,10 +16,14 @@ var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)((?:<tool_calls\b|<\|dsml
var xmlToolTagsToDetect = []string{
"<|dsml|tool_calls>", "<|dsml|tool_calls\n", "<|dsml|tool_calls ",
"<|dsml|invoke ", "<|dsml|invoke\n", "<|dsml|invoke\t", "<|dsml|invoke\r",
"<|dsmltool_calls>", "<|dsmltool_calls\n", "<|dsmltool_calls ",
"<|dsmlinvoke ", "<|dsmlinvoke\n", "<|dsmlinvoke\t", "<|dsmlinvoke\r",
"<|dsml tool_calls>", "<|dsml tool_calls\n", "<|dsml tool_calls ",
"<|dsml invoke ", "<|dsml invoke\n", "<|dsml invoke\t", "<|dsml invoke\r",
"<dsml|tool_calls>", "<dsml|tool_calls\n", "<dsml|tool_calls ",
"<dsml|invoke ", "<dsml|invoke\n", "<dsml|invoke\t", "<dsml|invoke\r",
"<dsmltool_calls>", "<dsmltool_calls\n", "<dsmltool_calls ",
"<dsmlinvoke ", "<dsmlinvoke\n", "<dsmlinvoke\t", "<dsmlinvoke\r",
"<dsml tool_calls>", "<dsml tool_calls\n", "<dsml tool_calls ",
"<dsml invoke ", "<dsml invoke\n", "<dsml invoke\t", "<dsml invoke\r",
"<tool_calls>", "<tool_calls\n", "<tool_calls ",

View File

@@ -174,6 +174,41 @@ func TestProcessToolSieveKeepsCDATAEmbeddedToolClosingBuffered(t *testing.T) {
}
}
// TestProcessToolSieveFallsBackWhenCDATANeverCloses streams a tool_calls
// block whose parameter opens a CDATA section that is never terminated. After
// the final Flush it expects exactly one tool call carrying the payload
// "hello world" and no text emitted.
func TestProcessToolSieveFallsBackWhenCDATANeverCloses(t *testing.T) {
	allowed := []string{"Write"}

	var (
		state  State
		events []Event
	)
	for _, chunk := range []string{
		"<tool_calls>\n <invoke name=\"Write\">\n <parameter name=\"content\"><![CDATA[",
		"hello world",
		"</parameter>\n </invoke>\n</tool_calls>",
	} {
		events = append(events, ProcessChunk(&state, chunk, allowed)...)
	}
	events = append(events, Flush(&state, allowed)...)

	var leaked strings.Builder
	calls := 0
	for i := range events {
		leaked.WriteString(events[i].Content)
		calls += len(events[i].ToolCalls)
		if len(events[i].ToolCalls) == 0 {
			continue
		}
		// Only the first call of each event is inspected for the payload.
		if got, _ := events[i].ToolCalls[0].Input["content"].(string); got != "hello world" {
			t.Fatalf("expected recovered CDATA payload, got %q", got)
		}
	}

	if calls != 1 {
		t.Fatalf("expected unclosed CDATA payload to still parse, got %d tool calls events=%#v", calls, events)
	}
	if leaked.Len() != 0 {
		t.Fatalf("expected no leaked text, got %q", leaked.String())
	}
}
func TestProcessToolSieveXMLWithLeadingText(t *testing.T) {
var state State
// Model outputs some prose then an XML tool call.