This commit is contained in:
CJACK
2026-04-26 09:17:40 +08:00
parent 40b8182984
commit 0bfddf7943
10 changed files with 193 additions and 8 deletions

View File

@@ -10,7 +10,7 @@ import (
//nolint:unused // kept as explicit tag inventory for future XML sieve refinements.
var xmlToolCallClosingTags = []string{"</tool_calls>"}
var xmlToolCallOpeningTags = []string{"<tool_calls"}
var xmlToolCallOpeningTags = []string{"<tool_calls", "<invoke"}
// xmlToolCallTagPairs maps each opening tag to its expected closing tag.
// Order matters: longer/wrapper tags must be checked first.
@@ -24,7 +24,7 @@ var xmlToolCallTagPairs = []struct{ open, close string }{
var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(<tool_calls\b[^>]*>\s*(?:.*?)\s*</tool_calls>)`)
// xmlToolTagsToDetect is the set of XML tag prefixes used by findToolSegmentStart.
var xmlToolTagsToDetect = []string{"<tool_calls>", "<tool_calls\n", "<tool_calls "}
var xmlToolTagsToDetect = []string{"<tool_calls>", "<tool_calls\n", "<tool_calls ", "<invoke ", "<invoke\n", "<invoke\t", "<invoke\r"}
// consumeXMLToolCapture tries to extract complete XML tool call blocks from captured text.
func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, calls []toolcall.ParsedToolCall, suffix string, ready bool) {
@@ -55,6 +55,22 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
// If this block failed to become a tool call, pass it through as text.
return prefixPart + xmlBlock, nil, suffixPart, true
}
if !strings.Contains(lower, "<tool_calls") {
invokeIdx := strings.Index(lower, "<invoke")
closeIdx := strings.LastIndex(lower, "</tool_calls>")
if invokeIdx >= 0 && closeIdx > invokeIdx {
closeEnd := closeIdx + len("</tool_calls>")
xmlBlock := "<tool_calls>" + captured[invokeIdx:closeIdx] + "</tool_calls>"
prefixPart := captured[:invokeIdx]
suffixPart := captured[closeEnd:]
parsed := toolcall.ParseToolCalls(xmlBlock, toolNames)
if len(parsed) > 0 {
prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
return prefixPart, parsed, suffixPart, true
}
return prefixPart + captured[invokeIdx:closeEnd], nil, suffixPart, true
}
}
return "", nil, "", false
}

View File

@@ -288,6 +288,7 @@ func TestFindToolSegmentStartDetectsXMLToolCalls(t *testing.T) {
want int
}{
{"tool_calls_tag", "some text <tool_calls>\n", 10},
{"invoke_tag_missing_wrapper", "some text <invoke name=\"read_file\">\n", 10},
{"bare_tool_call_text", "prefix <tool_call>\n", -1},
{"xml_inside_code_fence", "```xml\n<tool_calls><invoke name=\"read_file\"></invoke></tool_calls>\n```", -1},
{"no_xml", "just plain text", -1},
@@ -310,6 +311,7 @@ func TestFindPartialXMLToolTagStart(t *testing.T) {
want int
}{
{"partial_tool_calls", "Hello <tool_ca", 6},
{"partial_invoke", "Hello <inv", 6},
{"bare_tool_call_not_held", "Hello <tool_name", -1},
{"partial_lt_only", "Text <", 5},
{"complete_tag", "Text <tool_calls>done", -1},
@@ -505,3 +507,32 @@ func TestProcessToolSievePassesThroughBareToolCallAsText(t *testing.T) {
t.Fatalf("expected bare invoke to pass through unchanged, got %q", textContent.String())
}
}
// TestProcessToolSieveRepairsMissingOpeningWrapperWithoutLeakingInvokeText
// streams an <invoke> block that is closed by </tool_calls> but was never
// opened by <tool_calls>. After feeding every chunk through ProcessChunk and
// then calling Flush, the collected events must carry exactly one tool call,
// and their concatenated Content must contain neither "<invoke" nor
// "</tool_calls>".
func TestProcessToolSieveRepairsMissingOpeningWrapperWithoutLeakingInvokeText(t *testing.T) {
	// The stream deliberately starts at <invoke>: the opening wrapper is absent.
	stream := []string{
		"<invoke name=\"read_file\">\n",
		" <parameter name=\"path\">README.md</parameter>\n",
		"</invoke>\n",
		"</tool_calls>",
	}

	var (
		sieve     State
		collected []Event
	)
	for i := range stream {
		collected = append(collected, ProcessChunk(&sieve, stream[i], []string{"read_file"})...)
	}
	// Flush drains whatever the sieve is still holding once the stream ends.
	collected = append(collected, Flush(&sieve, []string{"read_file"})...)

	// Tally tool calls and gather all emitted text across every event.
	var emitted strings.Builder
	callCount := 0
	for i := range collected {
		emitted.WriteString(collected[i].Content)
		callCount += len(collected[i].ToolCalls)
	}

	if callCount != 1 {
		t.Fatalf("expected repaired missing-wrapper stream to emit one tool call, got %d events=%#v", callCount, collected)
	}

	text := emitted.String()
	leakedInvoke := strings.Contains(text, "<invoke")
	leakedClose := strings.Contains(text, "</tool_calls>")
	if leakedInvoke || leakedClose {
		t.Fatalf("expected repaired missing-wrapper stream not to leak xml text, got %q", text)
	}
}