From ace440481a61643579f8f87b6919f213ca52603c Mon Sep 17 00:00:00 2001 From: waiwai <511158080@qq.com> Date: Fri, 8 May 2026 13:02:21 +0800 Subject: [PATCH] refactor(toolcall): remove lower param from skipXMLIgnoredSection The lower parameter was a footgun: callers had to keep it in sync with the loop bound over text. Instead, skipXMLIgnoredSection now accepts only text and constructs strings.ToLower(tail) internally for its prefix checks. This eliminates the entire class of len(text) vs len(lower) boundary bugs along with the min() workaround. Also changes: - findToolCDATAEnd: drop lower param, use text directly for closeMarker search (]]> is ASCII, ToLower is a no-op for it) - cdataEndLooksStructural: drop lower param, use raw text byte comparison - All external callers: loop bound reverts to plain len(text) The inner tag-matching functions (findXMLStartTagOutsideCDATA, findMatchingXMLEndTagOutsideCDATA) retain their own local lower for HasPrefix comparisons against the target tag name, keeping concerns properly separated. Fixes #435. --- internal/toolcall/toolcalls_dsml.go | 3 +- internal/toolcall/toolcalls_parse_markup.go | 45 +++++---- internal/toolcall/toolcalls_scan.go | 11 +-- internal/toolcall/toolcalls_test.go | 104 ++++++++++++++++++++ 4 files changed, 134 insertions(+), 29 deletions(-) diff --git a/internal/toolcall/toolcalls_dsml.go b/internal/toolcall/toolcalls_dsml.go index 19477e7..8bd50aa 100644 --- a/internal/toolcall/toolcalls_dsml.go +++ b/internal/toolcall/toolcalls_dsml.go @@ -17,11 +17,10 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string { if text == "" { return "" } - lower := strings.ToLower(text) var b strings.Builder b.Grow(len(text)) for i := 0; i < len(text); { - next, advanced, blocked := skipXMLIgnoredSection(text, lower, i) + next, advanced, blocked := skipXMLIgnoredSection(text, i) if blocked { b.WriteString(text[i:]) break diff --git a/internal/toolcall/toolcalls_parse_markup.go b/internal/toolcall/toolcalls_parse_markup.go index 4036b66..13adeb9 100644 --- a/internal/toolcall/toolcalls_parse_markup.go +++ b/internal/toolcall/toolcalls_parse_markup.go @@ -144,7 +144,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i lower := strings.ToLower(text) target := "<" + strings.ToLower(tag) for i := maxInt(from, 0); i < len(text); { - next, advanced, blocked := skipXMLIgnoredSection(text, lower, i) + next, advanced, blocked := skipXMLIgnoredSection(text, i) if blocked { return -1, -1, "", false } @@ -170,7 +170,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart, closeTarget := "= len(text) { + return i, false, false + } + tail := strings.ToLower(text[i:]) switch { - case strings.HasPrefix(lower[i:], ""), true, false - case strings.HasPrefix(lower[i:], "") + case strings.HasPrefix(tail, "") if end < 0 { return 0, false, true } @@ -225,14 +229,14 @@ func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool, } } -func findToolCDATAEnd(text, lower string, from int) int { - if from < 0 || from > len(text) { +func findToolCDATAEnd(text string, from int) int { + if from < 0 || from >= len(text) { return -1 } const closeMarker = "]]>" firstNonFenceEnd := -1 for searchFrom := from; searchFrom < len(text); { - rel := strings.Index(lower[searchFrom:], closeMarker) + rel := strings.Index(text[searchFrom:], closeMarker) if rel < 0 { break } @@ -241,27 +245,28 @@ func findToolCDATAEnd(text, lower string, from int) int { if cdataOffsetIsInsideMarkdownFence(text[from:end]) { continue } + if cdataEndLooksStructural(text, searchFrom) { + return end + } if firstNonFenceEnd < 0 { firstNonFenceEnd = end } - if cdataEndLooksStructural(lower, searchFrom) { - return end - } } return firstNonFenceEnd } -func cdataEndLooksStructural(lower string, after int) bool { - for after < len(lower) { - switch lower[after] { - case ' ', '\t', '\r', '\n': +func cdataEndLooksStructural(text string, after int) bool { + for after < len(text) { + switch { + case text[after] == ' ' || text[after] == '\t' || text[after] == '\r' || text[after] == '\n': after++ - continue + case after+1 < len(text) && text[after] == '<' && text[after+1] == '/': + return true default: + return false } - break } - return strings.HasPrefix(lower[after:], "= len(text) { return ToolMarkupTag{}, false } depth := 1 diff --git a/internal/toolcall/toolcalls_test.go b/internal/toolcall/toolcalls_test.go index 1e03774..577de5c 100644 --- a/internal/toolcall/toolcalls_test.go +++ b/internal/toolcall/toolcalls_test.go @@ -892,3 +892,107 @@ func TestParseToolCallsSkipsProseMentionOfSameWrapperVariant(t *testing.T) { t.Fatalf("expected command to parse, got %q", got) } } + +func TestTurkishILowercaseMapping(t *testing.T) { + tests := []struct { + name string + text string + start int + wantOk bool + wantName string + }{ + {"turkish_i_at_name_start", "İ", 0, false, ""}, + {"turkish_i_at_name_end", "", 0, false, ""}, + {"turkish_i_before_tag", "İ", 0, false, ""}, + {"normal_tool_calls", "", 0, true, "tool_calls"}, + {"normal_invoke", "", 0, true, "invoke"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, ok := FindToolMarkupTagOutsideIgnored(tt.text, tt.start) + if ok != tt.wantOk { + t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) ok = %v, want %v", tt.text, tt.start, ok, tt.wantOk) + return + } + if ok && got.Name != tt.wantName { + t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) name = %q, want %q", tt.text, tt.start, got.Name, tt.wantName) + } + }) + } +} + +func TestSkipXMLIgnoredSectionBoundaryConditions(t *testing.T) { + text := "hello" + + tests := []struct { + name string + i int + wantNext int + wantAdv bool + wantBlk bool + }{ + {"valid_index", 2, 2, false, false}, + {"at_end_equal_len", 5, 5, false, false}, + {"beyond_end", 6, 6, false, false}, + {"negative", -1, -1, false, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + next, adv, blk := skipXMLIgnoredSection(text, tt.i) + if next != tt.wantNext || adv != tt.wantAdv || blk != tt.wantBlk { + t.Errorf("skipXMLIgnoredSection(%q, %d) = (%d, %v, %v), want (%d, %v, %v)", + text, tt.i, next, adv, blk, tt.wantNext, tt.wantAdv, tt.wantBlk) + } + }) + } +} + +func TestFindToolCDATAEndBoundaryConditions(t *testing.T) { + text := "" + + tests := []struct { + name string + from int + wantResult int + }{ + {"valid", 12, 14}, + {"at_end", 17, -1}, + {"beyond_end", 18, -1}, + {"negative", -1, -1}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := findToolCDATAEnd(text, tt.from) + if got != tt.wantResult { + t.Errorf("findToolCDATAEnd(%q, %d) = %d, want %d", + text, tt.from, got, tt.wantResult) + } + }) + } +} + +func TestFindMatchingToolMarkupCloseBoundaryConditions(t *testing.T) { + tests := []struct { + name string + text string + open ToolMarkupTag + wantOk bool + }{ + {"empty_text", "", ToolMarkupTag{Name: "tool_calls", End: 0}, false}, + {"open_end_beyond_text", "hello", ToolMarkupTag{Name: "tool_calls", End: 100}, false}, + {"open_end_equals_len", "hello", ToolMarkupTag{Name: "tool_calls", End: 5}, false}, + {"valid_simple", "", ToolMarkupTag{Name: "tool_calls", End: 11}, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, ok := FindMatchingToolMarkupClose(tt.text, tt.open) + if ok != tt.wantOk { + t.Errorf("FindMatchingToolMarkupClose(%q, %+v) ok = %v, want %v", tt.text, tt.open, ok, tt.wantOk) + } + }) + } +}