Compare commits

...

7 Commits

Author SHA1 Message Date
CJACK.
7ab5a0e66d Merge pull request #458 from CJackHwang/dev
Avoid lowercasing ignored XML tails in toolcall
2026-05-08 17:13:00 +08:00
CJACK.
410efbd70b Merge pull request #457 from NgoQuocViet2001/ai/skipxml-lower-hotpath
fix(toolcall): avoid lowercasing ignored XML tails
2026-05-08 17:05:28 +08:00
NgoQuocViet2001
7179b995bb fix(toolcall): avoid lowercasing ignored XML tails 2026-05-08 14:15:32 +07:00
CJACK.
fef3798e5e Merge pull request #453 from CJackHwang/dev
Fix character length calculation issue
2026-05-08 13:40:47 +08:00
CJACK.
00fe18b505 Update VERSION 2026-05-08 13:36:17 +08:00
CJACK.
9b746e32d8 Merge pull request #452 from waiwaic/fix/turkish-i-boundary-panic
fix(toolcall): use len(lower) not len(text) after ToLower to prevent out-of-bounds panic
2026-05-08 13:34:28 +08:00
waiwai
ace440481a refactor(toolcall): remove lower param from skipXMLIgnoredSection
The lower parameter was a footgun: callers had to keep it in sync with the
loop bound over text. Instead, skipXMLIgnoredSection now accepts only text
and constructs strings.ToLower(tail) internally for its prefix checks.

This eliminates the entire class of len(text) vs len(lower) boundary bugs
along with the min() workaround.

Also changes:
- findToolCDATAEnd: drop lower param, use text directly for closeMarker
  search (]]> is ASCII, ToLower is a no-op for it)
- cdataEndLooksStructural: drop lower param, use raw text byte comparison
- All external callers: loop bound reverts to plain len(text)

The inner tag-matching functions (findXMLStartTagOutsideCDATA,
findMatchingXMLEndTagOutsideCDATA) retain their own local lower for
HasPrefix comparisons against the target tag name, keeping concerns
properly separated.

Fixes #435.
2026-05-08 13:29:21 +08:00
5 changed files with 185 additions and 30 deletions

View File

@@ -1 +1 @@
4.4.4 4.4.5

View File

@@ -17,11 +17,10 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
if text == "" { if text == "" {
return "" return ""
} }
lower := strings.ToLower(text)
var b strings.Builder var b strings.Builder
b.Grow(len(text)) b.Grow(len(text))
for i := 0; i < len(text); { for i := 0; i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i) next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked { if blocked {
b.WriteString(text[i:]) b.WriteString(text[i:])
break break

View File

@@ -144,7 +144,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
lower := strings.ToLower(text) lower := strings.ToLower(text)
target := "<" + strings.ToLower(tag) target := "<" + strings.ToLower(tag)
for i := maxInt(from, 0); i < len(text); { for i := maxInt(from, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i) next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked { if blocked {
return -1, -1, "", false return -1, -1, "", false
} }
@@ -170,7 +170,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
closeTarget := "</" + strings.ToLower(tag) closeTarget := "</" + strings.ToLower(tag)
depth := 1 depth := 1
for i := maxInt(from, 0); i < len(text); { for i := maxInt(from, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i) next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked { if blocked {
return -1, -1, false return -1, -1, false
} }
@@ -206,16 +206,19 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
return -1, -1, false return -1, -1, false
} }
func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool, blocked bool) { func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked bool) {
if i < 0 || i >= len(text) {
return i, false, false
}
switch { switch {
case strings.HasPrefix(lower[i:], "<![cdata["): case hasASCIIPrefixFoldAt(text, i, "<![cdata["):
end := findToolCDATAEnd(text, lower, i+len("<![cdata[")) end := findToolCDATAEnd(text, i+len("<![cdata["))
if end < 0 { if end < 0 {
return 0, false, true return 0, false, true
} }
return end + len("]]>"), true, false return end + len("]]>"), true, false
case strings.HasPrefix(lower[i:], "<!--"): case strings.HasPrefix(text[i:], "<!--"):
end := strings.Index(lower[i+len("<!--"):], "-->") end := strings.Index(text[i+len("<!--"):], "-->")
if end < 0 { if end < 0 {
return 0, false, true return 0, false, true
} }
@@ -225,14 +228,33 @@ func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool,
} }
} }
func findToolCDATAEnd(text, lower string, from int) int { func hasASCIIPrefixFoldAt(text string, start int, prefix string) bool {
if from < 0 || from > len(text) { if start < 0 || len(text)-start < len(prefix) {
return false
}
for j := 0; j < len(prefix); j++ {
if asciiLower(text[start+j]) != asciiLower(prefix[j]) {
return false
}
}
return true
}
// asciiLower returns the lowercase form of b when b is an ASCII uppercase
// letter ('A'..'Z'); every other byte, including non-ASCII bytes, is
// returned unchanged.
func asciiLower(b byte) byte {
	// For 'A'..'Z' this bit is always clear, so setting it is the same as
	// adding the distance between the two cases.
	const caseBit = 'a' - 'A'
	if b < 'A' || b > 'Z' {
		return b
	}
	return b | caseBit
}
func findToolCDATAEnd(text string, from int) int {
if from < 0 || from >= len(text) {
return -1 return -1
} }
const closeMarker = "]]>" const closeMarker = "]]>"
firstNonFenceEnd := -1 firstNonFenceEnd := -1
for searchFrom := from; searchFrom < len(text); { for searchFrom := from; searchFrom < len(text); {
rel := strings.Index(lower[searchFrom:], closeMarker) rel := strings.Index(text[searchFrom:], closeMarker)
if rel < 0 { if rel < 0 {
break break
} }
@@ -241,27 +263,28 @@ func findToolCDATAEnd(text, lower string, from int) int {
if cdataOffsetIsInsideMarkdownFence(text[from:end]) { if cdataOffsetIsInsideMarkdownFence(text[from:end]) {
continue continue
} }
if cdataEndLooksStructural(text, searchFrom) {
return end
}
if firstNonFenceEnd < 0 { if firstNonFenceEnd < 0 {
firstNonFenceEnd = end firstNonFenceEnd = end
} }
if cdataEndLooksStructural(lower, searchFrom) {
return end
}
} }
return firstNonFenceEnd return firstNonFenceEnd
} }
func cdataEndLooksStructural(lower string, after int) bool { func cdataEndLooksStructural(text string, after int) bool {
for after < len(lower) { for after < len(text) {
switch lower[after] { switch {
case ' ', '\t', '\r', '\n': case text[after] == ' ' || text[after] == '\t' || text[after] == '\r' || text[after] == '\n':
after++ after++
continue case after+1 < len(text) && text[after] == '<' && text[after+1] == '/':
return true
default: default:
return false
} }
break
} }
return strings.HasPrefix(lower[after:], "</") return false
} }
func cdataOffsetIsInsideMarkdownFence(fragment string) bool { func cdataOffsetIsInsideMarkdownFence(fragment string) bool {

View File

@@ -28,9 +28,8 @@ type ToolMarkupTag struct {
} }
func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) { func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
lower := strings.ToLower(text)
for i := 0; i < len(text); { for i := 0; i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i) next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked { if blocked {
return hasDSML, hasCanonical return hasDSML, hasCanonical
} }
@@ -56,9 +55,8 @@ func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical
} }
func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) { func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
lower := strings.ToLower(text)
for i := 0; i < len(text); { for i := 0; i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i) next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked { if blocked {
return hasDSML, hasCanonical return hasDSML, hasCanonical
} }
@@ -88,9 +86,8 @@ func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanon
} }
func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) { func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) {
lower := strings.ToLower(text)
for i := maxInt(start, 0); i < len(text); { for i := maxInt(start, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i) next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked { if blocked {
return ToolMarkupTag{}, false return ToolMarkupTag{}, false
} }
@@ -107,7 +104,7 @@ func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, boo
} }
func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag, bool) { func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag, bool) {
if text == "" || open.Name == "" || open.Closing { if text == "" || open.Name == "" || open.Closing || open.End >= len(text) {
return ToolMarkupTag{}, false return ToolMarkupTag{}, false
} }
depth := 1 depth := 1

View File

@@ -892,3 +892,139 @@ func TestParseToolCallsSkipsProseMentionOfSameWrapperVariant(t *testing.T) {
t.Fatalf("expected command to parse, got %q", got) t.Fatalf("expected command to parse, got %q", got)
} }
} }
// TestTurkishILowercaseMapping runs FindToolMarkupTagOutsideIgnored over
// inputs containing 'İ' (U+0130), a rune whose byte length changes under
// strings.ToLower, alongside plain ASCII tags, and checks the reported
// match flag and tag name.
func TestTurkishILowercaseMapping(t *testing.T) {
	type scenario struct {
		name     string
		text     string
		start    int
		wantOk   bool
		wantName string
	}
	scenarios := []scenario{
		// NOTE(review): this input is identical to "turkish_i_before_tag"
		// below; the case name suggests the rune was meant to sit inside the
		// tag name — confirm the intended input.
		{name: "turkish_i_at_name_start", text: "İ<tool>", start: 0, wantOk: false, wantName: ""},
		{name: "turkish_i_at_name_end", text: "<toolİ>", start: 0, wantOk: false, wantName: ""},
		{name: "turkish_i_before_tag", text: "İ<tool>", start: 0, wantOk: false, wantName: ""},
		{name: "normal_tool_calls", text: "<tool_calls>", start: 0, wantOk: true, wantName: "tool_calls"},
		{name: "normal_invoke", text: "<invoke name=\"test\">", start: 0, wantOk: true, wantName: "invoke"},
	}
	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			tag, found := FindToolMarkupTagOutsideIgnored(sc.text, sc.start)
			switch {
			case found != sc.wantOk:
				t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) ok = %v, want %v", sc.text, sc.start, found, sc.wantOk)
			case found && tag.Name != sc.wantName:
				// The name is only compared when a tag was actually reported.
				t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) name = %q, want %q", sc.text, sc.start, tag.Name, sc.wantName)
			}
		})
	}
}
// TestSkipXMLIgnoredSectionBoundaryConditions calls skipXMLIgnoredSection on
// markup-free text with in-range, end-of-text, past-the-end and negative
// indices. Every case expects the index to come back unchanged with both the
// advanced and blocked flags false.
func TestSkipXMLIgnoredSectionBoundaryConditions(t *testing.T) {
	const input = "hello"
	type scenario struct {
		name     string
		i        int
		wantNext int
		wantAdv  bool
		wantBlk  bool
	}
	scenarios := []scenario{
		{name: "valid_index", i: 2, wantNext: 2},
		{name: "at_end_equal_len", i: 5, wantNext: 5},
		{name: "beyond_end", i: 6, wantNext: 6},
		{name: "negative", i: -1, wantNext: -1},
	}
	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			gotNext, gotAdv, gotBlk := skipXMLIgnoredSection(input, sc.i)
			matches := gotNext == sc.wantNext && gotAdv == sc.wantAdv && gotBlk == sc.wantBlk
			if !matches {
				t.Errorf("skipXMLIgnoredSection(%q, %d) = (%d, %v, %v), want (%d, %v, %v)",
					input, sc.i, gotNext, gotAdv, gotBlk, sc.wantNext, sc.wantAdv, sc.wantBlk)
			}
		})
	}
}
// TestSkipXMLIgnoredSectionCommentWithUnicodeKeepsByteOffset checks that
// skipping an XML comment containing the multi-byte rune 'İ' reports a
// position equal to the comment's byte length in the original text.
func TestSkipXMLIgnoredSectionCommentWithUnicodeKeepsByteOffset(t *testing.T) {
	const comment = "<!-- İ -->"
	input := comment + "x<tool_calls>"

	pos, advanced, blocked := skipXMLIgnoredSection(input, 0)
	if !advanced || blocked {
		t.Fatalf("skipXMLIgnoredSection() = (%d, %v, %v), want advanced unblocked comment", pos, advanced, blocked)
	}

	// len counts bytes, so this is the offset of the first byte after "-->".
	wantPos := len(comment)
	if pos != wantPos {
		t.Fatalf("skipXMLIgnoredSection() next = %d, want %d", pos, wantPos)
	}
}
// TestSkipXMLIgnoredSectionMatchesCDATAWithoutAllocatingTail checks that a
// mixed-case CDATA opener is recognised and skipped as a whole, and that
// FindToolMarkupTagOutsideIgnored then reports the tool tag that starts
// right after the CDATA section rather than the one wrapped inside it.
func TestSkipXMLIgnoredSectionMatchesCDATAWithoutAllocatingTail(t *testing.T) {
	const cdata = "<![cDaTa[<tool_calls>]]>"
	input := cdata + "<tool_calls>"

	pos, advanced, blocked := skipXMLIgnoredSection(input, 0)
	if !advanced || blocked {
		t.Fatalf("skipXMLIgnoredSection() = (%d, %v, %v), want advanced unblocked CDATA", pos, advanced, blocked)
	}
	wantPos := len(cdata)
	if pos != wantPos {
		t.Fatalf("skipXMLIgnoredSection() next = %d, want %d", pos, wantPos)
	}

	found, ok := FindToolMarkupTagOutsideIgnored(input, 0)
	if !ok {
		t.Fatal("expected tool tag after skipped CDATA")
	}
	// The reported tag must begin exactly where the skipped section ended.
	if found.Start != pos {
		t.Fatalf("FindToolMarkupTagOutsideIgnored() start = %d, want %d", found.Start, pos)
	}
}
// TestFindToolCDATAEndBoundaryConditions calls findToolCDATAEnd with a start
// offset inside the CDATA body, at the end of the text, past the end and
// negative. The in-body case expects the byte index of the "]]>" marker;
// all other cases expect -1.
func TestFindToolCDATAEndBoundaryConditions(t *testing.T) {
	const input = "<![CDATA[hello]]>"
	type scenario struct {
		name       string
		from       int
		wantResult int
	}
	scenarios := []scenario{
		{name: "valid", from: 12, wantResult: 14},
		{name: "at_end", from: 17, wantResult: -1},
		{name: "beyond_end", from: 18, wantResult: -1},
		{name: "negative", from: -1, wantResult: -1},
	}
	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			if end := findToolCDATAEnd(input, sc.from); end != sc.wantResult {
				t.Errorf("findToolCDATAEnd(%q, %d) = %d, want %d",
					input, sc.from, end, sc.wantResult)
			}
		})
	}
}
// TestFindMatchingToolMarkupCloseBoundaryConditions calls
// FindMatchingToolMarkupClose with empty text and with opening tags whose
// End is at or beyond the text length, expecting no match, plus one
// well-formed open/close pair that is expected to match.
func TestFindMatchingToolMarkupCloseBoundaryConditions(t *testing.T) {
	type scenario struct {
		name   string
		text   string
		open   ToolMarkupTag
		wantOk bool
	}
	scenarios := []scenario{
		{name: "empty_text", text: "", open: ToolMarkupTag{Name: "tool_calls", End: 0}, wantOk: false},
		{name: "open_end_beyond_text", text: "hello", open: ToolMarkupTag{Name: "tool_calls", End: 100}, wantOk: false},
		{name: "open_end_equals_len", text: "hello", open: ToolMarkupTag{Name: "tool_calls", End: 5}, wantOk: false},
		{name: "valid_simple", text: "<tool_calls></tool_calls>", open: ToolMarkupTag{Name: "tool_calls", End: 11}, wantOk: true},
	}
	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			// Only the match flag is checked; the returned tag is ignored.
			_, matched := FindMatchingToolMarkupClose(sc.text, sc.open)
			if matched != sc.wantOk {
				t.Errorf("FindMatchingToolMarkupClose(%q, %+v) ok = %v, want %v", sc.text, sc.open, matched, sc.wantOk)
			}
		})
	}
}