Compare commits

...

4 Commits

Author SHA1 Message Date
CJACK.
fef3798e5e Merge pull request #453 from CJackHwang/dev
Fix character length calculation issue
2026-05-08 13:40:47 +08:00
CJACK.
00fe18b505 Update VERSION 2026-05-08 13:36:17 +08:00
CJACK.
9b746e32d8 Merge pull request #452 from waiwaic/fix/turkish-i-boundary-panic
fix(toolcall): use len(lower) not len(text) after ToLower to prevent out-of-bounds panic
2026-05-08 13:34:28 +08:00
waiwai
ace440481a refactor(toolcall): remove lower param from skipXMLIgnoredSection
The lower parameter was a footgun: callers had to keep it in sync with the
loop bound over text. Instead, skipXMLIgnoredSection now accepts only text
and constructs strings.ToLower(tail) internally for its prefix checks.

This eliminates the entire class of len(text) vs len(lower) boundary bugs
along with the min() workaround.

Also changes:
- findToolCDATAEnd: drop lower param, use text directly for closeMarker
  search (]]> is ASCII, ToLower is a no-op for it)
- cdataEndLooksStructural: drop lower param, use raw text byte comparison
- All external callers: loop bound reverts to plain len(text)

The inner tag-matching functions (findXMLStartTagOutsideCDATA,
findMatchingXMLEndTagOutsideCDATA) retain their own local lower for
HasPrefix comparisons against the target tag name, keeping concerns
properly separated.

Fixes #435.
2026-05-08 13:29:21 +08:00
5 changed files with 135 additions and 30 deletions

View File

@@ -1 +1 @@
4.4.4
4.4.5

View File

@@ -17,11 +17,10 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
if text == "" {
return ""
}
lower := strings.ToLower(text)
var b strings.Builder
b.Grow(len(text))
for i := 0; i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked {
b.WriteString(text[i:])
break

View File

@@ -144,7 +144,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
lower := strings.ToLower(text)
target := "<" + strings.ToLower(tag)
for i := maxInt(from, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked {
return -1, -1, "", false
}
@@ -170,7 +170,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
closeTarget := "</" + strings.ToLower(tag)
depth := 1
for i := maxInt(from, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked {
return -1, -1, false
}
@@ -206,16 +206,20 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
return -1, -1, false
}
func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool, blocked bool) {
func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked bool) {
if i < 0 || i >= len(text) {
return i, false, false
}
tail := strings.ToLower(text[i:])
switch {
case strings.HasPrefix(lower[i:], "<![cdata["):
end := findToolCDATAEnd(text, lower, i+len("<![cdata["))
case strings.HasPrefix(tail, "<![cdata["):
end := findToolCDATAEnd(text, i+len("<![cdata["))
if end < 0 {
return 0, false, true
}
return end + len("]]>"), true, false
case strings.HasPrefix(lower[i:], "<!--"):
end := strings.Index(lower[i+len("<!--"):], "-->")
case strings.HasPrefix(tail, "<!--"):
end := strings.Index(tail[len("<!--"):], "-->")
if end < 0 {
return 0, false, true
}
@@ -225,14 +229,14 @@ func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool,
}
}
func findToolCDATAEnd(text, lower string, from int) int {
if from < 0 || from > len(text) {
func findToolCDATAEnd(text string, from int) int {
if from < 0 || from >= len(text) {
return -1
}
const closeMarker = "]]>"
firstNonFenceEnd := -1
for searchFrom := from; searchFrom < len(text); {
rel := strings.Index(lower[searchFrom:], closeMarker)
rel := strings.Index(text[searchFrom:], closeMarker)
if rel < 0 {
break
}
@@ -241,27 +245,28 @@ func findToolCDATAEnd(text, lower string, from int) int {
if cdataOffsetIsInsideMarkdownFence(text[from:end]) {
continue
}
if cdataEndLooksStructural(text, searchFrom) {
return end
}
if firstNonFenceEnd < 0 {
firstNonFenceEnd = end
}
if cdataEndLooksStructural(lower, searchFrom) {
return end
}
}
return firstNonFenceEnd
}
func cdataEndLooksStructural(lower string, after int) bool {
for after < len(lower) {
switch lower[after] {
case ' ', '\t', '\r', '\n':
func cdataEndLooksStructural(text string, after int) bool {
for after < len(text) {
switch {
case text[after] == ' ' || text[after] == '\t' || text[after] == '\r' || text[after] == '\n':
after++
continue
case after+1 < len(text) && text[after] == '<' && text[after+1] == '/':
return true
default:
return false
}
break
}
return strings.HasPrefix(lower[after:], "</")
return false
}
func cdataOffsetIsInsideMarkdownFence(fragment string) bool {

View File

@@ -28,9 +28,8 @@ type ToolMarkupTag struct {
}
func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
lower := strings.ToLower(text)
for i := 0; i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked {
return hasDSML, hasCanonical
}
@@ -56,9 +55,8 @@ func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical
}
func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
lower := strings.ToLower(text)
for i := 0; i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked {
return hasDSML, hasCanonical
}
@@ -88,9 +86,8 @@ func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanon
}
func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) {
lower := strings.ToLower(text)
for i := maxInt(start, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked {
return ToolMarkupTag{}, false
}
@@ -107,7 +104,7 @@ func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, boo
}
func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag, bool) {
if text == "" || open.Name == "" || open.Closing {
if text == "" || open.Name == "" || open.Closing || open.End >= len(text) {
return ToolMarkupTag{}, false
}
depth := 1

View File

@@ -892,3 +892,107 @@ func TestParseToolCallsSkipsProseMentionOfSameWrapperVariant(t *testing.T) {
t.Fatalf("expected command to parse, got %q", got)
}
}
func TestTurkishILowercaseMapping(t *testing.T) {
tests := []struct {
name string
text string
start int
wantOk bool
wantName string
}{
{"turkish_i_at_name_start", "İ<tool>", 0, false, ""},
{"turkish_i_at_name_end", "<toolİ>", 0, false, ""},
{"turkish_i_before_tag", "İ<tool>", 0, false, ""},
{"normal_tool_calls", "<tool_calls>", 0, true, "tool_calls"},
{"normal_invoke", "<invoke name=\"test\">", 0, true, "invoke"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, ok := FindToolMarkupTagOutsideIgnored(tt.text, tt.start)
if ok != tt.wantOk {
t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) ok = %v, want %v", tt.text, tt.start, ok, tt.wantOk)
return
}
if ok && got.Name != tt.wantName {
t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) name = %q, want %q", tt.text, tt.start, got.Name, tt.wantName)
}
})
}
}
func TestSkipXMLIgnoredSectionBoundaryConditions(t *testing.T) {
text := "hello"
tests := []struct {
name string
i int
wantNext int
wantAdv bool
wantBlk bool
}{
{"valid_index", 2, 2, false, false},
{"at_end_equal_len", 5, 5, false, false},
{"beyond_end", 6, 6, false, false},
{"negative", -1, -1, false, false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
next, adv, blk := skipXMLIgnoredSection(text, tt.i)
if next != tt.wantNext || adv != tt.wantAdv || blk != tt.wantBlk {
t.Errorf("skipXMLIgnoredSection(%q, %d) = (%d, %v, %v), want (%d, %v, %v)",
text, tt.i, next, adv, blk, tt.wantNext, tt.wantAdv, tt.wantBlk)
}
})
}
}
func TestFindToolCDATAEndBoundaryConditions(t *testing.T) {
text := "<![CDATA[hello]]>"
tests := []struct {
name string
from int
wantResult int
}{
{"valid", 12, 14},
{"at_end", 17, -1},
{"beyond_end", 18, -1},
{"negative", -1, -1},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := findToolCDATAEnd(text, tt.from)
if got != tt.wantResult {
t.Errorf("findToolCDATAEnd(%q, %d) = %d, want %d",
text, tt.from, got, tt.wantResult)
}
})
}
}
func TestFindMatchingToolMarkupCloseBoundaryConditions(t *testing.T) {
tests := []struct {
name string
text string
open ToolMarkupTag
wantOk bool
}{
{"empty_text", "", ToolMarkupTag{Name: "tool_calls", End: 0}, false},
{"open_end_beyond_text", "hello", ToolMarkupTag{Name: "tool_calls", End: 100}, false},
{"open_end_equals_len", "hello", ToolMarkupTag{Name: "tool_calls", End: 5}, false},
{"valid_simple", "<tool_calls></tool_calls>", ToolMarkupTag{Name: "tool_calls", End: 11}, true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, ok := FindMatchingToolMarkupClose(tt.text, tt.open)
if ok != tt.wantOk {
t.Errorf("FindMatchingToolMarkupClose(%q, %+v) ok = %v, want %v", tt.text, tt.open, ok, tt.wantOk)
}
})
}
}