Merge pull request #453 from CJackHwang/dev

Fix character length calculation issue
This commit is contained in:
CJACK.
2026-05-08 13:40:47 +08:00
committed by GitHub
5 changed files with 135 additions and 30 deletions

View File

@@ -1 +1 @@
4.4.4
4.4.5

View File

@@ -17,11 +17,10 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
if text == "" {
return ""
}
lower := strings.ToLower(text)
var b strings.Builder
b.Grow(len(text))
for i := 0; i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked {
b.WriteString(text[i:])
break

View File

@@ -144,7 +144,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
lower := strings.ToLower(text)
target := "<" + strings.ToLower(tag)
for i := maxInt(from, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked {
return -1, -1, "", false
}
@@ -170,7 +170,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
closeTarget := "</" + strings.ToLower(tag)
depth := 1
for i := maxInt(from, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked {
return -1, -1, false
}
@@ -206,16 +206,20 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
return -1, -1, false
}
func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool, blocked bool) {
func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked bool) {
if i < 0 || i >= len(text) {
return i, false, false
}
tail := strings.ToLower(text[i:])
switch {
case strings.HasPrefix(lower[i:], "<![cdata["):
end := findToolCDATAEnd(text, lower, i+len("<![cdata["))
case strings.HasPrefix(tail, "<![cdata["):
end := findToolCDATAEnd(text, i+len("<![cdata["))
if end < 0 {
return 0, false, true
}
return end + len("]]>"), true, false
case strings.HasPrefix(lower[i:], "<!--"):
end := strings.Index(lower[i+len("<!--"):], "-->")
case strings.HasPrefix(tail, "<!--"):
end := strings.Index(tail[len("<!--"):], "-->")
if end < 0 {
return 0, false, true
}
@@ -225,14 +229,14 @@ func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool,
}
}
func findToolCDATAEnd(text, lower string, from int) int {
if from < 0 || from > len(text) {
func findToolCDATAEnd(text string, from int) int {
if from < 0 || from >= len(text) {
return -1
}
const closeMarker = "]]>"
firstNonFenceEnd := -1
for searchFrom := from; searchFrom < len(text); {
rel := strings.Index(lower[searchFrom:], closeMarker)
rel := strings.Index(text[searchFrom:], closeMarker)
if rel < 0 {
break
}
@@ -241,27 +245,28 @@ func findToolCDATAEnd(text, lower string, from int) int {
if cdataOffsetIsInsideMarkdownFence(text[from:end]) {
continue
}
if cdataEndLooksStructural(text, searchFrom) {
return end
}
if firstNonFenceEnd < 0 {
firstNonFenceEnd = end
}
if cdataEndLooksStructural(lower, searchFrom) {
return end
}
}
return firstNonFenceEnd
}
func cdataEndLooksStructural(lower string, after int) bool {
for after < len(lower) {
switch lower[after] {
case ' ', '\t', '\r', '\n':
func cdataEndLooksStructural(text string, after int) bool {
for after < len(text) {
switch {
case text[after] == ' ' || text[after] == '\t' || text[after] == '\r' || text[after] == '\n':
after++
continue
case after+1 < len(text) && text[after] == '<' && text[after+1] == '/':
return true
default:
return false
}
break
}
return strings.HasPrefix(lower[after:], "</")
return false
}
func cdataOffsetIsInsideMarkdownFence(fragment string) bool {

View File

@@ -28,9 +28,8 @@ type ToolMarkupTag struct {
}
func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
lower := strings.ToLower(text)
for i := 0; i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked {
return hasDSML, hasCanonical
}
@@ -56,9 +55,8 @@ func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical
}
func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
lower := strings.ToLower(text)
for i := 0; i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked {
return hasDSML, hasCanonical
}
@@ -88,9 +86,8 @@ func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanon
}
func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) {
lower := strings.ToLower(text)
for i := maxInt(start, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
next, advanced, blocked := skipXMLIgnoredSection(text, i)
if blocked {
return ToolMarkupTag{}, false
}
@@ -107,7 +104,7 @@ func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, boo
}
func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag, bool) {
if text == "" || open.Name == "" || open.Closing {
if text == "" || open.Name == "" || open.Closing || open.End >= len(text) {
return ToolMarkupTag{}, false
}
depth := 1

View File

@@ -892,3 +892,107 @@ func TestParseToolCallsSkipsProseMentionOfSameWrapperVariant(t *testing.T) {
t.Fatalf("expected command to parse, got %q", got)
}
}
func TestTurkishILowercaseMapping(t *testing.T) {
tests := []struct {
name string
text string
start int
wantOk bool
wantName string
}{
{"turkish_i_at_name_start", "İ<tool>", 0, false, ""},
{"turkish_i_at_name_end", "<toolİ>", 0, false, ""},
{"turkish_i_before_tag", "İ<tool>", 0, false, ""},
{"normal_tool_calls", "<tool_calls>", 0, true, "tool_calls"},
{"normal_invoke", "<invoke name=\"test\">", 0, true, "invoke"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, ok := FindToolMarkupTagOutsideIgnored(tt.text, tt.start)
if ok != tt.wantOk {
t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) ok = %v, want %v", tt.text, tt.start, ok, tt.wantOk)
return
}
if ok && got.Name != tt.wantName {
t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) name = %q, want %q", tt.text, tt.start, got.Name, tt.wantName)
}
})
}
}
func TestSkipXMLIgnoredSectionBoundaryConditions(t *testing.T) {
text := "hello"
tests := []struct {
name string
i int
wantNext int
wantAdv bool
wantBlk bool
}{
{"valid_index", 2, 2, false, false},
{"at_end_equal_len", 5, 5, false, false},
{"beyond_end", 6, 6, false, false},
{"negative", -1, -1, false, false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
next, adv, blk := skipXMLIgnoredSection(text, tt.i)
if next != tt.wantNext || adv != tt.wantAdv || blk != tt.wantBlk {
t.Errorf("skipXMLIgnoredSection(%q, %d) = (%d, %v, %v), want (%d, %v, %v)",
text, tt.i, next, adv, blk, tt.wantNext, tt.wantAdv, tt.wantBlk)
}
})
}
}
func TestFindToolCDATAEndBoundaryConditions(t *testing.T) {
text := "<![CDATA[hello]]>"
tests := []struct {
name string
from int
wantResult int
}{
{"valid", 12, 14},
{"at_end", 17, -1},
{"beyond_end", 18, -1},
{"negative", -1, -1},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := findToolCDATAEnd(text, tt.from)
if got != tt.wantResult {
t.Errorf("findToolCDATAEnd(%q, %d) = %d, want %d",
text, tt.from, got, tt.wantResult)
}
})
}
}
func TestFindMatchingToolMarkupCloseBoundaryConditions(t *testing.T) {
tests := []struct {
name string
text string
open ToolMarkupTag
wantOk bool
}{
{"empty_text", "", ToolMarkupTag{Name: "tool_calls", End: 0}, false},
{"open_end_beyond_text", "hello", ToolMarkupTag{Name: "tool_calls", End: 100}, false},
{"open_end_equals_len", "hello", ToolMarkupTag{Name: "tool_calls", End: 5}, false},
{"valid_simple", "<tool_calls></tool_calls>", ToolMarkupTag{Name: "tool_calls", End: 11}, true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, ok := FindMatchingToolMarkupClose(tt.text, tt.open)
if ok != tt.wantOk {
t.Errorf("FindMatchingToolMarkupClose(%q, %+v) ok = %v, want %v", tt.text, tt.open, ok, tt.wantOk)
}
})
}
}