mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-09 18:57:43 +08:00
refactor(toolcall): remove lower param from skipXMLIgnoredSection
The lower parameter was a footgun: callers had to keep it in sync with the loop bound over text. Instead, skipXMLIgnoredSection now accepts only text and constructs strings.ToLower(tail) internally for its prefix checks. This eliminates the entire class of len(text) vs len(lower) boundary bugs along with the min() workaround. Also changes: - findToolCDATAEnd: drop lower param, use text directly for closeMarker search (]]> is ASCII, ToLower is a no-op for it) - cdataEndLooksStructural: drop lower param, use raw text byte comparison - All external callers: loop bound reverts to plain len(text) The inner tag-matching functions (findXMLStartTagOutsideCDATA, findMatchingXMLEndTagOutsideCDATA) retain their own local lower for HasPrefix comparisons against the target tag name, keeping concerns properly separated. Fixes #435.
This commit is contained in:
@@ -17,11 +17,10 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
||||
if text == "" {
|
||||
return ""
|
||||
}
|
||||
lower := strings.ToLower(text)
|
||||
var b strings.Builder
|
||||
b.Grow(len(text))
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
b.WriteString(text[i:])
|
||||
break
|
||||
|
||||
@@ -144,7 +144,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
|
||||
lower := strings.ToLower(text)
|
||||
target := "<" + strings.ToLower(tag)
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return -1, -1, "", false
|
||||
}
|
||||
@@ -170,7 +170,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
||||
closeTarget := "</" + strings.ToLower(tag)
|
||||
depth := 1
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return -1, -1, false
|
||||
}
|
||||
@@ -206,16 +206,20 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
||||
return -1, -1, false
|
||||
}
|
||||
|
||||
func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool, blocked bool) {
|
||||
func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked bool) {
|
||||
if i < 0 || i >= len(text) {
|
||||
return i, false, false
|
||||
}
|
||||
tail := strings.ToLower(text[i:])
|
||||
switch {
|
||||
case strings.HasPrefix(lower[i:], "<![cdata["):
|
||||
end := findToolCDATAEnd(text, lower, i+len("<![cdata["))
|
||||
case strings.HasPrefix(tail, "<![cdata["):
|
||||
end := findToolCDATAEnd(text, i+len("<![cdata["))
|
||||
if end < 0 {
|
||||
return 0, false, true
|
||||
}
|
||||
return end + len("]]>"), true, false
|
||||
case strings.HasPrefix(lower[i:], "<!--"):
|
||||
end := strings.Index(lower[i+len("<!--"):], "-->")
|
||||
case strings.HasPrefix(tail, "<!--"):
|
||||
end := strings.Index(tail[len("<!--"):], "-->")
|
||||
if end < 0 {
|
||||
return 0, false, true
|
||||
}
|
||||
@@ -225,14 +229,14 @@ func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool,
|
||||
}
|
||||
}
|
||||
|
||||
func findToolCDATAEnd(text, lower string, from int) int {
|
||||
if from < 0 || from > len(text) {
|
||||
func findToolCDATAEnd(text string, from int) int {
|
||||
if from < 0 || from >= len(text) {
|
||||
return -1
|
||||
}
|
||||
const closeMarker = "]]>"
|
||||
firstNonFenceEnd := -1
|
||||
for searchFrom := from; searchFrom < len(text); {
|
||||
rel := strings.Index(lower[searchFrom:], closeMarker)
|
||||
rel := strings.Index(text[searchFrom:], closeMarker)
|
||||
if rel < 0 {
|
||||
break
|
||||
}
|
||||
@@ -241,27 +245,28 @@ func findToolCDATAEnd(text, lower string, from int) int {
|
||||
if cdataOffsetIsInsideMarkdownFence(text[from:end]) {
|
||||
continue
|
||||
}
|
||||
if cdataEndLooksStructural(text, searchFrom) {
|
||||
return end
|
||||
}
|
||||
if firstNonFenceEnd < 0 {
|
||||
firstNonFenceEnd = end
|
||||
}
|
||||
if cdataEndLooksStructural(lower, searchFrom) {
|
||||
return end
|
||||
}
|
||||
}
|
||||
return firstNonFenceEnd
|
||||
}
|
||||
|
||||
func cdataEndLooksStructural(lower string, after int) bool {
|
||||
for after < len(lower) {
|
||||
switch lower[after] {
|
||||
case ' ', '\t', '\r', '\n':
|
||||
func cdataEndLooksStructural(text string, after int) bool {
|
||||
for after < len(text) {
|
||||
switch {
|
||||
case text[after] == ' ' || text[after] == '\t' || text[after] == '\r' || text[after] == '\n':
|
||||
after++
|
||||
continue
|
||||
case after+1 < len(text) && text[after] == '<' && text[after+1] == '/':
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
break
|
||||
}
|
||||
return strings.HasPrefix(lower[after:], "</")
|
||||
return false
|
||||
}
|
||||
|
||||
func cdataOffsetIsInsideMarkdownFence(fragment string) bool {
|
||||
|
||||
@@ -28,9 +28,8 @@ type ToolMarkupTag struct {
|
||||
}
|
||||
|
||||
func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
||||
lower := strings.ToLower(text)
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return hasDSML, hasCanonical
|
||||
}
|
||||
@@ -56,9 +55,8 @@ func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical
|
||||
}
|
||||
|
||||
func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
||||
lower := strings.ToLower(text)
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return hasDSML, hasCanonical
|
||||
}
|
||||
@@ -88,9 +86,8 @@ func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanon
|
||||
}
|
||||
|
||||
func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) {
|
||||
lower := strings.ToLower(text)
|
||||
for i := maxInt(start, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
@@ -107,7 +104,7 @@ func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, boo
|
||||
}
|
||||
|
||||
func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag, bool) {
|
||||
if text == "" || open.Name == "" || open.Closing {
|
||||
if text == "" || open.Name == "" || open.Closing || open.End >= len(text) {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
depth := 1
|
||||
|
||||
@@ -892,3 +892,107 @@ func TestParseToolCallsSkipsProseMentionOfSameWrapperVariant(t *testing.T) {
|
||||
t.Fatalf("expected command to parse, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTurkishILowercaseMapping(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
start int
|
||||
wantOk bool
|
||||
wantName string
|
||||
}{
|
||||
{"turkish_i_at_name_start", "İ<tool>", 0, false, ""},
|
||||
{"turkish_i_at_name_end", "<toolİ>", 0, false, ""},
|
||||
{"turkish_i_before_tag", "İ<tool>", 0, false, ""},
|
||||
{"normal_tool_calls", "<tool_calls>", 0, true, "tool_calls"},
|
||||
{"normal_invoke", "<invoke name=\"test\">", 0, true, "invoke"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, ok := FindToolMarkupTagOutsideIgnored(tt.text, tt.start)
|
||||
if ok != tt.wantOk {
|
||||
t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) ok = %v, want %v", tt.text, tt.start, ok, tt.wantOk)
|
||||
return
|
||||
}
|
||||
if ok && got.Name != tt.wantName {
|
||||
t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) name = %q, want %q", tt.text, tt.start, got.Name, tt.wantName)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSkipXMLIgnoredSectionBoundaryConditions(t *testing.T) {
|
||||
text := "hello"
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
i int
|
||||
wantNext int
|
||||
wantAdv bool
|
||||
wantBlk bool
|
||||
}{
|
||||
{"valid_index", 2, 2, false, false},
|
||||
{"at_end_equal_len", 5, 5, false, false},
|
||||
{"beyond_end", 6, 6, false, false},
|
||||
{"negative", -1, -1, false, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
next, adv, blk := skipXMLIgnoredSection(text, tt.i)
|
||||
if next != tt.wantNext || adv != tt.wantAdv || blk != tt.wantBlk {
|
||||
t.Errorf("skipXMLIgnoredSection(%q, %d) = (%d, %v, %v), want (%d, %v, %v)",
|
||||
text, tt.i, next, adv, blk, tt.wantNext, tt.wantAdv, tt.wantBlk)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindToolCDATAEndBoundaryConditions(t *testing.T) {
|
||||
text := "<![CDATA[hello]]>"
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
from int
|
||||
wantResult int
|
||||
}{
|
||||
{"valid", 12, 14},
|
||||
{"at_end", 17, -1},
|
||||
{"beyond_end", 18, -1},
|
||||
{"negative", -1, -1},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := findToolCDATAEnd(text, tt.from)
|
||||
if got != tt.wantResult {
|
||||
t.Errorf("findToolCDATAEnd(%q, %d) = %d, want %d",
|
||||
text, tt.from, got, tt.wantResult)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindMatchingToolMarkupCloseBoundaryConditions(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
open ToolMarkupTag
|
||||
wantOk bool
|
||||
}{
|
||||
{"empty_text", "", ToolMarkupTag{Name: "tool_calls", End: 0}, false},
|
||||
{"open_end_beyond_text", "hello", ToolMarkupTag{Name: "tool_calls", End: 100}, false},
|
||||
{"open_end_equals_len", "hello", ToolMarkupTag{Name: "tool_calls", End: 5}, false},
|
||||
{"valid_simple", "<tool_calls></tool_calls>", ToolMarkupTag{Name: "tool_calls", End: 11}, true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
_, ok := FindMatchingToolMarkupClose(tt.text, tt.open)
|
||||
if ok != tt.wantOk {
|
||||
t.Errorf("FindMatchingToolMarkupClose(%q, %+v) ok = %v, want %v", tt.text, tt.open, ok, tt.wantOk)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user