mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-08 18:35:35 +08:00
Merge pull request #453 from CJackHwang/dev
Fix character length calculation issue
This commit is contained in:
@@ -17,11 +17,10 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
||||
if text == "" {
|
||||
return ""
|
||||
}
|
||||
lower := strings.ToLower(text)
|
||||
var b strings.Builder
|
||||
b.Grow(len(text))
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
b.WriteString(text[i:])
|
||||
break
|
||||
|
||||
@@ -144,7 +144,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
|
||||
lower := strings.ToLower(text)
|
||||
target := "<" + strings.ToLower(tag)
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return -1, -1, "", false
|
||||
}
|
||||
@@ -170,7 +170,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
||||
closeTarget := "</" + strings.ToLower(tag)
|
||||
depth := 1
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return -1, -1, false
|
||||
}
|
||||
@@ -206,16 +206,20 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
||||
return -1, -1, false
|
||||
}
|
||||
|
||||
func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool, blocked bool) {
|
||||
func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked bool) {
|
||||
if i < 0 || i >= len(text) {
|
||||
return i, false, false
|
||||
}
|
||||
tail := strings.ToLower(text[i:])
|
||||
switch {
|
||||
case strings.HasPrefix(lower[i:], "<![cdata["):
|
||||
end := findToolCDATAEnd(text, lower, i+len("<![cdata["))
|
||||
case strings.HasPrefix(tail, "<![cdata["):
|
||||
end := findToolCDATAEnd(text, i+len("<![cdata["))
|
||||
if end < 0 {
|
||||
return 0, false, true
|
||||
}
|
||||
return end + len("]]>"), true, false
|
||||
case strings.HasPrefix(lower[i:], "<!--"):
|
||||
end := strings.Index(lower[i+len("<!--"):], "-->")
|
||||
case strings.HasPrefix(tail, "<!--"):
|
||||
end := strings.Index(tail[len("<!--"):], "-->")
|
||||
if end < 0 {
|
||||
return 0, false, true
|
||||
}
|
||||
@@ -225,14 +229,14 @@ func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool,
|
||||
}
|
||||
}
|
||||
|
||||
func findToolCDATAEnd(text, lower string, from int) int {
|
||||
if from < 0 || from > len(text) {
|
||||
func findToolCDATAEnd(text string, from int) int {
|
||||
if from < 0 || from >= len(text) {
|
||||
return -1
|
||||
}
|
||||
const closeMarker = "]]>"
|
||||
firstNonFenceEnd := -1
|
||||
for searchFrom := from; searchFrom < len(text); {
|
||||
rel := strings.Index(lower[searchFrom:], closeMarker)
|
||||
rel := strings.Index(text[searchFrom:], closeMarker)
|
||||
if rel < 0 {
|
||||
break
|
||||
}
|
||||
@@ -241,27 +245,28 @@ func findToolCDATAEnd(text, lower string, from int) int {
|
||||
if cdataOffsetIsInsideMarkdownFence(text[from:end]) {
|
||||
continue
|
||||
}
|
||||
if cdataEndLooksStructural(text, searchFrom) {
|
||||
return end
|
||||
}
|
||||
if firstNonFenceEnd < 0 {
|
||||
firstNonFenceEnd = end
|
||||
}
|
||||
if cdataEndLooksStructural(lower, searchFrom) {
|
||||
return end
|
||||
}
|
||||
}
|
||||
return firstNonFenceEnd
|
||||
}
|
||||
|
||||
func cdataEndLooksStructural(lower string, after int) bool {
|
||||
for after < len(lower) {
|
||||
switch lower[after] {
|
||||
case ' ', '\t', '\r', '\n':
|
||||
func cdataEndLooksStructural(text string, after int) bool {
|
||||
for after < len(text) {
|
||||
switch {
|
||||
case text[after] == ' ' || text[after] == '\t' || text[after] == '\r' || text[after] == '\n':
|
||||
after++
|
||||
continue
|
||||
case after+1 < len(text) && text[after] == '<' && text[after+1] == '/':
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
break
|
||||
}
|
||||
return strings.HasPrefix(lower[after:], "</")
|
||||
return false
|
||||
}
|
||||
|
||||
func cdataOffsetIsInsideMarkdownFence(fragment string) bool {
|
||||
|
||||
@@ -28,9 +28,8 @@ type ToolMarkupTag struct {
|
||||
}
|
||||
|
||||
func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
||||
lower := strings.ToLower(text)
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return hasDSML, hasCanonical
|
||||
}
|
||||
@@ -56,9 +55,8 @@ func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical
|
||||
}
|
||||
|
||||
func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
||||
lower := strings.ToLower(text)
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return hasDSML, hasCanonical
|
||||
}
|
||||
@@ -88,9 +86,8 @@ func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanon
|
||||
}
|
||||
|
||||
func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) {
|
||||
lower := strings.ToLower(text)
|
||||
for i := maxInt(start, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
@@ -107,7 +104,7 @@ func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, boo
|
||||
}
|
||||
|
||||
func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag, bool) {
|
||||
if text == "" || open.Name == "" || open.Closing {
|
||||
if text == "" || open.Name == "" || open.Closing || open.End >= len(text) {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
depth := 1
|
||||
|
||||
@@ -892,3 +892,107 @@ func TestParseToolCallsSkipsProseMentionOfSameWrapperVariant(t *testing.T) {
|
||||
t.Fatalf("expected command to parse, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTurkishILowercaseMapping(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
start int
|
||||
wantOk bool
|
||||
wantName string
|
||||
}{
|
||||
{"turkish_i_at_name_start", "İ<tool>", 0, false, ""},
|
||||
{"turkish_i_at_name_end", "<toolİ>", 0, false, ""},
|
||||
{"turkish_i_before_tag", "İ<tool>", 0, false, ""},
|
||||
{"normal_tool_calls", "<tool_calls>", 0, true, "tool_calls"},
|
||||
{"normal_invoke", "<invoke name=\"test\">", 0, true, "invoke"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, ok := FindToolMarkupTagOutsideIgnored(tt.text, tt.start)
|
||||
if ok != tt.wantOk {
|
||||
t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) ok = %v, want %v", tt.text, tt.start, ok, tt.wantOk)
|
||||
return
|
||||
}
|
||||
if ok && got.Name != tt.wantName {
|
||||
t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) name = %q, want %q", tt.text, tt.start, got.Name, tt.wantName)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSkipXMLIgnoredSectionBoundaryConditions(t *testing.T) {
|
||||
text := "hello"
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
i int
|
||||
wantNext int
|
||||
wantAdv bool
|
||||
wantBlk bool
|
||||
}{
|
||||
{"valid_index", 2, 2, false, false},
|
||||
{"at_end_equal_len", 5, 5, false, false},
|
||||
{"beyond_end", 6, 6, false, false},
|
||||
{"negative", -1, -1, false, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
next, adv, blk := skipXMLIgnoredSection(text, tt.i)
|
||||
if next != tt.wantNext || adv != tt.wantAdv || blk != tt.wantBlk {
|
||||
t.Errorf("skipXMLIgnoredSection(%q, %d) = (%d, %v, %v), want (%d, %v, %v)",
|
||||
text, tt.i, next, adv, blk, tt.wantNext, tt.wantAdv, tt.wantBlk)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindToolCDATAEndBoundaryConditions(t *testing.T) {
|
||||
text := "<![CDATA[hello]]>"
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
from int
|
||||
wantResult int
|
||||
}{
|
||||
{"valid", 12, 14},
|
||||
{"at_end", 17, -1},
|
||||
{"beyond_end", 18, -1},
|
||||
{"negative", -1, -1},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := findToolCDATAEnd(text, tt.from)
|
||||
if got != tt.wantResult {
|
||||
t.Errorf("findToolCDATAEnd(%q, %d) = %d, want %d",
|
||||
text, tt.from, got, tt.wantResult)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindMatchingToolMarkupCloseBoundaryConditions(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
open ToolMarkupTag
|
||||
wantOk bool
|
||||
}{
|
||||
{"empty_text", "", ToolMarkupTag{Name: "tool_calls", End: 0}, false},
|
||||
{"open_end_beyond_text", "hello", ToolMarkupTag{Name: "tool_calls", End: 100}, false},
|
||||
{"open_end_equals_len", "hello", ToolMarkupTag{Name: "tool_calls", End: 5}, false},
|
||||
{"valid_simple", "<tool_calls></tool_calls>", ToolMarkupTag{Name: "tool_calls", End: 11}, true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
_, ok := FindMatchingToolMarkupClose(tt.text, tt.open)
|
||||
if ok != tt.wantOk {
|
||||
t.Errorf("FindMatchingToolMarkupClose(%q, %+v) ok = %v, want %v", tt.text, tt.open, ok, tt.wantOk)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user