mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-09 10:55:27 +08:00
Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7ab5a0e66d | ||
|
|
410efbd70b | ||
|
|
7179b995bb | ||
|
|
fef3798e5e | ||
|
|
00fe18b505 | ||
|
|
9b746e32d8 | ||
|
|
ace440481a |
@@ -17,11 +17,10 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
||||
if text == "" {
|
||||
return ""
|
||||
}
|
||||
lower := strings.ToLower(text)
|
||||
var b strings.Builder
|
||||
b.Grow(len(text))
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
b.WriteString(text[i:])
|
||||
break
|
||||
|
||||
@@ -144,7 +144,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
|
||||
lower := strings.ToLower(text)
|
||||
target := "<" + strings.ToLower(tag)
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return -1, -1, "", false
|
||||
}
|
||||
@@ -170,7 +170,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
||||
closeTarget := "</" + strings.ToLower(tag)
|
||||
depth := 1
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return -1, -1, false
|
||||
}
|
||||
@@ -206,16 +206,19 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
||||
return -1, -1, false
|
||||
}
|
||||
|
||||
func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool, blocked bool) {
|
||||
func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked bool) {
|
||||
if i < 0 || i >= len(text) {
|
||||
return i, false, false
|
||||
}
|
||||
switch {
|
||||
case strings.HasPrefix(lower[i:], "<![cdata["):
|
||||
end := findToolCDATAEnd(text, lower, i+len("<![cdata["))
|
||||
case hasASCIIPrefixFoldAt(text, i, "<![cdata["):
|
||||
end := findToolCDATAEnd(text, i+len("<![cdata["))
|
||||
if end < 0 {
|
||||
return 0, false, true
|
||||
}
|
||||
return end + len("]]>"), true, false
|
||||
case strings.HasPrefix(lower[i:], "<!--"):
|
||||
end := strings.Index(lower[i+len("<!--"):], "-->")
|
||||
case strings.HasPrefix(text[i:], "<!--"):
|
||||
end := strings.Index(text[i+len("<!--"):], "-->")
|
||||
if end < 0 {
|
||||
return 0, false, true
|
||||
}
|
||||
@@ -225,14 +228,33 @@ func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool,
|
||||
}
|
||||
}
|
||||
|
||||
func findToolCDATAEnd(text, lower string, from int) int {
|
||||
if from < 0 || from > len(text) {
|
||||
func hasASCIIPrefixFoldAt(text string, start int, prefix string) bool {
|
||||
if start < 0 || len(text)-start < len(prefix) {
|
||||
return false
|
||||
}
|
||||
for j := 0; j < len(prefix); j++ {
|
||||
if asciiLower(text[start+j]) != asciiLower(prefix[j]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func asciiLower(b byte) byte {
|
||||
if b >= 'A' && b <= 'Z' {
|
||||
return b + ('a' - 'A')
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func findToolCDATAEnd(text string, from int) int {
|
||||
if from < 0 || from >= len(text) {
|
||||
return -1
|
||||
}
|
||||
const closeMarker = "]]>"
|
||||
firstNonFenceEnd := -1
|
||||
for searchFrom := from; searchFrom < len(text); {
|
||||
rel := strings.Index(lower[searchFrom:], closeMarker)
|
||||
rel := strings.Index(text[searchFrom:], closeMarker)
|
||||
if rel < 0 {
|
||||
break
|
||||
}
|
||||
@@ -241,27 +263,28 @@ func findToolCDATAEnd(text, lower string, from int) int {
|
||||
if cdataOffsetIsInsideMarkdownFence(text[from:end]) {
|
||||
continue
|
||||
}
|
||||
if cdataEndLooksStructural(text, searchFrom) {
|
||||
return end
|
||||
}
|
||||
if firstNonFenceEnd < 0 {
|
||||
firstNonFenceEnd = end
|
||||
}
|
||||
if cdataEndLooksStructural(lower, searchFrom) {
|
||||
return end
|
||||
}
|
||||
}
|
||||
return firstNonFenceEnd
|
||||
}
|
||||
|
||||
func cdataEndLooksStructural(lower string, after int) bool {
|
||||
for after < len(lower) {
|
||||
switch lower[after] {
|
||||
case ' ', '\t', '\r', '\n':
|
||||
func cdataEndLooksStructural(text string, after int) bool {
|
||||
for after < len(text) {
|
||||
switch {
|
||||
case text[after] == ' ' || text[after] == '\t' || text[after] == '\r' || text[after] == '\n':
|
||||
after++
|
||||
continue
|
||||
case after+1 < len(text) && text[after] == '<' && text[after+1] == '/':
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
break
|
||||
}
|
||||
return strings.HasPrefix(lower[after:], "</")
|
||||
return false
|
||||
}
|
||||
|
||||
func cdataOffsetIsInsideMarkdownFence(fragment string) bool {
|
||||
|
||||
@@ -28,9 +28,8 @@ type ToolMarkupTag struct {
|
||||
}
|
||||
|
||||
func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
||||
lower := strings.ToLower(text)
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return hasDSML, hasCanonical
|
||||
}
|
||||
@@ -56,9 +55,8 @@ func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical
|
||||
}
|
||||
|
||||
func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
||||
lower := strings.ToLower(text)
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return hasDSML, hasCanonical
|
||||
}
|
||||
@@ -88,9 +86,8 @@ func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanon
|
||||
}
|
||||
|
||||
func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) {
|
||||
lower := strings.ToLower(text)
|
||||
for i := maxInt(start, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
@@ -107,7 +104,7 @@ func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, boo
|
||||
}
|
||||
|
||||
func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag, bool) {
|
||||
if text == "" || open.Name == "" || open.Closing {
|
||||
if text == "" || open.Name == "" || open.Closing || open.End >= len(text) {
|
||||
return ToolMarkupTag{}, false
|
||||
}
|
||||
depth := 1
|
||||
|
||||
@@ -892,3 +892,139 @@ func TestParseToolCallsSkipsProseMentionOfSameWrapperVariant(t *testing.T) {
|
||||
t.Fatalf("expected command to parse, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTurkishILowercaseMapping(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
start int
|
||||
wantOk bool
|
||||
wantName string
|
||||
}{
|
||||
{"turkish_i_at_name_start", "İ<tool>", 0, false, ""},
|
||||
{"turkish_i_at_name_end", "<toolİ>", 0, false, ""},
|
||||
{"turkish_i_before_tag", "İ<tool>", 0, false, ""},
|
||||
{"normal_tool_calls", "<tool_calls>", 0, true, "tool_calls"},
|
||||
{"normal_invoke", "<invoke name=\"test\">", 0, true, "invoke"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, ok := FindToolMarkupTagOutsideIgnored(tt.text, tt.start)
|
||||
if ok != tt.wantOk {
|
||||
t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) ok = %v, want %v", tt.text, tt.start, ok, tt.wantOk)
|
||||
return
|
||||
}
|
||||
if ok && got.Name != tt.wantName {
|
||||
t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) name = %q, want %q", tt.text, tt.start, got.Name, tt.wantName)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSkipXMLIgnoredSectionBoundaryConditions(t *testing.T) {
|
||||
text := "hello"
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
i int
|
||||
wantNext int
|
||||
wantAdv bool
|
||||
wantBlk bool
|
||||
}{
|
||||
{"valid_index", 2, 2, false, false},
|
||||
{"at_end_equal_len", 5, 5, false, false},
|
||||
{"beyond_end", 6, 6, false, false},
|
||||
{"negative", -1, -1, false, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
next, adv, blk := skipXMLIgnoredSection(text, tt.i)
|
||||
if next != tt.wantNext || adv != tt.wantAdv || blk != tt.wantBlk {
|
||||
t.Errorf("skipXMLIgnoredSection(%q, %d) = (%d, %v, %v), want (%d, %v, %v)",
|
||||
text, tt.i, next, adv, blk, tt.wantNext, tt.wantAdv, tt.wantBlk)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSkipXMLIgnoredSectionCommentWithUnicodeKeepsByteOffset(t *testing.T) {
|
||||
text := "<!-- İ -->x<tool_calls>"
|
||||
|
||||
next, adv, blk := skipXMLIgnoredSection(text, 0)
|
||||
if blk || !adv {
|
||||
t.Fatalf("skipXMLIgnoredSection() = (%d, %v, %v), want advanced unblocked comment", next, adv, blk)
|
||||
}
|
||||
if want := len("<!-- İ -->"); next != want {
|
||||
t.Fatalf("skipXMLIgnoredSection() next = %d, want %d", next, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSkipXMLIgnoredSectionMatchesCDATAWithoutAllocatingTail(t *testing.T) {
|
||||
text := "<![cDaTa[<tool_calls>]]><tool_calls>"
|
||||
|
||||
next, adv, blk := skipXMLIgnoredSection(text, 0)
|
||||
if blk || !adv {
|
||||
t.Fatalf("skipXMLIgnoredSection() = (%d, %v, %v), want advanced unblocked CDATA", next, adv, blk)
|
||||
}
|
||||
if want := len("<![cDaTa[<tool_calls>]]>"); next != want {
|
||||
t.Fatalf("skipXMLIgnoredSection() next = %d, want %d", next, want)
|
||||
}
|
||||
|
||||
tag, ok := FindToolMarkupTagOutsideIgnored(text, 0)
|
||||
if !ok {
|
||||
t.Fatal("expected tool tag after skipped CDATA")
|
||||
}
|
||||
if tag.Start != next {
|
||||
t.Fatalf("FindToolMarkupTagOutsideIgnored() start = %d, want %d", tag.Start, next)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindToolCDATAEndBoundaryConditions(t *testing.T) {
|
||||
text := "<![CDATA[hello]]>"
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
from int
|
||||
wantResult int
|
||||
}{
|
||||
{"valid", 12, 14},
|
||||
{"at_end", 17, -1},
|
||||
{"beyond_end", 18, -1},
|
||||
{"negative", -1, -1},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := findToolCDATAEnd(text, tt.from)
|
||||
if got != tt.wantResult {
|
||||
t.Errorf("findToolCDATAEnd(%q, %d) = %d, want %d",
|
||||
text, tt.from, got, tt.wantResult)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindMatchingToolMarkupCloseBoundaryConditions(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
open ToolMarkupTag
|
||||
wantOk bool
|
||||
}{
|
||||
{"empty_text", "", ToolMarkupTag{Name: "tool_calls", End: 0}, false},
|
||||
{"open_end_beyond_text", "hello", ToolMarkupTag{Name: "tool_calls", End: 100}, false},
|
||||
{"open_end_equals_len", "hello", ToolMarkupTag{Name: "tool_calls", End: 5}, false},
|
||||
{"valid_simple", "<tool_calls></tool_calls>", ToolMarkupTag{Name: "tool_calls", End: 11}, true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
_, ok := FindMatchingToolMarkupClose(tt.text, tt.open)
|
||||
if ok != tt.wantOk {
|
||||
t.Errorf("FindMatchingToolMarkupClose(%q, %+v) ok = %v, want %v", tt.text, tt.open, ok, tt.wantOk)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user