mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-09 18:57:43 +08:00
Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7ab5a0e66d | ||
|
|
410efbd70b | ||
|
|
7179b995bb | ||
|
|
fef3798e5e | ||
|
|
00fe18b505 | ||
|
|
9b746e32d8 | ||
|
|
ace440481a |
@@ -17,11 +17,10 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
|||||||
if text == "" {
|
if text == "" {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
lower := strings.ToLower(text)
|
|
||||||
var b strings.Builder
|
var b strings.Builder
|
||||||
b.Grow(len(text))
|
b.Grow(len(text))
|
||||||
for i := 0; i < len(text); {
|
for i := 0; i < len(text); {
|
||||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||||
if blocked {
|
if blocked {
|
||||||
b.WriteString(text[i:])
|
b.WriteString(text[i:])
|
||||||
break
|
break
|
||||||
|
|||||||
@@ -144,7 +144,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
|
|||||||
lower := strings.ToLower(text)
|
lower := strings.ToLower(text)
|
||||||
target := "<" + strings.ToLower(tag)
|
target := "<" + strings.ToLower(tag)
|
||||||
for i := maxInt(from, 0); i < len(text); {
|
for i := maxInt(from, 0); i < len(text); {
|
||||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||||
if blocked {
|
if blocked {
|
||||||
return -1, -1, "", false
|
return -1, -1, "", false
|
||||||
}
|
}
|
||||||
@@ -170,7 +170,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
|||||||
closeTarget := "</" + strings.ToLower(tag)
|
closeTarget := "</" + strings.ToLower(tag)
|
||||||
depth := 1
|
depth := 1
|
||||||
for i := maxInt(from, 0); i < len(text); {
|
for i := maxInt(from, 0); i < len(text); {
|
||||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||||
if blocked {
|
if blocked {
|
||||||
return -1, -1, false
|
return -1, -1, false
|
||||||
}
|
}
|
||||||
@@ -206,16 +206,19 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
|||||||
return -1, -1, false
|
return -1, -1, false
|
||||||
}
|
}
|
||||||
|
|
||||||
func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool, blocked bool) {
|
func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked bool) {
|
||||||
|
if i < 0 || i >= len(text) {
|
||||||
|
return i, false, false
|
||||||
|
}
|
||||||
switch {
|
switch {
|
||||||
case strings.HasPrefix(lower[i:], "<![cdata["):
|
case hasASCIIPrefixFoldAt(text, i, "<![cdata["):
|
||||||
end := findToolCDATAEnd(text, lower, i+len("<![cdata["))
|
end := findToolCDATAEnd(text, i+len("<![cdata["))
|
||||||
if end < 0 {
|
if end < 0 {
|
||||||
return 0, false, true
|
return 0, false, true
|
||||||
}
|
}
|
||||||
return end + len("]]>"), true, false
|
return end + len("]]>"), true, false
|
||||||
case strings.HasPrefix(lower[i:], "<!--"):
|
case strings.HasPrefix(text[i:], "<!--"):
|
||||||
end := strings.Index(lower[i+len("<!--"):], "-->")
|
end := strings.Index(text[i+len("<!--"):], "-->")
|
||||||
if end < 0 {
|
if end < 0 {
|
||||||
return 0, false, true
|
return 0, false, true
|
||||||
}
|
}
|
||||||
@@ -225,14 +228,33 @@ func skipXMLIgnoredSection(text, lower string, i int) (next int, advanced bool,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func findToolCDATAEnd(text, lower string, from int) int {
|
func hasASCIIPrefixFoldAt(text string, start int, prefix string) bool {
|
||||||
if from < 0 || from > len(text) {
|
if start < 0 || len(text)-start < len(prefix) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for j := 0; j < len(prefix); j++ {
|
||||||
|
if asciiLower(text[start+j]) != asciiLower(prefix[j]) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func asciiLower(b byte) byte {
|
||||||
|
if b >= 'A' && b <= 'Z' {
|
||||||
|
return b + ('a' - 'A')
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func findToolCDATAEnd(text string, from int) int {
|
||||||
|
if from < 0 || from >= len(text) {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
const closeMarker = "]]>"
|
const closeMarker = "]]>"
|
||||||
firstNonFenceEnd := -1
|
firstNonFenceEnd := -1
|
||||||
for searchFrom := from; searchFrom < len(text); {
|
for searchFrom := from; searchFrom < len(text); {
|
||||||
rel := strings.Index(lower[searchFrom:], closeMarker)
|
rel := strings.Index(text[searchFrom:], closeMarker)
|
||||||
if rel < 0 {
|
if rel < 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -241,27 +263,28 @@ func findToolCDATAEnd(text, lower string, from int) int {
|
|||||||
if cdataOffsetIsInsideMarkdownFence(text[from:end]) {
|
if cdataOffsetIsInsideMarkdownFence(text[from:end]) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if cdataEndLooksStructural(text, searchFrom) {
|
||||||
|
return end
|
||||||
|
}
|
||||||
if firstNonFenceEnd < 0 {
|
if firstNonFenceEnd < 0 {
|
||||||
firstNonFenceEnd = end
|
firstNonFenceEnd = end
|
||||||
}
|
}
|
||||||
if cdataEndLooksStructural(lower, searchFrom) {
|
|
||||||
return end
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return firstNonFenceEnd
|
return firstNonFenceEnd
|
||||||
}
|
}
|
||||||
|
|
||||||
func cdataEndLooksStructural(lower string, after int) bool {
|
func cdataEndLooksStructural(text string, after int) bool {
|
||||||
for after < len(lower) {
|
for after < len(text) {
|
||||||
switch lower[after] {
|
switch {
|
||||||
case ' ', '\t', '\r', '\n':
|
case text[after] == ' ' || text[after] == '\t' || text[after] == '\r' || text[after] == '\n':
|
||||||
after++
|
after++
|
||||||
continue
|
case after+1 < len(text) && text[after] == '<' && text[after+1] == '/':
|
||||||
|
return true
|
||||||
default:
|
default:
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
break
|
|
||||||
}
|
}
|
||||||
return strings.HasPrefix(lower[after:], "</")
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func cdataOffsetIsInsideMarkdownFence(fragment string) bool {
|
func cdataOffsetIsInsideMarkdownFence(fragment string) bool {
|
||||||
|
|||||||
@@ -28,9 +28,8 @@ type ToolMarkupTag struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
||||||
lower := strings.ToLower(text)
|
|
||||||
for i := 0; i < len(text); {
|
for i := 0; i < len(text); {
|
||||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||||
if blocked {
|
if blocked {
|
||||||
return hasDSML, hasCanonical
|
return hasDSML, hasCanonical
|
||||||
}
|
}
|
||||||
@@ -56,9 +55,8 @@ func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical
|
|||||||
}
|
}
|
||||||
|
|
||||||
func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
||||||
lower := strings.ToLower(text)
|
|
||||||
for i := 0; i < len(text); {
|
for i := 0; i < len(text); {
|
||||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||||
if blocked {
|
if blocked {
|
||||||
return hasDSML, hasCanonical
|
return hasDSML, hasCanonical
|
||||||
}
|
}
|
||||||
@@ -88,9 +86,8 @@ func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanon
|
|||||||
}
|
}
|
||||||
|
|
||||||
func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) {
|
func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) {
|
||||||
lower := strings.ToLower(text)
|
|
||||||
for i := maxInt(start, 0); i < len(text); {
|
for i := maxInt(start, 0); i < len(text); {
|
||||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||||
if blocked {
|
if blocked {
|
||||||
return ToolMarkupTag{}, false
|
return ToolMarkupTag{}, false
|
||||||
}
|
}
|
||||||
@@ -107,7 +104,7 @@ func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, boo
|
|||||||
}
|
}
|
||||||
|
|
||||||
func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag, bool) {
|
func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag, bool) {
|
||||||
if text == "" || open.Name == "" || open.Closing {
|
if text == "" || open.Name == "" || open.Closing || open.End >= len(text) {
|
||||||
return ToolMarkupTag{}, false
|
return ToolMarkupTag{}, false
|
||||||
}
|
}
|
||||||
depth := 1
|
depth := 1
|
||||||
|
|||||||
@@ -892,3 +892,139 @@ func TestParseToolCallsSkipsProseMentionOfSameWrapperVariant(t *testing.T) {
|
|||||||
t.Fatalf("expected command to parse, got %q", got)
|
t.Fatalf("expected command to parse, got %q", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestTurkishILowercaseMapping(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
text string
|
||||||
|
start int
|
||||||
|
wantOk bool
|
||||||
|
wantName string
|
||||||
|
}{
|
||||||
|
{"turkish_i_at_name_start", "İ<tool>", 0, false, ""},
|
||||||
|
{"turkish_i_at_name_end", "<toolİ>", 0, false, ""},
|
||||||
|
{"turkish_i_before_tag", "İ<tool>", 0, false, ""},
|
||||||
|
{"normal_tool_calls", "<tool_calls>", 0, true, "tool_calls"},
|
||||||
|
{"normal_invoke", "<invoke name=\"test\">", 0, true, "invoke"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got, ok := FindToolMarkupTagOutsideIgnored(tt.text, tt.start)
|
||||||
|
if ok != tt.wantOk {
|
||||||
|
t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) ok = %v, want %v", tt.text, tt.start, ok, tt.wantOk)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if ok && got.Name != tt.wantName {
|
||||||
|
t.Errorf("FindToolMarkupTagOutsideIgnored(%q, %d) name = %q, want %q", tt.text, tt.start, got.Name, tt.wantName)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSkipXMLIgnoredSectionBoundaryConditions(t *testing.T) {
|
||||||
|
text := "hello"
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
i int
|
||||||
|
wantNext int
|
||||||
|
wantAdv bool
|
||||||
|
wantBlk bool
|
||||||
|
}{
|
||||||
|
{"valid_index", 2, 2, false, false},
|
||||||
|
{"at_end_equal_len", 5, 5, false, false},
|
||||||
|
{"beyond_end", 6, 6, false, false},
|
||||||
|
{"negative", -1, -1, false, false},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
next, adv, blk := skipXMLIgnoredSection(text, tt.i)
|
||||||
|
if next != tt.wantNext || adv != tt.wantAdv || blk != tt.wantBlk {
|
||||||
|
t.Errorf("skipXMLIgnoredSection(%q, %d) = (%d, %v, %v), want (%d, %v, %v)",
|
||||||
|
text, tt.i, next, adv, blk, tt.wantNext, tt.wantAdv, tt.wantBlk)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSkipXMLIgnoredSectionCommentWithUnicodeKeepsByteOffset(t *testing.T) {
|
||||||
|
text := "<!-- İ -->x<tool_calls>"
|
||||||
|
|
||||||
|
next, adv, blk := skipXMLIgnoredSection(text, 0)
|
||||||
|
if blk || !adv {
|
||||||
|
t.Fatalf("skipXMLIgnoredSection() = (%d, %v, %v), want advanced unblocked comment", next, adv, blk)
|
||||||
|
}
|
||||||
|
if want := len("<!-- İ -->"); next != want {
|
||||||
|
t.Fatalf("skipXMLIgnoredSection() next = %d, want %d", next, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSkipXMLIgnoredSectionMatchesCDATAWithoutAllocatingTail(t *testing.T) {
|
||||||
|
text := "<![cDaTa[<tool_calls>]]><tool_calls>"
|
||||||
|
|
||||||
|
next, adv, blk := skipXMLIgnoredSection(text, 0)
|
||||||
|
if blk || !adv {
|
||||||
|
t.Fatalf("skipXMLIgnoredSection() = (%d, %v, %v), want advanced unblocked CDATA", next, adv, blk)
|
||||||
|
}
|
||||||
|
if want := len("<![cDaTa[<tool_calls>]]>"); next != want {
|
||||||
|
t.Fatalf("skipXMLIgnoredSection() next = %d, want %d", next, want)
|
||||||
|
}
|
||||||
|
|
||||||
|
tag, ok := FindToolMarkupTagOutsideIgnored(text, 0)
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("expected tool tag after skipped CDATA")
|
||||||
|
}
|
||||||
|
if tag.Start != next {
|
||||||
|
t.Fatalf("FindToolMarkupTagOutsideIgnored() start = %d, want %d", tag.Start, next)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindToolCDATAEndBoundaryConditions(t *testing.T) {
|
||||||
|
text := "<![CDATA[hello]]>"
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
from int
|
||||||
|
wantResult int
|
||||||
|
}{
|
||||||
|
{"valid", 12, 14},
|
||||||
|
{"at_end", 17, -1},
|
||||||
|
{"beyond_end", 18, -1},
|
||||||
|
{"negative", -1, -1},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := findToolCDATAEnd(text, tt.from)
|
||||||
|
if got != tt.wantResult {
|
||||||
|
t.Errorf("findToolCDATAEnd(%q, %d) = %d, want %d",
|
||||||
|
text, tt.from, got, tt.wantResult)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindMatchingToolMarkupCloseBoundaryConditions(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
text string
|
||||||
|
open ToolMarkupTag
|
||||||
|
wantOk bool
|
||||||
|
}{
|
||||||
|
{"empty_text", "", ToolMarkupTag{Name: "tool_calls", End: 0}, false},
|
||||||
|
{"open_end_beyond_text", "hello", ToolMarkupTag{Name: "tool_calls", End: 100}, false},
|
||||||
|
{"open_end_equals_len", "hello", ToolMarkupTag{Name: "tool_calls", End: 5}, false},
|
||||||
|
{"valid_simple", "<tool_calls></tool_calls>", ToolMarkupTag{Name: "tool_calls", End: 11}, true},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
_, ok := FindMatchingToolMarkupClose(tt.text, tt.open)
|
||||||
|
if ok != tt.wantOk {
|
||||||
|
t.Errorf("FindMatchingToolMarkupClose(%q, %+v) ok = %v, want %v", tt.text, tt.open, ok, tt.wantOk)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user