mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-10 03:07:41 +08:00
Replace all strings.ToLower usage with ASCII case-insensitive matching (hasASCIIPrefixFoldAt, indexASCIIFold, hasDSMLPrefix) to prevent slice bounds errors when Unicode characters change byte length after case folding (e.g., Turkish İ U+0130 → i + combining dot: 2 bytes → 3 bytes). Root cause: code created a strings.ToLower(text) copy, found byte positions in that copy, then used those positions to slice the original text — byte offsets that were valid in the lowercased copy became out-of-bounds in the original when case folding changed byte lengths. Files changed: - toolcalls_scan.go: remove 5 lower usages, add hasDSMLPrefix - toolcalls_parse_markup.go: remove 3 lower usages, add indexASCIIFold - toolcalls_markup.go: SanitizeLooseCDATA lower removal - toolcalls_parse.go: updateCDATAStateForStrip lower removal - tool_prompt.go: align DSML pipe characters with tool call spec - tool_prompt_test.go: fix pre-existing test character mismatch
321 lines
6.8 KiB
Go
321 lines
6.8 KiB
Go
package toolcall
|
||
|
||
import "strings"
|
||
|
||
type toolMarkupNameAlias struct {
|
||
raw string
|
||
canonical string
|
||
dsmlOnly bool
|
||
}
|
||
|
||
var toolMarkupNames = []toolMarkupNameAlias{
|
||
{raw: "tool_calls", canonical: "tool_calls"},
|
||
{raw: "tool-calls", canonical: "tool_calls", dsmlOnly: true},
|
||
{raw: "invoke", canonical: "invoke"},
|
||
{raw: "parameter", canonical: "parameter"},
|
||
}
|
||
|
||
type ToolMarkupTag struct {
|
||
Start int
|
||
End int
|
||
NameStart int
|
||
NameEnd int
|
||
Name string
|
||
Closing bool
|
||
SelfClosing bool
|
||
DSMLLike bool
|
||
Canonical bool
|
||
}
|
||
|
||
func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
||
for i := 0; i < len(text); {
|
||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||
if blocked {
|
||
return hasDSML, hasCanonical
|
||
}
|
||
if advanced {
|
||
i = next
|
||
continue
|
||
}
|
||
if tag, ok := scanToolMarkupTagAt(text, i); ok {
|
||
if tag.DSMLLike {
|
||
hasDSML = true
|
||
} else {
|
||
hasCanonical = true
|
||
}
|
||
if hasDSML && hasCanonical {
|
||
return true, true
|
||
}
|
||
i = tag.End + 1
|
||
continue
|
||
}
|
||
i++
|
||
}
|
||
return hasDSML, hasCanonical
|
||
}
|
||
|
||
func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
|
||
for i := 0; i < len(text); {
|
||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||
if blocked {
|
||
return hasDSML, hasCanonical
|
||
}
|
||
if advanced {
|
||
i = next
|
||
continue
|
||
}
|
||
if tag, ok := scanToolMarkupTagAt(text, i); ok {
|
||
if tag.Name != "tool_calls" {
|
||
i = tag.End + 1
|
||
continue
|
||
}
|
||
if tag.DSMLLike {
|
||
hasDSML = true
|
||
} else {
|
||
hasCanonical = true
|
||
}
|
||
if hasDSML && hasCanonical {
|
||
return true, true
|
||
}
|
||
i = tag.End + 1
|
||
continue
|
||
}
|
||
i++
|
||
}
|
||
return hasDSML, hasCanonical
|
||
}
|
||
|
||
func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) {
|
||
for i := maxInt(start, 0); i < len(text); {
|
||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||
if blocked {
|
||
return ToolMarkupTag{}, false
|
||
}
|
||
if advanced {
|
||
i = next
|
||
continue
|
||
}
|
||
if tag, ok := scanToolMarkupTagAt(text, i); ok {
|
||
return tag, true
|
||
}
|
||
i++
|
||
}
|
||
return ToolMarkupTag{}, false
|
||
}
|
||
|
||
func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag, bool) {
|
||
if text == "" || open.Name == "" || open.Closing || open.End >= len(text) {
|
||
return ToolMarkupTag{}, false
|
||
}
|
||
depth := 1
|
||
for pos := open.End + 1; pos < len(text); {
|
||
tag, ok := FindToolMarkupTagOutsideIgnored(text, pos)
|
||
if !ok {
|
||
return ToolMarkupTag{}, false
|
||
}
|
||
if tag.Name != open.Name {
|
||
pos = tag.End + 1
|
||
continue
|
||
}
|
||
if tag.Closing {
|
||
depth--
|
||
if depth == 0 {
|
||
return tag, true
|
||
}
|
||
} else if !tag.SelfClosing {
|
||
depth++
|
||
}
|
||
pos = tag.End + 1
|
||
}
|
||
return ToolMarkupTag{}, false
|
||
}
|
||
|
||
func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
|
||
if start < 0 || start >= len(text) || text[start] != '<' {
|
||
return ToolMarkupTag{}, false
|
||
}
|
||
i := start + 1
|
||
for i < len(text) && text[i] == '<' {
|
||
i++
|
||
}
|
||
closing := false
|
||
if i < len(text) && text[i] == '/' {
|
||
closing = true
|
||
i++
|
||
}
|
||
i, dsmlLike := consumeToolMarkupNamePrefix(text, i)
|
||
name, nameLen := matchToolMarkupName(text, i, dsmlLike)
|
||
if nameLen == 0 {
|
||
return ToolMarkupTag{}, false
|
||
}
|
||
nameEnd := i + nameLen
|
||
nameEndBeforePipes := nameEnd
|
||
for next, ok := consumeToolMarkupPipe(text, nameEnd); ok; next, ok = consumeToolMarkupPipe(text, nameEnd) {
|
||
nameEnd = next
|
||
}
|
||
hasTrailingPipe := nameEnd > nameEndBeforePipes
|
||
if !hasToolMarkupBoundary(text, nameEnd) {
|
||
return ToolMarkupTag{}, false
|
||
}
|
||
end := findXMLTagEnd(text, nameEnd)
|
||
if end < 0 {
|
||
if !hasTrailingPipe {
|
||
return ToolMarkupTag{}, false
|
||
}
|
||
end = nameEnd - 1
|
||
}
|
||
if hasTrailingPipe {
|
||
if nextLT := strings.IndexByte(text[nameEnd:], '<'); nextLT >= 0 && end >= nameEnd+nextLT {
|
||
end = nameEnd - 1
|
||
}
|
||
}
|
||
trimmed := strings.TrimSpace(text[start : end+1])
|
||
return ToolMarkupTag{
|
||
Start: start,
|
||
End: end,
|
||
NameStart: i,
|
||
NameEnd: nameEnd,
|
||
Name: name,
|
||
Closing: closing,
|
||
SelfClosing: strings.HasSuffix(trimmed, "/>"),
|
||
DSMLLike: dsmlLike,
|
||
Canonical: !dsmlLike,
|
||
}, true
|
||
}
|
||
|
||
func IsPartialToolMarkupTagPrefix(text string) bool {
|
||
if text == "" || text[0] != '<' || strings.Contains(text, ">") {
|
||
return false
|
||
}
|
||
i := 1
|
||
for i < len(text) && text[i] == '<' {
|
||
i++
|
||
}
|
||
if i >= len(text) {
|
||
return true
|
||
}
|
||
if text[i] == '/' {
|
||
i++
|
||
}
|
||
for i <= len(text) {
|
||
if i == len(text) {
|
||
return true
|
||
}
|
||
if hasToolMarkupNamePrefix(text, i) {
|
||
return true
|
||
}
|
||
if hasDSMLPrefix(text, i) {
|
||
return true
|
||
}
|
||
next, ok := consumeToolMarkupNamePrefixOnce(text, i)
|
||
if !ok {
|
||
return false
|
||
}
|
||
i = next
|
||
}
|
||
return false
|
||
}
|
||
|
||
func consumeToolMarkupNamePrefix(text string, idx int) (int, bool) {
|
||
dsmlLike := false
|
||
for {
|
||
next, ok := consumeToolMarkupNamePrefixOnce(text, idx)
|
||
if !ok {
|
||
return idx, dsmlLike
|
||
}
|
||
idx = next
|
||
dsmlLike = true
|
||
}
|
||
}
|
||
|
||
func consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) {
|
||
if next, ok := consumeToolMarkupPipe(text, idx); ok {
|
||
return next, true
|
||
}
|
||
if idx < len(text) && (text[idx] == ' ' || text[idx] == '\t' || text[idx] == '\r' || text[idx] == '\n') {
|
||
return idx + 1, true
|
||
}
|
||
if hasASCIIPrefixFoldAt(text, idx, "dsml") {
|
||
next := idx + len("dsml")
|
||
if next < len(text) && (text[next] == '-' || text[next] == '_') {
|
||
next++
|
||
}
|
||
return next, true
|
||
}
|
||
return idx, false
|
||
}
|
||
|
||
// hasDSMLPrefix checks if "dsml" starts with text[start:] (case-insensitive).
|
||
func hasDSMLPrefix(text string, start int) bool {
|
||
const dsml = "dsml"
|
||
remain := len(text) - start
|
||
if remain <= 0 || remain > len(dsml) {
|
||
return false
|
||
}
|
||
for j := 0; j < remain; j++ {
|
||
if asciiLower(text[start+j]) != dsml[j] {
|
||
return false
|
||
}
|
||
}
|
||
return true
|
||
}
|
||
|
||
func hasToolMarkupNamePrefix(text string, start int) bool {
|
||
for _, name := range toolMarkupNames {
|
||
if hasASCIIPrefixFoldAt(text, start, name.raw) {
|
||
return true
|
||
}
|
||
tailLen := len(text) - start
|
||
if tailLen > 0 && tailLen <= len(name.raw) {
|
||
match := true
|
||
for j := 0; j < tailLen; j++ {
|
||
if asciiLower(text[start+j]) != asciiLower(name.raw[j]) {
|
||
match = false
|
||
break
|
||
}
|
||
}
|
||
if match {
|
||
return true
|
||
}
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
func matchToolMarkupName(text string, start int, dsmlLike bool) (string, int) {
|
||
for _, name := range toolMarkupNames {
|
||
if name.dsmlOnly && !dsmlLike {
|
||
continue
|
||
}
|
||
if hasASCIIPrefixFoldAt(text, start, name.raw) {
|
||
return name.canonical, len(name.raw)
|
||
}
|
||
}
|
||
return "", 0
|
||
}
|
||
|
||
func consumeToolMarkupPipe(text string, idx int) (int, bool) {
|
||
if idx >= len(text) {
|
||
return idx, false
|
||
}
|
||
if text[idx] == '|' {
|
||
return idx + 1, true
|
||
}
|
||
if strings.HasPrefix(text[idx:], "|") {
|
||
return idx + len("|"), true
|
||
}
|
||
return idx, false
|
||
}
|
||
|
||
func hasToolMarkupBoundary(text string, idx int) bool {
|
||
if idx >= len(text) {
|
||
return true
|
||
}
|
||
switch text[idx] {
|
||
case ' ', '\t', '\n', '\r', '>', '/':
|
||
return true
|
||
default:
|
||
return false
|
||
}
|
||
}
|