mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-22 00:47:45 +08:00
fix(toolcall): eliminate strings.ToLower panics from Unicode case folding
Replace all strings.ToLower usage with ASCII case-insensitive matching (hasASCIIPrefixFoldAt, indexASCIIFold, hasDSMLPrefix) to prevent slice-bounds panics when lowercasing changes a string's byte length. Go's strings.ToLower maps rune by rune, so non-ASCII runes can shrink or grow in UTF-8: for example, Turkish İ (U+0130, 2 bytes) becomes ASCII i (1 byte), while Ⱥ (U+023A, 2 bytes) becomes ⱥ (U+2C65, 3 bytes).
Root cause: the code created a strings.ToLower(text) copy and then mixed byte offsets between that copy and the original text. Offsets that were valid in one string could be out of bounds (or point at the wrong bytes) in the other once case folding had changed byte lengths.
Files changed:
- toolcalls_scan.go: remove 5 lower usages, add hasDSMLPrefix
- toolcalls_parse_markup.go: remove 3 lower usages, add indexASCIIFold
- toolcalls_markup.go: remove lower usage from SanitizeLooseCDATA
- toolcalls_parse.go: remove lower usage from updateCDATAStateForStrip
- tool_prompt.go: align DSML pipe characters with the tool call spec
- tool_prompt_test.go: fix pre-existing test character mismatch
This commit is contained in:
@@ -141,7 +141,6 @@ func findXMLElementBlocks(text, tag string) []xmlElementBlock {
|
||||
}
|
||||
|
||||
func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart int, attrs string, ok bool) {
|
||||
lower := strings.ToLower(text)
|
||||
target := "<" + strings.ToLower(tag)
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
@@ -152,7 +151,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
|
||||
i = next
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(lower[i:], target) && hasXMLTagBoundary(text, i+len(target)) {
|
||||
if hasASCIIPrefixFoldAt(text, i, target) && hasXMLTagBoundary(text, i+len(target)) {
|
||||
end := findXMLTagEnd(text, i+len(target))
|
||||
if end < 0 {
|
||||
return -1, -1, "", false
|
||||
@@ -165,7 +164,6 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
|
||||
}
|
||||
|
||||
func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart, closeEnd int, ok bool) {
|
||||
lower := strings.ToLower(text)
|
||||
openTarget := "<" + strings.ToLower(tag)
|
||||
closeTarget := "</" + strings.ToLower(tag)
|
||||
depth := 1
|
||||
@@ -178,7 +176,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
||||
i = next
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(lower[i:], closeTarget) && hasXMLTagBoundary(text, i+len(closeTarget)) {
|
||||
if hasASCIIPrefixFoldAt(text, i, closeTarget) && hasXMLTagBoundary(text, i+len(closeTarget)) {
|
||||
end := findXMLTagEnd(text, i+len(closeTarget))
|
||||
if end < 0 {
|
||||
return -1, -1, false
|
||||
@@ -190,7 +188,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
|
||||
i = end + 1
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(lower[i:], openTarget) && hasXMLTagBoundary(text, i+len(openTarget)) {
|
||||
if hasASCIIPrefixFoldAt(text, i, openTarget) && hasXMLTagBoundary(text, i+len(openTarget)) {
|
||||
end := findXMLTagEnd(text, i+len(openTarget))
|
||||
if end < 0 {
|
||||
return -1, -1, false
|
||||
@@ -247,6 +245,23 @@ func asciiLower(b byte) byte {
|
||||
return b
|
||||
}
|
||||
|
||||
// indexASCIIFold returns the absolute byte position in s where substr (ASCII-only) is
|
||||
// found case-insensitively, scanning forward from start. Returns -1 if not found.
|
||||
// Unlike strings.Index on a lowercased copy, this does not allocate or risk byte-length
|
||||
// mismatch when non-ASCII runes change width under case folding.
|
||||
func indexASCIIFold(s string, start int, substr string) int {
|
||||
if start < 0 || len(s)-start < len(substr) {
|
||||
return -1
|
||||
}
|
||||
end := len(s) - len(substr) + 1
|
||||
for i := start; i < end; i++ {
|
||||
if hasASCIIPrefixFoldAt(s, i, substr) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func findToolCDATAEnd(text string, from int) int {
|
||||
if from < 0 || from >= len(text) {
|
||||
return -1
|
||||
|
||||
Reference in New Issue
Block a user