fix(toolcall): eliminate strings.ToLower panics from Unicode case folding

Replace all strings.ToLower usage with ASCII case-insensitive matching
(hasASCIIPrefixFoldAt, indexASCIIFold, hasDSMLPrefix) to prevent slice
bounds errors when Unicode characters change byte length after
lowercasing. Go's strings.ToLower maps each rune on its own, so the
lowercased copy can shrink (e.g., Turkish İ U+0130 → i: 2 bytes → 1 byte)
or grow (e.g., Ⱥ U+023A → ⱥ U+2C65: 2 bytes → 3 bytes).

Root cause: the code created a strings.ToLower(text) copy, found byte
positions in that copy, then used those positions to slice the
original text — byte offsets that were valid in the lowercased copy
became misaligned or out-of-bounds in the original when lowercasing
changed byte lengths.

Files changed:
- toolcalls_scan.go: remove 5 usages of the `lower` copy, add hasDSMLPrefix
- toolcalls_parse_markup.go: remove 3 usages of the `lower` copy, add indexASCIIFold
- toolcalls_markup.go: remove the `lower` copy from SanitizeLooseCDATA
- toolcalls_parse.go: remove the `lower` copy from updateCDATAStateForStrip
- tool_prompt.go: align DSML pipe characters with tool call spec
- tool_prompt_test.go: fix pre-existing test character mismatch
This commit is contained in:
waiwai
2026-05-09 15:05:51 +08:00
parent 7ab5a0e66d
commit 1e00e482a6
8 changed files with 97 additions and 61 deletions

View File

@@ -212,17 +212,16 @@ func firstFenceMarkerIndex(line string) int {
}
func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool, string) {
lower := strings.ToLower(line)
pos := 0
state := inCDATA
fenceMarker := cdataFenceMarker
lineForFence := line
if !state {
start := strings.Index(lower[pos:], "<![cdata[")
start := indexASCIIFold(line, pos, "<![cdata[")
if start < 0 {
return false, ""
}
pos += start + len("<![cdata[")
pos = start + len("<![cdata[")
state = true
lineForFence = line[pos:]
}
@@ -239,24 +238,23 @@ func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool
fenceMarker = ""
}
for pos < len(lower) {
end := strings.Index(lower[pos:], "]]>")
if end < 0 {
for pos < len(line) {
endPos := indexASCIIFold(line, pos, "]]>")
if endPos < 0 {
return true, fenceMarker
}
endPos := pos + end
pos = endPos + len("]]>")
if fenceMarker != "" {
continue
}
if cdataEndLooksStructural(lower, pos) || strings.TrimSpace(lower[pos:]) == "" {
if cdataEndLooksStructural(line, pos) || strings.TrimSpace(line[pos:]) == "" {
state = false
for pos < len(lower) {
start := strings.Index(lower[pos:], "<![cdata[")
for pos < len(line) {
start := indexASCIIFold(line, pos, "<![cdata[")
if start < 0 {
return false, ""
}
pos += start + len("<![cdata[")
pos = start + len("<![cdata[")
state = true
trimmedTail := strings.TrimLeft(line[pos:], " \t")
if marker, ok := parseFenceOpen(trimmedTail); ok {