fix(toolcall): eliminate strings.ToLower panics from Unicode case folding

Replace all strings.ToLower usage with ASCII case-insensitive matching (hasASCIIPrefixFoldAt, indexASCIIFold, hasDSMLPrefix) to prevent slice-bounds panics when Unicode characters change byte length after case folding (e.g., Turkish İ U+0130 → i + combining dot: 2 bytes → 3 bytes).

Root cause: the code created a strings.ToLower(text) copy, found byte positions in that copy, and then used those positions to slice the original text. Byte offsets that were valid in the lowercased copy became out-of-bounds in the original whenever case folding changed byte lengths.

Files changed:
- toolcalls_scan.go: remove 5 lower usages, add hasDSMLPrefix
- toolcalls_parse_markup.go: remove 3 lower usages, add indexASCIIFold
- toolcalls_markup.go: remove lower usage in SanitizeLooseCDATA
- toolcalls_parse.go: remove lower usage in updateCDATAStateForStrip
- tool_prompt.go: align DSML pipe characters with the tool call spec
- tool_prompt_test.go: fix pre-existing test character mismatch
2026-05-17 06:35:14 +08:00 · 2026-05-09 15:05:51 +08:00
parent 7ab5a0e66d
commit 1e00e482a6
8 changed files with 97 additions and 61 deletions
--- a/internal/toolcall/toolcalls_parse.go
+++ b/internal/toolcall/toolcalls_parse.go
@@ -212,17 +212,16 @@ func firstFenceMarkerIndex(line string) int {
 }

 func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool, string) {
-	lower := strings.ToLower(line)
 	pos := 0
 	state := inCDATA
 	fenceMarker := cdataFenceMarker
 	lineForFence := line
 	if !state {
-		start := strings.Index(lower[pos:], "<![cdata[")
+		start := indexASCIIFold(line, pos, "<![cdata[")
 		if start < 0 {
 			return false, ""
 		}
-		pos += start + len("<![cdata[")
+		pos = start + len("<![cdata[")
 		state = true
 		lineForFence = line[pos:]
 	}
@@ -239,24 +238,23 @@ func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool
 		fenceMarker = ""
 	}

-	for pos < len(lower) {
-		end := strings.Index(lower[pos:], "]]>")
-		if end < 0 {
+	for pos < len(line) {
+		endPos := indexASCIIFold(line, pos, "]]>")
+		if endPos < 0 {
 			return true, fenceMarker
 		}
-		endPos := pos + end
 		pos = endPos + len("]]>")
 		if fenceMarker != "" {
 			continue
 		}
-		if cdataEndLooksStructural(lower, pos) || strings.TrimSpace(lower[pos:]) == "" {
+		if cdataEndLooksStructural(line, pos) || strings.TrimSpace(line[pos:]) == "" {
 			state = false
-			for pos < len(lower) {
-				start := strings.Index(lower[pos:], "<![cdata[")
+			for pos < len(line) {
+				start := indexASCIIFold(line, pos, "<![cdata[")
 				if start < 0 {
 					return false, ""
 				}
-				pos += start + len("<![cdata[")
+				pos = start + len("<![cdata[")
 				state = true
 				trimmedTail := strings.TrimLeft(line[pos:], " \t")
 				if marker, ok := parseFenceOpen(trimmedTail); ok {