mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-17 06:35:14 +08:00
fix(toolcall): eliminate strings.ToLower panics from Unicode case folding
Replace all strings.ToLower usage with ASCII case-insensitive matching (hasASCIIPrefixFoldAt, indexASCIIFold, hasDSMLPrefix) to prevent slice bounds errors when Unicode characters change byte length after case folding (e.g., Turkish İ U+0130 → i + combining dot: 2 bytes → 3 bytes).

Root cause: code created a strings.ToLower(text) copy, found byte positions in that copy, then used those positions to slice the original text — byte offsets that were valid in the lowercased copy became out-of-bounds in the original when case folding changed byte lengths.

Files changed:
- toolcalls_scan.go: remove 5 lower usages, add hasDSMLPrefix
- toolcalls_parse_markup.go: remove 3 lower usages, add indexASCIIFold
- toolcalls_markup.go: SanitizeLooseCDATA lower removal
- toolcalls_parse.go: updateCDATAStateForStrip lower removal
- tool_prompt.go: align DSML pipe characters with tool call spec
- tool_prompt_test.go: fix pre-existing test character mismatch
This commit is contained in:
@@ -212,17 +212,16 @@ func firstFenceMarkerIndex(line string) int {
|
||||
}
|
||||
|
||||
func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool, string) {
|
||||
lower := strings.ToLower(line)
|
||||
pos := 0
|
||||
state := inCDATA
|
||||
fenceMarker := cdataFenceMarker
|
||||
lineForFence := line
|
||||
if !state {
|
||||
start := strings.Index(lower[pos:], "<![cdata[")
|
||||
start := indexASCIIFold(line, pos, "<![cdata[")
|
||||
if start < 0 {
|
||||
return false, ""
|
||||
}
|
||||
pos += start + len("<![cdata[")
|
||||
pos = start + len("<![cdata[")
|
||||
state = true
|
||||
lineForFence = line[pos:]
|
||||
}
|
||||
@@ -239,24 +238,23 @@ func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool
|
||||
fenceMarker = ""
|
||||
}
|
||||
|
||||
for pos < len(lower) {
|
||||
end := strings.Index(lower[pos:], "]]>")
|
||||
if end < 0 {
|
||||
for pos < len(line) {
|
||||
endPos := indexASCIIFold(line, pos, "]]>")
|
||||
if endPos < 0 {
|
||||
return true, fenceMarker
|
||||
}
|
||||
endPos := pos + end
|
||||
pos = endPos + len("]]>")
|
||||
if fenceMarker != "" {
|
||||
continue
|
||||
}
|
||||
if cdataEndLooksStructural(lower, pos) || strings.TrimSpace(lower[pos:]) == "" {
|
||||
if cdataEndLooksStructural(line, pos) || strings.TrimSpace(line[pos:]) == "" {
|
||||
state = false
|
||||
for pos < len(lower) {
|
||||
start := strings.Index(lower[pos:], "<![cdata[")
|
||||
for pos < len(line) {
|
||||
start := indexASCIIFold(line, pos, "<![cdata[")
|
||||
if start < 0 {
|
||||
return false, ""
|
||||
}
|
||||
pos += start + len("<![cdata[")
|
||||
pos = start + len("<![cdata[")
|
||||
state = true
|
||||
trimmedTail := strings.TrimLeft(line[pos:], " \t")
|
||||
if marker, ok := parseFenceOpen(trimmedTail); ok {
|
||||
|
||||
Reference in New Issue
Block a user