Merge pull request #460 from waiwaic/main

fix(toolcall): eliminate strings.ToLower panics from Unicode case folding
This commit is contained in:
CJACK.
2026-05-09 16:56:46 +08:00
committed by GitHub
8 changed files with 97 additions and 61 deletions

View File

@@ -292,7 +292,7 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) {
if !containsStr(prompt, "Search the web") {
t.Fatalf("expected description in prompt")
}
if !containsStr(prompt, "<|DSML|tool_calls>") {
if !containsStr(prompt, "<DSMLtool_calls>") {
t.Fatalf("expected DSML tool_calls format in prompt")
}
if !containsStr(prompt, "TOOL CALL FORMAT") {

View File

@@ -74,7 +74,7 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
}
finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false)
if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.") {
if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <DSMLtool_calls>...</DSMLtool_calls> block at the end of your response.") {
t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt)
}
if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") {

View File

@@ -11,46 +11,45 @@ import "strings"
func BuildToolCallInstructions(toolNames []string) string {
return `TOOL CALL FORMAT — FOLLOW EXACTLY:
<|DSML|tool_calls>
<|DSML|invoke name="TOOL_NAME_HERE">
<|DSML|parameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls>
<DSMLtool_calls>
<DSMLinvoke name="TOOL_NAME_HERE">
<DSMLparameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></DSMLparameter>
</DSMLinvoke>
</DSMLtool_calls>
RULES:
1) Use the <|DSML|tool_calls> wrapper format.
2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root.
3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">.
1) Use the <DSMLtool_calls> wrapper format.
2) Put one or more <DSMLinvoke> entries under a single <DSMLtool_calls> root.
3) Put the tool name in the invoke name attribute: <DSMLinvoke name="TOOL_NAME">.
4) All string values must use <![CDATA[...]]>, even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries.
5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...</|DSML|parameter> node.
5) Every top-level argument must be a <DSMLparameter name="ARG_NAME">...</DSMLparameter> node.
6) Objects use nested XML elements inside the parameter body. Arrays may repeat <item> children.
7) Numbers, booleans, and null stay plain text.
8) Use only the parameter names in the tool schema. Do not invent fields.
9) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
11) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with </|DSML|tool_calls>.
10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <DSMLtool_calls>.
11) Never omit the opening <DSMLtool_calls> tag, even if you already plan to close with </DSMLtool_calls>.
12) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
PARAMETER SHAPES:
- string => <|DSML|parameter name="x"><![CDATA[value]]></|DSML|parameter>
- object => <|DSML|parameter name="x"><field>...</field></|DSML|parameter>
- array => <|DSML|parameter name="x"><item>...</item><item>...</item></|DSML|parameter>
- number/bool/null => <|DSML|parameter name="x">plain_text</|DSML|parameter>
- string => <DSMLparameter name="x"><![CDATA[value]]></DSMLparameter>
- object => <DSMLparameter name="x"><field>...</field></DSMLparameter>
- array => <DSMLparameter name="x"><item>...</item><item>...</item></DSMLparameter>
- number/bool/null => <DSMLparameter name="x">plain_text</DSMLparameter>
【WRONG — Do NOT do these】:
Wrong 1 — mixed text after XML:
<|DSML|tool_calls>...</|DSML|tool_calls> I hope this helps.
<DSMLtool_calls>...</DSMLtool_calls> I hope this helps.
Wrong 2 — Markdown code fences:
` + "```xml" + `
<|DSML|tool_calls>...</|DSML|tool_calls>
<DSMLtool_calls>...</DSMLtool_calls>
` + "```" + `
Wrong 3 — missing opening wrapper:
<|DSML|invoke name="TOOL_NAME">...</|DSML|invoke>
</|DSML|tool_calls>
Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.
<DSMLinvoke name="TOOL_NAME">...</DSMLinvoke>
</DSMLtool_calls>
Remember: The ONLY valid way to use tools is the <DSMLtool_calls>...</DSMLtool_calls> block at the end of your response.
` + buildCorrectToolExamples(toolNames)
}

View File

@@ -111,7 +111,7 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) {
func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *testing.T) {
out := BuildToolCallInstructions([]string{"read_file"})
if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") {
if !strings.Contains(out, "Never omit the opening <DSMLtool_calls> tag") {
t.Fatalf("expected explicit missing-opening-tag warning, got: %s", out)
}
if !strings.Contains(out, "Wrong 3 — missing opening wrapper") {

View File

@@ -145,7 +145,6 @@ func SanitizeLooseCDATA(text string) string {
return ""
}
lower := strings.ToLower(text)
const openMarker = "<![cdata["
const closeMarker = "]]>"
@@ -154,17 +153,16 @@ func SanitizeLooseCDATA(text string) string {
changed := false
pos := 0
for pos < len(text) {
startRel := strings.Index(lower[pos:], openMarker)
if startRel < 0 {
start := indexASCIIFold(text, pos, openMarker)
if start < 0 {
b.WriteString(text[pos:])
break
}
start := pos + startRel
contentStart := start + len(openMarker)
b.WriteString(text[pos:start])
if endRel := strings.Index(lower[contentStart:], closeMarker); endRel >= 0 {
end := contentStart + endRel + len(closeMarker)
if endRel := indexASCIIFold(text, contentStart, closeMarker); endRel >= 0 {
end := endRel + len(closeMarker)
b.WriteString(text[start:end])
pos = end
continue

View File

@@ -212,17 +212,16 @@ func firstFenceMarkerIndex(line string) int {
}
func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool, string) {
lower := strings.ToLower(line)
pos := 0
state := inCDATA
fenceMarker := cdataFenceMarker
lineForFence := line
if !state {
start := strings.Index(lower[pos:], "<![cdata[")
start := indexASCIIFold(line, pos, "<![cdata[")
if start < 0 {
return false, ""
}
pos += start + len("<![cdata[")
pos = start + len("<![cdata[")
state = true
lineForFence = line[pos:]
}
@@ -239,24 +238,23 @@ func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool
fenceMarker = ""
}
for pos < len(lower) {
end := strings.Index(lower[pos:], "]]>")
if end < 0 {
for pos < len(line) {
endPos := indexASCIIFold(line, pos, "]]>")
if endPos < 0 {
return true, fenceMarker
}
endPos := pos + end
pos = endPos + len("]]>")
if fenceMarker != "" {
continue
}
if cdataEndLooksStructural(lower, pos) || strings.TrimSpace(lower[pos:]) == "" {
if cdataEndLooksStructural(line, pos) || strings.TrimSpace(line[pos:]) == "" {
state = false
for pos < len(lower) {
start := strings.Index(lower[pos:], "<![cdata[")
for pos < len(line) {
start := indexASCIIFold(line, pos, "<![cdata[")
if start < 0 {
return false, ""
}
pos += start + len("<![cdata[")
pos = start + len("<![cdata[")
state = true
trimmedTail := strings.TrimLeft(line[pos:], " \t")
if marker, ok := parseFenceOpen(trimmedTail); ok {

View File

@@ -141,7 +141,6 @@ func findXMLElementBlocks(text, tag string) []xmlElementBlock {
}
func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart int, attrs string, ok bool) {
lower := strings.ToLower(text)
target := "<" + strings.ToLower(tag)
for i := maxInt(from, 0); i < len(text); {
next, advanced, blocked := skipXMLIgnoredSection(text, i)
@@ -152,7 +151,7 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
i = next
continue
}
if strings.HasPrefix(lower[i:], target) && hasXMLTagBoundary(text, i+len(target)) {
if hasASCIIPrefixFoldAt(text, i, target) && hasXMLTagBoundary(text, i+len(target)) {
end := findXMLTagEnd(text, i+len(target))
if end < 0 {
return -1, -1, "", false
@@ -165,7 +164,6 @@ func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart i
}
func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart, closeEnd int, ok bool) {
lower := strings.ToLower(text)
openTarget := "<" + strings.ToLower(tag)
closeTarget := "</" + strings.ToLower(tag)
depth := 1
@@ -178,7 +176,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
i = next
continue
}
if strings.HasPrefix(lower[i:], closeTarget) && hasXMLTagBoundary(text, i+len(closeTarget)) {
if hasASCIIPrefixFoldAt(text, i, closeTarget) && hasXMLTagBoundary(text, i+len(closeTarget)) {
end := findXMLTagEnd(text, i+len(closeTarget))
if end < 0 {
return -1, -1, false
@@ -190,7 +188,7 @@ func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart,
i = end + 1
continue
}
if strings.HasPrefix(lower[i:], openTarget) && hasXMLTagBoundary(text, i+len(openTarget)) {
if hasASCIIPrefixFoldAt(text, i, openTarget) && hasXMLTagBoundary(text, i+len(openTarget)) {
end := findXMLTagEnd(text, i+len(openTarget))
if end < 0 {
return -1, -1, false
@@ -247,6 +245,23 @@ func asciiLower(b byte) byte {
return b
}
// indexASCIIFold returns the absolute byte position in s where substr (ASCII-only) is
// found case-insensitively, scanning forward from start. Returns -1 if not found.
// Unlike strings.Index on a lowercased copy, this does not allocate or risk byte-length
// mismatch when non-ASCII runes change width under case folding.
func indexASCIIFold(s string, start int, substr string) int {
if start < 0 || len(s)-start < len(substr) {
return -1
}
end := len(s) - len(substr) + 1
for i := start; i < end; i++ {
if hasASCIIPrefixFoldAt(s, i, substr) {
return i
}
}
return -1
}
func findToolCDATAEnd(text string, from int) int {
if from < 0 || from >= len(text) {
return -1

View File

@@ -134,7 +134,6 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
if start < 0 || start >= len(text) || text[start] != '<' {
return ToolMarkupTag{}, false
}
lower := strings.ToLower(text)
i := start + 1
for i < len(text) && text[i] == '<' {
i++
@@ -144,8 +143,8 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
closing = true
i++
}
i, dsmlLike := consumeToolMarkupNamePrefix(lower, text, i)
name, nameLen := matchToolMarkupName(lower, i, dsmlLike)
i, dsmlLike := consumeToolMarkupNamePrefix(text, i)
name, nameLen := matchToolMarkupName(text, i, dsmlLike)
if nameLen == 0 {
return ToolMarkupTag{}, false
}
@@ -188,7 +187,6 @@ func IsPartialToolMarkupTagPrefix(text string) bool {
if text == "" || text[0] != '<' || strings.Contains(text, ">") {
return false
}
lower := strings.ToLower(text)
i := 1
for i < len(text) && text[i] == '<' {
i++
@@ -203,13 +201,13 @@ func IsPartialToolMarkupTagPrefix(text string) bool {
if i == len(text) {
return true
}
if hasToolMarkupNamePrefix(lower[i:]) {
if hasToolMarkupNamePrefix(text, i) {
return true
}
if strings.HasPrefix("dsml", lower[i:]) {
if hasDSMLPrefix(text, i) {
return true
}
next, ok := consumeToolMarkupNamePrefixOnce(lower, text, i)
next, ok := consumeToolMarkupNamePrefixOnce(text, i)
if !ok {
return false
}
@@ -218,10 +216,10 @@ func IsPartialToolMarkupTagPrefix(text string) bool {
return false
}
func consumeToolMarkupNamePrefix(lower, text string, idx int) (int, bool) {
func consumeToolMarkupNamePrefix(text string, idx int) (int, bool) {
dsmlLike := false
for {
next, ok := consumeToolMarkupNamePrefixOnce(lower, text, idx)
next, ok := consumeToolMarkupNamePrefixOnce(text, idx)
if !ok {
return idx, dsmlLike
}
@@ -230,14 +228,14 @@ func consumeToolMarkupNamePrefix(lower, text string, idx int) (int, bool) {
}
}
func consumeToolMarkupNamePrefixOnce(lower, text string, idx int) (int, bool) {
func consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) {
if next, ok := consumeToolMarkupPipe(text, idx); ok {
return next, true
}
if idx < len(text) && (text[idx] == ' ' || text[idx] == '\t' || text[idx] == '\r' || text[idx] == '\n') {
return idx + 1, true
}
if strings.HasPrefix(lower[idx:], "dsml") {
if hasASCIIPrefixFoldAt(text, idx, "dsml") {
next := idx + len("dsml")
if next < len(text) && (text[next] == '-' || text[next] == '_') {
next++
@@ -247,21 +245,49 @@ func consumeToolMarkupNamePrefixOnce(lower, text string, idx int) (int, bool) {
return idx, false
}
func hasToolMarkupNamePrefix(lowerTail string) bool {
// hasDSMLPrefix checks if "dsml" starts with text[start:] (case-insensitive).
func hasDSMLPrefix(text string, start int) bool {
const dsml = "dsml"
remain := len(text) - start
if remain <= 0 || remain > len(dsml) {
return false
}
for j := 0; j < remain; j++ {
if asciiLower(text[start+j]) != dsml[j] {
return false
}
}
return true
}
func hasToolMarkupNamePrefix(text string, start int) bool {
for _, name := range toolMarkupNames {
if strings.HasPrefix(lowerTail, name.raw) || strings.HasPrefix(name.raw, lowerTail) {
if hasASCIIPrefixFoldAt(text, start, name.raw) {
return true
}
tailLen := len(text) - start
if tailLen > 0 && tailLen <= len(name.raw) {
match := true
for j := 0; j < tailLen; j++ {
if asciiLower(text[start+j]) != asciiLower(name.raw[j]) {
match = false
break
}
}
if match {
return true
}
}
}
return false
}
func matchToolMarkupName(lower string, start int, dsmlLike bool) (string, int) {
func matchToolMarkupName(text string, start int, dsmlLike bool) (string, int) {
for _, name := range toolMarkupNames {
if name.dsmlOnly && !dsmlLike {
continue
}
if strings.HasPrefix(lower[start:], name.raw) {
if hasASCIIPrefixFoldAt(text, start, name.raw) {
return name.canonical, len(name.raw)
}
}