From 1e00e482a618930c00a86a538fa79e8062776c31 Mon Sep 17 00:00:00 2001 From: waiwai <511158080@qq.com> Date: Sat, 9 May 2026 15:05:51 +0800 Subject: [PATCH 1/2] fix(toolcall): eliminate strings.ToLower panics from Unicode case folding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace all strings.ToLower usage with ASCII case-insensitive matching (hasASCIIPrefixFoldAt, indexASCIIFold, hasDSMLPrefix) to prevent slice bounds errors when Unicode characters change byte length after case folding (e.g., Turkish İ U+0130 → i + combining dot: 2 bytes → 3 bytes). Root cause: code created a strings.ToLower(text) copy, found byte positions in that copy, then used those positions to slice the original text — byte offsets that were valid in the lowercased copy became out-of-bounds in the original when case folding changed byte lengths. Files changed: - toolcalls_scan.go: remove 5 lower usages, add hasDSMLPrefix - toolcalls_parse_markup.go: remove 3 lower usages, add indexASCIIFold - toolcalls_markup.go: SanitizeLooseCDATA lower removal - toolcalls_parse.go: updateCDATAStateForStrip lower removal - tool_prompt.go: align DSML pipe characters with tool call spec - tool_prompt_test.go: fix pre-existing test character mismatch --- internal/httpapi/claude/handler_util_test.go | 2 +- internal/promptcompat/prompt_build_test.go | 2 +- internal/toolcall/tool_prompt.go | 41 +++++++------- internal/toolcall/tool_prompt_test.go | 2 +- internal/toolcall/toolcalls_markup.go | 10 ++-- internal/toolcall/toolcalls_parse.go | 20 ++++--- internal/toolcall/toolcalls_parse_markup.go | 25 +++++++-- internal/toolcall/toolcalls_scan.go | 56 ++++++++++++++------ 8 files changed, 97 insertions(+), 61 deletions(-) diff --git a/internal/httpapi/claude/handler_util_test.go b/internal/httpapi/claude/handler_util_test.go index 7b83c88..d69dc25 100644 --- a/internal/httpapi/claude/handler_util_test.go +++ b/internal/httpapi/claude/handler_util_test.go @@ -292,7 
+292,7 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) { if !containsStr(prompt, "Search the web") { t.Fatalf("expected description in prompt") } - if !containsStr(prompt, "<|DSML|tool_calls>") { + if !containsStr(prompt, "<|DSML|tool_calls>") { t.Fatalf("expected DSML tool_calls format in prompt") } if !containsStr(prompt, "TOOL CALL FORMAT") { diff --git a/internal/promptcompat/prompt_build_test.go b/internal/promptcompat/prompt_build_test.go index dd80b6d..23ec237 100644 --- a/internal/promptcompat/prompt_build_test.go +++ b/internal/promptcompat/prompt_build_test.go @@ -74,7 +74,7 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t * } finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false) - if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>... block at the end of your response.") { + if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...<|/DSML|tool_calls> block at the end of your response.") { t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt) } if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") { diff --git a/internal/toolcall/tool_prompt.go b/internal/toolcall/tool_prompt.go index 6844eb4..8be9ee3 100644 --- a/internal/toolcall/tool_prompt.go +++ b/internal/toolcall/tool_prompt.go @@ -11,46 +11,45 @@ import "strings" func BuildToolCallInstructions(toolNames []string) string { return `TOOL CALL FORMAT — FOLLOW EXACTLY: -<|DSML|tool_calls> - <|DSML|invoke name="TOOL_NAME_HERE"> - <|DSML|parameter name="PARAMETER_NAME"> - - +<|DSML|tool_calls> + <|DSML|invoke name="TOOL_NAME_HERE"> + <|DSML|parameter name="PARAMETER_NAME"><|/DSML|parameter> + <|/DSML|invoke> +<|/DSML|tool_calls> RULES: -1) Use the <|DSML|tool_calls> wrapper format. -2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root. 
-3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">. +1) Use the <|DSML|tool_calls> wrapper format. +2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root. +3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">. 4) All string values must use , even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries. -5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">... node. +5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...<|/DSML|parameter> node. 6) Objects use nested XML elements inside the parameter body. Arrays may repeat children. 7) Numbers, booleans, and null stay plain text. 8) Use only the parameter names in the tool schema. Do not invent fields. 9) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue. -10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>. -11) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with . +10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>. +11) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with <|/DSML|tool_calls>. 12) Compatibility note: the runtime also accepts the legacy XML tags / / , but prefer the DSML-prefixed form above. PARAMETER SHAPES: -- string => <|DSML|parameter name="x"> -- object => <|DSML|parameter name="x">... -- array => <|DSML|parameter name="x">...... 
-- number/bool/null => <|DSML|parameter name="x">plain_text +- string => <|DSML|parameter name="x"><|/DSML|parameter> +- object => <|DSML|parameter name="x">...<|/DSML|parameter> +- array => <|DSML|parameter name="x">......<|/DSML|parameter> +- number/bool/null => <|DSML|parameter name="x">plain_text<|/DSML|parameter> 【WRONG — Do NOT do these】: Wrong 1 — mixed text after XML: - <|DSML|tool_calls>... I hope this helps. + <|DSML|tool_calls>...<|/DSML|tool_calls> I hope this helps. Wrong 2 — Markdown code fences: ` + "```xml" + ` - <|DSML|tool_calls>... + <|DSML|tool_calls>...<|/DSML|tool_calls> ` + "```" + ` Wrong 3 — missing opening wrapper: - <|DSML|invoke name="TOOL_NAME">... - - -Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>... block at the end of your response. + <|DSML|invoke name="TOOL_NAME">...<|/DSML|invoke> + <|/DSML|tool_calls> +Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...<|/DSML|tool_calls> block at the end of your response. ` + buildCorrectToolExamples(toolNames) } diff --git a/internal/toolcall/tool_prompt_test.go b/internal/toolcall/tool_prompt_test.go index f153e43..482b8bc 100644 --- a/internal/toolcall/tool_prompt_test.go +++ b/internal/toolcall/tool_prompt_test.go @@ -111,7 +111,7 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) { func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *testing.T) { out := BuildToolCallInstructions([]string{"read_file"}) - if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") { + if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") { t.Fatalf("expected explicit missing-opening-tag warning, got: %s", out) } if !strings.Contains(out, "Wrong 3 — missing opening wrapper") { diff --git a/internal/toolcall/toolcalls_markup.go b/internal/toolcall/toolcalls_markup.go index f9f2b4f..c52ed85 100644 --- a/internal/toolcall/toolcalls_markup.go +++ b/internal/toolcall/toolcalls_markup.go 
@@ -145,7 +145,6 @@ func SanitizeLooseCDATA(text string) string { return "" } - lower := strings.ToLower(text) const openMarker = "" @@ -154,17 +153,16 @@ func SanitizeLooseCDATA(text string) string { changed := false pos := 0 for pos < len(text) { - startRel := strings.Index(lower[pos:], openMarker) - if startRel < 0 { + start := indexASCIIFold(text, pos, openMarker) + if start < 0 { b.WriteString(text[pos:]) break } - start := pos + startRel contentStart := start + len(openMarker) b.WriteString(text[pos:start]) - if endRel := strings.Index(lower[contentStart:], closeMarker); endRel >= 0 { - end := contentStart + endRel + len(closeMarker) + if endRel := indexASCIIFold(text, contentStart, closeMarker); endRel >= 0 { + end := endRel + len(closeMarker) b.WriteString(text[start:end]) pos = end continue diff --git a/internal/toolcall/toolcalls_parse.go b/internal/toolcall/toolcalls_parse.go index 772b297..05b5a8b 100644 --- a/internal/toolcall/toolcalls_parse.go +++ b/internal/toolcall/toolcalls_parse.go @@ -212,17 +212,16 @@ func firstFenceMarkerIndex(line string) int { } func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool, string) { - lower := strings.ToLower(line) pos := 0 state := inCDATA fenceMarker := cdataFenceMarker lineForFence := line if !state { - start := strings.Index(lower[pos:], "") - if end < 0 { + for pos < len(line) { + endPos := indexASCIIFold(line, pos, "]]>") + if endPos < 0 { return true, fenceMarker } - endPos := pos + end pos = endPos + len("]]>") if fenceMarker != "" { continue } - if cdataEndLooksStructural(lower, pos) || strings.TrimSpace(lower[pos:]) == "" { + if cdataEndLooksStructural(line, pos) || strings.TrimSpace(line[pos:]) == "" { state = false - for pos < len(lower) { - start := strings.Index(lower[pos:], "= len(text) { return -1 diff --git a/internal/toolcall/toolcalls_scan.go b/internal/toolcall/toolcalls_scan.go index c635b67..9368e86 100644 --- a/internal/toolcall/toolcalls_scan.go +++ 
b/internal/toolcall/toolcalls_scan.go @@ -134,7 +134,6 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) { if start < 0 || start >= len(text) || text[start] != '<' { return ToolMarkupTag{}, false } - lower := strings.ToLower(text) i := start + 1 for i < len(text) && text[i] == '<' { i++ @@ -144,8 +143,8 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) { closing = true i++ } - i, dsmlLike := consumeToolMarkupNamePrefix(lower, text, i) - name, nameLen := matchToolMarkupName(lower, i, dsmlLike) + i, dsmlLike := consumeToolMarkupNamePrefix(text, i) + name, nameLen := matchToolMarkupName(text, i, dsmlLike) if nameLen == 0 { return ToolMarkupTag{}, false } @@ -188,7 +187,6 @@ func IsPartialToolMarkupTagPrefix(text string) bool { if text == "" || text[0] != '<' || strings.Contains(text, ">") { return false } - lower := strings.ToLower(text) i := 1 for i < len(text) && text[i] == '<' { i++ @@ -203,13 +201,13 @@ func IsPartialToolMarkupTagPrefix(text string) bool { if i == len(text) { return true } - if hasToolMarkupNamePrefix(lower[i:]) { + if hasToolMarkupNamePrefix(text, i) { return true } - if strings.HasPrefix("dsml", lower[i:]) { + if hasDSMLPrefix(text, i) { return true } - next, ok := consumeToolMarkupNamePrefixOnce(lower, text, i) + next, ok := consumeToolMarkupNamePrefixOnce(text, i) if !ok { return false } @@ -218,10 +216,10 @@ func IsPartialToolMarkupTagPrefix(text string) bool { return false } -func consumeToolMarkupNamePrefix(lower, text string, idx int) (int, bool) { +func consumeToolMarkupNamePrefix(text string, idx int) (int, bool) { dsmlLike := false for { - next, ok := consumeToolMarkupNamePrefixOnce(lower, text, idx) + next, ok := consumeToolMarkupNamePrefixOnce(text, idx) if !ok { return idx, dsmlLike } @@ -230,14 +228,14 @@ func consumeToolMarkupNamePrefix(lower, text string, idx int) (int, bool) { } } -func consumeToolMarkupNamePrefixOnce(lower, text string, idx int) (int, bool) { +func 
consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) { if next, ok := consumeToolMarkupPipe(text, idx); ok { return next, true } if idx < len(text) && (text[idx] == ' ' || text[idx] == '\t' || text[idx] == '\r' || text[idx] == '\n') { return idx + 1, true } - if strings.HasPrefix(lower[idx:], "dsml") { + if hasASCIIPrefixFoldAt(text, idx, "dsml") { next := idx + len("dsml") if next < len(text) && (text[next] == '-' || text[next] == '_') { next++ @@ -247,21 +245,49 @@ func consumeToolMarkupNamePrefixOnce(lower, text string, idx int) (int, bool) { return idx, false } -func hasToolMarkupNamePrefix(lowerTail string) bool { +// hasDSMLPrefix checks if "dsml" starts with text[start:] (case-insensitive). +func hasDSMLPrefix(text string, start int) bool { + const dsml = "dsml" + remain := len(text) - start + if remain <= 0 || remain > len(dsml) { + return false + } + for j := 0; j < remain; j++ { + if asciiLower(text[start+j]) != dsml[j] { + return false + } + } + return true +} + +func hasToolMarkupNamePrefix(text string, start int) bool { for _, name := range toolMarkupNames { - if strings.HasPrefix(lowerTail, name.raw) || strings.HasPrefix(name.raw, lowerTail) { + if hasASCIIPrefixFoldAt(text, start, name.raw) { return true } + tailLen := len(text) - start + if tailLen > 0 && tailLen <= len(name.raw) { + match := true + for j := 0; j < tailLen; j++ { + if asciiLower(text[start+j]) != asciiLower(name.raw[j]) { + match = false + break + } + } + if match { + return true + } + } } return false } -func matchToolMarkupName(lower string, start int, dsmlLike bool) (string, int) { +func matchToolMarkupName(text string, start int, dsmlLike bool) (string, int) { for _, name := range toolMarkupNames { if name.dsmlOnly && !dsmlLike { continue } - if strings.HasPrefix(lower[start:], name.raw) { + if hasASCIIPrefixFoldAt(text, start, name.raw) { return name.canonical, len(name.raw) } } From f33789399ee7e53c89446f5288491e3ced8c9ac4 Mon Sep 17 00:00:00 2001 From: waiwai 
<511158080@qq.com> Date: Sat, 9 May 2026 16:42:22 +0800 Subject: [PATCH 2/2] fix(toolcall): correct DSML closing tag slash position MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The closing tag format was <|/DSML|tag> but must be </|DSML|tag>. The scanner's closing-tag detection checks text[1] == '/', so the slash must come immediately after '<', before the first full-width pipe (U+FF5C). Tags like <|/DSML|tool_calls> would not set closing=true and would not match any tool markup name. Files fixed: - internal/toolcall/tool_prompt.go: all closing tags - internal/promptcompat/prompt_build_test.go: 1 test expectation --- internal/promptcompat/prompt_build_test.go | 2 +- internal/toolcall/tool_prompt.go | 28 +++++++++++----------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/internal/promptcompat/prompt_build_test.go b/internal/promptcompat/prompt_build_test.go index 23ec237..0c9b87b 100644 --- a/internal/promptcompat/prompt_build_test.go +++ b/internal/promptcompat/prompt_build_test.go @@ -74,7 +74,7 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t * } finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false) - if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...<|/DSML|tool_calls> block at the end of your response.") { + if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>... 
block at the end of your response.") { t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt) } if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") { diff --git a/internal/toolcall/tool_prompt.go b/internal/toolcall/tool_prompt.go index 8be9ee3..a327261 100644 --- a/internal/toolcall/tool_prompt.go +++ b/internal/toolcall/tool_prompt.go @@ -13,43 +13,43 @@ func BuildToolCallInstructions(toolNames []string) string { <|DSML|tool_calls> <|DSML|invoke name="TOOL_NAME_HERE"> - <|DSML|parameter name="PARAMETER_NAME"><|/DSML|parameter> - <|/DSML|invoke> -<|/DSML|tool_calls> + <|DSML|parameter name="PARAMETER_NAME"> + + RULES: 1) Use the <|DSML|tool_calls> wrapper format. 2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root. 3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">. 4) All string values must use , even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries. -5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...<|/DSML|parameter> node. +5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">... node. 6) Objects use nested XML elements inside the parameter body. Arrays may repeat children. 7) Numbers, booleans, and null stay plain text. 8) Use only the parameter names in the tool schema. Do not invent fields. 9) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue. 10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>. -11) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with <|/DSML|tool_calls>. +11) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with . 12) Compatibility note: the runtime also accepts the legacy XML tags / / , but prefer the DSML-prefixed form above. 
PARAMETER SHAPES: -- string => <|DSML|parameter name="x"><|/DSML|parameter> -- object => <|DSML|parameter name="x">...<|/DSML|parameter> -- array => <|DSML|parameter name="x">......<|/DSML|parameter> -- number/bool/null => <|DSML|parameter name="x">plain_text<|/DSML|parameter> +- string => <|DSML|parameter name="x"> +- object => <|DSML|parameter name="x">... +- array => <|DSML|parameter name="x">...... +- number/bool/null => <|DSML|parameter name="x">plain_text 【WRONG — Do NOT do these】: Wrong 1 — mixed text after XML: - <|DSML|tool_calls>...<|/DSML|tool_calls> I hope this helps. + <|DSML|tool_calls>... I hope this helps. Wrong 2 — Markdown code fences: ` + "```xml" + ` - <|DSML|tool_calls>...<|/DSML|tool_calls> + <|DSML|tool_calls>... ` + "```" + ` Wrong 3 — missing opening wrapper: - <|DSML|invoke name="TOOL_NAME">...<|/DSML|invoke> - <|/DSML|tool_calls> + <|DSML|invoke name="TOOL_NAME">... + -Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...<|/DSML|tool_calls> block at the end of your response. +Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>... block at the end of your response. ` + buildCorrectToolExamples(toolNames) }