Files
ds2api/internal/toolcall/tool_prompt.go
waiwai 1e00e482a6 fix(toolcall): eliminate strings.ToLower panics from Unicode case folding
Replace all strings.ToLower usage with ASCII case-insensitive matching
(hasASCIIPrefixFoldAt, indexASCIIFold, hasDSMLPrefix) to prevent slice
bounds errors when Unicode characters change byte length after case
folding (e.g., Turkish İ U+0130 → i + combining dot: 2 bytes → 3 bytes).

Root cause: code created a strings.ToLower(text) copy, found byte
positions in that copy, then used those positions to slice the
original text — byte offsets that were valid in the lowercased copy
became out-of-bounds in the original when case folding changed byte
lengths.

Files changed:
- toolcalls_scan.go: remove 5 lower usages, add hasDSMLPrefix
- toolcalls_parse_markup.go: remove 3 lower usages, add indexASCIIFold
- toolcalls_markup.go: SanitizeLooseCDATA lower removal
- toolcalls_parse.go: updateCDATAStateForStrip lower removal
- tool_prompt.go: align DSML pipe characters with tool call spec
- tool_prompt_test.go: fix pre-existing test character mismatch
2026-05-09 15:05:51 +08:00

249 lines
9.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package toolcall
import "strings"
// BuildToolCallInstructions generates the unified tool-calling instruction block
// used by all adapters (OpenAI, Claude, Gemini).
//
// The returned prompt is a fixed text followed by generated examples, in this
// order: format template → numbered rules → parameter shapes → wrong examples
// → closing reminder → correct examples (appended by buildCorrectToolExamples).
//
// The toolNames slice should contain the actual tool names available in the
// current request. The correct examples are built only from names that have a
// built-in sample; when none of them does, the examples section is omitted and
// only the fixed text is returned.
//
// NOTE(review): as this file is rendered here, the tags inside the literal are
// spelled "<DSMLtool_calls>" while renderToolExampleBlock emits
// "<|DSML|tool_calls>". The literal may contain delimiter characters that are
// not visible in this rendering — confirm against the raw bytes that both
// spellings agree before editing this text.
func BuildToolCallInstructions(toolNames []string) string {
return `TOOL CALL FORMAT — FOLLOW EXACTLY:
<DSMLtool_calls>
<DSMLinvoke name="TOOL_NAME_HERE">
<DSMLparameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></DSMLparameter>
</DSMLinvoke>
</DSMLtool_calls>
RULES:
1) Use the <DSMLtool_calls> wrapper format.
2) Put one or more <DSMLinvoke> entries under a single <DSMLtool_calls> root.
3) Put the tool name in the invoke name attribute: <DSMLinvoke name="TOOL_NAME">.
4) All string values must use <![CDATA[...]]>, even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries.
5) Every top-level argument must be a <DSMLparameter name="ARG_NAME">...</DSMLparameter> node.
6) Objects use nested XML elements inside the parameter body. Arrays may repeat <item> children.
7) Numbers, booleans, and null stay plain text.
8) Use only the parameter names in the tool schema. Do not invent fields.
9) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <DSMLtool_calls>.
11) Never omit the opening <DSMLtool_calls> tag, even if you already plan to close with </DSMLtool_calls>.
12) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
PARAMETER SHAPES:
- string => <DSMLparameter name="x"><![CDATA[value]]></DSMLparameter>
- object => <DSMLparameter name="x"><field>...</field></DSMLparameter>
- array => <DSMLparameter name="x"><item>...</item><item>...</item></DSMLparameter>
- number/bool/null => <DSMLparameter name="x">plain_text</DSMLparameter>
【WRONG — Do NOT do these】:
Wrong 1 — mixed text after XML:
<DSMLtool_calls>...</DSMLtool_calls> I hope this helps.
Wrong 2 — Markdown code fences:
` + "```xml" + `
<DSMLtool_calls>...</DSMLtool_calls>
` + "```" + `
Wrong 3 — missing opening wrapper:
<DSMLinvoke name="TOOL_NAME">...</DSMLinvoke>
</DSMLtool_calls>
Remember: The ONLY valid way to use tools is the <DSMLtool_calls>...</DSMLtool_calls> block at the end of your response.
` + buildCorrectToolExamples(toolNames)
}
// promptToolExample pairs a tool name with the already-rendered parameter
// markup that is shown for that tool inside a prompt example.
type promptToolExample struct {
	name, params string
}
// buildCorrectToolExamples renders the "correct examples" section of the
// prompt from the tool names of the current request.
//
// Up to four examples are produced, each only when a matching tool is found:
// a single call (A), two calls in one block (B), a call with nested
// parameters (C) and a call carrying a multi-line script (D). When no example
// can be built the function returns the empty string so that no section header
// is emitted.
func buildCorrectToolExamples(toolNames []string) string {
	available := uniqueToolNames(toolNames)

	var sections []string
	// addSection appends one titled example; the title already carries its
	// trailing newline.
	addSection := func(title string, calls ...promptToolExample) {
		sections = append(sections, title+renderToolExampleBlock(calls))
	}

	if ex, found := firstBasicExample(available); found {
		addSection("Example A — Single tool:\n", ex)
	}
	if pair := firstNBasicExamples(available, 2); len(pair) >= 2 {
		addSection("Example B — Two tools in parallel:\n", pair...)
	}
	if ex, found := firstNestedExample(available); found {
		addSection("Example C — Tool with nested XML parameters:\n", ex)
	}
	if ex, found := firstScriptExample(available); found {
		addSection("Example D — Tool with long script using CDATA (RELIABLE FOR CODE/SCRIPTS):\n", ex)
	}

	if len(sections) == 0 {
		return ""
	}

	var out strings.Builder
	out.WriteString("【CORRECT EXAMPLES】:\n\n")
	out.WriteString(strings.Join(sections, "\n\n"))
	out.WriteString("\n\n")
	return out.String()
}
// uniqueToolNames returns the whitespace-trimmed entries of toolNames with
// blanks and repeats dropped, keeping the order of first appearance. The
// result is always a non-nil slice.
func uniqueToolNames(toolNames []string) []string {
	visited := make(map[string]struct{}, len(toolNames))
	unique := make([]string, 0, len(toolNames))
	for _, raw := range toolNames {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			continue
		}
		if _, dup := visited[trimmed]; dup {
			continue
		}
		visited[trimmed] = struct{}{}
		unique = append(unique, trimmed)
	}
	return unique
}
// firstBasicExample scans names in order and returns an example for the first
// one that exampleBasicParams recognises. The boolean is false, and the
// example is the zero value, when no name is recognised.
func firstBasicExample(names []string) (promptToolExample, bool) {
	for i := range names {
		body, known := exampleBasicParams(names[i])
		if !known {
			continue
		}
		return promptToolExample{name: names[i], params: body}, true
	}
	return promptToolExample{}, false
}
// firstNBasicExamples returns, in input order, examples for at most count of
// the names that exampleBasicParams recognises. Fewer than count entries are
// returned when not enough names are recognised.
//
// A count of zero or less yields no examples. Without this guard a negative
// count would panic when sizing the result, and a zero count would never hit
// the stop condition and would return every recognised name.
func firstNBasicExamples(names []string, count int) []promptToolExample {
	if count <= 0 {
		return nil
	}
	out := make([]promptToolExample, 0, count)
	for _, name := range names {
		params, ok := exampleBasicParams(name)
		if !ok {
			continue
		}
		out = append(out, promptToolExample{name: name, params: params})
		if len(out) >= count {
			break
		}
	}
	return out
}
// firstNestedExample returns an example for the first entry of names that
// exampleNestedParams recognises, or the zero value and false when there is
// none.
func firstNestedExample(names []string) (promptToolExample, bool) {
	var picked promptToolExample
	for _, candidate := range names {
		body, ok := exampleNestedParams(candidate)
		if ok {
			picked.name, picked.params = candidate, body
			return picked, true
		}
	}
	return picked, false
}
// firstScriptExample returns an example for the first entry of names that
// exampleScriptParams recognises, or the zero value and false when there is
// none.
func firstScriptExample(names []string) (promptToolExample, bool) {
	for idx := 0; idx < len(names); idx++ {
		tool := names[idx]
		if body, matched := exampleScriptParams(tool); matched {
			return promptToolExample{name: tool, params: body}, true
		}
	}
	return promptToolExample{}, false
}
// renderToolExampleBlock wraps the given calls in a single tool_calls root,
// emitting one invoke element per call with its parameters indented beneath
// it. The closing root tag is not followed by a newline.
func renderToolExampleBlock(calls []promptToolExample) string {
	var out strings.Builder
	out.WriteString("<|DSML|tool_calls>\n")
	for i := range calls {
		current := calls[i]
		// Opening invoke tag carrying the tool name.
		out.WriteString(` <|DSML|invoke name="` + current.name + `">` + "\n")
		// Parameter lines followed by the closing invoke tag.
		out.WriteString(indentPromptParameters(current.params, " ") + "\n </|DSML|invoke>\n")
	}
	out.WriteString("</|DSML|tool_calls>")
	return out.String()
}
// indentPromptParameters prefixes every non-blank line of body with indent.
// Lines that are empty or whitespace-only are passed through untouched. A
// body that is blank as a whole is replaced by an indented, empty "content"
// parameter element.
func indentPromptParameters(body, indent string) string {
	if strings.TrimSpace(body) == "" {
		return indent + `<|DSML|parameter name="content"></|DSML|parameter>`
	}

	var out strings.Builder
	for n, row := range strings.Split(body, "\n") {
		if n > 0 {
			out.WriteByte('\n')
		}
		if strings.TrimSpace(row) != "" {
			out.WriteString(indent)
		}
		out.WriteString(row)
	}
	return out.String()
}
// wrapParameter encloses inner in a parameter element whose name attribute is
// name. Neither argument is escaped or validated.
func wrapParameter(name, inner string) string {
	const closing = `</|DSML|parameter>`
	opening := `<|DSML|parameter name="` + name + `">`
	return opening + inner + closing
}
// exampleBasicParams returns sample parameter markup for a known tool name
// (compared after trimming surrounding whitespace). Multiple parameters are
// separated by newlines. The boolean is false for names without a sample.
func exampleBasicParams(name string) (string, bool) {
	// cdataParam renders one parameter whose value is wrapped in CDATA.
	cdataParam := func(key, value string) string {
		return wrapParameter(key, promptCDATA(value))
	}
	// stack puts each parameter on its own line.
	stack := func(params ...string) string {
		return strings.Join(params, "\n")
	}

	tool := strings.TrimSpace(name)
	switch tool {
	case "Read":
		return cdataParam("file_path", "README.md"), true
	case "Glob":
		return stack(cdataParam("pattern", "**/*.go"), cdataParam("path", ".")), true
	case "read_file":
		return cdataParam("path", "src/main.go"), true
	case "list_files":
		return cdataParam("path", "."), true
	case "search_files":
		return cdataParam("query", "tool call parser"), true
	case "Bash", "execute_command":
		return cdataParam("command", "pwd"), true
	case "exec_command":
		return cdataParam("cmd", "pwd"), true
	case "Write":
		return stack(cdataParam("file_path", "notes.txt"), cdataParam("content", "Hello world")), true
	case "write_to_file":
		return stack(cdataParam("path", "notes.txt"), cdataParam("content", "Hello world")), true
	case "Edit":
		return stack(
			cdataParam("file_path", "README.md"),
			cdataParam("old_string", "foo"),
			cdataParam("new_string", "bar"),
		), true
	case "MultiEdit":
		// The edits array is written out by hand because it nests elements
		// instead of holding a single CDATA value.
		edits := `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") +
			`</old_string><new_string>` + promptCDATA("bar") +
			`</new_string></item></|DSML|parameter>`
		return stack(cdataParam("file_path", "README.md"), edits), true
	default:
		return "", false
	}
}
// exampleNestedParams returns sample parameter markup for the tools used in
// the nested-parameter example (name is compared after trimming surrounding
// whitespace). The boolean is false for any other name.
func exampleNestedParams(name string) (string, bool) {
	var first, second string

	switch strings.TrimSpace(name) {
	case "MultiEdit":
		first = wrapParameter("file_path", promptCDATA("README.md"))
		second = `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") +
			`</old_string><new_string>` + promptCDATA("bar") +
			`</new_string></item></|DSML|parameter>`
	case "Task":
		first = wrapParameter("description", promptCDATA("Investigate flaky tests"))
		second = wrapParameter("prompt", promptCDATA("Run targeted tests and summarize failures"))
	case "ask_followup_question":
		first = wrapParameter("question", promptCDATA("Which approach do you prefer?"))
		second = `<|DSML|parameter name="follow_up"><item><text>` + promptCDATA("Option A") +
			`</text></item><item><text>` + promptCDATA("Option B") +
			`</text></item></|DSML|parameter>`
	default:
		return "", false
	}

	// Every supported tool contributes exactly two parameters, one per line.
	return first + "\n" + second, true
}
// exampleScriptParams returns sample parameter markup that carries a
// multi-line shell script, for the command-running and file-writing tools it
// knows (name is compared after trimming surrounding whitespace). The boolean
// is false for any other name.
func exampleScriptParams(name string) (string, bool) {
	// Command form: a heredoc that writes the script to disk and then runs it.
	const heredocCommand = `cat > /tmp/test_escape.sh <<'EOF'
#!/bin/bash
echo 'single "double"'
echo "literal dollar: \$HOME"
EOF
bash /tmp/test_escape.sh`
	// File form: the script body as it would be written to a file.
	const fileBody = `#!/bin/bash
echo 'single "double"'
echo "literal dollar: $HOME"`

	tool := strings.TrimSpace(name)
	switch tool {
	case "Bash", "execute_command", "exec_command":
		argName := "command"
		if tool == "exec_command" {
			argName = "cmd"
		}
		rendered := wrapParameter(argName, promptCDATA(heredocCommand))
		if tool == "Bash" {
			// Only Bash gets an accompanying description parameter.
			rendered += "\n" + wrapParameter("description", promptCDATA("Test shell escaping"))
		}
		return rendered, true
	case "Write", "write_to_file":
		pathArg := "path"
		if tool == "Write" {
			pathArg = "file_path"
		}
		target := wrapParameter(pathArg, promptCDATA("test_escape.sh"))
		content := wrapParameter("content", promptCDATA(fileBody))
		return target + "\n" + content, true
	}
	return "", false
}
// promptCDATA wraps text in a CDATA section. Any "]]>" inside text is split
// across two adjacent CDATA sections so it cannot terminate the section
// early. Empty text yields the empty string rather than an empty section.
func promptCDATA(text string) string {
	const (
		openTag  = "<![CDATA["
		closeTag = "]]>"
	)
	if len(text) == 0 {
		return ""
	}
	// ReplaceAll returns text unchanged when it holds no terminator, so the
	// plain and the escaping case share one expression.
	return openTag + strings.ReplaceAll(text, closeTag, "]]]]><![CDATA[>") + closeTag
}