mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-10 11:17:41 +08:00
Replace all strings.ToLower usage with ASCII case-insensitive matching (hasASCIIPrefixFoldAt, indexASCIIFold, hasDSMLPrefix) to prevent slice bounds errors when Unicode characters change byte length after case folding (e.g., Turkish İ U+0130 → i + combining dot: 2 bytes → 3 bytes). Root cause: code created a strings.ToLower(text) copy, found byte positions in that copy, then used those positions to slice the original text — byte offsets that were valid in the lowercased copy became out-of-bounds in the original when case folding changed byte lengths. Files changed: - toolcalls_scan.go: remove 5 lower usages, add hasDSMLPrefix - toolcalls_parse_markup.go: remove 3 lower usages, add indexASCIIFold - toolcalls_markup.go: SanitizeLooseCDATA lower removal - toolcalls_parse.go: updateCDATAStateForStrip lower removal - tool_prompt.go: align DSML pipe characters with tool call spec - tool_prompt_test.go: fix pre-existing test character mismatch
249 lines
9.6 KiB
Go
249 lines
9.6 KiB
Go
package toolcall
|
||
|
||
import "strings"
|
||
|
||
// BuildToolCallInstructions generates the unified tool-calling instruction block
|
||
// used by all adapters (OpenAI, Claude, Gemini). It uses attention-optimized
|
||
// structure: rules → negative examples → positive examples → anchor.
|
||
//
|
||
// The toolNames slice should contain the actual tool names available in the
|
||
// current request; the function picks real names for examples.
|
||
func BuildToolCallInstructions(toolNames []string) string {
|
||
return `TOOL CALL FORMAT — FOLLOW EXACTLY:
|
||
|
||
<|DSML|tool_calls>
|
||
<|DSML|invoke name="TOOL_NAME_HERE">
|
||
<|DSML|parameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]><|/DSML|parameter>
|
||
<|/DSML|invoke>
|
||
<|/DSML|tool_calls>
|
||
|
||
RULES:
|
||
1) Use the <|DSML|tool_calls> wrapper format.
|
||
2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root.
|
||
3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">.
|
||
4) All string values must use <![CDATA[...]]>, even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries.
|
||
5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...<|/DSML|parameter> node.
|
||
6) Objects use nested XML elements inside the parameter body. Arrays may repeat <item> children.
|
||
7) Numbers, booleans, and null stay plain text.
|
||
8) Use only the parameter names in the tool schema. Do not invent fields.
|
||
9) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
|
||
10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
|
||
11) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with <|/DSML|tool_calls>.
|
||
12) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
|
||
|
||
PARAMETER SHAPES:
|
||
- string => <|DSML|parameter name="x"><![CDATA[value]]><|/DSML|parameter>
|
||
- object => <|DSML|parameter name="x"><field>...</field><|/DSML|parameter>
|
||
- array => <|DSML|parameter name="x"><item>...</item><item>...</item><|/DSML|parameter>
|
||
- number/bool/null => <|DSML|parameter name="x">plain_text<|/DSML|parameter>
|
||
|
||
【WRONG — Do NOT do these】:
|
||
|
||
Wrong 1 — mixed text after XML:
|
||
<|DSML|tool_calls>...<|/DSML|tool_calls> I hope this helps.
|
||
Wrong 2 — Markdown code fences:
|
||
` + "```xml" + `
|
||
<|DSML|tool_calls>...<|/DSML|tool_calls>
|
||
` + "```" + `
|
||
Wrong 3 — missing opening wrapper:
|
||
<|DSML|invoke name="TOOL_NAME">...<|/DSML|invoke>
|
||
<|/DSML|tool_calls>
|
||
|
||
Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...<|/DSML|tool_calls> block at the end of your response.
|
||
` + buildCorrectToolExamples(toolNames)
|
||
}
|
||
|
||
// promptToolExample is one tool invocation shown in a prompt example: the
// tool name placed in the invoke tag and the already-rendered parameter
// markup that goes inside it.
type promptToolExample struct {
	name, params string
}
|
||
|
||
func buildCorrectToolExamples(toolNames []string) string {
|
||
names := uniqueToolNames(toolNames)
|
||
examples := make([]string, 0, 4)
|
||
|
||
if single, ok := firstBasicExample(names); ok {
|
||
examples = append(examples, "Example A — Single tool:\n"+renderToolExampleBlock([]promptToolExample{single}))
|
||
}
|
||
|
||
if parallel := firstNBasicExamples(names, 2); len(parallel) >= 2 {
|
||
examples = append(examples, "Example B — Two tools in parallel:\n"+renderToolExampleBlock(parallel))
|
||
}
|
||
|
||
if nested, ok := firstNestedExample(names); ok {
|
||
examples = append(examples, "Example C — Tool with nested XML parameters:\n"+renderToolExampleBlock([]promptToolExample{nested}))
|
||
}
|
||
|
||
if script, ok := firstScriptExample(names); ok {
|
||
examples = append(examples, "Example D — Tool with long script using CDATA (RELIABLE FOR CODE/SCRIPTS):\n"+renderToolExampleBlock([]promptToolExample{script}))
|
||
}
|
||
|
||
if len(examples) == 0 {
|
||
return ""
|
||
}
|
||
return "【CORRECT EXAMPLES】:\n\n" + strings.Join(examples, "\n\n") + "\n\n"
|
||
}
|
||
|
||
// uniqueToolNames trims surrounding whitespace from every entry, discards
// entries that are empty after trimming, and removes duplicates while keeping
// the first-seen order. The result is always a non-nil slice.
func uniqueToolNames(toolNames []string) []string {
	seen := make(map[string]struct{}, len(toolNames))
	unique := make([]string, 0, len(toolNames))
	for _, raw := range toolNames {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			continue
		}
		if _, dup := seen[trimmed]; dup {
			continue
		}
		seen[trimmed] = struct{}{}
		unique = append(unique, trimmed)
	}
	return unique
}
|
||
|
||
func firstBasicExample(names []string) (promptToolExample, bool) {
|
||
for _, name := range names {
|
||
if params, ok := exampleBasicParams(name); ok {
|
||
return promptToolExample{name: name, params: params}, true
|
||
}
|
||
}
|
||
return promptToolExample{}, false
|
||
}
|
||
|
||
func firstNBasicExamples(names []string, count int) []promptToolExample {
|
||
out := make([]promptToolExample, 0, count)
|
||
for _, name := range names {
|
||
if params, ok := exampleBasicParams(name); ok {
|
||
out = append(out, promptToolExample{name: name, params: params})
|
||
if len(out) == count {
|
||
return out
|
||
}
|
||
}
|
||
}
|
||
return out
|
||
}
|
||
|
||
func firstNestedExample(names []string) (promptToolExample, bool) {
|
||
for _, name := range names {
|
||
if params, ok := exampleNestedParams(name); ok {
|
||
return promptToolExample{name: name, params: params}, true
|
||
}
|
||
}
|
||
return promptToolExample{}, false
|
||
}
|
||
|
||
func firstScriptExample(names []string) (promptToolExample, bool) {
|
||
for _, name := range names {
|
||
if params, ok := exampleScriptParams(name); ok {
|
||
return promptToolExample{name: name, params: params}, true
|
||
}
|
||
}
|
||
return promptToolExample{}, false
|
||
}
|
||
|
||
func renderToolExampleBlock(calls []promptToolExample) string {
|
||
var b strings.Builder
|
||
b.WriteString("<|DSML|tool_calls>\n")
|
||
for _, call := range calls {
|
||
b.WriteString(` <|DSML|invoke name="`)
|
||
b.WriteString(call.name)
|
||
b.WriteString(`">` + "\n")
|
||
b.WriteString(indentPromptParameters(call.params, " "))
|
||
b.WriteString("\n </|DSML|invoke>\n")
|
||
}
|
||
b.WriteString("</|DSML|tool_calls>")
|
||
return b.String()
|
||
}
|
||
|
||
// indentPromptParameters prefixes every non-blank line of body with indent.
// Lines that are empty or whitespace-only are left exactly as they are. When
// body as a whole is blank, an indented empty "content" parameter element is
// returned instead.
func indentPromptParameters(body, indent string) string {
	if strings.TrimSpace(body) == "" {
		return indent + `<|DSML|parameter name="content"></|DSML|parameter>`
	}

	rows := strings.Split(body, "\n")
	for idx := range rows {
		if strings.TrimSpace(rows[idx]) != "" {
			rows[idx] = indent + rows[idx]
		}
	}
	return strings.Join(rows, "\n")
}
|
||
|
||
// wrapParameter encloses inner in a DSML parameter element whose name
// attribute is name. Neither argument is escaped; both are inserted verbatim.
func wrapParameter(name, inner string) string {
	const closeTag = `</|DSML|parameter>`
	openTag := `<|DSML|parameter name="` + name + `">`
	return openTag + inner + closeTag
}
|
||
|
||
func exampleBasicParams(name string) (string, bool) {
|
||
switch strings.TrimSpace(name) {
|
||
case "Read":
|
||
return wrapParameter("file_path", promptCDATA("README.md")), true
|
||
case "Glob":
|
||
return wrapParameter("pattern", promptCDATA("**/*.go")) + "\n" + wrapParameter("path", promptCDATA(".")), true
|
||
case "read_file":
|
||
return wrapParameter("path", promptCDATA("src/main.go")), true
|
||
case "list_files":
|
||
return wrapParameter("path", promptCDATA(".")), true
|
||
case "search_files":
|
||
return wrapParameter("query", promptCDATA("tool call parser")), true
|
||
case "Bash", "execute_command":
|
||
return wrapParameter("command", promptCDATA("pwd")), true
|
||
case "exec_command":
|
||
return wrapParameter("cmd", promptCDATA("pwd")), true
|
||
case "Write":
|
||
return wrapParameter("file_path", promptCDATA("notes.txt")) + "\n" + wrapParameter("content", promptCDATA("Hello world")), true
|
||
case "write_to_file":
|
||
return wrapParameter("path", promptCDATA("notes.txt")) + "\n" + wrapParameter("content", promptCDATA("Hello world")), true
|
||
case "Edit":
|
||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + wrapParameter("old_string", promptCDATA("foo")) + "\n" + wrapParameter("new_string", promptCDATA("bar")), true
|
||
case "MultiEdit":
|
||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
|
||
}
|
||
return "", false
|
||
}
|
||
|
||
func exampleNestedParams(name string) (string, bool) {
|
||
switch strings.TrimSpace(name) {
|
||
case "MultiEdit":
|
||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
|
||
case "Task":
|
||
return wrapParameter("description", promptCDATA("Investigate flaky tests")) + "\n" + wrapParameter("prompt", promptCDATA("Run targeted tests and summarize failures")), true
|
||
case "ask_followup_question":
|
||
return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `<|DSML|parameter name="follow_up"><item><text>` + promptCDATA("Option A") + `</text></item><item><text>` + promptCDATA("Option B") + `</text></item></|DSML|parameter>`, true
|
||
}
|
||
return "", false
|
||
}
|
||
|
||
func exampleScriptParams(name string) (string, bool) {
|
||
scriptCommand := `cat > /tmp/test_escape.sh <<'EOF'
|
||
#!/bin/bash
|
||
echo 'single "double"'
|
||
echo "literal dollar: \$HOME"
|
||
EOF
|
||
bash /tmp/test_escape.sh`
|
||
scriptContent := `#!/bin/bash
|
||
echo 'single "double"'
|
||
echo "literal dollar: $HOME"`
|
||
|
||
switch strings.TrimSpace(name) {
|
||
case "Bash":
|
||
return wrapParameter("command", promptCDATA(scriptCommand)) + "\n" + wrapParameter("description", promptCDATA("Test shell escaping")), true
|
||
case "execute_command":
|
||
return wrapParameter("command", promptCDATA(scriptCommand)), true
|
||
case "exec_command":
|
||
return wrapParameter("cmd", promptCDATA(scriptCommand)), true
|
||
case "Write":
|
||
return wrapParameter("file_path", promptCDATA("test_escape.sh")) + "\n" + wrapParameter("content", promptCDATA(scriptContent)), true
|
||
case "write_to_file":
|
||
return wrapParameter("path", promptCDATA("test_escape.sh")) + "\n" + wrapParameter("content", promptCDATA(scriptContent)), true
|
||
}
|
||
return "", false
|
||
}
|
||
|
||
// promptCDATA wraps text in a CDATA section. Empty text yields an empty
// string rather than an empty section. Any "]]>" inside text is split across
// two adjacent CDATA sections so it cannot terminate the section early.
func promptCDATA(text string) string {
	if text == "" {
		return ""
	}

	const (
		opener = "<![CDATA["
		closer = "]]>"
	)
	// ReplaceAll leaves text untouched when it contains no terminator, so no
	// separate containment check is needed.
	escaped := strings.ReplaceAll(text, closer, "]]"+closer+opener+">")
	return opener + escaped + closer
}
|