refactor: enforce mandatory CDATA wrapping for all string parameters in tool call XML output

2026-05-05 17:05:32 +08:00 · 2026-04-19 20:11:53 +08:00
parent 0f2b5fee23
commit 69b7bc0c1a
5 changed files with 48 additions and 33 deletions
--- a/internal/adapter/claude/handler_util_test.go
+++ b/internal/adapter/claude/handler_util_test.go
@@ -96,7 +96,7 @@ func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) {
 	if !containsStr(content, "<tool_calls>") || !containsStr(content, "<tool_name>search_web</tool_name>") {
 		t.Fatalf("expected assistant content to include XML tool call history, got %q", content)
 	}
-	if !containsStr(content, "<parameters>\n      <query>latest</query>\n    </parameters>") {
+	if !containsStr(content, "<parameters>\n      <query><![CDATA[latest]]></query>\n    </parameters>") {
 		t.Fatalf("expected assistant content to include serialized parameters, got %q", content)
 	}
 }
--- a/internal/prompt/tool_calls.go
+++ b/internal/prompt/tool_calls.go
@@ -230,6 +230,8 @@ func renderPromptToolXMLNode(name string, value any, indent string) (string, boo
 	}
 }

+// renderPromptXMLText emits CDATA for every string so prompt-visible tool
+// history stays uniform and does not drift back toward ad-hoc escaping.
 func renderPromptXMLText(text string) string {
 	if text == "" {
 		return ""
@@ -237,10 +239,7 @@ func renderPromptXMLText(text string) string {
 	if strings.Contains(text, "]]>") {
 		return "<![CDATA[" + strings.ReplaceAll(text, "]]>", "]]]]><![CDATA[>") + "]]>"
 	}
-	if strings.ContainsAny(text, "<>&\n\r") {
-		return "<![CDATA[" + text + "]]>"
-	}
-	return escapeXMLText(text)
+	return "<![CDATA[" + text + "]]>"
 }

 func isValidPromptXMLName(name string) bool {
--- a/internal/prompt/tool_calls_test.go
+++ b/internal/prompt/tool_calls_test.go
@@ -22,7 +22,7 @@ func TestFormatToolCallsForPromptXML(t *testing.T) {
 	if got == "" {
 		t.Fatal("expected non-empty formatted tool calls")
 	}
-	if got != "<tool_calls>\n  <tool_call>\n    <tool_name>search_web</tool_name>\n    <parameters>\n      <query>latest</query>\n    </parameters>\n  </tool_call>\n</tool_calls>" {
+	if got != "<tool_calls>\n  <tool_call>\n    <tool_name>search_web</tool_name>\n    <parameters>\n      <query><![CDATA[latest]]></query>\n    </parameters>\n  </tool_call>\n</tool_calls>" {
 		t.Fatalf("unexpected formatted tool call XML: %q", got)
 	}
 }
@@ -50,7 +50,7 @@ func TestFormatToolCallsForPromptUsesCDATAForMultilineContent(t *testing.T) {
 			},
 		},
 	})
-	want := "<tool_calls>\n  <tool_call>\n    <tool_name>write_file</tool_name>\n    <parameters>\n      <content><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></content>\n      <path>script.sh</path>\n    </parameters>\n  </tool_call>\n</tool_calls>"
+	want := "<tool_calls>\n  <tool_call>\n    <tool_name>write_file</tool_name>\n    <parameters>\n      <content><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></content>\n      <path><![CDATA[script.sh]]></path>\n    </parameters>\n  </tool_call>\n</tool_calls>"
 	if got != want {
 		t.Fatalf("unexpected multiline cdata tool call XML: %q", got)
 	}
--- a/internal/toolcall/tool_prompt.go
+++ b/internal/toolcall/tool_prompt.go
@@ -36,34 +36,40 @@ func BuildToolCallInstructions(toolNames []string) string {

 	return `TOOL CALL FORMAT — FOLLOW EXACTLY:

-When calling tools, emit ONLY raw XML at the very end of your response. No text before, no text after, no markdown fences.
+If you need to call tools, your entire response must be exactly one XML block and nothing else.

 <tool_calls>
  <tool_call>
    <tool_name>TOOL_NAME_HERE</tool_name>
    <parameters>
-      <PARAMETER_NAME>PARAMETER_VALUE</PARAMETER_NAME>
+      <PARAMETER_NAME><![CDATA[PARAMETER_VALUE]]></PARAMETER_NAME>
    </parameters>
  </tool_call>
 </tool_calls>

 RULES:
-1) When calling tools, you MUST use the <tool_calls> XML format.
-2) No text is allowed AFTER the XML block.
-3) <parameters> should be XML tags, not JSON. Use nested XML elements for structured data (e.g., <param_name>value</param_name>).
-4) For long text, scripts, novels, or code content, YOU MUST wrap the value in <![CDATA[ content ]]> to preserve formatting and avoid character escaping errors.
-5) Multiple tools must be inside the same <tool_calls> root.
-6) Do NOT wrap XML in markdown fences (` + "```" + `).
-7) Do NOT invent parameters. Use only the provided schema.
-8) CRITICAL: Do NOT output internal monologues (e.g. "I will list files now..."). Just output your answer or the XML.
+1) Use the <tool_calls> XML format only. Never emit JSON or function-call syntax.
+2) Put one or more <tool_call> entries under a single <tool_calls> root.
+3) Parameters must be XML, not JSON.
+4) All string values must use <![CDATA[...]]>, even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries.
+5) Objects use nested XML elements. Arrays may repeat the same tag or use <item> children.
+6) Numbers, booleans, and null stay plain text.
+7) Use only the parameter names in the tool schema. Do not invent fields.
+8) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
+
+PARAMETER SHAPES:
+- string => <name><![CDATA[value]]></name>
+- object => nested XML elements
+- array => repeated tags or <item> children
+- number/bool/null => plain text

 ❌ WRONG — Do NOT do these:
 Wrong 1 — mixed text after XML:
  <tool_calls>...</tool_calls> I hope this helps.
 Wrong 2 — function-call syntax:
  Grep({"pattern": "token"})
-Wrong 3 — missing <tool_calls> wrapper:
-  <tool_call><tool_name>` + ex1 + `</tool_name><parameters>{}</parameters></tool_call>
+Wrong 3 — JSON parameters:
+  <tool_call><tool_name>` + ex1 + `</tool_name><parameters>{"path":"x"}</parameters></tool_call>
 Wrong 4 — Markdown code fences:
  ` + "```xml" + `
  <tool_calls>...</tool_calls>
@@ -97,7 +103,7 @@ Example B — Two tools in parallel:
  </tool_call>
 </tool_calls>

-Example C — Tool with complex structured XML parameters:
+Example C — Tool with nested XML parameters:
 <tool_calls>
  <tool_call>
    <tool_name>` + ex3 + `</tool_name>
@@ -110,7 +116,7 @@ Example D — Tool with long script using CDATA (RELIABLE FOR CODE/SCRIPTS):
  <tool_call>
    <tool_name>` + ex2 + `</tool_name>
    <parameters>
-      <path>script.sh</path>
+      <path>` + promptCDATA("script.sh") + `</path>
      <content><![CDATA[
 #!/bin/bash
 if [ "$1" == "test" ]; then
@@ -136,34 +142,44 @@ func matchAny(name string, candidates ...string) bool {
 func exampleReadParams(name string) string {
 	switch strings.TrimSpace(name) {
 	case "Read":
-		return `<file_path>README.md</file_path>`
+		return `<file_path>` + promptCDATA("README.md") + `</file_path>`
 	case "Glob":
-		return `<pattern>**/*.go</pattern><path>.</path>`
+		return `<pattern>` + promptCDATA("**/*.go") + `</pattern><path>` + promptCDATA(".") + `</path>`
 	default:
-		return `<path>src/main.go</path>`
+		return `<path>` + promptCDATA("src/main.go") + `</path>`
 	}
 }

 func exampleWriteOrExecParams(name string) string {
 	switch strings.TrimSpace(name) {
 	case "Bash", "execute_command":
-		return `<command>pwd</command>`
+		return `<command>` + promptCDATA("pwd") + `</command>`
 	case "exec_command":
-		return `<cmd>pwd</cmd>`
+		return `<cmd>` + promptCDATA("pwd") + `</cmd>`
 	case "Edit":
-		return `<file_path>README.md</file_path><old_string>foo</old_string><new_string>bar</new_string>`
+		return `<file_path>` + promptCDATA("README.md") + `</file_path><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string>`
 	case "MultiEdit":
-		return `<file_path>README.md</file_path><edits><old_string>foo</old_string><new_string>bar</new_string></edits>`
+		return `<file_path>` + promptCDATA("README.md") + `</file_path><edits><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></edits>`
 	default:
-		return `<path>output.txt</path><content>Hello world</content>`
+		return `<path>` + promptCDATA("output.txt") + `</path><content>` + promptCDATA("Hello world") + `</content>`
 	}
 }

 func exampleInteractiveParams(name string) string {
 	switch strings.TrimSpace(name) {
 	case "Task":
-		return `<description>Investigate flaky tests</description><prompt>Run targeted tests and summarize failures</prompt>`
+		return `<description>` + promptCDATA("Investigate flaky tests") + `</description><prompt>` + promptCDATA("Run targeted tests and summarize failures") + `</prompt>`
 	default:
-		return `<question>Which approach do you prefer?</question><follow_up><text>Option A</text></follow_up><follow_up><text>Option B</text></follow_up>`
+		return `<question>` + promptCDATA("Which approach do you prefer?") + `</question><follow_up><text>` + promptCDATA("Option A") + `</text></follow_up><follow_up><text>` + promptCDATA("Option B") + `</text></follow_up>`
 	}
 }
+
+func promptCDATA(text string) string {
+	if text == "" {
+		return ""
+	}
+	if strings.Contains(text, "]]>") {
+		return "<![CDATA[" + strings.ReplaceAll(text, "]]>", "]]]]><![CDATA[>") + "]]>"
+	}
+	return "<![CDATA[" + text + "]]>"
+}
--- a/internal/toolcall/tool_prompt_test.go
+++ b/internal/toolcall/tool_prompt_test.go
@@ -10,7 +10,7 @@ func TestBuildToolCallInstructions_ExecCommandUsesCmdExample(t *testing.T) {
 	if !strings.Contains(out, `<tool_name>exec_command</tool_name>`) {
 		t.Fatalf("expected exec_command in examples, got: %s", out)
 	}
-	if !strings.Contains(out, `<parameters><cmd>pwd</cmd></parameters>`) {
+	if !strings.Contains(out, `<parameters><cmd><![CDATA[pwd]]></cmd></parameters>`) {
 		t.Fatalf("expected cmd parameter example for exec_command, got: %s", out)
 	}
 }
@@ -20,7 +20,7 @@ func TestBuildToolCallInstructions_ExecuteCommandUsesCommandExample(t *testing.T
 	if !strings.Contains(out, `<tool_name>execute_command</tool_name>`) {
 		t.Fatalf("expected execute_command in examples, got: %s", out)
 	}
-	if !strings.Contains(out, `<parameters><command>pwd</command></parameters>`) {
+	if !strings.Contains(out, `<parameters><command><![CDATA[pwd]]></command></parameters>`) {
 		t.Fatalf("expected command parameter example for execute_command, got: %s", out)
 	}
 }