refactor: update tool call format to prefer XML-style parameters with CDATA support for robust content handling

This commit is contained in:
CJACK
2026-04-19 18:51:25 +08:00
parent 790a8ca980
commit 26d195f2a6
5 changed files with 194 additions and 41 deletions

View File

@@ -19,6 +19,8 @@ const TOOL_CALL_MARKUP_ARGS_PATTERNS = [
/<(?:[a-z0-9_:-]+:)?args\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?args>/i,
/<(?:[a-z0-9_:-]+:)?params\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?params>/i,
];
const CDATA_PATTERN = /<!\[CDATA\[([\s\S]*?)]]>/i;
const HTML_ENTITIES_PATTERN = /&[a-z0-9#]+;/gi;
const {
toStringSafe,
@@ -74,7 +76,7 @@ function parseMarkupSingleToolCall(attrs, inner) {
name = toStringSafe(attrMatch[2]).trim();
}
if (!name) {
name = stripTagText(findMarkupTagValue(inner, TOOL_CALL_MARKUP_NAME_PATTERNS));
name = extractRawTagValue(findMarkupTagValue(inner, TOOL_CALL_MARKUP_NAME_PATTERNS));
}
if (!name) {
return null;
@@ -95,18 +97,21 @@ function parseMarkupSingleToolCall(attrs, inner) {
/**
 * Convert the inner text of an arguments/parameters tag into an input object.
 *
 * Resolution order:
 *   1. A payload that opens with "{" is treated as a JSON document first, so
 *      tag-like text inside JSON string values (e.g. {"html":"<b>x</b>"}) is
 *      not mistaken for XML-style parameters.
 *   2. XML-style key/value tags (e.g. <arg>val</arg>).
 *   3. JSON parsing as a fallback for everything else.
 *   4. The raw text, stored under `_raw`.
 *
 * @param {*} raw Inner content of the arguments tag.
 * @returns {object} Parsed input; `{}` when the content is blank.
 */
function parseMarkupInput(raw) {
  const s = toStringSafe(raw).trim();
  if (!s) {
    return {};
  }

  const isNonEmptyObject = (v) =>
    Boolean(v) && typeof v === 'object' && !Array.isArray(v) && Object.keys(v).length > 0;
  // NOTE(review): presumably parseToolCallInput can wrap unparseable text as
  // { _raw: ... } like the Go parser appears to; such a result must not
  // pre-empt the XML-style parser, so it is only accepted in the fallback step.
  const isRawOnly = (v) =>
    Object.keys(v).length === 1 && Object.prototype.hasOwnProperty.call(v, '_raw');

  const looksLikeJson = s.startsWith('{');
  let parsed = looksLikeJson ? parseToolCallInput(s) : undefined;
  if (isNonEmptyObject(parsed) && !isRawOnly(parsed)) {
    return parsed;
  }

  // Prioritize XML-style KV tags (e.g., <arg>val</arg>)
  const kv = parseMarkupKVObject(s);
  if (Object.keys(kv).length > 0) {
    return kv;
  }

  // Fallback to JSON parsing (reusing the earlier attempt when there was one)
  if (!looksLikeJson) {
    parsed = parseToolCallInput(s);
  }
  if (isNonEmptyObject(parsed)) {
    return parsed;
  }
  return { _raw: extractRawTagValue(s) };
}
function parseMarkupKVObject(text) {
@@ -120,7 +125,7 @@ function parseMarkupKVObject(text) {
if (!key) {
continue;
}
const valueRaw = stripTagText(m[2]);
const valueRaw = extractRawTagValue(m[2]);
if (!valueRaw) {
continue;
}
@@ -133,6 +138,33 @@ function parseMarkupKVObject(text) {
return out;
}
/**
 * Extract the textual value held inside a markup tag.
 *
 * - Blank input yields ''.
 * - A value that consists entirely of one or more adjacent CDATA sections is
 *   unwrapped and returned verbatim (no trimming, no entity decoding), so
 *   scripts and code keep their exact formatting.
 * - Anything else is trimmed and has its standard HTML entities decoded.
 *   Tags are deliberately NOT stripped, to preserve user content such as
 *   "<" symbols in code.
 *
 * CDATA is only honoured when it spans the whole value: content that merely
 * contains a CDATA section somewhere (for example several sibling parameter
 * tags) is returned intact instead of being collapsed to the first section.
 *
 * @param {*} inner Raw inner content of the tag.
 * @returns {string} The extracted value.
 */
function extractRawTagValue(inner) {
  const s = toStringSafe(inner).trim();
  if (!s) {
    return '';
  }
  const cdata = unwrapCdataSections(s);
  if (cdata !== null) {
    return cdata;
  }
  // Decode the trimmed, string-coerced text rather than the raw argument.
  return unescapeHtml(s);
}

/**
 * Concatenate the payloads of back-to-back CDATA sections.
 *
 * @param {string} s Trimmed, non-empty text.
 * @returns {string|null} Joined payloads, or null when `s` is not made up
 *   exclusively of well-formed CDATA sections.
 */
function unwrapCdataSections(s) {
  const OPEN = '<![CDATA[';
  const CLOSE = ']]>';
  let rest = s;
  let out = '';
  while (rest) {
    // The marker is matched case-insensitively, as the previous pattern did.
    if (rest.slice(0, OPEN.length).toUpperCase() !== OPEN) {
      return null;
    }
    const end = rest.indexOf(CLOSE, OPEN.length);
    if (end < 0) {
      return null;
    }
    out += rest.slice(OPEN.length, end);
    rest = rest.slice(end + CLOSE.length);
  }
  return out;
}
/**
 * Decode the standard HTML entities in a string.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again: "&amp;lt;" becomes "&lt;", not "<".
 *
 * Handles the named entities &amp; &lt; &gt; &quot; &apos; and numeric
 * references in decimal (&#039;) or hexadecimal (&#x27;) form. Unknown or
 * out-of-range references are left untouched.
 *
 * @param {*} safe Escaped text; falsy values yield ''.
 * @returns {string} The decoded text.
 */
function unescapeHtml(safe) {
  if (!safe) return '';
  const named = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
  const entity = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos));/g;
  // String() keeps non-string truthy input (e.g. numbers) from throwing.
  return String(safe).replace(entity, (match, dec, hex, name) => {
    if (name) {
      return named[name];
    }
    const code = dec !== undefined ? parseInt(dec, 10) : parseInt(hex, 16);
    if (!(code > 0 && code <= 0x10ffff)) {
      return match;
    }
    return String.fromCodePoint(code);
  });
}
/**
 * Replace every markup tag in `text` with a single space and trim the result.
 *
 * @param {*} text Value to clean; coerced through toStringSafe first.
 * @returns {string} The text with tags blanked out and outer whitespace removed.
 */
function stripTagText(text) {
  const anyTag = /<[^>]+>/g;
  const source = toStringSafe(text);
  const withoutTags = source.replace(anyTag, ' ');
  return withoutTags.trim();
}
@@ -141,7 +173,7 @@ function findMarkupTagValue(text, patterns) {
const source = toStringSafe(text);
for (const p of patterns) {
const m = source.match(p);
if (m && m[1]) {
if (m && m[1] !== undefined) {
return toStringSafe(m[1]);
}
}

View File

@@ -0,0 +1,67 @@
package toolcall
import (
"reflect"
"testing"
)
// TestRegression_RobustXMLAndCDATA feeds ParseToolCalls a set of tool-call
// markups (JSON parameters, XML-style parameters, CDATA-wrapped values,
// unescaped shell symbols, and JSON wrapped in CDATA) and checks the name and
// input of every parsed call.
func TestRegression_RobustXMLAndCDATA(t *testing.T) {
	type scenario struct {
		name     string
		text     string
		expected []ParsedToolCall
	}

	scenarios := []scenario{
		{
			name: "Standard JSON parameters (Regression)",
			text: `<tool_call><tool_name>foo</tool_name><parameters>{"a": 1}</parameters></tool_call>`,
			expected: []ParsedToolCall{
				{Name: "foo", Input: map[string]any{"a": float64(1)}},
			},
		},
		{
			name: "XML tags parameters (Regression)",
			text: `<tool_call><tool_name>foo</tool_name><parameters><arg1>hello</arg1></parameters></tool_call>`,
			expected: []ParsedToolCall{
				{Name: "foo", Input: map[string]any{"arg1": "hello"}},
			},
		},
		{
			name: "CDATA parameters (New Feature)",
			text: `<tool_call><tool_name>write_file</tool_name><parameters><content><![CDATA[line 1
line 2 with <tags> and & symbols]]></content></parameters></tool_call>`,
			expected: []ParsedToolCall{
				{
					Name:  "write_file",
					Input: map[string]any{"content": "line 1\nline 2 with <tags> and & symbols"},
				},
			},
		},
		{
			name: "Dirty XML with unescaped symbols (Robustness Improvement)",
			text: `<tool_call><tool_name>bash</tool_name><parameters><command>echo "hello" > out.txt && cat out.txt</command></parameters></tool_call>`,
			expected: []ParsedToolCall{
				{
					Name:  "bash",
					Input: map[string]any{"command": "echo \"hello\" > out.txt && cat out.txt"},
				},
			},
		},
		{
			name: "Mixed JSON inside CDATA (New Hybrid Case)",
			text: `<tool_call><tool_name>foo</tool_name><parameters><![CDATA[{"json_param": "works"}]]></parameters></tool_call>`,
			expected: []ParsedToolCall{
				{
					Name:  "foo",
					Input: map[string]any{"json_param": "works"},
				},
			},
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			calls := ParseToolCalls(sc.text, []string{"foo", "write_file", "bash"})
			// A count mismatch aborts the subtest so the indexing below stays in range.
			if wantLen, gotLen := len(sc.expected), len(calls); gotLen != wantLen {
				t.Fatalf("expected %d calls, got %d", wantLen, gotLen)
			}
			for idx := range calls {
				want, have := sc.expected[idx], calls[idx]
				if have.Name != want.Name {
					t.Errorf("expected name %q, got %q", want.Name, have.Name)
				}
				if !reflect.DeepEqual(have.Input, want.Input) {
					t.Errorf("expected input %#v, got %#v", want.Input, have.Input)
				}
			}
		})
	}
}

View File

@@ -41,20 +41,21 @@ When calling tools, emit ONLY raw XML at the very end of your response. No text
<tool_calls>
<tool_call>
<tool_name>TOOL_NAME_HERE</tool_name>
<parameters>{"key":"value"}</parameters>
<parameters>
<PARAMETER_NAME>PARAMETER_VALUE</PARAMETER_NAME>
</parameters>
</tool_call>
</tool_calls>
RULES:
1) When calling tools, you MUST use the <tool_calls> XML format.
2) No text is allowed AFTER the XML block.
3) <parameters> MUST be a single-line strict JSON object. Use double quotes.
4) Multiple tools must be inside the same <tool_calls> root.
5) Do NOT wrap XML in markdown fences (` + "```" + `).
6) Do NOT invent parameters. Use only the provided schema.
7) CRITICAL: Do NOT use native tool markers like "<Tool>" or "<tool>".
8) CRITICAL: Do NOT output role markers like "<System>", "<User>", or "<Assistant>".
9) CRITICAL: Do NOT output internal monologues (e.g. "I will list files now..."). Just output your answer or the XML.
3) <parameters> should be a list of XML tags (e.g., <param_name>value</param_name>). For simple inputs, a single-line JSON string is also acceptable.
4) For long text, scripts, or code content, YOU MUST wrap the value in <![CDATA[ content ]]> to preserve formatting and avoid character escaping errors.
5) Multiple tools must be inside the same <tool_calls> root.
6) Do NOT wrap XML in markdown fences (` + "```" + `).
7) Do NOT invent parameters. Use only the provided schema.
8) CRITICAL: Do NOT output internal monologues (e.g. "I will list files now..."). Just output your answer or the XML.
❌ WRONG — Do NOT do these:
Wrong 1 — mixed text after XML:
@@ -103,6 +104,22 @@ Example C — Tool with complex nested JSON parameters:
<parameters>` + ex3Params + `</parameters>
</tool_call>
</tool_calls>
Example D — Tool with long script using CDATA (RELIABLE FOR CODE/SCRIPTS):
<tool_calls>
<tool_call>
<tool_name>` + ex2 + `</tool_name>
<parameters>
<path>script.sh</path>
<content><![CDATA[
#!/bin/bash
if [ "$1" == "test" ]; then
echo "Success!"
fi
]]></content>
</parameters>
</tool_call>
</tool_calls>
Remember: Output ONLY the <tool_calls>...</tool_calls> XML block when calling tools.`
}
@@ -119,34 +136,34 @@ func matchAny(name string, candidates ...string) bool {
// exampleReadParams returns the example <parameters> body, as XML-style
// parameter tags, for a read-type tool. The tool name is matched after
// trimming surrounding whitespace; unrecognised names get a generic <path>
// example.
func exampleReadParams(name string) string {
	switch strings.TrimSpace(name) {
	case "Read":
		return `<file_path>README.md</file_path>`
	case "Glob":
		return `<pattern>**/*.go</pattern><path>.</path>`
	default:
		return `<path>src/main.go</path>`
	}
}
func exampleWriteOrExecParams(name string) string {
switch strings.TrimSpace(name) {
case "Bash", "execute_command":
return `{"command":"pwd"}`
return `<command>pwd</command>`
case "exec_command":
return `{"cmd":"pwd"}`
return `<cmd>pwd</cmd>`
case "Edit":
return `{"file_path":"README.md","old_string":"foo","new_string":"bar"}`
return `<file_path>README.md</file_path><old_string>foo</old_string><new_string>bar</new_string>`
case "MultiEdit":
return `{"file_path":"README.md","edits":[{"old_string":"foo","new_string":"bar"}]}`
return `<file_path>README.md</file_path><edits><old_string>foo</old_string><new_string>bar</new_string></edits>`
default:
return `{"path":"output.txt","content":"Hello world"}`
return `<path>output.txt</path><content>Hello world</content>`
}
}
func exampleInteractiveParams(name string) string {
switch strings.TrimSpace(name) {
case "Task":
return `{"description":"Investigate flaky tests","prompt":"Run targeted tests and summarize failures"}`
return `<description>Investigate flaky tests</description><prompt>Run targeted tests and summarize failures</prompt>`
default:
return `{"question":"Which approach do you prefer?","follow_up":[{"text":"Option A"},{"text":"Option B"}]}`
return `<question>Which approach do you prefer?</question><follow_up><text>Option A</text></follow_up><follow_up><text>Option B</text></follow_up>`
}
}

View File

@@ -22,6 +22,9 @@ var toolCallMarkupNamePatternByTag = map[string]*regexp.Regexp{
"name": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?name\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?name>`),
"function": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?function\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?function>`),
}
// cdataPattern matches CDATA sections to handle them separately from normal tags.
var cdataPattern = regexp.MustCompile(`(?is)<!\[CDATA\[(.*?)]]>`)
var toolCallMarkupArgsTagNames = []string{"input", "arguments", "argument", "parameters", "parameter", "args", "params"}
var toolCallMarkupArgsPatternByTag = map[string]*regexp.Regexp{
"input": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?input\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?input>`),
@@ -120,12 +123,15 @@ func parseMarkupInput(raw string) map[string]any {
if raw == "" {
return map[string]any{}
}
if parsed := parseToolCallInput(raw); len(parsed) > 0 {
return parsed
}
// Prioritize XML-style KV tags as they are more robust for long text/scripts.
if kv := parseMarkupKVObject(raw); len(kv) > 0 {
return kv
}
// Fallback to JSON parsing for standard/legacy tool calls.
if parsed := parseToolCallInput(raw); len(parsed) > 0 {
return parsed
}
return map[string]any{"_raw": html.UnescapeString(stripTagText(raw))}
}
@@ -147,7 +153,13 @@ func parseMarkupKVObject(text string) map[string]any {
if !strings.EqualFold(key, endKey) {
continue
}
value := strings.TrimSpace(html.UnescapeString(stripTagText(m[2])))
// Robustly extract value to handle CDATA and mixed content
value := extractRawTagValue(m[2])
if value == "" && m[2] != "" {
// If it wasn't empty but extracted to empty, could be whitespace or just tags
value = strings.TrimSpace(m[2])
}
if value == "" {
continue
}
@@ -164,6 +176,30 @@ func parseMarkupKVObject(text string) map[string]any {
return out
}
// extractRawTagValue treats the inner content of a tag robustly.
// It detects CDATA and strips it, otherwise it unescapes standard HTML entities.
// It avoids over-aggressive tag stripping that might break user content.
func extractRawTagValue(inner string) string {
trimmed := strings.TrimSpace(inner)
if trimmed == "" {
return ""
}
// 1. Check for CDATA - if present, it's the ultimate "safe" container.
if cdataMatches := cdataPattern.FindStringSubmatch(trimmed); len(cdataMatches) >= 2 {
return cdataMatches[1] // Return raw content between CDATA brackets
}
// 2. If no CDATA, we still want to be robust.
// We unescape standard HTML entities (like &lt; &gt; &amp;)
// but we DON'T recursively strip tags unless they are actually valid XML tags
// at the start/end (which should have been handled by the outer matcher anyway).
// If it contains what looks like a single tag and no other text, it might be nested XML
// but for KV objects we usually want the value.
return html.UnescapeString(inner)
}
func stripTagText(text string) string {
return strings.TrimSpace(anyTagPattern.ReplaceAllString(text, ""))
}
@@ -175,7 +211,7 @@ func findMarkupTagValue(text string, tagNames []string, patternByTag map[string]
continue
}
if m := pattern.FindStringSubmatch(text); len(m) >= 2 {
value := strings.TrimSpace(m[1])
value := extractRawTagValue(m[1])
if value != "" {
return value
}

View File

@@ -115,11 +115,12 @@ func parseSingleXMLToolCall(block string) (ParsedToolCall, bool) {
if err := dec.DecodeElement(&node, &t); err == nil {
inner := strings.TrimSpace(node.Inner)
if inner != "" {
unescapedInner := html.UnescapeString(inner)
if parsed := parseToolCallInput(unescapedInner); len(parsed) > 0 {
// Cleanly extract content (handles CDATA, entities, etc.)
extracted := extractRawTagValue(inner)
if parsed := parseToolCallInput(extracted); len(parsed) > 0 {
if len(parsed) == 1 {
if _, onlyRaw := parsed["_raw"]; onlyRaw {
if kv := parseMarkupKVObject(unescapedInner); len(kv) > 0 {
if kv := parseMarkupKVObject(extracted); len(kv) > 0 {
for k, vv := range kv {
params[k] = vv
}
@@ -130,7 +131,7 @@ func parseSingleXMLToolCall(block string) (ParsedToolCall, bool) {
for k, vv := range parsed {
params[k] = vv
}
} else if kv := parseMarkupKVObject(unescapedInner); len(kv) > 0 {
} else if kv := parseMarkupKVObject(extracted); len(kv) > 0 {
for k, vv := range kv {
params[k] = vv
}
@@ -293,7 +294,7 @@ func parseSingleAntmlFunctionCallMatch(m []string) (ParsedToolCall, bool) {
continue
}
k := strings.TrimSpace(am[1])
v := strings.TrimSpace(html.UnescapeString(am[2]))
v := extractRawTagValue(am[2])
if k != "" {
input[k] = v
}
@@ -316,7 +317,7 @@ func parseInvokeFunctionCallStyle(text string) (ParsedToolCall, bool) {
continue
}
k := strings.TrimSpace(pm[1])
v := strings.TrimSpace(html.UnescapeString(pm[2]))
v := extractRawTagValue(pm[2])
if k != "" {
input[k] = v
}
@@ -347,7 +348,7 @@ func parseToolUseFunctionStyle(text string) (ParsedToolCall, bool) {
continue
}
k := strings.TrimSpace(pm[1])
v := strings.TrimSpace(html.UnescapeString(pm[2]))
v := extractRawTagValue(pm[2])
if k != "" {
input[k] = v
}