mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-05 00:45:29 +08:00
refactor: update tool call format to prefer XML-style parameters with CDATA support for robust content handling
This commit is contained in:
@@ -19,6 +19,8 @@ const TOOL_CALL_MARKUP_ARGS_PATTERNS = [
|
||||
/<(?:[a-z0-9_:-]+:)?args\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?args>/i,
|
||||
/<(?:[a-z0-9_:-]+:)?params\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?params>/i,
|
||||
];
|
||||
const CDATA_PATTERN = /<!\[CDATA\[([\s\S]*?)]]>/i;
|
||||
const HTML_ENTITIES_PATTERN = /&[a-z0-9#]+;/gi;
|
||||
|
||||
const {
|
||||
toStringSafe,
|
||||
@@ -74,7 +76,7 @@ function parseMarkupSingleToolCall(attrs, inner) {
|
||||
name = toStringSafe(attrMatch[2]).trim();
|
||||
}
|
||||
if (!name) {
|
||||
name = stripTagText(findMarkupTagValue(inner, TOOL_CALL_MARKUP_NAME_PATTERNS));
|
||||
name = extractRawTagValue(findMarkupTagValue(inner, TOOL_CALL_MARKUP_NAME_PATTERNS));
|
||||
}
|
||||
if (!name) {
|
||||
return null;
|
||||
@@ -95,18 +97,21 @@ function parseMarkupSingleToolCall(attrs, inner) {
|
||||
|
||||
function parseMarkupInput(raw) {
|
||||
const s = toStringSafe(raw).trim();
|
||||
if (!s) {
|
||||
return {};
|
||||
}
|
||||
const parsed = parseToolCallInput(s);
|
||||
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed) && Object.keys(parsed).length > 0) {
|
||||
return parsed;
|
||||
}
|
||||
// Prioritize XML-style KV tags (e.g., <arg>val</arg>)
|
||||
const kv = parseMarkupKVObject(s);
|
||||
if (Object.keys(kv).length > 0) {
|
||||
return kv;
|
||||
}
|
||||
return { _raw: stripTagText(s) };
|
||||
|
||||
// Fallback to JSON parsing
|
||||
const parsed = parseToolCallInput(s);
|
||||
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
||||
if (Object.keys(parsed).length > 0) {
|
||||
return parsed;
|
||||
}
|
||||
}
|
||||
|
||||
return { _raw: extractRawTagValue(s) };
|
||||
}
|
||||
|
||||
function parseMarkupKVObject(text) {
|
||||
@@ -120,7 +125,7 @@ function parseMarkupKVObject(text) {
|
||||
if (!key) {
|
||||
continue;
|
||||
}
|
||||
const valueRaw = stripTagText(m[2]);
|
||||
const valueRaw = extractRawTagValue(m[2]);
|
||||
if (!valueRaw) {
|
||||
continue;
|
||||
}
|
||||
@@ -133,6 +138,33 @@ function parseMarkupKVObject(text) {
|
||||
return out;
|
||||
}
|
||||
|
||||
function extractRawTagValue(inner) {
|
||||
const s = toStringSafe(inner).trim();
|
||||
if (!s) {
|
||||
return '';
|
||||
}
|
||||
|
||||
// 1. Check for CDATA
|
||||
const cdataMatch = s.match(CDATA_PATTERN);
|
||||
if (cdataMatch && cdataMatch[1] !== undefined) {
|
||||
return cdataMatch[1];
|
||||
}
|
||||
|
||||
// 2. Fallback to unescaping standard HTML entities
|
||||
// Note: we avoid broad tag stripping here to preserve user content (like < symbols in code)
|
||||
return unescapeHtml(inner);
|
||||
}
|
||||
|
||||
function unescapeHtml(safe) {
|
||||
if (!safe) return '';
|
||||
return safe.replace(/&/g, '&')
|
||||
.replace(/</g, '<')
|
||||
.replace(/>/g, '>')
|
||||
.replace(/"/g, '"')
|
||||
.replace(/'/g, "'")
|
||||
.replace(/'/g, "'");
|
||||
}
|
||||
|
||||
function stripTagText(text) {
|
||||
return toStringSafe(text).replace(/<[^>]+>/g, ' ').trim();
|
||||
}
|
||||
@@ -141,7 +173,7 @@ function findMarkupTagValue(text, patterns) {
|
||||
const source = toStringSafe(text);
|
||||
for (const p of patterns) {
|
||||
const m = source.match(p);
|
||||
if (m && m[1]) {
|
||||
if (m && m[1] !== undefined) {
|
||||
return toStringSafe(m[1]);
|
||||
}
|
||||
}
|
||||
|
||||
67
internal/toolcall/regression_test.go
Normal file
67
internal/toolcall/regression_test.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package toolcall
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRegression_RobustXMLAndCDATA(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
expected []ParsedToolCall
|
||||
}{
|
||||
{
|
||||
name: "Standard JSON parameters (Regression)",
|
||||
text: `<tool_call><tool_name>foo</tool_name><parameters>{"a": 1}</parameters></tool_call>`,
|
||||
expected: []ParsedToolCall{{Name: "foo", Input: map[string]any{"a": float64(1)}}},
|
||||
},
|
||||
{
|
||||
name: "XML tags parameters (Regression)",
|
||||
text: `<tool_call><tool_name>foo</tool_name><parameters><arg1>hello</arg1></parameters></tool_call>`,
|
||||
expected: []ParsedToolCall{{Name: "foo", Input: map[string]any{"arg1": "hello"}}},
|
||||
},
|
||||
{
|
||||
name: "CDATA parameters (New Feature)",
|
||||
text: `<tool_call><tool_name>write_file</tool_name><parameters><content><![CDATA[line 1
|
||||
line 2 with <tags> and & symbols]]></content></parameters></tool_call>`,
|
||||
expected: []ParsedToolCall{{
|
||||
Name: "write_file",
|
||||
Input: map[string]any{"content": "line 1\nline 2 with <tags> and & symbols"},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "Dirty XML with unescaped symbols (Robustness Improvement)",
|
||||
text: `<tool_call><tool_name>bash</tool_name><parameters><command>echo "hello" > out.txt && cat out.txt</command></parameters></tool_call>`,
|
||||
expected: []ParsedToolCall{{
|
||||
Name: "bash",
|
||||
Input: map[string]any{"command": "echo \"hello\" > out.txt && cat out.txt"},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "Mixed JSON inside CDATA (New Hybrid Case)",
|
||||
text: `<tool_call><tool_name>foo</tool_name><parameters><![CDATA[{"json_param": "works"}]]></parameters></tool_call>`,
|
||||
expected: []ParsedToolCall{{
|
||||
Name: "foo",
|
||||
Input: map[string]any{"json_param": "works"},
|
||||
}},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := ParseToolCalls(tt.text, []string{"foo", "write_file", "bash"})
|
||||
if len(got) != len(tt.expected) {
|
||||
t.Fatalf("expected %d calls, got %d", len(tt.expected), len(got))
|
||||
}
|
||||
for i := range got {
|
||||
if got[i].Name != tt.expected[i].Name {
|
||||
t.Errorf("expected name %q, got %q", tt.expected[i].Name, got[i].Name)
|
||||
}
|
||||
if !reflect.DeepEqual(got[i].Input, tt.expected[i].Input) {
|
||||
t.Errorf("expected input %#v, got %#v", tt.expected[i].Input, got[i].Input)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -41,20 +41,21 @@ When calling tools, emit ONLY raw XML at the very end of your response. No text
|
||||
<tool_calls>
|
||||
<tool_call>
|
||||
<tool_name>TOOL_NAME_HERE</tool_name>
|
||||
<parameters>{"key":"value"}</parameters>
|
||||
<parameters>
|
||||
<PARAMETER_NAME>PARAMETER_VALUE</PARAMETER_NAME>
|
||||
</parameters>
|
||||
</tool_call>
|
||||
</tool_calls>
|
||||
|
||||
RULES:
|
||||
1) When calling tools, you MUST use the <tool_calls> XML format.
|
||||
2) No text is allowed AFTER the XML block.
|
||||
3) <parameters> MUST be a single-line strict JSON object. Use double quotes.
|
||||
4) Multiple tools must be inside the same <tool_calls> root.
|
||||
5) Do NOT wrap XML in markdown fences (` + "```" + `).
|
||||
6) Do NOT invent parameters. Use only the provided schema.
|
||||
7) CRITICAL: Do NOT use native tool markers like "<|Tool|>" or "<|tool|>".
|
||||
8) CRITICAL: Do NOT output role markers like "<|System|>", "<|User|>", or "<|Assistant|>".
|
||||
9) CRITICAL: Do NOT output internal monologues (e.g. "I will list files now..."). Just output your answer or the XML.
|
||||
3) <parameters> should be a list of XML tags (e.g., <param_name>value</param_name>). For simple inputs, a single-line JSON string is also acceptable.
|
||||
4) For long text, scripts, or code content, YOU MUST wrap the value in <![CDATA[ content ]]> to preserve formatting and avoid character escaping errors.
|
||||
5) Multiple tools must be inside the same <tool_calls> root.
|
||||
6) Do NOT wrap XML in markdown fences (` + "```" + `).
|
||||
7) Do NOT invent parameters. Use only the provided schema.
|
||||
8) CRITICAL: Do NOT output internal monologues (e.g. "I will list files now..."). Just output your answer or the XML.
|
||||
|
||||
❌ WRONG — Do NOT do these:
|
||||
Wrong 1 — mixed text after XML:
|
||||
@@ -103,6 +104,22 @@ Example C — Tool with complex nested JSON parameters:
|
||||
<parameters>` + ex3Params + `</parameters>
|
||||
</tool_call>
|
||||
</tool_calls>
|
||||
|
||||
Example D — Tool with long script using CDATA (RELIABLE FOR CODE/SCRIPTS):
|
||||
<tool_calls>
|
||||
<tool_call>
|
||||
<tool_name>` + ex2 + `</tool_name>
|
||||
<parameters>
|
||||
<path>script.sh</path>
|
||||
<content><![CDATA[
|
||||
#!/bin/bash
|
||||
if [ "$1" == "test" ]; then
|
||||
echo "Success!"
|
||||
fi
|
||||
]]></content>
|
||||
</parameters>
|
||||
</tool_call>
|
||||
</tool_calls>
|
||||
|
||||
Remember: Output ONLY the <tool_calls>...</tool_calls> XML block when calling tools.`
|
||||
}
|
||||
@@ -119,34 +136,34 @@ func matchAny(name string, candidates ...string) bool {
|
||||
func exampleReadParams(name string) string {
|
||||
switch strings.TrimSpace(name) {
|
||||
case "Read":
|
||||
return `{"file_path":"README.md"}`
|
||||
return `<file_path>README.md</file_path>`
|
||||
case "Glob":
|
||||
return `{"pattern":"**/*.go","path":"."}`
|
||||
return `<pattern>**/*.go</pattern><path>.</path>`
|
||||
default:
|
||||
return `{"path":"src/main.go"}`
|
||||
return `<path>src/main.go</path>`
|
||||
}
|
||||
}
|
||||
|
||||
func exampleWriteOrExecParams(name string) string {
|
||||
switch strings.TrimSpace(name) {
|
||||
case "Bash", "execute_command":
|
||||
return `{"command":"pwd"}`
|
||||
return `<command>pwd</command>`
|
||||
case "exec_command":
|
||||
return `{"cmd":"pwd"}`
|
||||
return `<cmd>pwd</cmd>`
|
||||
case "Edit":
|
||||
return `{"file_path":"README.md","old_string":"foo","new_string":"bar"}`
|
||||
return `<file_path>README.md</file_path><old_string>foo</old_string><new_string>bar</new_string>`
|
||||
case "MultiEdit":
|
||||
return `{"file_path":"README.md","edits":[{"old_string":"foo","new_string":"bar"}]}`
|
||||
return `<file_path>README.md</file_path><edits><old_string>foo</old_string><new_string>bar</new_string></edits>`
|
||||
default:
|
||||
return `{"path":"output.txt","content":"Hello world"}`
|
||||
return `<path>output.txt</path><content>Hello world</content>`
|
||||
}
|
||||
}
|
||||
|
||||
func exampleInteractiveParams(name string) string {
|
||||
switch strings.TrimSpace(name) {
|
||||
case "Task":
|
||||
return `{"description":"Investigate flaky tests","prompt":"Run targeted tests and summarize failures"}`
|
||||
return `<description>Investigate flaky tests</description><prompt>Run targeted tests and summarize failures</prompt>`
|
||||
default:
|
||||
return `{"question":"Which approach do you prefer?","follow_up":[{"text":"Option A"},{"text":"Option B"}]}`
|
||||
return `<question>Which approach do you prefer?</question><follow_up><text>Option A</text></follow_up><follow_up><text>Option B</text></follow_up>`
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,6 +22,9 @@ var toolCallMarkupNamePatternByTag = map[string]*regexp.Regexp{
|
||||
"name": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?name\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?name>`),
|
||||
"function": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?function\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?function>`),
|
||||
}
|
||||
|
||||
// cdataPattern matches CDATA sections to handle them separately from normal tags.
|
||||
var cdataPattern = regexp.MustCompile(`(?is)<!\[CDATA\[(.*?)]]>`)
|
||||
var toolCallMarkupArgsTagNames = []string{"input", "arguments", "argument", "parameters", "parameter", "args", "params"}
|
||||
var toolCallMarkupArgsPatternByTag = map[string]*regexp.Regexp{
|
||||
"input": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?input\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?input>`),
|
||||
@@ -120,12 +123,15 @@ func parseMarkupInput(raw string) map[string]any {
|
||||
if raw == "" {
|
||||
return map[string]any{}
|
||||
}
|
||||
if parsed := parseToolCallInput(raw); len(parsed) > 0 {
|
||||
return parsed
|
||||
}
|
||||
// Prioritize XML-style KV tags as they are more robust for long text/scripts.
|
||||
if kv := parseMarkupKVObject(raw); len(kv) > 0 {
|
||||
return kv
|
||||
}
|
||||
|
||||
// Fallback to JSON parsing for standard/legacy tool calls.
|
||||
if parsed := parseToolCallInput(raw); len(parsed) > 0 {
|
||||
return parsed
|
||||
}
|
||||
return map[string]any{"_raw": html.UnescapeString(stripTagText(raw))}
|
||||
}
|
||||
|
||||
@@ -147,7 +153,13 @@ func parseMarkupKVObject(text string) map[string]any {
|
||||
if !strings.EqualFold(key, endKey) {
|
||||
continue
|
||||
}
|
||||
value := strings.TrimSpace(html.UnescapeString(stripTagText(m[2])))
|
||||
// Robustly extract value to handle CDATA and mixed content
|
||||
value := extractRawTagValue(m[2])
|
||||
if value == "" && m[2] != "" {
|
||||
// If it wasn't empty but extracted to empty, could be whitespace or just tags
|
||||
value = strings.TrimSpace(m[2])
|
||||
}
|
||||
|
||||
if value == "" {
|
||||
continue
|
||||
}
|
||||
@@ -164,6 +176,30 @@ func parseMarkupKVObject(text string) map[string]any {
|
||||
return out
|
||||
}
|
||||
|
||||
// extractRawTagValue treats the inner content of a tag robustly.
|
||||
// It detects CDATA and strips it, otherwise it unescapes standard HTML entities.
|
||||
// It avoids over-aggressive tag stripping that might break user content.
|
||||
func extractRawTagValue(inner string) string {
|
||||
trimmed := strings.TrimSpace(inner)
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
// 1. Check for CDATA - if present, it's the ultimate "safe" container.
|
||||
if cdataMatches := cdataPattern.FindStringSubmatch(trimmed); len(cdataMatches) >= 2 {
|
||||
return cdataMatches[1] // Return raw content between CDATA brackets
|
||||
}
|
||||
|
||||
// 2. If no CDATA, we still want to be robust.
|
||||
// We unescape standard HTML entities (like < > &)
|
||||
// but we DON'T recursively strip tags unless they are actually valid XML tags
|
||||
// at the start/end (which should have been handled by the outer matcher anyway).
|
||||
|
||||
// If it contains what looks like a single tag and no other text, it might be nested XML
|
||||
// but for KV objects we usually want the value.
|
||||
return html.UnescapeString(inner)
|
||||
}
|
||||
|
||||
func stripTagText(text string) string {
|
||||
return strings.TrimSpace(anyTagPattern.ReplaceAllString(text, ""))
|
||||
}
|
||||
@@ -175,7 +211,7 @@ func findMarkupTagValue(text string, tagNames []string, patternByTag map[string]
|
||||
continue
|
||||
}
|
||||
if m := pattern.FindStringSubmatch(text); len(m) >= 2 {
|
||||
value := strings.TrimSpace(m[1])
|
||||
value := extractRawTagValue(m[1])
|
||||
if value != "" {
|
||||
return value
|
||||
}
|
||||
|
||||
@@ -115,11 +115,12 @@ func parseSingleXMLToolCall(block string) (ParsedToolCall, bool) {
|
||||
if err := dec.DecodeElement(&node, &t); err == nil {
|
||||
inner := strings.TrimSpace(node.Inner)
|
||||
if inner != "" {
|
||||
unescapedInner := html.UnescapeString(inner)
|
||||
if parsed := parseToolCallInput(unescapedInner); len(parsed) > 0 {
|
||||
// Cleanly extract content (handles CDATA, entities, etc.)
|
||||
extracted := extractRawTagValue(inner)
|
||||
if parsed := parseToolCallInput(extracted); len(parsed) > 0 {
|
||||
if len(parsed) == 1 {
|
||||
if _, onlyRaw := parsed["_raw"]; onlyRaw {
|
||||
if kv := parseMarkupKVObject(unescapedInner); len(kv) > 0 {
|
||||
if kv := parseMarkupKVObject(extracted); len(kv) > 0 {
|
||||
for k, vv := range kv {
|
||||
params[k] = vv
|
||||
}
|
||||
@@ -130,7 +131,7 @@ func parseSingleXMLToolCall(block string) (ParsedToolCall, bool) {
|
||||
for k, vv := range parsed {
|
||||
params[k] = vv
|
||||
}
|
||||
} else if kv := parseMarkupKVObject(unescapedInner); len(kv) > 0 {
|
||||
} else if kv := parseMarkupKVObject(extracted); len(kv) > 0 {
|
||||
for k, vv := range kv {
|
||||
params[k] = vv
|
||||
}
|
||||
@@ -293,7 +294,7 @@ func parseSingleAntmlFunctionCallMatch(m []string) (ParsedToolCall, bool) {
|
||||
continue
|
||||
}
|
||||
k := strings.TrimSpace(am[1])
|
||||
v := strings.TrimSpace(html.UnescapeString(am[2]))
|
||||
v := extractRawTagValue(am[2])
|
||||
if k != "" {
|
||||
input[k] = v
|
||||
}
|
||||
@@ -316,7 +317,7 @@ func parseInvokeFunctionCallStyle(text string) (ParsedToolCall, bool) {
|
||||
continue
|
||||
}
|
||||
k := strings.TrimSpace(pm[1])
|
||||
v := strings.TrimSpace(html.UnescapeString(pm[2]))
|
||||
v := extractRawTagValue(pm[2])
|
||||
if k != "" {
|
||||
input[k] = v
|
||||
}
|
||||
@@ -347,7 +348,7 @@ func parseToolUseFunctionStyle(text string) (ParsedToolCall, bool) {
|
||||
continue
|
||||
}
|
||||
k := strings.TrimSpace(pm[1])
|
||||
v := strings.TrimSpace(html.UnescapeString(pm[2]))
|
||||
v := extractRawTagValue(pm[2])
|
||||
if k != "" {
|
||||
input[k] = v
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user