refactor: update tool call parsing and stream tool sieve logic

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-04-28 01:39:32 +08:00
parent 516da04bcd
commit 63271aea8c
10 changed files with 205 additions and 4 deletions

View File

@@ -530,6 +530,7 @@ function findPartialToolMarkupStart(text) {
'<|tool_calls', '<|invoke', '<|parameter',
'<tool_calls', '<invoke', '<parameter',
'<|dsml|tool_calls', '<|dsml|invoke', '<|dsml|parameter',
'<dsml|tool_calls', '<dsml|invoke', '<dsml|parameter',
'<dsmltool_calls', '<dsmlinvoke', '<dsmlparameter',
'<dsml tool_calls', '<dsml invoke', '<dsml parameter',
'<dsml|tool_calls', '<dsml|invoke', '<dsml|parameter',
@@ -812,6 +813,9 @@ function parseStructuredCDATAParameterValue(paramName, raw) {
if (!normalized.includes('<') || !normalized.includes('>')) {
return { ok: false, value: null };
}
if (!cdataFragmentLooksExplicitlyStructured(normalized)) {
return { ok: false, value: null };
}
const parsed = parseMarkupInput(normalized);
if (Array.isArray(parsed)) {
return { ok: true, value: parsed };
@@ -826,6 +830,21 @@ function normalizeCDATAForStructuredParse(raw) {
return unescapeHtml(toStringSafe(raw).replace(/<br\s*\/?>/gi, '\n').trim());
}
function cdataFragmentLooksExplicitlyStructured(raw) {
const blocks = findGenericXmlElementBlocks(raw);
if (blocks.length === 0) {
return false;
}
if (blocks.length > 1) {
return true;
}
const block = blocks[0];
if (toStringSafe(block.localName).trim().toLowerCase() === 'item') {
return true;
}
return findGenericXmlElementBlocks(block.body).length > 0;
}
function preservesCDATAStringParameter(name) {
return new Set([
'content',

View File

@@ -2,6 +2,7 @@
const XML_TOOL_SEGMENT_TAGS = [
'<|dsml|tool_calls>', '<|dsml|tool_calls\n', '<|dsml|tool_calls ',
'<dsml|tool_calls>', '<dsml|tool_calls\n', '<dsml|tool_calls ',
'<|dsml|invoke ', '<|dsml|invoke\n', '<|dsml|invoke\t', '<|dsml|invoke\r',
'<|dsmltool_calls>', '<|dsmltool_calls\n', '<|dsmltool_calls ',
'<|dsmlinvoke ', '<|dsmlinvoke\n', '<|dsmlinvoke\t', '<|dsmlinvoke\r',
@@ -23,6 +24,7 @@ const XML_TOOL_SEGMENT_TAGS = [
const XML_TOOL_OPENING_TAGS = [
'<|dsml|tool_calls',
'<dsml|tool_calls',
'<|dsmltool_calls',
'<|dsml tool_calls',
'<dsml|tool_calls',
@@ -35,6 +37,7 @@ const XML_TOOL_OPENING_TAGS = [
const XML_TOOL_CLOSING_TAGS = [
'</|dsml|tool_calls>',
'</dsml|tool_calls>',
'</|dsmltool_calls>',
'</|dsml tool_calls>',
'</dsml|tool_calls>',

View File

@@ -2,6 +2,7 @@ package toolcall
import (
"encoding/json"
"encoding/xml"
"html"
"regexp"
"strings"
@@ -350,6 +351,9 @@ func parseStructuredCDATAParameterValue(paramName, raw string) (any, bool) {
if !strings.Contains(normalized, "<") || !strings.Contains(normalized, ">") {
return nil, false
}
if !cdataFragmentLooksExplicitlyStructured(normalized) {
return nil, false
}
parsed, ok := parseXMLFragmentValue(normalized)
if !ok {
return nil, false
@@ -375,6 +379,65 @@ func normalizeCDATAForStructuredParse(raw string) string {
return html.UnescapeString(strings.TrimSpace(normalized))
}
// Preserve flat CDATA fragments as strings. Only recover structure when the
// fragment clearly encodes a data shape: multiple sibling elements, nested
// child elements, or an explicit item list.
func cdataFragmentLooksExplicitlyStructured(raw string) bool {
trimmed := strings.TrimSpace(raw)
if trimmed == "" {
return false
}
dec := xml.NewDecoder(strings.NewReader("<root>" + trimmed + "</root>"))
tok, err := dec.Token()
if err != nil {
return false
}
start, ok := tok.(xml.StartElement)
if !ok || !strings.EqualFold(start.Name.Local, "root") {
return false
}
depth := 0
directChildren := 0
firstChildName := ""
firstChildHasNested := false
for {
tok, err := dec.Token()
if err != nil {
return false
}
switch t := tok.(type) {
case xml.StartElement:
if depth == 0 {
directChildren++
if directChildren == 1 {
firstChildName = strings.ToLower(strings.TrimSpace(t.Name.Local))
} else {
return true
}
} else if directChildren == 1 && depth == 1 {
firstChildHasNested = true
}
depth++
case xml.EndElement:
if strings.EqualFold(t.Name.Local, "root") {
if directChildren != 1 {
return false
}
if firstChildName == "item" {
return true
}
return firstChildHasNested
}
if depth > 0 {
depth--
}
}
}
}
func preservesCDATAStringParameter(name string) bool {
switch strings.ToLower(strings.TrimSpace(name)) {
case "content", "file_content", "text", "prompt", "query", "command", "cmd", "script", "code", "old_string", "new_string", "pattern", "path", "file_path":

View File

@@ -53,6 +53,21 @@ func TestParseToolCallsSupportsDSMLShellWithCanonicalExampleInCDATA(t *testing.T
}
}
func TestParseToolCallsPreservesSimpleCDATAInlineMarkupAsText(t *testing.T) {
text := `<tool_calls><invoke name="Write"><parameter name="description"><![CDATA[<b>urgent</b>]]></parameter></invoke></tool_calls>`
calls := ParseToolCalls(text, []string{"Write"})
if len(calls) != 1 {
t.Fatalf("expected 1 call, got %#v", calls)
}
got, ok := calls[0].Input["description"].(string)
if !ok {
t.Fatalf("expected description to remain a string, got %#v", calls[0].Input["description"])
}
if got != "<b>urgent</b>" {
t.Fatalf("expected inline markup CDATA to stay raw, got %q", got)
}
}
func TestParseToolCallsTreatsUnclosedCDATAAsText(t *testing.T) {
text := `<tool_calls><invoke name="Write"><parameter name="content"><![CDATA[hello world</parameter></invoke></tool_calls>`
res := ParseToolCallsDetailed(text, []string{"Write"})
@@ -218,6 +233,21 @@ func TestParseToolCallsTreatsCDATAItemOnlyBodyAsArray(t *testing.T) {
}
}
func TestParseToolCallsTreatsSingleItemCDATAAsArray(t *testing.T) {
text := `<tool_calls><invoke name="TodoWrite"><parameter name="todos"><![CDATA[<item>one</item>]]></parameter></invoke></tool_calls>`
calls := ParseToolCalls(text, []string{"TodoWrite"})
if len(calls) != 1 {
t.Fatalf("expected one TodoWrite call, got %#v", calls)
}
items, ok := calls[0].Input["todos"].([]any)
if !ok || len(items) != 1 {
t.Fatalf("expected single-item CDATA body to parse as array, got %#v", calls[0].Input["todos"])
}
if got, ok := items[0].(string); !ok || got != "one" {
t.Fatalf("expected single item value to stay intact, got %#v", items[0])
}
}
func TestParseToolCallsTreatsCDATAObjectFragmentAsObject(t *testing.T) {
payload := `<question><![CDATA[Pick one]]></question><options><item><label><![CDATA[A]]></label></item><item><label><![CDATA[B]]></label></item></options>`
text := `<tool_calls><invoke name="AskUserQuestion"><parameter name="questions"><![CDATA[` + payload + `]]></parameter></invoke></tool_calls>`

View File

@@ -154,6 +154,7 @@ func findPartialXMLToolTagStart(s string) int {
"<|tool_calls", "<|invoke", "<|parameter",
"<tool_calls", "<invoke", "<parameter",
"<|dsml|tool_calls", "<|dsml|invoke", "<|dsml|parameter",
"<dsml|tool_calls", "<dsml|invoke", "<dsml|parameter",
"<dsmltool_calls", "<dsmlinvoke", "<dsmlparameter",
"<dsml tool_calls", "<dsml invoke", "<dsml parameter",
"<dsml|tool_calls", "<dsml|invoke", "<dsml|parameter",

View File

@@ -15,6 +15,7 @@ var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)((?:<tool_calls\b|<\|dsml
// xmlToolTagsToDetect is the set of XML tag prefixes used by findToolSegmentStart.
var xmlToolTagsToDetect = []string{
"<|dsml|tool_calls>", "<|dsml|tool_calls\n", "<|dsml|tool_calls ",
"<dsml|tool_calls>", "<dsml|tool_calls\n", "<dsml|tool_calls ",
"<|dsml|invoke ", "<|dsml|invoke\n", "<|dsml|invoke\t", "<|dsml|invoke\r",
"<|dsmltool_calls>", "<|dsmltool_calls\n", "<|dsmltool_calls ",
"<|dsmlinvoke ", "<|dsmlinvoke\n", "<|dsmlinvoke\t", "<|dsmlinvoke\r",

View File

@@ -745,6 +745,51 @@ func TestProcessToolSieveFullwidthPipeVariantDoesNotLeak(t *testing.T) {
}
}
// Test <DSML|tool_calls> with DSML invoke/parameter tags should buffer the
// wrapper instead of leaking it before the block is complete.
func TestProcessToolSieveFullwidthDSMLPrefixVariantDoesNotLeak(t *testing.T) {
var state State
chunks := []string{
"<DSML|tool",
"_calls>\n",
"<|DSML|invoke name=\"Bash\">\n",
"<|DSML|parameter name=\"command\"><![CDATA[ls -la /Users/aq/Desktop/myproject/ds2api/]]></|DSML|parameter>\n",
"<|DSML|parameter name=\"description\"><![CDATA[List project root contents]]></|DSML|parameter>\n",
"</|DSML|invoke>\n",
"<|DSML|invoke name=\"Bash\">\n",
"<|DSML|parameter name=\"command\"><![CDATA[cat /Users/aq/Desktop/myproject/ds2api/package.json 2>/dev/null || echo \"No package.json found\"]]></|DSML|parameter>\n",
"<|DSML|parameter name=\"description\"><![CDATA[Check for existing package.json]]></|DSML|parameter>\n",
"</|DSML|invoke>\n",
"</|DSML|tool_calls>",
}
var events []Event
for _, c := range chunks {
events = append(events, ProcessChunk(&state, c, []string{"Bash"})...)
}
events = append(events, Flush(&state, []string{"Bash"})...)
var textContent strings.Builder
var toolCalls int
var names []string
for _, evt := range events {
textContent.WriteString(evt.Content)
for _, call := range evt.ToolCalls {
toolCalls++
names = append(names, call.Name)
}
}
if toolCalls != 2 {
t.Fatalf("expected two tool calls from fullwidth DSML prefix variant, got %d events=%#v", toolCalls, events)
}
if len(names) != 2 || names[0] != "Bash" || names[1] != "Bash" {
t.Fatalf("expected two Bash tool calls, got %v", names)
}
if textContent.Len() != 0 {
t.Fatalf("expected fullwidth DSML prefix variant not to leak text, got %q", textContent.String())
}
}
// Test <DSML|tool_calls> with <|DSML|invoke> (DSML prefix without leading pipe on wrapper).
func TestProcessToolSieveDSMLPrefixVariantDoesNotLeak(t *testing.T) {
var state State