feat: expand DSML tool-call alias and fence handling

Add support for DSML wrapper aliases (<dsml|tool_calls>, <|tool_calls>,
<|tool_calls>) alongside canonical XML. Normalize mixed DSML/canonical
tags instead of rejecting them. Add tilde fence (~~~) support, fix
nested fence and unclosed fence handling, support CDATA-protected fence
content, and skip prose mentions when scanning for real tool blocks.
Mirror all changes between Go and Node.js runtimes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-04-27 13:39:50 +08:00
parent 90ce595325
commit a13293e113
19 changed files with 1524 additions and 125 deletions

View File

@@ -0,0 +1,66 @@
package toolcall
import (
"strings"
"testing"
)
// A four-backtick fence wrapping three-backtick runs: the text outside the
// outer fence must survive and the fenced body must be removed.
func TestStripFencedCodeBlocks_NestedFourBackticks(t *testing.T) {
	const input = "Before\n\x60\x60\x60\x60markdown\nHere is \x60\x60\x60 nested \x60\x60\x60 example\n\x60\x60\x60\x60\nAfter"
	stripped := stripFencedCodeBlocks(input)
	// Both pieces of surrounding prose have to be present.
	for _, keep := range []string{"Before", "After"} {
		if !strings.Contains(stripped, keep) {
			t.Fatalf("expected Before and After preserved, got %q", stripped)
		}
	}
	// The word "nested" only occurs inside the outer fence.
	if strings.Contains(stripped, "nested") {
		t.Fatalf("expected nested content stripped, got %q", stripped)
	}
}
// Tilde fences (~~~) are stripped just like backtick fences, leaving the
// surrounding text in place.
func TestStripFencedCodeBlocks_TildeFence(t *testing.T) {
	const input = "Before\n~~~python\ncode here\n~~~\nAfter"
	stripped := stripFencedCodeBlocks(input)
	for _, keep := range []string{"Before", "After"} {
		if !strings.Contains(stripped, keep) {
			t.Fatalf("expected Before/After, got %q", stripped)
		}
	}
	// "code here" is the fenced body and must not leak into the output.
	if strings.Contains(stripped, "code here") {
		t.Fatalf("expected code stripped, got %q", stripped)
	}
}
// An unclosed fence followed by a real tool call: the result must not be the
// empty string. Note that only non-emptiness is asserted here; the test does
// not check that the trailing tool call itself appears in the output.
func TestStripFencedCodeBlocks_UnclosedFencePreservesToolCall(t *testing.T) {
	const input = "Example:\n\x60\x60\x60xml\n<tool_calls><invoke name=\"read_file\"><parameter name=\"path\">README.md</parameter></invoke></tool_calls>\n\n<tool_calls><invoke name=\"search\"><parameter name=\"q\">go</parameter></invoke></tool_calls>"
	if stripped := stripFencedCodeBlocks(input); stripped == "" {
		t.Fatalf("unclosed fence should not truncate everything — real tool call after the fence is lost")
	}
}
// A fence that sits inside a CDATA section is parameter content and must not
// be stripped.
func TestStripFencedCodeBlocks_FenceInsideCDATA(t *testing.T) {
	const input = "<tool_calls><invoke name=\"write\">\n<parameter name=\"content\"><![CDATA[\n\x60\x60\x60python\nprint('hello')\n\x60\x60\x60\n]]></parameter>\n</invoke></tool_calls>"
	stripped := stripFencedCodeBlocks(input)
	// The fence opener is the marker that the CDATA payload was left alone.
	if strings.Contains(stripped, "\x60\x60\x60python") {
		return
	}
	t.Fatalf("fenced code inside CDATA should be preserved, got %q", stripped)
}
// Several fences in a row: every piece of text between and around the fences
// must still be present afterwards.
func TestStripFencedCodeBlocks_MultipleFences(t *testing.T) {
	const input = "Before\n\x60\x60\x60\nfence1\n\x60\x60\x60\nMiddle\n\x60\x60\x60\nfence2\n\x60\x60\x60\nAfter"
	stripped := stripFencedCodeBlocks(input)
	for _, keep := range []string{"Before", "Middle", "After"} {
		if !strings.Contains(stripped, keep) {
			t.Fatalf("expected non-fenced content preserved, got %q", stripped)
		}
	}
}
// The fenced body contains a triple-backtick run in the middle of a line
// (not on a line of its own); the text around the fence must still survive.
func TestStripFencedCodeBlocks_InlineBackticksNotFence(t *testing.T) {
	const input = "Before\n\x60\x60\x60go\nfmt.Println(\x60\x60\x60hello\x60\x60\x60)\n\x60\x60\x60\nAfter"
	stripped := stripFencedCodeBlocks(input)
	for _, keep := range []string{"Before", "After"} {
		if !strings.Contains(stripped, keep) {
			t.Fatalf("expected Before/After, got %q", stripped)
		}
	}
}

View File

@@ -6,13 +6,13 @@ func normalizeDSMLToolCallMarkup(text string) (string, bool) {
if text == "" {
return "", true
}
hasDSML, hasCanonical := toolMarkupStylesOutsideIgnored(text)
if hasDSML && hasCanonical {
return text, false
}
if !hasDSML {
hasAliasLikeMarkup, _ := toolMarkupStylesOutsideIgnored(text)
if !hasAliasLikeMarkup {
return text, true
}
// Always normalize DSML aliases to canonical form, even when canonical
// tags coexist. Models frequently mix DSML wrapper tags with canonical
// inner tags (e.g., <tool_calls><invoke name="...">).
return replaceDSMLToolMarkupOutsideIgnored(text), true
}
@@ -26,6 +26,24 @@ var dsmlToolMarkupAliases = []struct {
{"</|dsml|invoke>", "</invoke>"},
{"<|dsml|parameter", "<parameter"},
{"</|dsml|parameter>", "</parameter>"},
{"<dsml|tool_calls", "<tool_calls"},
{"</dsml|tool_calls>", "</tool_calls>"},
{"<dsml|invoke", "<invoke"},
{"</dsml|invoke>", "</invoke>"},
{"<dsml|parameter", "<parameter"},
{"</dsml|parameter>", "</parameter>"},
{"<|tool_calls", "<tool_calls"},
{"</|tool_calls>", "</tool_calls>"},
{"<|invoke", "<invoke"},
{"</|invoke>", "</invoke>"},
{"<|parameter", "<parameter"},
{"</|parameter>", "</parameter>"},
{"<tool_calls", "<tool_calls"},
{"</tool_calls>", "</tool_calls>"},
{"<invoke", "<invoke"},
{"</invoke>", "</invoke>"},
{"<parameter", "<parameter"},
{"</parameter>", "</parameter>"},
}
var canonicalToolMarkupPrefixes = []string{

View File

@@ -93,7 +93,11 @@ func filterToolCallsDetailed(parsed []ParsedToolCall) ([]ParsedToolCall, []strin
// looksLikeToolCallSyntax reports whether text contains an opening tool-call
// wrapper tag in any recognised spelling. Matching is case-insensitive and is
// a plain substring check, so it also fires on prose that merely mentions a
// wrapper tag.
//
// The previous single-line return that only checked "<|dsml|tool_calls" and
// "<tool_calls" ran first and made the expanded alias checks unreachable; it
// has been removed so that "<dsml|tool_calls" and "<|tool_calls" are detected.
//
// NOTE(review): "<tool_calls" was listed twice in the expanded check. If one
// occurrence was meant to be a different alias spelling, add it to the list
// below — confirm against the alias table.
func looksLikeToolCallSyntax(text string) bool {
	lower := strings.ToLower(text)
	for _, opener := range []string{
		"<|dsml|tool_calls",
		"<dsml|tool_calls",
		"<|tool_calls",
		"<tool_calls",
	} {
		if strings.Contains(lower, opener) {
			return true
		}
	}
	return false
}
func stripFencedCodeBlocks(text string) string {
@@ -107,6 +111,9 @@ func stripFencedCodeBlocks(text string) string {
inFence := false
fenceMarker := ""
inCDATA := false
// Track builder length when a fence opens so we can preserve content
// collected before the unclosed fence.
beforeFenceLen := 0
for _, line := range lines {
if inCDATA || cdataStartsBeforeFence(line) {
b.WriteString(line)
@@ -118,6 +125,7 @@ func stripFencedCodeBlocks(text string) string {
if marker, ok := parseFenceOpen(trimmed); ok {
inFence = true
fenceMarker = marker
beforeFenceLen = b.Len()
continue
}
b.WriteString(line)
@@ -131,6 +139,12 @@ func stripFencedCodeBlocks(text string) string {
}
if inFence {
// Unclosed fence: preserve content that was collected before the
// fence started rather than dropping everything.
result := b.String()
if beforeFenceLen > 0 && beforeFenceLen <= len(result) {
return result[:beforeFenceLen]
}
return ""
}
return b.String()

View File

@@ -124,7 +124,8 @@ func findXMLElementBlocks(text, tag string) []xmlElementBlock {
}
closeStart, closeEnd, ok := findMatchingXMLEndTagOutsideCDATA(text, tag, bodyStart)
if !ok {
break
pos = bodyStart
continue
}
out = append(out, xmlElementBlock{
Attrs: attrs,

View File

@@ -53,11 +53,16 @@ func TestParseToolCallsSupportsDSMLShellWithCanonicalExampleInCDATA(t *testing.T
}
}
func TestParseToolCallsRejectsMixedDSMLAndCanonicalToolTags(t *testing.T) {
func TestParseToolCallsNormalizesMixedDSMLAndCanonicalToolTags(t *testing.T) {
// Models commonly mix DSML wrapper tags with canonical inner tags.
// These should be normalized and parsed, not rejected.
text := `<|DSML|tool_calls><invoke name="Bash"><|DSML|parameter name="command">pwd</|DSML|parameter></invoke></|DSML|tool_calls>`
calls := ParseToolCalls(text, []string{"Bash"})
if len(calls) != 0 {
t.Fatalf("expected mixed DSML/XML tool tags to be rejected, got %#v", calls)
if len(calls) != 1 {
t.Fatalf("expected mixed DSML/XML tool tags to be normalized and parsed, got %#v", calls)
}
if calls[0].Name != "Bash" || calls[0].Input["command"] != "pwd" {
t.Fatalf("unexpected mixed DSML parse result: %#v", calls[0])
}
}
@@ -438,3 +443,25 @@ func TestParseToolCallsParsesAfterFourBacktickFence(t *testing.T) {
t.Fatalf("expected non-fenced tool call to be parsed, got %#v", res.Calls[0])
}
}
// A prose sentence that merely mentions the wrapper tags precedes a real DSML
// tool-call block; exactly one Bash call with the CDATA command is expected.
func TestParseToolCallsSkipsProseMentionOfSameWrapperVariant(t *testing.T) {
	lines := []string{
		"Summary: support canonical <tool_calls> and DSML <|DSML|tool_calls> wrappers.",
		"",
		"<|DSML|tool_calls>",
		"<|DSML|invoke name=\"Bash\">",
		"<|DSML|parameter name=\"command\"><![CDATA[git status]]></|DSML|parameter>",
		"</|DSML|invoke>",
		"</|DSML|tool_calls>",
	}
	parsed := ParseToolCallsDetailed(strings.Join(lines, "\n"), []string{"Bash"})
	if n := len(parsed.Calls); n != 1 {
		t.Fatalf("expected one parsed call after prose mention, got %#v", parsed.Calls)
	}
	call := parsed.Calls[0]
	if call.Name != "Bash" {
		t.Fatalf("expected Bash call, got %#v", call)
	}
	// A failed type assertion yields "", which then fails the comparison.
	command, _ := call.Input["command"].(string)
	if command != "git status" {
		t.Fatalf("expected command to parse, got %q", command)
	}
}