mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-16 14:15:20 +08:00
feat: expand DSML tool-call alias and fence handling
Add support for DSML wrapper aliases (<dsml|tool_calls>, <|tool_calls>, and the fullwidth-bar form <｜tool_calls>) alongside canonical XML. Normalize mixed DSML/canonical tags instead of rejecting them. Add tilde fence (~~~) support, fix nested fence and unclosed fence handling, support CDATA-protected fence content, and skip prose mentions when scanning for real tool blocks. Mirror all changes between Go and Node.js runtimes. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
66
internal/toolcall/fence_edge_test.go
Normal file
66
internal/toolcall/fence_edge_test.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package toolcall
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// 4 反引号嵌套 3 反引号
|
||||
func TestStripFencedCodeBlocks_NestedFourBackticks(t *testing.T) {
|
||||
text := "Before\n\x60\x60\x60\x60markdown\nHere is \x60\x60\x60 nested \x60\x60\x60 example\n\x60\x60\x60\x60\nAfter"
|
||||
got := stripFencedCodeBlocks(text)
|
||||
if !strings.Contains(got, "Before") || !strings.Contains(got, "After") {
|
||||
t.Fatalf("expected Before and After preserved, got %q", got)
|
||||
}
|
||||
if strings.Contains(got, "nested") {
|
||||
t.Fatalf("expected nested content stripped, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// 波浪线围栏
|
||||
func TestStripFencedCodeBlocks_TildeFence(t *testing.T) {
|
||||
text := "Before\n~~~python\ncode here\n~~~\nAfter"
|
||||
got := stripFencedCodeBlocks(text)
|
||||
if !strings.Contains(got, "Before") || !strings.Contains(got, "After") {
|
||||
t.Fatalf("expected Before/After, got %q", got)
|
||||
}
|
||||
if strings.Contains(got, "code here") {
|
||||
t.Fatalf("expected code stripped, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// 未闭合围栏 + 后面跟真正的工具调用:不应返回空字符串
|
||||
func TestStripFencedCodeBlocks_UnclosedFencePreservesToolCall(t *testing.T) {
|
||||
text := "Example:\n\x60\x60\x60xml\n<tool_calls><invoke name=\"read_file\"><parameter name=\"path\">README.md</parameter></invoke></tool_calls>\n\n<tool_calls><invoke name=\"search\"><parameter name=\"q\">go</parameter></invoke></tool_calls>"
|
||||
got := stripFencedCodeBlocks(text)
|
||||
if got == "" {
|
||||
t.Fatalf("unclosed fence should not truncate everything — real tool call after the fence is lost")
|
||||
}
|
||||
}
|
||||
|
||||
// CDATA 内的围栏不应被剥离
|
||||
func TestStripFencedCodeBlocks_FenceInsideCDATA(t *testing.T) {
|
||||
text := "<tool_calls><invoke name=\"write\">\n<parameter name=\"content\"><![CDATA[\n\x60\x60\x60python\nprint('hello')\n\x60\x60\x60\n]]></parameter>\n</invoke></tool_calls>"
|
||||
got := stripFencedCodeBlocks(text)
|
||||
if !strings.Contains(got, "\x60\x60\x60python") {
|
||||
t.Fatalf("fenced code inside CDATA should be preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// 连续多个围栏
|
||||
func TestStripFencedCodeBlocks_MultipleFences(t *testing.T) {
|
||||
text := "Before\n\x60\x60\x60\nfence1\n\x60\x60\x60\nMiddle\n\x60\x60\x60\nfence2\n\x60\x60\x60\nAfter"
|
||||
got := stripFencedCodeBlocks(text)
|
||||
if !strings.Contains(got, "Before") || !strings.Contains(got, "Middle") || !strings.Contains(got, "After") {
|
||||
t.Fatalf("expected non-fenced content preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// 围栏包含内嵌 ``` 行但没有独立成行
|
||||
func TestStripFencedCodeBlocks_InlineBackticksNotFence(t *testing.T) {
|
||||
text := "Before\n\x60\x60\x60go\nfmt.Println(\x60\x60\x60hello\x60\x60\x60)\n\x60\x60\x60\nAfter"
|
||||
got := stripFencedCodeBlocks(text)
|
||||
if !strings.Contains(got, "Before") || !strings.Contains(got, "After") {
|
||||
t.Fatalf("expected Before/After, got %q", got)
|
||||
}
|
||||
}
|
||||
@@ -6,13 +6,13 @@ func normalizeDSMLToolCallMarkup(text string) (string, bool) {
|
||||
if text == "" {
|
||||
return "", true
|
||||
}
|
||||
hasDSML, hasCanonical := toolMarkupStylesOutsideIgnored(text)
|
||||
if hasDSML && hasCanonical {
|
||||
return text, false
|
||||
}
|
||||
if !hasDSML {
|
||||
hasAliasLikeMarkup, _ := toolMarkupStylesOutsideIgnored(text)
|
||||
if !hasAliasLikeMarkup {
|
||||
return text, true
|
||||
}
|
||||
// Always normalize DSML aliases to canonical form, even when canonical
|
||||
// tags coexist. Models frequently mix DSML wrapper tags with canonical
|
||||
// inner tags (e.g., <|tool_calls><invoke name="...">).
|
||||
return replaceDSMLToolMarkupOutsideIgnored(text), true
|
||||
}
|
||||
|
||||
@@ -26,6 +26,24 @@ var dsmlToolMarkupAliases = []struct {
|
||||
{"</|dsml|invoke>", "</invoke>"},
|
||||
{"<|dsml|parameter", "<parameter"},
|
||||
{"</|dsml|parameter>", "</parameter>"},
|
||||
{"<dsml|tool_calls", "<tool_calls"},
|
||||
{"</dsml|tool_calls>", "</tool_calls>"},
|
||||
{"<dsml|invoke", "<invoke"},
|
||||
{"</dsml|invoke>", "</invoke>"},
|
||||
{"<dsml|parameter", "<parameter"},
|
||||
{"</dsml|parameter>", "</parameter>"},
|
||||
{"<|tool_calls", "<tool_calls"},
|
||||
{"</|tool_calls>", "</tool_calls>"},
|
||||
{"<|invoke", "<invoke"},
|
||||
{"</|invoke>", "</invoke>"},
|
||||
{"<|parameter", "<parameter"},
|
||||
{"</|parameter>", "</parameter>"},
|
||||
{"<|tool_calls", "<tool_calls"},
|
||||
{"</|tool_calls>", "</tool_calls>"},
|
||||
{"<|invoke", "<invoke"},
|
||||
{"</|invoke>", "</invoke>"},
|
||||
{"<|parameter", "<parameter"},
|
||||
{"</|parameter>", "</parameter>"},
|
||||
}
|
||||
|
||||
var canonicalToolMarkupPrefixes = []string{
|
||||
|
||||
@@ -93,7 +93,11 @@ func filterToolCallsDetailed(parsed []ParsedToolCall) ([]ParsedToolCall, []strin
|
||||
|
||||
// looksLikeToolCallSyntax reports whether text contains the opening of a
// tool-call wrapper tag, in either the canonical form or one of the DSML
// alias forms. The check is a case-insensitive substring match on the tag
// prefix only; it does not validate that the markup is well formed.
func looksLikeToolCallSyntax(text string) bool {
	lower := strings.ToLower(text)
	// The previous two-prefix return that preceded this expression has been
	// removed: it made every alias check below unreachable.
	return strings.Contains(lower, "<|dsml|tool_calls") ||
		strings.Contains(lower, "<dsml|tool_calls") ||
		strings.Contains(lower, "<|tool_calls") ||
		// NOTE(review): this check was a byte-for-byte duplicate of the one
		// above. It is presumed to be the fullwidth-bar (U+FF5C) alias that
		// was flattened to ASCII in transit — confirm against the alias table.
		strings.Contains(lower, "<\uff5ctool_calls") ||
		strings.Contains(lower, "<tool_calls")
}
|
||||
|
||||
func stripFencedCodeBlocks(text string) string {
|
||||
@@ -107,6 +111,9 @@ func stripFencedCodeBlocks(text string) string {
|
||||
inFence := false
|
||||
fenceMarker := ""
|
||||
inCDATA := false
|
||||
// Track builder length when a fence opens so we can preserve content
|
||||
// collected before the unclosed fence.
|
||||
beforeFenceLen := 0
|
||||
for _, line := range lines {
|
||||
if inCDATA || cdataStartsBeforeFence(line) {
|
||||
b.WriteString(line)
|
||||
@@ -118,6 +125,7 @@ func stripFencedCodeBlocks(text string) string {
|
||||
if marker, ok := parseFenceOpen(trimmed); ok {
|
||||
inFence = true
|
||||
fenceMarker = marker
|
||||
beforeFenceLen = b.Len()
|
||||
continue
|
||||
}
|
||||
b.WriteString(line)
|
||||
@@ -131,6 +139,12 @@ func stripFencedCodeBlocks(text string) string {
|
||||
}
|
||||
|
||||
if inFence {
|
||||
// Unclosed fence: preserve content that was collected before the
|
||||
// fence started rather than dropping everything.
|
||||
result := b.String()
|
||||
if beforeFenceLen > 0 && beforeFenceLen <= len(result) {
|
||||
return result[:beforeFenceLen]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
return b.String()
|
||||
|
||||
@@ -124,7 +124,8 @@ func findXMLElementBlocks(text, tag string) []xmlElementBlock {
|
||||
}
|
||||
closeStart, closeEnd, ok := findMatchingXMLEndTagOutsideCDATA(text, tag, bodyStart)
|
||||
if !ok {
|
||||
break
|
||||
pos = bodyStart
|
||||
continue
|
||||
}
|
||||
out = append(out, xmlElementBlock{
|
||||
Attrs: attrs,
|
||||
|
||||
@@ -53,11 +53,16 @@ func TestParseToolCallsSupportsDSMLShellWithCanonicalExampleInCDATA(t *testing.T
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsRejectsMixedDSMLAndCanonicalToolTags(t *testing.T) {
|
||||
func TestParseToolCallsNormalizesMixedDSMLAndCanonicalToolTags(t *testing.T) {
|
||||
// Models commonly mix DSML wrapper tags with canonical inner tags.
|
||||
// These should be normalized and parsed, not rejected.
|
||||
text := `<|DSML|tool_calls><invoke name="Bash"><|DSML|parameter name="command">pwd</|DSML|parameter></invoke></|DSML|tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"Bash"})
|
||||
if len(calls) != 0 {
|
||||
t.Fatalf("expected mixed DSML/XML tool tags to be rejected, got %#v", calls)
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected mixed DSML/XML tool tags to be normalized and parsed, got %#v", calls)
|
||||
}
|
||||
if calls[0].Name != "Bash" || calls[0].Input["command"] != "pwd" {
|
||||
t.Fatalf("unexpected mixed DSML parse result: %#v", calls[0])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -438,3 +443,25 @@ func TestParseToolCallsParsesAfterFourBacktickFence(t *testing.T) {
|
||||
t.Fatalf("expected non-fenced tool call to be parsed, got %#v", res.Calls[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsSkipsProseMentionOfSameWrapperVariant(t *testing.T) {
|
||||
text := strings.Join([]string{
|
||||
"Summary: support canonical <tool_calls> and DSML <|DSML|tool_calls> wrappers.",
|
||||
"",
|
||||
"<|DSML|tool_calls>",
|
||||
"<|DSML|invoke name=\"Bash\">",
|
||||
"<|DSML|parameter name=\"command\"><![CDATA[git status]]></|DSML|parameter>",
|
||||
"</|DSML|invoke>",
|
||||
"</|DSML|tool_calls>",
|
||||
}, "\n")
|
||||
res := ParseToolCallsDetailed(text, []string{"Bash"})
|
||||
if len(res.Calls) != 1 {
|
||||
t.Fatalf("expected one parsed call after prose mention, got %#v", res.Calls)
|
||||
}
|
||||
if res.Calls[0].Name != "Bash" {
|
||||
t.Fatalf("expected Bash call, got %#v", res.Calls[0])
|
||||
}
|
||||
if got, _ := res.Calls[0].Input["command"].(string); got != "git status" {
|
||||
t.Fatalf("expected command to parse, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user