revert: replace fullwidth pipe ｜ with halfwidth | in DSML tool markup

PR #460 introduced fullwidth pipe characters (｜) in DSML tool call formatting
to improve parsing robustness, but models exposed to these fullwidth pipes in
system prompts exhibit significantly higher rates of tool output hallucinations.
Reverting to halfwidth pipes (|) drastically reduces tokenizer/perplexity-driven
hallucinations while retaining the existing confusable-hardening in the parser.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-05-10 15:18:54 +08:00
parent 3beb31309f
commit cee8757d14
45 changed files with 725 additions and 342 deletions

View File

@@ -11,19 +11,19 @@ import "strings"
func BuildToolCallInstructions(toolNames []string) string {
return `TOOL CALL FORMAT — FOLLOW EXACTLY:
<DSMLtool_calls>
<DSMLinvoke name="TOOL_NAME_HERE">
<DSMLparameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></DSMLparameter>
</DSMLinvoke>
</DSMLtool_calls>
<|DSML|tool_calls>
<|DSML|invoke name="TOOL_NAME_HERE">
<|DSML|parameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls>
RULES:
1) Use the <DSMLtool_calls> wrapper format.
2) Put one or more <DSMLinvoke> entries under a single <DSMLtool_calls> root.
3) Put the tool name in the invoke name attribute: <DSMLinvoke name="TOOL_NAME">.
3a) Tag punctuation alphabet: ASCII < > / = " plus the fullwidth vertical bar .
1) Use the <|DSML|tool_calls> wrapper format.
2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root.
3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">.
3a) Tag punctuation alphabet: ASCII < > / = " plus the halfwidth pipe |.
4) All string values must use <![CDATA[...]]>, even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries.
5) Every top-level argument must be a <DSMLparameter name="ARG_NAME">...</DSMLparameter> node.
5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...</|DSML|parameter> node.
6) Objects use nested XML elements inside the parameter body. Arrays may repeat <item> children.
7) Numbers, booleans, and null stay plain text.
8) Use only the parameter names in the tool schema. Do not invent fields.
@@ -31,35 +31,35 @@ RULES:
10) If a required parameter value is unknown, ask the user or answer normally instead of outputting an empty tool call.
11) For shell tools such as Bash / execute_command, the command/script must be inside the command parameter. Never call them with an empty command.
12) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
13) If you call a tool, the first non-whitespace characters of that tool block must be exactly <DSMLtool_calls>.
14) Never omit the opening <DSMLtool_calls> tag, even if you already plan to close with </DSMLtool_calls>.
13) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
14) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with </|DSML|tool_calls>.
15) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
PARAMETER SHAPES:
- string => <DSMLparameter name="x"><![CDATA[value]]></DSMLparameter>
- object => <DSMLparameter name="x"><field>...</field></DSMLparameter>
- array => <DSMLparameter name="x"><item>...</item><item>...</item></DSMLparameter>
- number/bool/null => <DSMLparameter name="x">plain_text</DSMLparameter>
- string => <|DSML|parameter name="x"><![CDATA[value]]></|DSML|parameter>
- object => <|DSML|parameter name="x"><field>...</field></|DSML|parameter>
- array => <|DSML|parameter name="x"><item>...</item><item>...</item></|DSML|parameter>
- number/bool/null => <|DSML|parameter name="x">plain_text</|DSML|parameter>
【WRONG — Do NOT do these】:
Wrong 1 — mixed text after XML:
<DSMLtool_calls>...</DSMLtool_calls> I hope this helps.
<|DSML|tool_calls>...</|DSML|tool_calls> I hope this helps.
Wrong 2 — Markdown code fences:
` + "```xml" + `
<DSMLtool_calls>...</DSMLtool_calls>
<|DSML|tool_calls>...</|DSML|tool_calls>
` + "```" + `
Wrong 3 — missing opening wrapper:
<DSMLinvoke name="TOOL_NAME">...</DSMLinvoke>
</DSMLtool_calls>
<|DSML|invoke name="TOOL_NAME">...</|DSML|invoke>
</|DSML|tool_calls>
Wrong 4 — empty parameters:
<DSMLtool_calls>
<DSMLinvoke name="Bash">
<DSMLparameter name="command"></DSMLparameter>
</DSMLinvoke>
</DSMLtool_calls>
<|DSML|tool_calls>
<|DSML|invoke name="Bash">
<|DSML|parameter name="command"></|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls>
Remember: The ONLY valid way to use tools is the <DSMLtool_calls>...</DSMLtool_calls> block at the end of your response.
Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.
` + buildCorrectToolExamples(toolNames)
}
@@ -150,21 +150,21 @@ func firstScriptExample(names []string) (promptToolExample, bool) {
// renderToolExampleBlock renders calls as a single tool_calls wrapper that
// contains one invoke element per call. Each invoke element carries the call's
// name in its name attribute, followed by the call's parameters as formatted by
// indentPromptParameters. The returned string has no trailing newline.
//
// NOTE(review): this span is a rendered diff, so every markup literal appears
// twice: the line without pipe characters is the removed form (its fullwidth
// pipes appear to have been lost in extraction; confirm against the repo) and
// the line with ASCII '|' is the replacement.
func renderToolExampleBlock(calls []promptToolExample) string {
var b strings.Builder
b.WriteString("<DSMLtool_calls>\n")
b.WriteString("<|DSML|tool_calls>\n")
for _, call := range calls {
b.WriteString(` <DSMLinvoke name="`)
b.WriteString(` <|DSML|invoke name="`)
b.WriteString(call.name)
b.WriteString(`">` + "\n")
b.WriteString(indentPromptParameters(call.params, " "))
b.WriteString("\n </DSMLinvoke>\n")
b.WriteString("\n </|DSML|invoke>\n")
}
b.WriteString("</DSMLtool_calls>")
b.WriteString("</|DSML|tool_calls>")
return b.String()
}
func indentPromptParameters(body, indent string) string {
if strings.TrimSpace(body) == "" {
return indent + `<DSMLparameter name="content"></DSMLparameter>`
return indent + `<|DSML|parameter name="content"></|DSML|parameter>`
}
lines := strings.Split(body, "\n")
for i, line := range lines {
@@ -178,7 +178,7 @@ func indentPromptParameters(body, indent string) string {
}
// wrapParameter returns inner enclosed in a DSML parameter element whose name
// attribute is name. Both arguments are concatenated verbatim; no escaping is
// applied here, so callers pass already-formatted content.
//
// NOTE(review): rendered diff. The first return is the removed line and the
// second, using the ASCII '|' delimiters, is its replacement.
func wrapParameter(name, inner string) string {
return `<DSMLparameter name="` + name + `">` + inner + `</DSMLparameter>`
return `<|DSML|parameter name="` + name + `">` + inner + `</|DSML|parameter>`
}
func exampleBasicParams(name string) (string, bool) {
@@ -204,7 +204,7 @@ func exampleBasicParams(name string) (string, bool) {
case "Edit":
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + wrapParameter("old_string", promptCDATA("foo")) + "\n" + wrapParameter("new_string", promptCDATA("bar")), true
case "MultiEdit":
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<DSMLparameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></DSMLparameter>`, true
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
}
return "", false
}
@@ -212,11 +212,11 @@ func exampleBasicParams(name string) (string, bool) {
// exampleNestedParams returns a sample parameter body for the tools MultiEdit,
// Task, and ask_followup_question, matched against name after trimming
// surrounding whitespace. Parameters are newline-separated. The MultiEdit and
// ask_followup_question samples include a parameter built from repeated <item>
// children, while Task uses two plain string parameters.
//
// The boolean result is true when name matched one of those tools; for any
// other name the function returns "" and false.
//
// NOTE(review): rendered diff. Where a case shows two returns, the first is
// the removed line and the second (ASCII '|' delimiters) is its replacement.
func exampleNestedParams(name string) (string, bool) {
switch strings.TrimSpace(name) {
case "MultiEdit":
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<DSMLparameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></DSMLparameter>`, true
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
case "Task":
return wrapParameter("description", promptCDATA("Investigate flaky tests")) + "\n" + wrapParameter("prompt", promptCDATA("Run targeted tests and summarize failures")), true
case "ask_followup_question":
return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `<DSMLparameter name="follow_up"><item><text>` + promptCDATA("Option A") + `</text></item><item><text>` + promptCDATA("Option B") + `</text></item></DSMLparameter>`, true
return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `<|DSML|parameter name="follow_up"><item><text>` + promptCDATA("Option A") + `</text></item><item><text>` + promptCDATA("Option B") + `</text></item></|DSML|parameter>`, true
}
return "", false
}

View File

@@ -7,20 +7,20 @@ import (
// TestBuildToolCallInstructions_ExecCommandUsesCmdExample checks that the
// instructions built for the exec_command tool contain an invoke example for
// that tool and a "cmd" parameter whose CDATA value is pwd.
//
// NOTE(review): rendered diff. Each condition appears twice, the removed form
// first and the ASCII '|' replacement second.
func TestBuildToolCallInstructions_ExecCommandUsesCmdExample(t *testing.T) {
out := BuildToolCallInstructions([]string{"exec_command"})
if !strings.Contains(out, `<DSMLinvoke name="exec_command">`) {
if !strings.Contains(out, `<|DSML|invoke name="exec_command">`) {
t.Fatalf("expected exec_command in examples, got: %s", out)
}
if !strings.Contains(out, `<DSMLparameter name="cmd"><![CDATA[pwd]]></DSMLparameter>`) {
if !strings.Contains(out, `<|DSML|parameter name="cmd"><![CDATA[pwd]]></|DSML|parameter>`) {
t.Fatalf("expected cmd parameter example for exec_command, got: %s", out)
}
}
// TestBuildToolCallInstructions_ExecuteCommandUsesCommandExample checks that
// the instructions built for the execute_command tool contain an invoke
// example for that tool and a "command" parameter whose CDATA value is pwd.
//
// NOTE(review): rendered diff. Each condition appears twice, the removed form
// first and the ASCII '|' replacement second.
func TestBuildToolCallInstructions_ExecuteCommandUsesCommandExample(t *testing.T) {
out := BuildToolCallInstructions([]string{"execute_command"})
if !strings.Contains(out, `<DSMLinvoke name="execute_command">`) {
if !strings.Contains(out, `<|DSML|invoke name="execute_command">`) {
t.Fatalf("expected execute_command in examples, got: %s", out)
}
if !strings.Contains(out, `<DSMLparameter name="command"><![CDATA[pwd]]></DSMLparameter>`) {
if !strings.Contains(out, `<|DSML|parameter name="command"><![CDATA[pwd]]></|DSML|parameter>`) {
t.Fatalf("expected command parameter example for execute_command, got: %s", out)
}
}
@@ -34,20 +34,20 @@ func TestBuildToolCallInstructions_BashUsesCommandAndDescriptionExamples(t *test
sawDescription := false
for _, block := range blocks {
if !strings.Contains(block, `<DSMLparameter name="command">`) {
if !strings.Contains(block, `<|DSML|parameter name="command">`) {
t.Fatalf("expected every Bash example to use command parameter, got: %s", block)
}
if strings.Contains(block, `<DSMLparameter name="path">`) || strings.Contains(block, `<DSMLparameter name="content">`) {
if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
t.Fatalf("expected Bash examples not to use file write parameters, got: %s", block)
}
if strings.Contains(block, `<DSMLparameter name="description">`) {
if strings.Contains(block, `<|DSML|parameter name="description">`) {
sawDescription = true
}
}
if !sawDescription {
t.Fatalf("expected Bash long-script example to include description, got: %s", out)
}
if strings.Contains(out, `<DSMLinvoke name="Read">`) {
if strings.Contains(out, `<|DSML|invoke name="Read">`) {
t.Fatalf("expected examples to avoid unavailable hard-coded Read tool, got: %s", out)
}
}
@@ -60,10 +60,10 @@ func TestBuildToolCallInstructions_ExecuteCommandLongScriptUsesCommand(t *testin
}
for _, block := range blocks {
if !strings.Contains(block, `<DSMLparameter name="command">`) {
if !strings.Contains(block, `<|DSML|parameter name="command">`) {
t.Fatalf("expected execute_command examples to use command parameter, got: %s", block)
}
if strings.Contains(block, `<DSMLparameter name="path">`) || strings.Contains(block, `<DSMLparameter name="content">`) {
if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
t.Fatalf("expected execute_command examples not to use file write parameters, got: %s", block)
}
}
@@ -80,10 +80,10 @@ func TestBuildToolCallInstructions_ExecCommandLongScriptUsesCmd(t *testing.T) {
}
for _, block := range blocks {
if !strings.Contains(block, `<DSMLparameter name="cmd">`) {
if !strings.Contains(block, `<|DSML|parameter name="cmd">`) {
t.Fatalf("expected exec_command examples to use cmd parameter, got: %s", block)
}
if strings.Contains(block, `<DSMLparameter name="command">`) || strings.Contains(block, `<DSMLparameter name="path">`) || strings.Contains(block, `<DSMLparameter name="content">`) {
if strings.Contains(block, `<|DSML|parameter name="command">`) || strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
t.Fatalf("expected exec_command examples not to use command or file write parameters, got: %s", block)
}
}
@@ -100,10 +100,10 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) {
}
for _, block := range blocks {
if !strings.Contains(block, `<DSMLparameter name="file_path">`) || !strings.Contains(block, `<DSMLparameter name="content">`) {
if !strings.Contains(block, `<|DSML|parameter name="file_path">`) || !strings.Contains(block, `<|DSML|parameter name="content">`) {
t.Fatalf("expected Write examples to use file_path and content, got: %s", block)
}
if strings.Contains(block, `<DSMLparameter name="path">`) {
if strings.Contains(block, `<|DSML|parameter name="path">`) {
t.Fatalf("expected Write examples not to use path, got: %s", block)
}
}
@@ -111,7 +111,7 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) {
func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *testing.T) {
out := BuildToolCallInstructions([]string{"read_file"})
if !strings.Contains(out, "Never omit the opening <DSMLtool_calls> tag") {
if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") {
t.Fatalf("expected explicit missing-opening-tag warning, got: %s", out)
}
if !strings.Contains(out, "Wrong 3 — missing opening wrapper") {
@@ -135,7 +135,7 @@ func TestBuildToolCallInstructions_RejectsEmptyParametersInPrompt(t *testing.T)
func TestBuildToolCallInstructions_UsesPositiveTagPunctuationAlphabet(t *testing.T) {
out := BuildToolCallInstructions([]string{"Bash"})
want := `Tag punctuation alphabet: ASCII < > / = " plus the fullwidth vertical bar .`
want := `Tag punctuation alphabet: ASCII < > / = " plus the halfwidth pipe |.`
if !strings.Contains(out, want) {
t.Fatalf("expected positive tag punctuation alphabet %q, got: %s", want, out)
}
@@ -147,7 +147,7 @@ func TestBuildToolCallInstructions_UsesPositiveTagPunctuationAlphabet(t *testing
}
func findInvokeBlocks(text, name string) []string {
open := `<DSMLinvoke name="` + name + `">`
open := `<|DSML|invoke name="` + name + `">`
remaining := text
blocks := []string{}
for {
@@ -156,11 +156,11 @@ func findInvokeBlocks(text, name string) []string {
return blocks
}
remaining = remaining[start:]
end := strings.Index(remaining, `</DSMLinvoke>`)
end := strings.Index(remaining, `</|DSML|invoke>`)
if end < 0 {
return blocks
}
end += len(`</DSMLinvoke>`)
end += len(`</|DSML|invoke>`)
blocks = append(blocks, remaining[:end])
remaining = remaining[end:]
}

View File

@@ -491,8 +491,6 @@ func consumeToolMarkupPipe(text string, idx int) (int, bool) {
switch {
case text[idx] == '|':
return idx + 1, true
case strings.HasPrefix(text[idx:], ""):
return idx + len(""), true
case strings.HasPrefix(text[idx:], "│"):
return idx + len("│"), true
case strings.HasPrefix(text[idx:], ""):

View File

@@ -131,14 +131,14 @@ func TestParseToolCallsRejectsCamelPrefixedToolMarkupLookalike(t *testing.T) {
}
func TestParseToolCallsSupportsFullwidthDSMLShell(t *testing.T) {
text := `<tool_calls>
<invoke name="Read">
<parameter name="file_path"<![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/README.md]]</parameter>
</invoke>
<invoke name="Read">
<parameter name="file_path"<![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/index.html]]</parameter>
</invoke>
</tool_calls>`
text := `<|tool_calls>
<|invoke name="Read">
<|parameter name="file_path"<![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/README.md]]</|parameter>
</|invoke>
<|invoke name="Read">
<|parameter name="file_path"<![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/index.html]]</|parameter>
</|invoke>
</|tool_calls>`
calls := ParseToolCalls(text, []string{"Read"})
if len(calls) != 2 {
t.Fatalf("expected two fullwidth DSML calls, got %#v", calls)
@@ -152,20 +152,20 @@ func TestParseToolCallsSupportsFullwidthDSMLShell(t *testing.T) {
}
func TestParseToolCallsSupportsCJKAngleDSMDrift(t *testing.T) {
text := `<DSMtool_calls>
<DSMinvoke name="Bash">
<DSMparameter name="description">〈![CDATA[Show commits on local dev not on origin/dev]]〉〈/DSMparameter〉
<DSMparameter name="command">〈![CDATA[git log --oneline origin/dev..dev]]〉〈/DSMparameter〉
〈/DSMinvoke〉
<DSMinvoke name="Bash">
<DSMparameter name="description">〈![CDATA[Show commits on origin/dev not on local dev]]〉〈/DSMparameter〉
<DSMparameter name="command">〈![CDATA[git log --oneline dev..origin/dev]]〉〈/DSMparameter〉
〈/DSMinvoke〉
<DSMinvoke name="Bash">
<DSMparameter name="description">〈![CDATA[Check tracking branch status]]〉〈/DSMparameter〉
<DSMparameter name="command">〈![CDATA[git status -b --short]]〉〈/DSMparameter〉
〈/DSMinvoke〉
〈/DSMtool_calls〉`
text := `<DSM|tool_calls>
<DSM|invoke name="Bash">
<DSM|parameter name="description"|>〈![CDATA[Show commits on local dev not on origin/dev]]〉〈/DSM|parameter〉
<DSM|parameter name="command"|>〈![CDATA[git log --oneline origin/dev..dev]]〉〈/DSM|parameter〉
〈/DSM|invoke〉
<DSM|invoke name="Bash">
<DSM|parameter name="description"|>〈![CDATA[Show commits on origin/dev not on local dev]]〉〈/DSM|parameter〉
<DSM|parameter name="command"|>〈![CDATA[git log --oneline dev..origin/dev]]〉〈/DSM|parameter〉
〈/DSM|invoke〉
<DSM|invoke name="Bash">
<DSM|parameter name="description"|>〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉
<DSM|parameter name="command"|>〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉
〈/DSM|invoke〉
〈/DSM|tool_calls〉`
calls := ParseToolCalls(text, []string{"Bash"})
if len(calls) != 3 {
@@ -1203,7 +1203,7 @@ func TestFindMatchingToolMarkupCloseBoundaryConditions(t *testing.T) {
}
func TestParseToolCallsSupportsDSMLShellWithFullwidthClosingSlash(t *testing.T) {
text := `<DSMLtool_calls><DSMLinvoke name="execute_code"><DSMLparameter name="code"><![CDATA[print("hi")]]></DSMLparameter></DSMLinvoke><DSMLtool_calls>`
text := `<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke><DSML|tool_calls>`
calls := ParseToolCalls(text, []string{"execute_code"})
if len(calls) != 1 {
t.Fatalf("expected 1 DSML call with fullwidth closing slash, got %#v", calls)
@@ -1214,7 +1214,7 @@ func TestParseToolCallsSupportsDSMLShellWithFullwidthClosingSlash(t *testing.T)
}
func TestParseToolCallsSupportsDSMLShellWithSentencePieceSeparatorAndFullwidthGT(t *testing.T) {
text := `<DSML▁tool_calls><DSML▁invoke▁name="execute_code"><DSML▁parameter▁name="code"><![CDATA[print("hi")]]></DSML▁parameter></DSML▁invoke></DSML▁tool_calls`
text := `<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls`
calls := ParseToolCalls(text, []string{"execute_code"})
if len(calls) != 1 {
t.Fatalf("expected 1 DSML call with sentencepiece separator and fullwidth terminator, got %#v", calls)
@@ -1225,7 +1225,7 @@ func TestParseToolCallsSupportsDSMLShellWithSentencePieceSeparatorAndFullwidthGT
}
func TestParseToolCallsSupportsDSMLShellWithFullwidthLTUnicodeSpaceAndFullwidthAttributes(t *testing.T) {
text := `DSML tool_callsDSML invoke name“execute_code”DSML parameter name“code”<![CDATA[print("hi")]]>DSMLparameterDSMLinvokeDSMLtool_calls`
text := `|DSML tool_calls|DSML invoke name“execute_code”|DSML parameter name“code”<![CDATA[print("hi")]]>DSML|parameterDSML|invokeDSML|tool_calls`
calls := ParseToolCalls(text, []string{"execute_code"})
if len(calls) != 1 {
t.Fatalf("expected 1 DSML call with fullwidth opening delimiter and Unicode attribute confusables, got %#v", calls)