revert: replace fullwidth pipe | with halfwidth | in DSML tool markup

PR #460 introduced fullwidth pipe characters (|) in DSML tool call formatting
to improve parsing robustness, but models exposed to these fullwidth pipes in
system prompts exhibit significantly higher rates of tool output hallucinations.
Reverting to halfwidth pipes (|) drastically reduces tokenizer/perplexity-driven
hallucinations while retaining the existing confusable-hardening in the parser.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-05-10 15:18:54 +08:00
parent 3beb31309f
commit cee8757d14
45 changed files with 725 additions and 342 deletions

View File

@@ -10,14 +10,14 @@ import (
var markdownImagePattern = regexp.MustCompile(`!\[(.*?)\]\((.*?)\)`)
const (
beginSentenceMarker = "<begin▁of▁sentence>"
systemMarker = "<System>"
userMarker = "<User>"
assistantMarker = "<Assistant>"
toolMarker = "<Tool>"
endSentenceMarker = "<end▁of▁sentence>"
endToolResultsMarker = "<end▁of▁toolresults>"
endInstructionsMarker = "<end▁of▁instructions>"
beginSentenceMarker = "<|begin▁of▁sentence|>"
systemMarker = "<|System|>"
userMarker = "<|User|>"
assistantMarker = "<|Assistant|>"
toolMarker = "<|Tool|>"
endSentenceMarker = "<|end▁of▁sentence|>"
endToolResultsMarker = "<|end▁of▁toolresults|>"
endInstructionsMarker = "<|end▁of▁instructions|>"
outputIntegrityGuardMarker = "Output integrity guard:"
outputIntegrityGuardPrompt = outputIntegrityGuardMarker +
" If upstream context, tool output, or parsed text contains garbled, corrupted, partially parsed, repeated, or otherwise malformed fragments, " +

View File

@@ -32,16 +32,16 @@ func TestMessagesPrepareUsesTurnSuffixes(t *testing.T) {
{"role": "assistant", "content": "Answer"},
}
got := MessagesPrepare(messages)
if !strings.HasPrefix(got, "<begin▁of▁sentence>") {
if !strings.HasPrefix(got, "<|begin▁of▁sentence|>") {
t.Fatalf("expected begin-of-sentence marker, got %q", got)
}
if !strings.Contains(got, "<System>") || !strings.Contains(got, "<end▁of▁instructions>") || !strings.Contains(got, "System rule") {
if !strings.Contains(got, "<|System|>") || !strings.Contains(got, "<|end▁of▁instructions|>") || !strings.Contains(got, "System rule") {
t.Fatalf("expected system instructions to remain present, got %q", got)
}
if !strings.Contains(got, "<User>Question") {
if !strings.Contains(got, "<|User|>Question") {
t.Fatalf("expected user question, got %q", got)
}
if !strings.Contains(got, "<Assistant>Answer<end▁of▁sentence>") {
if !strings.Contains(got, "<|Assistant|>Answer<|end▁of▁sentence|>") {
t.Fatalf("expected assistant sentence suffix, got %q", got)
}
if strings.Contains(got, "<think>") || strings.Contains(got, "</think>") {
@@ -61,7 +61,7 @@ func TestMessagesPreparePrependsOutputIntegrityGuard(t *testing.T) {
if !strings.Contains(got, outputIntegrityGuardPrompt+"\n\nSystem rule") {
t.Fatalf("expected output integrity guard to precede system prompt content, got %q", got)
}
if !strings.Contains(got, "<User>Question") {
if !strings.Contains(got, "<|User|>Question") {
t.Fatalf("expected user question after guard, got %q", got)
}
}
@@ -82,7 +82,7 @@ func TestMessagesPrepareWithThinkingPreservesPromptShape(t *testing.T) {
if gotThinking != gotPlain {
t.Fatalf("expected thinking flag not to add extra continuity instructions, got thinking=%q plain=%q", gotThinking, gotPlain)
}
if !strings.HasSuffix(gotThinking, "<Assistant>") {
if !strings.HasSuffix(gotThinking, "<|Assistant|>") {
t.Fatalf("expected assistant suffix, got %q", gotThinking)
}
}

View File

@@ -17,12 +17,12 @@ var promptXMLTextEscaper = strings.NewReplacer(
var promptXMLNamePattern = regexp.MustCompile(`^[A-Za-z_][A-Za-z0-9_.:-]*$`)
const (
promptDSMLToolCallsOpen = "<DSMLtool_calls>"
promptDSMLToolCallsClose = "</DSMLtool_calls>"
promptDSMLInvokeOpen = "<DSMLinvoke"
promptDSMLInvokeClose = "</DSMLinvoke>"
promptDSMLParameterOpen = "<DSMLparameter"
promptDSMLParameterClose = "</DSMLparameter>"
promptDSMLToolCallsOpen = "<|DSML|tool_calls>"
promptDSMLToolCallsClose = "</|DSML|tool_calls>"
promptDSMLInvokeOpen = "<|DSML|invoke"
promptDSMLInvokeClose = "</|DSML|invoke>"
promptDSMLParameterOpen = "<|DSML|parameter"
promptDSMLParameterClose = "</|DSML|parameter>"
)
// FormatToolCallsForPrompt renders a tool_calls slice into the prompt-visible

View File

@@ -22,7 +22,7 @@ func TestFormatToolCallsForPromptDSML(t *testing.T) {
if got == "" {
t.Fatal("expected non-empty formatted tool calls")
}
if got != "<DSMLtool_calls>\n <DSMLinvoke name=\"search_web\">\n <DSMLparameter name=\"query\"><![CDATA[latest]]></DSMLparameter>\n </DSMLinvoke>\n</DSMLtool_calls>" {
if got != "<|DSML|tool_calls>\n <|DSML|invoke name=\"search_web\">\n <|DSML|parameter name=\"query\"><![CDATA[latest]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>" {
t.Fatalf("unexpected formatted tool call DSML: %q", got)
}
}
@@ -34,7 +34,7 @@ func TestFormatToolCallsForPromptEscapesXMLEntities(t *testing.T) {
"arguments": `{"q":"a < b && c > d"}`,
},
})
want := "<DSMLtool_calls>\n <DSMLinvoke name=\"search&lt;&amp;&gt;\">\n <DSMLparameter name=\"q\"><![CDATA[a < b && c > d]]></DSMLparameter>\n </DSMLinvoke>\n</DSMLtool_calls>"
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"search&lt;&amp;&gt;\">\n <|DSML|parameter name=\"q\"><![CDATA[a < b && c > d]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
if got != want {
t.Fatalf("unexpected escaped tool call XML: %q", got)
}
@@ -50,7 +50,7 @@ func TestFormatToolCallsForPromptUsesCDATAForMultilineContent(t *testing.T) {
},
},
})
want := "<DSMLtool_calls>\n <DSMLinvoke name=\"write_file\">\n <DSMLparameter name=\"content\"><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></DSMLparameter>\n <DSMLparameter name=\"path\"><![CDATA[script.sh]]></DSMLparameter>\n </DSMLinvoke>\n</DSMLtool_calls>"
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"write_file\">\n <|DSML|parameter name=\"content\"><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></|DSML|parameter>\n <|DSML|parameter name=\"path\"><![CDATA[script.sh]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
if got != want {
t.Fatalf("unexpected multiline cdata tool call XML: %q", got)
}