revert: replace fullwidth pipe | (U+FF5C) with halfwidth | (U+007C) in DSML tool markup

PR #460 introduced fullwidth pipe characters (|, U+FF5C) in DSML tool call formatting
to improve parsing robustness, but models exposed to these fullwidth pipes in
system prompts exhibit significantly higher rates of tool output hallucinations.
Reverting to halfwidth pipes (|) drastically reduces tokenizer/perplexity-driven
hallucinations while retaining the existing confusable-hardening in the parser.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-05-10 15:18:54 +08:00
parent 3beb31309f
commit cee8757d14
45 changed files with 725 additions and 342 deletions

View File

@@ -13,10 +13,10 @@ func TestMessagesPrepareBasic(t *testing.T) {
if got == "" {
t.Fatal("expected non-empty prompt")
}
if !strings.HasPrefix(got, "<begin▁of▁sentence><System>") {
if !strings.HasPrefix(got, "<|begin▁of▁sentence|><|System|>") {
t.Fatalf("expected output integrity guard at the start, got %q", got)
}
if !strings.Contains(got, "Hello") || !strings.HasSuffix(got, "<Assistant>") {
if !strings.Contains(got, "Hello") || !strings.HasSuffix(got, "<|Assistant|>") {
t.Fatalf("unexpected prompt: %q", got)
}
}
@@ -33,31 +33,31 @@ func TestMessagesPrepareRoles(t *testing.T) {
if !contains(got, "Output integrity guard") {
t.Fatalf("expected output integrity guard in %q", got)
}
if !contains(got, "You are helper") || !contains(got, "<User>Hi") {
if !contains(got, "You are helper") || !contains(got, "<|User|>Hi") {
t.Fatalf("expected system/user content in %q", got)
}
if !contains(got, "<begin▁of▁sentence>") {
if !contains(got, "<|begin▁of▁sentence|>") {
t.Fatalf("expected begin marker in %q", got)
}
if !contains(got, "<User>Hi<Assistant>Hello<end▁of▁sentence>") {
if !contains(got, "<|User|>Hi<|Assistant|>Hello<|end▁of▁sentence|>") {
t.Fatalf("expected user/assistant separation in %q", got)
}
if !contains(got, "<Assistant>Hello<end▁of▁sentence><Tool>Search results<end▁of▁toolresults>") {
if !contains(got, "<|Assistant|>Hello<|end▁of▁sentence|><|Tool|>Search results<|end▁of▁toolresults|>") {
t.Fatalf("expected assistant/tool separation in %q", got)
}
if !contains(got, "<Tool>Search results<end▁of▁toolresults><User>How are you") {
if !contains(got, "<|Tool|>Search results<|end▁of▁toolresults|><|User|>How are you") {
t.Fatalf("expected tool/user separation in %q", got)
}
if !contains(got, "<Assistant>") {
if !contains(got, "<|Assistant|>") {
t.Fatalf("expected assistant marker in %q", got)
}
if !contains(got, "<System>") {
if !contains(got, "<|System|>") {
t.Fatalf("expected system marker in %q", got)
}
if !contains(got, "<User>") {
if !contains(got, "<|User|>") {
t.Fatalf("expected user marker in %q", got)
}
if !contains(got, "<Tool>") {
if !contains(got, "<|Tool|>") {
t.Fatalf("expected tool marker in %q", got)
}
}

View File

@@ -162,20 +162,20 @@ func TestMessagesPrepareMergesConsecutiveSameRole(t *testing.T) {
{"role": "user", "content": "World"},
}
got := MessagesPrepare(messages)
if !strings.HasPrefix(got, "<begin▁of▁sentence>") {
if !strings.HasPrefix(got, "<|begin▁of▁sentence|>") {
t.Fatalf("expected user marker at the start, got %q", got)
}
if !strings.Contains(got, "Hello") || !strings.Contains(got, "World") {
t.Fatalf("expected both messages, got %q", got)
}
// Should be merged into a single user turn with one marker at the start.
count := strings.Count(got, "<User>")
count := strings.Count(got, "<|User|>")
if count != 1 {
t.Fatalf("expected one User marker for the merged pair, got %d occurrences", count)
}
// User messages no longer have end_of_sentence markers in the official format.
// The merged pair should have zero end_of_sentence markers (user turn only).
if count := strings.Count(got, "<end▁of▁sentence>"); count != 0 {
if count := strings.Count(got, "<|end▁of▁sentence|>"); count != 0 {
t.Fatalf("expected zero sentence terminators for user-only merge, got %d occurrences", count)
}
}
@@ -186,16 +186,16 @@ func TestMessagesPrepareAssistantMarkers(t *testing.T) {
{"role": "assistant", "content": "Hello!"},
}
got := MessagesPrepare(messages)
if !strings.Contains(got, "<Assistant>") {
if !strings.Contains(got, "<|Assistant|>") {
t.Fatalf("expected assistant marker, got %q", got)
}
if !strings.Contains(got, "<end▁of▁sentence>") {
if !strings.Contains(got, "<|end▁of▁sentence|>") {
t.Fatalf("expected end of sentence marker, got %q", got)
}
if strings.Count(got, "<end▁of▁sentence>") != 1 {
if strings.Count(got, "<|end▁of▁sentence|>") != 1 {
t.Fatalf("expected one end_of_sentence (assistant only), got %q", got)
}
if !strings.Contains(got, "<Assistant>Hello!<end▁of▁sentence>") {
if !strings.Contains(got, "<|Assistant|>Hello!<|end▁of▁sentence|>") {
t.Fatalf("expected assistant EOS suffix, got %q", got)
}
if strings.Contains(got, "<think>") || strings.Contains(got, "</think>") {