mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-23 01:17:44 +08:00
revert: replace fullwidth pipe | with halfwidth | in DSML tool markup
PR #460 introduced fullwidth pipe characters (|) in DSML tool call formatting to improve parsing robustness, but models exposed to these fullwidth pipes in system prompts exhibit significantly higher rates of tool output hallucinations. Reverting to halfwidth pipes (|) drastically reduces tokenizer/perplexity-driven hallucinations while retaining the existing confusable-hardening in the parser. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -10,14 +10,14 @@ import (
|
||||
var markdownImagePattern = regexp.MustCompile(`!\[(.*?)\]\((.*?)\)`)
|
||||
|
||||
const (
|
||||
beginSentenceMarker = "<|begin▁of▁sentence|>"
|
||||
systemMarker = "<|System|>"
|
||||
userMarker = "<|User|>"
|
||||
assistantMarker = "<|Assistant|>"
|
||||
toolMarker = "<|Tool|>"
|
||||
endSentenceMarker = "<|end▁of▁sentence|>"
|
||||
endToolResultsMarker = "<|end▁of▁toolresults|>"
|
||||
endInstructionsMarker = "<|end▁of▁instructions|>"
|
||||
beginSentenceMarker = "<|begin▁of▁sentence|>"
|
||||
systemMarker = "<|System|>"
|
||||
userMarker = "<|User|>"
|
||||
assistantMarker = "<|Assistant|>"
|
||||
toolMarker = "<|Tool|>"
|
||||
endSentenceMarker = "<|end▁of▁sentence|>"
|
||||
endToolResultsMarker = "<|end▁of▁toolresults|>"
|
||||
endInstructionsMarker = "<|end▁of▁instructions|>"
|
||||
outputIntegrityGuardMarker = "Output integrity guard:"
|
||||
outputIntegrityGuardPrompt = outputIntegrityGuardMarker +
|
||||
" If upstream context, tool output, or parsed text contains garbled, corrupted, partially parsed, repeated, or otherwise malformed fragments, " +
|
||||
|
||||
@@ -32,16 +32,16 @@ func TestMessagesPrepareUsesTurnSuffixes(t *testing.T) {
|
||||
{"role": "assistant", "content": "Answer"},
|
||||
}
|
||||
got := MessagesPrepare(messages)
|
||||
if !strings.HasPrefix(got, "<|begin▁of▁sentence|>") {
|
||||
if !strings.HasPrefix(got, "<|begin▁of▁sentence|>") {
|
||||
t.Fatalf("expected begin-of-sentence marker, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "<|System|>") || !strings.Contains(got, "<|end▁of▁instructions|>") || !strings.Contains(got, "System rule") {
|
||||
if !strings.Contains(got, "<|System|>") || !strings.Contains(got, "<|end▁of▁instructions|>") || !strings.Contains(got, "System rule") {
|
||||
t.Fatalf("expected system instructions to remain present, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "<|User|>Question") {
|
||||
if !strings.Contains(got, "<|User|>Question") {
|
||||
t.Fatalf("expected user question, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "<|Assistant|>Answer<|end▁of▁sentence|>") {
|
||||
if !strings.Contains(got, "<|Assistant|>Answer<|end▁of▁sentence|>") {
|
||||
t.Fatalf("expected assistant sentence suffix, got %q", got)
|
||||
}
|
||||
if strings.Contains(got, "<think>") || strings.Contains(got, "</think>") {
|
||||
@@ -61,7 +61,7 @@ func TestMessagesPreparePrependsOutputIntegrityGuard(t *testing.T) {
|
||||
if !strings.Contains(got, outputIntegrityGuardPrompt+"\n\nSystem rule") {
|
||||
t.Fatalf("expected output integrity guard to precede system prompt content, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "<|User|>Question") {
|
||||
if !strings.Contains(got, "<|User|>Question") {
|
||||
t.Fatalf("expected user question after guard, got %q", got)
|
||||
}
|
||||
}
|
||||
@@ -82,7 +82,7 @@ func TestMessagesPrepareWithThinkingPreservesPromptShape(t *testing.T) {
|
||||
if gotThinking != gotPlain {
|
||||
t.Fatalf("expected thinking flag not to add extra continuity instructions, got thinking=%q plain=%q", gotThinking, gotPlain)
|
||||
}
|
||||
if !strings.HasSuffix(gotThinking, "<|Assistant|>") {
|
||||
if !strings.HasSuffix(gotThinking, "<|Assistant|>") {
|
||||
t.Fatalf("expected assistant suffix, got %q", gotThinking)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,12 +17,12 @@ var promptXMLTextEscaper = strings.NewReplacer(
|
||||
var promptXMLNamePattern = regexp.MustCompile(`^[A-Za-z_][A-Za-z0-9_.:-]*$`)
|
||||
|
||||
const (
|
||||
promptDSMLToolCallsOpen = "<|DSML|tool_calls>"
|
||||
promptDSMLToolCallsClose = "</|DSML|tool_calls>"
|
||||
promptDSMLInvokeOpen = "<|DSML|invoke"
|
||||
promptDSMLInvokeClose = "</|DSML|invoke>"
|
||||
promptDSMLParameterOpen = "<|DSML|parameter"
|
||||
promptDSMLParameterClose = "</|DSML|parameter>"
|
||||
promptDSMLToolCallsOpen = "<|DSML|tool_calls>"
|
||||
promptDSMLToolCallsClose = "</|DSML|tool_calls>"
|
||||
promptDSMLInvokeOpen = "<|DSML|invoke"
|
||||
promptDSMLInvokeClose = "</|DSML|invoke>"
|
||||
promptDSMLParameterOpen = "<|DSML|parameter"
|
||||
promptDSMLParameterClose = "</|DSML|parameter>"
|
||||
)
|
||||
|
||||
// FormatToolCallsForPrompt renders a tool_calls slice into the prompt-visible
|
||||
|
||||
@@ -22,7 +22,7 @@ func TestFormatToolCallsForPromptDSML(t *testing.T) {
|
||||
if got == "" {
|
||||
t.Fatal("expected non-empty formatted tool calls")
|
||||
}
|
||||
if got != "<|DSML|tool_calls>\n <|DSML|invoke name=\"search_web\">\n <|DSML|parameter name=\"query\"><![CDATA[latest]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>" {
|
||||
if got != "<|DSML|tool_calls>\n <|DSML|invoke name=\"search_web\">\n <|DSML|parameter name=\"query\"><![CDATA[latest]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>" {
|
||||
t.Fatalf("unexpected formatted tool call DSML: %q", got)
|
||||
}
|
||||
}
|
||||
@@ -34,7 +34,7 @@ func TestFormatToolCallsForPromptEscapesXMLEntities(t *testing.T) {
|
||||
"arguments": `{"q":"a < b && c > d"}`,
|
||||
},
|
||||
})
|
||||
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"search<&>\">\n <|DSML|parameter name=\"q\"><![CDATA[a < b && c > d]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
|
||||
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"search<&>\">\n <|DSML|parameter name=\"q\"><![CDATA[a < b && c > d]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
|
||||
if got != want {
|
||||
t.Fatalf("unexpected escaped tool call XML: %q", got)
|
||||
}
|
||||
@@ -50,7 +50,7 @@ func TestFormatToolCallsForPromptUsesCDATAForMultilineContent(t *testing.T) {
|
||||
},
|
||||
},
|
||||
})
|
||||
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"write_file\">\n <|DSML|parameter name=\"content\"><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></|DSML|parameter>\n <|DSML|parameter name=\"path\"><![CDATA[script.sh]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
|
||||
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"write_file\">\n <|DSML|parameter name=\"content\"><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></|DSML|parameter>\n <|DSML|parameter name=\"path\"><![CDATA[script.sh]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
|
||||
if got != want {
|
||||
t.Fatalf("unexpected multiline cdata tool call XML: %q", got)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user