revert: replace fullwidth pipe ｜ with halfwidth | in DSML tool markup

PR #460 introduced fullwidth pipe characters (｜) in DSML tool call formatting
to improve parsing robustness, but models exposed to these fullwidth pipes in
system prompts exhibit significantly higher rates of tool output hallucinations.
Reverting to halfwidth pipes (|) drastically reduces tokenizer/perplexity-driven
hallucinations while retaining the existing confusable-hardening in the parser.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-05-10 15:18:54 +08:00
parent 3beb31309f
commit cee8757d14
45 changed files with 725 additions and 342 deletions

View File

@@ -7,9 +7,9 @@ const {
SKIP_EXACT_PATHS,
} = require('../shared/deepseek-constants');
// Patterns for special-token markers of the form "<|name|>" appearing in text.
// All of them allow optional whitespace just inside the pipe delimiters, accept
// either "_" or "▁" between words, and carry the global + case-insensitive flags.
//
// NOTE(review): each constant is declared twice below. This looks like a diff
// hunk whose +/- markers were lost (first three = removed lines, last three =
// their replacements) — confirm against the repository before relying on it.

// Matches "<|begin_of_sentence|>".
const LEAKED_BOS_MARKER_PATTERN = /<[|]\s*begin[_▁]of[_▁]sentence\s*[|]>/gi;
// Matches "<|of_thought|>" with an optional "begin_" prefix and any number of
// extra separators before "of" (e.g. "<|begin_of_thought|>").
const LEAKED_THOUGHT_MARKER_PATTERN = /<[|]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[|]>/gi;
// Matches the markers: assistant, tool, end_of_sentence, end_of_thinking,
// end_of_thought, end_of_toolresults, end_of_instructions.
const LEAKED_META_MARKER_PATTERN = /<[|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|]>/gi;
// Same three patterns with a two-entry delimiter class.
// NOTE(review): the class "[||]" lists the same character twice as shown here;
// presumably one entry was a different pipe variant (e.g. fullwidth) that was
// normalized in this view — verify the actual characters in the source file.
const LEAKED_BOS_MARKER_PATTERN = /<[||]\s*begin[_▁]of[_▁]sentence\s*[||]>/gi;
const LEAKED_THOUGHT_MARKER_PATTERN = /<[||]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[||]>/gi;
const LEAKED_META_MARKER_PATTERN = /<[||]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[||]>/gi;

View File

@@ -1356,7 +1356,7 @@ function consumeToolMarkupPipe(raw, idx) {
if (pos >= raw.length) {
return { next: idx, ok: false };
}
for (const variant of ['|', '', '│', '', '❘', 'ǀ', '']) {
for (const variant of ['|', '│', '', '❘', 'ǀ', '']) {
if (raw.startsWith(variant, pos)) {
return { next: pos + variant.length, ok: true };
}