revert: replace fullwidth pipe ｜ with halfwidth | in DSML tool markup

PR #460 introduced fullwidth pipe characters (｜) in DSML tool call formatting
to improve parsing robustness, but models exposed to these fullwidth pipes in
system prompts exhibit significantly higher rates of tool output hallucinations.
Reverting to halfwidth pipes (|) drastically reduces tokenizer/perplexity-driven
hallucinations while retaining the existing confusable-hardening in the parser.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-05-10 15:18:54 +08:00
parent 3beb31309f
commit cee8757d14
45 changed files with 725 additions and 342 deletions

View File

@@ -93,7 +93,11 @@ func (d *claudeCurrentInputDS) GetPow(context.Context, *auth.RequestAuth, int) (
func (d *claudeCurrentInputDS) UploadFile(_ context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) {
d.uploads = append(d.uploads, req)
return &dsclient.UploadFileResult{ID: "file-claude-history"}, nil
id := "file-claude-history"
if len(d.uploads) > 1 {
id = "file-claude-tools"
}
return &dsclient.UploadFileResult{ID: id}, nil
}
func (d *claudeCurrentInputDS) CallCompletion(_ context.Context, _ *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) {
@@ -156,3 +160,47 @@ func TestClaudeDirectAppliesCurrentInputFile(t *testing.T) {
t.Fatalf("expected persisted message to match upstream continuation prompt, got %#v", full.Messages)
}
}
// TestClaudeCurrentInputFileUploadsToolsSeparately sends a Claude
// /v1/messages request that declares one tool and checks that the handler
// uploads two context files (history first, tools second), keeps tool
// descriptions out of both the history file and the live prompt, and lists
// both returned file ids first in the upstream payload's ref_file_ids.
func TestClaudeCurrentInputFileUploadsToolsSeparately(t *testing.T) {
ds := &claudeCurrentInputDS{}
h := &Handler{
Store: mockClaudeConfig{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
Auth: claudeCurrentInputAuth{},
DS: ds,
}
// Single user turn plus one "search" tool whose description is the marker
// string looked for in the assertions below.
reqBody := `{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hello from claude"}],"tools":[{"name":"search","description":"Search docs","input_schema":{"type":"object"}}],"max_tokens":1024}`
req := httptest.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(reqBody))
req.Header.Set("Content-Type", "application/json")
rec := httptest.NewRecorder()
h.Messages(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
}
// Exactly two uploads are expected, in a fixed order: history, then tools.
if len(ds.uploads) != 2 {
t.Fatalf("expected history and tools uploads, got %d", len(ds.uploads))
}
if ds.uploads[0].Filename != "DS2API_HISTORY.txt" || ds.uploads[1].Filename != "DS2API_TOOLS.txt" {
t.Fatalf("unexpected upload filenames: %#v", ds.uploads)
}
// The history transcript must not carry the tool listing.
historyText := string(ds.uploads[0].Data)
if strings.Contains(historyText, "You have access to these tools") || strings.Contains(historyText, "Description: Search docs") {
t.Fatalf("history transcript should not embed tool descriptions, got %q", historyText)
}
// The tools transcript must carry its header, the tool name and the description.
toolsText := string(ds.uploads[1].Data)
if !strings.Contains(toolsText, "# DS2API_TOOLS.txt") || !strings.Contains(toolsText, "Tool: search") || !strings.Contains(toolsText, "Description: Search docs") {
t.Fatalf("expected tools transcript to include tool schema, got %q", toolsText)
}
// NOTE(review): ds.payload is presumably recorded by the stub's
// CallCompletion, which is outside this view — confirm.
refIDs, _ := ds.payload["ref_file_ids"].([]any)
if len(refIDs) < 2 || refIDs[0] != "file-claude-history" || refIDs[1] != "file-claude-tools" {
t.Fatalf("expected history and tools ref ids first, got %#v", ds.payload["ref_file_ids"])
}
// The live prompt points at the tools file and keeps the format rules, but
// does not inline the tool description itself.
prompt, _ := ds.payload["prompt"].(string)
if !strings.Contains(prompt, "DS2API_TOOLS.txt") || !strings.Contains(prompt, "TOOL CALL FORMAT") {
t.Fatalf("expected live prompt to reference tools file and retain format instructions, got %q", prompt)
}
if strings.Contains(prompt, "Description: Search docs") {
t.Fatalf("live prompt should not inline tool descriptions, got %q", prompt)
}
}

View File

@@ -93,10 +93,10 @@ func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) {
t.Fatalf("expected call id preserved, got %#v", call)
}
content, _ := m["content"].(string)
if !containsStr(content, "<DSMLtool_calls>") || !containsStr(content, `<DSMLinvoke name="search_web">`) {
if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) {
t.Fatalf("expected assistant content to include DSML tool call history, got %q", content)
}
if !containsStr(content, `<DSMLparameter name="query"><![CDATA[latest]]></DSMLparameter>`) {
if !containsStr(content, `<|DSML|parameter name="query"><![CDATA[latest]]></|DSML|parameter>`) {
t.Fatalf("expected assistant content to include serialized parameters, got %q", content)
}
}
@@ -133,7 +133,7 @@ func TestNormalizeClaudeMessagesPreservesThinkingOnToolUseHistory(t *testing.T)
if !containsStr(prompt, "[reasoning_content]\nneed live search before answering\n[/reasoning_content]") {
t.Fatalf("expected thinking in prompt history, got %q", prompt)
}
if !containsStr(prompt, `<DSMLinvoke name="search_web">`) {
if !containsStr(prompt, `<|DSML|invoke name="search_web">`) {
t.Fatalf("expected tool call in prompt history, got %q", prompt)
}
}
@@ -329,7 +329,7 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) {
if !containsStr(prompt, "Search the web") {
t.Fatalf("expected description in prompt")
}
if !containsStr(prompt, "<DSMLtool_calls>") {
if !containsStr(prompt, "<|DSML|tool_calls>") {
t.Fatalf("expected DSML tool_calls format in prompt")
}
if !containsStr(prompt, "TOOL CALL FORMAT") {

View File

@@ -52,7 +52,7 @@ func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNorma
RequestedModel: strings.TrimSpace(model),
ResolvedModel: dsModel,
ResponseModel: strings.TrimSpace(model),
Messages: payload["messages"].([]any),
Messages: normalizedMessages,
PromptTokenText: finalPrompt,
ToolsRaw: toolsRequested,
FinalPrompt: finalPrompt,

View File

@@ -89,7 +89,7 @@ func TestGeminiMessagesFromRequestPreservesThoughtOnFunctionCallHistory(t *testi
if !strings.Contains(prompt, "[reasoning_content]\nneed current state before answering\n[/reasoning_content]") {
t.Fatalf("expected thought in prompt history, got %q", prompt)
}
if !strings.Contains(prompt, `<DSMLinvoke name="search_web">`) {
if !strings.Contains(prompt, `<|DSML|invoke name="search_web">`) {
t.Fatalf("expected tool call in prompt history, got %q", prompt)
}
}

View File

@@ -67,7 +67,11 @@ func (m *testGeminiDS) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (st
//nolint:unused // reserved test double for native Gemini DS-call path coverage.
func (m *testGeminiDS) UploadFile(_ context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) {
m.uploadCalls = append(m.uploadCalls, req)
return &dsclient.UploadFileResult{ID: "file-gemini-history"}, nil
id := "file-gemini-history"
if len(m.uploadCalls) > 1 {
id = "file-gemini-tools"
}
return &dsclient.UploadFileResult{ID: id}, nil
}
//nolint:unused // reserved test double for native Gemini DS-call path coverage.

View File

@@ -2,6 +2,7 @@ package chat
import (
"context"
"fmt"
"io"
"net/http"
"strings"
@@ -148,8 +149,12 @@ func (m *inlineUploadDSStub) UploadFile(ctx context.Context, _ *auth.RequestAuth
if m.uploadErr != nil {
return nil, m.uploadErr
}
id := "file-inline-1"
if len(m.uploadCalls) > 1 {
id = "file-inline-" + fmt.Sprint(len(m.uploadCalls))
}
return &dsclient.UploadFileResult{
ID: "file-inline-1",
ID: id,
Filename: req.Filename,
Bytes: int64(len(req.Data)),
Status: "uploaded",

View File

@@ -141,6 +141,71 @@ func TestHandleVercelStreamPrepareAppliesCurrentInputFile(t *testing.T) {
}
}
// TestHandleVercelStreamPrepareUsesHalfwidthDSMLToolPrompt calls the Vercel
// stream-prepare endpoint with one function tool and checks that both the
// returned final_prompt and payload.prompt contain the halfwidth-pipe DSML
// tool instructions, contain no U+FF5C character or legacy fullwidth wording,
// and that the reported tool_names equal the single requested tool.
func TestHandleVercelStreamPrepareUsesHalfwidthDSMLToolPrompt(t *testing.T) {
// The internal secret set here is the same value sent in the
// X-Ds2-Internal-Token header below.
t.Setenv("VERCEL", "1")
t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret")
h := &Handler{
Store: mockOpenAIConfig{},
Auth: streamStatusAuthStub{},
DS: &inlineUploadDSStub{},
}
// Streaming chat request with one user message and one "search" function tool.
reqBody, _ := json.Marshal(map[string]any{
"model": "deepseek-v4-flash",
"messages": []any{
map[string]any{"role": "user", "content": "search docs"},
},
"tools": []any{
map[string]any{
"type": "function",
"function": map[string]any{
"name": "search",
"description": "search docs",
"parameters": map[string]any{
"type": "object",
"properties": map[string]any{
"query": map[string]any{"type": "string"},
},
"required": []any{"query"},
},
},
},
},
"stream": true,
})
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions?__stream_prepare=1", strings.NewReader(string(reqBody)))
req.Header.Set("Authorization", "Bearer direct-token")
req.Header.Set("Content-Type", "application/json")
req.Header.Set("X-Ds2-Internal-Token", "stream-secret")
rec := httptest.NewRecorder()
h.handleVercelStreamPrepare(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
}
var body map[string]any
if err := json.NewDecoder(rec.Body).Decode(&body); err != nil {
t.Fatalf("decode failed: %v", err)
}
finalPrompt, _ := body["final_prompt"].(string)
payload, _ := body["payload"].(map[string]any)
payloadPrompt, _ := payload["prompt"].(string)
// Both prompt copies are held to the same two checks: halfwidth markup is
// present, and neither U+FF5C nor the legacy wording is.
for label, promptText := range map[string]string{"final_prompt": finalPrompt, "payload.prompt": payloadPrompt} {
if !strings.Contains(promptText, "<|DSML|tool_calls>") || !strings.Contains(promptText, "Tag punctuation alphabet: ASCII < > / = \" plus the halfwidth pipe |.") {
t.Fatalf("expected %s to contain halfwidth DSML tool instructions, got %q", label, promptText)
}
// NOTE(review): the legacy phrase is assembled from two literals,
// presumably so a text search for it does not match this test — confirm.
if strings.Contains(promptText, "\uff5c") || strings.Contains(promptText, "full"+"width vertical bar") {
t.Fatalf("expected %s not to contain legacy pipe guidance, got %q", label, promptText)
}
}
toolNames, _ := body["tool_names"].([]any)
if len(toolNames) != 1 || toolNames[0] != "search" {
t.Fatalf("expected prepared tool names to align with request tools, got %#v", body["tool_names"])
}
}
func TestHandleVercelStreamPrepareMapsCurrentInputFileManagedAuthFailureTo401(t *testing.T) {
t.Setenv("VERCEL", "1")
t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret")

View File

@@ -103,7 +103,7 @@ func TestNormalizeOpenAIResponsesRequestAlwaysAcceptsWideInput(t *testing.T) {
if out.Surface != "openai_responses" {
t.Fatalf("unexpected surface: %q", out.Surface)
}
if !strings.Contains(out.FinalPrompt, "<User>hi") {
if !strings.Contains(out.FinalPrompt, "<|User|>hi") {
t.Fatalf("unexpected final prompt: %q", out.FinalPrompt)
}
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/http/httptest"
"strings"
@@ -41,8 +42,12 @@ func (m *inlineUploadDSStub) UploadFile(ctx context.Context, _ *auth.RequestAuth
if m.uploadErr != nil {
return nil, m.uploadErr
}
id := "file-inline-1"
if len(m.uploadCalls) > 1 {
id = "file-inline-" + fmt.Sprint(len(m.uploadCalls))
}
return &dsclient.UploadFileResult{
ID: "file-inline-1",
ID: id,
Filename: req.Filename,
Bytes: int64(len(req.Data)),
Status: "uploaded",

View File

@@ -15,6 +15,7 @@ import (
const (
currentInputFilename = promptcompat.CurrentInputContextFilename
currentToolsFilename = promptcompat.CurrentToolsContextFilename
currentInputContentType = "text/plain; charset=utf-8"
currentInputPurpose = "assistants"
)
@@ -50,6 +51,7 @@ func (s Service) ApplyCurrentInputFile(ctx context.Context, a *auth.RequestAuth,
if strings.TrimSpace(fileText) == "" {
return stdReq, errors.New("current user input file produced empty transcript")
}
toolsText, _ := promptcompat.BuildOpenAIToolsContextTranscript(stdReq.ToolsRaw, stdReq.ToolChoice)
modelType := "default"
if resolvedType, ok := config.GetModelType(stdReq.ResolvedModel); ok {
modelType = resolvedType
@@ -69,21 +71,44 @@ func (s Service) ApplyCurrentInputFile(ctx context.Context, a *auth.RequestAuth,
return stdReq, errors.New("upload current user input file returned empty file id")
}
toolFileID := ""
if strings.TrimSpace(toolsText) != "" {
result, err := s.DS.UploadFile(ctx, a, dsclient.UploadFileRequest{
Filename: currentToolsFilename,
ContentType: currentInputContentType,
Purpose: currentInputPurpose,
ModelType: modelType,
Data: []byte(toolsText),
}, 3)
if err != nil {
return stdReq, fmt.Errorf("upload current tools file: %w", err)
}
toolFileID = strings.TrimSpace(result.ID)
if toolFileID == "" {
return stdReq, errors.New("upload current tools file returned empty file id")
}
}
messages := []any{
map[string]any{
"role": "user",
"content": currentInputFilePrompt(),
"content": currentInputFilePrompt(toolFileID != ""),
},
}
stdReq.Messages = messages
stdReq.HistoryText = fileText
stdReq.CurrentInputFileApplied = true
stdReq.RefFileIDs = prependUniqueRefFileID(stdReq.RefFileIDs, fileID)
stdReq.FinalPrompt, stdReq.ToolNames = promptcompat.BuildOpenAIPrompt(messages, stdReq.ToolsRaw, "", stdReq.ToolChoice, stdReq.Thinking)
stdReq.RefFileIDs = prependUniqueRefFileIDs(stdReq.RefFileIDs, fileID, toolFileID)
stdReq.FinalPrompt, stdReq.ToolNames = promptcompat.BuildOpenAIPromptWithToolInstructionsOnly(messages, stdReq.ToolsRaw, "", stdReq.ToolChoice, stdReq.Thinking)
// Token accounting must reflect the actual downstream context:
// the uploaded DS2API_HISTORY.txt file content + the continuation live prompt.
stdReq.PromptTokenText = fileText + "\n" + stdReq.FinalPrompt
// uploaded context files + the continuation live prompt.
tokenParts := []string{fileText}
if strings.TrimSpace(toolsText) != "" {
tokenParts = append(tokenParts, toolsText)
}
tokenParts = append(tokenParts, stdReq.FinalPrompt)
stdReq.PromptTokenText = strings.Join(tokenParts, "\n")
return stdReq, nil
}
@@ -106,23 +131,40 @@ func latestUserInputForFile(messages []any) (int, string) {
return -1, ""
}
func currentInputFilePrompt() string {
return "Continue from the latest state in the attached DS2API_HISTORY.txt context. Treat it as the current working state and answer the latest user request directly."
// currentInputFilePrompt returns the live-prompt text that tells the model to
// continue from the attached DS2API_HISTORY.txt context. When hasToolsFile is
// true, a second sentence pointing at DS2API_TOOLS.txt is appended.
func currentInputFilePrompt(hasToolsFile bool) string {
	const (
		historyInstruction = "Continue from the latest state in the attached DS2API_HISTORY.txt context. Treat it as the current working state and answer the latest user request directly."
		toolsInstruction   = " Available tool descriptions and parameter schemas are attached in DS2API_TOOLS.txt; use only those tools and follow the tool-call format rules in this prompt."
	)
	if !hasToolsFile {
		return historyInstruction
	}
	return historyInstruction + toolsInstruction
}
func prependUniqueRefFileID(existing []string, fileID string) []string {
fileID = strings.TrimSpace(fileID)
if fileID == "" {
return existing
}
out := make([]string, 0, len(existing)+1)
out = append(out, fileID)
for _, id := range existing {
trimmed := strings.TrimSpace(id)
if trimmed == "" || strings.EqualFold(trimmed, fileID) {
// prependUniqueRefFileIDs returns a freshly allocated slice holding fileIDs
// followed by existing. Every entry is whitespace-trimmed, blank entries are
// dropped, and duplicates are removed case-insensitively, keeping the first
// occurrence with its original casing.
func prependUniqueRefFileIDs(existing []string, fileIDs ...string) []string {
	merged := make([]string, 0, len(existing)+len(fileIDs))
	seen := map[string]struct{}{}
	// The new ids are visited first so they win over equal ids in existing.
	for _, group := range [][]string{fileIDs, existing} {
		for _, raw := range group {
			id := strings.TrimSpace(raw)
			if id == "" {
				continue
			}
			folded := strings.ToLower(id)
			if _, dup := seen[folded]; dup {
				continue
			}
			seen[folded] = struct{}{}
			merged = append(merged, id)
		}
	}
	return merged
}

View File

@@ -84,7 +84,7 @@ func TestBuildOpenAICurrentInputContextTranscriptUsesNumberedHistorySections(t *
"latest user turn",
"[reasoning_content]",
"hidden reasoning",
"<DSMLtool_calls>",
"<|DSML|tool_calls>",
} {
if !strings.Contains(transcript, want) {
t.Fatalf("expected transcript to contain %q, got %q", want, transcript)
@@ -380,6 +380,79 @@ func TestApplyCurrentInputFileUploadsFullContextFile(t *testing.T) {
}
}
// TestApplyCurrentInputFileUploadsToolsContextSeparately normalizes an OpenAI
// chat request that declares one tool, runs applyCurrentInputFile on it, and
// checks that history and tools are uploaded as two separate files, that the
// live prompt keeps the tool-call format rules but omits the tool listing,
// and that RefFileIDs and PromptTokenText account for both uploads.
func TestApplyCurrentInputFileUploadsToolsContextSeparately(t *testing.T) {
ds := &inlineUploadDSStub{}
h := &openAITestSurface{
Store: mockOpenAIConfig{
currentInputEnabled: true,
currentInputMin: 0,
},
DS: ds,
}
req := map[string]any{
"model": "deepseek-v4-flash",
"messages": historySplitTestMessages(),
"tools": []any{
map[string]any{
"type": "function",
"function": map[string]any{
"name": "search",
"description": "search docs",
"parameters": map[string]any{
"type": "object",
},
},
},
},
}
stdReq, err := promptcompat.NormalizeOpenAIChatRequest(h.Store, req, "")
if err != nil {
t.Fatalf("normalize failed: %v", err)
}
out, err := h.applyCurrentInputFile(context.Background(), &auth.RequestAuth{DeepSeekToken: "token"}, stdReq)
if err != nil {
t.Fatalf("apply current input file failed: %v", err)
}
// Two uploads in a fixed order: history first, tools second.
if len(ds.uploadCalls) != 2 {
t.Fatalf("expected history and tools uploads, got %d", len(ds.uploadCalls))
}
if ds.uploadCalls[0].Filename != "DS2API_HISTORY.txt" {
t.Fatalf("expected first upload to be DS2API_HISTORY.txt, got %q", ds.uploadCalls[0].Filename)
}
if ds.uploadCalls[1].Filename != "DS2API_TOOLS.txt" {
t.Fatalf("expected second upload to be DS2API_TOOLS.txt, got %q", ds.uploadCalls[1].Filename)
}
// The history file must not contain the tool listing.
historyText := string(ds.uploadCalls[0].Data)
if strings.Contains(historyText, "You have access to these tools") || strings.Contains(historyText, "Description: search docs") {
t.Fatalf("history transcript should not embed tool descriptions, got %q", historyText)
}
// The tools file carries name, description and parameter schema, but not
// the tool-call format rules.
toolsText := string(ds.uploadCalls[1].Data)
for _, want := range []string{"# DS2API_TOOLS.txt", "Tool: search", "Description: search docs", `Parameters: {"type":"object"}`} {
if !strings.Contains(toolsText, want) {
t.Fatalf("expected tools transcript to contain %q, got %q", want, toolsText)
}
}
if strings.Contains(toolsText, "TOOL CALL FORMAT") {
t.Fatalf("tools transcript should not duplicate tool format instructions, got %q", toolsText)
}
// The live prompt references both files and keeps the format rules, while
// leaving the tool descriptions and schemas to the uploaded file.
if !strings.Contains(out.FinalPrompt, "Continue from the latest state in the attached DS2API_HISTORY.txt context.") || !strings.Contains(out.FinalPrompt, "DS2API_TOOLS.txt") {
t.Fatalf("expected live prompt to reference both context files, got %q", out.FinalPrompt)
}
if !strings.Contains(out.FinalPrompt, "TOOL CALL FORMAT") || !strings.Contains(out.FinalPrompt, "Remember: The ONLY valid way to use tools") {
t.Fatalf("expected live prompt to retain tool format instructions, got %q", out.FinalPrompt)
}
if strings.Contains(out.FinalPrompt, "You have access to these tools") || strings.Contains(out.FinalPrompt, "Description: search docs") || strings.Contains(out.FinalPrompt, "Parameters:") {
t.Fatalf("expected live prompt to omit tool descriptions after tools upload, got %q", out.FinalPrompt)
}
// NOTE(review): "file-inline-1" and "file-inline-2" are presumably the ids
// inlineUploadDSStub returns for its first and second upload — confirm
// against the stub.
if len(out.RefFileIDs) < 2 || out.RefFileIDs[0] != "file-inline-1" || out.RefFileIDs[1] != "file-inline-2" {
t.Fatalf("expected history and tools file ids first, got %#v", out.RefFileIDs)
}
// Token accounting text must include the content of both uploaded files.
if !strings.Contains(out.PromptTokenText, "# DS2API_HISTORY.txt") || !strings.Contains(out.PromptTokenText, "# DS2API_TOOLS.txt") || !strings.Contains(out.PromptTokenText, "Description: search docs") {
t.Fatalf("expected prompt token text to include uploaded history and tools content, got %q", out.PromptTokenText)
}
}
func TestApplyCurrentInputFileCarriesHistoryText(t *testing.T) {
ds := &inlineUploadDSStub{}
h := &openAITestSurface{

View File

@@ -19,7 +19,7 @@ func TestSanitizeLeakedOutputRemovesLeakedWireToolCallAndResult(t *testing.T) {
}
func TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers(t *testing.T) {
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<end▁of▁thinking>D<end▁of▁sentence>E<| end_of_toolresults |>F<end▁of▁instructions>G"
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<|end▁of▁thinking|>D<|end▁of▁sentence|>E<| end_of_toolresults |>F<|end▁of▁instructions|>G"
got := sanitizeLeakedOutput(raw)
if got != "ABCDEFG" {
t.Fatalf("unexpected sanitize result for meta markers: %q", got)
@@ -27,7 +27,7 @@ func TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers(t *testing.T) {
}
func TestSanitizeLeakedOutputRemovesThinkAndBosMarkers(t *testing.T) {
raw := "A<think>B</think>C<begin▁of▁sentence>D<| begin_of_sentence |>E<begin_of_sentence>F"
raw := "A<think>B</think>C<|begin▁of▁sentence|>D<| begin_of_sentence |>E<|begin_of_sentence|>F"
got := sanitizeLeakedOutput(raw)
if got != "ABCDEF" {
t.Fatalf("unexpected sanitize result for think/BOS markers: %q", got)
@@ -35,7 +35,7 @@ func TestSanitizeLeakedOutputRemovesThinkAndBosMarkers(t *testing.T) {
}
func TestSanitizeLeakedOutputRemovesThoughtMarkers(t *testing.T) {
raw := "A<▁of▁thought>B<| of_thought |>C<| begin_of_thought |>D<| end_of_thought |>E"
raw := "A<|▁of▁thought|>B<| of_thought |>C<| begin_of_thought |>D<| end_of_thought |>E"
got := sanitizeLeakedOutput(raw)
if got != "ABCDE" {
t.Fatalf("unexpected sanitize result for leaked thought markers: %q", got)
@@ -51,7 +51,7 @@ func TestSanitizeLeakedOutputRemovesDanglingThinkBlock(t *testing.T) {
}
func TestSanitizeLeakedOutputRemovesCompleteDSMLToolCallWrapper(t *testing.T) {
raw := "前置文本\n<DSMLtool_calls>\n<DSMLinvoke name=\"Bash\">\n<DSMLparameter name=\"command\"></DSMLparameter>\n</DSMLinvoke>\n</DSMLtool_calls>\n后置文本"
raw := "前置文本\n<|DSML|tool_calls>\n<|DSML|invoke name=\"Bash\">\n<|DSML|parameter name=\"command\"></|DSML|parameter>\n</|DSML|invoke>\n</|DSML|tool_calls>\n后置文本"
got := sanitizeLeakedOutput(raw)
if got != "前置文本\n\n后置文本" {
t.Fatalf("unexpected sanitize result for leaked dsml wrapper: %q", got)

View File

@@ -14,20 +14,20 @@ var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*
var leakedThinkTagPattern = regexp.MustCompile(`(?is)</?\s*think\s*>`)
// leakedBOSMarkerPattern matches DeepSeek BOS markers in BOTH forms:
// - ASCII underscore: <｜begin_of_sentence｜>
// - U+2581 variant: <｜begin▁of▁sentence｜>
var leakedBOSMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*begin[_▁]of[_▁]sentence\s*[\|]>`)
// - ASCII underscore: <|begin_of_sentence|>
// - U+2581 variant: <|begin▁of▁sentence|>
//
// The delimiter class accepts both the halfwidth pipe (U+007C) and the
// fullwidth pipe (U+FF5C), so a BOS marker emitted with either delimiter is
// matched. Listing the halfwidth pipe twice would silently stop matching
// the fullwidth form.
var leakedBOSMarkerPattern = regexp.MustCompile(`(?i)<[|｜]\s*begin[_▁]of[_▁]sentence\s*[|｜]>`)
// leakedThoughtMarkerPattern matches leaked thought control markers in both
// explicit and compact forms:
// - ASCII underscore: <| of_thought |>, <| begin_of_thought |>
// - U+2581 variant: <｜▁of▁thought｜>, <｜begin▁of▁thought｜>
var leakedThoughtMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[\|]>`)
// - U+2581 variant: <|▁of▁thought|>, <|begin▁of▁thought|>
//
// The delimiter class accepts both the halfwidth pipe (U+007C) and the
// fullwidth pipe (U+FF5C), so a thought marker emitted with either delimiter
// is matched. Listing the halfwidth pipe twice would silently stop matching
// the fullwidth form.
var leakedThoughtMarkerPattern = regexp.MustCompile(`(?i)<[|｜]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[|｜]>`)
// leakedMetaMarkerPattern matches the remaining DeepSeek special tokens in BOTH forms:
// - ASCII underscore: <｜end_of_sentence｜>, <｜end_of_toolresults｜>, <｜end_of_instructions｜>
// - U+2581 variant: <｜end▁of▁sentence｜>, <｜end▁of▁toolresults｜>, <｜end▁of▁instructions｜>
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[\|]>`)
// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|>
// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|>
//
// The delimiter class accepts both the halfwidth pipe (U+007C) and the
// fullwidth pipe (U+FF5C), so a meta marker emitted with either delimiter is
// matched. Listing the halfwidth pipe twice would silently stop matching
// the fullwidth form.
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|｜]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|｜]>`)
// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
// when the sieve fails to capture them. These are applied only to complete