revert: replace fullwidth pipe | with halfwidth | in DSML tool markup

PR #460 introduced fullwidth pipe characters (|, U+FF5C) in DSML tool call formatting
to improve parsing robustness, but models exposed to these fullwidth pipes in
system prompts exhibit significantly higher rates of tool output hallucinations.
Reverting to halfwidth pipes (|) drastically reduces tokenizer/perplexity-driven
hallucinations while retaining the existing confusable-hardening in the parser.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-05-10 15:18:54 +08:00
parent 3beb31309f
commit cee8757d14
45 changed files with 725 additions and 342 deletions

View File

@@ -2,6 +2,7 @@ package chat
import (
"context"
"fmt"
"io"
"net/http"
"strings"
@@ -148,8 +149,12 @@ func (m *inlineUploadDSStub) UploadFile(ctx context.Context, _ *auth.RequestAuth
if m.uploadErr != nil {
return nil, m.uploadErr
}
id := "file-inline-1"
if len(m.uploadCalls) > 1 {
id = "file-inline-" + fmt.Sprint(len(m.uploadCalls))
}
return &dsclient.UploadFileResult{
ID: "file-inline-1",
ID: id,
Filename: req.Filename,
Bytes: int64(len(req.Data)),
Status: "uploaded",

View File

@@ -141,6 +141,71 @@ func TestHandleVercelStreamPrepareAppliesCurrentInputFile(t *testing.T) {
}
}
// TestHandleVercelStreamPrepareUsesHalfwidthDSMLToolPrompt posts a streaming
// chat completion request declaring a single function tool to the Vercel
// stream-prepare handler, then checks that both final_prompt and
// payload.prompt in the response contain the halfwidth-pipe DSML tool
// instructions, contain neither the fullwidth pipe (U+FF5C) nor the legacy
// pipe guidance, and that tool_names lists exactly the requested tool.
func TestHandleVercelStreamPrepareUsesHalfwidthDSMLToolPrompt(t *testing.T) {
	t.Setenv("VERCEL", "1")
	t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret")

	handler := &Handler{
		Store: mockOpenAIConfig{},
		Auth:  streamStatusAuthStub{},
		DS:    &inlineUploadDSStub{},
	}

	// Request fixture: one user message plus one "search" function tool.
	userMessage := map[string]any{"role": "user", "content": "search docs"}
	searchTool := map[string]any{
		"type": "function",
		"function": map[string]any{
			"name":        "search",
			"description": "search docs",
			"parameters": map[string]any{
				"type": "object",
				"properties": map[string]any{
					"query": map[string]any{"type": "string"},
				},
				"required": []any{"query"},
			},
		},
	}
	rawRequest, _ := json.Marshal(map[string]any{
		"model":    "deepseek-v4-flash",
		"messages": []any{userMessage},
		"tools":    []any{searchTool},
		"stream":   true,
	})

	request := httptest.NewRequest(
		http.MethodPost,
		"/v1/chat/completions?__stream_prepare=1",
		strings.NewReader(string(rawRequest)),
	)
	for _, header := range [][2]string{
		{"Authorization", "Bearer direct-token"},
		{"Content-Type", "application/json"},
		{"X-Ds2-Internal-Token", "stream-secret"},
	} {
		request.Header.Set(header[0], header[1])
	}

	recorder := httptest.NewRecorder()
	handler.handleVercelStreamPrepare(recorder, request)
	if status := recorder.Code; status != http.StatusOK {
		t.Fatalf("expected 200, got %d body=%s", status, recorder.Body.String())
	}

	var decoded map[string]any
	if err := json.NewDecoder(recorder.Body).Decode(&decoded); err != nil {
		t.Fatalf("decode failed: %v", err)
	}

	// Failed type assertions leave zero values (empty string / nil map), which
	// then fail the required-content check below.
	payload, _ := decoded["payload"].(map[string]any)
	finalPrompt, _ := decoded["final_prompt"].(string)
	payloadPrompt, _ := payload["prompt"].(string)

	prompts := map[string]string{
		"final_prompt":   finalPrompt,
		"payload.prompt": payloadPrompt,
	}
	for label, text := range prompts {
		hasTag := strings.Contains(text, "<|DSML|tool_calls>")
		hasGuidance := strings.Contains(text, "Tag punctuation alphabet: ASCII < > / = \" plus the halfwidth pipe |.")
		if !(hasTag && hasGuidance) {
			t.Fatalf("expected %s to contain halfwidth DSML tool instructions, got %q", label, text)
		}

		hasFullwidthPipe := strings.Contains(text, "\uff5c")
		// NOTE(review): the split literal presumably keeps the legacy phrase out
		// of repo-wide text searches — confirm before joining it.
		hasLegacyWording := strings.Contains(text, "full"+"width vertical bar")
		if hasFullwidthPipe || hasLegacyWording {
			t.Fatalf("expected %s not to contain legacy pipe guidance, got %q", label, text)
		}
	}

	toolNames, _ := decoded["tool_names"].([]any)
	if len(toolNames) == 1 && toolNames[0] == "search" {
		return
	}
	t.Fatalf("expected prepared tool names to align with request tools, got %#v", decoded["tool_names"])
}
func TestHandleVercelStreamPrepareMapsCurrentInputFileManagedAuthFailureTo401(t *testing.T) {
t.Setenv("VERCEL", "1")
t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret")