revert: replace fullwidth pipe | with halfwidth | in DSML tool markup

PR #460 introduced fullwidth pipe characters (|) in DSML tool call formatting
to improve parsing robustness, but models exposed to these fullwidth pipes in
system prompts exhibit significantly higher rates of tool output hallucinations.
Reverting to halfwidth pipes (|) drastically reduces tokenizer/perplexity-driven
hallucinations while retaining the existing confusable-hardening in the parser.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-05-10 15:18:54 +08:00
parent 3beb31309f
commit cee8757d14
45 changed files with 725 additions and 342 deletions

View File

@@ -93,7 +93,11 @@ func (d *claudeCurrentInputDS) GetPow(context.Context, *auth.RequestAuth, int) (
// UploadFile records req in d.uploads and returns a stub upload result.
//
// The first recorded upload is answered with the ID "file-claude-history";
// every later upload is answered with "file-claude-tools", so a test can tell
// from the returned IDs which upload each reference belongs to. It never
// returns an error.
func (d *claudeCurrentInputDS) UploadFile(_ context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) {
	d.uploads = append(d.uploads, req)
	// Choose the ID after appending: len(d.uploads) == 1 means this is the first upload.
	id := "file-claude-history"
	if len(d.uploads) > 1 {
		id = "file-claude-tools"
	}
	return &dsclient.UploadFileResult{ID: id}, nil
}
func (d *claudeCurrentInputDS) CallCompletion(_ context.Context, _ *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) {
@@ -156,3 +160,47 @@ func TestClaudeDirectAppliesCurrentInputFile(t *testing.T) {
t.Fatalf("expected persisted message to match upstream continuation prompt, got %#v", full.Messages)
}
}
// TestClaudeCurrentInputFileUploadsToolsSeparately posts a Claude messages
// request that declares one tool and checks that the handler performs two
// uploads (history first, tools second), keeps tool descriptions out of both
// the history transcript and the live prompt, and lists both returned file IDs
// at the front of the completion payload's ref_file_ids.
func TestClaudeCurrentInputFileUploadsToolsSeparately(t *testing.T) {
	const body = `{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hello from claude"}],"tools":[{"name":"search","description":"Search docs","input_schema":{"type":"object"}}],"max_tokens":1024}`

	upstream := &claudeCurrentInputDS{}
	handler := &Handler{
		Store: mockClaudeConfig{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
		Auth:  claudeCurrentInputAuth{},
		DS:    upstream,
	}

	request := httptest.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(body))
	request.Header.Set("Content-Type", "application/json")
	recorder := httptest.NewRecorder()
	handler.Messages(recorder, request)

	if status := recorder.Code; status != http.StatusOK {
		t.Fatalf("expected 200, got %d body=%s", status, recorder.Body.String())
	}

	// Exactly two uploads, in a fixed order: history, then tools.
	if count := len(upstream.uploads); count != 2 {
		t.Fatalf("expected history and tools uploads, got %d", count)
	}
	namesMatch := upstream.uploads[0].Filename == "DS2API_HISTORY.txt" &&
		upstream.uploads[1].Filename == "DS2API_TOOLS.txt"
	if !namesMatch {
		t.Fatalf("unexpected upload filenames: %#v", upstream.uploads)
	}

	// The history transcript must be free of any tool prompt text.
	historyText := string(upstream.uploads[0].Data)
	for _, forbidden := range []string{"You have access to these tools", "Description: Search docs"} {
		if strings.Contains(historyText, forbidden) {
			t.Fatalf("history transcript should not embed tool descriptions, got %q", historyText)
		}
	}

	// The tools transcript carries the header and the tool's schema details.
	toolsText := string(upstream.uploads[1].Data)
	for _, required := range []string{"# DS2API_TOOLS.txt", "Tool: search", "Description: Search docs"} {
		if !strings.Contains(toolsText, required) {
			t.Fatalf("expected tools transcript to include tool schema, got %q", toolsText)
		}
	}

	// Both uploaded file IDs lead the reference list sent upstream.
	refs, _ := upstream.payload["ref_file_ids"].([]any)
	refsOK := len(refs) >= 2 && refs[0] == "file-claude-history" && refs[1] == "file-claude-tools"
	if !refsOK {
		t.Fatalf("expected history and tools ref ids first, got %#v", upstream.payload["ref_file_ids"])
	}

	// The live prompt points at the tools file and keeps the format section,
	// but does not inline the tool description itself.
	livePrompt, _ := upstream.payload["prompt"].(string)
	mentionsTools := strings.Contains(livePrompt, "DS2API_TOOLS.txt")
	keepsFormat := strings.Contains(livePrompt, "TOOL CALL FORMAT")
	if !(mentionsTools && keepsFormat) {
		t.Fatalf("expected live prompt to reference tools file and retain format instructions, got %q", livePrompt)
	}
	if strings.Contains(livePrompt, "Description: Search docs") {
		t.Fatalf("live prompt should not inline tool descriptions, got %q", livePrompt)
	}
}

View File

@@ -93,10 +93,10 @@ func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) {
t.Fatalf("expected call id preserved, got %#v", call)
}
content, _ := m["content"].(string)
if !containsStr(content, "<DSMLtool_calls>") || !containsStr(content, `<DSMLinvoke name="search_web">`) {
if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) {
t.Fatalf("expected assistant content to include DSML tool call history, got %q", content)
}
if !containsStr(content, `<DSMLparameter name="query"><![CDATA[latest]]></DSMLparameter>`) {
if !containsStr(content, `<|DSML|parameter name="query"><![CDATA[latest]]></|DSML|parameter>`) {
t.Fatalf("expected assistant content to include serialized parameters, got %q", content)
}
}
@@ -133,7 +133,7 @@ func TestNormalizeClaudeMessagesPreservesThinkingOnToolUseHistory(t *testing.T)
if !containsStr(prompt, "[reasoning_content]\nneed live search before answering\n[/reasoning_content]") {
t.Fatalf("expected thinking in prompt history, got %q", prompt)
}
if !containsStr(prompt, `<DSMLinvoke name="search_web">`) {
if !containsStr(prompt, `<|DSML|invoke name="search_web">`) {
t.Fatalf("expected tool call in prompt history, got %q", prompt)
}
}
@@ -329,7 +329,7 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) {
if !containsStr(prompt, "Search the web") {
t.Fatalf("expected description in prompt")
}
if !containsStr(prompt, "<DSMLtool_calls>") {
if !containsStr(prompt, "<|DSML|tool_calls>") {
t.Fatalf("expected DSML tool_calls format in prompt")
}
if !containsStr(prompt, "TOOL CALL FORMAT") {

View File

@@ -52,7 +52,7 @@ func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNorma
RequestedModel: strings.TrimSpace(model),
ResolvedModel: dsModel,
ResponseModel: strings.TrimSpace(model),
Messages: payload["messages"].([]any),
Messages: normalizedMessages,
PromptTokenText: finalPrompt,
ToolsRaw: toolsRequested,
FinalPrompt: finalPrompt,