revert: replace fullwidth pipe (U+FF5C) with halfwidth | in DSML tool markup

PR #460 introduced fullwidth pipe characters (U+FF5C) in DSML tool call formatting
to improve parsing robustness, but models exposed to these fullwidth pipes in
system prompts exhibit significantly higher rates of tool output hallucinations.
Reverting to halfwidth pipes (|) drastically reduces tokenizer/perplexity-driven
hallucinations while retaining the existing confusable-hardening in the parser.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-05-10 15:18:54 +08:00
parent 3beb31309f
commit cee8757d14
45 changed files with 725 additions and 342 deletions

View File

@@ -93,7 +93,11 @@ func (d *claudeCurrentInputDS) GetPow(context.Context, *auth.RequestAuth, int) (
func (d *claudeCurrentInputDS) UploadFile(_ context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) {
d.uploads = append(d.uploads, req)
return &dsclient.UploadFileResult{ID: "file-claude-history"}, nil
id := "file-claude-history"
if len(d.uploads) > 1 {
id = "file-claude-tools"
}
return &dsclient.UploadFileResult{ID: id}, nil
}
func (d *claudeCurrentInputDS) CallCompletion(_ context.Context, _ *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) {
@@ -156,3 +160,47 @@ func TestClaudeDirectAppliesCurrentInputFile(t *testing.T) {
t.Fatalf("expected persisted message to match upstream continuation prompt, got %#v", full.Messages)
}
}
// TestClaudeCurrentInputFileUploadsToolsSeparately sends a Claude /v1/messages
// request that declares one tool and checks that the handler uploads two
// files (DS2API_HISTORY.txt, then DS2API_TOOLS.txt), lists both file ids first
// in the upstream ref_file_ids, and keeps the tool-call format instructions
// in the live prompt without inlining the tool description.
func TestClaudeCurrentInputFileUploadsToolsSeparately(t *testing.T) {
ds := &claudeCurrentInputDS{}
h := &Handler{
Store: mockClaudeConfig{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
Auth: claudeCurrentInputAuth{},
DS: ds,
}
reqBody := `{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hello from claude"}],"tools":[{"name":"search","description":"Search docs","input_schema":{"type":"object"}}],"max_tokens":1024}`
req := httptest.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(reqBody))
req.Header.Set("Content-Type", "application/json")
rec := httptest.NewRecorder()
h.Messages(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
}
// Exactly two uploads are expected, in a fixed order: history first, tools second.
if len(ds.uploads) != 2 {
t.Fatalf("expected history and tools uploads, got %d", len(ds.uploads))
}
if ds.uploads[0].Filename != "DS2API_HISTORY.txt" || ds.uploads[1].Filename != "DS2API_TOOLS.txt" {
t.Fatalf("unexpected upload filenames: %#v", ds.uploads)
}
// The history file must not carry the tool catalogue.
historyText := string(ds.uploads[0].Data)
if strings.Contains(historyText, "You have access to these tools") || strings.Contains(historyText, "Description: Search docs") {
t.Fatalf("history transcript should not embed tool descriptions, got %q", historyText)
}
// The tools file carries the header, the tool name and its description.
toolsText := string(ds.uploads[1].Data)
if !strings.Contains(toolsText, "# DS2API_TOOLS.txt") || !strings.Contains(toolsText, "Tool: search") || !strings.Contains(toolsText, "Description: Search docs") {
t.Fatalf("expected tools transcript to include tool schema, got %q", toolsText)
}
// A failed type assertion leaves refIDs nil, which the length check below reports.
refIDs, _ := ds.payload["ref_file_ids"].([]any)
if len(refIDs) < 2 || refIDs[0] != "file-claude-history" || refIDs[1] != "file-claude-tools" {
t.Fatalf("expected history and tools ref ids first, got %#v", ds.payload["ref_file_ids"])
}
// The live prompt points at the tools file and keeps the format rules,
// but the tool description itself must only live in the uploaded file.
prompt, _ := ds.payload["prompt"].(string)
if !strings.Contains(prompt, "DS2API_TOOLS.txt") || !strings.Contains(prompt, "TOOL CALL FORMAT") {
t.Fatalf("expected live prompt to reference tools file and retain format instructions, got %q", prompt)
}
if strings.Contains(prompt, "Description: Search docs") {
t.Fatalf("live prompt should not inline tool descriptions, got %q", prompt)
}
}

View File

@@ -93,10 +93,10 @@ func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) {
t.Fatalf("expected call id preserved, got %#v", call)
}
content, _ := m["content"].(string)
if !containsStr(content, "<DSMLtool_calls>") || !containsStr(content, `<DSMLinvoke name="search_web">`) {
if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) {
t.Fatalf("expected assistant content to include DSML tool call history, got %q", content)
}
if !containsStr(content, `<DSMLparameter name="query"><![CDATA[latest]]></DSMLparameter>`) {
if !containsStr(content, `<|DSML|parameter name="query"><![CDATA[latest]]></|DSML|parameter>`) {
t.Fatalf("expected assistant content to include serialized parameters, got %q", content)
}
}
@@ -133,7 +133,7 @@ func TestNormalizeClaudeMessagesPreservesThinkingOnToolUseHistory(t *testing.T)
if !containsStr(prompt, "[reasoning_content]\nneed live search before answering\n[/reasoning_content]") {
t.Fatalf("expected thinking in prompt history, got %q", prompt)
}
if !containsStr(prompt, `<DSMLinvoke name="search_web">`) {
if !containsStr(prompt, `<|DSML|invoke name="search_web">`) {
t.Fatalf("expected tool call in prompt history, got %q", prompt)
}
}
@@ -329,7 +329,7 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) {
if !containsStr(prompt, "Search the web") {
t.Fatalf("expected description in prompt")
}
if !containsStr(prompt, "<DSMLtool_calls>") {
if !containsStr(prompt, "<|DSML|tool_calls>") {
t.Fatalf("expected DSML tool_calls format in prompt")
}
if !containsStr(prompt, "TOOL CALL FORMAT") {

View File

@@ -52,7 +52,7 @@ func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNorma
RequestedModel: strings.TrimSpace(model),
ResolvedModel: dsModel,
ResponseModel: strings.TrimSpace(model),
Messages: payload["messages"].([]any),
Messages: normalizedMessages,
PromptTokenText: finalPrompt,
ToolsRaw: toolsRequested,
FinalPrompt: finalPrompt,

View File

@@ -89,7 +89,7 @@ func TestGeminiMessagesFromRequestPreservesThoughtOnFunctionCallHistory(t *testi
if !strings.Contains(prompt, "[reasoning_content]\nneed current state before answering\n[/reasoning_content]") {
t.Fatalf("expected thought in prompt history, got %q", prompt)
}
if !strings.Contains(prompt, `<DSMLinvoke name="search_web">`) {
if !strings.Contains(prompt, `<|DSML|invoke name="search_web">`) {
t.Fatalf("expected tool call in prompt history, got %q", prompt)
}
}

View File

@@ -67,7 +67,11 @@ func (m *testGeminiDS) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (st
//nolint:unused // reserved test double for native Gemini DS-call path coverage.
func (m *testGeminiDS) UploadFile(_ context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) {
m.uploadCalls = append(m.uploadCalls, req)
return &dsclient.UploadFileResult{ID: "file-gemini-history"}, nil
id := "file-gemini-history"
if len(m.uploadCalls) > 1 {
id = "file-gemini-tools"
}
return &dsclient.UploadFileResult{ID: id}, nil
}
//nolint:unused // reserved test double for native Gemini DS-call path coverage.

View File

@@ -2,6 +2,7 @@ package chat
import (
"context"
"fmt"
"io"
"net/http"
"strings"
@@ -148,8 +149,12 @@ func (m *inlineUploadDSStub) UploadFile(ctx context.Context, _ *auth.RequestAuth
if m.uploadErr != nil {
return nil, m.uploadErr
}
id := "file-inline-1"
if len(m.uploadCalls) > 1 {
id = "file-inline-" + fmt.Sprint(len(m.uploadCalls))
}
return &dsclient.UploadFileResult{
ID: "file-inline-1",
ID: id,
Filename: req.Filename,
Bytes: int64(len(req.Data)),
Status: "uploaded",

View File

@@ -141,6 +141,71 @@ func TestHandleVercelStreamPrepareAppliesCurrentInputFile(t *testing.T) {
}
}
// TestHandleVercelStreamPrepareUsesHalfwidthDSMLToolPrompt calls the Vercel
// stream-prepare endpoint with one function tool and checks that both the
// returned final_prompt and payload.prompt contain the halfwidth-pipe DSML
// tool instructions, contain no U+FF5C character or legacy fullwidth
// guidance text, and that tool_names is exactly ["search"].
func TestHandleVercelStreamPrepareUsesHalfwidthDSMLToolPrompt(t *testing.T) {
t.Setenv("VERCEL", "1")
t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret")
h := &Handler{
Store: mockOpenAIConfig{},
Auth: streamStatusAuthStub{},
DS: &inlineUploadDSStub{},
}
reqBody, _ := json.Marshal(map[string]any{
"model": "deepseek-v4-flash",
"messages": []any{
map[string]any{"role": "user", "content": "search docs"},
},
"tools": []any{
map[string]any{
"type": "function",
"function": map[string]any{
"name": "search",
"description": "search docs",
"parameters": map[string]any{
"type": "object",
"properties": map[string]any{
"query": map[string]any{"type": "string"},
},
"required": []any{"query"},
},
},
},
},
"stream": true,
})
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions?__stream_prepare=1", strings.NewReader(string(reqBody)))
req.Header.Set("Authorization", "Bearer direct-token")
req.Header.Set("Content-Type", "application/json")
// The internal token header carries the same value as DS2API_VERCEL_INTERNAL_SECRET set above.
req.Header.Set("X-Ds2-Internal-Token", "stream-secret")
rec := httptest.NewRecorder()
h.handleVercelStreamPrepare(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
}
var body map[string]any
if err := json.NewDecoder(rec.Body).Decode(&body); err != nil {
t.Fatalf("decode failed: %v", err)
}
finalPrompt, _ := body["final_prompt"].(string)
payload, _ := body["payload"].(map[string]any)
payloadPrompt, _ := payload["prompt"].(string)
// Both prompt copies are held to the same expectations; map iteration order
// only affects which label is reported first on failure.
for label, promptText := range map[string]string{"final_prompt": finalPrompt, "payload.prompt": payloadPrompt} {
if !strings.Contains(promptText, "<|DSML|tool_calls>") || !strings.Contains(promptText, "Tag punctuation alphabet: ASCII < > / = \" plus the halfwidth pipe |.") {
t.Fatalf("expected %s to contain halfwidth DSML tool instructions, got %q", label, promptText)
}
// NOTE(review): the fullwidth pipe is written as the escape \uff5c and the
// phrase is split as "full"+"width", presumably so this test file itself
// does not contain the legacy text — confirm with the author.
if strings.Contains(promptText, "\uff5c") || strings.Contains(promptText, "full"+"width vertical bar") {
t.Fatalf("expected %s not to contain legacy pipe guidance, got %q", label, promptText)
}
}
toolNames, _ := body["tool_names"].([]any)
if len(toolNames) != 1 || toolNames[0] != "search" {
t.Fatalf("expected prepared tool names to align with request tools, got %#v", body["tool_names"])
}
}
func TestHandleVercelStreamPrepareMapsCurrentInputFileManagedAuthFailureTo401(t *testing.T) {
t.Setenv("VERCEL", "1")
t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret")

View File

@@ -103,7 +103,7 @@ func TestNormalizeOpenAIResponsesRequestAlwaysAcceptsWideInput(t *testing.T) {
if out.Surface != "openai_responses" {
t.Fatalf("unexpected surface: %q", out.Surface)
}
if !strings.Contains(out.FinalPrompt, "<User>hi") {
if !strings.Contains(out.FinalPrompt, "<|User|>hi") {
t.Fatalf("unexpected final prompt: %q", out.FinalPrompt)
}
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/http/httptest"
"strings"
@@ -41,8 +42,12 @@ func (m *inlineUploadDSStub) UploadFile(ctx context.Context, _ *auth.RequestAuth
if m.uploadErr != nil {
return nil, m.uploadErr
}
id := "file-inline-1"
if len(m.uploadCalls) > 1 {
id = "file-inline-" + fmt.Sprint(len(m.uploadCalls))
}
return &dsclient.UploadFileResult{
ID: "file-inline-1",
ID: id,
Filename: req.Filename,
Bytes: int64(len(req.Data)),
Status: "uploaded",

View File

@@ -15,6 +15,7 @@ import (
const (
currentInputFilename = promptcompat.CurrentInputContextFilename
currentToolsFilename = promptcompat.CurrentToolsContextFilename
currentInputContentType = "text/plain; charset=utf-8"
currentInputPurpose = "assistants"
)
@@ -50,6 +51,7 @@ func (s Service) ApplyCurrentInputFile(ctx context.Context, a *auth.RequestAuth,
if strings.TrimSpace(fileText) == "" {
return stdReq, errors.New("current user input file produced empty transcript")
}
toolsText, _ := promptcompat.BuildOpenAIToolsContextTranscript(stdReq.ToolsRaw, stdReq.ToolChoice)
modelType := "default"
if resolvedType, ok := config.GetModelType(stdReq.ResolvedModel); ok {
modelType = resolvedType
@@ -69,21 +71,44 @@ func (s Service) ApplyCurrentInputFile(ctx context.Context, a *auth.RequestAuth,
return stdReq, errors.New("upload current user input file returned empty file id")
}
toolFileID := ""
if strings.TrimSpace(toolsText) != "" {
result, err := s.DS.UploadFile(ctx, a, dsclient.UploadFileRequest{
Filename: currentToolsFilename,
ContentType: currentInputContentType,
Purpose: currentInputPurpose,
ModelType: modelType,
Data: []byte(toolsText),
}, 3)
if err != nil {
return stdReq, fmt.Errorf("upload current tools file: %w", err)
}
toolFileID = strings.TrimSpace(result.ID)
if toolFileID == "" {
return stdReq, errors.New("upload current tools file returned empty file id")
}
}
messages := []any{
map[string]any{
"role": "user",
"content": currentInputFilePrompt(),
"content": currentInputFilePrompt(toolFileID != ""),
},
}
stdReq.Messages = messages
stdReq.HistoryText = fileText
stdReq.CurrentInputFileApplied = true
stdReq.RefFileIDs = prependUniqueRefFileID(stdReq.RefFileIDs, fileID)
stdReq.FinalPrompt, stdReq.ToolNames = promptcompat.BuildOpenAIPrompt(messages, stdReq.ToolsRaw, "", stdReq.ToolChoice, stdReq.Thinking)
stdReq.RefFileIDs = prependUniqueRefFileIDs(stdReq.RefFileIDs, fileID, toolFileID)
stdReq.FinalPrompt, stdReq.ToolNames = promptcompat.BuildOpenAIPromptWithToolInstructionsOnly(messages, stdReq.ToolsRaw, "", stdReq.ToolChoice, stdReq.Thinking)
// Token accounting must reflect the actual downstream context:
// the uploaded DS2API_HISTORY.txt file content + the continuation live prompt.
stdReq.PromptTokenText = fileText + "\n" + stdReq.FinalPrompt
// uploaded context files + the continuation live prompt.
tokenParts := []string{fileText}
if strings.TrimSpace(toolsText) != "" {
tokenParts = append(tokenParts, toolsText)
}
tokenParts = append(tokenParts, stdReq.FinalPrompt)
stdReq.PromptTokenText = strings.Join(tokenParts, "\n")
return stdReq, nil
}
@@ -106,23 +131,40 @@ func latestUserInputForFile(messages []any) (int, string) {
return -1, ""
}
func currentInputFilePrompt() string {
return "Continue from the latest state in the attached DS2API_HISTORY.txt context. Treat it as the current working state and answer the latest user request directly."
func currentInputFilePrompt(hasToolsFile bool) string {
prompt := "Continue from the latest state in the attached DS2API_HISTORY.txt context. Treat it as the current working state and answer the latest user request directly."
if hasToolsFile {
prompt += " Available tool descriptions and parameter schemas are attached in DS2API_TOOLS.txt; use only those tools and follow the tool-call format rules in this prompt."
}
return prompt
}
func prependUniqueRefFileID(existing []string, fileID string) []string {
fileID = strings.TrimSpace(fileID)
if fileID == "" {
return existing
}
out := make([]string, 0, len(existing)+1)
out = append(out, fileID)
for _, id := range existing {
trimmed := strings.TrimSpace(id)
if trimmed == "" || strings.EqualFold(trimmed, fileID) {
func prependUniqueRefFileIDs(existing []string, fileIDs ...string) []string {
out := make([]string, 0, len(existing)+len(fileIDs))
seen := map[string]struct{}{}
for _, fileID := range fileIDs {
trimmed := strings.TrimSpace(fileID)
if trimmed == "" {
continue
}
key := strings.ToLower(trimmed)
if _, ok := seen[key]; ok {
continue
}
out = append(out, trimmed)
seen[key] = struct{}{}
}
for _, id := range existing {
trimmed := strings.TrimSpace(id)
if trimmed == "" {
continue
}
key := strings.ToLower(trimmed)
if _, ok := seen[key]; ok {
continue
}
out = append(out, trimmed)
seen[key] = struct{}{}
}
return out
}

View File

@@ -84,7 +84,7 @@ func TestBuildOpenAICurrentInputContextTranscriptUsesNumberedHistorySections(t *
"latest user turn",
"[reasoning_content]",
"hidden reasoning",
"<DSMLtool_calls>",
"<|DSML|tool_calls>",
} {
if !strings.Contains(transcript, want) {
t.Fatalf("expected transcript to contain %q, got %q", want, transcript)
@@ -380,6 +380,79 @@ func TestApplyCurrentInputFileUploadsFullContextFile(t *testing.T) {
}
}
// TestApplyCurrentInputFileUploadsToolsContextSeparately normalizes an OpenAI
// chat request that declares one function tool, applies the current-input-file
// step, and checks that history and tool descriptions are uploaded as two
// separate files, that the live prompt keeps only the tool-call format
// instructions, and that RefFileIDs and PromptTokenText cover both uploads.
func TestApplyCurrentInputFileUploadsToolsContextSeparately(t *testing.T) {
ds := &inlineUploadDSStub{}
h := &openAITestSurface{
Store: mockOpenAIConfig{
currentInputEnabled: true,
currentInputMin: 0,
},
DS: ds,
}
req := map[string]any{
"model": "deepseek-v4-flash",
"messages": historySplitTestMessages(),
"tools": []any{
map[string]any{
"type": "function",
"function": map[string]any{
"name": "search",
"description": "search docs",
"parameters": map[string]any{
"type": "object",
},
},
},
},
}
stdReq, err := promptcompat.NormalizeOpenAIChatRequest(h.Store, req, "")
if err != nil {
t.Fatalf("normalize failed: %v", err)
}
out, err := h.applyCurrentInputFile(context.Background(), &auth.RequestAuth{DeepSeekToken: "token"}, stdReq)
if err != nil {
t.Fatalf("apply current input file failed: %v", err)
}
// Upload order is part of the contract checked here: history first, tools second.
if len(ds.uploadCalls) != 2 {
t.Fatalf("expected history and tools uploads, got %d", len(ds.uploadCalls))
}
if ds.uploadCalls[0].Filename != "DS2API_HISTORY.txt" {
t.Fatalf("expected first upload to be DS2API_HISTORY.txt, got %q", ds.uploadCalls[0].Filename)
}
if ds.uploadCalls[1].Filename != "DS2API_TOOLS.txt" {
t.Fatalf("expected second upload to be DS2API_TOOLS.txt, got %q", ds.uploadCalls[1].Filename)
}
// The history file must not carry the tool catalogue.
historyText := string(ds.uploadCalls[0].Data)
if strings.Contains(historyText, "You have access to these tools") || strings.Contains(historyText, "Description: search docs") {
t.Fatalf("history transcript should not embed tool descriptions, got %q", historyText)
}
// The tools file carries the header, name, description and parameter schema,
// but not the tool-call format instructions.
toolsText := string(ds.uploadCalls[1].Data)
for _, want := range []string{"# DS2API_TOOLS.txt", "Tool: search", "Description: search docs", `Parameters: {"type":"object"}`} {
if !strings.Contains(toolsText, want) {
t.Fatalf("expected tools transcript to contain %q, got %q", want, toolsText)
}
}
if strings.Contains(toolsText, "TOOL CALL FORMAT") {
t.Fatalf("tools transcript should not duplicate tool format instructions, got %q", toolsText)
}
// The live prompt references both files and keeps the format instructions,
// while the tool descriptions are expected only in the uploaded tools file.
if !strings.Contains(out.FinalPrompt, "Continue from the latest state in the attached DS2API_HISTORY.txt context.") || !strings.Contains(out.FinalPrompt, "DS2API_TOOLS.txt") {
t.Fatalf("expected live prompt to reference both context files, got %q", out.FinalPrompt)
}
if !strings.Contains(out.FinalPrompt, "TOOL CALL FORMAT") || !strings.Contains(out.FinalPrompt, "Remember: The ONLY valid way to use tools") {
t.Fatalf("expected live prompt to retain tool format instructions, got %q", out.FinalPrompt)
}
if strings.Contains(out.FinalPrompt, "You have access to these tools") || strings.Contains(out.FinalPrompt, "Description: search docs") || strings.Contains(out.FinalPrompt, "Parameters:") {
t.Fatalf("expected live prompt to omit tool descriptions after tools upload, got %q", out.FinalPrompt)
}
// The expected ids are the ones asserted for the first and second upload of the DS stub.
if len(out.RefFileIDs) < 2 || out.RefFileIDs[0] != "file-inline-1" || out.RefFileIDs[1] != "file-inline-2" {
t.Fatalf("expected history and tools file ids first, got %#v", out.RefFileIDs)
}
// Token accounting text must include the content of both uploaded files.
if !strings.Contains(out.PromptTokenText, "# DS2API_HISTORY.txt") || !strings.Contains(out.PromptTokenText, "# DS2API_TOOLS.txt") || !strings.Contains(out.PromptTokenText, "Description: search docs") {
t.Fatalf("expected prompt token text to include uploaded history and tools content, got %q", out.PromptTokenText)
}
}
func TestApplyCurrentInputFileCarriesHistoryText(t *testing.T) {
ds := &inlineUploadDSStub{}
h := &openAITestSurface{

View File

@@ -19,7 +19,7 @@ func TestSanitizeLeakedOutputRemovesLeakedWireToolCallAndResult(t *testing.T) {
}
func TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers(t *testing.T) {
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<end▁of▁thinking>D<end▁of▁sentence>E<| end_of_toolresults |>F<end▁of▁instructions>G"
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<|end▁of▁thinking|>D<|end▁of▁sentence|>E<| end_of_toolresults |>F<|end▁of▁instructions|>G"
got := sanitizeLeakedOutput(raw)
if got != "ABCDEFG" {
t.Fatalf("unexpected sanitize result for meta markers: %q", got)
@@ -27,7 +27,7 @@ func TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers(t *testing.T) {
}
func TestSanitizeLeakedOutputRemovesThinkAndBosMarkers(t *testing.T) {
raw := "A<think>B</think>C<begin▁of▁sentence>D<| begin_of_sentence |>E<begin_of_sentence>F"
raw := "A<think>B</think>C<|begin▁of▁sentence|>D<| begin_of_sentence |>E<|begin_of_sentence|>F"
got := sanitizeLeakedOutput(raw)
if got != "ABCDEF" {
t.Fatalf("unexpected sanitize result for think/BOS markers: %q", got)
@@ -35,7 +35,7 @@ func TestSanitizeLeakedOutputRemovesThinkAndBosMarkers(t *testing.T) {
}
func TestSanitizeLeakedOutputRemovesThoughtMarkers(t *testing.T) {
raw := "A<▁of▁thought>B<| of_thought |>C<| begin_of_thought |>D<| end_of_thought |>E"
raw := "A<|▁of▁thought|>B<| of_thought |>C<| begin_of_thought |>D<| end_of_thought |>E"
got := sanitizeLeakedOutput(raw)
if got != "ABCDE" {
t.Fatalf("unexpected sanitize result for leaked thought markers: %q", got)
@@ -51,7 +51,7 @@ func TestSanitizeLeakedOutputRemovesDanglingThinkBlock(t *testing.T) {
}
func TestSanitizeLeakedOutputRemovesCompleteDSMLToolCallWrapper(t *testing.T) {
raw := "前置文本\n<DSMLtool_calls>\n<DSMLinvoke name=\"Bash\">\n<DSMLparameter name=\"command\"></DSMLparameter>\n</DSMLinvoke>\n</DSMLtool_calls>\n后置文本"
raw := "前置文本\n<|DSML|tool_calls>\n<|DSML|invoke name=\"Bash\">\n<|DSML|parameter name=\"command\"></|DSML|parameter>\n</|DSML|invoke>\n</|DSML|tool_calls>\n后置文本"
got := sanitizeLeakedOutput(raw)
if got != "前置文本\n\n后置文本" {
t.Fatalf("unexpected sanitize result for leaked dsml wrapper: %q", got)

View File

@@ -14,20 +14,20 @@ var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*
var leakedThinkTagPattern = regexp.MustCompile(`(?is)</?\s*think\s*>`)
// leakedBOSMarkerPattern matches DeepSeek BOS markers in BOTH forms:
// - ASCII underscore: <begin_of_sentence>
// - U+2581 variant: <begin▁of▁sentence>
var leakedBOSMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*begin[_▁]of[_▁]sentence\s*[\|]>`)
// - ASCII underscore: <|begin_of_sentence|>
// - U+2581 variant: <|begin▁of▁sentence|>
var leakedBOSMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*begin[_▁]of[_▁]sentence\s*[|\|]>`)
// leakedThoughtMarkerPattern matches leaked thought control markers in both
// explicit and compact forms:
// - ASCII underscore: <| of_thought |>, <| begin_of_thought |>
// - U+2581 variant: <▁of▁thought>, <begin▁of▁thought>
var leakedThoughtMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[\|]>`)
// - U+2581 variant: <|▁of▁thought|>, <|begin▁of▁thought|>
var leakedThoughtMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[|\|]>`)
// leakedMetaMarkerPattern matches the remaining DeepSeek special tokens in BOTH forms:
// - ASCII underscore: <end_of_sentence>, <end_of_toolresults>, <end_of_instructions>
// - U+2581 variant: <end▁of▁sentence>, <end▁of▁toolresults>, <end▁of▁instructions>
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[\|]>`)
// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|>
// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|>
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|\|]>`)
// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
// when the sieve fails to capture them. These are applied only to complete

View File

@@ -7,9 +7,9 @@ const {
SKIP_EXACT_PATHS,
} = require('../shared/deepseek-constants');
const LEAKED_BOS_MARKER_PATTERN = /<[|]\s*begin[_▁]of[_▁]sentence\s*[|]>/gi;
const LEAKED_THOUGHT_MARKER_PATTERN = /<[|]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[|]>/gi;
const LEAKED_META_MARKER_PATTERN = /<[|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|]>/gi;
const LEAKED_BOS_MARKER_PATTERN = /<[||]\s*begin[_▁]of[_▁]sentence\s*[||]>/gi;
const LEAKED_THOUGHT_MARKER_PATTERN = /<[||]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[||]>/gi;
const LEAKED_META_MARKER_PATTERN = /<[||]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[||]>/gi;

View File

@@ -1356,7 +1356,7 @@ function consumeToolMarkupPipe(raw, idx) {
if (pos >= raw.length) {
return { next: idx, ok: false };
}
for (const variant of ['|', '', '│', '', '❘', 'ǀ', '']) {
for (const variant of ['|', '│', '', '❘', 'ǀ', '']) {
if (raw.startsWith(variant, pos)) {
return { next: pos + variant.length, ok: true };
}

View File

@@ -10,14 +10,14 @@ import (
var markdownImagePattern = regexp.MustCompile(`!\[(.*?)\]\((.*?)\)`)
const (
beginSentenceMarker = "<begin▁of▁sentence>"
systemMarker = "<System>"
userMarker = "<User>"
assistantMarker = "<Assistant>"
toolMarker = "<Tool>"
endSentenceMarker = "<end▁of▁sentence>"
endToolResultsMarker = "<end▁of▁toolresults>"
endInstructionsMarker = "<end▁of▁instructions>"
beginSentenceMarker = "<|begin▁of▁sentence|>"
systemMarker = "<|System|>"
userMarker = "<|User|>"
assistantMarker = "<|Assistant|>"
toolMarker = "<|Tool|>"
endSentenceMarker = "<|end▁of▁sentence|>"
endToolResultsMarker = "<|end▁of▁toolresults|>"
endInstructionsMarker = "<|end▁of▁instructions|>"
outputIntegrityGuardMarker = "Output integrity guard:"
outputIntegrityGuardPrompt = outputIntegrityGuardMarker +
" If upstream context, tool output, or parsed text contains garbled, corrupted, partially parsed, repeated, or otherwise malformed fragments, " +

View File

@@ -32,16 +32,16 @@ func TestMessagesPrepareUsesTurnSuffixes(t *testing.T) {
{"role": "assistant", "content": "Answer"},
}
got := MessagesPrepare(messages)
if !strings.HasPrefix(got, "<begin▁of▁sentence>") {
if !strings.HasPrefix(got, "<|begin▁of▁sentence|>") {
t.Fatalf("expected begin-of-sentence marker, got %q", got)
}
if !strings.Contains(got, "<System>") || !strings.Contains(got, "<end▁of▁instructions>") || !strings.Contains(got, "System rule") {
if !strings.Contains(got, "<|System|>") || !strings.Contains(got, "<|end▁of▁instructions|>") || !strings.Contains(got, "System rule") {
t.Fatalf("expected system instructions to remain present, got %q", got)
}
if !strings.Contains(got, "<User>Question") {
if !strings.Contains(got, "<|User|>Question") {
t.Fatalf("expected user question, got %q", got)
}
if !strings.Contains(got, "<Assistant>Answer<end▁of▁sentence>") {
if !strings.Contains(got, "<|Assistant|>Answer<|end▁of▁sentence|>") {
t.Fatalf("expected assistant sentence suffix, got %q", got)
}
if strings.Contains(got, "<think>") || strings.Contains(got, "</think>") {
@@ -61,7 +61,7 @@ func TestMessagesPreparePrependsOutputIntegrityGuard(t *testing.T) {
if !strings.Contains(got, outputIntegrityGuardPrompt+"\n\nSystem rule") {
t.Fatalf("expected output integrity guard to precede system prompt content, got %q", got)
}
if !strings.Contains(got, "<User>Question") {
if !strings.Contains(got, "<|User|>Question") {
t.Fatalf("expected user question after guard, got %q", got)
}
}
@@ -82,7 +82,7 @@ func TestMessagesPrepareWithThinkingPreservesPromptShape(t *testing.T) {
if gotThinking != gotPlain {
t.Fatalf("expected thinking flag not to add extra continuity instructions, got thinking=%q plain=%q", gotThinking, gotPlain)
}
if !strings.HasSuffix(gotThinking, "<Assistant>") {
if !strings.HasSuffix(gotThinking, "<|Assistant|>") {
t.Fatalf("expected assistant suffix, got %q", gotThinking)
}
}

View File

@@ -17,12 +17,12 @@ var promptXMLTextEscaper = strings.NewReplacer(
var promptXMLNamePattern = regexp.MustCompile(`^[A-Za-z_][A-Za-z0-9_.:-]*$`)
const (
promptDSMLToolCallsOpen = "<DSMLtool_calls>"
promptDSMLToolCallsClose = "</DSMLtool_calls>"
promptDSMLInvokeOpen = "<DSMLinvoke"
promptDSMLInvokeClose = "</DSMLinvoke>"
promptDSMLParameterOpen = "<DSMLparameter"
promptDSMLParameterClose = "</DSMLparameter>"
promptDSMLToolCallsOpen = "<|DSML|tool_calls>"
promptDSMLToolCallsClose = "</|DSML|tool_calls>"
promptDSMLInvokeOpen = "<|DSML|invoke"
promptDSMLInvokeClose = "</|DSML|invoke>"
promptDSMLParameterOpen = "<|DSML|parameter"
promptDSMLParameterClose = "</|DSML|parameter>"
)
// FormatToolCallsForPrompt renders a tool_calls slice into the prompt-visible

View File

@@ -22,7 +22,7 @@ func TestFormatToolCallsForPromptDSML(t *testing.T) {
if got == "" {
t.Fatal("expected non-empty formatted tool calls")
}
if got != "<DSMLtool_calls>\n <DSMLinvoke name=\"search_web\">\n <DSMLparameter name=\"query\"><![CDATA[latest]]></DSMLparameter>\n </DSMLinvoke>\n</DSMLtool_calls>" {
if got != "<|DSML|tool_calls>\n <|DSML|invoke name=\"search_web\">\n <|DSML|parameter name=\"query\"><![CDATA[latest]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>" {
t.Fatalf("unexpected formatted tool call DSML: %q", got)
}
}
@@ -34,7 +34,7 @@ func TestFormatToolCallsForPromptEscapesXMLEntities(t *testing.T) {
"arguments": `{"q":"a < b && c > d"}`,
},
})
want := "<DSMLtool_calls>\n <DSMLinvoke name=\"search&lt;&amp;&gt;\">\n <DSMLparameter name=\"q\"><![CDATA[a < b && c > d]]></DSMLparameter>\n </DSMLinvoke>\n</DSMLtool_calls>"
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"search&lt;&amp;&gt;\">\n <|DSML|parameter name=\"q\"><![CDATA[a < b && c > d]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
if got != want {
t.Fatalf("unexpected escaped tool call XML: %q", got)
}
@@ -50,7 +50,7 @@ func TestFormatToolCallsForPromptUsesCDATAForMultilineContent(t *testing.T) {
},
},
})
want := "<DSMLtool_calls>\n <DSMLinvoke name=\"write_file\">\n <DSMLparameter name=\"content\"><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></DSMLparameter>\n <DSMLparameter name=\"path\"><![CDATA[script.sh]]></DSMLparameter>\n </DSMLinvoke>\n</DSMLtool_calls>"
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"write_file\">\n <|DSML|parameter name=\"content\"><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></|DSML|parameter>\n <|DSML|parameter name=\"path\"><![CDATA[script.sh]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
if got != want {
t.Fatalf("unexpected multiline cdata tool call XML: %q", got)
}

View File

@@ -38,10 +38,10 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes
t.Fatalf("expected 4 normalized messages with assistant tool history preserved, got %d", len(normalized))
}
assistantContent, _ := normalized[2]["content"].(string)
if !strings.Contains(assistantContent, "<DSMLtool_calls>") {
if !strings.Contains(assistantContent, "<|DSML|tool_calls>") {
t.Fatalf("assistant tool history should be preserved in DSML form, got %q", assistantContent)
}
if !strings.Contains(assistantContent, `<DSMLinvoke name="get_weather">`) {
if !strings.Contains(assistantContent, `<|DSML|invoke name="get_weather">`) {
t.Fatalf("expected tool name in preserved history, got %q", assistantContent)
}
if !strings.Contains(normalized[3]["content"].(string), `"temp":18`) {
@@ -49,7 +49,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes
}
prompt := util.MessagesPrepare(normalized)
if !strings.Contains(prompt, "<DSMLtool_calls>") {
if !strings.Contains(prompt, "<|DSML|tool_calls>") {
t.Fatalf("expected preserved assistant tool history in prompt: %q", prompt)
}
}
@@ -177,10 +177,10 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantMultipleToolCallsRemainSepara
t.Fatalf("expected assistant tool_call-only message preserved, got %#v", normalized)
}
content, _ := normalized[0]["content"].(string)
if strings.Count(content, "<DSMLinvoke name=") != 2 {
if strings.Count(content, "<|DSML|invoke name=") != 2 {
t.Fatalf("expected two preserved tool call blocks, got %q", content)
}
if !strings.Contains(content, `<DSMLinvoke name="search_web">`) || !strings.Contains(content, `<DSMLinvoke name="eval_javascript">`) {
if !strings.Contains(content, `<|DSML|invoke name="search_web">`) || !strings.Contains(content, `<|DSML|invoke name="eval_javascript">`) {
t.Fatalf("expected both tool names in preserved history, got %q", content)
}
}
@@ -258,7 +258,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantNilContentDoesNotInjectNullLi
if strings.Contains(content, "null") {
t.Fatalf("expected no null literal injection, got %q", content)
}
if !strings.Contains(content, "<DSMLtool_calls>") {
if !strings.Contains(content, "<|DSML|tool_calls>") {
t.Fatalf("expected assistant tool history in normalized content, got %q", content)
}
}
@@ -282,11 +282,11 @@ func TestNormalizeOpenAIMessagesForPrompt_CanonicalizesStandaloneAssistantToolMa
}
content, _ := normalized[0]["content"].(string)
for _, want := range []string{
"<DSMLtool_calls>",
`<DSMLinvoke name="Bash">`,
`<DSMLparameter name="command"><![CDATA[lsof -i :4321 -t]]></DSMLparameter>`,
`<DSMLparameter name="description"><![CDATA[Verify port 4321 is free]]></DSMLparameter>`,
"</DSMLtool_calls>",
"<|DSML|tool_calls>",
`<|DSML|invoke name="Bash">`,
`<|DSML|parameter name="command"><![CDATA[lsof -i :4321 -t]]></|DSML|parameter>`,
`<|DSML|parameter name="description"><![CDATA[Verify port 4321 is free]]></|DSML|parameter>`,
"</|DSML|tool_calls>",
} {
if !strings.Contains(content, want) {
t.Fatalf("expected canonicalized assistant tool markup to contain %q, got %q", want, content)

View File

@@ -9,10 +9,22 @@ func buildOpenAIFinalPrompt(messagesRaw []any, toolsRaw any, traceID string, thi
}
// BuildOpenAIPrompt builds the prompt text for an OpenAI-style request and
// returns it together with the tool names reported by the tool-prompt injector.
// It delegates to buildOpenAIPrompt with includeToolDescriptions set to true,
// i.e. it asks for the tool descriptions to be part of the prompt.
func BuildOpenAIPrompt(messagesRaw []any, toolsRaw any, traceID string, toolPolicy ToolChoicePolicy, thinkingEnabled bool) (string, []string) {
return buildOpenAIPrompt(messagesRaw, toolsRaw, traceID, toolPolicy, thinkingEnabled, true)
}
// BuildOpenAIPromptWithToolInstructionsOnly is the variant of BuildOpenAIPrompt
// that delegates to buildOpenAIPrompt with includeToolDescriptions set to false,
// i.e. it asks for the tool-call instructions without the per-tool descriptions.
// It returns the prompt text and the tool names reported by the injector.
func BuildOpenAIPromptWithToolInstructionsOnly(messagesRaw []any, toolsRaw any, traceID string, toolPolicy ToolChoicePolicy, thinkingEnabled bool) (string, []string) {
return buildOpenAIPrompt(messagesRaw, toolsRaw, traceID, toolPolicy, thinkingEnabled, false)
}
// buildOpenAIPrompt normalizes the raw messages, injects the tool prompt when
// toolsRaw is a non-empty []any, and renders the result through
// prompt.MessagesPrepareWithThinking.
//
// includeToolDescriptions selects the injector: injectToolPrompt (descriptions
// plus instructions) when true, injectToolPromptInstructionsOnly when false.
// Exactly one injector runs per call; injecting unconditionally first would
// append the tool prompt twice and leak descriptions into the
// instructions-only prompt.
//
// The returned names start as an empty slice and are replaced by whatever the
// chosen injector returns.
func buildOpenAIPrompt(messagesRaw []any, toolsRaw any, traceID string, toolPolicy ToolChoicePolicy, thinkingEnabled bool, includeToolDescriptions bool) (string, []string) {
	messages := NormalizeOpenAIMessagesForPrompt(messagesRaw, traceID)
	toolNames := []string{}
	if tools, ok := toolsRaw.([]any); ok && len(tools) > 0 {
		if includeToolDescriptions {
			messages, toolNames = injectToolPrompt(messages, tools, toolPolicy)
		} else {
			messages, toolNames = injectToolPromptInstructionsOnly(messages, tools, toolPolicy)
		}
	}
	return prompt.MessagesPrepareWithThinking(messages, thinkingEnabled), toolNames
}

View File

@@ -47,10 +47,10 @@ func TestBuildOpenAIFinalPrompt_HandlerPathIncludesToolRoundtripSemantics(t *tes
if !strings.Contains(finalPrompt, `"condition":"sunny"`) {
t.Fatalf("handler finalPrompt should preserve tool output content: %q", finalPrompt)
}
if !strings.Contains(finalPrompt, "<DSMLtool_calls>") {
if !strings.Contains(finalPrompt, "<|DSML|tool_calls>") {
t.Fatalf("handler finalPrompt should preserve assistant tool history: %q", finalPrompt)
}
if !strings.Contains(finalPrompt, `<DSMLinvoke name="get_weather">`) {
if !strings.Contains(finalPrompt, `<|DSML|invoke name="get_weather">`) {
t.Fatalf("handler finalPrompt should include tool name history: %q", finalPrompt)
}
}
@@ -74,7 +74,7 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
}
finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false)
if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <DSMLtool_calls>...</DSMLtool_calls> block at the end of your response.") {
if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.") {
t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt)
}
if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") {
@@ -88,6 +88,64 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
}
}
// TestBuildOpenAIPromptWithToolInstructionsOnlyOmitsSchemas checks that the
// instructions-only prompt still reports the tool name and keeps the tool-call
// format rules, while leaving out the per-tool description and schema text.
func TestBuildOpenAIPromptWithToolInstructionsOnlyOmitsSchemas(t *testing.T) {
	searchTool := map[string]any{
		"type": "function",
		"function": map[string]any{
			"name":        "search",
			"description": "search docs",
			"parameters": map[string]any{
				"type": "object",
			},
		},
	}
	conversation := []any{
		map[string]any{"role": "system", "content": "You are helpful"},
		map[string]any{"role": "user", "content": "请调用工具"},
	}

	finalPrompt, toolNames := BuildOpenAIPromptWithToolInstructionsOnly(conversation, []any{searchTool}, "", DefaultToolChoicePolicy(), false)

	if len(toolNames) != 1 || toolNames[0] != "search" {
		t.Fatalf("unexpected tool names: %#v", toolNames)
	}
	// None of the description/schema markers may appear in the live prompt.
	for _, externalized := range []string{"You have access to these tools", "Description: search docs", "Parameters:"} {
		if strings.Contains(finalPrompt, externalized) {
			t.Fatalf("tool descriptions should be externalized, got: %q", finalPrompt)
		}
	}
	// The call-format instructions must all still be present.
	for _, kept := range []string{"TOOL CALL FORMAT", "Remember: The ONLY valid way to use tools"} {
		if !strings.Contains(finalPrompt, kept) {
			t.Fatalf("expected tool format instructions to remain in live prompt, got: %q", finalPrompt)
		}
	}
}
// TestBuildOpenAIToolsContextTranscriptContainsOnlyDescriptions checks that the
// tools transcript carries the title, the tool listing and the schema, and that
// it does not repeat the tool-call format instructions.
func TestBuildOpenAIToolsContextTranscriptContainsOnlyDescriptions(t *testing.T) {
	searchTool := map[string]any{
		"type": "function",
		"function": map[string]any{
			"name":        "search",
			"description": "search docs",
			"parameters": map[string]any{
				"type": "object",
			},
		},
	}

	transcript, toolNames := BuildOpenAIToolsContextTranscript([]any{searchTool}, DefaultToolChoicePolicy())

	if len(toolNames) != 1 || toolNames[0] != "search" {
		t.Fatalf("unexpected tool names: %#v", toolNames)
	}
	required := []string{
		"# DS2API_TOOLS.txt",
		"You have access to these tools",
		"Tool: search",
		"Description: search docs",
		`Parameters: {"type":"object"}`,
	}
	for _, want := range required {
		if !strings.Contains(transcript, want) {
			t.Fatalf("expected tools transcript to contain %q, got: %q", want, transcript)
		}
	}
	// Format instructions belong to the live prompt, not to the transcript.
	for _, formatMarker := range []string{"TOOL CALL FORMAT", "<|DSML|tool_calls>"} {
		if strings.Contains(transcript, formatMarker) {
			t.Fatalf("tools transcript should not duplicate format instructions, got: %q", transcript)
		}
	}
}
func TestBuildOpenAIFinalPromptPrependsOutputIntegrityGuard(t *testing.T) {
messages := []any{
map[string]any{"role": "system", "content": "You are helpful"},

View File

@@ -88,7 +88,7 @@ func TestNormalizeResponsesInputArrayMergesReasoningMessageIntoFunctionCallHisto
if !strings.Contains(history, "[reasoning_content]\nneed fresh docs before answering\n[/reasoning_content]") {
t.Fatalf("expected reasoning in history transcript, got %q", history)
}
if !strings.Contains(history, `<DSMLinvoke name="search_web">`) {
if !strings.Contains(history, `<|DSML|invoke name="search_web">`) {
t.Fatalf("expected tool call in history transcript, got %q", history)
}
}

View File

@@ -9,10 +9,50 @@ import (
"ds2api/internal/toolcall"
)
// CurrentToolsContextFilename is the file name of the tools context document.
// NOTE(review): presumably the name used when the tool descriptions are uploaded
// as a separate file — confirm against the callers.
const CurrentToolsContextFilename = "DS2API_TOOLS.txt"
// toolsTranscriptTitle is the heading line that BuildOpenAIToolsContextTranscript
// writes first.
const toolsTranscriptTitle = "# DS2API_TOOLS.txt"
// toolsTranscriptSummary is the one-line summary that
// BuildOpenAIToolsContextTranscript writes directly under the title.
const toolsTranscriptSummary = "Available tool descriptions and parameter schemas for this request."
// toolPromptParts holds the separately usable pieces of the tool prompt, as
// returned by buildToolPromptParts.
type toolPromptParts struct {
// Descriptions is the "You have access to these tools" listing of tool
// names, descriptions and parameter schemas; empty when no tool was listed.
Descriptions string
// Instructions is the tool-call format text from
// toolcall.BuildToolCallInstructions plus any appended guard or
// tool-choice rules; empty when no tool was listed.
Instructions string
// Names is the list of tool names collected while building the parts.
Names []string
}
// injectToolPrompt adds the tool prompt to messages with the tool descriptions
// included. It delegates to injectToolPromptWithDescriptions with
// includeDescriptions set to true and returns the resulting messages and the
// tool names.
func injectToolPrompt(messages []map[string]any, tools []any, policy ToolChoicePolicy) ([]map[string]any, []string) {
return injectToolPromptWithDescriptions(messages, tools, policy, true)
}
// injectToolPromptInstructionsOnly adds only the tool-call instructions to
// messages, leaving the tool descriptions out. It delegates to
// injectToolPromptWithDescriptions with includeDescriptions set to false and
// returns the resulting messages and the tool names.
func injectToolPromptInstructionsOnly(messages []map[string]any, tools []any, policy ToolChoicePolicy) ([]map[string]any, []string) {
return injectToolPromptWithDescriptions(messages, tools, policy, false)
}
// injectToolPromptWithDescriptions places the tool prompt into the system
// message of the conversation.
//
// When policy.IsNone() the messages are returned untouched with nil names.
// When buildToolPromptParts yields no instructions the messages are returned
// untouched with the collected names. Otherwise the instructions, optionally
// preceded by the descriptions, are appended to the content of the first
// message whose role is "system" (that map is modified in place); if no such
// message exists a new system message is put at the front.
func injectToolPromptWithDescriptions(messages []map[string]any, tools []any, policy ToolChoicePolicy, includeDescriptions bool) ([]map[string]any, []string) {
	if policy.IsNone() {
		return messages, nil
	}

	parts := buildToolPromptParts(tools, policy)
	if parts.Instructions == "" {
		return messages, parts.Names
	}

	block := parts.Instructions
	if includeDescriptions && parts.Descriptions != "" {
		block = parts.Descriptions + "\n\n" + parts.Instructions
	}

	// Extend the first existing system message, if there is one.
	for _, msg := range messages {
		if msg["role"] != "system" {
			continue
		}
		existing, _ := msg["content"].(string)
		msg["content"] = strings.TrimSpace(existing + "\n\n" + block)
		return messages, parts.Names
	}

	// No system message yet: lead the conversation with a new one.
	withSystem := make([]map[string]any, 0, len(messages)+1)
	withSystem = append(withSystem, map[string]any{"role": "system", "content": block})
	withSystem = append(withSystem, messages...)
	return withSystem, parts.Names
}
func buildToolPromptParts(tools []any, policy ToolChoicePolicy) toolPromptParts {
toolSchemas := make([]string, 0, len(tools))
names := make([]string, 0, len(tools))
isAllowed := func(name string) bool {
@@ -44,29 +84,47 @@ func injectToolPrompt(messages []map[string]any, tools []any, policy ToolChoiceP
toolSchemas = append(toolSchemas, fmt.Sprintf("Tool: %s\nDescription: %s\nParameters: %s", name, desc, string(b)))
}
if len(toolSchemas) == 0 {
return messages, names
return toolPromptParts{Names: names}
}
toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\n" + toolcall.BuildToolCallInstructions(names)
descriptions := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n")
instructions := toolcall.BuildToolCallInstructions(names)
if hasReadLikeTool(names) {
toolPrompt += "\n\nRead-tool cache guard: If a Read/read_file-style tool result says the file is unchanged, already available in history, should be referenced from previous context, or otherwise provides no file body, treat that result as missing content. Do not repeatedly call the same read request for that missing body. Request a full-content read if the tool supports it, or tell the user that the file contents need to be provided again."
instructions += "\n\nRead-tool cache guard: If a Read/read_file-style tool result says the file is unchanged, already available in history, should be referenced from previous context, or otherwise provides no file body, treat that result as missing content. Do not repeatedly call the same read request for that missing body. Request a full-content read if the tool supports it, or tell the user that the file contents need to be provided again."
}
if policy.Mode == ToolChoiceRequired {
toolPrompt += "\n7) For this response, you MUST call at least one tool from the allowed list."
instructions += "\n7) For this response, you MUST call at least one tool from the allowed list."
}
if policy.Mode == ToolChoiceForced && strings.TrimSpace(policy.ForcedName) != "" {
toolPrompt += "\n7) For this response, you MUST call exactly this tool name: " + strings.TrimSpace(policy.ForcedName)
toolPrompt += "\n8) Do not call any other tool."
instructions += "\n7) For this response, you MUST call exactly this tool name: " + strings.TrimSpace(policy.ForcedName)
instructions += "\n8) Do not call any other tool."
}
return toolPromptParts{
Descriptions: descriptions,
Instructions: instructions,
Names: names,
}
}
for i := range messages {
if messages[i]["role"] == "system" {
old, _ := messages[i]["content"].(string)
messages[i]["content"] = strings.TrimSpace(old + "\n\n" + toolPrompt)
return messages, names
}
// BuildOpenAIToolsContextTranscript renders the tool descriptions as a
// standalone text document and returns it with the collected tool names.
//
// It returns ("", nil) when policy.IsNone() or when toolsRaw is not a non-empty
// []any, and ("", names) when buildToolPromptParts produced no descriptions.
// Otherwise the document is the title line, the summary line, a blank line and
// the descriptions, ending with a newline. The tool-call instructions are not
// written into it.
func BuildOpenAIToolsContextTranscript(toolsRaw any, policy ToolChoicePolicy) (string, []string) {
	if policy.IsNone() {
		return "", nil
	}
	tools, ok := toolsRaw.([]any)
	if !ok || len(tools) == 0 {
		return "", nil
	}
	parts := buildToolPromptParts(tools, policy)
	if strings.TrimSpace(parts.Descriptions) == "" {
		return "", parts.Names
	}
	var b strings.Builder
	b.WriteString(toolsTranscriptTitle)
	b.WriteString("\n")
	b.WriteString(toolsTranscriptSummary)
	b.WriteString("\n\n")
	b.WriteString(parts.Descriptions)
	b.WriteString("\n")
	return b.String(), parts.Names
}
func hasReadLikeTool(names []string) bool {

View File

@@ -11,19 +11,19 @@ import "strings"
func BuildToolCallInstructions(toolNames []string) string {
return `TOOL CALL FORMAT — FOLLOW EXACTLY:
<DSMLtool_calls>
<DSMLinvoke name="TOOL_NAME_HERE">
<DSMLparameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></DSMLparameter>
</DSMLinvoke>
</DSMLtool_calls>
<|DSML|tool_calls>
<|DSML|invoke name="TOOL_NAME_HERE">
<|DSML|parameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls>
RULES:
1) Use the <DSMLtool_calls> wrapper format.
2) Put one or more <DSMLinvoke> entries under a single <DSMLtool_calls> root.
3) Put the tool name in the invoke name attribute: <DSMLinvoke name="TOOL_NAME">.
3a) Tag punctuation alphabet: ASCII < > / = " plus the fullwidth vertical bar .
1) Use the <|DSML|tool_calls> wrapper format.
2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root.
3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">.
3a) Tag punctuation alphabet: ASCII < > / = " plus the halfwidth pipe |.
4) All string values must use <![CDATA[...]]>, even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries.
5) Every top-level argument must be a <DSMLparameter name="ARG_NAME">...</DSMLparameter> node.
5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...</|DSML|parameter> node.
6) Objects use nested XML elements inside the parameter body. Arrays may repeat <item> children.
7) Numbers, booleans, and null stay plain text.
8) Use only the parameter names in the tool schema. Do not invent fields.
@@ -31,35 +31,35 @@ RULES:
10) If a required parameter value is unknown, ask the user or answer normally instead of outputting an empty tool call.
11) For shell tools such as Bash / execute_command, the command/script must be inside the command parameter. Never call them with an empty command.
12) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
13) If you call a tool, the first non-whitespace characters of that tool block must be exactly <DSMLtool_calls>.
14) Never omit the opening <DSMLtool_calls> tag, even if you already plan to close with </DSMLtool_calls>.
13) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
14) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with </|DSML|tool_calls>.
15) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
PARAMETER SHAPES:
- string => <DSMLparameter name="x"><![CDATA[value]]></DSMLparameter>
- object => <DSMLparameter name="x"><field>...</field></DSMLparameter>
- array => <DSMLparameter name="x"><item>...</item><item>...</item></DSMLparameter>
- number/bool/null => <DSMLparameter name="x">plain_text</DSMLparameter>
- string => <|DSML|parameter name="x"><![CDATA[value]]></|DSML|parameter>
- object => <|DSML|parameter name="x"><field>...</field></|DSML|parameter>
- array => <|DSML|parameter name="x"><item>...</item><item>...</item></|DSML|parameter>
- number/bool/null => <|DSML|parameter name="x">plain_text</|DSML|parameter>
【WRONG — Do NOT do these】:
Wrong 1 — mixed text after XML:
<DSMLtool_calls>...</DSMLtool_calls> I hope this helps.
<|DSML|tool_calls>...</|DSML|tool_calls> I hope this helps.
Wrong 2 — Markdown code fences:
` + "```xml" + `
<DSMLtool_calls>...</DSMLtool_calls>
<|DSML|tool_calls>...</|DSML|tool_calls>
` + "```" + `
Wrong 3 — missing opening wrapper:
<DSMLinvoke name="TOOL_NAME">...</DSMLinvoke>
</DSMLtool_calls>
<|DSML|invoke name="TOOL_NAME">...</|DSML|invoke>
</|DSML|tool_calls>
Wrong 4 — empty parameters:
<DSMLtool_calls>
<DSMLinvoke name="Bash">
<DSMLparameter name="command"></DSMLparameter>
</DSMLinvoke>
</DSMLtool_calls>
<|DSML|tool_calls>
<|DSML|invoke name="Bash">
<|DSML|parameter name="command"></|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls>
Remember: The ONLY valid way to use tools is the <DSMLtool_calls>...</DSMLtool_calls> block at the end of your response.
Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.
` + buildCorrectToolExamples(toolNames)
}
@@ -150,21 +150,21 @@ func firstScriptExample(names []string) (promptToolExample, bool) {
// renderToolExampleBlock renders the example calls as one
// <|DSML|tool_calls> block with an <|DSML|invoke> element per call. Each tag is
// written once, in the halfwidth-pipe form used by the tool-call instructions.
// The parameter body of every call is laid out by indentPromptParameters.
func renderToolExampleBlock(calls []promptToolExample) string {
	var b strings.Builder
	b.WriteString("<|DSML|tool_calls>\n")
	for _, call := range calls {
		b.WriteString(` <|DSML|invoke name="`)
		b.WriteString(call.name)
		b.WriteString(`">` + "\n")
		b.WriteString(indentPromptParameters(call.params, " "))
		b.WriteString("\n </|DSML|invoke>\n")
	}
	b.WriteString("</|DSML|tool_calls>")
	return b.String()
}
func indentPromptParameters(body, indent string) string {
if strings.TrimSpace(body) == "" {
return indent + `<DSMLparameter name="content"></DSMLparameter>`
return indent + `<|DSML|parameter name="content"></|DSML|parameter>`
}
lines := strings.Split(body, "\n")
for i, line := range lines {
@@ -178,7 +178,7 @@ func indentPromptParameters(body, indent string) string {
}
// wrapParameter wraps inner in a <|DSML|parameter name="..."> element carrying
// the given name. name and inner are inserted verbatim; no escaping is applied.
func wrapParameter(name, inner string) string {
	return `<|DSML|parameter name="` + name + `">` + inner + `</|DSML|parameter>`
}
func exampleBasicParams(name string) (string, bool) {
@@ -204,7 +204,7 @@ func exampleBasicParams(name string) (string, bool) {
case "Edit":
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + wrapParameter("old_string", promptCDATA("foo")) + "\n" + wrapParameter("new_string", promptCDATA("bar")), true
case "MultiEdit":
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<DSMLparameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></DSMLparameter>`, true
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
}
return "", false
}
@@ -212,11 +212,11 @@ func exampleBasicParams(name string) (string, bool) {
// exampleNestedParams returns example parameter markup for tools whose
// arguments are nested or multi-field. The tool name is matched after trimming
// surrounding whitespace. The boolean result is false, with an empty string,
// when no example exists for the name. Hand-written parameter elements use the
// same <|DSML|parameter> form that wrapParameter is meant to produce.
func exampleNestedParams(name string) (string, bool) {
	switch strings.TrimSpace(name) {
	case "MultiEdit":
		return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
	case "Task":
		return wrapParameter("description", promptCDATA("Investigate flaky tests")) + "\n" + wrapParameter("prompt", promptCDATA("Run targeted tests and summarize failures")), true
	case "ask_followup_question":
		return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `<|DSML|parameter name="follow_up"><item><text>` + promptCDATA("Option A") + `</text></item><item><text>` + promptCDATA("Option B") + `</text></item></|DSML|parameter>`, true
	}
	return "", false
}

View File

@@ -7,20 +7,20 @@ import (
func TestBuildToolCallInstructions_ExecCommandUsesCmdExample(t *testing.T) {
out := BuildToolCallInstructions([]string{"exec_command"})
if !strings.Contains(out, `<DSMLinvoke name="exec_command">`) {
if !strings.Contains(out, `<|DSML|invoke name="exec_command">`) {
t.Fatalf("expected exec_command in examples, got: %s", out)
}
if !strings.Contains(out, `<DSMLparameter name="cmd"><![CDATA[pwd]]></DSMLparameter>`) {
if !strings.Contains(out, `<|DSML|parameter name="cmd"><![CDATA[pwd]]></|DSML|parameter>`) {
t.Fatalf("expected cmd parameter example for exec_command, got: %s", out)
}
}
func TestBuildToolCallInstructions_ExecuteCommandUsesCommandExample(t *testing.T) {
out := BuildToolCallInstructions([]string{"execute_command"})
if !strings.Contains(out, `<DSMLinvoke name="execute_command">`) {
if !strings.Contains(out, `<|DSML|invoke name="execute_command">`) {
t.Fatalf("expected execute_command in examples, got: %s", out)
}
if !strings.Contains(out, `<DSMLparameter name="command"><![CDATA[pwd]]></DSMLparameter>`) {
if !strings.Contains(out, `<|DSML|parameter name="command"><![CDATA[pwd]]></|DSML|parameter>`) {
t.Fatalf("expected command parameter example for execute_command, got: %s", out)
}
}
@@ -34,20 +34,20 @@ func TestBuildToolCallInstructions_BashUsesCommandAndDescriptionExamples(t *test
sawDescription := false
for _, block := range blocks {
if !strings.Contains(block, `<DSMLparameter name="command">`) {
if !strings.Contains(block, `<|DSML|parameter name="command">`) {
t.Fatalf("expected every Bash example to use command parameter, got: %s", block)
}
if strings.Contains(block, `<DSMLparameter name="path">`) || strings.Contains(block, `<DSMLparameter name="content">`) {
if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
t.Fatalf("expected Bash examples not to use file write parameters, got: %s", block)
}
if strings.Contains(block, `<DSMLparameter name="description">`) {
if strings.Contains(block, `<|DSML|parameter name="description">`) {
sawDescription = true
}
}
if !sawDescription {
t.Fatalf("expected Bash long-script example to include description, got: %s", out)
}
if strings.Contains(out, `<DSMLinvoke name="Read">`) {
if strings.Contains(out, `<|DSML|invoke name="Read">`) {
t.Fatalf("expected examples to avoid unavailable hard-coded Read tool, got: %s", out)
}
}
@@ -60,10 +60,10 @@ func TestBuildToolCallInstructions_ExecuteCommandLongScriptUsesCommand(t *testin
}
for _, block := range blocks {
if !strings.Contains(block, `<DSMLparameter name="command">`) {
if !strings.Contains(block, `<|DSML|parameter name="command">`) {
t.Fatalf("expected execute_command examples to use command parameter, got: %s", block)
}
if strings.Contains(block, `<DSMLparameter name="path">`) || strings.Contains(block, `<DSMLparameter name="content">`) {
if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
t.Fatalf("expected execute_command examples not to use file write parameters, got: %s", block)
}
}
@@ -80,10 +80,10 @@ func TestBuildToolCallInstructions_ExecCommandLongScriptUsesCmd(t *testing.T) {
}
for _, block := range blocks {
if !strings.Contains(block, `<DSMLparameter name="cmd">`) {
if !strings.Contains(block, `<|DSML|parameter name="cmd">`) {
t.Fatalf("expected exec_command examples to use cmd parameter, got: %s", block)
}
if strings.Contains(block, `<DSMLparameter name="command">`) || strings.Contains(block, `<DSMLparameter name="path">`) || strings.Contains(block, `<DSMLparameter name="content">`) {
if strings.Contains(block, `<|DSML|parameter name="command">`) || strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
t.Fatalf("expected exec_command examples not to use command or file write parameters, got: %s", block)
}
}
@@ -100,10 +100,10 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) {
}
for _, block := range blocks {
if !strings.Contains(block, `<DSMLparameter name="file_path">`) || !strings.Contains(block, `<DSMLparameter name="content">`) {
if !strings.Contains(block, `<|DSML|parameter name="file_path">`) || !strings.Contains(block, `<|DSML|parameter name="content">`) {
t.Fatalf("expected Write examples to use file_path and content, got: %s", block)
}
if strings.Contains(block, `<DSMLparameter name="path">`) {
if strings.Contains(block, `<|DSML|parameter name="path">`) {
t.Fatalf("expected Write examples not to use path, got: %s", block)
}
}
@@ -111,7 +111,7 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) {
func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *testing.T) {
out := BuildToolCallInstructions([]string{"read_file"})
if !strings.Contains(out, "Never omit the opening <DSMLtool_calls> tag") {
if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") {
t.Fatalf("expected explicit missing-opening-tag warning, got: %s", out)
}
if !strings.Contains(out, "Wrong 3 — missing opening wrapper") {
@@ -135,7 +135,7 @@ func TestBuildToolCallInstructions_RejectsEmptyParametersInPrompt(t *testing.T)
func TestBuildToolCallInstructions_UsesPositiveTagPunctuationAlphabet(t *testing.T) {
out := BuildToolCallInstructions([]string{"Bash"})
want := `Tag punctuation alphabet: ASCII < > / = " plus the fullwidth vertical bar .`
want := `Tag punctuation alphabet: ASCII < > / = " plus the halfwidth pipe |.`
if !strings.Contains(out, want) {
t.Fatalf("expected positive tag punctuation alphabet %q, got: %s", want, out)
}
@@ -147,7 +147,7 @@ func TestBuildToolCallInstructions_UsesPositiveTagPunctuationAlphabet(t *testing
}
func findInvokeBlocks(text, name string) []string {
open := `<DSMLinvoke name="` + name + `">`
open := `<|DSML|invoke name="` + name + `">`
remaining := text
blocks := []string{}
for {
@@ -156,11 +156,11 @@ func findInvokeBlocks(text, name string) []string {
return blocks
}
remaining = remaining[start:]
end := strings.Index(remaining, `</DSMLinvoke>`)
end := strings.Index(remaining, `</|DSML|invoke>`)
if end < 0 {
return blocks
}
end += len(`</DSMLinvoke>`)
end += len(`</|DSML|invoke>`)
blocks = append(blocks, remaining[:end])
remaining = remaining[end:]
}

View File

@@ -491,8 +491,6 @@ func consumeToolMarkupPipe(text string, idx int) (int, bool) {
switch {
case text[idx] == '|':
return idx + 1, true
case strings.HasPrefix(text[idx:], ""):
return idx + len(""), true
case strings.HasPrefix(text[idx:], "│"):
return idx + len("│"), true
case strings.HasPrefix(text[idx:], ""):

View File

@@ -131,14 +131,14 @@ func TestParseToolCallsRejectsCamelPrefixedToolMarkupLookalike(t *testing.T) {
}
func TestParseToolCallsSupportsFullwidthDSMLShell(t *testing.T) {
text := `<tool_calls>
<invoke name="Read">
<parameter name="file_path"<![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/README.md]]</parameter>
</invoke>
<invoke name="Read">
<parameter name="file_path"<![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/index.html]]</parameter>
</invoke>
</tool_calls>`
text := `<|tool_calls>
<|invoke name="Read">
<|parameter name="file_path"<![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/README.md]]</|parameter>
</|invoke>
<|invoke name="Read">
<|parameter name="file_path"<![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/index.html]]</|parameter>
</|invoke>
</|tool_calls>`
calls := ParseToolCalls(text, []string{"Read"})
if len(calls) != 2 {
t.Fatalf("expected two fullwidth DSML calls, got %#v", calls)
@@ -152,20 +152,20 @@ func TestParseToolCallsSupportsFullwidthDSMLShell(t *testing.T) {
}
func TestParseToolCallsSupportsCJKAngleDSMDrift(t *testing.T) {
text := `<DSMtool_calls>
<DSMinvoke name="Bash">
<DSMparameter name="description">〈![CDATA[Show commits on local dev not on origin/dev]]〉〈/DSMparameter〉
<DSMparameter name="command">〈![CDATA[git log --oneline origin/dev..dev]]〉〈/DSMparameter〉
〈/DSMinvoke〉
<DSMinvoke name="Bash">
<DSMparameter name="description">〈![CDATA[Show commits on origin/dev not on local dev]]〉〈/DSMparameter〉
<DSMparameter name="command">〈![CDATA[git log --oneline dev..origin/dev]]〉〈/DSMparameter〉
〈/DSMinvoke〉
<DSMinvoke name="Bash">
<DSMparameter name="description">〈![CDATA[Check tracking branch status]]〉〈/DSMparameter〉
<DSMparameter name="command">〈![CDATA[git status -b --short]]〉〈/DSMparameter〉
〈/DSMinvoke〉
〈/DSMtool_calls〉`
text := `<DSM|tool_calls>
<DSM|invoke name="Bash">
<DSM|parameter name="description"|>〈![CDATA[Show commits on local dev not on origin/dev]]〉〈/DSM|parameter〉
<DSM|parameter name="command"|>〈![CDATA[git log --oneline origin/dev..dev]]〉〈/DSM|parameter〉
〈/DSM|invoke〉
<DSM|invoke name="Bash">
<DSM|parameter name="description"|>〈![CDATA[Show commits on origin/dev not on local dev]]〉〈/DSM|parameter〉
<DSM|parameter name="command"|>〈![CDATA[git log --oneline dev..origin/dev]]〉〈/DSM|parameter〉
〈/DSM|invoke〉
<DSM|invoke name="Bash">
<DSM|parameter name="description"|>〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉
<DSM|parameter name="command"|>〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉
〈/DSM|invoke〉
〈/DSM|tool_calls〉`
calls := ParseToolCalls(text, []string{"Bash"})
if len(calls) != 3 {
@@ -1203,7 +1203,7 @@ func TestFindMatchingToolMarkupCloseBoundaryConditions(t *testing.T) {
}
func TestParseToolCallsSupportsDSMLShellWithFullwidthClosingSlash(t *testing.T) {
text := `<DSMLtool_calls><DSMLinvoke name="execute_code"><DSMLparameter name="code"><![CDATA[print("hi")]]></DSMLparameter></DSMLinvoke><DSMLtool_calls>`
text := `<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke><DSML|tool_calls>`
calls := ParseToolCalls(text, []string{"execute_code"})
if len(calls) != 1 {
t.Fatalf("expected 1 DSML call with fullwidth closing slash, got %#v", calls)
@@ -1214,7 +1214,7 @@ func TestParseToolCallsSupportsDSMLShellWithFullwidthClosingSlash(t *testing.T)
}
func TestParseToolCallsSupportsDSMLShellWithSentencePieceSeparatorAndFullwidthGT(t *testing.T) {
text := `<DSML▁tool_calls><DSML▁invoke▁name="execute_code"><DSML▁parameter▁name="code"><![CDATA[print("hi")]]></DSML▁parameter></DSML▁invoke></DSML▁tool_calls`
text := `<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls`
calls := ParseToolCalls(text, []string{"execute_code"})
if len(calls) != 1 {
t.Fatalf("expected 1 DSML call with sentencepiece separator and fullwidth terminator, got %#v", calls)
@@ -1225,7 +1225,7 @@ func TestParseToolCallsSupportsDSMLShellWithSentencePieceSeparatorAndFullwidthGT
}
func TestParseToolCallsSupportsDSMLShellWithFullwidthLTUnicodeSpaceAndFullwidthAttributes(t *testing.T) {
text := `DSML tool_callsDSML invoke name“execute_code”DSML parameter name“code”<![CDATA[print("hi")]]>DSMLparameterDSMLinvokeDSMLtool_calls`
text := `|DSML tool_calls|DSML invoke name“execute_code”|DSML parameter name“code”<![CDATA[print("hi")]]>DSML|parameterDSML|invokeDSML|tool_calls`
calls := ParseToolCalls(text, []string{"execute_code"})
if len(calls) != 1 {
t.Fatalf("expected 1 DSML call with fullwidth opening delimiter and Unicode attribute confusables, got %#v", calls)

View File

@@ -316,11 +316,11 @@ func TestSieve_CharByCharToolCall(t *testing.T) {
func TestSieve_FullwidthPipeWrapperDSMLInvoke(t *testing.T) {
var state State
chunks := []string{
"<tool_calls>\n",
"<|tool_calls>\n",
"<|DSML|invoke name=\"read_file\">\n",
"<|DSML|parameter name=\"path\">README.md</|DSML|parameter>\n",
"</|DSML|invoke>\n",
"</tool_calls>",
"</|tool_calls>",
}
var events []Event
for _, c := range chunks {
@@ -382,7 +382,7 @@ func TestSieve_TagMentionInTextThenRealToolCall(t *testing.T) {
chunks := []string{
"建议的 commit message\n\nfeat: expand DSML alias support\n\n",
"Add support for <dsml|tool_calls>, ",
"<tool_calls> (fullwidth pipe),\n",
"<|tool_calls> (pipe alias),\n",
"and <|tool_calls> wrapper variants.\n\n",
"<|DSML|tool_calls>\n",
"<|DSML|invoke name=\"Bash\">\n",
@@ -466,14 +466,14 @@ func TestSieve_ReviewSampleWithAliasMentionsPreservesBodyAndToolCalls(t *testing
chunks := []string{
"Done reviewing the diff. Here's my analysis before we commit:\n\n",
"Summary of Changes\n",
"DSML wrapper variant support — recognize aliases (<dsml|tool_calls>, <|tool_calls>, <tool_calls>) alongside canonical <tool_calls> and <|DSML|tool_calls> wrappers.\n\n",
"DSML wrapper variant support — recognize aliases (<dsml|tool_calls>, <|tool_calls>) alongside canonical <tool_calls> and <|DSML|tool_calls> wrappers.\n\n",
"<|DSML|tool_calls>\n",
"<|DSML|invoke name=\"Bash\">\n",
"<|DSML|parameter name=\"command\"><![CDATA[git add docs/toolcall-semantics.md internal/toolstream/tool_sieve_xml.go]]></|DSML|parameter>\n",
"<|DSML|parameter name=\"description\"><![CDATA[Stage all relevant changed files]]></|DSML|parameter>\n",
"</|DSML|invoke>\n",
"<|DSML|invoke name=\"Bash\">\n",
"<|DSML|parameter name=\"command\"><![CDATA[git commit -m \"$(cat <<'EOF'\nfeat(toolstream): expand DSML wrapper detection\n\nSupport DSML wrapper aliases: <dsml|tool_calls>, <|tool_calls>, <tool_calls> alongside existing canonical wrappers.\nEOF\n)\"]]></|DSML|parameter>\n",
"<|DSML|parameter name=\"command\"><![CDATA[git commit -m \"$(cat <<'EOF'\nfeat(toolstream): expand DSML wrapper detection\n\nSupport DSML wrapper aliases: <dsml|tool_calls> and <|tool_calls> alongside existing canonical wrappers.\nEOF\n)\"]]></|DSML|parameter>\n",
"<|DSML|parameter name=\"description\"><![CDATA[Create commit with all staged changes]]></|DSML|parameter>\n",
"</|DSML|invoke>\n",
"</|DSML|tool_calls>",

View File

@@ -626,13 +626,13 @@ func TestProcessToolSieveEmitsAllEmptyDSMLToolBlock(t *testing.T) {
func TestProcessToolSieveEmitsChunkedAllEmptyArbitraryPrefixedToolBlock(t *testing.T) {
chunk := strings.Join([]string{
`<TDSMLtool_calls>`,
` <TDSMLinvoke name="TaskOutput">`,
` <TDSMLparameter name="task_id"></TDSMLparameter>`,
` <TDSMLparameter name="block"></TDSMLparameter>`,
` <TDSMLparameter name="timeout"></TDSMLparameter>`,
` </TDSMLinvoke>`,
` </TDSMLtool_calls>`,
`<T|DSML|tool_calls>`,
` <T|DSML|invoke name="TaskOutput">`,
` <T|DSML|parameter name="task_id"></T|DSML|parameter>`,
` <T|DSML|parameter name="block"></T|DSML|parameter>`,
` <T|DSML|parameter name="timeout"></T|DSML|parameter>`,
` </T|DSML|invoke>`,
` </T|DSML|tool_calls>`,
}, "\n")
calls := collectToolCallsForChunks(t, splitEveryNRBytes(chunk, 8), []string{"TaskOutput"})
if len(calls) != 1 {
@@ -811,8 +811,8 @@ func TestFindPartialXMLToolTagStart(t *testing.T) {
{"partial_tool_calls", "Hello <tool_ca", 6},
{"partial_dsml_trailing_pipe", "Hello <|DSML|tool_calls|", 6},
{"partial_dsml_extra_leading_less_than", "Hello <<|DSML|tool_calls", 6},
{"partial_arbitrary_prefix_before_dsml", "Hello <TDS", 6},
{"partial_arbitrary_prefix_after_dsml_pipe", "Hello <TDSML", 6},
{"partial_arbitrary_prefix_before_dsml", "Hello <T|DS", 6},
{"partial_arbitrary_prefix_after_dsml_pipe", "Hello <T|DSML|", 6},
{"partial_invoke", "Hello <inv", 6},
{"bare_tool_call_not_held", "Hello <tool_name", -1},
{"partial_lt_only", "Text <", 5},
@@ -1091,7 +1091,7 @@ func TestProcessToolSieveRepairsMissingOpeningWrapperWithoutLeakingInvokeText(t
}
}
// Test fullwidth pipe variant: <｜tool_calls> (U+FF5C) should be buffered and parsed.
// Test escaped U+FF5C pipe variant: <\uff5ctool_calls> should be buffered and parsed.
func TestProcessToolSieveFullwidthPipeVariantDoesNotLeak(t *testing.T) {
var state State
chunks := []string{
@@ -1115,19 +1115,19 @@ func TestProcessToolSieveFullwidthPipeVariantDoesNotLeak(t *testing.T) {
}
if strings.Contains(textContent, "invoke") || strings.Contains(textContent, "execute_command") {
t.Fatalf("fullwidth pipe variant leaked to text: %q", textContent)
t.Fatalf("escaped U+FF5C pipe variant leaked to text: %q", textContent)
}
if toolCalls != 1 {
t.Fatalf("expected one tool call from fullwidth pipe variant, got %d events=%#v", toolCalls, events)
t.Fatalf("expected one tool call from escaped U+FF5C pipe variant, got %d events=%#v", toolCalls, events)
}
}
// Test <｜DSML|tool_calls> with DSML invoke/parameter tags should buffer the
// Test <|DSML|tool_calls> with DSML invoke/parameter tags should buffer the
// wrapper instead of leaking it before the block is complete.
func TestProcessToolSieveFullwidthDSMLPrefixVariantDoesNotLeak(t *testing.T) {
var state State
chunks := []string{
"<DSML|tool",
"<|DSML|tool",
"_calls>\n",
"<|DSML|invoke name=\"Bash\">\n",
"<|DSML|parameter name=\"command\"><![CDATA[ls -la /Users/aq/Desktop/myproject/ds2api/]]></|DSML|parameter>\n",
@@ -1232,12 +1232,12 @@ func TestProcessToolSieveDSMLBarePrefixVariantDoesNotLeak(t *testing.T) {
func TestProcessToolSieveCJKAngleDSMDriftDoesNotLeak(t *testing.T) {
var state State
chunks := []string{
"<DSMtool_calls>\n",
"<DSMinvoke name=\"Bash\">\n",
"<DSMparameter name=\"description\">〈![CDATA[Check tracking branch status]]〉〈/DSMparameter〉\n",
"<DSMparameter name=\"command\">〈![CDATA[git status -b --short]]〉〈/DSMparameter〉\n",
"〈/DSMinvoke〉\n",
"〈/DSMtool_calls〉",
"<DSM|tool_calls>\n",
"<DSM|invoke name=\"Bash\">\n",
"<DSM|parameter name=\"description\"|>〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉\n",
"<DSM|parameter name=\"command\"|>〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉\n",
"〈/DSM|invoke〉\n",
"〈/DSM|tool_calls〉",
}
var events []Event
for _, c := range chunks {
@@ -1338,7 +1338,7 @@ func TestProcessToolSieveIdeographicCommaDSMLDriftDoesNotLeak(t *testing.T) {
func TestProcessToolSieveParsesFullwidthClosingSlashAndKeepsSuffixText(t *testing.T) {
var state State
chunk := `<DSMLtool_calls><DSMLinvoke name="execute_code"><DSMLparameter name="code"><![CDATA[print("hi")]]></DSMLparameter></DSMLinvoke><DSMLtool_calls> sao cụm này lại đc trả là 1 message`
chunk := `<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke><DSML|tool_calls> sao cụm này lại đc trả là 1 message`
events := ProcessChunk(&state, chunk, []string{"execute_code"})
events = append(events, Flush(&state, []string{"execute_code"})...)
@@ -1365,7 +1365,7 @@ func TestProcessToolSieveParsesFullwidthClosingSlashAndKeepsSuffixText(t *testin
func TestProcessToolSieveParsesSentencePieceSeparatorAndFullwidthTerminator(t *testing.T) {
var state State
chunk := `<DSML▁tool_calls><DSML▁invoke▁name="execute_code"><DSML▁parameter▁name="code"><![CDATA[print("hi")]]></DSML▁parameter></DSML▁invoke></DSML▁tool_calls suffix`
chunk := `<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls suffix`
events := ProcessChunk(&state, chunk, []string{"execute_code"})
events = append(events, Flush(&state, []string{"execute_code"})...)
@@ -1392,7 +1392,7 @@ func TestProcessToolSieveParsesSentencePieceSeparatorAndFullwidthTerminator(t *t
func TestProcessToolSieveParsesFullwidthOpeningDelimiterAndUnicodeAttributes(t *testing.T) {
var state State
chunk := `DSML tool_callsDSML invoke name“execute_code”DSML parameter name“code”<![CDATA[print("hi")]]>DSMLparameterDSMLinvokeDSMLtool_calls suffix`
chunk := `|DSML tool_calls|DSML invoke name“execute_code”|DSML parameter name“code”<![CDATA[print("hi")]]>DSML|parameterDSML|invokeDSML|tool_calls suffix`
events := ProcessChunk(&state, chunk, []string{"execute_code"})
events = append(events, Flush(&state, []string{"execute_code"})...)

View File

@@ -13,10 +13,10 @@ func TestMessagesPrepareBasic(t *testing.T) {
if got == "" {
t.Fatal("expected non-empty prompt")
}
if !strings.HasPrefix(got, "<begin▁of▁sentence><System>") {
if !strings.HasPrefix(got, "<|begin▁of▁sentence|><|System|>") {
t.Fatalf("expected output integrity guard at the start, got %q", got)
}
if !strings.Contains(got, "Hello") || !strings.HasSuffix(got, "<Assistant>") {
if !strings.Contains(got, "Hello") || !strings.HasSuffix(got, "<|Assistant|>") {
t.Fatalf("unexpected prompt: %q", got)
}
}
@@ -33,31 +33,31 @@ func TestMessagesPrepareRoles(t *testing.T) {
if !contains(got, "Output integrity guard") {
t.Fatalf("expected output integrity guard in %q", got)
}
if !contains(got, "You are helper") || !contains(got, "<User>Hi") {
if !contains(got, "You are helper") || !contains(got, "<|User|>Hi") {
t.Fatalf("expected system/user content in %q", got)
}
if !contains(got, "<begin▁of▁sentence>") {
if !contains(got, "<|begin▁of▁sentence|>") {
t.Fatalf("expected begin marker in %q", got)
}
if !contains(got, "<User>Hi<Assistant>Hello<end▁of▁sentence>") {
if !contains(got, "<|User|>Hi<|Assistant|>Hello<|end▁of▁sentence|>") {
t.Fatalf("expected user/assistant separation in %q", got)
}
if !contains(got, "<Assistant>Hello<end▁of▁sentence><Tool>Search results<end▁of▁toolresults>") {
if !contains(got, "<|Assistant|>Hello<|end▁of▁sentence|><|Tool|>Search results<|end▁of▁toolresults|>") {
t.Fatalf("expected assistant/tool separation in %q", got)
}
if !contains(got, "<Tool>Search results<end▁of▁toolresults><User>How are you") {
if !contains(got, "<|Tool|>Search results<|end▁of▁toolresults|><|User|>How are you") {
t.Fatalf("expected tool/user separation in %q", got)
}
if !contains(got, "<Assistant>") {
if !contains(got, "<|Assistant|>") {
t.Fatalf("expected assistant marker in %q", got)
}
if !contains(got, "<System>") {
if !contains(got, "<|System|>") {
t.Fatalf("expected system marker in %q", got)
}
if !contains(got, "<User>") {
if !contains(got, "<|User|>") {
t.Fatalf("expected user marker in %q", got)
}
if !contains(got, "<Tool>") {
if !contains(got, "<|Tool|>") {
t.Fatalf("expected tool marker in %q", got)
}
}

View File

@@ -162,20 +162,20 @@ func TestMessagesPrepareMergesConsecutiveSameRole(t *testing.T) {
{"role": "user", "content": "World"},
}
got := MessagesPrepare(messages)
if !strings.HasPrefix(got, "<begin▁of▁sentence>") {
if !strings.HasPrefix(got, "<|begin▁of▁sentence|>") {
t.Fatalf("expected user marker at the start, got %q", got)
}
if !strings.Contains(got, "Hello") || !strings.Contains(got, "World") {
t.Fatalf("expected both messages, got %q", got)
}
// Should be merged into a single user turn with one marker at the start.
count := strings.Count(got, "<User>")
count := strings.Count(got, "<|User|>")
if count != 1 {
t.Fatalf("expected one User marker for the merged pair, got %d occurrences", count)
}
// User messages no longer have end_of_sentence markers in the official format.
// The merged pair should have zero end_of_sentence markers (user turn only).
if count := strings.Count(got, "<end▁of▁sentence>"); count != 0 {
if count := strings.Count(got, "<|end▁of▁sentence|>"); count != 0 {
t.Fatalf("expected zero sentence terminators for user-only merge, got %d occurrences", count)
}
}
@@ -186,16 +186,16 @@ func TestMessagesPrepareAssistantMarkers(t *testing.T) {
{"role": "assistant", "content": "Hello!"},
}
got := MessagesPrepare(messages)
if !strings.Contains(got, "<Assistant>") {
if !strings.Contains(got, "<|Assistant|>") {
t.Fatalf("expected assistant marker, got %q", got)
}
if !strings.Contains(got, "<end▁of▁sentence>") {
if !strings.Contains(got, "<|end▁of▁sentence|>") {
t.Fatalf("expected end of sentence marker, got %q", got)
}
if strings.Count(got, "<end▁of▁sentence>") != 1 {
if strings.Count(got, "<|end▁of▁sentence|>") != 1 {
t.Fatalf("expected one end_of_sentence (assistant only), got %q", got)
}
if !strings.Contains(got, "<Assistant>Hello!<end▁of▁sentence>") {
if !strings.Contains(got, "<|Assistant|>Hello!<|end▁of▁sentence|>") {
t.Fatalf("expected assistant EOS suffix, got %q", got)
}
if strings.Contains(got, "<think>") || strings.Contains(got, "</think>") {