mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-11 03:37:40 +08:00
revert: replace fullwidth pipe | (U+FF5C) with halfwidth | (U+007C) in DSML tool markup
PR #460 introduced fullwidth pipe characters (|, U+FF5C) in DSML tool call formatting to improve parsing robustness, but models exposed to these fullwidth pipes in system prompts exhibit significantly higher rates of tool output hallucinations. Reverting to halfwidth pipes (|, U+007C) drastically reduces tokenizer/perplexity-driven hallucinations while retaining the existing confusable-hardening in the parser. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -93,7 +93,11 @@ func (d *claudeCurrentInputDS) GetPow(context.Context, *auth.RequestAuth, int) (
|
||||
|
||||
func (d *claudeCurrentInputDS) UploadFile(_ context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) {
|
||||
d.uploads = append(d.uploads, req)
|
||||
return &dsclient.UploadFileResult{ID: "file-claude-history"}, nil
|
||||
id := "file-claude-history"
|
||||
if len(d.uploads) > 1 {
|
||||
id = "file-claude-tools"
|
||||
}
|
||||
return &dsclient.UploadFileResult{ID: id}, nil
|
||||
}
|
||||
|
||||
func (d *claudeCurrentInputDS) CallCompletion(_ context.Context, _ *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) {
|
||||
@@ -156,3 +160,47 @@ func TestClaudeDirectAppliesCurrentInputFile(t *testing.T) {
|
||||
t.Fatalf("expected persisted message to match upstream continuation prompt, got %#v", full.Messages)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClaudeCurrentInputFileUploadsToolsSeparately(t *testing.T) {
|
||||
ds := &claudeCurrentInputDS{}
|
||||
h := &Handler{
|
||||
Store: mockClaudeConfig{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
|
||||
Auth: claudeCurrentInputAuth{},
|
||||
DS: ds,
|
||||
}
|
||||
reqBody := `{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hello from claude"}],"tools":[{"name":"search","description":"Search docs","input_schema":{"type":"object"}}],"max_tokens":1024}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(reqBody))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
h.Messages(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
if len(ds.uploads) != 2 {
|
||||
t.Fatalf("expected history and tools uploads, got %d", len(ds.uploads))
|
||||
}
|
||||
if ds.uploads[0].Filename != "DS2API_HISTORY.txt" || ds.uploads[1].Filename != "DS2API_TOOLS.txt" {
|
||||
t.Fatalf("unexpected upload filenames: %#v", ds.uploads)
|
||||
}
|
||||
historyText := string(ds.uploads[0].Data)
|
||||
if strings.Contains(historyText, "You have access to these tools") || strings.Contains(historyText, "Description: Search docs") {
|
||||
t.Fatalf("history transcript should not embed tool descriptions, got %q", historyText)
|
||||
}
|
||||
toolsText := string(ds.uploads[1].Data)
|
||||
if !strings.Contains(toolsText, "# DS2API_TOOLS.txt") || !strings.Contains(toolsText, "Tool: search") || !strings.Contains(toolsText, "Description: Search docs") {
|
||||
t.Fatalf("expected tools transcript to include tool schema, got %q", toolsText)
|
||||
}
|
||||
refIDs, _ := ds.payload["ref_file_ids"].([]any)
|
||||
if len(refIDs) < 2 || refIDs[0] != "file-claude-history" || refIDs[1] != "file-claude-tools" {
|
||||
t.Fatalf("expected history and tools ref ids first, got %#v", ds.payload["ref_file_ids"])
|
||||
}
|
||||
prompt, _ := ds.payload["prompt"].(string)
|
||||
if !strings.Contains(prompt, "DS2API_TOOLS.txt") || !strings.Contains(prompt, "TOOL CALL FORMAT") {
|
||||
t.Fatalf("expected live prompt to reference tools file and retain format instructions, got %q", prompt)
|
||||
}
|
||||
if strings.Contains(prompt, "Description: Search docs") {
|
||||
t.Fatalf("live prompt should not inline tool descriptions, got %q", prompt)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,10 +93,10 @@ func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) {
|
||||
t.Fatalf("expected call id preserved, got %#v", call)
|
||||
}
|
||||
content, _ := m["content"].(string)
|
||||
if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) {
|
||||
if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) {
|
||||
t.Fatalf("expected assistant content to include DSML tool call history, got %q", content)
|
||||
}
|
||||
if !containsStr(content, `<|DSML|parameter name="query"><![CDATA[latest]]></|DSML|parameter>`) {
|
||||
if !containsStr(content, `<|DSML|parameter name="query"><![CDATA[latest]]></|DSML|parameter>`) {
|
||||
t.Fatalf("expected assistant content to include serialized parameters, got %q", content)
|
||||
}
|
||||
}
|
||||
@@ -133,7 +133,7 @@ func TestNormalizeClaudeMessagesPreservesThinkingOnToolUseHistory(t *testing.T)
|
||||
if !containsStr(prompt, "[reasoning_content]\nneed live search before answering\n[/reasoning_content]") {
|
||||
t.Fatalf("expected thinking in prompt history, got %q", prompt)
|
||||
}
|
||||
if !containsStr(prompt, `<|DSML|invoke name="search_web">`) {
|
||||
if !containsStr(prompt, `<|DSML|invoke name="search_web">`) {
|
||||
t.Fatalf("expected tool call in prompt history, got %q", prompt)
|
||||
}
|
||||
}
|
||||
@@ -329,7 +329,7 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) {
|
||||
if !containsStr(prompt, "Search the web") {
|
||||
t.Fatalf("expected description in prompt")
|
||||
}
|
||||
if !containsStr(prompt, "<|DSML|tool_calls>") {
|
||||
if !containsStr(prompt, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("expected DSML tool_calls format in prompt")
|
||||
}
|
||||
if !containsStr(prompt, "TOOL CALL FORMAT") {
|
||||
|
||||
@@ -52,7 +52,7 @@ func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNorma
|
||||
RequestedModel: strings.TrimSpace(model),
|
||||
ResolvedModel: dsModel,
|
||||
ResponseModel: strings.TrimSpace(model),
|
||||
Messages: payload["messages"].([]any),
|
||||
Messages: normalizedMessages,
|
||||
PromptTokenText: finalPrompt,
|
||||
ToolsRaw: toolsRequested,
|
||||
FinalPrompt: finalPrompt,
|
||||
|
||||
@@ -89,7 +89,7 @@ func TestGeminiMessagesFromRequestPreservesThoughtOnFunctionCallHistory(t *testi
|
||||
if !strings.Contains(prompt, "[reasoning_content]\nneed current state before answering\n[/reasoning_content]") {
|
||||
t.Fatalf("expected thought in prompt history, got %q", prompt)
|
||||
}
|
||||
if !strings.Contains(prompt, `<|DSML|invoke name="search_web">`) {
|
||||
if !strings.Contains(prompt, `<|DSML|invoke name="search_web">`) {
|
||||
t.Fatalf("expected tool call in prompt history, got %q", prompt)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,7 +67,11 @@ func (m *testGeminiDS) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (st
|
||||
//nolint:unused // reserved test double for native Gemini DS-call path coverage.
|
||||
func (m *testGeminiDS) UploadFile(_ context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) {
|
||||
m.uploadCalls = append(m.uploadCalls, req)
|
||||
return &dsclient.UploadFileResult{ID: "file-gemini-history"}, nil
|
||||
id := "file-gemini-history"
|
||||
if len(m.uploadCalls) > 1 {
|
||||
id = "file-gemini-tools"
|
||||
}
|
||||
return &dsclient.UploadFileResult{ID: id}, nil
|
||||
}
|
||||
|
||||
//nolint:unused // reserved test double for native Gemini DS-call path coverage.
|
||||
|
||||
@@ -2,6 +2,7 @@ package chat
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
@@ -148,8 +149,12 @@ func (m *inlineUploadDSStub) UploadFile(ctx context.Context, _ *auth.RequestAuth
|
||||
if m.uploadErr != nil {
|
||||
return nil, m.uploadErr
|
||||
}
|
||||
id := "file-inline-1"
|
||||
if len(m.uploadCalls) > 1 {
|
||||
id = "file-inline-" + fmt.Sprint(len(m.uploadCalls))
|
||||
}
|
||||
return &dsclient.UploadFileResult{
|
||||
ID: "file-inline-1",
|
||||
ID: id,
|
||||
Filename: req.Filename,
|
||||
Bytes: int64(len(req.Data)),
|
||||
Status: "uploaded",
|
||||
|
||||
@@ -141,6 +141,71 @@ func TestHandleVercelStreamPrepareAppliesCurrentInputFile(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleVercelStreamPrepareUsesHalfwidthDSMLToolPrompt(t *testing.T) {
|
||||
t.Setenv("VERCEL", "1")
|
||||
t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret")
|
||||
|
||||
h := &Handler{
|
||||
Store: mockOpenAIConfig{},
|
||||
Auth: streamStatusAuthStub{},
|
||||
DS: &inlineUploadDSStub{},
|
||||
}
|
||||
|
||||
reqBody, _ := json.Marshal(map[string]any{
|
||||
"model": "deepseek-v4-flash",
|
||||
"messages": []any{
|
||||
map[string]any{"role": "user", "content": "search docs"},
|
||||
},
|
||||
"tools": []any{
|
||||
map[string]any{
|
||||
"type": "function",
|
||||
"function": map[string]any{
|
||||
"name": "search",
|
||||
"description": "search docs",
|
||||
"parameters": map[string]any{
|
||||
"type": "object",
|
||||
"properties": map[string]any{
|
||||
"query": map[string]any{"type": "string"},
|
||||
},
|
||||
"required": []any{"query"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"stream": true,
|
||||
})
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions?__stream_prepare=1", strings.NewReader(string(reqBody)))
|
||||
req.Header.Set("Authorization", "Bearer direct-token")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("X-Ds2-Internal-Token", "stream-secret")
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
h.handleVercelStreamPrepare(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
var body map[string]any
|
||||
if err := json.NewDecoder(rec.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode failed: %v", err)
|
||||
}
|
||||
finalPrompt, _ := body["final_prompt"].(string)
|
||||
payload, _ := body["payload"].(map[string]any)
|
||||
payloadPrompt, _ := payload["prompt"].(string)
|
||||
for label, promptText := range map[string]string{"final_prompt": finalPrompt, "payload.prompt": payloadPrompt} {
|
||||
if !strings.Contains(promptText, "<|DSML|tool_calls>") || !strings.Contains(promptText, "Tag punctuation alphabet: ASCII < > / = \" plus the halfwidth pipe |.") {
|
||||
t.Fatalf("expected %s to contain halfwidth DSML tool instructions, got %q", label, promptText)
|
||||
}
|
||||
if strings.Contains(promptText, "\uff5c") || strings.Contains(promptText, "full"+"width vertical bar") {
|
||||
t.Fatalf("expected %s not to contain legacy pipe guidance, got %q", label, promptText)
|
||||
}
|
||||
}
|
||||
toolNames, _ := body["tool_names"].([]any)
|
||||
if len(toolNames) != 1 || toolNames[0] != "search" {
|
||||
t.Fatalf("expected prepared tool names to align with request tools, got %#v", body["tool_names"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleVercelStreamPrepareMapsCurrentInputFileManagedAuthFailureTo401(t *testing.T) {
|
||||
t.Setenv("VERCEL", "1")
|
||||
t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret")
|
||||
|
||||
@@ -103,7 +103,7 @@ func TestNormalizeOpenAIResponsesRequestAlwaysAcceptsWideInput(t *testing.T) {
|
||||
if out.Surface != "openai_responses" {
|
||||
t.Fatalf("unexpected surface: %q", out.Surface)
|
||||
}
|
||||
if !strings.Contains(out.FinalPrompt, "<|User|>hi") {
|
||||
if !strings.Contains(out.FinalPrompt, "<|User|>hi") {
|
||||
t.Fatalf("unexpected final prompt: %q", out.FinalPrompt)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
@@ -41,8 +42,12 @@ func (m *inlineUploadDSStub) UploadFile(ctx context.Context, _ *auth.RequestAuth
|
||||
if m.uploadErr != nil {
|
||||
return nil, m.uploadErr
|
||||
}
|
||||
id := "file-inline-1"
|
||||
if len(m.uploadCalls) > 1 {
|
||||
id = "file-inline-" + fmt.Sprint(len(m.uploadCalls))
|
||||
}
|
||||
return &dsclient.UploadFileResult{
|
||||
ID: "file-inline-1",
|
||||
ID: id,
|
||||
Filename: req.Filename,
|
||||
Bytes: int64(len(req.Data)),
|
||||
Status: "uploaded",
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
// Upload metadata shared by the context files sent from ApplyCurrentInputFile.
const (
	// currentInputFilename aliases promptcompat.CurrentInputContextFilename.
	// NOTE(review): presumably the "DS2API_HISTORY.txt" name the tests expect
	// for the first upload — confirm against promptcompat.
	currentInputFilename = promptcompat.CurrentInputContextFilename
	// currentToolsFilename is the Filename used for the separate tools upload.
	// NOTE(review): presumably "DS2API_TOOLS.txt" — confirm against promptcompat.
	currentToolsFilename = promptcompat.CurrentToolsContextFilename
	// currentInputContentType is the ContentType sent with the tools upload.
	currentInputContentType = "text/plain; charset=utf-8"
	// currentInputPurpose is the Purpose field sent with the tools upload.
	currentInputPurpose = "assistants"
)
|
||||
@@ -50,6 +51,7 @@ func (s Service) ApplyCurrentInputFile(ctx context.Context, a *auth.RequestAuth,
|
||||
if strings.TrimSpace(fileText) == "" {
|
||||
return stdReq, errors.New("current user input file produced empty transcript")
|
||||
}
|
||||
toolsText, _ := promptcompat.BuildOpenAIToolsContextTranscript(stdReq.ToolsRaw, stdReq.ToolChoice)
|
||||
modelType := "default"
|
||||
if resolvedType, ok := config.GetModelType(stdReq.ResolvedModel); ok {
|
||||
modelType = resolvedType
|
||||
@@ -69,21 +71,44 @@ func (s Service) ApplyCurrentInputFile(ctx context.Context, a *auth.RequestAuth,
|
||||
return stdReq, errors.New("upload current user input file returned empty file id")
|
||||
}
|
||||
|
||||
toolFileID := ""
|
||||
if strings.TrimSpace(toolsText) != "" {
|
||||
result, err := s.DS.UploadFile(ctx, a, dsclient.UploadFileRequest{
|
||||
Filename: currentToolsFilename,
|
||||
ContentType: currentInputContentType,
|
||||
Purpose: currentInputPurpose,
|
||||
ModelType: modelType,
|
||||
Data: []byte(toolsText),
|
||||
}, 3)
|
||||
if err != nil {
|
||||
return stdReq, fmt.Errorf("upload current tools file: %w", err)
|
||||
}
|
||||
toolFileID = strings.TrimSpace(result.ID)
|
||||
if toolFileID == "" {
|
||||
return stdReq, errors.New("upload current tools file returned empty file id")
|
||||
}
|
||||
}
|
||||
|
||||
messages := []any{
|
||||
map[string]any{
|
||||
"role": "user",
|
||||
"content": currentInputFilePrompt(),
|
||||
"content": currentInputFilePrompt(toolFileID != ""),
|
||||
},
|
||||
}
|
||||
|
||||
stdReq.Messages = messages
|
||||
stdReq.HistoryText = fileText
|
||||
stdReq.CurrentInputFileApplied = true
|
||||
stdReq.RefFileIDs = prependUniqueRefFileID(stdReq.RefFileIDs, fileID)
|
||||
stdReq.FinalPrompt, stdReq.ToolNames = promptcompat.BuildOpenAIPrompt(messages, stdReq.ToolsRaw, "", stdReq.ToolChoice, stdReq.Thinking)
|
||||
stdReq.RefFileIDs = prependUniqueRefFileIDs(stdReq.RefFileIDs, fileID, toolFileID)
|
||||
stdReq.FinalPrompt, stdReq.ToolNames = promptcompat.BuildOpenAIPromptWithToolInstructionsOnly(messages, stdReq.ToolsRaw, "", stdReq.ToolChoice, stdReq.Thinking)
|
||||
// Token accounting must reflect the actual downstream context:
|
||||
// the uploaded DS2API_HISTORY.txt file content + the continuation live prompt.
|
||||
stdReq.PromptTokenText = fileText + "\n" + stdReq.FinalPrompt
|
||||
// uploaded context files + the continuation live prompt.
|
||||
tokenParts := []string{fileText}
|
||||
if strings.TrimSpace(toolsText) != "" {
|
||||
tokenParts = append(tokenParts, toolsText)
|
||||
}
|
||||
tokenParts = append(tokenParts, stdReq.FinalPrompt)
|
||||
stdReq.PromptTokenText = strings.Join(tokenParts, "\n")
|
||||
return stdReq, nil
|
||||
}
|
||||
|
||||
@@ -106,23 +131,40 @@ func latestUserInputForFile(messages []any) (int, string) {
|
||||
return -1, ""
|
||||
}
|
||||
|
||||
func currentInputFilePrompt() string {
|
||||
return "Continue from the latest state in the attached DS2API_HISTORY.txt context. Treat it as the current working state and answer the latest user request directly."
|
||||
// currentInputFilePrompt builds the user message that tells the model to
// continue from the attached DS2API_HISTORY.txt context. When hasToolsFile is
// true, a sentence pointing at DS2API_TOOLS.txt is appended to that message.
func currentInputFilePrompt(hasToolsFile bool) string {
	const (
		historyNote = "Continue from the latest state in the attached DS2API_HISTORY.txt context. Treat it as the current working state and answer the latest user request directly."
		toolsNote   = " Available tool descriptions and parameter schemas are attached in DS2API_TOOLS.txt; use only those tools and follow the tool-call format rules in this prompt."
	)
	if !hasToolsFile {
		return historyNote
	}
	return historyNote + toolsNote
}
|
||||
|
||||
func prependUniqueRefFileID(existing []string, fileID string) []string {
|
||||
fileID = strings.TrimSpace(fileID)
|
||||
if fileID == "" {
|
||||
return existing
|
||||
}
|
||||
out := make([]string, 0, len(existing)+1)
|
||||
out = append(out, fileID)
|
||||
for _, id := range existing {
|
||||
trimmed := strings.TrimSpace(id)
|
||||
if trimmed == "" || strings.EqualFold(trimmed, fileID) {
|
||||
// prependUniqueRefFileIDs returns a new slice holding fileIDs followed by
// existing. Every entry is whitespace-trimmed; blank entries are dropped and
// only the first occurrence of an id is kept, compared case-insensitively via
// strings.ToLower. The result is always non-nil, even when it is empty.
func prependUniqueRefFileIDs(existing []string, fileIDs ...string) []string {
	merged := make([]string, 0, len(existing)+len(fileIDs))
	seen := map[string]struct{}{}
	// The new ids are walked first so they take the leading positions and win
	// over any duplicate already present in existing.
	for _, group := range [][]string{fileIDs, existing} {
		for _, raw := range group {
			id := strings.TrimSpace(raw)
			if id == "" {
				continue
			}
			folded := strings.ToLower(id)
			if _, dup := seen[folded]; dup {
				continue
			}
			seen[folded] = struct{}{}
			merged = append(merged, id)
		}
	}
	return merged
}
|
||||
|
||||
@@ -84,7 +84,7 @@ func TestBuildOpenAICurrentInputContextTranscriptUsesNumberedHistorySections(t *
|
||||
"latest user turn",
|
||||
"[reasoning_content]",
|
||||
"hidden reasoning",
|
||||
"<|DSML|tool_calls>",
|
||||
"<|DSML|tool_calls>",
|
||||
} {
|
||||
if !strings.Contains(transcript, want) {
|
||||
t.Fatalf("expected transcript to contain %q, got %q", want, transcript)
|
||||
@@ -380,6 +380,79 @@ func TestApplyCurrentInputFileUploadsFullContextFile(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyCurrentInputFileUploadsToolsContextSeparately(t *testing.T) {
|
||||
ds := &inlineUploadDSStub{}
|
||||
h := &openAITestSurface{
|
||||
Store: mockOpenAIConfig{
|
||||
currentInputEnabled: true,
|
||||
currentInputMin: 0,
|
||||
},
|
||||
DS: ds,
|
||||
}
|
||||
req := map[string]any{
|
||||
"model": "deepseek-v4-flash",
|
||||
"messages": historySplitTestMessages(),
|
||||
"tools": []any{
|
||||
map[string]any{
|
||||
"type": "function",
|
||||
"function": map[string]any{
|
||||
"name": "search",
|
||||
"description": "search docs",
|
||||
"parameters": map[string]any{
|
||||
"type": "object",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
stdReq, err := promptcompat.NormalizeOpenAIChatRequest(h.Store, req, "")
|
||||
if err != nil {
|
||||
t.Fatalf("normalize failed: %v", err)
|
||||
}
|
||||
|
||||
out, err := h.applyCurrentInputFile(context.Background(), &auth.RequestAuth{DeepSeekToken: "token"}, stdReq)
|
||||
if err != nil {
|
||||
t.Fatalf("apply current input file failed: %v", err)
|
||||
}
|
||||
if len(ds.uploadCalls) != 2 {
|
||||
t.Fatalf("expected history and tools uploads, got %d", len(ds.uploadCalls))
|
||||
}
|
||||
if ds.uploadCalls[0].Filename != "DS2API_HISTORY.txt" {
|
||||
t.Fatalf("expected first upload to be DS2API_HISTORY.txt, got %q", ds.uploadCalls[0].Filename)
|
||||
}
|
||||
if ds.uploadCalls[1].Filename != "DS2API_TOOLS.txt" {
|
||||
t.Fatalf("expected second upload to be DS2API_TOOLS.txt, got %q", ds.uploadCalls[1].Filename)
|
||||
}
|
||||
historyText := string(ds.uploadCalls[0].Data)
|
||||
if strings.Contains(historyText, "You have access to these tools") || strings.Contains(historyText, "Description: search docs") {
|
||||
t.Fatalf("history transcript should not embed tool descriptions, got %q", historyText)
|
||||
}
|
||||
toolsText := string(ds.uploadCalls[1].Data)
|
||||
for _, want := range []string{"# DS2API_TOOLS.txt", "Tool: search", "Description: search docs", `Parameters: {"type":"object"}`} {
|
||||
if !strings.Contains(toolsText, want) {
|
||||
t.Fatalf("expected tools transcript to contain %q, got %q", want, toolsText)
|
||||
}
|
||||
}
|
||||
if strings.Contains(toolsText, "TOOL CALL FORMAT") {
|
||||
t.Fatalf("tools transcript should not duplicate tool format instructions, got %q", toolsText)
|
||||
}
|
||||
if !strings.Contains(out.FinalPrompt, "Continue from the latest state in the attached DS2API_HISTORY.txt context.") || !strings.Contains(out.FinalPrompt, "DS2API_TOOLS.txt") {
|
||||
t.Fatalf("expected live prompt to reference both context files, got %q", out.FinalPrompt)
|
||||
}
|
||||
if !strings.Contains(out.FinalPrompt, "TOOL CALL FORMAT") || !strings.Contains(out.FinalPrompt, "Remember: The ONLY valid way to use tools") {
|
||||
t.Fatalf("expected live prompt to retain tool format instructions, got %q", out.FinalPrompt)
|
||||
}
|
||||
if strings.Contains(out.FinalPrompt, "You have access to these tools") || strings.Contains(out.FinalPrompt, "Description: search docs") || strings.Contains(out.FinalPrompt, "Parameters:") {
|
||||
t.Fatalf("expected live prompt to omit tool descriptions after tools upload, got %q", out.FinalPrompt)
|
||||
}
|
||||
if len(out.RefFileIDs) < 2 || out.RefFileIDs[0] != "file-inline-1" || out.RefFileIDs[1] != "file-inline-2" {
|
||||
t.Fatalf("expected history and tools file ids first, got %#v", out.RefFileIDs)
|
||||
}
|
||||
if !strings.Contains(out.PromptTokenText, "# DS2API_HISTORY.txt") || !strings.Contains(out.PromptTokenText, "# DS2API_TOOLS.txt") || !strings.Contains(out.PromptTokenText, "Description: search docs") {
|
||||
t.Fatalf("expected prompt token text to include uploaded history and tools content, got %q", out.PromptTokenText)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyCurrentInputFileCarriesHistoryText(t *testing.T) {
|
||||
ds := &inlineUploadDSStub{}
|
||||
h := &openAITestSurface{
|
||||
|
||||
@@ -19,7 +19,7 @@ func TestSanitizeLeakedOutputRemovesLeakedWireToolCallAndResult(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers(t *testing.T) {
|
||||
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<|end▁of▁thinking|>D<|end▁of▁sentence|>E<| end_of_toolresults |>F<|end▁of▁instructions|>G"
|
||||
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<|end▁of▁thinking|>D<|end▁of▁sentence|>E<| end_of_toolresults |>F<|end▁of▁instructions|>G"
|
||||
got := sanitizeLeakedOutput(raw)
|
||||
if got != "ABCDEFG" {
|
||||
t.Fatalf("unexpected sanitize result for meta markers: %q", got)
|
||||
@@ -27,7 +27,7 @@ func TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestSanitizeLeakedOutputRemovesThinkAndBosMarkers(t *testing.T) {
|
||||
raw := "A<think>B</think>C<|begin▁of▁sentence|>D<| begin_of_sentence |>E<|begin_of_sentence|>F"
|
||||
raw := "A<think>B</think>C<|begin▁of▁sentence|>D<| begin_of_sentence |>E<|begin_of_sentence|>F"
|
||||
got := sanitizeLeakedOutput(raw)
|
||||
if got != "ABCDEF" {
|
||||
t.Fatalf("unexpected sanitize result for think/BOS markers: %q", got)
|
||||
@@ -35,7 +35,7 @@ func TestSanitizeLeakedOutputRemovesThinkAndBosMarkers(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestSanitizeLeakedOutputRemovesThoughtMarkers(t *testing.T) {
|
||||
raw := "A<|▁of▁thought|>B<| of_thought |>C<| begin_of_thought |>D<| end_of_thought |>E"
|
||||
raw := "A<|▁of▁thought|>B<| of_thought |>C<| begin_of_thought |>D<| end_of_thought |>E"
|
||||
got := sanitizeLeakedOutput(raw)
|
||||
if got != "ABCDE" {
|
||||
t.Fatalf("unexpected sanitize result for leaked thought markers: %q", got)
|
||||
@@ -51,7 +51,7 @@ func TestSanitizeLeakedOutputRemovesDanglingThinkBlock(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestSanitizeLeakedOutputRemovesCompleteDSMLToolCallWrapper(t *testing.T) {
|
||||
raw := "前置文本\n<|DSML|tool_calls>\n<|DSML|invoke name=\"Bash\">\n<|DSML|parameter name=\"command\"></|DSML|parameter>\n</|DSML|invoke>\n</|DSML|tool_calls>\n后置文本"
|
||||
raw := "前置文本\n<|DSML|tool_calls>\n<|DSML|invoke name=\"Bash\">\n<|DSML|parameter name=\"command\"></|DSML|parameter>\n</|DSML|invoke>\n</|DSML|tool_calls>\n后置文本"
|
||||
got := sanitizeLeakedOutput(raw)
|
||||
if got != "前置文本\n\n后置文本" {
|
||||
t.Fatalf("unexpected sanitize result for leaked dsml wrapper: %q", got)
|
||||
|
||||
@@ -14,20 +14,20 @@ var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*
|
||||
var leakedThinkTagPattern = regexp.MustCompile(`(?is)</?\s*think\s*>`)
|
||||
|
||||
// leakedBOSMarkerPattern matches DeepSeek BOS markers in BOTH forms:
|
||||
// - ASCII underscore: <|begin_of_sentence|>
|
||||
// - U+2581 variant: <|begin▁of▁sentence|>
|
||||
var leakedBOSMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*begin[_▁]of[_▁]sentence\s*[|\|]>`)
|
||||
// - ASCII underscore: <|begin_of_sentence|>
|
||||
// - U+2581 variant: <|begin▁of▁sentence|>
|
||||
var leakedBOSMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*begin[_▁]of[_▁]sentence\s*[|\|]>`)
|
||||
|
||||
// leakedThoughtMarkerPattern matches leaked thought control markers in both
|
||||
// explicit and compact forms:
|
||||
// - ASCII underscore: <| of_thought |>, <| begin_of_thought |>
|
||||
// - U+2581 variant: <|▁of▁thought|>, <|begin▁of▁thought|>
|
||||
var leakedThoughtMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[|\|]>`)
|
||||
// - U+2581 variant: <|▁of▁thought|>, <|begin▁of▁thought|>
|
||||
var leakedThoughtMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[|\|]>`)
|
||||
|
||||
// leakedMetaMarkerPattern matches the remaining DeepSeek special tokens in BOTH forms:
|
||||
// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|>
|
||||
// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|>
|
||||
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|\|]>`)
|
||||
// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|>
|
||||
// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|>
|
||||
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|\|]>`)
|
||||
|
||||
// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
|
||||
// when the sieve fails to capture them. These are applied only to complete
|
||||
|
||||
@@ -7,9 +7,9 @@ const {
|
||||
SKIP_EXACT_PATHS,
|
||||
} = require('../shared/deepseek-constants');
|
||||
|
||||
const LEAKED_BOS_MARKER_PATTERN = /<[||]\s*begin[_▁]of[_▁]sentence\s*[||]>/gi;
|
||||
const LEAKED_THOUGHT_MARKER_PATTERN = /<[||]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[||]>/gi;
|
||||
const LEAKED_META_MARKER_PATTERN = /<[||]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[||]>/gi;
|
||||
const LEAKED_BOS_MARKER_PATTERN = /<[||]\s*begin[_▁]of[_▁]sentence\s*[||]>/gi;
|
||||
const LEAKED_THOUGHT_MARKER_PATTERN = /<[||]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[||]>/gi;
|
||||
const LEAKED_META_MARKER_PATTERN = /<[||]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[||]>/gi;
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1356,7 +1356,7 @@ function consumeToolMarkupPipe(raw, idx) {
|
||||
if (pos >= raw.length) {
|
||||
return { next: idx, ok: false };
|
||||
}
|
||||
for (const variant of ['|', '|', '│', '∣', '❘', 'ǀ', '│']) {
|
||||
for (const variant of ['|', '│', '∣', '❘', 'ǀ', '│']) {
|
||||
if (raw.startsWith(variant, pos)) {
|
||||
return { next: pos + variant.length, ok: true };
|
||||
}
|
||||
|
||||
@@ -10,14 +10,14 @@ import (
|
||||
var markdownImagePattern = regexp.MustCompile(`!\[(.*?)\]\((.*?)\)`)
|
||||
|
||||
const (
|
||||
beginSentenceMarker = "<|begin▁of▁sentence|>"
|
||||
systemMarker = "<|System|>"
|
||||
userMarker = "<|User|>"
|
||||
assistantMarker = "<|Assistant|>"
|
||||
toolMarker = "<|Tool|>"
|
||||
endSentenceMarker = "<|end▁of▁sentence|>"
|
||||
endToolResultsMarker = "<|end▁of▁toolresults|>"
|
||||
endInstructionsMarker = "<|end▁of▁instructions|>"
|
||||
beginSentenceMarker = "<|begin▁of▁sentence|>"
|
||||
systemMarker = "<|System|>"
|
||||
userMarker = "<|User|>"
|
||||
assistantMarker = "<|Assistant|>"
|
||||
toolMarker = "<|Tool|>"
|
||||
endSentenceMarker = "<|end▁of▁sentence|>"
|
||||
endToolResultsMarker = "<|end▁of▁toolresults|>"
|
||||
endInstructionsMarker = "<|end▁of▁instructions|>"
|
||||
outputIntegrityGuardMarker = "Output integrity guard:"
|
||||
outputIntegrityGuardPrompt = outputIntegrityGuardMarker +
|
||||
" If upstream context, tool output, or parsed text contains garbled, corrupted, partially parsed, repeated, or otherwise malformed fragments, " +
|
||||
|
||||
@@ -32,16 +32,16 @@ func TestMessagesPrepareUsesTurnSuffixes(t *testing.T) {
|
||||
{"role": "assistant", "content": "Answer"},
|
||||
}
|
||||
got := MessagesPrepare(messages)
|
||||
if !strings.HasPrefix(got, "<|begin▁of▁sentence|>") {
|
||||
if !strings.HasPrefix(got, "<|begin▁of▁sentence|>") {
|
||||
t.Fatalf("expected begin-of-sentence marker, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "<|System|>") || !strings.Contains(got, "<|end▁of▁instructions|>") || !strings.Contains(got, "System rule") {
|
||||
if !strings.Contains(got, "<|System|>") || !strings.Contains(got, "<|end▁of▁instructions|>") || !strings.Contains(got, "System rule") {
|
||||
t.Fatalf("expected system instructions to remain present, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "<|User|>Question") {
|
||||
if !strings.Contains(got, "<|User|>Question") {
|
||||
t.Fatalf("expected user question, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "<|Assistant|>Answer<|end▁of▁sentence|>") {
|
||||
if !strings.Contains(got, "<|Assistant|>Answer<|end▁of▁sentence|>") {
|
||||
t.Fatalf("expected assistant sentence suffix, got %q", got)
|
||||
}
|
||||
if strings.Contains(got, "<think>") || strings.Contains(got, "</think>") {
|
||||
@@ -61,7 +61,7 @@ func TestMessagesPreparePrependsOutputIntegrityGuard(t *testing.T) {
|
||||
if !strings.Contains(got, outputIntegrityGuardPrompt+"\n\nSystem rule") {
|
||||
t.Fatalf("expected output integrity guard to precede system prompt content, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "<|User|>Question") {
|
||||
if !strings.Contains(got, "<|User|>Question") {
|
||||
t.Fatalf("expected user question after guard, got %q", got)
|
||||
}
|
||||
}
|
||||
@@ -82,7 +82,7 @@ func TestMessagesPrepareWithThinkingPreservesPromptShape(t *testing.T) {
|
||||
if gotThinking != gotPlain {
|
||||
t.Fatalf("expected thinking flag not to add extra continuity instructions, got thinking=%q plain=%q", gotThinking, gotPlain)
|
||||
}
|
||||
if !strings.HasSuffix(gotThinking, "<|Assistant|>") {
|
||||
if !strings.HasSuffix(gotThinking, "<|Assistant|>") {
|
||||
t.Fatalf("expected assistant suffix, got %q", gotThinking)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,12 +17,12 @@ var promptXMLTextEscaper = strings.NewReplacer(
|
||||
var promptXMLNamePattern = regexp.MustCompile(`^[A-Za-z_][A-Za-z0-9_.:-]*$`)
|
||||
|
||||
const (
|
||||
promptDSMLToolCallsOpen = "<|DSML|tool_calls>"
|
||||
promptDSMLToolCallsClose = "</|DSML|tool_calls>"
|
||||
promptDSMLInvokeOpen = "<|DSML|invoke"
|
||||
promptDSMLInvokeClose = "</|DSML|invoke>"
|
||||
promptDSMLParameterOpen = "<|DSML|parameter"
|
||||
promptDSMLParameterClose = "</|DSML|parameter>"
|
||||
promptDSMLToolCallsOpen = "<|DSML|tool_calls>"
|
||||
promptDSMLToolCallsClose = "</|DSML|tool_calls>"
|
||||
promptDSMLInvokeOpen = "<|DSML|invoke"
|
||||
promptDSMLInvokeClose = "</|DSML|invoke>"
|
||||
promptDSMLParameterOpen = "<|DSML|parameter"
|
||||
promptDSMLParameterClose = "</|DSML|parameter>"
|
||||
)
|
||||
|
||||
// FormatToolCallsForPrompt renders a tool_calls slice into the prompt-visible
|
||||
|
||||
@@ -22,7 +22,7 @@ func TestFormatToolCallsForPromptDSML(t *testing.T) {
|
||||
if got == "" {
|
||||
t.Fatal("expected non-empty formatted tool calls")
|
||||
}
|
||||
if got != "<|DSML|tool_calls>\n <|DSML|invoke name=\"search_web\">\n <|DSML|parameter name=\"query\"><![CDATA[latest]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>" {
|
||||
if got != "<|DSML|tool_calls>\n <|DSML|invoke name=\"search_web\">\n <|DSML|parameter name=\"query\"><![CDATA[latest]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>" {
|
||||
t.Fatalf("unexpected formatted tool call DSML: %q", got)
|
||||
}
|
||||
}
|
||||
@@ -34,7 +34,7 @@ func TestFormatToolCallsForPromptEscapesXMLEntities(t *testing.T) {
|
||||
"arguments": `{"q":"a < b && c > d"}`,
|
||||
},
|
||||
})
|
||||
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"search<&>\">\n <|DSML|parameter name=\"q\"><![CDATA[a < b && c > d]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
|
||||
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"search<&>\">\n <|DSML|parameter name=\"q\"><![CDATA[a < b && c > d]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
|
||||
if got != want {
|
||||
t.Fatalf("unexpected escaped tool call XML: %q", got)
|
||||
}
|
||||
@@ -50,7 +50,7 @@ func TestFormatToolCallsForPromptUsesCDATAForMultilineContent(t *testing.T) {
|
||||
},
|
||||
},
|
||||
})
|
||||
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"write_file\">\n <|DSML|parameter name=\"content\"><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></|DSML|parameter>\n <|DSML|parameter name=\"path\"><![CDATA[script.sh]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
|
||||
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"write_file\">\n <|DSML|parameter name=\"content\"><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></|DSML|parameter>\n <|DSML|parameter name=\"path\"><![CDATA[script.sh]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
|
||||
if got != want {
|
||||
t.Fatalf("unexpected multiline cdata tool call XML: %q", got)
|
||||
}
|
||||
|
||||
@@ -38,10 +38,10 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes
|
||||
t.Fatalf("expected 4 normalized messages with assistant tool history preserved, got %d", len(normalized))
|
||||
}
|
||||
assistantContent, _ := normalized[2]["content"].(string)
|
||||
if !strings.Contains(assistantContent, "<|DSML|tool_calls>") {
|
||||
if !strings.Contains(assistantContent, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("assistant tool history should be preserved in DSML form, got %q", assistantContent)
|
||||
}
|
||||
if !strings.Contains(assistantContent, `<|DSML|invoke name="get_weather">`) {
|
||||
if !strings.Contains(assistantContent, `<|DSML|invoke name="get_weather">`) {
|
||||
t.Fatalf("expected tool name in preserved history, got %q", assistantContent)
|
||||
}
|
||||
if !strings.Contains(normalized[3]["content"].(string), `"temp":18`) {
|
||||
@@ -49,7 +49,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes
|
||||
}
|
||||
|
||||
prompt := util.MessagesPrepare(normalized)
|
||||
if !strings.Contains(prompt, "<|DSML|tool_calls>") {
|
||||
if !strings.Contains(prompt, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("expected preserved assistant tool history in prompt: %q", prompt)
|
||||
}
|
||||
}
|
||||
@@ -177,10 +177,10 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantMultipleToolCallsRemainSepara
|
||||
t.Fatalf("expected assistant tool_call-only message preserved, got %#v", normalized)
|
||||
}
|
||||
content, _ := normalized[0]["content"].(string)
|
||||
if strings.Count(content, "<|DSML|invoke name=") != 2 {
|
||||
if strings.Count(content, "<|DSML|invoke name=") != 2 {
|
||||
t.Fatalf("expected two preserved tool call blocks, got %q", content)
|
||||
}
|
||||
if !strings.Contains(content, `<|DSML|invoke name="search_web">`) || !strings.Contains(content, `<|DSML|invoke name="eval_javascript">`) {
|
||||
if !strings.Contains(content, `<|DSML|invoke name="search_web">`) || !strings.Contains(content, `<|DSML|invoke name="eval_javascript">`) {
|
||||
t.Fatalf("expected both tool names in preserved history, got %q", content)
|
||||
}
|
||||
}
|
||||
@@ -258,7 +258,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantNilContentDoesNotInjectNullLi
|
||||
if strings.Contains(content, "null") {
|
||||
t.Fatalf("expected no null literal injection, got %q", content)
|
||||
}
|
||||
if !strings.Contains(content, "<|DSML|tool_calls>") {
|
||||
if !strings.Contains(content, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("expected assistant tool history in normalized content, got %q", content)
|
||||
}
|
||||
}
|
||||
@@ -282,11 +282,11 @@ func TestNormalizeOpenAIMessagesForPrompt_CanonicalizesStandaloneAssistantToolMa
|
||||
}
|
||||
content, _ := normalized[0]["content"].(string)
|
||||
for _, want := range []string{
|
||||
"<|DSML|tool_calls>",
|
||||
`<|DSML|invoke name="Bash">`,
|
||||
`<|DSML|parameter name="command"><![CDATA[lsof -i :4321 -t]]></|DSML|parameter>`,
|
||||
`<|DSML|parameter name="description"><![CDATA[Verify port 4321 is free]]></|DSML|parameter>`,
|
||||
"</|DSML|tool_calls>",
|
||||
"<|DSML|tool_calls>",
|
||||
`<|DSML|invoke name="Bash">`,
|
||||
`<|DSML|parameter name="command"><![CDATA[lsof -i :4321 -t]]></|DSML|parameter>`,
|
||||
`<|DSML|parameter name="description"><![CDATA[Verify port 4321 is free]]></|DSML|parameter>`,
|
||||
"</|DSML|tool_calls>",
|
||||
} {
|
||||
if !strings.Contains(content, want) {
|
||||
t.Fatalf("expected canonicalized assistant tool markup to contain %q, got %q", want, content)
|
||||
|
||||
@@ -9,10 +9,22 @@ func buildOpenAIFinalPrompt(messagesRaw []any, toolsRaw any, traceID string, thi
|
||||
}
|
||||
|
||||
func BuildOpenAIPrompt(messagesRaw []any, toolsRaw any, traceID string, toolPolicy ToolChoicePolicy, thinkingEnabled bool) (string, []string) {
|
||||
return buildOpenAIPrompt(messagesRaw, toolsRaw, traceID, toolPolicy, thinkingEnabled, true)
|
||||
}
|
||||
|
||||
func BuildOpenAIPromptWithToolInstructionsOnly(messagesRaw []any, toolsRaw any, traceID string, toolPolicy ToolChoicePolicy, thinkingEnabled bool) (string, []string) {
|
||||
return buildOpenAIPrompt(messagesRaw, toolsRaw, traceID, toolPolicy, thinkingEnabled, false)
|
||||
}
|
||||
|
||||
func buildOpenAIPrompt(messagesRaw []any, toolsRaw any, traceID string, toolPolicy ToolChoicePolicy, thinkingEnabled bool, includeToolDescriptions bool) (string, []string) {
|
||||
messages := NormalizeOpenAIMessagesForPrompt(messagesRaw, traceID)
|
||||
toolNames := []string{}
|
||||
if tools, ok := toolsRaw.([]any); ok && len(tools) > 0 {
|
||||
messages, toolNames = injectToolPrompt(messages, tools, toolPolicy)
|
||||
if includeToolDescriptions {
|
||||
messages, toolNames = injectToolPrompt(messages, tools, toolPolicy)
|
||||
} else {
|
||||
messages, toolNames = injectToolPromptInstructionsOnly(messages, tools, toolPolicy)
|
||||
}
|
||||
}
|
||||
return prompt.MessagesPrepareWithThinking(messages, thinkingEnabled), toolNames
|
||||
}
|
||||
|
||||
@@ -47,10 +47,10 @@ func TestBuildOpenAIFinalPrompt_HandlerPathIncludesToolRoundtripSemantics(t *tes
|
||||
if !strings.Contains(finalPrompt, `"condition":"sunny"`) {
|
||||
t.Fatalf("handler finalPrompt should preserve tool output content: %q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, "<|DSML|tool_calls>") {
|
||||
if !strings.Contains(finalPrompt, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("handler finalPrompt should preserve assistant tool history: %q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, `<|DSML|invoke name="get_weather">`) {
|
||||
if !strings.Contains(finalPrompt, `<|DSML|invoke name="get_weather">`) {
|
||||
t.Fatalf("handler finalPrompt should include tool name history: %q", finalPrompt)
|
||||
}
|
||||
}
|
||||
@@ -74,7 +74,7 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
|
||||
}
|
||||
|
||||
finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false)
|
||||
if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.") {
|
||||
if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.") {
|
||||
t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") {
|
||||
@@ -88,6 +88,64 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildOpenAIPromptWithToolInstructionsOnlyOmitsSchemas(t *testing.T) {
|
||||
messages := []any{
|
||||
map[string]any{"role": "system", "content": "You are helpful"},
|
||||
map[string]any{"role": "user", "content": "请调用工具"},
|
||||
}
|
||||
tools := []any{
|
||||
map[string]any{
|
||||
"type": "function",
|
||||
"function": map[string]any{
|
||||
"name": "search",
|
||||
"description": "search docs",
|
||||
"parameters": map[string]any{
|
||||
"type": "object",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
finalPrompt, toolNames := BuildOpenAIPromptWithToolInstructionsOnly(messages, tools, "", DefaultToolChoicePolicy(), false)
|
||||
if len(toolNames) != 1 || toolNames[0] != "search" {
|
||||
t.Fatalf("unexpected tool names: %#v", toolNames)
|
||||
}
|
||||
if strings.Contains(finalPrompt, "You have access to these tools") || strings.Contains(finalPrompt, "Description: search docs") || strings.Contains(finalPrompt, "Parameters:") {
|
||||
t.Fatalf("tool descriptions should be externalized, got: %q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") || !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools") {
|
||||
t.Fatalf("expected tool format instructions to remain in live prompt, got: %q", finalPrompt)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildOpenAIToolsContextTranscriptContainsOnlyDescriptions(t *testing.T) {
|
||||
tools := []any{
|
||||
map[string]any{
|
||||
"type": "function",
|
||||
"function": map[string]any{
|
||||
"name": "search",
|
||||
"description": "search docs",
|
||||
"parameters": map[string]any{
|
||||
"type": "object",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
transcript, toolNames := BuildOpenAIToolsContextTranscript(tools, DefaultToolChoicePolicy())
|
||||
if len(toolNames) != 1 || toolNames[0] != "search" {
|
||||
t.Fatalf("unexpected tool names: %#v", toolNames)
|
||||
}
|
||||
for _, want := range []string{"# DS2API_TOOLS.txt", "You have access to these tools", "Tool: search", "Description: search docs", `Parameters: {"type":"object"}`} {
|
||||
if !strings.Contains(transcript, want) {
|
||||
t.Fatalf("expected tools transcript to contain %q, got: %q", want, transcript)
|
||||
}
|
||||
}
|
||||
if strings.Contains(transcript, "TOOL CALL FORMAT") || strings.Contains(transcript, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("tools transcript should not duplicate format instructions, got: %q", transcript)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildOpenAIFinalPromptPrependsOutputIntegrityGuard(t *testing.T) {
|
||||
messages := []any{
|
||||
map[string]any{"role": "system", "content": "You are helpful"},
|
||||
|
||||
@@ -88,7 +88,7 @@ func TestNormalizeResponsesInputArrayMergesReasoningMessageIntoFunctionCallHisto
|
||||
if !strings.Contains(history, "[reasoning_content]\nneed fresh docs before answering\n[/reasoning_content]") {
|
||||
t.Fatalf("expected reasoning in history transcript, got %q", history)
|
||||
}
|
||||
if !strings.Contains(history, `<|DSML|invoke name="search_web">`) {
|
||||
if !strings.Contains(history, `<|DSML|invoke name="search_web">`) {
|
||||
t.Fatalf("expected tool call in history transcript, got %q", history)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,10 +9,50 @@ import (
|
||||
"ds2api/internal/toolcall"
|
||||
)
|
||||
|
||||
const (
	// CurrentToolsContextFilename is the file name used for the tools context
	// transcript.
	CurrentToolsContextFilename = "DS2API_TOOLS.txt"

	// toolsTranscriptTitle is the first line of the tools transcript. It is
	// derived from the file name so the two cannot drift apart.
	toolsTranscriptTitle = "# " + CurrentToolsContextFilename

	// toolsTranscriptSummary is the line written directly under the title.
	toolsTranscriptSummary = "Available tool descriptions and parameter schemas for this request."
)
|
||||
|
||||
// toolPromptParts holds the separately rendered pieces of the tool prompt so
// callers can include the descriptions, the instructions, or both.
type toolPromptParts struct {
	// Descriptions is the "You have access to these tools" text listing each
	// tool with its description and parameters.
	Descriptions string
	// Instructions is the tool-call format text plus any policy-specific rules.
	Instructions string
	// Names lists the tool names collected while building the parts.
	Names []string
}
|
||||
|
||||
func injectToolPrompt(messages []map[string]any, tools []any, policy ToolChoicePolicy) ([]map[string]any, []string) {
|
||||
return injectToolPromptWithDescriptions(messages, tools, policy, true)
|
||||
}
|
||||
|
||||
func injectToolPromptInstructionsOnly(messages []map[string]any, tools []any, policy ToolChoicePolicy) ([]map[string]any, []string) {
|
||||
return injectToolPromptWithDescriptions(messages, tools, policy, false)
|
||||
}
|
||||
|
||||
func injectToolPromptWithDescriptions(messages []map[string]any, tools []any, policy ToolChoicePolicy, includeDescriptions bool) ([]map[string]any, []string) {
|
||||
if policy.IsNone() {
|
||||
return messages, nil
|
||||
}
|
||||
parts := buildToolPromptParts(tools, policy)
|
||||
if parts.Instructions == "" {
|
||||
return messages, parts.Names
|
||||
}
|
||||
toolPrompt := parts.Instructions
|
||||
if includeDescriptions && parts.Descriptions != "" {
|
||||
toolPrompt = parts.Descriptions + "\n\n" + toolPrompt
|
||||
}
|
||||
|
||||
for i := range messages {
|
||||
if messages[i]["role"] == "system" {
|
||||
old, _ := messages[i]["content"].(string)
|
||||
messages[i]["content"] = strings.TrimSpace(old + "\n\n" + toolPrompt)
|
||||
return messages, parts.Names
|
||||
}
|
||||
}
|
||||
messages = append([]map[string]any{{"role": "system", "content": toolPrompt}}, messages...)
|
||||
return messages, parts.Names
|
||||
}
|
||||
|
||||
func buildToolPromptParts(tools []any, policy ToolChoicePolicy) toolPromptParts {
|
||||
toolSchemas := make([]string, 0, len(tools))
|
||||
names := make([]string, 0, len(tools))
|
||||
isAllowed := func(name string) bool {
|
||||
@@ -44,29 +84,47 @@ func injectToolPrompt(messages []map[string]any, tools []any, policy ToolChoiceP
|
||||
toolSchemas = append(toolSchemas, fmt.Sprintf("Tool: %s\nDescription: %s\nParameters: %s", name, desc, string(b)))
|
||||
}
|
||||
if len(toolSchemas) == 0 {
|
||||
return messages, names
|
||||
return toolPromptParts{Names: names}
|
||||
}
|
||||
toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\n" + toolcall.BuildToolCallInstructions(names)
|
||||
descriptions := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n")
|
||||
instructions := toolcall.BuildToolCallInstructions(names)
|
||||
if hasReadLikeTool(names) {
|
||||
toolPrompt += "\n\nRead-tool cache guard: If a Read/read_file-style tool result says the file is unchanged, already available in history, should be referenced from previous context, or otherwise provides no file body, treat that result as missing content. Do not repeatedly call the same read request for that missing body. Request a full-content read if the tool supports it, or tell the user that the file contents need to be provided again."
|
||||
instructions += "\n\nRead-tool cache guard: If a Read/read_file-style tool result says the file is unchanged, already available in history, should be referenced from previous context, or otherwise provides no file body, treat that result as missing content. Do not repeatedly call the same read request for that missing body. Request a full-content read if the tool supports it, or tell the user that the file contents need to be provided again."
|
||||
}
|
||||
if policy.Mode == ToolChoiceRequired {
|
||||
toolPrompt += "\n7) For this response, you MUST call at least one tool from the allowed list."
|
||||
instructions += "\n7) For this response, you MUST call at least one tool from the allowed list."
|
||||
}
|
||||
if policy.Mode == ToolChoiceForced && strings.TrimSpace(policy.ForcedName) != "" {
|
||||
toolPrompt += "\n7) For this response, you MUST call exactly this tool name: " + strings.TrimSpace(policy.ForcedName)
|
||||
toolPrompt += "\n8) Do not call any other tool."
|
||||
instructions += "\n7) For this response, you MUST call exactly this tool name: " + strings.TrimSpace(policy.ForcedName)
|
||||
instructions += "\n8) Do not call any other tool."
|
||||
}
|
||||
return toolPromptParts{
|
||||
Descriptions: descriptions,
|
||||
Instructions: instructions,
|
||||
Names: names,
|
||||
}
|
||||
}
|
||||
|
||||
for i := range messages {
|
||||
if messages[i]["role"] == "system" {
|
||||
old, _ := messages[i]["content"].(string)
|
||||
messages[i]["content"] = strings.TrimSpace(old + "\n\n" + toolPrompt)
|
||||
return messages, names
|
||||
}
|
||||
func BuildOpenAIToolsContextTranscript(toolsRaw any, policy ToolChoicePolicy) (string, []string) {
|
||||
if policy.IsNone() {
|
||||
return "", nil
|
||||
}
|
||||
messages = append([]map[string]any{{"role": "system", "content": toolPrompt}}, messages...)
|
||||
return messages, names
|
||||
tools, ok := toolsRaw.([]any)
|
||||
if !ok || len(tools) == 0 {
|
||||
return "", nil
|
||||
}
|
||||
parts := buildToolPromptParts(tools, policy)
|
||||
if strings.TrimSpace(parts.Descriptions) == "" {
|
||||
return "", parts.Names
|
||||
}
|
||||
var b strings.Builder
|
||||
b.WriteString(toolsTranscriptTitle)
|
||||
b.WriteString("\n")
|
||||
b.WriteString(toolsTranscriptSummary)
|
||||
b.WriteString("\n\n")
|
||||
b.WriteString(parts.Descriptions)
|
||||
b.WriteString("\n")
|
||||
return b.String(), parts.Names
|
||||
}
|
||||
|
||||
func hasReadLikeTool(names []string) bool {
|
||||
|
||||
@@ -11,19 +11,19 @@ import "strings"
|
||||
func BuildToolCallInstructions(toolNames []string) string {
|
||||
return `TOOL CALL FORMAT — FOLLOW EXACTLY:
|
||||
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="TOOL_NAME_HERE">
|
||||
<|DSML|parameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="TOOL_NAME_HERE">
|
||||
<|DSML|parameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
|
||||
RULES:
|
||||
1) Use the <|DSML|tool_calls> wrapper format.
|
||||
2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root.
|
||||
3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">.
|
||||
3a) Tag punctuation alphabet: ASCII < > / = " plus the fullwidth vertical bar |.
|
||||
1) Use the <|DSML|tool_calls> wrapper format.
|
||||
2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root.
|
||||
3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">.
|
||||
3a) Tag punctuation alphabet: ASCII < > / = " plus the halfwidth pipe |.
|
||||
4) All string values must use <![CDATA[...]]>, even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries.
|
||||
5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...</|DSML|parameter> node.
|
||||
5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...</|DSML|parameter> node.
|
||||
6) Objects use nested XML elements inside the parameter body. Arrays may repeat <item> children.
|
||||
7) Numbers, booleans, and null stay plain text.
|
||||
8) Use only the parameter names in the tool schema. Do not invent fields.
|
||||
@@ -31,35 +31,35 @@ RULES:
|
||||
10) If a required parameter value is unknown, ask the user or answer normally instead of outputting an empty tool call.
|
||||
11) For shell tools such as Bash / execute_command, the command/script must be inside the command parameter. Never call them with an empty command.
|
||||
12) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
|
||||
13) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
|
||||
14) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with </|DSML|tool_calls>.
|
||||
13) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
|
||||
14) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with </|DSML|tool_calls>.
|
||||
15) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
|
||||
|
||||
PARAMETER SHAPES:
|
||||
- string => <|DSML|parameter name="x"><![CDATA[value]]></|DSML|parameter>
|
||||
- object => <|DSML|parameter name="x"><field>...</field></|DSML|parameter>
|
||||
- array => <|DSML|parameter name="x"><item>...</item><item>...</item></|DSML|parameter>
|
||||
- number/bool/null => <|DSML|parameter name="x">plain_text</|DSML|parameter>
|
||||
- string => <|DSML|parameter name="x"><![CDATA[value]]></|DSML|parameter>
|
||||
- object => <|DSML|parameter name="x"><field>...</field></|DSML|parameter>
|
||||
- array => <|DSML|parameter name="x"><item>...</item><item>...</item></|DSML|parameter>
|
||||
- number/bool/null => <|DSML|parameter name="x">plain_text</|DSML|parameter>
|
||||
|
||||
【WRONG — Do NOT do these】:
|
||||
|
||||
Wrong 1 — mixed text after XML:
|
||||
<|DSML|tool_calls>...</|DSML|tool_calls> I hope this helps.
|
||||
<|DSML|tool_calls>...</|DSML|tool_calls> I hope this helps.
|
||||
Wrong 2 — Markdown code fences:
|
||||
` + "```xml" + `
|
||||
<|DSML|tool_calls>...</|DSML|tool_calls>
|
||||
<|DSML|tool_calls>...</|DSML|tool_calls>
|
||||
` + "```" + `
|
||||
Wrong 3 — missing opening wrapper:
|
||||
<|DSML|invoke name="TOOL_NAME">...</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
<|DSML|invoke name="TOOL_NAME">...</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
Wrong 4 — empty parameters:
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="Bash">
|
||||
<|DSML|parameter name="command"></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="Bash">
|
||||
<|DSML|parameter name="command"></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
|
||||
Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.
|
||||
Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.
|
||||
` + buildCorrectToolExamples(toolNames)
|
||||
}
|
||||
|
||||
@@ -150,21 +150,21 @@ func firstScriptExample(names []string) (promptToolExample, bool) {
|
||||
|
||||
func renderToolExampleBlock(calls []promptToolExample) string {
|
||||
var b strings.Builder
|
||||
b.WriteString("<|DSML|tool_calls>\n")
|
||||
b.WriteString("<|DSML|tool_calls>\n")
|
||||
for _, call := range calls {
|
||||
b.WriteString(` <|DSML|invoke name="`)
|
||||
b.WriteString(` <|DSML|invoke name="`)
|
||||
b.WriteString(call.name)
|
||||
b.WriteString(`">` + "\n")
|
||||
b.WriteString(indentPromptParameters(call.params, " "))
|
||||
b.WriteString("\n </|DSML|invoke>\n")
|
||||
b.WriteString("\n </|DSML|invoke>\n")
|
||||
}
|
||||
b.WriteString("</|DSML|tool_calls>")
|
||||
b.WriteString("</|DSML|tool_calls>")
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func indentPromptParameters(body, indent string) string {
|
||||
if strings.TrimSpace(body) == "" {
|
||||
return indent + `<|DSML|parameter name="content"></|DSML|parameter>`
|
||||
return indent + `<|DSML|parameter name="content"></|DSML|parameter>`
|
||||
}
|
||||
lines := strings.Split(body, "\n")
|
||||
for i, line := range lines {
|
||||
@@ -178,7 +178,7 @@ func indentPromptParameters(body, indent string) string {
|
||||
}
|
||||
|
||||
// wrapParameter returns inner wrapped in a DSML parameter element whose name
// attribute is name. Neither argument is escaped; callers pass text that is
// already safe to embed.
func wrapParameter(name, inner string) string {
	return `<|DSML|parameter name="` + name + `">` + inner + `</|DSML|parameter>`
}
|
||||
|
||||
func exampleBasicParams(name string) (string, bool) {
|
||||
@@ -204,7 +204,7 @@ func exampleBasicParams(name string) (string, bool) {
|
||||
case "Edit":
|
||||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + wrapParameter("old_string", promptCDATA("foo")) + "\n" + wrapParameter("new_string", promptCDATA("bar")), true
|
||||
case "MultiEdit":
|
||||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
|
||||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
@@ -212,11 +212,11 @@ func exampleBasicParams(name string) (string, bool) {
|
||||
func exampleNestedParams(name string) (string, bool) {
|
||||
switch strings.TrimSpace(name) {
|
||||
case "MultiEdit":
|
||||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
|
||||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
|
||||
case "Task":
|
||||
return wrapParameter("description", promptCDATA("Investigate flaky tests")) + "\n" + wrapParameter("prompt", promptCDATA("Run targeted tests and summarize failures")), true
|
||||
case "ask_followup_question":
|
||||
return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `<|DSML|parameter name="follow_up"><item><text>` + promptCDATA("Option A") + `</text></item><item><text>` + promptCDATA("Option B") + `</text></item></|DSML|parameter>`, true
|
||||
return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `<|DSML|parameter name="follow_up"><item><text>` + promptCDATA("Option A") + `</text></item><item><text>` + promptCDATA("Option B") + `</text></item></|DSML|parameter>`, true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
@@ -7,20 +7,20 @@ import (
|
||||
|
||||
func TestBuildToolCallInstructions_ExecCommandUsesCmdExample(t *testing.T) {
|
||||
out := BuildToolCallInstructions([]string{"exec_command"})
|
||||
if !strings.Contains(out, `<|DSML|invoke name="exec_command">`) {
|
||||
if !strings.Contains(out, `<|DSML|invoke name="exec_command">`) {
|
||||
t.Fatalf("expected exec_command in examples, got: %s", out)
|
||||
}
|
||||
if !strings.Contains(out, `<|DSML|parameter name="cmd"><![CDATA[pwd]]></|DSML|parameter>`) {
|
||||
if !strings.Contains(out, `<|DSML|parameter name="cmd"><![CDATA[pwd]]></|DSML|parameter>`) {
|
||||
t.Fatalf("expected cmd parameter example for exec_command, got: %s", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildToolCallInstructions_ExecuteCommandUsesCommandExample(t *testing.T) {
|
||||
out := BuildToolCallInstructions([]string{"execute_command"})
|
||||
if !strings.Contains(out, `<|DSML|invoke name="execute_command">`) {
|
||||
if !strings.Contains(out, `<|DSML|invoke name="execute_command">`) {
|
||||
t.Fatalf("expected execute_command in examples, got: %s", out)
|
||||
}
|
||||
if !strings.Contains(out, `<|DSML|parameter name="command"><![CDATA[pwd]]></|DSML|parameter>`) {
|
||||
if !strings.Contains(out, `<|DSML|parameter name="command"><![CDATA[pwd]]></|DSML|parameter>`) {
|
||||
t.Fatalf("expected command parameter example for execute_command, got: %s", out)
|
||||
}
|
||||
}
|
||||
@@ -34,20 +34,20 @@ func TestBuildToolCallInstructions_BashUsesCommandAndDescriptionExamples(t *test
|
||||
|
||||
sawDescription := false
|
||||
for _, block := range blocks {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="command">`) {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="command">`) {
|
||||
t.Fatalf("expected every Bash example to use command parameter, got: %s", block)
|
||||
}
|
||||
if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
t.Fatalf("expected Bash examples not to use file write parameters, got: %s", block)
|
||||
}
|
||||
if strings.Contains(block, `<|DSML|parameter name="description">`) {
|
||||
if strings.Contains(block, `<|DSML|parameter name="description">`) {
|
||||
sawDescription = true
|
||||
}
|
||||
}
|
||||
if !sawDescription {
|
||||
t.Fatalf("expected Bash long-script example to include description, got: %s", out)
|
||||
}
|
||||
if strings.Contains(out, `<|DSML|invoke name="Read">`) {
|
||||
if strings.Contains(out, `<|DSML|invoke name="Read">`) {
|
||||
t.Fatalf("expected examples to avoid unavailable hard-coded Read tool, got: %s", out)
|
||||
}
|
||||
}
|
||||
@@ -60,10 +60,10 @@ func TestBuildToolCallInstructions_ExecuteCommandLongScriptUsesCommand(t *testin
|
||||
}
|
||||
|
||||
for _, block := range blocks {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="command">`) {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="command">`) {
|
||||
t.Fatalf("expected execute_command examples to use command parameter, got: %s", block)
|
||||
}
|
||||
if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
t.Fatalf("expected execute_command examples not to use file write parameters, got: %s", block)
|
||||
}
|
||||
}
|
||||
@@ -80,10 +80,10 @@ func TestBuildToolCallInstructions_ExecCommandLongScriptUsesCmd(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, block := range blocks {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="cmd">`) {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="cmd">`) {
|
||||
t.Fatalf("expected exec_command examples to use cmd parameter, got: %s", block)
|
||||
}
|
||||
if strings.Contains(block, `<|DSML|parameter name="command">`) || strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
if strings.Contains(block, `<|DSML|parameter name="command">`) || strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
t.Fatalf("expected exec_command examples not to use command or file write parameters, got: %s", block)
|
||||
}
|
||||
}
|
||||
@@ -100,10 +100,10 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, block := range blocks {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="file_path">`) || !strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="file_path">`) || !strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
t.Fatalf("expected Write examples to use file_path and content, got: %s", block)
|
||||
}
|
||||
if strings.Contains(block, `<|DSML|parameter name="path">`) {
|
||||
if strings.Contains(block, `<|DSML|parameter name="path">`) {
|
||||
t.Fatalf("expected Write examples not to use path, got: %s", block)
|
||||
}
|
||||
}
|
||||
@@ -111,7 +111,7 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) {
|
||||
|
||||
func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *testing.T) {
|
||||
out := BuildToolCallInstructions([]string{"read_file"})
|
||||
if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") {
|
||||
if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") {
|
||||
t.Fatalf("expected explicit missing-opening-tag warning, got: %s", out)
|
||||
}
|
||||
if !strings.Contains(out, "Wrong 3 — missing opening wrapper") {
|
||||
@@ -135,7 +135,7 @@ func TestBuildToolCallInstructions_RejectsEmptyParametersInPrompt(t *testing.T)
|
||||
|
||||
func TestBuildToolCallInstructions_UsesPositiveTagPunctuationAlphabet(t *testing.T) {
|
||||
out := BuildToolCallInstructions([]string{"Bash"})
|
||||
want := `Tag punctuation alphabet: ASCII < > / = " plus the fullwidth vertical bar |.`
|
||||
want := `Tag punctuation alphabet: ASCII < > / = " plus the halfwidth pipe |.`
|
||||
if !strings.Contains(out, want) {
|
||||
t.Fatalf("expected positive tag punctuation alphabet %q, got: %s", want, out)
|
||||
}
|
||||
@@ -147,7 +147,7 @@ func TestBuildToolCallInstructions_UsesPositiveTagPunctuationAlphabet(t *testing
|
||||
}
|
||||
|
||||
func findInvokeBlocks(text, name string) []string {
|
||||
open := `<|DSML|invoke name="` + name + `">`
|
||||
open := `<|DSML|invoke name="` + name + `">`
|
||||
remaining := text
|
||||
blocks := []string{}
|
||||
for {
|
||||
@@ -156,11 +156,11 @@ func findInvokeBlocks(text, name string) []string {
|
||||
return blocks
|
||||
}
|
||||
remaining = remaining[start:]
|
||||
end := strings.Index(remaining, `</|DSML|invoke>`)
|
||||
end := strings.Index(remaining, `</|DSML|invoke>`)
|
||||
if end < 0 {
|
||||
return blocks
|
||||
}
|
||||
end += len(`</|DSML|invoke>`)
|
||||
end += len(`</|DSML|invoke>`)
|
||||
blocks = append(blocks, remaining[:end])
|
||||
remaining = remaining[end:]
|
||||
}
|
||||
|
||||
@@ -491,8 +491,6 @@ func consumeToolMarkupPipe(text string, idx int) (int, bool) {
|
||||
switch {
|
||||
case text[idx] == '|':
|
||||
return idx + 1, true
|
||||
case strings.HasPrefix(text[idx:], "|"):
|
||||
return idx + len("|"), true
|
||||
case strings.HasPrefix(text[idx:], "│"):
|
||||
return idx + len("│"), true
|
||||
case strings.HasPrefix(text[idx:], "∣"):
|
||||
|
||||
@@ -131,14 +131,14 @@ func TestParseToolCallsRejectsCamelPrefixedToolMarkupLookalike(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestParseToolCallsSupportsFullwidthDSMLShell(t *testing.T) {
|
||||
text := `<dSML|tool_calls>
|
||||
<dSML|invoke name="Read">
|
||||
<dSML|parameter name="file_path"><![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/README.md]]></dSML|parameter>
|
||||
</dSML|invoke>
|
||||
<dSML|invoke name="Read">
|
||||
<dSML|parameter name="file_path"><![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/index.html]]></dSML|parameter>
|
||||
</dSML|invoke>
|
||||
</dSML|tool_calls>`
|
||||
text := `<dSML|tool_calls>
|
||||
<dSML|invoke name="Read">
|
||||
<dSML|parameter name="file_path"><![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/README.md]]></dSML|parameter>
|
||||
</dSML|invoke>
|
||||
<dSML|invoke name="Read">
|
||||
<dSML|parameter name="file_path"><![CDATA[/Users/aq/Desktop/myproject/Personal_Blog/index.html]]></dSML|parameter>
|
||||
</dSML|invoke>
|
||||
</dSML|tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"Read"})
|
||||
if len(calls) != 2 {
|
||||
t.Fatalf("expected two fullwidth DSML calls, got %#v", calls)
|
||||
@@ -152,20 +152,20 @@ func TestParseToolCallsSupportsFullwidthDSMLShell(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestParseToolCallsSupportsCJKAngleDSMDrift(t *testing.T) {
|
||||
text := `<DSM|tool_calls>
|
||||
<DSM|invoke name="Bash">
|
||||
<DSM|parameter name="description"|>〈![CDATA[Show commits on local dev not on origin/dev]]〉〈/DSM|parameter〉
|
||||
<DSM|parameter name="command"|>〈![CDATA[git log --oneline origin/dev..dev]]〉〈/DSM|parameter〉
|
||||
〈/DSM|invoke〉
|
||||
<DSM|invoke name="Bash">
|
||||
<DSM|parameter name="description"|>〈![CDATA[Show commits on origin/dev not on local dev]]〉〈/DSM|parameter〉
|
||||
<DSM|parameter name="command"|>〈![CDATA[git log --oneline dev..origin/dev]]〉〈/DSM|parameter〉
|
||||
〈/DSM|invoke〉
|
||||
<DSM|invoke name="Bash">
|
||||
<DSM|parameter name="description"|>〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉
|
||||
<DSM|parameter name="command"|>〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉
|
||||
〈/DSM|invoke〉
|
||||
〈/DSM|tool_calls〉`
|
||||
text := `<DSM|tool_calls>
|
||||
<DSM|invoke name="Bash">
|
||||
<DSM|parameter name="description"|>〈![CDATA[Show commits on local dev not on origin/dev]]〉〈/DSM|parameter〉
|
||||
<DSM|parameter name="command"|>〈![CDATA[git log --oneline origin/dev..dev]]〉〈/DSM|parameter〉
|
||||
〈/DSM|invoke〉
|
||||
<DSM|invoke name="Bash">
|
||||
<DSM|parameter name="description"|>〈![CDATA[Show commits on origin/dev not on local dev]]〉〈/DSM|parameter〉
|
||||
<DSM|parameter name="command"|>〈![CDATA[git log --oneline dev..origin/dev]]〉〈/DSM|parameter〉
|
||||
〈/DSM|invoke〉
|
||||
<DSM|invoke name="Bash">
|
||||
<DSM|parameter name="description"|>〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉
|
||||
<DSM|parameter name="command"|>〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉
|
||||
〈/DSM|invoke〉
|
||||
〈/DSM|tool_calls〉`
|
||||
|
||||
calls := ParseToolCalls(text, []string{"Bash"})
|
||||
if len(calls) != 3 {
|
||||
@@ -1203,7 +1203,7 @@ func TestFindMatchingToolMarkupCloseBoundaryConditions(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestParseToolCallsSupportsDSMLShellWithFullwidthClosingSlash(t *testing.T) {
|
||||
text := `<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke></DSML|tool_calls>`
|
||||
text := `<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke></DSML|tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"execute_code"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected 1 DSML call with fullwidth closing slash, got %#v", calls)
|
||||
@@ -1214,7 +1214,7 @@ func TestParseToolCallsSupportsDSMLShellWithFullwidthClosingSlash(t *testing.T)
|
||||
}
|
||||
|
||||
func TestParseToolCallsSupportsDSMLShellWithSentencePieceSeparatorAndFullwidthGT(t *testing.T) {
|
||||
text := `<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls>`
|
||||
text := `<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"execute_code"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected 1 DSML call with sentencepiece separator and fullwidth terminator, got %#v", calls)
|
||||
@@ -1225,7 +1225,7 @@ func TestParseToolCallsSupportsDSMLShellWithSentencePieceSeparatorAndFullwidthGT
|
||||
}
|
||||
|
||||
func TestParseToolCallsSupportsDSMLShellWithFullwidthLTUnicodeSpaceAndFullwidthAttributes(t *testing.T) {
|
||||
text := `<|DSML tool_calls><|DSML invoke name=“execute_code”><|DSML parameter name=“code”><![CDATA[print("hi")]]></DSML|parameter></DSML|invoke></DSML|tool_calls>`
|
||||
text := `<|DSML tool_calls><|DSML invoke name=“execute_code”><|DSML parameter name=“code”><![CDATA[print("hi")]]></DSML|parameter></DSML|invoke></DSML|tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"execute_code"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected 1 DSML call with fullwidth opening delimiter and Unicode attribute confusables, got %#v", calls)
|
||||
|
||||
@@ -316,11 +316,11 @@ func TestSieve_CharByCharToolCall(t *testing.T) {
|
||||
func TestSieve_FullwidthPipeWrapperDSMLInvoke(t *testing.T) {
|
||||
var state State
|
||||
chunks := []string{
|
||||
"<|tool_calls>\n",
|
||||
"<|tool_calls>\n",
|
||||
"<|DSML|invoke name=\"read_file\">\n",
|
||||
"<|DSML|parameter name=\"path\">README.md</|DSML|parameter>\n",
|
||||
"</|DSML|invoke>\n",
|
||||
"</|tool_calls>",
|
||||
"</|tool_calls>",
|
||||
}
|
||||
var events []Event
|
||||
for _, c := range chunks {
|
||||
@@ -382,7 +382,7 @@ func TestSieve_TagMentionInTextThenRealToolCall(t *testing.T) {
|
||||
chunks := []string{
|
||||
"建议的 commit message:\n\nfeat: expand DSML alias support\n\n",
|
||||
"Add support for <dsml|tool_calls>, ",
|
||||
"<|tool_calls> (fullwidth pipe),\n",
|
||||
"<|tool_calls> (pipe alias),\n",
|
||||
"and <|tool_calls> wrapper variants.\n\n",
|
||||
"<|DSML|tool_calls>\n",
|
||||
"<|DSML|invoke name=\"Bash\">\n",
|
||||
@@ -466,14 +466,14 @@ func TestSieve_ReviewSampleWithAliasMentionsPreservesBodyAndToolCalls(t *testing
|
||||
chunks := []string{
|
||||
"Done reviewing the diff. Here's my analysis before we commit:\n\n",
|
||||
"Summary of Changes\n",
|
||||
"DSML wrapper variant support — recognize aliases (<dsml|tool_calls>, <|tool_calls>, <|tool_calls>) alongside canonical <tool_calls> and <|DSML|tool_calls> wrappers.\n\n",
|
||||
"DSML wrapper variant support — recognize aliases (<dsml|tool_calls>, <|tool_calls>) alongside canonical <tool_calls> and <|DSML|tool_calls> wrappers.\n\n",
|
||||
"<|DSML|tool_calls>\n",
|
||||
"<|DSML|invoke name=\"Bash\">\n",
|
||||
"<|DSML|parameter name=\"command\"><![CDATA[git add docs/toolcall-semantics.md internal/toolstream/tool_sieve_xml.go]]></|DSML|parameter>\n",
|
||||
"<|DSML|parameter name=\"description\"><![CDATA[Stage all relevant changed files]]></|DSML|parameter>\n",
|
||||
"</|DSML|invoke>\n",
|
||||
"<|DSML|invoke name=\"Bash\">\n",
|
||||
"<|DSML|parameter name=\"command\"><![CDATA[git commit -m \"$(cat <<'EOF'\nfeat(toolstream): expand DSML wrapper detection\n\nSupport DSML wrapper aliases: <dsml|tool_calls>, <|tool_calls>, <|tool_calls> alongside existing canonical wrappers.\nEOF\n)\"]]></|DSML|parameter>\n",
|
||||
"<|DSML|parameter name=\"command\"><![CDATA[git commit -m \"$(cat <<'EOF'\nfeat(toolstream): expand DSML wrapper detection\n\nSupport DSML wrapper aliases: <dsml|tool_calls> and <|tool_calls> alongside existing canonical wrappers.\nEOF\n)\"]]></|DSML|parameter>\n",
|
||||
"<|DSML|parameter name=\"description\"><![CDATA[Create commit with all staged changes]]></|DSML|parameter>\n",
|
||||
"</|DSML|invoke>\n",
|
||||
"</|DSML|tool_calls>",
|
||||
|
||||
@@ -626,13 +626,13 @@ func TestProcessToolSieveEmitsAllEmptyDSMLToolBlock(t *testing.T) {
|
||||
|
||||
func TestProcessToolSieveEmitsChunkedAllEmptyArbitraryPrefixedToolBlock(t *testing.T) {
|
||||
chunk := strings.Join([]string{
|
||||
`<T|DSML|tool_calls>`,
|
||||
` <T|DSML|invoke name="TaskOutput">`,
|
||||
` <T|DSML|parameter name="task_id"></T|DSML|parameter>`,
|
||||
` <T|DSML|parameter name="block"></T|DSML|parameter>`,
|
||||
` <T|DSML|parameter name="timeout"></T|DSML|parameter>`,
|
||||
` </T|DSML|invoke>`,
|
||||
` </T|DSML|tool_calls>`,
|
||||
`<T|DSML|tool_calls>`,
|
||||
` <T|DSML|invoke name="TaskOutput">`,
|
||||
` <T|DSML|parameter name="task_id"></T|DSML|parameter>`,
|
||||
` <T|DSML|parameter name="block"></T|DSML|parameter>`,
|
||||
` <T|DSML|parameter name="timeout"></T|DSML|parameter>`,
|
||||
` </T|DSML|invoke>`,
|
||||
` </T|DSML|tool_calls>`,
|
||||
}, "\n")
|
||||
calls := collectToolCallsForChunks(t, splitEveryNRBytes(chunk, 8), []string{"TaskOutput"})
|
||||
if len(calls) != 1 {
|
||||
@@ -811,8 +811,8 @@ func TestFindPartialXMLToolTagStart(t *testing.T) {
|
||||
{"partial_tool_calls", "Hello <tool_ca", 6},
|
||||
{"partial_dsml_trailing_pipe", "Hello <|DSML|tool_calls|", 6},
|
||||
{"partial_dsml_extra_leading_less_than", "Hello <<|DSML|tool_calls", 6},
|
||||
{"partial_arbitrary_prefix_before_dsml", "Hello <T|DS", 6},
|
||||
{"partial_arbitrary_prefix_after_dsml_pipe", "Hello <T|DSML|", 6},
|
||||
{"partial_arbitrary_prefix_before_dsml", "Hello <T|DS", 6},
|
||||
{"partial_arbitrary_prefix_after_dsml_pipe", "Hello <T|DSML|", 6},
|
||||
{"partial_invoke", "Hello <inv", 6},
|
||||
{"bare_tool_call_not_held", "Hello <tool_name", -1},
|
||||
{"partial_lt_only", "Text <", 5},
|
||||
@@ -1091,7 +1091,7 @@ func TestProcessToolSieveRepairsMissingOpeningWrapperWithoutLeakingInvokeText(t
|
||||
}
|
||||
}
|
||||
|
||||
// Test fullwidth pipe variant: <|tool_calls> (U+FF5C) should be buffered and parsed.
|
||||
// Test escaped U+FF5C pipe variant: <\uff5ctool_calls> should be buffered and parsed.
|
||||
func TestProcessToolSieveFullwidthPipeVariantDoesNotLeak(t *testing.T) {
|
||||
var state State
|
||||
chunks := []string{
|
||||
@@ -1115,19 +1115,19 @@ func TestProcessToolSieveFullwidthPipeVariantDoesNotLeak(t *testing.T) {
|
||||
}
|
||||
|
||||
if strings.Contains(textContent, "invoke") || strings.Contains(textContent, "execute_command") {
|
||||
t.Fatalf("fullwidth pipe variant leaked to text: %q", textContent)
|
||||
t.Fatalf("escaped U+FF5C pipe variant leaked to text: %q", textContent)
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected one tool call from fullwidth pipe variant, got %d events=%#v", toolCalls, events)
|
||||
t.Fatalf("expected one tool call from escaped U+FF5C pipe variant, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
}
|
||||
|
||||
// Test <|DSML|tool_calls> with DSML invoke/parameter tags should buffer the
|
||||
// Test <|DSML|tool_calls> with DSML invoke/parameter tags should buffer the
|
||||
// wrapper instead of leaking it before the block is complete.
|
||||
func TestProcessToolSieveFullwidthDSMLPrefixVariantDoesNotLeak(t *testing.T) {
|
||||
var state State
|
||||
chunks := []string{
|
||||
"<|DSML|tool",
|
||||
"<|DSML|tool",
|
||||
"_calls>\n",
|
||||
"<|DSML|invoke name=\"Bash\">\n",
|
||||
"<|DSML|parameter name=\"command\"><![CDATA[ls -la /Users/aq/Desktop/myproject/ds2api/]]></|DSML|parameter>\n",
|
||||
@@ -1232,12 +1232,12 @@ func TestProcessToolSieveDSMLBarePrefixVariantDoesNotLeak(t *testing.T) {
|
||||
func TestProcessToolSieveCJKAngleDSMDriftDoesNotLeak(t *testing.T) {
|
||||
var state State
|
||||
chunks := []string{
|
||||
"<DSM|tool_calls>\n",
|
||||
"<DSM|invoke name=\"Bash\">\n",
|
||||
"<DSM|parameter name=\"description\"|>〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉\n",
|
||||
"<DSM|parameter name=\"command\"|>〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉\n",
|
||||
"〈/DSM|invoke〉\n",
|
||||
"〈/DSM|tool_calls〉",
|
||||
"<DSM|tool_calls>\n",
|
||||
"<DSM|invoke name=\"Bash\">\n",
|
||||
"<DSM|parameter name=\"description\"|>〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉\n",
|
||||
"<DSM|parameter name=\"command\"|>〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉\n",
|
||||
"〈/DSM|invoke〉\n",
|
||||
"〈/DSM|tool_calls〉",
|
||||
}
|
||||
var events []Event
|
||||
for _, c := range chunks {
|
||||
@@ -1338,7 +1338,7 @@ func TestProcessToolSieveIdeographicCommaDSMLDriftDoesNotLeak(t *testing.T) {
|
||||
|
||||
func TestProcessToolSieveParsesFullwidthClosingSlashAndKeepsSuffixText(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke></DSML|tool_calls> sao cụm này lại đc trả là 1 message`
|
||||
chunk := `<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke></DSML|tool_calls> sao cụm này lại đc trả là 1 message`
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
@@ -1365,7 +1365,7 @@ func TestProcessToolSieveParsesFullwidthClosingSlashAndKeepsSuffixText(t *testin
|
||||
|
||||
func TestProcessToolSieveParsesSentencePieceSeparatorAndFullwidthTerminator(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls> suffix`
|
||||
chunk := `<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls> suffix`
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
@@ -1392,7 +1392,7 @@ func TestProcessToolSieveParsesSentencePieceSeparatorAndFullwidthTerminator(t *t
|
||||
|
||||
func TestProcessToolSieveParsesFullwidthOpeningDelimiterAndUnicodeAttributes(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<|DSML tool_calls><|DSML invoke name=“execute_code”><|DSML parameter name=“code”><![CDATA[print("hi")]]></DSML|parameter></DSML|invoke></DSML|tool_calls> suffix`
|
||||
chunk := `<|DSML tool_calls><|DSML invoke name=“execute_code”><|DSML parameter name=“code”><![CDATA[print("hi")]]></DSML|parameter></DSML|invoke></DSML|tool_calls> suffix`
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
|
||||
@@ -13,10 +13,10 @@ func TestMessagesPrepareBasic(t *testing.T) {
|
||||
if got == "" {
|
||||
t.Fatal("expected non-empty prompt")
|
||||
}
|
||||
if !strings.HasPrefix(got, "<|begin▁of▁sentence|><|System|>") {
|
||||
if !strings.HasPrefix(got, "<|begin▁of▁sentence|><|System|>") {
|
||||
t.Fatalf("expected output integrity guard at the start, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "Hello") || !strings.HasSuffix(got, "<|Assistant|>") {
|
||||
if !strings.Contains(got, "Hello") || !strings.HasSuffix(got, "<|Assistant|>") {
|
||||
t.Fatalf("unexpected prompt: %q", got)
|
||||
}
|
||||
}
|
||||
@@ -33,31 +33,31 @@ func TestMessagesPrepareRoles(t *testing.T) {
|
||||
if !contains(got, "Output integrity guard") {
|
||||
t.Fatalf("expected output integrity guard in %q", got)
|
||||
}
|
||||
if !contains(got, "You are helper") || !contains(got, "<|User|>Hi") {
|
||||
if !contains(got, "You are helper") || !contains(got, "<|User|>Hi") {
|
||||
t.Fatalf("expected system/user content in %q", got)
|
||||
}
|
||||
if !contains(got, "<|begin▁of▁sentence|>") {
|
||||
if !contains(got, "<|begin▁of▁sentence|>") {
|
||||
t.Fatalf("expected begin marker in %q", got)
|
||||
}
|
||||
if !contains(got, "<|User|>Hi<|Assistant|>Hello<|end▁of▁sentence|>") {
|
||||
if !contains(got, "<|User|>Hi<|Assistant|>Hello<|end▁of▁sentence|>") {
|
||||
t.Fatalf("expected user/assistant separation in %q", got)
|
||||
}
|
||||
if !contains(got, "<|Assistant|>Hello<|end▁of▁sentence|><|Tool|>Search results<|end▁of▁toolresults|>") {
|
||||
if !contains(got, "<|Assistant|>Hello<|end▁of▁sentence|><|Tool|>Search results<|end▁of▁toolresults|>") {
|
||||
t.Fatalf("expected assistant/tool separation in %q", got)
|
||||
}
|
||||
if !contains(got, "<|Tool|>Search results<|end▁of▁toolresults|><|User|>How are you") {
|
||||
if !contains(got, "<|Tool|>Search results<|end▁of▁toolresults|><|User|>How are you") {
|
||||
t.Fatalf("expected tool/user separation in %q", got)
|
||||
}
|
||||
if !contains(got, "<|Assistant|>") {
|
||||
if !contains(got, "<|Assistant|>") {
|
||||
t.Fatalf("expected assistant marker in %q", got)
|
||||
}
|
||||
if !contains(got, "<|System|>") {
|
||||
if !contains(got, "<|System|>") {
|
||||
t.Fatalf("expected system marker in %q", got)
|
||||
}
|
||||
if !contains(got, "<|User|>") {
|
||||
if !contains(got, "<|User|>") {
|
||||
t.Fatalf("expected user marker in %q", got)
|
||||
}
|
||||
if !contains(got, "<|Tool|>") {
|
||||
if !contains(got, "<|Tool|>") {
|
||||
t.Fatalf("expected tool marker in %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -162,20 +162,20 @@ func TestMessagesPrepareMergesConsecutiveSameRole(t *testing.T) {
|
||||
{"role": "user", "content": "World"},
|
||||
}
|
||||
got := MessagesPrepare(messages)
|
||||
if !strings.HasPrefix(got, "<|begin▁of▁sentence|>") {
|
||||
if !strings.HasPrefix(got, "<|begin▁of▁sentence|>") {
|
||||
t.Fatalf("expected user marker at the start, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "Hello") || !strings.Contains(got, "World") {
|
||||
t.Fatalf("expected both messages, got %q", got)
|
||||
}
|
||||
// Should be merged into a single user turn with one marker at the start.
|
||||
count := strings.Count(got, "<|User|>")
|
||||
count := strings.Count(got, "<|User|>")
|
||||
if count != 1 {
|
||||
t.Fatalf("expected one User marker for the merged pair, got %d occurrences", count)
|
||||
}
|
||||
// User messages no longer have end_of_sentence markers in the official format.
|
||||
// The merged pair should have zero end_of_sentence markers (user turn only).
|
||||
if count := strings.Count(got, "<|end▁of▁sentence|>"); count != 0 {
|
||||
if count := strings.Count(got, "<|end▁of▁sentence|>"); count != 0 {
|
||||
t.Fatalf("expected zero sentence terminators for user-only merge, got %d occurrences", count)
|
||||
}
|
||||
}
|
||||
@@ -186,16 +186,16 @@ func TestMessagesPrepareAssistantMarkers(t *testing.T) {
|
||||
{"role": "assistant", "content": "Hello!"},
|
||||
}
|
||||
got := MessagesPrepare(messages)
|
||||
if !strings.Contains(got, "<|Assistant|>") {
|
||||
if !strings.Contains(got, "<|Assistant|>") {
|
||||
t.Fatalf("expected assistant marker, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "<|end▁of▁sentence|>") {
|
||||
if !strings.Contains(got, "<|end▁of▁sentence|>") {
|
||||
t.Fatalf("expected end of sentence marker, got %q", got)
|
||||
}
|
||||
if strings.Count(got, "<|end▁of▁sentence|>") != 1 {
|
||||
if strings.Count(got, "<|end▁of▁sentence|>") != 1 {
|
||||
t.Fatalf("expected one end_of_sentence (assistant only), got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "<|Assistant|>Hello!<|end▁of▁sentence|>") {
|
||||
if !strings.Contains(got, "<|Assistant|>Hello!<|end▁of▁sentence|>") {
|
||||
t.Fatalf("expected assistant EOS suffix, got %q", got)
|
||||
}
|
||||
if strings.Contains(got, "<think>") || strings.Contains(got, "</think>") {
|
||||
|
||||
Reference in New Issue
Block a user