diff --git a/internal/adapter/openai/leaked_output_sanitize.go b/internal/adapter/openai/leaked_output_sanitize.go index cb6e7c4..c139feb 100644 --- a/internal/adapter/openai/leaked_output_sanitize.go +++ b/internal/adapter/openai/leaked_output_sanitize.go @@ -9,9 +9,9 @@ var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\ var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`) // leakedMetaMarkerPattern matches DeepSeek special tokens in BOTH forms: -// - ASCII underscore: <|end_of_sentence|> -// - U+2581 variant: <|end▁of▁sentence|> (used in some DeepSeek outputs) -var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking)\s*[|\|]>`) +// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|> +// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|> +var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|\|]>`) // leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through // when the sieve fails to capture them. These are applied only to complete diff --git a/internal/adapter/openai/leaked_output_sanitize_test.go b/internal/adapter/openai/leaked_output_sanitize_test.go index 6548d39..558cc48 100644 --- a/internal/adapter/openai/leaked_output_sanitize_test.go +++ b/internal/adapter/openai/leaked_output_sanitize_test.go @@ -19,9 +19,9 @@ func TestSanitizeLeakedOutputRemovesLeakedWireToolCallAndResult(t *testing.T) { } func TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers(t *testing.T) { - raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<|end▁of▁thinking|>D<|end▁of▁sentence|>E" + raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<|end▁of▁thinking|>D<|end▁of▁sentence|>E<| end_of_toolresults |>F<|end▁of▁instructions|>G" got := sanitizeLeakedOutput(raw) - if got != "ABCDE" { + if got != "ABCDEFG" { t.Fatalf("unexpected sanitize result for meta markers: %q", got) } } diff --git a/internal/admin/handler_accounts_testing.go b/internal/admin/handler_accounts_testing.go index a05d6cf..8bfa376 100644 --- a/internal/admin/handler_accounts_testing.go +++ b/internal/admin/handler_accounts_testing.go @@ -13,6 +13,7 @@ import ( authn "ds2api/internal/auth" "ds2api/internal/config" + "ds2api/internal/deepseek" "ds2api/internal/sse" ) @@ -157,7 +158,7 @@ func (h *Handler) testAccount(ctx context.Context, acc config.Account, model, me result["message"] = "获取 PoW 失败: " + err.Error() return result } - payload := map[string]any{"chat_session_id": sessionID, "prompt": "<|User|>\n" + message, "ref_file_ids": []any{}, "thinking_enabled": thinking, "search_enabled": search} + payload := map[string]any{"chat_session_id": sessionID, "prompt": deepseek.MessagesPrepare([]map[string]any{{"role": "user", "content": message}}), "ref_file_ids": []any{}, "thinking_enabled": thinking, "search_enabled": search} resp, err := h.DS.CallCompletion(ctx, authCtx, payload, pow, 1) if err != nil { result["message"] = "请求失败: " + err.Error() diff --git a/internal/prompt/messages.go b/internal/prompt/messages.go index d1c5745..fe69f72 100644 --- a/internal/prompt/messages.go +++ b/internal/prompt/messages.go @@ -10,10 +10,13 @@ import ( var markdownImagePattern = regexp.MustCompile(`!\[(.*?)\]\((.*?)\)`) const ( - systemMarker = "<|System|>" - userMarker = "<|User|>" - assistantMarker = "<|Assistant|>" - toolMarker = "<|Tool|>" + systemMarker = "<|System|>" + userMarker = "<|User|>" + assistantMarker = "<|Assistant|>" + toolMarker = "<|Tool|>" + endSentenceMarker = "<|end▁of▁sentence|>" + endToolResultsMarker = "<|end▁of▁toolresults|>" + endInstructionsMarker = "<|end▁of▁instructions|>" ) func MessagesPrepare(messages []map[string]any) string { @@ -42,17 +45,17 @@ func MessagesPrepare(messages []map[string]any) string { for _, m := range merged { switch m.Role { case "assistant": - parts = append(parts, formatRoleBlock(assistantMarker, m.Text)) + parts = append(parts, formatRoleBlock(assistantMarker, m.Text, endSentenceMarker)) case "tool": if strings.TrimSpace(m.Text) != "" { - parts = append(parts, formatRoleBlock(toolMarker, m.Text)) + parts = append(parts, formatRoleBlock(toolMarker, m.Text, endToolResultsMarker)) } case "system": if text := strings.TrimSpace(m.Text); text != "" { - parts = append(parts, formatRoleBlock(systemMarker, text)) + parts = append(parts, formatRoleBlock(systemMarker, text, endInstructionsMarker)) } case "user": - parts = append(parts, formatRoleBlock(userMarker, m.Text)) + parts = append(parts, formatRoleBlock(userMarker, m.Text, endSentenceMarker)) default: if strings.TrimSpace(m.Text) != "" { parts = append(parts, m.Text) @@ -63,8 +66,13 @@ func MessagesPrepare(messages []map[string]any) string { return markdownImagePattern.ReplaceAllString(out, `[${1}](${2})`) } -func formatRoleBlock(marker, text string) string { - return marker + "\n" + text +// DeepSeek-style turn suffixes stay attached to the same block as the role content. +func formatRoleBlock(marker, text, endMarker string) string { + out := marker + "\n" + text + if strings.TrimSpace(endMarker) != "" { + out += endMarker + } + return out } func NormalizeContent(v any) string { diff --git a/internal/prompt/messages_test.go b/internal/prompt/messages_test.go index fbeba54..5465c7a 100644 --- a/internal/prompt/messages_test.go +++ b/internal/prompt/messages_test.go @@ -25,17 +25,21 @@ func TestMessagesPrepareNilContentNoNullLiteral(t *testing.T) { } } -func TestMessagesPrepareUsesUnifiedSystemMarkerAndNoEOSTag(t *testing.T) { +func TestMessagesPrepareUsesTurnSuffixes(t *testing.T) { messages := []map[string]any{ {"role": "system", "content": "System rule"}, + {"role": "user", "content": "Question"}, {"role": "assistant", "content": "Answer"}, } got := MessagesPrepare(messages) - if !strings.Contains(got, "<|System|>\nSystem rule") { - t.Fatalf("expected unified system marker, got %q", got) + if !strings.Contains(got, "<|System|>\nSystem rule<|end▁of▁instructions|>") { + t.Fatalf("expected system instructions suffix, got %q", got) } - if strings.Contains(got, "<|end▁of▁sentence|>") { - t.Fatalf("did not expect EOS marker, got %q", got) + if !strings.Contains(got, "<|User|>\nQuestion<|end▁of▁sentence|>") { + t.Fatalf("expected user sentence suffix, got %q", got) + } + if !strings.Contains(got, "<|Assistant|>\nAnswer<|end▁of▁sentence|>") { + t.Fatalf("expected assistant sentence suffix, got %q", got) } } diff --git a/internal/util/messages_test.go b/internal/util/messages_test.go index e6e0cdc..1fd2024 100644 --- a/internal/util/messages_test.go +++ b/internal/util/messages_test.go @@ -12,7 +12,7 @@ func TestMessagesPrepareBasic(t *testing.T) { if got == "" { t.Fatal("expected non-empty prompt") } - if got != "<|User|>\nHello" { + if got != "<|User|>\nHello<|end▁of▁sentence|>" { t.Fatalf("unexpected prompt: %q", got) } } @@ -22,17 +22,21 @@ func TestMessagesPrepareRoles(t *testing.T) { {"role": "system", "content": "You are helper"}, {"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello"}, + {"role": "tool", "content": "Search results"}, {"role": "user", "content": "How are you"}, } got := MessagesPrepare(messages) - if !contains(got, "<|System|>\nYou are helper\n\n<|User|>\nHi") { + if !contains(got, "<|System|>\nYou are helper<|end▁of▁instructions|>\n\n<|User|>\nHi<|end▁of▁sentence|>") { t.Fatalf("expected system/user separation in %q", got) } - if !contains(got, "<|User|>\nHi\n\n<|Assistant|>\nHello") { + if !contains(got, "<|User|>\nHi<|end▁of▁sentence|>\n\n<|Assistant|>\nHello<|end▁of▁sentence|>") { t.Fatalf("expected user/assistant separation in %q", got) } - if !contains(got, "<|Assistant|>\nHello\n\n<|User|>\nHow are you") { - t.Fatalf("expected assistant/user separation in %q", got) + if !contains(got, "<|Assistant|>\nHello<|end▁of▁sentence|>\n\n<|Tool|>\nSearch results<|end▁of▁toolresults|>") { + t.Fatalf("expected assistant/tool separation in %q", got) + } + if !contains(got, "<|Tool|>\nSearch results<|end▁of▁toolresults|>\n\n<|User|>\nHow are you<|end▁of▁sentence|>") { + t.Fatalf("expected tool/user separation in %q", got) } if !contains(got, "<|Assistant|>") { t.Fatalf("expected assistant marker in %q", got) @@ -43,6 +47,9 @@ func TestMessagesPrepareRoles(t *testing.T) { if !contains(got, "<|User|>") { t.Fatalf("expected user marker in %q", got) } + if !contains(got, "<|Tool|>") { + t.Fatalf("expected tool marker in %q", got) + } } func TestMessagesPrepareObjectContent(t *testing.T) { @@ -67,7 +74,7 @@ func TestMessagesPrepareArrayTextVariants(t *testing.T) { }, } got := MessagesPrepare(messages) - if got != "<|User|>\nline1\nline2" { + if got != "<|User|>\nline1\nline2<|end▁of▁sentence|>" { t.Fatalf("unexpected content from text variants: %q", got) } } diff --git a/internal/util/util_edge_test.go b/internal/util/util_edge_test.go index 439b257..5c1ff94 100644 --- a/internal/util/util_edge_test.go +++ b/internal/util/util_edge_test.go @@ -173,6 +173,9 @@ func TestMessagesPrepareMergesConsecutiveSameRole(t *testing.T) { if count != 1 { t.Fatalf("expected one User marker for the merged pair, got %d occurrences", count) } + if count := strings.Count(got, "<|end▁of▁sentence|>"); count != 1 { + t.Fatalf("expected one sentence terminator for the merged pair, got %d occurrences", count) + } } func TestMessagesPrepareAssistantMarkers(t *testing.T) { @@ -184,8 +187,14 @@ func TestMessagesPrepareAssistantMarkers(t *testing.T) { if !strings.Contains(got, "<|Assistant|>") { t.Fatalf("expected assistant marker, got %q", got) } - if strings.Contains(got, "<|end▁of▁sentence|>") { - t.Fatalf("did not expect end of sentence marker, got %q", got) + if !strings.Contains(got, "<|end▁of▁sentence|>") { + t.Fatalf("expected end of sentence marker, got %q", got) + } + if strings.Count(got, "<|end▁of▁sentence|>") != 2 { + t.Fatalf("expected both turns to be terminated, got %q", got) + } + if !strings.Contains(got, "<|Assistant|>\nHello!<|end▁of▁sentence|>") { + t.Fatalf("expected assistant EOS suffix, got %q", got) } if strings.Contains(got, "") { t.Fatalf("did not expect legacy system marker, got %q", got)