feat: implement mandatory DeepSeek turn termination markers for system, user, assistant, and tool roles

This commit is contained in:
CJACK
2026-04-05 21:48:18 +08:00
parent a28c9fb67f
commit 2a6b787f38
7 changed files with 58 additions and 29 deletions

View File

@@ -9,9 +9,9 @@ var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\
var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`)
// leakedMetaMarkerPattern matches DeepSeek special tokens in BOTH forms:
// - ASCII underscore: <end_of_sentence>
// - U+2581 variant: <end▁of▁sentence> (used in some DeepSeek outputs)
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking)\s*[\|]>`)
// - ASCII underscore: <end_of_sentence>, <end_of_toolresults>, <end_of_instructions>
// - U+2581 variant: <end▁of▁sentence>, <end▁of▁toolresults>, <end▁of▁instructions>
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[\|]>`)
// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
// when the sieve fails to capture them. These are applied only to complete

View File

@@ -19,9 +19,9 @@ func TestSanitizeLeakedOutputRemovesLeakedWireToolCallAndResult(t *testing.T) {
}
func TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers(t *testing.T) {
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<end▁of▁thinking>D<end▁of▁sentence>E"
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<end▁of▁thinking>D<end▁of▁sentence>E<| end_of_toolresults |>F<end▁of▁instructions>G"
got := sanitizeLeakedOutput(raw)
if got != "ABCDE" {
if got != "ABCDEFG" {
t.Fatalf("unexpected sanitize result for meta markers: %q", got)
}
}

View File

@@ -13,6 +13,7 @@ import (
authn "ds2api/internal/auth"
"ds2api/internal/config"
"ds2api/internal/deepseek"
"ds2api/internal/sse"
)
@@ -157,7 +158,7 @@ func (h *Handler) testAccount(ctx context.Context, acc config.Account, model, me
result["message"] = "获取 PoW 失败: " + err.Error()
return result
}
payload := map[string]any{"chat_session_id": sessionID, "prompt": "<User>\n" + message, "ref_file_ids": []any{}, "thinking_enabled": thinking, "search_enabled": search}
payload := map[string]any{"chat_session_id": sessionID, "prompt": deepseek.MessagesPrepare([]map[string]any{{"role": "user", "content": message}}), "ref_file_ids": []any{}, "thinking_enabled": thinking, "search_enabled": search}
resp, err := h.DS.CallCompletion(ctx, authCtx, payload, pow, 1)
if err != nil {
result["message"] = "请求失败: " + err.Error()

View File

@@ -10,10 +10,13 @@ import (
var markdownImagePattern = regexp.MustCompile(`!\[(.*?)\]\((.*?)\)`)
const (
systemMarker = "<System>"
userMarker = "<User>"
assistantMarker = "<Assistant>"
toolMarker = "<Tool>"
systemMarker = "<System>"
userMarker = "<User>"
assistantMarker = "<Assistant>"
toolMarker = "<Tool>"
endSentenceMarker = "<end▁of▁sentence>"
endToolResultsMarker = "<end▁of▁toolresults>"
endInstructionsMarker = "<end▁of▁instructions>"
)
func MessagesPrepare(messages []map[string]any) string {
@@ -42,17 +45,17 @@ func MessagesPrepare(messages []map[string]any) string {
for _, m := range merged {
switch m.Role {
case "assistant":
parts = append(parts, formatRoleBlock(assistantMarker, m.Text))
parts = append(parts, formatRoleBlock(assistantMarker, m.Text, endSentenceMarker))
case "tool":
if strings.TrimSpace(m.Text) != "" {
parts = append(parts, formatRoleBlock(toolMarker, m.Text))
parts = append(parts, formatRoleBlock(toolMarker, m.Text, endToolResultsMarker))
}
case "system":
if text := strings.TrimSpace(m.Text); text != "" {
parts = append(parts, formatRoleBlock(systemMarker, text))
parts = append(parts, formatRoleBlock(systemMarker, text, endInstructionsMarker))
}
case "user":
parts = append(parts, formatRoleBlock(userMarker, m.Text))
parts = append(parts, formatRoleBlock(userMarker, m.Text, endSentenceMarker))
default:
if strings.TrimSpace(m.Text) != "" {
parts = append(parts, m.Text)
@@ -63,8 +66,13 @@ func MessagesPrepare(messages []map[string]any) string {
return markdownImagePattern.ReplaceAllString(out, `[${1}](${2})`)
}
func formatRoleBlock(marker, text string) string {
return marker + "\n" + text
// formatRoleBlock renders a single role turn as the role marker, a newline,
// and the turn text. When endMarker contains anything other than whitespace
// it is appended verbatim directly after the text, with no separator, so the
// turn suffix stays attached to the same block as the role content. A blank
// (empty or whitespace-only) endMarker is dropped entirely.
func formatRoleBlock(marker, text, endMarker string) string {
	block := marker + "\n" + text
	if strings.TrimSpace(endMarker) == "" {
		// No usable suffix: emit the bare marker/text pair.
		return block
	}
	return block + endMarker
}
func NormalizeContent(v any) string {

View File

@@ -25,17 +25,21 @@ func TestMessagesPrepareNilContentNoNullLiteral(t *testing.T) {
}
}
func TestMessagesPrepareUsesUnifiedSystemMarkerAndNoEOSTag(t *testing.T) {
func TestMessagesPrepareUsesTurnSuffixes(t *testing.T) {
messages := []map[string]any{
{"role": "system", "content": "System rule"},
{"role": "user", "content": "Question"},
{"role": "assistant", "content": "Answer"},
}
got := MessagesPrepare(messages)
if !strings.Contains(got, "<System>\nSystem rule") {
t.Fatalf("expected unified system marker, got %q", got)
if !strings.Contains(got, "<System>\nSystem rule<end▁of▁instructions>") {
t.Fatalf("expected system instructions suffix, got %q", got)
}
if strings.Contains(got, "<end▁of▁sentence>") {
t.Fatalf("did not expect EOS marker, got %q", got)
if !strings.Contains(got, "<User>\nQuestion<end▁of▁sentence>") {
t.Fatalf("expected user sentence suffix, got %q", got)
}
if !strings.Contains(got, "<Assistant>\nAnswer<end▁of▁sentence>") {
t.Fatalf("expected assistant sentence suffix, got %q", got)
}
}

View File

@@ -12,7 +12,7 @@ func TestMessagesPrepareBasic(t *testing.T) {
if got == "" {
t.Fatal("expected non-empty prompt")
}
if got != "<User>\nHello" {
if got != "<User>\nHello<end▁of▁sentence>" {
t.Fatalf("unexpected prompt: %q", got)
}
}
@@ -22,17 +22,21 @@ func TestMessagesPrepareRoles(t *testing.T) {
{"role": "system", "content": "You are helper"},
{"role": "user", "content": "Hi"},
{"role": "assistant", "content": "Hello"},
{"role": "tool", "content": "Search results"},
{"role": "user", "content": "How are you"},
}
got := MessagesPrepare(messages)
if !contains(got, "<System>\nYou are helper\n\n<User>\nHi") {
if !contains(got, "<System>\nYou are helper<end▁of▁instructions>\n\n<User>\nHi<end▁of▁sentence>") {
t.Fatalf("expected system/user separation in %q", got)
}
if !contains(got, "<User>\nHi\n\n<Assistant>\nHello") {
if !contains(got, "<User>\nHi<end▁of▁sentence>\n\n<Assistant>\nHello<end▁of▁sentence>") {
t.Fatalf("expected user/assistant separation in %q", got)
}
if !contains(got, "<Assistant>\nHello\n\n<User>\nHow are you") {
t.Fatalf("expected assistant/user separation in %q", got)
if !contains(got, "<Assistant>\nHello<end▁of▁sentence>\n\n<Tool>\nSearch results<end▁of▁toolresults>") {
t.Fatalf("expected assistant/tool separation in %q", got)
}
if !contains(got, "<Tool>\nSearch results<end▁of▁toolresults>\n\n<User>\nHow are you<end▁of▁sentence>") {
t.Fatalf("expected tool/user separation in %q", got)
}
if !contains(got, "<Assistant>") {
t.Fatalf("expected assistant marker in %q", got)
@@ -43,6 +47,9 @@ func TestMessagesPrepareRoles(t *testing.T) {
if !contains(got, "<User>") {
t.Fatalf("expected user marker in %q", got)
}
if !contains(got, "<Tool>") {
t.Fatalf("expected tool marker in %q", got)
}
}
func TestMessagesPrepareObjectContent(t *testing.T) {
@@ -67,7 +74,7 @@ func TestMessagesPrepareArrayTextVariants(t *testing.T) {
},
}
got := MessagesPrepare(messages)
if got != "<User>\nline1\nline2" {
if got != "<User>\nline1\nline2<end▁of▁sentence>" {
t.Fatalf("unexpected content from text variants: %q", got)
}
}

View File

@@ -173,6 +173,9 @@ func TestMessagesPrepareMergesConsecutiveSameRole(t *testing.T) {
if count != 1 {
t.Fatalf("expected one User marker for the merged pair, got %d occurrences", count)
}
if count := strings.Count(got, "<end▁of▁sentence>"); count != 1 {
t.Fatalf("expected one sentence terminator for the merged pair, got %d occurrences", count)
}
}
func TestMessagesPrepareAssistantMarkers(t *testing.T) {
@@ -184,8 +187,14 @@ func TestMessagesPrepareAssistantMarkers(t *testing.T) {
if !strings.Contains(got, "<Assistant>") {
t.Fatalf("expected assistant marker, got %q", got)
}
if strings.Contains(got, "<end▁of▁sentence>") {
t.Fatalf("did not expect end of sentence marker, got %q", got)
if !strings.Contains(got, "<end▁of▁sentence>") {
t.Fatalf("expected end of sentence marker, got %q", got)
}
if strings.Count(got, "<end▁of▁sentence>") != 2 {
t.Fatalf("expected both turns to be terminated, got %q", got)
}
if !strings.Contains(got, "<Assistant>\nHello!<end▁of▁sentence>") {
t.Fatalf("expected assistant EOS suffix, got %q", got)
}
if strings.Contains(got, "<system_instructions>") {
t.Fatalf("did not expect legacy system marker, got %q", got)