mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-07 01:45:27 +08:00
feat: implement mandatory DeepSeek turn termination markers for system, user, assistant, and tool roles
This commit is contained in:
@@ -9,9 +9,9 @@ var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\
|
||||
var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`)
|
||||
|
||||
// leakedMetaMarkerPattern matches DeepSeek special tokens in BOTH forms:
|
||||
// - ASCII underscore: <|end_of_sentence|>
|
||||
// - U+2581 variant: <|end▁of▁sentence|> (used in some DeepSeek outputs)
|
||||
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking)\s*[|\|]>`)
|
||||
// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|>
|
||||
// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|>
|
||||
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|\|]>`)
|
||||
|
||||
// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
|
||||
// when the sieve fails to capture them. These are applied only to complete
|
||||
|
||||
@@ -19,9 +19,9 @@ func TestSanitizeLeakedOutputRemovesLeakedWireToolCallAndResult(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers(t *testing.T) {
|
||||
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<|end▁of▁thinking|>D<|end▁of▁sentence|>E"
|
||||
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<|end▁of▁thinking|>D<|end▁of▁sentence|>E<| end_of_toolresults |>F<|end▁of▁instructions|>G"
|
||||
got := sanitizeLeakedOutput(raw)
|
||||
if got != "ABCDE" {
|
||||
if got != "ABCDEFG" {
|
||||
t.Fatalf("unexpected sanitize result for meta markers: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
|
||||
authn "ds2api/internal/auth"
|
||||
"ds2api/internal/config"
|
||||
"ds2api/internal/deepseek"
|
||||
"ds2api/internal/sse"
|
||||
)
|
||||
|
||||
@@ -157,7 +158,7 @@ func (h *Handler) testAccount(ctx context.Context, acc config.Account, model, me
|
||||
result["message"] = "获取 PoW 失败: " + err.Error()
|
||||
return result
|
||||
}
|
||||
payload := map[string]any{"chat_session_id": sessionID, "prompt": "<|User|>\n" + message, "ref_file_ids": []any{}, "thinking_enabled": thinking, "search_enabled": search}
|
||||
payload := map[string]any{"chat_session_id": sessionID, "prompt": deepseek.MessagesPrepare([]map[string]any{{"role": "user", "content": message}}), "ref_file_ids": []any{}, "thinking_enabled": thinking, "search_enabled": search}
|
||||
resp, err := h.DS.CallCompletion(ctx, authCtx, payload, pow, 1)
|
||||
if err != nil {
|
||||
result["message"] = "请求失败: " + err.Error()
|
||||
|
||||
@@ -10,10 +10,13 @@ import (
|
||||
var markdownImagePattern = regexp.MustCompile(`!\[(.*?)\]\((.*?)\)`)
|
||||
|
||||
const (
|
||||
systemMarker = "<|System|>"
|
||||
userMarker = "<|User|>"
|
||||
assistantMarker = "<|Assistant|>"
|
||||
toolMarker = "<|Tool|>"
|
||||
systemMarker = "<|System|>"
|
||||
userMarker = "<|User|>"
|
||||
assistantMarker = "<|Assistant|>"
|
||||
toolMarker = "<|Tool|>"
|
||||
endSentenceMarker = "<|end▁of▁sentence|>"
|
||||
endToolResultsMarker = "<|end▁of▁toolresults|>"
|
||||
endInstructionsMarker = "<|end▁of▁instructions|>"
|
||||
)
|
||||
|
||||
func MessagesPrepare(messages []map[string]any) string {
|
||||
@@ -42,17 +45,17 @@ func MessagesPrepare(messages []map[string]any) string {
|
||||
for _, m := range merged {
|
||||
switch m.Role {
|
||||
case "assistant":
|
||||
parts = append(parts, formatRoleBlock(assistantMarker, m.Text))
|
||||
parts = append(parts, formatRoleBlock(assistantMarker, m.Text, endSentenceMarker))
|
||||
case "tool":
|
||||
if strings.TrimSpace(m.Text) != "" {
|
||||
parts = append(parts, formatRoleBlock(toolMarker, m.Text))
|
||||
parts = append(parts, formatRoleBlock(toolMarker, m.Text, endToolResultsMarker))
|
||||
}
|
||||
case "system":
|
||||
if text := strings.TrimSpace(m.Text); text != "" {
|
||||
parts = append(parts, formatRoleBlock(systemMarker, text))
|
||||
parts = append(parts, formatRoleBlock(systemMarker, text, endInstructionsMarker))
|
||||
}
|
||||
case "user":
|
||||
parts = append(parts, formatRoleBlock(userMarker, m.Text))
|
||||
parts = append(parts, formatRoleBlock(userMarker, m.Text, endSentenceMarker))
|
||||
default:
|
||||
if strings.TrimSpace(m.Text) != "" {
|
||||
parts = append(parts, m.Text)
|
||||
@@ -63,8 +66,13 @@ func MessagesPrepare(messages []map[string]any) string {
|
||||
return markdownImagePattern.ReplaceAllString(out, `[${1}](${2})`)
|
||||
}
|
||||
|
||||
func formatRoleBlock(marker, text string) string {
|
||||
return marker + "\n" + text
|
||||
// DeepSeek-style turn suffixes stay attached to the same block as the role content.
|
||||
func formatRoleBlock(marker, text, endMarker string) string {
|
||||
out := marker + "\n" + text
|
||||
if strings.TrimSpace(endMarker) != "" {
|
||||
out += endMarker
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func NormalizeContent(v any) string {
|
||||
|
||||
@@ -25,17 +25,21 @@ func TestMessagesPrepareNilContentNoNullLiteral(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMessagesPrepareUsesUnifiedSystemMarkerAndNoEOSTag(t *testing.T) {
|
||||
func TestMessagesPrepareUsesTurnSuffixes(t *testing.T) {
|
||||
messages := []map[string]any{
|
||||
{"role": "system", "content": "System rule"},
|
||||
{"role": "user", "content": "Question"},
|
||||
{"role": "assistant", "content": "Answer"},
|
||||
}
|
||||
got := MessagesPrepare(messages)
|
||||
if !strings.Contains(got, "<|System|>\nSystem rule") {
|
||||
t.Fatalf("expected unified system marker, got %q", got)
|
||||
if !strings.Contains(got, "<|System|>\nSystem rule<|end▁of▁instructions|>") {
|
||||
t.Fatalf("expected system instructions suffix, got %q", got)
|
||||
}
|
||||
if strings.Contains(got, "<|end▁of▁sentence|>") {
|
||||
t.Fatalf("did not expect EOS marker, got %q", got)
|
||||
if !strings.Contains(got, "<|User|>\nQuestion<|end▁of▁sentence|>") {
|
||||
t.Fatalf("expected user sentence suffix, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "<|Assistant|>\nAnswer<|end▁of▁sentence|>") {
|
||||
t.Fatalf("expected assistant sentence suffix, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ func TestMessagesPrepareBasic(t *testing.T) {
|
||||
if got == "" {
|
||||
t.Fatal("expected non-empty prompt")
|
||||
}
|
||||
if got != "<|User|>\nHello" {
|
||||
if got != "<|User|>\nHello<|end▁of▁sentence|>" {
|
||||
t.Fatalf("unexpected prompt: %q", got)
|
||||
}
|
||||
}
|
||||
@@ -22,17 +22,21 @@ func TestMessagesPrepareRoles(t *testing.T) {
|
||||
{"role": "system", "content": "You are helper"},
|
||||
{"role": "user", "content": "Hi"},
|
||||
{"role": "assistant", "content": "Hello"},
|
||||
{"role": "tool", "content": "Search results"},
|
||||
{"role": "user", "content": "How are you"},
|
||||
}
|
||||
got := MessagesPrepare(messages)
|
||||
if !contains(got, "<|System|>\nYou are helper\n\n<|User|>\nHi") {
|
||||
if !contains(got, "<|System|>\nYou are helper<|end▁of▁instructions|>\n\n<|User|>\nHi<|end▁of▁sentence|>") {
|
||||
t.Fatalf("expected system/user separation in %q", got)
|
||||
}
|
||||
if !contains(got, "<|User|>\nHi\n\n<|Assistant|>\nHello") {
|
||||
if !contains(got, "<|User|>\nHi<|end▁of▁sentence|>\n\n<|Assistant|>\nHello<|end▁of▁sentence|>") {
|
||||
t.Fatalf("expected user/assistant separation in %q", got)
|
||||
}
|
||||
if !contains(got, "<|Assistant|>\nHello\n\n<|User|>\nHow are you") {
|
||||
t.Fatalf("expected assistant/user separation in %q", got)
|
||||
if !contains(got, "<|Assistant|>\nHello<|end▁of▁sentence|>\n\n<|Tool|>\nSearch results<|end▁of▁toolresults|>") {
|
||||
t.Fatalf("expected assistant/tool separation in %q", got)
|
||||
}
|
||||
if !contains(got, "<|Tool|>\nSearch results<|end▁of▁toolresults|>\n\n<|User|>\nHow are you<|end▁of▁sentence|>") {
|
||||
t.Fatalf("expected tool/user separation in %q", got)
|
||||
}
|
||||
if !contains(got, "<|Assistant|>") {
|
||||
t.Fatalf("expected assistant marker in %q", got)
|
||||
@@ -43,6 +47,9 @@ func TestMessagesPrepareRoles(t *testing.T) {
|
||||
if !contains(got, "<|User|>") {
|
||||
t.Fatalf("expected user marker in %q", got)
|
||||
}
|
||||
if !contains(got, "<|Tool|>") {
|
||||
t.Fatalf("expected tool marker in %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMessagesPrepareObjectContent(t *testing.T) {
|
||||
@@ -67,7 +74,7 @@ func TestMessagesPrepareArrayTextVariants(t *testing.T) {
|
||||
},
|
||||
}
|
||||
got := MessagesPrepare(messages)
|
||||
if got != "<|User|>\nline1\nline2" {
|
||||
if got != "<|User|>\nline1\nline2<|end▁of▁sentence|>" {
|
||||
t.Fatalf("unexpected content from text variants: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,6 +173,9 @@ func TestMessagesPrepareMergesConsecutiveSameRole(t *testing.T) {
|
||||
if count != 1 {
|
||||
t.Fatalf("expected one User marker for the merged pair, got %d occurrences", count)
|
||||
}
|
||||
if count := strings.Count(got, "<|end▁of▁sentence|>"); count != 1 {
|
||||
t.Fatalf("expected one sentence terminator for the merged pair, got %d occurrences", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMessagesPrepareAssistantMarkers(t *testing.T) {
|
||||
@@ -184,8 +187,14 @@ func TestMessagesPrepareAssistantMarkers(t *testing.T) {
|
||||
if !strings.Contains(got, "<|Assistant|>") {
|
||||
t.Fatalf("expected assistant marker, got %q", got)
|
||||
}
|
||||
if strings.Contains(got, "<|end▁of▁sentence|>") {
|
||||
t.Fatalf("did not expect end of sentence marker, got %q", got)
|
||||
if !strings.Contains(got, "<|end▁of▁sentence|>") {
|
||||
t.Fatalf("expected end of sentence marker, got %q", got)
|
||||
}
|
||||
if strings.Count(got, "<|end▁of▁sentence|>") != 2 {
|
||||
t.Fatalf("expected both turns to be terminated, got %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "<|Assistant|>\nHello!<|end▁of▁sentence|>") {
|
||||
t.Fatalf("expected assistant EOS suffix, got %q", got)
|
||||
}
|
||||
if strings.Contains(got, "<system_instructions>") {
|
||||
t.Fatalf("did not expect legacy system marker, got %q", got)
|
||||
|
||||
Reference in New Issue
Block a user