From 585d35e592ab3c8c10d90426bf3e50689c113938 Mon Sep 17 00:00:00 2001 From: CJACK Date: Sun, 5 Apr 2026 20:50:12 +0800 Subject: [PATCH] refactor: standardize prompt markers and remove legacy EOS and system instructions tags --- internal/prompt/messages.go | 23 +++++++++++++++-------- internal/prompt/messages_test.go | 19 ++++++++++++++++++- internal/util/messages_test.go | 7 +++++-- internal/util/util_edge_test.go | 7 +++++-- 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/internal/prompt/messages.go b/internal/prompt/messages.go index daf5b84..d1c5745 100644 --- a/internal/prompt/messages.go +++ b/internal/prompt/messages.go @@ -9,6 +9,13 @@ import ( var markdownImagePattern = regexp.MustCompile(`!\[(.*?)\]\((.*?)\)`) +const ( + systemMarker = "<|System|>" + userMarker = "<|User|>" + assistantMarker = "<|Assistant|>" + toolMarker = "<|Tool|>" +) + func MessagesPrepare(messages []map[string]any) string { type block struct { Role string @@ -35,21 +42,17 @@ func MessagesPrepare(messages []map[string]any) string { for _, m := range merged { switch m.Role { case "assistant": - // Keep assistant turns on their own block so the model sees a clear - // boundary between prior answer text and the EOS marker. - parts = append(parts, "<|Assistant|>\n"+m.Text+"\n<|end▁of▁sentence|>") + parts = append(parts, formatRoleBlock(assistantMarker, m.Text)) case "tool": if strings.TrimSpace(m.Text) != "" { - parts = append(parts, "<|Tool|>\n"+m.Text) + parts = append(parts, formatRoleBlock(toolMarker, m.Text)) } case "system": - // Clear system boundary improves R1 and V3 context understanding significantly. if text := strings.TrimSpace(m.Text); text != "" { - parts = append(parts, "\n"+text+"\n") + parts = append(parts, formatRoleBlock(systemMarker, text)) } case "user": - // Put user turns on their own line so the role transition is explicit. - parts = append(parts, "<|User|>\n"+m.Text) + parts = append(parts, formatRoleBlock(userMarker, m.Text)) default: if strings.TrimSpace(m.Text) != "" { parts = append(parts, m.Text) @@ -60,6 +63,10 @@ func MessagesPrepare(messages []map[string]any) string { return markdownImagePattern.ReplaceAllString(out, `[${1}](${2})`) } +func formatRoleBlock(marker, text string) string { + return marker + "\n" + text +} + func NormalizeContent(v any) string { if v == nil { return "" diff --git a/internal/prompt/messages_test.go b/internal/prompt/messages_test.go index 9114d39..fbeba54 100644 --- a/internal/prompt/messages_test.go +++ b/internal/prompt/messages_test.go @@ -1,6 +1,9 @@ package prompt -import "testing" +import ( + "strings" + "testing" +) func TestNormalizeContentNilReturnsEmpty(t *testing.T) { if got := NormalizeContent(nil); got != "" { @@ -22,6 +25,20 @@ func TestMessagesPrepareNilContentNoNullLiteral(t *testing.T) { } } +func TestMessagesPrepareUsesUnifiedSystemMarkerAndNoEOSTag(t *testing.T) { + messages := []map[string]any{ + {"role": "system", "content": "System rule"}, + {"role": "assistant", "content": "Answer"}, + } + got := MessagesPrepare(messages) + if !strings.Contains(got, "<|System|>\nSystem rule") { + t.Fatalf("expected unified system marker, got %q", got) + } + if strings.Contains(got, "<|end▁of▁sentence|>") { + t.Fatalf("did not expect EOS marker, got %q", got) + } +} + func TestNormalizeContentArrayFallsBackToContentWhenTextEmpty(t *testing.T) { got := NormalizeContent([]any{ map[string]any{"type": "text", "text": "", "content": "from-content"}, diff --git a/internal/util/messages_test.go b/internal/util/messages_test.go index 2ec3f50..e6e0cdc 100644 --- a/internal/util/messages_test.go +++ b/internal/util/messages_test.go @@ -25,18 +25,21 @@ func TestMessagesPrepareRoles(t *testing.T) { {"role": "user", "content": "How are you"}, } got := MessagesPrepare(messages) - if !contains(got, "\nYou are helper\n\n\n<|User|>\nHi") { + if !contains(got, "<|System|>\nYou are helper\n\n<|User|>\nHi") { t.Fatalf("expected system/user separation in %q", got) } if !contains(got, "<|User|>\nHi\n\n<|Assistant|>\nHello") { t.Fatalf("expected user/assistant separation in %q", got) } - if !contains(got, "<|Assistant|>\nHello\n<|end▁of▁sentence|>\n\n<|User|>\nHow are you") { + if !contains(got, "<|Assistant|>\nHello\n\n<|User|>\nHow are you") { t.Fatalf("expected assistant/user separation in %q", got) } if !contains(got, "<|Assistant|>") { t.Fatalf("expected assistant marker in %q", got) } + if !contains(got, "<|System|>") { + t.Fatalf("expected system marker in %q", got) + } if !contains(got, "<|User|>") { t.Fatalf("expected user marker in %q", got) } diff --git a/internal/util/util_edge_test.go b/internal/util/util_edge_test.go index 9f91602..439b257 100644 --- a/internal/util/util_edge_test.go +++ b/internal/util/util_edge_test.go @@ -184,8 +184,11 @@ func TestMessagesPrepareAssistantMarkers(t *testing.T) { if !strings.Contains(got, "<|Assistant|>") { t.Fatalf("expected assistant marker, got %q", got) } - if !strings.Contains(got, "<|end▁of▁sentence|>") { - t.Fatalf("expected end of sentence marker, got %q", got) + if strings.Contains(got, "<|end▁of▁sentence|>") { + t.Fatalf("did not expect end of sentence marker, got %q", got) + } + if strings.Contains(got, "") { + t.Fatalf("did not expect legacy system marker, got %q", got) } }