refactor: standardize prompt markers and remove legacy EOS and system instructions tags

This commit is contained in:
CJACK
2026-04-05 20:50:12 +08:00
parent dc912419c4
commit 585d35e592
4 changed files with 43 additions and 13 deletions

View File

@@ -9,6 +9,13 @@ import (
var markdownImagePattern = regexp.MustCompile(`!\[(.*?)\]\((.*?)\)`)
// Role markers: each one is the opening line of a turn in the prompt that
// MessagesPrepare assembles. They are passed to formatRoleBlock, which places
// the marker on its own line ahead of the turn text.
const (
	// assistantMarker opens a turn whose role is "assistant".
	assistantMarker = "<Assistant>"
	// systemMarker opens a turn whose role is "system".
	systemMarker = "<System>"
	// toolMarker opens a turn whose role is "tool".
	toolMarker = "<Tool>"
	// userMarker opens a turn whose role is "user".
	userMarker = "<User>"
)
func MessagesPrepare(messages []map[string]any) string {
type block struct {
Role string
@@ -35,21 +42,17 @@ func MessagesPrepare(messages []map[string]any) string {
for _, m := range merged {
switch m.Role {
case "assistant":
// Keep assistant turns on their own block so the model sees a clear
// boundary between prior answer text and the EOS marker.
parts = append(parts, "<Assistant>\n"+m.Text+"\n<end▁of▁sentence>")
parts = append(parts, formatRoleBlock(assistantMarker, m.Text))
case "tool":
if strings.TrimSpace(m.Text) != "" {
parts = append(parts, "<Tool>\n"+m.Text)
parts = append(parts, formatRoleBlock(toolMarker, m.Text))
}
case "system":
// Clear system boundary improves R1 and V3 context understanding significantly.
if text := strings.TrimSpace(m.Text); text != "" {
parts = append(parts, "<system_instructions>\n"+text+"\n</system_instructions>")
parts = append(parts, formatRoleBlock(systemMarker, text))
}
case "user":
// Put user turns on their own line so the role transition is explicit.
parts = append(parts, "<User>\n"+m.Text)
parts = append(parts, formatRoleBlock(userMarker, m.Text))
default:
if strings.TrimSpace(m.Text) != "" {
parts = append(parts, m.Text)
@@ -60,6 +63,10 @@ func MessagesPrepare(messages []map[string]any) string {
return markdownImagePattern.ReplaceAllString(out, `[${1}](${2})`)
}
// formatRoleBlock renders a single prompt turn: the role marker on its own
// line, immediately followed by the turn text.
//
// The text is emitted verbatim (it is not trimmed here) and no closing tag or
// trailing newline is appended.
func formatRoleBlock(marker, text string) string {
	// Size the buffer for marker + one newline + text so it is allocated once.
	block := make([]byte, 0, len(marker)+1+len(text))
	block = append(block, marker...)
	block = append(block, '\n')
	block = append(block, text...)
	return string(block)
}
func NormalizeContent(v any) string {
if v == nil {
return ""

View File

@@ -1,6 +1,9 @@
package prompt
import "testing"
import (
"strings"
"testing"
)
func TestNormalizeContentNilReturnsEmpty(t *testing.T) {
if got := NormalizeContent(nil); got != "" {
@@ -22,6 +25,20 @@ func TestMessagesPrepareNilContentNoNullLiteral(t *testing.T) {
}
}
// TestMessagesPrepareUsesUnifiedSystemMarkerAndNoEOSTag checks that a
// system+assistant conversation is rendered with the "<System>" marker directly
// above the system text, and that the legacy end-of-sentence tag no longer
// appears anywhere in the prepared prompt.
func TestMessagesPrepareUsesUnifiedSystemMarkerAndNoEOSTag(t *testing.T) {
	const (
		wantSystemBlock = "<System>\nSystem rule"
		legacyEOSTag    = "<end▁of▁sentence>"
	)

	got := MessagesPrepare([]map[string]any{
		{"role": "system", "content": "System rule"},
		{"role": "assistant", "content": "Answer"},
	})

	hasSystemBlock := strings.Contains(got, wantSystemBlock)
	if !hasSystemBlock {
		t.Fatalf("expected unified system marker, got %q", got)
	}

	hasLegacyEOS := strings.Contains(got, legacyEOSTag)
	if hasLegacyEOS {
		t.Fatalf("did not expect EOS marker, got %q", got)
	}
}
func TestNormalizeContentArrayFallsBackToContentWhenTextEmpty(t *testing.T) {
got := NormalizeContent([]any{
map[string]any{"type": "text", "text": "", "content": "from-content"},

View File

@@ -25,18 +25,21 @@ func TestMessagesPrepareRoles(t *testing.T) {
{"role": "user", "content": "How are you"},
}
got := MessagesPrepare(messages)
if !contains(got, "<system_instructions>\nYou are helper\n</system_instructions>\n\n<User>\nHi") {
if !contains(got, "<System>\nYou are helper\n\n<User>\nHi") {
t.Fatalf("expected system/user separation in %q", got)
}
if !contains(got, "<User>\nHi\n\n<Assistant>\nHello") {
t.Fatalf("expected user/assistant separation in %q", got)
}
if !contains(got, "<Assistant>\nHello\n<end▁of▁sentence>\n\n<User>\nHow are you") {
if !contains(got, "<Assistant>\nHello\n\n<User>\nHow are you") {
t.Fatalf("expected assistant/user separation in %q", got)
}
if !contains(got, "<Assistant>") {
t.Fatalf("expected assistant marker in %q", got)
}
if !contains(got, "<System>") {
t.Fatalf("expected system marker in %q", got)
}
if !contains(got, "<User>") {
t.Fatalf("expected user marker in %q", got)
}

View File

@@ -184,8 +184,11 @@ func TestMessagesPrepareAssistantMarkers(t *testing.T) {
if !strings.Contains(got, "<Assistant>") {
t.Fatalf("expected assistant marker, got %q", got)
}
if !strings.Contains(got, "<end▁of▁sentence>") {
t.Fatalf("expected end of sentence marker, got %q", got)
if strings.Contains(got, "<end▁of▁sentence>") {
t.Fatalf("did not expect end of sentence marker, got %q", got)
}
if strings.Contains(got, "<system_instructions>") {
t.Fatalf("did not expect legacy system marker, got %q", got)
}
}