Files
ds2api/internal/prompt/messages.go

113 lines
2.9 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package prompt
import (
"encoding/json"
"fmt"
"regexp"
"strings"
)
var markdownImagePattern = regexp.MustCompile(`!\[(.*?)\]\((.*?)\)`)
const (
systemMarker = "<System>"
userMarker = "<User>"
assistantMarker = "<Assistant>"
toolMarker = "<Tool>"
endSentenceMarker = "<end▁of▁sentence>"
endToolResultsMarker = "<end▁of▁toolresults>"
endInstructionsMarker = "<end▁of▁instructions>"
)
func MessagesPrepare(messages []map[string]any) string {
type block struct {
Role string
Text string
}
processed := make([]block, 0, len(messages))
for _, m := range messages {
role, _ := m["role"].(string)
text := NormalizeContent(m["content"])
processed = append(processed, block{Role: role, Text: text})
}
if len(processed) == 0 {
return ""
}
merged := make([]block, 0, len(processed))
for _, msg := range processed {
if len(merged) > 0 && merged[len(merged)-1].Role == msg.Role {
merged[len(merged)-1].Text += "\n\n" + msg.Text
continue
}
merged = append(merged, msg)
}
parts := make([]string, 0, len(merged))
for _, m := range merged {
switch m.Role {
case "assistant":
parts = append(parts, formatRoleBlock(assistantMarker, m.Text, endSentenceMarker))
case "tool":
if strings.TrimSpace(m.Text) != "" {
parts = append(parts, formatRoleBlock(toolMarker, m.Text, endToolResultsMarker))
}
case "system":
if text := strings.TrimSpace(m.Text); text != "" {
parts = append(parts, formatRoleBlock(systemMarker, text, endInstructionsMarker))
}
case "user":
parts = append(parts, formatRoleBlock(userMarker, m.Text, endSentenceMarker))
default:
if strings.TrimSpace(m.Text) != "" {
parts = append(parts, m.Text)
}
}
}
out := strings.Join(parts, "\n\n")
return markdownImagePattern.ReplaceAllString(out, `[${1}](${2})`)
}
// DeepSeek-style turn suffixes stay attached to the same block as the role content.
func formatRoleBlock(marker, text, endMarker string) string {
out := marker + "\n" + text
if strings.TrimSpace(endMarker) != "" {
out += endMarker
}
return out
}
func NormalizeContent(v any) string {
if v == nil {
return ""
}
switch x := v.(type) {
case string:
return x
case []any:
parts := make([]string, 0, len(x))
for _, item := range x {
m, ok := item.(map[string]any)
if !ok {
continue
}
typeStr, _ := m["type"].(string)
typeStr = strings.ToLower(strings.TrimSpace(typeStr))
if typeStr == "text" || typeStr == "output_text" || typeStr == "input_text" {
if txt, ok := m["text"].(string); ok && txt != "" {
parts = append(parts, txt)
continue
}
if txt, ok := m["content"].(string); ok && txt != "" {
parts = append(parts, txt)
}
}
}
return strings.Join(parts, "\n")
default:
b, err := json.Marshal(v)
if err != nil {
return fmt.Sprintf("%v", v)
}
return string(b)
}
}