refactor: centralize assistant turn semantics and stream accumulation into new assistantturn and completionruntime packages

This commit is contained in:
CJACK
2026-05-02 23:28:43 +08:00
parent eccd8c957b
commit dc5bffdf89
24 changed files with 1215 additions and 254 deletions

View File

@@ -7,13 +7,14 @@ import (
"strings"
"time"
"ds2api/internal/assistantturn"
dsprotocol "ds2api/internal/deepseek/protocol"
"ds2api/internal/sse"
streamengine "ds2api/internal/stream"
)
//nolint:unused // retained for native Gemini stream handling path.
func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Request, resp *http.Response, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string) {
func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Request, resp *http.Response, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any) {
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
@@ -28,7 +29,7 @@ func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Req
rc := http.NewResponseController(w)
_, canFlush := w.(http.Flusher)
runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, h.compatStripReferenceMarkers(), toolNames)
runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, h.compatStripReferenceMarkers(), toolNames, toolsRaw)
initialType := "text"
if thinkingEnabled {
@@ -64,9 +65,11 @@ type geminiStreamRuntime struct {
bufferContent bool
stripReferenceMarkers bool
toolNames []string
toolsRaw any
thinking strings.Builder
text strings.Builder
accumulator *assistantturn.Accumulator
contentFilter bool
responseMessageID int
}
//nolint:unused // retained for native Gemini stream handling path.
@@ -80,6 +83,7 @@ func newGeminiStreamRuntime(
searchEnabled bool,
stripReferenceMarkers bool,
toolNames []string,
toolsRaw any,
) *geminiStreamRuntime {
return &geminiStreamRuntime{
w: w,
@@ -92,6 +96,12 @@ func newGeminiStreamRuntime(
bufferContent: len(toolNames) > 0,
stripReferenceMarkers: stripReferenceMarkers,
toolNames: toolNames,
toolsRaw: toolsRaw,
accumulator: assistantturn.NewAccumulator(assistantturn.AccumulatorOptions{
ThinkingEnabled: thinkingEnabled,
SearchEnabled: searchEnabled,
StripReferenceMarkers: stripReferenceMarkers,
}),
}
}
@@ -111,32 +121,24 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
if !parsed.Parsed {
return streamengine.ParsedDecision{}
}
if parsed.ResponseMessageID > 0 {
s.responseMessageID = parsed.ResponseMessageID
}
if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
if parsed.ContentFilter {
s.contentFilter = true
}
return streamengine.ParsedDecision{Stop: true}
}
contentSeen := false
for _, p := range parsed.Parts {
cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
if cleanedText == "" {
continue
}
if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(cleanedText) {
continue
}
contentSeen = true
accumulated := s.accumulator.Apply(parsed)
for _, p := range accumulated.Parts {
if p.Type == "thinking" {
if s.thinkingEnabled {
if cleanedText != "" {
s.thinking.WriteString(cleanedText)
}
}
continue
}
if cleanedText == "" {
if p.RawText == "" || p.CitationOnly || p.VisibleText == "" {
continue
}
s.text.WriteString(cleanedText)
if s.bufferContent {
continue
}
@@ -146,23 +148,38 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
"index": 0,
"content": map[string]any{
"role": "model",
"parts": []map[string]any{{"text": cleanedText}},
"parts": []map[string]any{{"text": p.VisibleText}},
},
},
},
"modelVersion": s.model,
})
}
return streamengine.ParsedDecision{ContentSeen: contentSeen}
return streamengine.ParsedDecision{ContentSeen: accumulated.ContentSeen}
}
//nolint:unused // retained for native Gemini stream handling path.
func (s *geminiStreamRuntime) finalize() {
finalThinking := s.thinking.String()
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
rawText, text, rawThinking, thinking, detectionThinking := s.accumulator.Snapshot()
turn := assistantturn.BuildTurnFromStreamSnapshot(assistantturn.StreamSnapshot{
RawText: rawText,
VisibleText: text,
RawThinking: rawThinking,
VisibleThinking: thinking,
DetectionThinking: detectionThinking,
ContentFilter: s.contentFilter,
ResponseMessageID: s.responseMessageID,
}, assistantturn.BuildOptions{
Model: s.model,
Prompt: s.finalPrompt,
SearchEnabled: s.searchEnabled,
StripReferenceMarkers: s.stripReferenceMarkers,
ToolNames: s.toolNames,
ToolsRaw: s.toolsRaw,
})
if s.bufferContent {
parts := buildGeminiPartsFromFinal(finalText, finalThinking, s.toolNames)
parts := buildGeminiPartsFromTurn(turn)
s.sendChunk(map[string]any{
"candidates": []map[string]any{
{
@@ -190,7 +207,11 @@ func (s *geminiStreamRuntime) finalize() {
"finishReason": "STOP",
},
},
"modelVersion": s.model,
"usageMetadata": buildGeminiUsage(s.model, s.finalPrompt, finalThinking, finalText),
"modelVersion": s.model,
"usageMetadata": map[string]any{
"promptTokenCount": turn.Usage.InputTokens,
"candidatesTokenCount": turn.Usage.OutputTokens,
"totalTokenCount": turn.Usage.TotalTokens,
},
})
}