Merge origin/dev into PR 406

This commit is contained in:
CJACK
2026-05-02 21:17:02 +08:00
17 changed files with 1304 additions and 1020 deletions

View File

@@ -6,6 +6,7 @@ import (
"strings"
openaifmt "ds2api/internal/format/openai"
"ds2api/internal/httpapi/openai/shared"
"ds2api/internal/sse"
streamengine "ds2api/internal/stream"
"ds2api/internal/toolstream"
@@ -34,15 +35,11 @@ type chatStreamRuntime struct {
toolCallsEmitted bool
toolCallsDoneEmitted bool
toolSieve toolstream.State
streamToolCallIDs map[int]string
streamToolNames map[int]string
rawThinking strings.Builder
thinking strings.Builder
toolDetectionThinking strings.Builder
rawText strings.Builder
text strings.Builder
responseMessageID int
toolSieve toolstream.State
streamToolCallIDs map[int]string
streamToolNames map[int]string
accumulator shared.StreamAccumulator
responseMessageID int
finalThinking string
finalText string
@@ -112,6 +109,11 @@ func newChatStreamRuntime(
emitEarlyToolDeltas: emitEarlyToolDeltas,
streamToolCallIDs: map[int]string{},
streamToolNames: map[int]string{},
accumulator: shared.StreamAccumulator{
ThinkingEnabled: thinkingEnabled,
SearchEnabled: searchEnabled,
StripReferenceMarkers: stripReferenceMarkers,
},
}
}
@@ -120,7 +122,13 @@ func (s *chatStreamRuntime) sendKeepAlive() {
return
}
_, _ = s.w.Write([]byte(": keep-alive\n\n"))
_ = s.rc.Flush()
s.sendChunk(openaifmt.BuildChatStreamChunk(
s.completionID,
s.created,
s.model,
[]map[string]any{},
nil,
))
}
func (s *chatStreamRuntime) sendChunk(v any) {
@@ -177,8 +185,8 @@ func (s *chatStreamRuntime) markContextCancelled() {
s.finalErrorStatus = 499
s.finalErrorMessage = "request context cancelled"
s.finalErrorCode = string(streamengine.StopReasonContextCancelled)
s.finalThinking = s.thinking.String()
s.finalText = cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
s.finalThinking = s.accumulator.Thinking.String()
s.finalText = cleanVisibleOutput(s.accumulator.Text.String(), s.stripReferenceMarkers)
s.finalFinishReason = string(streamengine.StopReasonContextCancelled)
}
@@ -191,12 +199,12 @@ func (s *chatStreamRuntime) finalize(finishReason string, deferEmptyOutput bool)
s.finalErrorStatus = 0
s.finalErrorMessage = ""
s.finalErrorCode = ""
finalThinking := s.thinking.String()
finalToolDetectionThinking := s.toolDetectionThinking.String()
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
finalThinking := s.accumulator.Thinking.String()
finalToolDetectionThinking := s.accumulator.ToolDetectionThinking.String()
finalText := cleanVisibleOutput(s.accumulator.Text.String(), s.stripReferenceMarkers)
s.finalThinking = finalThinking
s.finalText = finalText
detected := detectAssistantToolCalls(s.rawText.String(), finalText, s.rawThinking.String(), finalToolDetectionThinking, s.toolNames)
detected := detectAssistantToolCalls(s.accumulator.RawText.String(), finalText, s.accumulator.RawThinking.String(), finalToolDetectionThinking, s.toolNames)
if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted {
finishReason = "tool_calls"
s.sendDelta(map[string]any{
@@ -265,7 +273,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
s.responseMessageID = parsed.ResponseMessageID
}
if parsed.ContentFilter {
if strings.TrimSpace(s.text.String()) == "" {
if strings.TrimSpace(s.accumulator.Text.String()) == "" {
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("content_filter")}
}
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReasonHandlerRequested}
@@ -277,86 +285,65 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReasonHandlerRequested}
}
contentSeen := false
batch := chatDeltaBatch{runtime: s}
for _, p := range parsed.ToolDetectionThinkingParts {
trimmed := sse.TrimContinuationOverlapFromBuilder(&s.toolDetectionThinking, p.Text)
if trimmed != "" {
s.toolDetectionThinking.WriteString(trimmed)
}
}
for _, p := range parsed.Parts {
accumulated := s.accumulator.Apply(parsed)
for _, p := range accumulated.Parts {
if p.Type == "thinking" {
s.rawThinking.WriteString(p.Text)
contentSeen = true
if s.thinkingEnabled {
cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
if cleanedText == "" {
continue
}
s.thinking.WriteString(cleanedText)
batch.append("reasoning_content", cleanedText)
}
batch.append("reasoning_content", p.VisibleText)
continue
}
if p.RawText == "" {
continue
}
if p.CitationOnly {
continue
}
if !s.bufferToolContent {
batch.append("content", p.VisibleText)
} else {
s.rawText.WriteString(p.Text)
contentSeen = true
cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
if s.searchEnabled && sse.IsCitation(cleanedText) {
continue
}
if cleanedText != "" {
s.text.WriteString(cleanedText)
}
if !s.bufferToolContent {
if cleanedText == "" {
events := toolstream.ProcessChunk(&s.toolSieve, p.RawText, s.toolNames)
for _, evt := range events {
if len(evt.ToolCallDeltas) > 0 {
if !s.emitEarlyToolDeltas {
continue
}
filtered := filterIncrementalToolCallDeltasByAllowed(evt.ToolCallDeltas, s.streamToolNames)
if len(filtered) == 0 {
continue
}
formatted := formatIncrementalStreamToolCallDeltas(filtered, s.streamToolCallIDs)
if len(formatted) == 0 {
continue
}
batch.flush()
tcDelta := map[string]any{
"tool_calls": formatted,
}
s.toolCallsEmitted = true
s.sendDelta(tcDelta)
continue
}
batch.append("content", cleanedText)
} else {
events := toolstream.ProcessChunk(&s.toolSieve, p.Text, s.toolNames)
for _, evt := range events {
if len(evt.ToolCallDeltas) > 0 {
if !s.emitEarlyToolDeltas {
continue
}
filtered := filterIncrementalToolCallDeltasByAllowed(evt.ToolCallDeltas, s.streamToolNames)
if len(filtered) == 0 {
continue
}
formatted := formatIncrementalStreamToolCallDeltas(filtered, s.streamToolCallIDs)
if len(formatted) == 0 {
continue
}
batch.flush()
tcDelta := map[string]any{
"tool_calls": formatted,
}
s.toolCallsEmitted = true
s.sendDelta(tcDelta)
if len(evt.ToolCalls) > 0 {
batch.flush()
s.toolCallsEmitted = true
s.toolCallsDoneEmitted = true
tcDelta := map[string]any{
"tool_calls": formatFinalStreamToolCallsWithStableIDs(evt.ToolCalls, s.streamToolCallIDs, s.toolsRaw),
}
s.sendDelta(tcDelta)
s.resetStreamToolCallState()
continue
}
if evt.Content != "" {
cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
if cleaned == "" || (s.searchEnabled && sse.IsCitation(cleaned)) {
continue
}
if len(evt.ToolCalls) > 0 {
batch.flush()
s.toolCallsEmitted = true
s.toolCallsDoneEmitted = true
tcDelta := map[string]any{
"tool_calls": formatFinalStreamToolCallsWithStableIDs(evt.ToolCalls, s.streamToolCallIDs, s.toolsRaw),
}
s.sendDelta(tcDelta)
s.resetStreamToolCallState()
continue
}
if evt.Content != "" {
cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
if cleaned == "" || (s.searchEnabled && sse.IsCitation(cleaned)) {
continue
}
batch.append("content", cleaned)
}
batch.append("content", cleaned)
}
}
}
}
batch.flush()
return streamengine.ParsedDecision{ContentSeen: contentSeen}
return streamengine.ParsedDecision{ContentSeen: accumulated.ContentSeen}
}

View File

@@ -0,0 +1,53 @@
package chat
import (
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
func TestChatStreamKeepAliveEmitsEmptyChoiceDataFrame(t *testing.T) {
rec := httptest.NewRecorder()
runtime := newChatStreamRuntime(
rec,
http.NewResponseController(rec),
true,
"chatcmpl-test",
time.Now().Unix(),
"deepseek-v4-flash",
"prompt",
false,
false,
true,
nil,
nil,
false,
false,
)
runtime.sendKeepAlive()
body := rec.Body.String()
if !strings.Contains(body, ": keep-alive\n\n") {
t.Fatalf("expected keep-alive comment, got %q", body)
}
frames, done := parseSSEDataFrames(t, body)
if done {
t.Fatalf("keep-alive must not emit [DONE], body=%q", body)
}
if len(frames) != 1 {
t.Fatalf("expected one data frame, got %d body=%q", len(frames), body)
}
if got := asString(frames[0]["id"]); got != "chatcmpl-test" {
t.Fatalf("expected completion id to be preserved, got %q", got)
}
if got := asString(frames[0]["object"]); got != "chat.completion.chunk" {
t.Fatalf("expected chat chunk object, got %q", got)
}
choices, _ := frames[0]["choices"].([]any)
if len(choices) != 0 {
t.Fatalf("expected empty choices heartbeat, got %#v", choices)
}
}

View File

@@ -238,7 +238,7 @@ func (h *Handler) consumeChatStreamAttempt(r *http.Request, resp *http.Response,
OnParsed: func(parsed sse.LineResult) streamengine.ParsedDecision {
decision := streamRuntime.onParsed(parsed)
if historySession != nil {
historySession.progress(streamRuntime.thinking.String(), streamRuntime.text.String())
historySession.progress(streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String())
}
return decision
},
@@ -250,7 +250,7 @@ func (h *Handler) consumeChatStreamAttempt(r *http.Request, resp *http.Response,
OnContextDone: func() {
streamRuntime.markContextCancelled()
if historySession != nil {
historySession.stopped(streamRuntime.thinking.String(), streamRuntime.text.String(), string(streamengine.StopReasonContextCancelled))
historySession.stopped(streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String(), string(streamengine.StopReasonContextCancelled))
}
},
})
@@ -270,7 +270,7 @@ func recordChatStreamHistory(streamRuntime *chatStreamRuntime, historySession *c
return
}
if streamRuntime.finalErrorMessage != "" {
historySession.error(streamRuntime.finalErrorStatus, streamRuntime.finalErrorMessage, streamRuntime.finalErrorCode, streamRuntime.thinking.String(), streamRuntime.text.String())
historySession.error(streamRuntime.finalErrorStatus, streamRuntime.finalErrorMessage, streamRuntime.finalErrorCode, streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String())
return
}
historySession.success(http.StatusOK, streamRuntime.finalThinking, streamRuntime.finalText, streamRuntime.finalFinishReason, streamRuntime.finalUsage)
@@ -279,7 +279,7 @@ func recordChatStreamHistory(streamRuntime *chatStreamRuntime, historySession *c
func failChatStreamRetry(streamRuntime *chatStreamRuntime, historySession *chatHistorySession, status int, message, code string) {
streamRuntime.sendFailedChunk(status, message, code)
if historySession != nil {
historySession.error(status, message, code, streamRuntime.thinking.String(), streamRuntime.text.String())
historySession.error(status, message, code, streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String())
}
}

View File

@@ -254,7 +254,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
OnParsed: func(parsed sse.LineResult) streamengine.ParsedDecision {
decision := streamRuntime.onParsed(parsed)
if historySession != nil {
historySession.progress(streamRuntime.thinking.String(), streamRuntime.text.String())
historySession.progress(streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String())
}
return decision
},
@@ -268,14 +268,14 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
return
}
if streamRuntime.finalErrorMessage != "" {
historySession.error(streamRuntime.finalErrorStatus, streamRuntime.finalErrorMessage, streamRuntime.finalErrorCode, streamRuntime.thinking.String(), streamRuntime.text.String())
historySession.error(streamRuntime.finalErrorStatus, streamRuntime.finalErrorMessage, streamRuntime.finalErrorCode, streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String())
return
}
historySession.success(http.StatusOK, streamRuntime.finalThinking, streamRuntime.finalText, streamRuntime.finalFinishReason, streamRuntime.finalUsage)
},
OnContextDone: func() {
if historySession != nil {
historySession.stopped(streamRuntime.thinking.String(), streamRuntime.text.String(), string(streamengine.StopReasonContextCancelled))
historySession.stopped(streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String(), string(streamengine.StopReasonContextCancelled))
}
},
})