refactor: unify empty-output retry logic into shared completionruntime package and normalize protocol adapter boundary.

This commit is contained in:
CJACK
2026-05-10 00:10:53 +08:00
parent 067cf465bb
commit 7c66742a19
32 changed files with 930 additions and 371 deletions

View File

@@ -145,7 +145,7 @@ func (h *Handler) handleClaudeDirectStream(w http.ResponseWriter, r *http.Reques
return
}
streamReq := start.Request
h.handleClaudeStreamRealtime(w, r, start.Response, streamReq.ResponseModel, streamReq.Messages, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, historySession)
h.handleClaudeStreamRealtimeWithRetry(w, r, a, start.Response, start.Payload, start.Pow, streamReq.ResponseModel, streamReq.Messages, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.PromptTokenText, historySession)
}
func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store ConfigReader) bool {
@@ -360,3 +360,112 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ
OnFinalize: streamRuntime.onFinalize,
})
}
// handleClaudeStreamRealtimeWithRetry streams a Claude-format SSE response to
// the client, delegating empty-output retry orchestration to the shared
// completionruntime package. A non-200 upstream response is recorded in the
// history session and forwarded to the client as an error; otherwise SSE
// headers are written and each upstream attempt is consumed by
// consumeClaudeStreamAttempt until a terminal event is emitted or the retry
// budget is exhausted.
func (h *Handler) handleClaudeStreamRealtimeWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, promptTokenText string, historySession *responsehistory.Session) {
	if resp.StatusCode != http.StatusOK {
		defer func() { _ = resp.Body.Close() }()
		body, _ := io.ReadAll(resp.Body)
		detail := strings.TrimSpace(string(body))
		if historySession != nil {
			historySession.Error(resp.StatusCode, detail, "error", "", "")
		}
		// Propagate the upstream status code instead of masking every failure
		// as 500, consistent with the history record above and with the
		// Gemini handler's treatment of the same condition.
		writeClaudeError(w, resp.StatusCode, detail)
		return
	}
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache, no-transform")
	w.Header().Set("Connection", "keep-alive")
	w.Header().Set("X-Accel-Buffering", "no")
	rc := http.NewResponseController(w)
	_, canFlush := w.(http.Flusher)
	if !canFlush {
		config.Logger.Warn("[claude_stream] response writer does not support flush; streaming may be buffered")
	}
	streamRuntime := newClaudeStreamRuntime(
		w,
		rc,
		canFlush,
		model,
		messages,
		thinkingEnabled,
		searchEnabled,
		stripReferenceMarkersEnabled(),
		toolNames,
		toolsRaw,
		promptTokenText,
		historySession,
	)
	streamRuntime.sendMessageStart()
	completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{
		Surface:      "claude.messages",
		Stream:       true,
		RetryEnabled: true,
		MaxAttempts:  3,
		UsagePrompt:  promptTokenText,
	}, completionruntime.StreamRetryHooks{
		// Consume one upstream attempt; reports whether a terminal event was
		// written and whether an empty result may be retried.
		ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) {
			return h.consumeClaudeStreamAttempt(r, currentResp, streamRuntime, thinkingEnabled, allowDeferEmpty)
		},
		// Invoked when the retry budget is exhausted without terminal output.
		Finalize: func(_ int) {
			streamRuntime.finalize("end_turn", false)
		},
		// Parent message ID used to chain a synthetic retry turn upstream.
		ParentMessageID: func() int {
			return streamRuntime.responseMessageID
		},
		// Keep usage accounting in sync with the synthetic retry prompt.
		OnRetryPrompt: func(prompt string) {
			streamRuntime.promptTokenText = prompt
		},
		OnRetryFailure: func(status int, message, code string) {
			streamRuntime.sendErrorWithCode(status, strings.TrimSpace(message), code)
		},
	})
}
// consumeClaudeStreamAttempt drains one upstream SSE attempt into the stream
// runtime. It returns (terminalWritten, retryable): terminalWritten is true
// when a terminal client event (error or completed turn) was emitted, and
// retryable is true only when the attempt ended empty and may be retried.
func (h *Handler) consumeClaudeStreamAttempt(r *http.Request, resp *http.Response, streamRuntime *claudeStreamRuntime, thinkingEnabled bool, allowDeferEmpty bool) (bool, bool) {
	defer func() { _ = resp.Body.Close() }()
	firstBlockType := "text"
	if thinkingEnabled {
		firstBlockType = "thinking"
	}
	var (
		stopReason streamengine.StopReason
		consumeErr error
	)
	streamengine.ConsumeSSE(streamengine.ConsumeConfig{
		Context:             r.Context(),
		Body:                resp.Body,
		ThinkingEnabled:     thinkingEnabled,
		InitialType:         firstBlockType,
		KeepAliveInterval:   claudeStreamPingInterval,
		IdleTimeout:         claudeStreamIdleTimeout,
		MaxKeepAliveNoInput: claudeStreamMaxKeepaliveCnt,
	}, streamengine.ConsumeHooks{
		OnKeepAlive: func() {
			streamRuntime.sendPing()
		},
		OnParsed: streamRuntime.onParsed,
		OnFinalize: func(reason streamengine.StopReason, err error) {
			stopReason, consumeErr = reason, err
		},
	})
	// Archive partial output and emit a terminal error event to the client.
	failWith := func(message, code string) (bool, bool) {
		if streamRuntime.history != nil {
			streamRuntime.history.Error(500, message, code, responsehistory.ThinkingForArchive(streamRuntime.rawThinking.String(), streamRuntime.toolDetectionThinking.String(), streamRuntime.thinking.String()), responsehistory.TextForArchive(streamRuntime.rawText.String(), streamRuntime.text.String()))
		}
		streamRuntime.sendError(message)
		return true, false
	}
	if string(stopReason) == "upstream_error" {
		return failWith(streamRuntime.upstreamErr, "upstream_error")
	}
	if consumeErr != nil {
		return failWith(consumeErr.Error(), "error")
	}
	// finalize reports whether a terminal event was written; when empty output
	// is deferred for retry, nothing terminal was emitted yet.
	if streamRuntime.finalize("end_turn", allowDeferEmpty) {
		return true, false
	}
	return false, true
}

View File

@@ -29,9 +29,10 @@ type claudeStreamRuntime struct {
bufferToolContent bool
stripReferenceMarkers bool
messageID string
thinking strings.Builder
text strings.Builder
messageID string
thinking strings.Builder
text strings.Builder
responseMessageID int
sieve toolstream.State
rawText strings.Builder
@@ -92,6 +93,9 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
s.upstreamErr = parsed.ErrorMessage
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("upstream_error")}
}
if parsed.ResponseMessageID > 0 {
s.responseMessageID = parsed.ResponseMessageID
}
if parsed.Stop {
return streamengine.ParsedDecision{Stop: true}
}

View File

@@ -22,16 +22,27 @@ func (s *claudeStreamRuntime) send(event string, v any) {
}
// sendError emits a terminal SSE error event with HTTP status 500 and the
// generic "internal_error" code; see sendErrorWithCode for the event shape.
func (s *claudeStreamRuntime) sendError(message string) {
	s.sendErrorWithCode(500, message, "internal_error")
}
func (s *claudeStreamRuntime) sendErrorWithCode(status int, message, code string) {
msg := strings.TrimSpace(message)
if msg == "" {
msg = "upstream stream error"
}
if code == "" {
code = "internal_error"
}
errType := "api_error"
if status == 429 {
errType = "rate_limit_error"
}
s.send("error", map[string]any{
"type": "error",
"error": map[string]any{
"type": "api_error",
"type": errType,
"message": msg,
"code": "internal_error",
"code": code,
"param": nil,
},
})

View File

@@ -63,13 +63,10 @@ func (s *claudeStreamRuntime) sendToolUseBlock(idx int, tc toolcall.ParsedToolCa
})
}
func (s *claudeStreamRuntime) finalize(stopReason string) {
func (s *claudeStreamRuntime) finalize(stopReason string, deferEmptyOutput bool) bool {
if s.ended {
return
return true
}
s.ended = true
s.closeThinkingBlock()
if s.bufferToolContent {
for _, evt := range toolstream.Flush(&s.sieve, s.toolNames) {
@@ -123,6 +120,7 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
RawThinking: s.rawThinking.String(),
VisibleThinking: s.thinking.String(),
DetectionThinking: s.toolDetectionThinking.String(),
ResponseMessageID: s.responseMessageID,
AlreadyEmittedCalls: s.toolCallsDetected,
AlreadyEmittedToolRaw: s.toolCallsDetected,
}, assistantturn.BuildOptions{
@@ -137,6 +135,22 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{
AlreadyEmittedToolCalls: s.toolCallsDetected,
})
if outcome.ShouldFail {
if deferEmptyOutput {
return false
}
s.ended = true
s.closeThinkingBlock()
s.closeTextBlock()
if s.history != nil {
s.history.Error(outcome.Error.Status, outcome.Error.Message, outcome.Error.Code, responsehistory.ThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking), responsehistory.TextForArchive(turn.RawText, turn.Text))
}
s.sendErrorWithCode(outcome.Error.Status, outcome.Error.Message, outcome.Error.Code)
return true
}
s.ended = true
s.closeThinkingBlock()
if s.bufferToolContent && !s.toolCallsDetected {
if len(turn.ToolCalls) > 0 {
@@ -197,6 +211,7 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
},
})
s.send("message_stop", map[string]any{"type": "message_stop"})
return true
}
func (s *claudeStreamRuntime) onFinalize(reason streamengine.StopReason, scannerErr error) {
@@ -214,5 +229,5 @@ func (s *claudeStreamRuntime) onFinalize(reason streamengine.StopReason, scanner
s.sendError(scannerErr.Error())
return
}
s.finalize("end_turn")
s.finalize("end_turn", false)
}

View File

@@ -137,7 +137,7 @@ func (h *Handler) handleGeminiDirectStream(w http.ResponseWriter, r *http.Reques
return
}
streamReq := start.Request
h.handleStreamGenerateContent(w, r, start.Response, streamReq.ResponseModel, streamReq.PromptTokenText, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, historySession)
h.handleStreamGenerateContentWithRetry(w, r, a, start.Response, start.Payload, start.Pow, streamReq.ResponseModel, streamReq.PromptTokenText, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, historySession)
}
func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream bool) bool {

View File

@@ -1,6 +1,7 @@
package gemini
import (
"context"
"encoding/json"
"io"
"net/http"
@@ -8,6 +9,8 @@ import (
"time"
"ds2api/internal/assistantturn"
"ds2api/internal/auth"
"ds2api/internal/completionruntime"
dsprotocol "ds2api/internal/deepseek/protocol"
"ds2api/internal/responsehistory"
"ds2api/internal/sse"
@@ -54,7 +57,7 @@ func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Req
}, streamengine.ConsumeHooks{
OnParsed: runtime.onParsed,
OnFinalize: func(_ streamengine.StopReason, _ error) {
runtime.finalize()
runtime.finalize(false)
},
})
}
@@ -78,9 +81,83 @@ type geminiStreamRuntime struct {
accumulator *assistantturn.Accumulator
contentFilter bool
responseMessageID int
finalErrorStatus int
finalErrorMessage string
finalErrorCode string
history *responsehistory.Session
}
// handleStreamGenerateContentWithRetry streams a Gemini generateContent
// response over SSE, handing empty-output retry orchestration to the shared
// completionruntime package. Non-200 upstream responses are archived in the
// history session and surfaced to the client immediately.
func (h *Handler) handleStreamGenerateContentWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, historySession *responsehistory.Session) {
	if resp.StatusCode != http.StatusOK {
		defer func() { _ = resp.Body.Close() }()
		raw, _ := io.ReadAll(resp.Body)
		detail := strings.TrimSpace(string(raw))
		if historySession != nil {
			historySession.Error(resp.StatusCode, detail, "error", "", "")
		}
		writeGeminiError(w, resp.StatusCode, detail)
		return
	}
	hdr := w.Header()
	hdr.Set("Content-Type", "text/event-stream")
	hdr.Set("Cache-Control", "no-cache, no-transform")
	hdr.Set("Connection", "keep-alive")
	hdr.Set("X-Accel-Buffering", "no")
	ctrl := http.NewResponseController(w)
	_, flushable := w.(http.Flusher)
	rt := newGeminiStreamRuntime(w, ctrl, flushable, model, finalPrompt, thinkingEnabled, searchEnabled, stripReferenceMarkersEnabled(), toolNames, toolsRaw, historySession)
	completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{
		Surface:      "gemini.generate_content",
		Stream:       true,
		RetryEnabled: true,
		MaxAttempts:  3,
		UsagePrompt:  finalPrompt,
	}, completionruntime.StreamRetryHooks{
		// Drain one upstream attempt; reports terminal-written / retryable.
		ConsumeAttempt: func(attemptResp *http.Response, allowDeferEmpty bool) (bool, bool) {
			return h.consumeGeminiStreamAttempt(r.Context(), attemptResp, rt, thinkingEnabled, allowDeferEmpty)
		},
		// Retry budget exhausted: finalize without deferring empty output.
		Finalize: func(_ int) {
			rt.finalize(false)
		},
		ParentMessageID: func() int {
			return rt.responseMessageID
		},
		// Keep usage accounting aligned with the synthetic retry prompt.
		OnRetryPrompt: func(prompt string) {
			rt.finalPrompt = prompt
		},
		OnRetryFailure: func(status int, message, _ string) {
			rt.sendErrorChunk(status, strings.TrimSpace(message))
		},
	})
}
// consumeGeminiStreamAttempt drains one upstream SSE attempt into the Gemini
// stream runtime and returns (terminalWritten, retryable): terminalWritten is
// true when finalize emitted a terminal chunk, retryable is true only when an
// empty result was deferred for another attempt.
func (h *Handler) consumeGeminiStreamAttempt(ctx context.Context, resp *http.Response, runtime *geminiStreamRuntime, thinkingEnabled bool, allowDeferEmpty bool) (bool, bool) {
	defer func() { _ = resp.Body.Close() }()
	blockType := "text"
	if thinkingEnabled {
		blockType = "thinking"
	}
	cfg := streamengine.ConsumeConfig{
		Context:             ctx,
		Body:                resp.Body,
		ThinkingEnabled:     thinkingEnabled,
		InitialType:         blockType,
		KeepAliveInterval:   time.Duration(dsprotocol.KeepAliveTimeout) * time.Second,
		IdleTimeout:         time.Duration(dsprotocol.StreamIdleTimeout) * time.Second,
		MaxKeepAliveNoInput: dsprotocol.MaxKeepaliveCount,
	}
	streamengine.ConsumeSSE(cfg, streamengine.ConsumeHooks{
		OnParsed: runtime.onParsed,
		// Final disposition is decided by runtime.finalize below, not here.
		OnFinalize: func(_ streamengine.StopReason, _ error) {
		},
	})
	if runtime.finalize(allowDeferEmpty) {
		return true, false
	}
	return false, true
}
//nolint:unused // retained for native Gemini stream handling path.
func newGeminiStreamRuntime(
w http.ResponseWriter,
@@ -127,6 +204,35 @@ func (s *geminiStreamRuntime) sendChunk(payload map[string]any) {
}
}
// sendErrorChunk emits a single Gemini-style error chunk, mapping the HTTP
// status code to a google.rpc-style status string. An empty message falls
// back to the standard HTTP status text.
func (s *geminiStreamRuntime) sendErrorChunk(status int, message string) {
	text := strings.TrimSpace(message)
	if text == "" {
		text = http.StatusText(status)
	}
	var rpcStatus string
	switch {
	case status == http.StatusUnauthorized:
		rpcStatus = "UNAUTHENTICATED"
	case status == http.StatusForbidden:
		rpcStatus = "PERMISSION_DENIED"
	case status == http.StatusTooManyRequests:
		rpcStatus = "RESOURCE_EXHAUSTED"
	case status == http.StatusNotFound:
		rpcStatus = "NOT_FOUND"
	case status >= 500:
		rpcStatus = "INTERNAL"
	default:
		// All other client errors are reported as invalid arguments.
		rpcStatus = "INVALID_ARGUMENT"
	}
	s.sendChunk(map[string]any{
		"error": map[string]any{
			"code":    status,
			"message": text,
			"status":  rpcStatus,
		},
	})
}
//nolint:unused // retained for native Gemini stream handling path.
func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision {
if !parsed.Parsed {
@@ -192,7 +298,7 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
}
//nolint:unused // retained for native Gemini stream handling path.
func (s *geminiStreamRuntime) finalize() {
func (s *geminiStreamRuntime) finalize(deferEmptyOutput bool) bool {
rawText, text, rawThinking, thinking, detectionThinking := s.accumulator.Snapshot()
turn := assistantturn.BuildTurnFromStreamSnapshot(assistantturn.StreamSnapshot{
RawText: rawText,
@@ -211,6 +317,19 @@ func (s *geminiStreamRuntime) finalize() {
ToolsRaw: s.toolsRaw,
})
outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{})
if outcome.ShouldFail {
if deferEmptyOutput {
s.finalErrorStatus = outcome.Error.Status
s.finalErrorMessage = outcome.Error.Message
s.finalErrorCode = outcome.Error.Code
return false
}
if s.history != nil {
s.history.Error(outcome.Error.Status, outcome.Error.Message, outcome.Error.Code, responsehistory.ThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking), responsehistory.TextForArchive(turn.RawText, turn.Text))
}
s.sendErrorChunk(outcome.Error.Status, outcome.Error.Message)
return true
}
if s.history != nil {
s.history.Success(
http.StatusOK,
@@ -257,4 +376,5 @@ func (s *geminiStreamRuntime) finalize() {
"totalTokenCount": outcome.Usage.TotalTokens,
},
})
return true
}

View File

@@ -4,11 +4,11 @@ import (
"context"
"io"
"net/http"
"strings"
"time"
"ds2api/internal/assistantturn"
"ds2api/internal/auth"
"ds2api/internal/completionruntime"
"ds2api/internal/config"
dsprotocol "ds2api/internal/deepseek/protocol"
openaifmt "ds2api/internal/format/openai"
@@ -17,148 +17,53 @@ import (
streamengine "ds2api/internal/stream"
)
type chatNonStreamResult struct {
rawThinking string
rawText string
thinking string
toolDetectionThinking string
text string
contentFilter bool
detectedCalls int
body map[string]any
finishReason string
responseMessageID int
outputError *assistantturn.OutputError
}
func (r chatNonStreamResult) historyText() string {
return historyTextForArchive(r.rawText, r.text)
}
func (r chatNonStreamResult) historyThinking() string {
return historyThinkingForArchive(r.rawThinking, r.toolDetectionThinking, r.thinking)
}
func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Context, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, historySession *chatHistorySession) {
attempts := 0
currentResp := resp
usagePrompt := finalPrompt
accumulatedThinking := ""
accumulatedRawThinking := ""
accumulatedToolDetectionThinking := ""
for {
result, ok := h.collectChatNonStreamAttempt(w, currentResp, completionID, model, usagePrompt, thinkingEnabled, searchEnabled, toolNames, toolsRaw)
if !ok {
return
}
accumulatedThinking += sse.TrimContinuationOverlap(accumulatedThinking, result.thinking)
accumulatedRawThinking += sse.TrimContinuationOverlap(accumulatedRawThinking, result.rawThinking)
accumulatedToolDetectionThinking += sse.TrimContinuationOverlap(accumulatedToolDetectionThinking, result.toolDetectionThinking)
result.thinking = accumulatedThinking
result.rawThinking = accumulatedRawThinking
result.toolDetectionThinking = accumulatedToolDetectionThinking
detected := detectAssistantToolCalls(result.rawText, result.text, result.rawThinking, result.toolDetectionThinking, toolNames)
result.detectedCalls = len(detected.Calls)
result.body = openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, result.thinking, result.text, detected.Calls, toolsRaw)
addRefFileTokensToUsage(result.body, refFileTokens)
result.finishReason = chatFinishReason(result.body)
if !shouldRetryChatNonStream(result, attempts) {
h.finishChatNonStreamResult(w, result, attempts, usagePrompt, refFileTokens, historySession)
return
}
attempts++
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "parent_message_id", result.responseMessageID)
retryPow, powErr := h.DS.GetPow(ctx, a, 3)
if powErr != nil {
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", powErr)
retryPow = pow
}
retryPayload := clonePayloadForEmptyOutputRetry(payload, result.responseMessageID)
nextResp, err := h.DS.CallCompletion(ctx, a, retryPayload, retryPow, 3)
if err != nil {
if historySession != nil {
historySession.error(http.StatusInternalServerError, "Failed to get completion.", "error", result.historyThinking(), result.historyText())
}
writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", err)
return
}
usagePrompt = usagePromptWithEmptyOutputRetry(usagePrompt, attempts)
currentResp = nextResp
}
}
func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http.Response, completionID, model, usagePrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any) (chatNonStreamResult, bool) {
if resp.StatusCode != http.StatusOK {
defer func() { _ = resp.Body.Close() }()
body, _ := io.ReadAll(resp.Body)
writeOpenAIError(w, resp.StatusCode, string(body))
return chatNonStreamResult{}, false
}
result := sse.CollectStream(resp, thinkingEnabled, true)
turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{
Model: model,
Prompt: usagePrompt,
SearchEnabled: searchEnabled,
ToolNames: toolNames,
ToolsRaw: toolsRaw,
})
respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, turn.Thinking, turn.Text, turn.ToolCalls, toolsRaw)
return chatNonStreamResult{
rawThinking: result.Thinking,
rawText: result.Text,
thinking: turn.Thinking,
toolDetectionThinking: result.ToolDetectionThinking,
text: turn.Text,
contentFilter: result.ContentFilter,
detectedCalls: len(turn.ToolCalls),
body: respBody,
finishReason: chatFinishReason(respBody),
responseMessageID: result.ResponseMessageID,
outputError: turn.Error,
}, true
}
func (h *Handler) finishChatNonStreamResult(w http.ResponseWriter, result chatNonStreamResult, attempts int, usagePrompt string, refFileTokens int, historySession *chatHistorySession) {
if result.detectedCalls == 0 && strings.TrimSpace(result.text) == "" {
status, message, code := upstreamEmptyOutputDetail(result.contentFilter, result.text, result.thinking)
if result.outputError != nil {
status, message, code = result.outputError.Status, result.outputError.Message, result.outputError.Code
}
if historySession != nil {
historySession.error(status, message, code, result.historyThinking(), result.historyText())
historySession.error(resp.StatusCode, string(body), "error", "", "")
}
writeOpenAIErrorWithCode(w, status, message, code)
config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", false, "retry_attempts", attempts, "success_source", "none", "content_filter", result.contentFilter)
writeOpenAIError(w, resp.StatusCode, string(body))
return
}
if historySession != nil {
historySession.success(http.StatusOK, result.historyThinking(), result.historyText(), result.finishReason, openaifmt.BuildChatUsageForModel("", usagePrompt, result.thinking, result.text, refFileTokens))
stdReq := promptcompat.StandardRequest{
Surface: "chat.completions",
ResponseModel: model,
PromptTokenText: finalPrompt,
FinalPrompt: finalPrompt,
RefFileTokens: refFileTokens,
Thinking: thinkingEnabled,
Search: searchEnabled,
ToolNames: toolNames,
ToolsRaw: toolsRaw,
ToolChoice: promptcompat.DefaultToolChoicePolicy(),
}
writeJSON(w, http.StatusOK, result.body)
source := "first_attempt"
if attempts > 0 {
source = "synthetic_retry"
}
config.Logger.Info("[openai_empty_retry] completed", "surface", "chat.completions", "stream", false, "retry_attempts", attempts, "success_source", source)
}
func chatFinishReason(respBody map[string]any) string {
if choices, ok := respBody["choices"].([]map[string]any); ok && len(choices) > 0 {
if fr, _ := choices[0]["finish_reason"].(string); strings.TrimSpace(fr) != "" {
return fr
retryEnabled := h != nil && h.DS != nil && emptyOutputRetryEnabled()
result, outErr := completionruntime.ExecuteNonStreamStartedWithRetry(ctx, h.DS, a, completionruntime.StartResult{
SessionID: completionID,
Payload: payload,
Pow: pow,
Response: resp,
Request: stdReq,
}, completionruntime.Options{
RetryEnabled: retryEnabled,
RetryMaxAttempts: emptyOutputRetryMaxAttempts(),
})
if outErr != nil {
if historySession != nil {
historySession.error(outErr.Status, outErr.Message, outErr.Code, historyThinkingForArchive(result.Turn.RawThinking, result.Turn.DetectionThinking, result.Turn.Thinking), historyTextForArchive(result.Turn.RawText, result.Turn.Text))
}
writeOpenAIErrorWithCode(w, outErr.Status, outErr.Message, outErr.Code)
return
}
return "stop"
}
func shouldRetryChatNonStream(result chatNonStreamResult, attempts int) bool {
return emptyOutputRetryEnabled() &&
attempts < emptyOutputRetryMaxAttempts() &&
!result.contentFilter &&
result.detectedCalls == 0 &&
strings.TrimSpace(result.text) == ""
respBody := openaifmt.BuildChatCompletionWithToolCalls(result.SessionID, model, result.Turn.Prompt, result.Turn.Thinking, result.Turn.Text, result.Turn.ToolCalls, toolsRaw)
respBody["usage"] = assistantturn.OpenAIChatUsage(result.Turn)
outcome := assistantturn.FinalizeTurn(result.Turn, assistantturn.FinalizeOptions{})
if historySession != nil {
historySession.success(http.StatusOK, historyThinkingForArchive(result.Turn.RawThinking, result.Turn.DetectionThinking, result.Turn.Thinking), historyTextForArchive(result.Turn.RawText, result.Turn.Text), outcome.FinishReason, assistantturn.OpenAIChatUsage(result.Turn))
}
writeJSON(w, http.StatusOK, respBody)
}
func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, historySession *chatHistorySession) {
@@ -166,42 +71,35 @@ func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request,
if !ok {
return
}
attempts := 0
currentResp := resp
for {
terminalWritten, retryable := h.consumeChatStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, historySession, attempts < emptyOutputRetryMaxAttempts())
if terminalWritten {
logChatStreamTerminal(streamRuntime, attempts)
return
}
if !retryable || !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() {
completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{
Surface: "chat.completions",
Stream: true,
RetryEnabled: emptyOutputRetryEnabled(),
RetryMaxAttempts: emptyOutputRetryMaxAttempts(),
MaxAttempts: 3,
UsagePrompt: finalPrompt,
}, completionruntime.StreamRetryHooks{
ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) {
return h.consumeChatStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, historySession, allowDeferEmpty)
},
Finalize: func(attempts int) {
streamRuntime.finalize("stop", false)
recordChatStreamHistory(streamRuntime, historySession)
config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", true, "retry_attempts", attempts, "success_source", "none")
return
}
attempts++
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID)
retryPow, powErr := h.DS.GetPow(r.Context(), a, 3)
if powErr != nil {
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", powErr)
retryPow = pow
}
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3)
if err != nil {
failChatStreamRetry(streamRuntime, historySession, http.StatusInternalServerError, "Failed to get completion.", "error")
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", err)
return
}
if nextResp.StatusCode != http.StatusOK {
defer func() { _ = nextResp.Body.Close() }()
body, _ := io.ReadAll(nextResp.Body)
failChatStreamRetry(streamRuntime, historySession, nextResp.StatusCode, string(body), "error")
return
}
streamRuntime.finalPrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts)
currentResp = nextResp
}
},
ParentMessageID: func() int {
return streamRuntime.responseMessageID
},
OnRetryPrompt: func(prompt string) {
streamRuntime.finalPrompt = prompt
},
OnRetryFailure: func(status int, message, code string) {
failChatStreamRetry(streamRuntime, historySession, status, message, code)
},
OnTerminal: func(attempts int) {
logChatStreamTerminal(streamRuntime, attempts)
},
})
}
func (h *Handler) prepareChatStreamRuntime(w http.ResponseWriter, resp *http.Response, completionID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, historySession *chatHistorySession) (*chatStreamRuntime, string, bool) {

View File

@@ -106,10 +106,6 @@ func cleanVisibleOutput(text string, stripReferenceMarkers bool) string {
return shared.CleanVisibleOutput(text, stripReferenceMarkers)
}
func upstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int, string, string) {
return shared.UpstreamEmptyOutputDetail(contentFilter, text, thinking)
}
// emptyOutputRetryEnabled reports whether the empty-output retry feature is
// enabled; thin forwarding wrapper over the shared package.
func emptyOutputRetryEnabled() bool {
	return shared.EmptyOutputRetryEnabled()
}
@@ -118,14 +114,6 @@ func emptyOutputRetryMaxAttempts() int {
return shared.EmptyOutputRetryMaxAttempts()
}
func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID)
}
func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {
return shared.UsagePromptWithEmptyOutputRetry(originalPrompt, retryAttempts)
}
// formatIncrementalStreamToolCallDeltas formats streamed tool-call deltas
// using the stable IDs in ids; thin forwarding wrapper over the shared
// package.
func formatIncrementalStreamToolCallDeltas(deltas []toolstream.ToolCallDelta, ids map[int]string) []map[string]any {
	return shared.FormatIncrementalStreamToolCallDeltas(deltas, ids)
}
@@ -137,7 +125,3 @@ func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta,
// formatFinalStreamToolCallsWithStableIDs formats the final set of parsed
// tool calls, reusing the stable IDs in ids; thin forwarding wrapper over the
// shared package.
func formatFinalStreamToolCallsWithStableIDs(calls []toolcall.ParsedToolCall, ids map[int]string, toolsRaw any) []map[string]any {
	return shared.FormatFinalStreamToolCallsWithStableIDs(calls, ids, toolsRaw)
}
func detectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult {
return shared.DetectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking, toolNames)
}

View File

@@ -85,8 +85,7 @@ func streamFinishReason(frames []map[string]any) string {
return ""
}
// Backward-compatible alias for historical test name used in CI logs.
func TestHandleNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) {
func TestHandleNonStreamSingleAttemptReturns503WhenUpstreamOutputEmpty(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"p":"response/content","v":""}`,
@@ -95,17 +94,17 @@ func TestHandleNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) {
rec := httptest.NewRecorder()
h.handleNonStream(rec, resp, "cid-empty", "deepseek-v4-flash", "prompt", 0, false, false, nil, nil, nil)
if rec.Code != http.StatusTooManyRequests {
t.Fatalf("expected status 429 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
if rec.Code != http.StatusServiceUnavailable {
t.Fatalf("expected status 503 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
}
out := decodeJSONBody(t, rec.Body.String())
errObj, _ := out["error"].(map[string]any)
if asString(errObj["code"]) != "upstream_empty_output" {
t.Fatalf("expected code=upstream_empty_output, got %#v", out)
if asString(errObj["code"]) != "upstream_unavailable" {
t.Fatalf("expected code=upstream_unavailable, got %#v", out)
}
}
func TestHandleNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
func TestHandleNonStreamSingleAttemptReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"code":"content_filter"}`,
@@ -124,7 +123,7 @@ func TestHandleNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutp
}
}
func TestHandleNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
func TestHandleNonStreamSingleAttemptReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"p":"response/thinking_content","v":"Only thinking"}`,

View File

@@ -1,26 +0,0 @@
package chat
// addRefFileTokensToUsage adds inline-uploaded file token estimates to an
// existing usage map inside a response object. This keeps the token
// accounting aware of file content that the upstream model processes but that
// is not part of the prompt text. Keys that are absent or not plain ints are
// left untouched; nil objects and non-positive token counts are no-ops.
func addRefFileTokensToUsage(obj map[string]any, refFileTokens int) {
	if refFileTokens <= 0 || obj == nil {
		return
	}
	usage, ok := obj["usage"].(map[string]any)
	if !ok || usage == nil {
		return
	}
	// bump increments a counter in place when it exists and holds an int.
	bump := func(key string) {
		if n, ok := usage[key].(int); ok {
			usage[key] = n + refFileTokens
		}
	}
	bump("input_tokens")
	bump("prompt_tokens")
	bump("total_tokens")
}

View File

@@ -7,6 +7,7 @@ import (
"time"
"ds2api/internal/auth"
"ds2api/internal/completionruntime"
"ds2api/internal/config"
dsprotocol "ds2api/internal/deepseek/protocol"
"ds2api/internal/promptcompat"
@@ -19,41 +20,34 @@ func (h *Handler) handleResponsesStreamWithRetry(w http.ResponseWriter, r *http.
if !ok {
return
}
attempts := 0
currentResp := resp
for {
terminalWritten, retryable := h.consumeResponsesStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, attempts < emptyOutputRetryMaxAttempts())
if terminalWritten {
logResponsesStreamTerminal(streamRuntime, attempts)
return
}
if !retryable || !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() {
completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{
Surface: "responses",
Stream: true,
RetryEnabled: emptyOutputRetryEnabled(),
RetryMaxAttempts: emptyOutputRetryMaxAttempts(),
MaxAttempts: 3,
UsagePrompt: finalPrompt,
}, completionruntime.StreamRetryHooks{
ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) {
return h.consumeResponsesStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, allowDeferEmpty)
},
Finalize: func(attempts int) {
streamRuntime.finalize("stop", false)
config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "responses", "stream", true, "retry_attempts", attempts, "success_source", "none", "error_code", streamRuntime.finalErrorCode)
return
}
attempts++
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID)
retryPow, powErr := h.DS.GetPow(r.Context(), a, 3)
if powErr != nil {
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", powErr)
retryPow = pow
}
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3)
if err != nil {
streamRuntime.failResponse(http.StatusInternalServerError, "Failed to get completion.", "error")
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", err)
return
}
if nextResp.StatusCode != http.StatusOK {
defer func() { _ = nextResp.Body.Close() }()
body, _ := io.ReadAll(nextResp.Body)
streamRuntime.failResponse(nextResp.StatusCode, strings.TrimSpace(string(body)), "error")
return
}
streamRuntime.finalPrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts)
currentResp = nextResp
}
},
ParentMessageID: func() int {
return streamRuntime.responseMessageID
},
OnRetryPrompt: func(prompt string) {
streamRuntime.finalPrompt = prompt
},
OnRetryFailure: func(status int, message, code string) {
streamRuntime.failResponse(status, strings.TrimSpace(message), code)
},
OnTerminal: func(attempts int) {
logResponsesStreamTerminal(streamRuntime, attempts)
},
})
}
func (h *Handler) prepareResponsesStreamRuntime(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, traceID string, historySession *responsehistory.Session) (*responsesStreamRuntime, string, bool) {

View File

@@ -103,14 +103,6 @@ func emptyOutputRetryMaxAttempts() int {
return shared.EmptyOutputRetryMaxAttempts()
}
func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID)
}
func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {
return shared.UsagePromptWithEmptyOutputRetry(originalPrompt, retryAttempts)
}
func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, seenNames map[int]string) []toolstream.ToolCallDelta {
return shared.FilterIncrementalToolCallDeltasByAllowed(deltas, seenNames)
}

View File

@@ -397,7 +397,7 @@ func TestHandleResponsesNonStreamRequiredToolChoiceIgnoresThinkingToolPayloadWhe
}
}
func TestHandleResponsesNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) {
func TestHandleResponsesNonStreamSingleAttemptReturns503WhenUpstreamOutputEmpty(t *testing.T) {
h := &Handler{}
rec := httptest.NewRecorder()
resp := &http.Response{
@@ -409,17 +409,17 @@ func TestHandleResponsesNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T)
}
h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-v4-flash", "prompt", 0, false, false, nil, nil, promptcompat.DefaultToolChoicePolicy(), "")
if rec.Code != http.StatusTooManyRequests {
t.Fatalf("expected 429 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
if rec.Code != http.StatusServiceUnavailable {
t.Fatalf("expected 503 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
}
out := decodeJSONBody(t, rec.Body.String())
errObj, _ := out["error"].(map[string]any)
if asString(errObj["code"]) != "upstream_empty_output" {
t.Fatalf("expected code=upstream_empty_output, got %#v", out)
if asString(errObj["code"]) != "upstream_unavailable" {
t.Fatalf("expected code=upstream_unavailable, got %#v", out)
}
}
func TestHandleResponsesNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
func TestHandleResponsesNonStreamSingleAttemptReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
h := &Handler{}
rec := httptest.NewRecorder()
resp := &http.Response{
@@ -441,7 +441,7 @@ func TestHandleResponsesNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWi
}
}
func TestHandleResponsesNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
func TestHandleResponsesNonStreamSingleAttemptReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
h := &Handler{}
rec := httptest.NewRecorder()
resp := &http.Response{

View File

@@ -17,7 +17,7 @@ func UpstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int,
if thinking != "" {
return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned reasoning without visible output.", "upstream_empty_output"
}
return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned empty output.", "upstream_empty_output"
return http.StatusServiceUnavailable, "Upstream service is unavailable and returned no output.", "upstream_unavailable"
}
func WriteUpstreamEmptyOutputError(w http.ResponseWriter, text, thinking string, contentFilter bool) bool {

View File

@@ -274,12 +274,12 @@ func TestChatCompletionsStreamEmitsFailureFrameWhenUpstreamOutputEmpty(t *testin
}
last := frames[0]
statusCode, ok := last["status_code"].(float64)
if !ok || int(statusCode) != http.StatusTooManyRequests {
t.Fatalf("expected status_code=429, got %#v body=%s", last["status_code"], rec.Body.String())
if !ok || int(statusCode) != http.StatusServiceUnavailable {
t.Fatalf("expected status_code=503, got %#v body=%s", last["status_code"], rec.Body.String())
}
errObj, _ := last["error"].(map[string]any)
if asString(errObj["code"]) != "upstream_empty_output" {
t.Fatalf("expected code=upstream_empty_output, got %#v", last)
if asString(errObj["code"]) != "upstream_unavailable" {
t.Fatalf("expected code=upstream_unavailable, got %#v", last)
}
}
@@ -345,7 +345,7 @@ func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) {
func TestChatCompletionsNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
ds := &streamStatusDSSeqStub{resps: []*http.Response{
makeOpenAISSEHTTPResponse(`data: {"response_message_id":99}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"response_message_id":99,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
}}
h := &openAITestSurface{
@@ -496,7 +496,7 @@ func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) {
func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
ds := &streamStatusDSSeqStub{resps: []*http.Response{
makeOpenAISSEHTTPResponse(`data: {"response_message_id":88}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"response_message_id":88,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
}}
h := &openAITestSurface{
@@ -537,8 +537,15 @@ func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
if len(content) == 0 {
t.Fatalf("expected content entries, got %#v", item)
}
textEntry, _ := content[0].(map[string]any)
if asString(textEntry["type"]) != "output_text" || asString(textEntry["text"]) != "visible" {
var textEntry map[string]any
for _, entry := range content {
obj, _ := entry.(map[string]any)
if asString(obj["type"]) == "output_text" {
textEntry = obj
break
}
}
if asString(textEntry["text"]) != "visible" {
t.Fatalf("expected visible text entry, got %#v", content)
}
}