refactor: unify empty-output retry logic into shared completionruntime package and normalize protocol adapter boundary.

This commit is contained in:
CJACK
2026-05-10 00:10:53 +08:00
parent 067cf465bb
commit 7c66742a19
32 changed files with 930 additions and 371 deletions

View File

@@ -4,11 +4,11 @@ import (
"context"
"io"
"net/http"
"strings"
"time"
"ds2api/internal/assistantturn"
"ds2api/internal/auth"
"ds2api/internal/completionruntime"
"ds2api/internal/config"
dsprotocol "ds2api/internal/deepseek/protocol"
openaifmt "ds2api/internal/format/openai"
@@ -17,148 +17,53 @@ import (
streamengine "ds2api/internal/stream"
)
type chatNonStreamResult struct {
rawThinking string
rawText string
thinking string
toolDetectionThinking string
text string
contentFilter bool
detectedCalls int
body map[string]any
finishReason string
responseMessageID int
outputError *assistantturn.OutputError
}
func (r chatNonStreamResult) historyText() string {
return historyTextForArchive(r.rawText, r.text)
}
func (r chatNonStreamResult) historyThinking() string {
return historyThinkingForArchive(r.rawThinking, r.toolDetectionThinking, r.thinking)
}
func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Context, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, historySession *chatHistorySession) {
attempts := 0
currentResp := resp
usagePrompt := finalPrompt
accumulatedThinking := ""
accumulatedRawThinking := ""
accumulatedToolDetectionThinking := ""
for {
result, ok := h.collectChatNonStreamAttempt(w, currentResp, completionID, model, usagePrompt, thinkingEnabled, searchEnabled, toolNames, toolsRaw)
if !ok {
return
}
accumulatedThinking += sse.TrimContinuationOverlap(accumulatedThinking, result.thinking)
accumulatedRawThinking += sse.TrimContinuationOverlap(accumulatedRawThinking, result.rawThinking)
accumulatedToolDetectionThinking += sse.TrimContinuationOverlap(accumulatedToolDetectionThinking, result.toolDetectionThinking)
result.thinking = accumulatedThinking
result.rawThinking = accumulatedRawThinking
result.toolDetectionThinking = accumulatedToolDetectionThinking
detected := detectAssistantToolCalls(result.rawText, result.text, result.rawThinking, result.toolDetectionThinking, toolNames)
result.detectedCalls = len(detected.Calls)
result.body = openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, result.thinking, result.text, detected.Calls, toolsRaw)
addRefFileTokensToUsage(result.body, refFileTokens)
result.finishReason = chatFinishReason(result.body)
if !shouldRetryChatNonStream(result, attempts) {
h.finishChatNonStreamResult(w, result, attempts, usagePrompt, refFileTokens, historySession)
return
}
attempts++
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "parent_message_id", result.responseMessageID)
retryPow, powErr := h.DS.GetPow(ctx, a, 3)
if powErr != nil {
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", powErr)
retryPow = pow
}
retryPayload := clonePayloadForEmptyOutputRetry(payload, result.responseMessageID)
nextResp, err := h.DS.CallCompletion(ctx, a, retryPayload, retryPow, 3)
if err != nil {
if historySession != nil {
historySession.error(http.StatusInternalServerError, "Failed to get completion.", "error", result.historyThinking(), result.historyText())
}
writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", err)
return
}
usagePrompt = usagePromptWithEmptyOutputRetry(usagePrompt, attempts)
currentResp = nextResp
}
}
func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http.Response, completionID, model, usagePrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any) (chatNonStreamResult, bool) {
if resp.StatusCode != http.StatusOK {
defer func() { _ = resp.Body.Close() }()
body, _ := io.ReadAll(resp.Body)
writeOpenAIError(w, resp.StatusCode, string(body))
return chatNonStreamResult{}, false
}
result := sse.CollectStream(resp, thinkingEnabled, true)
turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{
Model: model,
Prompt: usagePrompt,
SearchEnabled: searchEnabled,
ToolNames: toolNames,
ToolsRaw: toolsRaw,
})
respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, turn.Thinking, turn.Text, turn.ToolCalls, toolsRaw)
return chatNonStreamResult{
rawThinking: result.Thinking,
rawText: result.Text,
thinking: turn.Thinking,
toolDetectionThinking: result.ToolDetectionThinking,
text: turn.Text,
contentFilter: result.ContentFilter,
detectedCalls: len(turn.ToolCalls),
body: respBody,
finishReason: chatFinishReason(respBody),
responseMessageID: result.ResponseMessageID,
outputError: turn.Error,
}, true
}
func (h *Handler) finishChatNonStreamResult(w http.ResponseWriter, result chatNonStreamResult, attempts int, usagePrompt string, refFileTokens int, historySession *chatHistorySession) {
if result.detectedCalls == 0 && strings.TrimSpace(result.text) == "" {
status, message, code := upstreamEmptyOutputDetail(result.contentFilter, result.text, result.thinking)
if result.outputError != nil {
status, message, code = result.outputError.Status, result.outputError.Message, result.outputError.Code
}
if historySession != nil {
historySession.error(status, message, code, result.historyThinking(), result.historyText())
historySession.error(resp.StatusCode, string(body), "error", "", "")
}
writeOpenAIErrorWithCode(w, status, message, code)
config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", false, "retry_attempts", attempts, "success_source", "none", "content_filter", result.contentFilter)
writeOpenAIError(w, resp.StatusCode, string(body))
return
}
if historySession != nil {
historySession.success(http.StatusOK, result.historyThinking(), result.historyText(), result.finishReason, openaifmt.BuildChatUsageForModel("", usagePrompt, result.thinking, result.text, refFileTokens))
stdReq := promptcompat.StandardRequest{
Surface: "chat.completions",
ResponseModel: model,
PromptTokenText: finalPrompt,
FinalPrompt: finalPrompt,
RefFileTokens: refFileTokens,
Thinking: thinkingEnabled,
Search: searchEnabled,
ToolNames: toolNames,
ToolsRaw: toolsRaw,
ToolChoice: promptcompat.DefaultToolChoicePolicy(),
}
writeJSON(w, http.StatusOK, result.body)
source := "first_attempt"
if attempts > 0 {
source = "synthetic_retry"
}
config.Logger.Info("[openai_empty_retry] completed", "surface", "chat.completions", "stream", false, "retry_attempts", attempts, "success_source", source)
}
func chatFinishReason(respBody map[string]any) string {
if choices, ok := respBody["choices"].([]map[string]any); ok && len(choices) > 0 {
if fr, _ := choices[0]["finish_reason"].(string); strings.TrimSpace(fr) != "" {
return fr
retryEnabled := h != nil && h.DS != nil && emptyOutputRetryEnabled()
result, outErr := completionruntime.ExecuteNonStreamStartedWithRetry(ctx, h.DS, a, completionruntime.StartResult{
SessionID: completionID,
Payload: payload,
Pow: pow,
Response: resp,
Request: stdReq,
}, completionruntime.Options{
RetryEnabled: retryEnabled,
RetryMaxAttempts: emptyOutputRetryMaxAttempts(),
})
if outErr != nil {
if historySession != nil {
historySession.error(outErr.Status, outErr.Message, outErr.Code, historyThinkingForArchive(result.Turn.RawThinking, result.Turn.DetectionThinking, result.Turn.Thinking), historyTextForArchive(result.Turn.RawText, result.Turn.Text))
}
writeOpenAIErrorWithCode(w, outErr.Status, outErr.Message, outErr.Code)
return
}
return "stop"
}
func shouldRetryChatNonStream(result chatNonStreamResult, attempts int) bool {
return emptyOutputRetryEnabled() &&
attempts < emptyOutputRetryMaxAttempts() &&
!result.contentFilter &&
result.detectedCalls == 0 &&
strings.TrimSpace(result.text) == ""
respBody := openaifmt.BuildChatCompletionWithToolCalls(result.SessionID, model, result.Turn.Prompt, result.Turn.Thinking, result.Turn.Text, result.Turn.ToolCalls, toolsRaw)
respBody["usage"] = assistantturn.OpenAIChatUsage(result.Turn)
outcome := assistantturn.FinalizeTurn(result.Turn, assistantturn.FinalizeOptions{})
if historySession != nil {
historySession.success(http.StatusOK, historyThinkingForArchive(result.Turn.RawThinking, result.Turn.DetectionThinking, result.Turn.Thinking), historyTextForArchive(result.Turn.RawText, result.Turn.Text), outcome.FinishReason, assistantturn.OpenAIChatUsage(result.Turn))
}
writeJSON(w, http.StatusOK, respBody)
}
func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, historySession *chatHistorySession) {
@@ -166,42 +71,35 @@ func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request,
if !ok {
return
}
attempts := 0
currentResp := resp
for {
terminalWritten, retryable := h.consumeChatStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, historySession, attempts < emptyOutputRetryMaxAttempts())
if terminalWritten {
logChatStreamTerminal(streamRuntime, attempts)
return
}
if !retryable || !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() {
completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{
Surface: "chat.completions",
Stream: true,
RetryEnabled: emptyOutputRetryEnabled(),
RetryMaxAttempts: emptyOutputRetryMaxAttempts(),
MaxAttempts: 3,
UsagePrompt: finalPrompt,
}, completionruntime.StreamRetryHooks{
ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) {
return h.consumeChatStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, historySession, allowDeferEmpty)
},
Finalize: func(attempts int) {
streamRuntime.finalize("stop", false)
recordChatStreamHistory(streamRuntime, historySession)
config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", true, "retry_attempts", attempts, "success_source", "none")
return
}
attempts++
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID)
retryPow, powErr := h.DS.GetPow(r.Context(), a, 3)
if powErr != nil {
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", powErr)
retryPow = pow
}
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3)
if err != nil {
failChatStreamRetry(streamRuntime, historySession, http.StatusInternalServerError, "Failed to get completion.", "error")
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", err)
return
}
if nextResp.StatusCode != http.StatusOK {
defer func() { _ = nextResp.Body.Close() }()
body, _ := io.ReadAll(nextResp.Body)
failChatStreamRetry(streamRuntime, historySession, nextResp.StatusCode, string(body), "error")
return
}
streamRuntime.finalPrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts)
currentResp = nextResp
}
},
ParentMessageID: func() int {
return streamRuntime.responseMessageID
},
OnRetryPrompt: func(prompt string) {
streamRuntime.finalPrompt = prompt
},
OnRetryFailure: func(status int, message, code string) {
failChatStreamRetry(streamRuntime, historySession, status, message, code)
},
OnTerminal: func(attempts int) {
logChatStreamTerminal(streamRuntime, attempts)
},
})
}
func (h *Handler) prepareChatStreamRuntime(w http.ResponseWriter, resp *http.Response, completionID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, historySession *chatHistorySession) (*chatStreamRuntime, string, bool) {

View File

@@ -106,10 +106,6 @@ func cleanVisibleOutput(text string, stripReferenceMarkers bool) string {
return shared.CleanVisibleOutput(text, stripReferenceMarkers)
}
func upstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int, string, string) {
return shared.UpstreamEmptyOutputDetail(contentFilter, text, thinking)
}
func emptyOutputRetryEnabled() bool {
return shared.EmptyOutputRetryEnabled()
}
@@ -118,14 +114,6 @@ func emptyOutputRetryMaxAttempts() int {
return shared.EmptyOutputRetryMaxAttempts()
}
func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID)
}
func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {
return shared.UsagePromptWithEmptyOutputRetry(originalPrompt, retryAttempts)
}
func formatIncrementalStreamToolCallDeltas(deltas []toolstream.ToolCallDelta, ids map[int]string) []map[string]any {
return shared.FormatIncrementalStreamToolCallDeltas(deltas, ids)
}
@@ -137,7 +125,3 @@ func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta,
func formatFinalStreamToolCallsWithStableIDs(calls []toolcall.ParsedToolCall, ids map[int]string, toolsRaw any) []map[string]any {
return shared.FormatFinalStreamToolCallsWithStableIDs(calls, ids, toolsRaw)
}
func detectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult {
return shared.DetectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking, toolNames)
}

View File

@@ -85,8 +85,7 @@ func streamFinishReason(frames []map[string]any) string {
return ""
}
// Backward-compatible alias for historical test name used in CI logs.
func TestHandleNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) {
func TestHandleNonStreamSingleAttemptReturns503WhenUpstreamOutputEmpty(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"p":"response/content","v":""}`,
@@ -95,17 +94,17 @@ func TestHandleNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) {
rec := httptest.NewRecorder()
h.handleNonStream(rec, resp, "cid-empty", "deepseek-v4-flash", "prompt", 0, false, false, nil, nil, nil)
if rec.Code != http.StatusTooManyRequests {
t.Fatalf("expected status 429 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
if rec.Code != http.StatusServiceUnavailable {
t.Fatalf("expected status 503 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
}
out := decodeJSONBody(t, rec.Body.String())
errObj, _ := out["error"].(map[string]any)
if asString(errObj["code"]) != "upstream_empty_output" {
t.Fatalf("expected code=upstream_empty_output, got %#v", out)
if asString(errObj["code"]) != "upstream_unavailable" {
t.Fatalf("expected code=upstream_unavailable, got %#v", out)
}
}
func TestHandleNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
func TestHandleNonStreamSingleAttemptReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"code":"content_filter"}`,
@@ -124,7 +123,7 @@ func TestHandleNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutp
}
}
func TestHandleNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
func TestHandleNonStreamSingleAttemptReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"p":"response/thinking_content","v":"Only thinking"}`,

View File

@@ -1,26 +0,0 @@
package chat
// addRefFileTokensToUsage adds inline-uploaded file token estimates to an existing
// usage map inside a response object. This keeps the token accounting aware of file
// content that the upstream model processes but that is not part of the prompt text.
func addRefFileTokensToUsage(obj map[string]any, refFileTokens int) {
if refFileTokens <= 0 || obj == nil {
return
}
usage, ok := obj["usage"].(map[string]any)
if !ok || usage == nil {
return
}
for _, key := range []string{"input_tokens", "prompt_tokens"} {
if v, ok := usage[key]; ok {
if n, ok := v.(int); ok {
usage[key] = n + refFileTokens
}
}
}
if v, ok := usage["total_tokens"]; ok {
if n, ok := v.(int); ok {
usage["total_tokens"] = n + refFileTokens
}
}
}

View File

@@ -7,6 +7,7 @@ import (
"time"
"ds2api/internal/auth"
"ds2api/internal/completionruntime"
"ds2api/internal/config"
dsprotocol "ds2api/internal/deepseek/protocol"
"ds2api/internal/promptcompat"
@@ -19,41 +20,34 @@ func (h *Handler) handleResponsesStreamWithRetry(w http.ResponseWriter, r *http.
if !ok {
return
}
attempts := 0
currentResp := resp
for {
terminalWritten, retryable := h.consumeResponsesStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, attempts < emptyOutputRetryMaxAttempts())
if terminalWritten {
logResponsesStreamTerminal(streamRuntime, attempts)
return
}
if !retryable || !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() {
completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{
Surface: "responses",
Stream: true,
RetryEnabled: emptyOutputRetryEnabled(),
RetryMaxAttempts: emptyOutputRetryMaxAttempts(),
MaxAttempts: 3,
UsagePrompt: finalPrompt,
}, completionruntime.StreamRetryHooks{
ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) {
return h.consumeResponsesStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, allowDeferEmpty)
},
Finalize: func(attempts int) {
streamRuntime.finalize("stop", false)
config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "responses", "stream", true, "retry_attempts", attempts, "success_source", "none", "error_code", streamRuntime.finalErrorCode)
return
}
attempts++
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID)
retryPow, powErr := h.DS.GetPow(r.Context(), a, 3)
if powErr != nil {
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", powErr)
retryPow = pow
}
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3)
if err != nil {
streamRuntime.failResponse(http.StatusInternalServerError, "Failed to get completion.", "error")
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", err)
return
}
if nextResp.StatusCode != http.StatusOK {
defer func() { _ = nextResp.Body.Close() }()
body, _ := io.ReadAll(nextResp.Body)
streamRuntime.failResponse(nextResp.StatusCode, strings.TrimSpace(string(body)), "error")
return
}
streamRuntime.finalPrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts)
currentResp = nextResp
}
},
ParentMessageID: func() int {
return streamRuntime.responseMessageID
},
OnRetryPrompt: func(prompt string) {
streamRuntime.finalPrompt = prompt
},
OnRetryFailure: func(status int, message, code string) {
streamRuntime.failResponse(status, strings.TrimSpace(message), code)
},
OnTerminal: func(attempts int) {
logResponsesStreamTerminal(streamRuntime, attempts)
},
})
}
func (h *Handler) prepareResponsesStreamRuntime(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, traceID string, historySession *responsehistory.Session) (*responsesStreamRuntime, string, bool) {

View File

@@ -103,14 +103,6 @@ func emptyOutputRetryMaxAttempts() int {
return shared.EmptyOutputRetryMaxAttempts()
}
func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID)
}
func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {
return shared.UsagePromptWithEmptyOutputRetry(originalPrompt, retryAttempts)
}
func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, seenNames map[int]string) []toolstream.ToolCallDelta {
return shared.FilterIncrementalToolCallDeltasByAllowed(deltas, seenNames)
}

View File

@@ -397,7 +397,7 @@ func TestHandleResponsesNonStreamRequiredToolChoiceIgnoresThinkingToolPayloadWhe
}
}
func TestHandleResponsesNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) {
func TestHandleResponsesNonStreamSingleAttemptReturns503WhenUpstreamOutputEmpty(t *testing.T) {
h := &Handler{}
rec := httptest.NewRecorder()
resp := &http.Response{
@@ -409,17 +409,17 @@ func TestHandleResponsesNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T)
}
h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-v4-flash", "prompt", 0, false, false, nil, nil, promptcompat.DefaultToolChoicePolicy(), "")
if rec.Code != http.StatusTooManyRequests {
t.Fatalf("expected 429 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
if rec.Code != http.StatusServiceUnavailable {
t.Fatalf("expected 503 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
}
out := decodeJSONBody(t, rec.Body.String())
errObj, _ := out["error"].(map[string]any)
if asString(errObj["code"]) != "upstream_empty_output" {
t.Fatalf("expected code=upstream_empty_output, got %#v", out)
if asString(errObj["code"]) != "upstream_unavailable" {
t.Fatalf("expected code=upstream_unavailable, got %#v", out)
}
}
func TestHandleResponsesNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
func TestHandleResponsesNonStreamSingleAttemptReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
h := &Handler{}
rec := httptest.NewRecorder()
resp := &http.Response{
@@ -441,7 +441,7 @@ func TestHandleResponsesNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWi
}
}
func TestHandleResponsesNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
func TestHandleResponsesNonStreamSingleAttemptReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
h := &Handler{}
rec := httptest.NewRecorder()
resp := &http.Response{

View File

@@ -17,7 +17,7 @@ func UpstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int,
if thinking != "" {
return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned reasoning without visible output.", "upstream_empty_output"
}
return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned empty output.", "upstream_empty_output"
return http.StatusServiceUnavailable, "Upstream service is unavailable and returned no output.", "upstream_unavailable"
}
func WriteUpstreamEmptyOutputError(w http.ResponseWriter, text, thinking string, contentFilter bool) bool {

View File

@@ -274,12 +274,12 @@ func TestChatCompletionsStreamEmitsFailureFrameWhenUpstreamOutputEmpty(t *testin
}
last := frames[0]
statusCode, ok := last["status_code"].(float64)
if !ok || int(statusCode) != http.StatusTooManyRequests {
t.Fatalf("expected status_code=429, got %#v body=%s", last["status_code"], rec.Body.String())
if !ok || int(statusCode) != http.StatusServiceUnavailable {
t.Fatalf("expected status_code=503, got %#v body=%s", last["status_code"], rec.Body.String())
}
errObj, _ := last["error"].(map[string]any)
if asString(errObj["code"]) != "upstream_empty_output" {
t.Fatalf("expected code=upstream_empty_output, got %#v", last)
if asString(errObj["code"]) != "upstream_unavailable" {
t.Fatalf("expected code=upstream_unavailable, got %#v", last)
}
}
@@ -345,7 +345,7 @@ func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) {
func TestChatCompletionsNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
ds := &streamStatusDSSeqStub{resps: []*http.Response{
makeOpenAISSEHTTPResponse(`data: {"response_message_id":99}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"response_message_id":99,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
}}
h := &openAITestSurface{
@@ -496,7 +496,7 @@ func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) {
func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
ds := &streamStatusDSSeqStub{resps: []*http.Response{
makeOpenAISSEHTTPResponse(`data: {"response_message_id":88}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"response_message_id":88,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
}}
h := &openAITestSurface{
@@ -537,8 +537,15 @@ func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
if len(content) == 0 {
t.Fatalf("expected content entries, got %#v", item)
}
textEntry, _ := content[0].(map[string]any)
if asString(textEntry["type"]) != "output_text" || asString(textEntry["text"]) != "visible" {
var textEntry map[string]any
for _, entry := range content {
obj, _ := entry.(map[string]any)
if asString(obj["type"]) == "output_text" {
textEntry = obj
break
}
}
if asString(textEntry["text"]) != "visible" {
t.Fatalf("expected visible text entry, got %#v", content)
}
}