mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-16 22:25:15 +08:00
feat: add empty-output retry and Vercel auto-continue support
- Auto-retry Chat/Responses streams once when upstream output is empty but not content-filtered, reusing session/token/PoW and appending a regeneration suffix to the prompt - Wire DeepSeek continue API into Vercel streams for multi-round thinking output exhaustion - Defer empty-output errors in stream finalizers to enable synthetic retry; only surface failure when the retry budget is exhausted - Track content_filter stops to avoid retry on filtered outputs - Add comprehensive tests for stream/non-stream retry, Responses retry, and content_filter no-retry - Update prompt-compatibility.md documentation Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -128,7 +128,10 @@ func (s *chatStreamRuntime) resetStreamToolCallState() {
|
||||
s.streamToolNames = map[int]string{}
|
||||
}
|
||||
|
||||
func (s *chatStreamRuntime) finalize(finishReason string) {
|
||||
func (s *chatStreamRuntime) finalize(finishReason string, deferEmptyOutput bool) bool {
|
||||
s.finalErrorStatus = 0
|
||||
s.finalErrorMessage = ""
|
||||
s.finalErrorCode = ""
|
||||
finalThinking := s.thinking.String()
|
||||
finalToolDetectionThinking := s.toolDetectionThinking.String()
|
||||
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
|
||||
@@ -204,8 +207,14 @@ func (s *chatStreamRuntime) finalize(finishReason string) {
|
||||
}
|
||||
if len(detected.Calls) == 0 && !s.toolCallsEmitted && strings.TrimSpace(finalText) == "" {
|
||||
status, message, code := upstreamEmptyOutputDetail(finishReason == "content_filter", finalText, finalThinking)
|
||||
if deferEmptyOutput {
|
||||
s.finalErrorStatus = status
|
||||
s.finalErrorMessage = message
|
||||
s.finalErrorCode = code
|
||||
return false
|
||||
}
|
||||
s.sendFailedChunk(status, message, code)
|
||||
return
|
||||
return true
|
||||
}
|
||||
usage := openaifmt.BuildChatUsage(s.finalPrompt, finalThinking, finalText)
|
||||
s.finalFinishReason = finishReason
|
||||
@@ -218,6 +227,7 @@ func (s *chatStreamRuntime) finalize(finishReason string) {
|
||||
usage,
|
||||
))
|
||||
s.sendDone()
|
||||
return true
|
||||
}
|
||||
|
||||
func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision {
|
||||
|
||||
271
internal/httpapi/openai/chat/empty_retry_runtime.go
Normal file
271
internal/httpapi/openai/chat/empty_retry_runtime.go
Normal file
@@ -0,0 +1,271 @@
|
||||
package chat
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/config"
|
||||
dsprotocol "ds2api/internal/deepseek/protocol"
|
||||
openaifmt "ds2api/internal/format/openai"
|
||||
"ds2api/internal/sse"
|
||||
streamengine "ds2api/internal/stream"
|
||||
)
|
||||
|
||||
// chatNonStreamResult carries the sanitized output of one non-stream chat
// completion attempt, so the retry loop can inspect it and accumulate
// thinking across attempts before the final response body is written.
type chatNonStreamResult struct {
	thinking              string         // cleaned visible thinking output
	toolDetectionThinking string         // cleaned thinking used only for tool-call detection
	text                  string         // cleaned visible answer text
	contentFilter         bool           // upstream stop was a content filter; blocks retry
	detectedCalls         int            // number of tool calls detected in the output
	body                  map[string]any // fully built OpenAI chat-completion response body
	finishReason          string         // finish_reason extracted from body (defaults to "stop")
}
|
||||
|
||||
// handleNonStreamWithRetry serves a non-stream chat completion, retrying the
// upstream call (reusing auth, payload and PoW) when the output comes back
// empty and was not content-filtered. Thinking is accumulated across attempts
// with continuation-overlap trimming so the final body contains the full
// multi-round reasoning output.
func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Context, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, historySession *chatHistorySession) {
	attempts := 0
	currentResp := resp
	usagePrompt := finalPrompt
	accumulatedThinking := ""
	accumulatedToolDetectionThinking := ""
	for {
		result, ok := h.collectChatNonStreamAttempt(w, currentResp, completionID, model, usagePrompt, thinkingEnabled, searchEnabled, toolNames)
		if !ok {
			// Terminal error already written to the client.
			return
		}
		// Merge this attempt's thinking onto prior attempts, dropping any
		// overlap the upstream re-sent at the continuation boundary.
		accumulatedThinking += sse.TrimContinuationOverlap(accumulatedThinking, result.thinking)
		accumulatedToolDetectionThinking += sse.TrimContinuationOverlap(accumulatedToolDetectionThinking, result.toolDetectionThinking)
		result.thinking = accumulatedThinking
		result.toolDetectionThinking = accumulatedToolDetectionThinking
		// Re-run detection and rebuild the body over the accumulated output
		// so the final response reflects every attempt, not just the last.
		detected := detectAssistantToolCalls(result.text, result.thinking, result.toolDetectionThinking, toolNames)
		result.detectedCalls = len(detected.Calls)
		result.body = openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, result.thinking, result.text, detected.Calls)
		result.finishReason = chatFinishReason(result.body)
		if !shouldRetryChatNonStream(result, attempts) {
			h.finishChatNonStreamResult(w, result, attempts, usagePrompt, historySession)
			return
		}

		attempts++
		config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", false, "retry_attempt", attempts)
		// Retry with the regeneration suffix appended to the prompt.
		retryPayload := clonePayloadWithEmptyOutputRetryPrompt(payload)
		nextResp, err := h.DS.CallCompletion(ctx, a, retryPayload, pow, 3)
		if err != nil {
			if historySession != nil {
				historySession.error(http.StatusInternalServerError, "Failed to get completion.", "error", result.thinking, result.text)
			}
			writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
			config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", err)
			return
		}
		// Reflect the regeneration suffix in usage accounting for the retry.
		usagePrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts)
		currentResp = nextResp
	}
}
|
||||
|
||||
// collectChatNonStreamAttempt consumes one upstream completion response and
// returns its sanitized result. ok=false means a terminal error was already
// written to the client and the retry loop must stop.
func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http.Response, completionID, model, usagePrompt string, thinkingEnabled, searchEnabled bool, toolNames []string) (chatNonStreamResult, bool) {
	if resp.StatusCode != http.StatusOK {
		defer func() { _ = resp.Body.Close() }()
		body, _ := io.ReadAll(resp.Body)
		writeOpenAIError(w, resp.StatusCode, string(body))
		return chatNonStreamResult{}, false
	}
	// NOTE(review): on the 200 path the body is not closed here; presumably
	// sse.CollectStream drains/closes it — confirm (the responses-side
	// collector closes unconditionally via defer).
	result := sse.CollectStream(resp, thinkingEnabled, true)
	stripReferenceMarkers := h.compatStripReferenceMarkers()
	finalThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
	finalToolDetectionThinking := cleanVisibleOutput(result.ToolDetectionThinking, stripReferenceMarkers)
	finalText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
	if searchEnabled {
		// Convert citation markers into links when search was enabled.
		finalText = replaceCitationMarkersWithLinks(finalText, result.CitationLinks)
	}
	detected := detectAssistantToolCalls(finalText, finalThinking, finalToolDetectionThinking, toolNames)
	respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, finalThinking, finalText, detected.Calls)
	return chatNonStreamResult{
		thinking:              finalThinking,
		toolDetectionThinking: finalToolDetectionThinking,
		text:                  finalText,
		contentFilter:         result.ContentFilter,
		detectedCalls:         len(detected.Calls),
		body:                  respBody,
		finishReason:          chatFinishReason(respBody),
	}, true
}
|
||||
|
||||
// finishChatNonStreamResult writes the terminal outcome of the non-stream
// retry loop: either an empty-output error (also recorded to history when
// enabled) or the successful completion body, plus a completion log line
// tagged with whether success came from the first attempt or a retry.
func (h *Handler) finishChatNonStreamResult(w http.ResponseWriter, result chatNonStreamResult, attempts int, usagePrompt string, historySession *chatHistorySession) {
	if result.detectedCalls == 0 && shouldWriteUpstreamEmptyOutputError(result.text) {
		status, message, code := upstreamEmptyOutputDetail(result.contentFilter, result.text, result.thinking)
		if historySession != nil {
			historySession.error(status, message, code, result.thinking, result.text)
		}
		writeUpstreamEmptyOutputError(w, result.text, result.thinking, result.contentFilter)
		config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", false, "retry_attempts", attempts, "success_source", "none", "content_filter", result.contentFilter)
		return
	}
	if historySession != nil {
		historySession.success(http.StatusOK, result.thinking, result.text, result.finishReason, openaifmt.BuildChatUsage(usagePrompt, result.thinking, result.text))
	}
	writeJSON(w, http.StatusOK, result.body)
	source := "first_attempt"
	if attempts > 0 {
		source = "synthetic_retry"
	}
	config.Logger.Info("[openai_empty_retry] completed", "surface", "chat.completions", "stream", false, "retry_attempts", attempts, "success_source", source)
}
|
||||
|
||||
// chatFinishReason extracts the finish_reason of the first choice from a
// completed chat-completion body, defaulting to "stop" when absent or blank.
//
// BuildChatCompletionWithToolCalls produces choices as []map[string]any, but
// a body that has round-tripped through encoding/json carries []any instead;
// the original assertion only matched the former and silently fell back to
// "stop" for the latter, so both shapes are handled here.
func chatFinishReason(respBody map[string]any) string {
	var first map[string]any
	switch choices := respBody["choices"].(type) {
	case []map[string]any:
		if len(choices) > 0 {
			first = choices[0]
		}
	case []any:
		if len(choices) > 0 {
			first, _ = choices[0].(map[string]any)
		}
	}
	if first != nil {
		if fr, _ := first["finish_reason"].(string); strings.TrimSpace(fr) != "" {
			return fr
		}
	}
	return "stop"
}
|
||||
|
||||
func shouldRetryChatNonStream(result chatNonStreamResult, attempts int) bool {
|
||||
return emptyOutputRetryEnabled() &&
|
||||
attempts < emptyOutputRetryMaxAttempts() &&
|
||||
!result.contentFilter &&
|
||||
result.detectedCalls == 0 &&
|
||||
strings.TrimSpace(result.text) == ""
|
||||
}
|
||||
|
||||
// handleStreamWithRetry serves a streamed chat completion with empty-output
// retry: every attempt is consumed into one shared runtime so already-sent
// SSE chunks are never duplicated, and an empty, non-filtered result retries
// the upstream call (same auth/payload/PoW) with a regeneration suffix.
func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, historySession *chatHistorySession) {
	streamRuntime, initialType, ok := h.prepareChatStreamRuntime(w, resp, completionID, model, finalPrompt, thinkingEnabled, searchEnabled, toolNames, historySession)
	if !ok {
		return
	}
	attempts := 0
	currentResp := resp
	for {
		terminalWritten, retryable := h.consumeChatStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, historySession, attempts < emptyOutputRetryMaxAttempts())
		if terminalWritten {
			logChatStreamTerminal(streamRuntime, attempts)
			return
		}
		// Retry budget exhausted (or retry disabled): finalize again without
		// deferral so the empty-output error actually reaches the client.
		if !retryable || !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() {
			streamRuntime.finalize("stop", false)
			recordChatStreamHistory(streamRuntime, historySession)
			config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", true, "retry_attempts", attempts, "success_source", "none")
			return
		}
		attempts++
		config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", true, "retry_attempt", attempts)
		nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadWithEmptyOutputRetryPrompt(payload), pow, 3)
		if err != nil {
			failChatStreamRetry(streamRuntime, historySession, http.StatusInternalServerError, "Failed to get completion.", "error")
			config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", err)
			return
		}
		if nextResp.StatusCode != http.StatusOK {
			// defer-in-loop is safe here: this branch returns immediately.
			defer func() { _ = nextResp.Body.Close() }()
			body, _ := io.ReadAll(nextResp.Body)
			failChatStreamRetry(streamRuntime, historySession, nextResp.StatusCode, string(body), "error")
			return
		}
		// Account for the regeneration suffix in usage for later attempts.
		streamRuntime.finalPrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts)
		currentResp = nextResp
	}
}
|
||||
|
||||
// prepareChatStreamRuntime validates the first upstream response, writes the
// SSE response headers, and constructs the stream runtime shared by every
// retry attempt. It returns ok=false after writing an error response (and
// recording it to history) when the upstream status was not 200.
func (h *Handler) prepareChatStreamRuntime(w http.ResponseWriter, resp *http.Response, completionID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, historySession *chatHistorySession) (*chatStreamRuntime, string, bool) {
	if resp.StatusCode != http.StatusOK {
		defer func() { _ = resp.Body.Close() }()
		body, _ := io.ReadAll(resp.Body)
		if historySession != nil {
			historySession.error(resp.StatusCode, string(body), "error", "", "")
		}
		writeOpenAIError(w, resp.StatusCode, string(body))
		return nil, "", false
	}
	// Standard SSE headers; X-Accel-Buffering disables proxy-side buffering.
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache, no-transform")
	w.Header().Set("Connection", "keep-alive")
	w.Header().Set("X-Accel-Buffering", "no")
	rc := http.NewResponseController(w)
	_, canFlush := w.(http.Flusher)
	if !canFlush {
		config.Logger.Warn("[stream] response writer does not support flush; streaming may be buffered")
	}
	// With reasoning enabled the upstream emits thinking tokens first.
	initialType := "text"
	if thinkingEnabled {
		initialType = "thinking"
	}
	streamRuntime := newChatStreamRuntime(
		w, rc, canFlush, completionID, time.Now().Unix(), model, finalPrompt,
		thinkingEnabled, searchEnabled, h.compatStripReferenceMarkers(), toolNames,
		len(toolNames) > 0, h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence(),
	)
	return streamRuntime, initialType, true
}
|
||||
|
||||
// consumeChatStreamAttempt pumps one upstream SSE response through the shared
// stream engine into the runtime. It returns (terminalWritten, retryable):
// terminalWritten is true when finalize emitted the terminal chunk (success
// or a non-deferred failure); otherwise the empty-output error was deferred
// into the runtime and the caller may retry.
func (h *Handler) consumeChatStreamAttempt(r *http.Request, resp *http.Response, streamRuntime *chatStreamRuntime, initialType string, thinkingEnabled bool, historySession *chatHistorySession, allowDeferEmpty bool) (bool, bool) {
	defer func() { _ = resp.Body.Close() }()
	finalReason := "stop"
	streamengine.ConsumeSSE(streamengine.ConsumeConfig{
		Context:             r.Context(),
		Body:                resp.Body,
		ThinkingEnabled:     thinkingEnabled,
		InitialType:         initialType,
		KeepAliveInterval:   time.Duration(dsprotocol.KeepAliveTimeout) * time.Second,
		IdleTimeout:         time.Duration(dsprotocol.StreamIdleTimeout) * time.Second,
		MaxKeepAliveNoInput: dsprotocol.MaxKeepaliveCount,
	}, streamengine.ConsumeHooks{
		OnKeepAlive: streamRuntime.sendKeepAlive,
		OnParsed: func(parsed sse.LineResult) streamengine.ParsedDecision {
			decision := streamRuntime.onParsed(parsed)
			if historySession != nil {
				// Mirror streamed progress into history as it arrives.
				historySession.progress(streamRuntime.thinking.String(), streamRuntime.text.String())
			}
			return decision
		},
		OnFinalize: func(reason streamengine.StopReason, _ error) {
			// Track content_filter stops: they must never be retried.
			if string(reason) == "content_filter" {
				finalReason = "content_filter"
			}
		},
		OnContextDone: func() {
			if historySession != nil {
				historySession.stopped(streamRuntime.thinking.String(), streamRuntime.text.String(), string(streamengine.StopReasonContextCancelled))
			}
		},
	})
	// Defer the empty-output error only while the retry budget allows it and
	// the stop was not a content filter.
	terminalWritten := streamRuntime.finalize(finalReason, allowDeferEmpty && finalReason != "content_filter")
	if terminalWritten {
		recordChatStreamHistory(streamRuntime, historySession)
		return true, false
	}
	return false, true
}
|
||||
|
||||
func recordChatStreamHistory(streamRuntime *chatStreamRuntime, historySession *chatHistorySession) {
|
||||
if historySession == nil {
|
||||
return
|
||||
}
|
||||
if streamRuntime.finalErrorMessage != "" {
|
||||
historySession.error(streamRuntime.finalErrorStatus, streamRuntime.finalErrorMessage, streamRuntime.finalErrorCode, streamRuntime.thinking.String(), streamRuntime.text.String())
|
||||
return
|
||||
}
|
||||
historySession.success(http.StatusOK, streamRuntime.finalThinking, streamRuntime.finalText, streamRuntime.finalFinishReason, streamRuntime.finalUsage)
|
||||
}
|
||||
|
||||
func failChatStreamRetry(streamRuntime *chatStreamRuntime, historySession *chatHistorySession, status int, message, code string) {
|
||||
streamRuntime.sendFailedChunk(status, message, code)
|
||||
if historySession != nil {
|
||||
historySession.error(status, message, code, streamRuntime.thinking.String(), streamRuntime.text.String())
|
||||
}
|
||||
}
|
||||
|
||||
func logChatStreamTerminal(streamRuntime *chatStreamRuntime, attempts int) {
|
||||
source := "first_attempt"
|
||||
if attempts > 0 {
|
||||
source = "synthetic_retry"
|
||||
}
|
||||
if streamRuntime.finalErrorMessage != "" {
|
||||
config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", true, "retry_attempts", attempts, "success_source", "none", "error_code", streamRuntime.finalErrorCode)
|
||||
return
|
||||
}
|
||||
config.Logger.Info("[openai_empty_retry] completed", "surface", "chat.completions", "stream", true, "retry_attempts", attempts, "success_source", source)
|
||||
}
|
||||
@@ -123,6 +123,22 @@ func writeUpstreamEmptyOutputError(w http.ResponseWriter, text, thinking string,
|
||||
return shared.WriteUpstreamEmptyOutputError(w, text, thinking, contentFilter)
|
||||
}
|
||||
|
||||
// Thin adapters over the shared package so call sites in this file stay short.

// emptyOutputRetryEnabled reports whether synthetic empty-output retry is on.
func emptyOutputRetryEnabled() bool {
	return shared.EmptyOutputRetryEnabled()
}

// emptyOutputRetryMaxAttempts returns the synthetic-retry budget per request.
func emptyOutputRetryMaxAttempts() int {
	return shared.EmptyOutputRetryMaxAttempts()
}

// clonePayloadWithEmptyOutputRetryPrompt copies the upstream payload with the
// regeneration suffix applied to its prompt.
func clonePayloadWithEmptyOutputRetryPrompt(payload map[string]any) map[string]any {
	return shared.ClonePayloadWithEmptyOutputRetryPrompt(payload)
}

// usagePromptWithEmptyOutputRetry returns the prompt used for usage
// accounting after retryAttempts synthetic retries.
func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {
	return shared.UsagePromptWithEmptyOutputRetry(originalPrompt, retryAttempts)
}

// formatIncrementalStreamToolCallDeltas converts incremental tool-call deltas
// into wire format, reusing previously assigned per-index call IDs.
func formatIncrementalStreamToolCallDeltas(deltas []toolstream.ToolCallDelta, ids map[int]string) []map[string]any {
	return shared.FormatIncrementalStreamToolCallDeltas(deltas, ids)
}
|
||||
|
||||
@@ -105,10 +105,10 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
if stdReq.Stream {
|
||||
h.handleStream(w, r, resp, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, historySession)
|
||||
h.handleStreamWithRetry(w, r, a, resp, payload, pow, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, historySession)
|
||||
return
|
||||
}
|
||||
h.handleNonStream(w, resp, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, historySession)
|
||||
h.handleNonStreamWithRetry(w, r.Context(), a, resp, payload, pow, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, historySession)
|
||||
}
|
||||
|
||||
func (h *Handler) autoDeleteRemoteSession(ctx context.Context, a *auth.RequestAuth, sessionID string) {
|
||||
@@ -251,9 +251,9 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
|
||||
},
|
||||
OnFinalize: func(reason streamengine.StopReason, _ error) {
|
||||
if string(reason) == "content_filter" {
|
||||
streamRuntime.finalize("content_filter")
|
||||
streamRuntime.finalize("content_filter", false)
|
||||
} else {
|
||||
streamRuntime.finalize("stop")
|
||||
streamRuntime.finalize("stop", false)
|
||||
}
|
||||
if historySession == nil {
|
||||
return
|
||||
|
||||
221
internal/httpapi/openai/responses/empty_retry_runtime.go
Normal file
221
internal/httpapi/openai/responses/empty_retry_runtime.go
Normal file
@@ -0,0 +1,221 @@
|
||||
package responses
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/config"
|
||||
dsprotocol "ds2api/internal/deepseek/protocol"
|
||||
openaifmt "ds2api/internal/format/openai"
|
||||
"ds2api/internal/promptcompat"
|
||||
"ds2api/internal/sse"
|
||||
streamengine "ds2api/internal/stream"
|
||||
"ds2api/internal/toolcall"
|
||||
)
|
||||
|
||||
// responsesNonStreamResult carries the sanitized output of one non-stream
// /responses attempt for the empty-output retry loop.
type responsesNonStreamResult struct {
	thinking              string                       // cleaned visible thinking output
	toolDetectionThinking string                       // cleaned thinking used only for tool detection
	text                  string                       // cleaned visible answer text
	contentFilter         bool                         // upstream stop was a content filter; blocks retry
	parsed                toolcall.ToolCallParseResult // detected tool calls
	body                  map[string]any               // fully built response object
}
|
||||
|
||||
// handleResponsesNonStreamWithRetry serves a non-stream /responses request
// with empty-output retry, accumulating thinking across attempts (overlap at
// continuation boundaries trimmed) before the final response object is built
// and stored.
func (h *Handler) handleResponsesNonStreamWithRetry(w http.ResponseWriter, ctx context.Context, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice promptcompat.ToolChoicePolicy, traceID string) {
	attempts := 0
	currentResp := resp
	usagePrompt := finalPrompt
	accumulatedThinking := ""
	accumulatedToolDetectionThinking := ""
	for {
		result, ok := h.collectResponsesNonStreamAttempt(w, currentResp, responseID, model, usagePrompt, thinkingEnabled, searchEnabled, toolNames)
		if !ok {
			// Terminal error already written to the client.
			return
		}
		// Merge this attempt's thinking onto prior attempts.
		accumulatedThinking += sse.TrimContinuationOverlap(accumulatedThinking, result.thinking)
		accumulatedToolDetectionThinking += sse.TrimContinuationOverlap(accumulatedToolDetectionThinking, result.toolDetectionThinking)
		result.thinking = accumulatedThinking
		result.toolDetectionThinking = accumulatedToolDetectionThinking
		// Rebuild detection and the response object over accumulated output.
		result.parsed = detectAssistantToolCalls(result.text, result.thinking, result.toolDetectionThinking, toolNames)
		result.body = openaifmt.BuildResponseObjectWithToolCalls(responseID, model, usagePrompt, result.thinking, result.text, result.parsed.Calls)

		if !shouldRetryResponsesNonStream(result, attempts) {
			h.finishResponsesNonStreamResult(w, result, attempts, owner, responseID, toolChoice, traceID)
			return
		}

		attempts++
		config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", false, "retry_attempt", attempts)
		nextResp, err := h.DS.CallCompletion(ctx, a, clonePayloadWithEmptyOutputRetryPrompt(payload), pow, 3)
		if err != nil {
			writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
			config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "responses", "stream", false, "retry_attempt", attempts, "error", err)
			return
		}
		// Reflect the regeneration suffix in usage accounting for the retry.
		usagePrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts)
		currentResp = nextResp
	}
}
|
||||
|
||||
// collectResponsesNonStreamAttempt consumes one upstream completion response
// and returns its sanitized result. ok=false means a terminal error was
// already written. The response body is always closed here.
func (h *Handler) collectResponsesNonStreamAttempt(w http.ResponseWriter, resp *http.Response, responseID, model, usagePrompt string, thinkingEnabled, searchEnabled bool, toolNames []string) (responsesNonStreamResult, bool) {
	defer func() { _ = resp.Body.Close() }()
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		writeOpenAIError(w, resp.StatusCode, strings.TrimSpace(string(body)))
		return responsesNonStreamResult{}, false
	}
	result := sse.CollectStream(resp, thinkingEnabled, false)
	stripReferenceMarkers := h.compatStripReferenceMarkers()
	sanitizedThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
	toolDetectionThinking := cleanVisibleOutput(result.ToolDetectionThinking, stripReferenceMarkers)
	sanitizedText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
	if searchEnabled {
		// Convert citation markers into links when search was enabled.
		sanitizedText = replaceCitationMarkersWithLinks(sanitizedText, result.CitationLinks)
	}
	textParsed := detectAssistantToolCalls(sanitizedText, sanitizedThinking, toolDetectionThinking, toolNames)
	responseObj := openaifmt.BuildResponseObjectWithToolCalls(responseID, model, usagePrompt, sanitizedThinking, sanitizedText, textParsed.Calls)
	return responsesNonStreamResult{
		thinking:              sanitizedThinking,
		toolDetectionThinking: toolDetectionThinking,
		text:                  sanitizedText,
		contentFilter:         result.ContentFilter,
		parsed:                textParsed,
		body:                  responseObj,
	}, true
}
|
||||
|
||||
// finishResponsesNonStreamResult writes the terminal outcome of the responses
// retry loop: an empty-output error, a tool_choice violation, or the final
// response object (stored in the response store before being returned).
func (h *Handler) finishResponsesNonStreamResult(w http.ResponseWriter, result responsesNonStreamResult, attempts int, owner, responseID string, toolChoice promptcompat.ToolChoicePolicy, traceID string) {
	// writeUpstreamEmptyOutputError reports whether it wrote an error.
	if len(result.parsed.Calls) == 0 && writeUpstreamEmptyOutputError(w, result.text, result.thinking, result.contentFilter) {
		config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "responses", "stream", false, "retry_attempts", attempts, "success_source", "none", "content_filter", result.contentFilter)
		return
	}
	logResponsesToolPolicyRejection(traceID, toolChoice, result.parsed, "text")
	// A required tool_choice with no detected calls is a 422.
	if toolChoice.IsRequired() && len(result.parsed.Calls) == 0 {
		writeOpenAIErrorWithCode(w, http.StatusUnprocessableEntity, "tool_choice requires at least one valid tool call.", "tool_choice_violation")
		return
	}
	h.getResponseStore().put(owner, responseID, result.body)
	writeJSON(w, http.StatusOK, result.body)
	source := "first_attempt"
	if attempts > 0 {
		source = "synthetic_retry"
	}
	config.Logger.Info("[openai_empty_retry] completed", "surface", "responses", "stream", false, "retry_attempts", attempts, "success_source", source)
}
|
||||
|
||||
func shouldRetryResponsesNonStream(result responsesNonStreamResult, attempts int) bool {
|
||||
return emptyOutputRetryEnabled() &&
|
||||
attempts < emptyOutputRetryMaxAttempts() &&
|
||||
!result.contentFilter &&
|
||||
len(result.parsed.Calls) == 0 &&
|
||||
strings.TrimSpace(result.text) == ""
|
||||
}
|
||||
|
||||
// handleResponsesStreamWithRetry serves a streamed /responses request with
// empty-output retry: every attempt feeds one shared runtime so events are
// not duplicated, and an empty non-filtered result retries the upstream call
// with the regeneration suffix appended to the prompt.
func (h *Handler) handleResponsesStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice promptcompat.ToolChoicePolicy, traceID string) {
	streamRuntime, initialType, ok := h.prepareResponsesStreamRuntime(w, resp, owner, responseID, model, finalPrompt, thinkingEnabled, searchEnabled, toolNames, toolChoice, traceID)
	if !ok {
		return
	}
	attempts := 0
	currentResp := resp
	for {
		terminalWritten, retryable := h.consumeResponsesStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, attempts < emptyOutputRetryMaxAttempts())
		if terminalWritten {
			logResponsesStreamTerminal(streamRuntime, attempts)
			return
		}
		// Budget exhausted (or retry disabled): finalize without deferral so
		// the empty-output error actually reaches the client.
		if !retryable || !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() {
			streamRuntime.finalize("stop", false)
			config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "responses", "stream", true, "retry_attempts", attempts, "success_source", "none", "error_code", streamRuntime.finalErrorCode)
			return
		}
		attempts++
		config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", true, "retry_attempt", attempts)
		nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadWithEmptyOutputRetryPrompt(payload), pow, 3)
		if err != nil {
			streamRuntime.failResponse(http.StatusInternalServerError, "Failed to get completion.", "error")
			config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", err)
			return
		}
		if nextResp.StatusCode != http.StatusOK {
			// defer-in-loop is safe here: this branch returns immediately.
			defer func() { _ = nextResp.Body.Close() }()
			body, _ := io.ReadAll(nextResp.Body)
			streamRuntime.failResponse(nextResp.StatusCode, strings.TrimSpace(string(body)), "error")
			return
		}
		// Account for the regeneration suffix in usage for later attempts.
		streamRuntime.finalPrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts)
		currentResp = nextResp
	}
}
|
||||
|
||||
// prepareResponsesStreamRuntime validates the first upstream response, writes
// SSE headers, builds the responses stream runtime (wired to persist the
// final object into the response store), and emits the created event.
// Returns ok=false after writing an error response when upstream failed.
func (h *Handler) prepareResponsesStreamRuntime(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice promptcompat.ToolChoicePolicy, traceID string) (*responsesStreamRuntime, string, bool) {
	if resp.StatusCode != http.StatusOK {
		defer func() { _ = resp.Body.Close() }()
		body, _ := io.ReadAll(resp.Body)
		writeOpenAIError(w, resp.StatusCode, strings.TrimSpace(string(body)))
		return nil, "", false
	}
	// Standard SSE headers; X-Accel-Buffering disables proxy-side buffering.
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache, no-transform")
	w.Header().Set("Connection", "keep-alive")
	w.Header().Set("X-Accel-Buffering", "no")
	rc := http.NewResponseController(w)
	// NOTE(review): unlike the chat variant, no warning is logged here when
	// the writer cannot flush.
	_, canFlush := w.(http.Flusher)
	// With reasoning enabled the upstream emits thinking tokens first.
	initialType := "text"
	if thinkingEnabled {
		initialType = "thinking"
	}
	streamRuntime := newResponsesStreamRuntime(
		w, rc, canFlush, responseID, model, finalPrompt, thinkingEnabled, searchEnabled,
		h.compatStripReferenceMarkers(), toolNames, len(toolNames) > 0,
		h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence(),
		toolChoice, traceID, func(obj map[string]any) {
			h.getResponseStore().put(owner, responseID, obj)
		},
	)
	streamRuntime.sendCreated()
	return streamRuntime, initialType, true
}
|
||||
|
||||
// consumeResponsesStreamAttempt pumps one upstream SSE response through the
// shared stream engine into the responses runtime. It returns
// (terminalWritten, retryable): terminalWritten is true when finalize emitted
// the terminal event; otherwise the empty-output error was deferred and the
// caller may retry with a fresh upstream response.
func (h *Handler) consumeResponsesStreamAttempt(r *http.Request, resp *http.Response, streamRuntime *responsesStreamRuntime, initialType string, thinkingEnabled bool, allowDeferEmpty bool) (bool, bool) {
	defer func() { _ = resp.Body.Close() }()
	finalReason := "stop"
	streamengine.ConsumeSSE(streamengine.ConsumeConfig{
		Context:             r.Context(),
		Body:                resp.Body,
		ThinkingEnabled:     thinkingEnabled,
		InitialType:         initialType,
		KeepAliveInterval:   time.Duration(dsprotocol.KeepAliveTimeout) * time.Second,
		IdleTimeout:         time.Duration(dsprotocol.StreamIdleTimeout) * time.Second,
		MaxKeepAliveNoInput: dsprotocol.MaxKeepaliveCount,
	}, streamengine.ConsumeHooks{
		OnParsed: streamRuntime.onParsed,
		OnFinalize: func(reason streamengine.StopReason, _ error) {
			// Track content_filter stops: they must never be retried.
			if string(reason) == "content_filter" {
				finalReason = "content_filter"
			}
		},
	})
	// Defer the empty-output error only while the retry budget allows it and
	// the stop was not a content filter.
	terminalWritten := streamRuntime.finalize(finalReason, allowDeferEmpty && finalReason != "content_filter")
	if terminalWritten {
		return true, false
	}
	return false, true
}
|
||||
|
||||
func logResponsesStreamTerminal(streamRuntime *responsesStreamRuntime, attempts int) {
|
||||
source := "first_attempt"
|
||||
if attempts > 0 {
|
||||
source = "synthetic_retry"
|
||||
}
|
||||
if streamRuntime.failed {
|
||||
config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "responses", "stream", true, "retry_attempts", attempts, "success_source", "none", "error_code", streamRuntime.finalErrorCode)
|
||||
return
|
||||
}
|
||||
config.Logger.Info("[openai_empty_retry] completed", "surface", "responses", "stream", true, "retry_attempts", attempts, "success_source", source)
|
||||
}
|
||||
@@ -113,6 +113,22 @@ func writeUpstreamEmptyOutputError(w http.ResponseWriter, text, thinking string,
|
||||
return shared.WriteUpstreamEmptyOutputError(w, text, thinking, contentFilter)
|
||||
}
|
||||
|
||||
// Thin adapters over the shared package so call sites in this file stay short.

// emptyOutputRetryEnabled reports whether synthetic empty-output retry is on.
func emptyOutputRetryEnabled() bool {
	return shared.EmptyOutputRetryEnabled()
}

// emptyOutputRetryMaxAttempts returns the synthetic-retry budget per request.
func emptyOutputRetryMaxAttempts() int {
	return shared.EmptyOutputRetryMaxAttempts()
}

// clonePayloadWithEmptyOutputRetryPrompt copies the upstream payload with the
// regeneration suffix applied to its prompt.
func clonePayloadWithEmptyOutputRetryPrompt(payload map[string]any) map[string]any {
	return shared.ClonePayloadWithEmptyOutputRetryPrompt(payload)
}

// usagePromptWithEmptyOutputRetry returns the prompt used for usage
// accounting after retryAttempts synthetic retries.
func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {
	return shared.UsagePromptWithEmptyOutputRetry(originalPrompt, retryAttempts)
}

// filterIncrementalToolCallDeltasByAllowed drops deltas whose tool names are
// not allowed, tracking names already seen per index.
func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, seenNames map[int]string) []toolstream.ToolCallDelta {
	return shared.FilterIncrementalToolCallDeltasByAllowed(deltas, seenNames)
}
|
||||
|
||||
@@ -115,10 +115,10 @@ func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
responseID := "resp_" + strings.ReplaceAll(uuid.NewString(), "-", "")
|
||||
if stdReq.Stream {
|
||||
h.handleResponsesStream(w, r, resp, owner, responseID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, stdReq.ToolChoice, traceID)
|
||||
h.handleResponsesStreamWithRetry(w, r, a, resp, payload, pow, owner, responseID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, stdReq.ToolChoice, traceID)
|
||||
return
|
||||
}
|
||||
h.handleResponsesNonStream(w, resp, owner, responseID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, stdReq.ToolChoice, traceID)
|
||||
h.handleResponsesNonStreamWithRetry(w, r.Context(), a, resp, payload, pow, owner, responseID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, stdReq.ToolChoice, traceID)
|
||||
}
|
||||
|
||||
func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice promptcompat.ToolChoicePolicy, traceID string) {
|
||||
@@ -206,8 +206,12 @@ func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request,
|
||||
MaxKeepAliveNoInput: dsprotocol.MaxKeepaliveCount,
|
||||
}, streamengine.ConsumeHooks{
|
||||
OnParsed: streamRuntime.onParsed,
|
||||
OnFinalize: func(_ streamengine.StopReason, _ error) {
|
||||
streamRuntime.finalize()
|
||||
OnFinalize: func(reason streamengine.StopReason, _ error) {
|
||||
if string(reason) == "content_filter" {
|
||||
streamRuntime.finalize("content_filter", false)
|
||||
return
|
||||
}
|
||||
streamRuntime.finalize("stop", false)
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
@@ -53,6 +53,9 @@ type responsesStreamRuntime struct {
|
||||
messagePartAdded bool
|
||||
sequence int
|
||||
failed bool
|
||||
finalErrorStatus int
|
||||
finalErrorMessage string
|
||||
finalErrorCode string
|
||||
|
||||
persistResponse func(obj map[string]any)
|
||||
}
|
||||
@@ -103,6 +106,9 @@ func newResponsesStreamRuntime(
|
||||
|
||||
func (s *responsesStreamRuntime) failResponse(status int, message, code string) {
|
||||
s.failed = true
|
||||
s.finalErrorStatus = status
|
||||
s.finalErrorMessage = message
|
||||
s.finalErrorCode = code
|
||||
failedResp := map[string]any{
|
||||
"id": s.responseID,
|
||||
"type": "response",
|
||||
@@ -126,7 +132,11 @@ func (s *responsesStreamRuntime) failResponse(status int, message, code string)
|
||||
s.sendDone()
|
||||
}
|
||||
|
||||
func (s *responsesStreamRuntime) finalize() {
|
||||
func (s *responsesStreamRuntime) finalize(finishReason string, deferEmptyOutput bool) bool {
|
||||
s.failed = false
|
||||
s.finalErrorStatus = 0
|
||||
s.finalErrorMessage = ""
|
||||
s.finalErrorCode = ""
|
||||
finalThinking := s.thinking.String()
|
||||
finalToolDetectionThinking := s.toolDetectionThinking.String()
|
||||
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
|
||||
@@ -150,12 +160,18 @@ func (s *responsesStreamRuntime) finalize() {
|
||||
|
||||
if s.toolChoice.IsRequired() && len(detected) == 0 {
|
||||
s.failResponse(http.StatusUnprocessableEntity, "tool_choice requires at least one valid tool call.", "tool_choice_violation")
|
||||
return
|
||||
return true
|
||||
}
|
||||
if len(detected) == 0 && strings.TrimSpace(finalText) == "" {
|
||||
status, message, code := upstreamEmptyOutputDetail(false, finalText, finalThinking)
|
||||
status, message, code := upstreamEmptyOutputDetail(finishReason == "content_filter", finalText, finalThinking)
|
||||
if deferEmptyOutput {
|
||||
s.finalErrorStatus = status
|
||||
s.finalErrorMessage = message
|
||||
s.finalErrorCode = code
|
||||
return false
|
||||
}
|
||||
s.failResponse(status, message, code)
|
||||
return
|
||||
return true
|
||||
}
|
||||
s.closeIncompleteFunctionItems()
|
||||
|
||||
@@ -165,6 +181,7 @@ func (s *responsesStreamRuntime) finalize() {
|
||||
}
|
||||
s.sendEvent("response.completed", openaifmt.BuildResponsesCompletedPayload(obj))
|
||||
s.sendDone()
|
||||
return true
|
||||
}
|
||||
|
||||
func (s *responsesStreamRuntime) logToolPolicyRejections(textParsed toolcall.ToolCallParseResult) {
|
||||
@@ -188,7 +205,10 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
|
||||
if !parsed.Parsed {
|
||||
return streamengine.ParsedDecision{}
|
||||
}
|
||||
if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
|
||||
if parsed.ContentFilter || parsed.ErrorMessage != "" {
|
||||
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("content_filter")}
|
||||
}
|
||||
if parsed.Stop {
|
||||
return streamengine.ParsedDecision{Stop: true}
|
||||
}
|
||||
|
||||
|
||||
45
internal/httpapi/openai/shared/empty_retry.go
Normal file
45
internal/httpapi/openai/shared/empty_retry.go
Normal file
@@ -0,0 +1,45 @@
|
||||
package shared
|
||||
|
||||
import "strings"
|
||||
|
||||
const EmptyOutputRetrySuffix = "Previous reply had no visible output. Please regenerate the visible final answer or tool call now."
|
||||
|
||||
func EmptyOutputRetryEnabled() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func EmptyOutputRetryMaxAttempts() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
func ClonePayloadWithEmptyOutputRetryPrompt(payload map[string]any) map[string]any {
|
||||
clone := make(map[string]any, len(payload))
|
||||
for k, v := range payload {
|
||||
clone[k] = v
|
||||
}
|
||||
original, _ := payload["prompt"].(string)
|
||||
clone["prompt"] = AppendEmptyOutputRetrySuffix(original)
|
||||
return clone
|
||||
}
|
||||
|
||||
func AppendEmptyOutputRetrySuffix(prompt string) string {
|
||||
prompt = strings.TrimRight(prompt, "\r\n\t ")
|
||||
if prompt == "" {
|
||||
return EmptyOutputRetrySuffix
|
||||
}
|
||||
return prompt + "\n\n" + EmptyOutputRetrySuffix
|
||||
}
|
||||
|
||||
func UsagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {
|
||||
if retryAttempts <= 0 {
|
||||
return originalPrompt
|
||||
}
|
||||
parts := make([]string, 0, retryAttempts+1)
|
||||
parts = append(parts, originalPrompt)
|
||||
next := originalPrompt
|
||||
for i := 0; i < retryAttempts; i++ {
|
||||
next = AppendEmptyOutputRetrySuffix(next)
|
||||
parts = append(parts, next)
|
||||
}
|
||||
return strings.Join(parts, "\n")
|
||||
}
|
||||
@@ -66,6 +66,44 @@ func (m streamStatusDSStub) DeleteAllSessionsForToken(_ context.Context, _ strin
|
||||
return nil
|
||||
}
|
||||
|
||||
type streamStatusDSSeqStub struct {
|
||||
resps []*http.Response
|
||||
payloads []map[string]any
|
||||
}
|
||||
|
||||
func (m *streamStatusDSSeqStub) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
|
||||
return "session-id", nil
|
||||
}
|
||||
|
||||
func (m *streamStatusDSSeqStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
|
||||
return "pow", nil
|
||||
}
|
||||
|
||||
func (m *streamStatusDSSeqStub) UploadFile(_ context.Context, _ *auth.RequestAuth, _ dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) {
|
||||
return &dsclient.UploadFileResult{ID: "file-id", Filename: "file.txt", Bytes: 1, Status: "uploaded"}, nil
|
||||
}
|
||||
|
||||
func (m *streamStatusDSSeqStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) {
|
||||
clone := make(map[string]any, len(payload))
|
||||
for k, v := range payload {
|
||||
clone[k] = v
|
||||
}
|
||||
m.payloads = append(m.payloads, clone)
|
||||
idx := len(m.payloads) - 1
|
||||
if idx >= len(m.resps) {
|
||||
idx = len(m.resps) - 1
|
||||
}
|
||||
return m.resps[idx], nil
|
||||
}
|
||||
|
||||
func (m *streamStatusDSSeqStub) DeleteSessionForToken(_ context.Context, _ string, _ string) (*dsclient.DeleteSessionResult, error) {
|
||||
return &dsclient.DeleteSessionResult{Success: true}, nil
|
||||
}
|
||||
|
||||
func (m *streamStatusDSSeqStub) DeleteAllSessionsForToken(_ context.Context, _ string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func makeOpenAISSEHTTPResponse(lines ...string) *http.Response {
|
||||
body := strings.Join(lines, "\n")
|
||||
if !strings.HasSuffix(body, "\n") {
|
||||
@@ -78,6 +116,12 @@ func makeOpenAISSEHTTPResponse(lines ...string) *http.Response {
|
||||
}
|
||||
}
|
||||
|
||||
func newOpenAITestRouter(h *openAITestSurface) http.Handler {
|
||||
r := chi.NewRouter()
|
||||
registerOpenAITestRoutes(r, h)
|
||||
return r
|
||||
}
|
||||
|
||||
func captureStatusMiddleware(statuses *[]int) func(http.Handler) http.Handler {
|
||||
return func(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -239,6 +283,125 @@ func TestChatCompletionsStreamEmitsFailureFrameWhenUpstreamOutputEmpty(t *testin
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) {
|
||||
ds := &streamStatusDSSeqStub{resps: []*http.Response{
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
|
||||
}}
|
||||
h := &openAITestSurface{
|
||||
Store: mockOpenAIConfig{wideInput: true},
|
||||
Auth: streamStatusAuthStub{},
|
||||
DS: ds,
|
||||
}
|
||||
reqBody := `{"model":"deepseek-v4-pro","messages":[{"role":"user","content":"hi"}],"stream":true}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody))
|
||||
req.Header.Set("Authorization", "Bearer direct-token")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
newOpenAITestRouter(h).ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
if len(ds.payloads) != 2 {
|
||||
t.Fatalf("expected one synthetic retry call, got %d", len(ds.payloads))
|
||||
}
|
||||
if ds.payloads[0]["chat_session_id"] != ds.payloads[1]["chat_session_id"] {
|
||||
t.Fatalf("expected retry to reuse session, payloads=%#v", ds.payloads)
|
||||
}
|
||||
retryPrompt := asString(ds.payloads[1]["prompt"])
|
||||
if !strings.Contains(retryPrompt, "Previous reply had no visible output. Please regenerate the visible final answer or tool call now.") {
|
||||
t.Fatalf("expected retry suffix in prompt, got %q", retryPrompt)
|
||||
}
|
||||
|
||||
frames, done := parseSSEDataFrames(t, rec.Body.String())
|
||||
if !done {
|
||||
t.Fatalf("expected [DONE], body=%s", rec.Body.String())
|
||||
}
|
||||
doneCount := strings.Count(rec.Body.String(), "data: [DONE]")
|
||||
if doneCount != 1 {
|
||||
t.Fatalf("expected one [DONE], got %d body=%s", doneCount, rec.Body.String())
|
||||
}
|
||||
if len(frames) != 3 {
|
||||
t.Fatalf("expected reasoning, content, finish frames, got %#v body=%s", frames, rec.Body.String())
|
||||
}
|
||||
id := asString(frames[0]["id"])
|
||||
for _, frame := range frames[1:] {
|
||||
if asString(frame["id"]) != id {
|
||||
t.Fatalf("expected same completion id across retry stream, frames=%#v", frames)
|
||||
}
|
||||
}
|
||||
choices, _ := frames[1]["choices"].([]any)
|
||||
choice, _ := choices[0].(map[string]any)
|
||||
delta, _ := choice["delta"].(map[string]any)
|
||||
if asString(delta["content"]) != "visible" {
|
||||
t.Fatalf("expected retry content delta, got %#v body=%s", delta, rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatCompletionsNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
ds := &streamStatusDSSeqStub{resps: []*http.Response{
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
|
||||
}}
|
||||
h := &openAITestSurface{
|
||||
Store: mockOpenAIConfig{wideInput: true},
|
||||
Auth: streamStatusAuthStub{},
|
||||
DS: ds,
|
||||
}
|
||||
reqBody := `{"model":"deepseek-v4-pro","messages":[{"role":"user","content":"hi"}],"stream":false}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody))
|
||||
req.Header.Set("Authorization", "Bearer direct-token")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
newOpenAITestRouter(h).ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 after retry, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
if len(ds.payloads) != 2 {
|
||||
t.Fatalf("expected one synthetic retry call, got %d", len(ds.payloads))
|
||||
}
|
||||
var out map[string]any
|
||||
if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
|
||||
t.Fatalf("decode response failed: %v body=%s", err, rec.Body.String())
|
||||
}
|
||||
choices, _ := out["choices"].([]any)
|
||||
choice, _ := choices[0].(map[string]any)
|
||||
message, _ := choice["message"].(map[string]any)
|
||||
if asString(message["content"]) != "visible" {
|
||||
t.Fatalf("expected retry visible content, got %#v", message)
|
||||
}
|
||||
if !strings.Contains(asString(message["reasoning_content"]), "plan") {
|
||||
t.Fatalf("expected first-attempt reasoning to be preserved, got %#v", message)
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatCompletionsContentFilterDoesNotRetry(t *testing.T) {
|
||||
ds := &streamStatusDSSeqStub{resps: []*http.Response{
|
||||
makeOpenAISSEHTTPResponse(`data: {"code":"content_filter"}`),
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
|
||||
}}
|
||||
h := &openAITestSurface{
|
||||
Store: mockOpenAIConfig{wideInput: true},
|
||||
Auth: streamStatusAuthStub{},
|
||||
DS: ds,
|
||||
}
|
||||
reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hi"}],"stream":false}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody))
|
||||
req.Header.Set("Authorization", "Bearer direct-token")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
newOpenAITestRouter(h).ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected content_filter 400, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
if len(ds.payloads) != 1 {
|
||||
t.Fatalf("expected no retry on content_filter, got %d calls", len(ds.payloads))
|
||||
}
|
||||
}
|
||||
|
||||
func TestResponsesStreamUsageIgnoresBatchAccumulatedTokenUsage(t *testing.T) {
|
||||
statuses := make([]int, 0, 1)
|
||||
h := &openAITestSurface{
|
||||
@@ -287,6 +450,86 @@ func TestResponsesStreamUsageIgnoresBatchAccumulatedTokenUsage(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
ds := &streamStatusDSSeqStub{resps: []*http.Response{
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
|
||||
}}
|
||||
h := &openAITestSurface{
|
||||
Store: mockOpenAIConfig{wideInput: true},
|
||||
Auth: streamStatusAuthStub{},
|
||||
DS: ds,
|
||||
}
|
||||
reqBody := `{"model":"deepseek-v4-pro","input":"hi","stream":true}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody))
|
||||
req.Header.Set("Authorization", "Bearer direct-token")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
newOpenAITestRouter(h).ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
if len(ds.payloads) != 2 {
|
||||
t.Fatalf("expected one synthetic retry call, got %d", len(ds.payloads))
|
||||
}
|
||||
body := rec.Body.String()
|
||||
if strings.Contains(body, "response.failed") {
|
||||
t.Fatalf("did not expect premature response.failed, body=%s", body)
|
||||
}
|
||||
if !strings.Contains(body, "response.reasoning.delta") || !strings.Contains(body, "response.output_text.delta") || !strings.Contains(body, "response.completed") {
|
||||
t.Fatalf("expected reasoning, text delta, and completed events, body=%s", body)
|
||||
}
|
||||
if strings.Count(body, "data: [DONE]") != 1 {
|
||||
t.Fatalf("expected one [DONE], body=%s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
ds := &streamStatusDSSeqStub{resps: []*http.Response{
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
|
||||
}}
|
||||
h := &openAITestSurface{
|
||||
Store: mockOpenAIConfig{wideInput: true},
|
||||
Auth: streamStatusAuthStub{},
|
||||
DS: ds,
|
||||
}
|
||||
reqBody := `{"model":"deepseek-v4-pro","input":"hi","stream":false}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody))
|
||||
req.Header.Set("Authorization", "Bearer direct-token")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
newOpenAITestRouter(h).ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 after retry, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
if len(ds.payloads) != 2 {
|
||||
t.Fatalf("expected one synthetic retry call, got %d", len(ds.payloads))
|
||||
}
|
||||
var out map[string]any
|
||||
if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
|
||||
t.Fatalf("decode response failed: %v body=%s", err, rec.Body.String())
|
||||
}
|
||||
if asString(out["output_text"]) != "visible" {
|
||||
t.Fatalf("expected retry visible output_text, got %#v", out["output_text"])
|
||||
}
|
||||
output, _ := out["output"].([]any)
|
||||
if len(output) == 0 {
|
||||
t.Fatalf("expected output items, got %#v", out)
|
||||
}
|
||||
item, _ := output[0].(map[string]any)
|
||||
content, _ := item["content"].([]any)
|
||||
if len(content) == 0 {
|
||||
t.Fatalf("expected content entries, got %#v", item)
|
||||
}
|
||||
reasoning, _ := content[0].(map[string]any)
|
||||
if asString(reasoning["type"]) != "reasoning" || !strings.Contains(asString(reasoning["text"]), "plan") {
|
||||
t.Fatalf("expected preserved reasoning entry, got %#v", content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResponsesNonStreamUsageIgnoresPromptAndOutputTokenUsage(t *testing.T) {
|
||||
statuses := make([]int, 0, 1)
|
||||
h := &openAITestSurface{
|
||||
|
||||
Reference in New Issue
Block a user