mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-12 12:17:47 +08:00
fix: use parent_message_id and fresh PoW headers for empty-output retry and continue
Previously retry/continue requests reused the initial PoW header and lacked parent_message_id, causing them to land as disconnected root messages in the DeepSeek session instead of proper follow-up turns. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -38,6 +38,7 @@ type chatStreamRuntime struct {
|
||||
thinking strings.Builder
|
||||
toolDetectionThinking strings.Builder
|
||||
text strings.Builder
|
||||
responseMessageID int
|
||||
|
||||
finalThinking string
|
||||
finalText string
|
||||
@@ -234,6 +235,9 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
|
||||
if !parsed.Parsed {
|
||||
return streamengine.ParsedDecision{}
|
||||
}
|
||||
if parsed.ResponseMessageID > 0 {
|
||||
s.responseMessageID = parsed.ResponseMessageID
|
||||
}
|
||||
if parsed.ContentFilter {
|
||||
if strings.TrimSpace(s.text.String()) == "" {
|
||||
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("content_filter")}
|
||||
|
||||
@@ -23,6 +23,7 @@ type chatNonStreamResult struct {
|
||||
detectedCalls int
|
||||
body map[string]any
|
||||
finishReason string
|
||||
responseMessageID int
|
||||
}
|
||||
|
||||
func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Context, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, historySession *chatHistorySession) {
|
||||
@@ -50,9 +51,14 @@ func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Co
|
||||
}
|
||||
|
||||
attempts++
|
||||
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", false, "retry_attempt", attempts)
|
||||
retryPayload := clonePayloadWithEmptyOutputRetryPrompt(payload)
|
||||
nextResp, err := h.DS.CallCompletion(ctx, a, retryPayload, pow, 3)
|
||||
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "parent_message_id", result.responseMessageID)
|
||||
retryPow, powErr := h.DS.GetPow(ctx, a, 3)
|
||||
if powErr != nil {
|
||||
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", powErr)
|
||||
retryPow = pow
|
||||
}
|
||||
retryPayload := clonePayloadForEmptyOutputRetry(payload, result.responseMessageID)
|
||||
nextResp, err := h.DS.CallCompletion(ctx, a, retryPayload, retryPow, 3)
|
||||
if err != nil {
|
||||
if historySession != nil {
|
||||
historySession.error(http.StatusInternalServerError, "Failed to get completion.", "error", result.thinking, result.text)
|
||||
@@ -91,6 +97,7 @@ func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http.
|
||||
detectedCalls: len(detected.Calls),
|
||||
body: respBody,
|
||||
finishReason: chatFinishReason(respBody),
|
||||
responseMessageID: result.ResponseMessageID,
|
||||
}, true
|
||||
}
|
||||
|
||||
@@ -152,8 +159,13 @@ func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request,
|
||||
return
|
||||
}
|
||||
attempts++
|
||||
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", true, "retry_attempt", attempts)
|
||||
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadWithEmptyOutputRetryPrompt(payload), pow, 3)
|
||||
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID)
|
||||
retryPow, powErr := h.DS.GetPow(r.Context(), a, 3)
|
||||
if powErr != nil {
|
||||
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", powErr)
|
||||
retryPow = pow
|
||||
}
|
||||
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3)
|
||||
if err != nil {
|
||||
failChatStreamRetry(streamRuntime, historySession, http.StatusInternalServerError, "Failed to get completion.", "error")
|
||||
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", err)
|
||||
|
||||
@@ -131,8 +131,8 @@ func emptyOutputRetryMaxAttempts() int {
|
||||
return shared.EmptyOutputRetryMaxAttempts()
|
||||
}
|
||||
|
||||
func clonePayloadWithEmptyOutputRetryPrompt(payload map[string]any) map[string]any {
|
||||
return shared.ClonePayloadWithEmptyOutputRetryPrompt(payload)
|
||||
func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
|
||||
return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID)
|
||||
}
|
||||
|
||||
func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {
|
||||
|
||||
@@ -22,6 +22,10 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
|
||||
h.handleVercelStreamRelease(w, r)
|
||||
return
|
||||
}
|
||||
if isVercelStreamPowRequest(r) {
|
||||
h.handleVercelStreamPow(w, r)
|
||||
return
|
||||
}
|
||||
if isVercelStreamPrepareRequest(r) {
|
||||
h.handleVercelStreamPrepare(w, r)
|
||||
return
|
||||
|
||||
@@ -150,6 +150,44 @@ func (h *Handler) handleVercelStreamRelease(w http.ResponseWriter, r *http.Reque
|
||||
writeJSON(w, http.StatusOK, map[string]any{"success": true})
|
||||
}
|
||||
|
||||
func (h *Handler) handleVercelStreamPow(w http.ResponseWriter, r *http.Request) {
|
||||
if !config.IsVercel() {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
internalSecret := vercelInternalSecret()
|
||||
internalToken := strings.TrimSpace(r.Header.Get("X-Ds2-Internal-Token"))
|
||||
if internalSecret == "" || subtle.ConstantTimeCompare([]byte(internalToken), []byte(internalSecret)) != 1 {
|
||||
writeOpenAIError(w, http.StatusUnauthorized, "unauthorized internal request")
|
||||
return
|
||||
}
|
||||
|
||||
var req map[string]any
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
writeOpenAIError(w, http.StatusBadRequest, "invalid json")
|
||||
return
|
||||
}
|
||||
leaseID, _ := req["lease_id"].(string)
|
||||
leaseID = strings.TrimSpace(leaseID)
|
||||
if leaseID == "" {
|
||||
writeOpenAIError(w, http.StatusBadRequest, "lease_id is required")
|
||||
return
|
||||
}
|
||||
leaseAuth := h.lookupStreamLeaseAuth(leaseID)
|
||||
if leaseAuth == nil {
|
||||
writeOpenAIError(w, http.StatusNotFound, "stream lease not found or expired")
|
||||
return
|
||||
}
|
||||
powHeader, err := h.DS.GetPow(r.Context(), leaseAuth, 3)
|
||||
if err != nil {
|
||||
writeOpenAIError(w, http.StatusInternalServerError, "Failed to get PoW.")
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]any{
|
||||
"pow_header": powHeader,
|
||||
})
|
||||
}
|
||||
|
||||
func isVercelStreamPrepareRequest(r *http.Request) bool {
|
||||
if r == nil {
|
||||
return false
|
||||
@@ -164,6 +202,13 @@ func isVercelStreamReleaseRequest(r *http.Request) bool {
|
||||
return strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
|
||||
}
|
||||
|
||||
func isVercelStreamPowRequest(r *http.Request) bool {
|
||||
if r == nil {
|
||||
return false
|
||||
}
|
||||
return strings.TrimSpace(r.URL.Query().Get("__stream_pow")) == "1"
|
||||
}
|
||||
|
||||
func vercelInternalSecret() string {
|
||||
if v := strings.TrimSpace(os.Getenv("DS2API_VERCEL_INTERNAL_SECRET")); v != "" {
|
||||
return v
|
||||
@@ -199,6 +244,20 @@ func (h *Handler) holdStreamLease(a *auth.RequestAuth) string {
|
||||
return leaseID
|
||||
}
|
||||
|
||||
func (h *Handler) lookupStreamLeaseAuth(leaseID string) *auth.RequestAuth {
|
||||
leaseID = strings.TrimSpace(leaseID)
|
||||
if leaseID == "" {
|
||||
return nil
|
||||
}
|
||||
h.leaseMu.Lock()
|
||||
lease, ok := h.streamLeases[leaseID]
|
||||
h.leaseMu.Unlock()
|
||||
if !ok || time.Now().After(lease.ExpiresAt) {
|
||||
return nil
|
||||
}
|
||||
return lease.Auth
|
||||
}
|
||||
|
||||
func (h *Handler) releaseStreamLease(leaseID string) bool {
|
||||
leaseID = strings.TrimSpace(leaseID)
|
||||
if leaseID == "" {
|
||||
|
||||
@@ -24,6 +24,7 @@ type responsesNonStreamResult struct {
|
||||
contentFilter bool
|
||||
parsed toolcall.ToolCallParseResult
|
||||
body map[string]any
|
||||
responseMessageID int
|
||||
}
|
||||
|
||||
func (h *Handler) handleResponsesNonStreamWithRetry(w http.ResponseWriter, ctx context.Context, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice promptcompat.ToolChoicePolicy, traceID string) {
|
||||
@@ -50,8 +51,13 @@ func (h *Handler) handleResponsesNonStreamWithRetry(w http.ResponseWriter, ctx c
|
||||
}
|
||||
|
||||
attempts++
|
||||
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", false, "retry_attempt", attempts)
|
||||
nextResp, err := h.DS.CallCompletion(ctx, a, clonePayloadWithEmptyOutputRetryPrompt(payload), pow, 3)
|
||||
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", false, "retry_attempt", attempts, "parent_message_id", result.responseMessageID)
|
||||
retryPow, powErr := h.DS.GetPow(ctx, a, 3)
|
||||
if powErr != nil {
|
||||
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "responses", "stream", false, "retry_attempt", attempts, "error", powErr)
|
||||
retryPow = pow
|
||||
}
|
||||
nextResp, err := h.DS.CallCompletion(ctx, a, clonePayloadForEmptyOutputRetry(payload, result.responseMessageID), retryPow, 3)
|
||||
if err != nil {
|
||||
writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
|
||||
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "responses", "stream", false, "retry_attempt", attempts, "error", err)
|
||||
@@ -86,6 +92,7 @@ func (h *Handler) collectResponsesNonStreamAttempt(w http.ResponseWriter, resp *
|
||||
contentFilter: result.ContentFilter,
|
||||
parsed: textParsed,
|
||||
body: responseObj,
|
||||
responseMessageID: result.ResponseMessageID,
|
||||
}, true
|
||||
}
|
||||
|
||||
@@ -135,8 +142,13 @@ func (h *Handler) handleResponsesStreamWithRetry(w http.ResponseWriter, r *http.
|
||||
return
|
||||
}
|
||||
attempts++
|
||||
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", true, "retry_attempt", attempts)
|
||||
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadWithEmptyOutputRetryPrompt(payload), pow, 3)
|
||||
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID)
|
||||
retryPow, powErr := h.DS.GetPow(r.Context(), a, 3)
|
||||
if powErr != nil {
|
||||
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", powErr)
|
||||
retryPow = pow
|
||||
}
|
||||
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3)
|
||||
if err != nil {
|
||||
streamRuntime.failResponse(http.StatusInternalServerError, "Failed to get completion.", "error")
|
||||
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", err)
|
||||
|
||||
@@ -121,8 +121,8 @@ func emptyOutputRetryMaxAttempts() int {
|
||||
return shared.EmptyOutputRetryMaxAttempts()
|
||||
}
|
||||
|
||||
func clonePayloadWithEmptyOutputRetryPrompt(payload map[string]any) map[string]any {
|
||||
return shared.ClonePayloadWithEmptyOutputRetryPrompt(payload)
|
||||
func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
|
||||
return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID)
|
||||
}
|
||||
|
||||
func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {
|
||||
|
||||
@@ -39,6 +39,7 @@ type responsesStreamRuntime struct {
|
||||
toolDetectionThinking strings.Builder
|
||||
text strings.Builder
|
||||
visibleText strings.Builder
|
||||
responseMessageID int
|
||||
streamToolCallIDs map[int]string
|
||||
functionItemIDs map[int]string
|
||||
functionOutputIDs map[int]int
|
||||
@@ -205,6 +206,9 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
|
||||
if !parsed.Parsed {
|
||||
return streamengine.ParsedDecision{}
|
||||
}
|
||||
if parsed.ResponseMessageID > 0 {
|
||||
s.responseMessageID = parsed.ResponseMessageID
|
||||
}
|
||||
if parsed.ContentFilter || parsed.ErrorMessage != "" {
|
||||
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("content_filter")}
|
||||
}
|
||||
|
||||
@@ -13,12 +13,23 @@ func EmptyOutputRetryMaxAttempts() int {
|
||||
}
|
||||
|
||||
func ClonePayloadWithEmptyOutputRetryPrompt(payload map[string]any) map[string]any {
|
||||
return ClonePayloadForEmptyOutputRetry(payload, 0)
|
||||
}
|
||||
|
||||
// ClonePayloadForEmptyOutputRetry creates a retry payload with the suffix
|
||||
// appended and, if parentMessageID > 0, sets parent_message_id so the
|
||||
// retry is submitted as a proper follow-up turn in the same DeepSeek
|
||||
// session rather than a disconnected root message.
|
||||
func ClonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
|
||||
clone := make(map[string]any, len(payload))
|
||||
for k, v := range payload {
|
||||
clone[k] = v
|
||||
}
|
||||
original, _ := payload["prompt"].(string)
|
||||
clone["prompt"] = AppendEmptyOutputRetrySuffix(original)
|
||||
if parentMessageID > 0 {
|
||||
clone["parent_message_id"] = parentMessageID
|
||||
}
|
||||
return clone
|
||||
}
|
||||
|
||||
|
||||
@@ -285,7 +285,7 @@ func TestChatCompletionsStreamEmitsFailureFrameWhenUpstreamOutputEmpty(t *testin
|
||||
|
||||
func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) {
|
||||
ds := &streamStatusDSSeqStub{resps: []*http.Response{
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"response_message_id":42,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
|
||||
}}
|
||||
h := &openAITestSurface{
|
||||
@@ -313,6 +313,10 @@ func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) {
|
||||
if !strings.Contains(retryPrompt, "Previous reply had no visible output. Please regenerate the visible final answer or tool call now.") {
|
||||
t.Fatalf("expected retry suffix in prompt, got %q", retryPrompt)
|
||||
}
|
||||
// Verify multi-turn chaining: retry must set parent_message_id from first call's response_message_id.
|
||||
if parentID, ok := ds.payloads[1]["parent_message_id"].(int); !ok || parentID != 42 {
|
||||
t.Fatalf("expected retry parent_message_id=42, got %#v", ds.payloads[1]["parent_message_id"])
|
||||
}
|
||||
|
||||
frames, done := parseSSEDataFrames(t, rec.Body.String())
|
||||
if !done {
|
||||
@@ -341,7 +345,7 @@ func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) {
|
||||
|
||||
func TestChatCompletionsNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
ds := &streamStatusDSSeqStub{resps: []*http.Response{
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"response_message_id":99,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
|
||||
}}
|
||||
h := &openAITestSurface{
|
||||
@@ -362,6 +366,10 @@ func TestChatCompletionsNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
if len(ds.payloads) != 2 {
|
||||
t.Fatalf("expected one synthetic retry call, got %d", len(ds.payloads))
|
||||
}
|
||||
// Verify multi-turn chaining.
|
||||
if parentID, ok := ds.payloads[1]["parent_message_id"].(int); !ok || parentID != 99 {
|
||||
t.Fatalf("expected retry parent_message_id=99, got %#v", ds.payloads[1]["parent_message_id"])
|
||||
}
|
||||
var out map[string]any
|
||||
if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
|
||||
t.Fatalf("decode response failed: %v body=%s", err, rec.Body.String())
|
||||
@@ -452,7 +460,7 @@ func TestResponsesStreamUsageIgnoresBatchAccumulatedTokenUsage(t *testing.T) {
|
||||
|
||||
func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
ds := &streamStatusDSSeqStub{resps: []*http.Response{
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"response_message_id":77,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
|
||||
}}
|
||||
h := &openAITestSurface{
|
||||
@@ -473,6 +481,10 @@ func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
if len(ds.payloads) != 2 {
|
||||
t.Fatalf("expected one synthetic retry call, got %d", len(ds.payloads))
|
||||
}
|
||||
// Verify multi-turn chaining.
|
||||
if parentID, ok := ds.payloads[1]["parent_message_id"].(int); !ok || parentID != 77 {
|
||||
t.Fatalf("expected retry parent_message_id=77, got %#v", ds.payloads[1]["parent_message_id"])
|
||||
}
|
||||
body := rec.Body.String()
|
||||
if strings.Contains(body, "response.failed") {
|
||||
t.Fatalf("did not expect premature response.failed, body=%s", body)
|
||||
@@ -487,7 +499,7 @@ func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
|
||||
func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
ds := &streamStatusDSSeqStub{resps: []*http.Response{
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"response_message_id":88,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
|
||||
}}
|
||||
h := &openAITestSurface{
|
||||
@@ -508,6 +520,10 @@ func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
if len(ds.payloads) != 2 {
|
||||
t.Fatalf("expected one synthetic retry call, got %d", len(ds.payloads))
|
||||
}
|
||||
// Verify multi-turn chaining.
|
||||
if parentID, ok := ds.payloads[1]["parent_message_id"].(int); !ok || parentID != 88 {
|
||||
t.Fatalf("expected retry parent_message_id=88, got %#v", ds.payloads[1]["parent_message_id"])
|
||||
}
|
||||
var out map[string]any
|
||||
if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
|
||||
t.Fatalf("decode response failed: %v body=%s", err, rec.Body.String())
|
||||
|
||||
Reference in New Issue
Block a user