fix: use parent_message_id and fresh PoW headers for empty-output retry and continue

Previously, retry and continue requests reused the initial proof-of-work
(PoW) header and omitted parent_message_id, so they landed as disconnected
root messages in the DeepSeek session instead of proper follow-up turns.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-04-27 21:31:51 +08:00
parent fb43bd92f5
commit b82bc1311a
16 changed files with 324 additions and 32 deletions

View File

@@ -38,6 +38,7 @@ type chatStreamRuntime struct {
thinking strings.Builder
toolDetectionThinking strings.Builder
text strings.Builder
responseMessageID int
finalThinking string
finalText string
@@ -234,6 +235,9 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
if !parsed.Parsed {
return streamengine.ParsedDecision{}
}
if parsed.ResponseMessageID > 0 {
s.responseMessageID = parsed.ResponseMessageID
}
if parsed.ContentFilter {
if strings.TrimSpace(s.text.String()) == "" {
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("content_filter")}

View File

@@ -23,6 +23,7 @@ type chatNonStreamResult struct {
detectedCalls int
body map[string]any
finishReason string
responseMessageID int
}
func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Context, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, historySession *chatHistorySession) {
@@ -50,9 +51,14 @@ func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Co
}
attempts++
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", false, "retry_attempt", attempts)
retryPayload := clonePayloadWithEmptyOutputRetryPrompt(payload)
nextResp, err := h.DS.CallCompletion(ctx, a, retryPayload, pow, 3)
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "parent_message_id", result.responseMessageID)
retryPow, powErr := h.DS.GetPow(ctx, a, 3)
if powErr != nil {
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", powErr)
retryPow = pow
}
retryPayload := clonePayloadForEmptyOutputRetry(payload, result.responseMessageID)
nextResp, err := h.DS.CallCompletion(ctx, a, retryPayload, retryPow, 3)
if err != nil {
if historySession != nil {
historySession.error(http.StatusInternalServerError, "Failed to get completion.", "error", result.thinking, result.text)
@@ -91,6 +97,7 @@ func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http.
detectedCalls: len(detected.Calls),
body: respBody,
finishReason: chatFinishReason(respBody),
responseMessageID: result.ResponseMessageID,
}, true
}
@@ -152,8 +159,13 @@ func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request,
return
}
attempts++
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", true, "retry_attempt", attempts)
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadWithEmptyOutputRetryPrompt(payload), pow, 3)
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID)
retryPow, powErr := h.DS.GetPow(r.Context(), a, 3)
if powErr != nil {
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", powErr)
retryPow = pow
}
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3)
if err != nil {
failChatStreamRetry(streamRuntime, historySession, http.StatusInternalServerError, "Failed to get completion.", "error")
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", err)

View File

@@ -131,8 +131,8 @@ func emptyOutputRetryMaxAttempts() int {
return shared.EmptyOutputRetryMaxAttempts()
}
func clonePayloadWithEmptyOutputRetryPrompt(payload map[string]any) map[string]any {
return shared.ClonePayloadWithEmptyOutputRetryPrompt(payload)
// clonePayloadForEmptyOutputRetry is a package-local shorthand that forwards
// payload and parentMessageID unchanged to
// shared.ClonePayloadForEmptyOutputRetry and returns its result.
func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID)
}
func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {

View File

@@ -22,6 +22,10 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
h.handleVercelStreamRelease(w, r)
return
}
if isVercelStreamPowRequest(r) {
h.handleVercelStreamPow(w, r)
return
}
if isVercelStreamPrepareRequest(r) {
h.handleVercelStreamPrepare(w, r)
return

View File

@@ -150,6 +150,44 @@ func (h *Handler) handleVercelStreamRelease(w http.ResponseWriter, r *http.Reque
writeJSON(w, http.StatusOK, map[string]any{"success": true})
}
// handleVercelStreamPow serves the internal "__stream_pow" request: it looks
// up the auth attached to an existing stream lease and responds with a PoW
// header obtained for that auth via h.DS.GetPow.
//
// Responses:
//   - 404 when config.IsVercel() is false, or when the lease is unknown or
//     expired.
//   - 401 when no internal secret is configured or the X-Ds2-Internal-Token
//     header does not match it.
//   - 400 when the body is not valid JSON or carries no non-blank string
//     "lease_id".
//   - 500 when GetPow fails.
//   - 200 with {"pow_header": ...} on success.
func (h *Handler) handleVercelStreamPow(w http.ResponseWriter, r *http.Request) {
	if !config.IsVercel() {
		http.NotFound(w, r)
		return
	}

	// An empty configured secret rejects every caller; otherwise the token is
	// checked with subtle.ConstantTimeCompare rather than ==.
	secret := vercelInternalSecret()
	token := strings.TrimSpace(r.Header.Get("X-Ds2-Internal-Token"))
	authorized := secret != "" && subtle.ConstantTimeCompare([]byte(token), []byte(secret)) == 1
	if !authorized {
		writeOpenAIError(w, http.StatusUnauthorized, "unauthorized internal request")
		return
	}

	var body map[string]any
	if decodeErr := json.NewDecoder(r.Body).Decode(&body); decodeErr != nil {
		writeOpenAIError(w, http.StatusBadRequest, "invalid json")
		return
	}

	// A missing or non-string lease_id is treated the same as a blank one.
	var leaseID string
	if raw, isString := body["lease_id"].(string); isString {
		leaseID = strings.TrimSpace(raw)
	}
	if leaseID == "" {
		writeOpenAIError(w, http.StatusBadRequest, "lease_id is required")
		return
	}

	leaseAuth := h.lookupStreamLeaseAuth(leaseID)
	if leaseAuth == nil {
		writeOpenAIError(w, http.StatusNotFound, "stream lease not found or expired")
		return
	}

	powHeader, powErr := h.DS.GetPow(r.Context(), leaseAuth, 3)
	if powErr != nil {
		writeOpenAIError(w, http.StatusInternalServerError, "Failed to get PoW.")
		return
	}

	payload := map[string]any{
		"pow_header": powHeader,
	}
	writeJSON(w, http.StatusOK, payload)
}
func isVercelStreamPrepareRequest(r *http.Request) bool {
if r == nil {
return false
@@ -164,6 +202,13 @@ func isVercelStreamReleaseRequest(r *http.Request) bool {
return strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
}
// isVercelStreamPowRequest reports whether r carries the query parameter
// __stream_pow with the value "1"; whitespace around the value is ignored.
// A nil request is never a PoW request.
func isVercelStreamPowRequest(r *http.Request) bool {
	if r != nil {
		flag := r.URL.Query().Get("__stream_pow")
		return strings.TrimSpace(flag) == "1"
	}
	return false
}
func vercelInternalSecret() string {
if v := strings.TrimSpace(os.Getenv("DS2API_VERCEL_INTERNAL_SECRET")); v != "" {
return v
@@ -199,6 +244,20 @@ func (h *Handler) holdStreamLease(a *auth.RequestAuth) string {
return leaseID
}
// lookupStreamLeaseAuth returns the Auth stored on the stream lease with the
// given ID. It returns nil when the trimmed ID is empty, when no lease is
// registered under it, or when the current time is after the lease's
// ExpiresAt. The lease map is read while holding leaseMu; this function does
// not remove or modify the lease.
func (h *Handler) lookupStreamLeaseAuth(leaseID string) *auth.RequestAuth {
	id := strings.TrimSpace(leaseID)
	if id == "" {
		return nil
	}

	// Hold the lock only for the map read; the expiry check runs afterwards.
	h.leaseMu.Lock()
	entry, found := h.streamLeases[id]
	h.leaseMu.Unlock()

	if found && !time.Now().After(entry.ExpiresAt) {
		return entry.Auth
	}
	return nil
}
func (h *Handler) releaseStreamLease(leaseID string) bool {
leaseID = strings.TrimSpace(leaseID)
if leaseID == "" {

View File

@@ -24,6 +24,7 @@ type responsesNonStreamResult struct {
contentFilter bool
parsed toolcall.ToolCallParseResult
body map[string]any
responseMessageID int
}
func (h *Handler) handleResponsesNonStreamWithRetry(w http.ResponseWriter, ctx context.Context, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice promptcompat.ToolChoicePolicy, traceID string) {
@@ -50,8 +51,13 @@ func (h *Handler) handleResponsesNonStreamWithRetry(w http.ResponseWriter, ctx c
}
attempts++
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", false, "retry_attempt", attempts)
nextResp, err := h.DS.CallCompletion(ctx, a, clonePayloadWithEmptyOutputRetryPrompt(payload), pow, 3)
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", false, "retry_attempt", attempts, "parent_message_id", result.responseMessageID)
retryPow, powErr := h.DS.GetPow(ctx, a, 3)
if powErr != nil {
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "responses", "stream", false, "retry_attempt", attempts, "error", powErr)
retryPow = pow
}
nextResp, err := h.DS.CallCompletion(ctx, a, clonePayloadForEmptyOutputRetry(payload, result.responseMessageID), retryPow, 3)
if err != nil {
writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "responses", "stream", false, "retry_attempt", attempts, "error", err)
@@ -86,6 +92,7 @@ func (h *Handler) collectResponsesNonStreamAttempt(w http.ResponseWriter, resp *
contentFilter: result.ContentFilter,
parsed: textParsed,
body: responseObj,
responseMessageID: result.ResponseMessageID,
}, true
}
@@ -135,8 +142,13 @@ func (h *Handler) handleResponsesStreamWithRetry(w http.ResponseWriter, r *http.
return
}
attempts++
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", true, "retry_attempt", attempts)
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadWithEmptyOutputRetryPrompt(payload), pow, 3)
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID)
retryPow, powErr := h.DS.GetPow(r.Context(), a, 3)
if powErr != nil {
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", powErr)
retryPow = pow
}
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3)
if err != nil {
streamRuntime.failResponse(http.StatusInternalServerError, "Failed to get completion.", "error")
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", err)

View File

@@ -121,8 +121,8 @@ func emptyOutputRetryMaxAttempts() int {
return shared.EmptyOutputRetryMaxAttempts()
}
func clonePayloadWithEmptyOutputRetryPrompt(payload map[string]any) map[string]any {
return shared.ClonePayloadWithEmptyOutputRetryPrompt(payload)
// clonePayloadForEmptyOutputRetry is a package-local shorthand that forwards
// payload and parentMessageID unchanged to
// shared.ClonePayloadForEmptyOutputRetry and returns its result.
func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID)
}
func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {

View File

@@ -39,6 +39,7 @@ type responsesStreamRuntime struct {
toolDetectionThinking strings.Builder
text strings.Builder
visibleText strings.Builder
responseMessageID int
streamToolCallIDs map[int]string
functionItemIDs map[int]string
functionOutputIDs map[int]int
@@ -205,6 +206,9 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
if !parsed.Parsed {
return streamengine.ParsedDecision{}
}
if parsed.ResponseMessageID > 0 {
s.responseMessageID = parsed.ResponseMessageID
}
if parsed.ContentFilter || parsed.ErrorMessage != "" {
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("content_filter")}
}

View File

@@ -13,12 +13,23 @@ func EmptyOutputRetryMaxAttempts() int {
}
// ClonePayloadWithEmptyOutputRetryPrompt returns the result of
// ClonePayloadForEmptyOutputRetry(payload, 0), i.e. it always passes a
// parent message ID of 0.
func ClonePayloadWithEmptyOutputRetryPrompt(payload map[string]any) map[string]any {
return ClonePayloadForEmptyOutputRetry(payload, 0)
}
// ClonePayloadForEmptyOutputRetry builds the payload for an empty-output
// retry without touching the caller's map.
//
// The result is a shallow copy of payload whose "prompt" is the original
// prompt passed through AppendEmptyOutputRetrySuffix; a missing or
// non-string prompt is treated as "". When parentMessageID is positive,
// "parent_message_id" is set to it so the retry is submitted as a proper
// follow-up turn in the same DeepSeek session rather than as a disconnected
// root message. For parentMessageID <= 0 the key is left exactly as it was
// copied from payload (absent if payload had none).
func ClonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
	retry := make(map[string]any, len(payload))
	for key, value := range payload {
		retry[key] = value
	}

	var prompt string
	if s, isString := payload["prompt"].(string); isString {
		prompt = s
	}
	retry["prompt"] = AppendEmptyOutputRetrySuffix(prompt)

	if parentMessageID <= 0 {
		return retry
	}
	retry["parent_message_id"] = parentMessageID
	return retry
}

View File

@@ -285,7 +285,7 @@ func TestChatCompletionsStreamEmitsFailureFrameWhenUpstreamOutputEmpty(t *testin
func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) {
ds := &streamStatusDSSeqStub{resps: []*http.Response{
makeOpenAISSEHTTPResponse(`data: {"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"response_message_id":42,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
}}
h := &openAITestSurface{
@@ -313,6 +313,10 @@ func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) {
if !strings.Contains(retryPrompt, "Previous reply had no visible output. Please regenerate the visible final answer or tool call now.") {
t.Fatalf("expected retry suffix in prompt, got %q", retryPrompt)
}
// Verify multi-turn chaining: retry must set parent_message_id from first call's response_message_id.
if parentID, ok := ds.payloads[1]["parent_message_id"].(int); !ok || parentID != 42 {
t.Fatalf("expected retry parent_message_id=42, got %#v", ds.payloads[1]["parent_message_id"])
}
frames, done := parseSSEDataFrames(t, rec.Body.String())
if !done {
@@ -341,7 +345,7 @@ func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) {
func TestChatCompletionsNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
ds := &streamStatusDSSeqStub{resps: []*http.Response{
makeOpenAISSEHTTPResponse(`data: {"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"response_message_id":99,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
}}
h := &openAITestSurface{
@@ -362,6 +366,10 @@ func TestChatCompletionsNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
if len(ds.payloads) != 2 {
t.Fatalf("expected one synthetic retry call, got %d", len(ds.payloads))
}
// Verify multi-turn chaining.
if parentID, ok := ds.payloads[1]["parent_message_id"].(int); !ok || parentID != 99 {
t.Fatalf("expected retry parent_message_id=99, got %#v", ds.payloads[1]["parent_message_id"])
}
var out map[string]any
if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
t.Fatalf("decode response failed: %v body=%s", err, rec.Body.String())
@@ -452,7 +460,7 @@ func TestResponsesStreamUsageIgnoresBatchAccumulatedTokenUsage(t *testing.T) {
func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) {
ds := &streamStatusDSSeqStub{resps: []*http.Response{
makeOpenAISSEHTTPResponse(`data: {"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"response_message_id":77,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
}}
h := &openAITestSurface{
@@ -473,6 +481,10 @@ func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) {
if len(ds.payloads) != 2 {
t.Fatalf("expected one synthetic retry call, got %d", len(ds.payloads))
}
// Verify multi-turn chaining.
if parentID, ok := ds.payloads[1]["parent_message_id"].(int); !ok || parentID != 77 {
t.Fatalf("expected retry parent_message_id=77, got %#v", ds.payloads[1]["parent_message_id"])
}
body := rec.Body.String()
if strings.Contains(body, "response.failed") {
t.Fatalf("did not expect premature response.failed, body=%s", body)
@@ -487,7 +499,7 @@ func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) {
func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
ds := &streamStatusDSSeqStub{resps: []*http.Response{
makeOpenAISSEHTTPResponse(`data: {"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"response_message_id":88,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
}}
h := &openAITestSurface{
@@ -508,6 +520,10 @@ func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
if len(ds.payloads) != 2 {
t.Fatalf("expected one synthetic retry call, got %d", len(ds.payloads))
}
// Verify multi-turn chaining.
if parentID, ok := ds.payloads[1]["parent_message_id"].(int); !ok || parentID != 88 {
t.Fatalf("expected retry parent_message_id=88, got %#v", ds.payloads[1]["parent_message_id"])
}
var out map[string]any
if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
t.Fatalf("decode response failed: %v body=%s", err, rec.Body.String())