fix: increase stream timeout constants for large-context models; guard against context-cancelled double-recording

- Increase StreamIdleTimeout from 90s to 300s and MaxKeepaliveCount from 10 to 40
  to prevent premature stream termination with DeepSeek V4 Pro (~50K token contexts)
- Add r.Context().Err() check after ConsumeSSE in empty_retry_runtime (chat + responses)
  to prevent historySession.error() from overwriting historySession.stopped()
  when the request context is cancelled

References:
- MaxKeepaliveCount=10 combined with KeepAliveTimeout=5 (10 keepalive intervals
  of 5s each) creates a 50s no-content timeout that kills the stream before
  DeepSeek V4 Pro can produce its first token with large contexts
- Hermes Agent reports 'No response from provider for 180s' because the
  underlying SSE connection was already terminated by ds2api at 50s
- Context cancellation path: OnContextDone records stopped(); finalize() then
  runs with empty output, which leads to a retry, and the resulting error()
  overwrites the earlier stopped()
This commit is contained in:
2026-05-01 21:11:36 +08:00
parent 445c95a4f2
commit 706e68de23
3 changed files with 8 additions and 2 deletions

View File

@@ -159,6 +159,6 @@ func toStringSet(in []string) map[string]struct{} {
const (
KeepAliveTimeout = 5
-StreamIdleTimeout = 90
-MaxKeepaliveCount = 10
+StreamIdleTimeout = 300
+MaxKeepaliveCount = 40
)

View File

@@ -252,6 +252,9 @@ func (h *Handler) consumeChatStreamAttempt(r *http.Request, resp *http.Response,
}
},
})
+if r.Context().Err() != nil {
+return true, false
+}
terminalWritten := streamRuntime.finalize(finalReason, allowDeferEmpty && finalReason != "content_filter")
if terminalWritten {
recordChatStreamHistory(streamRuntime, historySession)

View File

@@ -223,6 +223,9 @@ func (h *Handler) consumeResponsesStreamAttempt(r *http.Request, resp *http.Resp
}
},
})
+if r.Context().Err() != nil {
+return true, false
+}
terminalWritten := streamRuntime.finalize(finalReason, allowDeferEmpty && finalReason != "content_filter")
if terminalWritten {
return true, false