From 706e68de233340e1cec392ba4fc5dbeb55b814a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B?= Date: Fri, 1 May 2026 21:11:36 +0800 Subject: [PATCH] fix: increase stream timeout constants for large-context models; guard against context-cancelled double-recording - Increase StreamIdleTimeout from 90s to 300s and MaxKeepaliveCount from 10 to 40 to prevent premature stream termination with DeepSeek V4 Pro (~50K token contexts) - Add r.Context().Err() check after ConsumeSSE in empty_retry_runtime (chat + responses) to prevent historySession.error() from overwriting historySession.stopped() when the request context is cancelled References: - MaxKeepaliveCount=10 creates a 50s no-content timeout that kills the stream before DeepSeek V4 Pro can produce its first token with large contexts - Hermes Agent reports 'No response from provider for 180s' because the underlying SSE connection was already terminated by ds2api at 50s - Context cancellation path: OnContextDone -> stopped(), then finalize() with empty output -> retry -> error() overwrites stopped() --- internal/deepseek/protocol/constants.go | 4 ++-- internal/httpapi/openai/chat/empty_retry_runtime.go | 3 +++ internal/httpapi/openai/responses/empty_retry_runtime.go | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/internal/deepseek/protocol/constants.go b/internal/deepseek/protocol/constants.go index 3cb6c4d..83daa31 100644 --- a/internal/deepseek/protocol/constants.go +++ b/internal/deepseek/protocol/constants.go @@ -159,6 +159,6 @@ func toStringSet(in []string) map[string]struct{} { const ( KeepAliveTimeout = 5 - StreamIdleTimeout = 90 - MaxKeepaliveCount = 10 + StreamIdleTimeout = 300 + MaxKeepaliveCount = 40 ) diff --git a/internal/httpapi/openai/chat/empty_retry_runtime.go b/internal/httpapi/openai/chat/empty_retry_runtime.go index de2ff12..147024f 100644 --- a/internal/httpapi/openai/chat/empty_retry_runtime.go +++ b/internal/httpapi/openai/chat/empty_retry_runtime.go @@ -252,6 +252,9 @@ func (h *Handler) consumeChatStreamAttempt(r *http.Request, resp *http.Response, } }, }) + if r.Context().Err() != nil { + return true, false + } terminalWritten := streamRuntime.finalize(finalReason, allowDeferEmpty && finalReason != "content_filter") if terminalWritten { recordChatStreamHistory(streamRuntime, historySession) diff --git a/internal/httpapi/openai/responses/empty_retry_runtime.go b/internal/httpapi/openai/responses/empty_retry_runtime.go index 627f074..45d861d 100644 --- a/internal/httpapi/openai/responses/empty_retry_runtime.go +++ b/internal/httpapi/openai/responses/empty_retry_runtime.go @@ -223,6 +223,9 @@ func (h *Handler) consumeResponsesStreamAttempt(r *http.Request, resp *http.Resp } }, }) + if r.Context().Err() != nil { + return true, false + } terminalWritten := streamRuntime.finalize(finalReason, allowDeferEmpty && finalReason != "content_filter") if terminalWritten { return true, false