From 706e68de233340e1cec392ba4fc5dbeb55b814a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B?= <wang@wangs-MacBook-Air-3.local>
Date: Fri, 1 May 2026 21:11:36 +0800
Subject: [PATCH] fix: increase stream timeout constants for large-context
 models; guard against context-cancelled double-recording

- Increase StreamIdleTimeout from 90s to 300s and MaxKeepaliveCount from 10 to 40
  to prevent premature stream termination with DeepSeek V4 Pro (~50K token contexts)
- Add an r.Context().Err() check right after ConsumeSSE in both
  empty_retry_runtime.go files (chat + responses) so that a cancelled request
  returns before finalize() runs, preventing historySession.error() from
  overwriting historySession.stopped()

References:
- MaxKeepaliveCount=10 creates a 50s no-content timeout (10 keepalives x
  KeepAliveTimeout=5s) that kills the stream before DeepSeek V4 Pro can
  produce its first token with large contexts
- Hermes Agent reports 'No response from provider for 180s' because the
  underlying SSE connection was already terminated by ds2api at 50s
- Context cancellation path: OnContextDone -> stopped(), then finalize()
  with empty output -> retry -> error() overwrites stopped()
---
 internal/deepseek/protocol/constants.go                  | 4 ++--
 internal/httpapi/openai/chat/empty_retry_runtime.go      | 3 +++
 internal/httpapi/openai/responses/empty_retry_runtime.go | 3 +++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/internal/deepseek/protocol/constants.go b/internal/deepseek/protocol/constants.go
index 3cb6c4d..83daa31 100644
--- a/internal/deepseek/protocol/constants.go
+++ b/internal/deepseek/protocol/constants.go
@@ -159,6 +159,6 @@ func toStringSet(in []string) map[string]struct{} {
 
 const (
 	KeepAliveTimeout  = 5
-	StreamIdleTimeout = 90
-	MaxKeepaliveCount = 10
+	StreamIdleTimeout = 300
+	MaxKeepaliveCount = 40
 )
diff --git a/internal/httpapi/openai/chat/empty_retry_runtime.go b/internal/httpapi/openai/chat/empty_retry_runtime.go
index de2ff12..147024f 100644
--- a/internal/httpapi/openai/chat/empty_retry_runtime.go
+++ b/internal/httpapi/openai/chat/empty_retry_runtime.go
@@ -252,6 +252,9 @@ func (h *Handler) consumeChatStreamAttempt(r *http.Request, resp *http.Response,
 			}
 		},
 	})
+	if r.Context().Err() != nil {
+		return true, false
+	}
 	terminalWritten := streamRuntime.finalize(finalReason, allowDeferEmpty && finalReason != "content_filter")
 	if terminalWritten {
 		recordChatStreamHistory(streamRuntime, historySession)
diff --git a/internal/httpapi/openai/responses/empty_retry_runtime.go b/internal/httpapi/openai/responses/empty_retry_runtime.go
index 627f074..45d861d 100644
--- a/internal/httpapi/openai/responses/empty_retry_runtime.go
+++ b/internal/httpapi/openai/responses/empty_retry_runtime.go
@@ -223,6 +223,9 @@ func (h *Handler) consumeResponsesStreamAttempt(r *http.Request, resp *http.Resp
 			}
 		},
 	})
+	if r.Context().Err() != nil {
+		return true, false
+	}
 	terminalWritten := streamRuntime.finalize(finalReason, allowDeferEmpty && finalReason != "content_filter")
 	if terminalWritten {
 		return true, false