feat: reimplement Claude streaming to use full SSE events with thinking, tool calls, and stream management, and add related test cases.

This commit is contained in:
CJACK
2026-02-17 02:31:56 +08:00
parent 8cbb5a4262
commit a19f281229
6 changed files with 1098 additions and 32 deletions

View File

@@ -24,6 +24,12 @@ type Handler struct {
DS *deepseek.Client
}
// Timing knobs for the realtime Claude stream loop. They are package-level
// variables rather than constants; the tests in this package temporarily
// overwrite them to shorten the timings.
var (
// claudeStreamPingInterval is the period of the ticker that drives "ping"
// events and the keepalive/idle checks in the stream loop.
claudeStreamPingInterval = time.Duration(deepseek.KeepAliveTimeout) * time.Second
// claudeStreamIdleTimeout is how long the stream may go without new content,
// once some content has been seen, before it is finalized.
claudeStreamIdleTimeout = time.Duration(deepseek.StreamIdleTimeout) * time.Second
// claudeStreamMaxKeepaliveCnt is the number of ticker firings without any
// content after which the stream is finalized.
claudeStreamMaxKeepaliveCnt = deepseek.MaxKeepaliveCount
)
func RegisterRoutes(r chi.Router, h *Handler) {
r.Get("/anthropic/v1/models", h.ListModels)
r.Post("/anthropic/v1/messages", h.Messages)
@@ -74,7 +80,6 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
thinkingEnabled = false
searchEnabled = false
}
_ = searchEnabled
finalPrompt := util.MessagesPrepare(toMessageMaps(dsPayload["messages"]))
sessionID, err := h.DS.CreateSession(r.Context(), a, 3)
@@ -107,13 +112,13 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
return
}
fullText, fullThinking := collectDeepSeek(resp, thinkingEnabled)
toolNames := extractClaudeToolNames(toolsRequested)
detected := util.ParseToolCalls(fullText, toolNames)
if toBool(req["stream"]) {
h.writeClaudeStream(w, r, model, normalized, fullText, detected)
h.handleClaudeStreamRealtime(w, r, resp, model, normalized, thinkingEnabled, searchEnabled, toolNames)
return
}
fullText, fullThinking := collectDeepSeek(resp, thinkingEnabled)
detected := util.ParseToolCalls(fullText, toolNames)
content := make([]map[string]any, 0, 4)
if fullThinking != "" {
content = append(content, map[string]any{"type": "thinking", "thinking": fullThinking})
@@ -228,7 +233,14 @@ func collectDeepSeek(resp *http.Response, thinkingEnabled bool) (string, string)
return text.String(), thinking.String()
}
func (h *Handler) writeClaudeStream(w http.ResponseWriter, r *http.Request, model string, messages []any, fullText string, detected []util.ParsedToolCall) {
func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Request, resp *http.Response, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string) {
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
writeJSON(w, http.StatusInternalServerError, map[string]any{"error": map[string]any{"type": "api_error", "message": string(body)}})
return
}
w.Header().Set("Content-Type", "text/event-stream")
w.Header().Set("Cache-Control", "no-cache, no-transform")
w.Header().Set("Connection", "keep-alive")
@@ -238,8 +250,25 @@ func (h *Handler) writeClaudeStream(w http.ResponseWriter, r *http.Request, mode
if !canFlush {
config.Logger.Warn("[claude_stream] response writer does not support flush; streaming may be buffered")
}
send := func(v any) {
lines := make(chan []byte, 128)
done := make(chan error, 1)
go func() {
scanner := bufio.NewScanner(resp.Body)
buf := make([]byte, 0, 64*1024)
scanner.Buffer(buf, 2*1024*1024)
for scanner.Scan() {
b := append([]byte{}, scanner.Bytes()...)
lines <- b
}
close(lines)
done <- scanner.Err()
}()
send := func(event string, v any) {
b, _ := json.Marshal(v)
_, _ = w.Write([]byte("event: "))
_, _ = w.Write([]byte(event))
_, _ = w.Write([]byte("\n"))
_, _ = w.Write([]byte("data: "))
_, _ = w.Write(b)
_, _ = w.Write([]byte("\n\n"))
@@ -247,9 +276,23 @@ func (h *Handler) writeClaudeStream(w http.ResponseWriter, r *http.Request, mode
_ = rc.Flush()
}
}
sendError := func(message string) {
msg := strings.TrimSpace(message)
if msg == "" {
msg = "upstream stream error"
}
send("error", map[string]any{
"type": "error",
"error": map[string]any{
"type": "api_error",
"message": msg,
},
})
}
messageID := fmt.Sprintf("msg_%d", time.Now().UnixNano())
inputTokens := util.EstimateTokens(fmt.Sprintf("%v", messages))
send(map[string]any{
send("message_start", map[string]any{
"type": "message_start",
"message": map[string]any{
"id": messageID,
@@ -262,26 +305,247 @@ func (h *Handler) writeClaudeStream(w http.ResponseWriter, r *http.Request, mode
"usage": map[string]any{"input_tokens": inputTokens, "output_tokens": 0},
},
})
outputTokens := 0
stopReason := "end_turn"
if len(detected) > 0 {
stopReason = "tool_use"
for i, tc := range detected {
send(map[string]any{"type": "content_block_start", "index": i, "content_block": map[string]any{"type": "tool_use", "id": fmt.Sprintf("toolu_%d_%d", time.Now().Unix(), i), "name": tc.Name, "input": tc.Input}})
send(map[string]any{"type": "content_block_stop", "index": i})
outputTokens += util.EstimateTokens(fmt.Sprintf("%v", tc.Input))
currentType := "text"
if thinkingEnabled {
currentType = "thinking"
}
bufferToolContent := len(toolNames) > 0
hasContent := false
lastContent := time.Now()
keepaliveCount := 0
thinking := strings.Builder{}
text := strings.Builder{}
nextBlockIndex := 0
thinkingBlockOpen := false
thinkingBlockIndex := -1
textBlockOpen := false
textBlockIndex := -1
ended := false
closeThinkingBlock := func() {
if !thinkingBlockOpen {
return
}
} else {
if fullText != "" {
send(map[string]any{"type": "content_block_start", "index": 0, "content_block": map[string]any{"type": "text", "text": ""}})
send(map[string]any{"type": "content_block_delta", "index": 0, "delta": map[string]any{"type": "text_delta", "text": fullText}})
send(map[string]any{"type": "content_block_stop", "index": 0})
outputTokens = util.EstimateTokens(fullText)
send("content_block_stop", map[string]any{
"type": "content_block_stop",
"index": thinkingBlockIndex,
})
thinkingBlockOpen = false
thinkingBlockIndex = -1
}
closeTextBlock := func() {
if !textBlockOpen {
return
}
send("content_block_stop", map[string]any{
"type": "content_block_stop",
"index": textBlockIndex,
})
textBlockOpen = false
textBlockIndex = -1
}
finalize := func(stopReason string) {
if ended {
return
}
ended = true
closeThinkingBlock()
closeTextBlock()
finalThinking := thinking.String()
finalText := text.String()
if bufferToolContent {
detected := util.ParseToolCalls(finalText, toolNames)
if len(detected) > 0 {
stopReason = "tool_use"
for i, tc := range detected {
idx := nextBlockIndex + i
send("content_block_start", map[string]any{
"type": "content_block_start",
"index": idx,
"content_block": map[string]any{
"type": "tool_use",
"id": fmt.Sprintf("toolu_%d_%d", time.Now().Unix(), idx),
"name": tc.Name,
"input": tc.Input,
},
})
send("content_block_stop", map[string]any{
"type": "content_block_stop",
"index": idx,
})
}
nextBlockIndex += len(detected)
} else if finalText != "" {
idx := nextBlockIndex
nextBlockIndex++
send("content_block_start", map[string]any{
"type": "content_block_start",
"index": idx,
"content_block": map[string]any{
"type": "text",
"text": "",
},
})
send("content_block_delta", map[string]any{
"type": "content_block_delta",
"index": idx,
"delta": map[string]any{
"type": "text_delta",
"text": finalText,
},
})
send("content_block_stop", map[string]any{
"type": "content_block_stop",
"index": idx,
})
}
}
outputTokens := util.EstimateTokens(finalThinking) + util.EstimateTokens(finalText)
send("message_delta", map[string]any{
"type": "message_delta",
"delta": map[string]any{
"stop_reason": stopReason,
"stop_sequence": nil,
},
"usage": map[string]any{
"output_tokens": outputTokens,
},
})
send("message_stop", map[string]any{"type": "message_stop"})
}
pingTicker := time.NewTicker(claudeStreamPingInterval)
defer pingTicker.Stop()
for {
select {
case <-r.Context().Done():
return
case <-pingTicker.C:
if !hasContent {
keepaliveCount++
if keepaliveCount >= claudeStreamMaxKeepaliveCnt {
finalize("end_turn")
return
}
}
if hasContent && time.Since(lastContent) > claudeStreamIdleTimeout {
finalize("end_turn")
return
}
send("ping", map[string]any{"type": "ping"})
case line, ok := <-lines:
if !ok {
if err := <-done; err != nil {
sendError(err.Error())
return
}
finalize("end_turn")
return
}
chunk, doneSignal, parsed := sse.ParseDeepSeekSSELine(line)
if !parsed {
continue
}
if doneSignal {
finalize("end_turn")
return
}
if errObj, hasErr := chunk["error"]; hasErr {
sendError(fmt.Sprintf("%v", errObj))
return
}
if code, _ := chunk["code"].(string); code == "content_filter" {
sendError("content filtered by upstream")
return
}
parts, finished, newType := sse.ParseSSEChunkForContent(chunk, thinkingEnabled, currentType)
currentType = newType
if finished {
finalize("end_turn")
return
}
for _, p := range parts {
if p.Text == "" {
continue
}
if p.Type != "thinking" && searchEnabled && sse.IsCitation(p.Text) {
continue
}
hasContent = true
lastContent = time.Now()
keepaliveCount = 0
if p.Type == "thinking" {
if !thinkingEnabled {
continue
}
thinking.WriteString(p.Text)
closeTextBlock()
if !thinkingBlockOpen {
thinkingBlockIndex = nextBlockIndex
nextBlockIndex++
send("content_block_start", map[string]any{
"type": "content_block_start",
"index": thinkingBlockIndex,
"content_block": map[string]any{
"type": "thinking",
"thinking": "",
},
})
thinkingBlockOpen = true
}
send("content_block_delta", map[string]any{
"type": "content_block_delta",
"index": thinkingBlockIndex,
"delta": map[string]any{
"type": "thinking_delta",
"thinking": p.Text,
},
})
continue
}
text.WriteString(p.Text)
if bufferToolContent {
continue
}
closeThinkingBlock()
if !textBlockOpen {
textBlockIndex = nextBlockIndex
nextBlockIndex++
send("content_block_start", map[string]any{
"type": "content_block_start",
"index": textBlockIndex,
"content_block": map[string]any{
"type": "text",
"text": "",
},
})
textBlockOpen = true
}
send("content_block_delta", map[string]any{
"type": "content_block_delta",
"index": textBlockIndex,
"delta": map[string]any{
"type": "text_delta",
"text": p.Text,
},
})
}
}
}
send(map[string]any{"type": "message_delta", "delta": map[string]any{"stop_reason": stopReason, "stop_sequence": nil}, "usage": map[string]any{"output_tokens": outputTokens}})
send(map[string]any{"type": "message_stop"})
_ = r
}
func normalizeClaudeMessages(messages []any) []any {

View File

@@ -0,0 +1,256 @@
package claude
import (
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
// claudeFrame is one SSE frame decoded from the handler's recorded output by
// parseClaudeFrames.
type claudeFrame struct {
// Event is the trimmed value of the frame's "event:" line.
Event string
// Payload is the JSON object decoded from the frame's "data:" line.
Payload map[string]any
}
// makeClaudeSSEHTTPResponse builds a 200 OK *http.Response whose body is the
// given lines joined with "\n". A trailing newline is appended when the joined
// text does not already end with one (including when no lines are given).
func makeClaudeSSEHTTPResponse(lines ...string) *http.Response {
	var sb strings.Builder
	for i, ln := range lines {
		if i > 0 {
			sb.WriteByte('\n')
		}
		sb.WriteString(ln)
	}
	payload := sb.String()
	if len(payload) == 0 || payload[len(payload)-1] != '\n' {
		payload += "\n"
	}

	resp := new(http.Response)
	resp.StatusCode = http.StatusOK
	resp.Header = http.Header{}
	resp.Body = io.NopCloser(strings.NewReader(payload))
	return resp
}
// parseClaudeFrames splits a recorded SSE body into frames on blank lines and
// decodes each one into a claudeFrame. Within a frame, the last "event:" line
// and the last "data:" line win. Frames lacking either an event name or a data
// payload are skipped. A data payload that is not a JSON object fails the test
// immediately.
func parseClaudeFrames(t *testing.T, body string) []claudeFrame {
	t.Helper()

	blocks := strings.Split(body, "\n\n")
	parsed := make([]claudeFrame, 0, len(blocks))
	for _, raw := range blocks {
		block := strings.TrimSpace(raw)
		if block == "" {
			continue
		}

		var name, data string
		for _, field := range strings.Split(block, "\n") {
			// Splitting at the first colon is equivalent to testing for the
			// "event:" / "data:" prefixes, since neither key contains a colon.
			key, value, found := strings.Cut(strings.TrimSpace(field), ":")
			if !found {
				continue
			}
			switch key {
			case "event":
				name = strings.TrimSpace(value)
			case "data":
				data = strings.TrimSpace(value)
			}
		}
		if name == "" || data == "" {
			continue
		}

		var decoded map[string]any
		if err := json.Unmarshal([]byte(data), &decoded); err != nil {
			t.Fatalf("decode frame failed: %v, payload=%s", err, data)
		}
		parsed = append(parsed, claudeFrame{Event: name, Payload: decoded})
	}
	return parsed
}
// findClaudeFrames returns, in their original order, the frames whose Event
// equals event. The result is an empty (non-nil) slice when nothing matches.
func findClaudeFrames(frames []claudeFrame, event string) []claudeFrame {
	matched := []claudeFrame{}
	for i := range frames {
		if frames[i].Event != event {
			continue
		}
		matched = append(matched, frames[i])
	}
	return matched
}
func TestHandleClaudeStreamRealtimeTextIncrementsWithEventHeaders(t *testing.T) {
h := &Handler{}
resp := makeClaudeSSEHTTPResponse(
`data: {"p":"response/content","v":"Hel"}`,
`data: {"p":"response/content","v":"lo"}`,
`data: [DONE]`,
)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-20250514", []any{map[string]any{"role": "user", "content": "hi"}}, false, false, nil)
body := rec.Body.String()
if !strings.Contains(body, "event: message_start") {
t.Fatalf("missing event header: message_start, body=%s", body)
}
if !strings.Contains(body, "event: content_block_delta") {
t.Fatalf("missing event header: content_block_delta, body=%s", body)
}
if !strings.Contains(body, "event: message_stop") {
t.Fatalf("missing event header: message_stop, body=%s", body)
}
frames := parseClaudeFrames(t, body)
deltas := findClaudeFrames(frames, "content_block_delta")
if len(deltas) < 2 {
t.Fatalf("expected at least 2 text deltas, got=%d body=%s", len(deltas), body)
}
combined := strings.Builder{}
for _, f := range deltas {
delta, _ := f.Payload["delta"].(map[string]any)
if delta["type"] == "text_delta" {
combined.WriteString(asString(delta["text"]))
}
}
if combined.String() != "Hello" {
t.Fatalf("unexpected combined text: %q body=%s", combined.String(), body)
}
}
func TestHandleClaudeStreamRealtimeThinkingDelta(t *testing.T) {
h := &Handler{}
resp := makeClaudeSSEHTTPResponse(
`data: {"p":"response/thinking_content","v":"思"}`,
`data: {"p":"response/thinking_content","v":"考"}`,
`data: {"p":"response/content","v":"ok"}`,
`data: [DONE]`,
)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-20250514", []any{map[string]any{"role": "user", "content": "hi"}}, true, false, nil)
frames := parseClaudeFrames(t, rec.Body.String())
foundThinkingDelta := false
for _, f := range findClaudeFrames(frames, "content_block_delta") {
delta, _ := f.Payload["delta"].(map[string]any)
if delta["type"] == "thinking_delta" {
foundThinkingDelta = true
break
}
}
if !foundThinkingDelta {
t.Fatalf("expected thinking_delta event, body=%s", rec.Body.String())
}
}
func TestHandleClaudeStreamRealtimeToolSafety(t *testing.T) {
h := &Handler{}
resp := makeClaudeSSEHTTPResponse(
`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\""}`,
`data: {"p":"response/content","v":",\"input\":{\"q\":\"go\"}}]}"}`,
`data: [DONE]`,
)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-20250514", []any{map[string]any{"role": "user", "content": "use tool"}}, false, false, []string{"search"})
frames := parseClaudeFrames(t, rec.Body.String())
for _, f := range findClaudeFrames(frames, "content_block_delta") {
delta, _ := f.Payload["delta"].(map[string]any)
if delta["type"] == "text_delta" && strings.Contains(asString(delta["text"]), `"tool_calls"`) {
t.Fatalf("raw tool_calls JSON leaked in text delta: body=%s", rec.Body.String())
}
}
foundToolUse := false
for _, f := range findClaudeFrames(frames, "content_block_start") {
contentBlock, _ := f.Payload["content_block"].(map[string]any)
if contentBlock["type"] == "tool_use" {
foundToolUse = true
break
}
}
if !foundToolUse {
t.Fatalf("expected tool_use block in stream, body=%s", rec.Body.String())
}
foundToolUseStop := false
for _, f := range findClaudeFrames(frames, "message_delta") {
delta, _ := f.Payload["delta"].(map[string]any)
if delta["stop_reason"] == "tool_use" {
foundToolUseStop = true
break
}
}
if !foundToolUseStop {
t.Fatalf("expected stop_reason=tool_use, body=%s", rec.Body.String())
}
}
func TestHandleClaudeStreamRealtimeUpstreamErrorEvent(t *testing.T) {
h := &Handler{}
resp := makeClaudeSSEHTTPResponse(
`data: {"error":{"message":"boom"}}`,
)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-20250514", []any{map[string]any{"role": "user", "content": "hi"}}, false, false, nil)
frames := parseClaudeFrames(t, rec.Body.String())
errFrames := findClaudeFrames(frames, "error")
if len(errFrames) == 0 {
t.Fatalf("expected error event frame, body=%s", rec.Body.String())
}
if errFrames[0].Payload["type"] != "error" {
t.Fatalf("expected error payload type, body=%s", rec.Body.String())
}
}
func TestHandleClaudeStreamRealtimePingEvent(t *testing.T) {
h := &Handler{}
oldPing := claudeStreamPingInterval
oldIdle := claudeStreamIdleTimeout
oldKeepalive := claudeStreamMaxKeepaliveCnt
claudeStreamPingInterval = 10 * time.Millisecond
claudeStreamIdleTimeout = 300 * time.Millisecond
claudeStreamMaxKeepaliveCnt = 50
defer func() {
claudeStreamPingInterval = oldPing
claudeStreamIdleTimeout = oldIdle
claudeStreamMaxKeepaliveCnt = oldKeepalive
}()
pr, pw := io.Pipe()
resp := &http.Response{StatusCode: http.StatusOK, Header: make(http.Header), Body: pr}
go func() {
time.Sleep(40 * time.Millisecond)
_, _ = io.WriteString(pw, "data: {\"p\":\"response/content\",\"v\":\"hi\"}\n")
_, _ = io.WriteString(pw, "data: [DONE]\n")
_ = pw.Close()
}()
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-20250514", []any{map[string]any{"role": "user", "content": "hi"}}, false, false, nil)
frames := parseClaudeFrames(t, rec.Body.String())
if len(findClaudeFrames(frames, "ping")) == 0 {
t.Fatalf("expected ping event in stream, body=%s", rec.Body.String())
}
}
// TestCollectDeepSeekRegression checks that collectDeepSeek, with thinking
// enabled, still separates one thinking chunk and one content chunk into the
// thinking and text results respectively.
func TestCollectDeepSeekRegression(t *testing.T) {
	upstream := makeClaudeSSEHTTPResponse(
		`data: {"p":"response/thinking_content","v":"想"}`,
		`data: {"p":"response/content","v":"答"}`,
		`data: [DONE]`,
	)

	gotText, gotThinking := collectDeepSeek(upstream, true)

	if gotThinking != "想" {
		t.Fatalf("unexpected thinking: %q", gotThinking)
	}
	if gotText != "答" {
		t.Fatalf("unexpected text: %q", gotText)
	}
}
// asString returns v as a string when its dynamic type is string, and the
// empty string for any other value (including nil).
func asString(v any) string {
	if str, ok := v.(string); ok {
		return str
	}
	return ""
}