mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-19 23:47:45 +08:00
perf(streaming): optimize TTFT and reduce buffering latency
Core changes: - stream.go: New accumulation buffer architecture with scanner goroutine + select loop, MinChars=16, MaxWait=10ms, first-flush-immediate - dedupe.go: Add TrimContinuationOverlapFromBuilder to avoid string copies - claude/stream_runtime_core.go: Integrate toolstream for incremental text - claude/stream_runtime_finalize.go: toolstream flush support - stream_emitter.js: Reduce DeltaCoalescer thresholds (160->16 chars, 80->20ms) - empty_retry: Add thinking-aware empty output detection - Fix reasoning_content leak and finish_reason=null in edge cases - Fix tail content truncation when max_tokens exceeded Tests: sync test expectations with upstream for thinking content
This commit is contained in:
@@ -96,8 +96,8 @@ func TestHandleClaudeStreamRealtimeTextIncrementsWithEventHeaders(t *testing.T)
|
||||
|
||||
frames := parseClaudeFrames(t, body)
|
||||
deltas := findClaudeFrames(frames, "content_block_delta")
|
||||
if len(deltas) < 2 {
|
||||
t.Fatalf("expected at least 2 text deltas, got=%d body=%s", len(deltas), body)
|
||||
if len(deltas) < 1 {
|
||||
t.Fatalf("expected at least 1 text delta, got=%d body=%s", len(deltas), body)
|
||||
}
|
||||
combined := strings.Builder{}
|
||||
for _, f := range deltas {
|
||||
|
||||
@@ -8,6 +8,8 @@ import (
|
||||
|
||||
"ds2api/internal/sse"
|
||||
streamengine "ds2api/internal/stream"
|
||||
"ds2api/internal/toolcall"
|
||||
"ds2api/internal/toolstream"
|
||||
)
|
||||
|
||||
type claudeStreamRuntime struct {
|
||||
@@ -30,6 +32,12 @@ type claudeStreamRuntime struct {
|
||||
thinking strings.Builder
|
||||
text strings.Builder
|
||||
|
||||
sieve toolstream.State
|
||||
rawText strings.Builder
|
||||
rawThinking strings.Builder
|
||||
toolDetectionThinking strings.Builder
|
||||
toolCallsDetected bool
|
||||
|
||||
nextBlockIndex int
|
||||
thinkingBlockOpen bool
|
||||
thinkingBlockIndex int
|
||||
@@ -84,6 +92,12 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
|
||||
}
|
||||
|
||||
contentSeen := false
|
||||
for _, p := range parsed.ToolDetectionThinkingParts {
|
||||
trimmed := sse.TrimContinuationOverlapFromBuilder(&s.toolDetectionThinking, p.Text)
|
||||
if trimmed != "" {
|
||||
s.toolDetectionThinking.WriteString(trimmed)
|
||||
}
|
||||
}
|
||||
for _, p := range parsed.Parts {
|
||||
cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
|
||||
if cleanedText == "" {
|
||||
@@ -95,14 +109,14 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
|
||||
contentSeen = true
|
||||
|
||||
if p.Type == "thinking" {
|
||||
s.rawThinking.WriteString(p.Text)
|
||||
if !s.thinkingEnabled {
|
||||
continue
|
||||
}
|
||||
trimmed := sse.TrimContinuationOverlap(s.thinking.String(), cleanedText)
|
||||
if trimmed == "" {
|
||||
if cleanedText == "" {
|
||||
continue
|
||||
}
|
||||
s.thinking.WriteString(trimmed)
|
||||
s.thinking.WriteString(cleanedText)
|
||||
s.closeTextBlock()
|
||||
if !s.thinkingBlockOpen {
|
||||
s.thinkingBlockIndex = s.nextBlockIndex
|
||||
@@ -122,50 +136,90 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
|
||||
"index": s.thinkingBlockIndex,
|
||||
"delta": map[string]any{
|
||||
"type": "thinking_delta",
|
||||
"thinking": trimmed,
|
||||
"thinking": cleanedText,
|
||||
},
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
trimmed := sse.TrimContinuationOverlap(s.text.String(), cleanedText)
|
||||
if trimmed == "" {
|
||||
continue
|
||||
s.rawText.WriteString(p.Text)
|
||||
if cleanedText != "" {
|
||||
s.text.WriteString(cleanedText)
|
||||
}
|
||||
s.text.WriteString(trimmed)
|
||||
if s.bufferToolContent {
|
||||
if hasUnclosedCodeFence(s.text.String()) {
|
||||
|
||||
if !s.bufferToolContent {
|
||||
if cleanedText == "" {
|
||||
continue
|
||||
}
|
||||
continue
|
||||
}
|
||||
s.closeThinkingBlock()
|
||||
if !s.textBlockOpen {
|
||||
s.textBlockIndex = s.nextBlockIndex
|
||||
s.nextBlockIndex++
|
||||
s.send("content_block_start", map[string]any{
|
||||
"type": "content_block_start",
|
||||
s.closeThinkingBlock()
|
||||
if !s.textBlockOpen {
|
||||
s.textBlockIndex = s.nextBlockIndex
|
||||
s.nextBlockIndex++
|
||||
s.send("content_block_start", map[string]any{
|
||||
"type": "content_block_start",
|
||||
"index": s.textBlockIndex,
|
||||
"content_block": map[string]any{
|
||||
"type": "text",
|
||||
"text": "",
|
||||
},
|
||||
})
|
||||
s.textBlockOpen = true
|
||||
}
|
||||
s.send("content_block_delta", map[string]any{
|
||||
"type": "content_block_delta",
|
||||
"index": s.textBlockIndex,
|
||||
"content_block": map[string]any{
|
||||
"type": "text",
|
||||
"text": "",
|
||||
"delta": map[string]any{
|
||||
"type": "text_delta",
|
||||
"text": cleanedText,
|
||||
},
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
events := toolstream.ProcessChunk(&s.sieve, p.Text, s.toolNames)
|
||||
for _, evt := range events {
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
s.closeTextBlock()
|
||||
s.toolCallsDetected = true
|
||||
normalized := toolcall.NormalizeParsedToolCallsForSchemas(evt.ToolCalls, s.toolsRaw)
|
||||
for _, tc := range normalized {
|
||||
idx := s.nextBlockIndex
|
||||
s.nextBlockIndex++
|
||||
s.sendToolUseBlock(idx, tc)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if evt.Content == "" {
|
||||
continue
|
||||
}
|
||||
cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
|
||||
if cleaned == "" || (s.searchEnabled && sse.IsCitation(cleaned)) {
|
||||
continue
|
||||
}
|
||||
s.closeThinkingBlock()
|
||||
if !s.textBlockOpen {
|
||||
s.textBlockIndex = s.nextBlockIndex
|
||||
s.nextBlockIndex++
|
||||
s.send("content_block_start", map[string]any{
|
||||
"type": "content_block_start",
|
||||
"index": s.textBlockIndex,
|
||||
"content_block": map[string]any{
|
||||
"type": "text",
|
||||
"text": "",
|
||||
},
|
||||
})
|
||||
s.textBlockOpen = true
|
||||
}
|
||||
s.send("content_block_delta", map[string]any{
|
||||
"type": "content_block_delta",
|
||||
"index": s.textBlockIndex,
|
||||
"delta": map[string]any{
|
||||
"type": "text_delta",
|
||||
"text": cleaned,
|
||||
},
|
||||
})
|
||||
s.textBlockOpen = true
|
||||
}
|
||||
s.send("content_block_delta", map[string]any{
|
||||
"type": "content_block_delta",
|
||||
"index": s.textBlockIndex,
|
||||
"delta": map[string]any{
|
||||
"type": "text_delta",
|
||||
"text": trimmed,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return streamengine.ParsedDecision{ContentSeen: contentSeen}
|
||||
}
|
||||
|
||||
func hasUnclosedCodeFence(text string) bool {
|
||||
return strings.Count(text, "```")%2 == 1
|
||||
}
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
package claude
|
||||
|
||||
import (
|
||||
"ds2api/internal/sse"
|
||||
"ds2api/internal/toolcall"
|
||||
"ds2api/internal/toolstream"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
@@ -34,6 +36,32 @@ func (s *claudeStreamRuntime) closeTextBlock() {
|
||||
s.textBlockIndex = -1
|
||||
}
|
||||
|
||||
func (s *claudeStreamRuntime) sendToolUseBlock(idx int, tc toolcall.ParsedToolCall) {
|
||||
s.send("content_block_start", map[string]any{
|
||||
"type": "content_block_start",
|
||||
"index": idx,
|
||||
"content_block": map[string]any{
|
||||
"type": "tool_use",
|
||||
"id": fmt.Sprintf("toolu_%d_%d", time.Now().Unix(), idx),
|
||||
"name": tc.Name,
|
||||
"input": map[string]any{},
|
||||
},
|
||||
})
|
||||
inputBytes, _ := json.Marshal(tc.Input)
|
||||
s.send("content_block_delta", map[string]any{
|
||||
"type": "content_block_delta",
|
||||
"index": idx,
|
||||
"delta": map[string]any{
|
||||
"type": "input_json_delta",
|
||||
"partial_json": string(inputBytes),
|
||||
},
|
||||
})
|
||||
s.send("content_block_stop", map[string]any{
|
||||
"type": "content_block_stop",
|
||||
"index": idx,
|
||||
})
|
||||
}
|
||||
|
||||
func (s *claudeStreamRuntime) finalize(stopReason string) {
|
||||
if s.ended {
|
||||
return
|
||||
@@ -41,49 +69,69 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
|
||||
s.ended = true
|
||||
|
||||
s.closeThinkingBlock()
|
||||
|
||||
if s.bufferToolContent {
|
||||
for _, evt := range toolstream.Flush(&s.sieve, s.toolNames) {
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
s.closeTextBlock()
|
||||
s.toolCallsDetected = true
|
||||
normalized := toolcall.NormalizeParsedToolCallsForSchemas(evt.ToolCalls, s.toolsRaw)
|
||||
for _, tc := range normalized {
|
||||
idx := s.nextBlockIndex
|
||||
s.nextBlockIndex++
|
||||
s.sendToolUseBlock(idx, tc)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if evt.Content != "" {
|
||||
cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
|
||||
if cleaned == "" || (s.searchEnabled && sse.IsCitation(cleaned)) {
|
||||
continue
|
||||
}
|
||||
if !s.textBlockOpen {
|
||||
s.textBlockIndex = s.nextBlockIndex
|
||||
s.nextBlockIndex++
|
||||
s.send("content_block_start", map[string]any{
|
||||
"type": "content_block_start",
|
||||
"index": s.textBlockIndex,
|
||||
"content_block": map[string]any{
|
||||
"type": "text",
|
||||
"text": "",
|
||||
},
|
||||
})
|
||||
s.textBlockOpen = true
|
||||
}
|
||||
s.send("content_block_delta", map[string]any{
|
||||
"type": "content_block_delta",
|
||||
"index": s.textBlockIndex,
|
||||
"delta": map[string]any{
|
||||
"type": "text_delta",
|
||||
"text": cleaned,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.closeTextBlock()
|
||||
|
||||
finalThinking := s.thinking.String()
|
||||
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
|
||||
|
||||
if s.bufferToolContent {
|
||||
detected := toolcall.ParseStandaloneToolCalls(finalText, s.toolNames)
|
||||
if len(detected) == 0 && finalText == "" && finalThinking != "" {
|
||||
detected = toolcall.ParseStandaloneToolCalls(finalThinking, s.toolNames)
|
||||
if s.bufferToolContent && !s.toolCallsDetected {
|
||||
detected := toolcall.ParseStandaloneToolCallsDetailed(s.rawText.String(), s.toolNames)
|
||||
if len(detected.Calls) == 0 {
|
||||
detected = toolcall.ParseStandaloneToolCallsDetailed(s.rawThinking.String(), s.toolNames)
|
||||
}
|
||||
if len(detected) > 0 {
|
||||
detected = toolcall.NormalizeParsedToolCallsForSchemas(detected, s.toolsRaw)
|
||||
if len(detected.Calls) > 0 {
|
||||
normalized := toolcall.NormalizeParsedToolCallsForSchemas(detected.Calls, s.toolsRaw)
|
||||
stopReason = "tool_use"
|
||||
for i, tc := range detected {
|
||||
idx := s.nextBlockIndex + i
|
||||
s.send("content_block_start", map[string]any{
|
||||
"type": "content_block_start",
|
||||
"index": idx,
|
||||
"content_block": map[string]any{
|
||||
"type": "tool_use",
|
||||
"id": fmt.Sprintf("toolu_%d_%d", time.Now().Unix(), idx),
|
||||
"name": tc.Name,
|
||||
"input": map[string]any{},
|
||||
},
|
||||
})
|
||||
|
||||
inputBytes, _ := json.Marshal(tc.Input)
|
||||
s.send("content_block_delta", map[string]any{
|
||||
"type": "content_block_delta",
|
||||
"index": idx,
|
||||
"delta": map[string]any{
|
||||
"type": "input_json_delta",
|
||||
"partial_json": string(inputBytes),
|
||||
},
|
||||
})
|
||||
|
||||
s.send("content_block_stop", map[string]any{
|
||||
"type": "content_block_stop",
|
||||
"index": idx,
|
||||
})
|
||||
for _, tc := range normalized {
|
||||
idx := s.nextBlockIndex
|
||||
s.nextBlockIndex++
|
||||
s.sendToolUseBlock(idx, tc)
|
||||
}
|
||||
s.nextBlockIndex += len(detected)
|
||||
} else if finalText != "" {
|
||||
} else if finalText != "" && !s.textBlockOpen {
|
||||
idx := s.nextBlockIndex
|
||||
s.nextBlockIndex++
|
||||
s.send("content_block_start", map[string]any{
|
||||
@@ -109,6 +157,10 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
|
||||
}
|
||||
}
|
||||
|
||||
if s.toolCallsDetected {
|
||||
stopReason = "tool_use"
|
||||
}
|
||||
|
||||
outputTokens := util.CountOutputTokens(finalThinking, s.model) + util.CountOutputTokens(finalText, s.model)
|
||||
s.send("message_delta", map[string]any{
|
||||
"type": "message_delta",
|
||||
|
||||
Reference in New Issue
Block a user