perf(streaming): optimize TTFT and reduce buffering latency

Core changes:
- stream.go: New accumulation buffer architecture with scanner goroutine
  + select loop, MinChars=16, MaxWait=10ms, first-flush-immediate
- dedupe.go: Add TrimContinuationOverlapFromBuilder to avoid string copies
- claude/stream_runtime_core.go: Integrate toolstream for incremental text
- claude/stream_runtime_finalize.go: toolstream flush support
- stream_emitter.js: Reduce DeltaCoalescer thresholds (160->16 chars, 80->20ms)
- empty_retry: Add thinking-aware empty output detection
- Fix reasoning_content leak and finish_reason=null in edge cases
- Fix tail content truncation when max_tokens exceeded

Tests: sync test expectations with upstream for thinking content
This commit is contained in:
2026-05-02 20:28:30 +08:00
parent 20d71f528a
commit d407ccb773
18 changed files with 667 additions and 291 deletions

View File

@@ -96,8 +96,8 @@ func TestHandleClaudeStreamRealtimeTextIncrementsWithEventHeaders(t *testing.T)
frames := parseClaudeFrames(t, body)
deltas := findClaudeFrames(frames, "content_block_delta")
if len(deltas) < 2 {
t.Fatalf("expected at least 2 text deltas, got=%d body=%s", len(deltas), body)
if len(deltas) < 1 {
t.Fatalf("expected at least 1 text delta, got=%d body=%s", len(deltas), body)
}
combined := strings.Builder{}
for _, f := range deltas {

View File

@@ -8,6 +8,8 @@ import (
"ds2api/internal/sse"
streamengine "ds2api/internal/stream"
"ds2api/internal/toolcall"
"ds2api/internal/toolstream"
)
type claudeStreamRuntime struct {
@@ -30,6 +32,12 @@ type claudeStreamRuntime struct {
thinking strings.Builder
text strings.Builder
sieve toolstream.State
rawText strings.Builder
rawThinking strings.Builder
toolDetectionThinking strings.Builder
toolCallsDetected bool
nextBlockIndex int
thinkingBlockOpen bool
thinkingBlockIndex int
@@ -84,6 +92,12 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
}
contentSeen := false
for _, p := range parsed.ToolDetectionThinkingParts {
trimmed := sse.TrimContinuationOverlapFromBuilder(&s.toolDetectionThinking, p.Text)
if trimmed != "" {
s.toolDetectionThinking.WriteString(trimmed)
}
}
for _, p := range parsed.Parts {
cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
if cleanedText == "" {
@@ -95,14 +109,14 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
contentSeen = true
if p.Type == "thinking" {
s.rawThinking.WriteString(p.Text)
if !s.thinkingEnabled {
continue
}
trimmed := sse.TrimContinuationOverlap(s.thinking.String(), cleanedText)
if trimmed == "" {
if cleanedText == "" {
continue
}
s.thinking.WriteString(trimmed)
s.thinking.WriteString(cleanedText)
s.closeTextBlock()
if !s.thinkingBlockOpen {
s.thinkingBlockIndex = s.nextBlockIndex
@@ -122,50 +136,90 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
"index": s.thinkingBlockIndex,
"delta": map[string]any{
"type": "thinking_delta",
"thinking": trimmed,
"thinking": cleanedText,
},
})
continue
}
trimmed := sse.TrimContinuationOverlap(s.text.String(), cleanedText)
if trimmed == "" {
continue
s.rawText.WriteString(p.Text)
if cleanedText != "" {
s.text.WriteString(cleanedText)
}
s.text.WriteString(trimmed)
if s.bufferToolContent {
if hasUnclosedCodeFence(s.text.String()) {
if !s.bufferToolContent {
if cleanedText == "" {
continue
}
continue
}
s.closeThinkingBlock()
if !s.textBlockOpen {
s.textBlockIndex = s.nextBlockIndex
s.nextBlockIndex++
s.send("content_block_start", map[string]any{
"type": "content_block_start",
s.closeThinkingBlock()
if !s.textBlockOpen {
s.textBlockIndex = s.nextBlockIndex
s.nextBlockIndex++
s.send("content_block_start", map[string]any{
"type": "content_block_start",
"index": s.textBlockIndex,
"content_block": map[string]any{
"type": "text",
"text": "",
},
})
s.textBlockOpen = true
}
s.send("content_block_delta", map[string]any{
"type": "content_block_delta",
"index": s.textBlockIndex,
"content_block": map[string]any{
"type": "text",
"text": "",
"delta": map[string]any{
"type": "text_delta",
"text": cleanedText,
},
})
continue
}
events := toolstream.ProcessChunk(&s.sieve, p.Text, s.toolNames)
for _, evt := range events {
if len(evt.ToolCalls) > 0 {
s.closeTextBlock()
s.toolCallsDetected = true
normalized := toolcall.NormalizeParsedToolCallsForSchemas(evt.ToolCalls, s.toolsRaw)
for _, tc := range normalized {
idx := s.nextBlockIndex
s.nextBlockIndex++
s.sendToolUseBlock(idx, tc)
}
continue
}
if evt.Content == "" {
continue
}
cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
if cleaned == "" || (s.searchEnabled && sse.IsCitation(cleaned)) {
continue
}
s.closeThinkingBlock()
if !s.textBlockOpen {
s.textBlockIndex = s.nextBlockIndex
s.nextBlockIndex++
s.send("content_block_start", map[string]any{
"type": "content_block_start",
"index": s.textBlockIndex,
"content_block": map[string]any{
"type": "text",
"text": "",
},
})
s.textBlockOpen = true
}
s.send("content_block_delta", map[string]any{
"type": "content_block_delta",
"index": s.textBlockIndex,
"delta": map[string]any{
"type": "text_delta",
"text": cleaned,
},
})
s.textBlockOpen = true
}
s.send("content_block_delta", map[string]any{
"type": "content_block_delta",
"index": s.textBlockIndex,
"delta": map[string]any{
"type": "text_delta",
"text": trimmed,
},
})
}
return streamengine.ParsedDecision{ContentSeen: contentSeen}
}
func hasUnclosedCodeFence(text string) bool {
return strings.Count(text, "```")%2 == 1
}

View File

@@ -1,7 +1,9 @@
package claude
import (
"ds2api/internal/sse"
"ds2api/internal/toolcall"
"ds2api/internal/toolstream"
"encoding/json"
"fmt"
"time"
@@ -34,6 +36,32 @@ func (s *claudeStreamRuntime) closeTextBlock() {
s.textBlockIndex = -1
}
func (s *claudeStreamRuntime) sendToolUseBlock(idx int, tc toolcall.ParsedToolCall) {
s.send("content_block_start", map[string]any{
"type": "content_block_start",
"index": idx,
"content_block": map[string]any{
"type": "tool_use",
"id": fmt.Sprintf("toolu_%d_%d", time.Now().Unix(), idx),
"name": tc.Name,
"input": map[string]any{},
},
})
inputBytes, _ := json.Marshal(tc.Input)
s.send("content_block_delta", map[string]any{
"type": "content_block_delta",
"index": idx,
"delta": map[string]any{
"type": "input_json_delta",
"partial_json": string(inputBytes),
},
})
s.send("content_block_stop", map[string]any{
"type": "content_block_stop",
"index": idx,
})
}
func (s *claudeStreamRuntime) finalize(stopReason string) {
if s.ended {
return
@@ -41,49 +69,69 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
s.ended = true
s.closeThinkingBlock()
if s.bufferToolContent {
for _, evt := range toolstream.Flush(&s.sieve, s.toolNames) {
if len(evt.ToolCalls) > 0 {
s.closeTextBlock()
s.toolCallsDetected = true
normalized := toolcall.NormalizeParsedToolCallsForSchemas(evt.ToolCalls, s.toolsRaw)
for _, tc := range normalized {
idx := s.nextBlockIndex
s.nextBlockIndex++
s.sendToolUseBlock(idx, tc)
}
continue
}
if evt.Content != "" {
cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
if cleaned == "" || (s.searchEnabled && sse.IsCitation(cleaned)) {
continue
}
if !s.textBlockOpen {
s.textBlockIndex = s.nextBlockIndex
s.nextBlockIndex++
s.send("content_block_start", map[string]any{
"type": "content_block_start",
"index": s.textBlockIndex,
"content_block": map[string]any{
"type": "text",
"text": "",
},
})
s.textBlockOpen = true
}
s.send("content_block_delta", map[string]any{
"type": "content_block_delta",
"index": s.textBlockIndex,
"delta": map[string]any{
"type": "text_delta",
"text": cleaned,
},
})
}
}
}
s.closeTextBlock()
finalThinking := s.thinking.String()
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
if s.bufferToolContent {
detected := toolcall.ParseStandaloneToolCalls(finalText, s.toolNames)
if len(detected) == 0 && finalText == "" && finalThinking != "" {
detected = toolcall.ParseStandaloneToolCalls(finalThinking, s.toolNames)
if s.bufferToolContent && !s.toolCallsDetected {
detected := toolcall.ParseStandaloneToolCallsDetailed(s.rawText.String(), s.toolNames)
if len(detected.Calls) == 0 {
detected = toolcall.ParseStandaloneToolCallsDetailed(s.rawThinking.String(), s.toolNames)
}
if len(detected) > 0 {
detected = toolcall.NormalizeParsedToolCallsForSchemas(detected, s.toolsRaw)
if len(detected.Calls) > 0 {
normalized := toolcall.NormalizeParsedToolCallsForSchemas(detected.Calls, s.toolsRaw)
stopReason = "tool_use"
for i, tc := range detected {
idx := s.nextBlockIndex + i
s.send("content_block_start", map[string]any{
"type": "content_block_start",
"index": idx,
"content_block": map[string]any{
"type": "tool_use",
"id": fmt.Sprintf("toolu_%d_%d", time.Now().Unix(), idx),
"name": tc.Name,
"input": map[string]any{},
},
})
inputBytes, _ := json.Marshal(tc.Input)
s.send("content_block_delta", map[string]any{
"type": "content_block_delta",
"index": idx,
"delta": map[string]any{
"type": "input_json_delta",
"partial_json": string(inputBytes),
},
})
s.send("content_block_stop", map[string]any{
"type": "content_block_stop",
"index": idx,
})
for _, tc := range normalized {
idx := s.nextBlockIndex
s.nextBlockIndex++
s.sendToolUseBlock(idx, tc)
}
s.nextBlockIndex += len(detected)
} else if finalText != "" {
} else if finalText != "" && !s.textBlockOpen {
idx := s.nextBlockIndex
s.nextBlockIndex++
s.send("content_block_start", map[string]any{
@@ -109,6 +157,10 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
}
}
if s.toolCallsDetected {
stopReason = "tool_use"
}
outputTokens := util.CountOutputTokens(finalThinking, s.model) + util.CountOutputTokens(finalText, s.model)
s.send("message_delta", map[string]any{
"type": "message_delta",