mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-15 05:35:07 +08:00
perf(streaming): optimize TTFT and reduce buffering latency
Core changes: - stream.go: New accumulation buffer architecture with scanner goroutine + select loop, MinChars=16, MaxWait=10ms, first-flush-immediate - dedupe.go: Add TrimContinuationOverlapFromBuilder to avoid string copies - claude/stream_runtime_core.go: Integrate toolstream for incremental text - claude/stream_runtime_finalize.go: toolstream flush support - stream_emitter.js: Reduce DeltaCoalescer thresholds (160->16 chars, 80->20ms) - empty_retry: Add thinking-aware empty output detection - Fix reasoning_content leak and finish_reason=null in edge cases - Fix tail content truncation when max_tokens exceeded Tests: sync test expectations with upstream for thinking content
This commit is contained in:
@@ -127,19 +127,16 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
|
||||
contentSeen = true
|
||||
if p.Type == "thinking" {
|
||||
if s.thinkingEnabled {
|
||||
trimmed := sse.TrimContinuationOverlap(s.thinking.String(), cleanedText)
|
||||
if trimmed == "" {
|
||||
continue
|
||||
if cleanedText != "" {
|
||||
s.thinking.WriteString(cleanedText)
|
||||
}
|
||||
s.thinking.WriteString(trimmed)
|
||||
}
|
||||
continue
|
||||
}
|
||||
trimmed := sse.TrimContinuationOverlap(s.text.String(), cleanedText)
|
||||
if trimmed == "" {
|
||||
if cleanedText == "" {
|
||||
continue
|
||||
}
|
||||
s.text.WriteString(trimmed)
|
||||
s.text.WriteString(cleanedText)
|
||||
if s.bufferContent {
|
||||
continue
|
||||
}
|
||||
@@ -149,7 +146,7 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
|
||||
"index": 0,
|
||||
"content": map[string]any{
|
||||
"role": "model",
|
||||
"parts": []map[string]any{{"text": trimmed}},
|
||||
"parts": []map[string]any{{"text": cleanedText}},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user