修复吞字问题

This commit is contained in:
CJACK
2026-05-01 01:31:48 +08:00
parent fca8c01397
commit 92e321fe2c
11 changed files with 257 additions and 47 deletions

View File

@@ -107,6 +107,23 @@ func (s *chatStreamRuntime) sendChunk(v any) {
}
}
func (s *chatStreamRuntime) sendDelta(delta map[string]any) {
if len(delta) == 0 {
return
}
if !s.firstChunkSent {
delta["role"] = "assistant"
s.firstChunkSent = true
}
s.sendChunk(openaifmt.BuildChatStreamChunk(
s.completionID,
s.created,
s.model,
[]map[string]any{openaifmt.BuildChatStreamDeltaChoice(0, delta)},
nil,
))
}
func (s *chatStreamRuntime) sendDone() {
_, _ = s.w.Write([]byte("data: [DONE]\n\n"))
if s.canFlush {
@@ -257,7 +274,6 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReasonHandlerRequested}
}
newChoices := make([]map[string]any, 0, len(parsed.Parts))
contentSeen := false
for _, p := range parsed.ToolDetectionThinkingParts {
trimmed := sse.TrimContinuationOverlap(s.toolDetectionThinking.String(), p.Text)
@@ -266,11 +282,6 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
}
}
for _, p := range parsed.Parts {
delta := map[string]any{}
if !s.firstChunkSent {
delta["role"] = "assistant"
s.firstChunkSent = true
}
if p.Type == "thinking" {
rawTrimmed := sse.TrimContinuationOverlap(s.rawThinking.String(), p.Text)
if rawTrimmed != "" {
@@ -287,7 +298,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
continue
}
s.thinking.WriteString(trimmed)
delta["reasoning_content"] = trimmed
s.sendDelta(map[string]any{"reasoning_content": trimmed})
}
} else {
rawTrimmed := sse.TrimContinuationOverlap(s.rawText.String(), p.Text)
@@ -308,7 +319,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
if trimmed == "" {
continue
}
delta["content"] = trimmed
s.sendDelta(map[string]any{"content": trimmed})
} else {
events := toolstream.ProcessChunk(&s.toolSieve, rawTrimmed, s.toolNames)
for _, evt := range events {
@@ -328,11 +339,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
"tool_calls": formatted,
}
s.toolCallsEmitted = true
if !s.firstChunkSent {
tcDelta["role"] = "assistant"
s.firstChunkSent = true
}
newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, tcDelta))
s.sendDelta(tcDelta)
continue
}
if len(evt.ToolCalls) > 0 {
@@ -341,11 +348,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
tcDelta := map[string]any{
"tool_calls": formatFinalStreamToolCallsWithStableIDs(evt.ToolCalls, s.streamToolCallIDs, s.toolsRaw),
}
if !s.firstChunkSent {
tcDelta["role"] = "assistant"
s.firstChunkSent = true
}
newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, tcDelta))
s.sendDelta(tcDelta)
s.resetStreamToolCallState()
continue
}
@@ -357,22 +360,11 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
contentDelta := map[string]any{
"content": cleaned,
}
if !s.firstChunkSent {
contentDelta["role"] = "assistant"
s.firstChunkSent = true
}
newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, contentDelta))
s.sendDelta(contentDelta)
}
}
}
}
if len(delta) > 0 {
newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, delta))
}
}
if len(newChoices) > 0 {
s.sendChunk(openaifmt.BuildChatStreamChunk(s.completionID, s.created, s.model, newChoices, nil))
}
return streamengine.ParsedDecision{ContentSeen: contentSeen}
}

View File

@@ -49,6 +49,7 @@ func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Co
detected := detectAssistantToolCalls(result.rawText, result.text, result.rawThinking, result.toolDetectionThinking, toolNames)
result.detectedCalls = len(detected.Calls)
result.body = openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, result.thinking, result.text, detected.Calls, toolsRaw)
addRefFileTokensToUsage(result.body, refFileTokens)
result.finishReason = chatFinishReason(result.body)
if !shouldRetryChatNonStream(result, attempts) {
h.finishChatNonStreamResult(w, result, attempts, usagePrompt, refFileTokens, historySession)

View File

@@ -1,6 +1,7 @@
package chat
import (
"context"
"encoding/json"
"io"
"net/http"
@@ -239,6 +240,75 @@ func TestHandleStreamToolsPlainTextStreamsBeforeFinish(t *testing.T) {
}
}
func TestHandleStreamThinkingDisabledDoesNotLeakHiddenFragmentContinuations(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"p":"response/fragments","o":"APPEND","v":[{"type":"THINK","content":"我们"}]}`,
`data: {"p":"response/fragments/-1/content","v":"被"}`,
`data: {"v":"要求"}`,
`data: {"p":"response/fragments","o":"APPEND","v":[{"type":"RESPONSE","content":"答"}]}`,
`data: {"p":"response/fragments/-1/content","v":"案"}`,
`data: [DONE]`,
)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
h.handleStream(rec, req, resp, "cid-hidden-fragment", "deepseek-v4-flash", "prompt", 0, false, false, nil, nil, nil)
frames, done := parseSSEDataFrames(t, rec.Body.String())
if !done {
t.Fatalf("expected [DONE], body=%s", rec.Body.String())
}
content := strings.Builder{}
for _, frame := range frames {
choices, _ := frame["choices"].([]any)
for _, item := range choices {
choice, _ := item.(map[string]any)
delta, _ := choice["delta"].(map[string]any)
if c, ok := delta["content"].(string); ok {
content.WriteString(c)
}
}
}
if got := content.String(); got != "答案" {
t.Fatalf("expected only visible response text, got %q body=%s", got, rec.Body.String())
}
}
func TestHandleStreamEmitsSingleChoiceFramesForMultipleParsedParts(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"p":"response/fragments","o":"APPEND","v":[{"type":"THINK","content":"我们"},{"type":"THINK","content":"被"},{"type":"THINK","content":"要求"},{"type":"RESPONSE","content":"答"},{"type":"RESPONSE","content":"案"}]}`,
`data: [DONE]`,
)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
h.handleStream(rec, req, resp, "cid-multi-parts", "deepseek-v4-pro", "prompt", 0, true, false, nil, nil, nil)
frames, done := parseSSEDataFrames(t, rec.Body.String())
if !done {
t.Fatalf("expected [DONE], body=%s", rec.Body.String())
}
var reasoning, content strings.Builder
for _, frame := range frames {
choices, _ := frame["choices"].([]any)
if len(choices) != 1 {
t.Fatalf("expected exactly one choice per stream frame, got %d frame=%#v body=%s", len(choices), frame, rec.Body.String())
}
choice, _ := choices[0].(map[string]any)
delta, _ := choice["delta"].(map[string]any)
reasoning.WriteString(asString(delta["reasoning_content"]))
content.WriteString(asString(delta["content"]))
}
if got := reasoning.String(); got != "我们被要求" {
t.Fatalf("first-choice-only client would miss reasoning tokens: got %q body=%s", got, rec.Body.String())
}
if got := content.String(); got != "答案" {
t.Fatalf("first-choice-only client would miss content tokens: got %q body=%s", got, rec.Body.String())
}
}
func TestHandleStreamIncompleteCapturedToolJSONFlushesAsTextOnFinalize(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
@@ -447,3 +517,45 @@ func TestHandleStreamCoercesSchemaDeclaredStringArgumentsOnFinalize(t *testing.T
}
t.Fatalf("expected at least one streamed tool call delta, body=%s", rec.Body.String())
}
func TestHandleNonStreamWithRetryIncludesRefFileTokensInUsage(t *testing.T) {
h := &Handler{}
run := func(refFileTokens int) map[string]any {
resp := makeSSEHTTPResponse(
`data: {"p":"response/content","v":"hello world"}`,
`data: [DONE]`,
)
rec := httptest.NewRecorder()
h.handleNonStreamWithRetry(rec, context.Background(), nil, resp, nil, "", "cid-ref", "deepseek-v4-flash", "prompt", refFileTokens, false, false, nil, nil, nil)
if rec.Code != http.StatusOK {
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
}
return decodeJSONBody(t, rec.Body.String())
}
base := run(0)
withRef := run(7)
baseUsage, _ := base["usage"].(map[string]any)
refUsage, _ := withRef["usage"].(map[string]any)
if baseUsage == nil || refUsage == nil {
t.Fatalf("expected usage objects, base=%#v ref=%#v", base["usage"], withRef["usage"])
}
getInt := func(m map[string]any, key string) int {
t.Helper()
v, ok := m[key].(float64)
if !ok {
t.Fatalf("expected numeric %s, got %#v", key, m[key])
}
return int(v)
}
if got := getInt(refUsage, "prompt_tokens") - getInt(baseUsage, "prompt_tokens"); got != 7 {
t.Fatalf("expected prompt_tokens delta 7, got %d", got)
}
if got := getInt(refUsage, "total_tokens") - getInt(baseUsage, "total_tokens"); got != 7 {
t.Fatalf("expected total_tokens delta 7, got %d", got)
}
}