mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-09 18:57:43 +08:00
修复吞字问题
This commit is contained in:
@@ -107,6 +107,23 @@ func (s *chatStreamRuntime) sendChunk(v any) {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *chatStreamRuntime) sendDelta(delta map[string]any) {
|
||||
if len(delta) == 0 {
|
||||
return
|
||||
}
|
||||
if !s.firstChunkSent {
|
||||
delta["role"] = "assistant"
|
||||
s.firstChunkSent = true
|
||||
}
|
||||
s.sendChunk(openaifmt.BuildChatStreamChunk(
|
||||
s.completionID,
|
||||
s.created,
|
||||
s.model,
|
||||
[]map[string]any{openaifmt.BuildChatStreamDeltaChoice(0, delta)},
|
||||
nil,
|
||||
))
|
||||
}
|
||||
|
||||
func (s *chatStreamRuntime) sendDone() {
|
||||
_, _ = s.w.Write([]byte("data: [DONE]\n\n"))
|
||||
if s.canFlush {
|
||||
@@ -257,7 +274,6 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
|
||||
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReasonHandlerRequested}
|
||||
}
|
||||
|
||||
newChoices := make([]map[string]any, 0, len(parsed.Parts))
|
||||
contentSeen := false
|
||||
for _, p := range parsed.ToolDetectionThinkingParts {
|
||||
trimmed := sse.TrimContinuationOverlap(s.toolDetectionThinking.String(), p.Text)
|
||||
@@ -266,11 +282,6 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
|
||||
}
|
||||
}
|
||||
for _, p := range parsed.Parts {
|
||||
delta := map[string]any{}
|
||||
if !s.firstChunkSent {
|
||||
delta["role"] = "assistant"
|
||||
s.firstChunkSent = true
|
||||
}
|
||||
if p.Type == "thinking" {
|
||||
rawTrimmed := sse.TrimContinuationOverlap(s.rawThinking.String(), p.Text)
|
||||
if rawTrimmed != "" {
|
||||
@@ -287,7 +298,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
|
||||
continue
|
||||
}
|
||||
s.thinking.WriteString(trimmed)
|
||||
delta["reasoning_content"] = trimmed
|
||||
s.sendDelta(map[string]any{"reasoning_content": trimmed})
|
||||
}
|
||||
} else {
|
||||
rawTrimmed := sse.TrimContinuationOverlap(s.rawText.String(), p.Text)
|
||||
@@ -308,7 +319,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
|
||||
if trimmed == "" {
|
||||
continue
|
||||
}
|
||||
delta["content"] = trimmed
|
||||
s.sendDelta(map[string]any{"content": trimmed})
|
||||
} else {
|
||||
events := toolstream.ProcessChunk(&s.toolSieve, rawTrimmed, s.toolNames)
|
||||
for _, evt := range events {
|
||||
@@ -328,11 +339,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
|
||||
"tool_calls": formatted,
|
||||
}
|
||||
s.toolCallsEmitted = true
|
||||
if !s.firstChunkSent {
|
||||
tcDelta["role"] = "assistant"
|
||||
s.firstChunkSent = true
|
||||
}
|
||||
newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, tcDelta))
|
||||
s.sendDelta(tcDelta)
|
||||
continue
|
||||
}
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
@@ -341,11 +348,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
|
||||
tcDelta := map[string]any{
|
||||
"tool_calls": formatFinalStreamToolCallsWithStableIDs(evt.ToolCalls, s.streamToolCallIDs, s.toolsRaw),
|
||||
}
|
||||
if !s.firstChunkSent {
|
||||
tcDelta["role"] = "assistant"
|
||||
s.firstChunkSent = true
|
||||
}
|
||||
newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, tcDelta))
|
||||
s.sendDelta(tcDelta)
|
||||
s.resetStreamToolCallState()
|
||||
continue
|
||||
}
|
||||
@@ -357,22 +360,11 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
|
||||
contentDelta := map[string]any{
|
||||
"content": cleaned,
|
||||
}
|
||||
if !s.firstChunkSent {
|
||||
contentDelta["role"] = "assistant"
|
||||
s.firstChunkSent = true
|
||||
}
|
||||
newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, contentDelta))
|
||||
s.sendDelta(contentDelta)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(delta) > 0 {
|
||||
newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, delta))
|
||||
}
|
||||
}
|
||||
|
||||
if len(newChoices) > 0 {
|
||||
s.sendChunk(openaifmt.BuildChatStreamChunk(s.completionID, s.created, s.model, newChoices, nil))
|
||||
}
|
||||
return streamengine.ParsedDecision{ContentSeen: contentSeen}
|
||||
}
|
||||
|
||||
@@ -49,6 +49,7 @@ func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Co
|
||||
detected := detectAssistantToolCalls(result.rawText, result.text, result.rawThinking, result.toolDetectionThinking, toolNames)
|
||||
result.detectedCalls = len(detected.Calls)
|
||||
result.body = openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, result.thinking, result.text, detected.Calls, toolsRaw)
|
||||
addRefFileTokensToUsage(result.body, refFileTokens)
|
||||
result.finishReason = chatFinishReason(result.body)
|
||||
if !shouldRetryChatNonStream(result, attempts) {
|
||||
h.finishChatNonStreamResult(w, result, attempts, usagePrompt, refFileTokens, historySession)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package chat
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -239,6 +240,75 @@ func TestHandleStreamToolsPlainTextStreamsBeforeFinish(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleStreamThinkingDisabledDoesNotLeakHiddenFragmentContinuations(t *testing.T) {
|
||||
h := &Handler{}
|
||||
resp := makeSSEHTTPResponse(
|
||||
`data: {"p":"response/fragments","o":"APPEND","v":[{"type":"THINK","content":"我们"}]}`,
|
||||
`data: {"p":"response/fragments/-1/content","v":"被"}`,
|
||||
`data: {"v":"要求"}`,
|
||||
`data: {"p":"response/fragments","o":"APPEND","v":[{"type":"RESPONSE","content":"答"}]}`,
|
||||
`data: {"p":"response/fragments/-1/content","v":"案"}`,
|
||||
`data: [DONE]`,
|
||||
)
|
||||
rec := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
|
||||
|
||||
h.handleStream(rec, req, resp, "cid-hidden-fragment", "deepseek-v4-flash", "prompt", 0, false, false, nil, nil, nil)
|
||||
|
||||
frames, done := parseSSEDataFrames(t, rec.Body.String())
|
||||
if !done {
|
||||
t.Fatalf("expected [DONE], body=%s", rec.Body.String())
|
||||
}
|
||||
content := strings.Builder{}
|
||||
for _, frame := range frames {
|
||||
choices, _ := frame["choices"].([]any)
|
||||
for _, item := range choices {
|
||||
choice, _ := item.(map[string]any)
|
||||
delta, _ := choice["delta"].(map[string]any)
|
||||
if c, ok := delta["content"].(string); ok {
|
||||
content.WriteString(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
if got := content.String(); got != "答案" {
|
||||
t.Fatalf("expected only visible response text, got %q body=%s", got, rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleStreamEmitsSingleChoiceFramesForMultipleParsedParts(t *testing.T) {
|
||||
h := &Handler{}
|
||||
resp := makeSSEHTTPResponse(
|
||||
`data: {"p":"response/fragments","o":"APPEND","v":[{"type":"THINK","content":"我们"},{"type":"THINK","content":"被"},{"type":"THINK","content":"要求"},{"type":"RESPONSE","content":"答"},{"type":"RESPONSE","content":"案"}]}`,
|
||||
`data: [DONE]`,
|
||||
)
|
||||
rec := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
|
||||
|
||||
h.handleStream(rec, req, resp, "cid-multi-parts", "deepseek-v4-pro", "prompt", 0, true, false, nil, nil, nil)
|
||||
|
||||
frames, done := parseSSEDataFrames(t, rec.Body.String())
|
||||
if !done {
|
||||
t.Fatalf("expected [DONE], body=%s", rec.Body.String())
|
||||
}
|
||||
var reasoning, content strings.Builder
|
||||
for _, frame := range frames {
|
||||
choices, _ := frame["choices"].([]any)
|
||||
if len(choices) != 1 {
|
||||
t.Fatalf("expected exactly one choice per stream frame, got %d frame=%#v body=%s", len(choices), frame, rec.Body.String())
|
||||
}
|
||||
choice, _ := choices[0].(map[string]any)
|
||||
delta, _ := choice["delta"].(map[string]any)
|
||||
reasoning.WriteString(asString(delta["reasoning_content"]))
|
||||
content.WriteString(asString(delta["content"]))
|
||||
}
|
||||
if got := reasoning.String(); got != "我们被要求" {
|
||||
t.Fatalf("first-choice-only client would miss reasoning tokens: got %q body=%s", got, rec.Body.String())
|
||||
}
|
||||
if got := content.String(); got != "答案" {
|
||||
t.Fatalf("first-choice-only client would miss content tokens: got %q body=%s", got, rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleStreamIncompleteCapturedToolJSONFlushesAsTextOnFinalize(t *testing.T) {
|
||||
h := &Handler{}
|
||||
resp := makeSSEHTTPResponse(
|
||||
@@ -447,3 +517,45 @@ func TestHandleStreamCoercesSchemaDeclaredStringArgumentsOnFinalize(t *testing.T
|
||||
}
|
||||
t.Fatalf("expected at least one streamed tool call delta, body=%s", rec.Body.String())
|
||||
}
|
||||
|
||||
func TestHandleNonStreamWithRetryIncludesRefFileTokensInUsage(t *testing.T) {
|
||||
h := &Handler{}
|
||||
|
||||
run := func(refFileTokens int) map[string]any {
|
||||
resp := makeSSEHTTPResponse(
|
||||
`data: {"p":"response/content","v":"hello world"}`,
|
||||
`data: [DONE]`,
|
||||
)
|
||||
rec := httptest.NewRecorder()
|
||||
h.handleNonStreamWithRetry(rec, context.Background(), nil, resp, nil, "", "cid-ref", "deepseek-v4-flash", "prompt", refFileTokens, false, false, nil, nil, nil)
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
return decodeJSONBody(t, rec.Body.String())
|
||||
}
|
||||
|
||||
base := run(0)
|
||||
withRef := run(7)
|
||||
|
||||
baseUsage, _ := base["usage"].(map[string]any)
|
||||
refUsage, _ := withRef["usage"].(map[string]any)
|
||||
if baseUsage == nil || refUsage == nil {
|
||||
t.Fatalf("expected usage objects, base=%#v ref=%#v", base["usage"], withRef["usage"])
|
||||
}
|
||||
|
||||
getInt := func(m map[string]any, key string) int {
|
||||
t.Helper()
|
||||
v, ok := m[key].(float64)
|
||||
if !ok {
|
||||
t.Fatalf("expected numeric %s, got %#v", key, m[key])
|
||||
}
|
||||
return int(v)
|
||||
}
|
||||
|
||||
if got := getInt(refUsage, "prompt_tokens") - getInt(baseUsage, "prompt_tokens"); got != 7 {
|
||||
t.Fatalf("expected prompt_tokens delta 7, got %d", got)
|
||||
}
|
||||
if got := getInt(refUsage, "total_tokens") - getInt(baseUsage, "total_tokens"); got != 7 {
|
||||
t.Fatalf("expected total_tokens delta 7, got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user