mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-12 12:17:47 +08:00
Fix stream compatibility and vision model exposure
This commit is contained in:
@@ -15,6 +15,7 @@ import (
|
||||
"ds2api/internal/devcapture"
|
||||
adminshared "ds2api/internal/httpapi/admin/shared"
|
||||
"ds2api/internal/rawsample"
|
||||
"ds2api/internal/util"
|
||||
)
|
||||
|
||||
type captureChain struct {
|
||||
@@ -479,10 +480,13 @@ func previewCaptureChainResponse(chain captureChain) string {
|
||||
|
||||
func previewText(text string, limit int) string {
|
||||
text = strings.TrimSpace(text)
|
||||
if limit <= 0 || len(text) <= limit {
|
||||
if limit <= 0 {
|
||||
return text
|
||||
}
|
||||
return text[:limit] + "..."
|
||||
if truncated, ok := util.TruncateRunes(text, limit); ok {
|
||||
return truncated + "..."
|
||||
}
|
||||
return text
|
||||
}
|
||||
|
||||
func captureChainHasTruncatedResponse(chain captureChain) bool {
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"ds2api/internal/devcapture"
|
||||
)
|
||||
@@ -231,6 +232,16 @@ func TestCombineCaptureBodiesPreservesOrderAndSeparators(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestPreviewTextPreservesUTF8MB4Characters checks that previewText never
// splits a multi-byte character: the input is 281 copies of a 4-byte emoji,
// one more than the limit of 280.
func TestPreviewTextPreservesUTF8MB4Characters(t *testing.T) {
|
||||
preview := previewText(strings.Repeat("😀", 281), 280)
|
||||
// The preview must still decode as well-formed UTF-8.
if !utf8.ValidString(preview) {
|
||||
t.Fatalf("expected valid utf-8 preview, got %q", preview)
|
||||
}
|
||||
// Exactly 280 whole emoji plus the "..." marker are expected, i.e. the
// limit is applied per character rather than per byte.
if preview != strings.Repeat("😀", 280)+"..." {
|
||||
t.Fatalf("unexpected preview: %q", preview)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQueryRawSampleCapturesGroupsBySessionAndMatchesQuestion(t *testing.T) {
|
||||
devcapture.Global().Clear()
|
||||
defer devcapture.Global().Clear()
|
||||
|
||||
@@ -310,8 +310,8 @@ func TestChatCompletionsCurrentInputFilePersistsNeutralPrompt(t *testing.T) {
|
||||
if len(ds.uploadCalls) != 1 {
|
||||
t.Fatalf("expected current input upload to happen, got %d", len(ds.uploadCalls))
|
||||
}
|
||||
if ds.uploadCalls[0].Filename != "IGNORE.txt" {
|
||||
t.Fatalf("expected IGNORE.txt upload, got %q", ds.uploadCalls[0].Filename)
|
||||
if ds.uploadCalls[0].Filename != "history.txt" {
|
||||
t.Fatalf("expected history.txt upload, got %q", ds.uploadCalls[0].Filename)
|
||||
}
|
||||
if full.HistoryText != string(ds.uploadCalls[0].Data) {
|
||||
t.Fatalf("expected uploaded current input file to be persisted in history text")
|
||||
|
||||
@@ -36,8 +36,10 @@ type chatStreamRuntime struct {
|
||||
toolSieve toolstream.State
|
||||
streamToolCallIDs map[int]string
|
||||
streamToolNames map[int]string
|
||||
rawThinking strings.Builder
|
||||
thinking strings.Builder
|
||||
toolDetectionThinking strings.Builder
|
||||
rawText strings.Builder
|
||||
text strings.Builder
|
||||
responseMessageID int
|
||||
|
||||
@@ -141,7 +143,7 @@ func (s *chatStreamRuntime) finalize(finishReason string, deferEmptyOutput bool)
|
||||
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
|
||||
s.finalThinking = finalThinking
|
||||
s.finalText = finalText
|
||||
detected := detectAssistantToolCalls(finalText, finalThinking, finalToolDetectionThinking, s.toolNames)
|
||||
detected := detectAssistantToolCalls(s.rawText.String(), s.rawThinking.String(), finalToolDetectionThinking, s.toolNames)
|
||||
if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted {
|
||||
finishReason = "tool_calls"
|
||||
delta := map[string]any{
|
||||
@@ -186,7 +188,7 @@ func (s *chatStreamRuntime) finalize(finishReason string, deferEmptyOutput bool)
|
||||
continue
|
||||
}
|
||||
cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
|
||||
if cleaned == "" {
|
||||
if cleaned == "" || (s.searchEnabled && sse.IsCitation(cleaned)) {
|
||||
continue
|
||||
}
|
||||
delta := map[string]any{
|
||||
@@ -263,21 +265,22 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
|
||||
}
|
||||
}
|
||||
for _, p := range parsed.Parts {
|
||||
cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
|
||||
if s.searchEnabled && sse.IsCitation(cleanedText) {
|
||||
continue
|
||||
}
|
||||
if cleanedText == "" {
|
||||
continue
|
||||
}
|
||||
contentSeen = true
|
||||
delta := map[string]any{}
|
||||
if !s.firstChunkSent {
|
||||
delta["role"] = "assistant"
|
||||
s.firstChunkSent = true
|
||||
}
|
||||
if p.Type == "thinking" {
|
||||
rawTrimmed := sse.TrimContinuationOverlap(s.rawThinking.String(), p.Text)
|
||||
if rawTrimmed != "" {
|
||||
s.rawThinking.WriteString(rawTrimmed)
|
||||
contentSeen = true
|
||||
}
|
||||
if s.thinkingEnabled {
|
||||
cleanedText := cleanVisibleOutput(rawTrimmed, s.stripReferenceMarkers)
|
||||
if cleanedText == "" {
|
||||
continue
|
||||
}
|
||||
trimmed := sse.TrimContinuationOverlap(s.thinking.String(), cleanedText)
|
||||
if trimmed == "" {
|
||||
continue
|
||||
@@ -286,15 +289,27 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
|
||||
delta["reasoning_content"] = trimmed
|
||||
}
|
||||
} else {
|
||||
trimmed := sse.TrimContinuationOverlap(s.text.String(), cleanedText)
|
||||
if trimmed == "" {
|
||||
rawTrimmed := sse.TrimContinuationOverlap(s.rawText.String(), p.Text)
|
||||
if rawTrimmed == "" {
|
||||
continue
|
||||
}
|
||||
s.text.WriteString(trimmed)
|
||||
s.rawText.WriteString(rawTrimmed)
|
||||
contentSeen = true
|
||||
cleanedText := cleanVisibleOutput(rawTrimmed, s.stripReferenceMarkers)
|
||||
if s.searchEnabled && sse.IsCitation(cleanedText) {
|
||||
continue
|
||||
}
|
||||
trimmed := sse.TrimContinuationOverlap(s.text.String(), cleanedText)
|
||||
if trimmed != "" {
|
||||
s.text.WriteString(trimmed)
|
||||
}
|
||||
if !s.bufferToolContent {
|
||||
if trimmed == "" {
|
||||
continue
|
||||
}
|
||||
delta["content"] = trimmed
|
||||
} else {
|
||||
events := toolstream.ProcessChunk(&s.toolSieve, trimmed, s.toolNames)
|
||||
events := toolstream.ProcessChunk(&s.toolSieve, rawTrimmed, s.toolNames)
|
||||
for _, evt := range events {
|
||||
if len(evt.ToolCallDeltas) > 0 {
|
||||
if !s.emitEarlyToolDeltas {
|
||||
@@ -335,7 +350,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
|
||||
}
|
||||
if evt.Content != "" {
|
||||
cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
|
||||
if cleaned == "" {
|
||||
if cleaned == "" || (s.searchEnabled && sse.IsCitation(cleaned)) {
|
||||
continue
|
||||
}
|
||||
contentDelta := map[string]any{
|
||||
|
||||
@@ -16,6 +16,8 @@ import (
|
||||
)
|
||||
|
||||
type chatNonStreamResult struct {
|
||||
rawThinking string
|
||||
rawText string
|
||||
thinking string
|
||||
toolDetectionThinking string
|
||||
text string
|
||||
@@ -31,6 +33,7 @@ func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Co
|
||||
currentResp := resp
|
||||
usagePrompt := finalPrompt
|
||||
accumulatedThinking := ""
|
||||
accumulatedRawThinking := ""
|
||||
accumulatedToolDetectionThinking := ""
|
||||
for {
|
||||
result, ok := h.collectChatNonStreamAttempt(w, currentResp, completionID, model, usagePrompt, thinkingEnabled, searchEnabled, toolNames, toolsRaw)
|
||||
@@ -38,10 +41,12 @@ func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Co
|
||||
return
|
||||
}
|
||||
accumulatedThinking += sse.TrimContinuationOverlap(accumulatedThinking, result.thinking)
|
||||
accumulatedRawThinking += sse.TrimContinuationOverlap(accumulatedRawThinking, result.rawThinking)
|
||||
accumulatedToolDetectionThinking += sse.TrimContinuationOverlap(accumulatedToolDetectionThinking, result.toolDetectionThinking)
|
||||
result.thinking = accumulatedThinking
|
||||
result.rawThinking = accumulatedRawThinking
|
||||
result.toolDetectionThinking = accumulatedToolDetectionThinking
|
||||
detected := detectAssistantToolCalls(result.text, result.thinking, result.toolDetectionThinking, toolNames)
|
||||
detected := detectAssistantToolCalls(result.rawText, result.rawThinking, result.toolDetectionThinking, toolNames)
|
||||
result.detectedCalls = len(detected.Calls)
|
||||
result.body = openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, result.thinking, result.text, detected.Calls, toolsRaw)
|
||||
result.finishReason = chatFinishReason(result.body)
|
||||
@@ -82,16 +87,17 @@ func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http.
|
||||
result := sse.CollectStream(resp, thinkingEnabled, true)
|
||||
stripReferenceMarkers := h.compatStripReferenceMarkers()
|
||||
finalThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
|
||||
finalToolDetectionThinking := cleanVisibleOutput(result.ToolDetectionThinking, stripReferenceMarkers)
|
||||
finalText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
|
||||
if searchEnabled {
|
||||
finalText = replaceCitationMarkersWithLinks(finalText, result.CitationLinks)
|
||||
}
|
||||
detected := detectAssistantToolCalls(finalText, finalThinking, finalToolDetectionThinking, toolNames)
|
||||
detected := detectAssistantToolCalls(result.Text, result.Thinking, result.ToolDetectionThinking, toolNames)
|
||||
respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, finalThinking, finalText, detected.Calls, toolsRaw)
|
||||
return chatNonStreamResult{
|
||||
rawThinking: result.Thinking,
|
||||
rawText: result.Text,
|
||||
thinking: finalThinking,
|
||||
toolDetectionThinking: finalToolDetectionThinking,
|
||||
toolDetectionThinking: result.ToolDetectionThinking,
|
||||
text: finalText,
|
||||
contentFilter: result.ContentFilter,
|
||||
detectedCalls: len(detected.Calls),
|
||||
|
||||
@@ -162,12 +162,11 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co
|
||||
|
||||
stripReferenceMarkers := h.compatStripReferenceMarkers()
|
||||
finalThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
|
||||
finalToolDetectionThinking := cleanVisibleOutput(result.ToolDetectionThinking, stripReferenceMarkers)
|
||||
finalText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
|
||||
if searchEnabled {
|
||||
finalText = replaceCitationMarkersWithLinks(finalText, result.CitationLinks)
|
||||
}
|
||||
detected := detectAssistantToolCalls(finalText, finalThinking, finalToolDetectionThinking, toolNames)
|
||||
detected := detectAssistantToolCalls(result.Text, result.Thinking, result.ToolDetectionThinking, toolNames)
|
||||
if shouldWriteUpstreamEmptyOutputError(finalText) && len(detected.Calls) == 0 {
|
||||
status, message, code := upstreamEmptyOutputDetail(result.ContentFilter, finalText, finalThinking)
|
||||
if historySession != nil {
|
||||
|
||||
@@ -291,20 +291,16 @@ func TestHandleStreamPromotesThinkingToolCallsOnFinalizeWithoutMidstreamIntercep
|
||||
if !streamHasToolCallsDelta(frames) {
|
||||
t.Fatalf("expected tool_calls delta from finalize fallback, body=%s", rec.Body.String())
|
||||
}
|
||||
reasoningSeen := false
|
||||
for _, frame := range frames {
|
||||
choices, _ := frame["choices"].([]any)
|
||||
for _, item := range choices {
|
||||
choice, _ := item.(map[string]any)
|
||||
delta, _ := choice["delta"].(map[string]any)
|
||||
if asString(delta["reasoning_content"]) != "" {
|
||||
reasoningSeen = true
|
||||
t.Fatalf("did not expect leaked reasoning_content markup, body=%s", rec.Body.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
if !reasoningSeen {
|
||||
t.Fatalf("expected reasoning_content to stream before finalize fallback, body=%s", rec.Body.String())
|
||||
}
|
||||
if streamFinishReason(frames) != "tool_calls" {
|
||||
t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
currentInputFilename = "IGNORE.txt"
|
||||
currentInputFilename = promptcompat.CurrentInputContextFilename
|
||||
currentInputContentType = "text/plain; charset=utf-8"
|
||||
currentInputPurpose = "assistants"
|
||||
)
|
||||
|
||||
@@ -79,7 +79,7 @@ func TestBuildOpenAICurrentInputContextTranscriptUsesInjectedFileWrapper(t *test
|
||||
if !strings.Contains(transcript, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("expected tool calls preserved, got %q", transcript)
|
||||
}
|
||||
if !strings.HasSuffix(transcript, "\n[file name]: IGNORE\n[file content begin]\n") {
|
||||
if !strings.HasSuffix(transcript, "\n[file name]: history.txt\n[file content begin]\n") {
|
||||
t.Fatalf("expected injected file wrapper suffix, got %q", transcript)
|
||||
}
|
||||
}
|
||||
@@ -274,7 +274,7 @@ func TestApplyCurrentInputFileUploadsFirstTurnWithInjectedWrapper(t *testing.T)
|
||||
t.Fatalf("expected 1 current input upload, got %d", len(ds.uploadCalls))
|
||||
}
|
||||
upload := ds.uploadCalls[0]
|
||||
if upload.Filename != "IGNORE.txt" {
|
||||
if upload.Filename != "history.txt" {
|
||||
t.Fatalf("unexpected upload filename: %q", upload.Filename)
|
||||
}
|
||||
uploadedText := string(upload.Data)
|
||||
@@ -287,13 +287,13 @@ func TestApplyCurrentInputFileUploadsFirstTurnWithInjectedWrapper(t *testing.T)
|
||||
if !strings.Contains(uploadedText, promptcompat.ThinkingInjectionMarker) {
|
||||
t.Fatalf("expected thinking injection in current input file, got %q", uploadedText)
|
||||
}
|
||||
if !strings.HasSuffix(uploadedText, "\n[file name]: IGNORE\n[file content begin]\n") {
|
||||
if !strings.HasSuffix(uploadedText, "\n[file name]: history.txt\n[file content begin]\n") {
|
||||
t.Fatalf("expected injected file wrapper suffix, got %q", uploadedText)
|
||||
}
|
||||
if strings.Contains(out.FinalPrompt, "first turn content that is long enough") {
|
||||
t.Fatalf("expected current input text to be replaced in live prompt, got %s", out.FinalPrompt)
|
||||
}
|
||||
if strings.Contains(out.FinalPrompt, "CURRENT_USER_INPUT.txt") || strings.Contains(out.FinalPrompt, "IGNORE.txt") || strings.Contains(out.FinalPrompt, "Read that file") {
|
||||
if strings.Contains(out.FinalPrompt, "CURRENT_USER_INPUT.txt") || strings.Contains(out.FinalPrompt, "history.txt") || strings.Contains(out.FinalPrompt, "Read that file") {
|
||||
t.Fatalf("expected live prompt not to instruct file reads, got %s", out.FinalPrompt)
|
||||
}
|
||||
if !strings.Contains(out.FinalPrompt, "Answer the latest user request directly.") {
|
||||
@@ -335,8 +335,8 @@ func TestApplyCurrentInputFileUploadsFullContextFile(t *testing.T) {
|
||||
t.Fatalf("expected one current input upload, got %d", len(ds.uploadCalls))
|
||||
}
|
||||
upload := ds.uploadCalls[0]
|
||||
if upload.Filename != "IGNORE.txt" {
|
||||
t.Fatalf("expected IGNORE.txt upload, got %q", upload.Filename)
|
||||
if upload.Filename != "history.txt" {
|
||||
t.Fatalf("expected history.txt upload, got %q", upload.Filename)
|
||||
}
|
||||
uploadedText := string(upload.Data)
|
||||
for _, want := range []string{"system instructions", "first user turn", "hidden reasoning", "tool result", "latest user turn", promptcompat.ThinkingInjectionMarker} {
|
||||
@@ -344,7 +344,7 @@ func TestApplyCurrentInputFileUploadsFullContextFile(t *testing.T) {
|
||||
t.Fatalf("expected full context file to contain %q, got %q", want, uploadedText)
|
||||
}
|
||||
}
|
||||
if strings.Contains(out.FinalPrompt, "first user turn") || strings.Contains(out.FinalPrompt, "latest user turn") || strings.Contains(out.FinalPrompt, "CURRENT_USER_INPUT.txt") || strings.Contains(out.FinalPrompt, "IGNORE.txt") || strings.Contains(out.FinalPrompt, "Read that file") {
|
||||
if strings.Contains(out.FinalPrompt, "first user turn") || strings.Contains(out.FinalPrompt, "latest user turn") || strings.Contains(out.FinalPrompt, "CURRENT_USER_INPUT.txt") || strings.Contains(out.FinalPrompt, "history.txt") || strings.Contains(out.FinalPrompt, "Read that file") {
|
||||
t.Fatalf("expected live prompt to use only a neutral continuation instruction, got %s", out.FinalPrompt)
|
||||
}
|
||||
if !strings.Contains(out.FinalPrompt, "Answer the latest user request directly.") {
|
||||
@@ -411,15 +411,15 @@ func TestChatCompletionsCurrentInputFileUploadsContextAndKeepsNeutralPrompt(t *t
|
||||
t.Fatalf("expected 1 upload call, got %d", len(ds.uploadCalls))
|
||||
}
|
||||
upload := ds.uploadCalls[0]
|
||||
if upload.Filename != "IGNORE.txt" {
|
||||
if upload.Filename != "history.txt" {
|
||||
t.Fatalf("unexpected upload filename: %q", upload.Filename)
|
||||
}
|
||||
if upload.Purpose != "assistants" {
|
||||
t.Fatalf("unexpected purpose: %q", upload.Purpose)
|
||||
}
|
||||
historyText := string(upload.Data)
|
||||
if !strings.Contains(historyText, "[file content end]") || !strings.Contains(historyText, "[file name]: IGNORE") {
|
||||
t.Fatalf("expected injected IGNORE wrapper, got %s", historyText)
|
||||
if !strings.Contains(historyText, "[file content end]") || !strings.Contains(historyText, "[file name]: history.txt") {
|
||||
t.Fatalf("expected injected history.txt wrapper, got %s", historyText)
|
||||
}
|
||||
if !strings.Contains(historyText, "latest user turn") {
|
||||
t.Fatalf("expected full context to include latest turn, got %s", historyText)
|
||||
|
||||
@@ -42,6 +42,14 @@ func TestSanitizeLeakedOutputRemovesDanglingThinkBlock(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestSanitizeLeakedOutputRemovesCompleteDSMLToolCallWrapper checks that
// sanitizeLeakedOutput drops a complete <|DSML|tool_calls> ... </|DSML|tool_calls>
// block while keeping the text on either side of it.
func TestSanitizeLeakedOutputRemovesCompleteDSMLToolCallWrapper(t *testing.T) {
|
||||
// Input: leading text, a full tool_calls wrapper containing one Bash invoke
// with an empty "command" parameter, then trailing text.
raw := "前置文本\n<|DSML|tool_calls>\n<|DSML|invoke name=\"Bash\">\n<|DSML|parameter name=\"command\"></|DSML|parameter>\n</|DSML|invoke>\n</|DSML|tool_calls>\n后置文本"
|
||||
got := sanitizeLeakedOutput(raw)
|
||||
// Only the wrapper itself is removed; the newline before it and the newline
// after it both survive, leaving a blank line between the two texts.
if got != "前置文本\n\n后置文本" {
|
||||
t.Fatalf("unexpected sanitize result for leaked dsml wrapper: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeLeakedOutputRemovesAgentXMLLeaks(t *testing.T) {
|
||||
raw := "Done.<attempt_completion><result>Some final answer</result></attempt_completion>"
|
||||
got := sanitizeLeakedOutput(raw)
|
||||
|
||||
@@ -18,6 +18,8 @@ import (
|
||||
)
|
||||
|
||||
type responsesNonStreamResult struct {
|
||||
rawThinking string
|
||||
rawText string
|
||||
thinking string
|
||||
toolDetectionThinking string
|
||||
text string
|
||||
@@ -32,6 +34,7 @@ func (h *Handler) handleResponsesNonStreamWithRetry(w http.ResponseWriter, ctx c
|
||||
currentResp := resp
|
||||
usagePrompt := finalPrompt
|
||||
accumulatedThinking := ""
|
||||
accumulatedRawThinking := ""
|
||||
accumulatedToolDetectionThinking := ""
|
||||
for {
|
||||
result, ok := h.collectResponsesNonStreamAttempt(w, currentResp, responseID, model, usagePrompt, thinkingEnabled, searchEnabled, toolNames, toolsRaw)
|
||||
@@ -39,10 +42,12 @@ func (h *Handler) handleResponsesNonStreamWithRetry(w http.ResponseWriter, ctx c
|
||||
return
|
||||
}
|
||||
accumulatedThinking += sse.TrimContinuationOverlap(accumulatedThinking, result.thinking)
|
||||
accumulatedRawThinking += sse.TrimContinuationOverlap(accumulatedRawThinking, result.rawThinking)
|
||||
accumulatedToolDetectionThinking += sse.TrimContinuationOverlap(accumulatedToolDetectionThinking, result.toolDetectionThinking)
|
||||
result.thinking = accumulatedThinking
|
||||
result.rawThinking = accumulatedRawThinking
|
||||
result.toolDetectionThinking = accumulatedToolDetectionThinking
|
||||
result.parsed = detectAssistantToolCalls(result.text, result.thinking, result.toolDetectionThinking, toolNames)
|
||||
result.parsed = detectAssistantToolCalls(result.rawText, result.rawThinking, result.toolDetectionThinking, toolNames)
|
||||
result.body = openaifmt.BuildResponseObjectWithToolCalls(responseID, model, usagePrompt, result.thinking, result.text, result.parsed.Calls, toolsRaw)
|
||||
|
||||
if !shouldRetryResponsesNonStream(result, attempts) {
|
||||
@@ -78,16 +83,17 @@ func (h *Handler) collectResponsesNonStreamAttempt(w http.ResponseWriter, resp *
|
||||
result := sse.CollectStream(resp, thinkingEnabled, false)
|
||||
stripReferenceMarkers := h.compatStripReferenceMarkers()
|
||||
sanitizedThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
|
||||
toolDetectionThinking := cleanVisibleOutput(result.ToolDetectionThinking, stripReferenceMarkers)
|
||||
sanitizedText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
|
||||
if searchEnabled {
|
||||
sanitizedText = replaceCitationMarkersWithLinks(sanitizedText, result.CitationLinks)
|
||||
}
|
||||
textParsed := detectAssistantToolCalls(sanitizedText, sanitizedThinking, toolDetectionThinking, toolNames)
|
||||
textParsed := detectAssistantToolCalls(result.Text, result.Thinking, result.ToolDetectionThinking, toolNames)
|
||||
responseObj := openaifmt.BuildResponseObjectWithToolCalls(responseID, model, usagePrompt, sanitizedThinking, sanitizedText, textParsed.Calls, toolsRaw)
|
||||
return responsesNonStreamResult{
|
||||
rawThinking: result.Thinking,
|
||||
rawText: result.Text,
|
||||
thinking: sanitizedThinking,
|
||||
toolDetectionThinking: toolDetectionThinking,
|
||||
toolDetectionThinking: result.ToolDetectionThinking,
|
||||
text: sanitizedText,
|
||||
contentFilter: result.ContentFilter,
|
||||
parsed: textParsed,
|
||||
|
||||
@@ -131,12 +131,11 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
|
||||
result := sse.CollectStream(resp, thinkingEnabled, true)
|
||||
stripReferenceMarkers := h.compatStripReferenceMarkers()
|
||||
sanitizedThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
|
||||
toolDetectionThinking := cleanVisibleOutput(result.ToolDetectionThinking, stripReferenceMarkers)
|
||||
sanitizedText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
|
||||
if searchEnabled {
|
||||
sanitizedText = replaceCitationMarkersWithLinks(sanitizedText, result.CitationLinks)
|
||||
}
|
||||
textParsed := detectAssistantToolCalls(sanitizedText, sanitizedThinking, toolDetectionThinking, toolNames)
|
||||
textParsed := detectAssistantToolCalls(result.Text, result.Thinking, result.ToolDetectionThinking, toolNames)
|
||||
if len(textParsed.Calls) == 0 && writeUpstreamEmptyOutputError(w, sanitizedText, sanitizedThinking, result.ContentFilter) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -36,8 +36,10 @@ type responsesStreamRuntime struct {
|
||||
toolCallsDoneEmitted bool
|
||||
|
||||
sieve toolstream.State
|
||||
rawThinking strings.Builder
|
||||
thinking strings.Builder
|
||||
toolDetectionThinking strings.Builder
|
||||
rawText strings.Builder
|
||||
text strings.Builder
|
||||
visibleText strings.Builder
|
||||
responseMessageID int
|
||||
@@ -141,15 +143,14 @@ func (s *responsesStreamRuntime) finalize(finishReason string, deferEmptyOutput
|
||||
s.finalErrorStatus = 0
|
||||
s.finalErrorMessage = ""
|
||||
s.finalErrorCode = ""
|
||||
finalThinking := s.thinking.String()
|
||||
finalToolDetectionThinking := s.toolDetectionThinking.String()
|
||||
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
|
||||
|
||||
if s.bufferToolContent {
|
||||
s.processToolStreamEvents(toolstream.Flush(&s.sieve, s.toolNames), true, true)
|
||||
}
|
||||
|
||||
textParsed := detectAssistantToolCalls(finalText, finalThinking, finalToolDetectionThinking, s.toolNames)
|
||||
finalThinking := s.thinking.String()
|
||||
finalToolDetectionThinking := s.toolDetectionThinking.String()
|
||||
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
|
||||
textParsed := detectAssistantToolCalls(s.rawText.String(), s.rawThinking.String(), finalToolDetectionThinking, s.toolNames)
|
||||
detected := textParsed.Calls
|
||||
s.logToolPolicyRejections(textParsed)
|
||||
|
||||
@@ -227,18 +228,19 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
|
||||
}
|
||||
}
|
||||
for _, p := range parsed.Parts {
|
||||
cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
|
||||
if cleanedText == "" {
|
||||
continue
|
||||
}
|
||||
if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(cleanedText) {
|
||||
continue
|
||||
}
|
||||
contentSeen = true
|
||||
if p.Type == "thinking" {
|
||||
rawTrimmed := sse.TrimContinuationOverlap(s.rawThinking.String(), p.Text)
|
||||
if rawTrimmed != "" {
|
||||
s.rawThinking.WriteString(rawTrimmed)
|
||||
contentSeen = true
|
||||
}
|
||||
if !s.thinkingEnabled {
|
||||
continue
|
||||
}
|
||||
cleanedText := cleanVisibleOutput(rawTrimmed, s.stripReferenceMarkers)
|
||||
if cleanedText == "" {
|
||||
continue
|
||||
}
|
||||
trimmed := sse.TrimContinuationOverlap(s.thinking.String(), cleanedText)
|
||||
if trimmed == "" {
|
||||
continue
|
||||
@@ -248,16 +250,28 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
|
||||
continue
|
||||
}
|
||||
|
||||
trimmed := sse.TrimContinuationOverlap(s.text.String(), cleanedText)
|
||||
if trimmed == "" {
|
||||
rawTrimmed := sse.TrimContinuationOverlap(s.rawText.String(), p.Text)
|
||||
if rawTrimmed == "" {
|
||||
continue
|
||||
}
|
||||
s.text.WriteString(trimmed)
|
||||
s.rawText.WriteString(rawTrimmed)
|
||||
contentSeen = true
|
||||
cleanedText := cleanVisibleOutput(rawTrimmed, s.stripReferenceMarkers)
|
||||
if s.searchEnabled && sse.IsCitation(cleanedText) {
|
||||
continue
|
||||
}
|
||||
trimmed := sse.TrimContinuationOverlap(s.text.String(), cleanedText)
|
||||
if trimmed != "" {
|
||||
s.text.WriteString(trimmed)
|
||||
}
|
||||
if !s.bufferToolContent {
|
||||
if trimmed == "" {
|
||||
continue
|
||||
}
|
||||
s.emitTextDelta(trimmed)
|
||||
continue
|
||||
}
|
||||
s.processToolStreamEvents(toolstream.ProcessChunk(&s.sieve, trimmed, s.toolNames), true, true)
|
||||
s.processToolStreamEvents(toolstream.ProcessChunk(&s.sieve, rawTrimmed, s.toolNames), true, true)
|
||||
}
|
||||
|
||||
return streamengine.ParsedDecision{ContentSeen: contentSeen}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
|
||||
openaifmt "ds2api/internal/format/openai"
|
||||
"ds2api/internal/sse"
|
||||
"ds2api/internal/toolstream"
|
||||
)
|
||||
|
||||
@@ -43,7 +44,10 @@ func (s *responsesStreamRuntime) sendDone() {
|
||||
func (s *responsesStreamRuntime) processToolStreamEvents(events []toolstream.Event, emitContent bool, resetAfterToolCalls bool) {
|
||||
for _, evt := range events {
|
||||
if emitContent && evt.Content != "" {
|
||||
s.emitTextDelta(evt.Content)
|
||||
cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
|
||||
if cleaned != "" && !(s.searchEnabled && sse.IsCitation(cleaned)) {
|
||||
s.emitTextDelta(cleaned)
|
||||
}
|
||||
}
|
||||
if len(evt.ToolCallDeltas) > 0 {
|
||||
if !s.emitEarlyToolDeltas {
|
||||
|
||||
@@ -254,8 +254,8 @@ func TestHandleResponsesStreamPromotesThinkingToolCallsOnFinalizeWithoutMidstrea
|
||||
h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-v4-pro", "prompt", true, false, []string{"read_file"}, nil, promptcompat.DefaultToolChoicePolicy(), "")
|
||||
|
||||
body := rec.Body.String()
|
||||
if !strings.Contains(body, "event: response.reasoning.delta") {
|
||||
t.Fatalf("expected reasoning delta in stream body, got %s", body)
|
||||
if strings.Contains(body, "event: response.reasoning.delta") {
|
||||
t.Fatalf("did not expect leaked reasoning delta in stream body, got %s", body)
|
||||
}
|
||||
if !strings.Contains(body, "event: response.function_call_arguments.done") {
|
||||
t.Fatalf("expected finalize fallback function call event, got %s", body)
|
||||
|
||||
@@ -3,6 +3,8 @@ package shared
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"ds2api/internal/toolcall"
|
||||
)
|
||||
|
||||
var emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```")
|
||||
@@ -47,10 +49,42 @@ func sanitizeLeakedOutput(text string) string {
|
||||
out = leakedThinkTagPattern.ReplaceAllString(out, "")
|
||||
out = leakedBOSMarkerPattern.ReplaceAllString(out, "")
|
||||
out = leakedMetaMarkerPattern.ReplaceAllString(out, "")
|
||||
out = stripLeakedToolCallWrapperBlocks(out)
|
||||
out = sanitizeLeakedAgentXMLBlocks(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// stripLeakedToolCallWrapperBlocks returns text with every complete
// tool_calls wrapper removed, from its opening tag through the matching
// closing tag. All other content, including other tool markup tags and
// tool_calls openers that have no matching close, is copied through verbatim.
func stripLeakedToolCallWrapperBlocks(text string) string {
|
||||
if text == "" {
|
||||
return text
|
||||
}
|
||||
var b strings.Builder
|
||||
// pos is the byte offset of the first input byte not yet copied or skipped.
pos := 0
|
||||
for pos < len(text) {
|
||||
// NOTE(review): presumably this skips markup inside "ignored" regions
// (e.g. code fences) — confirm against the toolcall package.
tag, ok := toolcall.FindToolMarkupTagOutsideIgnored(text, pos)
|
||||
if !ok {
|
||||
// No further tags: keep the remainder as-is and stop.
b.WriteString(text[pos:])
|
||||
break
|
||||
}
|
||||
// Keep whatever precedes the tag.
if tag.Start > pos {
|
||||
b.WriteString(text[pos:tag.Start])
|
||||
}
|
||||
// Only an opening tool_calls tag can start a block to strip; any other
// tag is kept unchanged. tag.End is used as an inclusive index here.
if tag.Closing || tag.Name != "tool_calls" {
|
||||
b.WriteString(text[tag.Start : tag.End+1])
|
||||
pos = tag.End + 1
|
||||
continue
|
||||
}
|
||||
closeTag, ok := toolcall.FindMatchingToolMarkupClose(text, tag)
|
||||
if !ok {
|
||||
// Unterminated wrapper: keep the opening tag and continue scanning
// after it rather than dropping the rest of the text.
b.WriteString(text[tag.Start : tag.End+1])
|
||||
pos = tag.End + 1
|
||||
continue
|
||||
}
|
||||
// Complete wrapper: skip everything through the closing tag.
pos = closeTag.End + 1
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func stripDanglingThinkSuffix(text string) string {
|
||||
matches := leakedThinkTagPattern.FindAllStringIndex(text, -1)
|
||||
if len(matches) == 0 {
|
||||
|
||||
Reference in New Issue
Block a user