mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-16 22:25:15 +08:00
refactor: differentiate reference marker handling between stream and non-stream modes
- Stream: strip both and [reference:N] markers to prevent leaking partial link metadata during incremental output - Non-stream: convert citation/reference markers to Markdown links for Claude Messages, Gemini generateContent, and OpenAI Chat/Responses - Remove StripReferenceMarkers option from call sites; behavior is now determined automatically by stream vs non-stream context - Extend JS runtime stripReferenceMarkersText() to also match [citation:N] - Add tests for streaming marker stripping and non-stream link conversion Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -11,7 +11,7 @@ func TestBuildTurnFromCollectedTextCitation(t *testing.T) {
|
||||
turn := BuildTurnFromCollected(sse.CollectResult{
|
||||
Text: "See [citation:1]",
|
||||
CitationLinks: map[int]string{1: "https://example.com"},
|
||||
}, BuildOptions{Model: "deepseek-v4-flash", Prompt: "prompt", SearchEnabled: true, StripReferenceMarkers: true})
|
||||
}, BuildOptions{Model: "deepseek-v4-flash", Prompt: "prompt", SearchEnabled: true})
|
||||
if turn.Text != "See [1](https://example.com)" {
|
||||
t.Fatalf("text mismatch: %q", turn.Text)
|
||||
}
|
||||
@@ -23,6 +23,20 @@ func TestBuildTurnFromCollectedTextCitation(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildTurnFromCollectedKeepsNonStreamReferenceLinks(t *testing.T) {
|
||||
turn := BuildTurnFromCollected(sse.CollectResult{
|
||||
Text: "结论[reference:0],补充[reference:1]。",
|
||||
CitationLinks: map[int]string{
|
||||
1: "https://example.com/a",
|
||||
2: "https://example.com/b",
|
||||
},
|
||||
}, BuildOptions{Model: "deepseek-v4-flash-search", Prompt: "prompt", SearchEnabled: true})
|
||||
want := "结论[0](https://example.com/a),补充[1](https://example.com/b)。"
|
||||
if turn.Text != want {
|
||||
t.Fatalf("text mismatch: got %q want %q", turn.Text, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildTurnFromCollectedToolCall(t *testing.T) {
|
||||
turn := BuildTurnFromCollected(sse.CollectResult{
|
||||
Text: `<tool_calls><invoke name="Write"><parameter name="content">{"x":1}</parameter></invoke></tool_calls>`,
|
||||
|
||||
@@ -119,6 +119,29 @@ func TestExecuteNonStreamWithRetryUsesParentMessageForEmptyRetry(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteNonStreamWithRetryConvertsReferenceMarkers(t *testing.T) {
|
||||
ds := &fakeDeepSeekCaller{responses: []*http.Response{sseHTTPResponse(
|
||||
http.StatusOK,
|
||||
`data: {"p":"response/content","v":"答案[reference:0]。","citation":{"cite_index":0,"url":"https://example.com/ref"}}`,
|
||||
)}}
|
||||
stdReq := promptcompat.StandardRequest{
|
||||
Surface: "test",
|
||||
ResponseModel: "deepseek-v4-flash-search",
|
||||
PromptTokenText: "prompt",
|
||||
FinalPrompt: "final prompt",
|
||||
Search: true,
|
||||
}
|
||||
|
||||
result, outErr := ExecuteNonStreamWithRetry(context.Background(), ds, &auth.RequestAuth{}, stdReq, Options{})
|
||||
if outErr != nil {
|
||||
t.Fatalf("unexpected output error: %#v", outErr)
|
||||
}
|
||||
want := "答案[0](https://example.com/ref)。"
|
||||
if result.Turn.Text != want {
|
||||
t.Fatalf("text mismatch: got %q want %q", result.Turn.Text, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartCompletionAppliesCurrentInputFileGlobally(t *testing.T) {
|
||||
ds := &fakeDeepSeekCaller{responses: []*http.Response{sseHTTPResponse(http.StatusOK, `data: {"p":"response/content","v":"ok"}`)}}
|
||||
stdReq := promptcompat.StandardRequest{
|
||||
|
||||
@@ -100,9 +100,8 @@ func (h *Handler) handleClaudeDirect(w http.ResponseWriter, r *http.Request) boo
|
||||
return true
|
||||
}
|
||||
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
})
|
||||
if outErr != nil {
|
||||
if historySession != nil {
|
||||
|
||||
@@ -97,9 +97,8 @@ func (h *Handler) handleGeminiDirect(w http.ResponseWriter, r *http.Request, str
|
||||
return true
|
||||
}
|
||||
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
})
|
||||
if outErr != nil {
|
||||
if historySession != nil {
|
||||
@@ -330,12 +329,11 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
|
||||
}
|
||||
|
||||
result := sse.CollectStream(resp, thinkingEnabled, true)
|
||||
stripReferenceMarkers := stripReferenceMarkersEnabled()
|
||||
writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(
|
||||
model,
|
||||
finalPrompt,
|
||||
cleanVisibleOutput(result.Thinking, stripReferenceMarkers),
|
||||
cleanVisibleOutput(result.Text, stripReferenceMarkers),
|
||||
cleanVisibleOutput(result.Thinking, false),
|
||||
cleanVisibleOutput(result.Text, false),
|
||||
toolNames,
|
||||
))
|
||||
}
|
||||
|
||||
@@ -98,12 +98,11 @@ func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http.
|
||||
}
|
||||
result := sse.CollectStream(resp, thinkingEnabled, true)
|
||||
turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{
|
||||
Model: model,
|
||||
Prompt: usagePrompt,
|
||||
SearchEnabled: searchEnabled,
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
ToolNames: toolNames,
|
||||
ToolsRaw: toolsRaw,
|
||||
Model: model,
|
||||
Prompt: usagePrompt,
|
||||
SearchEnabled: searchEnabled,
|
||||
ToolNames: toolNames,
|
||||
ToolsRaw: toolsRaw,
|
||||
})
|
||||
respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, turn.Thinking, turn.Text, turn.ToolCalls, toolsRaw)
|
||||
return chatNonStreamResult{
|
||||
|
||||
@@ -80,9 +80,8 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
if !stdReq.Stream {
|
||||
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
})
|
||||
sessionID = result.SessionID
|
||||
if outErr != nil {
|
||||
@@ -164,14 +163,13 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co
|
||||
result := sse.CollectStream(resp, thinkingEnabled, true)
|
||||
|
||||
turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{
|
||||
Model: model,
|
||||
Prompt: finalPrompt,
|
||||
RefFileTokens: refFileTokens,
|
||||
SearchEnabled: searchEnabled,
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
ToolNames: toolNames,
|
||||
ToolsRaw: toolsRaw,
|
||||
ToolChoice: promptcompat.DefaultToolChoicePolicy(),
|
||||
Model: model,
|
||||
Prompt: finalPrompt,
|
||||
RefFileTokens: refFileTokens,
|
||||
SearchEnabled: searchEnabled,
|
||||
ToolNames: toolNames,
|
||||
ToolsRaw: toolsRaw,
|
||||
ToolChoice: promptcompat.DefaultToolChoicePolicy(),
|
||||
})
|
||||
outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{})
|
||||
if outcome.ShouldFail {
|
||||
|
||||
@@ -105,9 +105,8 @@ func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
if !stdReq.Stream {
|
||||
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
})
|
||||
if outErr != nil {
|
||||
if historySession != nil {
|
||||
@@ -152,14 +151,13 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
|
||||
result := sse.CollectStream(resp, thinkingEnabled, true)
|
||||
|
||||
turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{
|
||||
Model: model,
|
||||
Prompt: finalPrompt,
|
||||
RefFileTokens: refFileTokens,
|
||||
SearchEnabled: searchEnabled,
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
ToolNames: toolNames,
|
||||
ToolsRaw: toolsRaw,
|
||||
ToolChoice: toolChoice,
|
||||
Model: model,
|
||||
Prompt: finalPrompt,
|
||||
RefFileTokens: refFileTokens,
|
||||
SearchEnabled: searchEnabled,
|
||||
ToolNames: toolNames,
|
||||
ToolsRaw: toolsRaw,
|
||||
ToolChoice: toolChoice,
|
||||
})
|
||||
logResponsesToolPolicyRejection(traceID, toolChoice, turn.ParsedToolCalls, "text")
|
||||
outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{})
|
||||
|
||||
@@ -89,11 +89,11 @@ func (a *StreamAccumulator) applyTextPart(text string) StreamPartDelta {
|
||||
}
|
||||
a.RawText.WriteString(rawTrimmed)
|
||||
delta := StreamPartDelta{Type: "text", RawText: rawTrimmed}
|
||||
cleanedText := CleanVisibleOutput(rawTrimmed, a.StripReferenceMarkers)
|
||||
if a.SearchEnabled && sse.IsCitation(cleanedText) {
|
||||
if a.SearchEnabled && sse.IsCitation(rawTrimmed) {
|
||||
delta.CitationOnly = true
|
||||
return delta
|
||||
}
|
||||
cleanedText := CleanVisibleOutput(rawTrimmed, a.StripReferenceMarkers)
|
||||
trimmed := sse.TrimContinuationOverlapFromBuilder(&a.Text, cleanedText)
|
||||
if trimmed == "" {
|
||||
return delta
|
||||
|
||||
@@ -95,3 +95,21 @@ func TestStreamAccumulatorSuppressesCitationTextWhenSearchEnabled(t *testing.T)
|
||||
t.Fatalf("visible text = %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamAccumulatorStripsInlineCitationAndReferenceMarkers(t *testing.T) {
|
||||
acc := StreamAccumulator{SearchEnabled: true, StripReferenceMarkers: true}
|
||||
result := acc.Apply(sse.LineResult{
|
||||
Parsed: true,
|
||||
Parts: []sse.ContentPart{{Type: "text", Text: "广州天气[citation:1] 多云[reference:0]"}},
|
||||
})
|
||||
|
||||
if !result.ContentSeen {
|
||||
t.Fatalf("expected marker chunk to mark upstream content")
|
||||
}
|
||||
if got := acc.Text.String(); got != "广州天气 多云" {
|
||||
t.Fatalf("visible text = %q", got)
|
||||
}
|
||||
if len(result.Parts) != 1 || result.Parts[0].VisibleText != "广州天气 多云" {
|
||||
t.Fatalf("unexpected parts: %#v", result.Parts)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -621,7 +621,7 @@ function stripReferenceMarkersText(text) {
|
||||
if (!text) {
|
||||
return text;
|
||||
}
|
||||
return text.replace(/\[reference:\s*\d+\]/gi, '');
|
||||
return text.replace(/\[(?:citation|reference):\s*\d+\]/gi, '');
|
||||
}
|
||||
|
||||
function asString(v) {
|
||||
|
||||
@@ -2,19 +2,18 @@ package textclean
|
||||
|
||||
import "regexp"
|
||||
|
||||
var referenceMarkerPattern = regexp.MustCompile(`(?i)\[reference:\s*\d+\]`)
|
||||
var citationReferenceMarkerPattern = regexp.MustCompile(`(?i)\[(citation|reference):\s*\d+\]`)
|
||||
|
||||
func StripReferenceMarkers(text string) string {
|
||||
if text == "" {
|
||||
return text
|
||||
}
|
||||
return referenceMarkerPattern.ReplaceAllString(text, "")
|
||||
return citationReferenceMarkerPattern.ReplaceAllString(text, "")
|
||||
}
|
||||
|
||||
// StripReferenceMarkersEnabled returns true while reference-marker
|
||||
// stripping remains the fixed runtime default. When the behaviour is
|
||||
// eventually removed this function can be deleted and callers can drop
|
||||
// the conditional.
|
||||
// StripReferenceMarkersEnabled returns the default for streaming surfaces,
|
||||
// where partial citation/reference markers are hidden before the final
|
||||
// link metadata is available.
|
||||
func StripReferenceMarkersEnabled() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user