refactor: differentiate reference marker handling between stream and non-stream modes

- Stream: strip both and [reference:N] markers to prevent
  leaking partial link metadata during incremental output
- Non-stream: convert citation/reference markers to Markdown links for
  Claude Messages, Gemini generateContent, and OpenAI Chat/Responses
- Remove StripReferenceMarkers option from call sites; behavior is now
  determined automatically by stream vs non-stream context
- Extend JS runtime stripReferenceMarkersText() to also match [citation:N]
- Add tests for streaming marker stripping and non-stream link conversion

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-05-03 17:53:49 +08:00
parent c099a6f7bf
commit 112bedb05d
14 changed files with 99 additions and 52 deletions

View File

@@ -100,9 +100,8 @@ func (h *Handler) handleClaudeDirect(w http.ResponseWriter, r *http.Request) boo
return true
}
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
StripReferenceMarkers: stripReferenceMarkersEnabled(),
RetryEnabled: true,
CurrentInputFile: h.Store,
RetryEnabled: true,
CurrentInputFile: h.Store,
})
if outErr != nil {
if historySession != nil {

View File

@@ -97,9 +97,8 @@ func (h *Handler) handleGeminiDirect(w http.ResponseWriter, r *http.Request, str
return true
}
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
StripReferenceMarkers: stripReferenceMarkersEnabled(),
RetryEnabled: true,
CurrentInputFile: h.Store,
RetryEnabled: true,
CurrentInputFile: h.Store,
})
if outErr != nil {
if historySession != nil {
@@ -330,12 +329,11 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
}
result := sse.CollectStream(resp, thinkingEnabled, true)
stripReferenceMarkers := stripReferenceMarkersEnabled()
writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(
model,
finalPrompt,
cleanVisibleOutput(result.Thinking, stripReferenceMarkers),
cleanVisibleOutput(result.Text, stripReferenceMarkers),
cleanVisibleOutput(result.Thinking, false),
cleanVisibleOutput(result.Text, false),
toolNames,
))
}

View File

@@ -98,12 +98,11 @@ func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http.
}
result := sse.CollectStream(resp, thinkingEnabled, true)
turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{
Model: model,
Prompt: usagePrompt,
SearchEnabled: searchEnabled,
StripReferenceMarkers: stripReferenceMarkersEnabled(),
ToolNames: toolNames,
ToolsRaw: toolsRaw,
Model: model,
Prompt: usagePrompt,
SearchEnabled: searchEnabled,
ToolNames: toolNames,
ToolsRaw: toolsRaw,
})
respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, turn.Thinking, turn.Text, turn.ToolCalls, toolsRaw)
return chatNonStreamResult{

View File

@@ -80,9 +80,8 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
if !stdReq.Stream {
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
StripReferenceMarkers: stripReferenceMarkersEnabled(),
RetryEnabled: true,
CurrentInputFile: h.Store,
RetryEnabled: true,
CurrentInputFile: h.Store,
})
sessionID = result.SessionID
if outErr != nil {
@@ -164,14 +163,13 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co
result := sse.CollectStream(resp, thinkingEnabled, true)
turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{
Model: model,
Prompt: finalPrompt,
RefFileTokens: refFileTokens,
SearchEnabled: searchEnabled,
StripReferenceMarkers: stripReferenceMarkersEnabled(),
ToolNames: toolNames,
ToolsRaw: toolsRaw,
ToolChoice: promptcompat.DefaultToolChoicePolicy(),
Model: model,
Prompt: finalPrompt,
RefFileTokens: refFileTokens,
SearchEnabled: searchEnabled,
ToolNames: toolNames,
ToolsRaw: toolsRaw,
ToolChoice: promptcompat.DefaultToolChoicePolicy(),
})
outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{})
if outcome.ShouldFail {

View File

@@ -105,9 +105,8 @@ func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) {
})
if !stdReq.Stream {
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
StripReferenceMarkers: stripReferenceMarkersEnabled(),
RetryEnabled: true,
CurrentInputFile: h.Store,
RetryEnabled: true,
CurrentInputFile: h.Store,
})
if outErr != nil {
if historySession != nil {
@@ -152,14 +151,13 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
result := sse.CollectStream(resp, thinkingEnabled, true)
turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{
Model: model,
Prompt: finalPrompt,
RefFileTokens: refFileTokens,
SearchEnabled: searchEnabled,
StripReferenceMarkers: stripReferenceMarkersEnabled(),
ToolNames: toolNames,
ToolsRaw: toolsRaw,
ToolChoice: toolChoice,
Model: model,
Prompt: finalPrompt,
RefFileTokens: refFileTokens,
SearchEnabled: searchEnabled,
ToolNames: toolNames,
ToolsRaw: toolsRaw,
ToolChoice: toolChoice,
})
logResponsesToolPolicyRejection(traceID, toolChoice, turn.ParsedToolCalls, "text")
outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{})

View File

@@ -89,11 +89,11 @@ func (a *StreamAccumulator) applyTextPart(text string) StreamPartDelta {
}
a.RawText.WriteString(rawTrimmed)
delta := StreamPartDelta{Type: "text", RawText: rawTrimmed}
cleanedText := CleanVisibleOutput(rawTrimmed, a.StripReferenceMarkers)
if a.SearchEnabled && sse.IsCitation(cleanedText) {
if a.SearchEnabled && sse.IsCitation(rawTrimmed) {
delta.CitationOnly = true
return delta
}
cleanedText := CleanVisibleOutput(rawTrimmed, a.StripReferenceMarkers)
trimmed := sse.TrimContinuationOverlapFromBuilder(&a.Text, cleanedText)
if trimmed == "" {
return delta

View File

@@ -95,3 +95,21 @@ func TestStreamAccumulatorSuppressesCitationTextWhenSearchEnabled(t *testing.T)
t.Fatalf("visible text = %q", got)
}
}
func TestStreamAccumulatorStripsInlineCitationAndReferenceMarkers(t *testing.T) {
acc := StreamAccumulator{SearchEnabled: true, StripReferenceMarkers: true}
result := acc.Apply(sse.LineResult{
Parsed: true,
Parts: []sse.ContentPart{{Type: "text", Text: "广州天气[citation:1] 多云[reference:0]"}},
})
if !result.ContentSeen {
t.Fatalf("expected marker chunk to mark upstream content")
}
if got := acc.Text.String(); got != "广州天气 多云" {
t.Fatalf("visible text = %q", got)
}
if len(result.Parts) != 1 || result.Parts[0].VisibleText != "广州天气 多云" {
t.Fatalf("unexpected parts: %#v", result.Parts)
}
}