feat: add compatibility setting to strip reference markers from model output and update stream handlers accordingly

2026-05-15 21:55:09 +08:00 · 2026-04-05 00:50:30 +08:00
parent a6836455dc
commit c9201174f6
62 changed files with 7831 additions and 1105 deletions
--- a/internal/adapter/claude/deps.go
+++ b/internal/adapter/claude/deps.go
@@ -22,6 +22,7 @@ type DeepSeekCaller interface {

 type ConfigReader interface {
 	ClaudeMapping() map[string]string
+	CompatStripReferenceMarkers() bool
 }

 type OpenAIChatRunner interface {
--- a/internal/adapter/claude/deps_injection_test.go
+++ b/internal/adapter/claude/deps_injection_test.go
@@ -7,6 +7,7 @@ type mockClaudeConfig struct {
 }

 func (m mockClaudeConfig) ClaudeMapping() map[string]string { return m.m }
+func (mockClaudeConfig) CompatStripReferenceMarkers() bool  { return true } // test stub: always reports stripping as enabled

 func TestNormalizeClaudeRequestUsesConfigInterfaceMapping(t *testing.T) {
 	req := map[string]any{
--- a/internal/adapter/claude/handler_messages.go
+++ b/internal/adapter/claude/handler_messages.go
@@ -149,6 +149,7 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ
 		messages,
 		thinkingEnabled,
 		searchEnabled,
+		h.compatStripReferenceMarkers(),
 		toolNames,
 	)
 	streamRuntime.sendMessageStart()
--- a/internal/adapter/claude/handler_routes.go
+++ b/internal/adapter/claude/handler_routes.go
@@ -21,6 +21,13 @@ type Handler struct {
 	OpenAI OpenAIChatRunner
 }

+// compatStripReferenceMarkers reports whether reference markers should be
+// stripped from model output. A nil handler or a nil Store yields true;
+// otherwise the answer comes from Store.CompatStripReferenceMarkers.
+func (h *Handler) compatStripReferenceMarkers() bool {
+	return h == nil || h.Store == nil || h.Store.CompatStripReferenceMarkers()
+}
+
 var (
 	claudeStreamPingInterval    = time.Duration(deepseek.KeepAliveTimeout) * time.Second
 	claudeStreamIdleTimeout     = time.Duration(deepseek.StreamIdleTimeout) * time.Second
--- a/internal/adapter/claude/output_clean.go
+++ b/internal/adapter/claude/output_clean.go
@@ -0,0 +1,13 @@
+package claude
+
+import textclean "ds2api/internal/textclean"
+
+// cleanVisibleOutput prepares text before it is written to the client.
+// Empty text, or any text when stripReferenceMarkers is false, is returned
+// unchanged; otherwise it goes through textclean.StripReferenceMarkers.
+func cleanVisibleOutput(text string, stripReferenceMarkers bool) string {
+	if !stripReferenceMarkers || text == "" {
+		return text
+	}
+	return textclean.StripReferenceMarkers(text)
+}
--- a/internal/adapter/claude/proxy_vercel_test.go
+++ b/internal/adapter/claude/proxy_vercel_test.go
@@ -16,6 +16,8 @@ func (s claudeProxyStoreStub) ClaudeMapping() map[string]string {
 	return s.mapping
 }

+func (claudeProxyStoreStub) CompatStripReferenceMarkers() bool { return true } // test stub: always reports stripping as enabled
+
 type openAIProxyStub struct {
 	status int
 	body   string
--- a/internal/adapter/claude/stream_runtime_core.go
+++ b/internal/adapter/claude/stream_runtime_core.go
@@ -19,13 +19,14 @@ type claudeStreamRuntime struct {
 	toolNames []string
 	messages  []any

-	thinkingEnabled   bool
-	searchEnabled     bool
-	bufferToolContent bool
+	thinkingEnabled       bool
+	searchEnabled         bool
+	bufferToolContent     bool
+	stripReferenceMarkers bool

-	messageID string
-	thinking  strings.Builder
-	text      strings.Builder
+	messageID    string
+	thinking     strings.Builder
+	text         strings.Builder
 	outputTokens int

 	nextBlockIndex     int
@@ -45,21 +46,23 @@ func newClaudeStreamRuntime(
 	messages []any,
 	thinkingEnabled bool,
 	searchEnabled bool,
+	stripReferenceMarkers bool,
 	toolNames []string,
 ) *claudeStreamRuntime {
 	return &claudeStreamRuntime{
-		w:                  w,
-		rc:                 rc,
-		canFlush:           canFlush,
-		model:              model,
-		messages:           messages,
-		thinkingEnabled:    thinkingEnabled,
-		searchEnabled:      searchEnabled,
-		bufferToolContent:  len(toolNames) > 0,
-		toolNames:          toolNames,
-		messageID:          fmt.Sprintf("msg_%d", time.Now().UnixNano()),
-		thinkingBlockIndex: -1,
-		textBlockIndex:     -1,
+		w:                     w,
+		rc:                    rc,
+		canFlush:              canFlush,
+		model:                 model,
+		messages:              messages,
+		thinkingEnabled:       thinkingEnabled,
+		searchEnabled:         searchEnabled,
+		bufferToolContent:     len(toolNames) > 0,
+		stripReferenceMarkers: stripReferenceMarkers,
+		toolNames:             toolNames,
+		messageID:             fmt.Sprintf("msg_%d", time.Now().UnixNano()),
+		thinkingBlockIndex:    -1,
+		textBlockIndex:        -1,
 	}
 }

@@ -80,10 +83,11 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse

 	contentSeen := false
 	for _, p := range parsed.Parts {
-		if p.Text == "" {
+		cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
+		if cleanedText == "" {
 			continue
 		}
-		if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(p.Text) {
+		if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(cleanedText) {
 			continue
 		}
 		contentSeen = true
@@ -92,7 +96,7 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
 			if !s.thinkingEnabled {
 				continue
 			}
-			s.thinking.WriteString(p.Text)
+			s.thinking.WriteString(cleanedText)
 			s.closeTextBlock()
 			if !s.thinkingBlockOpen {
 				s.thinkingBlockIndex = s.nextBlockIndex
@@ -112,13 +116,13 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
 				"index": s.thinkingBlockIndex,
 				"delta": map[string]any{
 					"type":     "thinking_delta",
-					"thinking": p.Text,
+					"thinking": cleanedText,
 				},
 			})
 			continue
 		}

-		s.text.WriteString(p.Text)
+		s.text.WriteString(cleanedText)
 		if s.bufferToolContent {
 			if hasUnclosedCodeFence(s.text.String()) {
 				continue
@@ -144,7 +148,7 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
 			"index": s.textBlockIndex,
 			"delta": map[string]any{
 				"type": "text_delta",
-				"text": p.Text,
+				"text": cleanedText,
 			},
 		})
 	}
--- a/internal/adapter/claude/stream_runtime_finalize.go
+++ b/internal/adapter/claude/stream_runtime_finalize.go
@@ -43,7 +43,7 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
 	s.closeTextBlock()

 	finalThinking := s.thinking.String()
-	finalText := s.text.String()
+	finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)

 	if s.bufferToolContent {
 		detected := util.ParseStandaloneToolCalls(finalText, s.toolNames)
@@ -64,7 +64,7 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
 						"input": map[string]any{},
 					},
 				})
-				
+
 				inputBytes, _ := json.Marshal(tc.Input)
 				s.send("content_block_delta", map[string]any{
 					"type":  "content_block_delta",
--- a/internal/adapter/claude/stream_status_test.go
+++ b/internal/adapter/claude/stream_status_test.go
@@ -28,6 +28,8 @@ func (streamStatusClaudeStoreStub) ClaudeMapping() map[string]string {
 	}
 }

+func (streamStatusClaudeStoreStub) CompatStripReferenceMarkers() bool { return true } // test stub: always reports stripping as enabled
+
 func captureClaudeStatusMiddleware(statuses *[]int) func(http.Handler) http.Handler {
 	return func(next http.Handler) http.Handler {
 		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
--- a/internal/adapter/gemini/deps.go
+++ b/internal/adapter/gemini/deps.go
@@ -22,6 +22,7 @@ type DeepSeekCaller interface {

 type ConfigReader interface {
 	ModelAliases() map[string]string
+	CompatStripReferenceMarkers() bool
 }

 type OpenAIChatRunner interface {
--- a/internal/adapter/gemini/handler_generate.go
+++ b/internal/adapter/gemini/handler_generate.go
@@ -140,7 +140,15 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
 	}

 	result := sse.CollectStream(resp, thinkingEnabled, true)
-	writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(model, finalPrompt, result.Thinking, result.Text, toolNames, result.OutputTokens))
+	stripReferenceMarkers := h.compatStripReferenceMarkers()
+	writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(
+		model,
+		finalPrompt,
+		cleanVisibleOutput(result.Thinking, stripReferenceMarkers),
+		cleanVisibleOutput(result.Text, stripReferenceMarkers),
+		toolNames,
+		result.OutputTokens,
+	))
 }

 func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, outputTokens int) map[string]any {
@@ -179,7 +187,7 @@ func buildGeminiUsage(finalPrompt, finalThinking, finalText string, outputTokens

 func buildGeminiPartsFromFinal(finalText, finalThinking string, toolNames []string) []map[string]any {
 	detected := util.ParseToolCalls(finalText, toolNames)
-	if len(detected) == 0 && strings.TrimSpace(finalThinking) != "" {
+	if len(detected) == 0 && finalThinking != "" {
 		detected = util.ParseToolCalls(finalThinking, toolNames)
 	}
 	if len(detected) > 0 {
@@ -196,7 +204,7 @@ func buildGeminiPartsFromFinal(finalText, finalThinking string, toolNames []stri
 	}

 	text := finalText
-	if strings.TrimSpace(text) == "" {
+	if text == "" {
 		text = finalThinking
 	}
 	return []map[string]any{{"text": text}}
--- a/internal/adapter/gemini/handler_routes.go
+++ b/internal/adapter/gemini/handler_routes.go
@@ -17,6 +17,13 @@ type Handler struct {
 	OpenAI OpenAIChatRunner
 }

+// compatStripReferenceMarkers tells the Gemini handlers whether reference
+// markers should be stripped. It returns true when the handler or its Store
+// is nil, and Store.CompatStripReferenceMarkers() in every other case.
+func (h *Handler) compatStripReferenceMarkers() bool {
+	return h == nil || h.Store == nil || h.Store.CompatStripReferenceMarkers()
+}
+
 func RegisterRoutes(r chi.Router, h *Handler) {
 	r.Post("/v1beta/models/{model}:generateContent", h.GenerateContent)
 	r.Post("/v1beta/models/{model}:streamGenerateContent", h.StreamGenerateContent)
--- a/internal/adapter/gemini/handler_stream_runtime.go
+++ b/internal/adapter/gemini/handler_stream_runtime.go
@@ -27,7 +27,7 @@ func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Req

 	rc := http.NewResponseController(w)
 	_, canFlush := w.(http.Flusher)
-	runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, toolNames)
+	runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, h.compatStripReferenceMarkers(), toolNames)

 	initialType := "text"
 	if thinkingEnabled {
@@ -57,13 +57,14 @@ type geminiStreamRuntime struct {
 	model       string
 	finalPrompt string

-	thinkingEnabled bool
-	searchEnabled   bool
-	bufferContent   bool
-	toolNames       []string
+	thinkingEnabled       bool
+	searchEnabled         bool
+	bufferContent         bool
+	stripReferenceMarkers bool
+	toolNames             []string

-	thinking strings.Builder
-	text     strings.Builder
+	thinking     strings.Builder
+	text         strings.Builder
 	outputTokens int
 }

@@ -75,18 +76,20 @@ func newGeminiStreamRuntime(
 	finalPrompt string,
 	thinkingEnabled bool,
 	searchEnabled bool,
+	stripReferenceMarkers bool,
 	toolNames []string,
 ) *geminiStreamRuntime {
 	return &geminiStreamRuntime{
-		w:               w,
-		rc:              rc,
-		canFlush:        canFlush,
-		model:           model,
-		finalPrompt:     finalPrompt,
-		thinkingEnabled: thinkingEnabled,
-		searchEnabled:   searchEnabled,
-		bufferContent:   len(toolNames) > 0,
-		toolNames:       toolNames,
+		w:                     w,
+		rc:                    rc,
+		canFlush:              canFlush,
+		model:                 model,
+		finalPrompt:           finalPrompt,
+		thinkingEnabled:       thinkingEnabled,
+		searchEnabled:         searchEnabled,
+		bufferContent:         len(toolNames) > 0,
+		stripReferenceMarkers: stripReferenceMarkers,
+		toolNames:             toolNames,
 	}
 }

@@ -113,20 +116,21 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse

 	contentSeen := false
 	for _, p := range parsed.Parts {
-		if p.Text == "" {
+		cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
+		if cleanedText == "" {
 			continue
 		}
-		if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(p.Text) {
+		if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(cleanedText) {
 			continue
 		}
 		contentSeen = true
 		if p.Type == "thinking" {
 			if s.thinkingEnabled {
-				s.thinking.WriteString(p.Text)
+				s.thinking.WriteString(cleanedText)
 			}
 			continue
 		}
-		s.text.WriteString(p.Text)
+		s.text.WriteString(cleanedText)
 		if s.bufferContent {
 			continue
 		}
@@ -136,7 +140,7 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
 					"index": 0,
 					"content": map[string]any{
 						"role":  "model",
-						"parts": []map[string]any{{"text": p.Text}},
+						"parts": []map[string]any{{"text": cleanedText}},
 					},
 				},
 			},
@@ -148,7 +152,7 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse

 func (s *geminiStreamRuntime) finalize() {
 	finalThinking := s.thinking.String()
-	finalText := s.text.String()
+	finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)

 	if s.bufferContent {
 		parts := buildGeminiPartsFromFinal(finalText, finalThinking, s.toolNames)
--- a/internal/adapter/gemini/handler_test.go
+++ b/internal/adapter/gemini/handler_test.go
@@ -17,7 +17,8 @@ import (

 type testGeminiConfig struct{}

-func (testGeminiConfig) ModelAliases() map[string]string { return nil }
+func (testGeminiConfig) ModelAliases() map[string]string   { return nil }
+func (testGeminiConfig) CompatStripReferenceMarkers() bool { return true } // test stub: always reports stripping as enabled

 type testGeminiAuth struct {
 	a   *auth.RequestAuth
@@ -62,8 +63,8 @@ func (m testGeminiDS) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ m
 }

 type geminiOpenAIErrorStub struct {
-	status int
-	body   string
+	status  int
+	body    string
 	headers map[string]string
 }

@@ -247,7 +248,7 @@ func TestStreamGenerateContentEmitsSSE(t *testing.T) {

 func TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope(t *testing.T) {
 	h := &Handler{
-		Store:  testGeminiConfig{},
+		Store: testGeminiConfig{},
 		OpenAI: geminiOpenAIErrorStub{
 			status: http.StatusUnauthorized,
 			body:   `{"error":{"message":"invalid api key"}}`,
--- a/internal/adapter/gemini/output_clean.go
+++ b/internal/adapter/gemini/output_clean.go
@@ -0,0 +1,13 @@
+package gemini
+
+import textclean "ds2api/internal/textclean"
+
+// cleanVisibleOutput returns the text to expose in a Gemini response.
+// With stripReferenceMarkers set and non-empty input, the result is
+// textclean.StripReferenceMarkers(text); in all other cases text is as-is.
+func cleanVisibleOutput(text string, stripReferenceMarkers bool) string {
+	if !stripReferenceMarkers || text == "" {
+		return text
+	}
+	return textclean.StripReferenceMarkers(text)
+}
--- a/internal/adapter/openai/chat_stream_runtime.go
+++ b/internal/adapter/openai/chat_stream_runtime.go
@@ -22,8 +22,9 @@ type chatStreamRuntime struct {
 	finalPrompt  string
 	toolNames    []string

-	thinkingEnabled bool
-	searchEnabled   bool
+	thinkingEnabled       bool
+	searchEnabled         bool
+	stripReferenceMarkers bool

 	firstChunkSent       bool
 	bufferToolContent    bool
@@ -49,25 +50,27 @@ func newChatStreamRuntime(
 	finalPrompt string,
 	thinkingEnabled bool,
 	searchEnabled bool,
+	stripReferenceMarkers bool,
 	toolNames []string,
 	bufferToolContent bool,
 	emitEarlyToolDeltas bool,
 ) *chatStreamRuntime {
 	return &chatStreamRuntime{
-		w:                   w,
-		rc:                  rc,
-		canFlush:            canFlush,
-		completionID:        completionID,
-		created:             created,
-		model:               model,
-		finalPrompt:         finalPrompt,
-		toolNames:           toolNames,
-		thinkingEnabled:     thinkingEnabled,
-		searchEnabled:       searchEnabled,
-		bufferToolContent:   bufferToolContent,
-		emitEarlyToolDeltas: emitEarlyToolDeltas,
-		streamToolCallIDs:   map[int]string{},
-		streamToolNames:     map[int]string{},
+		w:                     w,
+		rc:                    rc,
+		canFlush:              canFlush,
+		completionID:          completionID,
+		created:               created,
+		model:                 model,
+		finalPrompt:           finalPrompt,
+		toolNames:             toolNames,
+		thinkingEnabled:       thinkingEnabled,
+		searchEnabled:         searchEnabled,
+		stripReferenceMarkers: stripReferenceMarkers,
+		bufferToolContent:     bufferToolContent,
+		emitEarlyToolDeltas:   emitEarlyToolDeltas,
+		streamToolCallIDs:     map[int]string{},
+		streamToolNames:       map[int]string{},
 	}
 }

@@ -98,7 +101,7 @@ func (s *chatStreamRuntime) sendDone() {

 func (s *chatStreamRuntime) finalize(finishReason string) {
 	finalThinking := s.thinking.String()
-	finalText := sanitizeLeakedOutput(s.text.String())
+	finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
 	detected := util.ParseStandaloneToolCallsDetailed(finalText, s.toolNames)
 	if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted {
 		finishReason = "tool_calls"
@@ -142,7 +145,7 @@ func (s *chatStreamRuntime) finalize(finishReason string) {
 			if evt.Content == "" {
 				continue
 			}
-			cleaned := sanitizeLeakedOutput(evt.Content)
+			cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
 			if cleaned == "" {
 				continue
 			}
@@ -203,10 +206,11 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
 	newChoices := make([]map[string]any, 0, len(parsed.Parts))
 	contentSeen := false
 	for _, p := range parsed.Parts {
-		if s.searchEnabled && sse.IsCitation(p.Text) {
+		cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
+		if s.searchEnabled && sse.IsCitation(cleanedText) {
 			continue
 		}
-		if p.Text == "" {
+		if cleanedText == "" {
 			continue
 		}
 		contentSeen = true
@@ -217,15 +221,15 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
 		}
 		if p.Type == "thinking" {
 			if s.thinkingEnabled {
-				s.thinking.WriteString(p.Text)
-				delta["reasoning_content"] = p.Text
+				s.thinking.WriteString(cleanedText)
+				delta["reasoning_content"] = cleanedText
 			}
 		} else {
-			s.text.WriteString(p.Text)
+			s.text.WriteString(cleanedText)
 			if !s.bufferToolContent {
-				delta["content"] = p.Text
+				delta["content"] = cleanedText
 			} else {
-				events := processToolSieveChunk(&s.toolSieve, p.Text, s.toolNames)
+				events := processToolSieveChunk(&s.toolSieve, cleanedText, s.toolNames)
 				for _, evt := range events {
 					if len(evt.ToolCallDeltas) > 0 {
 						if !s.emitEarlyToolDeltas {
@@ -264,7 +268,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
 						continue
 					}
 					if evt.Content != "" {
-						cleaned := sanitizeLeakedOutput(evt.Content)
+						cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
 						if cleaned == "" {
 							continue
 						}
--- a/internal/adapter/openai/deps.go
+++ b/internal/adapter/openai/deps.go
@@ -25,6 +25,7 @@ type DeepSeekCaller interface {
 type ConfigReader interface {
 	ModelAliases() map[string]string
 	CompatWideInputStrictOutput() bool
+	CompatStripReferenceMarkers() bool
 	ToolcallMode() string
 	ToolcallEarlyEmitConfidence() string
 	ResponsesStoreTTLSeconds() int
--- a/internal/adapter/openai/deps_injection_test.go
+++ b/internal/adapter/openai/deps_injection_test.go
@@ -15,6 +15,7 @@ func (m mockOpenAIConfig) ModelAliases() map[string]string { return m.aliases }
 func (m mockOpenAIConfig) CompatWideInputStrictOutput() bool {
 	return m.wideInput
 }
+func (m mockOpenAIConfig) CompatStripReferenceMarkers() bool   { return true } // test stub: always reports stripping as enabled
 func (m mockOpenAIConfig) ToolcallMode() string                { return m.toolMode }
 func (m mockOpenAIConfig) ToolcallEarlyEmitConfidence() string { return m.earlyEmit }
 func (m mockOpenAIConfig) ResponsesStoreTTLSeconds() int       { return m.responsesTTL }
--- a/internal/adapter/openai/handler_chat.go
+++ b/internal/adapter/openai/handler_chat.go
@@ -104,9 +104,10 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, re
 	_ = ctx
 	result := sse.CollectStream(resp, thinkingEnabled, true)

-	finalThinking := result.Thinking
-	finalText := sanitizeLeakedOutput(result.Text)
-	if writeUpstreamEmptyOutputError(w, result) {
+	stripReferenceMarkers := h.compatStripReferenceMarkers()
+	finalThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
+	finalText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
+	if writeUpstreamEmptyOutputError(w, finalThinking, finalText, result.ContentFilter) {
 		return
 	}
 	respBody := openaifmt.BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText, toolNames)
@@ -141,6 +142,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
 	created := time.Now().Unix()
 	bufferToolContent := len(toolNames) > 0
 	emitEarlyToolDeltas := h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence()
+	stripReferenceMarkers := h.compatStripReferenceMarkers()
 	initialType := "text"
 	if thinkingEnabled {
 		initialType = "thinking"
@@ -156,6 +158,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
 		finalPrompt,
 		thinkingEnabled,
 		searchEnabled,
+		stripReferenceMarkers,
 		toolNames,
 		bufferToolContent,
 		emitEarlyToolDeltas,
--- a/internal/adapter/openai/handler_routes.go
+++ b/internal/adapter/openai/handler_routes.go
@@ -28,6 +28,13 @@ type Handler struct {
 	responses    *responseStore
 }

+// compatStripReferenceMarkers resolves the strip-reference-markers setting
+// for the OpenAI handlers: true when h or h.Store is nil, otherwise
+// whatever h.Store.CompatStripReferenceMarkers() returns.
+func (h *Handler) compatStripReferenceMarkers() bool {
+	return h == nil || h.Store == nil || h.Store.CompatStripReferenceMarkers()
+}
+
 type streamLease struct {
 	Auth      *auth.RequestAuth
 	ExpiresAt time.Time
--- a/internal/adapter/openai/output_clean.go
+++ b/internal/adapter/openai/output_clean.go
@@ -0,0 +1,13 @@
+package openai
+
+import textclean "ds2api/internal/textclean"
+
+func cleanVisibleOutput(text string, stripReferenceMarkers bool) string {
+	switch {
+	case text == "": // empty input is returned as-is and never reaches the sanitizer
+		return text
+	case stripReferenceMarkers: // drop reference markers before sanitizing
+		text = textclean.StripReferenceMarkers(text)
+	}
+	return sanitizeLeakedOutput(text)
+}
--- a/internal/adapter/openai/responses_handler.go
+++ b/internal/adapter/openai/responses_handler.go
@@ -113,8 +113,10 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
 		return
 	}
 	result := sse.CollectStream(resp, thinkingEnabled, true)
-	sanitizedText := sanitizeLeakedOutput(result.Text)
-	if writeUpstreamEmptyOutputError(w, result) {
+	stripReferenceMarkers := h.compatStripReferenceMarkers()
+	sanitizedThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
+	sanitizedText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
+	if writeUpstreamEmptyOutputError(w, sanitizedThinking, sanitizedText, result.ContentFilter) {
 		return
 	}
 	textParsed := util.ParseStandaloneToolCallsDetailed(sanitizedText, toolNames)
@@ -126,7 +128,7 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
 		return
 	}

-	responseObj := openaifmt.BuildResponseObject(responseID, model, finalPrompt, result.Thinking, sanitizedText, toolNames)
+	responseObj := openaifmt.BuildResponseObject(responseID, model, finalPrompt, sanitizedThinking, sanitizedText, toolNames)
 	if result.OutputTokens > 0 {
 		if usage, ok := responseObj["usage"].(map[string]any); ok {
 			usage["output_tokens"] = result.OutputTokens
@@ -159,6 +161,7 @@ func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request,
 	}
 	bufferToolContent := len(toolNames) > 0
 	emitEarlyToolDeltas := h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence()
+	stripReferenceMarkers := h.compatStripReferenceMarkers()

 	streamRuntime := newResponsesStreamRuntime(
 		w,
@@ -169,6 +172,7 @@ func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request,
 		finalPrompt,
 		thinkingEnabled,
 		searchEnabled,
+		stripReferenceMarkers,
 		toolNames,
 		bufferToolContent,
 		emitEarlyToolDeltas,
--- a/internal/adapter/openai/responses_stream_runtime_core.go
+++ b/internal/adapter/openai/responses_stream_runtime_core.go
@@ -23,8 +23,9 @@ type responsesStreamRuntime struct {
 	traceID     string
 	toolChoice  util.ToolChoicePolicy

-	thinkingEnabled bool
-	searchEnabled   bool
+	thinkingEnabled       bool
+	searchEnabled         bool
+	stripReferenceMarkers bool

 	bufferToolContent    bool
 	emitEarlyToolDeltas  bool
@@ -63,6 +64,7 @@ func newResponsesStreamRuntime(
 	finalPrompt string,
 	thinkingEnabled bool,
 	searchEnabled bool,
+	stripReferenceMarkers bool,
 	toolNames []string,
 	bufferToolContent bool,
 	emitEarlyToolDeltas bool,
@@ -71,34 +73,35 @@ func newResponsesStreamRuntime(
 	persistResponse func(obj map[string]any),
 ) *responsesStreamRuntime {
 	return &responsesStreamRuntime{
-		w:                   w,
-		rc:                  rc,
-		canFlush:            canFlush,
-		responseID:          responseID,
-		model:               model,
-		finalPrompt:         finalPrompt,
-		thinkingEnabled:     thinkingEnabled,
-		searchEnabled:       searchEnabled,
-		toolNames:           toolNames,
-		bufferToolContent:   bufferToolContent,
-		emitEarlyToolDeltas: emitEarlyToolDeltas,
-		streamToolCallIDs:   map[int]string{},
-		functionItemIDs:     map[int]string{},
-		functionOutputIDs:   map[int]int{},
-		functionArgs:        map[int]string{},
-		functionDone:        map[int]bool{},
-		functionAdded:       map[int]bool{},
-		functionNames:       map[int]string{},
-		messageOutputID:     -1,
-		toolChoice:          toolChoice,
-		traceID:             traceID,
-		persistResponse:     persistResponse,
+		w:                     w,
+		rc:                    rc,
+		canFlush:              canFlush,
+		responseID:            responseID,
+		model:                 model,
+		finalPrompt:           finalPrompt,
+		thinkingEnabled:       thinkingEnabled,
+		searchEnabled:         searchEnabled,
+		stripReferenceMarkers: stripReferenceMarkers,
+		toolNames:             toolNames,
+		bufferToolContent:     bufferToolContent,
+		emitEarlyToolDeltas:   emitEarlyToolDeltas,
+		streamToolCallIDs:     map[int]string{},
+		functionItemIDs:       map[int]string{},
+		functionOutputIDs:     map[int]int{},
+		functionArgs:          map[int]string{},
+		functionDone:          map[int]bool{},
+		functionAdded:         map[int]bool{},
+		functionNames:         map[int]string{},
+		messageOutputID:       -1,
+		toolChoice:            toolChoice,
+		traceID:               traceID,
+		persistResponse:       persistResponse,
 	}
 }

 func (s *responsesStreamRuntime) finalize() {
 	finalThinking := s.thinking.String()
-	finalText := sanitizeLeakedOutput(s.text.String())
+	finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)

 	if s.bufferToolContent {
 		s.processToolStreamEvents(flushToolSieve(&s.sieve, s.toolNames), true)
@@ -190,10 +193,11 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa

 	contentSeen := false
 	for _, p := range parsed.Parts {
-		if p.Text == "" {
+		cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
+		if cleanedText == "" {
 			continue
 		}
-		if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(p.Text) {
+		if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(cleanedText) {
 			continue
 		}
 		contentSeen = true
@@ -201,15 +205,11 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
 			if !s.thinkingEnabled {
 				continue
 			}
-			s.thinking.WriteString(p.Text)
-			s.sendEvent("response.reasoning.delta", openaifmt.BuildResponsesReasoningDeltaPayload(s.responseID, p.Text))
+			s.thinking.WriteString(cleanedText)
+			s.sendEvent("response.reasoning.delta", openaifmt.BuildResponsesReasoningDeltaPayload(s.responseID, cleanedText))
 			continue
 		}

-		cleanedText := sanitizeLeakedOutput(p.Text)
-		if cleanedText == "" {
-			continue
-		}
 		s.text.WriteString(cleanedText)
 		if !s.bufferToolContent {
 			s.emitTextDelta(cleanedText)
--- a/internal/adapter/openai/responses_stream_runtime_toolcalls.go
+++ b/internal/adapter/openai/responses_stream_runtime_toolcalls.go
@@ -69,7 +69,7 @@ func (s *responsesStreamRuntime) ensureMessageContentPartAdded() {
 }

 func (s *responsesStreamRuntime) emitTextDelta(content string) {
-	if strings.TrimSpace(content) == "" {
+	if content == "" {
 		return
 	}
 	s.ensureMessageContentPartAdded()
--- a/internal/adapter/openai/responses_stream_runtime_toolcalls_finalize.go
+++ b/internal/adapter/openai/responses_stream_runtime_toolcalls_finalize.go
@@ -83,13 +83,13 @@ func (s *responsesStreamRuntime) buildCompletedResponseObject(finalThinking, fin
 		})
 	} else if len(calls) == 0 {
 		content := make([]map[string]any, 0, 2)
-		if strings.TrimSpace(finalThinking) != "" {
+		if finalThinking != "" {
 			content = append(content, map[string]any{
 				"type": "reasoning",
 				"text": finalThinking,
 			})
 		}
-		if strings.TrimSpace(finalText) != "" {
+		if finalText != "" {
 			content = append(content, map[string]any{
 				"type": "output_text",
 				"text": finalText,
@@ -136,10 +136,10 @@ func (s *responsesStreamRuntime) buildCompletedResponseObject(finalThinking, fin
 	}

 	outputText := s.visibleText.String()
-	if strings.TrimSpace(outputText) == "" && len(calls) == 0 {
-		if strings.TrimSpace(finalText) != "" {
+	if outputText == "" && len(calls) == 0 {
+		if finalText != "" {
 			outputText = finalText
-		} else if strings.TrimSpace(finalThinking) != "" {
+		} else if finalThinking != "" {
 			outputText = finalThinking
 		}
 	}
--- a/internal/adapter/openai/tool_sieve_state.go
+++ b/internal/adapter/openai/tool_sieve_state.go
@@ -48,7 +48,7 @@ func (s *toolStreamSieveState) resetIncrementalToolState() {
 }

 func (s *toolStreamSieveState) noteText(content string) {
-	if strings.TrimSpace(content) == "" {
+	if content == "" {
 		return
 	}
 	s.recentTextTail = appendTail(s.recentTextTail, content, toolSieveContextTailLimit)
--- a/internal/adapter/openai/upstream_empty.go
+++ b/internal/adapter/openai/upstream_empty.go
@@ -1,17 +1,12 @@
 package openai

-import (
-	"net/http"
-	"strings"
+import "net/http"

-	"ds2api/internal/sse"
-)
-
-func writeUpstreamEmptyOutputError(w http.ResponseWriter, result sse.CollectResult) bool {
-	if strings.TrimSpace(result.Thinking) != "" || strings.TrimSpace(sanitizeLeakedOutput(result.Text)) != "" {
+func writeUpstreamEmptyOutputError(w http.ResponseWriter, thinking, text string, contentFilter bool) bool {
+	if thinking != "" || text != "" {
 		return false
 	}
-	if result.ContentFilter {
+	if contentFilter {
 		writeOpenAIErrorWithCode(w, http.StatusBadRequest, "Upstream content filtered the response and returned no output.", "content_filter")
 		return true
 	}
--- a/internal/adapter/openai/vercel_stream.go
+++ b/internal/adapter/openai/vercel_stream.go
@@ -99,10 +99,13 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque
 		"final_prompt":     stdReq.FinalPrompt,
 		"thinking_enabled": stdReq.Thinking,
 		"search_enabled":   stdReq.Search,
-		"tool_names":       stdReq.ToolNames,
-		"deepseek_token":   a.DeepSeekToken,
-		"pow_header":       powHeader,
-		"payload":          payload,
+		"compat": map[string]any{
+			"strip_reference_markers": h.compatStripReferenceMarkers(),
+		},
+		"tool_names":     stdReq.ToolNames,
+		"deepseek_token": a.DeepSeekToken,
+		"pow_header":     powHeader,
+		"payload":        payload,
 	})
 }

--- a/internal/admin/deps.go
+++ b/internal/admin/deps.go
@@ -32,6 +32,7 @@ type ConfigStore interface {
 	RuntimeAccountMaxQueue(defaultSize int) int
 	RuntimeGlobalMaxInflight(defaultSize int) int
 	RuntimeTokenRefreshIntervalHours() int
+	CompatStripReferenceMarkers() bool
 	AutoDeleteSessions() bool
 }

--- a/internal/admin/handler_settings_parse.go
+++ b/internal/admin/handler_settings_parse.go
@@ -21,10 +21,11 @@ func boolFrom(v any) bool {
 	}
 }

-func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, *config.AutoDeleteConfig, map[string]string, map[string]string, error) {
+func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, *config.CompatConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, *config.AutoDeleteConfig, map[string]string, map[string]string, error) {
 	var (
 		adminCfg      *config.AdminConfig
 		runtimeCfg    *config.RuntimeConfig
+		compatCfg     *config.CompatConfig
 		respCfg       *config.ResponsesConfig
 		embCfg        *config.EmbeddingsConfig
 		autoDeleteCfg *config.AutoDeleteConfig
@@ -37,7 +38,7 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		if v, exists := raw["jwt_expire_hours"]; exists {
 			n := intFrom(v)
 			if n < 1 || n > 720 {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("admin.jwt_expire_hours must be between 1 and 720")
+				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("admin.jwt_expire_hours must be between 1 and 720")
 			}
 			cfg.JWTExpireHours = n
 		}
@@ -49,43 +50,56 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		if v, exists := raw["account_max_inflight"]; exists {
 			n := intFrom(v)
 			if n < 1 || n > 256 {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.account_max_inflight must be between 1 and 256")
+				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.account_max_inflight must be between 1 and 256")
 			}
 			cfg.AccountMaxInflight = n
 		}
 		if v, exists := raw["account_max_queue"]; exists {
 			n := intFrom(v)
 			if n < 1 || n > 200000 {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.account_max_queue must be between 1 and 200000")
+				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.account_max_queue must be between 1 and 200000")
 			}
 			cfg.AccountMaxQueue = n
 		}
 		if v, exists := raw["global_max_inflight"]; exists {
 			n := intFrom(v)
 			if n < 1 || n > 200000 {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be between 1 and 200000")
+				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be between 1 and 200000")
 			}
 			cfg.GlobalMaxInflight = n
 		}
 		if v, exists := raw["token_refresh_interval_hours"]; exists {
 			n := intFrom(v)
 			if n < 1 || n > 720 {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.token_refresh_interval_hours must be between 1 and 720")
+				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.token_refresh_interval_hours must be between 1 and 720")
 			}
 			cfg.TokenRefreshIntervalHours = n
 		}
 		if cfg.AccountMaxInflight > 0 && cfg.GlobalMaxInflight > 0 && cfg.GlobalMaxInflight < cfg.AccountMaxInflight {
-			return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
+			return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
 		}
 		runtimeCfg = cfg
 	}

+	if raw, ok := req["compat"].(map[string]any); ok {
+		cfg := &config.CompatConfig{}
+		if v, exists := raw["wide_input_strict_output"]; exists {
+			b := boolFrom(v)
+			cfg.WideInputStrictOutput = &b
+		}
+		if v, exists := raw["strip_reference_markers"]; exists {
+			b := boolFrom(v)
+			cfg.StripReferenceMarkers = &b
+		}
+		compatCfg = cfg
+	}
+
 	if raw, ok := req["responses"].(map[string]any); ok {
 		cfg := &config.ResponsesConfig{}
 		if v, exists := raw["store_ttl_seconds"]; exists {
 			n := intFrom(v)
 			if n < 30 || n > 86400 {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("responses.store_ttl_seconds must be between 30 and 86400")
+				return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("responses.store_ttl_seconds must be between 30 and 86400")
 			}
 			cfg.StoreTTLSeconds = n
 		}
@@ -133,5 +147,5 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		autoDeleteCfg = cfg
 	}

-	return adminCfg, runtimeCfg, respCfg, embCfg, autoDeleteCfg, claudeMap, aliasMap, nil
+	return adminCfg, runtimeCfg, compatCfg, respCfg, embCfg, autoDeleteCfg, claudeMap, aliasMap, nil
 }
--- a/internal/admin/handler_settings_read.go
+++ b/internal/admin/handler_settings_read.go
@@ -26,6 +26,7 @@ func (h *Handler) getSettings(w http.ResponseWriter, _ *http.Request) {
 			"global_max_inflight":          h.Store.RuntimeGlobalMaxInflight(recommended),
 			"token_refresh_interval_hours": h.Store.RuntimeTokenRefreshIntervalHours(),
 		},
+		"compat":            snap.Compat,
 		"responses":         snap.Responses,
 		"embeddings":        snap.Embeddings,
 		"auto_delete":       snap.AutoDelete,
--- a/internal/admin/handler_settings_write.go
+++ b/internal/admin/handler_settings_write.go
@@ -17,7 +17,7 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
 		return
 	}

-	adminCfg, runtimeCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, claudeMap, aliasMap, err := parseSettingsUpdateRequest(req)
+	adminCfg, runtimeCfg, compatCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, claudeMap, aliasMap, err := parseSettingsUpdateRequest(req)
 	if err != nil {
 		writeJSON(w, http.StatusBadRequest, map[string]any{"detail": err.Error()})
 		return
@@ -49,6 +49,14 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
 				c.Runtime.TokenRefreshIntervalHours = runtimeCfg.TokenRefreshIntervalHours
 			}
 		}
+		if compatCfg != nil {
+			if compatCfg.WideInputStrictOutput != nil {
+				c.Compat.WideInputStrictOutput = compatCfg.WideInputStrictOutput
+			}
+			if compatCfg.StripReferenceMarkers != nil {
+				c.Compat.StripReferenceMarkers = compatCfg.StripReferenceMarkers
+			}
+		}
 		if responsesCfg != nil && responsesCfg.StoreTTLSeconds > 0 {
 			c.Responses.StoreTTLSeconds = responsesCfg.StoreTTLSeconds
 		}
--- a/internal/config/codec.go
+++ b/internal/config/codec.go
@@ -35,7 +35,7 @@ func (c Config) MarshalJSON() ([]byte, error) {
 	if c.Runtime.AccountMaxInflight > 0 || c.Runtime.AccountMaxQueue > 0 || c.Runtime.GlobalMaxInflight > 0 || c.Runtime.TokenRefreshIntervalHours > 0 {
 		m["runtime"] = c.Runtime
 	}
-	if c.Compat.WideInputStrictOutput != nil {
+	if c.Compat.WideInputStrictOutput != nil || c.Compat.StripReferenceMarkers != nil {
 		m["compat"] = c.Compat
 	}
 	if c.Responses.StoreTTLSeconds > 0 {
@@ -137,6 +137,7 @@ func (c Config) Clone() Config {
 		Runtime:        c.Runtime,
 		Compat: CompatConfig{
 			WideInputStrictOutput: cloneBoolPtr(c.Compat.WideInputStrictOutput),
+			StripReferenceMarkers: cloneBoolPtr(c.Compat.StripReferenceMarkers),
 		},
 		Responses:        c.Responses,
 		Embeddings:       c.Embeddings,
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -52,6 +52,7 @@ func (c *Config) DropInvalidAccounts() {

 type CompatConfig struct {
 	WideInputStrictOutput *bool `json:"wide_input_strict_output,omitempty"`
+	StripReferenceMarkers *bool `json:"strip_reference_markers,omitempty"` // nil means unset; Store.CompatStripReferenceMarkers then reports true
 }

 type AdminConfig struct {
--- a/internal/config/config_edge_test.go
+++ b/internal/config/config_edge_test.go
@@ -97,6 +97,8 @@ func TestLowerFunction(t *testing.T) {
 // ─── Config.MarshalJSON / UnmarshalJSON roundtrip ────────────────────

 func TestConfigJSONRoundtrip(t *testing.T) {
+	trueVal := true
+	falseVal := false
 	cfg := Config{
 		Keys:     []string{"key1", "key2"},
 		Accounts: []Account{{Email: "user@example.com", Password: "pass", Token: "tok"}},
@@ -107,6 +109,10 @@ func TestConfigJSONRoundtrip(t *testing.T) {
 		Runtime: RuntimeConfig{
 			TokenRefreshIntervalHours: 12,
 		},
+		Compat: CompatConfig{
+			WideInputStrictOutput: &trueVal,
+			StripReferenceMarkers: &falseVal,
+		},
 		VercelSyncHash: "hash123",
 		VercelSyncTime: 1234567890,
 		AdditionalFields: map[string]any{
@@ -136,6 +142,12 @@ func TestConfigJSONRoundtrip(t *testing.T) {
 	if decoded.Runtime.TokenRefreshIntervalHours != 12 {
 		t.Fatalf("unexpected runtime refresh interval: %#v", decoded.Runtime.TokenRefreshIntervalHours)
 	}
+	if decoded.Compat.WideInputStrictOutput == nil || !*decoded.Compat.WideInputStrictOutput {
+		t.Fatalf("unexpected compat wide_input_strict_output: %#v", decoded.Compat.WideInputStrictOutput)
+	}
+	if decoded.Compat.StripReferenceMarkers == nil || *decoded.Compat.StripReferenceMarkers {
+		t.Fatalf("unexpected compat strip_reference_markers: %#v", decoded.Compat.StripReferenceMarkers)
+	}
 	if decoded.VercelSyncHash != "hash123" {
 		t.Fatalf("unexpected vercel sync hash: %q", decoded.VercelSyncHash)
 	}
@@ -162,12 +174,16 @@ func TestConfigUnmarshalJSONPreservesUnknownFields(t *testing.T) {
 // ─── Config.Clone ────────────────────────────────────────────────────

 func TestConfigCloneIsDeepCopy(t *testing.T) {
+	falseVal := false
 	cfg := Config{
 		Keys:     []string{"key1"},
 		Accounts: []Account{{Email: "user@test.com", Token: "token"}},
 		ClaudeMapping: map[string]string{
 			"fast": "deepseek-chat",
 		},
+		Compat: CompatConfig{
+			StripReferenceMarkers: &falseVal,
+		},
 		AdditionalFields: map[string]any{"custom": "value"},
 	}

@@ -177,6 +193,9 @@ func TestConfigCloneIsDeepCopy(t *testing.T) {
 	cfg.Keys[0] = "modified"
 	cfg.Accounts[0].Email = "modified@test.com"
 	cfg.ClaudeMapping["fast"] = "modified-model"
+	if cfg.Compat.StripReferenceMarkers != nil {
+		*cfg.Compat.StripReferenceMarkers = true
+	}

 	// Cloned should not be affected
 	if cloned.Keys[0] != "key1" {
@@ -188,6 +207,9 @@ func TestConfigCloneIsDeepCopy(t *testing.T) {
 	if cloned.ClaudeMapping["fast"] != "deepseek-chat" {
 		t.Fatalf("clone claude mapping was affected: %#v", cloned.ClaudeMapping)
 	}
+	if cloned.Compat.StripReferenceMarkers == nil || *cloned.Compat.StripReferenceMarkers {
+		t.Fatalf("clone compat was affected: %#v", cloned.Compat.StripReferenceMarkers)
+	}
 }

 func TestConfigCloneNilMaps(t *testing.T) {
@@ -359,6 +381,39 @@ func TestStoreCompatWideInputStrictOutputCanDisable(t *testing.T) {
 	}
 }

+// With no compat section in the config, stripping must be enabled by default.
+func TestStoreCompatStripReferenceMarkersDefaultTrue(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[]}`)
+	if enabled := LoadStore().CompatStripReferenceMarkers(); !enabled {
+		t.Fatal("expected default strip_reference_markers=true when unset")
+	}
+}
+
+// Explicit strip_reference_markers=false must reach both accessor and marshaled JSON.
+func TestStoreCompatStripReferenceMarkersCanDisable(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[],"compat":{"strip_reference_markers":false}}`)
+	store := LoadStore()
+	if enabled := store.CompatStripReferenceMarkers(); enabled {
+		t.Fatal("expected strip_reference_markers=false when explicitly configured")
+	}
+
+	encoded, err := store.Snapshot().MarshalJSON()
+	if err != nil {
+		t.Fatalf("marshal failed: %v", err)
+	}
+	var out map[string]any
+	if err := json.Unmarshal(encoded, &out); err != nil {
+		t.Fatalf("decode failed: %v", err)
+	}
+	rawCompat, hasCompat := out["compat"].(map[string]any)
+	if !hasCompat {
+		t.Fatalf("expected compat in marshaled output, got %#v", out)
+	}
+	if rawCompat["strip_reference_markers"] != false {
+		t.Fatalf("expected explicit false in compat, got %#v", rawCompat)
+	}
+}
+
 func TestStoreIsEnvBacked(t *testing.T) {
 	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[]}`)
 	store := LoadStore()
--- a/internal/config/store_accessors.go
+++ b/internal/config/store_accessors.go
@@ -42,6 +42,15 @@ func (s *Store) CompatWideInputStrictOutput() bool {
 	return *s.cfg.Compat.WideInputStrictOutput
 }

+// CompatStripReferenceMarkers reports the compat strip_reference_markers
+// setting. The value is read while holding the store's read lock, and an
+// unset (nil) setting is reported as true.
+func (s *Store) CompatStripReferenceMarkers() bool {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	flag := s.cfg.Compat.StripReferenceMarkers
+	return flag == nil || *flag
+}
+
 func (s *Store) ToolcallMode() string {
 	return "feature_match"
 }
--- a/internal/js/chat-stream/index.js
+++ b/internal/js/chat-stream/index.js
@@ -7,6 +7,7 @@ const {
  parseChunkForContent,
  extractContentRecursive,
  shouldSkipPath,
+  stripReferenceMarkers,
 } = require('./sse_parse');
 const {
  resolveToolcallPolicy,
@@ -100,6 +101,7 @@ module.exports.__test = {
  parseChunkForContent,
  extractContentRecursive,
  shouldSkipPath,
+  stripReferenceMarkers,
  asString,
  resolveToolcallPolicy,
  formatIncrementalToolCallDeltas,
--- a/internal/js/chat-stream/sse_parse.js
+++ b/internal/js/chat-stream/sse_parse.js
@@ -5,7 +5,7 @@ const {
  SKIP_EXACT_PATHS,
 } = require('../shared/deepseek-constants');

-function parseChunkForContent(chunk, thinkingEnabled, currentType) {
+function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenceMarkers = true) {
  if (!chunk || typeof chunk !== 'object' || !Object.prototype.hasOwnProperty.call(chunk, 'v')) {
    return { parts: [], finished: false, newType: currentType };
  }
@@ -26,7 +26,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType) {
        continue;
      }
      const fragType = asString(frag.type).toUpperCase();
-      const content = asString(frag.content);
+      const content = asContentString(frag.content, stripReferenceMarkers);
      if (!content) {
        continue;
      }
@@ -76,14 +76,15 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType) {
    if (val === 'FINISHED' && (!pathValue || pathValue === 'status')) {
      return { parts: [], finished: true, newType };
    }
-    if (val) {
-      parts.push({ text: val, type: partType });
+    const content = asContentString(val, stripReferenceMarkers);
+    if (content) {
+      parts.push({ text: content, type: partType });
    }
    return { parts, finished: false, newType };
  }

  if (Array.isArray(val)) {
-    const extracted = extractContentRecursive(val, partType);
+    const extracted = extractContentRecursive(val, partType, stripReferenceMarkers);
    if (extracted.finished) {
      return { parts: [], finished: true, newType };
    }
@@ -98,7 +99,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType) {
        if (!frag || typeof frag !== 'object') {
          continue;
        }
-        const content = asString(frag.content);
+        const content = asContentString(frag.content, stripReferenceMarkers);
        if (!content) {
          continue;
        }
@@ -118,7 +119,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType) {
  return { parts, finished: false, newType };
 }

-function extractContentRecursive(items, defaultType) {
+function extractContentRecursive(items, defaultType, stripReferenceMarkers = true) {
  const parts = [];
  for (const it of items) {
    if (!it || typeof it !== 'object') {
@@ -135,7 +136,7 @@ function extractContentRecursive(items, defaultType) {
    if (shouldSkipPath(itemPath)) {
      continue;
    }
-    const content = asString(it.content);
+    const content = asContentString(it.content, stripReferenceMarkers);
    if (content) {
      const typeName = asString(it.type).toUpperCase();
      if (typeName === 'THINK' || typeName === 'THINKING') {
@@ -157,7 +158,10 @@ function extractContentRecursive(items, defaultType) {

    if (typeof itemV === 'string') {
      if (itemV && itemV !== 'FINISHED') {
-        parts.push({ text: itemV, type: partType });
+        const content = asContentString(itemV, stripReferenceMarkers);
+        if (content) {
+          parts.push({ text: content, type: partType });
+        }
      }
      continue;
    }
@@ -168,14 +172,17 @@ function extractContentRecursive(items, defaultType) {
    for (const inner of itemV) {
      if (typeof inner === 'string') {
        if (inner) {
-          parts.push({ text: inner, type: partType });
+          const content = asContentString(inner, stripReferenceMarkers);
+          if (content) {
+            parts.push({ text: content, type: partType });
+          }
        }
        continue;
      }
      if (!inner || typeof inner !== 'object') {
        continue;
      }
-      const ct = asString(inner.content);
+      const ct = asContentString(inner.content, stripReferenceMarkers);
      if (!ct) {
        continue;
      }
@@ -218,6 +225,40 @@ function isCitation(text) {
  return asString(text).trim().startsWith('[citation:');
 }

+// Flattens an upstream content value into plain text.
+//   - string: returned as-is (not trimmed)
+//   - array: each item is flattened and the pieces are concatenated
+//   - object: unwrapped through its own `content` property, else its own `v`
+//     property, else ''
+//   - null/undefined: ''
+//   - any other primitive: String(v)
+// String and stringified results have reference markers removed when
+// stripReferenceMarkers is truthy.
+function asContentString(v, stripReferenceMarkers = true) {
+  // `== null` covers both null and undefined.
+  if (v == null) {
+    return '';
+  }
+  if (Array.isArray(v)) {
+    return v.reduce((acc, item) => acc + asContentString(item, stripReferenceMarkers), '');
+  }
+  if (typeof v === 'object') {
+    const hasOwn = (key) => Object.prototype.hasOwnProperty.call(v, key);
+    const wrappedKey = ['content', 'v'].find(hasOwn);
+    return wrappedKey ? asContentString(v[wrappedKey], stripReferenceMarkers) : '';
+  }
+  // Only primitives reach this point.
+  const text = typeof v === 'string' ? v : String(v);
+  return stripReferenceMarkers ? stripReferenceMarkersText(text) : text;
+}
+
+// Removes every "[reference:N]" marker (case-insensitive, optional whitespace
+// after the colon); falsy input is handed back untouched.
+function stripReferenceMarkersText(text) {
+  const markerPattern = /\[reference:\s*\d+\]/gi;
+  return text ? text.replace(markerPattern, '') : text;
+}
+
 function asString(v) {
  if (typeof v === 'string') {
    return v.trim();
@@ -237,4 +278,5 @@ module.exports = {
  shouldSkipPath,
  isFragmentStatusPath,
  isCitation,
+  stripReferenceMarkers: stripReferenceMarkersText,
 };
--- a/internal/js/chat-stream/vercel_stream.js
+++ b/internal/js/chat-stream/vercel_stream.js
@@ -15,6 +15,7 @@ const {
  resolveToolcallPolicy,
  formatIncrementalToolCallDeltas,
  filterIncrementalToolCallDeltasByAllowed,
+  boolDefaultTrue,
 } = require('./toolcall_policy');
 const { createChatCompletionEmitter } = require('./stream_emitter');
 const {
@@ -47,6 +48,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
  const toolPolicy = resolveToolcallPolicy(prep.body, payload.tools);
  const toolNames = toolPolicy.toolNames;
  const emitEarlyToolDeltas = toolPolicy.emitEarlyToolDeltas;
+  const stripReferenceMarkers = boolDefaultTrue(prep.body.compat && prep.body.compat.strip_reference_markers);

  if (!model || !leaseID || !deepseekToken || !powHeader || !completionPayload) {
    writeOpenAIError(res, 500, 'invalid vercel prepare response');
@@ -219,7 +221,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
            await finish('content_filter');
            return;
          }
-          const parsed = parseChunkForContent(chunk, thinkingEnabled, currentType);
+          const parsed = parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenceMarkers);
          currentType = parsed.newType;
          if (parsed.finished) {
            await finish('stop');
--- a/internal/textclean/reference_markers.go
+++ b/internal/textclean/reference_markers.go
@@ -0,0 +1,12 @@
+package textclean
+
+import "regexp"
+
+// referenceMarkerPattern matches "[reference:N]" markers: case-insensitive,
+// optional whitespace after the colon, one or more digits.
+var referenceMarkerPattern = regexp.MustCompile(`(?i)\[reference:\s*\d+\]`)
+
+// StripReferenceMarkers returns text with every such marker removed.
+func StripReferenceMarkers(text string) string {
+	return referenceMarkerPattern.ReplaceAllString(text, "")
+}