diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md
index f733f58..5bf6025 100644
--- a/docs/prompt-compatibility.md
+++ b/docs/prompt-compatibility.md
@@ -259,16 +259,11 @@ OpenAI 文件相关实现：
 - 旧历史拆分兼容壳：
   [internal/httpapi/openai/history/history_split.go](../internal/httpapi/openai/history/history_split.go)
 
-当前输入转文件启用并触发时，上传文件的真实文件名是 `history.txt`，文件内容是完整 `messages` 上下文；它仍会先用 OpenAI 消息标准化和 DeepSeek 角色标记序列化，再包进 `history.txt` 文件边界里：
+当前输入转文件启用并触发时，上传文件的真实文件名是 `history.txt`，文件内容是完整 `messages` 上下文；它仍会先用 OpenAI 消息标准化和 DeepSeek 角色标记序列化，然后直接作为 `history.txt` 的纯文本内容上传（不再注入文件边界标签）：
 
 ```text
 [uploaded filename]: history.txt
-[file content end]
-
 <｜begin▁of▁sentence｜><｜System｜>...<｜User｜>...<｜Assistant｜>...<｜Tool｜>...<｜User｜>...
-
-[file name]: history.txt
-[file content begin]
 ```
 
 开启后，请求的 live prompt 不再直接内联完整上下文，而是保留一个 user role 的短提示，提示模型基于已提供上下文直接回答最新请求；上传后的 `file_id` 会进入 `ref_file_ids`。
diff --git a/internal/httpapi/openai/chat/chat_stream_runtime.go b/internal/httpapi/openai/chat/chat_stream_runtime.go
index e5cbeb3..21d1f4f 100644
--- a/internal/httpapi/openai/chat/chat_stream_runtime.go
+++ b/internal/httpapi/openai/chat/chat_stream_runtime.go
@@ -143,7 +143,7 @@ func (s *chatStreamRuntime) finalize(finishReason string, deferEmptyOutput bool)
 	finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
 	s.finalThinking = finalThinking
 	s.finalText = finalText
-	detected := detectAssistantToolCalls(s.rawText.String(), s.rawThinking.String(), finalToolDetectionThinking, s.toolNames)
+	detected := detectAssistantToolCalls(s.rawText.String(), finalText, s.rawThinking.String(), finalToolDetectionThinking, s.toolNames)
 	if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted {
 		finishReason = "tool_calls"
 		delta := map[string]any{
diff --git a/internal/httpapi/openai/chat/empty_retry_runtime.go b/internal/httpapi/openai/chat/empty_retry_runtime.go
index 61642b5..c3d37b9 100644
--- a/internal/httpapi/openai/chat/empty_retry_runtime.go
+++ b/internal/httpapi/openai/chat/empty_retry_runtime.go
@@ -46,7 +46,7 @@ func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Co
 		result.thinking = accumulatedThinking
 		result.rawThinking = accumulatedRawThinking
 		result.toolDetectionThinking = accumulatedToolDetectionThinking
-		detected := detectAssistantToolCalls(result.rawText, result.rawThinking, result.toolDetectionThinking, toolNames)
+		detected := detectAssistantToolCalls(result.rawText, result.text, result.rawThinking, result.toolDetectionThinking, toolNames)
 		result.detectedCalls = len(detected.Calls)
 		result.body = openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, result.thinking, result.text, detected.Calls, toolsRaw)
 		result.finishReason = chatFinishReason(result.body)
@@ -91,7 +91,7 @@ func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http.
 	if searchEnabled {
 		finalText = replaceCitationMarkersWithLinks(finalText, result.CitationLinks)
 	}
-	detected := detectAssistantToolCalls(result.Text, result.Thinking, result.ToolDetectionThinking, toolNames)
+	detected := detectAssistantToolCalls(result.Text, finalText, result.Thinking, result.ToolDetectionThinking, toolNames)
 	respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, finalThinking, finalText, detected.Calls, toolsRaw)
 	return chatNonStreamResult{
 		rawThinking:           result.Thinking,
diff --git a/internal/httpapi/openai/chat/handler.go b/internal/httpapi/openai/chat/handler.go
index 92da0c6..4ad7aad 100644
--- a/internal/httpapi/openai/chat/handler.go
+++ b/internal/httpapi/openai/chat/handler.go
@@ -148,6 +148,6 @@ func formatFinalStreamToolCallsWithStableIDs(calls []toolcall.ParsedToolCall, id
 	return shared.FormatFinalStreamToolCallsWithStableIDs(calls, ids, toolsRaw)
 }
 
-func detectAssistantToolCalls(text, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult {
-	return shared.DetectAssistantToolCalls(text, exposedThinking, detectionThinking, toolNames)
+func detectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult {
+	return shared.DetectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking, toolNames)
 }
diff --git a/internal/httpapi/openai/chat/handler_chat.go b/internal/httpapi/openai/chat/handler_chat.go
index 3b56f12..a2e421a 100644
--- a/internal/httpapi/openai/chat/handler_chat.go
+++ b/internal/httpapi/openai/chat/handler_chat.go
@@ -166,7 +166,7 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co
 	if searchEnabled {
 		finalText = replaceCitationMarkersWithLinks(finalText, result.CitationLinks)
 	}
-	detected := detectAssistantToolCalls(result.Text, result.Thinking, result.ToolDetectionThinking, toolNames)
+	detected := detectAssistantToolCalls(result.Text, finalText, result.Thinking, result.ToolDetectionThinking, toolNames)
 	if shouldWriteUpstreamEmptyOutputError(finalText) && len(detected.Calls) == 0 {
 		status, message, code := upstreamEmptyOutputDetail(result.ContentFilter, finalText, finalThinking)
 		if historySession != nil {
diff --git a/internal/httpapi/openai/history_split_test.go b/internal/httpapi/openai/history_split_test.go
index 6e6c0ce..593735a 100644
--- a/internal/httpapi/openai/history_split_test.go
+++ b/internal/httpapi/openai/history_split_test.go
@@ -64,8 +64,8 @@ func TestBuildOpenAICurrentInputContextTranscriptUsesInjectedFileWrapper(t *test
 	_, historyMessages := splitOpenAIHistoryMessages(historySplitTestMessages(), 1)
 	transcript := buildOpenAICurrentInputContextTranscript(historyMessages)
 
-	if !strings.HasPrefix(transcript, "[file content end]\n\n") {
-		t.Fatalf("expected injected file wrapper prefix, got %q", transcript)
+	if strings.Contains(transcript, "[file content end]") || strings.Contains(transcript, "[file content begin]") || strings.Contains(transcript, "[file name]:") {
+		t.Fatalf("expected plain transcript without file wrapper tags, got %q", transcript)
 	}
 	if !strings.Contains(transcript, "<｜begin▁of▁sentence｜>") {
 		t.Fatalf("expected serialized conversation markers, got %q", transcript)
@@ -79,9 +79,7 @@ func TestBuildOpenAICurrentInputContextTranscriptUsesInjectedFileWrapper(t *test
 	if !strings.Contains(transcript, "<|DSML|tool_calls>") {
 		t.Fatalf("expected tool calls preserved, got %q", transcript)
 	}
-	if !strings.HasSuffix(transcript, "\n[file name]: history.txt\n[file content begin]\n") {
-		t.Fatalf("expected injected file wrapper suffix, got %q", transcript)
-	}
+
 }
 
 func TestSplitOpenAIHistoryMessagesUsesLatestUserTurn(t *testing.T) {
@@ -278,8 +276,8 @@ func TestApplyCurrentInputFileUploadsFirstTurnWithInjectedWrapper(t *testing.T)
 		t.Fatalf("unexpected upload filename: %q", upload.Filename)
 	}
 	uploadedText := string(upload.Data)
-	if !strings.HasPrefix(uploadedText, "[file content end]\n\n") {
-		t.Fatalf("expected injected file wrapper prefix, got %q", uploadedText)
+	if strings.Contains(uploadedText, "[file content end]") || strings.Contains(uploadedText, "[file content begin]") || strings.Contains(uploadedText, "[file name]:") {
+		t.Fatalf("expected uploaded transcript without file wrapper tags, got %q", uploadedText)
 	}
 	if !strings.Contains(uploadedText, "<｜begin▁of▁sentence｜><｜User｜>first turn content that is long enough") {
 		t.Fatalf("expected serialized current user turn markers, got %q", uploadedText)
@@ -287,9 +285,7 @@ func TestApplyCurrentInputFileUploadsFirstTurnWithInjectedWrapper(t *testing.T)
 	if !strings.Contains(uploadedText, promptcompat.ThinkingInjectionMarker) {
 		t.Fatalf("expected thinking injection in current input file, got %q", uploadedText)
 	}
-	if !strings.HasSuffix(uploadedText, "\n[file name]: history.txt\n[file content begin]\n") {
-		t.Fatalf("expected injected file wrapper suffix, got %q", uploadedText)
-	}
+
 	if strings.Contains(out.FinalPrompt, "first turn content that is long enough") {
 		t.Fatalf("expected current input text to be replaced in live prompt, got %s", out.FinalPrompt)
 	}
@@ -418,8 +414,8 @@ func TestChatCompletionsCurrentInputFileUploadsContextAndKeepsNeutralPrompt(t *t
 		t.Fatalf("unexpected purpose: %q", upload.Purpose)
 	}
 	historyText := string(upload.Data)
-	if !strings.Contains(historyText, "[file content end]") || !strings.Contains(historyText, "[file name]: history.txt") {
-		t.Fatalf("expected injected history.txt wrapper, got %s", historyText)
+	if strings.Contains(historyText, "[file content end]") || strings.Contains(historyText, "[file content begin]") || strings.Contains(historyText, "[file name]:") {
+		t.Fatalf("expected plain history transcript without wrapper tags, got %s", historyText)
 	}
 	if !strings.Contains(historyText, "latest user turn") {
 		t.Fatalf("expected full context to include latest turn, got %s", historyText)
diff --git a/internal/httpapi/openai/responses/empty_retry_runtime.go b/internal/httpapi/openai/responses/empty_retry_runtime.go
index c3f8fd9..a451c92 100644
--- a/internal/httpapi/openai/responses/empty_retry_runtime.go
+++ b/internal/httpapi/openai/responses/empty_retry_runtime.go
@@ -47,7 +47,7 @@ func (h *Handler) handleResponsesNonStreamWithRetry(w http.ResponseWriter, ctx c
 		result.thinking = accumulatedThinking
 		result.rawThinking = accumulatedRawThinking
 		result.toolDetectionThinking = accumulatedToolDetectionThinking
-		result.parsed = detectAssistantToolCalls(result.rawText, result.rawThinking, result.toolDetectionThinking, toolNames)
+		result.parsed = detectAssistantToolCalls(result.rawText, result.text, result.rawThinking, result.toolDetectionThinking, toolNames)
 		result.body = openaifmt.BuildResponseObjectWithToolCalls(responseID, model, usagePrompt, result.thinking, result.text, result.parsed.Calls, toolsRaw)
 
 		if !shouldRetryResponsesNonStream(result, attempts) {
@@ -87,7 +87,7 @@ func (h *Handler) collectResponsesNonStreamAttempt(w http.ResponseWriter, resp *
 	if searchEnabled {
 		sanitizedText = replaceCitationMarkersWithLinks(sanitizedText, result.CitationLinks)
 	}
-	textParsed := detectAssistantToolCalls(result.Text, result.Thinking, result.ToolDetectionThinking, toolNames)
+	textParsed := detectAssistantToolCalls(result.Text, sanitizedText, result.Thinking, result.ToolDetectionThinking, toolNames)
 	responseObj := openaifmt.BuildResponseObjectWithToolCalls(responseID, model, usagePrompt, sanitizedThinking, sanitizedText, textParsed.Calls, toolsRaw)
 	return responsesNonStreamResult{
 		rawThinking:           result.Thinking,
diff --git a/internal/httpapi/openai/responses/handler.go b/internal/httpapi/openai/responses/handler.go
index a5f243f..ac8cd04 100644
--- a/internal/httpapi/openai/responses/handler.go
+++ b/internal/httpapi/openai/responses/handler.go
@@ -130,6 +130,6 @@ func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta,
 	return shared.FilterIncrementalToolCallDeltasByAllowed(deltas, seenNames)
 }
 
-func detectAssistantToolCalls(text, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult {
-	return shared.DetectAssistantToolCalls(text, exposedThinking, detectionThinking, toolNames)
+func detectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult {
+	return shared.DetectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking, toolNames)
 }
diff --git a/internal/httpapi/openai/responses/responses_handler.go b/internal/httpapi/openai/responses/responses_handler.go
index c26e4be..a04e7b1 100644
--- a/internal/httpapi/openai/responses/responses_handler.go
+++ b/internal/httpapi/openai/responses/responses_handler.go
@@ -135,7 +135,7 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
 	if searchEnabled {
 		sanitizedText = replaceCitationMarkersWithLinks(sanitizedText, result.CitationLinks)
 	}
-	textParsed := detectAssistantToolCalls(result.Text, result.Thinking, result.ToolDetectionThinking, toolNames)
+	textParsed := detectAssistantToolCalls(result.Text, sanitizedText, result.Thinking, result.ToolDetectionThinking, toolNames)
 	if len(textParsed.Calls) == 0 && writeUpstreamEmptyOutputError(w, sanitizedText, sanitizedThinking, result.ContentFilter) {
 		return
 	}
diff --git a/internal/httpapi/openai/responses/responses_stream_runtime_core.go b/internal/httpapi/openai/responses/responses_stream_runtime_core.go
index b6a2088..2047dfe 100644
--- a/internal/httpapi/openai/responses/responses_stream_runtime_core.go
+++ b/internal/httpapi/openai/responses/responses_stream_runtime_core.go
@@ -150,7 +150,7 @@ func (s *responsesStreamRuntime) finalize(finishReason string, deferEmptyOutput
 	finalThinking := s.thinking.String()
 	finalToolDetectionThinking := s.toolDetectionThinking.String()
 	finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
-	textParsed := detectAssistantToolCalls(s.rawText.String(), s.rawThinking.String(), finalToolDetectionThinking, s.toolNames)
+	textParsed := detectAssistantToolCalls(s.rawText.String(), finalText, s.rawThinking.String(), finalToolDetectionThinking, s.toolNames)
 	detected := textParsed.Calls
 	s.logToolPolicyRejections(textParsed)
 
diff --git a/internal/httpapi/openai/responses/responses_stream_runtime_events.go b/internal/httpapi/openai/responses/responses_stream_runtime_events.go
index 550673f..20b9108 100644
--- a/internal/httpapi/openai/responses/responses_stream_runtime_events.go
+++ b/internal/httpapi/openai/responses/responses_stream_runtime_events.go
@@ -45,7 +45,7 @@ func (s *responsesStreamRuntime) processToolStreamEvents(events []toolstream.Eve
 	for _, evt := range events {
 		if emitContent && evt.Content != "" {
 			cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
-			if cleaned != "" && !(s.searchEnabled && sse.IsCitation(cleaned)) {
+			if cleaned != "" && (!s.searchEnabled || !sse.IsCitation(cleaned)) {
 				s.emitTextDelta(cleaned)
 			}
 		}
diff --git a/internal/httpapi/openai/shared/assistant_toolcalls.go b/internal/httpapi/openai/shared/assistant_toolcalls.go
index 25f930b..f90860f 100644
--- a/internal/httpapi/openai/shared/assistant_toolcalls.go
+++ b/internal/httpapi/openai/shared/assistant_toolcalls.go
@@ -6,12 +6,12 @@ import (
 	"ds2api/internal/toolcall"
 )
 
-func DetectAssistantToolCalls(text, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult {
-	textParsed := toolcall.ParseStandaloneToolCallsDetailed(text, toolNames)
+func DetectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult {
+	textParsed := toolcall.ParseStandaloneToolCallsDetailed(rawText, toolNames)
 	if len(textParsed.Calls) > 0 {
 		return textParsed
 	}
-	if strings.TrimSpace(text) != "" {
+	if strings.TrimSpace(visibleText) != "" {
 		return textParsed
 	}
 	thinking := detectionThinking
diff --git a/internal/promptcompat/history_transcript.go b/internal/promptcompat/history_transcript.go
index befcd1d..a3f7905 100644
--- a/internal/promptcompat/history_transcript.go
+++ b/internal/promptcompat/history_transcript.go
@@ -1,7 +1,6 @@
 package promptcompat
 
 import (
-	"fmt"
 	"strings"
 
 	"ds2api/internal/prompt"
@@ -32,5 +31,5 @@ func buildOpenAIInjectedFileTranscript(messages []any) string {
 	if transcript == "" {
 		return ""
 	}
-	return fmt.Sprintf("[file content end]\n\n%s\n\n[file name]: %s\n[file content begin]\n", transcript, CurrentInputContextFilename)
+	return transcript
 }