diff --git a/API.md b/API.md index 7d668ed..4c44237 100644 --- a/API.md +++ b/API.md @@ -42,7 +42,7 @@ - Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受 DSML wrapper 别名 ``、`<|tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`)、`DSML` 与工具标签名黏连的常见 typo(如 ``),以及旧式 canonical XML `` → `` → ``。实现上采用窄容错结构扫描:只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径,裸 `` 不计为已支持语法;流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量(如 `123`、`true`、`null`、数组或对象),会按结构化值输出,不再一律当作字符串;若 CDATA 偶发漏闭合,则会在最终 parse / flush 恢复阶段做窄修复,尽量保住已完整包裹的外层工具调用。 - `Admin API` 将配置与运行时策略分开:`/admin/config*` 管静态配置,`/admin/settings*` 管运行时行为。 - 当上游返回 thinking-only 响应(模型输出了推理链但无可见文本)时,非流式补全会自动重试一次:以多轮对话 follow-up 方式追加 prompt 后缀 `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` 并设置 `parent_message_id` 在同一 DeepSeek session 内让模型重新输出;重试最大 1 次。 -- 引用标记剥离(strip reference markers)当前为固定开启的运行时行为,所有协议适配层统一生效。 +- 引用标记处理边界:流式输出默认隐藏 `[citation:N]` / `[reference:N]` 这类上游内部占位符;非流式输出默认把 DeepSeek 搜索引用标记转换为 Markdown 引用链接。 --- diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index 799219f..ee97b2a 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -114,7 +114,8 @@ DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools` - 对 OpenAI Chat / Responses 的非流式收尾,如果最终可见正文为空,兼容层会优先尝试把思维链中的独立 DSML / XML 工具块当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测,但不会因为思维链内容去中途拦截或改写流式输出;真正的工具识别始终基于原始上游文本,而不是基于“已经做过可见输出清洗”的版本,因此即使最终可见层会剥离完整 leaked DSML / XML `tool_calls` wrapper、并抑制全空参数或无效 wrapper 块,也不会影响真实工具调用转成结构化 `tool_calls` / `function_call`。补发结果会作为本轮 assistant 的结构化 `tool_calls` / `function_call` 输出返回,而不是塞进 `content` 文本;如果客户端没有开启 thinking / reasoning,思维链只用于检测,不会作为 `reasoning_content` 或可见正文暴露。只有正文为空且思维链里也没有可执行工具调用时,才继续按空回复错误处理。 - OpenAI Chat / Responses 的空回复错误处理之前会默认做一次内部补偿重试:第一次上游完整结束后,如果最终可见正文为空、没有解析到工具调用、也没有已经向客户端流式发出工具调用,并且终止原因不是 `content_filter`,兼容层会复用同一个 `chat_session_id`、账号、token 与工具策略,把原始 completion `prompt` 追加固定后缀 `Previous reply had no visible output. Please regenerate the visible final answer or tool call now.` 后重新提交一次。重试遵循 DeepSeek 多轮对话协议:从第一次上游 SSE 流中提取 `response_message_id`,并在重试 payload 中设置 `parent_message_id` 为该值,使重试成为同一会话的后续轮次而非断裂的根消息;同时重新获取一次 PoW(若 PoW 获取失败则回退到原始 PoW)。该重试不会重新标准化消息、不会新建 session、不会切换账号,也不会向流式客户端插入重试标记;第二次 thinking / reasoning 会按正常增量直接接到第一次之后,并继续使用 overlap trim 去重。若第二次仍为空,终端错误码仍保持现有 `upstream_empty_output`;若任一尝试触发空 `content_filter`,不做补偿重试并保持 `content_filter` 错误。JS Vercel 运行时同样设置 `parent_message_id`,但因无法直接调用 PoW API 而复用原始 PoW。 -- OpenAI Chat / Responses 在最终可见正文渲染阶段,会把 DeepSeek 搜索返回中的 `[citation:N]` / `[reference:N]` 标记替换成对应 Markdown 链接。`citation` 标记按一基序号解析;`reference` 标记只有在同一段正文中出现 `[reference:0]`(允许冒号后有空格)时才按零基序号映射,并且不会影响同段正文里的 `citation` 标记。 +- 非流式 OpenAI Chat / Responses、Claude Messages、Gemini generateContent 在最终可见正文渲染阶段,会把 DeepSeek 搜索返回中的 `[citation:N]` / `[reference:N]` 标记替换成对应 Markdown 链接。`citation` 标记按一基序号解析;`reference` 标记只有在同一段正文中出现 `[reference:0]`(允许冒号后有空格)时才按零基序号映射,并且不会影响同段正文里的 `citation` 标记。 +- 流式输出仍默认隐藏 `[citation:N]` / `[reference:N]` 这类上游内部标记,避免分片输出中泄漏尚未完成映射的引用占位符。 ## 5. prompt 是怎么拼出来的 diff --git a/internal/assistantturn/turn_test.go b/internal/assistantturn/turn_test.go index 15565ce..4fa6c99 100644 --- a/internal/assistantturn/turn_test.go +++ b/internal/assistantturn/turn_test.go @@ -11,7 +11,7 @@ func TestBuildTurnFromCollectedTextCitation(t *testing.T) { turn := BuildTurnFromCollected(sse.CollectResult{ Text: "See [citation:1]", CitationLinks: map[int]string{1: "https://example.com"}, - }, BuildOptions{Model: "deepseek-v4-flash", Prompt: "prompt", SearchEnabled: true, StripReferenceMarkers: true}) + }, BuildOptions{Model: "deepseek-v4-flash", Prompt: "prompt", SearchEnabled: true}) if turn.Text != "See [1](https://example.com)" { t.Fatalf("text mismatch: %q", turn.Text) } @@ -23,6 +23,20 @@ func TestBuildTurnFromCollectedTextCitation(t *testing.T) { } } +func TestBuildTurnFromCollectedKeepsNonStreamReferenceLinks(t *testing.T) { + turn := BuildTurnFromCollected(sse.CollectResult{ + Text: "结论[reference:0],补充[reference:1]。", + CitationLinks: map[int]string{ + 1: "https://example.com/a", + 2: "https://example.com/b", + }, + }, BuildOptions{Model: "deepseek-v4-flash-search", Prompt: "prompt", SearchEnabled: true}) + want := "结论[0](https://example.com/a),补充[1](https://example.com/b)。" + if turn.Text != want { + t.Fatalf("text mismatch: got %q want %q", turn.Text, want) + } +} + func TestBuildTurnFromCollectedToolCall(t *testing.T) { turn := BuildTurnFromCollected(sse.CollectResult{ Text: `{"x":1}`, diff --git a/internal/completionruntime/nonstream_test.go b/internal/completionruntime/nonstream_test.go index 56e1d1e..e10b927 100644 --- a/internal/completionruntime/nonstream_test.go +++ b/internal/completionruntime/nonstream_test.go @@ -119,6 +119,29 @@ func TestExecuteNonStreamWithRetryUsesParentMessageForEmptyRetry(t *testing.T) { } } +func TestExecuteNonStreamWithRetryConvertsReferenceMarkers(t *testing.T) { + ds := &fakeDeepSeekCaller{responses: []*http.Response{sseHTTPResponse( + http.StatusOK, + `data: {"p":"response/content","v":"答案[reference:0]。","citation":{"cite_index":0,"url":"https://example.com/ref"}}`, + )}} + stdReq := promptcompat.StandardRequest{ + Surface: "test", + ResponseModel: "deepseek-v4-flash-search", + PromptTokenText: "prompt", + FinalPrompt: "final prompt", + Search: true, + } + + result, outErr := ExecuteNonStreamWithRetry(context.Background(), ds, &auth.RequestAuth{}, stdReq, Options{}) + if outErr != nil { + t.Fatalf("unexpected output error: %#v", outErr) + } + want := "答案[0](https://example.com/ref)。" + if result.Turn.Text != want { + t.Fatalf("text mismatch: got %q want %q", result.Turn.Text, want) + } +} + func TestStartCompletionAppliesCurrentInputFileGlobally(t *testing.T) { ds := &fakeDeepSeekCaller{responses: []*http.Response{sseHTTPResponse(http.StatusOK, `data: {"p":"response/content","v":"ok"}`)}} stdReq := promptcompat.StandardRequest{ diff --git a/internal/httpapi/claude/handler_messages.go b/internal/httpapi/claude/handler_messages.go index 0e8f94f..8478dc7 100644 --- a/internal/httpapi/claude/handler_messages.go +++ b/internal/httpapi/claude/handler_messages.go @@ -100,9 +100,8 @@ func (h *Handler) handleClaudeDirect(w http.ResponseWriter, r *http.Request) boo return true } result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{ - StripReferenceMarkers: stripReferenceMarkersEnabled(), - RetryEnabled: true, - CurrentInputFile: h.Store, + RetryEnabled: true, + CurrentInputFile: h.Store, }) if outErr != nil { if historySession != nil { diff --git a/internal/httpapi/gemini/handler_generate.go b/internal/httpapi/gemini/handler_generate.go index 9161036..b2a4114 100644 --- a/internal/httpapi/gemini/handler_generate.go +++ b/internal/httpapi/gemini/handler_generate.go @@ -97,9 +97,8 @@ func (h *Handler) handleGeminiDirect(w http.ResponseWriter, r *http.Request, str return true } result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{ - StripReferenceMarkers: stripReferenceMarkersEnabled(), - RetryEnabled: true, - CurrentInputFile: h.Store, + RetryEnabled: true, + CurrentInputFile: h.Store, }) if outErr != nil { if historySession != nil { @@ -330,12 +329,11 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht } result := sse.CollectStream(resp, thinkingEnabled, true) - stripReferenceMarkers := stripReferenceMarkersEnabled() writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse( model, finalPrompt, - cleanVisibleOutput(result.Thinking, stripReferenceMarkers), - cleanVisibleOutput(result.Text, stripReferenceMarkers), + cleanVisibleOutput(result.Thinking, false), + cleanVisibleOutput(result.Text, false), toolNames, )) } diff --git a/internal/httpapi/openai/chat/empty_retry_runtime.go b/internal/httpapi/openai/chat/empty_retry_runtime.go index 72cbcdb..748a39b 100644 --- a/internal/httpapi/openai/chat/empty_retry_runtime.go +++ b/internal/httpapi/openai/chat/empty_retry_runtime.go @@ -98,12 +98,11 @@ func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http. } result := sse.CollectStream(resp, thinkingEnabled, true) turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{ - Model: model, - Prompt: usagePrompt, - SearchEnabled: searchEnabled, - StripReferenceMarkers: stripReferenceMarkersEnabled(), - ToolNames: toolNames, - ToolsRaw: toolsRaw, + Model: model, + Prompt: usagePrompt, + SearchEnabled: searchEnabled, + ToolNames: toolNames, + ToolsRaw: toolsRaw, }) respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, turn.Thinking, turn.Text, turn.ToolCalls, toolsRaw) return chatNonStreamResult{ diff --git a/internal/httpapi/openai/chat/handler_chat.go b/internal/httpapi/openai/chat/handler_chat.go index 2b6b24d..61703a0 100644 --- a/internal/httpapi/openai/chat/handler_chat.go +++ b/internal/httpapi/openai/chat/handler_chat.go @@ -80,9 +80,8 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) { if !stdReq.Stream { result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{ - StripReferenceMarkers: stripReferenceMarkersEnabled(), - RetryEnabled: true, - CurrentInputFile: h.Store, + RetryEnabled: true, + CurrentInputFile: h.Store, }) sessionID = result.SessionID if outErr != nil { @@ -164,14 +163,13 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co result := sse.CollectStream(resp, thinkingEnabled, true) turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{ - Model: model, - Prompt: finalPrompt, - RefFileTokens: refFileTokens, - SearchEnabled: searchEnabled, - StripReferenceMarkers: stripReferenceMarkersEnabled(), - ToolNames: toolNames, - ToolsRaw: toolsRaw, - ToolChoice: promptcompat.DefaultToolChoicePolicy(), + Model: model, + Prompt: finalPrompt, + RefFileTokens: refFileTokens, + SearchEnabled: searchEnabled, + ToolNames: toolNames, + ToolsRaw: toolsRaw, + ToolChoice: promptcompat.DefaultToolChoicePolicy(), }) outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{}) if outcome.ShouldFail { diff --git a/internal/httpapi/openai/responses/responses_handler.go b/internal/httpapi/openai/responses/responses_handler.go index 4b45a36..3a6680d 100644 --- a/internal/httpapi/openai/responses/responses_handler.go +++ b/internal/httpapi/openai/responses/responses_handler.go @@ -105,9 +105,8 @@ func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) { }) if !stdReq.Stream { result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{ - StripReferenceMarkers: stripReferenceMarkersEnabled(), - RetryEnabled: true, - CurrentInputFile: h.Store, + RetryEnabled: true, + CurrentInputFile: h.Store, }) if outErr != nil { if historySession != nil { @@ -152,14 +151,13 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res result := sse.CollectStream(resp, thinkingEnabled, true) turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{ - Model: model, - Prompt: finalPrompt, - RefFileTokens: refFileTokens, - SearchEnabled: searchEnabled, - StripReferenceMarkers: stripReferenceMarkersEnabled(), - ToolNames: toolNames, - ToolsRaw: toolsRaw, - ToolChoice: toolChoice, + Model: model, + Prompt: finalPrompt, + RefFileTokens: refFileTokens, + SearchEnabled: searchEnabled, + ToolNames: toolNames, + ToolsRaw: toolsRaw, + ToolChoice: toolChoice, }) logResponsesToolPolicyRejection(traceID, toolChoice, turn.ParsedToolCalls, "text") outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{}) diff --git a/internal/httpapi/openai/shared/stream_accumulator.go b/internal/httpapi/openai/shared/stream_accumulator.go index 95ded0e..472748e 100644 --- a/internal/httpapi/openai/shared/stream_accumulator.go +++ b/internal/httpapi/openai/shared/stream_accumulator.go @@ -89,11 +89,11 @@ func (a *StreamAccumulator) applyTextPart(text string) StreamPartDelta { } a.RawText.WriteString(rawTrimmed) delta := StreamPartDelta{Type: "text", RawText: rawTrimmed} - cleanedText := CleanVisibleOutput(rawTrimmed, a.StripReferenceMarkers) - if a.SearchEnabled && sse.IsCitation(cleanedText) { + if a.SearchEnabled && sse.IsCitation(rawTrimmed) { delta.CitationOnly = true return delta } + cleanedText := CleanVisibleOutput(rawTrimmed, a.StripReferenceMarkers) trimmed := sse.TrimContinuationOverlapFromBuilder(&a.Text, cleanedText) if trimmed == "" { return delta diff --git a/internal/httpapi/openai/shared/stream_accumulator_test.go b/internal/httpapi/openai/shared/stream_accumulator_test.go index 375cfbf..1f4fe93 100644 --- a/internal/httpapi/openai/shared/stream_accumulator_test.go +++ b/internal/httpapi/openai/shared/stream_accumulator_test.go @@ -95,3 +95,21 @@ func TestStreamAccumulatorSuppressesCitationTextWhenSearchEnabled(t *testing.T) t.Fatalf("visible text = %q", got) } } + +func TestStreamAccumulatorStripsInlineCitationAndReferenceMarkers(t *testing.T) { + acc := StreamAccumulator{SearchEnabled: true, StripReferenceMarkers: true} + result := acc.Apply(sse.LineResult{ + Parsed: true, + Parts: []sse.ContentPart{{Type: "text", Text: "广州天气[citation:1] 多云[reference:0]"}}, + }) + + if !result.ContentSeen { + t.Fatalf("expected marker chunk to mark upstream content") + } + if got := acc.Text.String(); got != "广州天气 多云" { + t.Fatalf("visible text = %q", got) + } + if len(result.Parts) != 1 || result.Parts[0].VisibleText != "广州天气 多云" { + t.Fatalf("unexpected parts: %#v", result.Parts) + } +} diff --git a/internal/js/chat-stream/sse_parse_impl.js b/internal/js/chat-stream/sse_parse_impl.js index aff7104..6f5922e 100644 --- a/internal/js/chat-stream/sse_parse_impl.js +++ b/internal/js/chat-stream/sse_parse_impl.js @@ -621,7 +621,7 @@ function stripReferenceMarkersText(text) { if (!text) { return text; } - return text.replace(/\[reference:\s*\d+\]/gi, ''); + return text.replace(/\[(?:citation|reference):\s*\d+\]/gi, ''); } function asString(v) { diff --git a/internal/textclean/reference_markers.go b/internal/textclean/reference_markers.go index ec41ce9..267f0fb 100644 --- a/internal/textclean/reference_markers.go +++ b/internal/textclean/reference_markers.go @@ -2,19 +2,18 @@ package textclean import "regexp" -var referenceMarkerPattern = regexp.MustCompile(`(?i)\[reference:\s*\d+\]`) +var citationReferenceMarkerPattern = regexp.MustCompile(`(?i)\[(citation|reference):\s*\d+\]`) func StripReferenceMarkers(text string) string { if text == "" { return text } - return referenceMarkerPattern.ReplaceAllString(text, "") + return citationReferenceMarkerPattern.ReplaceAllString(text, "") } -// StripReferenceMarkersEnabled returns true while reference-marker -// stripping remains the fixed runtime default. When the behaviour is -// eventually removed this function can be deleted and callers can drop -// the conditional. +// StripReferenceMarkersEnabled returns the default for streaming surfaces, +// where partial citation/reference markers are hidden before the final +// link metadata is available. func StripReferenceMarkersEnabled() bool { return true } diff --git a/tests/node/chat-stream.test.js b/tests/node/chat-stream.test.js index 0171de6..5ac771b 100644 --- a/tests/node/chat-stream.test.js +++ b/tests/node/chat-stream.test.js @@ -615,17 +615,17 @@ test('parseChunkForContent preserves space-only content tokens', () => { assert.deepEqual(parsed.parts, [{ text: ' ', type: 'text' }]); }); -test('parseChunkForContent strips reference markers from fragment content', () => { +test('parseChunkForContent strips citation and reference markers from fragment content', () => { const chunk = { p: 'response/fragments', o: 'APPEND', v: [ - { type: 'RESPONSE', content: '广州天气 [reference:12] 多云' }, + { type: 'RESPONSE', content: '广州天气 [citation:1] [reference:12] 多云' }, ], }; const parsed = parseChunkForContent(chunk, false, 'text'); assert.equal(parsed.finished, false); - assert.deepEqual(parsed.parts, [{ text: '广州天气 多云', type: 'text' }]); + assert.deepEqual(parsed.parts, [{ text: '广州天气 多云', type: 'text' }]); }); test('parseChunkForContent detects content_filter status and ignores upstream output tokens', () => {