From df61f06d9abcf80ace4f8fdcbb349dd5d3e650d6 Mon Sep 17 00:00:00 2001
From: CJACK
Date: Sun, 26 Apr 2026 04:44:55 +0800
Subject: [PATCH] Normalization optimization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 AGENTS.md                                    |  1 +
 VERSION                                      |  2 +-
 docs/prompt-compatibility.md                 |  3 +-
 .../adapter/claude/deps_injection_test.go    | 23 +++++-
 internal/adapter/claude/handler_messages.go  | 15 ++--
 internal/adapter/claude/proxy_vercel_test.go | 40 +++++++++++
 internal/adapter/claude/standard_request.go  |  5 +-
 internal/adapter/gemini/handler_generate.go  | 72 +++++++++++++++++++
 internal/adapter/gemini/handler_test.go      | 40 +++++++++++
 internal/js/chat-stream/sse_parse_impl.js    | 52 +++++++++-----
 internal/translatorcliproxy/bridge_test.go   |  8 +++
 tests/node/chat-stream.test.js               | 18 +++++
 12 files changed, 248 insertions(+), 31 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index ff2006e..1c71307 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -24,5 +24,6 @@ These rules apply to all agent-made changes in this repository.
 
 ## Documentation Sync
 
+- When business logic or user-visible behavior changes, update the corresponding documentation in the same change.
 - `docs/prompt-compatibility.md` is the source-of-truth document for the “API -> pure-text web-chat context” compatibility flow.
 - If a change affects message normalization, tool prompt injection, prompt-visible tool history, file/reference handling, history split, or completion payload assembly, update `docs/prompt-compatibility.md` in the same change.
diff --git a/VERSION b/VERSION
index 9575d51..fcdb2e1 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.6.1
+4.0.0
diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md
index c9baaa6..e6ef59d 100644
--- a/docs/prompt-compatibility.md
+++ b/docs/prompt-compatibility.md
@@ -96,7 +96,8 @@
 DS2API's current core approach is not to pass the client-supplied `messages` and `tools` through as-is:
 - `prompt` is the primary carrier of conversation context.
 - `ref_file_ids` carries only file references, never ordinary text messages.
 - `tools` is not sent downstream as a native tool schema; it is rewritten into the `prompt`.
-- A thinking / reasoning switch explicitly passed by the client is normalized into the downstream `thinking_enabled`; when it is off, the compatibility layer will not surface upstream `response/thinking_content` as visible body text.
+- The OpenAI, Responses, Claude, and Gemini entry points are all first translated into a unified OpenAI-compatible request shape, then share the same normalization and DeepSeek payload assembly.
+- A client-supplied thinking / reasoning switch is normalized into the downstream `thinking_enabled`. A Claude-surface request without a `thinking` field counts as off, per Anthropic semantics; Gemini's `generationConfig.thinkingConfig.thinkingBudget` is translated into the same thinking switch; and when thinking is off, the compatibility layer will not surface upstream `response/thinking_content` as visible body text, even if the upstream returns it.
 
 ## 5. How the prompt is assembled
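
The last doc bullet above is the rule every surface now shares. A minimal Go sketch of it, assuming simplified request maps; `resolveThinking` is illustrative and not an identifier in this repository, and the real resolution goes through `util.ResolveThinkingEnabled` plus the adapter helpers added in this patch:

    // Sketch: how a Claude or Gemini request body maps to the downstream
    // thinking_enabled flag described in docs/prompt-compatibility.md.
    func resolveThinking(surface string, req map[string]any) bool {
        switch surface {
        case "claude":
            // Anthropic semantics: an absent "thinking" field means off.
            t, ok := req["thinking"].(map[string]any)
            return ok && t["type"] == "enabled"
        case "gemini":
            // A positive thinkingBudget means on; zero means off.
            gc, _ := req["generationConfig"].(map[string]any)
            tc, _ := gc["thinkingConfig"].(map[string]any)
            budget, ok := tc["thinkingBudget"].(float64)
            return ok && budget > 0
        }
        return false
    }

Under this rule, `{"thinking":{"type":"enabled"}}` on the Claude surface and `{"generationConfig":{"thinkingConfig":{"thinkingBudget":1024}}}` on the Gemini surface both resolve to on, while an empty request resolves to off on either surface.
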
diff --git a/internal/adapter/claude/deps_injection_test.go b/internal/adapter/claude/deps_injection_test.go
index c880dc4..e30ec2f 100644
--- a/internal/adapter/claude/deps_injection_test.go
+++ b/internal/adapter/claude/deps_injection_test.go
@@ -27,11 +27,32 @@ func TestNormalizeClaudeRequestUsesGlobalAliasMapping(t *testing.T) {
 	if out.Standard.ResolvedModel != "deepseek-v4-pro-search" {
 		t.Fatalf("resolved model mismatch: got=%q", out.Standard.ResolvedModel)
 	}
-	if !out.Standard.Thinking || !out.Standard.Search {
+	if out.Standard.Thinking || !out.Standard.Search {
 		t.Fatalf("unexpected flags: thinking=%v search=%v", out.Standard.Thinking, out.Standard.Search)
 	}
 }
 
+func TestNormalizeClaudeRequestEnablesThinkingWhenRequested(t *testing.T) {
+	req := map[string]any{
+		"model": "claude-opus-4-6",
+		"messages": []any{
+			map[string]any{"role": "user", "content": "hello"},
+		},
+		"thinking": map[string]any{"type": "enabled", "budget_tokens": 1024},
+	}
+	out, err := normalizeClaudeRequest(mockClaudeConfig{
+		aliases: map[string]string{
+			"claude-opus-4-6": "deepseek-v4-pro",
+		},
+	}, req)
+	if err != nil {
+		t.Fatalf("normalizeClaudeRequest error: %v", err)
+	}
+	if !out.Standard.Thinking {
+		t.Fatalf("expected explicit Claude thinking request to enable downstream thinking")
+	}
+}
+
 func TestNormalizeClaudeRequestPrefersGlobalAliasMapping(t *testing.T) {
 	req := map[string]any{
 		"model": "claude-sonnet-4-6",
diff --git a/internal/adapter/claude/handler_messages.go b/internal/adapter/claude/handler_messages.go
index d4f099e..e424503 100644
--- a/internal/adapter/claude/handler_messages.go
+++ b/internal/adapter/claude/handler_messages.go
@@ -52,7 +52,7 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store C
 		}
 	}
 	translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatClaude, translateModel, raw, stream)
-	translatedReq = applyExplicitThinkingOverrideToOpenAIRequest(translatedReq, req)
+	translatedReq = applyClaudeThinkingPolicyToOpenAIRequest(translatedReq, req)
 
 	isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1"
 	isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
@@ -124,15 +124,18 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store C
 	return true
 }
 
-func applyExplicitThinkingOverrideToOpenAIRequest(translated []byte, original map[string]any) []byte {
-	enabled, ok := util.ResolveThinkingOverride(original)
-	if !ok {
-		return translated
-	}
+func applyClaudeThinkingPolicyToOpenAIRequest(translated []byte, original map[string]any) []byte {
 	req := map[string]any{}
 	if err := json.Unmarshal(translated, &req); err != nil {
 		return translated
 	}
+	enabled, ok := util.ResolveThinkingOverride(original)
+	if !ok {
+		if _, translatedHasOverride := util.ResolveThinkingOverride(req); translatedHasOverride {
+			return translated
+		}
+		enabled = false
+	}
 	typ := "disabled"
 	if enabled {
 		typ = "enabled"
 	}
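
The rename marks a behavior change: when neither the original Claude request nor the translated request carries a thinking override, the helper now injects an explicit disable instead of passing the payload through. A sketch of that default as a companion test, assuming it lives in the same package with `encoding/json` and `testing` imported, and that `util.ResolveThinkingOverride` reports no override when the `thinking` field is absent:

    func TestApplyClaudeThinkingPolicyDefaultsToDisabled(t *testing.T) {
        // Translated OpenAI request with no thinking override of its own.
        translated := []byte(`{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hi"}]}`)
        // Original Claude request without a "thinking" field.
        out := applyClaudeThinkingPolicyToOpenAIRequest(translated, map[string]any{"model": "claude-sonnet-4-6"})
        var req map[string]any
        if err := json.Unmarshal(out, &req); err != nil {
            t.Fatalf("unmarshal: %v", err)
        }
        if thinking, _ := req["thinking"].(map[string]any); thinking["type"] != "disabled" {
            t.Fatalf("expected default-off thinking, got %#v", req)
        }
    }
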
diff --git a/internal/adapter/claude/proxy_vercel_test.go b/internal/adapter/claude/proxy_vercel_test.go
index 750e092..2eff38b 100644
--- a/internal/adapter/claude/proxy_vercel_test.go
+++ b/internal/adapter/claude/proxy_vercel_test.go
@@ -126,6 +126,46 @@ func TestClaudeProxyViaOpenAIPreservesThinkingOverride(t *testing.T) {
 	}
 }
 
+func TestClaudeProxyViaOpenAIDisablesThinkingByDefault(t *testing.T) {
+	openAI := &openAIProxyCaptureStub{}
+	h := &Handler{
+		Store:  claudeProxyStoreStub{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
+		OpenAI: openAI,
+	}
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(`{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hi"}],"stream":false}`))
+	rec := httptest.NewRecorder()
+
+	h.Messages(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
+	}
+	thinking, _ := openAI.seenReq["thinking"].(map[string]any)
+	if thinking["type"] != "disabled" {
+		t.Fatalf("expected Claude default to disable downstream thinking, got %#v", openAI.seenReq)
+	}
+}
+
+func TestClaudeProxyViaOpenAIEnablesThinkingWhenRequested(t *testing.T) {
+	openAI := &openAIProxyCaptureStub{}
+	h := &Handler{
+		Store:  claudeProxyStoreStub{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
+		OpenAI: openAI,
+	}
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(`{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":1024},"stream":false}`))
+	rec := httptest.NewRecorder()
+
+	h.Messages(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
+	}
+	thinking, _ := openAI.seenReq["thinking"].(map[string]any)
+	if thinking["type"] != "enabled" {
+		t.Fatalf("expected Claude explicit thinking to enable downstream thinking, got %#v", openAI.seenReq)
+	}
+}
+
 func TestClaudeProxyTranslatesInlineImageToOpenAIDataURL(t *testing.T) {
 	openAI := &openAIProxyCaptureStub{}
 	h := &Handler{OpenAI: openAI}
diff --git a/internal/adapter/claude/standard_request.go b/internal/adapter/claude/standard_request.go
index 7d4ec0a..0779602 100644
--- a/internal/adapter/claude/standard_request.go
+++ b/internal/adapter/claude/standard_request.go
@@ -31,12 +31,11 @@ func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNorma
 	dsPayload := convertClaudeToDeepSeek(payload, store)
 	dsModel, _ := dsPayload["model"].(string)
 
-	defaultThinkingEnabled, searchEnabled, ok := config.GetModelConfig(dsModel)
+	_, searchEnabled, ok := config.GetModelConfig(dsModel)
 	if !ok {
-		defaultThinkingEnabled = false
 		searchEnabled = false
 	}
-	thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled)
+	thinkingEnabled := util.ResolveThinkingEnabled(req, false)
 	finalPrompt := deepseek.MessagesPrepareWithThinking(toMessageMaps(dsPayload["messages"]), thinkingEnabled)
 	toolNames := extractClaudeToolNames(toolsRequested)
 	if len(toolNames) == 0 && len(toolsRequested) > 0 {
diff --git a/internal/adapter/gemini/handler_generate.go b/internal/adapter/gemini/handler_generate.go
index ea70c0f..c6a08eb 100644
--- a/internal/adapter/gemini/handler_generate.go
+++ b/internal/adapter/gemini/handler_generate.go
@@ -36,6 +36,11 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream
 		return true
 	}
 	routeModel := strings.TrimSpace(chi.URLParam(r, "model"))
+	var req map[string]any
+	if err := json.Unmarshal(raw, &req); err != nil {
+		writeGeminiError(w, http.StatusBadRequest, "invalid json")
+		return true
+	}
 	translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatGemini, routeModel, raw, stream)
 	if !strings.Contains(string(translatedReq), `"stream"`) {
 		var reqMap map[string]any
@@ -46,6 +51,7 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream
 			}
 		}
 	}
+	translatedReq = applyGeminiThinkingPolicyToOpenAIRequest(translatedReq, req)
 
 	isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1"
 	isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
@@ -116,6 +122,72 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream
 	return true
 }
 
+func applyGeminiThinkingPolicyToOpenAIRequest(translated []byte, original map[string]any) []byte {
+	req := map[string]any{}
+	if err := json.Unmarshal(translated, &req); err != nil {
+		return translated
+	}
+	enabled, ok := resolveGeminiThinkingOverride(original)
+	if !ok {
+		return translated
+	}
+	typ := "disabled"
+	if enabled {
+		typ = "enabled"
+	}
+	req["thinking"] = map[string]any{"type": typ}
+	out, err := json.Marshal(req)
+	if err != nil {
+		return translated
+	}
+	return out
+}
+
+func resolveGeminiThinkingOverride(req map[string]any) (bool, bool) {
+	generationConfig, ok := req["generationConfig"].(map[string]any)
+	if !ok {
+		generationConfig, ok = req["generation_config"].(map[string]any)
+	}
+	if !ok {
+		return false, false
+	}
+	thinkingConfig, ok := generationConfig["thinkingConfig"].(map[string]any)
+	if !ok {
+		thinkingConfig, ok = generationConfig["thinking_config"].(map[string]any)
+	}
+	if !ok {
+		return false, false
+	}
+	budget, ok := numericAny(thinkingConfig["thinkingBudget"])
+	if !ok {
+		budget, ok = numericAny(thinkingConfig["thinking_budget"])
+	}
+	if !ok {
+		return false, false
+	}
+	return budget > 0, true
+}
+
+func numericAny(raw any) (float64, bool) {
+	switch v := raw.(type) {
+	case float64:
+		return v, true
+	case float32:
+		return float64(v), true
+	case int:
+		return float64(v), true
+	case int64:
+		return float64(v), true
+	case int32:
+		return float64(v), true
+	case json.Number:
+		f, err := v.Float64()
+		return f, err == nil
+	default:
+		return 0, false
+	}
+}
+
 func writeGeminiErrorFromOpenAI(w http.ResponseWriter, status int, raw []byte) {
 	message := strings.TrimSpace(string(raw))
 	var parsed map[string]any
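
`resolveGeminiThinkingOverride` accepts both camelCase and snake_case spellings and treats a zero budget as an explicit disable rather than "no opinion". A sketch of the three interesting cases as a same-package test; the `float64` literals mirror how `encoding/json` decodes JSON numbers into `map[string]any`:

    func TestResolveGeminiThinkingOverrideCases(t *testing.T) {
        // No generationConfig at all: no override, translator output stands.
        if _, ok := resolveGeminiThinkingOverride(map[string]any{}); ok {
            t.Fatalf("expected no override without generationConfig")
        }
        // thinkingBudget 0: an explicit disable.
        enabled, ok := resolveGeminiThinkingOverride(map[string]any{
            "generationConfig": map[string]any{
                "thinkingConfig": map[string]any{"thinkingBudget": float64(0)},
            },
        })
        if !ok || enabled {
            t.Fatalf("expected thinkingBudget=0 to disable, got enabled=%v ok=%v", enabled, ok)
        }
        // Positive snake_case budget: an explicit enable.
        enabled, ok = resolveGeminiThinkingOverride(map[string]any{
            "generation_config": map[string]any{
                "thinking_config": map[string]any{"thinking_budget": float64(512)},
            },
        })
        if !ok || !enabled {
            t.Fatalf("expected thinking_budget=512 to enable, got enabled=%v ok=%v", enabled, ok)
        }
    }
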
diff --git a/internal/adapter/gemini/handler_test.go b/internal/adapter/gemini/handler_test.go
index 94a1a4e..01a36a4 100644
--- a/internal/adapter/gemini/handler_test.go
+++ b/internal/adapter/gemini/handler_test.go
@@ -290,6 +290,46 @@ func TestGeminiProxyTranslatesInlineImageToOpenAIDataURL(t *testing.T) {
 	}
 }
 
+func TestGeminiProxyViaOpenAIDisablesThinkingBudgetZero(t *testing.T) {
+	openAI := &geminiOpenAISuccessStub{}
+	h := &Handler{Store: testGeminiConfig{}, OpenAI: openAI}
+	r := chi.NewRouter()
+	RegisterRoutes(r, h)
+
+	body := `{"contents":[{"role":"user","parts":[{"text":"hello"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`
+	req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-flash:generateContent", strings.NewReader(body))
+	rec := httptest.NewRecorder()
+	r.ServeHTTP(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	thinking, _ := openAI.seenReq["thinking"].(map[string]any)
+	if thinking["type"] != "disabled" {
+		t.Fatalf("expected Gemini thinkingBudget=0 to disable OpenAI thinking, got %#v", openAI.seenReq)
+	}
+}
+
+func TestGeminiProxyViaOpenAIEnablesPositiveThinkingBudget(t *testing.T) {
+	openAI := &geminiOpenAISuccessStub{}
+	h := &Handler{Store: testGeminiConfig{}, OpenAI: openAI}
+	r := chi.NewRouter()
+	RegisterRoutes(r, h)
+
+	body := `{"contents":[{"role":"user","parts":[{"text":"hello"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":1024}}}`
+	req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-flash:generateContent", strings.NewReader(body))
+	rec := httptest.NewRecorder()
+	r.ServeHTTP(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	thinking, _ := openAI.seenReq["thinking"].(map[string]any)
+	if thinking["type"] != "enabled" {
+		t.Fatalf("expected Gemini positive thinkingBudget to enable OpenAI thinking, got %#v", openAI.seenReq)
+	}
+}
+
 func TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope(t *testing.T) {
 	h := &Handler{
 		Store: testGeminiConfig{},
diff --git a/internal/js/chat-stream/sse_parse_impl.js b/internal/js/chat-stream/sse_parse_impl.js
index 8db00ae..7c6cfae 100644
--- a/internal/js/chat-stream/sse_parse_impl.js
+++ b/internal/js/chat-stream/sse_parse_impl.js
@@ -54,6 +54,27 @@ function splitThinkingParts(parts) {
   return { parts: out, transitioned: thinkingDone };
 }
 
+function dropThinkingParts(parts) {
+  if (!Array.isArray(parts) || parts.length === 0) {
+    return parts;
+  }
+  return parts.filter((p) => p && p.type !== 'thinking');
+}
+
+function finalizeThinkingParts(parts, thinkingEnabled, newType) {
+  const splitResult = splitThinkingParts(parts);
+  let finalType = newType;
+  let finalParts = splitResult.parts;
+  if (splitResult.transitioned) {
+    finalType = 'text';
+  }
+  if (!thinkingEnabled) {
+    finalParts = dropThinkingParts(finalParts);
+    finalType = 'text';
+  }
+  return { parts: finalParts, newType: finalType };
+}
+
 function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenceMarkers = true) {
   if (!chunk || typeof chunk !== 'object') {
     return {
@@ -194,7 +215,9 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
   let partType = 'text';
   if (pathValue === 'response/thinking_content') {
-    if (newType === 'text') {
+    if (!thinkingEnabled) {
+      partType = 'thinking';
+    } else if (newType === 'text') {
       partType = 'text';
     } else {
       partType = 'thinking';
     }
@@ -239,20 +262,17 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
   }
 
   let resolvedParts = filterLeakedContentFilterParts(parts);
-  const splitResult = splitThinkingParts(resolvedParts);
-  if (splitResult.transitioned) {
-    newType = 'text';
-  }
+  const finalized = finalizeThinkingParts(resolvedParts, thinkingEnabled, newType);
 
   return {
     parsed: true,
-    parts: splitResult.parts,
+    parts: finalized.parts,
     finished: false,
     contentFilter: false,
     errorMessage: '',
     promptTokens,
     outputTokens,
-    newType,
+    newType: finalized.newType,
   };
 }
 
@@ -273,20 +293,17 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
   parts.push(...extracted.parts);
 
   let resolvedParts = filterLeakedContentFilterParts(parts);
-  const splitResult = splitThinkingParts(resolvedParts);
-  if (splitResult.transitioned) {
-    newType = 'text';
-  }
+  const finalized = finalizeThinkingParts(resolvedParts, thinkingEnabled, newType);
 
   return {
     parsed: true,
-    parts: splitResult.parts,
+    parts: finalized.parts,
     finished: false,
     contentFilter: false,
     errorMessage: '',
     promptTokens,
     outputTokens,
-    newType,
+    newType: finalized.newType,
   };
 }
 
@@ -316,20 +333,17 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
   }
 
   let resolvedParts = filterLeakedContentFilterParts(parts);
-  const splitResult = splitThinkingParts(resolvedParts);
-  if (splitResult.transitioned) {
-    newType = 'text';
-  }
+  const finalized = finalizeThinkingParts(resolvedParts, thinkingEnabled, newType);
 
   return {
     parsed: true,
-    parts: splitResult.parts,
+    parts: finalized.parts,
     finished: false,
     contentFilter: false,
     errorMessage: '',
     promptTokens,
     outputTokens,
-    newType,
+    newType: finalized.newType,
   };
 }
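
`finalizeThinkingParts` centralizes the policy the three return sites above now share: run the thinking/text split, then, if thinking is disabled, drop thinking parts outright and force the stream type back to `text`. A Go restatement of that policy for reference; illustrative only, since the shipped implementation is the JavaScript above, and this sketch leaves out the transition bookkeeping that `splitThinkingParts` performs:

    type part struct {
        Type string
        Text string
    }

    // finalizeParts mirrors the JS policy: with thinking disabled, thinking
    // parts never reach the client and the emitted type is forced to text.
    func finalizeParts(parts []part, thinkingEnabled bool, newType string) ([]part, string) {
        if thinkingEnabled {
            return parts, newType
        }
        out := make([]part, 0, len(parts))
        for _, p := range parts {
            if p.Type != "thinking" {
                out = append(out, p)
            }
        }
        return out, "text"
    }

Fed a thinking part ("hidden thought") followed by a text part ("visible answer") with thinking disabled, this returns only the text part and `"text"`, which is exactly what the node test added below asserts against the JavaScript implementation.
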
diff --git a/internal/translatorcliproxy/bridge_test.go b/internal/translatorcliproxy/bridge_test.go
index 9dbfe30..3370360 100644
--- a/internal/translatorcliproxy/bridge_test.go
+++ b/internal/translatorcliproxy/bridge_test.go
@@ -16,6 +16,14 @@ func TestToOpenAIClaude(t *testing.T) {
 	}
 }
 
+func TestToOpenAIGeminiThinkingBudgetZeroDisablesReasoning(t *testing.T) {
+	raw := []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`)
+	got := string(ToOpenAI(sdktranslator.FormatGemini, "gemini-2.5-flash", raw, false))
+	if !strings.Contains(got, `"reasoning_effort":"none"`) {
+		t.Fatalf("expected Gemini thinkingBudget=0 to translate to reasoning_effort none, got: %s", got)
+	}
+}
+
 func TestFromOpenAINonStreamClaude(t *testing.T) {
 	original := []byte(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":false}`)
 	translatedReq := []byte(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":false}`)
diff --git a/tests/node/chat-stream.test.js b/tests/node/chat-stream.test.js
index 4394a86..167c5c7 100644
--- a/tests/node/chat-stream.test.js
+++ b/tests/node/chat-stream.test.js
@@ -233,6 +233,24 @@ test('parseChunkForContent handles response/fragments APPEND with thinking and r
   ]);
 });
 
+test('parseChunkForContent drops thinking content when thinking is disabled', () => {
+  const thinking = parseChunkForContent(
+    { p: 'response/thinking_content', v: 'hidden thought' },
+    false,
+    'text',
+  );
+  assert.equal(thinking.finished, false);
+  assert.equal(thinking.newType, 'text');
+  assert.deepEqual(thinking.parts, []);
+
+  const answer = parseChunkForContent(
+    { p: 'response/content', v: 'visible answer' },
+    false,
+    thinking.newType,
+  );
+  assert.deepEqual(answer.parts, [{ text: 'visible answer', type: 'text' }]);
+});
+
 test('parseChunkForContent supports wrapped response.fragments object shape', () => {
   const chunk = {
     p: 'response',