Normalization optimization
@@ -24,5 +24,6 @@ These rules apply to all agent-made changes in this repository.

## Documentation Sync

- When business logic or user-visible behavior changes, update the corresponding documentation in the same change.
- `docs/prompt-compatibility.md` is the source-of-truth document for the “API -> pure-text web-chat context” compatibility flow.
- If a change affects message normalization, tool prompt injection, prompt-visible tool history, file/reference handling, history split, or completion payload assembly, update `docs/prompt-compatibility.md` in the same change.

@@ -96,7 +96,8 @@ DS2API's current core approach is not to pass the client-supplied `messages`, `tools`
- `prompt` is the primary carrier of the conversation context.
- `ref_file_ids` carries file references only, never plain text messages.
- `tools` are not sent downstream as a native tool schema; they are rewritten into the `prompt`.
- Thinking / reasoning switches passed explicitly by the client are normalized to the downstream `thinking_enabled`; when disabled, even if the upstream returns `response/thinking_content`, the compatibility layer will not emit it as visible body text.
- The OpenAI, Responses, Claude, and Gemini entry points are all first translated into a unified OpenAI-compatible request shape, then go through the same normalization and DeepSeek payload assembly.
- Thinking / reasoning switches passed by the client are normalized to the downstream `thinking_enabled`. On the Claude surface, a request without a `thinking` field is treated as disabled, per Anthropic semantics; Gemini's `generationConfig.thinkingConfig.thinkingBudget` is translated into the same thinking switch; when disabled, even if the upstream returns `response/thinking_content`, the compatibility layer will not emit it as visible body text. A sketch of this collapse follows.
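
The surface-specific switches above all collapse into one downstream flag. Below is a minimal standalone sketch of that collapse, with purely illustrative names (not DS2API's actual functions; the real logic lives in the per-surface handlers changed later in this commit):

```go
package main

import "fmt"

// resolveThinking is an illustrative stand-in, not DS2API's actual code.
// It returns (enabled, explicit): whether downstream thinking should be on,
// and whether the client stated a choice at all.
func resolveThinking(surface string, req map[string]any) (bool, bool) {
	switch surface {
	case "claude":
		// Anthropic semantics: a missing "thinking" field means disabled.
		t, ok := req["thinking"].(map[string]any)
		if !ok {
			return false, false
		}
		return t["type"] == "enabled", true
	case "gemini":
		// generationConfig.thinkingConfig.thinkingBudget > 0 means enabled.
		gc, _ := req["generationConfig"].(map[string]any)
		tc, _ := gc["thinkingConfig"].(map[string]any)
		if budget, ok := tc["thinkingBudget"].(float64); ok {
			return budget > 0, true
		}
		return false, false
	default:
		return false, false
	}
}

func main() {
	claudeReq := map[string]any{"thinking": map[string]any{"type": "enabled"}}
	geminiReq := map[string]any{
		"generationConfig": map[string]any{
			"thinkingConfig": map[string]any{"thinkingBudget": float64(0)},
		},
	}
	fmt.Println(resolveThinking("claude", claudeReq))        // true true
	fmt.Println(resolveThinking("gemini", geminiReq))        // false true
	fmt.Println(resolveThinking("claude", map[string]any{})) // false false
}
```

The second return value is what separates an explicit client choice from the fallback default; it is what lets the Claude surface default to disabled while still honoring explicit overrides.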

## 5. How the prompt is assembled

@@ -27,11 +27,32 @@ func TestNormalizeClaudeRequestUsesGlobalAliasMapping(t *testing.T) {
	if out.Standard.ResolvedModel != "deepseek-v4-pro-search" {
		t.Fatalf("resolved model mismatch: got=%q", out.Standard.ResolvedModel)
	}
	if !out.Standard.Thinking || !out.Standard.Search {
	if out.Standard.Thinking || !out.Standard.Search {
		t.Fatalf("unexpected flags: thinking=%v search=%v", out.Standard.Thinking, out.Standard.Search)
	}
}

func TestNormalizeClaudeRequestEnablesThinkingWhenRequested(t *testing.T) {
	req := map[string]any{
		"model": "claude-opus-4-6",
		"messages": []any{
			map[string]any{"role": "user", "content": "hello"},
		},
		"thinking": map[string]any{"type": "enabled", "budget_tokens": 1024},
	}
	out, err := normalizeClaudeRequest(mockClaudeConfig{
		aliases: map[string]string{
			"claude-opus-4-6": "deepseek-v4-pro",
		},
	}, req)
	if err != nil {
		t.Fatalf("normalizeClaudeRequest error: %v", err)
	}
	if !out.Standard.Thinking {
		t.Fatalf("expected explicit Claude thinking request to enable downstream thinking")
	}
}

func TestNormalizeClaudeRequestPrefersGlobalAliasMapping(t *testing.T) {
	req := map[string]any{
		"model": "claude-sonnet-4-6",

@@ -52,7 +52,7 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store C
		}
	}
	translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatClaude, translateModel, raw, stream)
	translatedReq = applyExplicitThinkingOverrideToOpenAIRequest(translatedReq, req)
	translatedReq = applyClaudeThinkingPolicyToOpenAIRequest(translatedReq, req)

	isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1"
	isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
@@ -124,15 +124,18 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store C
	return true
}

func applyExplicitThinkingOverrideToOpenAIRequest(translated []byte, original map[string]any) []byte {
	enabled, ok := util.ResolveThinkingOverride(original)
	if !ok {
		return translated
	}
func applyClaudeThinkingPolicyToOpenAIRequest(translated []byte, original map[string]any) []byte {
	req := map[string]any{}
	if err := json.Unmarshal(translated, &req); err != nil {
		return translated
	}
	enabled, ok := util.ResolveThinkingOverride(original)
	if !ok {
		if _, translatedHasOverride := util.ResolveThinkingOverride(req); translatedHasOverride {
			return translated
		}
		enabled = false
	}
	typ := "disabled"
	if enabled {
		typ = "enabled"
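
The hunk above is cut off at `typ = "enabled"`. For orientation, here is a self-contained approximation of the whole policy; `util.ResolveThinkingOverride` is replaced by a simplified stand-in that only checks for an explicit `thinking` object, so treat this as a sketch of the behavior, not the project's code:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// applyPolicy mirrors the shape of applyClaudeThinkingPolicyToOpenAIRequest:
// an explicit choice in the original request wins; an override already present
// in the translated request is left alone; otherwise thinking defaults to off.
func applyPolicy(translated []byte, original map[string]any) []byte {
	req := map[string]any{}
	if err := json.Unmarshal(translated, &req); err != nil {
		return translated
	}
	enabled := false
	if t, ok := original["thinking"].(map[string]any); ok {
		enabled = t["type"] == "enabled"
	} else if _, ok := req["thinking"].(map[string]any); ok {
		return translated // translator already decided; keep its choice
	}
	typ := "disabled"
	if enabled {
		typ = "enabled"
	}
	req["thinking"] = map[string]any{"type": typ}
	out, err := json.Marshal(req)
	if err != nil {
		return translated
	}
	return out
}

func main() {
	translated := []byte(`{"model":"deepseek-v4-flash","messages":[]}`)
	original := map[string]any{"model": "claude-sonnet-4-6"}
	fmt.Println(string(applyPolicy(translated, original)))
	// {"messages":[],"model":"deepseek-v4-flash","thinking":{"type":"disabled"}}
}
```

The observable effect matches the new handler tests below: with no explicit override anywhere, the Claude surface now stamps `"thinking":{"type":"disabled"}` onto the translated request.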
@@ -126,6 +126,46 @@ func TestClaudeProxyViaOpenAIPreservesThinkingOverride(t *testing.T) {
	}
}

func TestClaudeProxyViaOpenAIDisablesThinkingByDefault(t *testing.T) {
	openAI := &openAIProxyCaptureStub{}
	h := &Handler{
		Store:  claudeProxyStoreStub{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
		OpenAI: openAI,
	}
	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(`{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hi"}],"stream":false}`))
	rec := httptest.NewRecorder()

	h.Messages(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
	}
	thinking, _ := openAI.seenReq["thinking"].(map[string]any)
	if thinking["type"] != "disabled" {
		t.Fatalf("expected Claude default to disable downstream thinking, got %#v", openAI.seenReq)
	}
}

func TestClaudeProxyViaOpenAIEnablesThinkingWhenRequested(t *testing.T) {
	openAI := &openAIProxyCaptureStub{}
	h := &Handler{
		Store:  claudeProxyStoreStub{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
		OpenAI: openAI,
	}
	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(`{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":1024},"stream":false}`))
	rec := httptest.NewRecorder()

	h.Messages(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
	}
	thinking, _ := openAI.seenReq["thinking"].(map[string]any)
	if thinking["type"] != "enabled" {
		t.Fatalf("expected Claude explicit thinking to enable downstream thinking, got %#v", openAI.seenReq)
	}
}

func TestClaudeProxyTranslatesInlineImageToOpenAIDataURL(t *testing.T) {
	openAI := &openAIProxyCaptureStub{}
	h := &Handler{OpenAI: openAI}

@@ -31,12 +31,11 @@ func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNorma

	dsPayload := convertClaudeToDeepSeek(payload, store)
	dsModel, _ := dsPayload["model"].(string)
	defaultThinkingEnabled, searchEnabled, ok := config.GetModelConfig(dsModel)
	_, searchEnabled, ok := config.GetModelConfig(dsModel)
	if !ok {
		defaultThinkingEnabled = false
		searchEnabled = false
	}
	thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled)
	thinkingEnabled := util.ResolveThinkingEnabled(req, false)
	finalPrompt := deepseek.MessagesPrepareWithThinking(toMessageMaps(dsPayload["messages"]), thinkingEnabled)
	toolNames := extractClaudeToolNames(toolsRequested)
	if len(toolNames) == 0 && len(toolsRequested) > 0 {

@@ -36,6 +36,11 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream
		return true
	}
	routeModel := strings.TrimSpace(chi.URLParam(r, "model"))
	var req map[string]any
	if err := json.Unmarshal(raw, &req); err != nil {
		writeGeminiError(w, http.StatusBadRequest, "invalid json")
		return true
	}
	translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatGemini, routeModel, raw, stream)
	if !strings.Contains(string(translatedReq), `"stream"`) {
		var reqMap map[string]any

@@ -46,6 +51,7 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream
			}
		}
	}
	translatedReq = applyGeminiThinkingPolicyToOpenAIRequest(translatedReq, req)

	isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1"
	isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"

@@ -116,6 +122,72 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream
	return true
}

func applyGeminiThinkingPolicyToOpenAIRequest(translated []byte, original map[string]any) []byte {
	req := map[string]any{}
	if err := json.Unmarshal(translated, &req); err != nil {
		return translated
	}
	enabled, ok := resolveGeminiThinkingOverride(original)
	if !ok {
		return translated
	}
	typ := "disabled"
	if enabled {
		typ = "enabled"
	}
	req["thinking"] = map[string]any{"type": typ}
	out, err := json.Marshal(req)
	if err != nil {
		return translated
	}
	return out
}

func resolveGeminiThinkingOverride(req map[string]any) (bool, bool) {
	generationConfig, ok := req["generationConfig"].(map[string]any)
	if !ok {
		generationConfig, ok = req["generation_config"].(map[string]any)
	}
	if !ok {
		return false, false
	}
	thinkingConfig, ok := generationConfig["thinkingConfig"].(map[string]any)
	if !ok {
		thinkingConfig, ok = generationConfig["thinking_config"].(map[string]any)
	}
	if !ok {
		return false, false
	}
	budget, ok := numericAny(thinkingConfig["thinkingBudget"])
	if !ok {
		budget, ok = numericAny(thinkingConfig["thinking_budget"])
	}
	if !ok {
		return false, false
	}
	return budget > 0, true
}

func numericAny(raw any) (float64, bool) {
	switch v := raw.(type) {
	case float64:
		return v, true
	case float32:
		return float64(v), true
	case int:
		return float64(v), true
	case int64:
		return float64(v), true
	case int32:
		return float64(v), true
	case json.Number:
		f, err := v.Float64()
		return f, err == nil
	default:
		return 0, false
	}
}

func writeGeminiErrorFromOpenAI(w http.ResponseWriter, status int, raw []byte) {
	message := strings.TrimSpace(string(raw))
	var parsed map[string]any
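
To make the budget rule concrete, here is a standalone restatement of the override resolution (illustrative only, not the package's unexported helper; unlike `numericAny` above it accepts just JSON numbers decoded as `float64`):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// budgetToThinking reports (enabled, explicit): the override only exists when
// a numeric thinkingBudget is present, and any positive budget enables thinking.
func budgetToThinking(payload string) (bool, bool) {
	var req map[string]any
	if err := json.Unmarshal([]byte(payload), &req); err != nil {
		return false, false
	}
	gc, _ := req["generationConfig"].(map[string]any)
	tc, _ := gc["thinkingConfig"].(map[string]any)
	budget, ok := tc["thinkingBudget"].(float64)
	if !ok {
		budget, ok = tc["thinking_budget"].(float64)
	}
	if !ok {
		return false, false
	}
	return budget > 0, true
}

func main() {
	fmt.Println(budgetToThinking(`{"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`))    // false true
	fmt.Println(budgetToThinking(`{"generationConfig":{"thinkingConfig":{"thinkingBudget":1024}}}`)) // true true
	fmt.Println(budgetToThinking(`{"generationConfig":{}}`))                                         // false false
}
```

Note that the real helper also falls back to `generation_config` and `thinking_config`, so both Gemini wire spellings resolve to the same switch; this sketch only keeps the `thinking_budget` fallback for brevity.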
@@ -290,6 +290,46 @@ func TestGeminiProxyTranslatesInlineImageToOpenAIDataURL(t *testing.T) {
	}
}

func TestGeminiProxyViaOpenAIDisablesThinkingBudgetZero(t *testing.T) {
	openAI := &geminiOpenAISuccessStub{}
	h := &Handler{Store: testGeminiConfig{}, OpenAI: openAI}
	r := chi.NewRouter()
	RegisterRoutes(r, h)

	body := `{"contents":[{"role":"user","parts":[{"text":"hello"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`
	req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-flash:generateContent", strings.NewReader(body))
	rec := httptest.NewRecorder()
	r.ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
	}
	thinking, _ := openAI.seenReq["thinking"].(map[string]any)
	if thinking["type"] != "disabled" {
		t.Fatalf("expected Gemini thinkingBudget=0 to disable OpenAI thinking, got %#v", openAI.seenReq)
	}
}

func TestGeminiProxyViaOpenAIEnablesPositiveThinkingBudget(t *testing.T) {
	openAI := &geminiOpenAISuccessStub{}
	h := &Handler{Store: testGeminiConfig{}, OpenAI: openAI}
	r := chi.NewRouter()
	RegisterRoutes(r, h)

	body := `{"contents":[{"role":"user","parts":[{"text":"hello"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":1024}}}`
	req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-flash:generateContent", strings.NewReader(body))
	rec := httptest.NewRecorder()
	r.ServeHTTP(rec, req)

	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
	}
	thinking, _ := openAI.seenReq["thinking"].(map[string]any)
	if thinking["type"] != "enabled" {
		t.Fatalf("expected Gemini positive thinkingBudget to enable OpenAI thinking, got %#v", openAI.seenReq)
	}
}

func TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope(t *testing.T) {
	h := &Handler{
		Store: testGeminiConfig{},

@@ -54,6 +54,27 @@ function splitThinkingParts(parts) {
  return { parts: out, transitioned: thinkingDone };
}

function dropThinkingParts(parts) {
  if (!Array.isArray(parts) || parts.length === 0) {
    return parts;
  }
  return parts.filter((p) => p && p.type !== 'thinking');
}

function finalizeThinkingParts(parts, thinkingEnabled, newType) {
  const splitResult = splitThinkingParts(parts);
  let finalType = newType;
  let finalParts = splitResult.parts;
  if (splitResult.transitioned) {
    finalType = 'text';
  }
  if (!thinkingEnabled) {
    finalParts = dropThinkingParts(finalParts);
    finalType = 'text';
  }
  return { parts: finalParts, newType: finalType };
}

function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenceMarkers = true) {
  if (!chunk || typeof chunk !== 'object') {
    return {
@@ -194,7 +215,9 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc

    let partType = 'text';
    if (pathValue === 'response/thinking_content') {
      if (newType === 'text') {
      if (!thinkingEnabled) {
        partType = 'thinking';
      } else if (newType === 'text') {
        partType = 'text';
      } else {
        partType = 'thinking';
@@ -239,20 +262,17 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
    }

    let resolvedParts = filterLeakedContentFilterParts(parts);
    const splitResult = splitThinkingParts(resolvedParts);
    if (splitResult.transitioned) {
      newType = 'text';
    }
    const finalized = finalizeThinkingParts(resolvedParts, thinkingEnabled, newType);

    return {
      parsed: true,
      parts: splitResult.parts,
      parts: finalized.parts,
      finished: false,
      contentFilter: false,
      errorMessage: '',
      promptTokens,
      outputTokens,
      newType,
      newType: finalized.newType,
    };
  }

@@ -273,20 +293,17 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
    parts.push(...extracted.parts);

    let resolvedParts = filterLeakedContentFilterParts(parts);
    const splitResult = splitThinkingParts(resolvedParts);
    if (splitResult.transitioned) {
      newType = 'text';
    }
    const finalized = finalizeThinkingParts(resolvedParts, thinkingEnabled, newType);

    return {
      parsed: true,
      parts: splitResult.parts,
      parts: finalized.parts,
      finished: false,
      contentFilter: false,
      errorMessage: '',
      promptTokens,
      outputTokens,
      newType,
      newType: finalized.newType,
    };
  }

@@ -316,20 +333,17 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
    }

    let resolvedParts = filterLeakedContentFilterParts(parts);
    const splitResult = splitThinkingParts(resolvedParts);
    if (splitResult.transitioned) {
      newType = 'text';
    }
    const finalized = finalizeThinkingParts(resolvedParts, thinkingEnabled, newType);

    return {
      parsed: true,
      parts: splitResult.parts,
      parts: finalized.parts,
      finished: false,
      contentFilter: false,
      errorMessage: '',
      promptTokens,
      outputTokens,
      newType,
      newType: finalized.newType,
    };
  }

@@ -16,6 +16,14 @@ func TestToOpenAIClaude(t *testing.T) {
	}
}

func TestToOpenAIGeminiThinkingBudgetZeroDisablesReasoning(t *testing.T) {
	raw := []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`)
	got := string(ToOpenAI(sdktranslator.FormatGemini, "gemini-2.5-flash", raw, false))
	if !strings.Contains(got, `"reasoning_effort":"none"`) {
		t.Fatalf("expected Gemini thinkingBudget=0 to translate to reasoning_effort none, got: %s", got)
	}
}

func TestFromOpenAINonStreamClaude(t *testing.T) {
	original := []byte(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":false}`)
	translatedReq := []byte(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":false}`)
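
The new translator test pins down only the zero-budget case. A minimal sketch of that asserted mapping (the real translator's structure is assumed; positive budgets are deliberately left untouched here, since the test does not constrain them):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// applyZeroBudget encodes just what the test asserts:
// thinkingBudget=0 translates to reasoning_effort "none".
func applyZeroBudget(openAIReq map[string]any, budget float64) map[string]any {
	if budget == 0 {
		openAIReq["reasoning_effort"] = "none"
	}
	return openAIReq
}

func main() {
	out, _ := json.Marshal(applyZeroBudget(map[string]any{"model": "gemini-2.5-flash"}, 0))
	fmt.Println(string(out)) // {"model":"gemini-2.5-flash","reasoning_effort":"none"}
}
```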
@@ -233,6 +233,24 @@ test('parseChunkForContent handles response/fragments APPEND with thinking and r
  ]);
});

test('parseChunkForContent drops thinking content when thinking is disabled', () => {
  const thinking = parseChunkForContent(
    { p: 'response/thinking_content', v: 'hidden thought' },
    false,
    'text',
  );
  assert.equal(thinking.finished, false);
  assert.equal(thinking.newType, 'text');
  assert.deepEqual(thinking.parts, []);

  const answer = parseChunkForContent(
    { p: 'response/content', v: 'visible answer' },
    false,
    thinking.newType,
  );
  assert.deepEqual(answer.parts, [{ text: 'visible answer', type: 'text' }]);
});

test('parseChunkForContent supports wrapped response.fragments object shape', () => {
  const chunk = {
    p: 'response',