From 90ce595325dba7b6d4a0dc0d52271acedbb4bd96 Mon Sep 17 00:00:00 2001 From: CJACK Date: Mon, 27 Apr 2026 02:09:11 +0800 Subject: [PATCH] chore: update project files --- API.md | 35 ++-- README.MD | 17 +- docs/DEPLOY.en.md | 2 +- docs/DEPLOY.md | 2 +- docs/prompt-compatibility.md | 18 +- internal/config/config_edge_test.go | 32 +++- internal/config/model_alias_test.go | 21 +++ internal/config/models.go | 178 ++++++++++++------ .../httpapi/claude/deps_injection_test.go | 20 ++ internal/httpapi/claude/standard_request.go | 3 + internal/httpapi/gemini/convert_request.go | 3 + .../httpapi/gemini/convert_request_test.go | 28 +++ .../httpapi/openai/deps_injection_test.go | 22 +++ internal/httpapi/openai/models_route_test.go | 18 ++ internal/prompt/messages.go | 16 -- internal/prompt/messages_test.go | 15 +- internal/promptcompat/prompt_build_test.go | 12 +- internal/promptcompat/request_normalize.go | 6 + .../promptcompat/standard_request_test.go | 1 + internal/promptcompat/thinking_injection.go | 10 +- internal/util/messages_test.go | 12 ++ internal/util/util_edge_test.go | 13 ++ .../features/apiTester/ApiTesterContainer.jsx | 130 ++++++++++++- webui/src/features/apiTester/ChatPanel.jsx | 10 +- webui/src/features/apiTester/ConfigPanel.jsx | 19 +- webui/src/locales/en.json | 9 +- webui/src/locales/zh.json | 9 +- 27 files changed, 511 insertions(+), 150 deletions(-) create mode 100644 internal/httpapi/gemini/convert_request_test.go diff --git a/API.md b/API.md index d7d7963..7bf19f3 100644 --- a/API.md +++ b/API.md @@ -196,16 +196,22 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=` "object": "list", "data": [ {"id": "deepseek-v4-flash", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-flash-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, {"id": "deepseek-v4-pro", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-pro-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, {"id": "deepseek-v4-flash-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-flash-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, {"id": "deepseek-v4-pro-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-pro-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, {"id": "deepseek-v4-vision", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-v4-vision-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []} + {"id": "deepseek-v4-vision-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-vision-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-vision-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []} ] } ``` -> 说明:`/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID;常见 alias 仅用于请求入参解析,不会在该接口中单独展开返回。 +> 说明:`/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID;常见 alias 仅用于请求入参解析,不会在该接口中单独展开返回。带 `-nothinking` 后缀的模型表示无论请求里是否显式开启 thinking / reasoning,都会强制关闭思考输出。 ### 模型 alias 解析策略 @@ -213,8 +219,9 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=` 1. 先匹配 DeepSeek 原生模型。 2. 再匹配 `model_aliases` 精确映射。 -3. 未命中时按模型家族规则回退(如 `o*`、`gpt-*`、`claude-*`)。 -4. 仍未命中则返回 `invalid_request_error`。 +3. 如果请求名以 `-nothinking` 结尾,则在最终解析出的规范模型上追加对应的无思考变体。 +4. 未命中时按模型家族规则回退(如 `o*`、`gpt-*`、`claude-*`)。 +5. 仍未命中则返回 `invalid_request_error`。 当前内置默认 alias 来自 `internal/config/models.go`,`config.model_aliases` 会在运行时覆盖或补充同名映射。节选: @@ -224,6 +231,8 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=` - Gemini:`gemini-2.5-pro`、`gemini-2.5-flash`、`gemini-pro-vision` - 其他兼容族:`llama-*`、`qwen-*`、`mistral-*`、`command-*` 会按家族启发式回退 +上述 alias 若在请求名后追加 `-nothinking` 后缀,也会映射到对应的强制关闭 thinking 版本。 + 退役历史模型(如 `claude-1.*`、`claude-2.*`、`claude-instant-*`、`gpt-3.5*`)会被显式拒绝。 ### `POST /v1/chat/completions` @@ -239,7 +248,7 @@ Content-Type: application/json | 字段 | 类型 | 必填 | 说明 | | --- | --- | --- | --- | -| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias(如 `gpt-5.5`、`gpt-5.4-mini`、`gpt-5.3-codex`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-6`、`gemini-2.5-pro`、`gemini-2.5-flash` 等) | +| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias(如 `gpt-5.5`、`gpt-5.4-mini`、`gpt-5.3-codex`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-6`、`gemini-2.5-pro`、`gemini-2.5-flash` 等);若模型名带 `-nothinking` 后缀,则强制关闭 thinking / reasoning | | `messages` | array | ✅ | OpenAI 风格消息数组 | | `stream` | boolean | ❌ | 默认 `false` | | `tools` | array | ❌ | Function Calling 定义 | @@ -449,16 +458,19 @@ data: [DONE] "object": "list", "data": [ {"id": "claude-sonnet-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, + {"id": "claude-sonnet-4-6-nothinking", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, {"id": "claude-haiku-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, - {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"} + {"id": "claude-haiku-4-5-nothinking", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, + {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, + {"id": "claude-opus-4-6-nothinking", "object": "model", "created": 1715635200, "owned_by": "anthropic"} ], "first_id": "claude-opus-4-6", - "last_id": "claude-3-haiku-20240307", + "last_id": "claude-3-haiku-20240307-nothinking", "has_more": false } ``` -> 说明:示例仅展示部分模型;实际返回除当前主别名外,还包含 Claude 4.x snapshots,以及 3.x 历史模型 ID 与常见别名。 +> 说明:示例仅展示部分模型;实际返回除当前主别名外,还包含 Claude 4.x snapshots、3.x 历史模型 ID 与常见别名,并为这些可映射模型额外提供 `-nothinking` 变体。 ### `POST /anthropic/v1/messages` @@ -476,7 +488,7 @@ anthropic-version: 2023-06-01 | 字段 | 类型 | 必填 | 说明 | | --- | --- | --- | --- | -| `model` | string | ✅ | 例如 `claude-sonnet-4-6` / `claude-opus-4-6` / `claude-haiku-4-5`(兼容 `claude-sonnet-4-5`、`claude-3-5-haiku-latest`),并支持历史 Claude 模型 ID | +| `model` | string | ✅ | 例如 `claude-sonnet-4-6` / `claude-opus-4-6` / `claude-haiku-4-5`(兼容 `claude-sonnet-4-5`、`claude-3-5-haiku-latest`),并支持历史 Claude 模型 ID;若模型名带 `-nothinking` 后缀,则强制关闭 thinking / reasoning | | `messages` | array | ✅ | Claude 风格消息数组 | | `max_tokens` | number | ❌ | 缺省自动补 `8192`;当前实现不会硬性截断上游输出 | | `stream` | boolean | ❌ | 默认 `false` | @@ -534,7 +546,8 @@ data: {"type":"message_stop"} **说明**: -- 名称中包含 `opus` / `reasoner` / `slow` 的模型会输出 `thinking_delta` +- 默认模型会按各 surface 的既有规则输出 thinking / reasoning 相关增量 +- 带 `-nothinking` 后缀的模型会强制关闭 thinking,即使请求显式传了 `thinking` / `reasoning` / `reasoning_effort` 也不会输出 `thinking_delta` - 不会输出 `signature_delta`(上游 DeepSeek 未提供可验证签名) - `tools` 场景优先避免泄露原始工具 JSON,不强制发送 `input_json_delta` @@ -575,7 +588,7 @@ data: {"type":"message_stop"} ### `POST /v1beta/models/{model}:generateContent` -请求体兼容 Gemini `contents` / `tools` 字段,模型名可用 alias 自动映射到 DeepSeek 模型。 +请求体兼容 Gemini `contents` / `tools` 字段,模型名可用 alias 自动映射到 DeepSeek 模型;若路径中的模型名带 `-nothinking` 后缀,则最终会映射到对应的无思考模型。 响应为 Gemini 兼容结构,核心字段包括: diff --git a/README.MD b/README.MD index a4fe4e0..fd975bf 100644 --- a/README.MD +++ b/README.MD @@ -125,23 +125,32 @@ flowchart LR | 模型类型 | 模型 ID | thinking | search | | --- | --- | --- | --- | | default | `deepseek-v4-flash` | 默认开启,可由请求参数控制 | ❌ | +| default | `deepseek-v4-flash-nothinking` | 永久关闭,不受请求参数影响 | ❌ | | expert | `deepseek-v4-pro` | 默认开启,可由请求参数控制 | ❌ | +| expert | `deepseek-v4-pro-nothinking` | 永久关闭,不受请求参数影响 | ❌ | | default | `deepseek-v4-flash-search` | 默认开启,可由请求参数控制 | ✅ | +| default | `deepseek-v4-flash-search-nothinking` | 永久关闭,不受请求参数影响 | ✅ | | expert | `deepseek-v4-pro-search` | 默认开启,可由请求参数控制 | ✅ | +| expert | `deepseek-v4-pro-search-nothinking` | 永久关闭,不受请求参数影响 | ✅ | | vision | `deepseek-v4-vision` | 默认开启,可由请求参数控制 | ❌ | +| vision | `deepseek-v4-vision-nothinking` | 永久关闭,不受请求参数影响 | ❌ | | vision | `deepseek-v4-vision-search` | 默认开启,可由请求参数控制 | ✅ | +| vision | `deepseek-v4-vision-search-nothinking` | 永久关闭,不受请求参数影响 | ✅ | -除原生模型外,也支持常见 alias 输入(如 `gpt-4.1`、`gpt-5`、`gpt-5-codex`、`o3`、`claude-*`、`gemini-*` 等),但 `/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID。完整 alias 行为以 [API.md](API.md#模型-alias-解析策略) 和 `config.example.json` 为准。 +除原生模型外,也支持常见 alias 输入(如 `gpt-4.1`、`gpt-5`、`gpt-5-codex`、`o3`、`claude-*`、`gemini-*` 等),但 `/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID。若 alias 名本身追加 `-nothinking` 后缀,也会映射到对应的强制关思考模型。完整 alias 行为以 [API.md](API.md#模型-alias-解析策略) 和 `config.example.json` 为准。 ### Claude 接口(`GET /anthropic/v1/models`) | 当前常用模型 | 默认映射 | | --- | --- | | `claude-sonnet-4-6` | `deepseek-v4-flash` | +| `claude-sonnet-4-6-nothinking` | `deepseek-v4-flash-nothinking` | | `claude-haiku-4-5`(兼容 `claude-3-5-haiku-latest`) | `deepseek-v4-flash` | +| `claude-haiku-4-5-nothinking` | `deepseek-v4-flash-nothinking` | | `claude-opus-4-6` | `deepseek-v4-pro` | +| `claude-opus-4-6-nothinking` | `deepseek-v4-pro-nothinking` | -可通过配置中的 `model_aliases` 覆盖映射关系。 +可通过配置中的 `model_aliases` 覆盖映射关系;若请求模型名带 `-nothinking`,会在最终映射结果上强制追加无思考语义。 `/anthropic/v1/models` 除上述主别名外,还会返回 Claude 4.x snapshots、3.x 历史模型 ID 与常见 alias,便于旧客户端直接兼容。 #### Claude Code 接入避坑(实测) @@ -153,7 +162,7 @@ flowchart LR ### Gemini 接口 -Gemini 适配器将模型名通过 `model_aliases` 或内置规则映射到 DeepSeek 原生模型,支持 `generateContent` 和 `streamGenerateContent` 两种调用方式,并完整支持 Tool Calling(`functionDeclarations` → `functionCall` 输出)。 +Gemini 适配器将模型名通过 `model_aliases` 或内置规则映射到 DeepSeek 原生模型,支持 `generateContent` 和 `streamGenerateContent` 两种调用方式,并完整支持 Tool Calling(`functionDeclarations` → `functionCall` 输出)。若 Gemini 模型名带 `-nothinking` 后缀,例如 `gemini-2.5-pro-nothinking`,会映射到对应的强制关闭思考模型。 ## 快速开始 @@ -284,7 +293,7 @@ go run ./cmd/ds2api - `history_split`:轮次拆分策略;默认关闭,开启后默认从第二轮开始将旧历史上传为 `HISTORY.txt`。 - `current_input_file`:独立拆分策略;默认开启且阈值为 `0`,触发时将完整上下文合并上传为隐藏上下文文件,并跳过 `HISTORY.txt`。 - `history_split` 与 `current_input_file` 互斥,最多启用一个;两者都关闭时请求直接透传。 -- `thinking_injection`:默认开启;在最新 user 消息末尾追加思考格式增强提示,提高工具调用前的思考结构稳定性;`prompt` 留空时使用内置默认提示词。 +- `thinking_injection`:默认开启;在最新 user 消息末尾追加思考增强提示词,提高高强度推理与工具调用前的思考稳定性;`prompt` 留空时使用内置默认提示词。 环境变量完整列表见 [部署指南](docs/DEPLOY.md),接口鉴权规则见 [API.md](API.md#鉴权规则)。 diff --git a/docs/DEPLOY.en.md b/docs/DEPLOY.en.md index 3e06322..f81de01 100644 --- a/docs/DEPLOY.en.md +++ b/docs/DEPLOY.en.md @@ -538,7 +538,7 @@ curl -s http://127.0.0.1:5001/readyz # 3. Model list curl -s http://127.0.0.1:5001/v1/models -# Expected: {"object":"list","data":[...]} +# Expected: {"object":"list","data":[...]} (including `*-nothinking` variants) # 4. Admin panel (if WebUI is built) curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin diff --git a/docs/DEPLOY.md b/docs/DEPLOY.md index 29612f2..0f91fdf 100644 --- a/docs/DEPLOY.md +++ b/docs/DEPLOY.md @@ -548,7 +548,7 @@ curl -s http://127.0.0.1:5001/readyz # 3. 模型列表 curl -s http://127.0.0.1:5001/v1/models -# 预期: {"object":"list","data":[...]} +# 预期: {"object":"list","data":[...]}(包含 `*-nothinking` 变体) # 4. 管理台页面(如果已构建 WebUI) curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index ee227df..19b6dde 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -99,12 +99,12 @@ DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools` - `ref_file_ids` 只承载文件引用,不承载普通文本消息。 - `tools` 不会作为“原生工具 schema”直接下发给下游,而是被改写进 `prompt`。 - OpenAI Chat / Responses 原生走统一 OpenAI 标准化与 DeepSeek payload 组装;Claude / Gemini 会尽量复用 OpenAI prompt/tool 语义,其中 Gemini 直接复用 `promptcompat.BuildOpenAIPromptForAdapter`,Claude 消息接口在可代理场景会转换为 OpenAI chat 形态再执行。 -- 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关;关闭时即使上游返回 `response/thinking_content`,兼容层也不会把它当作可见正文输出。Claude surface 在流式请求且未显式声明 `thinking` 时,仍按 Anthropic 语义默认关闭;但在非流式代理场景,兼容层会内部开启一次下游 thinking,用于捕获“正文为空、工具调用落在 thinking 里”的情况,随后在回包前剥离用户不可见的 thinking block。 +- 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关;关闭时即使上游返回 `response/thinking_content`,兼容层也不会把它当作可见正文输出。若最终解析出的模型名带 `-nothinking` 后缀,则会无条件强制关闭 thinking,优先级高于请求体中的 `thinking` / `reasoning` / `reasoning_effort`。Claude surface 在流式请求且未显式声明 `thinking` 时,仍按 Anthropic 语义默认关闭;但在非流式代理场景,兼容层会内部开启一次下游 thinking,用于捕获“正文为空、工具调用落在 thinking 里”的情况,随后在回包前剥离用户不可见的 thinking block。 - 对 OpenAI Chat / Responses 的非流式收尾,如果最终可见正文为空,兼容层会优先尝试把思维链中的独立 DSML / XML 工具块当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测,但不会因为思维链内容去中途拦截或改写流式输出;thinking / reasoning 增量仍按原样先发,只有在结束收尾时才可能补发最终工具调用结果。补发结果会作为本轮 assistant 的结构化 `tool_calls` / `function_call` 输出返回,而不是塞进 `content` 文本;如果客户端没有开启 thinking / reasoning,思维链只用于检测,不会作为 `reasoning_content` 或可见正文暴露。只有正文为空且思维链里也没有可执行工具调用时,才继续按空回复错误处理。 ## 5. prompt 是怎么拼出来的 -OpenAI Chat / Responses 在标准化后、history split / current input file 之前,会默认执行 `thinking_injection` 增强。它参考 DeepSeek V4 “把控制指令放在 user 消息末尾更稳定”的用法,在最新 user message 后追加思考格式提示词,默认内容以 `【思维链格式要求】...` 开头,要求模型在 `` 内按分析、构思、工具调用、XML 工具格式回顾这几个阶段组织思考。该开关默认启用,可通过 `thinking_injection.enabled=false` 关闭;也可以通过 `thinking_injection.prompt` 自定义提示词,留空时使用内置默认提示词。 +OpenAI Chat / Responses 在标准化后、history split / current input file 之前,会默认执行 `thinking_injection` 增强。它参考 DeepSeek V4 “把控制指令放在 user 消息末尾更稳定”的用法,在最新 user message 后追加思考增强提示词。当前内置默认提示词以 `Reasoning Effort: Absolute maximum with no shortcuts permitted.` 开头,并继续要求模型充分分解问题、覆盖潜在路径与边界条件、把完整推演过程显式写出。该开关默认启用,可通过 `thinking_injection.enabled=false` 关闭;也可以通过 `thinking_injection.prompt` 自定义提示词,留空时使用内置默认提示词。 这段增强属于 prompt 可见上下文: @@ -128,17 +128,7 @@ OpenAI Chat / Responses 在标准化后、history split / current input file 之 实现位置: [internal/prompt/messages.go](../internal/prompt/messages.go) -### 5.2 thinking continuity 说明 - -如果启用了 thinking,会在最前面额外插入一个 system block,提醒模型: - -- 继续既有会话,不要重开 -- earlier messages 是 binding context -- 不要把最终回答只留在 reasoning 里 - -这部分不是客户端原始消息,而是兼容层主动补进去的连续性契约。 - -### 5.3 相邻同角色消息会合并 +### 5.2 相邻同角色消息会合并 在最终 `MessagesPrepareWithThinking` 中,相邻同 role 的消息会被合并成一个块,中间插入空行。 @@ -352,7 +342,7 @@ history split 触发后行为: ```json { - "prompt": "<|begin▁of▁sentence|><|System|>continuity instructions...\\n\\n原 system / developer\\n\\nYou have access to these tools: ...<|end▁of▁instructions|><|User|>最新问题<|Assistant|>", + "prompt": "<|begin▁of▁sentence|><|System|>原 system / developer\n\nYou have access to these tools: ...<|end▁of▁instructions|><|User|>最新问题<|Assistant|>", "ref_file_ids": [ "file-history-ignore", "file-systemprompt", diff --git a/internal/config/config_edge_test.go b/internal/config/config_edge_test.go index 7741777..678ef29 100644 --- a/internal/config/config_edge_test.go +++ b/internal/config/config_edge_test.go @@ -19,6 +19,16 @@ func TestGetModelConfigDeepSeekChat(t *testing.T) { } } +func TestGetModelConfigDeepSeekChatNoThinking(t *testing.T) { + thinking, search, ok := GetModelConfig("deepseek-v4-flash-nothinking") + if !ok { + t.Fatal("expected ok for deepseek-v4-flash-nothinking") + } + if thinking || search { + t.Fatalf("expected thinking=false search=false for deepseek-v4-flash-nothinking, got thinking=%v search=%v", thinking, search) + } +} + func TestGetModelConfigDeepSeekReasoner(t *testing.T) { thinking, search, ok := GetModelConfig("deepseek-v4-pro") if !ok { @@ -84,6 +94,10 @@ func TestGetModelTypeDefaultExpertAndVision(t *testing.T) { if !ok || defaultType != "default" { t.Fatalf("expected default model_type, got ok=%v model_type=%q", ok, defaultType) } + defaultNoThinkingType, ok := GetModelType("deepseek-v4-flash-nothinking") + if !ok || defaultNoThinkingType != "default" { + t.Fatalf("expected default model_type for nothinking, got ok=%v model_type=%q", ok, defaultNoThinkingType) + } expertType, ok := GetModelType("deepseek-v4-pro") if !ok || expertType != "expert" { t.Fatalf("expected expert model_type, got ok=%v model_type=%q", ok, expertType) @@ -734,12 +748,18 @@ func TestOpenAIModelsResponse(t *testing.T) { t.Fatal("expected non-empty models list") } expected := map[string]bool{ - "deepseek-v4-flash": false, - "deepseek-v4-pro": false, - "deepseek-v4-flash-search": false, - "deepseek-v4-pro-search": false, - "deepseek-v4-vision": false, - "deepseek-v4-vision-search": false, + "deepseek-v4-flash": false, + "deepseek-v4-flash-nothinking": false, + "deepseek-v4-pro": false, + "deepseek-v4-pro-nothinking": false, + "deepseek-v4-flash-search": false, + "deepseek-v4-flash-search-nothinking": false, + "deepseek-v4-pro-search": false, + "deepseek-v4-pro-search-nothinking": false, + "deepseek-v4-vision": false, + "deepseek-v4-vision-nothinking": false, + "deepseek-v4-vision-search": false, + "deepseek-v4-vision-search-nothinking": false, } for _, model := range data { if _, ok := expected[model.ID]; ok { diff --git a/internal/config/model_alias_test.go b/internal/config/model_alias_test.go index f537b21..64cbda8 100644 --- a/internal/config/model_alias_test.go +++ b/internal/config/model_alias_test.go @@ -13,6 +13,13 @@ func TestResolveModelDirectDeepSeek(t *testing.T) { } } +func TestResolveModelDirectDeepSeekNoThinking(t *testing.T) { + got, ok := ResolveModel(nil, "deepseek-v4-flash-nothinking") + if !ok || got != "deepseek-v4-flash-nothinking" { + t.Fatalf("expected deepseek-v4-flash-nothinking, got ok=%v model=%q", ok, got) + } +} + func TestResolveModelAlias(t *testing.T) { got, ok := ResolveModel(nil, "gpt-4.1") if !ok || got != "deepseek-v4-flash" { @@ -34,6 +41,13 @@ func TestResolveLatestClaudeAlias(t *testing.T) { } } +func TestResolveLatestClaudeAliasNoThinking(t *testing.T) { + got, ok := ResolveModel(nil, "claude-sonnet-4-6-nothinking") + if !ok || got != "deepseek-v4-flash-nothinking" { + t.Fatalf("expected alias claude-sonnet-4-6-nothinking -> deepseek-v4-flash-nothinking, got ok=%v model=%q", ok, got) + } +} + func TestResolveExpandedHistoricalAliases(t *testing.T) { cases := []struct { name string @@ -68,6 +82,13 @@ func TestResolveModelHeuristicReasoner(t *testing.T) { } } +func TestResolveModelHeuristicReasonerNoThinking(t *testing.T) { + got, ok := ResolveModel(nil, "o3-super-nothinking") + if !ok || got != "deepseek-v4-pro-nothinking" { + t.Fatalf("expected heuristic reasoner nothinking, got ok=%v model=%q", ok, got) + } +} + func TestResolveModelUnknown(t *testing.T) { _, ok := ResolveModel(nil, "totally-custom-model") if ok { diff --git a/internal/config/models.go b/internal/config/models.go index 7b28ec3..1349ef1 100644 --- a/internal/config/models.go +++ b/internal/config/models.go @@ -14,7 +14,9 @@ type ModelAliasReader interface { ModelAliases() map[string]string } -var DeepSeekModels = []ModelInfo{ +const noThinkingModelSuffix = "-nothinking" + +var deepSeekBaseModels = []ModelInfo{ {ID: "deepseek-v4-flash", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, {ID: "deepseek-v4-pro", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, {ID: "deepseek-v4-flash-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, @@ -23,7 +25,9 @@ var DeepSeekModels = []ModelInfo{ {ID: "deepseek-v4-vision-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, } -var ClaudeModels = []ModelInfo{ +var DeepSeekModels = appendNoThinkingVariants(deepSeekBaseModels) + +var claudeBaseModels = []ModelInfo{ // Current aliases {ID: "claude-opus-4-6", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, {ID: "claude-sonnet-4-6", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, @@ -53,19 +57,26 @@ var ClaudeModels = []ModelInfo{ {ID: "claude-3-haiku-20240307", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, } +var ClaudeModels = appendNoThinkingVariants(claudeBaseModels) + func GetModelConfig(model string) (thinking bool, search bool, ok bool) { - switch lower(model) { + baseModel, noThinking := splitNoThinkingModel(model) + if baseModel == "" { + return false, false, false + } + switch baseModel { case "deepseek-v4-flash", "deepseek-v4-pro", "deepseek-v4-vision": - return true, false, true + return !noThinking, false, true case "deepseek-v4-flash-search", "deepseek-v4-pro-search", "deepseek-v4-vision-search": - return true, true, true + return !noThinking, true, true default: return false, false, false } } func GetModelType(model string) (modelType string, ok bool) { - switch lower(model) { + baseModel, _ := splitNoThinkingModel(model) + switch baseModel { case "deepseek-v4-flash", "deepseek-v4-flash-search": return "default", true case "deepseek-v4-pro", "deepseek-v4-pro-search": @@ -82,6 +93,11 @@ func IsSupportedDeepSeekModel(model string) bool { return ok } +func IsNoThinkingModel(model string) bool { + _, noThinking := splitNoThinkingModel(model) + return noThinking +} + func DefaultModelAliases() map[string]string { return map[string]string{ // OpenAI GPT / ChatGPT families @@ -191,62 +207,19 @@ func ResolveModel(store ModelAliasReader, requested string) (string, bool) { if model == "" { return "", false } - if isRetiredHistoricalModel(model) { - return "", false - } + aliases := loadModelAliases(store) if IsSupportedDeepSeekModel(model) { return model, true } - aliases := DefaultModelAliases() - if store != nil { - for k, v := range store.ModelAliases() { - aliases[lower(strings.TrimSpace(k))] = lower(strings.TrimSpace(v)) - } - } if mapped, ok := aliases[model]; ok && IsSupportedDeepSeekModel(mapped) { return mapped, true } - if strings.HasPrefix(model, "deepseek-") { + baseModel, noThinking := splitNoThinkingModel(model) + resolvedModel, ok := resolveCanonicalModel(aliases, baseModel) + if !ok { return "", false } - - knownFamily := false - for _, prefix := range []string{ - "gpt-", "o1", "o3", "claude-", "gemini-", "llama-", "qwen-", "mistral-", "command-", - } { - if strings.HasPrefix(model, prefix) { - knownFamily = true - break - } - } - if !knownFamily { - return "", false - } - - useVision := strings.Contains(model, "vision") - useReasoner := strings.Contains(model, "reason") || - strings.Contains(model, "reasoner") || - strings.HasPrefix(model, "o1") || - strings.HasPrefix(model, "o3") || - strings.Contains(model, "opus") || - strings.Contains(model, "slow") || - strings.Contains(model, "r1") - useSearch := strings.Contains(model, "search") - - switch { - case useVision && useSearch: - return "deepseek-v4-vision-search", true - case useVision: - return "deepseek-v4-vision", true - case useReasoner && useSearch: - return "deepseek-v4-pro-search", true - case useReasoner: - return "deepseek-v4-pro", true - case useSearch: - return "deepseek-v4-flash-search", true - default: - return "deepseek-v4-flash", true - } + return withNoThinkingVariant(resolvedModel, noThinking), true } func isRetiredHistoricalModel(model string) bool { @@ -303,3 +276,100 @@ func ClaudeModelsResponse() map[string]any { resp["has_more"] = false return resp } + +func appendNoThinkingVariants(models []ModelInfo) []ModelInfo { + out := make([]ModelInfo, 0, len(models)*2) + for _, model := range models { + out = append(out, model) + variant := model + variant.ID = withNoThinkingVariant(model.ID, true) + out = append(out, variant) + } + return out +} + +func splitNoThinkingModel(model string) (string, bool) { + model = lower(strings.TrimSpace(model)) + if strings.HasSuffix(model, noThinkingModelSuffix) { + return strings.TrimSuffix(model, noThinkingModelSuffix), true + } + return model, false +} + +func withNoThinkingVariant(model string, enabled bool) string { + baseModel, _ := splitNoThinkingModel(model) + if !enabled { + return baseModel + } + if baseModel == "" { + return "" + } + return baseModel + noThinkingModelSuffix +} + +func loadModelAliases(store ModelAliasReader) map[string]string { + aliases := DefaultModelAliases() + if store != nil { + for k, v := range store.ModelAliases() { + aliases[lower(strings.TrimSpace(k))] = lower(strings.TrimSpace(v)) + } + } + return aliases +} + +func resolveCanonicalModel(aliases map[string]string, model string) (string, bool) { + model = lower(strings.TrimSpace(model)) + if model == "" { + return "", false + } + if isRetiredHistoricalModel(model) { + return "", false + } + if IsSupportedDeepSeekModel(model) { + return model, true + } + if mapped, ok := aliases[model]; ok && IsSupportedDeepSeekModel(mapped) { + return mapped, true + } + if strings.HasPrefix(model, "deepseek-") { + return "", false + } + + knownFamily := false + for _, prefix := range []string{ + "gpt-", "o1", "o3", "claude-", "gemini-", "llama-", "qwen-", "mistral-", "command-", + } { + if strings.HasPrefix(model, prefix) { + knownFamily = true + break + } + } + if !knownFamily { + return "", false + } + + useVision := strings.Contains(model, "vision") + useReasoner := strings.Contains(model, "reason") || + strings.Contains(model, "reasoner") || + strings.HasPrefix(model, "o1") || + strings.HasPrefix(model, "o3") || + strings.Contains(model, "opus") || + strings.Contains(model, "slow") || + strings.Contains(model, "r1") + useSearch := strings.Contains(model, "search") + + switch { + case useVision && useSearch: + return "deepseek-v4-vision-search", true + case useVision: + return "deepseek-v4-vision", true + case useReasoner && useSearch: + return "deepseek-v4-pro-search", true + case useReasoner: + return "deepseek-v4-pro", true + case useSearch: + return "deepseek-v4-flash-search", true + default: + return "deepseek-v4-flash", true + } +} diff --git a/internal/httpapi/claude/deps_injection_test.go b/internal/httpapi/claude/deps_injection_test.go index e30ec2f..41afd68 100644 --- a/internal/httpapi/claude/deps_injection_test.go +++ b/internal/httpapi/claude/deps_injection_test.go @@ -53,6 +53,26 @@ func TestNormalizeClaudeRequestEnablesThinkingWhenRequested(t *testing.T) { } } +func TestNormalizeClaudeRequestNoThinkingAliasForcesThinkingOff(t *testing.T) { + req := map[string]any{ + "model": "claude-opus-4-6-nothinking", + "messages": []any{ + map[string]any{"role": "user", "content": "hello"}, + }, + "thinking": map[string]any{"type": "enabled", "budget_tokens": 1024}, + } + out, err := normalizeClaudeRequest(mockClaudeConfig{}, req) + if err != nil { + t.Fatalf("normalizeClaudeRequest error: %v", err) + } + if out.Standard.ResolvedModel != "deepseek-v4-pro-nothinking" { + t.Fatalf("resolved model mismatch: got=%q", out.Standard.ResolvedModel) + } + if out.Standard.Thinking { + t.Fatalf("expected nothinking alias to force downstream thinking off") + } +} + func TestNormalizeClaudeRequestPrefersGlobalAliasMapping(t *testing.T) { req := map[string]any{ "model": "claude-sonnet-4-6", diff --git a/internal/httpapi/claude/standard_request.go b/internal/httpapi/claude/standard_request.go index 26c6fda..3f3e238 100644 --- a/internal/httpapi/claude/standard_request.go +++ b/internal/httpapi/claude/standard_request.go @@ -37,6 +37,9 @@ func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNorma searchEnabled = false } thinkingEnabled := util.ResolveThinkingEnabled(req, false) + if config.IsNoThinkingModel(dsModel) { + thinkingEnabled = false + } finalPrompt := prompt.MessagesPrepareWithThinking(toMessageMaps(dsPayload["messages"]), thinkingEnabled) toolNames := extractClaudeToolNames(toolsRequested) if len(toolNames) == 0 && len(toolsRequested) > 0 { diff --git a/internal/httpapi/gemini/convert_request.go b/internal/httpapi/gemini/convert_request.go index 1d32105..ca1497a 100644 --- a/internal/httpapi/gemini/convert_request.go +++ b/internal/httpapi/gemini/convert_request.go @@ -22,6 +22,9 @@ func normalizeGeminiRequest(store ConfigReader, routeModel string, req map[strin } defaultThinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel) thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled) + if config.IsNoThinkingModel(resolvedModel) { + thinkingEnabled = false + } messagesRaw := geminiMessagesFromRequest(req) if len(messagesRaw) == 0 { diff --git a/internal/httpapi/gemini/convert_request_test.go b/internal/httpapi/gemini/convert_request_test.go new file mode 100644 index 0000000..1a12fbf --- /dev/null +++ b/internal/httpapi/gemini/convert_request_test.go @@ -0,0 +1,28 @@ +package gemini + +import "testing" + +func TestNormalizeGeminiRequestNoThinkingModelForcesThinkingOff(t *testing.T) { + req := map[string]any{ + "contents": []any{ + map[string]any{ + "role": "user", + "parts": []any{map[string]any{"text": "hello"}}, + }, + }, + "reasoning_effort": "high", + } + out, err := normalizeGeminiRequest(testGeminiConfig{}, "gemini-2.5-pro-nothinking", req, false) + if err != nil { + t.Fatalf("normalizeGeminiRequest error: %v", err) + } + if out.ResolvedModel != "deepseek-v4-pro-nothinking" { + t.Fatalf("resolved model mismatch: got=%q", out.ResolvedModel) + } + if out.Thinking { + t.Fatalf("expected nothinking model to force thinking off") + } + if out.Search { + t.Fatalf("expected search=false, got=%v", out.Search) + } +} diff --git a/internal/httpapi/openai/deps_injection_test.go b/internal/httpapi/openai/deps_injection_test.go index 17ee0a9..e4c3428 100644 --- a/internal/httpapi/openai/deps_injection_test.go +++ b/internal/httpapi/openai/deps_injection_test.go @@ -80,6 +80,28 @@ func TestNormalizeOpenAIChatRequestWithConfigInterface(t *testing.T) { } } +func TestNormalizeOpenAIChatRequestDisablesThinkingForNoThinkingModel(t *testing.T) { + cfg := mockOpenAIConfig{wideInput: true} + req := map[string]any{ + "model": "deepseek-v4-pro-nothinking", + "messages": []any{map[string]any{"role": "user", "content": "hello"}}, + "reasoning_effort": "high", + } + out, err := promptcompat.NormalizeOpenAIChatRequest(cfg, req, "") + if err != nil { + t.Fatalf("promptcompat.NormalizeOpenAIChatRequest error: %v", err) + } + if out.ResolvedModel != "deepseek-v4-pro-nothinking" { + t.Fatalf("resolved model mismatch: got=%q", out.ResolvedModel) + } + if out.Thinking { + t.Fatalf("expected nothinking model to force thinking off") + } + if out.Search { + t.Fatalf("expected search=false for deepseek-v4-pro-nothinking, got=%v", out.Search) + } +} + func TestNormalizeOpenAIResponsesRequestWideInputPolicyFromInterface(t *testing.T) { req := map[string]any{ "model": "deepseek-v4-flash", diff --git a/internal/httpapi/openai/models_route_test.go b/internal/httpapi/openai/models_route_test.go index 9e318f9..60b014d 100644 --- a/internal/httpapi/openai/models_route_test.go +++ b/internal/httpapi/openai/models_route_test.go @@ -22,6 +22,15 @@ func TestGetModelRouteDirectAndAlias(t *testing.T) { } }) + t.Run("direct_nothinking", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-v4-flash-nothinking", nil) + rec := httptest.NewRecorder() + r.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + }) + t.Run("direct_expert", func(t *testing.T) { req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-v4-pro", nil) rec := httptest.NewRecorder() @@ -48,6 +57,15 @@ func TestGetModelRouteDirectAndAlias(t *testing.T) { t.Fatalf("expected 200 for alias, got %d body=%s", rec.Code, rec.Body.String()) } }) + + t.Run("alias_nothinking", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/v1/models/claude-sonnet-4-6-nothinking", nil) + rec := httptest.NewRecorder() + r.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("expected 200 for nothinking alias, got %d body=%s", rec.Code, rec.Body.String()) + } + }) } func TestGetModelRouteNotFound(t *testing.T) { diff --git a/internal/prompt/messages.go b/internal/prompt/messages.go index 993eeef..d882f34 100644 --- a/internal/prompt/messages.go +++ b/internal/prompt/messages.go @@ -30,11 +30,6 @@ func MessagesPrepareWithThinking(messages []map[string]any, thinkingEnabled bool Text string } processed := make([]block, 0, len(messages)) - if thinkingEnabled { - if instruction := buildConversationContinuityInstructions(thinkingEnabled); strings.TrimSpace(instruction) != "" { - processed = append(processed, block{Role: "system", Text: instruction}) - } - } for _, m := range messages { role, _ := m["role"].(string) text := NormalizeContent(m["content"]) @@ -93,17 +88,6 @@ func formatRoleBlock(marker, text, endMarker string) string { return out } -func buildConversationContinuityInstructions(thinkingEnabled bool) string { - lines := []string{ - "Continue the conversation from the full prior context and the latest tool results.", - "Treat earlier messages as binding context; answer the user's current request as a continuation, not a restart.", - } - if thinkingEnabled { - lines = append(lines, "Keep reasoning internal. Do not leave the final user-facing answer only in reasoning; always provide the answer in visible assistant content.") - } - return strings.Join(lines, "\n") -} - func NormalizeContent(v any) string { if v == nil { return "" diff --git a/internal/prompt/messages_test.go b/internal/prompt/messages_test.go index 8be34b2..a992ae6 100644 --- a/internal/prompt/messages_test.go +++ b/internal/prompt/messages_test.go @@ -58,23 +58,14 @@ func TestNormalizeContentArrayFallsBackToContentWhenTextEmpty(t *testing.T) { } } -func TestMessagesPrepareWithThinkingAddsContinuityContract(t *testing.T) { +func TestMessagesPrepareWithThinkingPreservesPromptShape(t *testing.T) { messages := []map[string]any{{"role": "user", "content": "Question"}} gotThinking := MessagesPrepareWithThinking(messages, true) gotPlain := MessagesPrepareWithThinking(messages, false) - if gotThinking == gotPlain { - t.Fatalf("expected thinking-enabled prompt to include extra continuity instructions") + if gotThinking != gotPlain { + t.Fatalf("expected thinking flag not to add extra continuity instructions, got thinking=%q plain=%q", gotThinking, gotPlain) } if !strings.HasSuffix(gotThinking, "<|Assistant|>") { t.Fatalf("expected assistant suffix, got %q", gotThinking) } - if !strings.Contains(gotThinking, "Continue the conversation from the full prior context") { - t.Fatalf("expected continuity instruction in thinking prompt, got %q", gotThinking) - } - if !strings.Contains(gotThinking, "final user-facing answer only in reasoning") { - t.Fatalf("expected visible-answer instruction in thinking prompt, got %q", gotThinking) - } - if strings.Contains(gotPlain, "Continue the conversation from the full prior context") { - t.Fatalf("did not expect thinking-only instruction in plain prompt, got %q", gotPlain) - } } diff --git a/internal/promptcompat/prompt_build_test.go b/internal/promptcompat/prompt_build_test.go index 225cae8..b649fea 100644 --- a/internal/promptcompat/prompt_build_test.go +++ b/internal/promptcompat/prompt_build_test.go @@ -88,16 +88,14 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t * } } -func TestBuildOpenAIFinalPromptWithThinkingAddsContinuationContract(t *testing.T) { +func TestBuildOpenAIFinalPromptWithThinkingKeepsPromptUnchanged(t *testing.T) { messages := []any{ map[string]any{"role": "user", "content": "继续回答上一个问题"}, } - finalPrompt, _ := buildOpenAIFinalPrompt(messages, nil, "", true) - if !strings.Contains(finalPrompt, "Continue the conversation from the full prior context") { - t.Fatalf("expected continuation contract in thinking prompt, got=%q", finalPrompt) - } - if !strings.Contains(finalPrompt, "final user-facing answer only in reasoning") { - t.Fatalf("expected visible-answer contract in thinking prompt, got=%q", finalPrompt) + finalPromptThinking, _ := buildOpenAIFinalPrompt(messages, nil, "", true) + finalPromptPlain, _ := buildOpenAIFinalPrompt(messages, nil, "", false) + if finalPromptThinking != finalPromptPlain { + t.Fatalf("expected thinking flag not to prepend continuation contract, thinking=%q plain=%q", finalPromptThinking, finalPromptPlain) } } diff --git a/internal/promptcompat/request_normalize.go b/internal/promptcompat/request_normalize.go index 6d3f12d..8efa772 100644 --- a/internal/promptcompat/request_normalize.go +++ b/internal/promptcompat/request_normalize.go @@ -25,6 +25,9 @@ func NormalizeOpenAIChatRequest(store ConfigReader, req map[string]any, traceID } defaultThinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel) thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled) + if config.IsNoThinkingModel(resolvedModel) { + thinkingEnabled = false + } responseModel := strings.TrimSpace(model) if responseModel == "" { responseModel = resolvedModel @@ -65,6 +68,9 @@ func NormalizeOpenAIResponsesRequest(store ConfigReader, req map[string]any, tra } defaultThinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel) thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled) + if config.IsNoThinkingModel(resolvedModel) { + thinkingEnabled = false + } // Keep width-control as an explicit policy hook even if current default is true. allowWideInput := true diff --git a/internal/promptcompat/standard_request_test.go b/internal/promptcompat/standard_request_test.go index 7b529a6..437888d 100644 --- a/internal/promptcompat/standard_request_test.go +++ b/internal/promptcompat/standard_request_test.go @@ -11,6 +11,7 @@ func TestStandardRequestCompletionPayloadSetsModelTypeFromResolvedModel(t *testi modelType string }{ {name: "default", model: "deepseek-v4-flash", thinking: false, search: false, modelType: "default"}, + {name: "default_nothinking", model: "deepseek-v4-flash-nothinking", thinking: false, search: false, modelType: "default"}, {name: "expert", model: "deepseek-v4-pro", thinking: true, search: false, modelType: "expert"}, {name: "vision", model: "deepseek-v4-vision-search", thinking: false, search: true, modelType: "vision"}, } diff --git a/internal/promptcompat/thinking_injection.go b/internal/promptcompat/thinking_injection.go index 573faa7..6daa454 100644 --- a/internal/promptcompat/thinking_injection.go +++ b/internal/promptcompat/thinking_injection.go @@ -3,12 +3,10 @@ package promptcompat import "strings" const ( - ThinkingInjectionMarker = "【思维链格式要求】" - DefaultThinkingInjectionPrompt = ThinkingInjectionMarker + "在你的思考过程(标签内)中,请严格按照以下规则进行思考,不要遗漏:\n" + - "1. 分析阶段:分析用户需求是什么。\n" + - "2. 构思阶段:构思下一步动作,我要干什么。\n" + - "3. 工具调用阶段:为了满足用户需求,我需要调用什么工具;如果不需要工具,明确说明不需要调用工具。\n" + - "4. 回顾格式:完整复述一遍 System 要求的 XML 工具调用格式要求,回顾错误示例和正确示例,说明我要如何正确调用工具。" + ThinkingInjectionMarker = "Reasoning Effort: Absolute maximum with no shortcuts permitted." + DefaultThinkingInjectionPrompt = ThinkingInjectionMarker + "\n" + + "You MUST be very thorough in your thinking and comprehensively decompose the problem to resolve the root cause, rigorously stress-testing your logic against all potential paths, edge cases, and adversarial scenarios.\n" + + "Explicitly write out your entire deliberation process, documenting every intermediate step, considered alternative, and rejected hypothesis to ensure absolutely no assumption is left unchecked." ) func AppendThinkingInjectionToLatestUser(messages []any) ([]any, bool) { diff --git a/internal/util/messages_test.go b/internal/util/messages_test.go index 077e903..9ddafd6 100644 --- a/internal/util/messages_test.go +++ b/internal/util/messages_test.go @@ -116,6 +116,18 @@ func TestConvertClaudeToDeepSeekUsesGlobalAliasResolution(t *testing.T) { } } +func TestConvertClaudeToDeepSeekUsesNoThinkingAliasResolution(t *testing.T) { + store := config.LoadStore() + req := map[string]any{ + "model": "claude-sonnet-4-6-nothinking", + "messages": []any{map[string]any{"role": "user", "content": "Hi"}}, + } + out := ConvertClaudeToDeepSeek(req, store) + if out["model"] != "deepseek-v4-flash-nothinking" { + t.Fatalf("expected noThinking alias resolution, got model=%q", out["model"]) + } +} + func contains(s, sub string) bool { return len(s) >= len(sub) && (s == sub || len(sub) == 0 || (len(s) > 0 && (indexOf(s, sub) >= 0))) } diff --git a/internal/util/util_edge_test.go b/internal/util/util_edge_test.go index 6084d9c..463df1a 100644 --- a/internal/util/util_edge_test.go +++ b/internal/util/util_edge_test.go @@ -372,3 +372,16 @@ func TestConvertClaudeToDeepSeekUsesExplicitModelAlias(t *testing.T) { t.Fatalf("expected explicit alias override, got %q", out["model"]) } } + +func TestConvertClaudeToDeepSeekUsesExplicitNoThinkingModelAlias(t *testing.T) { + t.Setenv("DS2API_CONFIG_JSON", `{"keys":[],"accounts":[],"model_aliases":{"claude-sonnet-4-6":"deepseek-v4-pro-search"}}`) + store := config.LoadStore() + req := map[string]any{ + "model": "claude-sonnet-4-6-nothinking", + "messages": []any{map[string]any{"role": "user", "content": "Hi"}}, + } + out := ConvertClaudeToDeepSeek(req, store) + if out["model"] != "deepseek-v4-pro-search-nothinking" { + t.Fatalf("expected explicit alias override with nothinking suffix, got %q", out["model"]) + } +} diff --git a/webui/src/features/apiTester/ApiTesterContainer.jsx b/webui/src/features/apiTester/ApiTesterContainer.jsx index bf70d22..fe79a35 100644 --- a/webui/src/features/apiTester/ApiTesterContainer.jsx +++ b/webui/src/features/apiTester/ApiTesterContainer.jsx @@ -1,3 +1,4 @@ +import { useEffect, useMemo, useState } from 'react' import clsx from 'clsx' import { useI18n } from '../../i18n' @@ -6,8 +7,75 @@ import { useChatStreamClient } from './useChatStreamClient' import ConfigPanel from './ConfigPanel' import ChatPanel from './ChatPanel' +function describeModel(t, modelID) { + const noThinking = modelID.endsWith('-nothinking') + + let description = t('apiTester.models.generic') + if (modelID.includes('vision-search')) { + description = t('apiTester.models.visionSearch') + } else if (modelID.includes('vision')) { + description = t('apiTester.models.vision') + } else if (modelID.includes('pro-search')) { + description = t('apiTester.models.proSearch') + } else if (modelID.includes('pro')) { + description = t('apiTester.models.pro') + } else if (modelID.includes('flash-search')) { + description = t('apiTester.models.flashSearch') + } else if (modelID.includes('flash')) { + description = t('apiTester.models.flash') + } + + if (noThinking) { + return `${description} · ${t('apiTester.models.noThinking')}` + } + return description +} + +function decorateModel(t, modelID) { + const isVision = modelID.includes('vision') + const isSearch = modelID.includes('search') + const isPro = modelID.includes('pro') + + if (isVision && isSearch) { + return { + id: modelID, + name: modelID, + icon: 'ImageIcon', + desc: describeModel(t, modelID), + color: 'text-fuchsia-600', + } + } + if (isVision) { + return { + id: modelID, + name: modelID, + icon: 'ImageIcon', + desc: describeModel(t, modelID), + color: 'text-violet-500', + } + } + if (isSearch) { + return { + id: modelID, + name: modelID, + icon: 'SearchIcon', + desc: describeModel(t, modelID), + color: isPro ? 'text-cyan-600' : 'text-cyan-500', + } + } + return { + id: modelID, + name: modelID, + icon: isPro ? 'Cpu' : 'MessageSquare', + desc: describeModel(t, modelID), + color: isPro ? 'text-amber-600' : 'text-amber-500', + } +} + export default function ApiTesterContainer({ config, onMessage, authFetch }) { const { t } = useI18n() + const [availableModelIDs, setAvailableModelIDs] = useState([]) + const [modelsLoaded, setModelsLoaded] = useState(false) const { model, @@ -49,14 +117,58 @@ export default function ApiTesterContainer({ config, onMessage, authFetch }) { const customKeyActive = trimmedApiKey !== '' const customKeyManaged = customKeyActive && configuredKeys.includes(trimmedApiKey) - const models = [ - { id: 'deepseek-v4-flash', name: 'deepseek-v4-flash', icon: 'MessageSquare', desc: t('apiTester.models.flash'), color: 'text-amber-500' }, - { id: 'deepseek-v4-pro', name: 'deepseek-v4-pro', icon: 'Cpu', desc: t('apiTester.models.pro'), color: 'text-amber-600' }, - { id: 'deepseek-v4-flash-search', name: 'deepseek-v4-flash-search', icon: 'SearchIcon', desc: t('apiTester.models.flashSearch'), color: 'text-cyan-500' }, - { id: 'deepseek-v4-pro-search', name: 'deepseek-v4-pro-search', icon: 'SearchIcon', desc: t('apiTester.models.proSearch'), color: 'text-cyan-600' }, - { id: 'deepseek-v4-vision', name: 'deepseek-v4-vision', icon: 'ImageIcon', desc: t('apiTester.models.vision'), color: 'text-violet-500' }, - { id: 'deepseek-v4-vision-search', name: 'deepseek-v4-vision-search', icon: 'SearchIcon', desc: t('apiTester.models.visionSearch'), color: 'text-fuchsia-600' }, - ] + useEffect(() => { + let disposed = false + + async function loadModels() { + try { + const res = await authFetch('/v1/models') + if (!res.ok) { + throw new Error(`failed to fetch models: ${res.status}`) + } + const data = await res.json() + const modelIDs = Array.isArray(data?.data) + ? data.data + .map((item) => String(item?.id || '').trim()) + .filter(Boolean) + : [] + if (!disposed) { + setAvailableModelIDs(modelIDs) + } + } catch (_err) { + if (!disposed) { + setAvailableModelIDs([]) + } + } finally { + if (!disposed) { + setModelsLoaded(true) + } + } + } + + setModelsLoaded(false) + loadModels() + return () => { + disposed = true + } + }, [authFetch]) + + const models = useMemo( + () => availableModelIDs.map((modelID) => decorateModel(t, modelID)), + [availableModelIDs, t] + ) + + useEffect(() => { + if (!models.length) { + if (model) { + setModel('') + } + return + } + if (!model || !models.some((item) => item.id === model)) { + setModel(models[0].id) + } + }, [model, models, setModel]) const { runTest, stopGeneration } = useChatStreamClient({ t, @@ -84,6 +196,7 @@ export default function ApiTesterContainer({ config, onMessage, authFetch }) { models={models} model={model} setModel={setModel} + modelsLoaded={modelsLoaded} streamingMode={streamingMode} setStreamingMode={setStreamingMode} selectedAccount={selectedAccount} @@ -114,6 +227,7 @@ export default function ApiTesterContainer({ config, onMessage, authFetch }) { streamingContent={streamingContent} onRunTest={runTest} onStopGeneration={stopGeneration} + hasAvailableModel={models.length > 0} /> ) diff --git a/webui/src/features/apiTester/ChatPanel.jsx b/webui/src/features/apiTester/ChatPanel.jsx index 5da6684..32b160e 100644 --- a/webui/src/features/apiTester/ChatPanel.jsx +++ b/webui/src/features/apiTester/ChatPanel.jsx @@ -21,6 +21,7 @@ export default function ChatPanel({ streamingContent, onRunTest, onStopGeneration, + hasAvailableModel, }) { const fileInputRef = useRef(null) const [uploadingFiles, setUploadingFiles] = useState(false) @@ -181,7 +182,7 @@ export default function ChatPanel({