mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-10 19:27:41 +08:00
chore: update project files
This commit is contained in:
35
API.md
35
API.md
@@ -196,16 +196,22 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=`
|
||||
"object": "list",
|
||||
"data": [
|
||||
{"id": "deepseek-v4-flash", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
|
||||
{"id": "deepseek-v4-flash-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
|
||||
{"id": "deepseek-v4-pro", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
|
||||
{"id": "deepseek-v4-pro-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
|
||||
{"id": "deepseek-v4-flash-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
|
||||
{"id": "deepseek-v4-flash-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
|
||||
{"id": "deepseek-v4-pro-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
|
||||
{"id": "deepseek-v4-pro-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
|
||||
{"id": "deepseek-v4-vision", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
|
||||
{"id": "deepseek-v4-vision-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}
|
||||
{"id": "deepseek-v4-vision-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
|
||||
{"id": "deepseek-v4-vision-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
|
||||
{"id": "deepseek-v4-vision-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
> 说明:`/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID;常见 alias 仅用于请求入参解析,不会在该接口中单独展开返回。
|
||||
> 说明:`/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID;常见 alias 仅用于请求入参解析,不会在该接口中单独展开返回。带 `-nothinking` 后缀的模型表示无论请求里是否显式开启 thinking / reasoning,都会强制关闭思考输出。
|
||||
|
||||
### 模型 alias 解析策略
|
||||
|
||||
@@ -213,8 +219,9 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=`
|
||||
|
||||
1. 先匹配 DeepSeek 原生模型。
|
||||
2. 再匹配 `model_aliases` 精确映射。
|
||||
3. 未命中时按模型家族规则回退(如 `o*`、`gpt-*`、`claude-*`)。
|
||||
4. 仍未命中则返回 `invalid_request_error`。
|
||||
3. 如果请求名以 `-nothinking` 结尾,则在最终解析出的规范模型上追加对应的无思考变体。
|
||||
4. 未命中时按模型家族规则回退(如 `o*`、`gpt-*`、`claude-*`)。
|
||||
5. 仍未命中则返回 `invalid_request_error`。
|
||||
|
||||
当前内置默认 alias 来自 `internal/config/models.go`,`config.model_aliases` 会在运行时覆盖或补充同名映射。节选:
|
||||
|
||||
@@ -224,6 +231,8 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=`
|
||||
- Gemini:`gemini-2.5-pro`、`gemini-2.5-flash`、`gemini-pro-vision`
|
||||
- 其他兼容族:`llama-*`、`qwen-*`、`mistral-*`、`command-*` 会按家族启发式回退
|
||||
|
||||
上述 alias 若在请求名后追加 `-nothinking` 后缀,也会映射到对应的强制关闭 thinking 版本。
|
||||
|
||||
退役历史模型(如 `claude-1.*`、`claude-2.*`、`claude-instant-*`、`gpt-3.5*`)会被显式拒绝。
|
||||
|
||||
### `POST /v1/chat/completions`
|
||||
@@ -239,7 +248,7 @@ Content-Type: application/json
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
| --- | --- | --- | --- |
|
||||
| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias(如 `gpt-5.5`、`gpt-5.4-mini`、`gpt-5.3-codex`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-6`、`gemini-2.5-pro`、`gemini-2.5-flash` 等) |
|
||||
| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias(如 `gpt-5.5`、`gpt-5.4-mini`、`gpt-5.3-codex`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-6`、`gemini-2.5-pro`、`gemini-2.5-flash` 等);若模型名带 `-nothinking` 后缀,则强制关闭 thinking / reasoning |
|
||||
| `messages` | array | ✅ | OpenAI 风格消息数组 |
|
||||
| `stream` | boolean | ❌ | 默认 `false` |
|
||||
| `tools` | array | ❌ | Function Calling 定义 |
|
||||
@@ -449,16 +458,19 @@ data: [DONE]
|
||||
"object": "list",
|
||||
"data": [
|
||||
{"id": "claude-sonnet-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
|
||||
{"id": "claude-sonnet-4-6-nothinking", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
|
||||
{"id": "claude-haiku-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
|
||||
{"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"}
|
||||
{"id": "claude-haiku-4-5-nothinking", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
|
||||
{"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
|
||||
{"id": "claude-opus-4-6-nothinking", "object": "model", "created": 1715635200, "owned_by": "anthropic"}
|
||||
],
|
||||
"first_id": "claude-opus-4-6",
|
||||
"last_id": "claude-3-haiku-20240307",
|
||||
"last_id": "claude-3-haiku-20240307-nothinking",
|
||||
"has_more": false
|
||||
}
|
||||
```
|
||||
|
||||
> 说明:示例仅展示部分模型;实际返回除当前主别名外,还包含 Claude 4.x snapshots,以及 3.x 历史模型 ID 与常见别名。
|
||||
> 说明:示例仅展示部分模型;实际返回除当前主别名外,还包含 Claude 4.x snapshots、3.x 历史模型 ID 与常见别名,并为这些可映射模型额外提供 `-nothinking` 变体。
|
||||
|
||||
### `POST /anthropic/v1/messages`
|
||||
|
||||
@@ -476,7 +488,7 @@ anthropic-version: 2023-06-01
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
| --- | --- | --- | --- |
|
||||
| `model` | string | ✅ | 例如 `claude-sonnet-4-6` / `claude-opus-4-6` / `claude-haiku-4-5`(兼容 `claude-sonnet-4-5`、`claude-3-5-haiku-latest`),并支持历史 Claude 模型 ID |
|
||||
| `model` | string | ✅ | 例如 `claude-sonnet-4-6` / `claude-opus-4-6` / `claude-haiku-4-5`(兼容 `claude-sonnet-4-5`、`claude-3-5-haiku-latest`),并支持历史 Claude 模型 ID;若模型名带 `-nothinking` 后缀,则强制关闭 thinking / reasoning |
|
||||
| `messages` | array | ✅ | Claude 风格消息数组 |
|
||||
| `max_tokens` | number | ❌ | 缺省自动补 `8192`;当前实现不会硬性截断上游输出 |
|
||||
| `stream` | boolean | ❌ | 默认 `false` |
|
||||
@@ -534,7 +546,8 @@ data: {"type":"message_stop"}
|
||||
|
||||
**说明**:
|
||||
|
||||
- 名称中包含 `opus` / `reasoner` / `slow` 的模型会输出 `thinking_delta`
|
||||
- 默认模型会按各 surface 的既有规则输出 thinking / reasoning 相关增量
|
||||
- 带 `-nothinking` 后缀的模型会强制关闭 thinking,即使请求显式传了 `thinking` / `reasoning` / `reasoning_effort` 也不会输出 `thinking_delta`
|
||||
- 不会输出 `signature_delta`(上游 DeepSeek 未提供可验证签名)
|
||||
- `tools` 场景优先避免泄露原始工具 JSON,不强制发送 `input_json_delta`
|
||||
|
||||
@@ -575,7 +588,7 @@ data: {"type":"message_stop"}
|
||||
|
||||
### `POST /v1beta/models/{model}:generateContent`
|
||||
|
||||
请求体兼容 Gemini `contents` / `tools` 字段,模型名可用 alias 自动映射到 DeepSeek 模型。
|
||||
请求体兼容 Gemini `contents` / `tools` 字段,模型名可用 alias 自动映射到 DeepSeek 模型;若路径中的模型名带 `-nothinking` 后缀,则最终会映射到对应的无思考模型。
|
||||
|
||||
响应为 Gemini 兼容结构,核心字段包括:
|
||||
|
||||
|
||||
17
README.MD
17
README.MD
@@ -125,23 +125,32 @@ flowchart LR
|
||||
| 模型类型 | 模型 ID | thinking | search |
|
||||
| --- | --- | --- | --- |
|
||||
| default | `deepseek-v4-flash` | 默认开启,可由请求参数控制 | ❌ |
|
||||
| default | `deepseek-v4-flash-nothinking` | 永久关闭,不受请求参数影响 | ❌ |
|
||||
| expert | `deepseek-v4-pro` | 默认开启,可由请求参数控制 | ❌ |
|
||||
| expert | `deepseek-v4-pro-nothinking` | 永久关闭,不受请求参数影响 | ❌ |
|
||||
| default | `deepseek-v4-flash-search` | 默认开启,可由请求参数控制 | ✅ |
|
||||
| default | `deepseek-v4-flash-search-nothinking` | 永久关闭,不受请求参数影响 | ✅ |
|
||||
| expert | `deepseek-v4-pro-search` | 默认开启,可由请求参数控制 | ✅ |
|
||||
| expert | `deepseek-v4-pro-search-nothinking` | 永久关闭,不受请求参数影响 | ✅ |
|
||||
| vision | `deepseek-v4-vision` | 默认开启,可由请求参数控制 | ❌ |
|
||||
| vision | `deepseek-v4-vision-nothinking` | 永久关闭,不受请求参数影响 | ❌ |
|
||||
| vision | `deepseek-v4-vision-search` | 默认开启,可由请求参数控制 | ✅ |
|
||||
| vision | `deepseek-v4-vision-search-nothinking` | 永久关闭,不受请求参数影响 | ✅ |
|
||||
|
||||
除原生模型外,也支持常见 alias 输入(如 `gpt-4.1`、`gpt-5`、`gpt-5-codex`、`o3`、`claude-*`、`gemini-*` 等),但 `/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID。完整 alias 行为以 [API.md](API.md#模型-alias-解析策略) 和 `config.example.json` 为准。
|
||||
除原生模型外,也支持常见 alias 输入(如 `gpt-4.1`、`gpt-5`、`gpt-5-codex`、`o3`、`claude-*`、`gemini-*` 等),但 `/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID。若 alias 名本身追加 `-nothinking` 后缀,也会映射到对应的强制关思考模型。完整 alias 行为以 [API.md](API.md#模型-alias-解析策略) 和 `config.example.json` 为准。
|
||||
|
||||
### Claude 接口(`GET /anthropic/v1/models`)
|
||||
|
||||
| 当前常用模型 | 默认映射 |
|
||||
| --- | --- |
|
||||
| `claude-sonnet-4-6` | `deepseek-v4-flash` |
|
||||
| `claude-sonnet-4-6-nothinking` | `deepseek-v4-flash-nothinking` |
|
||||
| `claude-haiku-4-5`(兼容 `claude-3-5-haiku-latest`) | `deepseek-v4-flash` |
|
||||
| `claude-haiku-4-5-nothinking` | `deepseek-v4-flash-nothinking` |
|
||||
| `claude-opus-4-6` | `deepseek-v4-pro` |
|
||||
| `claude-opus-4-6-nothinking` | `deepseek-v4-pro-nothinking` |
|
||||
|
||||
可通过配置中的 `model_aliases` 覆盖映射关系。
|
||||
可通过配置中的 `model_aliases` 覆盖映射关系;若请求模型名带 `-nothinking`,会在最终映射结果上强制追加无思考语义。
|
||||
`/anthropic/v1/models` 除上述主别名外,还会返回 Claude 4.x snapshots、3.x 历史模型 ID 与常见 alias,便于旧客户端直接兼容。
|
||||
|
||||
#### Claude Code 接入避坑(实测)
|
||||
@@ -153,7 +162,7 @@ flowchart LR
|
||||
|
||||
### Gemini 接口
|
||||
|
||||
Gemini 适配器将模型名通过 `model_aliases` 或内置规则映射到 DeepSeek 原生模型,支持 `generateContent` 和 `streamGenerateContent` 两种调用方式,并完整支持 Tool Calling(`functionDeclarations` → `functionCall` 输出)。
|
||||
Gemini 适配器将模型名通过 `model_aliases` 或内置规则映射到 DeepSeek 原生模型,支持 `generateContent` 和 `streamGenerateContent` 两种调用方式,并完整支持 Tool Calling(`functionDeclarations` → `functionCall` 输出)。若 Gemini 模型名带 `-nothinking` 后缀,例如 `gemini-2.5-pro-nothinking`,会映射到对应的强制关闭思考模型。
|
||||
|
||||
## 快速开始
|
||||
|
||||
@@ -284,7 +293,7 @@ go run ./cmd/ds2api
|
||||
- `history_split`:轮次拆分策略;默认关闭,开启后默认从第二轮开始将旧历史上传为 `HISTORY.txt`。
|
||||
- `current_input_file`:独立拆分策略;默认开启且阈值为 `0`,触发时将完整上下文合并上传为隐藏上下文文件,并跳过 `HISTORY.txt`。
|
||||
- `history_split` 与 `current_input_file` 互斥,最多启用一个;两者都关闭时请求直接透传。
|
||||
- `thinking_injection`:默认开启;在最新 user 消息末尾追加思考格式增强提示,提高工具调用前的思考结构稳定性;`prompt` 留空时使用内置默认提示词。
|
||||
- `thinking_injection`:默认开启;在最新 user 消息末尾追加思考增强提示词,提高高强度推理与工具调用前的思考稳定性;`prompt` 留空时使用内置默认提示词。
|
||||
|
||||
环境变量完整列表见 [部署指南](docs/DEPLOY.md),接口鉴权规则见 [API.md](API.md#鉴权规则)。
|
||||
|
||||
|
||||
@@ -538,7 +538,7 @@ curl -s http://127.0.0.1:5001/readyz
|
||||
|
||||
# 3. Model list
|
||||
curl -s http://127.0.0.1:5001/v1/models
|
||||
# Expected: {"object":"list","data":[...]}
|
||||
# Expected: {"object":"list","data":[...]} (including `*-nothinking` variants)
|
||||
|
||||
# 4. Admin panel (if WebUI is built)
|
||||
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin
|
||||
|
||||
@@ -548,7 +548,7 @@ curl -s http://127.0.0.1:5001/readyz
|
||||
|
||||
# 3. 模型列表
|
||||
curl -s http://127.0.0.1:5001/v1/models
|
||||
# 预期: {"object":"list","data":[...]}
|
||||
# 预期: {"object":"list","data":[...]}(包含 `*-nothinking` 变体)
|
||||
|
||||
# 4. 管理台页面(如果已构建 WebUI)
|
||||
curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin
|
||||
|
||||
@@ -99,12 +99,12 @@ DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools`
|
||||
- `ref_file_ids` 只承载文件引用,不承载普通文本消息。
|
||||
- `tools` 不会作为“原生工具 schema”直接下发给下游,而是被改写进 `prompt`。
|
||||
- OpenAI Chat / Responses 原生走统一 OpenAI 标准化与 DeepSeek payload 组装;Claude / Gemini 会尽量复用 OpenAI prompt/tool 语义,其中 Gemini 直接复用 `promptcompat.BuildOpenAIPromptForAdapter`,Claude 消息接口在可代理场景会转换为 OpenAI chat 形态再执行。
|
||||
- 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关;关闭时即使上游返回 `response/thinking_content`,兼容层也不会把它当作可见正文输出。Claude surface 在流式请求且未显式声明 `thinking` 时,仍按 Anthropic 语义默认关闭;但在非流式代理场景,兼容层会内部开启一次下游 thinking,用于捕获“正文为空、工具调用落在 thinking 里”的情况,随后在回包前剥离用户不可见的 thinking block。
|
||||
- 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关;关闭时即使上游返回 `response/thinking_content`,兼容层也不会把它当作可见正文输出。若最终解析出的模型名带 `-nothinking` 后缀,则会无条件强制关闭 thinking,优先级高于请求体中的 `thinking` / `reasoning` / `reasoning_effort`。Claude surface 在流式请求且未显式声明 `thinking` 时,仍按 Anthropic 语义默认关闭;但在非流式代理场景,兼容层会内部开启一次下游 thinking,用于捕获“正文为空、工具调用落在 thinking 里”的情况,随后在回包前剥离用户不可见的 thinking block。
|
||||
- 对 OpenAI Chat / Responses 的非流式收尾,如果最终可见正文为空,兼容层会优先尝试把思维链中的独立 DSML / XML 工具块当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测,但不会因为思维链内容去中途拦截或改写流式输出;thinking / reasoning 增量仍按原样先发,只有在结束收尾时才可能补发最终工具调用结果。补发结果会作为本轮 assistant 的结构化 `tool_calls` / `function_call` 输出返回,而不是塞进 `content` 文本;如果客户端没有开启 thinking / reasoning,思维链只用于检测,不会作为 `reasoning_content` 或可见正文暴露。只有正文为空且思维链里也没有可执行工具调用时,才继续按空回复错误处理。
|
||||
|
||||
## 5. prompt 是怎么拼出来的
|
||||
|
||||
OpenAI Chat / Responses 在标准化后、history split / current input file 之前,会默认执行 `thinking_injection` 增强。它参考 DeepSeek V4 “把控制指令放在 user 消息末尾更稳定”的用法,在最新 user message 后追加思考格式提示词,默认内容以 `【思维链格式要求】...` 开头,要求模型在 `<think>` 内按分析、构思、工具调用、XML 工具格式回顾这几个阶段组织思考。该开关默认启用,可通过 `thinking_injection.enabled=false` 关闭;也可以通过 `thinking_injection.prompt` 自定义提示词,留空时使用内置默认提示词。
|
||||
OpenAI Chat / Responses 在标准化后、history split / current input file 之前,会默认执行 `thinking_injection` 增强。它参考 DeepSeek V4 “把控制指令放在 user 消息末尾更稳定”的用法,在最新 user message 后追加思考增强提示词。当前内置默认提示词以 `Reasoning Effort: Absolute maximum with no shortcuts permitted.` 开头,并继续要求模型充分分解问题、覆盖潜在路径与边界条件、把完整推演过程显式写出。该开关默认启用,可通过 `thinking_injection.enabled=false` 关闭;也可以通过 `thinking_injection.prompt` 自定义提示词,留空时使用内置默认提示词。
|
||||
|
||||
这段增强属于 prompt 可见上下文:
|
||||
|
||||
@@ -128,17 +128,7 @@ OpenAI Chat / Responses 在标准化后、history split / current input file 之
|
||||
实现位置:
|
||||
[internal/prompt/messages.go](../internal/prompt/messages.go)
|
||||
|
||||
### 5.2 thinking continuity 说明
|
||||
|
||||
如果启用了 thinking,会在最前面额外插入一个 system block,提醒模型:
|
||||
|
||||
- 继续既有会话,不要重开
|
||||
- earlier messages 是 binding context
|
||||
- 不要把最终回答只留在 reasoning 里
|
||||
|
||||
这部分不是客户端原始消息,而是兼容层主动补进去的连续性契约。
|
||||
|
||||
### 5.3 相邻同角色消息会合并
|
||||
### 5.2 相邻同角色消息会合并
|
||||
|
||||
在最终 `MessagesPrepareWithThinking` 中,相邻同 role 的消息会被合并成一个块,中间插入空行。
|
||||
|
||||
@@ -352,7 +342,7 @@ history split 触发后行为:
|
||||
|
||||
```json
|
||||
{
|
||||
"prompt": "<|begin▁of▁sentence|><|System|>continuity instructions...\\n\\n原 system / developer\\n\\nYou have access to these tools: ...<|end▁of▁instructions|><|User|>最新问题<|Assistant|>",
|
||||
"prompt": "<|begin▁of▁sentence|><|System|>原 system / developer\n\nYou have access to these tools: ...<|end▁of▁instructions|><|User|>最新问题<|Assistant|>",
|
||||
"ref_file_ids": [
|
||||
"file-history-ignore",
|
||||
"file-systemprompt",
|
||||
|
||||
@@ -19,6 +19,16 @@ func TestGetModelConfigDeepSeekChat(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetModelConfigDeepSeekChatNoThinking(t *testing.T) {
|
||||
thinking, search, ok := GetModelConfig("deepseek-v4-flash-nothinking")
|
||||
if !ok {
|
||||
t.Fatal("expected ok for deepseek-v4-flash-nothinking")
|
||||
}
|
||||
if thinking || search {
|
||||
t.Fatalf("expected thinking=false search=false for deepseek-v4-flash-nothinking, got thinking=%v search=%v", thinking, search)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetModelConfigDeepSeekReasoner(t *testing.T) {
|
||||
thinking, search, ok := GetModelConfig("deepseek-v4-pro")
|
||||
if !ok {
|
||||
@@ -84,6 +94,10 @@ func TestGetModelTypeDefaultExpertAndVision(t *testing.T) {
|
||||
if !ok || defaultType != "default" {
|
||||
t.Fatalf("expected default model_type, got ok=%v model_type=%q", ok, defaultType)
|
||||
}
|
||||
defaultNoThinkingType, ok := GetModelType("deepseek-v4-flash-nothinking")
|
||||
if !ok || defaultNoThinkingType != "default" {
|
||||
t.Fatalf("expected default model_type for nothinking, got ok=%v model_type=%q", ok, defaultNoThinkingType)
|
||||
}
|
||||
expertType, ok := GetModelType("deepseek-v4-pro")
|
||||
if !ok || expertType != "expert" {
|
||||
t.Fatalf("expected expert model_type, got ok=%v model_type=%q", ok, expertType)
|
||||
@@ -734,12 +748,18 @@ func TestOpenAIModelsResponse(t *testing.T) {
|
||||
t.Fatal("expected non-empty models list")
|
||||
}
|
||||
expected := map[string]bool{
|
||||
"deepseek-v4-flash": false,
|
||||
"deepseek-v4-pro": false,
|
||||
"deepseek-v4-flash-search": false,
|
||||
"deepseek-v4-pro-search": false,
|
||||
"deepseek-v4-vision": false,
|
||||
"deepseek-v4-vision-search": false,
|
||||
"deepseek-v4-flash": false,
|
||||
"deepseek-v4-flash-nothinking": false,
|
||||
"deepseek-v4-pro": false,
|
||||
"deepseek-v4-pro-nothinking": false,
|
||||
"deepseek-v4-flash-search": false,
|
||||
"deepseek-v4-flash-search-nothinking": false,
|
||||
"deepseek-v4-pro-search": false,
|
||||
"deepseek-v4-pro-search-nothinking": false,
|
||||
"deepseek-v4-vision": false,
|
||||
"deepseek-v4-vision-nothinking": false,
|
||||
"deepseek-v4-vision-search": false,
|
||||
"deepseek-v4-vision-search-nothinking": false,
|
||||
}
|
||||
for _, model := range data {
|
||||
if _, ok := expected[model.ID]; ok {
|
||||
|
||||
@@ -13,6 +13,13 @@ func TestResolveModelDirectDeepSeek(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveModelDirectDeepSeekNoThinking(t *testing.T) {
|
||||
got, ok := ResolveModel(nil, "deepseek-v4-flash-nothinking")
|
||||
if !ok || got != "deepseek-v4-flash-nothinking" {
|
||||
t.Fatalf("expected deepseek-v4-flash-nothinking, got ok=%v model=%q", ok, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveModelAlias(t *testing.T) {
|
||||
got, ok := ResolveModel(nil, "gpt-4.1")
|
||||
if !ok || got != "deepseek-v4-flash" {
|
||||
@@ -34,6 +41,13 @@ func TestResolveLatestClaudeAlias(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveLatestClaudeAliasNoThinking(t *testing.T) {
|
||||
got, ok := ResolveModel(nil, "claude-sonnet-4-6-nothinking")
|
||||
if !ok || got != "deepseek-v4-flash-nothinking" {
|
||||
t.Fatalf("expected alias claude-sonnet-4-6-nothinking -> deepseek-v4-flash-nothinking, got ok=%v model=%q", ok, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveExpandedHistoricalAliases(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
@@ -68,6 +82,13 @@ func TestResolveModelHeuristicReasoner(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveModelHeuristicReasonerNoThinking(t *testing.T) {
|
||||
got, ok := ResolveModel(nil, "o3-super-nothinking")
|
||||
if !ok || got != "deepseek-v4-pro-nothinking" {
|
||||
t.Fatalf("expected heuristic reasoner nothinking, got ok=%v model=%q", ok, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveModelUnknown(t *testing.T) {
|
||||
_, ok := ResolveModel(nil, "totally-custom-model")
|
||||
if ok {
|
||||
|
||||
@@ -14,7 +14,9 @@ type ModelAliasReader interface {
|
||||
ModelAliases() map[string]string
|
||||
}
|
||||
|
||||
var DeepSeekModels = []ModelInfo{
|
||||
const noThinkingModelSuffix = "-nothinking"
|
||||
|
||||
var deepSeekBaseModels = []ModelInfo{
|
||||
{ID: "deepseek-v4-flash", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
|
||||
{ID: "deepseek-v4-pro", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
|
||||
{ID: "deepseek-v4-flash-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
|
||||
@@ -23,7 +25,9 @@ var DeepSeekModels = []ModelInfo{
|
||||
{ID: "deepseek-v4-vision-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
|
||||
}
|
||||
|
||||
var ClaudeModels = []ModelInfo{
|
||||
var DeepSeekModels = appendNoThinkingVariants(deepSeekBaseModels)
|
||||
|
||||
var claudeBaseModels = []ModelInfo{
|
||||
// Current aliases
|
||||
{ID: "claude-opus-4-6", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
|
||||
{ID: "claude-sonnet-4-6", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
|
||||
@@ -53,19 +57,26 @@ var ClaudeModels = []ModelInfo{
|
||||
{ID: "claude-3-haiku-20240307", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
|
||||
}
|
||||
|
||||
var ClaudeModels = appendNoThinkingVariants(claudeBaseModels)
|
||||
|
||||
func GetModelConfig(model string) (thinking bool, search bool, ok bool) {
|
||||
switch lower(model) {
|
||||
baseModel, noThinking := splitNoThinkingModel(model)
|
||||
if baseModel == "" {
|
||||
return false, false, false
|
||||
}
|
||||
switch baseModel {
|
||||
case "deepseek-v4-flash", "deepseek-v4-pro", "deepseek-v4-vision":
|
||||
return true, false, true
|
||||
return !noThinking, false, true
|
||||
case "deepseek-v4-flash-search", "deepseek-v4-pro-search", "deepseek-v4-vision-search":
|
||||
return true, true, true
|
||||
return !noThinking, true, true
|
||||
default:
|
||||
return false, false, false
|
||||
}
|
||||
}
|
||||
|
||||
func GetModelType(model string) (modelType string, ok bool) {
|
||||
switch lower(model) {
|
||||
baseModel, _ := splitNoThinkingModel(model)
|
||||
switch baseModel {
|
||||
case "deepseek-v4-flash", "deepseek-v4-flash-search":
|
||||
return "default", true
|
||||
case "deepseek-v4-pro", "deepseek-v4-pro-search":
|
||||
@@ -82,6 +93,11 @@ func IsSupportedDeepSeekModel(model string) bool {
|
||||
return ok
|
||||
}
|
||||
|
||||
func IsNoThinkingModel(model string) bool {
|
||||
_, noThinking := splitNoThinkingModel(model)
|
||||
return noThinking
|
||||
}
|
||||
|
||||
func DefaultModelAliases() map[string]string {
|
||||
return map[string]string{
|
||||
// OpenAI GPT / ChatGPT families
|
||||
@@ -191,62 +207,19 @@ func ResolveModel(store ModelAliasReader, requested string) (string, bool) {
|
||||
if model == "" {
|
||||
return "", false
|
||||
}
|
||||
if isRetiredHistoricalModel(model) {
|
||||
return "", false
|
||||
}
|
||||
aliases := loadModelAliases(store)
|
||||
if IsSupportedDeepSeekModel(model) {
|
||||
return model, true
|
||||
}
|
||||
aliases := DefaultModelAliases()
|
||||
if store != nil {
|
||||
for k, v := range store.ModelAliases() {
|
||||
aliases[lower(strings.TrimSpace(k))] = lower(strings.TrimSpace(v))
|
||||
}
|
||||
}
|
||||
if mapped, ok := aliases[model]; ok && IsSupportedDeepSeekModel(mapped) {
|
||||
return mapped, true
|
||||
}
|
||||
if strings.HasPrefix(model, "deepseek-") {
|
||||
baseModel, noThinking := splitNoThinkingModel(model)
|
||||
resolvedModel, ok := resolveCanonicalModel(aliases, baseModel)
|
||||
if !ok {
|
||||
return "", false
|
||||
}
|
||||
|
||||
knownFamily := false
|
||||
for _, prefix := range []string{
|
||||
"gpt-", "o1", "o3", "claude-", "gemini-", "llama-", "qwen-", "mistral-", "command-",
|
||||
} {
|
||||
if strings.HasPrefix(model, prefix) {
|
||||
knownFamily = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !knownFamily {
|
||||
return "", false
|
||||
}
|
||||
|
||||
useVision := strings.Contains(model, "vision")
|
||||
useReasoner := strings.Contains(model, "reason") ||
|
||||
strings.Contains(model, "reasoner") ||
|
||||
strings.HasPrefix(model, "o1") ||
|
||||
strings.HasPrefix(model, "o3") ||
|
||||
strings.Contains(model, "opus") ||
|
||||
strings.Contains(model, "slow") ||
|
||||
strings.Contains(model, "r1")
|
||||
useSearch := strings.Contains(model, "search")
|
||||
|
||||
switch {
|
||||
case useVision && useSearch:
|
||||
return "deepseek-v4-vision-search", true
|
||||
case useVision:
|
||||
return "deepseek-v4-vision", true
|
||||
case useReasoner && useSearch:
|
||||
return "deepseek-v4-pro-search", true
|
||||
case useReasoner:
|
||||
return "deepseek-v4-pro", true
|
||||
case useSearch:
|
||||
return "deepseek-v4-flash-search", true
|
||||
default:
|
||||
return "deepseek-v4-flash", true
|
||||
}
|
||||
return withNoThinkingVariant(resolvedModel, noThinking), true
|
||||
}
|
||||
|
||||
func isRetiredHistoricalModel(model string) bool {
|
||||
@@ -303,3 +276,100 @@ func ClaudeModelsResponse() map[string]any {
|
||||
resp["has_more"] = false
|
||||
return resp
|
||||
}
|
||||
|
||||
func appendNoThinkingVariants(models []ModelInfo) []ModelInfo {
|
||||
out := make([]ModelInfo, 0, len(models)*2)
|
||||
for _, model := range models {
|
||||
out = append(out, model)
|
||||
variant := model
|
||||
variant.ID = withNoThinkingVariant(model.ID, true)
|
||||
out = append(out, variant)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func splitNoThinkingModel(model string) (string, bool) {
|
||||
model = lower(strings.TrimSpace(model))
|
||||
if strings.HasSuffix(model, noThinkingModelSuffix) {
|
||||
return strings.TrimSuffix(model, noThinkingModelSuffix), true
|
||||
}
|
||||
return model, false
|
||||
}
|
||||
|
||||
func withNoThinkingVariant(model string, enabled bool) string {
|
||||
baseModel, _ := splitNoThinkingModel(model)
|
||||
if !enabled {
|
||||
return baseModel
|
||||
}
|
||||
if baseModel == "" {
|
||||
return ""
|
||||
}
|
||||
return baseModel + noThinkingModelSuffix
|
||||
}
|
||||
|
||||
func loadModelAliases(store ModelAliasReader) map[string]string {
|
||||
aliases := DefaultModelAliases()
|
||||
if store != nil {
|
||||
for k, v := range store.ModelAliases() {
|
||||
aliases[lower(strings.TrimSpace(k))] = lower(strings.TrimSpace(v))
|
||||
}
|
||||
}
|
||||
return aliases
|
||||
}
|
||||
|
||||
func resolveCanonicalModel(aliases map[string]string, model string) (string, bool) {
|
||||
model = lower(strings.TrimSpace(model))
|
||||
if model == "" {
|
||||
return "", false
|
||||
}
|
||||
if isRetiredHistoricalModel(model) {
|
||||
return "", false
|
||||
}
|
||||
if IsSupportedDeepSeekModel(model) {
|
||||
return model, true
|
||||
}
|
||||
if mapped, ok := aliases[model]; ok && IsSupportedDeepSeekModel(mapped) {
|
||||
return mapped, true
|
||||
}
|
||||
if strings.HasPrefix(model, "deepseek-") {
|
||||
return "", false
|
||||
}
|
||||
|
||||
knownFamily := false
|
||||
for _, prefix := range []string{
|
||||
"gpt-", "o1", "o3", "claude-", "gemini-", "llama-", "qwen-", "mistral-", "command-",
|
||||
} {
|
||||
if strings.HasPrefix(model, prefix) {
|
||||
knownFamily = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !knownFamily {
|
||||
return "", false
|
||||
}
|
||||
|
||||
useVision := strings.Contains(model, "vision")
|
||||
useReasoner := strings.Contains(model, "reason") ||
|
||||
strings.Contains(model, "reasoner") ||
|
||||
strings.HasPrefix(model, "o1") ||
|
||||
strings.HasPrefix(model, "o3") ||
|
||||
strings.Contains(model, "opus") ||
|
||||
strings.Contains(model, "slow") ||
|
||||
strings.Contains(model, "r1")
|
||||
useSearch := strings.Contains(model, "search")
|
||||
|
||||
switch {
|
||||
case useVision && useSearch:
|
||||
return "deepseek-v4-vision-search", true
|
||||
case useVision:
|
||||
return "deepseek-v4-vision", true
|
||||
case useReasoner && useSearch:
|
||||
return "deepseek-v4-pro-search", true
|
||||
case useReasoner:
|
||||
return "deepseek-v4-pro", true
|
||||
case useSearch:
|
||||
return "deepseek-v4-flash-search", true
|
||||
default:
|
||||
return "deepseek-v4-flash", true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,6 +53,26 @@ func TestNormalizeClaudeRequestEnablesThinkingWhenRequested(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClaudeRequestNoThinkingAliasForcesThinkingOff(t *testing.T) {
|
||||
req := map[string]any{
|
||||
"model": "claude-opus-4-6-nothinking",
|
||||
"messages": []any{
|
||||
map[string]any{"role": "user", "content": "hello"},
|
||||
},
|
||||
"thinking": map[string]any{"type": "enabled", "budget_tokens": 1024},
|
||||
}
|
||||
out, err := normalizeClaudeRequest(mockClaudeConfig{}, req)
|
||||
if err != nil {
|
||||
t.Fatalf("normalizeClaudeRequest error: %v", err)
|
||||
}
|
||||
if out.Standard.ResolvedModel != "deepseek-v4-pro-nothinking" {
|
||||
t.Fatalf("resolved model mismatch: got=%q", out.Standard.ResolvedModel)
|
||||
}
|
||||
if out.Standard.Thinking {
|
||||
t.Fatalf("expected nothinking alias to force downstream thinking off")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClaudeRequestPrefersGlobalAliasMapping(t *testing.T) {
|
||||
req := map[string]any{
|
||||
"model": "claude-sonnet-4-6",
|
||||
|
||||
@@ -37,6 +37,9 @@ func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNorma
|
||||
searchEnabled = false
|
||||
}
|
||||
thinkingEnabled := util.ResolveThinkingEnabled(req, false)
|
||||
if config.IsNoThinkingModel(dsModel) {
|
||||
thinkingEnabled = false
|
||||
}
|
||||
finalPrompt := prompt.MessagesPrepareWithThinking(toMessageMaps(dsPayload["messages"]), thinkingEnabled)
|
||||
toolNames := extractClaudeToolNames(toolsRequested)
|
||||
if len(toolNames) == 0 && len(toolsRequested) > 0 {
|
||||
|
||||
@@ -22,6 +22,9 @@ func normalizeGeminiRequest(store ConfigReader, routeModel string, req map[strin
|
||||
}
|
||||
defaultThinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)
|
||||
thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled)
|
||||
if config.IsNoThinkingModel(resolvedModel) {
|
||||
thinkingEnabled = false
|
||||
}
|
||||
|
||||
messagesRaw := geminiMessagesFromRequest(req)
|
||||
if len(messagesRaw) == 0 {
|
||||
|
||||
28
internal/httpapi/gemini/convert_request_test.go
Normal file
28
internal/httpapi/gemini/convert_request_test.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package gemini
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestNormalizeGeminiRequestNoThinkingModelForcesThinkingOff(t *testing.T) {
|
||||
req := map[string]any{
|
||||
"contents": []any{
|
||||
map[string]any{
|
||||
"role": "user",
|
||||
"parts": []any{map[string]any{"text": "hello"}},
|
||||
},
|
||||
},
|
||||
"reasoning_effort": "high",
|
||||
}
|
||||
out, err := normalizeGeminiRequest(testGeminiConfig{}, "gemini-2.5-pro-nothinking", req, false)
|
||||
if err != nil {
|
||||
t.Fatalf("normalizeGeminiRequest error: %v", err)
|
||||
}
|
||||
if out.ResolvedModel != "deepseek-v4-pro-nothinking" {
|
||||
t.Fatalf("resolved model mismatch: got=%q", out.ResolvedModel)
|
||||
}
|
||||
if out.Thinking {
|
||||
t.Fatalf("expected nothinking model to force thinking off")
|
||||
}
|
||||
if out.Search {
|
||||
t.Fatalf("expected search=false, got=%v", out.Search)
|
||||
}
|
||||
}
|
||||
@@ -80,6 +80,28 @@ func TestNormalizeOpenAIChatRequestWithConfigInterface(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeOpenAIChatRequestDisablesThinkingForNoThinkingModel(t *testing.T) {
|
||||
cfg := mockOpenAIConfig{wideInput: true}
|
||||
req := map[string]any{
|
||||
"model": "deepseek-v4-pro-nothinking",
|
||||
"messages": []any{map[string]any{"role": "user", "content": "hello"}},
|
||||
"reasoning_effort": "high",
|
||||
}
|
||||
out, err := promptcompat.NormalizeOpenAIChatRequest(cfg, req, "")
|
||||
if err != nil {
|
||||
t.Fatalf("promptcompat.NormalizeOpenAIChatRequest error: %v", err)
|
||||
}
|
||||
if out.ResolvedModel != "deepseek-v4-pro-nothinking" {
|
||||
t.Fatalf("resolved model mismatch: got=%q", out.ResolvedModel)
|
||||
}
|
||||
if out.Thinking {
|
||||
t.Fatalf("expected nothinking model to force thinking off")
|
||||
}
|
||||
if out.Search {
|
||||
t.Fatalf("expected search=false for deepseek-v4-pro-nothinking, got=%v", out.Search)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeOpenAIResponsesRequestWideInputPolicyFromInterface(t *testing.T) {
|
||||
req := map[string]any{
|
||||
"model": "deepseek-v4-flash",
|
||||
|
||||
@@ -22,6 +22,15 @@ func TestGetModelRouteDirectAndAlias(t *testing.T) {
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("direct_nothinking", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-v4-flash-nothinking", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("direct_expert", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-v4-pro", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
@@ -48,6 +57,15 @@ func TestGetModelRouteDirectAndAlias(t *testing.T) {
|
||||
t.Fatalf("expected 200 for alias, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("alias_nothinking", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodGet, "/v1/models/claude-sonnet-4-6-nothinking", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 for nothinking alias, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetModelRouteNotFound(t *testing.T) {
|
||||
|
||||
@@ -30,11 +30,6 @@ func MessagesPrepareWithThinking(messages []map[string]any, thinkingEnabled bool
|
||||
Text string
|
||||
}
|
||||
processed := make([]block, 0, len(messages))
|
||||
if thinkingEnabled {
|
||||
if instruction := buildConversationContinuityInstructions(thinkingEnabled); strings.TrimSpace(instruction) != "" {
|
||||
processed = append(processed, block{Role: "system", Text: instruction})
|
||||
}
|
||||
}
|
||||
for _, m := range messages {
|
||||
role, _ := m["role"].(string)
|
||||
text := NormalizeContent(m["content"])
|
||||
@@ -93,17 +88,6 @@ func formatRoleBlock(marker, text, endMarker string) string {
|
||||
return out
|
||||
}
|
||||
|
||||
// buildConversationContinuityInstructions assembles the newline-separated
// instruction text that asks the model to treat the request as a continuation
// of the prior conversation.
//
// Two base instructions are always present. When thinkingEnabled is true a
// third line is appended telling the model to keep its reasoning internal and
// to always put the final answer in visible assistant content.
func buildConversationContinuityInstructions(thinkingEnabled bool) string {
	var text strings.Builder

	text.WriteString("Continue the conversation from the full prior context and the latest tool results.")
	text.WriteString("\n")
	text.WriteString("Treat earlier messages as binding context; answer the user's current request as a continuation, not a restart.")

	// The visible-answer reminder only applies when reasoning output is on.
	if thinkingEnabled {
		text.WriteString("\n")
		text.WriteString("Keep reasoning internal. Do not leave the final user-facing answer only in reasoning; always provide the answer in visible assistant content.")
	}

	return text.String()
}
|
||||
|
||||
func NormalizeContent(v any) string {
|
||||
if v == nil {
|
||||
return ""
|
||||
|
||||
@@ -58,23 +58,14 @@ func TestNormalizeContentArrayFallsBackToContentWhenTextEmpty(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMessagesPrepareWithThinkingAddsContinuityContract(t *testing.T) {
|
||||
func TestMessagesPrepareWithThinkingPreservesPromptShape(t *testing.T) {
|
||||
messages := []map[string]any{{"role": "user", "content": "Question"}}
|
||||
gotThinking := MessagesPrepareWithThinking(messages, true)
|
||||
gotPlain := MessagesPrepareWithThinking(messages, false)
|
||||
if gotThinking == gotPlain {
|
||||
t.Fatalf("expected thinking-enabled prompt to include extra continuity instructions")
|
||||
if gotThinking != gotPlain {
|
||||
t.Fatalf("expected thinking flag not to add extra continuity instructions, got thinking=%q plain=%q", gotThinking, gotPlain)
|
||||
}
|
||||
if !strings.HasSuffix(gotThinking, "<|Assistant|>") {
|
||||
t.Fatalf("expected assistant suffix, got %q", gotThinking)
|
||||
}
|
||||
if !strings.Contains(gotThinking, "Continue the conversation from the full prior context") {
|
||||
t.Fatalf("expected continuity instruction in thinking prompt, got %q", gotThinking)
|
||||
}
|
||||
if !strings.Contains(gotThinking, "final user-facing answer only in reasoning") {
|
||||
t.Fatalf("expected visible-answer instruction in thinking prompt, got %q", gotThinking)
|
||||
}
|
||||
if strings.Contains(gotPlain, "Continue the conversation from the full prior context") {
|
||||
t.Fatalf("did not expect thinking-only instruction in plain prompt, got %q", gotPlain)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,16 +88,14 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildOpenAIFinalPromptWithThinkingAddsContinuationContract(t *testing.T) {
|
||||
func TestBuildOpenAIFinalPromptWithThinkingKeepsPromptUnchanged(t *testing.T) {
|
||||
messages := []any{
|
||||
map[string]any{"role": "user", "content": "继续回答上一个问题"},
|
||||
}
|
||||
|
||||
finalPrompt, _ := buildOpenAIFinalPrompt(messages, nil, "", true)
|
||||
if !strings.Contains(finalPrompt, "Continue the conversation from the full prior context") {
|
||||
t.Fatalf("expected continuation contract in thinking prompt, got=%q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, "final user-facing answer only in reasoning") {
|
||||
t.Fatalf("expected visible-answer contract in thinking prompt, got=%q", finalPrompt)
|
||||
finalPromptThinking, _ := buildOpenAIFinalPrompt(messages, nil, "", true)
|
||||
finalPromptPlain, _ := buildOpenAIFinalPrompt(messages, nil, "", false)
|
||||
if finalPromptThinking != finalPromptPlain {
|
||||
t.Fatalf("expected thinking flag not to prepend continuation contract, thinking=%q plain=%q", finalPromptThinking, finalPromptPlain)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,6 +25,9 @@ func NormalizeOpenAIChatRequest(store ConfigReader, req map[string]any, traceID
|
||||
}
|
||||
defaultThinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)
|
||||
thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled)
|
||||
if config.IsNoThinkingModel(resolvedModel) {
|
||||
thinkingEnabled = false
|
||||
}
|
||||
responseModel := strings.TrimSpace(model)
|
||||
if responseModel == "" {
|
||||
responseModel = resolvedModel
|
||||
@@ -65,6 +68,9 @@ func NormalizeOpenAIResponsesRequest(store ConfigReader, req map[string]any, tra
|
||||
}
|
||||
defaultThinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)
|
||||
thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled)
|
||||
if config.IsNoThinkingModel(resolvedModel) {
|
||||
thinkingEnabled = false
|
||||
}
|
||||
|
||||
// Keep width-control as an explicit policy hook even if current default is true.
|
||||
allowWideInput := true
|
||||
|
||||
@@ -11,6 +11,7 @@ func TestStandardRequestCompletionPayloadSetsModelTypeFromResolvedModel(t *testi
|
||||
modelType string
|
||||
}{
|
||||
{name: "default", model: "deepseek-v4-flash", thinking: false, search: false, modelType: "default"},
|
||||
{name: "default_nothinking", model: "deepseek-v4-flash-nothinking", thinking: false, search: false, modelType: "default"},
|
||||
{name: "expert", model: "deepseek-v4-pro", thinking: true, search: false, modelType: "expert"},
|
||||
{name: "vision", model: "deepseek-v4-vision-search", thinking: false, search: true, modelType: "vision"},
|
||||
}
|
||||
|
||||
@@ -3,12 +3,10 @@ package promptcompat
|
||||
import "strings"
|
||||
|
||||
const (
|
||||
ThinkingInjectionMarker = "【思维链格式要求】"
|
||||
DefaultThinkingInjectionPrompt = ThinkingInjectionMarker + "在你的思考过程(<think>标签内)中,请严格按照以下规则进行思考,不要遗漏:\n" +
|
||||
"1. 分析阶段:分析用户需求是什么。\n" +
|
||||
"2. 构思阶段:构思下一步动作,我要干什么。\n" +
|
||||
"3. 工具调用阶段:为了满足用户需求,我需要调用什么工具;如果不需要工具,明确说明不需要调用工具。\n" +
|
||||
"4. 回顾格式:完整复述一遍 System 要求的 XML 工具调用格式要求,回顾错误示例和正确示例,说明我要如何正确调用工具。"
|
||||
ThinkingInjectionMarker = "Reasoning Effort: Absolute maximum with no shortcuts permitted."
|
||||
DefaultThinkingInjectionPrompt = ThinkingInjectionMarker + "\n" +
|
||||
"You MUST be very thorough in your thinking and comprehensively decompose the problem to resolve the root cause, rigorously stress-testing your logic against all potential paths, edge cases, and adversarial scenarios.\n" +
|
||||
"Explicitly write out your entire deliberation process, documenting every intermediate step, considered alternative, and rejected hypothesis to ensure absolutely no assumption is left unchecked."
|
||||
)
|
||||
|
||||
func AppendThinkingInjectionToLatestUser(messages []any) ([]any, bool) {
|
||||
|
||||
@@ -116,6 +116,18 @@ func TestConvertClaudeToDeepSeekUsesGlobalAliasResolution(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertClaudeToDeepSeekUsesNoThinkingAliasResolution(t *testing.T) {
|
||||
store := config.LoadStore()
|
||||
req := map[string]any{
|
||||
"model": "claude-sonnet-4-6-nothinking",
|
||||
"messages": []any{map[string]any{"role": "user", "content": "Hi"}},
|
||||
}
|
||||
out := ConvertClaudeToDeepSeek(req, store)
|
||||
if out["model"] != "deepseek-v4-flash-nothinking" {
|
||||
t.Fatalf("expected noThinking alias resolution, got model=%q", out["model"])
|
||||
}
|
||||
}
|
||||
|
||||
func contains(s, sub string) bool {
|
||||
return len(s) >= len(sub) && (s == sub || len(sub) == 0 || (len(s) > 0 && (indexOf(s, sub) >= 0)))
|
||||
}
|
||||
|
||||
@@ -372,3 +372,16 @@ func TestConvertClaudeToDeepSeekUsesExplicitModelAlias(t *testing.T) {
|
||||
t.Fatalf("expected explicit alias override, got %q", out["model"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertClaudeToDeepSeekUsesExplicitNoThinkingModelAlias(t *testing.T) {
|
||||
t.Setenv("DS2API_CONFIG_JSON", `{"keys":[],"accounts":[],"model_aliases":{"claude-sonnet-4-6":"deepseek-v4-pro-search"}}`)
|
||||
store := config.LoadStore()
|
||||
req := map[string]any{
|
||||
"model": "claude-sonnet-4-6-nothinking",
|
||||
"messages": []any{map[string]any{"role": "user", "content": "Hi"}},
|
||||
}
|
||||
out := ConvertClaudeToDeepSeek(req, store)
|
||||
if out["model"] != "deepseek-v4-pro-search-nothinking" {
|
||||
t.Fatalf("expected explicit alias override with nothinking suffix, got %q", out["model"])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { useEffect, useMemo, useState } from 'react'
|
||||
import clsx from 'clsx'
|
||||
|
||||
import { useI18n } from '../../i18n'
|
||||
@@ -6,8 +7,75 @@ import { useChatStreamClient } from './useChatStreamClient'
|
||||
import ConfigPanel from './ConfigPanel'
|
||||
import ChatPanel from './ChatPanel'
|
||||
|
||||
/**
 * Build the human-readable description shown for a model ID.
 *
 * The ID is matched by substring against known model families, most specific
 * first; IDs that match none of them get the generic label. IDs ending in
 * "-nothinking" additionally get the "no thinking" label appended after a
 * " · " separator.
 *
 * @param {(key: string) => string} t - i18n lookup function.
 * @param {string} modelID - model identifier, e.g. "deepseek-v4-pro-search".
 * @returns {string} the translated description.
 */
function describeModel(t, modelID) {
  const thinkingForcedOff = modelID.endsWith('-nothinking')

  // Order matters: "vision-search" must be tested before "vision", and the
  // "-search" variants before their plain counterparts.
  const familyLabels = [
    ['vision-search', 'apiTester.models.visionSearch'],
    ['vision', 'apiTester.models.vision'],
    ['pro-search', 'apiTester.models.proSearch'],
    ['pro', 'apiTester.models.pro'],
    ['flash-search', 'apiTester.models.flashSearch'],
    ['flash', 'apiTester.models.flash'],
  ]

  const genericLabel = t('apiTester.models.generic')
  const matched = familyLabels.find(([fragment]) => modelID.includes(fragment))
  const label = matched ? t(matched[1]) : genericLabel

  return thinkingForcedOff
    ? `${label} · ${t('apiTester.models.noThinking')}`
    : label
}
|
||||
|
||||
/**
 * Turn a raw model ID into the display entry used by the model picker.
 *
 * Icon and colour are chosen from substrings of the ID: vision models take
 * precedence over search models, and "pro" selects the darker shade (and the
 * Cpu icon for plain chat models).
 *
 * @param {(key: string) => string} t - i18n lookup function.
 * @param {string} modelID - model identifier.
 * @returns {{id: string, name: string, icon: string, desc: string, color: string}}
 */
function decorateModel(t, modelID) {
  const hasVision = modelID.includes('vision')
  const hasSearch = modelID.includes('search')
  const hasPro = modelID.includes('pro')

  let icon
  let color
  if (hasVision) {
    icon = 'ImageIcon'
    color = hasSearch ? 'text-fuchsia-600' : 'text-violet-500'
  } else if (hasSearch) {
    icon = 'SearchIcon'
    color = hasPro ? 'text-cyan-600' : 'text-cyan-500'
  } else {
    icon = hasPro ? 'Cpu' : 'MessageSquare'
    color = hasPro ? 'text-amber-600' : 'text-amber-500'
  }

  return {
    id: modelID,
    name: modelID,
    icon,
    desc: describeModel(t, modelID),
    color,
  }
}
|
||||
|
||||
export default function ApiTesterContainer({ config, onMessage, authFetch }) {
|
||||
const { t } = useI18n()
|
||||
const [availableModelIDs, setAvailableModelIDs] = useState([])
|
||||
const [modelsLoaded, setModelsLoaded] = useState(false)
|
||||
|
||||
const {
|
||||
model,
|
||||
@@ -49,14 +117,58 @@ export default function ApiTesterContainer({ config, onMessage, authFetch }) {
|
||||
const customKeyActive = trimmedApiKey !== ''
|
||||
const customKeyManaged = customKeyActive && configuredKeys.includes(trimmedApiKey)
|
||||
|
||||
const models = [
|
||||
{ id: 'deepseek-v4-flash', name: 'deepseek-v4-flash', icon: 'MessageSquare', desc: t('apiTester.models.flash'), color: 'text-amber-500' },
|
||||
{ id: 'deepseek-v4-pro', name: 'deepseek-v4-pro', icon: 'Cpu', desc: t('apiTester.models.pro'), color: 'text-amber-600' },
|
||||
{ id: 'deepseek-v4-flash-search', name: 'deepseek-v4-flash-search', icon: 'SearchIcon', desc: t('apiTester.models.flashSearch'), color: 'text-cyan-500' },
|
||||
{ id: 'deepseek-v4-pro-search', name: 'deepseek-v4-pro-search', icon: 'SearchIcon', desc: t('apiTester.models.proSearch'), color: 'text-cyan-600' },
|
||||
{ id: 'deepseek-v4-vision', name: 'deepseek-v4-vision', icon: 'ImageIcon', desc: t('apiTester.models.vision'), color: 'text-violet-500' },
|
||||
{ id: 'deepseek-v4-vision-search', name: 'deepseek-v4-vision-search', icon: 'SearchIcon', desc: t('apiTester.models.visionSearch'), color: 'text-fuchsia-600' },
|
||||
]
|
||||
useEffect(() => {
|
||||
let disposed = false
|
||||
|
||||
async function loadModels() {
|
||||
try {
|
||||
const res = await authFetch('/v1/models')
|
||||
if (!res.ok) {
|
||||
throw new Error(`failed to fetch models: ${res.status}`)
|
||||
}
|
||||
const data = await res.json()
|
||||
const modelIDs = Array.isArray(data?.data)
|
||||
? data.data
|
||||
.map((item) => String(item?.id || '').trim())
|
||||
.filter(Boolean)
|
||||
: []
|
||||
if (!disposed) {
|
||||
setAvailableModelIDs(modelIDs)
|
||||
}
|
||||
} catch (_err) {
|
||||
if (!disposed) {
|
||||
setAvailableModelIDs([])
|
||||
}
|
||||
} finally {
|
||||
if (!disposed) {
|
||||
setModelsLoaded(true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setModelsLoaded(false)
|
||||
loadModels()
|
||||
return () => {
|
||||
disposed = true
|
||||
}
|
||||
}, [authFetch])
|
||||
|
||||
const models = useMemo(
|
||||
() => availableModelIDs.map((modelID) => decorateModel(t, modelID)),
|
||||
[availableModelIDs, t]
|
||||
)
|
||||
|
||||
useEffect(() => {
|
||||
if (!models.length) {
|
||||
if (model) {
|
||||
setModel('')
|
||||
}
|
||||
return
|
||||
}
|
||||
if (!model || !models.some((item) => item.id === model)) {
|
||||
setModel(models[0].id)
|
||||
}
|
||||
}, [model, models, setModel])
|
||||
|
||||
const { runTest, stopGeneration } = useChatStreamClient({
|
||||
t,
|
||||
@@ -84,6 +196,7 @@ export default function ApiTesterContainer({ config, onMessage, authFetch }) {
|
||||
models={models}
|
||||
model={model}
|
||||
setModel={setModel}
|
||||
modelsLoaded={modelsLoaded}
|
||||
streamingMode={streamingMode}
|
||||
setStreamingMode={setStreamingMode}
|
||||
selectedAccount={selectedAccount}
|
||||
@@ -114,6 +227,7 @@ export default function ApiTesterContainer({ config, onMessage, authFetch }) {
|
||||
streamingContent={streamingContent}
|
||||
onRunTest={runTest}
|
||||
onStopGeneration={stopGeneration}
|
||||
hasAvailableModel={models.length > 0}
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
|
||||
@@ -21,6 +21,7 @@ export default function ChatPanel({
|
||||
streamingContent,
|
||||
onRunTest,
|
||||
onStopGeneration,
|
||||
hasAvailableModel,
|
||||
}) {
|
||||
const fileInputRef = useRef(null)
|
||||
const [uploadingFiles, setUploadingFiles] = useState(false)
|
||||
@@ -181,7 +182,7 @@ export default function ChatPanel({
|
||||
<div className="absolute left-2 bottom-2 z-10">
|
||||
<button
|
||||
onClick={() => fileInputRef.current?.click()}
|
||||
disabled={uploadingFiles || isStreaming}
|
||||
disabled={uploadingFiles || isStreaming || !hasAvailableModel}
|
||||
className="p-2 text-muted-foreground hover:text-primary transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
title="Attach files"
|
||||
>
|
||||
@@ -189,11 +190,12 @@ export default function ChatPanel({
|
||||
</button>
|
||||
</div>
|
||||
<textarea
|
||||
className="w-full bg-[#09090b] border border-border rounded-xl pl-12 pr-12 py-3 text-sm focus:ring-2 focus:ring-primary/20 focus:border-primary transition-all resize-none custom-scrollbar placeholder:text-muted-foreground/50 text-foreground shadow-inner"
|
||||
placeholder={t('apiTester.enterMessage')}
|
||||
className="w-full bg-[#09090b] border border-border rounded-xl pl-12 pr-12 py-3 text-sm focus:ring-2 focus:ring-primary/20 focus:border-primary transition-all resize-none custom-scrollbar placeholder:text-muted-foreground/50 text-foreground shadow-inner disabled:opacity-60 disabled:cursor-not-allowed"
|
||||
placeholder={hasAvailableModel ? t('apiTester.enterMessage') : t('apiTester.noModelsMessagePlaceholder')}
|
||||
rows={1}
|
||||
style={{ minHeight: '52px' }}
|
||||
value={message}
|
||||
disabled={!hasAvailableModel}
|
||||
onChange={e => setMessage(e.target.value)}
|
||||
onKeyDown={e => {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
@@ -212,7 +214,7 @@ export default function ChatPanel({
|
||||
) : (
|
||||
<button
|
||||
onClick={onRunTest}
|
||||
disabled={loading || uploadingFiles || (!message.trim() && attachedFiles.length === 0)}
|
||||
disabled={loading || uploadingFiles || !hasAvailableModel || (!message.trim() && attachedFiles.length === 0)}
|
||||
className="p-2 text-primary hover:text-primary/80 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
{loading ? <Loader2 className="w-4 h-4 animate-spin" /> : <Send className="w-4 h-4" />}
|
||||
|
||||
@@ -19,6 +19,7 @@ export default function ConfigPanel({
|
||||
models,
|
||||
model,
|
||||
setModel,
|
||||
modelsLoaded,
|
||||
streamingMode,
|
||||
setStreamingMode,
|
||||
selectedAccount,
|
||||
@@ -43,6 +44,7 @@ export default function ConfigPanel({
|
||||
const selectedModel = models.find(m => m.id === model) || models[0]
|
||||
const SelectedModelIcon = selectedModel ? (iconMap[selectedModel.icon] || MessageSquare) : MessageSquare
|
||||
const defaultKeyPreview = maskSecret(config.keys?.[0])
|
||||
const hasModels = models.length > 0
|
||||
|
||||
return (
|
||||
<div className={clsx(
|
||||
@@ -73,19 +75,24 @@ export default function ConfigPanel({
|
||||
<label className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider ml-0.5">{t('apiTester.modelLabel')}</label>
|
||||
<div className="relative">
|
||||
<select
|
||||
className="w-full h-11 pl-3 pr-9 bg-secondary border border-border rounded-lg text-sm appearance-none focus:outline-none focus:ring-1 focus:ring-ring focus:border-ring transition-all cursor-pointer hover:bg-muted/70 text-foreground"
|
||||
className="w-full h-11 pl-3 pr-9 bg-secondary border border-border rounded-lg text-sm appearance-none focus:outline-none focus:ring-1 focus:ring-ring focus:border-ring transition-all cursor-pointer hover:bg-muted/70 text-foreground disabled:opacity-60 disabled:cursor-not-allowed"
|
||||
value={model}
|
||||
onChange={e => setModel(e.target.value)}
|
||||
disabled={!hasModels}
|
||||
>
|
||||
{models.map(m => (
|
||||
{hasModels ? models.map(m => (
|
||||
<option key={m.id} value={m.id} className="bg-popover text-popover-foreground">
|
||||
{m.name}
|
||||
</option>
|
||||
))}
|
||||
)) : (
|
||||
<option value="" className="bg-popover text-popover-foreground">
|
||||
{modelsLoaded ? t('apiTester.noModels') : t('apiTester.loadingModels')}
|
||||
</option>
|
||||
)}
|
||||
</select>
|
||||
<ChevronDown className="absolute right-2.5 top-3.5 w-4 h-4 text-muted-foreground pointer-events-none" />
|
||||
</div>
|
||||
{selectedModel && (
|
||||
{selectedModel ? (
|
||||
<div className="mt-3 rounded-lg border border-border bg-muted/20 p-3">
|
||||
<div className="flex items-start gap-3">
|
||||
<div className={clsx(
|
||||
@@ -107,6 +114,10 @@ export default function ConfigPanel({
|
||||
{t('apiTester.modelPickerHint')}
|
||||
</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="mt-3 rounded-lg border border-dashed border-border bg-muted/10 p-3 text-[11px] text-muted-foreground leading-relaxed">
|
||||
{modelsLoaded ? t('apiTester.noModelsHint') : t('apiTester.loadingModelsHint')}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
|
||||
@@ -224,7 +224,9 @@
|
||||
"flashSearch": "v4 Flash (with search)",
|
||||
"proSearch": "v4 Pro (with search)",
|
||||
"vision": "v4 Vision (thinking on by default)",
|
||||
"visionSearch": "v4 Vision (with search)"
|
||||
"visionSearch": "v4 Vision (with search)",
|
||||
"generic": "Compatible model",
|
||||
"noThinking": "thinking forced off"
|
||||
},
|
||||
"missingApiKey": "Please provide an API key.",
|
||||
"requestFailed": "Request failed.",
|
||||
@@ -234,6 +236,11 @@
|
||||
"config": "Configuration",
|
||||
"modelLabel": "Model",
|
||||
"modelPickerHint": "Use the dropdown to pick a model. The list scrolls automatically.",
|
||||
"loadingModels": "Loading models...",
|
||||
"loadingModelsHint": "Fetching the available model list from /v1/models.",
|
||||
"noModels": "No models available",
|
||||
"noModelsHint": "The /v1/models endpoint did not return any usable models. Check the backend configuration or API status.",
|
||||
"noModelsMessagePlaceholder": "No models are available right now, so the tester cannot send a request.",
|
||||
"streamMode": "Streaming",
|
||||
"accountSelector": "Account",
|
||||
"autoRandom": "🤖 Auto / Random",
|
||||
|
||||
@@ -224,7 +224,9 @@
|
||||
"flashSearch": "v4 Flash(带搜索)",
|
||||
"proSearch": "v4 Pro(带搜索)",
|
||||
"vision": "v4 Vision(默认开启思考)",
|
||||
"visionSearch": "v4 Vision(带搜索)"
|
||||
"visionSearch": "v4 Vision(带搜索)",
|
||||
"generic": "兼容模型",
|
||||
"noThinking": "强制关闭思考"
|
||||
},
|
||||
"missingApiKey": "请提供 API 密钥",
|
||||
"requestFailed": "请求失败",
|
||||
@@ -234,6 +236,11 @@
|
||||
"config": "配置",
|
||||
"modelLabel": "模型",
|
||||
"modelPickerHint": "使用下拉列表选择模型,长列表会自动滚动。",
|
||||
"loadingModels": "正在加载模型...",
|
||||
"loadingModelsHint": "正在从 /v1/models 拉取可用模型列表。",
|
||||
"noModels": "没有可用模型",
|
||||
"noModelsHint": "/v1/models 当前没有返回任何可用模型,请先检查后端配置或接口状态。",
|
||||
"noModelsMessagePlaceholder": "当前没有可用模型,暂时无法发起测试。",
|
||||
"streamMode": "流式模式",
|
||||
"accountSelector": "选择账号",
|
||||
"autoRandom": "🤖 自动 / 随机",
|
||||
|
||||
Reference in New Issue
Block a user