mirror of https://github.com/CJackHwang/ds2api.git (synced 2026-05-04 08:25:26 +08:00)

feat: Introduce model alias resolution, enhanced configuration options, and improved OpenAI/Claude adapter handling for responses, embeddings, and tool calls.
API.en.md (128)

@@ -28,7 +28,7 @@ This document describes the actual behavior of the current Go codebase.
 | Base URL | `http://localhost:5001` or your deployment domain |
 | Default Content-Type | `application/json` |
 | Health probes | `GET /healthz`, `GET /readyz` |
-| CORS | Enabled (`Access-Control-Allow-Origin: *`, allows `Content-Type`, `Authorization`) |
+| CORS | Enabled (`Access-Control-Allow-Origin: *`, allows `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Vercel-Protection-Bypass`) |

 ---

@@ -89,7 +89,11 @@ Two header formats accepted:
 | GET | `/healthz` | None | Liveness probe |
 | GET | `/readyz` | None | Readiness probe |
 | GET | `/v1/models` | None | OpenAI model list |
+| GET | `/v1/models/{id}` | None | OpenAI single-model query (alias accepted) |
 | POST | `/v1/chat/completions` | Business | OpenAI chat completions |
+| POST | `/v1/responses` | Business | OpenAI Responses API (stream/non-stream) |
+| GET | `/v1/responses/{response_id}` | Business | Query stored response (in-memory TTL) |
+| POST | `/v1/embeddings` | Business | OpenAI Embeddings API |
 | GET | `/anthropic/v1/models` | None | Claude model list |
 | POST | `/anthropic/v1/messages` | Business | Claude messages |
 | POST | `/anthropic/v1/messages/count_tokens` | Business | Claude token counting |
@@ -150,6 +154,15 @@ No auth required. Returns supported models.
 }
 ```

+### Model Alias Resolution
+
+For `chat` / `responses` / `embeddings`, DS2API follows a wide-input/strict-output policy:
+
+1. Match DeepSeek native model IDs first.
+2. Then match exact keys in `model_aliases`.
+3. If still unmatched, fall back by known family heuristics (`o*`, `gpt-*`, `claude-*`, etc.).
+4. If still unmatched, return `invalid_request_error`.
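The four-step cascade above can be sketched as a small resolver. This is an illustrative JavaScript sketch, not the project's Go implementation; the function name `resolveModel` and the family regexes are assumptions, and in the real service the alias table comes from `model_aliases` in config:

```javascript
// Illustrative wide-input/strict-output model resolver (hypothetical helper).
const NATIVE = new Set([
  'deepseek-chat', 'deepseek-reasoner',
  'deepseek-chat-search', 'deepseek-reasoner-search',
]);

function resolveModel(model, aliases = {}) {
  if (NATIVE.has(model)) return model;        // 1. native IDs win
  if (aliases[model]) return aliases[model];  // 2. exact alias match
  // 3. family heuristics (assumed mapping: reasoning-style families to
  //    deepseek-reasoner, chat-style families to deepseek-chat)
  if (/^(o\d|gpt-5)/.test(model)) return 'deepseek-reasoner';
  if (/^(gpt-|claude-)/.test(model)) return 'deepseek-chat';
  return null;                                // 4. caller maps null to invalid_request_error
}
```

A `null` result is where the handler would emit the `invalid_request_error` envelope described later in this document.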
 ### `POST /v1/chat/completions`

 **Headers**:
@@ -163,7 +176,7 @@ Content-Type: application/json

 | Field | Type | Required | Notes |
 | --- | --- | --- | --- |
-| `model` | string | ✅ | `deepseek-chat` / `deepseek-reasoner` / `deepseek-chat-search` / `deepseek-reasoner-search` |
+| `model` | string | ✅ | DeepSeek native models + common aliases (`gpt-4o`, `gpt-5-codex`, `o3`, `claude-sonnet-4-5`, etc.) |
 | `messages` | array | ✅ | OpenAI-style messages |
 | `stream` | boolean | ❌ | Default `false` |
 | `tools` | array | ❌ | Function calling schema |
@@ -253,7 +266,63 @@ When `tools` is present, DS2API performs anti-leak handling:
 }
 ```

-**Stream**: DS2API buffers text first. If tool call detected → only structured `delta.tool_calls` (each with `index`); otherwise emits buffered text at once.
+**Stream**: Once high-confidence toolcall features are matched, DS2API emits `delta.tool_calls` immediately (without waiting for full JSON closure), then keeps sending argument deltas; confirmed raw tool JSON is never forwarded as `delta.content`.

 ---

+### `GET /v1/models/{id}`
+
+No auth required. Alias values are accepted as path params (for example `gpt-4o`), and the returned object is the mapped DeepSeek model.
+
+### `POST /v1/responses`
+
+OpenAI Responses-style endpoint, accepting either `input` or `messages`.
+
+| Field | Type | Required | Notes |
+| --- | --- | --- | --- |
+| `model` | string | ✅ | Supports native models + alias mapping |
+| `input` | string/array/object | ❌ | One of `input` or `messages` is required |
+| `messages` | array | ❌ | One of `input` or `messages` is required |
+| `instructions` | string | ❌ | Prepended as a system message |
+| `stream` | boolean | ❌ | Default `false` |
+| `tools` | array | ❌ | Same tool detection/translation policy as chat |
+
+**Non-stream**: Returns a standard `response` object with an ID like `resp_xxx`, and stores it in the in-memory TTL cache.
+
+**Stream (SSE)**: minimal event sequence:
+
+```text
+event: response.created
+data: {"type":"response.created","id":"resp_xxx","status":"in_progress",...}
+
+event: response.output_text.delta
+data: {"type":"response.output_text.delta","id":"resp_xxx","delta":"..."}
+
+event: response.output_tool_call.delta
+data: {"type":"response.output_tool_call.delta","id":"resp_xxx","tool_calls":[...]}
+
+event: response.completed
+data: {"type":"response.completed","response":{...}}
+
+data: [DONE]
+```
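A client consuming this stream dispatches on the `event:` line of each frame. The sketch below is a minimal, generic SSE frame parser for sequences shaped like the one above; it is illustrative and not part of DS2API itself:

```javascript
// Minimal SSE frame parser: frames are separated by blank lines, each frame
// has an optional `event:` field and a `data:` payload; `[DONE]` terminates.
function parseSSE(raw) {
  const frames = [];
  for (const block of raw.split('\n\n')) {
    let event = 'message';
    let data = '';
    for (const line of block.split('\n')) {
      if (line.startsWith('event: ')) event = line.slice(7);
      else if (line.startsWith('data: ')) data += line.slice(6);
    }
    if (!data) continue;
    if (data === '[DONE]') { frames.push({ event: 'done' }); break; }
    frames.push({ event, data: JSON.parse(data) });
  }
  return frames;
}
```

Real payloads may contain the `...` placeholders shown above, which are not valid JSON; a production client should parse defensively.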
+### `GET /v1/responses/{response_id}`
+
+Business auth required. Fetches cached responses created by `POST /v1/responses`.
+
+> Backed by an in-memory TTL store. Default TTL is `900s` (configurable via `responses.store_ttl_seconds`).
+
+### `POST /v1/embeddings`
+
+Business auth required. Returns an OpenAI-compatible embeddings shape.
+
+| Field | Type | Required | Notes |
+| --- | --- | --- | --- |
+| `model` | string | ✅ | Supports native models + alias mapping |
+| `input` | string/array | ✅ | Supports a string, string array, or token array |
+
+> Requires `embeddings.provider`. Currently supported values: `mock` / `deterministic` / `builtin`. If missing/unsupported, returns the standard error shape with HTTP 501.
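The provider name `deterministic` suggests embeddings derived purely from the input bytes, so the same input always yields the same vector. The toy sketch below illustrates that idea only; it is an assumption for illustration, not DS2API's actual provider, and the hash mix and dimension are invented:

```javascript
// Toy deterministic embedding: mix each character into a fixed-size vector,
// then L2-normalize. Identical input always produces an identical vector.
function deterministicEmbed(text, dim = 8) {
  const v = new Array(dim).fill(0);
  for (let i = 0; i < text.length; i++) {
    const h = (text.charCodeAt(i) * 2654435761 + i) >>> 0; // Knuth-style multiplicative mix
    v[h % dim] += ((h >>> 8) % 1000) / 1000 - 0.5;
  }
  const norm = Math.hypot(...v) || 1;
  return v.map((x) => x / norm);
}
```

Such a provider is useful for wiring up and testing clients without a real embedding model behind the endpoint.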
 ---

@@ -272,7 +341,10 @@ No auth required.
     {"id": "claude-sonnet-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
     {"id": "claude-haiku-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
     {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"}
-  ]
+  ],
+  "first_id": "claude-opus-4-6",
+  "last_id": "claude-instant-1.0",
+  "has_more": false
 }
 ```

@@ -288,13 +360,15 @@ Content-Type: application/json
 anthropic-version: 2023-06-01
 ```

+> `anthropic-version` is optional; DS2API auto-fills `2023-06-01` when absent.
+
 **Request body**:

 | Field | Type | Required | Notes |
 | --- | --- | --- | --- |
 | `model` | string | ✅ | For example `claude-sonnet-4-5` / `claude-opus-4-6` / `claude-haiku-4-5` (compatible with `claude-3-5-haiku-latest`), plus historical Claude model IDs |
 | `messages` | array | ✅ | Claude-style messages |
-| `max_tokens` | number | ❌ | Not strictly enforced by upstream bridge |
+| `max_tokens` | number | ❌ | Auto-filled to `8192` when omitted; not strictly enforced by upstream bridge |
 | `stream` | boolean | ❌ | Default `false` |
 | `system` | string | ❌ | Optional system prompt |
 | `tools` | array | ❌ | Claude tool schema |
@@ -684,13 +758,20 @@ Or manual deploy required:

 ## Error Payloads

-Error formats vary by module:
+Compatible routes (`/v1/*`, `/anthropic/*`) use the same error envelope:

-| Module | Format |
-| --- | --- |
-| OpenAI routes | `{"error": {"message": "...", "type": "..."}}` |
-| Claude routes | `{"error": {"type": "...", "message": "..."}}` |
-| Admin routes | `{"detail": "..."}` |
+```json
+{
+  "error": {
+    "message": "...",
+    "type": "invalid_request_error",
+    "code": "invalid_request",
+    "param": null
+  }
+}
+```
+
+Admin routes keep `{"detail":"..."}`.

 Clients should handle the HTTP status code plus the `error` / `detail` fields.

@@ -732,6 +813,31 @@ curl http://localhost:5001/v1/chat/completions \
   }'
 ```

+### OpenAI Responses (Stream)
+
+```bash
+curl http://localhost:5001/v1/responses \
+  -H "Authorization: Bearer your-api-key" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-5-codex",
+    "input": "Write a hello world in golang",
+    "stream": true
+  }'
+```
+
+### OpenAI Embeddings
+
+```bash
+curl http://localhost:5001/v1/embeddings \
+  -H "Authorization: Bearer your-api-key" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4o",
+    "input": ["first text", "second text"]
+  }'
+```

 ### OpenAI with Search

 ```bash
API.md (128)

@@ -28,7 +28,7 @@
 | Base URL | `http://localhost:5001` or your deployment domain |
 | Default Content-Type | `application/json` |
 | Health checks | `GET /healthz`, `GET /readyz` |
-| CORS | Enabled (`Access-Control-Allow-Origin: *`, allows `Content-Type`, `Authorization`) |
+| CORS | Enabled (`Access-Control-Allow-Origin: *`, allows `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Vercel-Protection-Bypass`) |

 ---

@@ -89,7 +89,11 @@ For a one-click Vercel deploy you can fill in only `DS2API_ADMIN_KEY` first, then import the rest on `/admin` after deployment
 | GET | `/healthz` | None | Liveness probe |
 | GET | `/readyz` | None | Readiness probe |
 | GET | `/v1/models` | None | OpenAI model list |
+| GET | `/v1/models/{id}` | None | OpenAI single-model query (alias accepted) |
 | POST | `/v1/chat/completions` | Business | OpenAI chat completions |
+| POST | `/v1/responses` | Business | OpenAI Responses API (stream/non-stream) |
+| GET | `/v1/responses/{response_id}` | Business | Query a generated response (in-memory TTL) |
+| POST | `/v1/embeddings` | Business | OpenAI Embeddings API |
 | GET | `/anthropic/v1/models` | None | Claude model list |
 | POST | `/anthropic/v1/messages` | Business | Claude messages |
 | POST | `/anthropic/v1/messages/count_tokens` | Business | Claude token counting |
@@ -150,6 +154,15 @@ For a one-click Vercel deploy you can fill in only `DS2API_ADMIN_KEY` first, then import the rest on `/admin` after deployment
 }
 ```

+### Model Alias Resolution Policy
+
+The `model` field for `chat` / `responses` / `embeddings` follows a wide-input/strict-output policy:
+
+1. Match DeepSeek native models first.
+2. Then match exact `model_aliases` mappings.
+3. On a miss, fall back by model-family rules (e.g. `o*`, `gpt-*`, `claude-*`).
+4. If still unmatched, return `invalid_request_error`.

 ### `POST /v1/chat/completions`

 **Request headers**:
@@ -163,7 +176,7 @@ Content-Type: application/json

 | Field | Type | Required | Notes |
 | --- | --- | --- | --- |
-| `model` | string | ✅ | `deepseek-chat` / `deepseek-reasoner` / `deepseek-chat-search` / `deepseek-reasoner-search` |
+| `model` | string | ✅ | Supports DeepSeek native models + common aliases (e.g. `gpt-4o`, `gpt-5-codex`, `o3`, `claude-sonnet-4-5`) |
 | `messages` | array | ✅ | OpenAI-style message array |
 | `stream` | boolean | ❌ | Default `false` |
 | `tools` | array | ❌ | Function calling definitions |
@@ -253,7 +266,63 @@ data: [DONE]
 }
 ```

-**Stream**: Buffers body fragments first. On detecting a tool call → emits only structured `delta.tool_calls` (each tool call carries an `index`); otherwise emits the plain text at once.
+**Stream**: Once high-confidence features are hit, emits `delta.tool_calls` immediately (without waiting for the full JSON to close) and keeps sending incremental arguments; confirmed raw toolcall JSON never flows back into `delta.content`.

 ---

+### `GET /v1/models/{id}`
+
+No auth required. Alias path params are accepted (for example `gpt-4o`); the returned object is the mapped DeepSeek model.
+
+### `POST /v1/responses`
+
+OpenAI Responses-style endpoint, compatible with either `input` or `messages`.
+
+| Field | Type | Required | Notes |
+| --- | --- | --- | --- |
+| `model` | string | ✅ | Supports native models + automatic alias mapping |
+| `input` | string/array/object | ❌ | One of `input` or `messages` is required |
+| `messages` | array | ❌ | One of `input` or `messages` is required |
+| `instructions` | string | ❌ | Automatically prepended as a system message |
+| `stream` | boolean | ❌ | Default `false` |
+| `tools` | array | ❌ | Same tool detection/translation policy as chat |
+
+**Non-stream**: Returns a standard `response` object with an `id` like `resp_xxx`, and writes it to the in-memory TTL store.
+
+**Stream (SSE)**: minimal event sequence:
+
+```text
+event: response.created
+data: {"type":"response.created","id":"resp_xxx","status":"in_progress",...}
+
+event: response.output_text.delta
+data: {"type":"response.output_text.delta","id":"resp_xxx","delta":"..."}
+
+event: response.output_tool_call.delta
+data: {"type":"response.output_tool_call.delta","id":"resp_xxx","tool_calls":[...]}
+
+event: response.completed
+data: {"type":"response.completed","response":{...}}
+
+data: [DONE]
+```

+### `GET /v1/responses/{response_id}`
+
+Business auth required. Queries the response object generated and cached by `POST /v1/responses`.
+
+> Currently an in-memory TTL store; default expiry is `900s` (tunable via `responses.store_ttl_seconds`).
+
+### `POST /v1/embeddings`
+
+Business auth required. Returns an OpenAI Embeddings-compatible structure.
+
+| Field | Type | Required | Notes |
+| --- | --- | --- | --- |
+| `model` | string | ✅ | Supports native models + automatic alias mapping |
+| `input` | string/array | ✅ | Supports a string, string array, or token array |
+
+> Requires `embeddings.provider`. Currently supported: `mock` / `deterministic` / `builtin`. If unset or unsupported, returns the standard error structure (HTTP 501).

 ---

@@ -272,7 +341,10 @@ data: [DONE]
     {"id": "claude-sonnet-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
     {"id": "claude-haiku-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
     {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"}
-  ]
+  ],
+  "first_id": "claude-opus-4-6",
+  "last_id": "claude-instant-1.0",
+  "has_more": false
 }
 ```

@@ -288,13 +360,15 @@ Content-Type: application/json
 anthropic-version: 2023-06-01
 ```

+> `anthropic-version` may be omitted; the server auto-fills `2023-06-01`.
+
 **Request body**:

 | Field | Type | Required | Notes |
 | --- | --- | --- | --- |
 | `model` | string | ✅ | For example `claude-sonnet-4-5` / `claude-opus-4-6` / `claude-haiku-4-5` (compatible with `claude-3-5-haiku-latest`), plus historical Claude model IDs |
 | `messages` | array | ✅ | Claude-style message array |
-| `max_tokens` | number | ❌ | The current implementation does not hard-truncate upstream output |
+| `max_tokens` | number | ❌ | Auto-filled to `8192` when omitted; the current implementation does not hard-truncate upstream output |
 | `stream` | boolean | ❌ | Default `false` |
 | `system` | string | ❌ | Optional system prompt |
 | `tools` | array | ❌ | Claude tool definitions |
@@ -684,13 +758,20 @@ data: {"type":"message_stop"}

 ## Error Response Format

-Error formats differ slightly across modules:
+Compatible routes (`/v1/*`, `/anthropic/*`) use the following unified structure:

-| Module | Format |
-| --- | --- |
-| OpenAI routes | `{"error": {"message": "...", "type": "..."}}` |
-| Claude routes | `{"error": {"type": "...", "message": "..."}}` |
-| Admin routes | `{"detail": "..."}` |
+```json
+{
+  "error": {
+    "message": "...",
+    "type": "invalid_request_error",
+    "code": "invalid_request",
+    "param": null
+  }
+}
+```
+
+Admin routes keep `{"detail":"..."}`.

 Recommended client handling: check the HTTP status code, then parse the `error` or `detail` field.

@@ -732,6 +813,31 @@ curl http://localhost:5001/v1/chat/completions \
   }'
 ```

+### OpenAI Responses (Stream)
+
+```bash
+curl http://localhost:5001/v1/responses \
+  -H "Authorization: Bearer your-api-key" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-5-codex",
+    "input": "Write a hello world in golang",
+    "stream": true
+  }'
+```
+
+### OpenAI Embeddings
+
+```bash
+curl http://localhost:5001/v1/embeddings \
+  -H "Authorization: Bearer your-api-key" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4o",
+    "input": ["first text", "second text"]
+  }'
+```

 ### OpenAI with Search

 ```bash
README.MD (47)

@@ -54,16 +54,27 @@ flowchart LR
 | Capability | Details |
 | --- | --- |
-| OpenAI compatible | `GET /v1/models`, `POST /v1/chat/completions` (stream/non-stream) |
+| OpenAI compatible | `GET /v1/models`, `GET /v1/models/{id}`, `POST /v1/chat/completions`, `POST /v1/responses`, `GET /v1/responses/{response_id}`, `POST /v1/embeddings` |
 | Claude compatible | `GET /anthropic/v1/models`, `POST /anthropic/v1/messages`, `POST /anthropic/v1/messages/count_tokens` |
 | Multi-account rotation | Automatic token refresh, email/mobile dual login |
 | Concurrency control | Per-account in-flight limit + waiting queue, dynamically computed recommended concurrency |
 | DeepSeek PoW | WASM solving (`wazero`), no external Node.js dependency |
-| Tool Calling | Anti-leak handling: automatic buffering, detection, structured output |
+| Tool Calling | Anti-leak handling: non-code-block high-confidence feature matching, early `delta.tool_calls`, structured incremental output |
 | Admin API | Config management, account test / batch test, import/export, Vercel sync |
 | WebUI admin panel | `/admin` single-page app (bilingual Chinese/English, dark mode) |
 | Ops probes | `GET /healthz` (liveness), `GET /readyz` (readiness) |

+## Platform Compatibility Matrix
+
+| Tier | Platform | Status |
+| --- | --- | --- |
+| P0 | Codex CLI/SDK (`wire_api=chat` / `wire_api=responses`) | ✅ |
+| P0 | OpenAI SDK (JS/Python, chat + responses) | ✅ |
+| P0 | Vercel AI SDK (openai-compatible) | ✅ |
+| P0 | Anthropic SDK (messages) | ✅ |
+| P1 | LangChain / LlamaIndex / OpenWebUI (OpenAI-compatible integration) | ✅ |
+| P2 | MCP standalone bridge | Planned |

 ## Model Support

 ### OpenAI Endpoint
@@ -196,6 +207,7 @@ cp opencode.json.example opencode.json
 3. Start OpenCode CLI in the project directory (run `opencode` per your installation method).

 > Prefer the OpenAI-compatible path (`/v1/*`), i.e. the `@ai-sdk/openai-compatible` provider in the example.
+> If your client supports `wire_api`, test both `responses` and `chat`; DS2API supports both paths.

 ## Configuration

@@ -216,6 +228,24 @@ cp opencode.json.example opencode.json
       "token": ""
     }
   ],
+  "model_aliases": {
+    "gpt-4o": "deepseek-chat",
+    "gpt-5-codex": "deepseek-reasoner",
+    "o3": "deepseek-reasoner"
+  },
+  "compat": {
+    "wide_input_strict_output": true
+  },
+  "toolcall": {
+    "mode": "feature_match",
+    "early_emit_confidence": "high"
+  },
+  "responses": {
+    "store_ttl_seconds": 900
+  },
+  "embeddings": {
+    "provider": "deterministic"
+  },
   "claude_model_mapping": {
     "fast": "deepseek-chat",
     "slow": "deepseek-reasoner"
@@ -226,6 +256,11 @@ cp opencode.json.example opencode.json
 - `keys`: API access keys; clients authenticate via `Authorization: Bearer <key>`
 - `accounts`: list of DeepSeek accounts; supports `email` or `mobile` login
 - `token`: leave empty to auto-login on first request, or pre-fill an existing token
+- `model_aliases`: maps common model names (e.g. GPT/Codex/Claude) to DeepSeek models
+- `compat.wide_input_strict_output`: recommended to keep `true` (the current implementation defaults to wide-input/strict-output)
+- `toolcall`: fixed to the feature-matching + high-confidence early-emit strategy
+- `responses.store_ttl_seconds`: in-memory cache TTL for `/v1/responses/{id}`
+- `embeddings.provider`: embeddings provider (built-in `deterministic/mock/builtin`)
 - `claude_model_mapping`: the `fast`/`slow` suffixes in this dict map to the corresponding DeepSeek models

 ### Environment Variables

@@ -281,10 +316,10 @@ cp opencode.json.example opencode.json

 When the request carries `tools`, DS2API performs anti-leak handling:

-1. With `stream=true`, **buffer** body fragments first
-2. If a tool call is detected → emit only structured `tool_calls`, never pass through the raw JSON text
-3. If it turns out not to be a tool call → emit the plain text at once
-4. The parser supports mixed text, fenced JSON, `function.arguments` strings, etc.
+1. Toolcall feature matching is enabled only in **non-code-block context** (fenced examples do not trigger it)
+2. Once high-confidence features are hit (`tool_calls` + `name` + the start of `arguments/input`), `delta.tool_calls` is emitted immediately
+3. Confirmed toolcall JSON fragments never leak into `delta.content`
+4. Surrounding natural language is passed through in order, with mixed text and incremental argument output supported

 ## Project Structure
README.en.md (47)

@@ -54,16 +54,27 @@ flowchart LR
 | Capability | Details |
 | --- | --- |
-| OpenAI compatible | `GET /v1/models`, `POST /v1/chat/completions` (stream/non-stream) |
+| OpenAI compatible | `GET /v1/models`, `GET /v1/models/{id}`, `POST /v1/chat/completions`, `POST /v1/responses`, `GET /v1/responses/{response_id}`, `POST /v1/embeddings` |
 | Claude compatible | `GET /anthropic/v1/models`, `POST /anthropic/v1/messages`, `POST /anthropic/v1/messages/count_tokens` |
 | Multi-account rotation | Auto token refresh, email/mobile dual login |
 | Concurrency control | Per-account in-flight limit + waiting queue, dynamic recommended concurrency |
 | DeepSeek PoW | WASM solving via `wazero`, no external Node.js dependency |
-| Tool Calling | Anti-leak handling: auto buffer, detect, structured output |
+| Tool Calling | Anti-leak handling: non-code-block feature match, early `delta.tool_calls`, structured incremental output |
 | Admin API | Config management, account testing/batch test, import/export, Vercel sync |
 | WebUI Admin Panel | SPA at `/admin` (bilingual Chinese/English, dark mode) |
 | Health Probes | `GET /healthz` (liveness), `GET /readyz` (readiness) |

+## Platform Compatibility Matrix
+
+| Tier | Platform | Status |
+| --- | --- | --- |
+| P0 | Codex CLI/SDK (`wire_api=chat` / `wire_api=responses`) | ✅ |
+| P0 | OpenAI SDK (JS/Python, chat + responses) | ✅ |
+| P0 | Vercel AI SDK (openai-compatible) | ✅ |
+| P0 | Anthropic SDK (messages) | ✅ |
+| P1 | LangChain / LlamaIndex / OpenWebUI (OpenAI-compatible integration) | ✅ |
+| P2 | MCP standalone bridge | Planned |

 ## Model Support

 ### OpenAI Endpoint
@@ -196,6 +207,7 @@ cp opencode.json.example opencode.json
 3. Start OpenCode CLI in the project directory (run `opencode` using your installed method).

 > Recommended: use the OpenAI-compatible path (`/v1/*`) via `@ai-sdk/openai-compatible` as shown in the example.
+> If your client supports `wire_api`, test both `responses` and `chat`; DS2API supports both paths.

 ## Configuration

@@ -216,6 +228,24 @@ cp opencode.json.example opencode.json
       "token": ""
     }
   ],
+  "model_aliases": {
+    "gpt-4o": "deepseek-chat",
+    "gpt-5-codex": "deepseek-reasoner",
+    "o3": "deepseek-reasoner"
+  },
+  "compat": {
+    "wide_input_strict_output": true
+  },
+  "toolcall": {
+    "mode": "feature_match",
+    "early_emit_confidence": "high"
+  },
+  "responses": {
+    "store_ttl_seconds": 900
+  },
+  "embeddings": {
+    "provider": "deterministic"
+  },
   "claude_model_mapping": {
     "fast": "deepseek-chat",
     "slow": "deepseek-reasoner"
@@ -226,6 +256,11 @@ cp opencode.json.example opencode.json
 - `keys`: API access keys; clients authenticate via `Authorization: Bearer <key>`
 - `accounts`: DeepSeek account list, supports `email` or `mobile` login
 - `token`: Leave empty for auto-login on first request; or pre-fill an existing token
+- `model_aliases`: Map common model names (GPT/Codex/Claude) to DeepSeek models
+- `compat.wide_input_strict_output`: Keep `true` (current default policy)
+- `toolcall`: Fixed to feature matching + high-confidence early emit
+- `responses.store_ttl_seconds`: In-memory TTL for `/v1/responses/{id}`
+- `embeddings.provider`: Embeddings provider (`deterministic`/`mock`/`builtin` built in)
 - `claude_model_mapping`: Maps `fast`/`slow` suffixes to corresponding DeepSeek models
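The `fast`/`slow` keys of `claude_model_mapping` presumably select a target by the requested Claude model's tier. The sketch below is an assumed illustration of that routing, not the exact Go logic; the tier heuristic (`haiku`/`instant` counting as fast) is an invented example:

```javascript
// Illustrative claude_model_mapping resolution (assumed behavior):
// light "haiku"/"instant"-class models go to the fast target, others to slow.
const mapping = { fast: 'deepseek-chat', slow: 'deepseek-reasoner' };

function mapClaudeModel(model) {
  return /haiku|instant/.test(model) ? mapping.fast : mapping.slow;
}
```

The mapping values mirror the config example above, where `fast` is `deepseek-chat` and `slow` is `deepseek-reasoner`.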
 ### Environment Variables

@@ -281,10 +316,10 @@ Queue limit = DS2API_ACCOUNT_MAX_QUEUE (default = recommended concurrency)

 When `tools` is present in the request, DS2API performs anti-leak handling:

-1. With `stream=true`, DS2API **buffers** text deltas first
-2. If a tool call is detected → only structured `tool_calls` are emitted, raw JSON is not leaked
-3. If no tool call → buffered text is emitted at once
-4. Parser supports mixed text, fenced JSON, and `function.arguments` payloads
+1. Toolcall feature matching is enabled only in **non-code-block context** (fenced examples are ignored)
+2. Once high-confidence features are matched (`tool_calls` + `name` + `arguments/input` start), `delta.tool_calls` is emitted immediately
+3. Confirmed toolcall JSON fragments are never leaked into `delta.content`
+4. Natural language before/after toolcalls keeps original order, with incremental argument output supported
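The "high-confidence features" in step 2 can be approximated as a predicate over the buffered capture. This is an illustrative sketch of the idea; the real matcher lives in the `stream-tool-sieve` helper and its exact criteria may differ:

```javascript
// Illustrative high-confidence check: the capture must contain a
// "tool_calls" key, a tool "name", and the start of its arguments/input.
function looksLikeToolCallStart(capture) {
  const t = String(capture).toLowerCase();
  return t.includes('"tool_calls"')
    && /"name"\s*:\s*"/.test(t)
    && /"(arguments|input)"\s*:/.test(t);
}
```

Requiring all three features before emitting keeps ordinary prose that merely mentions `tool_calls` from triggering early emission.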

 ## Project Structure
@@ -7,7 +7,7 @@ const {
   createToolSieveState,
   processToolSieveChunk,
   flushToolSieve,
-  parseStandaloneToolCalls,
+  parseToolCalls,
   formatOpenAIStreamToolCalls,
 } = require('./helpers/stream-tool-sieve');

@@ -199,7 +199,7 @@ module.exports = async function handler(req, res) {
     await releaseLease();
     return;
   }
-  const detected = parseStandaloneToolCalls(outputText, toolNames);
+  const detected = parseToolCalls(outputText, toolNames);
   if (detected.length > 0 && !toolCallsEmitted) {
     toolCallsEmitted = true;
     sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(detected) });

@@ -28,7 +28,6 @@ function createToolSieveState() {
     pending: '',
     capture: '',
     capturing: false,
-    hasMeaningfulText: false,
     recentTextTail: '',
     toolNameSent: false,
     toolName: '',

@@ -192,12 +191,21 @@ function findToolSegmentStart(s) {
     return -1;
   }
   const lower = s.toLowerCase();
-  const keyIdx = lower.indexOf('tool_calls');
-  if (keyIdx < 0) {
-    return -1;
-  }
-  const start = s.slice(0, keyIdx).lastIndexOf('{');
-  return start >= 0 ? start : keyIdx;
+  let offset = 0;
+  // eslint-disable-next-line no-constant-condition
+  while (true) {
+    const keyRel = lower.indexOf('tool_calls', offset);
+    if (keyRel < 0) {
+      return -1;
+    }
+    const keyIdx = keyRel;
+    const start = s.slice(0, keyIdx).lastIndexOf('{');
+    const candidateStart = start >= 0 ? start : keyIdx;
+    if (!insideCodeFence(s.slice(0, candidateStart))) {
+      return candidateStart;
+    }
+    offset = keyIdx + 'tool_calls'.length;
+  }
 }

 function consumeToolCapture(state, toolNames) {
@@ -220,7 +228,7 @@ function consumeToolCapture(state, toolNames) {
   }
   const prefixPart = captured.slice(0, start);
   const suffixPart = captured.slice(obj.end);
-  if (!state.toolNameSent && (hasMeaningfulText(prefixPart) || looksLikeToolExampleContext(state.recentTextTail) || looksLikeToolExampleContext(suffixPart))) {
+  if (insideCodeFence((state.recentTextTail || '') + prefixPart)) {
     return {
       ready: true,
       prefix: captured,
@@ -283,7 +291,10 @@ function buildIncrementalToolDeltas(state) {
     return [];
   }
   const start = captured.slice(0, keyIdx).lastIndexOf('{');
-  if (start < 0 || hasMeaningfulText(captured.slice(0, start))) {
+  if (start < 0) {
+    return [];
+  }
+  if (insideCodeFence((state.recentTextTail || '') + captured.slice(0, start))) {
     return [];
   }
   const callStart = findFirstToolCallObjectStart(captured, keyIdx);
@@ -621,7 +632,11 @@ function parseToolCalls(text, toolNames) {
   if (!toStringSafe(text)) {
     return [];
   }
-  const candidates = buildToolCallCandidates(text);
+  const sanitized = stripFencedCodeBlocks(text);
+  if (!toStringSafe(sanitized)) {
+    return [];
+  }
+  const candidates = buildToolCallCandidates(sanitized);
   let parsed = [];
   for (const c of candidates) {
     parsed = parseToolCallsPayload(c);
@@ -635,11 +650,22 @@ function parseToolCalls(text, toolNames) {
   return filterToolCalls(parsed, toolNames);
 }

+function stripFencedCodeBlocks(text) {
+  const t = typeof text === 'string' ? text : '';
+  if (!t) {
+    return '';
+  }
+  return t.replace(/```[\s\S]*?```/g, ' ');
+}
+
 function parseStandaloneToolCalls(text, toolNames) {
   const trimmed = toStringSafe(text);
   if (!trimmed) {
     return [];
   }
   if ((trimmed.startsWith('```') && trimmed.endsWith('```')) || trimmed.includes('```')) {
     return [];
   }
   if (looksLikeToolExampleContext(trimmed)) {
     return [];
   }
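The effect of `stripFencedCodeBlocks` above is that fenced blocks are blanked out before candidate extraction, so a tool-call example quoted inside a code fence can no longer parse as a real call. A quick usage check (the function body mirrors the diff above; the sample string is invented for illustration):

```javascript
// Mirror of stripFencedCodeBlocks from the diff above, with a usage check.
function stripFencedCodeBlocks(text) {
  const t = typeof text === 'string' ? text : '';
  if (!t) {
    return '';
  }
  // Only paired ``` fences are removed; an unclosed fence is left alone.
  return t.replace(/```[\s\S]*?```/g, ' ');
}

const mixed = 'Before.\n```json\n{"tool_calls":[]}\n```\nAfter.';
// The fenced JSON disappears; the surrounding prose survives.
const cleaned = stripFencedCodeBlocks(mixed);
```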
@@ -852,7 +878,6 @@ function noteText(state, text) {
   if (!state || !hasMeaningfulText(text)) {
     return;
   }
-  state.hasMeaningfulText = true;
   state.recentTextTail = appendTail(state.recentTextTail, text, TOOL_SIEVE_CONTEXT_TAIL_LIMIT);
 }

@@ -870,22 +895,16 @@ function appendTail(prev, next, max) {
 }

 function looksLikeToolExampleContext(text) {
-  const t = toStringSafe(text).toLowerCase();
-  const cues = [
-    '示例',
-    '例子',
-    'for example',
-    'example',
-    'demo',
-    '请勿执行',
-    '不要执行',
-    'do not execute',
-    '```',
-  ];
-  return cues.some((cue) => t.includes(cue));
+  return insideCodeFence(text);
+}
+
+function insideCodeFence(text) {
+  const t = typeof text === 'string' ? text : '';
+  if (!t) {
+    return false;
+  }
+  const ticks = (t.match(/```/g) || []).length;
+  return ticks % 2 === 1;
 }

 function hasMeaningfulText(text) {
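The rewritten `insideCodeFence` reduces "are we inside a code block?" to backtick-fence parity: an odd number of ``` markers before a position means an open, unclosed fence. A self-contained copy with checks (same body as the diff above):

```javascript
// Parity-based fence detection, as in the diff above: an odd count of
// ``` markers means the text ends inside an open code fence.
function insideCodeFence(text) {
  const t = typeof text === 'string' ? text : '';
  if (!t) {
    return false;
  }
  const ticks = (t.match(/```/g) || []).length;
  return ticks % 2 === 1;
}
```

This replaces the earlier keyword-cue heuristic with a purely structural test, so prose that merely says "example" no longer suppresses tool-call detection.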
@@ -69,9 +69,7 @@ test('parseToolCalls supports fenced json and function.arguments string payload', () => {
     '```',
   ].join('\n');
   const calls = parseToolCalls(text, ['read_file']);
-  assert.equal(calls.length, 1);
-  assert.equal(calls[0].name, 'read_file');
-  assert.deepEqual(calls[0].input, { path: 'README.md' });
+  assert.equal(calls.length, 0);
 });

 test('parseStandaloneToolCalls only matches standalone payload and ignores mixed prose', () => {
@@ -24,5 +24,27 @@
       "password": "your-password-3",
       "token": ""
     }
-  ]
-}
+  ],
+  "model_aliases": {
+    "gpt-4o": "deepseek-chat",
+    "gpt-5-codex": "deepseek-reasoner",
+    "o3": "deepseek-reasoner"
+  },
+  "compat": {
+    "wide_input_strict_output": true
+  },
+  "toolcall": {
+    "mode": "feature_match",
+    "early_emit_confidence": "high"
+  },
+  "responses": {
+    "store_ttl_seconds": 900
+  },
+  "embeddings": {
+    "provider": "deterministic"
+  },
+  "claude_model_mapping": {
+    "fast": "deepseek-chat",
+    "slow": "deepseek-reasoner"
+  }
+}
internal/adapter/claude/error_shape_test.go (new file, 35)

@@ -0,0 +1,35 @@
+package claude
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+)
+
+func TestWriteClaudeErrorIncludesUnifiedFields(t *testing.T) {
+	rec := httptest.NewRecorder()
+	writeClaudeError(rec, http.StatusUnauthorized, "bad token")
+	if rec.Code != http.StatusUnauthorized {
+		t.Fatalf("expected 401, got %d", rec.Code)
+	}
+
+	var body map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
+		t.Fatalf("decode body: %v", err)
+	}
+	errObj, _ := body["error"].(map[string]any)
+	if errObj["message"] != "bad token" {
+		t.Fatalf("unexpected message: %v", errObj["message"])
+	}
+	if errObj["type"] != "invalid_request_error" {
+		t.Fatalf("unexpected type: %v", errObj["type"])
+	}
+	if errObj["code"] != "authentication_failed" {
+		t.Fatalf("unexpected code: %v", errObj["code"])
+	}
+	if _, ok := errObj["param"]; !ok {
+		t.Fatal("expected param field")
+	}
+}
@@ -43,6 +43,9 @@ func (h *Handler) ListModels(w http.ResponseWriter, _ *http.Request) {
 }
 
 func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
+	if strings.TrimSpace(r.Header.Get("anthropic-version")) == "" {
+		r.Header.Set("anthropic-version", "2023-06-01")
+	}
 	a, err := h.Auth.Determine(r)
 	if err != nil {
 		status := http.StatusUnauthorized
@@ -50,22 +53,25 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
 		if err == auth.ErrNoAccount {
 			status = http.StatusTooManyRequests
 		}
-		writeJSON(w, status, map[string]any{"error": map[string]any{"type": "invalid_request_error", "message": detail}})
+		writeClaudeError(w, status, detail)
 		return
 	}
 	defer h.Auth.Release(a)
 
 	var req map[string]any
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-		writeJSON(w, http.StatusBadRequest, map[string]any{"error": map[string]any{"type": "invalid_request_error", "message": "invalid json"}})
+		writeClaudeError(w, http.StatusBadRequest, "invalid json")
 		return
 	}
 	model, _ := req["model"].(string)
 	messagesRaw, _ := req["messages"].([]any)
 	if model == "" || len(messagesRaw) == 0 {
-		writeJSON(w, http.StatusBadRequest, map[string]any{"error": map[string]any{"type": "invalid_request_error", "message": "Request must include 'model' and 'messages'."}})
+		writeClaudeError(w, http.StatusBadRequest, "Request must include 'model' and 'messages'.")
 		return
 	}
+	if _, ok := req["max_tokens"]; !ok {
+		req["max_tokens"] = 8192
+	}
 
 	normalized := normalizeClaudeMessages(messagesRaw)
 	payload := cloneMap(req)
@@ -86,12 +92,12 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
 
 	sessionID, err := h.DS.CreateSession(r.Context(), a, 3)
 	if err != nil {
-		writeJSON(w, http.StatusUnauthorized, map[string]any{"error": map[string]any{"type": "api_error", "message": "invalid token."}})
+		writeClaudeError(w, http.StatusUnauthorized, "invalid token.")
 		return
 	}
 	pow, err := h.DS.GetPow(r.Context(), a, 3)
 	if err != nil {
-		writeJSON(w, http.StatusUnauthorized, map[string]any{"error": map[string]any{"type": "api_error", "message": "Failed to get PoW"}})
+		writeClaudeError(w, http.StatusUnauthorized, "Failed to get PoW")
 		return
 	}
 	requestPayload := map[string]any{
@@ -104,13 +110,13 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
 	}
 	resp, err := h.DS.CallCompletion(r.Context(), a, requestPayload, pow, 3)
 	if err != nil {
-		writeJSON(w, http.StatusInternalServerError, map[string]any{"error": map[string]any{"type": "api_error", "message": "Failed to get Claude response."}})
+		writeClaudeError(w, http.StatusInternalServerError, "Failed to get Claude response.")
 		return
 	}
 	if resp.StatusCode != http.StatusOK {
 		defer resp.Body.Close()
 		body, _ := io.ReadAll(resp.Body)
-		writeJSON(w, http.StatusInternalServerError, map[string]any{"error": map[string]any{"type": "api_error", "message": string(body)}})
+		writeClaudeError(w, http.StatusInternalServerError, string(body))
 		return
 	}
 
@@ -162,20 +168,20 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
 func (h *Handler) CountTokens(w http.ResponseWriter, r *http.Request) {
 	a, err := h.Auth.Determine(r)
 	if err != nil {
-		writeJSON(w, http.StatusUnauthorized, map[string]any{"error": err.Error()})
+		writeClaudeError(w, http.StatusUnauthorized, err.Error())
 		return
 	}
 	defer h.Auth.Release(a)
 
 	var req map[string]any
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-		writeJSON(w, http.StatusBadRequest, map[string]any{"error": "invalid json"})
+		writeClaudeError(w, http.StatusBadRequest, "invalid json")
 		return
 	}
 	model, _ := req["model"].(string)
 	messages, _ := req["messages"].([]any)
 	if model == "" || len(messages) == 0 {
-		writeJSON(w, http.StatusBadRequest, map[string]any{"error": "Request must include 'model' and 'messages'."})
+		writeClaudeError(w, http.StatusBadRequest, "Request must include 'model' and 'messages'.")
 		return
 	}
 	inputTokens := 0
@@ -206,7 +212,7 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ
 	defer resp.Body.Close()
 	if resp.StatusCode != http.StatusOK {
 		body, _ := io.ReadAll(resp.Body)
-		writeJSON(w, http.StatusInternalServerError, map[string]any{"error": map[string]any{"type": "api_error", "message": string(body)}})
+		writeClaudeError(w, http.StatusInternalServerError, string(body))
 		return
 	}
 
@@ -241,6 +247,8 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ
 		"error": map[string]any{
 			"type":    "api_error",
 			"message": msg,
+			"code":    "internal_error",
+			"param":   nil,
 		},
 	})
 }
@@ -492,6 +500,28 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ
 	}
 }
 
+func writeClaudeError(w http.ResponseWriter, status int, message string) {
+	code := "invalid_request"
+	switch status {
+	case http.StatusUnauthorized:
+		code = "authentication_failed"
+	case http.StatusTooManyRequests:
+		code = "rate_limit_exceeded"
+	case http.StatusNotFound:
+		code = "not_found"
+	case http.StatusInternalServerError:
+		code = "internal_error"
+	}
+	writeJSON(w, status, map[string]any{
+		"error": map[string]any{
+			"type":    "invalid_request_error",
+			"message": message,
+			"code":    code,
+			"param":   nil,
+		},
+	})
+}
+
 func normalizeClaudeMessages(messages []any) []any {
 	out := make([]any, 0, len(messages))
 	for _, m := range messages {
138
internal/adapter/openai/embeddings_handler.go
Normal file
@@ -0,0 +1,138 @@
package openai

import (
	"crypto/sha256"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"

	"ds2api/internal/auth"
	"ds2api/internal/config"
	"ds2api/internal/util"
)

func (h *Handler) Embeddings(w http.ResponseWriter, r *http.Request) {
	a, err := h.Auth.Determine(r)
	if err != nil {
		status := http.StatusUnauthorized
		detail := err.Error()
		if err == auth.ErrNoAccount {
			status = http.StatusTooManyRequests
		}
		writeOpenAIError(w, status, detail)
		return
	}
	defer h.Auth.Release(a)

	var req map[string]any
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeOpenAIError(w, http.StatusBadRequest, "invalid json")
		return
	}
	model, _ := req["model"].(string)
	model = strings.TrimSpace(model)
	if model == "" {
		writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model'.")
		return
	}
	if _, ok := config.ResolveModel(h.Store, model); !ok {
		writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model))
		return
	}

	inputs := extractEmbeddingInputs(req["input"])
	if len(inputs) == 0 {
		writeOpenAIError(w, http.StatusBadRequest, "Request must include non-empty 'input'.")
		return
	}

	provider := ""
	if h.Store != nil {
		provider = strings.ToLower(strings.TrimSpace(h.Store.EmbeddingsProvider()))
	}
	if provider == "" {
		writeOpenAIError(w, http.StatusNotImplemented, "Embeddings provider is not configured. Set embeddings.provider in config.")
		return
	}
	switch provider {
	case "mock", "deterministic", "builtin":
		// supported local deterministic provider
	default:
		writeOpenAIError(w, http.StatusNotImplemented, fmt.Sprintf("Embeddings provider '%s' is not supported.", provider))
		return
	}

	data := make([]map[string]any, 0, len(inputs))
	totalTokens := 0
	for i, input := range inputs {
		totalTokens += util.EstimateTokens(input)
		data = append(data, map[string]any{
			"object":    "embedding",
			"index":     i,
			"embedding": deterministicEmbedding(input),
		})
	}
	writeJSON(w, http.StatusOK, map[string]any{
		"object": "list",
		"data":   data,
		"model":  model,
		"usage": map[string]any{
			"prompt_tokens": totalTokens,
			"total_tokens":  totalTokens,
		},
	})
}

func extractEmbeddingInputs(raw any) []string {
	switch v := raw.(type) {
	case string:
		s := strings.TrimSpace(v)
		if s == "" {
			return nil
		}
		return []string{s}
	case []any:
		out := make([]string, 0, len(v))
		for _, item := range v {
			switch iv := item.(type) {
			case string:
				s := strings.TrimSpace(iv)
				if s != "" {
					out = append(out, s)
				}
			case []any:
				// Token array input support: convert to stable string form.
				out = append(out, fmt.Sprintf("%v", iv))
			default:
				s := strings.TrimSpace(fmt.Sprintf("%v", iv))
				if s != "" {
					out = append(out, s)
				}
			}
		}
		return out
	default:
		return nil
	}
}

func deterministicEmbedding(input string) []float64 {
	// Keep response shape stable without external dependencies.
	const dims = 64
	out := make([]float64, dims)
	seed := sha256.Sum256([]byte(input))
	buf := seed[:]
	for i := 0; i < dims; i++ {
		if len(buf) < 4 {
			next := sha256.Sum256(buf)
			buf = next[:]
		}
		v := binary.BigEndian.Uint32(buf[:4])
		buf = buf[4:]
		// map [0, 2^32) -> [-1, 1]
		out[i] = float64(v)/2147483647.5 - 1.0
	}
	return out
}
35
internal/adapter/openai/error_shape_test.go
Normal file
@@ -0,0 +1,35 @@
package openai

import (
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"testing"
)

func TestWriteOpenAIErrorIncludesUnifiedFields(t *testing.T) {
	rec := httptest.NewRecorder()
	writeOpenAIError(rec, http.StatusBadRequest, "invalid input")
	if rec.Code != http.StatusBadRequest {
		t.Fatalf("expected 400, got %d", rec.Code)
	}

	var body map[string]any
	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
		t.Fatalf("decode body: %v", err)
	}
	errObj, _ := body["error"].(map[string]any)
	if errObj["message"] != "invalid input" {
		t.Fatalf("unexpected message: %v", errObj["message"])
	}
	if errObj["type"] != "invalid_request_error" {
		t.Fatalf("unexpected type: %v", errObj["type"])
	}
	if errObj["code"] != "invalid_request" {
		t.Fatalf("unexpected code: %v", errObj["code"])
	}
	if _, ok := errObj["param"]; !ok {
		t.Fatal("expected param field")
	}
}
@@ -31,6 +31,8 @@ type Handler struct {
 
 	leaseMu      sync.Mutex
 	streamLeases map[string]streamLease
+	responsesMu  sync.Mutex
+	responses    *responseStore
 }
 
 type streamLease struct {
@@ -40,13 +42,27 @@
 
 func RegisterRoutes(r chi.Router, h *Handler) {
 	r.Get("/v1/models", h.ListModels)
+	r.Get("/v1/models/{model_id}", h.GetModel)
 	r.Post("/v1/chat/completions", h.ChatCompletions)
+	r.Post("/v1/responses", h.Responses)
+	r.Get("/v1/responses/{response_id}", h.GetResponseByID)
+	r.Post("/v1/embeddings", h.Embeddings)
 }
 
 func (h *Handler) ListModels(w http.ResponseWriter, _ *http.Request) {
 	writeJSON(w, http.StatusOK, config.OpenAIModelsResponse())
 }
 
+func (h *Handler) GetModel(w http.ResponseWriter, r *http.Request) {
+	modelID := strings.TrimSpace(chi.URLParam(r, "model_id"))
+	model, ok := config.OpenAIModelByID(h.Store, modelID)
+	if !ok {
+		writeOpenAIError(w, http.StatusNotFound, "Model not found.")
+		return
+	}
+	writeJSON(w, http.StatusOK, model)
+}
+
 func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
 	if isVercelStreamReleaseRequest(r) {
 		h.handleVercelStreamRelease(w, r)
@@ -81,11 +97,16 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
 		writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model' and 'messages'.")
 		return
 	}
-	thinkingEnabled, searchEnabled, ok := config.GetModelConfig(model)
+	resolvedModel, ok := config.ResolveModel(h.Store, model)
 	if !ok {
-		writeOpenAIError(w, http.StatusServiceUnavailable, fmt.Sprintf("Model '%s' is not available.", model))
+		writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model))
 		return
 	}
+	thinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)
+	responseModel := strings.TrimSpace(model)
+	if responseModel == "" {
+		responseModel = resolvedModel
+	}
 
 	finalPrompt, toolNames := buildOpenAIFinalPrompt(messagesRaw, req["tools"])
 
@@ -111,16 +132,17 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
 		"thinking_enabled": thinkingEnabled,
 		"search_enabled":   searchEnabled,
 	}
+	applyOpenAIChatPassThrough(req, payload)
 	resp, err := h.DS.CallCompletion(r.Context(), a, payload, pow, 3)
 	if err != nil {
 		writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
 		return
 	}
 	if util.ToBool(req["stream"]) {
-		h.handleStream(w, r, resp, sessionID, model, finalPrompt, thinkingEnabled, searchEnabled, toolNames)
+		h.handleStream(w, r, resp, sessionID, responseModel, finalPrompt, thinkingEnabled, searchEnabled, toolNames)
 		return
 	}
-	h.handleNonStream(w, r.Context(), resp, sessionID, model, finalPrompt, thinkingEnabled, toolNames)
+	h.handleNonStream(w, r.Context(), resp, sessionID, responseModel, finalPrompt, thinkingEnabled, toolNames)
 }
 
 func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, resp *http.Response, completionID, model, finalPrompt string, thinkingEnabled bool, toolNames []string) {
@@ -135,7 +157,7 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, re
 
 	finalThinking := result.Thinking
 	finalText := result.Text
-	detected := util.ParseStandaloneToolCalls(finalText, toolNames)
+	detected := util.ParseToolCalls(finalText, toolNames)
 	finishReason := "stop"
 	messageObj := map[string]any{"role": "assistant", "content": finalText}
 	if thinkingEnabled && finalThinking != "" {
@@ -222,7 +244,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
 	finalize := func(finishReason string) {
 		finalThinking := thinking.String()
 		finalText := text.String()
-		detected := util.ParseStandaloneToolCalls(finalText, toolNames)
+		detected := util.ParseToolCalls(finalText, toolNames)
 		if len(detected) > 0 && !toolCallsEmitted {
 			finishReason = "tool_calls"
 			delta := map[string]any{
@@ -497,6 +519,8 @@ func writeOpenAIError(w http.ResponseWriter, status int, message string) {
 		"error": map[string]any{
 			"message": message,
 			"type":    openAIErrorType(status),
+			"code":    openAIErrorCode(status),
+			"param":   nil,
 		},
 	})
 }
@@ -520,3 +544,41 @@ func openAIErrorType(status int) string {
 		return "invalid_request_error"
 	}
 }
+
+func openAIErrorCode(status int) string {
+	switch status {
+	case http.StatusBadRequest:
+		return "invalid_request"
+	case http.StatusUnauthorized:
+		return "authentication_failed"
+	case http.StatusForbidden:
+		return "forbidden"
+	case http.StatusTooManyRequests:
+		return "rate_limit_exceeded"
+	case http.StatusNotFound:
+		return "not_found"
+	case http.StatusServiceUnavailable:
+		return "service_unavailable"
+	default:
+		if status >= 500 {
+			return "internal_error"
+		}
+		return "invalid_request"
+	}
+}
+
+func applyOpenAIChatPassThrough(req map[string]any, payload map[string]any) {
+	for _, k := range []string{
+		"temperature",
+		"top_p",
+		"max_tokens",
+		"max_completion_tokens",
+		"presence_penalty",
+		"frequency_penalty",
+		"stop",
+	} {
+		if v, ok := req[k]; ok {
+			payload[k] = v
+		}
+	}
+}
@@ -210,7 +210,7 @@ func TestHandleNonStreamUnknownToolStillIntercepted(t *testing.T) {
 	}
 }
 
-func TestHandleNonStreamEmbeddedToolCallExampleNotIntercepted(t *testing.T) {
+func TestHandleNonStreamEmbeddedToolCallExampleIntercepted(t *testing.T) {
 	h := &Handler{}
 	resp := makeSSEHTTPResponse(
 		`data: {"p":"response/content","v":"下面是示例:"}`,
@@ -228,16 +228,16 @@ func TestHandleNonStreamEmbeddedToolCallExampleIntercepted(t *testing.T) {
 	out := decodeJSONBody(t, rec.Body.String())
 	choices, _ := out["choices"].([]any)
 	choice, _ := choices[0].(map[string]any)
-	if choice["finish_reason"] != "stop" {
-		t.Fatalf("expected finish_reason=stop, got %#v", choice["finish_reason"])
+	if choice["finish_reason"] != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
 	}
 	msg, _ := choice["message"].(map[string]any)
-	if _, ok := msg["tool_calls"]; ok {
-		t.Fatalf("did not expect tool_calls field for embedded example: %#v", msg["tool_calls"])
+	toolCalls, _ := msg["tool_calls"].([]any)
+	if len(toolCalls) == 0 {
+		t.Fatalf("expected tool_calls field for embedded example: %#v", msg["tool_calls"])
 	}
-	content, _ := msg["content"].(string)
-	if !strings.Contains(content, "示例") || !strings.Contains(content, `"tool_calls"`) {
-		t.Fatalf("expected embedded example to pass through as text, got %q", content)
+	if msg["content"] != nil {
+		t.Fatalf("expected content nil when tool_calls detected, got %#v", msg["content"])
 	}
 }
 
@@ -471,8 +471,8 @@ func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) {
 	if !done {
 		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
 	}
-	if streamHasToolCallsDelta(frames) {
-		t.Fatalf("did not expect tool_calls delta in mixed prose stream, body=%s", rec.Body.String())
+	if !streamHasToolCallsDelta(frames) {
+		t.Fatalf("expected tool_calls delta in mixed prose stream, body=%s", rec.Body.String())
 	}
 	content := strings.Builder{}
 	for _, frame := range frames {
@@ -489,11 +489,11 @@ func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) {
 	if !strings.Contains(got, "下面是示例:") || !strings.Contains(got, "请勿执行。") {
 		t.Fatalf("expected pre/post plain text to pass sieve, got=%q", got)
 	}
-	if !strings.Contains(got, `"tool_calls"`) {
-		t.Fatalf("expected mixed stream to preserve embedded tool_calls example text, got=%q", got)
+	if strings.Contains(strings.ToLower(got), `"tool_calls"`) {
+		t.Fatalf("expected no raw tool_calls json leak in content, got=%q", got)
 	}
-	if streamFinishReason(frames) != "stop" {
-		t.Fatalf("expected finish_reason=stop for mixed prose, body=%s", rec.Body.String())
+	if streamFinishReason(frames) != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls for mixed prose, body=%s", rec.Body.String())
 	}
 }
46
internal/adapter/openai/models_route_test.go
Normal file
@@ -0,0 +1,46 @@
package openai

import (
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/go-chi/chi/v5"
)

func TestGetModelRouteDirectAndAlias(t *testing.T) {
	h := &Handler{}
	r := chi.NewRouter()
	RegisterRoutes(r, h)

	t.Run("direct", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-chat", nil)
		rec := httptest.NewRecorder()
		r.ServeHTTP(rec, req)
		if rec.Code != http.StatusOK {
			t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
		}
	})

	t.Run("alias", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/v1/models/gpt-4.1", nil)
		rec := httptest.NewRecorder()
		r.ServeHTTP(rec, req)
		if rec.Code != http.StatusOK {
			t.Fatalf("expected 200 for alias, got %d body=%s", rec.Code, rec.Body.String())
		}
	})
}

func TestGetModelRouteNotFound(t *testing.T) {
	h := &Handler{}
	r := chi.NewRouter()
	RegisterRoutes(r, h)

	req := httptest.NewRequest(http.MethodGet, "/v1/models/not-exists", nil)
	rec := httptest.NewRecorder()
	r.ServeHTTP(rec, req)
	if rec.Code != http.StatusNotFound {
		t.Fatalf("expected 404, got %d body=%s", rec.Code, rec.Body.String())
	}
}
91
internal/adapter/openai/response_store.go
Normal file
@@ -0,0 +1,91 @@
package openai

import (
	"sync"
	"time"
)

type storedResponse struct {
	Value     map[string]any
	ExpiresAt time.Time
}

type responseStore struct {
	mu    sync.Mutex
	ttl   time.Duration
	items map[string]storedResponse
}

func newResponseStore(ttl time.Duration) *responseStore {
	if ttl <= 0 {
		ttl = 15 * time.Minute
	}
	return &responseStore{
		ttl:   ttl,
		items: make(map[string]storedResponse),
	}
}

func (s *responseStore) put(id string, value map[string]any) {
	if s == nil || id == "" || value == nil {
		return
	}
	now := time.Now()
	s.mu.Lock()
	defer s.mu.Unlock()
	s.sweepLocked(now)
	s.items[id] = storedResponse{
		Value:     cloneAnyMap(value),
		ExpiresAt: now.Add(s.ttl),
	}
}

func (s *responseStore) get(id string) (map[string]any, bool) {
	if s == nil || id == "" {
		return nil, false
	}
	now := time.Now()
	s.mu.Lock()
	defer s.mu.Unlock()
	s.sweepLocked(now)
	item, ok := s.items[id]
	if !ok {
		return nil, false
	}
	return cloneAnyMap(item.Value), true
}

func (s *responseStore) sweepLocked(now time.Time) {
	for k, v := range s.items {
		if now.After(v.ExpiresAt) {
			delete(s.items, k)
		}
	}
}

func cloneAnyMap(in map[string]any) map[string]any {
	if in == nil {
		return nil
	}
	out := make(map[string]any, len(in))
	for k, v := range in {
		out[k] = v
	}
	return out
}

func (h *Handler) getResponseStore() *responseStore {
	if h == nil {
		return nil
	}
	h.responsesMu.Lock()
	defer h.responsesMu.Unlock()
	if h.responses == nil {
		ttl := 15 * time.Minute
		if h.Store != nil {
			ttl = time.Duration(h.Store.ResponsesStoreTTLSeconds()) * time.Second
		}
		h.responses = newResponseStore(ttl)
	}
	return h.responses
}
65
internal/adapter/openai/responses_embeddings_test.go
Normal file
@@ -0,0 +1,65 @@
package openai

import (
	"testing"
	"time"
)

func TestNormalizeResponsesInputAsMessagesString(t *testing.T) {
	msgs := normalizeResponsesInputAsMessages("hello")
	if len(msgs) != 1 {
		t.Fatalf("expected one message, got %d", len(msgs))
	}
	m, _ := msgs[0].(map[string]any)
	if m["role"] != "user" || m["content"] != "hello" {
		t.Fatalf("unexpected message: %#v", m)
	}
}

func TestResponsesMessagesFromRequestWithInstructions(t *testing.T) {
	req := map[string]any{
		"model":        "gpt-4.1",
		"input":        "ping",
		"instructions": "system text",
	}
	msgs := responsesMessagesFromRequest(req)
	if len(msgs) != 2 {
		t.Fatalf("expected two messages, got %d", len(msgs))
	}
	sys, _ := msgs[0].(map[string]any)
	if sys["role"] != "system" {
		t.Fatalf("unexpected first message: %#v", sys)
	}
}

func TestExtractEmbeddingInputs(t *testing.T) {
	got := extractEmbeddingInputs([]any{"a", "b"})
	if len(got) != 2 || got[0] != "a" || got[1] != "b" {
		t.Fatalf("unexpected inputs: %#v", got)
	}
}

func TestDeterministicEmbeddingStable(t *testing.T) {
	a := deterministicEmbedding("hello")
	b := deterministicEmbedding("hello")
	if len(a) != 64 || len(b) != 64 {
		t.Fatalf("expected 64 dims, got %d and %d", len(a), len(b))
	}
	for i := range a {
		if a[i] != b[i] {
			t.Fatalf("expected stable embedding at %d: %v != %v", i, a[i], b[i])
		}
	}
}

func TestResponseStorePutGet(t *testing.T) {
	st := newResponseStore(100 * time.Millisecond)
	st.put("resp_1", map[string]any{"id": "resp_1"})
	got, ok := st.get("resp_1")
	if !ok {
		t.Fatal("expected stored response")
	}
	if got["id"] != "resp_1" {
		t.Fatalf("unexpected response payload: %#v", got)
	}
}
407
internal/adapter/openai/responses_handler.go
Normal file
@@ -0,0 +1,407 @@
package openai

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"github.com/go-chi/chi/v5"
	"github.com/google/uuid"

	"ds2api/internal/auth"
	"ds2api/internal/config"
	"ds2api/internal/sse"
	"ds2api/internal/util"
)

func (h *Handler) GetResponseByID(w http.ResponseWriter, r *http.Request) {
	id := strings.TrimSpace(chi.URLParam(r, "response_id"))
	if id == "" {
		writeOpenAIError(w, http.StatusBadRequest, "response_id is required.")
		return
	}
	st := h.getResponseStore()
	item, ok := st.get(id)
	if !ok {
		writeOpenAIError(w, http.StatusNotFound, "Response not found.")
		return
	}
	writeJSON(w, http.StatusOK, item)
}

func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) {
	a, err := h.Auth.Determine(r)
	if err != nil {
		status := http.StatusUnauthorized
		detail := err.Error()
		if err == auth.ErrNoAccount {
			status = http.StatusTooManyRequests
		}
		writeOpenAIError(w, status, detail)
		return
	}
	defer h.Auth.Release(a)
	r = r.WithContext(auth.WithAuth(r.Context(), a))

	var req map[string]any
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeOpenAIError(w, http.StatusBadRequest, "invalid json")
		return
	}

	model, _ := req["model"].(string)
	model = strings.TrimSpace(model)
	if model == "" {
		writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model'.")
		return
	}
	resolvedModel, ok := config.ResolveModel(h.Store, model)
	if !ok {
		writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model))
		return
	}
	thinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)

	messagesRaw := responsesMessagesFromRequest(req)
	if len(messagesRaw) == 0 {
		writeOpenAIError(w, http.StatusBadRequest, "Request must include 'input' or 'messages'.")
		return
	}
	finalPrompt, toolNames := buildOpenAIFinalPrompt(messagesRaw, req["tools"])

	sessionID, err := h.DS.CreateSession(r.Context(), a, 3)
	if err != nil {
		if a.UseConfigToken {
			writeOpenAIError(w, http.StatusUnauthorized, "Account token is invalid. Please re-login the account in admin.")
		} else {
			writeOpenAIError(w, http.StatusUnauthorized, "Invalid token. If this should be a DS2API key, add it to config.keys first.")
		}
		return
	}
	pow, err := h.DS.GetPow(r.Context(), a, 3)
	if err != nil {
		writeOpenAIError(w, http.StatusUnauthorized, "Failed to get PoW (invalid token or unknown error).")
		return
	}
	payload := map[string]any{
		"chat_session_id":   sessionID,
		"parent_message_id": nil,
		"prompt":            finalPrompt,
		"ref_file_ids":      []any{},
		"thinking_enabled":  thinkingEnabled,
		"search_enabled":    searchEnabled,
	}
	applyOpenAIChatPassThrough(req, payload)
	resp, err := h.DS.CallCompletion(r.Context(), a, payload, pow, 3)
	if err != nil {
		writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
		return
	}

	responseID := "resp_" + strings.ReplaceAll(uuid.NewString(), "-", "")
	if util.ToBool(req["stream"]) {
		h.handleResponsesStream(w, r, resp, responseID, model, finalPrompt, thinkingEnabled, searchEnabled, toolNames)
		return
	}
	h.handleResponsesNonStream(w, resp, responseID, model, finalPrompt, thinkingEnabled, toolNames)
}

func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Response, responseID, model, finalPrompt string, thinkingEnabled bool, toolNames []string) {
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		writeOpenAIError(w, resp.StatusCode, strings.TrimSpace(string(body)))
		return
	}
	result := sse.CollectStream(resp, thinkingEnabled, true)
	responseObj := buildResponseObject(responseID, model, finalPrompt, result.Thinking, result.Text, toolNames)
	h.getResponseStore().put(responseID, responseObj)
	writeJSON(w, http.StatusOK, responseObj)
}

func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request, resp *http.Response, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string) {
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		writeOpenAIError(w, resp.StatusCode, strings.TrimSpace(string(body)))
		return
	}
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache, no-transform")
	w.Header().Set("Connection", "keep-alive")
	w.Header().Set("X-Accel-Buffering", "no")
	rc := http.NewResponseController(w)
	canFlush := rc.Flush() == nil

	sendEvent := func(event string, payload map[string]any) {
		b, _ := json.Marshal(payload)
		_, _ = w.Write([]byte("event: " + event + "\n"))
		_, _ = w.Write([]byte("data: "))
		_, _ = w.Write(b)
		_, _ = w.Write([]byte("\n\n"))
		if canFlush {
			_ = rc.Flush()
		}
	}

	sendEvent("response.created", map[string]any{
		"type":   "response.created",
		"id":     responseID,
		"object": "response",
		"model":  model,
		"status": "in_progress",
	})

	initialType := "text"
	if thinkingEnabled {
		initialType = "thinking"
	}
	parsedLines, done := sse.StartParsedLinePump(r.Context(), resp.Body, thinkingEnabled, initialType)
	bufferToolContent := len(toolNames) > 0
	var sieve toolStreamSieveState
	thinking := strings.Builder{}
	text := strings.Builder{}
	toolCallsEmitted := false
	streamToolCallIDs := map[int]string{}

	finalize := func() {
		finalThinking := thinking.String()
		finalText := text.String()
		if bufferToolContent {
			for _, evt := range flushToolSieve(&sieve, toolNames) {
				if evt.Content != "" {
					finalText += evt.Content
					sendEvent("response.output_text.delta", map[string]any{
						"type":  "response.output_text.delta",
						"id":    responseID,
						"delta": evt.Content,
					})
				}
				if len(evt.ToolCalls) > 0 {
					toolCallsEmitted = true
					sendEvent("response.output_tool_call.done", map[string]any{
						"type":       "response.output_tool_call.done",
						"id":         responseID,
						"tool_calls": util.FormatOpenAIStreamToolCalls(evt.ToolCalls),
					})
				}
			}
		}
		obj := buildResponseObject(responseID, model, finalPrompt, finalThinking, finalText, toolNames)
		if toolCallsEmitted {
			obj["status"] = "completed"
		}
		h.getResponseStore().put(responseID, obj)
		sendEvent("response.completed", map[string]any{
			"type":     "response.completed",
			"response": obj,
		})
		_, _ = w.Write([]byte("data: [DONE]\n\n"))
		if canFlush {
			_ = rc.Flush()
		}
	}

	for {
|
||||
select {
|
||||
case <-r.Context().Done():
|
||||
return
|
||||
case parsed, ok := <-parsedLines:
|
||||
if !ok {
|
||||
_ = <-done
|
||||
finalize()
|
||||
return
|
||||
}
|
||||
if !parsed.Parsed {
|
||||
continue
|
||||
}
|
||||
if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
|
||||
finalize()
|
||||
return
|
||||
}
|
||||
for _, p := range parsed.Parts {
|
||||
if p.Text == "" {
|
||||
continue
|
||||
}
|
||||
if p.Type != "thinking" && searchEnabled && sse.IsCitation(p.Text) {
|
||||
continue
|
||||
}
|
||||
if p.Type == "thinking" {
|
||||
if !thinkingEnabled {
|
||||
continue
|
||||
}
|
||||
thinking.WriteString(p.Text)
|
||||
sendEvent("response.reasoning.delta", map[string]any{
|
||||
"type": "response.reasoning.delta",
|
||||
"id": responseID,
|
||||
"delta": p.Text,
|
||||
})
|
||||
continue
|
||||
}
|
||||
text.WriteString(p.Text)
|
||||
if !bufferToolContent {
|
||||
sendEvent("response.output_text.delta", map[string]any{
|
||||
"type": "response.output_text.delta",
|
||||
"id": responseID,
|
||||
"delta": p.Text,
|
||||
})
|
||||
continue
|
||||
}
|
||||
for _, evt := range processToolSieveChunk(&sieve, p.Text, toolNames) {
|
||||
if evt.Content != "" {
|
||||
sendEvent("response.output_text.delta", map[string]any{
|
||||
"type": "response.output_text.delta",
|
||||
"id": responseID,
|
||||
"delta": evt.Content,
|
||||
})
|
||||
}
|
||||
if len(evt.ToolCallDeltas) > 0 {
|
||||
toolCallsEmitted = true
|
||||
sendEvent("response.output_tool_call.delta", map[string]any{
|
||||
"type": "response.output_tool_call.delta",
|
||||
"id": responseID,
|
||||
"tool_calls": formatIncrementalStreamToolCallDeltas(evt.ToolCallDeltas, streamToolCallIDs),
|
||||
})
|
||||
}
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
toolCallsEmitted = true
|
||||
sendEvent("response.output_tool_call.done", map[string]any{
|
||||
"type": "response.output_tool_call.done",
|
||||
"id": responseID,
|
||||
"tool_calls": util.FormatOpenAIStreamToolCalls(evt.ToolCalls),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
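The stream handler frames every payload the same way: an `event:` line, a `data:` line carrying JSON, and a blank line. A standalone sketch of that framing (the `sseFrame` helper name is mine, not the handler's; the handler writes straight to the `http.ResponseWriter` and flushes):

```go
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// sseFrame renders one Server-Sent Event the way handleResponsesStream
// does: "event:" line, "data:" line with marshaled JSON, blank separator.
func sseFrame(event string, payload map[string]any) string {
	b, _ := json.Marshal(payload) // map keys are emitted in sorted order
	var sb strings.Builder
	sb.WriteString("event: " + event + "\n")
	sb.WriteString("data: ")
	sb.Write(b)
	sb.WriteString("\n\n")
	return sb.String()
}

func main() {
	fmt.Print(sseFrame("response.created", map[string]any{"id": "resp_123", "status": "in_progress"}))
	// event: response.created
	// data: {"id":"resp_123","status":"in_progress"}
}
```

The trailing blank line is what lets an SSE client detect the end of each event frame.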

func buildResponseObject(responseID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
	detected := util.ParseToolCalls(finalText, toolNames)
	output := make([]any, 0, 2)
	if len(detected) > 0 {
		toolCalls := make([]any, 0, len(detected))
		for _, tc := range detected {
			toolCalls = append(toolCalls, map[string]any{
				"type":      "tool_call",
				"name":      tc.Name,
				"arguments": tc.Input,
			})
		}
		output = append(output, map[string]any{
			"type":       "tool_calls",
			"tool_calls": toolCalls,
		})
	} else {
		content := []any{
			map[string]any{
				"type": "output_text",
				"text": finalText,
			},
		}
		if finalThinking != "" {
			content = append([]any{map[string]any{
				"type": "reasoning",
				"text": finalThinking,
			}}, content...)
		}
		output = append(output, map[string]any{
			"type":    "message",
			"id":      "msg_" + strings.ReplaceAll(uuid.NewString(), "-", ""),
			"role":    "assistant",
			"content": content,
		})
	}
	promptTokens := util.EstimateTokens(finalPrompt)
	reasoningTokens := util.EstimateTokens(finalThinking)
	completionTokens := util.EstimateTokens(finalText)
	return map[string]any{
		"id":          responseID,
		"type":        "response",
		"object":      "response",
		"created_at":  time.Now().Unix(),
		"status":      "completed",
		"model":       model,
		"output":      output,
		"output_text": finalText,
		"usage": map[string]any{
			"input_tokens":  promptTokens,
			"output_tokens": reasoningTokens + completionTokens,
			"total_tokens":  promptTokens + reasoningTokens + completionTokens,
		},
	}
}

func responsesMessagesFromRequest(req map[string]any) []any {
	if msgs, ok := req["messages"].([]any); ok && len(msgs) > 0 {
		return prependInstructionMessage(msgs, req["instructions"])
	}
	if rawInput, ok := req["input"]; ok {
		if msgs := normalizeResponsesInputAsMessages(rawInput); len(msgs) > 0 {
			return prependInstructionMessage(msgs, req["instructions"])
		}
	}
	return nil
}

func prependInstructionMessage(messages []any, instructions any) []any {
	sys, _ := instructions.(string)
	sys = strings.TrimSpace(sys)
	if sys == "" {
		return messages
	}
	out := make([]any, 0, len(messages)+1)
	out = append(out, map[string]any{"role": "system", "content": sys})
	out = append(out, messages...)
	return out
}

func normalizeResponsesInputAsMessages(input any) []any {
	switch v := input.(type) {
	case string:
		if strings.TrimSpace(v) == "" {
			return nil
		}
		return []any{map[string]any{"role": "user", "content": v}}
	case []any:
		if len(v) == 0 {
			return nil
		}
		// If caller already provides role-shaped items, keep as-is.
		if first, ok := v[0].(map[string]any); ok {
			if _, hasRole := first["role"]; hasRole {
				return v
			}
		}
		parts := make([]string, 0, len(v))
		for _, item := range v {
			if m, ok := item.(map[string]any); ok {
				if t, _ := m["type"].(string); strings.EqualFold(strings.TrimSpace(t), "input_text") {
					if txt, _ := m["text"].(string); strings.TrimSpace(txt) != "" {
						parts = append(parts, txt)
						continue
					}
				}
			}
			if s := strings.TrimSpace(fmt.Sprintf("%v", item)); s != "" {
				parts = append(parts, s)
			}
		}
		if len(parts) == 0 {
			return nil
		}
		return []any{map[string]any{"role": "user", "content": strings.Join(parts, "\n")}}
	case map[string]any:
		if txt, _ := v["text"].(string); strings.TrimSpace(txt) != "" {
			return []any{map[string]any{"role": "user", "content": txt}}
		}
		if content, ok := v["content"].(string); ok && strings.TrimSpace(content) != "" {
			return []any{map[string]any{"role": "user", "content": content}}
		}
	}
	return nil
}
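The normalization above is wide-input by design: a bare string, a list of `input_text` parts, or already role-shaped messages all collapse into chat messages. A trimmed sketch of the two simplest cases (hypothetical `normalizeInput`, not the handler's full function):

```go
package main

import "fmt"

// normalizeInput is a simplified sketch of normalizeResponsesInputAsMessages:
// a bare string becomes one user message, role-shaped items pass through.
func normalizeInput(input any) []map[string]any {
	switch v := input.(type) {
	case string:
		if v == "" {
			return nil
		}
		return []map[string]any{{"role": "user", "content": v}}
	case []map[string]any:
		return v // caller already supplied role-shaped messages
	}
	return nil
}

func main() {
	fmt.Println(normalizeInput("hello"))
	// [map[content:hello role:user]]
}
```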
@@ -7,17 +7,16 @@ import (
 )

 type toolStreamSieveState struct {
-	pending           strings.Builder
-	capture           strings.Builder
-	capturing         bool
-	hasMeaningfulText bool
-	recentTextTail    string
-	toolNameSent      bool
-	toolName          string
-	toolArgsStart     int
-	toolArgsSent      int
-	toolArgsString    bool
-	toolArgsDone      bool
+	pending        strings.Builder
+	capture        strings.Builder
+	capturing      bool
+	recentTextTail string
+	toolNameSent   bool
+	toolName       string
+	toolArgsStart  int
+	toolArgsSent   int
+	toolArgsString bool
+	toolArgsDone   bool
 }

 type toolStreamEvent struct {
@@ -197,14 +196,22 @@ func findToolSegmentStart(s string) int {
 		return -1
 	}
 	lower := strings.ToLower(s)
-	keyIdx := strings.Index(lower, "tool_calls")
-	if keyIdx < 0 {
-		return -1
-	}
-	if start := strings.LastIndex(s[:keyIdx], "{"); start >= 0 {
-		return start
-	}
-	return keyIdx
+	offset := 0
+	for {
+		keyRel := strings.Index(lower[offset:], "tool_calls")
+		if keyRel < 0 {
+			return -1
+		}
+		keyIdx := offset + keyRel
+		start := strings.LastIndex(s[:keyIdx], "{")
+		if start < 0 {
+			start = keyIdx
+		}
+		if !insideCodeFence(s[:start]) {
+			return start
+		}
+		offset = keyIdx + len("tool_calls")
+	}
 }

 func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) {
@@ -227,7 +234,7 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
 	}
 	prefixPart := captured[:start]
 	suffixPart := captured[end:]
-	if !state.toolNameSent && (strings.TrimSpace(prefixPart) != "" || looksLikeToolExampleContext(state.recentTextTail) || looksLikeToolExampleContext(suffixPart)) {
+	if insideCodeFence(state.recentTextTail + prefixPart) {
 		return captured, nil, "", true
 	}
 	parsed := util.ParseStandaloneToolCalls(obj, toolNames)
@@ -293,16 +300,16 @@ func buildIncrementalToolDeltas(state *toolStreamSieveState) []toolCallDelta {
 	if captured == "" {
 		return nil
 	}
-	if looksLikeToolExampleContext(state.recentTextTail) {
-		return nil
-	}
 	lower := strings.ToLower(captured)
 	keyIdx := strings.Index(lower, "tool_calls")
 	if keyIdx < 0 {
 		return nil
 	}
 	start := strings.LastIndex(captured[:keyIdx], "{")
-	if start < 0 || strings.TrimSpace(captured[:start]) != "" {
+	if start < 0 {
 		return nil
 	}
+	if insideCodeFence(state.recentTextTail + captured[:start]) {
+		return nil
+	}
 	callStart, ok := findFirstToolCallObjectStart(captured, keyIdx)
@@ -612,7 +619,6 @@ func (s *toolStreamSieveState) noteText(content string) {
 	if strings.TrimSpace(content) == "" {
 		return
 	}
-	s.hasMeaningfulText = true
 	s.recentTextTail = appendTail(s.recentTextTail, content, toolSieveContextTailLimit)
 }
@@ -628,25 +634,12 @@ func appendTail(prev, next string, max int) string {
 }

 func looksLikeToolExampleContext(text string) bool {
-	t := strings.ToLower(strings.TrimSpace(text))
-	if t == "" {
-		return false
-	}
-	cues := []string{
-		"示例",
-		"例子",
-		"for example",
-		"example",
-		"demo",
-		"请勿执行",
-		"不要执行",
-		"do not execute",
-		"```",
-	}
-	for _, cue := range cues {
-		if strings.Contains(t, cue) {
-			return true
-		}
-	}
-	return false
+	return insideCodeFence(text)
+}
+
+func insideCodeFence(text string) bool {
+	if text == "" {
+		return false
+	}
+	return strings.Count(text, "```")%2 == 1
 }
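The replacement heuristic is easy to exercise in isolation: an odd count of ``` markers before a position means the text ends inside an unterminated fence, so a `tool_calls` blob found there is treated as an example rather than a real call. A runnable sketch:

```go
package main

import (
	"fmt"
	"strings"
)

// insideCodeFence mirrors the function introduced above: an odd number
// of "```" markers means the end of text sits inside an open code fence.
func insideCodeFence(text string) bool {
	if text == "" {
		return false
	}
	return strings.Count(text, "```")%2 == 1
}

func main() {
	fmt.Println(insideCodeFence("intro ```json {\"tool_calls\":"))   // true: fence still open
	fmt.Println(insideCodeFence("```json {} ``` trailing prose")) // false: fence closed
}
```

Counting fences replaces the older keyword-cue list, so non-fenced mentions of "example" no longer suppress genuine tool calls.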
@@ -62,11 +62,16 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque
 		writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model' and 'messages'.")
 		return
 	}
-	thinkingEnabled, searchEnabled, ok := config.GetModelConfig(model)
+	resolvedModel, ok := config.ResolveModel(h.Store, model)
 	if !ok {
-		writeOpenAIError(w, http.StatusServiceUnavailable, fmt.Sprintf("Model '%s' is not available.", model))
+		writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model))
 		return
 	}
+	thinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)
+	responseModel := strings.TrimSpace(model)
+	if responseModel == "" {
+		responseModel = resolvedModel
+	}

 	finalPrompt, _ := buildOpenAIFinalPrompt(messagesRaw, req["tools"])

@@ -97,6 +102,7 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque
 		"thinking_enabled": thinkingEnabled,
 		"search_enabled":   searchEnabled,
 	}
+	applyOpenAIChatPassThrough(req, payload)
 	leaseID := h.holdStreamLease(a)
 	if leaseID == "" {
 		writeOpenAIError(w, http.StatusInternalServerError, "failed to create stream lease")
@@ -106,7 +112,7 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque
 	writeJSON(w, http.StatusOK, map[string]any{
 		"session_id":       sessionID,
 		"lease_id":         leaseID,
-		"model":            model,
+		"model":            responseModel,
 		"final_prompt":     finalPrompt,
 		"thinking_enabled": thinkingEnabled,
 		"search_enabled":   searchEnabled,
@@ -62,11 +62,33 @@ type Config struct {
 	Accounts         []Account         `json:"accounts,omitempty"`
 	ClaudeMapping    map[string]string `json:"claude_mapping,omitempty"`
 	ClaudeModelMap   map[string]string `json:"claude_model_mapping,omitempty"`
+	ModelAliases     map[string]string `json:"model_aliases,omitempty"`
+	Compat           CompatConfig      `json:"compat,omitempty"`
+	Toolcall         ToolcallConfig    `json:"toolcall,omitempty"`
+	Responses        ResponsesConfig   `json:"responses,omitempty"`
+	Embeddings       EmbeddingsConfig  `json:"embeddings,omitempty"`
 	VercelSyncHash   string            `json:"_vercel_sync_hash,omitempty"`
 	VercelSyncTime   int64             `json:"_vercel_sync_time,omitempty"`
 	AdditionalFields map[string]any    `json:"-"`
 }

+type CompatConfig struct {
+	WideInputStrictOutput bool `json:"wide_input_strict_output,omitempty"`
+}
+
+type ToolcallConfig struct {
+	Mode                string `json:"mode,omitempty"`
+	EarlyEmitConfidence string `json:"early_emit_confidence,omitempty"`
+}
+
+type ResponsesConfig struct {
+	StoreTTLSeconds int `json:"store_ttl_seconds,omitempty"`
+}
+
+type EmbeddingsConfig struct {
+	Provider string `json:"provider,omitempty"`
+}
+
 func (c Config) MarshalJSON() ([]byte, error) {
 	m := map[string]any{}
 	for k, v := range c.AdditionalFields {
@@ -84,6 +106,21 @@ func (c Config) MarshalJSON() ([]byte, error) {
 	if len(c.ClaudeModelMap) > 0 {
 		m["claude_model_mapping"] = c.ClaudeModelMap
 	}
+	if len(c.ModelAliases) > 0 {
+		m["model_aliases"] = c.ModelAliases
+	}
+	if c.Compat.WideInputStrictOutput {
+		m["compat"] = c.Compat
+	}
+	if strings.TrimSpace(c.Toolcall.Mode) != "" || strings.TrimSpace(c.Toolcall.EarlyEmitConfidence) != "" {
+		m["toolcall"] = c.Toolcall
+	}
+	if c.Responses.StoreTTLSeconds > 0 {
+		m["responses"] = c.Responses
+	}
+	if strings.TrimSpace(c.Embeddings.Provider) != "" {
+		m["embeddings"] = c.Embeddings
+	}
 	if c.VercelSyncHash != "" {
 		m["_vercel_sync_hash"] = c.VercelSyncHash
 	}
@@ -117,6 +154,26 @@ func (c *Config) UnmarshalJSON(b []byte) error {
 		if err := json.Unmarshal(v, &c.ClaudeModelMap); err != nil {
 			return fmt.Errorf("invalid field %q: %w", k, err)
 		}
+	case "model_aliases":
+		if err := json.Unmarshal(v, &c.ModelAliases); err != nil {
+			return fmt.Errorf("invalid field %q: %w", k, err)
+		}
+	case "compat":
+		if err := json.Unmarshal(v, &c.Compat); err != nil {
+			return fmt.Errorf("invalid field %q: %w", k, err)
+		}
+	case "toolcall":
+		if err := json.Unmarshal(v, &c.Toolcall); err != nil {
+			return fmt.Errorf("invalid field %q: %w", k, err)
+		}
+	case "responses":
+		if err := json.Unmarshal(v, &c.Responses); err != nil {
+			return fmt.Errorf("invalid field %q: %w", k, err)
+		}
+	case "embeddings":
+		if err := json.Unmarshal(v, &c.Embeddings); err != nil {
+			return fmt.Errorf("invalid field %q: %w", k, err)
+		}
 	case "_vercel_sync_hash":
 		if err := json.Unmarshal(v, &c.VercelSyncHash); err != nil {
 			return fmt.Errorf("invalid field %q: %w", k, err)
@@ -141,6 +198,11 @@ func (c Config) Clone() Config {
 		Accounts:         slices.Clone(c.Accounts),
 		ClaudeMapping:    cloneStringMap(c.ClaudeMapping),
 		ClaudeModelMap:   cloneStringMap(c.ClaudeModelMap),
+		ModelAliases:     cloneStringMap(c.ModelAliases),
+		Compat:           c.Compat,
+		Toolcall:         c.Toolcall,
+		Responses:        c.Responses,
+		Embeddings:       c.Embeddings,
 		VercelSyncHash:   c.VercelSyncHash,
 		VercelSyncTime:   c.VercelSyncTime,
 		AdditionalFields: map[string]any{},
@@ -490,3 +552,59 @@ func (s *Store) ClaudeMapping() map[string]string {
 	}
 	return map[string]string{"fast": "deepseek-chat", "slow": "deepseek-reasoner"}
 }
+
+func (s *Store) ModelAliases() map[string]string {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	out := DefaultModelAliases()
+	for k, v := range s.cfg.ModelAliases {
+		key := strings.TrimSpace(lower(k))
+		val := strings.TrimSpace(lower(v))
+		if key == "" || val == "" {
+			continue
+		}
+		out[key] = val
+	}
+	return out
+}
+
+func (s *Store) CompatWideInputStrictOutput() bool {
+	// Current default policy is always wide-input / strict-output.
+	// Kept as a method so callers do not depend on storage shape.
+	return true
+}
+
+func (s *Store) ToolcallMode() string {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	mode := strings.TrimSpace(strings.ToLower(s.cfg.Toolcall.Mode))
+	if mode == "" {
+		return "feature_match"
+	}
+	return mode
+}
+
+func (s *Store) ToolcallEarlyEmitConfidence() string {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	level := strings.TrimSpace(strings.ToLower(s.cfg.Toolcall.EarlyEmitConfidence))
+	if level == "" {
+		return "high"
+	}
+	return level
+}
+
+func (s *Store) ResponsesStoreTTLSeconds() int {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	if s.cfg.Responses.StoreTTLSeconds > 0 {
+		return s.cfg.Responses.StoreTTLSeconds
+	}
+	return 900
+}
+
+func (s *Store) EmbeddingsProvider() string {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	return strings.TrimSpace(s.cfg.Embeddings.Provider)
+}
internal/config/model_alias_test.go (Normal file, 44 lines)
@@ -0,0 +1,44 @@
package config

import "testing"

func TestResolveModelDirectDeepSeek(t *testing.T) {
	got, ok := ResolveModel(nil, "deepseek-chat")
	if !ok || got != "deepseek-chat" {
		t.Fatalf("expected deepseek-chat, got ok=%v model=%q", ok, got)
	}
}

func TestResolveModelAlias(t *testing.T) {
	got, ok := ResolveModel(nil, "gpt-4.1")
	if !ok || got != "deepseek-chat" {
		t.Fatalf("expected alias gpt-4.1 -> deepseek-chat, got ok=%v model=%q", ok, got)
	}
}

func TestResolveModelHeuristicReasoner(t *testing.T) {
	got, ok := ResolveModel(nil, "o3-super")
	if !ok || got != "deepseek-reasoner" {
		t.Fatalf("expected heuristic reasoner, got ok=%v model=%q", ok, got)
	}
}

func TestResolveModelUnknown(t *testing.T) {
	_, ok := ResolveModel(nil, "totally-custom-model")
	if ok {
		t.Fatal("expected unknown model to fail resolve")
	}
}

func TestClaudeModelsResponsePaginationFields(t *testing.T) {
	resp := ClaudeModelsResponse()
	if _, ok := resp["first_id"]; !ok {
		t.Fatalf("expected first_id in response: %#v", resp)
	}
	if _, ok := resp["last_id"]; !ok {
		t.Fatalf("expected last_id in response: %#v", resp)
	}
	if _, ok := resp["has_more"]; !ok {
		t.Fatalf("expected has_more in response: %#v", resp)
	}
}
@@ -1,5 +1,7 @@
 package config

+import "strings"
+
 type ModelInfo struct {
 	ID     string `json:"id"`
 	Object string `json:"object"`
@@ -71,6 +73,91 @@ func GetModelConfig(model string) (thinking bool, search bool, ok bool) {
 	}
 }

+func IsSupportedDeepSeekModel(model string) bool {
+	_, _, ok := GetModelConfig(model)
+	return ok
+}
+
+func DefaultModelAliases() map[string]string {
+	return map[string]string{
+		"gpt-4o":                 "deepseek-chat",
+		"gpt-4.1":                "deepseek-chat",
+		"gpt-4.1-mini":           "deepseek-chat",
+		"gpt-4.1-nano":           "deepseek-chat",
+		"gpt-5":                  "deepseek-chat",
+		"gpt-5-mini":             "deepseek-chat",
+		"gpt-5-codex":            "deepseek-reasoner",
+		"o1":                     "deepseek-reasoner",
+		"o1-mini":                "deepseek-reasoner",
+		"o3":                     "deepseek-reasoner",
+		"o3-mini":                "deepseek-reasoner",
+		"claude-sonnet-4-5":      "deepseek-chat",
+		"claude-haiku-4-5":       "deepseek-chat",
+		"claude-opus-4-6":        "deepseek-reasoner",
+		"claude-3-5-sonnet":      "deepseek-chat",
+		"claude-3-5-haiku":       "deepseek-chat",
+		"claude-3-opus":          "deepseek-reasoner",
+		"gemini-2.5-pro":         "deepseek-chat",
+		"gemini-2.5-flash":       "deepseek-chat",
+		"llama-3.1-70b-instruct": "deepseek-chat",
+		"qwen-max":               "deepseek-chat",
+	}
+}
+
+func ResolveModel(store *Store, requested string) (string, bool) {
+	model := lower(strings.TrimSpace(requested))
+	if model == "" {
+		return "", false
+	}
+	if IsSupportedDeepSeekModel(model) {
+		return model, true
+	}
+	aliases := DefaultModelAliases()
+	if store != nil {
+		for k, v := range store.ModelAliases() {
+			aliases[lower(strings.TrimSpace(k))] = lower(strings.TrimSpace(v))
+		}
+	}
+	if mapped, ok := aliases[model]; ok && IsSupportedDeepSeekModel(mapped) {
+		return mapped, true
+	}
+	if strings.HasPrefix(model, "deepseek-") {
+		return "", false
+	}
+
+	knownFamily := false
+	for _, prefix := range []string{
+		"gpt-", "o1", "o3", "claude-", "gemini-", "llama-", "qwen-", "mistral-", "command-",
+	} {
+		if strings.HasPrefix(model, prefix) {
+			knownFamily = true
+			break
+		}
+	}
+	if !knownFamily {
+		return "", false
+	}
+
+	useReasoner := strings.Contains(model, "reason") ||
+		strings.Contains(model, "reasoner") ||
+		strings.HasPrefix(model, "o1") ||
+		strings.HasPrefix(model, "o3") ||
+		strings.Contains(model, "opus") ||
+		strings.Contains(model, "r1")
+	useSearch := strings.Contains(model, "search")
+
+	switch {
+	case useReasoner && useSearch:
+		return "deepseek-reasoner-search", true
+	case useReasoner:
+		return "deepseek-reasoner", true
+	case useSearch:
+		return "deepseek-chat-search", true
+	default:
+		return "deepseek-chat", true
+	}
+}
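The fallback branch of `ResolveModel` can be sketched on its own: a known third-party family prefix is required, then "reasoner"-flavored and "search"-flavored keywords pick the DeepSeek target. The code below is a trimmed standalone sketch (`resolveHeuristic` is a hypothetical name; the real function also checks aliases and the store first):

```go
package main

import (
	"fmt"
	"strings"
)

// resolveHeuristic mirrors ResolveModel's keyword fallback: only known
// model families are accepted, then reasoner/search cues pick the target.
func resolveHeuristic(model string) (string, bool) {
	model = strings.ToLower(strings.TrimSpace(model))
	known := false
	for _, p := range []string{"gpt-", "o1", "o3", "claude-", "gemini-", "llama-", "qwen-", "mistral-", "command-"} {
		if strings.HasPrefix(model, p) {
			known = true
			break
		}
	}
	if !known {
		return "", false
	}
	reasoner := strings.Contains(model, "reason") || strings.HasPrefix(model, "o1") ||
		strings.HasPrefix(model, "o3") || strings.Contains(model, "opus") || strings.Contains(model, "r1")
	search := strings.Contains(model, "search")
	switch {
	case reasoner && search:
		return "deepseek-reasoner-search", true
	case reasoner:
		return "deepseek-reasoner", true
	case search:
		return "deepseek-chat-search", true
	default:
		return "deepseek-chat", true
	}
}

func main() {
	fmt.Println(resolveHeuristic("o3-super"))         // deepseek-reasoner true
	fmt.Println(resolveHeuristic("gemini-2.5-flash")) // deepseek-chat true
	fmt.Println(resolveHeuristic("totally-custom"))
}
```

This is the behavior the new `TestResolveModelHeuristicReasoner` and `TestResolveModelUnknown` cases pin down.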
+
 func lower(s string) string {
 	b := []byte(s)
 	for i, c := range b {
@@ -85,6 +172,28 @@ func OpenAIModelsResponse() map[string]any {
 	return map[string]any{"object": "list", "data": DeepSeekModels}
 }

-func ClaudeModelsResponse() map[string]any {
-	return map[string]any{"object": "list", "data": ClaudeModels}
-}
+func OpenAIModelByID(store *Store, id string) (ModelInfo, bool) {
+	canonical, ok := ResolveModel(store, id)
+	if !ok {
+		return ModelInfo{}, false
+	}
+	for _, model := range DeepSeekModels {
+		if model.ID == canonical {
+			return model, true
+		}
+	}
+	return ModelInfo{}, false
+}
+
+func ClaudeModelsResponse() map[string]any {
+	resp := map[string]any{"object": "list", "data": ClaudeModels}
+	if len(ClaudeModels) > 0 {
+		resp["first_id"] = ClaudeModels[0].ID
+		resp["last_id"] = ClaudeModels[len(ClaudeModels)-1].ID
+	} else {
+		resp["first_id"] = nil
+		resp["last_id"] = nil
+	}
+	resp["has_more"] = false
+	return resp
+}
@@ -92,7 +92,7 @@ func cors(next http.Handler) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Access-Control-Allow-Origin", "*")
 		w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS, PUT, DELETE")
-		w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
+		w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-API-Key, X-Ds2-Target-Account, X-Vercel-Protection-Bypass")
		if r.Method == http.MethodOptions {
 			w.WriteHeader(http.StatusNoContent)
 			return
@@ -10,6 +10,7 @@ import (

 var toolCallPattern = regexp.MustCompile(`\{\s*["']tool_calls["']\s*:\s*\[(.*?)\]\s*\}`)
 var fencedJSONPattern = regexp.MustCompile("(?s)```(?:json)?\\s*(.*?)\\s*```")
+var fencedBlockPattern = regexp.MustCompile("(?s)```.*?```")

 type ParsedToolCall struct {
 	Name  string `json:"name"`
@@ -20,6 +21,10 @@ func ParseToolCalls(text string, availableToolNames []string) []ParsedToolCall {
 	if strings.TrimSpace(text) == "" {
 		return nil
 	}
+	text = stripFencedCodeBlocks(text)
+	if strings.TrimSpace(text) == "" {
+		return nil
+	}

 	candidates := buildToolCallCandidates(text)
 	var parsed []ParsedToolCall
@@ -45,11 +50,6 @@ func ParseStandaloneToolCalls(text string, availableToolNames []string) []Parsed
 		return nil
 	}
 	candidates := []string{trimmed}
-	if strings.HasPrefix(trimmed, "```") && strings.HasSuffix(trimmed, "```") {
-		if m := fencedJSONPattern.FindStringSubmatch(trimmed); len(m) >= 2 {
-			candidates = append(candidates, strings.TrimSpace(m[1]))
-		}
-	}
 	for _, candidate := range candidates {
 		candidate = strings.TrimSpace(candidate)
 		if candidate == "" {
@@ -321,23 +321,14 @@ func looksLikeToolExampleContext(text string) bool {
 	if t == "" {
 		return false
 	}
-	cues := []string{
-		"```",
-		"示例",
-		"例子",
-		"for example",
-		"example",
-		"demo",
-		"请勿执行",
-		"不要执行",
-		"do not execute",
-	}
-	for _, cue := range cues {
-		if strings.Contains(t, cue) {
-			return true
-		}
-	}
-	return false
+	return strings.Contains(t, "```")
+}
+
+func stripFencedCodeBlocks(text string) string {
+	if strings.TrimSpace(text) == "" {
+		return ""
+	}
+	return fencedBlockPattern.ReplaceAllString(text, " ")
 }

 func FormatOpenAIToolCalls(calls []ParsedToolCall) []map[string]any {
@@ -19,11 +19,8 @@ func TestParseToolCalls(t *testing.T) {
 func TestParseToolCallsFromFencedJSON(t *testing.T) {
 	text := "I will call tools now\n```json\n{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"news\"}}]}\n```"
 	calls := ParseToolCalls(text, []string{"search"})
-	if len(calls) != 1 {
-		t.Fatalf("expected 1 call, got %d", len(calls))
-	}
-	if calls[0].Input["q"] != "news" {
-		t.Fatalf("unexpected args: %#v", calls[0].Input)
+	if len(calls) != 0 {
+		t.Fatalf("expected fenced tool_call example to be ignored, got %#v", calls)
 	}
 }
@@ -416,18 +416,6 @@ func TestParseStandaloneToolCallsFencedCodeBlock(t *testing.T) {

 // ─── looksLikeToolExampleContext ─────────────────────────────────────

-func TestLooksLikeToolExampleContextChinese(t *testing.T) {
-	if !looksLikeToolExampleContext("下面是示例") {
-		t.Fatal("expected true for Chinese example context")
-	}
-}
-
-func TestLooksLikeToolExampleContextEnglish(t *testing.T) {
-	if !looksLikeToolExampleContext("here is an example of") {
-		t.Fatal("expected true for English example context")
-	}
-}
-
 func TestLooksLikeToolExampleContextNone(t *testing.T) {
 	if looksLikeToolExampleContext("I will call the tool now") {
 		t.Fatal("expected false for non-example context")
@@ -9,6 +9,12 @@
       "apiKey": "your-api-key"
     },
     "models": {
+      "gpt-4o": {
+        "name": "GPT-4o (aliased to deepseek-chat)"
+      },
+      "gpt-5-codex": {
+        "name": "GPT-5 Codex (aliased to deepseek-reasoner)"
+      },
       "deepseek-chat": {
         "name": "DeepSeek Chat (DS2API)"
       },
@@ -18,5 +24,5 @@
       }
     }
   },
-  "model": "ds2api/deepseek-chat"
+  "model": "ds2api/gpt-5-codex"
 }