diff --git a/AGENTS.md b/AGENTS.md index 77991bc..1c71307 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -21,3 +21,9 @@ These rules apply to all agent-made changes in this repository. - Keep changes additive and tightly scoped to the requested feature or bugfix. - Do not mix unrelated refactors into feature PRs unless they are required to make the change pass gates. + +## Documentation Sync + +- When business logic or user-visible behavior changes, update the corresponding documentation in the same change. +- `docs/prompt-compatibility.md` is the source-of-truth document for the “API -> pure-text web-chat context” compatibility flow. +- If a change affects message normalization, tool prompt injection, prompt-visible tool history, file/reference handling, history split, or completion payload assembly, update `docs/prompt-compatibility.md` in the same change. diff --git a/API.en.md b/API.en.md index 11e597c..ca1e7a9 100644 --- a/API.en.md +++ b/API.en.md @@ -31,13 +31,13 @@ Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Depl | Base URL | `http://localhost:5001` or your deployment domain | | Default Content-Type | `application/json` | | Health probes | `GET /healthz`, `GET /readyz` | -| CORS | Enabled (`Access-Control-Allow-Origin: *`, allows `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`) | +| CORS | Enabled (uniformly covers `/v1/*`, `/anthropic/*`, `/v1beta/models/*`, and `/admin/*`; echoes the browser `Origin` when present, otherwise `*`; default allow-list includes `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`, `X-Goog-Api-Key`, `Anthropic-Version`, `Anthropic-Beta`, and also accepts third-party preflight-requested headers such as `x-stainless-*`; `/v1/chat/completions` on Vercel Node Runtime matches the same behavior; internal-only `X-Ds2-Internal-Token` remains blocked) | ### 3.0 Adapter-Layer Notes - 
OpenAI / Claude / Gemini protocols are now mounted on one shared `chi` router tree assembled in `internal/server/router.go`. - Adapter responsibilities are streamlined to: **request normalization → DeepSeek invocation → protocol-shaped rendering**, reducing legacy split-logic paths. -- Tool-calling semantics are aligned between Go and Node runtime: parsing is now centered on XML/Markup-family tool syntax (`` / `` / `` / `tool_use` / antml variants), plus stream-time anti-leak filtering. +- Tool-calling semantics are aligned between Go and Node runtime: the only executable model-output syntax is the canonical XML tool block `` → `` → ``, plus stream-time anti-leak filtering. - `Admin API` separates static config from runtime policy: `/admin/config*` for configuration state, `/admin/settings*` for runtime behavior. --- @@ -160,6 +160,7 @@ Gemini-compatible clients can also send `x-goog-api-key`, `?key=`, or `?api_key= | DELETE | `/admin/dev/captures` | Admin | Clear local packet-capture entries | | GET | `/admin/chat-history` | Admin | Read server-side conversation history | | DELETE | `/admin/chat-history` | Admin | Clear server-side conversation history | +| GET | `/admin/chat-history/{id}` | Admin | Read one server-side conversation entry | | DELETE | `/admin/chat-history/{id}` | Admin | Delete one server-side conversation entry | | PUT | `/admin/chat-history/settings` | Admin | Update conversation history retention limit | | GET | `/admin/version` | Admin | Check current version and latest Release | @@ -194,18 +195,12 @@ No auth required. Returns the currently supported DeepSeek native model list. 
{ "object": "list", "data": [ - {"id": "deepseek-chat", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-reasoner", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-chat-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-reasoner-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-expert-chat", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-expert-reasoner", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-expert-chat-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-expert-reasoner-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-vision-chat", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-vision-reasoner", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-vision-chat-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-vision-reasoner-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []} + {"id": "deepseek-v4-flash", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-pro", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-flash-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-pro-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-vision", "object": 
"model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-vision-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []} ] } ``` @@ -221,12 +216,15 @@ For `chat` / `responses` / `embeddings`, DS2API follows a wide-input/strict-outp 3. If still unmatched, fall back by known family heuristics (`o*`, `gpt-*`, `claude-*`, etc.). 4. If still unmatched, return `invalid_request_error`. -Current built-in default aliases (excerpt): +Built-in aliases come from `internal/config/models.go`; `config.model_aliases` can override or add mappings at runtime. Excerpt: -- OpenAI: `gpt-4o`, `gpt-4.1`, `gpt-4.1-mini`, `gpt-4.1-nano`, `gpt-5`, `gpt-5-mini`, `gpt-5-codex` -- OpenAI reasoning: `o1`, `o1-mini`, `o3`, `o3-mini` -- Claude: `claude-sonnet-4-5`, `claude-haiku-4-5`, `claude-opus-4-6` (plus compatibility aliases `claude-3-5-sonnet` / `claude-3-5-haiku` / `claude-3-opus`) -- Gemini: `gemini-2.5-pro`, `gemini-2.5-flash` +- OpenAI / Codex: `gpt-4o`, `gpt-4.1`, `gpt-5`, `gpt-5.5`, `gpt-5-codex`, `gpt-5.3-codex`, `codex-mini-latest` +- OpenAI reasoning: `o1`, `o3`, `o3-deep-research`, `o4-mini` +- Claude: `claude-opus-4-6`, `claude-sonnet-4-6`, `claude-haiku-4-5`, `claude-3-5-sonnet-latest` +- Gemini: `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-pro-vision` +- Other compatibility families: `llama-*`, `qwen-*`, `mistral-*`, and `command-*` fall back through family heuristics + +Retired historical families such as `claude-1.*`, `claude-2.*`, `claude-instant-*`, and `gpt-3.5*` are explicitly rejected. ### `POST /v1/chat/completions` @@ -241,7 +239,7 @@ Content-Type: application/json | Field | Type | Required | Notes | | --- | --- | --- | --- | -| `model` | string | ✅ | DeepSeek native models + common aliases (`gpt-5`, `gpt-5-mini`, `gpt-5-codex`, `o3`, `claude-opus-4-6`, `gemini-2.5-pro`, `gemini-2.5-flash`, etc.) 
| +| `model` | string | ✅ | DeepSeek native models + common aliases (`gpt-5.5`, `gpt-5.4-mini`, `gpt-5.3-codex`, `o3`, `claude-opus-4-6`, `gemini-2.5-pro`, `gemini-2.5-flash`, etc.) | | `messages` | array | ✅ | OpenAI-style messages | | `stream` | boolean | ❌ | Default `false` | | `tools` | array | ❌ | Function calling schema | @@ -254,14 +252,14 @@ Content-Type: application/json "id": "", "object": "chat.completion", "created": 1738400000, - "model": "deepseek-reasoner", + "model": "deepseek-v4-pro", "choices": [ { "index": 0, "message": { "role": "assistant", "content": "final response", - "reasoning_content": "reasoning trace (reasoner models)" + "reasoning_content": "reasoning trace (when thinking is enabled)" }, "finish_reason": "stop" } @@ -296,7 +294,7 @@ data: [DONE] **Field notes**: - First delta includes `role: assistant` -- `deepseek-reasoner` / `deepseek-reasoner-search` models emit `delta.reasoning_content` +- When thinking is enabled, the stream may emit `delta.reasoning_content` - Text emits `delta.content` - Last chunk includes `finish_reason` and `usage` - Token counting prefers pass-through from upstream DeepSeek SSE (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent @@ -336,7 +334,7 @@ When `tools` is present, DS2API performs anti-leak handling: Additional notes: -- The parser currently follows XML/Markup-family tool payloads (``, ``, ``, `tool_use`, antml variants). Standalone JSON `tool_calls` payloads are not treated as executable tool calls by default. +- The parser currently treats only canonical XML tool blocks (`` / `` / ``) as executable tool calls. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text. - `tool_calls` shown inside fenced markdown code blocks (for example, ```json ... ```) are treated as examples, not executable calls. --- @@ -448,17 +446,17 @@ No auth required. 
{ "object": "list", "data": [ - {"id": "claude-sonnet-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, + {"id": "claude-sonnet-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, {"id": "claude-haiku-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"} ], "first_id": "claude-opus-4-6", - "last_id": "claude-instant-1.0", + "last_id": "claude-3-haiku-20240307", "has_more": false } ``` -> Note: the example is partial; besides the current primary aliases, the real response also includes Claude 4.x snapshots plus historical 3.x / 2.x / 1.x IDs and common aliases. +> Note: the example is partial; besides the current primary aliases, the real response also includes Claude 4.x snapshots plus historical 3.x IDs and common aliases. ### `POST /anthropic/v1/messages` @@ -476,7 +474,7 @@ anthropic-version: 2023-06-01 | Field | Type | Required | Notes | | --- | --- | --- | --- | -| `model` | string | ✅ | For example `claude-sonnet-4-5` / `claude-opus-4-6` / `claude-haiku-4-5` (compatible with `claude-3-5-haiku-latest`), plus historical Claude model IDs | +| `model` | string | ✅ | For example `claude-sonnet-4-6` / `claude-opus-4-6` / `claude-haiku-4-5` (compatible with `claude-3-5-haiku-latest`), plus historical Claude model IDs | | `messages` | array | ✅ | Claude-style messages | | `max_tokens` | number | ❌ | Auto-filled to `8192` when omitted; not strictly enforced by upstream bridge | | `stream` | boolean | ❌ | Default `false` | @@ -490,7 +488,7 @@ anthropic-version: 2023-06-01 "id": "msg_1738400000000000000", "type": "message", "role": "assistant", - "model": "claude-sonnet-4-5", + "model": "claude-sonnet-4-6", "content": [ {"type": "text", "text": "response"} ], @@ -544,7 +542,7 @@ data: {"type":"message_stop"} ```json { - "model": "claude-sonnet-4-5", + "model": "claude-sonnet-4-6", "messages": [ {"role": "user", 
"content": "Hello"} ] @@ -672,16 +670,16 @@ Returns sanitized config, including both `keys` and `api_keys`. "token_preview": "abcde..." } ], - "claude_mapping": { - "fast": "deepseek-chat", - "slow": "deepseek-reasoner" + "model_aliases": { + "claude-sonnet-4-6": "deepseek-v4-flash", + "claude-opus-4-6": "deepseek-v4-pro" } } ``` ### `POST /admin/config` -Only updates `keys`, `api_keys`, `accounts`, and `claude_mapping`. +Only updates `keys`, `api_keys`, `accounts`, and `model_aliases`. If both `api_keys` and `keys` are sent, the structured `api_keys` entries win so `name` / `remark` metadata is preserved; `keys` remains a legacy fallback. **Request**: @@ -696,9 +694,9 @@ If both `api_keys` and `keys` are sent, the structured `api_keys` entries win so "accounts": [ {"email": "user@example.com", "password": "pwd", "token": ""} ], - "claude_mapping": { - "fast": "deepseek-chat", - "slow": "deepseek-reasoner" + "model_aliases": { + "claude-sonnet-4-6": "deepseek-v4-flash", + "claude-opus-4-6": "deepseek-v4-pro" } } ``` @@ -713,7 +711,8 @@ Reads runtime settings and status, including: - `compat` (`wide_input_strict_output`, `strip_reference_markers`) - `responses` / `embeddings` - `auto_delete` (`mode`: `none` / `single` / `all`; legacy `sessions=true` is still treated as `all`) -- `claude_mapping` / `model_aliases` +- `history_split` (`enabled` always returns `true`, `trigger_after_turns`) +- `model_aliases` - `env_backed`, `needs_vercel_sync` - `toolcall` policy is fixed to `feature_match + high` and is no longer returned or editable via settings @@ -727,7 +726,7 @@ Hot-updates runtime settings. 
Supported fields: - `responses.store_ttl_seconds` - `embeddings.provider` - `auto_delete.mode` -- `claude_mapping` +- `history_split.trigger_after_turns` (`history_split.enabled` is forced on globally; legacy client writes are stored as `true`) - `model_aliases` - `toolcall` policy is fixed and is no longer writable through settings @@ -752,9 +751,9 @@ Imports full config with: The request can send config directly, or wrapped as `{"config": {...}, "mode":"merge"}`. Query params `?mode=merge` / `?mode=replace` are also supported. -Import accepts `keys`, `api_keys`, `accounts`, `claude_mapping` / `claude_model_mapping`, `model_aliases`, `admin`, `runtime`, `responses`, `embeddings`, and `auto_delete`; legacy `toolcall` fields are ignored. +`replace` mode replaces the full config shape while preserving Vercel sync metadata. `merge` mode merges `keys`, `api_keys`, `accounts`, and `model_aliases`, and overwrites non-empty fields under `admin`, `runtime`, `responses`, and `embeddings`. Manage `compat`, `auto_delete`, and `history_split` via `/admin/settings` or the config file; legacy `toolcall` fields are ignored. -> `compat` fields are managed via `/admin/settings` or the config file; this import endpoint does not update `compat`. +> Note: `merge` mode does not update `compat`, `auto_delete`, or `history_split`. ### `GET /admin/config/export` @@ -903,7 +902,7 @@ Updates proxy binding for a specific account. | Field | Required | Notes | | --- | --- | --- | | `identifier` | ✅ | email / mobile / token-only synthetic id | -| `model` | ❌ | default `deepseek-chat` | +| `model` | ❌ | default `deepseek-v4-flash` | | `message` | ❌ | if empty, only session creation is tested | **Response**: @@ -914,7 +913,7 @@ Updates proxy binding for a specific account. 
"success": true, "response_time": 1240, "message": "API test successful (session creation only)", - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "session_count": 0, "config_writable": true } @@ -985,7 +984,7 @@ Test API availability through the service itself. | Field | Required | Default | | --- | --- | --- | -| `model` | ❌ | `deepseek-chat` | +| `model` | ❌ | `deepseek-v4-flash` | | `message` | ❌ | `你好` | | `api_key` | ❌ | First key in config | @@ -1009,7 +1008,7 @@ Common request fields: | --- | --- | --- | --- | | `message` | No | `你好` | Convenience single-turn user message | | `messages` | No | Auto-derived from `message` | OpenAI-style message array | -| `model` | No | `deepseek-chat` | Target model | +| `model` | No | `deepseek-v4-flash` | Target model | | `stream` | No | `true` | Recommended to keep streaming enabled so raw SSE is recorded | | `api_key` | No | First configured key | Business API key to use | | `sample_id` | No | Auto-generated | Sample directory name | @@ -1219,7 +1218,7 @@ curl http://localhost:5001/v1/chat/completions \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ -d '{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": [{"role": "user", "content": "Hello"}], "stream": false }' @@ -1232,7 +1231,7 @@ curl http://localhost:5001/v1/chat/completions \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ -d '{ - "model": "deepseek-reasoner", + "model": "deepseek-v4-pro", "messages": [{"role": "user", "content": "Explain quantum entanglement"}], "stream": true }' @@ -1270,7 +1269,7 @@ curl http://localhost:5001/v1/chat/completions \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ -d '{ - "model": "deepseek-chat-search", + "model": "deepseek-v4-flash-search", "messages": [{"role": "user", "content": "Latest news today"}], "stream": true }' @@ -1283,7 +1282,7 @@ curl http://localhost:5001/v1/chat/completions \ -H 
"Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ -d '{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": [{"role": "user", "content": "What is the weather in Beijing?"}], "tools": [ { @@ -1344,7 +1343,7 @@ curl http://localhost:5001/anthropic/v1/messages \ -H "Content-Type: application/json" \ -H "anthropic-version: 2023-06-01" \ -d '{ - "model": "claude-sonnet-4-5", + "model": "claude-sonnet-4-6", "max_tokens": 1024, "messages": [{"role": "user", "content": "Hello"}] }' @@ -1381,7 +1380,7 @@ curl http://localhost:5001/v1/chat/completions \ -H "X-Ds2-Target-Account: user@example.com" \ -H "Content-Type: application/json" \ -d '{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": [{"role": "user", "content": "Hello"}] }' ``` diff --git a/API.md b/API.md index c86876f..35d97d4 100644 --- a/API.md +++ b/API.md @@ -31,13 +31,13 @@ | Base URL | `http://localhost:5001` 或你的部署域名 | | 默认 Content-Type | `application/json` | | 健康检查 | `GET /healthz`、`GET /readyz` | -| CORS | 已启用(`Access-Control-Allow-Origin: *`,允许 `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`) | +| CORS | 已启用(统一覆盖 `/v1/*`、`/anthropic/*`、`/v1beta/models/*`、`/admin/*`;浏览器有 `Origin` 时回显该 Origin,否则为 `*`;默认允许 `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`, `X-Goog-Api-Key`, `Anthropic-Version`, `Anthropic-Beta`,并会放行预检里声明的第三方请求头,如 `x-stainless-*`;Vercel 上 `/v1/chat/completions` 的 Node Runtime 也对齐相同行为;内部专用头 `X-Ds2-Internal-Token` 仍被拦截) | ### 3.0 接口适配层说明 - OpenAI / Claude / Gemini 三套协议已统一挂在同一 `chi` 路由树上,由 `internal/server/router.go` 负责装配。 - 适配器层职责收敛为:**请求归一化 → DeepSeek 调用 → 协议形态渲染**,减少历史版本中“同能力多处实现”的分叉。 -- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:当前以 XML/Markup 家族解析为主(含 `` / `` / `` / `tool_use` / antml 变体),并在流式场景执行防泄漏筛分。 +- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:当前唯一可执行的模型输出语法是 canonical XML 工具块 `` → `` → 
``,并在流式场景执行防泄漏筛分。 - `Admin API` 将配置与运行时策略分开:`/admin/config*` 管静态配置,`/admin/settings*` 管运行时行为。 --- @@ -160,6 +160,7 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=` | DELETE | `/admin/dev/captures` | Admin | 清空本地抓包记录 | | GET | `/admin/chat-history` | Admin | 查看服务器端对话记录 | | DELETE | `/admin/chat-history` | Admin | 清空服务器端对话记录 | +| GET | `/admin/chat-history/{id}` | Admin | 查看单条服务器端对话记录 | | DELETE | `/admin/chat-history/{id}` | Admin | 删除单条服务器端对话记录 | | PUT | `/admin/chat-history/settings` | Admin | 更新对话记录保留条数 | | GET | `/admin/version` | Admin | 查询当前版本与最新 Release | @@ -194,18 +195,12 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=` { "object": "list", "data": [ - {"id": "deepseek-chat", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-reasoner", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-chat-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-reasoner-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-expert-chat", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-expert-reasoner", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-expert-chat-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-expert-reasoner-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-vision-chat", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-vision-reasoner", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, - {"id": "deepseek-vision-chat-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", 
"permission": []}, - {"id": "deepseek-vision-reasoner-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []} + {"id": "deepseek-v4-flash", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-pro", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-flash-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-pro-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-vision", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}, + {"id": "deepseek-v4-vision-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []} ] } ``` @@ -221,12 +216,15 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=` 3. 未命中时按模型家族规则回退(如 `o*`、`gpt-*`、`claude-*`)。 4. 仍未命中则返回 `invalid_request_error`。 -当前内置默认 alias(节选): +当前内置默认 alias 来自 `internal/config/models.go`,`config.model_aliases` 会在运行时覆盖或补充同名映射。节选: -- OpenAI:`gpt-4o`、`gpt-4.1`、`gpt-4.1-mini`、`gpt-4.1-nano`、`gpt-5`、`gpt-5-mini`、`gpt-5-codex` -- OpenAI Reasoning:`o1`、`o1-mini`、`o3`、`o3-mini` -- Claude:`claude-sonnet-4-5`、`claude-haiku-4-5`、`claude-opus-4-6`(及 `claude-3-5-sonnet` / `claude-3-5-haiku` / `claude-3-opus` 兼容别名) -- Gemini:`gemini-2.5-pro`、`gemini-2.5-flash` +- OpenAI / Codex:`gpt-4o`、`gpt-4.1`、`gpt-5`、`gpt-5.5`、`gpt-5-codex`、`gpt-5.3-codex`、`codex-mini-latest` +- OpenAI reasoning:`o1`、`o3`、`o3-deep-research`、`o4-mini` +- Claude:`claude-opus-4-6`、`claude-sonnet-4-6`、`claude-haiku-4-5`、`claude-3-5-sonnet-latest` +- Gemini:`gemini-2.5-pro`、`gemini-2.5-flash`、`gemini-pro-vision` +- 其他兼容族:`llama-*`、`qwen-*`、`mistral-*`、`command-*` 会按家族启发式回退 + +退役历史模型(如 `claude-1.*`、`claude-2.*`、`claude-instant-*`、`gpt-3.5*`)会被显式拒绝。 ### `POST /v1/chat/completions` @@ -241,7 +239,7 @@ Content-Type: application/json | 
字段 | 类型 | 必填 | 说明 | | --- | --- | --- | --- | -| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias(如 `gpt-5`、`gpt-5-mini`、`gpt-5-codex`、`o3`、`claude-opus-4-6`、`gemini-2.5-pro`、`gemini-2.5-flash` 等) | +| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias(如 `gpt-5.5`、`gpt-5.4-mini`、`gpt-5.3-codex`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-6`、`gemini-2.5-pro`、`gemini-2.5-flash` 等) | | `messages` | array | ✅ | OpenAI 风格消息数组 | | `stream` | boolean | ❌ | 默认 `false` | | `tools` | array | ❌ | Function Calling 定义 | @@ -254,14 +252,14 @@ Content-Type: application/json "id": "", "object": "chat.completion", "created": 1738400000, - "model": "deepseek-reasoner", + "model": "deepseek-v4-pro", "choices": [ { "index": 0, "message": { "role": "assistant", "content": "最终回复", - "reasoning_content": "思考内容(reasoner 模型)" + "reasoning_content": "思考内容(开启 thinking 时)" }, "finish_reason": "stop" } @@ -296,7 +294,7 @@ data: [DONE] **字段说明**: - 首个 delta 包含 `role: assistant` -- `deepseek-reasoner` / `deepseek-reasoner-search` 模型输出 `delta.reasoning_content` +- 开启 thinking 时会输出 `delta.reasoning_content` - 普通文本输出 `delta.content` - 最后一段包含 `finish_reason` 和 `usage` - token 计数优先透传上游 DeepSeek SSE(如 `accumulated_token_usage` / `token_usage`);仅在上游缺失时回退本地估算 @@ -337,7 +335,7 @@ data: [DONE] 补充说明: - **非代码块上下文**下,工具负载即使与普通文本混合,也会按特征识别并产出可执行 tool call(前后普通文本仍可透传)。 -- 解析器当前走 XML/Markup 家族(包含 ``、``、``、`tool_use`、antml 风格);纯 JSON `tool_calls` 片段默认不会直接作为可执行调用解析。 +- 解析器当前只把 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理。 - Markdown fenced code block(例如 ```json ... 
```)中的 `tool_calls` 仅视为示例文本,不会被执行。 --- @@ -449,17 +447,17 @@ data: [DONE] { "object": "list", "data": [ - {"id": "claude-sonnet-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, + {"id": "claude-sonnet-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, {"id": "claude-haiku-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"} ], "first_id": "claude-opus-4-6", - "last_id": "claude-instant-1.0", + "last_id": "claude-3-haiku-20240307", "has_more": false } ``` -> 说明:示例仅展示部分模型;实际返回除当前主别名外,还包含 Claude 4.x snapshots,以及 3.x / 2.x / 1.x 历史模型 ID 与常见别名。 +> 说明:示例仅展示部分模型;实际返回除当前主别名外,还包含 Claude 4.x snapshots,以及 3.x 历史模型 ID 与常见别名。 ### `POST /anthropic/v1/messages` @@ -477,7 +475,7 @@ anthropic-version: 2023-06-01 | 字段 | 类型 | 必填 | 说明 | | --- | --- | --- | --- | -| `model` | string | ✅ | 例如 `claude-sonnet-4-5` / `claude-opus-4-6` / `claude-haiku-4-5`(兼容 `claude-3-5-haiku-latest`),并支持历史 Claude 模型 ID | +| `model` | string | ✅ | 例如 `claude-sonnet-4-6` / `claude-opus-4-6` / `claude-haiku-4-5`(兼容 `claude-sonnet-4-5`、`claude-3-5-haiku-latest`),并支持历史 Claude 模型 ID | | `messages` | array | ✅ | Claude 风格消息数组 | | `max_tokens` | number | ❌ | 缺省自动补 `8192`;当前实现不会硬性截断上游输出 | | `stream` | boolean | ❌ | 默认 `false` | @@ -491,7 +489,7 @@ anthropic-version: 2023-06-01 "id": "msg_1738400000000000000", "type": "message", "role": "assistant", - "model": "claude-sonnet-4-5", + "model": "claude-sonnet-4-6", "content": [ {"type": "text", "text": "回复内容"} ], @@ -545,7 +543,7 @@ data: {"type":"message_stop"} ```json { - "model": "claude-sonnet-4-5", + "model": "claude-sonnet-4-6", "messages": [ {"role": "user", "content": "你好"} ] @@ -673,16 +671,16 @@ data: {"type":"message_stop"} "token_preview": "abcde..." 
} ], - "claude_mapping": { - "fast": "deepseek-chat", - "slow": "deepseek-reasoner" + "model_aliases": { + "claude-sonnet-4-6": "deepseek-v4-flash", + "claude-opus-4-6": "deepseek-v4-pro" } } ``` ### `POST /admin/config` -只更新 `keys`、`api_keys`、`accounts`、`claude_mapping`。 +只更新 `keys`、`api_keys`、`accounts`、`model_aliases`。 如果同时发送 `api_keys` 与 `keys`,优先保留 `api_keys` 中的结构化 `name` / `remark`;`keys` 仅作为旧格式兼容回退。 **请求**: @@ -697,9 +695,9 @@ data: {"type":"message_stop"} "accounts": [ {"email": "user@example.com", "password": "pwd", "token": ""} ], - "claude_mapping": { - "fast": "deepseek-chat", - "slow": "deepseek-reasoner" + "model_aliases": { + "claude-sonnet-4-6": "deepseek-v4-flash", + "claude-opus-4-6": "deepseek-v4-pro" } } ``` @@ -714,7 +712,8 @@ data: {"type":"message_stop"} - `compat`(`wide_input_strict_output`、`strip_reference_markers`) - `responses` / `embeddings` - `auto_delete`(`mode`:`none` / `single` / `all`;旧配置 `sessions=true` 仍按 `all` 处理) -- `claude_mapping` / `model_aliases` +- `history_split`(`enabled` 固定返回 `true`、`trigger_after_turns`) +- `model_aliases` - `env_backed`、`needs_vercel_sync` - `toolcall` 策略已固定为 `feature_match + high`,不再通过 settings 返回或修改 @@ -728,7 +727,7 @@ data: {"type":"message_stop"} - `responses.store_ttl_seconds` - `embeddings.provider` - `auto_delete.mode` -- `claude_mapping` +- `history_split.trigger_after_turns`(`history_split.enabled` 已全局强制开启;旧客户端传入时会被保存为 `true`) - `model_aliases` - `toolcall` 策略已固定,不再作为可写入字段 @@ -753,9 +752,9 @@ data: {"type":"message_stop"} 请求可直接传配置对象,或使用 `{"config": {...}, "mode":"merge"}` 包裹格式。 也支持在查询参数里传 `?mode=merge` / `?mode=replace`。 -导入时会接受 `keys`、`api_keys`、`accounts`、`claude_mapping` / `claude_model_mapping`、`model_aliases`、`admin`、`runtime`、`responses`、`embeddings`、`auto_delete` 等字段;`toolcall` 相关字段会被忽略。 +`replace` 模式会按完整配置结构替换(保留 Vercel 同步元信息);`merge` 模式会合并 `keys`、`api_keys`、`accounts`、`model_aliases`,并覆盖 `admin`、`runtime`、`responses`、`embeddings` 中的非空字段。`compat`、`auto_delete`、`history_split` 建议通过 
`/admin/settings` 或配置文件管理;`toolcall` 相关字段会被忽略。 -> `compat` 相关字段请通过 `/admin/settings` 或配置文件管理;该导入接口不会更新 `compat`。 +> 注意:`merge` 模式不会更新 `compat`、`auto_delete`、`history_split`。 ### `GET /admin/config/export` @@ -907,7 +906,7 @@ data: {"type":"message_stop"} | 字段 | 必填 | 说明 | | --- | --- | --- | | `identifier` | ✅ | email / mobile / token-only 合成标识 | -| `model` | ❌ | 默认 `deepseek-chat` | +| `model` | ❌ | 默认 `deepseek-v4-flash` | | `message` | ❌ | 空字符串时仅测试会话创建 | **响应**: @@ -918,7 +917,7 @@ data: {"type":"message_stop"} "success": true, "response_time": 1240, "message": "API 测试成功(仅会话创建)", - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "session_count": 0, "config_writable": true } @@ -988,7 +987,7 @@ data: {"type":"message_stop"} | 字段 | 必填 | 默认值 | | --- | --- | --- | -| `model` | ❌ | `deepseek-chat` | +| `model` | ❌ | `deepseek-v4-flash` | | `message` | ❌ | `你好` | | `api_key` | ❌ | 配置中第一个 key | @@ -1012,7 +1011,7 @@ data: {"type":"message_stop"} | --- | --- | --- | --- | | `message` | 否 | `你好` | 便捷单轮用户消息 | | `messages` | 否 | 自动由 `message` 生成 | OpenAI 风格消息数组 | -| `model` | 否 | `deepseek-chat` | 目标模型 | +| `model` | 否 | `deepseek-v4-flash` | 目标模型 | | `stream` | 否 | `true` | 建议保留流式,以记录原始 SSE | | `api_key` | 否 | 配置中第一个 key | 调用业务接口使用的 key | | `sample_id` | 否 | 自动生成 | 样本目录名 | @@ -1222,7 +1221,7 @@ curl http://localhost:5001/v1/chat/completions \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ -d '{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": [{"role": "user", "content": "你好"}], "stream": false }' @@ -1235,7 +1234,7 @@ curl http://localhost:5001/v1/chat/completions \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ -d '{ - "model": "deepseek-reasoner", + "model": "deepseek-v4-pro", "messages": [{"role": "user", "content": "解释一下量子纠缠"}], "stream": true }' @@ -1248,7 +1247,7 @@ curl http://localhost:5001/v1/responses \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: 
application/json" \ -d '{ - "model": "gpt-5-codex", + "model": "gpt-5.3-codex", "input": "写一个 golang 的 hello world", "stream": true }' @@ -1273,7 +1272,7 @@ curl http://localhost:5001/v1/chat/completions \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ -d '{ - "model": "deepseek-chat-search", + "model": "deepseek-v4-flash-search", "messages": [{"role": "user", "content": "今天的新闻"}], "stream": true }' @@ -1286,7 +1285,7 @@ curl http://localhost:5001/v1/chat/completions \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ -d '{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": [{"role": "user", "content": "北京今天天气怎么样?"}], "tools": [ { @@ -1347,7 +1346,7 @@ curl http://localhost:5001/anthropic/v1/messages \ -H "Content-Type: application/json" \ -H "anthropic-version: 2023-06-01" \ -d '{ - "model": "claude-sonnet-4-5", + "model": "claude-sonnet-4-6", "max_tokens": 1024, "messages": [{"role": "user", "content": "你好"}] }' @@ -1384,7 +1383,7 @@ curl http://localhost:5001/v1/chat/completions \ -H "X-Ds2-Target-Account: user@example.com" \ -H "Content-Type: application/json" \ -d '{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": [{"role": "user", "content": "你好"}] }' ``` diff --git a/README.MD b/README.MD index 3aeccb8..a525f02 100644 --- a/README.MD +++ b/README.MD @@ -89,6 +89,7 @@ flowchart LR | OpenAI 兼容 | `GET /v1/models`、`GET /v1/models/{id}`、`POST /v1/chat/completions`、`POST /v1/responses`、`GET /v1/responses/{response_id}`、`POST /v1/embeddings`、`POST /v1/files` | | Claude 兼容 | `GET /anthropic/v1/models`、`POST /anthropic/v1/messages`、`POST /anthropic/v1/messages/count_tokens`(及快捷路径 `/v1/messages`、`/messages`) | | Gemini 兼容 | `POST /v1beta/models/{model}:generateContent`、`POST /v1beta/models/{model}:streamGenerateContent`(及 `/v1/models/{model}:*` 路径) | +| 统一 CORS 兼容 | `/v1/*`、`/anthropic/*`、`/v1beta/models/*`、`/admin/*` 统一走同一套 CORS 策略;Vercel 上 
`/v1/chat/completions` 的 Node Runtime 也对齐相同放行规则,尽量减少第三方预检请求头限制 | | 多账号轮询 | 自动 token 刷新、邮箱/手机号双登录方式 | | 并发队列控制 | 每账号 in-flight 上限 + 等待队列,动态计算建议并发值 | | DeepSeek PoW | 纯 Go 高性能实现(DeepSeekHashV1),毫秒级响应 | @@ -114,38 +115,32 @@ flowchart LR | 模型类型 | 模型 ID | thinking | search | | --- | --- | --- | --- | -| default | `deepseek-chat` | ❌ | ❌ | -| default | `deepseek-reasoner` | ✅ | ❌ | -| default | `deepseek-chat-search` | ❌ | ✅ | -| default | `deepseek-reasoner-search` | ✅ | ✅ | -| expert | `deepseek-expert-chat` | ❌ | ❌ | -| expert | `deepseek-expert-reasoner` | ✅ | ❌ | -| expert | `deepseek-expert-chat-search` | ❌ | ✅ | -| expert | `deepseek-expert-reasoner-search` | ✅ | ✅ | -| vision | `deepseek-vision-chat` | ❌ | ❌ | -| vision | `deepseek-vision-reasoner` | ✅ | ❌ | -| vision | `deepseek-vision-chat-search` | ❌ | ✅ | -| vision | `deepseek-vision-reasoner-search` | ✅ | ✅ | +| default | `deepseek-v4-flash` | 默认开启,可由请求参数控制 | ❌ | +| expert | `deepseek-v4-pro` | 默认开启,可由请求参数控制 | ❌ | +| default | `deepseek-v4-flash-search` | 默认开启,可由请求参数控制 | ✅ | +| expert | `deepseek-v4-pro-search` | 默认开启,可由请求参数控制 | ✅ | +| vision | `deepseek-v4-vision` | 默认开启,可由请求参数控制 | ❌ | +| vision | `deepseek-v4-vision-search` | 默认开启,可由请求参数控制 | ✅ | -除原生模型外,也支持常见 alias 输入(如 `gpt-5`、`gpt-5-mini`、`gpt-5-codex`、`gpt-4.1`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-5`、`gemini-2.5-pro`、`gemini-2.5-flash` 等),但 `/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID。 +除原生模型外,也支持常见 alias 输入(如 `gpt-4.1`、`gpt-5`、`gpt-5-codex`、`o3`、`claude-*`、`gemini-*` 等),但 `/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID。完整 alias 行为以 [API.md](API.md#模型-alias-解析策略) 和 `config.example.json` 为准。 ### Claude 接口(`GET /anthropic/v1/models`) | 当前常用模型 | 默认映射 | | --- | --- | -| `claude-sonnet-4-5` | `deepseek-chat` | -| `claude-haiku-4-5`(兼容 `claude-3-5-haiku-latest`) | `deepseek-chat` | -| `claude-opus-4-6` | `deepseek-reasoner` | +| `claude-sonnet-4-6` | `deepseek-v4-flash` | +| `claude-haiku-4-5`(兼容 `claude-3-5-haiku-latest`) | `deepseek-v4-flash` | +| `claude-opus-4-6` 
| `deepseek-v4-pro` | -可通过配置中的 `claude_mapping` 或 `claude_model_mapping` 覆盖映射关系。 -`/anthropic/v1/models` 除上述当前主别名外,还会返回 Claude 4.x snapshots,以及 3.x / 2.x / 1.x 历史模型 ID 与常见 alias,便于旧客户端直接兼容。 +可通过配置中的 `model_aliases` 覆盖映射关系。 +`/anthropic/v1/models` 除上述主别名外,还会返回 Claude 4.x snapshots、3.x 历史模型 ID 与常见 alias,便于旧客户端直接兼容。 #### Claude Code 接入避坑(实测) - `ANTHROPIC_BASE_URL` 推荐直接指向 DS2API 根地址(例如 `http://127.0.0.1:5001`),Claude Code 会请求 `/v1/messages?beta=true`。 - `ANTHROPIC_API_KEY` 需要与 `config.json` 中 `keys` 一致;建议同时保留常规 key 与 `sk-ant-*` 形态 key,兼容不同客户端校验习惯。 - 若系统设置了代理,建议对 DS2API 地址配置 `NO_PROXY=127.0.0.1,localhost,<你的主机IP>`,避免本地回环请求被代理拦截。 -- 如遇“工具调用输出成文本、未执行”问题,请优先检查模型输出是否为受支持的 XML/Markup 工具块(例如 `` / `` / `` / `tool_use`),而不是纯 JSON `tool_calls` 片段。 +- 如遇“工具调用输出成文本、未执行”问题,请优先检查模型输出是否为当前唯一受支持的 XML 工具块:`...`,而不是旧式 `` / `` / `` / ``、``、`tool_use` 或纯 JSON `tool_calls` 片段。 ### Gemini 接口 @@ -239,7 +234,7 @@ cp config.example.json config.json base64 < config.json | tr -d '\n' ``` -> **流式说明**:`/v1/chat/completions` 在 Vercel 上默认走 `api/chat-stream.js`(Node Runtime)以保证实时 SSE。鉴权、账号选择、会话/PoW 准备仍由 Go 内部 prepare 接口完成;流式响应(含 `tools`)在 Node 侧执行与 Go 对齐的输出组装与防泄漏处理。 +> **流式说明**:`/v1/chat/completions` 在 Vercel 上默认走 `api/chat-stream.js`(Node Runtime)以保证实时 SSE。鉴权、账号选择、会话/PoW 准备仍由 Go 内部 prepare 接口完成;流式响应(含 `tools`)在 Node 侧执行与 Go 对齐的输出组装与防泄漏处理。虽然这里只有 OpenAI chat 流式走 Node,但 CORS 放行策略仍与 Go 主路由保持一致,统一覆盖第三方客户端预检场景。 详细部署说明请参阅 [部署指南](docs/DEPLOY.md)。 @@ -268,123 +263,18 @@ go run ./cmd/ds2api ## 配置说明 -### `config.json` 示例 +`README` 只保留快速入口,完整字段请以 [config.example.json](config.example.json) 为模板,并参考 [部署指南](docs/DEPLOY.md#0-前置要求) 与 [API 配置最佳实践](API.md#配置最佳实践)。 -```json -{ - "keys": ["your-api-key-1", "your-api-key-2"], - "api_keys": [ - { - "key": "your-api-key-1", - "name": "主 Key", - "remark": "生产流量" - } - ], - "accounts": [ - { - "name": "账号 A", - "remark": "主账号", - "email": "user@example.com", - "password": "your-password" - }, - { - "mobile": "12345678901", - "password": "your-password" - } - ], - 
"model_aliases": { - "gpt-4o": "deepseek-chat", - "gpt-5": "deepseek-chat", - "gpt-5-mini": "deepseek-chat", - "gpt-5-codex": "deepseek-reasoner", - "o3": "deepseek-reasoner", - "claude-opus-4-6": "deepseek-reasoner", - "gemini-2.5-flash": "deepseek-chat" - }, - "compat": { - "wide_input_strict_output": true, - "strip_reference_markers": true - }, - "responses": { - "store_ttl_seconds": 900 - }, - "embeddings": { - "provider": "deterministic" - }, - "claude_mapping": { - "fast": "deepseek-chat", - "slow": "deepseek-reasoner" - }, - "admin": { - "jwt_expire_hours": 24 - }, - "runtime": { - "account_max_inflight": 2, - "account_max_queue": 0, - "global_max_inflight": 0, - "token_refresh_interval_hours": 6 - }, - "auto_delete": { - "mode": "none" - } -} -``` +常用字段: -- `keys`:API 访问密钥列表,客户端通过 `Authorization: Bearer ` 鉴权 -- `api_keys`:推荐使用的新结构化密钥列表,支持 `key` + `name` + `remark`(`keys` 仍兼容) -- `accounts`:DeepSeek 账号列表,支持 `email` 或 `mobile` 登录;可额外填写 `name` / `remark` 便于管理 -- `token`:配置文件中即使填写也会在加载时被清空(不会从 `config.json` 读取 token);实际 token 仅在运行时内存中维护并自动刷新 -- `model_aliases`:常见模型名(如 GPT/Codex/Claude)到 DeepSeek 模型的映射 -- `compat.wide_input_strict_output`:建议保持 `true`(当前实现默认宽进严出) -- `compat.strip_reference_markers`:建议保持 `true`,用于清理可见输出中的引用/标记 -- `toolcall`:旧字段,当前实现已固定为特征匹配 + 高置信早发;即使保留在配置里也会被忽略 -- `responses.store_ttl_seconds`:`/v1/responses/{id}` 的内存缓存 TTL -- `embeddings.provider`:embedding 提供方(当前内置 `deterministic/mock/builtin`) -- `claude_mapping`:字典中 `fast`/`slow` 后缀映射到对应 DeepSeek 模型(兼容读取 `claude_model_mapping`) -- `admin`:管理后台设置(JWT 过期时间、密码哈希等),可通过 Admin Settings API 热更新 -- `runtime`:运行时参数(并发限制、队列大小、托管账号 token 刷新间隔),可通过 Admin Settings API 热更新;`account_max_queue=0`/`global_max_inflight=0` 表示按推荐值自动计算,`token_refresh_interval_hours=6` 为默认强制重登间隔 -- `auto_delete.mode`:请求结束后如何清理 DeepSeek 远端聊天记录,支持 `none`(默认,不删除)、`single`(仅删除当前会话)、`all`(清空全部会话);旧配置里的 `auto_delete.sessions=true` 仍会被视为 `all` +- `keys` / `api_keys`:客户端访问密钥,`api_keys` 支持 `name` 与 `remark` 元信息,`keys` 继续兼容。 +- 
`accounts`:DeepSeek 托管账号,支持 `email` 或 `mobile` 登录,可配置代理、名称和备注。 +- `model_aliases`:OpenAI / Claude / Gemini 共用的模型 alias 映射。 +- `runtime`:账号并发、队列与 token 刷新策略,可通过 Admin Settings 热更新。 +- `auto_delete.mode`:请求结束后的远端会话清理策略,支持 `none` / `single` / `all`。 +- `history_split`:多轮历史拆分策略,已全局强制开启;可调整触发阈值,避免长历史全部内联进 prompt。 -### 环境变量 - -> 建议:长期维护请优先以 `config.json`(或其 Base64)为单一配置源。环境变量仅保留部署必需项;`DS2API_CONFIG_JSON` 主要用于 Vercel/无持久盘场景,后续可能进一步收敛。 - -| 变量 | 用途 | 默认值 | -| --- | --- | --- | -| `PORT` | 服务端口 | `5001` | -| `LOG_LEVEL` | 日志级别 | `INFO`(可选:`DEBUG`/`WARN`/`ERROR`) | -| `DS2API_ADMIN_KEY` | Admin 登录密钥 | `admin` | -| `DS2API_JWT_SECRET` | Admin JWT 签名密钥 | 等同 `DS2API_ADMIN_KEY` | -| `DS2API_JWT_EXPIRE_HOURS` | Admin JWT 过期小时数 | `24` | -| `DS2API_CONFIG_PATH` | 配置文件路径 | `config.json` | -| `DS2API_CONFIG_JSON` | 直接注入配置(JSON 或 Base64) | — | -| `DS2API_CHAT_HISTORY_PATH` | 服务器端对话记录文件路径 | `data/chat_history.json` | -| `DS2API_ENV_WRITEBACK` | 环境变量模式下自动写回配置文件并切换文件模式(`1/true/yes/on`) | 关闭 | -| `DS2API_STATIC_ADMIN_DIR` | 管理台静态文件目录 | `static/admin` | -| `DS2API_AUTO_BUILD_WEBUI` | 启动时自动构建 WebUI | 本地开启,Vercel 关闭 | -| `DS2API_DEV_PACKET_CAPTURE` | 本地开发抓包开关(记录最近会话请求/响应体) | 本地非 Vercel 默认开启 | -| `DS2API_DEV_PACKET_CAPTURE_LIMIT` | 本地抓包保留条数(超出自动淘汰) | `20` | -| `DS2API_DEV_PACKET_CAPTURE_MAX_BODY_BYTES` | 单条响应体最大记录字节数 | `5242880` | -| `DS2API_ACCOUNT_MAX_INFLIGHT` | 每账号最大并发 in-flight 请求数 | `2` | -| `DS2API_ACCOUNT_MAX_QUEUE` | 等待队列上限 | `recommended_concurrency` | -| `DS2API_GLOBAL_MAX_INFLIGHT` | 全局最大 in-flight 请求数 | `recommended_concurrency` | -| `DS2API_VERCEL_INTERNAL_SECRET` | Vercel 混合流式内部鉴权密钥 | 回退用 `DS2API_ADMIN_KEY` | -| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | 流式 lease 过期秒数 | `900` | -| `VERCEL_TOKEN` | Vercel 同步 token | — | -| `VERCEL_PROJECT_ID` | Vercel 项目 ID | — | -| `VERCEL_TEAM_ID` | Vercel 团队 ID | — | -| `DS2API_VERCEL_PROTECTION_BYPASS` | Vercel 部署保护绕过密钥(内部 Node→Go 调用) | — | - -> 提示:当检测到 `DS2API_CONFIG_JSON` 时,管理台会显示当前模式风险与自动持久化状态(含 `DS2API_CONFIG_PATH` 路径与模式切换说明)。 - -#### 必填 
/ 可选(按部署方式) - -- **所有部署都必填**:`DS2API_ADMIN_KEY` -- **配置来源二选一(推荐前者)**: - - `config.json` 文件(推荐,持久化更直观) - - `DS2API_CONFIG_JSON`(可选,适合 Vercel;支持 JSON 或 Base64) -- **仅在环境变量配置模式建议开启**:`DS2API_ENV_WRITEBACK=1`(避免管理台改动重启后丢失) -- 其余环境变量均为可选调优项。 +环境变量完整列表见 [部署指南](docs/DEPLOY.md),接口鉴权规则见 [API.md](API.md#鉴权规则)。 ## 鉴权模式 @@ -416,7 +306,7 @@ Gemini 路由还可以使用 `x-goog-api-key`,或在没有认证头时使用 ` 当请求中带 `tools` 时,DS2API 会做防泄漏处理与结构化转译: 1. 只在**非代码块上下文**启用执行型 toolcall 识别(代码块示例默认不触发) -2. 解析层当前以 XML/Markup 家族为准(`` / `` / `` / `tool_use` / antml 变体);纯 JSON `tool_calls` 片段默认不作为可执行调用解析 +2. 解析层当前只把 canonical XML 工具块视为可执行调用:`` → `` → ``;旧式 `` / `` / `` / ``、``、`tool_use` / antml 变体与纯 JSON `tool_calls` 片段都会按普通文本处理 3. `responses` 流式严格使用官方 item 生命周期事件(`response.output_item.*`、`response.content_part.*`、`response.function_call_arguments.*`) 4. `responses` 支持并执行 `tool_choice`(`auto`/`none`/`required`/强制函数);`required` 违规时非流式返回 `422`,流式返回 `response.failed` 5. 客户端请求哪种协议,就按该协议返回工具调用(OpenAI/Claude/Gemini 各自原生结构);模型侧优先约束输出规范 XML,再由兼容层转译 @@ -467,44 +357,18 @@ go run ./cmd/ds2api ## 测试 -```bash -# 单元测试(Go + Node) -./tests/scripts/run-unit-all.sh - -# 一键端到端全链路测试(真实账号,生成完整请求/响应日志) -./tests/scripts/run-live.sh - -# 或自定义参数 -go run ./cmd/ds2api-tests \ - --config config.json \ - --admin-key admin \ - --out artifacts/testsuite \ - --timeout 120 \ - --retries 2 -``` - -```bash -# 发布前阻断门禁 -./tests/scripts/check-stage6-manual-smoke.sh -./tests/scripts/check-refactor-line-gate.sh -./tests/scripts/run-unit-all.sh -npm ci --prefix webui && npm run build --prefix webui -``` - -## 测试 - 详细测试指南请参阅 [docs/TESTING.md](docs/TESTING.md)。 ### 快速测试命令 ```bash -# 运行所有单元测试 -go test ./... 
+# 本地 PR 门禁 +./scripts/lint.sh +./tests/scripts/check-refactor-line-gate.sh +./tests/scripts/run-unit-all.sh +npm run build --prefix webui -# 运行 tool calls 相关测试(调试工具调用问题) -go test -v -run 'TestParseToolCalls|TestRepair' ./internal/toolcall/ - -# 运行端到端测试 +# 端到端全链路测试(真实账号,生成完整请求/响应日志) ./tests/scripts/run-live.sh ``` diff --git a/README.en.md b/README.en.md index b1a4a7a..299d228 100644 --- a/README.en.md +++ b/README.en.md @@ -87,6 +87,7 @@ For the full module-by-module architecture and directory responsibilities, see [ | OpenAI compatible | `GET /v1/models`, `GET /v1/models/{id}`, `POST /v1/chat/completions`, `POST /v1/responses`, `GET /v1/responses/{response_id}`, `POST /v1/embeddings`, `POST /v1/files` | | Claude compatible | `GET /anthropic/v1/models`, `POST /anthropic/v1/messages`, `POST /anthropic/v1/messages/count_tokens` (plus shortcut paths `/v1/messages`, `/messages`) | | Gemini compatible | `POST /v1beta/models/{model}:generateContent`, `POST /v1beta/models/{model}:streamGenerateContent` (plus `/v1/models/{model}:*` paths) | +| Unified CORS compatibility | `/v1/*`, `/anthropic/*`, `/v1beta/models/*`, and `/admin/*` share one CORS policy; on Vercel, the Node Runtime for `/v1/chat/completions` mirrors the same relaxed preflight behavior for third-party clients | | Multi-account rotation | Auto token refresh, email/mobile dual login | | Concurrency control | Per-account in-flight limit + waiting queue, dynamic recommended concurrency | | DeepSeek PoW | Pure Go high-performance solver (DeepSeekHashV1), ms-level response | @@ -112,38 +113,32 @@ For the full module-by-module architecture and directory responsibilities, see [ | Family | Model ID | thinking | search | | --- | --- | --- | --- | -| default | `deepseek-chat` | ❌ | ❌ | -| default | `deepseek-reasoner` | ✅ | ❌ | -| default | `deepseek-chat-search` | ❌ | ✅ | -| default | `deepseek-reasoner-search` | ✅ | ✅ | -| expert | `deepseek-expert-chat` | ❌ | ❌ | -| expert | `deepseek-expert-reasoner` | ✅ | ❌ | -| 
expert | `deepseek-expert-chat-search` | ❌ | ✅ | -| expert | `deepseek-expert-reasoner-search` | ✅ | ✅ | -| vision | `deepseek-vision-chat` | ❌ | ❌ | -| vision | `deepseek-vision-reasoner` | ✅ | ❌ | -| vision | `deepseek-vision-chat-search` | ❌ | ✅ | -| vision | `deepseek-vision-reasoner-search` | ✅ | ✅ | +| default | `deepseek-v4-flash` | enabled by default, request-controlled | ❌ | +| expert | `deepseek-v4-pro` | enabled by default, request-controlled | ❌ | +| default | `deepseek-v4-flash-search` | enabled by default, request-controlled | ✅ | +| expert | `deepseek-v4-pro-search` | enabled by default, request-controlled | ✅ | +| vision | `deepseek-v4-vision` | enabled by default, request-controlled | ❌ | +| vision | `deepseek-v4-vision-search` | enabled by default, request-controlled | ✅ | -Besides native IDs, DS2API also accepts common aliases as input (for example `gpt-5`, `gpt-5-mini`, `gpt-5-codex`, `gpt-4.1`, `o3`, `claude-opus-4-6`, `claude-sonnet-4-5`, `gemini-2.5-pro`, `gemini-2.5-flash`), but `/v1/models` returns normalized DeepSeek native model IDs. +Besides native IDs, DS2API also accepts common aliases as input (for example `gpt-4.1`, `gpt-5`, `gpt-5-codex`, `o3`, `claude-*`, `gemini-*`), but `/v1/models` returns normalized DeepSeek native model IDs. The complete alias behavior is documented in [API.en.md](API.en.md#model-alias-resolution) and `config.example.json`. ### Claude Endpoint (`GET /anthropic/v1/models`) | Current common model | Default Mapping | | --- | --- | -| `claude-sonnet-4-5` | `deepseek-chat` | -| `claude-haiku-4-5` (compatible with `claude-3-5-haiku-latest`) | `deepseek-chat` | -| `claude-opus-4-6` | `deepseek-reasoner` | +| `claude-sonnet-4-6` | `deepseek-v4-flash` | +| `claude-haiku-4-5` (compatible with `claude-3-5-haiku-latest`) | `deepseek-v4-flash` | +| `claude-opus-4-6` | `deepseek-v4-pro` | -Override mapping via `claude_mapping` or `claude_model_mapping` in config. 
-Besides the current primary aliases above, `/anthropic/v1/models` also returns Claude 4.x snapshots plus historical 3.x / 2.x / 1.x IDs and common aliases for legacy client compatibility. +Override mapping via the global `model_aliases` config. +Besides the primary aliases above, `/anthropic/v1/models` also returns Claude 4.x snapshots plus historical 3.x IDs and common aliases for legacy client compatibility. #### Claude Code integration pitfalls (validated) - Set `ANTHROPIC_BASE_URL` to the DS2API root URL (for example `http://127.0.0.1:5001`). Claude Code sends requests to `/v1/messages?beta=true`. - `ANTHROPIC_API_KEY` must match an entry in `keys` from `config.json`. Keeping both a regular key and an `sk-ant-*` style key improves client compatibility. - If your environment has proxy variables, set `NO_PROXY=127.0.0.1,localhost,<your-host-IP>` for DS2API to avoid proxy interception of local traffic. -- If tool calls are rendered as plain text and not executed, first verify the model output uses supported XML/Markup tool blocks (`` / `` / `` / `tool_use`) rather than standalone JSON `tool_calls`. +- If tool calls are rendered as plain text and not executed, first verify the model output uses the only supported XML block: `...`, not legacy `` / `` / `` / ``, ``, `tool_use`, or standalone JSON `tool_calls`. ### Gemini Endpoint @@ -237,7 +232,7 @@ Recommended: convert `config.json` to Base64 locally, then paste into `DS2API_CO base64 < config.json | tr -d '\n' ``` -> **Streaming note**: `/v1/chat/completions` on Vercel is routed to `api/chat-stream.js` (Node Runtime) for real-time SSE. Auth, account selection, and session/PoW preparation are still handled by the Go internal prepare endpoint; streaming output (including `tools`) is assembled on Node with Go-aligned anti-leak handling. +> **Streaming note**: `/v1/chat/completions` on Vercel is routed to `api/chat-stream.js` (Node Runtime) for real-time SSE. 
Auth, account selection, and session/PoW preparation are still handled by the Go internal prepare endpoint; streaming output (including `tools`) is assembled on Node with Go-aligned anti-leak handling. This is the only interface family currently routed through Node, and its CORS allow behavior is kept aligned with the Go router so third-party preflight handling stays unified. For detailed deployment instructions, see the [Deployment Guide](docs/DEPLOY.en.md). @@ -266,102 +261,18 @@ The server actually binds to `0.0.0.0:5001`, so devices on the same LAN can usua ## Configuration -### `config.json` Example +`README` keeps only the onboarding path. Use [config.example.json](config.example.json) as the field template, and see the [deployment guide](docs/DEPLOY.en.md#0-prerequisites) plus [API configuration notes](API.en.md#configuration-best-practice) for full details. -```json -{ - "keys": ["your-api-key-1", "your-api-key-2"], - "accounts": [ - { - "email": "user@example.com", - "password": "your-password" - }, - { - "mobile": "12345678901", - "password": "your-password" - } - ], - "model_aliases": { - "gpt-4o": "deepseek-chat", - "gpt-5": "deepseek-chat", - "gpt-5-mini": "deepseek-chat", - "gpt-5-codex": "deepseek-reasoner", - "o3": "deepseek-reasoner", - "claude-opus-4-6": "deepseek-reasoner", - "gemini-2.5-flash": "deepseek-chat" - }, - "compat": { - "wide_input_strict_output": true, - "strip_reference_markers": true - }, - "responses": { - "store_ttl_seconds": 900 - }, - "embeddings": { - "provider": "deterministic" - }, - "claude_mapping": { - "fast": "deepseek-chat", - "slow": "deepseek-reasoner" - }, - "admin": { - "jwt_expire_hours": 24 - }, - "runtime": { - "account_max_inflight": 2, - "account_max_queue": 0, - "global_max_inflight": 0, - "token_refresh_interval_hours": 6 - }, - "auto_delete": { - "mode": "none" - } -} -``` +Common fields: -- `keys`: API access keys; clients authenticate via `Authorization: Bearer ` -- `accounts`: DeepSeek account list, 
supports `email` or `mobile` login -- `token`: Even if set in `config.json`, it is cleared during load (DS2API does not read persisted tokens from config); runtime tokens are maintained/refreshed in memory only -- `model_aliases`: Map common model names (GPT/Codex/Claude) to DeepSeek models -- `compat.wide_input_strict_output`: Keep `true` (current default policy) -- `compat.strip_reference_markers`: Keep `true`; it strips reference markers from visible output -- `toolcall`: Legacy field; the current behavior is fixed to feature matching + high-confidence early emit, and any config value is ignored -- `responses.store_ttl_seconds`: In-memory TTL for `/v1/responses/{id}` -- `embeddings.provider`: Embeddings provider (`deterministic/mock/builtin` built-in) -- `claude_mapping`: Maps `fast`/`slow` suffixes to corresponding DeepSeek models (still compatible with `claude_model_mapping`) -- `admin`: Admin panel settings (JWT expiry, password hash, etc.), hot-reloadable via Admin Settings API -- `runtime`: Runtime parameters (concurrency limits, queue sizes, managed token refresh interval), hot-reloadable via Admin Settings API; `account_max_queue=0`/`global_max_inflight=0` means auto-calculate from recommended values, `token_refresh_interval_hours=6` is the default forced re-login interval -- `auto_delete.mode`: How to clean up DeepSeek remote chat records after each request completes. Supported values: `none` (default, no deletion), `single` (delete only the current session), `all` (delete all sessions); legacy `auto_delete.sessions=true` is still treated as `all` +- `keys` / `api_keys`: client API keys; `api_keys` adds `name` and `remark` metadata while `keys` remains compatible. +- `accounts`: managed DeepSeek accounts, supporting `email` or `mobile` login plus proxy/name/remark metadata. +- `model_aliases`: one shared alias map for OpenAI / Claude / Gemini model names. 
+- `runtime`: account concurrency, queueing, and token refresh behavior, hot-reloadable via Admin Settings. +- `auto_delete.mode`: remote session cleanup after each request, supporting `none` / `single` / `all`. +- `history_split`: multi-turn history split policy, now forced on globally; tune its trigger threshold to avoid inlining all long history into the prompt. -### Environment Variables - -| Variable | Purpose | Default | -| --- | --- | --- | -| `PORT` | Service port | `5001` | -| `LOG_LEVEL` | Log level | `INFO` (`DEBUG`/`WARN`/`ERROR`) | -| `DS2API_ADMIN_KEY` | Admin login key | `admin` | -| `DS2API_JWT_SECRET` | Admin JWT signing secret | Same as `DS2API_ADMIN_KEY` | -| `DS2API_JWT_EXPIRE_HOURS` | Admin JWT TTL in hours | `24` | -| `DS2API_CONFIG_PATH` | Config file path | `config.json` | -| `DS2API_CONFIG_JSON` | Inline config (JSON or Base64) | — | -| `DS2API_CHAT_HISTORY_PATH` | Server-side conversation history file path | `data/chat_history.json` | -| `DS2API_ENV_WRITEBACK` | Auto-write env-backed config to file and transition to file mode (`1/true/yes/on`) | Disabled | -| `DS2API_STATIC_ADMIN_DIR` | Admin static assets dir | `static/admin` | -| `DS2API_AUTO_BUILD_WEBUI` | Auto-build WebUI on startup | Enabled locally, disabled on Vercel | -| `DS2API_ACCOUNT_MAX_INFLIGHT` | Max in-flight requests per account | `2` | -| `DS2API_ACCOUNT_MAX_QUEUE` | Waiting queue limit | `recommended_concurrency` | -| `DS2API_GLOBAL_MAX_INFLIGHT` | Global max in-flight requests | `recommended_concurrency` | -| `DS2API_VERCEL_INTERNAL_SECRET` | Vercel hybrid streaming internal auth | Falls back to `DS2API_ADMIN_KEY` | -| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | Stream lease TTL seconds | `900` | -| `DS2API_DEV_PACKET_CAPTURE` | Local dev packet capture switch (record recent request/response bodies) | Enabled by default on non-Vercel local runtime | -| `DS2API_DEV_PACKET_CAPTURE_LIMIT` | Number of captured sessions to retain (auto-evict overflow) | `20` | -| 
`DS2API_DEV_PACKET_CAPTURE_MAX_BODY_BYTES` | Max recorded bytes per captured response body | `5242880` | -| `VERCEL_TOKEN` | Vercel sync token | — | -| `VERCEL_PROJECT_ID` | Vercel project ID | — | -| `VERCEL_TEAM_ID` | Vercel team ID | — | -| `DS2API_VERCEL_PROTECTION_BYPASS` | Vercel deployment protection bypass for internal Node→Go calls | — | - -> Note: when `DS2API_CONFIG_JSON` is detected, the Admin UI shows mode risk and auto-persistence status (including `DS2API_CONFIG_PATH` and mode-transition hints). +For the full environment variable list, see [docs/DEPLOY.en.md](docs/DEPLOY.en.md). For auth behavior, see [API.en.md](API.en.md#authentication). ## Authentication Modes @@ -393,7 +304,7 @@ Queue limit = DS2API_ACCOUNT_MAX_QUEUE (default = recommended concurrency) When `tools` is present in the request, DS2API performs anti-leak handling: 1. Toolcall feature matching is enabled only in **non-code-block context** (fenced examples are ignored) -2. The parser currently targets XML/Markup-family tool syntax (`` / `` / `` / `tool_use` / antml variants); standalone JSON `tool_calls` payloads are not treated as executable calls by default +2. The parser now treats only the canonical XML wrapper as executable tool-calling syntax: `` → `` → ``; legacy `` / `` / `` / ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text 3. `responses` streaming strictly uses official item lifecycle events (`response.output_item.*`, `response.content_part.*`, `response.function_call_arguments.*`) 4. `responses` supports and enforces `tool_choice` (`auto`/`none`/`required`/forced function); `required` violations return `422` for non-stream and `response.failed` for stream 5. 
The output protocol follows the client request (OpenAI / Claude / Gemini native shapes); model-side prompting can prefer XML, and the compatibility layer handles the protocol-specific translation @@ -442,28 +353,19 @@ The save endpoint can target a chain by `query`, `chain_key`, or `capture_id`. E ## Testing -```bash -# Unit tests (Go + Node) -./tests/scripts/run-unit-all.sh +For the full testing guide, see [docs/TESTING.md](docs/TESTING.md). -# One-command live end-to-end tests (real accounts, full request/response logs) -./tests/scripts/run-live.sh - -# Or with custom flags -go run ./cmd/ds2api-tests \ - --config config.json \ - --admin-key admin \ - --out artifacts/testsuite \ - --timeout 120 \ - --retries 2 -``` +Quick commands: ```bash -# Release-blocking gates -./tests/scripts/check-stage6-manual-smoke.sh +# Local PR gates +./scripts/lint.sh ./tests/scripts/check-refactor-line-gate.sh ./tests/scripts/run-unit-all.sh -npm ci --prefix webui && npm run build --prefix webui +npm run build --prefix webui + +# Live end-to-end tests (real accounts, full request/response logs) +./tests/scripts/run-live.sh ``` ## Release Artifact Automation (GitHub Actions) diff --git a/VERSION b/VERSION index 9575d51..fcdb2e1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.6.1 +4.0.0 diff --git a/config.example.json b/config.example.json index 0c13de4..f93a2c3 100644 --- a/config.example.json +++ b/config.example.json @@ -38,9 +38,10 @@ } ], "model_aliases": { - "gpt-4o": "deepseek-chat", - "gpt-5-codex": "deepseek-reasoner", - "o3": "deepseek-reasoner" + "gpt-4o": "deepseek-v4-flash", + "gpt-5.5": "deepseek-v4-flash", + "gpt-5.3-codex": "deepseek-v4-pro", + "o3": "deepseek-v4-pro" }, "compat": { "wide_input_strict_output": true, @@ -56,10 +57,6 @@ "embeddings": { "provider": "deterministic" }, - "claude_mapping": { - "fast": "deepseek-chat", - "slow": "deepseek-reasoner" - }, "admin": { "jwt_expire_hours": 24 }, diff --git a/docs/ARCHITECTURE.en.md b/docs/ARCHITECTURE.en.md index 
81bb928..ed23416 100644 --- a/docs/ARCHITECTURE.en.md +++ b/docs/ARCHITECTURE.en.md @@ -4,9 +4,9 @@ Language: [中文](ARCHITECTURE.md) | [English](ARCHITECTURE.en.md) > This file is the single architecture source for directory layout, module boundaries, and execution flow. -## 1. Top-level Layout (expanded) +## 1. Top-level Layout (core directories) -> Notes: this is the **fully expanded** project directory list (excluding metadata/dependency dirs such as `.git/` and `webui/node_modules/`), with each folder annotated by purpose. +> Notes: this lists the main business directories (excluding metadata/dependency dirs such as `.git/` and `webui/node_modules/`), with each folder annotated by purpose. Newly added directories should be verified from the code tree rather than treated as a per-file inventory here. ```text ds2api/ @@ -21,34 +21,46 @@ ds2api/ ├── docs/ # Project documentation ├── internal/ # Core implementation (non-public packages) │ ├── account/ # Account pool, inflight slots, waiting queue -│ ├── adapter/ # Multi-protocol adapters -│ │ ├── claude/ # Claude protocol adapter -│ │ ├── gemini/ # Gemini protocol adapter -│ │ └── openai/ # OpenAI adapter and shared execution core -│ ├── admin/ # Admin API (config/accounts/ops) │ ├── auth/ # Auth/JWT/credential resolution +│ ├── chathistory/ # Server-side conversation history storage/query │ ├── claudeconv/ # Claude message conversion helpers │ ├── compat/ # Compatibility and regression helpers │ ├── config/ # Config loading/validation/hot reload -│ ├── deepseek/ # DeepSeek upstream client capabilities +│ ├── deepseek/ # DeepSeek upstream client/protocol/transport +│ │ ├── client/ # Login/session/completion/upload/delete calls +│ │ ├── protocol/ # DeepSeek URLs, constants, skip path/pattern │ │ └── transport/ # DeepSeek transport details │ ├── devcapture/ # Dev capture and troubleshooting │ ├── format/ # Response formatting layer │ │ ├── claude/ # Claude output formatting │ │ └── openai/ # OpenAI output formatting 
+│ ├── httpapi/ # HTTP surfaces: OpenAI/Claude/Gemini/Admin +│ │ ├── admin/ # Admin API root assembly and resource packages +│ │ ├── claude/ # Claude HTTP protocol adapter +│ │ ├── gemini/ # Gemini HTTP protocol adapter +│ │ └── openai/ # OpenAI HTTP surface +│ │ ├── chat/ # Chat Completions execution entrypoint +│ │ ├── responses/ # Responses API and response store +│ │ ├── files/ # Files API and inline-file preprocessing +│ │ ├── embeddings/ # Embeddings API +│ │ ├── history/ # OpenAI history split +│ │ └── shared/ # OpenAI HTTP errors/models/tool formatting │ ├── js/ # Node runtime related logic │ │ ├── chat-stream/ # Node streaming bridge │ │ ├── helpers/ # JS helper modules │ │ │ └── stream-tool-sieve/ # JS implementation of tool sieve │ │ └── shared/ # Shared semantics between Go/Node │ ├── prompt/ # Prompt composition +│ ├── promptcompat/ # API request -> DeepSeek web-chat plain-text compatibility │ ├── rawsample/ # Raw sample read/write and management │ ├── server/ # Router and middleware assembly +│ │ └── data/ # Router/runtime helper data │ ├── sse/ # SSE parsing utilities │ ├── stream/ # Unified stream consumption engine │ ├── testsuite/ # Testsuite execution framework │ ├── textclean/ # Text cleanup │ ├── toolcall/ # Tool-call parsing and repair +│ ├── toolstream/ # Go streaming tool-call anti-leak and delta detection │ ├── translatorcliproxy/ # Cross-protocol translation bridge │ ├── util/ # Shared utility helpers │ ├── version/ # Version query/compare @@ -91,33 +103,35 @@ ds2api/ ```mermaid flowchart LR C[Client/SDK] --> R[internal/server/router.go] - R --> OA[OpenAI Adapter] - R --> CA[Claude Adapter] - R --> GA[Gemini Adapter] - R --> AD[Admin API] + R --> OA[OpenAI HTTP API] + R --> CA[Claude HTTP API] + R --> GA[Gemini HTTP API] + R --> AD[Admin HTTP API] CA --> BR[translatorcliproxy] GA --> BR - BR --> CORE[internal/adapter/openai ChatCompletions] + BR --> CORE[internal/httpapi/openai/chat ChatCompletions] OA --> CORE CORE --> AUTH[internal/auth 
+ config key/account resolver] CORE --> POOL[internal/account queue + concurrency] - CORE --> TOOL[internal/toolcall parser + sieve] - CORE --> DS[internal/deepseek client] + CORE --> TOOL[internal/toolcall parser + internal/toolstream sieve] + CORE --> DS[internal/deepseek/client] DS --> U[DeepSeek upstream] ``` ## 3. Responsibilities in `internal/` - `internal/server`: router tree + middlewares (health, protocol routes, Admin/WebUI). -- `internal/adapter/openai`: shared execution core (chat/responses/embeddings + tool semantics). -- `internal/adapter/{claude,gemini}`: protocol wrappers only (no duplicated upstream execution). +- `internal/httpapi/openai/*`: OpenAI HTTP surface split into chat, responses, files, embeddings, history, and shared packages. +- `internal/httpapi/{claude,gemini}`: protocol wrappers only (no duplicated upstream execution). +- `internal/promptcompat`: compatibility core for turning OpenAI/Claude/Gemini requests into DeepSeek web-chat plain-text context. - `internal/translatorcliproxy`: structure translation between Claude/Gemini and OpenAI. -- `internal/deepseek`: upstream request/session/PoW/SSE handling. +- `internal/deepseek/{client,protocol,transport}`: upstream requests, sessions, PoW adaptation, protocol constants, and transport details. - `internal/stream` + `internal/sse`: stream parsing and incremental assembly. -- `internal/toolcall`: XML/Markup-family tool-call parsing + anti-leak sieve (`` / `` / `` / `tool_use` / antml variants). -- `internal/admin`: config/accounts/vercel sync/version/dev-capture endpoints. +- `internal/toolcall` + `internal/toolstream`: canonical XML tool-call parsing + anti-leak sieve (the only executable format is `` / `` / ``). +- `internal/httpapi/admin/*`: Admin API root assembly plus auth/accounts/config/settings/proxies/rawsamples/vercel/history/devcapture/version resource packages. 
+- `internal/chathistory`: server-side conversation history persistence, pagination, detail lookup, and retention policy. - `internal/config`: config loading/validation + runtime settings hot-reload. - `internal/account`: managed account pool, inflight slots, waiting queue. diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index b439127..24ea5c3 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -4,9 +4,9 @@ > 本文档用于集中维护“代码目录结构 + 模块边界 + 主链路调用关系”。 -## 1. 顶层目录结构(展开) +## 1. 顶层目录结构(核心目录) -> 说明:以下为仓库内业务相关目录的**完整展开**(排除 `.git/` 与 `webui/node_modules/` 这类依赖/元数据目录),并标注每个文件夹作用。 +> 说明:以下为仓库内主要业务目录(排除 `.git/` 与 `webui/node_modules/` 这类依赖/元数据目录),并标注每个文件夹作用。新增目录以代码为准,不要求在本文做逐文件展开。 ```text ds2api/ @@ -21,34 +21,46 @@ ds2api/ ├── docs/ # 项目文档目录 ├── internal/ # 核心业务实现(不对外暴露) │ ├── account/ # 账号池、并发槽位、等待队列 -│ ├── adapter/ # 多协议适配层 -│ │ ├── claude/ # Claude 协议适配 -│ │ ├── gemini/ # Gemini 协议适配 -│ │ └── openai/ # OpenAI 协议与统一执行核心 -│ ├── admin/ # Admin API(配置/账号/运维) │ ├── auth/ # 鉴权/JWT/凭证解析 +│ ├── chathistory/ # 服务器端对话记录存储与查询 │ ├── claudeconv/ # Claude 消息格式转换工具 │ ├── compat/ # 兼容性辅助与回归支持 │ ├── config/ # 配置加载、校验、热更新 -│ ├── deepseek/ # DeepSeek 上游客户端能力 +│ ├── deepseek/ # DeepSeek 上游 client/protocol/transport +│ │ ├── client/ # 登录、会话、completion、上传/删除等上游调用 +│ │ ├── protocol/ # DeepSeek URL、常量、skip path/pattern │ │ └── transport/ # DeepSeek 传输层细节 │ ├── devcapture/ # 开发抓包与调试采集 │ ├── format/ # 响应格式化层 │ │ ├── claude/ # Claude 输出格式化 │ │ └── openai/ # OpenAI 输出格式化 +│ ├── httpapi/ # HTTP surface:OpenAI/Claude/Gemini/Admin +│ │ ├── admin/ # Admin API 根装配与资源子包 +│ │ ├── claude/ # Claude HTTP 协议适配 +│ │ ├── gemini/ # Gemini HTTP 协议适配 +│ │ └── openai/ # OpenAI HTTP surface +│ │ ├── chat/ # Chat Completions 执行入口 +│ │ ├── responses/ # Responses API 与 response store +│ │ ├── files/ # Files API 与 inline file 预处理 +│ │ ├── embeddings/ # Embeddings API +│ │ ├── history/ # OpenAI history split +│ │ └── shared/ # OpenAI HTTP 公共错误/模型/工具格式 │ ├── js/ # Node Runtime 相关逻辑 │ │ ├── chat-stream/ # 
Node 流式输出桥接 │ │ ├── helpers/ # JS 辅助函数 │ │ │ └── stream-tool-sieve/ # Tool sieve JS 实现 │ │ └── shared/ # Go/Node 共用语义片段 │ ├── prompt/ # Prompt 组装 +│ ├── promptcompat/ # API 请求到 DeepSeek 网页纯文本上下文兼容层 │ ├── rawsample/ # raw sample 读写与管理 │ ├── server/ # 路由与中间件装配 +│ │ └── data/ # 路由/运行时辅助数据 │ ├── sse/ # SSE 解析工具 │ ├── stream/ # 统一流式消费引擎 │ ├── testsuite/ # 测试集执行框架 │ ├── textclean/ # 文本清洗 │ ├── toolcall/ # 工具调用解析与修复 +│ ├── toolstream/ # Go 流式 tool call 防泄漏与增量检测 │ ├── translatorcliproxy/ # 多协议互转桥 │ ├── util/ # 通用工具函数 │ ├── version/ # 版本查询/比较 @@ -91,33 +103,35 @@ ds2api/ ```mermaid flowchart LR C[Client/SDK] --> R[internal/server/router.go] - R --> OA[OpenAI Adapter] - R --> CA[Claude Adapter] - R --> GA[Gemini Adapter] - R --> AD[Admin API] + R --> OA[OpenAI HTTP API] + R --> CA[Claude HTTP API] + R --> GA[Gemini HTTP API] + R --> AD[Admin HTTP API] CA --> BR[translatorcliproxy] GA --> BR - BR --> CORE[internal/adapter/openai ChatCompletions] + BR --> CORE[internal/httpapi/openai/chat ChatCompletions] OA --> CORE CORE --> AUTH[internal/auth + config key/account resolver] CORE --> POOL[internal/account queue + concurrency] - CORE --> TOOL[internal/toolcall parser + sieve] - CORE --> DS[internal/deepseek client] + CORE --> TOOL[internal/toolcall parser + internal/toolstream sieve] + CORE --> DS[internal/deepseek/client] DS --> U[DeepSeek upstream] ``` ## 3. 
internal/ 子模块职责 - `internal/server`:路由树和中间件挂载(健康检查、协议入口、Admin/WebUI)。 -- `internal/adapter/openai`:统一执行内核(chat/responses/embeddings 与 tool calling 语义)。 -- `internal/adapter/{claude,gemini}`:协议输入输出适配,不重复实现上游调用逻辑。 +- `internal/httpapi/openai/*`:OpenAI HTTP surface,按 chat、responses、files、embeddings、history、shared 拆分。 +- `internal/httpapi/{claude,gemini}`:协议输入输出适配,不重复实现上游调用逻辑。 +- `internal/promptcompat`:OpenAI/Claude/Gemini 请求到 DeepSeek 网页纯文本上下文的兼容内核。 - `internal/translatorcliproxy`:Claude/Gemini 与 OpenAI 结构互转。 -- `internal/deepseek`:上游请求、会话、PoW、SSE 消费。 +- `internal/deepseek/{client,protocol,transport}`:上游请求、会话、PoW 适配、协议常量与传输层。 - `internal/stream` + `internal/sse`:流式解析与增量处理。 -- `internal/toolcall`:以 XML/Markup 家族为核心的工具调用解析与防泄漏筛分(`` / `` / `` / `tool_use` / antml 变体)。 -- `internal/admin`:配置管理、账号管理、Vercel 同步、版本检查、开发抓包。 +- `internal/toolcall` + `internal/toolstream`:canonical XML 工具调用解析与防泄漏筛分(唯一可执行格式:`` / `` / ``)。 +- `internal/httpapi/admin/*`:Admin API 根装配与 auth/accounts/config/settings/proxies/rawsamples/vercel/history/devcapture/version 等资源子包。 +- `internal/chathistory`:服务器端对话记录持久化、分页、单条详情和保留策略。 - `internal/config`:配置加载、校验、运行时 settings 热更新。 - `internal/account`:托管账号池、并发槽位、等待队列。 diff --git a/docs/CONTRIBUTING.en.md b/docs/CONTRIBUTING.en.md index aa9dd19..8dd9a40 100644 --- a/docs/CONTRIBUTING.en.md +++ b/docs/CONTRIBUTING.en.md @@ -59,10 +59,12 @@ docker-compose -f docker-compose.dev.yml up | Language | Standards | | --- | --- | -| **Go** | Run `./scripts/lint.sh` (gofmt + golangci-lint) and ensure `go test ./...` passes before committing | +| **Go** | Run `gofmt -w` after editing Go files; before committing, run `./scripts/lint.sh` (format check + golangci-lint) | | **JavaScript/React** | Follow existing project style (functional components) | | **Commit messages** | Use semantic prefixes: `feat:`, `fix:`, `docs:`, `refactor:`, `style:`, `perf:`, `chore:` | +Do not silently ignore cleanup errors from I/O-style calls such as `Close`, `Flush`, or `Sync`; return them 
when possible, otherwise log them explicitly. + ## Submitting a PR 1. Fork the repo @@ -85,10 +87,13 @@ Manually build WebUI to `static/admin/`: ## Running Tests ```bash -# Go + Node unit tests (recommended) +# Local PR gates (kept aligned with the quality-gates workflow) +./scripts/lint.sh +./tests/scripts/check-refactor-line-gate.sh ./tests/scripts/run-unit-all.sh +npm run build --prefix webui -# End-to-end live tests (real accounts) +# End-to-end live tests (real accounts; recommended for releases or high-risk changes) ./tests/scripts/run-live.sh ``` diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 6fae956..0a9187d 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -59,10 +59,12 @@ docker-compose -f docker-compose.dev.yml up | 语言 | 规范 | | --- | --- | -| **Go** | 提交前运行 `./scripts/lint.sh`(包含 gofmt+golangci-lint)并确保 `go test ./...` 通过 | +| **Go** | 修改 Go 文件后运行 `gofmt -w`;提交前运行 `./scripts/lint.sh`(包含格式化检查和 golangci-lint) | | **JavaScript/React** | 保持现有代码风格(函数组件) | | **提交信息** | 使用语义化前缀:`feat:`、`fix:`、`docs:`、`refactor:`、`style:`、`perf:`、`chore:` | +I/O 类清理调用(如 `Close`、`Flush`、`Sync`)的错误不要直接忽略;无法向上返回时请显式记录日志。 + ## 提交 PR 1. 
Fork 仓库 @@ -85,10 +87,13 @@ docker-compose -f docker-compose.dev.yml up ## 运行测试 ```bash -# Go + Node 单元测试(推荐) +# PR 本地门禁(与 quality-gates 工作流保持一致) +./scripts/lint.sh +./tests/scripts/check-refactor-line-gate.sh ./tests/scripts/run-unit-all.sh +npm run build --prefix webui -# 端到端全链路测试(真实账号) +# 端到端全链路测试(真实账号,发布或高风险改动时建议执行) ./tests/scripts/run-live.sh ``` diff --git a/docs/DEPLOY.en.md b/docs/DEPLOY.en.md index 2bd6bbd..de52b4c 100644 --- a/docs/DEPLOY.en.md +++ b/docs/DEPLOY.en.md @@ -259,12 +259,13 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx # optional for personal accounts | `DS2API_ENV_WRITEBACK` | When `DS2API_CONFIG_JSON` is present, auto-write to `DS2API_CONFIG_PATH` and switch to file-backed mode after success (`1/true/yes/on`) | Disabled | | `DS2API_VERCEL_INTERNAL_SECRET` | Hybrid streaming internal auth | Falls back to `DS2API_ADMIN_KEY` | | `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | Stream lease TTL | `900` | +| `DS2API_RAW_STREAM_SAMPLE_ROOT` | Raw stream sample root for saving/reading samples | `tests/raw_stream_samples` | | `VERCEL_TOKEN` | Vercel sync token | — | | `VERCEL_PROJECT_ID` | Vercel project ID | — | | `VERCEL_TEAM_ID` | Vercel team ID | — | | `DS2API_VERCEL_PROTECTION_BYPASS` | Deployment protection bypass for internal Node→Go calls | — | -### 3.3 Vercel Architecture +### 3.4 Vercel Architecture ```text Request ──────┐ @@ -300,13 +301,14 @@ Vercel Go Runtime applies platform-level response buffering, so this project use - `api/chat-stream.js` falls back to Go entry (`?__go=1`) for non-stream requests only - Streaming requests (including requests with `tools`) stay on the Node path and use Go-aligned tool-call anti-leak handling +- The Node stream path also mirrors Go finalization semantics: empty visible output returns the same shaped error SSE, and empty `content_filter` returns a `content_filter` error - WebUI non-stream test calls `?__go=1` directly to avoid Node hop timeout on long requests #### Function Duration `vercel.json` sets `maxDuration: 
300` for both `api/chat-stream.js` and `api/index.go` (subject to your Vercel plan limits). -### 3.4 Vercel Troubleshooting +### 3.5 Vercel Troubleshooting #### Go Build Failure @@ -350,7 +352,7 @@ If API responses return Vercel HTML `Authentication Required`: - **Option B**: Add `x-vercel-protection-bypass` header to requests - **Option C**: Set `VERCEL_AUTOMATION_BYPASS_SECRET` (or `DS2API_VERCEL_PROTECTION_BYPASS`) for internal Node→Go calls -### 3.5 Build Artifacts Not Committed +### 3.6 Build Artifacts Not Committed - `static/admin` directory is not in Git - Vercel / Docker automatically generate WebUI assets during build @@ -546,7 +548,7 @@ curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin curl http://127.0.0.1:5001/v1/chat/completions \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ - -d '{"model":"deepseek-chat","messages":[{"role":"user","content":"hello"}]}' + -d '{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hello"}]}' ``` --- @@ -577,4 +579,4 @@ The testsuite automatically performs: - ✅ Live scenario verification (OpenAI/Claude/Admin/concurrency/toolcall/streaming) - ✅ Full request/response artifact logging for debugging -For detailed testsuite documentation, see [TESTING.md](TESTING.md). +For detailed testsuite documentation, see [TESTING.md](TESTING.md). The fixed local PR gates are listed in [TESTING.md](TESTING.md#pr-门禁--pr-gates). 
diff --git a/docs/DEPLOY.md b/docs/DEPLOY.md index f0e0068..7509cb3 100644 --- a/docs/DEPLOY.md +++ b/docs/DEPLOY.md @@ -258,7 +258,8 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx # 个人账号可留空 | `DS2API_GLOBAL_MAX_INFLIGHT` | 全局并发上限 | `recommended_concurrency` | | `DS2API_ENV_WRITEBACK` | 检测到 `DS2API_CONFIG_JSON` 时自动写入 `DS2API_CONFIG_PATH`,并在成功后转为文件模式(`1/true/yes/on`) | 关闭 | | `DS2API_VERCEL_INTERNAL_SECRET` | 混合流式内部鉴权 | 回退用 `DS2API_ADMIN_KEY` | -| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | 流式 lease TTL | 默认与 `responses.store_ttl_seconds` 同步,若未设置则为 `900` | +| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | 流式 lease TTL | `900` | +| `DS2API_RAW_STREAM_SAMPLE_ROOT` | raw stream 样本保存/读取根目录 | `tests/raw_stream_samples` | | `VERCEL_TOKEN` | Vercel 同步 token | — | | `VERCEL_PROJECT_ID` | Vercel 项目 ID | — | | `VERCEL_TEAM_ID` | Vercel 团队 ID | — | @@ -274,7 +275,7 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx # 个人账号可留空 详细说明参见 [API.md](../API.md#admin-接口) 中 `/admin/settings` 部分。 -### 3.3 Vercel 架构说明 +### 3.4 Vercel 架构说明 ```text 请求 ─────┐ @@ -310,13 +311,14 @@ api/index.go api/chat-stream.js - `api/chat-stream.js` 仅对非流式请求回退到 Go 入口(`?__go=1`) - 流式请求(包括带 `tools`)走 Node 路径,并执行与 Go 对齐的 tool-call 防泄漏处理 +- Node 流式路径同时对齐 Go 的终结态语义:空可见输出会返回同形状错误 SSE,空 `content_filter` 会返回 `content_filter` 错误 - WebUI 的"非流式测试"直接请求 `?__go=1`,避免 Node 中转造成长请求超时 #### 函数时长 `vercel.json` 已将 `api/chat-stream.js` 与 `api/index.go` 的 `maxDuration` 设为 `300`(受 Vercel 套餐上限约束)。 -### 3.4 Vercel 常见报错排查 +### 3.5 Vercel 常见报错排查 #### Go 构建失败 @@ -360,7 +362,7 @@ No Output Directory named "public" found after the Build completed. 
- **方案 B**:请求中添加 `x-vercel-protection-bypass` 头 - **方案 C**:设置 `VERCEL_AUTOMATION_BYPASS_SECRET`(或 `DS2API_VERCEL_PROTECTION_BYPASS`),仅影响内部 Node→Go 调用 -### 3.5 仓库不提交构建产物 +### 3.6 仓库不提交构建产物 - `static/admin` 目录不在 Git 中 - Vercel / Docker 构建阶段自动生成 WebUI 静态文件 @@ -556,7 +558,7 @@ curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin curl http://127.0.0.1:5001/v1/chat/completions \ -H "Authorization: Bearer your-api-key" \ -H "Content-Type: application/json" \ - -d '{"model":"deepseek-chat","messages":[{"role":"user","content":"hello"}]}' + -d '{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hello"}]}' ``` --- @@ -587,4 +589,4 @@ go run ./cmd/ds2api-tests \ - ✅ 真实调用场景验证(OpenAI/Claude/Admin/并发/toolcall/流式) - ✅ 全量请求与响应日志落盘(用于故障复盘) -详细测试集说明参阅 [TESTING.md](TESTING.md)。 +详细测试集说明参阅 [TESTING.md](TESTING.md)。PR 前的固定本地门禁以 [TESTING.md](TESTING.md#pr-门禁--pr-gates) 为准。 diff --git a/docs/README.md b/docs/README.md index f8b5d8d..a80093c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -15,14 +15,17 @@ ### 专题文档 +- [API -> 网页对话纯文本兼容主链路说明](./prompt-compatibility.md) - [Tool Calling 统一语义](./toolcall-semantics.md) - [DeepSeek SSE 行为结构说明(逆向观察)](./DeepSeekSSE行为结构说明-2026-04-05.md) ### 文档维护约定 +- 文档更新必须以实际代码实现为依据:总路由装配看 `internal/server/router.go`,协议/resource 路由看 `internal/httpapi/*/**/routes.go` 与 `internal/httpapi/admin/handler.go`,配置默认值看 `internal/config/*`,模型/alias 看 `internal/config/models.go`,prompt 兼容链路看 `docs/prompt-compatibility.md` 列出的代码入口。 - `README.MD` / `README.en.md`:面向首次接触用户,保留“是什么 + 怎么快速跑起来”。 - `docs/ARCHITECTURE*.md`:面向开发者,集中维护项目结构、模块职责与调用链。 - `API*.md`:面向客户端接入者,聚焦接口行为、鉴权和示例。 +- `docs/prompt-compatibility.md`:面向维护者,集中维护“API -> 网页对话纯文本上下文”的统一兼容语义;相关行为修改时必须同步更新。 - 其他 `docs/*.md`:主题化说明,避免在多个文档重复粘贴同一段内容。 --- @@ -42,12 +45,15 @@ Recommended reading order: ### Topical docs +- [API -> pure-text web-chat compatibility pipeline](./prompt-compatibility.md) - [Tool-calling unified semantics](./toolcall-semantics.md) - [DeepSeek SSE behavior notes 
(reverse-engineered)](./DeepSeekSSE行为结构说明-2026-04-05.md) ### Maintenance conventions +- Documentation updates must be grounded in the actual implementation: root routing lives in `internal/server/router.go`, protocol/resource routes live in `internal/httpapi/*/**/routes.go` and `internal/httpapi/admin/handler.go`, config defaults in `internal/config/*`, models/aliases in `internal/config/models.go`, and the prompt compatibility pipeline in the code entrypoints listed by `docs/prompt-compatibility.md`. - `README.MD` / `README.en.md`: onboarding-oriented (“what + quick start”). - `docs/ARCHITECTURE*.md`: developer-oriented source of truth for module boundaries and execution flow. - `API*.md`: integration-oriented behavior/contracts. +- `docs/prompt-compatibility.md`: maintainer-oriented source of truth for the “API -> pure-text web-chat context” compatibility flow; update it whenever related behavior changes. - Other `docs/*.md`: focused topics, avoid copy-pasting the same section into multiple files. 
diff --git a/docs/TESTING.md b/docs/TESTING.md index fd4e8f3..40c3501 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -20,6 +20,25 @@ Node 单元测试脚本会先做 `node --check` 语法门禁,再以 `--test-co --- +## PR 门禁 | PR Gates + +打开或更新 PR 前,按 `.github/workflows/quality-gates.yml` 的同等本地门禁执行: + +```bash +./scripts/lint.sh +./tests/scripts/check-refactor-line-gate.sh +./tests/scripts/run-unit-all.sh +npm run build --prefix webui +``` + +说明: + +- `./scripts/lint.sh` 会运行 Go 格式化检查和 `golangci-lint`;修改 Go 文件后仍建议先执行 `gofmt -w `。 +- `run-unit-all.sh` 串行调用 Go 与 Node 单元测试入口。 +- `run-live.sh` 是真实账号端到端测试,适合作为发布或高风险改动后的补充验证,不属于每次 PR 的固定本地门禁。 + +--- + ## 快速开始 | Quick Start ### 单元测试 | Unit Tests @@ -39,7 +58,7 @@ Node 单元测试脚本会先做 `node --check` 语法门禁,再以 `--test-co ./tests/scripts/check-refactor-line-gate.sh ./tests/scripts/check-node-split-syntax.sh -# 发布阻断:阶段 6 手工烟测签字检查(默认读取 plans/stage6-manual-smoke.md) +# 历史阶段门禁:阶段 6 手工烟测签字检查(默认读取 plans/stage6-manual-smoke.md) ./tests/scripts/check-stage6-manual-smoke.sh ``` @@ -190,8 +209,8 @@ go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/toolcall/ # 运行 format 相关测试 go test -v ./internal/format/... -# 运行 adapter 相关测试 -go test -v ./internal/adapter/openai/... +# 运行 HTTP API 相关测试 +go test -v ./internal/httpapi/openai/... ``` ### 调试 Tool Call 问题 | Debugging Tool Call Issues diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md new file mode 100644 index 0000000..a84dd0f --- /dev/null +++ b/docs/prompt-compatibility.md @@ -0,0 +1,400 @@ +# API -> 网页对话纯文本兼容主链路说明 + +文档导航:[总览](../README.MD) / [架构说明](./ARCHITECTURE.md) / [接口文档](../API.md) / [测试指南](./TESTING.md) + +> 本文档是 DS2API“把 OpenAI / Claude / Gemini 风格 API 请求兼容成 DeepSeek 网页对话纯文本上下文”的专项说明。 +> 这是项目最重要的兼容产物之一。凡是修改消息标准化、tool prompt 注入、tool history 保留、文件引用、history split、下游 completion payload 组装等行为,都必须同步更新本文档。 + +## 1. 核心结论 + +DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools`、`attachments` 原样转发给下游。 + +而是把这些高层 API 语义,统一压缩成 DeepSeek 网页对话更容易理解的三类输入: + +1. 
`prompt` + 一个单字符串,里面带有角色标记、system 指令、历史消息、assistant reasoning 标签、历史 tool call XML 等。 +2. `ref_file_ids` + 一个文件引用数组,承载附件、inline 上传文件,以及必要时被拆出去的历史文件。 +3. 控制位 + 例如 `thinking_enabled`、`search_enabled`、部分 passthrough 参数。 + +也就是说,项目最重要的兼容动作,是把“结构化 API 会话”翻译成“网页对话纯文本上下文 + 文件引用”。 + +## 2. 为什么这是核心产物 + +因为对下游来说,真正稳定的输入面不是 OpenAI/Claude/Gemini 的原生 schema,而是: + +- 一段连续的对话 prompt +- 一组可引用文件 +- 少量开关位 + +这也是为什么很多表面上看像“协议兼容”的代码,最终都会收敛到同一类逻辑: + +- 先把不同协议的消息统一成内部消息序列 +- 再把工具声明改写成 system prompt 文本 +- 再把历史 tool call / tool result 改写成 prompt 可见内容 +- 最后输出成 DeepSeek completion payload + +## 3. 统一心智模型 + +当前主链路可以这样理解: + +```text +客户端请求 + -> 协议适配层(OpenAI / Claude / Gemini) + -> 统一消息标准化 + -> tool prompt 注入 + -> DeepSeek 风格 prompt 拼装 + -> 文件收集 / inline 上传 / history split(OpenAI 链路) + -> completion payload + -> 下游网页对话接口 +``` + +对应的关键代码入口: + +- OpenAI Chat / Responses: + [internal/promptcompat/request_normalize.go](../internal/promptcompat/request_normalize.go) +- OpenAI prompt 组装: + [internal/promptcompat/prompt_build.go](../internal/promptcompat/prompt_build.go) +- OpenAI 消息标准化: + [internal/promptcompat/message_normalize.go](../internal/promptcompat/message_normalize.go) +- Claude 标准化: + [internal/httpapi/claude/standard_request.go](../internal/httpapi/claude/standard_request.go) +- Claude 消息与 tool_use/tool_result 归一: + [internal/httpapi/claude/handler_utils.go](../internal/httpapi/claude/handler_utils.go) +- Gemini 复用 OpenAI prompt builder: + [internal/httpapi/gemini/convert_request.go](../internal/httpapi/gemini/convert_request.go) +- DeepSeek prompt 角色标记拼装: + [internal/prompt/messages.go](../internal/prompt/messages.go) +- prompt 可见 tool history XML: + [internal/prompt/tool_calls.go](../internal/prompt/tool_calls.go) +- completion payload: + [internal/promptcompat/standard_request.go](../internal/promptcompat/standard_request.go) + +## 4. 
下游真正收到的东西 + +在“完成标准化后”,下游 completion payload 的核心形态是: + +```json +{ + "chat_session_id": "session-id", + "model_type": "default", + "parent_message_id": null, + "prompt": "<|begin▁of▁sentence|>...", + "ref_file_ids": [ + "file-history", + "file-systemprompt", + "file-other-attachment" + ], + "thinking_enabled": true, + "search_enabled": false +} +``` + +重点是: + +- `prompt` 才是对话上下文主载体。 +- `ref_file_ids` 只承载文件引用,不承载普通文本消息。 +- `tools` 不会作为“原生工具 schema”直接下发给下游,而是被改写进 `prompt`。 +- OpenAI Chat / Responses 原生走统一 OpenAI 标准化与 DeepSeek payload 组装;Claude / Gemini 会尽量复用 OpenAI prompt/tool 语义,其中 Gemini 直接复用 `promptcompat.BuildOpenAIPromptForAdapter`,Claude 消息接口在可代理场景会转换为 OpenAI chat 形态再执行。 +- 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Claude surface 没有 `thinking` 字段时按 Anthropic 语义视为关闭;Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关;关闭时即使上游返回 `response/thinking_content`,兼容层也不会把它当作可见正文输出。 + +## 5. prompt 是怎么拼出来的 + +### 5.1 角色标记 + +最终 prompt 使用 DeepSeek 风格角色标记: + +- `<|begin▁of▁sentence|>` +- `<|System|>` +- `<|User|>` +- `<|Assistant|>` +- `<|Tool|>` +- `<|end▁of▁instructions|>` +- `<|end▁of▁sentence|>` +- `<|end▁of▁toolresults|>` + +实现位置: +[internal/prompt/messages.go](../internal/prompt/messages.go) + +### 5.2 thinking continuity 说明 + +如果启用了 thinking,会在最前面额外插入一个 system block,提醒模型: + +- 继续既有会话,不要重开 +- earlier messages 是 binding context +- 不要把最终回答只留在 reasoning 里 + +这部分不是客户端原始消息,而是兼容层主动补进去的连续性契约。 + +### 5.3 相邻同角色消息会合并 + +在最终 `MessagesPrepareWithThinking` 中,相邻同 role 的消息会被合并成一个块,中间插入空行。 + +这意味着: + +- prompt 中看到的是“合并后的 role block” +- 不是客户端传来的逐条 message 原样排列 + +## 6. tools 为什么是“文本注入”,不是原生下发 + +当前项目把工具能力视为“prompt 约束的一部分”。 + +具体做法: + +1. 把每个 tool 的名称、描述、参数 schema 序列化成文本。 +2. 拼成 `You have access to these tools:` 大段说明。 +3. 再附上统一的 XML tool call 格式约束。 +4. 
把这整段内容并入 system prompt。 + +工具调用正例仍只示范 canonical XML:`` → `` → ``。 +正例中的工具名只会来自当前请求实际声明的工具;如果当前请求没有足够的已知工具形态,就省略对应的单工具、多工具或嵌套示例,避免把不可用工具名写进 prompt。 +对执行类工具,脚本内容必须进入执行参数本身:`Bash` / `execute_command` 使用 `command`,`exec_command` 使用 `cmd`;不要把脚本示范成 `path` / `content` 文件写入参数。 + +OpenAI 路径实现: +[internal/promptcompat/tool_prompt.go](../internal/promptcompat/tool_prompt.go) + +Claude 路径实现: +[internal/httpapi/claude/handler_utils.go](../internal/httpapi/claude/handler_utils.go) + +统一工具调用格式模板: +[internal/toolcall/tool_prompt.go](../internal/toolcall/tool_prompt.go) + +这也是项目“网页对话纯文本兼容”的关键设计: + +- tools 对下游来说,本质上是 prompt 内规则 +- 不是 native tool schema transport + +## 7. assistant 的 tool_calls / reasoning 如何保留 + +### 7.1 reasoning 保留方式 + +assistant 的 reasoning 会变成一个显式标签块: + +```text +[reasoning_content] +... +[/reasoning_content] +``` + +然后再接可见回答正文。 + +### 7.2 历史 tool_calls 保留方式 + +assistant 历史 `tool_calls` 不会保留成 OpenAI 原生 JSON,而会转成 prompt 可见的 XML: + +```xml + + + + + +``` + +这也是当前项目里唯一受支持的 canonical tool-calling 形态;其他形态都会作为普通文本保留,不会作为可执行调用语法。 + +这件事很重要,因为它决定了: + +- 历史工具调用在 prompt 中是“可见文本历史” +- 不是“隐藏结构化元数据” + +实现位置: +[internal/prompt/tool_calls.go](../internal/prompt/tool_calls.go) + +### 7.3 tool result 保留方式 + +tool / function role 的结果会作为 `<|Tool|>...<|end▁of▁toolresults|>` 进入 prompt。 + +如果 tool content 为空,当前会补成字符串 `"null"`,避免整个 tool turn 丢失。 + +## 8. files、附件、systemprompt 文件的实际语义 + +这里要明确区分两类东西: + +1. 文本型 system prompt + 例如 OpenAI `developer` / `system` / Responses `instructions` / Claude top-level `system` + 这类会进入 `prompt`。 +2. 
文件型 systemprompt + 例如通过附件、`input_file`、base64、data URL 上传的文件 + 这类不会直接内联进 `prompt`,而是进入 `ref_file_ids`。 + +OpenAI 文件相关实现: + +- inline/base64/data URL 上传: + [internal/httpapi/openai/files/file_inline_upload.go](../internal/httpapi/openai/files/file_inline_upload.go) +- 文件 ID 收集: + [internal/promptcompat/file_refs.go](../internal/promptcompat/file_refs.go) + +结论: + +- “systemprompt 文字”在 prompt 里 +- “systemprompt 文件”通常只在 `ref_file_ids` 里 + +除非调用方自己把文件内容展开后再塞进 system/developer 文本,否则文件内容不会自动出现在 prompt 正文。 + +## 9. 多轮历史为什么不会一直完整内联在 prompt + +history split 现在全局强制开启;旧配置中的 `history_split.enabled=false` 会被忽略。默认从第 2 个 user turn 起就可能触发,仍可通过 `history_split.trigger_after_turns` 调整触发阈值。 + +相关实现: + +- 配置访问器: + [internal/config/store_accessors.go](../internal/config/store_accessors.go) +- 历史拆分: + [internal/httpapi/openai/history/history_split.go](../internal/httpapi/openai/history/history_split.go) + +触发后行为: + +1. 旧历史消息被切出去。 +2. 旧历史会被重新序列化成一个文本文件。 +3. 真正上传的文件名固定是 `HISTORY.txt`。 +4. 文件内容内部会使用 `IGNORE` 这层包装名来闭合 DeepSeek 官网原生文件标记。 +5. 该文件上传后,其 `file_id` 会排在 `ref_file_ids` 最前面。 +6. live prompt 只保留: + - system / developer + - 最新 user turn 起的上下文 + +历史文件内容不是普通自由文本,而是用同一套角色标记再次序列化出的 transcript: + +```text +[uploaded filename]: HISTORY.txt +[file content end] + +<|begin▁of▁sentence|><|User|>...<|Assistant|>...<|Tool|>... + +[file name]: IGNORE +[file content begin] +``` + +所以“完整上下文”在当前实现里,其实通常分散在两处: + +- `prompt` 里的 live context +- `ref_file_ids` 指向的 history transcript file + +## 10. 
各协议入口的差异 + +### 10.1 OpenAI Chat / Responses + +特点: + +- `developer` 会映射到 `system` +- Responses `instructions` 会 prepend 为 system message +- `tools` 会注入 system prompt +- `attachments` / `input_file` / inline 文件会进入 `ref_file_ids` +- history split 主要在这条链路里生效 + +### 10.2 Claude Messages + +特点: + +- top-level `system` 优先作为系统提示 +- `tool_use` / `tool_result` 会被转换成统一的 assistant/tool 历史语义 +- `tools` 同样会被并进 system prompt +- 常规执行通过 `internal/httpapi/claude/handler_messages.go` 转到 OpenAI chat 路径,模型 alias 会先解析成 DeepSeek 原生模型 +- 当前代码里没有像 OpenAI 那样完整的 `ref_file_ids` 附件链路 + +### 10.3 Gemini + +特点: + +- `systemInstruction`、`contents.parts`、`functionCall`、`functionResponse` 会先归一 +- tools 会转成 OpenAI 风格 function schema +- prompt 构建复用 OpenAI 的 `promptcompat.BuildOpenAIPromptForAdapter` +- 未识别的非文本 part 会被安全序列化进 prompt,并对二进制/疑似 base64 内容做省略或截断处理 + +也就是说,Gemini 在“最终 prompt 语义”上,尽量和 OpenAI 保持一致。 + +## 11. 一份贴近真实的最终上下文示意 + +假设用户发来一个多轮请求: + +- 有 system/developer 文本 +- 有 tools +- 有一个文件型 systemprompt 附件 +- 有历史 assistant tool call / tool result +- history split 已触发 + +那么最终上下文更接近: + +```json +{ + "prompt": "<|begin▁of▁sentence|><|System|>continuity instructions...\\n\\n原 system / developer\\n\\nYou have access to these tools: ...<|end▁of▁instructions|><|User|>最新问题<|Assistant|>", + "ref_file_ids": [ + "file-history-ignore", + "file-systemprompt", + "file-other-attachment" + ], + "thinking_enabled": true, + "search_enabled": false +} +``` + +这正是“API 转网页对话纯文本”的核心成果: + +- 大部分结构化语义被压进 `prompt` +- 文件保持文件 +- 历史必要时拆文件 + +## 12. 
修改时必须同步本文档的场景 + +只要触碰以下任一类行为,就必须在同一提交或同一 PR 中更新本文档: + +- 角色映射变更 +- system / developer / instructions 合并规则变更 +- assistant reasoning 保留格式变更 +- assistant 历史 `tool_calls` 的 XML 呈现方式变更 +- tool result 注入方式变更 +- tool prompt 模板或 tool_choice 约束变更 +- inline 文件上传 / 文件引用收集规则变更 +- history split 触发条件、上传格式、`IGNORE` 包装格式变更 +- completion payload 字段语义变更 +- Claude / Gemini 对这套统一语义的复用关系变更 + +优先检查这些文件: + +- `internal/promptcompat/request_normalize.go` +- `internal/promptcompat/prompt_build.go` +- `internal/promptcompat/message_normalize.go` +- `internal/promptcompat/tool_prompt.go` +- `internal/httpapi/openai/files/file_inline_upload.go` +- `internal/promptcompat/file_refs.go` +- `internal/httpapi/openai/history/history_split.go` +- `internal/promptcompat/responses_input_normalize.go` +- `internal/httpapi/claude/standard_request.go` +- `internal/httpapi/claude/handler_utils.go` +- `internal/httpapi/gemini/convert_request.go` +- `internal/httpapi/gemini/convert_messages.go` +- `internal/httpapi/gemini/convert_tools.go` +- `internal/prompt/messages.go` +- `internal/prompt/tool_calls.go` +- `internal/promptcompat/standard_request.go` + +## 13. 建议的最小验证 + +改动这条链路后,至少补齐或检查这些测试: + +- `go test ./internal/prompt/...` +- `go test ./internal/httpapi/openai/...` +- `go test ./internal/httpapi/claude/...` +- `go test ./internal/httpapi/gemini/...` +- `go test ./internal/util/...` + +如果改的是 tool call 相关兼容语义,还应同时检查: + +- `go test ./internal/toolcall/...` +- `node --test tests/node/stream-tool-sieve.test.js` + +## 14. 
文档同步约定 + +本文档是这条兼容链路的专项说明。 + +如果外部接口行为也变了,还应同步检查: + +- [API.md](../API.md) +- [API.en.md](../API.en.md) +- [docs/toolcall-semantics.md](./toolcall-semantics.md) + +原则是: + +- 内部主链路变化,至少更新本文档 +- 外部可见契约变化,再同步更新 API 文档 diff --git a/docs/toolcall-semantics.md b/docs/toolcall-semantics.md index 889e3ca..2627a0a 100644 --- a/docs/toolcall-semantics.md +++ b/docs/toolcall-semantics.md @@ -1,74 +1,69 @@ # Tool call parsing semantics(Go/Node 统一语义) -本文档描述当前代码中工具调用解析链路的**实际行为**(以 `internal/toolcall` 与 `internal/js/helpers/stream-tool-sieve` 为准)。 +本文档描述当前代码中的**实际行为**,以 `internal/toolcall`、`internal/toolstream` 与 `internal/js/helpers/stream-tool-sieve` 为准。 文档导航:[总览](../README.MD) / [架构说明](./ARCHITECTURE.md) / [测试指南](./TESTING.md) -## 1) 当前输出结构 +## 1) 当前唯一可执行格式 + +当前版本只把下面这类 canonical XML 视为可执行工具调用: + +```xml + + + + + +``` + +约束: + +- 必须有 `...` wrapper +- 每个调用必须在 `...` 内 +- 工具名必须放在 `invoke` 的 `name` 属性 +- 参数必须使用 `...` + +## 2) 非 canonical 内容 + +任何不满足上述 canonical XML 形态的内容,都会保留为普通文本,不会执行。 + +当前 parser 不把 allow-list 当作硬安全边界:即使传入了已声明工具名列表,XML 里出现未声明工具名时也会尽量解析并交给上层协议输出;真正的执行侧仍必须自行校验工具名和参数。 + +## 3) 流式与防泄漏行为 + +在流式链路中(Go / Node 一致): + +- 只有从 ` 当前 `filterToolCallsDetailed` 仅做结构清洗,不做 allow-list 工具名硬拒绝。 +## 5) 落地建议 -## 2) 解析范围(重点) +1. Prompt 里只示范 canonical XML 语法。 +2. 上游客户端需要直接输出 canonical XML;DS2API 不会把其他形态改写成工具调用。 +3. 不要依赖 parser 做安全控制;执行器侧仍应做工具名和参数校验。 -当前版本的可执行解析以 **XML/Markup 家族**为主: - -- `...` -- `...` -- `...`(含自闭合) -- `...` -- antml 变体(如 `antml:function_call` / `antml:argument`) - -并支持在这些标记块内部解析: - -- JSON 参数字符串 -- 标签参数(`...`) -- key/value 风格子标签 - -## 3) 不应再假设的行为 - -以下说法在当前实现中已不成立: - -1. “纯 JSON `tool_calls` 片段会被直接当作可执行工具调用解析”。 -2. “存在 `toolcall.mode` / `toolcall.early_emit_confidence` 等可配置开关可以改变解析策略”。 - -当前策略在代码中固定为: - -- 特征匹配开启(feature-match on) -- 高置信度早发开启(early emit on) -- policy 拒绝字段保留但未启用 - -## 4) 流式与防泄漏语义 - -在流式链路中(OpenAI / Claude / Gemini 统一内核): - -- 工具调用片段会被优先提取为结构化增量输出; -- 已识别的工具调用原始片段不会作为普通文本再次回流; -- fenced code block 中的示例内容按文本处理,不作为可执行工具调用。 - -## 5) 落地建议(按当前实现) - -1. 
Prompt 里优先约束模型输出 XML/Markup 工具块。 -2. 执行器侧继续做工具名白名单与参数 schema 校验(不要依赖 parser 代替安全策略)。 -3. 需要兼容历史“纯 JSON tool_calls”模型输出时,请在上游模板层把输出规范化为 XML/Markup 风格再进入 DS2API。 - -## 6) 回归验证建议 +## 6) 回归验证 可直接运行: ```bash -go test -v -run 'TestParseToolCalls|TestRepair' ./internal/toolcall/ +go test -v -run 'TestParseToolCalls|TestProcessToolSieve' ./internal/toolcall ./internal/toolstream ./internal/httpapi/openai/... node --test tests/node/stream-tool-sieve.test.js ``` 重点覆盖: -- `` / `` / `` / `tool_use` / antml 变体 -- 参数 JSON 修复与解析 -- 流式增量下的工具调用提取与文本防泄漏 +- canonical `` wrapper 正常解析 +- 非 canonical 内容按普通文本透传 +- 代码块示例不执行 diff --git a/internal/adapter/claude/deps_injection_test.go b/internal/adapter/claude/deps_injection_test.go deleted file mode 100644 index ae0c38f..0000000 --- a/internal/adapter/claude/deps_injection_test.go +++ /dev/null @@ -1,34 +0,0 @@ -package claude - -import "testing" - -type mockClaudeConfig struct { - m map[string]string -} - -func (m mockClaudeConfig) ClaudeMapping() map[string]string { return m.m } -func (mockClaudeConfig) CompatStripReferenceMarkers() bool { return true } - -func TestNormalizeClaudeRequestUsesConfigInterfaceMapping(t *testing.T) { - req := map[string]any{ - "model": "claude-opus-4-6", - "messages": []any{ - map[string]any{"role": "user", "content": "hello"}, - }, - } - out, err := normalizeClaudeRequest(mockClaudeConfig{ - m: map[string]string{ - "fast": "deepseek-chat", - "slow": "deepseek-reasoner-search", - }, - }, req) - if err != nil { - t.Fatalf("normalizeClaudeRequest error: %v", err) - } - if out.Standard.ResolvedModel != "deepseek-reasoner-search" { - t.Fatalf("resolved model mismatch: got=%q", out.Standard.ResolvedModel) - } - if !out.Standard.Thinking || !out.Standard.Search { - t.Fatalf("unexpected flags: thinking=%v search=%v", out.Standard.Thinking, out.Standard.Search) - } -} diff --git a/internal/adapter/openai/handler_routes.go b/internal/adapter/openai/handler_routes.go deleted file mode 100644 index a08be15..0000000 --- 
a/internal/adapter/openai/handler_routes.go +++ /dev/null @@ -1,74 +0,0 @@ -package openai - -import ( - "net/http" - "strings" - "sync" - "time" - - "github.com/go-chi/chi/v5" - - "ds2api/internal/auth" - "ds2api/internal/chathistory" - "ds2api/internal/config" - "ds2api/internal/util" -) - -const ( - // openAIUploadMaxSize limits total multipart request body size (100 MiB). - openAIUploadMaxSize = 100 << 20 - // openAIGeneralMaxSize limits total JSON request body size (100 MiB). - openAIGeneralMaxSize = 100 << 20 -) - -// writeJSON is a package-internal alias kept to avoid mass-renaming across -// every call-site in this package. -var writeJSON = util.WriteJSON - -type Handler struct { - Store ConfigReader - Auth AuthResolver - DS DeepSeekCaller - ChatHistory *chathistory.Store - - leaseMu sync.Mutex - streamLeases map[string]streamLease - responsesMu sync.Mutex - responses *responseStore -} - -func (h *Handler) compatStripReferenceMarkers() bool { - if h == nil || h.Store == nil { - return true - } - return h.Store.CompatStripReferenceMarkers() -} - -type streamLease struct { - Auth *auth.RequestAuth - ExpiresAt time.Time -} - -func RegisterRoutes(r chi.Router, h *Handler) { - r.Get("/v1/models", h.ListModels) - r.Get("/v1/models/{model_id}", h.GetModel) - r.Post("/v1/chat/completions", h.ChatCompletions) - r.Post("/v1/responses", h.Responses) - r.Get("/v1/responses/{response_id}", h.GetResponseByID) - r.Post("/v1/files", h.UploadFile) - r.Post("/v1/embeddings", h.Embeddings) -} - -func (h *Handler) ListModels(w http.ResponseWriter, _ *http.Request) { - writeJSON(w, http.StatusOK, config.OpenAIModelsResponse()) -} - -func (h *Handler) GetModel(w http.ResponseWriter, r *http.Request) { - modelID := strings.TrimSpace(chi.URLParam(r, "model_id")) - model, ok := config.OpenAIModelByID(h.Store, modelID) - if !ok { - writeOpenAIError(w, http.StatusNotFound, "Model not found.") - return - } - writeJSON(w, http.StatusOK, model) -} diff --git 
a/internal/adapter/openai/handler_toolcall_format.go b/internal/adapter/openai/handler_toolcall_format.go deleted file mode 100644 index 3937610..0000000 --- a/internal/adapter/openai/handler_toolcall_format.go +++ /dev/null @@ -1,170 +0,0 @@ -package openai - -import ( - "ds2api/internal/toolcall" - "encoding/json" - "fmt" - "strings" - - "github.com/google/uuid" - - "ds2api/internal/util" -) - -func injectToolPrompt(messages []map[string]any, tools []any, policy util.ToolChoicePolicy) ([]map[string]any, []string) { - if policy.IsNone() { - return messages, nil - } - toolSchemas := make([]string, 0, len(tools)) - names := make([]string, 0, len(tools)) - isAllowed := func(name string) bool { - if strings.TrimSpace(name) == "" { - return false - } - if len(policy.Allowed) == 0 { - return true - } - _, ok := policy.Allowed[name] - return ok - } - - for _, t := range tools { - tool, ok := t.(map[string]any) - if !ok { - continue - } - fn, _ := tool["function"].(map[string]any) - if len(fn) == 0 { - fn = tool - } - name, _ := fn["name"].(string) - desc, _ := fn["description"].(string) - schema, _ := fn["parameters"].(map[string]any) - name = strings.TrimSpace(name) - if !isAllowed(name) { - continue - } - names = append(names, name) - if desc == "" { - desc = "No description available" - } - b, _ := json.Marshal(schema) - toolSchemas = append(toolSchemas, fmt.Sprintf("Tool: %s\nDescription: %s\nParameters: %s", name, desc, string(b))) - } - if len(toolSchemas) == 0 { - return messages, names - } - toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\n" + buildToolCallInstructions(names) - if policy.Mode == util.ToolChoiceRequired { - toolPrompt += "\n7) For this response, you MUST call at least one tool from the allowed list." 
- } - if policy.Mode == util.ToolChoiceForced && strings.TrimSpace(policy.ForcedName) != "" { - toolPrompt += "\n7) For this response, you MUST call exactly this tool name: " + strings.TrimSpace(policy.ForcedName) - toolPrompt += "\n8) Do not call any other tool." - } - - for i := range messages { - if messages[i]["role"] == "system" { - old, _ := messages[i]["content"].(string) - messages[i]["content"] = strings.TrimSpace(old + "\n\n" + toolPrompt) - return messages, names - } - } - messages = append([]map[string]any{{"role": "system", "content": toolPrompt}}, messages...) - return messages, names -} - -// buildToolCallInstructions delegates to the shared util implementation. -func buildToolCallInstructions(toolNames []string) string { - return toolcall.BuildToolCallInstructions(toolNames) -} - -func formatIncrementalStreamToolCallDeltas(deltas []toolCallDelta, ids map[int]string) []map[string]any { - if len(deltas) == 0 { - return nil - } - out := make([]map[string]any, 0, len(deltas)) - for _, d := range deltas { - if d.Name == "" && d.Arguments == "" { - continue - } - callID, ok := ids[d.Index] - if !ok || callID == "" { - callID = "call_" + strings.ReplaceAll(uuid.NewString(), "-", "") - ids[d.Index] = callID - } - item := map[string]any{ - "index": d.Index, - "id": callID, - "type": "function", - } - fn := map[string]any{} - if d.Name != "" { - fn["name"] = d.Name - } - if d.Arguments != "" { - fn["arguments"] = d.Arguments - } - if len(fn) > 0 { - item["function"] = fn - } - out = append(out, item) - } - return out -} - -func filterIncrementalToolCallDeltasByAllowed(deltas []toolCallDelta, seenNames map[int]string) []toolCallDelta { - if len(deltas) == 0 { - return nil - } - out := make([]toolCallDelta, 0, len(deltas)) - for _, d := range deltas { - if d.Name != "" { - if seenNames != nil { - seenNames[d.Index] = d.Name - } - out = append(out, d) - continue - } - if seenNames == nil { - out = append(out, d) - continue - } - name := 
strings.TrimSpace(seenNames[d.Index]) - if name == "" { - continue - } - out = append(out, d) - } - return out -} - -func formatFinalStreamToolCallsWithStableIDs(calls []toolcall.ParsedToolCall, ids map[int]string) []map[string]any { - if len(calls) == 0 { - return nil - } - out := make([]map[string]any, 0, len(calls)) - for i, c := range calls { - callID := "" - if ids != nil { - callID = strings.TrimSpace(ids[i]) - } - if callID == "" { - callID = "call_" + strings.ReplaceAll(uuid.NewString(), "-", "") - if ids != nil { - ids[i] = callID - } - } - args, _ := json.Marshal(c.Input) - out = append(out, map[string]any{ - "index": i, - "id": callID, - "type": "function", - "function": map[string]any{ - "name": c.Name, - "arguments": string(args), - }, - }) - } - return out -} diff --git a/internal/adapter/openai/handler_toolcall_policy.go b/internal/adapter/openai/handler_toolcall_policy.go deleted file mode 100644 index b29c91f..0000000 --- a/internal/adapter/openai/handler_toolcall_policy.go +++ /dev/null @@ -1,9 +0,0 @@ -package openai - -func (h *Handler) toolcallFeatureMatchEnabled() bool { - return true -} - -func (h *Handler) toolcallEarlyEmitHighConfidence() bool { - return true -} diff --git a/internal/adapter/openai/history_split.go b/internal/adapter/openai/history_split.go deleted file mode 100644 index 1cd1491..0000000 --- a/internal/adapter/openai/history_split.go +++ /dev/null @@ -1,290 +0,0 @@ -package openai - -import ( - "context" - "errors" - "fmt" - "strings" - - "ds2api/internal/auth" - "ds2api/internal/deepseek" - "ds2api/internal/util" -) - -const ( - historySplitFilename = "HISTORY.txt" - historySplitContentType = "text/plain; charset=utf-8" - historySplitPurpose = "assistants" -) - -func (h *Handler) applyHistorySplit(ctx context.Context, a *auth.RequestAuth, stdReq util.StandardRequest) (util.StandardRequest, error) { - if h == nil || h.DS == nil || h.Store == nil || a == nil { - return stdReq, nil - } - if !h.Store.HistorySplitEnabled() { - 
return stdReq, nil - } - - promptMessages, historyMessages := splitOpenAIHistoryMessages(stdReq.Messages, h.Store.HistorySplitTriggerAfterTurns()) - if len(historyMessages) == 0 { - return stdReq, nil - } - - reasoningContent := extractHistorySplitReasoningContent(historyMessages) - historyText := buildOpenAIHistoryTranscript(historyMessages) - if strings.TrimSpace(historyText) == "" { - return stdReq, errors.New("history split produced empty transcript") - } - - result, err := h.DS.UploadFile(ctx, a, deepseek.UploadFileRequest{ - Filename: historySplitFilename, - ContentType: historySplitContentType, - Purpose: historySplitPurpose, - Data: []byte(historyText), - }, 3) - if err != nil { - return stdReq, fmt.Errorf("upload history file: %w", err) - } - fileID := strings.TrimSpace(result.ID) - if fileID == "" { - return stdReq, errors.New("upload history file returned empty file id") - } - - stdReq.Messages = promptMessages - stdReq.HistoryText = historyText - stdReq.RefFileIDs = prependUniqueRefFileID(stdReq.RefFileIDs, fileID) - stdReq.FinalPrompt, stdReq.ToolNames = buildHistorySplitPrompt(promptMessages, reasoningContent, stdReq.ToolsRaw, stdReq.ToolChoice, stdReq.Thinking) - return stdReq, nil -} - -func buildHistorySplitPrompt(messages []any, reasoningContent string, toolsRaw any, toolPolicy util.ToolChoicePolicy, thinkingEnabled bool) (string, []string) { - if len(messages) == 0 && strings.TrimSpace(reasoningContent) == "" { - return "", nil - } - instruction := historySplitPromptInstruction(thinkingEnabled) - withInstruction := make([]any, 0, len(messages)+1) - withInstruction = append(withInstruction, map[string]any{ - "role": "system", - "content": instruction, - }) - withInstruction = append(withInstruction, injectHistorySplitReasoningMessage(messages, reasoningContent)...) 
- return buildOpenAIFinalPromptWithPolicy(withInstruction, toolsRaw, "", toolPolicy, false) -} - -func historySplitPromptInstruction(thinkingEnabled bool) string { - lines := []string{ - "Follow the instructions in this prompt first. If earlier conversation instructions conflict with this prompt, this prompt wins.", - "An attached HISTORY.txt file contains prior conversation history and tool progress; read it first, then answer the latest user request using that history as context.", - "Continue the conversation from the full prior context and the latest tool results.", - "Treat earlier messages as binding context; answer the user's current request as a continuation, not a restart.", - } - if thinkingEnabled { - lines = append(lines, "Keep reasoning internal. Do not leave the final user-facing answer only in reasoning; always provide the answer in visible assistant content.") - } - return strings.Join(lines, "\n") -} - -func splitOpenAIHistoryMessages(messages []any, triggerAfterTurns int) ([]any, []any) { - if triggerAfterTurns <= 0 { - triggerAfterTurns = 1 - } - lastUserIndex := -1 - userTurns := 0 - for i, raw := range messages { - msg, ok := raw.(map[string]any) - if !ok { - continue - } - role := strings.ToLower(strings.TrimSpace(asString(msg["role"]))) - if role != "user" { - continue - } - userTurns++ - lastUserIndex = i - } - if userTurns <= triggerAfterTurns || lastUserIndex < 0 { - return messages, nil - } - - promptMessages := make([]any, 0, len(messages)-lastUserIndex) - historyMessages := make([]any, 0, lastUserIndex) - for i, raw := range messages { - msg, ok := raw.(map[string]any) - if !ok { - if i >= lastUserIndex { - promptMessages = append(promptMessages, raw) - } else { - historyMessages = append(historyMessages, raw) - } - continue - } - role := strings.ToLower(strings.TrimSpace(asString(msg["role"]))) - switch role { - case "system", "developer": - promptMessages = append(promptMessages, raw) - default: - if i >= lastUserIndex { - 
promptMessages = append(promptMessages, raw) - } else { - historyMessages = append(historyMessages, raw) - } - } - } - if len(promptMessages) == 0 { - return messages, nil - } - return promptMessages, historyMessages -} - -func buildOpenAIHistoryTranscript(messages []any) string { - var b strings.Builder - b.WriteString("# HISTORY.txt\n") - b.WriteString("Prior conversation history and tool progress.\n\n") - - entry := 0 - for _, raw := range messages { - msg, ok := raw.(map[string]any) - if !ok { - continue - } - role := strings.ToLower(strings.TrimSpace(asString(msg["role"]))) - content := buildOpenAIHistoryEntry(role, msg) - if strings.TrimSpace(content) == "" { - continue - } - entry++ - fmt.Fprintf(&b, "=== %d. %s ===\n%s\n\n", entry, strings.ToUpper(roleLabelForHistory(role)), content) - } - return strings.TrimSpace(b.String()) + "\n" -} - -func buildOpenAIHistoryEntry(role string, msg map[string]any) string { - switch role { - case "assistant": - return strings.TrimSpace(buildAssistantHistoryContent(msg)) - case "tool", "function": - return strings.TrimSpace(buildToolHistoryContent(msg)) - case "user": - return strings.TrimSpace(normalizeOpenAIContentForPrompt(msg["content"])) - default: - return strings.TrimSpace(normalizeOpenAIContentForPrompt(msg["content"])) - } -} - -func buildAssistantHistoryContent(msg map[string]any) string { - return strings.TrimSpace(buildAssistantContentForPrompt(msg)) -} - -func buildToolHistoryContent(msg map[string]any) string { - content := strings.TrimSpace(normalizeOpenAIContentForPrompt(msg["content"])) - parts := make([]string, 0, 2) - if name := strings.TrimSpace(asString(msg["name"])); name != "" { - parts = append(parts, "name="+name) - } - if callID := strings.TrimSpace(asString(msg["tool_call_id"])); callID != "" { - parts = append(parts, "tool_call_id="+callID) - } - header := "" - if len(parts) > 0 { - header = "[" + strings.Join(parts, " ") + "]" - } - switch { - case header != "" && content != "": - return header 
+ "\n" + content - case header != "": - return header - default: - return content - } -} - -func extractHistorySplitReasoningContent(messages []any) string { - for i := len(messages) - 1; i >= 0; i-- { - msg, ok := messages[i].(map[string]any) - if !ok { - continue - } - role := strings.ToLower(strings.TrimSpace(asString(msg["role"]))) - if role != "assistant" { - continue - } - reasoning := strings.TrimSpace(normalizeOpenAIReasoningContentForPrompt(msg["reasoning_content"])) - if reasoning == "" { - reasoning = strings.TrimSpace(extractOpenAIReasoningContentFromMessage(msg["content"])) - } - if reasoning != "" { - return reasoning - } - } - return "" -} - -func injectHistorySplitReasoningMessage(messages []any, reasoningContent string) []any { - reasoningContent = strings.TrimSpace(reasoningContent) - if reasoningContent == "" { - return messages - } - reasoningMsg := map[string]any{ - "role": "assistant", - "content": "", - "reasoning_content": reasoningContent, - } - lastUserIndex := lastOpenAIUserMessageIndex(messages) - if lastUserIndex < 0 { - out := make([]any, 0, len(messages)+1) - out = append(out, reasoningMsg) - out = append(out, messages...) 
- return out - } - out := make([]any, 0, len(messages)+1) - for i, raw := range messages { - if i == lastUserIndex { - out = append(out, reasoningMsg) - } - out = append(out, raw) - } - return out -} - -func lastOpenAIUserMessageIndex(messages []any) int { - last := -1 - for i, raw := range messages { - msg, ok := raw.(map[string]any) - if !ok { - continue - } - if strings.ToLower(strings.TrimSpace(asString(msg["role"]))) == "user" { - last = i - } - } - return last -} - -func roleLabelForHistory(role string) string { - role = strings.ToLower(strings.TrimSpace(role)) - switch role { - case "function": - return "tool" - case "": - return "unknown" - default: - return role - } -} - -func prependUniqueRefFileID(existing []string, fileID string) []string { - fileID = strings.TrimSpace(fileID) - if fileID == "" { - return existing - } - out := make([]string, 0, len(existing)+1) - out = append(out, fileID) - for _, id := range existing { - trimmed := strings.TrimSpace(id) - if trimmed == "" || strings.EqualFold(trimmed, fileID) { - continue - } - out = append(out, trimmed) - } - return out -} diff --git a/internal/adapter/openai/prompt_build.go b/internal/adapter/openai/prompt_build.go deleted file mode 100644 index 2e1d891..0000000 --- a/internal/adapter/openai/prompt_build.go +++ /dev/null @@ -1,26 +0,0 @@ -package openai - -import ( - "ds2api/internal/deepseek" - "ds2api/internal/util" -) - -func buildOpenAIFinalPrompt(messagesRaw []any, toolsRaw any, traceID string, thinkingEnabled bool) (string, []string) { - return buildOpenAIFinalPromptWithPolicy(messagesRaw, toolsRaw, traceID, util.DefaultToolChoicePolicy(), thinkingEnabled) -} - -func buildOpenAIFinalPromptWithPolicy(messagesRaw []any, toolsRaw any, traceID string, toolPolicy util.ToolChoicePolicy, thinkingEnabled bool) (string, []string) { - messages := normalizeOpenAIMessagesForPrompt(messagesRaw, traceID) - toolNames := []string{} - if tools, ok := toolsRaw.([]any); ok && len(tools) > 0 { - messages, 
toolNames = injectToolPrompt(messages, tools, toolPolicy) - } - return deepseek.MessagesPrepareWithThinking(messages, thinkingEnabled), toolNames -} - -// BuildPromptForAdapter exposes the OpenAI-compatible prompt building flow so -// other protocol adapters (for example Gemini) can reuse the same tool/history -// normalization logic and remain behavior-compatible with chat/completions. -func BuildPromptForAdapter(messagesRaw []any, toolsRaw any, traceID string, thinkingEnabled bool) (string, []string) { - return buildOpenAIFinalPrompt(messagesRaw, toolsRaw, traceID, thinkingEnabled) -} diff --git a/internal/adapter/openai/standard_request_test.go b/internal/adapter/openai/standard_request_test.go deleted file mode 100644 index dace3af..0000000 --- a/internal/adapter/openai/standard_request_test.go +++ /dev/null @@ -1,210 +0,0 @@ -package openai - -import ( - "testing" - - "ds2api/internal/config" - "ds2api/internal/util" -) - -func newEmptyStoreForNormalizeTest(t *testing.T) *config.Store { - t.Helper() - t.Setenv("DS2API_CONFIG_JSON", `{}`) - return config.LoadStore() -} - -func TestNormalizeOpenAIChatRequest(t *testing.T) { - store := newEmptyStoreForNormalizeTest(t) - req := map[string]any{ - "model": "gpt-5-codex", - "messages": []any{ - map[string]any{"role": "user", "content": "hello"}, - }, - "temperature": 0.3, - "stream": true, - } - n, err := normalizeOpenAIChatRequest(store, req, "") - if err != nil { - t.Fatalf("normalize failed: %v", err) - } - if n.ResolvedModel != "deepseek-reasoner" { - t.Fatalf("unexpected resolved model: %s", n.ResolvedModel) - } - if !n.Stream { - t.Fatalf("expected stream=true") - } - if _, ok := n.PassThrough["temperature"]; !ok { - t.Fatalf("expected temperature passthrough") - } - if n.FinalPrompt == "" { - t.Fatalf("expected non-empty final prompt") - } -} - -func TestNormalizeOpenAIChatRequestCollectsRefFileIDs(t *testing.T) { - store := newEmptyStoreForNormalizeTest(t) - req := map[string]any{ - "model": "gpt-5-codex", - 
"messages": []any{ - map[string]any{ - "role": "user", - "content": []any{ - map[string]any{"type": "input_text", "text": "hello"}, - map[string]any{"type": "input_file", "file_id": "file-msg"}, - }, - }, - }, - "attachments": []any{ - map[string]any{"file_id": "file-attachment"}, - }, - "ref_file_ids": []any{"file-top", "file-attachment"}, - } - n, err := normalizeOpenAIChatRequest(store, req, "") - if err != nil { - t.Fatalf("normalize failed: %v", err) - } - if len(n.RefFileIDs) != 3 { - t.Fatalf("expected 3 distinct file ids, got %#v", n.RefFileIDs) - } - if n.RefFileIDs[0] != "file-top" || n.RefFileIDs[1] != "file-attachment" || n.RefFileIDs[2] != "file-msg" { - t.Fatalf("unexpected file ids: %#v", n.RefFileIDs) - } -} - -func TestNormalizeOpenAIResponsesRequestInput(t *testing.T) { - store := newEmptyStoreForNormalizeTest(t) - req := map[string]any{ - "model": "gpt-4o", - "input": "ping", - "instructions": "system", - } - n, err := normalizeOpenAIResponsesRequest(store, req, "") - if err != nil { - t.Fatalf("normalize failed: %v", err) - } - if n.ResolvedModel != "deepseek-chat" { - t.Fatalf("unexpected resolved model: %s", n.ResolvedModel) - } - if len(n.Messages) != 2 { - t.Fatalf("expected 2 normalized messages, got %d", len(n.Messages)) - } -} - -func TestNormalizeOpenAIResponsesRequestToolChoiceRequired(t *testing.T) { - store := newEmptyStoreForNormalizeTest(t) - req := map[string]any{ - "model": "gpt-4o", - "input": "ping", - "tools": []any{ - map[string]any{ - "type": "function", - "function": map[string]any{ - "name": "search", - "parameters": map[string]any{ - "type": "object", - }, - }, - }, - }, - "tool_choice": "required", - } - n, err := normalizeOpenAIResponsesRequest(store, req, "") - if err != nil { - t.Fatalf("normalize failed: %v", err) - } - if n.ToolChoice.Mode != util.ToolChoiceRequired { - t.Fatalf("expected tool choice mode required, got %q", n.ToolChoice.Mode) - } - if len(n.ToolNames) != 1 || n.ToolNames[0] != "search" { - 
t.Fatalf("unexpected tool names: %#v", n.ToolNames) - } -} - -func TestNormalizeOpenAIResponsesRequestToolChoiceForcedFunction(t *testing.T) { - store := newEmptyStoreForNormalizeTest(t) - req := map[string]any{ - "model": "gpt-4o", - "input": "ping", - "tools": []any{ - map[string]any{ - "type": "function", - "function": map[string]any{ - "name": "search", - }, - }, - map[string]any{ - "type": "function", - "function": map[string]any{ - "name": "read_file", - }, - }, - }, - "tool_choice": map[string]any{ - "type": "function", - "name": "read_file", - }, - } - n, err := normalizeOpenAIResponsesRequest(store, req, "") - if err != nil { - t.Fatalf("normalize failed: %v", err) - } - if n.ToolChoice.Mode != util.ToolChoiceForced { - t.Fatalf("expected tool choice mode forced, got %q", n.ToolChoice.Mode) - } - if n.ToolChoice.ForcedName != "read_file" { - t.Fatalf("expected forced tool name read_file, got %q", n.ToolChoice.ForcedName) - } - if len(n.ToolNames) != 1 || n.ToolNames[0] != "read_file" { - t.Fatalf("expected filtered tool names [read_file], got %#v", n.ToolNames) - } -} - -func TestNormalizeOpenAIResponsesRequestToolChoiceForcedUndeclaredFails(t *testing.T) { - store := newEmptyStoreForNormalizeTest(t) - req := map[string]any{ - "model": "gpt-4o", - "input": "ping", - "tools": []any{ - map[string]any{ - "type": "function", - "function": map[string]any{ - "name": "search", - }, - }, - }, - "tool_choice": map[string]any{ - "type": "function", - "name": "read_file", - }, - } - if _, err := normalizeOpenAIResponsesRequest(store, req, ""); err == nil { - t.Fatalf("expected forced undeclared tool to fail") - } -} - -func TestNormalizeOpenAIResponsesRequestToolChoiceNoneKeepsToolDetectionEnabled(t *testing.T) { - store := newEmptyStoreForNormalizeTest(t) - req := map[string]any{ - "model": "gpt-4o", - "input": "ping", - "tools": []any{ - map[string]any{ - "type": "function", - "function": map[string]any{ - "name": "search", - }, - }, - }, - "tool_choice": "none", - 
} - n, err := normalizeOpenAIResponsesRequest(store, req, "") - if err != nil { - t.Fatalf("normalize failed: %v", err) - } - if n.ToolChoice.Mode != util.ToolChoiceNone { - t.Fatalf("expected tool choice mode none, got %q", n.ToolChoice.Mode) - } - if len(n.ToolNames) == 0 { - t.Fatalf("expected tool detection sentinel when tool_choice=none, got %#v", n.ToolNames) - } -} diff --git a/internal/adapter/openai/upstream_empty.go b/internal/adapter/openai/upstream_empty.go deleted file mode 100644 index bb2da1f..0000000 --- a/internal/adapter/openai/upstream_empty.go +++ /dev/null @@ -1,27 +0,0 @@ -package openai - -import "net/http" - -func shouldWriteUpstreamEmptyOutputError(text string) bool { - return text == "" -} - -func upstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int, string, string) { - _ = text - if contentFilter { - return http.StatusBadRequest, "Upstream content filtered the response and returned no output.", "content_filter" - } - if thinking != "" { - return http.StatusTooManyRequests, "Upstream model returned reasoning without visible output.", "upstream_empty_output" - } - return http.StatusTooManyRequests, "Upstream model returned empty output.", "upstream_empty_output" -} - -func writeUpstreamEmptyOutputError(w http.ResponseWriter, text string, contentFilter bool) bool { - if !shouldWriteUpstreamEmptyOutputError(text) { - return false - } - status, message, code := upstreamEmptyOutputDetail(contentFilter, text, "") - writeOpenAIErrorWithCode(w, status, message, code) - return true -} diff --git a/internal/adapter/openai/vercel_prepare_test.go b/internal/adapter/openai/vercel_prepare_test.go deleted file mode 100644 index 0dfaf28..0000000 --- a/internal/adapter/openai/vercel_prepare_test.go +++ /dev/null @@ -1,83 +0,0 @@ -package openai - -import ( - "ds2api/internal/auth" - "net/http/httptest" - "testing" - "time" -) - -func TestIsVercelStreamPrepareRequest(t *testing.T) { - req := httptest.NewRequest("POST", 
"/v1/chat/completions?__stream_prepare=1", nil) - if !isVercelStreamPrepareRequest(req) { - t.Fatalf("expected prepare request to be detected") - } - - req2 := httptest.NewRequest("POST", "/v1/chat/completions", nil) - if isVercelStreamPrepareRequest(req2) { - t.Fatalf("expected non-prepare request") - } -} - -func TestIsVercelStreamReleaseRequest(t *testing.T) { - req := httptest.NewRequest("POST", "/v1/chat/completions?__stream_release=1", nil) - if !isVercelStreamReleaseRequest(req) { - t.Fatalf("expected release request to be detected") - } - - req2 := httptest.NewRequest("POST", "/v1/chat/completions", nil) - if isVercelStreamReleaseRequest(req2) { - t.Fatalf("expected non-release request") - } -} - -func TestVercelInternalSecret(t *testing.T) { - t.Run("prefer explicit secret", func(t *testing.T) { - t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret") - t.Setenv("DS2API_ADMIN_KEY", "admin-fallback") - if got := vercelInternalSecret(); got != "stream-secret" { - t.Fatalf("expected explicit secret, got %q", got) - } - }) - - t.Run("fallback to admin key", func(t *testing.T) { - t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "") - t.Setenv("DS2API_ADMIN_KEY", "admin-fallback") - if got := vercelInternalSecret(); got != "admin-fallback" { - t.Fatalf("expected admin key fallback, got %q", got) - } - }) - - t.Run("default admin when env missing", func(t *testing.T) { - t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "") - t.Setenv("DS2API_ADMIN_KEY", "") - if got := vercelInternalSecret(); got != "admin" { - t.Fatalf("expected default admin fallback, got %q", got) - } - }) -} - -func TestStreamLeaseLifecycle(t *testing.T) { - h := &Handler{} - leaseID := h.holdStreamLease(&auth.RequestAuth{UseConfigToken: false}) - if leaseID == "" { - t.Fatalf("expected non-empty lease id") - } - if ok := h.releaseStreamLease(leaseID); !ok { - t.Fatalf("expected lease release success") - } - if ok := h.releaseStreamLease(leaseID); ok { - t.Fatalf("expected duplicate release to fail") 
- } -} - -func TestStreamLeaseTTL(t *testing.T) { - t.Setenv("DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS", "120") - if got := streamLeaseTTL(); got != 120*time.Second { - t.Fatalf("expected ttl=120s, got %v", got) - } - t.Setenv("DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS", "invalid") - if got := streamLeaseTTL(); got != 15*time.Minute { - t.Fatalf("expected default ttl on invalid value, got %v", got) - } -} diff --git a/internal/admin/handler.go b/internal/admin/handler.go deleted file mode 100644 index a3eb796..0000000 --- a/internal/admin/handler.go +++ /dev/null @@ -1,65 +0,0 @@ -package admin - -import ( - "github.com/go-chi/chi/v5" - - "ds2api/internal/chathistory" -) - -type Handler struct { - Store ConfigStore - Pool PoolController - DS DeepSeekCaller - OpenAI OpenAIChatCaller - ChatHistory *chathistory.Store -} - -func RegisterRoutes(r chi.Router, h *Handler) { - r.Post("/login", h.login) - r.Get("/verify", h.verify) - r.Group(func(pr chi.Router) { - pr.Use(h.requireAdmin) - pr.Get("/vercel/config", h.getVercelConfig) - pr.Get("/config", h.getConfig) - pr.Post("/config", h.updateConfig) - pr.Get("/settings", h.getSettings) - pr.Put("/settings", h.updateSettings) - pr.Post("/settings/password", h.updateSettingsPassword) - pr.Post("/config/import", h.configImport) - pr.Get("/config/export", h.configExport) - pr.Post("/keys", h.addKey) - pr.Put("/keys/{key}", h.updateKey) - pr.Delete("/keys/{key}", h.deleteKey) - pr.Get("/proxies", h.listProxies) - pr.Post("/proxies", h.addProxy) - pr.Put("/proxies/{proxyID}", h.updateProxy) - pr.Delete("/proxies/{proxyID}", h.deleteProxy) - pr.Post("/proxies/test", h.testProxy) - pr.Get("/accounts", h.listAccounts) - pr.Post("/accounts", h.addAccount) - pr.Put("/accounts/{identifier}", h.updateAccount) - pr.Delete("/accounts/{identifier}", h.deleteAccount) - pr.Put("/accounts/{identifier}/proxy", h.updateAccountProxy) - pr.Get("/queue/status", h.queueStatus) - pr.Post("/accounts/test", h.testSingleAccount) - 
pr.Post("/accounts/test-all", h.testAllAccounts) - pr.Post("/accounts/sessions/delete-all", h.deleteAllSessions) - pr.Post("/import", h.batchImport) - pr.Post("/test", h.testAPI) - pr.Post("/dev/raw-samples/capture", h.captureRawSample) - pr.Get("/dev/raw-samples/query", h.queryRawSampleCaptures) - pr.Post("/dev/raw-samples/save", h.saveRawSampleFromCaptures) - pr.Post("/vercel/sync", h.syncVercel) - pr.Get("/vercel/status", h.vercelStatus) - pr.Post("/vercel/status", h.vercelStatus) - pr.Get("/export", h.exportConfig) - pr.Get("/dev/captures", h.getDevCaptures) - pr.Delete("/dev/captures", h.clearDevCaptures) - pr.Get("/chat-history", h.getChatHistory) - pr.Get("/chat-history/{id}", h.getChatHistoryItem) - pr.Delete("/chat-history", h.clearChatHistory) - pr.Delete("/chat-history/{id}", h.deleteChatHistoryItem) - pr.Put("/chat-history/settings", h.updateChatHistorySettings) - pr.Get("/version", h.getVersion) - }) -} diff --git a/internal/chathistory/store_test.go b/internal/chathistory/store_test.go index 78e3e08..e923755 100644 --- a/internal/chathistory/store_test.go +++ b/internal/chathistory/store_test.go @@ -46,7 +46,7 @@ func TestStoreCreatesAndPersistsEntries(t *testing.T) { started, err := store.Start(StartParams{ CallerID: "caller:abc", AccountID: "user@example.com", - Model: "deepseek-chat", + Model: "deepseek-v4-flash", Stream: true, UserInput: "hello", }) @@ -113,7 +113,7 @@ func TestStoreTrimsToConfiguredLimit(t *testing.T) { } for i := 0; i < 12; i++ { - entry, err := store.Start(StartParams{Model: "deepseek-chat", UserInput: "msg"}) + entry, err := store.Start(StartParams{Model: "deepseek-v4-flash", UserInput: "msg"}) if err != nil { t.Fatalf("start %d failed: %v", i, err) } @@ -197,7 +197,7 @@ func TestStoreConcurrentUpdatesKeepSplitFilesValid(t *testing.T) { defer wg.Done() entry, err := store.Start(StartParams{ CallerID: "caller:test", - Model: "deepseek-chat", + Model: "deepseek-v4-flash", UserInput: "hello", }) if err != nil { @@ -299,7 +299,7 
@@ func TestStoreAutoMigratesMetadataOnlyLegacyMonolith(t *testing.T) { Status: "error", CallerID: "caller:test", AccountID: "acct:test", - Model: "deepseek-chat", + Model: "deepseek-v4-flash", Stream: true, UserInput: "hello", Error: "boom", diff --git a/internal/claudeconv/convert.go b/internal/claudeconv/convert.go index 1ce1f01..cd6e156 100644 --- a/internal/claudeconv/convert.go +++ b/internal/claudeconv/convert.go @@ -1,32 +1,21 @@ package claudeconv -import "strings" +import ( + "strings" -type ClaudeMappingProvider interface { - ClaudeMapping() map[string]string -} + "ds2api/internal/config" +) -func ConvertClaudeToDeepSeek(claudeReq map[string]any, mappingProvider ClaudeMappingProvider, defaultClaudeModel string) map[string]any { +func ConvertClaudeToDeepSeek(claudeReq map[string]any, aliasProvider config.ModelAliasReader, defaultClaudeModel string) map[string]any { messages, _ := claudeReq["messages"].([]any) model, _ := claudeReq["model"].(string) if model == "" { model = defaultClaudeModel } - mapping := map[string]string{} - if mappingProvider != nil { - mapping = mappingProvider.ClaudeMapping() - } - dsModel := mapping["fast"] - if dsModel == "" { - dsModel = "deepseek-chat" - } - - modelLower := strings.ToLower(model) - if strings.Contains(modelLower, "opus") || strings.Contains(modelLower, "reasoner") || strings.Contains(modelLower, "slow") { - if slow := mapping["slow"]; slow != "" { - dsModel = slow - } + dsModel, ok := config.ResolveModel(aliasProvider, model) + if !ok || strings.TrimSpace(dsModel) == "" { + dsModel = "deepseek-v4-flash" } convertedMessages := make([]any, 0, len(messages)+1) diff --git a/internal/config/codec.go b/internal/config/codec.go index 11bf1d6..246df9b 100644 --- a/internal/config/codec.go +++ b/internal/config/codec.go @@ -26,12 +26,6 @@ func (c Config) MarshalJSON() ([]byte, error) { if len(c.Proxies) > 0 { m["proxies"] = c.Proxies } - if len(c.ClaudeMapping) > 0 { - m["claude_mapping"] = c.ClaudeMapping - } - if 
len(c.ClaudeModelMap) > 0 { - m["claude_model_mapping"] = c.ClaudeModelMap - } if len(c.ModelAliases) > 0 { m["model_aliases"] = c.ModelAliases } @@ -88,13 +82,8 @@ func (c *Config) UnmarshalJSON(b []byte) error { return fmt.Errorf("invalid field %q: %w", k, err) } case "claude_mapping": - if err := json.Unmarshal(v, &c.ClaudeMapping); err != nil { - return fmt.Errorf("invalid field %q: %w", k, err) - } case "claude_model_mapping": - if err := json.Unmarshal(v, &c.ClaudeModelMap); err != nil { - return fmt.Errorf("invalid field %q: %w", k, err) - } + // Removed legacy mapping fields are ignored instead of persisted. case "model_aliases": if err := json.Unmarshal(v, &c.ModelAliases); err != nil { return fmt.Errorf("invalid field %q: %w", k, err) @@ -150,15 +139,13 @@ func (c *Config) UnmarshalJSON(b []byte) error { func (c Config) Clone() Config { clone := Config{ - Keys: slices.Clone(c.Keys), - APIKeys: slices.Clone(c.APIKeys), - Accounts: slices.Clone(c.Accounts), - Proxies: slices.Clone(c.Proxies), - ClaudeMapping: cloneStringMap(c.ClaudeMapping), - ClaudeModelMap: cloneStringMap(c.ClaudeModelMap), - ModelAliases: cloneStringMap(c.ModelAliases), - Admin: c.Admin, - Runtime: c.Runtime, + Keys: slices.Clone(c.Keys), + APIKeys: slices.Clone(c.APIKeys), + Accounts: slices.Clone(c.Accounts), + Proxies: slices.Clone(c.Proxies), + ModelAliases: cloneStringMap(c.ModelAliases), + Admin: c.Admin, + Runtime: c.Runtime, Compat: CompatConfig{ WideInputStrictOutput: cloneBoolPtr(c.Compat.WideInputStrictOutput), StripReferenceMarkers: cloneBoolPtr(c.Compat.StripReferenceMarkers), diff --git a/internal/config/config.go b/internal/config/config.go index dd1d5df..4053798 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -12,8 +12,6 @@ type Config struct { APIKeys []APIKey `json:"api_keys,omitempty"` Accounts []Account `json:"accounts,omitempty"` Proxies []Proxy `json:"proxies,omitempty"` - ClaudeMapping map[string]string `json:"claude_mapping,omitempty"` - 
ClaudeModelMap map[string]string `json:"claude_model_mapping,omitempty"` ModelAliases map[string]string `json:"model_aliases,omitempty"` Admin AdminConfig `json:"admin,omitempty"` Runtime RuntimeConfig `json:"runtime,omitempty"` @@ -100,6 +98,9 @@ func (c *Config) NormalizeCredentials() { c.Accounts[i].Name = strings.TrimSpace(c.Accounts[i].Name) c.Accounts[i].Remark = strings.TrimSpace(c.Accounts[i].Remark) } + + c.normalizeModelAliases() + c.forceHistorySplitEnabled() } // DropInvalidAccounts removes accounts that cannot be addressed by admin APIs @@ -119,6 +120,35 @@ func (c *Config) DropInvalidAccounts() { c.Accounts = kept } +func (c *Config) normalizeModelAliases() { + if c == nil { + return + } + + aliases := map[string]string{} + for k, v := range c.ModelAliases { + key := strings.TrimSpace(lower(k)) + val := strings.TrimSpace(lower(v)) + if key == "" || val == "" { + continue + } + aliases[key] = val + } + if len(aliases) == 0 { + c.ModelAliases = nil + } else { + c.ModelAliases = aliases + } +} + +func (c *Config) forceHistorySplitEnabled() { + if c == nil { + return + } + enabled := true + c.HistorySplit.Enabled = &enabled +} + type CompatConfig struct { WideInputStrictOutput *bool `json:"wide_input_strict_output,omitempty"` StripReferenceMarkers *bool `json:"strip_reference_markers,omitempty"` diff --git a/internal/config/config_edge_test.go b/internal/config/config_edge_test.go index 95a6eba..7741777 100644 --- a/internal/config/config_edge_test.go +++ b/internal/config/config_edge_test.go @@ -10,19 +10,19 @@ import ( // ─── GetModelConfig edge cases ─────────────────────────────────────── func TestGetModelConfigDeepSeekChat(t *testing.T) { - thinking, search, ok := GetModelConfig("deepseek-chat") + thinking, search, ok := GetModelConfig("deepseek-v4-flash") if !ok { - t.Fatal("expected ok for deepseek-chat") + t.Fatal("expected ok for deepseek-v4-flash") } - if thinking || search { - t.Fatalf("expected no thinking/search for deepseek-chat, got 
thinking=%v search=%v", thinking, search) + if !thinking || search { + t.Fatalf("expected thinking=true search=false for deepseek-v4-flash, got thinking=%v search=%v", thinking, search) } } func TestGetModelConfigDeepSeekReasoner(t *testing.T) { - thinking, search, ok := GetModelConfig("deepseek-reasoner") + thinking, search, ok := GetModelConfig("deepseek-v4-pro") if !ok { - t.Fatal("expected ok for deepseek-reasoner") + t.Fatal("expected ok for deepseek-v4-pro") } if !thinking || search { t.Fatalf("expected thinking=true search=false, got thinking=%v search=%v", thinking, search) @@ -30,19 +30,19 @@ func TestGetModelConfigDeepSeekReasoner(t *testing.T) { } func TestGetModelConfigDeepSeekChatSearch(t *testing.T) { - thinking, search, ok := GetModelConfig("deepseek-chat-search") + thinking, search, ok := GetModelConfig("deepseek-v4-flash-search") if !ok { - t.Fatal("expected ok for deepseek-chat-search") + t.Fatal("expected ok for deepseek-v4-flash-search") } - if thinking || !search { - t.Fatalf("expected thinking=false search=true, got thinking=%v search=%v", thinking, search) + if !thinking || !search { + t.Fatalf("expected thinking=true search=true, got thinking=%v search=%v", thinking, search) } } func TestGetModelConfigDeepSeekReasonerSearch(t *testing.T) { - thinking, search, ok := GetModelConfig("deepseek-reasoner-search") + thinking, search, ok := GetModelConfig("deepseek-v4-pro-search") if !ok { - t.Fatal("expected ok for deepseek-reasoner-search") + t.Fatal("expected ok for deepseek-v4-pro-search") } if !thinking || !search { t.Fatalf("expected both true, got thinking=%v search=%v", thinking, search) @@ -50,19 +50,19 @@ func TestGetModelConfigDeepSeekReasonerSearch(t *testing.T) { } func TestGetModelConfigDeepSeekExpertChat(t *testing.T) { - thinking, search, ok := GetModelConfig("deepseek-expert-chat") + thinking, search, ok := GetModelConfig("deepseek-v4-pro") if !ok { - t.Fatal("expected ok for deepseek-expert-chat") + t.Fatal("expected ok for 
deepseek-v4-pro") } - if thinking || search { - t.Fatalf("expected no thinking/search for deepseek-expert-chat, got thinking=%v search=%v", thinking, search) + if !thinking || search { + t.Fatalf("expected thinking=true search=false for deepseek-v4-pro, got thinking=%v search=%v", thinking, search) } } func TestGetModelConfigDeepSeekExpertReasonerSearch(t *testing.T) { - thinking, search, ok := GetModelConfig("deepseek-expert-reasoner-search") + thinking, search, ok := GetModelConfig("deepseek-v4-pro-search") if !ok { - t.Fatal("expected ok for deepseek-expert-reasoner-search") + t.Fatal("expected ok for deepseek-v4-pro-search") } if !thinking || !search { t.Fatalf("expected both true, got thinking=%v search=%v", thinking, search) @@ -70,9 +70,9 @@ func TestGetModelConfigDeepSeekExpertReasonerSearch(t *testing.T) { } func TestGetModelConfigDeepSeekVisionReasonerSearch(t *testing.T) { - thinking, search, ok := GetModelConfig("deepseek-vision-reasoner-search") + thinking, search, ok := GetModelConfig("deepseek-v4-vision-search") if !ok { - t.Fatal("expected ok for deepseek-vision-reasoner-search") + t.Fatal("expected ok for deepseek-v4-vision-search") } if !thinking || !search { t.Fatalf("expected both true, got thinking=%v search=%v", thinking, search) @@ -80,27 +80,27 @@ func TestGetModelConfigDeepSeekVisionReasonerSearch(t *testing.T) { } func TestGetModelTypeDefaultExpertAndVision(t *testing.T) { - defaultType, ok := GetModelType("deepseek-chat") + defaultType, ok := GetModelType("deepseek-v4-flash") if !ok || defaultType != "default" { t.Fatalf("expected default model_type, got ok=%v model_type=%q", ok, defaultType) } - expertType, ok := GetModelType("deepseek-expert-chat") + expertType, ok := GetModelType("deepseek-v4-pro") if !ok || expertType != "expert" { t.Fatalf("expected expert model_type, got ok=%v model_type=%q", ok, expertType) } - visionType, ok := GetModelType("deepseek-vision-chat") + visionType, ok := GetModelType("deepseek-v4-vision") if !ok || 
visionType != "vision" { t.Fatalf("expected vision model_type, got ok=%v model_type=%q", ok, visionType) } } func TestGetModelConfigCaseInsensitive(t *testing.T) { - thinking, search, ok := GetModelConfig("DeepSeek-Chat") + thinking, search, ok := GetModelConfig("DeepSeek-V4-Flash") if !ok { - t.Fatal("expected ok for case-insensitive deepseek-chat") + t.Fatal("expected ok for case-insensitive deepseek-v4-flash") } - if thinking || search { - t.Fatalf("expected no thinking/search for case-insensitive deepseek-chat") + if !thinking || search { + t.Fatalf("expected thinking=true search=false for case-insensitive deepseek-v4-flash") } } @@ -145,12 +145,9 @@ func TestConfigJSONRoundtrip(t *testing.T) { trueVal := true falseVal := false cfg := Config{ - Keys: []string{"key1", "key2"}, - Accounts: []Account{{Email: "user@example.com", Password: "pass", Token: "tok"}}, - ClaudeMapping: map[string]string{ - "fast": "deepseek-chat", - "slow": "deepseek-reasoner", - }, + Keys: []string{"key1", "key2"}, + Accounts: []Account{{Email: "user@example.com", Password: "pass", Token: "tok"}}, + ModelAliases: map[string]string{"Claude-Sonnet-4-6": "DeepSeek-V4-Flash"}, AutoDelete: AutoDeleteConfig{ Mode: "single", }, @@ -188,8 +185,8 @@ func TestConfigJSONRoundtrip(t *testing.T) { if len(decoded.Accounts) != 1 || decoded.Accounts[0].Email != "user@example.com" { t.Fatalf("unexpected accounts: %#v", decoded.Accounts) } - if decoded.ClaudeMapping["fast"] != "deepseek-chat" { - t.Fatalf("unexpected claude mapping: %#v", decoded.ClaudeMapping) + if decoded.ModelAliases["claude-sonnet-4-6"] != "deepseek-v4-flash" { + t.Fatalf("unexpected normalized model aliases: %#v", decoded.ModelAliases) } if decoded.Runtime.TokenRefreshIntervalHours != 12 { t.Fatalf("unexpected runtime refresh interval: %#v", decoded.Runtime.TokenRefreshIntervalHours) @@ -255,6 +252,23 @@ func TestConfigUnmarshalJSONPreservesUnknownFields(t *testing.T) { } } +func 
TestConfigUnmarshalJSONIgnoresRemovedLegacyModelMappings(t *testing.T) { + raw := `{"keys":["k1"],"accounts":[],"claude_mapping":{"fast":"deepseek-v4-pro"},"claude_model_mapping":{"slow":"deepseek-v4-pro"}}` + var cfg Config + if err := json.Unmarshal([]byte(raw), &cfg); err != nil { + t.Fatalf("unmarshal error: %v", err) + } + if len(cfg.ModelAliases) != 0 { + t.Fatalf("expected removed legacy mappings to be ignored, got %#v", cfg.ModelAliases) + } + if _, ok := cfg.AdditionalFields["claude_mapping"]; ok { + t.Fatalf("expected removed legacy field not to persist in additional fields: %#v", cfg.AdditionalFields) + } + if _, ok := cfg.AdditionalFields["claude_model_mapping"]; ok { + t.Fatalf("expected removed legacy field not to persist in additional fields: %#v", cfg.AdditionalFields) + } +} + // ─── Config.Clone ──────────────────────────────────────────────────── func TestConfigCloneIsDeepCopy(t *testing.T) { @@ -262,11 +276,9 @@ func TestConfigCloneIsDeepCopy(t *testing.T) { trueVal := true turns := 2 cfg := Config{ - Keys: []string{"key1"}, - Accounts: []Account{{Email: "user@test.com", Token: "token"}}, - ClaudeMapping: map[string]string{ - "fast": "deepseek-chat", - }, + Keys: []string{"key1"}, + Accounts: []Account{{Email: "user@test.com", Token: "token"}}, + ModelAliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}, Compat: CompatConfig{ StripReferenceMarkers: &falseVal, }, @@ -282,7 +294,7 @@ func TestConfigCloneIsDeepCopy(t *testing.T) { // Modify original cfg.Keys[0] = "modified" cfg.Accounts[0].Email = "modified@test.com" - cfg.ClaudeMapping["fast"] = "modified-model" + cfg.ModelAliases["claude-sonnet-4-6"] = "modified-model" if cfg.Compat.StripReferenceMarkers != nil { *cfg.Compat.StripReferenceMarkers = true } @@ -300,8 +312,8 @@ func TestConfigCloneIsDeepCopy(t *testing.T) { if cloned.Accounts[0].Email != "user@test.com" { t.Fatalf("clone accounts was affected: %#v", cloned.Accounts) } - if cloned.ClaudeMapping["fast"] != 
"deepseek-chat" { - t.Fatalf("clone claude mapping was affected: %#v", cloned.ClaudeMapping) + if cloned.ModelAliases["claude-sonnet-4-6"] != "deepseek-v4-flash" { + t.Fatalf("clone model aliases was affected: %#v", cloned.ModelAliases) } if cloned.Compat.StripReferenceMarkers == nil || *cloned.Compat.StripReferenceMarkers { t.Fatalf("clone compat was affected: %#v", cloned.Compat.StripReferenceMarkers) @@ -652,25 +664,27 @@ func TestNormalizeCredentialsPrefersStructuredAPIKeys(t *testing.T) { } } -func TestStoreClaudeMapping(t *testing.T) { - t.Setenv("DS2API_CONFIG_JSON", `{"keys":[],"accounts":[],"claude_mapping":{"fast":"deepseek-chat","slow":"deepseek-reasoner"}}`) +func TestStoreModelAliasesIncludesDefaultsAndOverrides(t *testing.T) { + t.Setenv("DS2API_CONFIG_JSON", `{"keys":[],"accounts":[],"model_aliases":{"claude-opus-4-6":"deepseek-v4-pro-search"}}`) store := LoadStore() - mapping := store.ClaudeMapping() - if mapping["fast"] != "deepseek-chat" { - t.Fatalf("unexpected fast mapping: %q", mapping["fast"]) + aliases := store.ModelAliases() + if aliases["claude-sonnet-4-6"] != "deepseek-v4-flash" { + t.Fatalf("expected default alias to remain available, got %q", aliases["claude-sonnet-4-6"]) } - if mapping["slow"] != "deepseek-reasoner" { - t.Fatalf("unexpected slow mapping: %q", mapping["slow"]) + if aliases["claude-opus-4-6"] != "deepseek-v4-pro-search" { + t.Fatalf("expected custom alias override, got %q", aliases["claude-opus-4-6"]) } } -func TestStoreClaudeMappingEmpty(t *testing.T) { +func TestStoreModelAliasesDefault(t *testing.T) { t.Setenv("DS2API_CONFIG_JSON", `{"keys":[],"accounts":[]}`) store := LoadStore() - mapping := store.ClaudeMapping() - // Even without config mapping, there are defaults - if mapping == nil { - t.Fatal("expected non-nil mapping (may contain defaults)") + aliases := store.ModelAliases() + if aliases == nil { + t.Fatal("expected non-nil aliases") + } + if aliases["claude-sonnet-4-6"] != "deepseek-v4-flash" { + 
t.Fatalf("expected built-in alias, got %q", aliases["claude-sonnet-4-6"]) } } @@ -720,18 +734,12 @@ func TestOpenAIModelsResponse(t *testing.T) { t.Fatal("expected non-empty models list") } expected := map[string]bool{ - "deepseek-chat": false, - "deepseek-reasoner": false, - "deepseek-chat-search": false, - "deepseek-reasoner-search": false, - "deepseek-expert-chat": false, - "deepseek-expert-reasoner": false, - "deepseek-expert-chat-search": false, - "deepseek-expert-reasoner-search": false, - "deepseek-vision-chat": false, - "deepseek-vision-reasoner": false, - "deepseek-vision-chat-search": false, - "deepseek-vision-reasoner-search": false, + "deepseek-v4-flash": false, + "deepseek-v4-pro": false, + "deepseek-v4-flash-search": false, + "deepseek-v4-pro-search": false, + "deepseek-v4-vision": false, + "deepseek-v4-vision-search": false, } for _, model := range data { if _, ok := expected[model.ID]; ok { diff --git a/internal/config/model_alias_test.go b/internal/config/model_alias_test.go index 0a8a7ca..f537b21 100644 --- a/internal/config/model_alias_test.go +++ b/internal/config/model_alias_test.go @@ -7,22 +7,63 @@ type mockModelAliasReader map[string]string func (m mockModelAliasReader) ModelAliases() map[string]string { return m } func TestResolveModelDirectDeepSeek(t *testing.T) { - got, ok := ResolveModel(nil, "deepseek-chat") - if !ok || got != "deepseek-chat" { - t.Fatalf("expected deepseek-chat, got ok=%v model=%q", ok, got) + got, ok := ResolveModel(nil, "deepseek-v4-flash") + if !ok || got != "deepseek-v4-flash" { + t.Fatalf("expected deepseek-v4-flash, got ok=%v model=%q", ok, got) } } func TestResolveModelAlias(t *testing.T) { got, ok := ResolveModel(nil, "gpt-4.1") - if !ok || got != "deepseek-chat" { - t.Fatalf("expected alias gpt-4.1 -> deepseek-chat, got ok=%v model=%q", ok, got) + if !ok || got != "deepseek-v4-flash" { + t.Fatalf("expected alias gpt-4.1 -> deepseek-v4-flash, got ok=%v model=%q", ok, got) + } +} + +func 
TestResolveLatestOpenAIAlias(t *testing.T) { + got, ok := ResolveModel(nil, "gpt-5.5") + if !ok || got != "deepseek-v4-flash" { + t.Fatalf("expected alias gpt-5.5 -> deepseek-v4-flash, got ok=%v model=%q", ok, got) + } +} + +func TestResolveLatestClaudeAlias(t *testing.T) { + got, ok := ResolveModel(nil, "claude-sonnet-4-6") + if !ok || got != "deepseek-v4-flash" { + t.Fatalf("expected alias claude-sonnet-4-6 -> deepseek-v4-flash, got ok=%v model=%q", ok, got) + } +} + +func TestResolveExpandedHistoricalAliases(t *testing.T) { + cases := []struct { + name string + model string + want string + }{ + {name: "openai old chatgpt", model: "chatgpt-4o", want: "deepseek-v4-flash"}, + {name: "openai codex max", model: "gpt-5.1-codex-max", want: "deepseek-v4-pro"}, + {name: "openai deep research", model: "o3-deep-research", want: "deepseek-v4-pro-search"}, + {name: "openai historical reasoning", model: "o1-preview", want: "deepseek-v4-pro"}, + {name: "claude latest historical", model: "claude-3-5-sonnet-latest", want: "deepseek-v4-flash"}, + {name: "claude historical opus", model: "claude-3-opus-20240229", want: "deepseek-v4-pro"}, + {name: "claude historical haiku", model: "claude-3-haiku-20240307", want: "deepseek-v4-flash"}, + {name: "gemini latest alias", model: "gemini-flash-latest", want: "deepseek-v4-flash"}, + {name: "gemini historical pro", model: "gemini-1.5-pro", want: "deepseek-v4-pro"}, + {name: "gemini vision legacy", model: "gemini-pro-vision", want: "deepseek-v4-vision"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, ok := ResolveModel(nil, tc.model) + if !ok || got != tc.want { + t.Fatalf("expected alias %s -> %s, got ok=%v model=%q", tc.model, tc.want, ok, got) + } + }) } } func TestResolveModelHeuristicReasoner(t *testing.T) { got, ok := ResolveModel(nil, "o3-super") - if !ok || got != "deepseek-reasoner" { + if !ok || got != "deepseek-v4-pro" { t.Fatalf("expected heuristic reasoner, got ok=%v model=%q", ok, got) } } @@ 
-34,28 +75,58 @@ func TestResolveModelUnknown(t *testing.T) { } } +func TestResolveModelRejectsLegacyDeepSeekIDs(t *testing.T) { + legacyModels := []string{ + "deepseek-chat", + "deepseek-reasoner", + "deepseek-chat-search", + "deepseek-reasoner-search", + "deepseek-expert-chat", + "deepseek-expert-reasoner", + "deepseek-vision-chat", + } + for _, model := range legacyModels { + if got, ok := ResolveModel(nil, model); ok { + t.Fatalf("expected legacy model %q to be rejected, got %q", model, got) + } + } +} + +func TestResolveModelRejectsRetiredHistoricalModels(t *testing.T) { + retiredModels := []string{ + "claude-2.1", + "claude-instant-1.2", + "gpt-3.5-turbo", + } + for _, model := range retiredModels { + if got, ok := ResolveModel(nil, model); ok { + t.Fatalf("expected retired model %q to be rejected, got %q", model, got) + } + } +} + func TestResolveModelDirectDeepSeekExpert(t *testing.T) { - got, ok := ResolveModel(nil, "deepseek-expert-chat") - if !ok || got != "deepseek-expert-chat" { - t.Fatalf("expected deepseek-expert-chat, got ok=%v model=%q", ok, got) + got, ok := ResolveModel(nil, "deepseek-v4-pro") + if !ok || got != "deepseek-v4-pro" { + t.Fatalf("expected deepseek-v4-pro, got ok=%v model=%q", ok, got) } } func TestResolveModelCustomAliasToExpert(t *testing.T) { got, ok := ResolveModel(mockModelAliasReader{ - "my-expert-model": "deepseek-expert-reasoner-search", + "my-expert-model": "deepseek-v4-pro-search", }, "my-expert-model") - if !ok || got != "deepseek-expert-reasoner-search" { - t.Fatalf("expected alias -> deepseek-expert-reasoner-search, got ok=%v model=%q", ok, got) + if !ok || got != "deepseek-v4-pro-search" { + t.Fatalf("expected alias -> deepseek-v4-pro-search, got ok=%v model=%q", ok, got) } } func TestResolveModelCustomAliasToVision(t *testing.T) { got, ok := ResolveModel(mockModelAliasReader{ - "my-vision-model": "deepseek-vision-chat-search", + "my-vision-model": "deepseek-v4-vision-search", }, "my-vision-model") - if !ok || got != 
"deepseek-vision-chat-search" { - t.Fatalf("expected alias -> deepseek-vision-chat-search, got ok=%v model=%q", ok, got) + if !ok || got != "deepseek-v4-vision-search" { + t.Fatalf("expected alias -> deepseek-v4-vision-search, got ok=%v model=%q", ok, got) } } diff --git a/internal/config/models.go b/internal/config/models.go index 00b9cd2..7b28ec3 100644 --- a/internal/config/models.go +++ b/internal/config/models.go @@ -15,28 +15,22 @@ type ModelAliasReader interface { } var DeepSeekModels = []ModelInfo{ - {ID: "deepseek-chat", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, - {ID: "deepseek-reasoner", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, - {ID: "deepseek-chat-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, - {ID: "deepseek-reasoner-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, - {ID: "deepseek-expert-chat", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, - {ID: "deepseek-expert-reasoner", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, - {ID: "deepseek-expert-chat-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, - {ID: "deepseek-expert-reasoner-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, - {ID: "deepseek-vision-chat", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, - {ID: "deepseek-vision-reasoner", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, - {ID: "deepseek-vision-chat-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, - {ID: "deepseek-vision-reasoner-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, + {ID: "deepseek-v4-flash", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: 
[]any{}}, + {ID: "deepseek-v4-pro", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, + {ID: "deepseek-v4-flash-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, + {ID: "deepseek-v4-pro-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, + {ID: "deepseek-v4-vision", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, + {ID: "deepseek-v4-vision-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}}, } var ClaudeModels = []ModelInfo{ // Current aliases {ID: "claude-opus-4-6", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, - {ID: "claude-sonnet-4-5", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, + {ID: "claude-sonnet-4-6", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, {ID: "claude-haiku-4-5", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, - // Current snapshots - {ID: "claude-opus-4-5-20251101", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, + // Claude 4.x snapshots and prior aliases kept for compatibility + {ID: "claude-sonnet-4-5", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, {ID: "claude-opus-4-1", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, {ID: "claude-opus-4-1-20250805", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, {ID: "claude-opus-4-0", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, @@ -57,44 +51,13 @@ var ClaudeModels = []ModelInfo{ {ID: "claude-3-5-haiku-latest", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, {ID: "claude-3-5-haiku-20241022", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, {ID: "claude-3-haiku-20240307", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, - - // Claude 2.x and 1.x (retired but accepted for compatibility) - {ID: "claude-2.1", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, - 
{ID: "claude-2.0", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, - {ID: "claude-1.3", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, - {ID: "claude-1.2", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, - {ID: "claude-1.1", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, - {ID: "claude-1.0", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, - {ID: "claude-instant-1.2", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, - {ID: "claude-instant-1.1", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, - {ID: "claude-instant-1.0", Object: "model", Created: 1715635200, OwnedBy: "anthropic"}, } func GetModelConfig(model string) (thinking bool, search bool, ok bool) { switch lower(model) { - case "deepseek-chat": - return false, false, true - case "deepseek-reasoner": + case "deepseek-v4-flash", "deepseek-v4-pro", "deepseek-v4-vision": return true, false, true - case "deepseek-chat-search": - return false, true, true - case "deepseek-reasoner-search": - return true, true, true - case "deepseek-expert-chat": - return false, false, true - case "deepseek-expert-reasoner": - return true, false, true - case "deepseek-expert-chat-search": - return false, true, true - case "deepseek-expert-reasoner-search": - return true, true, true - case "deepseek-vision-chat": - return false, false, true - case "deepseek-vision-reasoner": - return true, false, true - case "deepseek-vision-chat-search": - return false, true, true - case "deepseek-vision-reasoner-search": + case "deepseek-v4-flash-search", "deepseek-v4-pro-search", "deepseek-v4-vision-search": return true, true, true default: return false, false, false @@ -103,11 +66,11 @@ func GetModelConfig(model string) (thinking bool, search bool, ok bool) { func GetModelType(model string) (modelType string, ok bool) { switch lower(model) { - case "deepseek-chat", "deepseek-reasoner", "deepseek-chat-search", "deepseek-reasoner-search": + case 
"deepseek-v4-flash", "deepseek-v4-flash-search": return "default", true - case "deepseek-expert-chat", "deepseek-expert-reasoner", "deepseek-expert-chat-search", "deepseek-expert-reasoner-search": + case "deepseek-v4-pro", "deepseek-v4-pro-search": return "expert", true - case "deepseek-vision-chat", "deepseek-vision-reasoner", "deepseek-vision-chat-search", "deepseek-vision-reasoner-search": + case "deepseek-v4-vision", "deepseek-v4-vision-search": return "vision", true default: return "", false @@ -121,27 +84,105 @@ func IsSupportedDeepSeekModel(model string) bool { func DefaultModelAliases() map[string]string { return map[string]string{ - "gpt-4o": "deepseek-chat", - "gpt-4.1": "deepseek-chat", - "gpt-4.1-mini": "deepseek-chat", - "gpt-4.1-nano": "deepseek-chat", - "gpt-5": "deepseek-chat", - "gpt-5-mini": "deepseek-chat", - "gpt-5-codex": "deepseek-reasoner", - "o1": "deepseek-reasoner", - "o1-mini": "deepseek-reasoner", - "o3": "deepseek-reasoner", - "o3-mini": "deepseek-reasoner", - "claude-sonnet-4-5": "deepseek-chat", - "claude-haiku-4-5": "deepseek-chat", - "claude-opus-4-6": "deepseek-reasoner", - "claude-3-5-sonnet": "deepseek-chat", - "claude-3-5-haiku": "deepseek-chat", - "claude-3-opus": "deepseek-reasoner", - "gemini-2.5-pro": "deepseek-chat", - "gemini-2.5-flash": "deepseek-chat", - "llama-3.1-70b-instruct": "deepseek-chat", - "qwen-max": "deepseek-chat", + // OpenAI GPT / ChatGPT families + "chatgpt-4o": "deepseek-v4-flash", + "gpt-4": "deepseek-v4-flash", + "gpt-4-turbo": "deepseek-v4-flash", + "gpt-4-turbo-preview": "deepseek-v4-flash", + "gpt-4.5-preview": "deepseek-v4-flash", + "gpt-4o": "deepseek-v4-flash", + "gpt-4o-mini": "deepseek-v4-flash", + "gpt-4.1": "deepseek-v4-flash", + "gpt-4.1-mini": "deepseek-v4-flash", + "gpt-4.1-nano": "deepseek-v4-flash", + "gpt-5": "deepseek-v4-flash", + "gpt-5-chat": "deepseek-v4-flash", + "gpt-5.1": "deepseek-v4-flash", + "gpt-5.1-chat": "deepseek-v4-flash", + "gpt-5.2": "deepseek-v4-flash", + 
"gpt-5.2-chat": "deepseek-v4-flash", + "gpt-5.3-chat": "deepseek-v4-flash", + "gpt-5.4": "deepseek-v4-flash", + "gpt-5.5": "deepseek-v4-flash", + "gpt-5-mini": "deepseek-v4-flash", + "gpt-5-nano": "deepseek-v4-flash", + "gpt-5.4-mini": "deepseek-v4-flash", + "gpt-5.4-nano": "deepseek-v4-flash", + "gpt-5-pro": "deepseek-v4-pro", + "gpt-5.2-pro": "deepseek-v4-pro", + "gpt-5.4-pro": "deepseek-v4-pro", + "gpt-5.5-pro": "deepseek-v4-pro", + "gpt-5-codex": "deepseek-v4-pro", + "gpt-5.1-codex": "deepseek-v4-pro", + "gpt-5.1-codex-mini": "deepseek-v4-pro", + "gpt-5.1-codex-max": "deepseek-v4-pro", + "gpt-5.2-codex": "deepseek-v4-pro", + "gpt-5.3-codex": "deepseek-v4-pro", + "codex-mini-latest": "deepseek-v4-pro", + + // OpenAI reasoning / research families + "o1": "deepseek-v4-pro", + "o1-preview": "deepseek-v4-pro", + "o1-mini": "deepseek-v4-pro", + "o1-pro": "deepseek-v4-pro", + "o3": "deepseek-v4-pro", + "o3-mini": "deepseek-v4-pro", + "o3-pro": "deepseek-v4-pro", + "o3-deep-research": "deepseek-v4-pro-search", + "o4-mini": "deepseek-v4-pro", + "o4-mini-deep-research": "deepseek-v4-pro-search", + + // Claude current and historical aliases + "claude-opus-4-6": "deepseek-v4-pro", + "claude-opus-4-1": "deepseek-v4-pro", + "claude-opus-4-1-20250805": "deepseek-v4-pro", + "claude-opus-4-0": "deepseek-v4-pro", + "claude-opus-4-20250514": "deepseek-v4-pro", + "claude-sonnet-4-6": "deepseek-v4-flash", + "claude-sonnet-4-5": "deepseek-v4-flash", + "claude-sonnet-4-5-20250929": "deepseek-v4-flash", + "claude-sonnet-4-0": "deepseek-v4-flash", + "claude-sonnet-4-20250514": "deepseek-v4-flash", + "claude-haiku-4-5": "deepseek-v4-flash", + "claude-haiku-4-5-20251001": "deepseek-v4-flash", + "claude-3-7-sonnet": "deepseek-v4-flash", + "claude-3-7-sonnet-latest": "deepseek-v4-flash", + "claude-3-7-sonnet-20250219": "deepseek-v4-flash", + "claude-3-5-sonnet": "deepseek-v4-flash", + "claude-3-5-sonnet-latest": "deepseek-v4-flash", + "claude-3-5-sonnet-20240620": "deepseek-v4-flash", + 
"claude-3-5-sonnet-20241022": "deepseek-v4-flash", + "claude-3-5-haiku": "deepseek-v4-flash", + "claude-3-5-haiku-latest": "deepseek-v4-flash", + "claude-3-5-haiku-20241022": "deepseek-v4-flash", + "claude-3-opus": "deepseek-v4-pro", + "claude-3-opus-20240229": "deepseek-v4-pro", + "claude-3-sonnet": "deepseek-v4-flash", + "claude-3-sonnet-20240229": "deepseek-v4-flash", + "claude-3-haiku": "deepseek-v4-flash", + "claude-3-haiku-20240307": "deepseek-v4-flash", + + // Gemini current and historical text / multimodal models + "gemini-pro": "deepseek-v4-pro", + "gemini-pro-vision": "deepseek-v4-vision", + "gemini-pro-latest": "deepseek-v4-pro", + "gemini-flash-latest": "deepseek-v4-flash", + "gemini-1.5-pro": "deepseek-v4-pro", + "gemini-1.5-flash": "deepseek-v4-flash", + "gemini-1.5-flash-8b": "deepseek-v4-flash", + "gemini-2.0-flash": "deepseek-v4-flash", + "gemini-2.0-flash-lite": "deepseek-v4-flash", + "gemini-2.5-pro": "deepseek-v4-pro", + "gemini-2.5-flash": "deepseek-v4-flash", + "gemini-2.5-flash-lite": "deepseek-v4-flash", + "gemini-3.1-pro": "deepseek-v4-pro", + "gemini-3-pro": "deepseek-v4-pro", + "gemini-3-flash": "deepseek-v4-flash", + "gemini-3.1-flash": "deepseek-v4-flash", + "gemini-3.1-flash-lite": "deepseek-v4-flash", + + "llama-3.1-70b-instruct": "deepseek-v4-flash", + "qwen-max": "deepseek-v4-flash", } } @@ -150,6 +191,9 @@ func ResolveModel(store ModelAliasReader, requested string) (string, bool) { if model == "" { return "", false } + if isRetiredHistoricalModel(model) { + return "", false + } if IsSupportedDeepSeekModel(model) { return model, true } @@ -179,23 +223,44 @@ func ResolveModel(store ModelAliasReader, requested string) (string, bool) { return "", false } + useVision := strings.Contains(model, "vision") useReasoner := strings.Contains(model, "reason") || strings.Contains(model, "reasoner") || strings.HasPrefix(model, "o1") || strings.HasPrefix(model, "o3") || strings.Contains(model, "opus") || + strings.Contains(model, "slow") || 
strings.Contains(model, "r1") useSearch := strings.Contains(model, "search") switch { + case useVision && useSearch: + return "deepseek-v4-vision-search", true + case useVision: + return "deepseek-v4-vision", true case useReasoner && useSearch: - return "deepseek-reasoner-search", true + return "deepseek-v4-pro-search", true case useReasoner: - return "deepseek-reasoner", true + return "deepseek-v4-pro", true case useSearch: - return "deepseek-chat-search", true + return "deepseek-v4-flash-search", true default: - return "deepseek-chat", true + return "deepseek-v4-flash", true + } +} + +func isRetiredHistoricalModel(model string) bool { + switch { + case strings.HasPrefix(model, "claude-1."): + return true + case strings.HasPrefix(model, "claude-2."): + return true + case strings.HasPrefix(model, "claude-instant-"): + return true + case strings.HasPrefix(model, "gpt-3.5"): + return true + default: + return false } } diff --git a/internal/config/store_accessors.go b/internal/config/store_accessors.go index 4b8c003..4b25284 100644 --- a/internal/config/store_accessors.go +++ b/internal/config/store_accessors.go @@ -6,18 +6,6 @@ import ( "strings" ) -func (s *Store) ClaudeMapping() map[string]string { - s.mu.RLock() - defer s.mu.RUnlock() - if len(s.cfg.ClaudeModelMap) > 0 { - return cloneStringMap(s.cfg.ClaudeModelMap) - } - if len(s.cfg.ClaudeMapping) > 0 { - return cloneStringMap(s.cfg.ClaudeMapping) - } - return map[string]string{"fast": "deepseek-chat", "slow": "deepseek-reasoner"} -} - func (s *Store) ModelAliases() map[string]string { s.mu.RLock() defer s.mu.RUnlock() @@ -176,12 +164,7 @@ func (s *Store) AutoDeleteSessions() bool { } func (s *Store) HistorySplitEnabled() bool { - s.mu.RLock() - defer s.mu.RUnlock() - if s.cfg.HistorySplit.Enabled == nil { - return true - } - return *s.cfg.HistorySplit.Enabled + return true } func (s *Store) HistorySplitTriggerAfterTurns() int { diff --git a/internal/config/store_accessors_test.go 
b/internal/config/store_accessors_test.go index 6939602..af197ce 100644 --- a/internal/config/store_accessors_test.go +++ b/internal/config/store_accessors_test.go @@ -18,10 +18,25 @@ func TestStoreHistorySplitAccessors(t *testing.T) { TriggerAfterTurns: &turns, } - if store.HistorySplitEnabled() { - t.Fatal("expected history split disabled after override") + if !store.HistorySplitEnabled() { + t.Fatal("expected history split to stay enabled after legacy disabled override") } if got := store.HistorySplitTriggerAfterTurns(); got != 3 { t.Fatalf("history split trigger_after_turns=%d want=3", got) } } + +func TestStoreHistorySplitLegacyDisabledConfigNormalizesToEnabled(t *testing.T) { + t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"history_split":{"enabled":false,"trigger_after_turns":2}}`) + store := LoadStore() + if !store.HistorySplitEnabled() { + t.Fatal("expected history split enabled when legacy config disables it") + } + snap := store.Snapshot() + if snap.HistorySplit.Enabled == nil || !*snap.HistorySplit.Enabled { + t.Fatalf("expected normalized history_split.enabled=true, got %#v", snap.HistorySplit.Enabled) + } + if got := store.HistorySplitTriggerAfterTurns(); got != 2 { + t.Fatalf("history split trigger_after_turns=%d want=2", got) + } +} diff --git a/internal/deepseek/client_auth.go b/internal/deepseek/client/client_auth.go similarity index 83% rename from internal/deepseek/client_auth.go rename to internal/deepseek/client/client_auth.go index 23beb78..b582df9 100644 --- a/internal/deepseek/client_auth.go +++ b/internal/deepseek/client/client_auth.go @@ -1,7 +1,8 @@ -package deepseek +package client import ( "context" + dsprotocol "ds2api/internal/deepseek/protocol" "errors" "fmt" "net/http" @@ -28,7 +29,7 @@ func (c *Client) Login(ctx context.Context, acc config.Account) (string, error) } else { return "", errors.New("missing email/mobile") } - resp, err := c.postJSON(ctx, clients.regular, clients.fallback, DeepSeekLoginURL, BaseHeaders, payload) + 
resp, err := c.postJSON(ctx, clients.regular, clients.fallback, dsprotocol.DeepSeekLoginURL, dsprotocol.BaseHeaders, payload) if err != nil { return "", err } @@ -58,7 +59,7 @@ func (c *Client) CreateSession(ctx context.Context, a *auth.RequestAuth, maxAtte refreshed := false for attempts < maxAttempts { headers := c.authHeaders(a.DeepSeekToken) - resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, DeepSeekCreateSessionURL, headers, map[string]any{"agent": "chat"}) + resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, dsprotocol.DeepSeekCreateSessionURL, headers, map[string]any{"agent": "chat"}) if err != nil { config.Logger.Warn("[create_session] request error", "error", err, "account", a.AccountID) attempts++ @@ -91,7 +92,7 @@ func (c *Client) CreateSession(ctx context.Context, a *auth.RequestAuth, maxAtte } func (c *Client) GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error) { - return c.GetPowForTarget(ctx, a, DeepSeekCompletionTargetPath, maxAttempts) + return c.GetPowForTarget(ctx, a, dsprotocol.DeepSeekCompletionTargetPath, maxAttempts) } func (c *Client) GetPowForTarget(ctx context.Context, a *auth.RequestAuth, targetPath string, maxAttempts int) (string, error) { @@ -100,16 +101,20 @@ func (c *Client) GetPowForTarget(ctx context.Context, a *auth.RequestAuth, targe } targetPath = strings.TrimSpace(targetPath) if targetPath == "" { - targetPath = DeepSeekCompletionTargetPath + targetPath = dsprotocol.DeepSeekCompletionTargetPath } clients := c.requestClientsForAuth(ctx, a) attempts := 0 refreshed := false + lastFailureKind := FailureUnknown + lastFailureMessage := "" for attempts < maxAttempts { headers := c.authHeaders(a.DeepSeekToken) - resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, DeepSeekCreatePowURL, headers, map[string]any{"target_path": targetPath}) + resp, status, err := c.postJSONWithStatus(ctx, clients.regular, 
clients.fallback, dsprotocol.DeepSeekCreatePowURL, headers, map[string]any{"target_path": targetPath}) if err != nil { config.Logger.Warn("[get_pow] request error", "error", err, "account", a.AccountID, "target_path", targetPath) + lastFailureKind = FailureUnknown + lastFailureMessage = err.Error() attempts++ continue } @@ -126,6 +131,12 @@ func (c *Client) GetPowForTarget(ctx context.Context, a *auth.RequestAuth, targe return BuildPowHeader(challenge, answer) } config.Logger.Warn("[get_pow] failed", "status", status, "code", code, "biz_code", bizCode, "msg", msg, "biz_msg", bizMsg, "use_config_token", a.UseConfigToken, "account", a.AccountID, "target_path", targetPath) + lastFailureMessage = failureMessage(msg, bizMsg, "get pow failed") + if isTokenInvalid(status, code, bizCode, msg, bizMsg) || isAuthIndicativeBizFailure(msg, bizMsg) { + lastFailureKind = authFailureKind(a.UseConfigToken) + } else { + lastFailureKind = FailureUnknown + } if a.UseConfigToken { if !refreshed && shouldAttemptRefresh(status, code, bizCode, msg, bizMsg) { if c.Auth.RefreshToken(ctx, a) { @@ -141,12 +152,15 @@ func (c *Client) GetPowForTarget(ctx context.Context, a *auth.RequestAuth, targe } attempts++ } + if lastFailureKind != FailureUnknown { + return "", &RequestFailure{Op: "get pow", Kind: lastFailureKind, Message: lastFailureMessage} + } return "", errors.New("get pow failed") } func (c *Client) authHeaders(token string) map[string]string { - headers := make(map[string]string, len(BaseHeaders)+1) - for k, v := range BaseHeaders { + headers := make(map[string]string, len(dsprotocol.BaseHeaders)+1) + for k, v := range dsprotocol.BaseHeaders { headers[k] = v } headers["authorization"] = "Bearer " + token @@ -210,6 +224,23 @@ func isAuthIndicativeBizFailure(msg string, bizMsg string) bool { return false } +func authFailureKind(useConfigToken bool) FailureKind { + if useConfigToken { + return FailureManagedUnauthorized + } + return FailureDirectUnauthorized +} + +func failureMessage(msg 
string, bizMsg string, fallback string) string { + if trimmed := strings.TrimSpace(bizMsg); trimmed != "" { + return trimmed + } + if trimmed := strings.TrimSpace(msg); trimmed != "" { + return trimmed + } + return strings.TrimSpace(fallback) +} + // DeepSeek has returned create-session ids in both biz_data.id and // biz_data.chat_session.id across observed response variants; accept either. func extractCreateSessionID(resp map[string]any) string { diff --git a/internal/deepseek/client_auth_mobile_test.go b/internal/deepseek/client/client_auth_mobile_test.go similarity index 98% rename from internal/deepseek/client_auth_mobile_test.go rename to internal/deepseek/client/client_auth_mobile_test.go index de81690..e676b4e 100644 --- a/internal/deepseek/client_auth_mobile_test.go +++ b/internal/deepseek/client/client_auth_mobile_test.go @@ -1,4 +1,4 @@ -package deepseek +package client import "testing" diff --git a/internal/deepseek/client_auth_refresh_test.go b/internal/deepseek/client/client_auth_refresh_test.go similarity index 98% rename from internal/deepseek/client_auth_refresh_test.go rename to internal/deepseek/client/client_auth_refresh_test.go index 2506a00..2cc1ff1 100644 --- a/internal/deepseek/client_auth_refresh_test.go +++ b/internal/deepseek/client/client_auth_refresh_test.go @@ -1,4 +1,4 @@ -package deepseek +package client import "testing" diff --git a/internal/deepseek/client_auth_test.go b/internal/deepseek/client/client_auth_test.go similarity index 97% rename from internal/deepseek/client_auth_test.go rename to internal/deepseek/client/client_auth_test.go index 3ce81d5..6e23877 100644 --- a/internal/deepseek/client_auth_test.go +++ b/internal/deepseek/client/client_auth_test.go @@ -1,4 +1,4 @@ -package deepseek +package client import "testing" diff --git a/internal/deepseek/client_completion.go b/internal/deepseek/client/client_completion.go similarity index 87% rename from internal/deepseek/client_completion.go rename to 
internal/deepseek/client/client_completion.go index c27a88f..1b91ce2 100644 --- a/internal/deepseek/client_completion.go +++ b/internal/deepseek/client/client_completion.go @@ -1,8 +1,9 @@ -package deepseek +package client import ( "bytes" "context" + dsprotocol "ds2api/internal/deepseek/protocol" "encoding/json" "errors" "net/http" @@ -20,10 +21,10 @@ func (c *Client) CallCompletion(ctx context.Context, a *auth.RequestAuth, payloa clients := c.requestClientsForAuth(ctx, a) headers := c.authHeaders(a.DeepSeekToken) headers["x-ds-pow-response"] = powResp - captureSession := c.capture.Start("deepseek_completion", DeepSeekCompletionURL, a.AccountID, payload) + captureSession := c.capture.Start("deepseek_completion", dsprotocol.DeepSeekCompletionURL, a.AccountID, payload) attempts := 0 for attempts < maxAttempts { - resp, err := c.streamPost(ctx, clients.stream, DeepSeekCompletionURL, headers, payload) + resp, err := c.streamPost(ctx, clients.stream, dsprotocol.DeepSeekCompletionURL, headers, payload) if err != nil { attempts++ time.Sleep(time.Second) diff --git a/internal/deepseek/client_continue.go b/internal/deepseek/client/client_continue.go similarity index 96% rename from internal/deepseek/client_continue.go rename to internal/deepseek/client/client_continue.go index f3354f7..aea30cc 100644 --- a/internal/deepseek/client_continue.go +++ b/internal/deepseek/client/client_continue.go @@ -1,9 +1,10 @@ -package deepseek +package client import ( "bufio" "bytes" "context" + dsprotocol "ds2api/internal/deepseek/protocol" "encoding/json" "errors" "io" @@ -60,8 +61,8 @@ func (c *Client) callContinue(ctx context.Context, a *auth.RequestAuth, sessionI "fallback_to_resume": true, } config.Logger.Info("[auto_continue] calling continue", "session_id", sessionID, "message_id", responseMessageID) - captureSession := c.capture.Start("deepseek_continue", DeepSeekContinueURL, a.AccountID, payload) - resp, err := c.streamPost(ctx, clients.stream, DeepSeekContinueURL, headers, 
payload) + captureSession := c.capture.Start("deepseek_continue", dsprotocol.DeepSeekContinueURL, a.AccountID, payload) + resp, err := c.streamPost(ctx, clients.stream, dsprotocol.DeepSeekContinueURL, headers, payload) if err != nil { return nil, err } diff --git a/internal/deepseek/client_continue_test.go b/internal/deepseek/client/client_continue_test.go similarity index 91% rename from internal/deepseek/client_continue_test.go rename to internal/deepseek/client/client_continue_test.go index 4758ab0..83a42af 100644 --- a/internal/deepseek/client_continue_test.go +++ b/internal/deepseek/client/client_continue_test.go @@ -1,8 +1,9 @@ -package deepseek +package client import ( "bytes" "context" + dsprotocol "ds2api/internal/deepseek/protocol" "errors" "io" "net/http" @@ -58,8 +59,8 @@ func TestCallContinuePropagatesPowHeaderToFallbackRequest(t *testing.T) { if seenPow != "pow-response-abc" { t.Fatalf("continue request pow header=%q want=%q", seenPow, "pow-response-abc") } - if seenURL != DeepSeekContinueURL { - t.Fatalf("continue request url=%q want=%q", seenURL, DeepSeekContinueURL) + if seenURL != dsprotocol.DeepSeekContinueURL { + t.Fatalf("continue request url=%q want=%q", seenURL, dsprotocol.DeepSeekContinueURL) } } @@ -112,8 +113,8 @@ func TestCallCompletionAutoContinueThreadsPowHeader(t *testing.T) { if seenPow != "pow-response-xyz" { t.Fatalf("threaded continue pow header=%q want=%q", seenPow, "pow-response-xyz") } - if seenContinueURL != DeepSeekContinueURL { - t.Fatalf("continue url=%q want=%q", seenContinueURL, DeepSeekContinueURL) + if seenContinueURL != dsprotocol.DeepSeekContinueURL { + t.Fatalf("continue url=%q want=%q", seenContinueURL, dsprotocol.DeepSeekContinueURL) } if !bytes.Contains(out, []byte(`"status":"WIP"`)) { t.Fatalf("expected initial stream content in body, got=%s", string(out)) diff --git a/internal/deepseek/client_core.go b/internal/deepseek/client/client_core.go similarity index 98% rename from internal/deepseek/client_core.go rename 
to internal/deepseek/client/client_core.go index 57aeadb..f730e88 100644 --- a/internal/deepseek/client_core.go +++ b/internal/deepseek/client/client_core.go @@ -1,4 +1,4 @@ -package deepseek +package client import ( "context" diff --git a/internal/deepseek/client_file_status.go b/internal/deepseek/client/client_file_status.go similarity index 97% rename from internal/deepseek/client_file_status.go rename to internal/deepseek/client/client_file_status.go index ba50ab8..e9bfe28 100644 --- a/internal/deepseek/client_file_status.go +++ b/internal/deepseek/client/client_file_status.go @@ -1,7 +1,8 @@ -package deepseek +package client import ( "context" + dsprotocol "ds2api/internal/deepseek/protocol" "errors" "fmt" "net/http" @@ -70,7 +71,7 @@ func (c *Client) fetchUploadedFile(ctx context.Context, a *auth.RequestAuth, fil return nil, errors.New("file id is required") } clients := c.requestClientsForAuth(ctx, a) - reqURL := DeepSeekFetchFilesURL + "?file_ids=" + url.QueryEscape(fileID) + reqURL := dsprotocol.DeepSeekFetchFilesURL + "?file_ids=" + url.QueryEscape(fileID) headers := c.authHeaders(a.DeepSeekToken) resp, status, err := c.getJSONWithStatus(ctx, clients.regular, reqURL, headers) diff --git a/internal/deepseek/client_http_helpers.go b/internal/deepseek/client/client_http_helpers.go similarity index 70% rename from internal/deepseek/client_http_helpers.go rename to internal/deepseek/client/client_http_helpers.go index 14cfbdd..dd690d9 100644 --- a/internal/deepseek/client_http_helpers.go +++ b/internal/deepseek/client/client_http_helpers.go @@ -1,7 +1,6 @@ -package deepseek +package client import ( - "bufio" "compress/gzip" "io" "net/http" @@ -41,17 +40,10 @@ func (c *Client) jsonHeaders(headers map[string]string) map[string]string { return out } -func ScanSSELines(resp *http.Response, onLine func([]byte) bool) error { - scanner := bufio.NewScanner(resp.Body) - buf := make([]byte, 0, 64*1024) - scanner.Buffer(buf, 2*1024*1024) - for scanner.Scan() { - if 
!onLine(scanner.Bytes()) { - break - } +func cloneStringMap(in map[string]string) map[string]string { + out := make(map[string]string, len(in)) + for k, v := range in { + out[k] = v } - if err := scanner.Err(); err != nil { - return err - } - return nil + return out } diff --git a/internal/deepseek/client_http_json.go b/internal/deepseek/client/client_http_json.go similarity index 99% rename from internal/deepseek/client_http_json.go rename to internal/deepseek/client/client_http_json.go index 88eebae..06c8138 100644 --- a/internal/deepseek/client_http_json.go +++ b/internal/deepseek/client/client_http_json.go @@ -1,4 +1,4 @@ -package deepseek +package client import ( "bytes" diff --git a/internal/deepseek/client_http_json_test.go b/internal/deepseek/client/client_http_json_test.go similarity index 98% rename from internal/deepseek/client_http_json_test.go rename to internal/deepseek/client/client_http_json_test.go index ee553ab..d2188e9 100644 --- a/internal/deepseek/client_http_json_test.go +++ b/internal/deepseek/client/client_http_json_test.go @@ -1,4 +1,4 @@ -package deepseek +package client import ( "context" diff --git a/internal/deepseek/client_session.go b/internal/deepseek/client/client_session.go similarity index 96% rename from internal/deepseek/client_session.go rename to internal/deepseek/client/client_session.go index 4b571d1..98a7feb 100644 --- a/internal/deepseek/client_session.go +++ b/internal/deepseek/client/client_session.go @@ -1,7 +1,8 @@ -package deepseek +package client import ( "context" + dsprotocol "ds2api/internal/deepseek/protocol" "errors" "fmt" "net/http" @@ -49,7 +50,7 @@ func (c *Client) GetSessionCount(ctx context.Context, a *auth.RequestAuth, maxAt headers := c.authHeaders(a.DeepSeekToken) // 构建请求 URL - reqURL := DeepSeekFetchSessionURL + "?lte_cursor.pinned=false" + reqURL := dsprotocol.DeepSeekFetchSessionURL + "?lte_cursor.pinned=false" resp, status, err := c.getJSONWithStatus(ctx, clients.regular, reqURL, headers) if err != 
nil { @@ -109,7 +110,7 @@ func (c *Client) GetSessionCount(ctx context.Context, a *auth.RequestAuth, maxAt func (c *Client) GetSessionCountForToken(ctx context.Context, token string) (*SessionStats, error) { clients := c.requestClientsFromContext(ctx) headers := c.authHeaders(token) - reqURL := DeepSeekFetchSessionURL + "?lte_cursor.pinned=false" + reqURL := dsprotocol.DeepSeekFetchSessionURL + "?lte_cursor.pinned=false" resp, status, err := c.getJSONWithStatus(ctx, clients.regular, reqURL, headers) if err != nil { @@ -202,7 +203,7 @@ func (c *Client) FetchSessionPage(ctx context.Context, a *auth.RequestAuth, curs if cursor != "" { params.Set("lte_cursor", cursor) } - reqURL := DeepSeekFetchSessionURL + "?" + params.Encode() + reqURL := dsprotocol.DeepSeekFetchSessionURL + "?" + params.Encode() resp, status, err := c.getJSONWithStatus(ctx, clients.regular, reqURL, headers) if err != nil { diff --git a/internal/deepseek/client_session_delete.go b/internal/deepseek/client/client_session_delete.go similarity index 92% rename from internal/deepseek/client_session_delete.go rename to internal/deepseek/client/client_session_delete.go index 2df4abe..fa810fd 100644 --- a/internal/deepseek/client_session_delete.go +++ b/internal/deepseek/client/client_session_delete.go @@ -1,7 +1,8 @@ -package deepseek +package client import ( "context" + dsprotocol "ds2api/internal/deepseek/protocol" "errors" "fmt" "net/http" @@ -43,7 +44,7 @@ func (c *Client) DeleteSession(ctx context.Context, a *auth.RequestAuth, session "chat_session_id": sessionID, } - resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, DeepSeekDeleteSessionURL, headers, payload) + resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, dsprotocol.DeepSeekDeleteSessionURL, headers, payload) if err != nil { config.Logger.Warn("[delete_session] request error", "error", err, "session_id", sessionID) attempts++ @@ -97,7 +98,7 @@ func (c *Client) 
DeleteSessionForToken(ctx context.Context, token string, sessio "chat_session_id": sessionID, } - resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, DeepSeekDeleteSessionURL, headers, payload) + resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, dsprotocol.DeepSeekDeleteSessionURL, headers, payload) if err != nil { result.ErrorMessage = err.Error() return result, err @@ -120,7 +121,7 @@ func (c *Client) DeleteAllSessions(ctx context.Context, a *auth.RequestAuth) err headers := c.authHeaders(a.DeepSeekToken) payload := map[string]any{} - resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, DeepSeekDeleteAllSessionsURL, headers, payload) + resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, dsprotocol.DeepSeekDeleteAllSessionsURL, headers, payload) if err != nil { config.Logger.Warn("[delete_all_sessions] request error", "error", err) return err @@ -142,7 +143,7 @@ func (c *Client) DeleteAllSessionsForToken(ctx context.Context, token string) er headers := c.authHeaders(token) payload := map[string]any{} - resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, DeepSeekDeleteAllSessionsURL, headers, payload) + resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, dsprotocol.DeepSeekDeleteAllSessionsURL, headers, payload) if err != nil { config.Logger.Warn("[delete_all_sessions_for_token] request error", "error", err) return err diff --git a/internal/deepseek/client_upload.go b/internal/deepseek/client/client_upload.go similarity index 89% rename from internal/deepseek/client_upload.go rename to internal/deepseek/client/client_upload.go index c494b7b..9e95a23 100644 --- a/internal/deepseek/client_upload.go +++ b/internal/deepseek/client/client_upload.go @@ -1,8 +1,9 @@ -package deepseek +package client import ( "bytes" "context" + dsprotocol "ds2api/internal/deepseek/protocol" "encoding/json" 
"errors" "fmt" @@ -63,14 +64,16 @@ func (c *Client) UploadFile(ctx context.Context, a *auth.RequestAuth, req Upload "purpose": purpose, "bytes": len(req.Data), } - captureSession := c.capture.Start("deepseek_upload_file", DeepSeekUploadFileURL, a.AccountID, capturePayload) + captureSession := c.capture.Start("deepseek_upload_file", dsprotocol.DeepSeekUploadFileURL, a.AccountID, capturePayload) attempts := 0 refreshed := false powHeader := "" + lastFailureKind := FailureUnknown + lastFailureMessage := "" for attempts < maxAttempts { clients := c.requestClientsForAuth(ctx, a) if strings.TrimSpace(powHeader) == "" { - powHeader, err = c.GetPowForTarget(ctx, a, DeepSeekUploadTargetPath, maxAttempts) + powHeader, err = c.GetPowForTarget(ctx, a, dsprotocol.DeepSeekUploadTargetPath, maxAttempts) if err != nil { return nil, err } @@ -81,10 +84,12 @@ func (c *Client) UploadFile(ctx context.Context, a *auth.RequestAuth, req Upload headers["x-ds-pow-response"] = powHeader headers["x-file-size"] = strconv.Itoa(len(req.Data)) headers["x-thinking-enabled"] = "1" - resp, err := c.doUpload(ctx, clients.regular, clients.fallback, DeepSeekUploadFileURL, headers, body) + resp, err := c.doUpload(ctx, clients.regular, clients.fallback, dsprotocol.DeepSeekUploadFileURL, headers, body) if err != nil { config.Logger.Warn("[upload_file] request error", "error", err, "account", a.AccountID, "filename", filename) powHeader = "" + lastFailureKind = FailureUnknown + lastFailureMessage = err.Error() attempts++ continue } @@ -131,6 +136,12 @@ func (c *Client) UploadFile(ctx context.Context, a *auth.RequestAuth, req Upload } config.Logger.Warn("[upload_file] failed", "status", resp.StatusCode, "code", code, "biz_code", bizCode, "msg", msg, "biz_msg", bizMsg, "account", a.AccountID, "filename", filename) powHeader = "" + lastFailureMessage = failureMessage(msg, bizMsg, "upload file failed") + if isTokenInvalid(resp.StatusCode, code, bizCode, msg, bizMsg) || isAuthIndicativeBizFailure(msg, bizMsg) 
{ + lastFailureKind = authFailureKind(a.UseConfigToken) + } else { + lastFailureKind = FailureUnknown + } if a.UseConfigToken { if !refreshed && shouldAttemptRefresh(resp.StatusCode, code, bizCode, msg, bizMsg) { if c.Auth.RefreshToken(ctx, a) { @@ -147,6 +158,9 @@ func (c *Client) UploadFile(ctx context.Context, a *auth.RequestAuth, req Upload } attempts++ } + if lastFailureKind != FailureUnknown { + return nil, &RequestFailure{Op: "upload file", Kind: lastFailureKind, Message: lastFailureMessage} + } return nil, errors.New("upload file failed") } diff --git a/internal/deepseek/client_upload_test.go b/internal/deepseek/client/client_upload_test.go similarity index 93% rename from internal/deepseek/client_upload_test.go rename to internal/deepseek/client/client_upload_test.go index 7a41073..90e11cd 100644 --- a/internal/deepseek/client_upload_test.go +++ b/internal/deepseek/client/client_upload_test.go @@ -1,7 +1,8 @@ -package deepseek +package client import ( "context" + dsprotocol "ds2api/internal/deepseek/protocol" "encoding/base64" "encoding/hex" "encoding/json" @@ -75,7 +76,7 @@ func TestExtractUploadFileResultSupportsNestedShapes(t *testing.T) { func TestUploadFileUsesUploadTargetPowAndMultipartHeaders(t *testing.T) { challengeHash := powpkg.DeepSeekHashV1([]byte(powpkg.BuildPrefix("salt", 1712345678) + "42")) - powResponse := `{"code":0,"msg":"ok","data":{"biz_code":0,"biz_data":{"challenge":{"algorithm":"DeepSeekHashV1","challenge":"` + hex.EncodeToString(challengeHash[:]) + `","salt":"salt","expire_at":1712345678,"difficulty":1000,"signature":"sig","target_path":"` + DeepSeekUploadTargetPath + `"}}}}` + powResponse := `{"code":0,"msg":"ok","data":{"biz_code":0,"biz_data":{"challenge":{"algorithm":"DeepSeekHashV1","challenge":"` + hex.EncodeToString(challengeHash[:]) + `","salt":"salt","expire_at":1712345678,"difficulty":1000,"signature":"sig","target_path":"` + dsprotocol.DeepSeekUploadTargetPath + `"}}}}` uploadResponse := 
`{"code":0,"msg":"ok","data":{"biz_code":0,"biz_data":{"file":{"file_id":"file_789","filename":"demo.txt","bytes":5,"status":"processed","purpose":"assistants","is_image":false}}}}` var seenPow string var seenTargetPath string @@ -119,7 +120,7 @@ func TestUploadFileUsesUploadTargetPowAndMultipartHeaders(t *testing.T) { if result.ID != "file_789" { t.Fatalf("expected uploaded file id file_789, got %#v", result) } - if !strings.Contains(seenTargetPath, `"target_path":"`+DeepSeekUploadTargetPath+`"`) { + if !strings.Contains(seenTargetPath, `"target_path":"`+dsprotocol.DeepSeekUploadTargetPath+`"`) { t.Fatalf("expected upload target_path in pow request, got %q", seenTargetPath) } if strings.TrimSpace(seenPow) == "" { @@ -133,8 +134,8 @@ func TestUploadFileUsesUploadTargetPowAndMultipartHeaders(t *testing.T) { if err := json.Unmarshal(rawPow, &powHeader); err != nil { t.Fatalf("unmarshal pow header failed: %v", err) } - if powHeader["target_path"] != DeepSeekUploadTargetPath { - t.Fatalf("expected pow target_path %q, got %#v", DeepSeekUploadTargetPath, powHeader["target_path"]) + if powHeader["target_path"] != dsprotocol.DeepSeekUploadTargetPath { + t.Fatalf("expected pow target_path %q, got %#v", dsprotocol.DeepSeekUploadTargetPath, powHeader["target_path"]) } if seenFileSize != "5" { t.Fatalf("expected x-file-size=5, got %q", seenFileSize) @@ -153,7 +154,7 @@ func TestUploadFileWaitsForProcessedFetchFiles(t *testing.T) { defer func() { fileReadySleep = oldSleep }() challengeHash := powpkg.DeepSeekHashV1([]byte(powpkg.BuildPrefix("salt", 1712345678) + "42")) - powResponse := `{"code":0,"msg":"ok","data":{"biz_code":0,"biz_data":{"challenge":{"algorithm":"DeepSeekHashV1","challenge":"` + hex.EncodeToString(challengeHash[:]) + `","salt":"salt","expire_at":1712345678,"difficulty":1000,"signature":"sig","target_path":"` + DeepSeekUploadTargetPath + `"}}}}` + powResponse := 
`{"code":0,"msg":"ok","data":{"biz_code":0,"biz_data":{"challenge":{"algorithm":"DeepSeekHashV1","challenge":"` + hex.EncodeToString(challengeHash[:]) + `","salt":"salt","expire_at":1712345678,"difficulty":1000,"signature":"sig","target_path":"` + dsprotocol.DeepSeekUploadTargetPath + `"}}}}` uploadResponse := `{"code":0,"msg":"ok","data":{"biz_code":0,"biz_data":{"file":{"file_id":"file_789","filename":"demo.txt","bytes":5,"status":"PENDING","purpose":"assistants","is_image":false}}}}` pendingFetchResponse := `{"code":0,"msg":"ok","data":{"biz_code":0,"biz_data":{"files":[{"file_id":"file_789","filename":"demo.txt","bytes":5,"status":"PENDING","purpose":"assistants","is_image":false}]}}}` processedFetchResponse := `{"code":0,"msg":"ok","data":{"biz_code":0,"biz_data":{"files":[{"file_id":"file_789","filename":"demo.txt","bytes":5,"status":"processed","purpose":"assistants","is_image":true}]}}}` @@ -165,7 +166,7 @@ func TestUploadFileWaitsForProcessedFetchFiles(t *testing.T) { switch call { case 1: bodyBytes, _ := io.ReadAll(req.Body) - if !strings.Contains(string(bodyBytes), `"target_path":"`+DeepSeekUploadTargetPath+`"`) { + if !strings.Contains(string(bodyBytes), `"target_path":"`+dsprotocol.DeepSeekUploadTargetPath+`"`) { t.Fatalf("expected pow target path request, got %s", string(bodyBytes)) } return &http.Response{StatusCode: http.StatusOK, Header: make(http.Header), Body: io.NopCloser(strings.NewReader(powResponse)), Request: req}, nil diff --git a/internal/deepseek/deepseek_edge_test.go b/internal/deepseek/client/deepseek_edge_test.go similarity index 99% rename from internal/deepseek/deepseek_edge_test.go rename to internal/deepseek/client/deepseek_edge_test.go index e321954..fb0b413 100644 --- a/internal/deepseek/deepseek_edge_test.go +++ b/internal/deepseek/client/deepseek_edge_test.go @@ -1,4 +1,4 @@ -package deepseek +package client import ( "context" diff --git a/internal/deepseek/client/errors.go b/internal/deepseek/client/errors.go new file mode 
100644 index 0000000..0c2c18a --- /dev/null +++ b/internal/deepseek/client/errors.go @@ -0,0 +1,46 @@ +package client + +import ( + "errors" + "fmt" +) + +type FailureKind string + +const ( + FailureUnknown FailureKind = "" + FailureDirectUnauthorized FailureKind = "direct_unauthorized" + FailureManagedUnauthorized FailureKind = "managed_unauthorized" +) + +type RequestFailure struct { + Op string + Kind FailureKind + Message string +} + +func (e *RequestFailure) Error() string { + if e == nil { + return "" + } + switch { + case e.Op != "" && e.Message != "": + return fmt.Sprintf("%s: %s", e.Op, e.Message) + case e.Op != "": + return e.Op + " failed" + case e.Message != "": + return e.Message + default: + return "request failed" + } +} + +func IsManagedUnauthorizedError(err error) bool { + var failure *RequestFailure + return errors.As(err, &failure) && failure.Kind == FailureManagedUnauthorized +} + +func IsDirectUnauthorizedError(err error) bool { + var failure *RequestFailure + return errors.As(err, &failure) && failure.Kind == FailureDirectUnauthorized +} diff --git a/internal/deepseek/pow.go b/internal/deepseek/client/pow.go similarity index 99% rename from internal/deepseek/pow.go rename to internal/deepseek/client/pow.go index 9d839de..6a58fe1 100644 --- a/internal/deepseek/pow.go +++ b/internal/deepseek/client/pow.go @@ -1,4 +1,4 @@ -package deepseek +package client import ( "context" diff --git a/internal/deepseek/pow_test.go b/internal/deepseek/client/pow_test.go similarity index 96% rename from internal/deepseek/pow_test.go rename to internal/deepseek/client/pow_test.go index 0161f62..5367e0a 100644 --- a/internal/deepseek/pow_test.go +++ b/internal/deepseek/client/pow_test.go @@ -1,4 +1,4 @@ -package deepseek +package client import ( "context" diff --git a/internal/deepseek/proxy.go b/internal/deepseek/client/proxy.go similarity index 98% rename from internal/deepseek/proxy.go rename to internal/deepseek/client/proxy.go index 84bf439..f09cf9f 100644 --- 
a/internal/deepseek/proxy.go +++ b/internal/deepseek/client/proxy.go @@ -1,7 +1,8 @@ -package deepseek +package client import ( "context" + dsprotocol "ds2api/internal/deepseek/protocol" "fmt" "net" "net/http" @@ -172,7 +173,7 @@ func applyProxyConnectivityHeaders(req *http.Request) { if req == nil { return } - for key, value := range BaseHeaders { + for key, value := range dsprotocol.BaseHeaders { key = strings.TrimSpace(key) value = strings.TrimSpace(value) if key == "" || value == "" { diff --git a/internal/deepseek/proxy_test.go b/internal/deepseek/client/proxy_test.go similarity index 95% rename from internal/deepseek/proxy_test.go rename to internal/deepseek/client/proxy_test.go index 102adee..cbb931d 100644 --- a/internal/deepseek/proxy_test.go +++ b/internal/deepseek/client/proxy_test.go @@ -1,7 +1,8 @@ -package deepseek +package client import ( "context" + dsprotocol "ds2api/internal/deepseek/protocol" "net/http" "strings" "testing" @@ -52,7 +53,7 @@ func TestApplyProxyConnectivityHeadersUsesBaseHeaders(t *testing.T) { applyProxyConnectivityHeaders(req) - for key, want := range BaseHeaders { + for key, want := range dsprotocol.BaseHeaders { if got := req.Header.Get(key); got != want { t.Fatalf("expected header %q=%q, got %q", key, want, got) } diff --git a/internal/deepseek/prompt.go b/internal/deepseek/prompt.go deleted file mode 100644 index 77fd36f..0000000 --- a/internal/deepseek/prompt.go +++ /dev/null @@ -1,11 +0,0 @@ -package deepseek - -import "ds2api/internal/prompt" - -func MessagesPrepare(messages []map[string]any) string { - return prompt.MessagesPrepare(messages) -} - -func MessagesPrepareWithThinking(messages []map[string]any, thinkingEnabled bool) string { - return prompt.MessagesPrepareWithThinking(messages, thinkingEnabled) -} diff --git a/internal/deepseek/constants.go b/internal/deepseek/protocol/constants.go similarity index 99% rename from internal/deepseek/constants.go rename to internal/deepseek/protocol/constants.go index 
577725f..79e218e 100644 --- a/internal/deepseek/constants.go +++ b/internal/deepseek/protocol/constants.go @@ -1,4 +1,4 @@ -package deepseek +package protocol import ( _ "embed" diff --git a/internal/deepseek/constants_shared.json b/internal/deepseek/protocol/constants_shared.json similarity index 100% rename from internal/deepseek/constants_shared.json rename to internal/deepseek/protocol/constants_shared.json diff --git a/internal/deepseek/constants_test.go b/internal/deepseek/protocol/constants_test.go similarity index 96% rename from internal/deepseek/constants_test.go rename to internal/deepseek/protocol/constants_test.go index 03c6788..b64e579 100644 --- a/internal/deepseek/constants_test.go +++ b/internal/deepseek/protocol/constants_test.go @@ -1,4 +1,4 @@ -package deepseek +package protocol import "testing" diff --git a/internal/deepseek/protocol/sse.go b/internal/deepseek/protocol/sse.go new file mode 100644 index 0000000..c11b72b --- /dev/null +++ b/internal/deepseek/protocol/sse.go @@ -0,0 +1,21 @@ +package protocol + +import ( + "bufio" + "net/http" +) + +func ScanSSELines(resp *http.Response, onLine func([]byte) bool) error { + scanner := bufio.NewScanner(resp.Body) + buf := make([]byte, 0, 64*1024) + scanner.Buffer(buf, 2*1024*1024) + for scanner.Scan() { + if !onLine(scanner.Bytes()) { + break + } + } + if err := scanner.Err(); err != nil { + return err + } + return nil +} diff --git a/internal/format/openai/render_stream_events.go b/internal/format/openai/render_stream_events.go index 6c1121a..33c7c09 100644 --- a/internal/format/openai/render_stream_events.go +++ b/internal/format/openai/render_stream_events.go @@ -117,7 +117,7 @@ func BuildResponsesFunctionCallArgumentsDonePayload(responseID, itemID string, o } } -func BuildResponsesFailedPayload(responseID, model, message, code string) map[string]any { +func BuildResponsesFailedPayload(responseID, model string, status int, message, code string) map[string]any { code = strings.TrimSpace(code) if 
code == "" { code = "api_error" @@ -129,15 +129,36 @@ func BuildResponsesFailedPayload(responseID, model, message, code string) map[st "object": "response", "model": model, "status": "failed", + "status_code": status, "error": map[string]any{ "message": message, - "type": "invalid_request_error", + "type": responsesErrorType(status), "code": code, "param": nil, }, } } +func responsesErrorType(status int) string { + switch status { + case 400, 404, 422: + return "invalid_request_error" + case 401: + return "authentication_error" + case 403: + return "permission_error" + case 429: + return "rate_limit_error" + case 503: + return "service_unavailable_error" + default: + if status >= 500 { + return "api_error" + } + return "invalid_request_error" + } +} + func BuildResponsesCompletedPayload(response map[string]any) map[string]any { responseID, _ := response["id"].(string) return map[string]any{ diff --git a/internal/httpapi/admin/accounts/deps.go b/internal/httpapi/admin/accounts/deps.go new file mode 100644 index 0000000..568487c --- /dev/null +++ b/internal/httpapi/admin/accounts/deps.go @@ -0,0 +1,46 @@ +package accounts + +import ( + "net/http" + + "ds2api/internal/chathistory" + "ds2api/internal/config" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +type Handler struct { + Store adminshared.ConfigStore + Pool adminshared.PoolController + DS adminshared.DeepSeekCaller + OpenAI adminshared.OpenAIChatCaller + ChatHistory *chathistory.Store +} + +var writeJSON = adminshared.WriteJSON + +func reverseAccounts(a []config.Account) { adminshared.ReverseAccounts(a) } +func intFromQuery(r *http.Request, key string, d int) int { + return adminshared.IntFromQuery(r, key, d) +} +func maskSecretPreview(secret string) string { + return adminshared.MaskSecretPreview(secret) +} +func toAccount(m map[string]any) config.Account { + return adminshared.ToAccount(m) +} +func fieldStringOptional(m map[string]any, key string) (string, bool) { + return 
adminshared.FieldStringOptional(m, key) +} +func accountMatchesIdentifier(acc config.Account, identifier string) bool { + return adminshared.AccountMatchesIdentifier(acc, identifier) +} +func findProxyByID(c config.Config, proxyID string) (config.Proxy, bool) { + return adminshared.FindProxyByID(c, proxyID) +} +func findAccountByIdentifier(store adminshared.ConfigStore, identifier string) (config.Account, bool) { + return adminshared.FindAccountByIdentifier(store, identifier) +} +func newRequestError(detail string) error { return adminshared.NewRequestError(detail) } +func requestErrorDetail(err error) (string, bool) { + return adminshared.RequestErrorDetail(err) +} diff --git a/internal/admin/handler_accounts_crud.go b/internal/httpapi/admin/accounts/handler_accounts_crud.go similarity index 96% rename from internal/admin/handler_accounts_crud.go rename to internal/httpapi/admin/accounts/handler_accounts_crud.go index b9d7146..7375b40 100644 --- a/internal/admin/handler_accounts_crud.go +++ b/internal/httpapi/admin/accounts/handler_accounts_crud.go @@ -1,4 +1,4 @@ -package admin +package accounts import ( "encoding/json" @@ -58,14 +58,6 @@ func (h *Handler) listAccounts(w http.ResponseWriter, r *http.Request) { for _, acc := range accounts[start:end] { testStatus, _ := h.Store.AccountTestStatus(acc.Identifier()) token := strings.TrimSpace(acc.Token) - preview := "" - if token != "" { - if len(token) > 20 { - preview = token[:20] + "..." 
- } else { - preview = token - } - } items = append(items, map[string]any{ "identifier": acc.Identifier(), "name": acc.Name, @@ -75,7 +67,7 @@ func (h *Handler) listAccounts(w http.ResponseWriter, r *http.Request) { "proxy_id": acc.ProxyID, "has_password": acc.Password != "", "has_token": token != "", - "token_preview": preview, + "token_preview": maskSecretPreview(token), "test_status": testStatus, }) } diff --git a/internal/admin/handler_accounts_crud_test.go b/internal/httpapi/admin/accounts/handler_accounts_crud_test.go similarity index 74% rename from internal/admin/handler_accounts_crud_test.go rename to internal/httpapi/admin/accounts/handler_accounts_crud_test.go index fb4d3cc..be2b0ba 100644 --- a/internal/admin/handler_accounts_crud_test.go +++ b/internal/httpapi/admin/accounts/handler_accounts_crud_test.go @@ -1,4 +1,4 @@ -package admin +package accounts import ( "encoding/json" @@ -86,3 +86,33 @@ func TestUpdateAccountMetadataPreservesCredentials(t *testing.T) { t.Fatalf("password should be preserved, got %#v", acc) } } + +func TestListAccountsMasksTokenPreview(t *testing.T) { + h := newAdminTestHandler(t, `{ + "accounts":[{"email":"u@example.com","password":"pwd"}] + }`) + if err := h.Store.UpdateAccountToken("u@example.com", "abcdefgh"); err != nil { + t.Fatalf("seed runtime token: %v", err) + } + + req := httptest.NewRequest(http.MethodGet, "/admin/accounts?page=1&page_size=10", nil) + rec := httptest.NewRecorder() + h.listAccounts(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String()) + } + + var payload map[string]any + if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil { + t.Fatalf("decode response failed: %v", err) + } + items, _ := payload["items"].([]any) + if len(items) != 1 { + t.Fatalf("expected 1 item, got %d", len(items)) + } + first, _ := items[0].(map[string]any) + if got, _ := first["token_preview"].(string); got != "ab****gh" { + t.Fatalf("expected masked 
token preview, got %q", got) + } +} diff --git a/internal/admin/handler_accounts_identifier_test.go b/internal/httpapi/admin/accounts/handler_accounts_identifier_test.go similarity index 99% rename from internal/admin/handler_accounts_identifier_test.go rename to internal/httpapi/admin/accounts/handler_accounts_identifier_test.go index 6dd6efe..5edaf27 100644 --- a/internal/admin/handler_accounts_identifier_test.go +++ b/internal/httpapi/admin/accounts/handler_accounts_identifier_test.go @@ -1,4 +1,4 @@ -package admin +package accounts import ( "bytes" diff --git a/internal/admin/handler_accounts_queue.go b/internal/httpapi/admin/accounts/handler_accounts_queue.go similarity index 89% rename from internal/admin/handler_accounts_queue.go rename to internal/httpapi/admin/accounts/handler_accounts_queue.go index 108f802..48b68e8 100644 --- a/internal/admin/handler_accounts_queue.go +++ b/internal/httpapi/admin/accounts/handler_accounts_queue.go @@ -1,4 +1,4 @@ -package admin +package accounts import "net/http" diff --git a/internal/admin/handler_accounts_testing.go b/internal/httpapi/admin/accounts/handler_accounts_testing.go similarity index 96% rename from internal/admin/handler_accounts_testing.go rename to internal/httpapi/admin/accounts/handler_accounts_testing.go index 85ae924..3b41c60 100644 --- a/internal/admin/handler_accounts_testing.go +++ b/internal/httpapi/admin/accounts/handler_accounts_testing.go @@ -1,4 +1,4 @@ -package admin +package accounts import ( "bytes" @@ -13,9 +13,9 @@ import ( authn "ds2api/internal/auth" "ds2api/internal/config" - "ds2api/internal/deepseek" + "ds2api/internal/prompt" + "ds2api/internal/promptcompat" "ds2api/internal/sse" - "ds2api/internal/util" ) type modelAliasSnapshotReader struct { @@ -41,7 +41,7 @@ func (h *Handler) testSingleAccount(w http.ResponseWriter, r *http.Request) { } model, _ := req["model"].(string) if model == "" { - model = "deepseek-chat" + model = "deepseek-v4-flash" } message, _ := 
req["message"].(string) result := h.testAccount(r.Context(), acc, model, message) @@ -53,7 +53,7 @@ func (h *Handler) testAllAccounts(w http.ResponseWriter, r *http.Request) { _ = json.NewDecoder(r.Body).Decode(&req) model, _ := req["model"].(string) if model == "" { - model = "deepseek-chat" + model = "deepseek-v4-flash" } accounts := h.Store.Snapshot().Accounts if len(accounts) == 0 { @@ -174,9 +174,9 @@ func (h *Handler) testAccount(ctx context.Context, acc config.Account, model, me result["message"] = "获取 PoW 失败: " + err.Error() return result } - payload := util.StandardRequest{ + payload := promptcompat.StandardRequest{ ResolvedModel: model, - FinalPrompt: deepseek.MessagesPrepare([]map[string]any{{"role": "user", "content": message}}), + FinalPrompt: prompt.MessagesPrepare([]map[string]any{{"role": "user", "content": message}}), Thinking: thinking, Search: search, }.CompletionPayload(sessionID) @@ -211,7 +211,7 @@ func (h *Handler) testAPI(w http.ResponseWriter, r *http.Request) { message, _ := req["message"].(string) apiKey, _ := req["api_key"].(string) if model == "" { - model = "deepseek-chat" + model = "deepseek-v4-flash" } if message == "" { message = "你好" diff --git a/internal/admin/handler_accounts_testing_test.go b/internal/httpapi/admin/accounts/handler_accounts_testing_test.go similarity index 92% rename from internal/admin/handler_accounts_testing_test.go rename to internal/httpapi/admin/accounts/handler_accounts_testing_test.go index bd695bc..d8f6ece 100644 --- a/internal/admin/handler_accounts_testing_test.go +++ b/internal/httpapi/admin/accounts/handler_accounts_testing_test.go @@ -1,4 +1,4 @@ -package admin +package accounts import ( "bytes" @@ -13,7 +13,7 @@ import ( "ds2api/internal/auth" "ds2api/internal/config" - "ds2api/internal/deepseek" + dsclient "ds2api/internal/deepseek/client" ) type testingDSMock struct { @@ -58,8 +58,8 @@ func (m *testingDSMock) DeleteAllSessionsForToken(_ context.Context, _ string) e return nil } -func (m 
*testingDSMock) GetSessionCountForToken(_ context.Context, _ string) (*deepseek.SessionStats, error) { - return &deepseek.SessionStats{Success: true}, nil +func (m *testingDSMock) GetSessionCountForToken(_ context.Context, _ string) (*dsclient.SessionStats, error) { + return &dsclient.SessionStats{Success: true}, nil } func TestTestAccount_BatchModeOnlyCreatesSession(t *testing.T) { @@ -72,7 +72,7 @@ func TestTestAccount_BatchModeOnlyCreatesSession(t *testing.T) { t.Fatal("expected test account") } - result := h.testAccount(context.Background(), acc, "deepseek-chat", "") + result := h.testAccount(context.Background(), acc, "deepseek-v4-flash", "") if ok, _ := result["success"].(bool); !ok { t.Fatalf("expected success=true, got %#v", result) @@ -163,8 +163,8 @@ func (m *completionPayloadDSMock) DeleteAllSessionsForToken(_ context.Context, _ return nil } -func (m *completionPayloadDSMock) GetSessionCountForToken(_ context.Context, _ string) (*deepseek.SessionStats, error) { - return &deepseek.SessionStats{Success: true}, nil +func (m *completionPayloadDSMock) GetSessionCountForToken(_ context.Context, _ string) (*dsclient.SessionStats, error) { + return &dsclient.SessionStats{Success: true}, nil } func TestTestAccount_MessageModeUsesExpertModelTypeForExpertModel(t *testing.T) { @@ -177,7 +177,7 @@ func TestTestAccount_MessageModeUsesExpertModelTypeForExpertModel(t *testing.T) t.Fatal("expected test account") } - result := h.testAccount(context.Background(), acc, "deepseek-expert-chat", "hello") + result := h.testAccount(context.Background(), acc, "deepseek-v4-pro", "hello") if ok, _ := result["success"].(bool); !ok { t.Fatalf("expected success=true, got %#v", result) @@ -200,7 +200,7 @@ func TestTestAccount_MessageModeUsesVisionModelTypeForVisionModel(t *testing.T) t.Fatal("expected test account") } - result := h.testAccount(context.Background(), acc, "deepseek-vision-chat", "hello") + result := h.testAccount(context.Background(), acc, "deepseek-v4-vision", "hello") 
if ok, _ := result["success"].(bool); !ok { t.Fatalf("expected success=true, got %#v", result) diff --git a/internal/httpapi/admin/accounts/routes.go b/internal/httpapi/admin/accounts/routes.go new file mode 100644 index 0000000..13491c1 --- /dev/null +++ b/internal/httpapi/admin/accounts/routes.go @@ -0,0 +1,38 @@ +package accounts + +import ( + "context" + "net/http" + + "github.com/go-chi/chi/v5" + + "ds2api/internal/config" +) + +func RegisterRoutes(r chi.Router, h *Handler) { + r.Get("/accounts", h.listAccounts) + r.Post("/accounts", h.addAccount) + r.Put("/accounts/{identifier}", h.updateAccount) + r.Delete("/accounts/{identifier}", h.deleteAccount) + r.Get("/queue/status", h.queueStatus) + r.Post("/accounts/test", h.testSingleAccount) + r.Post("/accounts/test-all", h.testAllAccounts) + r.Post("/accounts/sessions/delete-all", h.deleteAllSessions) + r.Post("/test", h.testAPI) +} + +func RunAccountTestsConcurrently(accounts []config.Account, maxConcurrency int, testFn func(int, config.Account) map[string]any) []map[string]any { + return runAccountTestsConcurrently(accounts, maxConcurrency, testFn) +} + +func (h *Handler) TestAccount(ctx context.Context, acc config.Account, model, message string) map[string]any { + return h.testAccount(ctx, acc, model, message) +} + +func (h *Handler) ListAccounts(w http.ResponseWriter, r *http.Request) { h.listAccounts(w, r) } +func (h *Handler) AddAccount(w http.ResponseWriter, r *http.Request) { h.addAccount(w, r) } +func (h *Handler) UpdateAccount(w http.ResponseWriter, r *http.Request) { h.updateAccount(w, r) } +func (h *Handler) DeleteAccount(w http.ResponseWriter, r *http.Request) { h.deleteAccount(w, r) } +func (h *Handler) DeleteAllSessions(w http.ResponseWriter, r *http.Request) { + h.deleteAllSessions(w, r) +} diff --git a/internal/httpapi/admin/accounts/test_http_helpers_test.go b/internal/httpapi/admin/accounts/test_http_helpers_test.go new file mode 100644 index 0000000..4a4f736 --- /dev/null +++ 
b/internal/httpapi/admin/accounts/test_http_helpers_test.go @@ -0,0 +1,35 @@ +package accounts + +import ( + "bytes" + "net/http" + "net/http/httptest" + "testing" + + "github.com/go-chi/chi/v5" + + "ds2api/internal/account" + "ds2api/internal/config" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +func newHTTPAdminHarness(t *testing.T, rawConfig string, ds adminshared.DeepSeekCaller) http.Handler { + t.Helper() + t.Setenv("DS2API_CONFIG_JSON", rawConfig) + store := config.LoadStore() + h := &Handler{ + Store: store, + Pool: account.NewPool(store), + DS: ds, + } + r := chi.NewRouter() + RegisterRoutes(r, h) + return r +} + +func adminReq(method, path string, body []byte) *http.Request { + req := httptest.NewRequest(method, path, bytes.NewReader(body)) + req.Header.Set("Authorization", "Bearer admin") + req.Header.Set("Content-Type", "application/json") + return req +} diff --git a/internal/httpapi/admin/auth/deps.go b/internal/httpapi/admin/auth/deps.go new file mode 100644 index 0000000..72063f6 --- /dev/null +++ b/internal/httpapi/admin/auth/deps.go @@ -0,0 +1,19 @@ +package auth + +import ( + "ds2api/internal/chathistory" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +type Handler struct { + Store adminshared.ConfigStore + Pool adminshared.PoolController + DS adminshared.DeepSeekCaller + OpenAI adminshared.OpenAIChatCaller + ChatHistory *chathistory.Store +} + +var writeJSON = adminshared.WriteJSON +var intFrom = adminshared.IntFrom + +func nilIfEmpty(s string) any { return adminshared.NilIfEmpty(s) } diff --git a/internal/admin/handler_auth.go b/internal/httpapi/admin/auth/handler_auth.go similarity index 99% rename from internal/admin/handler_auth.go rename to internal/httpapi/admin/auth/handler_auth.go index 9b96b2f..18ef6fa 100644 --- a/internal/admin/handler_auth.go +++ b/internal/httpapi/admin/auth/handler_auth.go @@ -1,4 +1,4 @@ -package admin +package auth import ( "encoding/json" diff --git a/internal/httpapi/admin/auth/routes.go 
b/internal/httpapi/admin/auth/routes.go new file mode 100644 index 0000000..91ec102 --- /dev/null +++ b/internal/httpapi/admin/auth/routes.go @@ -0,0 +1,20 @@ +package auth + +import ( + "net/http" + + "github.com/go-chi/chi/v5" +) + +func (h *Handler) RequireAdmin(next http.Handler) http.Handler { + return h.requireAdmin(next) +} + +func RegisterPublicRoutes(r chi.Router, h *Handler) { + r.Post("/login", h.login) + r.Get("/verify", h.verify) +} + +func RegisterProtectedRoutes(r chi.Router, h *Handler) { + r.Get("/vercel/config", h.getVercelConfig) +} diff --git a/internal/httpapi/admin/configmgmt/deps.go b/internal/httpapi/admin/configmgmt/deps.go new file mode 100644 index 0000000..8b9a1cc --- /dev/null +++ b/internal/httpapi/admin/configmgmt/deps.go @@ -0,0 +1,50 @@ +package configmgmt + +import ( + "ds2api/internal/chathistory" + "ds2api/internal/config" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +type Handler struct { + Store adminshared.ConfigStore + Pool adminshared.PoolController + DS adminshared.DeepSeekCaller + OpenAI adminshared.OpenAIChatCaller + ChatHistory *chathistory.Store +} + +var writeJSON = adminshared.WriteJSON + +func maskSecretPreview(secret string) string { + return adminshared.MaskSecretPreview(secret) +} +func toStringSlice(v any) ([]string, bool) { return adminshared.ToStringSlice(v) } +func toAccount(m map[string]any) config.Account { + return adminshared.ToAccount(m) +} +func toAPIKeys(v any) ([]config.APIKey, bool) { return adminshared.ToAPIKeys(v) } +func mergeAPIKeysPreferStructured(existing, incoming []config.APIKey) ([]config.APIKey, int) { + return adminshared.MergeAPIKeysPreferStructured(existing, incoming) +} +func fieldString(m map[string]any, key string) string { + return adminshared.FieldString(m, key) +} +func fieldStringOptional(m map[string]any, key string) (string, bool) { + return adminshared.FieldStringOptional(m, key) +} +func normalizeAccountForStorage(acc config.Account) config.Account { + return 
adminshared.NormalizeAccountForStorage(acc) +} +func accountDedupeKey(acc config.Account) string { return adminshared.AccountDedupeKey(acc) } +func normalizeAndDedupeAccounts(accounts []config.Account) []config.Account { + return adminshared.NormalizeAndDedupeAccounts(accounts) +} +func newRequestError(detail string) error { return adminshared.NewRequestError(detail) } +func requestErrorDetail(err error) (string, bool) { + return adminshared.RequestErrorDetail(err) +} +func normalizeSettingsConfig(c *config.Config) { adminshared.NormalizeSettingsConfig(c) } +func validateSettingsConfig(c config.Config) error { + return adminshared.ValidateSettingsConfig(c) +} diff --git a/internal/admin/handler_config_import.go b/internal/httpapi/admin/configmgmt/handler_config_import.go similarity index 85% rename from internal/admin/handler_config_import.go rename to internal/httpapi/admin/configmgmt/handler_config_import.go index 7decbde..cd1d860 100644 --- a/internal/admin/handler_config_import.go +++ b/internal/httpapi/admin/configmgmt/handler_config_import.go @@ -1,9 +1,7 @@ -package admin +package configmgmt import ( - "crypto/md5" "encoding/json" - "fmt" "net/http" "strings" @@ -82,23 +80,6 @@ func (h *Handler) configImport(w http.ResponseWriter, r *http.Request) { importedAccounts++ } - if len(incoming.ClaudeMapping) > 0 { - if next.ClaudeMapping == nil { - next.ClaudeMapping = map[string]string{} - } - for k, v := range incoming.ClaudeMapping { - next.ClaudeMapping[k] = v - } - } - if len(incoming.ClaudeModelMap) > 0 { - if next.ClaudeModelMap == nil { - next.ClaudeModelMap = map[string]string{} - } - for k, v := range incoming.ClaudeModelMap { - next.ClaudeModelMap[k] = v - } - } - if len(incoming.ModelAliases) > 0 { if next.ModelAliases == nil { next.ModelAliases = map[string]string{} @@ -162,13 +143,3 @@ func (h *Handler) configImport(w http.ResponseWriter, r *http.Request) { "message": "config imported", }) } - -func (h *Handler) computeSyncHash() string { - snap := 
h.Store.Snapshot().Clone() - snap.ClearAccountTokens() - snap.VercelSyncHash = "" - snap.VercelSyncTime = 0 - b, _ := json.Marshal(snap) - sum := md5.Sum(b) - return fmt.Sprintf("%x", sum) -} diff --git a/internal/admin/handler_config_read.go b/internal/httpapi/admin/configmgmt/handler_config_read.go similarity index 86% rename from internal/admin/handler_config_read.go rename to internal/httpapi/admin/configmgmt/handler_config_read.go index ceeb523..e039bd1 100644 --- a/internal/admin/handler_config_read.go +++ b/internal/httpapi/admin/configmgmt/handler_config_read.go @@ -1,4 +1,4 @@ -package admin +package configmgmt import ( "net/http" @@ -18,24 +18,11 @@ func (h *Handler) getConfig(w http.ResponseWriter, _ *http.Request) { "env_source_present": h.Store.HasEnvConfigSource(), "env_writeback_enabled": h.Store.IsEnvWritebackEnabled(), "config_path": h.Store.ConfigPath(), - "claude_mapping": func() map[string]string { - if len(snap.ClaudeMapping) > 0 { - return snap.ClaudeMapping - } - return snap.ClaudeModelMap - }(), + "model_aliases": snap.ModelAliases, } accounts := make([]map[string]any, 0, len(snap.Accounts)) for _, acc := range snap.Accounts { token := strings.TrimSpace(acc.Token) - preview := "" - if token != "" { - if len(token) > 20 { - preview = token[:20] + "..." 
- } else { - preview = token - } - } accounts = append(accounts, map[string]any{ "identifier": acc.Identifier(), "name": acc.Name, @@ -45,7 +32,7 @@ func (h *Handler) getConfig(w http.ResponseWriter, _ *http.Request) { "proxy_id": acc.ProxyID, "has_password": strings.TrimSpace(acc.Password) != "", "has_token": token != "", - "token_preview": preview, + "token_preview": maskSecretPreview(token), }) } safe["accounts"] = accounts diff --git a/internal/admin/handler_config_write.go b/internal/httpapi/admin/configmgmt/handler_config_write.go similarity index 96% rename from internal/admin/handler_config_write.go rename to internal/httpapi/admin/configmgmt/handler_config_write.go index 1929f26..8b1aa88 100644 --- a/internal/admin/handler_config_write.go +++ b/internal/httpapi/admin/configmgmt/handler_config_write.go @@ -1,4 +1,4 @@ -package admin +package configmgmt import ( "encoding/json" @@ -58,12 +58,12 @@ func (h *Handler) updateConfig(w http.ResponseWriter, r *http.Request) { } c.Accounts = accounts } - if m, ok := req["claude_mapping"].(map[string]any); ok { - newMap := map[string]string{} + if m, ok := req["model_aliases"].(map[string]any); ok { + aliases := make(map[string]string, len(m)) for k, v := range m { - newMap[k] = fmt.Sprintf("%v", v) + aliases[k] = fmt.Sprintf("%v", v) } - c.ClaudeMapping = newMap + c.ModelAliases = aliases } return nil }) diff --git a/internal/admin/handler_keys_test.go b/internal/httpapi/admin/configmgmt/handler_keys_test.go similarity index 99% rename from internal/admin/handler_keys_test.go rename to internal/httpapi/admin/configmgmt/handler_keys_test.go index 82ff5e2..9c2c80c 100644 --- a/internal/admin/handler_keys_test.go +++ b/internal/httpapi/admin/configmgmt/handler_keys_test.go @@ -1,4 +1,4 @@ -package admin +package configmgmt import ( "bytes" diff --git a/internal/httpapi/admin/configmgmt/routes.go b/internal/httpapi/admin/configmgmt/routes.go new file mode 100644 index 0000000..a3ece47 --- /dev/null +++ 
b/internal/httpapi/admin/configmgmt/routes.go @@ -0,0 +1,27 @@ +package configmgmt + +import ( + "net/http" + + "github.com/go-chi/chi/v5" +) + +func RegisterRoutes(r chi.Router, h *Handler) { + r.Get("/config", h.getConfig) + r.Post("/config", h.updateConfig) + r.Post("/config/import", h.configImport) + r.Get("/config/export", h.configExport) + r.Get("/export", h.exportConfig) + r.Post("/keys", h.addKey) + r.Put("/keys/{key}", h.updateKey) + r.Delete("/keys/{key}", h.deleteKey) + r.Post("/import", h.batchImport) +} + +func (h *Handler) GetConfig(w http.ResponseWriter, r *http.Request) { h.getConfig(w, r) } +func (h *Handler) UpdateConfig(w http.ResponseWriter, r *http.Request) { h.updateConfig(w, r) } +func (h *Handler) ConfigImport(w http.ResponseWriter, r *http.Request) { h.configImport(w, r) } +func (h *Handler) BatchImport(w http.ResponseWriter, r *http.Request) { h.batchImport(w, r) } +func (h *Handler) AddKey(w http.ResponseWriter, r *http.Request) { h.addKey(w, r) } +func (h *Handler) UpdateKey(w http.ResponseWriter, r *http.Request) { h.updateKey(w, r) } +func (h *Handler) DeleteKey(w http.ResponseWriter, r *http.Request) { h.deleteKey(w, r) } diff --git a/internal/httpapi/admin/configmgmt/test_helpers_test.go b/internal/httpapi/admin/configmgmt/test_helpers_test.go new file mode 100644 index 0000000..1d2f96c --- /dev/null +++ b/internal/httpapi/admin/configmgmt/test_helpers_test.go @@ -0,0 +1,18 @@ +package configmgmt + +import ( + "testing" + + "ds2api/internal/account" + "ds2api/internal/config" +) + +func newAdminTestHandler(t *testing.T, raw string) *Handler { + t.Helper() + t.Setenv("DS2API_CONFIG_JSON", raw) + store := config.LoadStore() + return &Handler{ + Store: store, + Pool: account.NewPool(store), + } +} diff --git a/internal/httpapi/admin/devcapture/deps.go b/internal/httpapi/admin/devcapture/deps.go new file mode 100644 index 0000000..5eaa7cd --- /dev/null +++ b/internal/httpapi/admin/devcapture/deps.go @@ -0,0 +1,16 @@ +package devcapture + 
+import ( + "ds2api/internal/chathistory" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +type Handler struct { + Store adminshared.ConfigStore + Pool adminshared.PoolController + DS adminshared.DeepSeekCaller + OpenAI adminshared.OpenAIChatCaller + ChatHistory *chathistory.Store +} + +var writeJSON = adminshared.WriteJSON diff --git a/internal/admin/handler_dev_capture.go b/internal/httpapi/admin/devcapture/handler_dev_capture.go similarity index 96% rename from internal/admin/handler_dev_capture.go rename to internal/httpapi/admin/devcapture/handler_dev_capture.go index 9b3615c..b1f4ced 100644 --- a/internal/admin/handler_dev_capture.go +++ b/internal/httpapi/admin/devcapture/handler_dev_capture.go @@ -1,4 +1,4 @@ -package admin +package devcapture import ( "net/http" diff --git a/internal/admin/handler_dev_capture_test.go b/internal/httpapi/admin/devcapture/handler_dev_capture_test.go similarity index 98% rename from internal/admin/handler_dev_capture_test.go rename to internal/httpapi/admin/devcapture/handler_dev_capture_test.go index 90ced8b..a588cca 100644 --- a/internal/admin/handler_dev_capture_test.go +++ b/internal/httpapi/admin/devcapture/handler_dev_capture_test.go @@ -1,4 +1,4 @@ -package admin +package devcapture import ( "encoding/json" diff --git a/internal/httpapi/admin/devcapture/routes.go b/internal/httpapi/admin/devcapture/routes.go new file mode 100644 index 0000000..34e826a --- /dev/null +++ b/internal/httpapi/admin/devcapture/routes.go @@ -0,0 +1,8 @@ +package devcapture + +import "github.com/go-chi/chi/v5" + +func RegisterRoutes(r chi.Router, h *Handler) { + r.Get("/dev/captures", h.getDevCaptures) + r.Delete("/dev/captures", h.clearDevCaptures) +} diff --git a/internal/httpapi/admin/handler.go b/internal/httpapi/admin/handler.go new file mode 100644 index 0000000..a524593 --- /dev/null +++ b/internal/httpapi/admin/handler.go @@ -0,0 +1,70 @@ +package admin + +import ( + "github.com/go-chi/chi/v5" + + "ds2api/internal/chathistory" 
+ adminaccounts "ds2api/internal/httpapi/admin/accounts" + adminauth "ds2api/internal/httpapi/admin/auth" + adminconfig "ds2api/internal/httpapi/admin/configmgmt" + admindevcapture "ds2api/internal/httpapi/admin/devcapture" + adminhistory "ds2api/internal/httpapi/admin/history" + adminproxies "ds2api/internal/httpapi/admin/proxies" + adminrawsamples "ds2api/internal/httpapi/admin/rawsamples" + adminsettings "ds2api/internal/httpapi/admin/settings" + adminshared "ds2api/internal/httpapi/admin/shared" + adminvercel "ds2api/internal/httpapi/admin/vercel" + adminversion "ds2api/internal/httpapi/admin/version" +) + +type Handler struct { + Store adminshared.ConfigStore + Pool adminshared.PoolController + DS adminshared.DeepSeekCaller + OpenAI adminshared.OpenAIChatCaller + ChatHistory *chathistory.Store +} + +func RegisterRoutes(r chi.Router, h *Handler) { + deps := adminsharedDeps(h) + authHandler := &adminauth.Handler{Store: deps.Store, Pool: deps.Pool, DS: deps.DS, OpenAI: deps.OpenAI, ChatHistory: deps.ChatHistory} + accountsHandler := &adminaccounts.Handler{Store: deps.Store, Pool: deps.Pool, DS: deps.DS, OpenAI: deps.OpenAI, ChatHistory: deps.ChatHistory} + configHandler := &adminconfig.Handler{Store: deps.Store, Pool: deps.Pool, DS: deps.DS, OpenAI: deps.OpenAI, ChatHistory: deps.ChatHistory} + settingsHandler := &adminsettings.Handler{Store: deps.Store, Pool: deps.Pool, DS: deps.DS, OpenAI: deps.OpenAI, ChatHistory: deps.ChatHistory} + proxiesHandler := &adminproxies.Handler{Store: deps.Store, Pool: deps.Pool, DS: deps.DS, OpenAI: deps.OpenAI, ChatHistory: deps.ChatHistory} + rawSamplesHandler := &adminrawsamples.Handler{Store: deps.Store, Pool: deps.Pool, DS: deps.DS, OpenAI: deps.OpenAI, ChatHistory: deps.ChatHistory} + vercelHandler := &adminvercel.Handler{Store: deps.Store, Pool: deps.Pool, DS: deps.DS, OpenAI: deps.OpenAI, ChatHistory: deps.ChatHistory} + historyHandler := &adminhistory.Handler{Store: deps.Store, Pool: deps.Pool, DS: deps.DS, OpenAI: 
deps.OpenAI, ChatHistory: deps.ChatHistory} + devCaptureHandler := &admindevcapture.Handler{Store: deps.Store, Pool: deps.Pool, DS: deps.DS, OpenAI: deps.OpenAI, ChatHistory: deps.ChatHistory} + versionHandler := &adminversion.Handler{Store: deps.Store, Pool: deps.Pool, DS: deps.DS, OpenAI: deps.OpenAI, ChatHistory: deps.ChatHistory} + + adminauth.RegisterPublicRoutes(r, authHandler) + r.Group(func(pr chi.Router) { + pr.Use(authHandler.RequireAdmin) + adminauth.RegisterProtectedRoutes(pr, authHandler) + adminconfig.RegisterRoutes(pr, configHandler) + adminsettings.RegisterRoutes(pr, settingsHandler) + adminproxies.RegisterRoutes(pr, proxiesHandler) + adminaccounts.RegisterRoutes(pr, accountsHandler) + adminrawsamples.RegisterRoutes(pr, rawSamplesHandler) + adminvercel.RegisterRoutes(pr, vercelHandler) + admindevcapture.RegisterRoutes(pr, devCaptureHandler) + adminhistory.RegisterRoutes(pr, historyHandler) + adminversion.RegisterRoutes(pr, versionHandler) + }) +} + +func adminsharedDeps(h *Handler) adminsharedDepsValue { + if h == nil { + return adminsharedDepsValue{} + } + return adminsharedDepsValue{Store: h.Store, Pool: h.Pool, DS: h.DS, OpenAI: h.OpenAI, ChatHistory: h.ChatHistory} +} + +type adminsharedDepsValue struct { + Store adminshared.ConfigStore + Pool adminshared.PoolController + DS adminshared.DeepSeekCaller + OpenAI adminshared.OpenAIChatCaller + ChatHistory *chathistory.Store +} diff --git a/internal/admin/handler_settings_test.go b/internal/httpapi/admin/handler_settings_test.go similarity index 95% rename from internal/admin/handler_settings_test.go rename to internal/httpapi/admin/handler_settings_test.go index 4300cfe..aefc1bd 100644 --- a/internal/admin/handler_settings_test.go +++ b/internal/httpapi/admin/handler_settings_test.go @@ -189,8 +189,8 @@ func TestUpdateSettingsHistorySplit(t *testing.T) { t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) } snap := h.Store.Snapshot() - if snap.HistorySplit.Enabled == nil || 
*snap.HistorySplit.Enabled { - t.Fatalf("expected history_split.enabled=false, got %#v", snap.HistorySplit.Enabled) + if snap.HistorySplit.Enabled == nil || !*snap.HistorySplit.Enabled { + t.Fatalf("expected history_split.enabled to be forced true, got %#v", snap.HistorySplit.Enabled) } if snap.HistorySplit.TriggerAfterTurns == nil || *snap.HistorySplit.TriggerAfterTurns != 3 { t.Fatalf("expected history_split.trigger_after_turns=3, got %#v", snap.HistorySplit.TriggerAfterTurns) @@ -346,6 +346,34 @@ func TestUpdateConfigLegacyKeysPreserveStructuredMetadata(t *testing.T) { } } +func TestUpdateConfigReplacesModelAliases(t *testing.T) { + h := newAdminTestHandler(t, `{ + "keys":["k1"], + "model_aliases":{"claude-sonnet-4-6":"deepseek-v4-flash"} + }`) + + payload := map[string]any{ + "model_aliases": map[string]any{ + "gpt-5.5": "deepseek-v4-pro", + }, + } + b, _ := json.Marshal(payload) + req := httptest.NewRequest(http.MethodPost, "/admin/config", bytes.NewReader(b)) + rec := httptest.NewRecorder() + h.updateConfig(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String()) + } + + snap := h.Store.Snapshot() + if len(snap.ModelAliases) != 1 { + t.Fatalf("expected aliases to be replaced, got %#v", snap.ModelAliases) + } + if snap.ModelAliases["gpt-5.5"] != "deepseek-v4-pro" { + t.Fatalf("expected updated alias, got %#v", snap.ModelAliases) + } +} + func TestUpdateSettingsPasswordInvalidatesOldJWT(t *testing.T) { hash := authn.HashAdminPassword("old-password") h := newAdminTestHandler(t, `{"admin":{"password_hash":"`+hash+`"}}`) diff --git a/internal/admin/handler_test.go b/internal/httpapi/admin/handler_test.go similarity index 64% rename from internal/admin/handler_test.go rename to internal/httpapi/admin/handler_test.go index a31e344..aa2db24 100644 --- a/internal/admin/handler_test.go +++ b/internal/httpapi/admin/handler_test.go @@ -1,6 +1,9 @@ package admin import ( + "encoding/json" + "net/http" + "net/http/httptest" 
"sync/atomic" "testing" "time" @@ -33,6 +36,53 @@ func TestFieldStringNilToEmpty(t *testing.T) { } } +func TestMaskSecretPreviewKeepsOnlyFirstAndLastTwoChars(t *testing.T) { + cases := map[string]string{ + "": "", + "a": "*", + "ab": "**", + "abcd": "****", + "abcdef": "ab****ef", + "abc12345": "ab****45", + } + + for input, want := range cases { + if got := maskSecretPreview(input); got != want { + t.Fatalf("maskSecretPreview(%q)=%q want %q", input, got, want) + } + } +} + +func TestGetConfigMasksAccountTokenPreview(t *testing.T) { + h := newAdminTestHandler(t, `{ + "accounts":[{"email":"u@example.com","password":"pwd"}] + }`) + if err := h.Store.UpdateAccountToken("u@example.com", "abcdefgh"); err != nil { + t.Fatalf("seed runtime token: %v", err) + } + + req := httptest.NewRequest(http.MethodGet, "/admin/config", nil) + rec := httptest.NewRecorder() + h.getConfig(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String()) + } + + var payload map[string]any + if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil { + t.Fatalf("decode response failed: %v", err) + } + accounts, _ := payload["accounts"].([]any) + if len(accounts) != 1 { + t.Fatalf("expected 1 account, got %d", len(accounts)) + } + first, _ := accounts[0].(map[string]any) + if got, _ := first["token_preview"].(string); got != "ab****gh" { + t.Fatalf("expected masked token preview, got %q", got) + } +} + func TestRunAccountTestsConcurrentlyKeepsInputOrder(t *testing.T) { accounts := []config.Account{ {Email: "a@example.com"}, diff --git a/internal/httpapi/admin/history/deps.go b/internal/httpapi/admin/history/deps.go new file mode 100644 index 0000000..7552596 --- /dev/null +++ b/internal/httpapi/admin/history/deps.go @@ -0,0 +1,16 @@ +package history + +import ( + "ds2api/internal/chathistory" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +type Handler struct { + Store adminshared.ConfigStore + Pool 
adminshared.PoolController + DS adminshared.DeepSeekCaller + OpenAI adminshared.OpenAIChatCaller + ChatHistory *chathistory.Store +} + +var writeJSON = adminshared.WriteJSON diff --git a/internal/admin/handler_chat_history.go b/internal/httpapi/admin/history/handler_chat_history.go similarity index 99% rename from internal/admin/handler_chat_history.go rename to internal/httpapi/admin/history/handler_chat_history.go index 2eb61e6..e05a9e3 100644 --- a/internal/admin/handler_chat_history.go +++ b/internal/httpapi/admin/history/handler_chat_history.go @@ -1,4 +1,4 @@ -package admin +package history import ( "encoding/json" diff --git a/internal/admin/handler_chat_history_test.go b/internal/httpapi/admin/history/handler_chat_history_test.go similarity index 99% rename from internal/admin/handler_chat_history_test.go rename to internal/httpapi/admin/history/handler_chat_history_test.go index ca61110..1397bae 100644 --- a/internal/admin/handler_chat_history_test.go +++ b/internal/httpapi/admin/history/handler_chat_history_test.go @@ -1,4 +1,4 @@ -package admin +package history import ( "bytes" @@ -38,7 +38,7 @@ func TestGetChatHistoryAndUpdateSettings(t *testing.T) { entry, err := historyStore.Start(chathistory.StartParams{ CallerID: "caller:test", AccountID: "user@example.com", - Model: "deepseek-chat", + Model: "deepseek-v4-flash", UserInput: "hello", }) if err != nil { diff --git a/internal/httpapi/admin/history/routes.go b/internal/httpapi/admin/history/routes.go new file mode 100644 index 0000000..c6f1f43 --- /dev/null +++ b/internal/httpapi/admin/history/routes.go @@ -0,0 +1,11 @@ +package history + +import "github.com/go-chi/chi/v5" + +func RegisterRoutes(r chi.Router, h *Handler) { + r.Get("/chat-history", h.getChatHistory) + r.Get("/chat-history/{id}", h.getChatHistoryItem) + r.Delete("/chat-history", h.clearChatHistory) + r.Delete("/chat-history/{id}", h.deleteChatHistoryItem) + r.Put("/chat-history/settings", h.updateChatHistorySettings) +} diff --git 
a/internal/httpapi/admin/proxies/deps.go b/internal/httpapi/admin/proxies/deps.go new file mode 100644 index 0000000..f02a639 --- /dev/null +++ b/internal/httpapi/admin/proxies/deps.go @@ -0,0 +1,32 @@ +package proxies + +import ( + "ds2api/internal/chathistory" + "ds2api/internal/config" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +type Handler struct { + Store adminshared.ConfigStore + Pool adminshared.PoolController + DS adminshared.DeepSeekCaller + OpenAI adminshared.OpenAIChatCaller + ChatHistory *chathistory.Store +} + +var writeJSON = adminshared.WriteJSON + +func fieldString(m map[string]any, key string) string { + return adminshared.FieldString(m, key) +} +func accountMatchesIdentifier(acc config.Account, identifier string) bool { + return adminshared.AccountMatchesIdentifier(acc, identifier) +} +func toProxy(m map[string]any) config.Proxy { return adminshared.ToProxy(m) } +func findProxyByID(c config.Config, proxyID string) (config.Proxy, bool) { + return adminshared.FindProxyByID(c, proxyID) +} +func newRequestError(detail string) error { return adminshared.NewRequestError(detail) } +func requestErrorDetail(err error) (string, bool) { + return adminshared.RequestErrorDetail(err) +} diff --git a/internal/admin/handler_proxies.go b/internal/httpapi/admin/proxies/handler_proxies.go similarity index 98% rename from internal/admin/handler_proxies.go rename to internal/httpapi/admin/proxies/handler_proxies.go index eeb653c..b87ce8f 100644 --- a/internal/admin/handler_proxies.go +++ b/internal/httpapi/admin/proxies/handler_proxies.go @@ -1,4 +1,4 @@ -package admin +package proxies import ( "context" @@ -10,11 +10,11 @@ import ( "github.com/go-chi/chi/v5" "ds2api/internal/config" - "ds2api/internal/deepseek" + dsclient "ds2api/internal/deepseek/client" ) var proxyConnectivityTester = func(ctx context.Context, proxy config.Proxy) map[string]any { - return deepseek.TestProxyConnectivity(ctx, proxy) + return dsclient.TestProxyConnectivity(ctx, proxy) 
} func validateProxyMutation(cfg *config.Config) error { diff --git a/internal/admin/handler_proxies_test.go b/internal/httpapi/admin/proxies/handler_proxies_test.go similarity index 99% rename from internal/admin/handler_proxies_test.go rename to internal/httpapi/admin/proxies/handler_proxies_test.go index f1f6d33..2c6a81c 100644 --- a/internal/admin/handler_proxies_test.go +++ b/internal/httpapi/admin/proxies/handler_proxies_test.go @@ -1,4 +1,4 @@ -package admin +package proxies import ( "bytes" diff --git a/internal/httpapi/admin/proxies/routes.go b/internal/httpapi/admin/proxies/routes.go new file mode 100644 index 0000000..bf03701 --- /dev/null +++ b/internal/httpapi/admin/proxies/routes.go @@ -0,0 +1,24 @@ +package proxies + +import ( + "net/http" + + "github.com/go-chi/chi/v5" +) + +func RegisterRoutes(r chi.Router, h *Handler) { + r.Get("/proxies", h.listProxies) + r.Post("/proxies", h.addProxy) + r.Put("/proxies/{proxyID}", h.updateProxy) + r.Delete("/proxies/{proxyID}", h.deleteProxy) + r.Post("/proxies/test", h.testProxy) + r.Put("/accounts/{identifier}/proxy", h.updateAccountProxy) +} + +func (h *Handler) AddProxy(w http.ResponseWriter, r *http.Request) { h.addProxy(w, r) } +func (h *Handler) UpdateProxy(w http.ResponseWriter, r *http.Request) { h.updateProxy(w, r) } +func (h *Handler) DeleteProxy(w http.ResponseWriter, r *http.Request) { h.deleteProxy(w, r) } +func (h *Handler) TestProxy(w http.ResponseWriter, r *http.Request) { h.testProxy(w, r) } +func (h *Handler) UpdateAccountProxy(w http.ResponseWriter, r *http.Request) { + h.updateAccountProxy(w, r) +} diff --git a/internal/httpapi/admin/proxies/test_http_helpers_test.go b/internal/httpapi/admin/proxies/test_http_helpers_test.go new file mode 100644 index 0000000..96c609e --- /dev/null +++ b/internal/httpapi/admin/proxies/test_http_helpers_test.go @@ -0,0 +1,57 @@ +package proxies + +import ( + "bytes" + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/go-chi/chi/v5" + + 
"ds2api/internal/account" + "ds2api/internal/auth" + "ds2api/internal/config" + dsclient "ds2api/internal/deepseek/client" + adminconfig "ds2api/internal/httpapi/admin/configmgmt" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +type testingDSMock struct{} + +func (m *testingDSMock) Login(_ context.Context, _ config.Account) (string, error) { + return "token", nil +} +func (m *testingDSMock) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) { + return "session-id", nil +} +func (m *testingDSMock) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) { + return "pow", nil +} +func (m *testingDSMock) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) { + return &http.Response{StatusCode: http.StatusOK, Body: http.NoBody}, nil +} +func (m *testingDSMock) DeleteAllSessionsForToken(_ context.Context, _ string) error { return nil } +func (m *testingDSMock) GetSessionCountForToken(_ context.Context, _ string) (*dsclient.SessionStats, error) { + return &dsclient.SessionStats{}, nil +} + +func newHTTPAdminHarness(t *testing.T, rawConfig string, ds adminshared.DeepSeekCaller) http.Handler { + t.Helper() + t.Setenv("DS2API_CONFIG_JSON", rawConfig) + store := config.LoadStore() + pool := account.NewPool(store) + h := &Handler{Store: store, Pool: pool, DS: ds} + configHandler := &adminconfig.Handler{Store: store, Pool: pool, DS: ds} + r := chi.NewRouter() + RegisterRoutes(r, h) + r.Get("/config", configHandler.GetConfig) + return r +} + +func adminReq(method, path string, body []byte) *http.Request { + req := httptest.NewRequest(method, path, bytes.NewReader(body)) + req.Header.Set("Authorization", "Bearer admin") + req.Header.Set("Content-Type", "application/json") + return req +} diff --git a/internal/httpapi/admin/rawsamples/deps.go b/internal/httpapi/admin/rawsamples/deps.go new file mode 100644 index 0000000..618d0d1 --- /dev/null +++ 
b/internal/httpapi/admin/rawsamples/deps.go @@ -0,0 +1,27 @@ +package rawsamples + +import ( + "net/http" + + "ds2api/internal/chathistory" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +type Handler struct { + Store adminshared.ConfigStore + Pool adminshared.PoolController + DS adminshared.DeepSeekCaller + OpenAI adminshared.OpenAIChatCaller + ChatHistory *chathistory.Store +} + +var writeJSON = adminshared.WriteJSON + +func intFromQuery(r *http.Request, key string, d int) int { + return adminshared.IntFromQuery(r, key, d) +} +func nilIfEmpty(s string) any { return adminshared.NilIfEmpty(s) } +func toStringSlice(v any) ([]string, bool) { return adminshared.ToStringSlice(v) } +func fieldString(m map[string]any, key string) string { + return adminshared.FieldString(m, key) +} diff --git a/internal/admin/handler_raw_samples.go b/internal/httpapi/admin/rawsamples/handler_raw_samples.go similarity index 98% rename from internal/admin/handler_raw_samples.go rename to internal/httpapi/admin/rawsamples/handler_raw_samples.go index c9ad58e..a30e214 100644 --- a/internal/admin/handler_raw_samples.go +++ b/internal/httpapi/admin/rawsamples/handler_raw_samples.go @@ -1,4 +1,4 @@ -package admin +package rawsamples import ( "bytes" @@ -13,6 +13,7 @@ import ( "ds2api/internal/config" "ds2api/internal/devcapture" + adminshared "ds2api/internal/httpapi/admin/shared" "ds2api/internal/rawsample" ) @@ -93,7 +94,7 @@ func (h *Handler) captureRawSample(w http.ResponseWriter, r *http.Request) { _, _ = io.Copy(w, bytes.NewReader(rec.Body.Bytes())) } -func prepareRawSampleCaptureRequest(store ConfigStore, req map[string]any) (map[string]any, string, string, error) { +func prepareRawSampleCaptureRequest(store adminshared.ConfigStore, req map[string]any) (map[string]any, string, string, error) { payload := cloneMap(req) sampleID := strings.TrimSpace(fieldString(payload, "sample_id")) apiKey := strings.TrimSpace(fieldString(payload, "api_key")) @@ -114,7 +115,7 @@ func 
prepareRawSampleCaptureRequest(store ConfigStore, req map[string]any) (map[ } if model := strings.TrimSpace(fieldString(payload, "model")); model == "" { - payload["model"] = "deepseek-chat" + payload["model"] = "deepseek-v4-flash" } if _, ok := payload["stream"]; !ok { payload["stream"] = true diff --git a/internal/admin/handler_raw_samples_test.go b/internal/httpapi/admin/rawsamples/handler_raw_samples_test.go similarity index 97% rename from internal/admin/handler_raw_samples_test.go rename to internal/httpapi/admin/rawsamples/handler_raw_samples_test.go index a3dbe39..780c0ef 100644 --- a/internal/admin/handler_raw_samples_test.go +++ b/internal/httpapi/admin/rawsamples/handler_raw_samples_test.go @@ -1,4 +1,4 @@ -package admin +package rawsamples import ( "bytes" @@ -18,7 +18,7 @@ type stubOpenAIChatCaller struct{} func (stubOpenAIChatCaller) ChatCompletions(w http.ResponseWriter, _ *http.Request) { store := devcapture.Global() - session := store.Start("deepseek_completion", "https://chat.deepseek.com/api/v0/chat/completion", "acct-test", map[string]any{"model": "deepseek-chat"}) + session := store.Start("deepseek_completion", "https://chat.deepseek.com/api/v0/chat/completion", "acct-test", map[string]any{"model": "deepseek-v4-flash"}) raw := io.NopCloser(strings.NewReader( "data: {\"v\":\"hello [reference:1]\"}\n\n" + "data: {\"v\":\"FINISHED\",\"p\":\"response/status\"}\n\n", @@ -37,7 +37,7 @@ func (stubOpenAIChatCaller) ChatCompletions(w http.ResponseWriter, _ *http.Reque type stubOpenAIChatCallerWithContinuations struct{} func (stubOpenAIChatCallerWithContinuations) ChatCompletions(w http.ResponseWriter, _ *http.Request) { - recordCapturedResponse("deepseek_completion", "https://chat.deepseek.com/api/v0/chat/completion", http.StatusOK, map[string]any{"model": "deepseek-chat"}, "data: {\"v\":\"hello [reference:1]\"}\n\n"+"data: [DONE]\n\n") + recordCapturedResponse("deepseek_completion", "https://chat.deepseek.com/api/v0/chat/completion", http.StatusOK, 
map[string]any{"model": "deepseek-v4-flash"}, "data: {\"v\":\"hello [reference:1]\"}\n\n"+"data: [DONE]\n\n") recordCapturedResponse("deepseek_continue", "https://chat.deepseek.com/api/v0/chat/continue", http.StatusOK, map[string]any{"chat_session_id": "session-1", "message_id": 2}, "data: {\"v\":\"continued\"}\n\n"+"data: [DONE]\n\n") w.Header().Set("Content-Type", "text/event-stream") @@ -73,7 +73,7 @@ func TestCaptureRawSampleWritesPersistentSample(t *testing.T) { reqBody := `{ "sample_id":"My Sample 01", "api_key":"local-key", - "model":"deepseek-chat", + "model":"deepseek-v4-flash", "message":"广州天气", "stream":true }` @@ -130,7 +130,7 @@ func TestCaptureRawSampleCombinesContinuationCaptures(t *testing.T) { reqBody := `{ "sample_id":"My Sample 02", "api_key":"local-key", - "model":"deepseek-chat", + "model":"deepseek-v4-flash", "message":"广州天气", "stream":true }` @@ -194,13 +194,13 @@ func TestCaptureRawSampleReturnsErrorWhenNoNewCaptureRecorded(t *testing.T) { devcapture.Global().Clear() defer devcapture.Global().Clear() - recordCapturedResponse("preexisting", "https://chat.deepseek.com/api/v0/chat/completion", http.StatusOK, map[string]any{"model": "deepseek-chat"}, "data: {\"v\":\"old\"}\n\n") + recordCapturedResponse("preexisting", "https://chat.deepseek.com/api/v0/chat/completion", http.StatusOK, map[string]any{"model": "deepseek-v4-flash"}, "data: {\"v\":\"old\"}\n\n") h := &Handler{OpenAI: stubOpenAIChatCallerWithoutCapture{}} reqBody := `{ "sample_id":"My Sample 03", "api_key":"local-key", - "model":"deepseek-chat", + "model":"deepseek-v4-flash", "message":"广州天气", "stream":true }` diff --git a/internal/httpapi/admin/rawsamples/routes.go b/internal/httpapi/admin/rawsamples/routes.go new file mode 100644 index 0000000..9eb2109 --- /dev/null +++ b/internal/httpapi/admin/rawsamples/routes.go @@ -0,0 +1,9 @@ +package rawsamples + +import "github.com/go-chi/chi/v5" + +func RegisterRoutes(r chi.Router, h *Handler) { + r.Post("/dev/raw-samples/capture", 
h.captureRawSample) + r.Get("/dev/raw-samples/query", h.queryRawSampleCaptures) + r.Post("/dev/raw-samples/save", h.saveRawSampleFromCaptures) +} diff --git a/internal/httpapi/admin/settings/deps.go b/internal/httpapi/admin/settings/deps.go new file mode 100644 index 0000000..6df91f4 --- /dev/null +++ b/internal/httpapi/admin/settings/deps.go @@ -0,0 +1,29 @@ +package settings + +import ( + "ds2api/internal/chathistory" + "ds2api/internal/config" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +type Handler struct { + Store adminshared.ConfigStore + Pool adminshared.PoolController + DS adminshared.DeepSeekCaller + OpenAI adminshared.OpenAIChatCaller + ChatHistory *chathistory.Store +} + +var writeJSON = adminshared.WriteJSON +var intFrom = adminshared.IntFrom + +func fieldString(m map[string]any, key string) string { + return adminshared.FieldString(m, key) +} +func validateRuntimeSettings(runtime config.RuntimeConfig) error { + return adminshared.ValidateRuntimeSettings(runtime) +} + +func (h *Handler) computeSyncHash() string { + return adminshared.ComputeSyncHash(h.Store) +} diff --git a/internal/admin/handler_settings_parse.go b/internal/httpapi/admin/settings/handler_settings_parse.go similarity index 76% rename from internal/admin/handler_settings_parse.go rename to internal/httpapi/admin/settings/handler_settings_parse.go index c02d421..14fb92d 100644 --- a/internal/admin/handler_settings_parse.go +++ b/internal/httpapi/admin/settings/handler_settings_parse.go @@ -1,4 +1,4 @@ -package admin +package settings import ( "fmt" @@ -21,7 +21,7 @@ func boolFrom(v any) bool { } } -func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, *config.CompatConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, *config.AutoDeleteConfig, *config.HistorySplitConfig, map[string]string, map[string]string, error) { +func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, 
*config.CompatConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, *config.AutoDeleteConfig, *config.HistorySplitConfig, map[string]string, error) { var ( adminCfg *config.AdminConfig runtimeCfg *config.RuntimeConfig @@ -30,7 +30,6 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi embCfg *config.EmbeddingsConfig autoDeleteCfg *config.AutoDeleteConfig historySplitCfg *config.HistorySplitConfig - claudeMap map[string]string aliasMap map[string]string ) @@ -39,7 +38,7 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi if v, exists := raw["jwt_expire_hours"]; exists { n := intFrom(v) if err := config.ValidateIntRange("admin.jwt_expire_hours", n, 1, 720, true); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.JWTExpireHours = n } @@ -51,33 +50,33 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi if v, exists := raw["account_max_inflight"]; exists { n := intFrom(v) if err := config.ValidateIntRange("runtime.account_max_inflight", n, 1, 256, true); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.AccountMaxInflight = n } if v, exists := raw["account_max_queue"]; exists { n := intFrom(v) if err := config.ValidateIntRange("runtime.account_max_queue", n, 1, 200000, true); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.AccountMaxQueue = n } if v, exists := raw["global_max_inflight"]; exists { n := intFrom(v) if err := config.ValidateIntRange("runtime.global_max_inflight", n, 1, 200000, true); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.GlobalMaxInflight = n } if v, exists := raw["token_refresh_interval_hours"]; exists { n := intFrom(v) if err := 
config.ValidateIntRange("runtime.token_refresh_interval_hours", n, 1, 720, true); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.TokenRefreshIntervalHours = n } if cfg.AccountMaxInflight > 0 && cfg.GlobalMaxInflight > 0 && cfg.GlobalMaxInflight < cfg.AccountMaxInflight { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight") + return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight") } runtimeCfg = cfg } @@ -100,7 +99,7 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi if v, exists := raw["store_ttl_seconds"]; exists { n := intFrom(v) if err := config.ValidateIntRange("responses.store_ttl_seconds", n, 30, 86400, true); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.StoreTTLSeconds = n } @@ -112,27 +111,17 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi if v, exists := raw["provider"]; exists { p := strings.TrimSpace(fmt.Sprintf("%v", v)) if err := config.ValidateTrimmedString("embeddings.provider", p, false); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.Provider = p } embCfg = cfg } - if raw, ok := req["claude_mapping"].(map[string]any); ok { - claudeMap = map[string]string{} - for k, v := range raw { - key := strings.TrimSpace(k) - val := strings.TrimSpace(fmt.Sprintf("%v", v)) - if key == "" || val == "" { - continue - } - claudeMap[key] = val - } - } - if raw, ok := req["model_aliases"].(map[string]any); ok { - aliasMap = map[string]string{} + if aliasMap == nil { + aliasMap = map[string]string{} + } for k, v := range raw { key := strings.TrimSpace(k) val := strings.TrimSpace(fmt.Sprintf("%v", v)) 
@@ -148,7 +137,7 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi if v, exists := raw["mode"]; exists { mode := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", v))) if err := config.ValidateAutoDeleteMode(mode); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } if mode == "" { mode = "none" @@ -163,22 +152,20 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi if raw, ok := req["history_split"].(map[string]any); ok { cfg := &config.HistorySplitConfig{} - if v, exists := raw["enabled"]; exists { - b := boolFrom(v) - cfg.Enabled = &b - } + enabled := true + cfg.Enabled = &enabled if v, exists := raw["trigger_after_turns"]; exists { n := intFrom(v) if err := config.ValidateIntRange("history_split.trigger_after_turns", n, 1, 1000, true); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.TriggerAfterTurns = &n } if err := config.ValidateHistorySplitConfig(*cfg); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } historySplitCfg = cfg } - return adminCfg, runtimeCfg, compatCfg, respCfg, embCfg, autoDeleteCfg, historySplitCfg, claudeMap, aliasMap, nil + return adminCfg, runtimeCfg, compatCfg, respCfg, embCfg, autoDeleteCfg, historySplitCfg, aliasMap, nil } diff --git a/internal/admin/handler_settings_read.go b/internal/httpapi/admin/settings/handler_settings_read.go similarity index 95% rename from internal/admin/handler_settings_read.go rename to internal/httpapi/admin/settings/handler_settings_read.go index dc060a8..7587004 100644 --- a/internal/admin/handler_settings_read.go +++ b/internal/httpapi/admin/settings/handler_settings_read.go @@ -1,4 +1,4 @@ -package admin +package settings import ( "net/http" @@ -34,7 +34,6 @@ func (h *Handler) getSettings(w 
http.ResponseWriter, _ *http.Request) { "enabled": h.Store.HistorySplitEnabled(), "trigger_after_turns": h.Store.HistorySplitTriggerAfterTurns(), }, - "claude_mapping": settingsClaudeMapping(snap), "model_aliases": snap.ModelAliases, "env_backed": h.Store.IsEnvBacked(), "needs_vercel_sync": needsSync, diff --git a/internal/admin/handler_settings_runtime.go b/internal/httpapi/admin/settings/handler_settings_runtime.go similarity index 81% rename from internal/admin/handler_settings_runtime.go rename to internal/httpapi/admin/settings/handler_settings_runtime.go index 091c5ae..eee3c6e 100644 --- a/internal/admin/handler_settings_runtime.go +++ b/internal/httpapi/admin/settings/handler_settings_runtime.go @@ -1,4 +1,4 @@ -package admin +package settings import "ds2api/internal/config" @@ -42,13 +42,3 @@ func defaultRuntimeRecommended(accountCount, maxPer int) int { } return accountCount * maxPer } - -func settingsClaudeMapping(c config.Config) map[string]string { - if len(c.ClaudeMapping) > 0 { - return c.ClaudeMapping - } - if len(c.ClaudeModelMap) > 0 { - return c.ClaudeModelMap - } - return map[string]string{"fast": "deepseek-chat", "slow": "deepseek-reasoner"} -} diff --git a/internal/admin/handler_settings_write.go b/internal/httpapi/admin/settings/handler_settings_write.go similarity index 95% rename from internal/admin/handler_settings_write.go rename to internal/httpapi/admin/settings/handler_settings_write.go index ee4105a..11ac6b4 100644 --- a/internal/admin/handler_settings_write.go +++ b/internal/httpapi/admin/settings/handler_settings_write.go @@ -1,4 +1,4 @@ -package admin +package settings import ( "encoding/json" @@ -17,7 +17,7 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) { return } - adminCfg, runtimeCfg, compatCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, historySplitCfg, claudeMap, aliasMap, err := parseSettingsUpdateRequest(req) + adminCfg, runtimeCfg, compatCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, 
historySplitCfg, aliasMap, err := parseSettingsUpdateRequest(req) if err != nil { writeJSON(w, http.StatusBadRequest, map[string]any{"detail": err.Error()}) return @@ -75,10 +75,6 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) { c.HistorySplit.TriggerAfterTurns = historySplitCfg.TriggerAfterTurns } } - if claudeMap != nil { - c.ClaudeMapping = claudeMap - c.ClaudeModelMap = nil - } if aliasMap != nil { c.ModelAliases = aliasMap } diff --git a/internal/httpapi/admin/settings/routes.go b/internal/httpapi/admin/settings/routes.go new file mode 100644 index 0000000..0d44584 --- /dev/null +++ b/internal/httpapi/admin/settings/routes.go @@ -0,0 +1,20 @@ +package settings + +import ( + "net/http" + + "github.com/go-chi/chi/v5" +) + +func RegisterRoutes(r chi.Router, h *Handler) { + r.Get("/settings", h.getSettings) + r.Put("/settings", h.updateSettings) + r.Post("/settings/password", h.updateSettingsPassword) +} + +func (h *Handler) GetSettings(w http.ResponseWriter, r *http.Request) { h.getSettings(w, r) } +func (h *Handler) UpdateSettings(w http.ResponseWriter, r *http.Request) { h.updateSettings(w, r) } +func (h *Handler) UpdateSettingsPassword(w http.ResponseWriter, r *http.Request) { + h.updateSettingsPassword(w, r) +} +func BoolFrom(v any) bool { return boolFrom(v) } diff --git a/internal/admin/deps.go b/internal/httpapi/admin/shared/deps.go similarity index 90% rename from internal/admin/deps.go rename to internal/httpapi/admin/shared/deps.go index 436775c..9adc755 100644 --- a/internal/admin/deps.go +++ b/internal/httpapi/admin/shared/deps.go @@ -1,4 +1,4 @@ -package admin +package shared import ( "context" @@ -7,7 +7,7 @@ import ( "ds2api/internal/account" "ds2api/internal/auth" "ds2api/internal/config" - "ds2api/internal/deepseek" + dsclient "ds2api/internal/deepseek/client" ) type ConfigStore interface { @@ -54,10 +54,10 @@ type DeepSeekCaller interface { CreateSession(ctx context.Context, a *auth.RequestAuth, maxAttempts int) 
(string, error) GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error) CallCompletion(ctx context.Context, a *auth.RequestAuth, payload map[string]any, powResp string, maxAttempts int) (*http.Response, error) - GetSessionCountForToken(ctx context.Context, token string) (*deepseek.SessionStats, error) + GetSessionCountForToken(ctx context.Context, token string) (*dsclient.SessionStats, error) DeleteAllSessionsForToken(ctx context.Context, token string) error } var _ ConfigStore = (*config.Store)(nil) var _ PoolController = (*account.Pool)(nil) -var _ DeepSeekCaller = (*deepseek.Client)(nil) +var _ DeepSeekCaller = (*dsclient.Client)(nil) diff --git a/internal/admin/helpers.go b/internal/httpapi/admin/shared/helpers.go similarity index 69% rename from internal/admin/helpers.go rename to internal/httpapi/admin/shared/helpers.go index c7af36f..93b6937 100644 --- a/internal/admin/helpers.go +++ b/internal/httpapi/admin/shared/helpers.go @@ -1,6 +1,8 @@ -package admin +package shared import ( + "crypto/md5" + "encoding/json" "fmt" "net/http" "strconv" @@ -10,10 +12,95 @@ import ( "ds2api/internal/util" ) -// writeJSON and intFrom are package-internal aliases for the shared util versions. 
-var writeJSON = util.WriteJSON var intFrom = util.IntFrom +var WriteJSON = util.WriteJSON +var IntFrom = util.IntFrom + +func ReverseAccounts(a []config.Account) { reverseAccounts(a) } +func IntFromQuery(r *http.Request, key string, d int) int { + return intFromQuery(r, key, d) +} +func NilIfEmpty(s string) any { return nilIfEmpty(s) } +func NilIfZero(v int64) any { return nilIfZero(v) } +func MaskSecretPreview(secret string) string { + return maskSecretPreview(secret) +} +func ToStringSlice(v any) ([]string, bool) { return toStringSlice(v) } +func ToAccount(m map[string]any) config.Account { + return toAccount(m) +} +func ToAPIKeys(v any) ([]config.APIKey, bool) { + return toAPIKeys(v) +} +func NormalizeAPIKeyForStorage(item config.APIKey) config.APIKey { + return normalizeAPIKeyForStorage(item) +} +func APIKeyHasMetadata(item config.APIKey) bool { + return apiKeyHasMetadata(item) +} +func MergeAPIKeysPreferStructured(existing, incoming []config.APIKey) ([]config.APIKey, int) { + return mergeAPIKeysPreferStructured(existing, incoming) +} +func MergeAPIKeyRecord(existing, incoming config.APIKey) config.APIKey { + return mergeAPIKeyRecord(existing, incoming) +} +func FieldString(m map[string]any, key string) string { + return fieldString(m, key) +} +func FieldStringOptional(m map[string]any, key string) (string, bool) { + return fieldStringOptional(m, key) +} +func StatusOr(v int, d int) int { return statusOr(v, d) } +func AccountMatchesIdentifier(acc config.Account, identifier string) bool { + return accountMatchesIdentifier(acc, identifier) +} +func NormalizeAccountForStorage(acc config.Account) config.Account { + return normalizeAccountForStorage(acc) +} +func ToProxy(m map[string]any) config.Proxy { + return toProxy(m) +} +func FindProxyByID(c config.Config, proxyID string) (config.Proxy, bool) { + return findProxyByID(c, proxyID) +} +func AccountDedupeKey(acc config.Account) string { return accountDedupeKey(acc) } +func NormalizeAndDedupeAccounts(accounts 
[]config.Account) []config.Account { + return normalizeAndDedupeAccounts(accounts) +} +func FindAccountByIdentifier(store ConfigStore, identifier string) (config.Account, bool) { + return findAccountByIdentifier(store, identifier) +} + +func ComputeSyncHash(store ConfigStore) string { + if store == nil { + return "" + } + snap := store.Snapshot().Clone() + snap.ClearAccountTokens() + snap.VercelSyncHash = "" + snap.VercelSyncTime = 0 + b, _ := json.Marshal(snap) + sum := md5.Sum(b) + return fmt.Sprintf("%x", sum) +} + +func SyncHashForJSON(s string) string { + var cfg config.Config + if err := json.Unmarshal([]byte(s), &cfg); err != nil { + return "" + } + cfg.VercelSyncHash = "" + cfg.VercelSyncTime = 0 + cfg.ClearAccountTokens() + b, err := json.Marshal(cfg) + if err != nil { + return "" + } + sum := md5.Sum(b) + return fmt.Sprintf("%x", sum) +} + func reverseAccounts(a []config.Account) { for i, j := 0, len(a)-1; i < j; i, j = i+1, j-1 { a[i], a[j] = a[j], a[i] @@ -46,6 +133,17 @@ func nilIfZero(v int64) any { return v } +func maskSecretPreview(secret string) string { + secret = strings.TrimSpace(secret) + if secret == "" { + return "" + } + if len(secret) <= 4 { + return strings.Repeat("*", len(secret)) + } + return secret[:2] + "****" + secret[len(secret)-2:] +} + func toStringSlice(v any) ([]string, bool) { arr, ok := v.([]any) if !ok { diff --git a/internal/admin/helpers_edge_test.go b/internal/httpapi/admin/shared/helpers_edge_test.go similarity index 99% rename from internal/admin/helpers_edge_test.go rename to internal/httpapi/admin/shared/helpers_edge_test.go index 17bb3d7..5163005 100644 --- a/internal/admin/helpers_edge_test.go +++ b/internal/httpapi/admin/shared/helpers_edge_test.go @@ -1,4 +1,4 @@ -package admin +package shared import ( "net/http" diff --git a/internal/admin/request_error.go b/internal/httpapi/admin/shared/request_error.go similarity index 67% rename from internal/admin/request_error.go rename to 
internal/httpapi/admin/shared/request_error.go index 5431a3d..e17433e 100644 --- a/internal/admin/request_error.go +++ b/internal/httpapi/admin/shared/request_error.go @@ -1,4 +1,4 @@ -package admin +package shared import "errors" @@ -14,6 +14,10 @@ func newRequestError(detail string) error { return &requestError{detail: detail} } +func NewRequestError(detail string) error { + return newRequestError(detail) +} + func requestErrorDetail(err error) (string, bool) { var reqErr *requestError if errors.As(err, &reqErr) { @@ -21,3 +25,7 @@ func requestErrorDetail(err error) (string, bool) { } return "", false } + +func RequestErrorDetail(err error) (string, bool) { + return requestErrorDetail(err) +} diff --git a/internal/admin/settings_validation.go b/internal/httpapi/admin/shared/settings_validation.go similarity index 61% rename from internal/admin/settings_validation.go rename to internal/httpapi/admin/shared/settings_validation.go index c18f955..981e19e 100644 --- a/internal/admin/settings_validation.go +++ b/internal/httpapi/admin/shared/settings_validation.go @@ -1,4 +1,4 @@ -package admin +package shared import ( "strings" @@ -14,10 +14,22 @@ func normalizeSettingsConfig(c *config.Config) { c.Embeddings.Provider = strings.TrimSpace(c.Embeddings.Provider) } +func NormalizeSettingsConfig(c *config.Config) { + normalizeSettingsConfig(c) +} + func validateSettingsConfig(c config.Config) error { return config.ValidateConfig(c) } +func ValidateSettingsConfig(c config.Config) error { + return validateSettingsConfig(c) +} + func validateRuntimeSettings(runtime config.RuntimeConfig) error { return config.ValidateRuntimeConfig(runtime) } + +func ValidateRuntimeSettings(runtime config.RuntimeConfig) error { + return validateRuntimeSettings(runtime) +} diff --git a/internal/httpapi/admin/test_bridge_test.go b/internal/httpapi/admin/test_bridge_test.go new file mode 100644 index 0000000..5d523b1 --- /dev/null +++ b/internal/httpapi/admin/test_bridge_test.go @@ -0,0 +1,123 @@ 
+package admin + +import ( + "context" + "net/http" + "testing" + + "ds2api/internal/account" + "ds2api/internal/auth" + "ds2api/internal/config" + dsclient "ds2api/internal/deepseek/client" + adminaccounts "ds2api/internal/httpapi/admin/accounts" + adminconfig "ds2api/internal/httpapi/admin/configmgmt" + adminsettings "ds2api/internal/httpapi/admin/settings" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +var intFrom = adminshared.IntFrom + +func toAccount(m map[string]any) config.Account { return adminshared.ToAccount(m) } +func fieldString(m map[string]any, key string) string { + return adminshared.FieldString(m, key) +} +func maskSecretPreview(secret string) string { return adminshared.MaskSecretPreview(secret) } +func boolFrom(v any) bool { return adminsettings.BoolFrom(v) } + +func newAdminTestHandler(t *testing.T, raw string) *Handler { + t.Helper() + t.Setenv("DS2API_CONFIG_JSON", raw) + store := config.LoadStore() + return &Handler{ + Store: store, + Pool: account.NewPool(store), + } +} + +type testingDSMock struct { + loginToken string + deleteAllSessionsError error + deleteAllSessionsErrorOnce bool + sessionCount *dsclient.SessionStats + loginCalls int + deleteAllCalls int +} + +func (m *testingDSMock) Login(_ context.Context, _ config.Account) (string, error) { + m.loginCalls++ + if m.loginToken == "" { + return "token", nil + } + return m.loginToken, nil +} + +func (m *testingDSMock) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) { + return "session-id", nil +} + +func (m *testingDSMock) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) { + return "pow", nil +} + +func (m *testingDSMock) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) { + return &http.Response{StatusCode: http.StatusOK, Body: http.NoBody}, nil +} + +func (m *testingDSMock) DeleteAllSessionsForToken(_ context.Context, _ string) error { + m.deleteAllCalls++ + if 
m.deleteAllSessionsError != nil { + err := m.deleteAllSessionsError + if m.deleteAllSessionsErrorOnce { + m.deleteAllSessionsError = nil + } + return err + } + return nil +} + +func (m *testingDSMock) GetSessionCountForToken(_ context.Context, _ string) (*dsclient.SessionStats, error) { + if m.sessionCount != nil { + return m.sessionCount, nil + } + return &dsclient.SessionStats{}, nil +} + +func (h *Handler) configHandler() *adminconfig.Handler { + return &adminconfig.Handler{Store: h.Store, Pool: h.Pool, DS: h.DS, OpenAI: h.OpenAI, ChatHistory: h.ChatHistory} +} + +func (h *Handler) settingsHandler() *adminsettings.Handler { + return &adminsettings.Handler{Store: h.Store, Pool: h.Pool, DS: h.DS, OpenAI: h.OpenAI, ChatHistory: h.ChatHistory} +} + +func (h *Handler) getConfig(w http.ResponseWriter, r *http.Request) { + h.configHandler().GetConfig(w, r) +} + +func (h *Handler) updateConfig(w http.ResponseWriter, r *http.Request) { + h.configHandler().UpdateConfig(w, r) +} + +func (h *Handler) configImport(w http.ResponseWriter, r *http.Request) { + h.configHandler().ConfigImport(w, r) +} + +func (h *Handler) batchImport(w http.ResponseWriter, r *http.Request) { + h.configHandler().BatchImport(w, r) +} + +func (h *Handler) getSettings(w http.ResponseWriter, r *http.Request) { + h.settingsHandler().GetSettings(w, r) +} + +func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) { + h.settingsHandler().UpdateSettings(w, r) +} + +func (h *Handler) updateSettingsPassword(w http.ResponseWriter, r *http.Request) { + h.settingsHandler().UpdateSettingsPassword(w, r) +} + +func runAccountTestsConcurrently(accounts []config.Account, maxConcurrency int, testFn func(int, config.Account) map[string]any) []map[string]any { + return adminaccounts.RunAccountTestsConcurrently(accounts, maxConcurrency, testFn) +} diff --git a/internal/admin/token_runtime_http_test.go b/internal/httpapi/admin/token_runtime_http_test.go similarity index 95% rename from 
internal/admin/token_runtime_http_test.go rename to internal/httpapi/admin/token_runtime_http_test.go index 3af3da0..0933fb7 100644 --- a/internal/admin/token_runtime_http_test.go +++ b/internal/httpapi/admin/token_runtime_http_test.go @@ -12,9 +12,10 @@ import ( "ds2api/internal/account" "ds2api/internal/config" + adminshared "ds2api/internal/httpapi/admin/shared" ) -func newHTTPAdminHarness(t *testing.T, rawConfig string, ds DeepSeekCaller) http.Handler { +func newHTTPAdminHarness(t *testing.T, rawConfig string, ds adminshared.DeepSeekCaller) http.Handler { t.Helper() t.Setenv("DS2API_CONFIG_JSON", rawConfig) store := config.LoadStore() diff --git a/internal/httpapi/admin/vercel/deps.go b/internal/httpapi/admin/vercel/deps.go new file mode 100644 index 0000000..c719edc --- /dev/null +++ b/internal/httpapi/admin/vercel/deps.go @@ -0,0 +1,24 @@ +package vercel + +import ( + "ds2api/internal/chathistory" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +type Handler struct { + Store adminshared.ConfigStore + Pool adminshared.PoolController + DS adminshared.DeepSeekCaller + OpenAI adminshared.OpenAIChatCaller + ChatHistory *chathistory.Store +} + +var writeJSON = adminshared.WriteJSON +var intFrom = adminshared.IntFrom + +func nilIfZero(v int64) any { return adminshared.NilIfZero(v) } +func statusOr(v int, d int) int { return adminshared.StatusOr(v, d) } + +func (h *Handler) computeSyncHash() string { + return adminshared.ComputeSyncHash(h.Store) +} diff --git a/internal/admin/handler_vercel.go b/internal/httpapi/admin/vercel/handler_vercel.go similarity index 99% rename from internal/admin/handler_vercel.go rename to internal/httpapi/admin/vercel/handler_vercel.go index e0734ed..cfd13e1 100644 --- a/internal/admin/handler_vercel.go +++ b/internal/httpapi/admin/vercel/handler_vercel.go @@ -1,4 +1,4 @@ -package admin +package vercel import ( "bytes" diff --git a/internal/httpapi/admin/vercel/routes.go b/internal/httpapi/admin/vercel/routes.go new file mode 
100644 index 0000000..dec4d1b --- /dev/null +++ b/internal/httpapi/admin/vercel/routes.go @@ -0,0 +1,9 @@ +package vercel + +import "github.com/go-chi/chi/v5" + +func RegisterRoutes(r chi.Router, h *Handler) { + r.Post("/vercel/sync", h.syncVercel) + r.Get("/vercel/status", h.vercelStatus) + r.Post("/vercel/status", h.vercelStatus) +} diff --git a/internal/httpapi/admin/version/deps.go b/internal/httpapi/admin/version/deps.go new file mode 100644 index 0000000..cf181ca --- /dev/null +++ b/internal/httpapi/admin/version/deps.go @@ -0,0 +1,16 @@ +package version + +import ( + "ds2api/internal/chathistory" + adminshared "ds2api/internal/httpapi/admin/shared" +) + +type Handler struct { + Store adminshared.ConfigStore + Pool adminshared.PoolController + DS adminshared.DeepSeekCaller + OpenAI adminshared.OpenAIChatCaller + ChatHistory *chathistory.Store +} + +var writeJSON = adminshared.WriteJSON diff --git a/internal/admin/handler_version.go b/internal/httpapi/admin/version/handler_version.go similarity index 99% rename from internal/admin/handler_version.go rename to internal/httpapi/admin/version/handler_version.go index 2d2ef53..fb6271e 100644 --- a/internal/admin/handler_version.go +++ b/internal/httpapi/admin/version/handler_version.go @@ -1,4 +1,4 @@ -package admin +package version import ( "encoding/json" diff --git a/internal/httpapi/admin/version/routes.go b/internal/httpapi/admin/version/routes.go new file mode 100644 index 0000000..31368b0 --- /dev/null +++ b/internal/httpapi/admin/version/routes.go @@ -0,0 +1,7 @@ +package version + +import "github.com/go-chi/chi/v5" + +func RegisterRoutes(r chi.Router, h *Handler) { + r.Get("/version", h.getVersion) +} diff --git a/internal/adapter/claude/convert.go b/internal/httpapi/claude/convert.go similarity index 83% rename from internal/adapter/claude/convert.go rename to internal/httpapi/claude/convert.go index dbb5e1a..2233a65 100644 --- a/internal/adapter/claude/convert.go +++ b/internal/httpapi/claude/convert.go 
@@ -4,7 +4,7 @@ import ( "ds2api/internal/claudeconv" ) -const defaultClaudeModel = "claude-sonnet-4-5" +const defaultClaudeModel = "claude-sonnet-4-6" func convertClaudeToDeepSeek(claudeReq map[string]any, store ConfigReader) map[string]any { return claudeconv.ConvertClaudeToDeepSeek(claudeReq, store, defaultClaudeModel) diff --git a/internal/adapter/claude/deps.go b/internal/httpapi/claude/deps.go similarity index 86% rename from internal/adapter/claude/deps.go rename to internal/httpapi/claude/deps.go index 0088e81..f5c27f9 100644 --- a/internal/adapter/claude/deps.go +++ b/internal/httpapi/claude/deps.go @@ -6,7 +6,7 @@ import ( "ds2api/internal/auth" "ds2api/internal/config" - "ds2api/internal/deepseek" + dsclient "ds2api/internal/deepseek/client" ) type AuthResolver interface { @@ -21,7 +21,7 @@ type DeepSeekCaller interface { } type ConfigReader interface { - ClaudeMapping() map[string]string + ModelAliases() map[string]string CompatStripReferenceMarkers() bool } @@ -30,5 +30,5 @@ type OpenAIChatRunner interface { } var _ AuthResolver = (*auth.Resolver)(nil) -var _ DeepSeekCaller = (*deepseek.Client)(nil) +var _ DeepSeekCaller = (*dsclient.Client)(nil) var _ ConfigReader = (*config.Store)(nil) diff --git a/internal/httpapi/claude/deps_injection_test.go b/internal/httpapi/claude/deps_injection_test.go new file mode 100644 index 0000000..e30ec2f --- /dev/null +++ b/internal/httpapi/claude/deps_injection_test.go @@ -0,0 +1,74 @@ +package claude + +import "testing" + +type mockClaudeConfig struct { + aliases map[string]string +} + +func (m mockClaudeConfig) ModelAliases() map[string]string { return m.aliases } +func (mockClaudeConfig) CompatStripReferenceMarkers() bool { return true } + +func TestNormalizeClaudeRequestUsesGlobalAliasMapping(t *testing.T) { + req := map[string]any{ + "model": "claude-opus-4-6", + "messages": []any{ + map[string]any{"role": "user", "content": "hello"}, + }, + } + out, err := normalizeClaudeRequest(mockClaudeConfig{ + aliases: 
map[string]string{ + "claude-opus-4-6": "deepseek-v4-pro-search", + }, + }, req) + if err != nil { + t.Fatalf("normalizeClaudeRequest error: %v", err) + } + if out.Standard.ResolvedModel != "deepseek-v4-pro-search" { + t.Fatalf("resolved model mismatch: got=%q", out.Standard.ResolvedModel) + } + if out.Standard.Thinking || !out.Standard.Search { + t.Fatalf("unexpected flags: thinking=%v search=%v", out.Standard.Thinking, out.Standard.Search) + } +} + +func TestNormalizeClaudeRequestEnablesThinkingWhenRequested(t *testing.T) { + req := map[string]any{ + "model": "claude-opus-4-6", + "messages": []any{ + map[string]any{"role": "user", "content": "hello"}, + }, + "thinking": map[string]any{"type": "enabled", "budget_tokens": 1024}, + } + out, err := normalizeClaudeRequest(mockClaudeConfig{ + aliases: map[string]string{ + "claude-opus-4-6": "deepseek-v4-pro", + }, + }, req) + if err != nil { + t.Fatalf("normalizeClaudeRequest error: %v", err) + } + if !out.Standard.Thinking { + t.Fatalf("expected explicit Claude thinking request to enable downstream thinking") + } +} + +func TestNormalizeClaudeRequestPrefersGlobalAliasMapping(t *testing.T) { + req := map[string]any{ + "model": "claude-sonnet-4-6", + "messages": []any{ + map[string]any{"role": "user", "content": "hello"}, + }, + } + out, err := normalizeClaudeRequest(mockClaudeConfig{ + aliases: map[string]string{ + "claude-sonnet-4-6": "deepseek-v4-flash", + }, + }, req) + if err != nil { + t.Fatalf("normalizeClaudeRequest error: %v", err) + } + if out.Standard.ResolvedModel != "deepseek-v4-flash" { + t.Fatalf("expected global alias to win for explicit model, got=%q", out.Standard.ResolvedModel) + } +} diff --git a/internal/adapter/claude/error_shape_test.go b/internal/httpapi/claude/error_shape_test.go similarity index 100% rename from internal/adapter/claude/error_shape_test.go rename to internal/httpapi/claude/error_shape_test.go diff --git a/internal/adapter/claude/handler_errors.go 
b/internal/httpapi/claude/handler_errors.go similarity index 100% rename from internal/adapter/claude/handler_errors.go rename to internal/httpapi/claude/handler_errors.go diff --git a/internal/adapter/claude/handler_helpers_misc.go b/internal/httpapi/claude/handler_helpers_misc.go similarity index 100% rename from internal/adapter/claude/handler_helpers_misc.go rename to internal/httpapi/claude/handler_helpers_misc.go diff --git a/internal/adapter/claude/handler_messages.go b/internal/httpapi/claude/handler_messages.go similarity index 87% rename from internal/adapter/claude/handler_messages.go rename to internal/httpapi/claude/handler_messages.go index 526d316..e424503 100644 --- a/internal/adapter/claude/handler_messages.go +++ b/internal/httpapi/claude/handler_messages.go @@ -44,7 +44,7 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store C model, _ := req["model"].(string) stream := util.ToBool(req["stream"]) - // Preserve claude_mapping (fast/slow/opus routing) while proxying via OpenAI. + // Use the shared global model resolver so Claude/OpenAI/Gemini stay consistent. 
translateModel := model if store != nil { if norm, normErr := normalizeClaudeRequest(store, cloneMap(req)); normErr == nil && strings.TrimSpace(norm.Standard.ResolvedModel) != "" { @@ -52,6 +52,7 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store C } } translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatClaude, translateModel, raw, stream) + translatedReq = applyClaudeThinkingPolicyToOpenAIRequest(translatedReq, req) isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1" isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1" @@ -123,6 +124,30 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store C return true } +func applyClaudeThinkingPolicyToOpenAIRequest(translated []byte, original map[string]any) []byte { + req := map[string]any{} + if err := json.Unmarshal(translated, &req); err != nil { + return translated + } + enabled, ok := util.ResolveThinkingOverride(original) + if !ok { + if _, translatedHasOverride := util.ResolveThinkingOverride(req); translatedHasOverride { + return translated + } + enabled = false + } + typ := "disabled" + if enabled { + typ = "enabled" + } + req["thinking"] = map[string]any{"type": typ} + out, err := json.Marshal(req) + if err != nil { + return translated + } + return out +} + func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Request, resp *http.Response, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string) { defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { diff --git a/internal/adapter/claude/handler_routes.go b/internal/httpapi/claude/handler_routes.go similarity index 78% rename from internal/adapter/claude/handler_routes.go rename to internal/httpapi/claude/handler_routes.go index 3683456..390b97d 100644 --- a/internal/adapter/claude/handler_routes.go +++ b/internal/httpapi/claude/handler_routes.go @@ -7,7 +7,7 @@ 
import ( "github.com/go-chi/chi/v5" "ds2api/internal/config" - "ds2api/internal/deepseek" + dsprotocol "ds2api/internal/deepseek/protocol" "ds2api/internal/util" ) @@ -29,9 +29,9 @@ func (h *Handler) compatStripReferenceMarkers() bool { } var ( - claudeStreamPingInterval = time.Duration(deepseek.KeepAliveTimeout) * time.Second - claudeStreamIdleTimeout = time.Duration(deepseek.StreamIdleTimeout) * time.Second - claudeStreamMaxKeepaliveCnt = deepseek.MaxKeepaliveCount + claudeStreamPingInterval = time.Duration(dsprotocol.KeepAliveTimeout) * time.Second + claudeStreamIdleTimeout = time.Duration(dsprotocol.StreamIdleTimeout) * time.Second + claudeStreamMaxKeepaliveCnt = dsprotocol.MaxKeepaliveCount ) func RegisterRoutes(r chi.Router, h *Handler) { diff --git a/internal/adapter/claude/handler_stream_test.go b/internal/httpapi/claude/handler_stream_test.go similarity index 86% rename from internal/adapter/claude/handler_stream_test.go rename to internal/httpapi/claude/handler_stream_test.go index f5f7d75..354ed89 100644 --- a/internal/adapter/claude/handler_stream_test.go +++ b/internal/httpapi/claude/handler_stream_test.go @@ -247,16 +247,18 @@ func asString(v any) string { func TestHandleClaudeStreamRealtimeToolSafetyAcrossStructuredFormats(t *testing.T) { tests := []struct { - name string - payload string + name string + payload string + wantToolUse bool }{ - {name: "xml_tool_call", payload: `Bashpwd`}, - {name: "xml_json_tool_call", payload: `{"tool":"Bash","params":{"command":"pwd"}}`}, - {name: "nested_tool_tag_style", payload: `pwd`}, - {name: "function_tag_style", payload: `Bashpwd`}, - {name: "antml_argument_style", payload: `pwd`}, - {name: "antml_function_attr_parameters", payload: `{"command":"pwd"}`}, - {name: "invoke_parameter_style", payload: `pwd`}, + {name: "invoke_parameter_wrapper", payload: `pwd`, wantToolUse: true}, + {name: "legacy_single_tool_root", payload: `Bashpwd`, wantToolUse: false}, + {name: "legacy_tool_call_json", payload: 
`{"tool":"Bash","params":{"command":"pwd"}}`, wantToolUse: false}, + {name: "legacy_nested_tool_tag_style", payload: `pwd`, wantToolUse: false}, + {name: "legacy_function_tag_style", payload: `Bashpwd`, wantToolUse: false}, + {name: "legacy_antml_argument_style", payload: `pwd`, wantToolUse: false}, + {name: "legacy_antml_function_attr_parameters", payload: `{"command":"pwd"}`, wantToolUse: false}, + {name: "legacy_function_calls_wrapper", payload: `pwd`, wantToolUse: false}, } for _, tc := range tests { @@ -280,8 +282,8 @@ func TestHandleClaudeStreamRealtimeToolSafetyAcrossStructuredFormats(t *testing. break } } - if !foundToolUse { - t.Fatalf("expected tool_use block for format %s, body=%s", tc.name, rec.Body.String()) + if foundToolUse != tc.wantToolUse { + t.Fatalf("unexpected tool_use=%v for format %s, body=%s", foundToolUse, tc.name, rec.Body.String()) } }) } @@ -289,7 +291,7 @@ func TestHandleClaudeStreamRealtimeToolSafetyAcrossStructuredFormats(t *testing. func TestHandleClaudeStreamRealtimeDetectsToolUseWithLeadingProse(t *testing.T) { h := &Handler{} - payload := "I'll call a tool now.\\nwrite_file{\\\"path\\\":\\\"/tmp/a.txt\\\",\\\"content\\\":\\\"abc\\\"}" + payload := "I'll call a tool now.\\n/tmp/a.txtabc" resp := makeClaudeSSEHTTPResponse( `data: {"p":"response/content","v":"`+payload+`"}`, `data: [DONE]`, diff --git a/internal/adapter/claude/handler_tokens.go b/internal/httpapi/claude/handler_tokens.go similarity index 100% rename from internal/adapter/claude/handler_tokens.go rename to internal/httpapi/claude/handler_tokens.go diff --git a/internal/adapter/claude/handler_util_test.go b/internal/httpapi/claude/handler_util_test.go similarity index 99% rename from internal/adapter/claude/handler_util_test.go rename to internal/httpapi/claude/handler_util_test.go index 171c52a..68f68ca 100644 --- a/internal/adapter/claude/handler_util_test.go +++ b/internal/httpapi/claude/handler_util_test.go @@ -93,10 +93,10 @@ func 
TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) { t.Fatalf("expected call id preserved, got %#v", call) } content, _ := m["content"].(string) - if !containsStr(content, "") || !containsStr(content, "search_web") { + if !containsStr(content, "") || !containsStr(content, ``) { t.Fatalf("expected assistant content to include XML tool call history, got %q", content) } - if !containsStr(content, "\n \n ") { + if !containsStr(content, ``) { t.Fatalf("expected assistant content to include serialized parameters, got %q", content) } } diff --git a/internal/adapter/claude/handler_utils.go b/internal/httpapi/claude/handler_utils.go similarity index 100% rename from internal/adapter/claude/handler_utils.go rename to internal/httpapi/claude/handler_utils.go diff --git a/internal/adapter/claude/handler_utils_sanitize.go b/internal/httpapi/claude/handler_utils_sanitize.go similarity index 100% rename from internal/adapter/claude/handler_utils_sanitize.go rename to internal/httpapi/claude/handler_utils_sanitize.go diff --git a/internal/adapter/claude/output_clean.go b/internal/httpapi/claude/output_clean.go similarity index 100% rename from internal/adapter/claude/output_clean.go rename to internal/httpapi/claude/output_clean.go diff --git a/internal/adapter/claude/proxy_vercel_test.go b/internal/httpapi/claude/proxy_vercel_test.go similarity index 50% rename from internal/adapter/claude/proxy_vercel_test.go rename to internal/httpapi/claude/proxy_vercel_test.go index 18f0f98..2eff38b 100644 --- a/internal/adapter/claude/proxy_vercel_test.go +++ b/internal/httpapi/claude/proxy_vercel_test.go @@ -9,12 +9,10 @@ import ( ) type claudeProxyStoreStub struct { - mapping map[string]string + aliases map[string]string } -func (s claudeProxyStoreStub) ClaudeMapping() map[string]string { - return s.mapping -} +func (s claudeProxyStoreStub) ModelAliases() map[string]string { return s.aliases } func (claudeProxyStoreStub) CompatStripReferenceMarkers() bool { return true } 
@@ -23,6 +21,27 @@ type openAIProxyStub struct { body string } +func TestClaudeProxyViaOpenAIPrefersGlobalAliasMapping(t *testing.T) { + openAI := &openAIProxyCaptureStub{} + h := &Handler{ + Store: claudeProxyStoreStub{ + aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}, + }, + OpenAI: openAI, + } + req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(`{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hi"}],"stream":false}`)) + rec := httptest.NewRecorder() + + h.Messages(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String()) + } + if got := strings.TrimSpace(openAI.seenModel); got != "deepseek-v4-flash" { + t.Fatalf("expected global alias mapped proxy model deepseek-v4-flash, got %q", got) + } +} + func (s openAIProxyStub) ChatCompletions(w http.ResponseWriter, _ *http.Request) { if s.status == 0 { s.status = http.StatusOK @@ -68,10 +87,10 @@ func TestClaudeProxyViaOpenAIVercelPreparePassthrough(t *testing.T) { } } -func TestClaudeProxyViaOpenAIPreservesClaudeMapping(t *testing.T) { +func TestClaudeProxyViaOpenAIUsesGlobalAliasMapping(t *testing.T) { openAI := &openAIProxyCaptureStub{} h := &Handler{ - Store: claudeProxyStoreStub{mapping: map[string]string{"fast": "deepseek-chat", "slow": "deepseek-reasoner"}}, + Store: claudeProxyStoreStub{aliases: map[string]string{"claude-3-opus": "deepseek-v4-pro"}}, OpenAI: openAI, } req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(`{"model":"claude-3-opus","messages":[{"role":"user","content":"hi"}],"stream":false}`)) @@ -82,8 +101,68 @@ func TestClaudeProxyViaOpenAIPreservesClaudeMapping(t *testing.T) { if rec.Code != http.StatusOK { t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String()) } - if got := strings.TrimSpace(openAI.seenModel); got != "deepseek-reasoner" { - t.Fatalf("expected mapped proxy model deepseek-reasoner, got %q", 
got) + if got := strings.TrimSpace(openAI.seenModel); got != "deepseek-v4-pro" { + t.Fatalf("expected mapped proxy model deepseek-v4-pro, got %q", got) + } +} + +func TestClaudeProxyViaOpenAIPreservesThinkingOverride(t *testing.T) { + openAI := &openAIProxyCaptureStub{} + h := &Handler{ + Store: claudeProxyStoreStub{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}}, + OpenAI: openAI, + } + req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(`{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"disabled"},"stream":false}`)) + rec := httptest.NewRecorder() + + h.Messages(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String()) + } + thinking, _ := openAI.seenReq["thinking"].(map[string]any) + if thinking["type"] != "disabled" { + t.Fatalf("expected translated OpenAI request to preserve disabled thinking, got %#v", openAI.seenReq) + } +} + +func TestClaudeProxyViaOpenAIDisablesThinkingByDefault(t *testing.T) { + openAI := &openAIProxyCaptureStub{} + h := &Handler{ + Store: claudeProxyStoreStub{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}}, + OpenAI: openAI, + } + req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(`{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hi"}],"stream":false}`)) + rec := httptest.NewRecorder() + + h.Messages(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String()) + } + thinking, _ := openAI.seenReq["thinking"].(map[string]any) + if thinking["type"] != "disabled" { + t.Fatalf("expected Claude default to disable downstream thinking, got %#v", openAI.seenReq) + } +} + +func TestClaudeProxyViaOpenAIEnablesThinkingWhenRequested(t *testing.T) { + openAI := &openAIProxyCaptureStub{} + h := &Handler{ + Store: claudeProxyStoreStub{aliases: 
map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}}, + OpenAI: openAI, + } + req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(`{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":1024},"stream":false}`)) + rec := httptest.NewRecorder() + + h.Messages(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String()) + } + thinking, _ := openAI.seenReq["thinking"].(map[string]any) + if thinking["type"] != "enabled" { + t.Fatalf("expected Claude explicit thinking to enable downstream thinking, got %#v", openAI.seenReq) } } diff --git a/internal/adapter/claude/route_alias_test.go b/internal/httpapi/claude/route_alias_test.go similarity index 100% rename from internal/adapter/claude/route_alias_test.go rename to internal/httpapi/claude/route_alias_test.go diff --git a/internal/adapter/claude/standard_request.go b/internal/httpapi/claude/standard_request.go similarity index 89% rename from internal/adapter/claude/standard_request.go rename to internal/httpapi/claude/standard_request.go index d73ffda..26c6fda 100644 --- a/internal/adapter/claude/standard_request.go +++ b/internal/httpapi/claude/standard_request.go @@ -5,12 +5,13 @@ import ( "strings" "ds2api/internal/config" - "ds2api/internal/deepseek" + "ds2api/internal/prompt" + "ds2api/internal/promptcompat" "ds2api/internal/util" ) type claudeNormalizedRequest struct { - Standard util.StandardRequest + Standard promptcompat.StandardRequest NormalizedMessages []any } @@ -31,19 +32,19 @@ func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNorma dsPayload := convertClaudeToDeepSeek(payload, store) dsModel, _ := dsPayload["model"].(string) - thinkingEnabled, searchEnabled, ok := config.GetModelConfig(dsModel) + _, searchEnabled, ok := config.GetModelConfig(dsModel) if !ok { - thinkingEnabled = false searchEnabled = false } - 
finalPrompt := deepseek.MessagesPrepareWithThinking(toMessageMaps(dsPayload["messages"]), thinkingEnabled) + thinkingEnabled := util.ResolveThinkingEnabled(req, false) + finalPrompt := prompt.MessagesPrepareWithThinking(toMessageMaps(dsPayload["messages"]), thinkingEnabled) toolNames := extractClaudeToolNames(toolsRequested) if len(toolNames) == 0 && len(toolsRequested) > 0 { toolNames = []string{"__any_tool__"} } return claudeNormalizedRequest{ - Standard: util.StandardRequest{ + Standard: promptcompat.StandardRequest{ Surface: "anthropic_messages", RequestedModel: strings.TrimSpace(model), ResolvedModel: dsModel, diff --git a/internal/adapter/claude/standard_request_test.go b/internal/httpapi/claude/standard_request_test.go similarity index 100% rename from internal/adapter/claude/standard_request_test.go rename to internal/httpapi/claude/standard_request_test.go diff --git a/internal/adapter/claude/stream_runtime_core.go b/internal/httpapi/claude/stream_runtime_core.go similarity index 100% rename from internal/adapter/claude/stream_runtime_core.go rename to internal/httpapi/claude/stream_runtime_core.go diff --git a/internal/adapter/claude/stream_runtime_emit.go b/internal/httpapi/claude/stream_runtime_emit.go similarity index 100% rename from internal/adapter/claude/stream_runtime_emit.go rename to internal/httpapi/claude/stream_runtime_emit.go diff --git a/internal/adapter/claude/stream_runtime_finalize.go b/internal/httpapi/claude/stream_runtime_finalize.go similarity index 100% rename from internal/adapter/claude/stream_runtime_finalize.go rename to internal/httpapi/claude/stream_runtime_finalize.go diff --git a/internal/adapter/claude/stream_status_test.go b/internal/httpapi/claude/stream_status_test.go similarity index 92% rename from internal/adapter/claude/stream_status_test.go rename to internal/httpapi/claude/stream_status_test.go index 7577792..2a2586f 100644 --- a/internal/adapter/claude/stream_status_test.go +++ 
b/internal/httpapi/claude/stream_status_test.go @@ -21,12 +21,7 @@ func (streamStatusClaudeOpenAIStub) ChatCompletions(w http.ResponseWriter, _ *ht type streamStatusClaudeStoreStub struct{} -func (streamStatusClaudeStoreStub) ClaudeMapping() map[string]string { - return map[string]string{ - "fast": "deepseek-chat", - "slow": "deepseek-reasoner", - } -} +func (streamStatusClaudeStoreStub) ModelAliases() map[string]string { return nil } func (streamStatusClaudeStoreStub) CompatStripReferenceMarkers() bool { return true } diff --git a/internal/adapter/claude/tool_call_state.go b/internal/httpapi/claude/tool_call_state.go similarity index 100% rename from internal/adapter/claude/tool_call_state.go rename to internal/httpapi/claude/tool_call_state.go diff --git a/internal/adapter/gemini/convert_messages.go b/internal/httpapi/gemini/convert_messages.go similarity index 100% rename from internal/adapter/gemini/convert_messages.go rename to internal/httpapi/gemini/convert_messages.go diff --git a/internal/adapter/gemini/convert_messages_test.go b/internal/httpapi/gemini/convert_messages_test.go similarity index 100% rename from internal/adapter/gemini/convert_messages_test.go rename to internal/httpapi/gemini/convert_messages_test.go diff --git a/internal/adapter/gemini/convert_passthrough.go b/internal/httpapi/gemini/convert_passthrough.go similarity index 100% rename from internal/adapter/gemini/convert_passthrough.go rename to internal/httpapi/gemini/convert_passthrough.go diff --git a/internal/adapter/gemini/convert_request.go b/internal/httpapi/gemini/convert_request.go similarity index 56% rename from internal/adapter/gemini/convert_request.go rename to internal/httpapi/gemini/convert_request.go index 5a9ff95..1d32105 100644 --- a/internal/adapter/gemini/convert_request.go +++ b/internal/httpapi/gemini/convert_request.go @@ -4,34 +4,35 @@ import ( "fmt" "strings" - "ds2api/internal/adapter/openai" "ds2api/internal/config" + "ds2api/internal/promptcompat" 
"ds2api/internal/util" ) //nolint:unused // kept for native Gemini adapter route compatibility. -func normalizeGeminiRequest(store ConfigReader, routeModel string, req map[string]any, stream bool) (util.StandardRequest, error) { +func normalizeGeminiRequest(store ConfigReader, routeModel string, req map[string]any, stream bool) (promptcompat.StandardRequest, error) { requestedModel := strings.TrimSpace(routeModel) if requestedModel == "" { - return util.StandardRequest{}, fmt.Errorf("model is required in request path") + return promptcompat.StandardRequest{}, fmt.Errorf("model is required in request path") } resolvedModel, ok := config.ResolveModel(store, requestedModel) if !ok { - return util.StandardRequest{}, fmt.Errorf("model %q is not available", requestedModel) + return promptcompat.StandardRequest{}, fmt.Errorf("model %q is not available", requestedModel) } - thinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel) + defaultThinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel) + thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled) messagesRaw := geminiMessagesFromRequest(req) if len(messagesRaw) == 0 { - return util.StandardRequest{}, fmt.Errorf("request must include non-empty contents") + return promptcompat.StandardRequest{}, fmt.Errorf("request must include non-empty contents") } toolsRaw := convertGeminiTools(req["tools"]) - finalPrompt, toolNames := openai.BuildPromptForAdapter(messagesRaw, toolsRaw, "", thinkingEnabled) + finalPrompt, toolNames := promptcompat.BuildOpenAIPromptForAdapter(messagesRaw, toolsRaw, "", thinkingEnabled) passThrough := collectGeminiPassThrough(req) - return util.StandardRequest{ + return promptcompat.StandardRequest{ Surface: "google_gemini", RequestedModel: requestedModel, ResolvedModel: resolvedModel, diff --git a/internal/adapter/gemini/convert_tools.go b/internal/httpapi/gemini/convert_tools.go similarity index 100% rename from 
internal/adapter/gemini/convert_tools.go rename to internal/httpapi/gemini/convert_tools.go diff --git a/internal/adapter/gemini/deps.go b/internal/httpapi/gemini/deps.go similarity index 90% rename from internal/adapter/gemini/deps.go rename to internal/httpapi/gemini/deps.go index 9a9e658..326d56c 100644 --- a/internal/adapter/gemini/deps.go +++ b/internal/httpapi/gemini/deps.go @@ -6,7 +6,7 @@ import ( "ds2api/internal/auth" "ds2api/internal/config" - "ds2api/internal/deepseek" + dsclient "ds2api/internal/deepseek/client" ) type AuthResolver interface { @@ -30,5 +30,5 @@ type OpenAIChatRunner interface { } var _ AuthResolver = (*auth.Resolver)(nil) -var _ DeepSeekCaller = (*deepseek.Client)(nil) +var _ DeepSeekCaller = (*dsclient.Client)(nil) var _ ConfigReader = (*config.Store)(nil) diff --git a/internal/adapter/gemini/handler_errors.go b/internal/httpapi/gemini/handler_errors.go similarity index 100% rename from internal/adapter/gemini/handler_errors.go rename to internal/httpapi/gemini/handler_errors.go diff --git a/internal/adapter/gemini/handler_generate.go b/internal/httpapi/gemini/handler_generate.go similarity index 79% rename from internal/adapter/gemini/handler_generate.go rename to internal/httpapi/gemini/handler_generate.go index ea70c0f..c6a08eb 100644 --- a/internal/adapter/gemini/handler_generate.go +++ b/internal/httpapi/gemini/handler_generate.go @@ -36,6 +36,11 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream return true } routeModel := strings.TrimSpace(chi.URLParam(r, "model")) + var req map[string]any + if err := json.Unmarshal(raw, &req); err != nil { + writeGeminiError(w, http.StatusBadRequest, "invalid json") + return true + } translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatGemini, routeModel, raw, stream) if !strings.Contains(string(translatedReq), `"stream"`) { var reqMap map[string]any @@ -46,6 +51,7 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream } } 
} + translatedReq = applyGeminiThinkingPolicyToOpenAIRequest(translatedReq, req) isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1" isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1" @@ -116,6 +122,72 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream return true } +func applyGeminiThinkingPolicyToOpenAIRequest(translated []byte, original map[string]any) []byte { + req := map[string]any{} + if err := json.Unmarshal(translated, &req); err != nil { + return translated + } + enabled, ok := resolveGeminiThinkingOverride(original) + if !ok { + return translated + } + typ := "disabled" + if enabled { + typ = "enabled" + } + req["thinking"] = map[string]any{"type": typ} + out, err := json.Marshal(req) + if err != nil { + return translated + } + return out +} + +func resolveGeminiThinkingOverride(req map[string]any) (bool, bool) { + generationConfig, ok := req["generationConfig"].(map[string]any) + if !ok { + generationConfig, ok = req["generation_config"].(map[string]any) + } + if !ok { + return false, false + } + thinkingConfig, ok := generationConfig["thinkingConfig"].(map[string]any) + if !ok { + thinkingConfig, ok = generationConfig["thinking_config"].(map[string]any) + } + if !ok { + return false, false + } + budget, ok := numericAny(thinkingConfig["thinkingBudget"]) + if !ok { + budget, ok = numericAny(thinkingConfig["thinking_budget"]) + } + if !ok { + return false, false + } + return budget > 0, true +} + +func numericAny(raw any) (float64, bool) { + switch v := raw.(type) { + case float64: + return v, true + case float32: + return float64(v), true + case int: + return float64(v), true + case int64: + return float64(v), true + case int32: + return float64(v), true + case json.Number: + f, err := v.Float64() + return f, err == nil + default: + return 0, false + } +} + func writeGeminiErrorFromOpenAI(w http.ResponseWriter, status int, raw []byte) { message := 
strings.TrimSpace(string(raw)) var parsed map[string]any diff --git a/internal/adapter/gemini/handler_routes.go b/internal/httpapi/gemini/handler_routes.go similarity index 100% rename from internal/adapter/gemini/handler_routes.go rename to internal/httpapi/gemini/handler_routes.go diff --git a/internal/adapter/gemini/handler_stream_runtime.go b/internal/httpapi/gemini/handler_stream_runtime.go similarity index 95% rename from internal/adapter/gemini/handler_stream_runtime.go rename to internal/httpapi/gemini/handler_stream_runtime.go index 5c7d1ee..13729fb 100644 --- a/internal/adapter/gemini/handler_stream_runtime.go +++ b/internal/httpapi/gemini/handler_stream_runtime.go @@ -7,7 +7,7 @@ import ( "strings" "time" - "ds2api/internal/deepseek" + dsprotocol "ds2api/internal/deepseek/protocol" "ds2api/internal/sse" streamengine "ds2api/internal/stream" ) @@ -39,9 +39,9 @@ func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Req Body: resp.Body, ThinkingEnabled: thinkingEnabled, InitialType: initialType, - KeepAliveInterval: time.Duration(deepseek.KeepAliveTimeout) * time.Second, - IdleTimeout: time.Duration(deepseek.StreamIdleTimeout) * time.Second, - MaxKeepAliveNoInput: deepseek.MaxKeepaliveCount, + KeepAliveInterval: time.Duration(dsprotocol.KeepAliveTimeout) * time.Second, + IdleTimeout: time.Duration(dsprotocol.StreamIdleTimeout) * time.Second, + MaxKeepAliveNoInput: dsprotocol.MaxKeepaliveCount, }, streamengine.ConsumeHooks{ OnParsed: runtime.onParsed, OnFinalize: func(_ streamengine.StopReason, _ error) { diff --git a/internal/adapter/gemini/handler_test.go b/internal/httpapi/gemini/handler_test.go similarity index 87% rename from internal/adapter/gemini/handler_test.go rename to internal/httpapi/gemini/handler_test.go index 94a1a4e..01a36a4 100644 --- a/internal/adapter/gemini/handler_test.go +++ b/internal/httpapi/gemini/handler_test.go @@ -290,6 +290,46 @@ func TestGeminiProxyTranslatesInlineImageToOpenAIDataURL(t *testing.T) { } } 
+func TestGeminiProxyViaOpenAIDisablesThinkingBudgetZero(t *testing.T) { + openAI := &geminiOpenAISuccessStub{} + h := &Handler{Store: testGeminiConfig{}, OpenAI: openAI} + r := chi.NewRouter() + RegisterRoutes(r, h) + + body := `{"contents":[{"role":"user","parts":[{"text":"hello"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}` + req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-flash:generateContent", strings.NewReader(body)) + rec := httptest.NewRecorder() + r.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + thinking, _ := openAI.seenReq["thinking"].(map[string]any) + if thinking["type"] != "disabled" { + t.Fatalf("expected Gemini thinkingBudget=0 to disable OpenAI thinking, got %#v", openAI.seenReq) + } +} + +func TestGeminiProxyViaOpenAIEnablesPositiveThinkingBudget(t *testing.T) { + openAI := &geminiOpenAISuccessStub{} + h := &Handler{Store: testGeminiConfig{}, OpenAI: openAI} + r := chi.NewRouter() + RegisterRoutes(r, h) + + body := `{"contents":[{"role":"user","parts":[{"text":"hello"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":1024}}}` + req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-flash:generateContent", strings.NewReader(body)) + rec := httptest.NewRecorder() + r.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + thinking, _ := openAI.seenReq["thinking"].(map[string]any) + if thinking["type"] != "enabled" { + t.Fatalf("expected Gemini positive thinkingBudget to enable OpenAI thinking, got %#v", openAI.seenReq) + } +} + func TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope(t *testing.T) { h := &Handler{ Store: testGeminiConfig{}, diff --git a/internal/adapter/gemini/output_clean.go b/internal/httpapi/gemini/output_clean.go similarity index 100% rename from internal/adapter/gemini/output_clean.go rename to 
internal/httpapi/gemini/output_clean.go diff --git a/internal/adapter/gemini/proxy_vercel_test.go b/internal/httpapi/gemini/proxy_vercel_test.go similarity index 100% rename from internal/adapter/gemini/proxy_vercel_test.go rename to internal/httpapi/gemini/proxy_vercel_test.go diff --git a/internal/adapter/openai/chat_history.go b/internal/httpapi/openai/chat/chat_history.go similarity index 98% rename from internal/adapter/openai/chat_history.go rename to internal/httpapi/openai/chat/chat_history.go index 41b4c54..fb274fc 100644 --- a/internal/adapter/openai/chat_history.go +++ b/internal/httpapi/openai/chat/chat_history.go @@ -1,4 +1,4 @@ -package openai +package chat import ( "errors" @@ -11,7 +11,7 @@ import ( "ds2api/internal/config" openaifmt "ds2api/internal/format/openai" "ds2api/internal/prompt" - "ds2api/internal/util" + "ds2api/internal/promptcompat" ) const adminWebUISourceHeader = "X-Ds2-Source" @@ -27,7 +27,7 @@ type chatHistorySession struct { disabled bool } -func startChatHistory(store *chathistory.Store, r *http.Request, a *auth.RequestAuth, stdReq util.StandardRequest) *chatHistorySession { +func startChatHistory(store *chathistory.Store, r *http.Request, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) *chatHistorySession { if store == nil || r == nil || a == nil { return nil } diff --git a/internal/adapter/openai/chat_history_test.go b/internal/httpapi/openai/chat/chat_history_test.go similarity index 90% rename from internal/adapter/openai/chat_history_test.go rename to internal/httpapi/openai/chat/chat_history_test.go index 7787e98..66dfc59 100644 --- a/internal/adapter/openai/chat_history_test.go +++ b/internal/httpapi/openai/chat/chat_history_test.go @@ -1,4 +1,4 @@ -package openai +package chat import ( "context" @@ -13,7 +13,7 @@ import ( "ds2api/internal/auth" "ds2api/internal/chathistory" - "ds2api/internal/util" + "ds2api/internal/promptcompat" ) func newTestChatHistoryStore(t *testing.T) *chathistory.Store { @@ -63,7 +63,7 
@@ func TestChatCompletionsNonStreamPersistsHistory(t *testing.T) { ChatHistory: historyStore, } - reqBody := `{"model":"deepseek-chat","messages":[{"role":"system","content":"be precise"},{"role":"user","content":"hi there"},{"role":"assistant","content":"previous answer"}],"stream":false}` + reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"system","content":"be precise"},{"role":"user","content":"hi there"},{"role":"assistant","content":"previous answer"}],"stream":false}` req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -114,8 +114,8 @@ func TestStartChatHistoryRecoversFromTransientWriteFailure(t *testing.T) { CallerID: "caller:test", AccountID: "acct:test", } - stdReq := util.StandardRequest{ - ResponseModel: "deepseek-chat", + stdReq := promptcompat.StandardRequest{ + ResponseModel: "deepseek-v4-flash", Stream: true, Messages: []any{ map[string]any{"role": "user", "content": "hello"}, @@ -172,7 +172,7 @@ func TestHandleStreamContextCancelledMarksHistoryStopped(t *testing.T) { historyStore := newTestChatHistoryStore(t) entry, err := historyStore.Start(chathistory.StartParams{ CallerID: "caller:test", - Model: "deepseek-chat", + Model: "deepseek-v4-flash", Stream: true, UserInput: "hello", }) @@ -194,7 +194,7 @@ func TestHandleStreamContextCancelledMarksHistoryStopped(t *testing.T) { rec := httptest.NewRecorder() resp := makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"hello"}`, `data: [DONE]`) - h.handleStream(rec, req, resp, "cid-stop", "deepseek-chat", "prompt", false, false, nil, session) + h.handleStream(rec, req, resp, "cid-stop", "deepseek-v4-flash", "prompt", false, false, nil, session) snapshot, err := historyStore.Snapshot() if err != nil { @@ -221,7 +221,7 @@ func TestChatCompletionsSkipsAdminWebUISource(t *testing.T) { ChatHistory: historyStore, } - reqBody := 
`{"model":"deepseek-chat","messages":[{"role":"user","content":"hi there"}],"stream":false}` + reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hi there"}],"stream":false}` req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -253,7 +253,7 @@ func TestChatCompletionsSkipsHistoryWhenDisabled(t *testing.T) { ChatHistory: historyStore, } - reqBody := `{"model":"deepseek-chat","messages":[{"role":"user","content":"hi there"}],"stream":false}` + reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hi there"}],"stream":false}` req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -286,7 +286,7 @@ func TestChatCompletionsHistorySplitPersistsHistoryText(t *testing.T) { ChatHistory: historyStore, } - reqBody := `{"model":"deepseek-chat","messages":[{"role":"system","content":"system instructions"},{"role":"user","content":"first user turn"},{"role":"assistant","content":"","reasoning_content":"hidden reasoning","tool_calls":[{"name":"search","arguments":{"query":"docs"}}]},{"role":"tool","name":"search","tool_call_id":"call-1","content":"tool result"},{"role":"user","content":"latest user turn"}],"stream":false}` + reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"system","content":"system instructions"},{"role":"user","content":"first user turn"},{"role":"assistant","content":"","reasoning_content":"hidden reasoning","tool_calls":[{"name":"search","arguments":{"query":"docs"}}]},{"role":"tool","name":"search","tool_call_id":"call-1","content":"tool result"},{"role":"user","content":"latest user turn"}],"stream":false}` req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", 
strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") diff --git a/internal/adapter/openai/chat_stream_runtime.go b/internal/httpapi/openai/chat/chat_stream_runtime.go similarity index 93% rename from internal/adapter/openai/chat_stream_runtime.go rename to internal/httpapi/openai/chat/chat_stream_runtime.go index 1d7fff6..0f65fd0 100644 --- a/internal/adapter/openai/chat_stream_runtime.go +++ b/internal/httpapi/openai/chat/chat_stream_runtime.go @@ -1,4 +1,4 @@ -package openai +package chat import ( "ds2api/internal/toolcall" @@ -9,6 +9,7 @@ import ( openaifmt "ds2api/internal/format/openai" "ds2api/internal/sse" streamengine "ds2api/internal/stream" + "ds2api/internal/toolstream" ) type chatStreamRuntime struct { @@ -32,7 +33,7 @@ type chatStreamRuntime struct { toolCallsEmitted bool toolCallsDoneEmitted bool - toolSieve toolStreamSieveState + toolSieve toolstream.State streamToolCallIDs map[int]string streamToolNames map[int]string thinking strings.Builder @@ -152,7 +153,7 @@ func (s *chatStreamRuntime) finalize(finishReason string) { s.toolCallsEmitted = true s.toolCallsDoneEmitted = true } else if s.bufferToolContent { - for _, evt := range flushToolSieve(&s.toolSieve, s.toolNames) { + for _, evt := range toolstream.Flush(&s.toolSieve, s.toolNames) { if len(evt.ToolCalls) > 0 { finishReason = "tool_calls" s.toolCallsEmitted = true @@ -201,17 +202,7 @@ func (s *chatStreamRuntime) finalize(finishReason string) { finishReason = "tool_calls" } if len(detected.Calls) == 0 && !s.toolCallsEmitted && strings.TrimSpace(finalText) == "" { - status := http.StatusTooManyRequests - message := "Upstream model returned empty output." - code := "upstream_empty_output" - if strings.TrimSpace(finalThinking) != "" { - message = "Upstream model returned reasoning without visible output." 
- } - if finishReason == "content_filter" { - status = http.StatusBadRequest - message = "Upstream content filtered the response and returned no output." - code = "content_filter" - } + status, message, code := upstreamEmptyOutputDetail(finishReason == "content_filter", finalText, finalThinking) s.sendFailedChunk(status, message, code) return } @@ -279,7 +270,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD if !s.bufferToolContent { delta["content"] = trimmed } else { - events := processToolSieveChunk(&s.toolSieve, trimmed, s.toolNames) + events := toolstream.ProcessChunk(&s.toolSieve, trimmed, s.toolNames) for _, evt := range events { if len(evt.ToolCallDeltas) > 0 { if !s.emitEarlyToolDeltas { diff --git a/internal/httpapi/openai/chat/handler.go b/internal/httpapi/openai/chat/handler.go new file mode 100644 index 0000000..81d1d22 --- /dev/null +++ b/internal/httpapi/openai/chat/handler.go @@ -0,0 +1,127 @@ +package chat + +import ( + "context" + "net/http" + "sync" + "time" + + "ds2api/internal/auth" + "ds2api/internal/chathistory" + "ds2api/internal/httpapi/openai/files" + "ds2api/internal/httpapi/openai/history" + "ds2api/internal/httpapi/openai/shared" + "ds2api/internal/promptcompat" + "ds2api/internal/toolcall" + "ds2api/internal/toolstream" +) + +const openAIGeneralMaxSize = shared.GeneralMaxSize + +var writeJSON = shared.WriteJSON + +type Handler struct { + Store shared.ConfigReader + Auth shared.AuthResolver + DS shared.DeepSeekCaller + ChatHistory *chathistory.Store + + leaseMu sync.Mutex + streamLeases map[string]streamLease +} + +type streamLease struct { + Auth *auth.RequestAuth + ExpiresAt time.Time +} + +func (h *Handler) compatStripReferenceMarkers() bool { + if h == nil { + return true + } + return shared.CompatStripReferenceMarkers(h.Store) +} + +func (h *Handler) applyHistorySplit(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) { + if h == 
nil { + return stdReq, nil + } + return history.Service{Store: h.Store, DS: h.DS}.Apply(ctx, a, stdReq) +} + +func (h *Handler) preprocessInlineFileInputs(ctx context.Context, a *auth.RequestAuth, req map[string]any) error { + if h == nil { + return nil + } + return (&files.Handler{Store: h.Store, Auth: h.Auth, DS: h.DS, ChatHistory: h.ChatHistory}).PreprocessInlineFileInputs(ctx, a, req) +} + +func (h *Handler) toolcallFeatureMatchEnabled() bool { + if h == nil { + return shared.ToolcallFeatureMatchEnabled(nil) + } + return shared.ToolcallFeatureMatchEnabled(h.Store) +} + +func (h *Handler) toolcallEarlyEmitHighConfidence() bool { + if h == nil { + return shared.ToolcallEarlyEmitHighConfidence(nil) + } + return shared.ToolcallEarlyEmitHighConfidence(h.Store) +} + +func writeOpenAIError(w http.ResponseWriter, status int, message string) { + shared.WriteOpenAIError(w, status, message) +} + +func openAIErrorType(status int) string { + return shared.OpenAIErrorType(status) +} + +func writeOpenAIInlineFileError(w http.ResponseWriter, err error) { + files.WriteInlineFileError(w, err) +} + +func mapHistorySplitError(err error) (int, string) { + return history.MapError(err) +} + +func requestTraceID(r *http.Request) string { + return shared.RequestTraceID(r) +} + +func asString(v any) string { + return shared.AsString(v) +} + +func cleanVisibleOutput(text string, stripReferenceMarkers bool) string { + return shared.CleanVisibleOutput(text, stripReferenceMarkers) +} + +func replaceCitationMarkersWithLinks(text string, links map[int]string) string { + return shared.ReplaceCitationMarkersWithLinks(text, links) +} + +func shouldWriteUpstreamEmptyOutputError(text string) bool { + return shared.ShouldWriteUpstreamEmptyOutputError(text) +} + +func upstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int, string, string) { + return shared.UpstreamEmptyOutputDetail(contentFilter, text, thinking) +} + +func writeUpstreamEmptyOutputError(w http.ResponseWriter, text, 
thinking string, contentFilter bool) bool { + return shared.WriteUpstreamEmptyOutputError(w, text, thinking, contentFilter) +} + +func formatIncrementalStreamToolCallDeltas(deltas []toolstream.ToolCallDelta, ids map[int]string) []map[string]any { + return shared.FormatIncrementalStreamToolCallDeltas(deltas, ids) +} + +func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, seenNames map[int]string) []toolstream.ToolCallDelta { + return shared.FilterIncrementalToolCallDeltasByAllowed(deltas, seenNames) +} + +func formatFinalStreamToolCallsWithStableIDs(calls []toolcall.ParsedToolCall, ids map[int]string) []map[string]any { + return shared.FormatFinalStreamToolCallsWithStableIDs(calls, ids) +} diff --git a/internal/adapter/openai/handler_chat.go b/internal/httpapi/openai/chat/handler_chat.go similarity index 94% rename from internal/adapter/openai/handler_chat.go rename to internal/httpapi/openai/chat/handler_chat.go index b7d76ba..4a6d01a 100644 --- a/internal/adapter/openai/handler_chat.go +++ b/internal/httpapi/openai/chat/handler_chat.go @@ -1,4 +1,4 @@ -package openai +package chat import ( "context" @@ -10,8 +10,9 @@ import ( "ds2api/internal/auth" "ds2api/internal/config" - "ds2api/internal/deepseek" + dsprotocol "ds2api/internal/deepseek/protocol" openaifmt "ds2api/internal/format/openai" + "ds2api/internal/promptcompat" "ds2api/internal/sse" streamengine "ds2api/internal/stream" ) @@ -58,14 +59,15 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) { writeOpenAIInlineFileError(w, err) return } - stdReq, err := normalizeOpenAIChatRequest(h.Store, req, requestTraceID(r)) + stdReq, err := promptcompat.NormalizeOpenAIChatRequest(h.Store, req, requestTraceID(r)) if err != nil { writeOpenAIError(w, http.StatusBadRequest, err.Error()) return } stdReq, err = h.applyHistorySplit(r.Context(), a, stdReq) if err != nil { - writeOpenAIError(w, http.StatusInternalServerError, err.Error()) + status, message := 
mapHistorySplitError(err) + writeOpenAIError(w, status, message) return } historySession := startChatHistory(h.ChatHistory, r, a, stdReq) @@ -165,7 +167,7 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co if historySession != nil { historySession.error(status, message, code, finalThinking, finalText) } - writeUpstreamEmptyOutputError(w, finalText, result.ContentFilter) + writeUpstreamEmptyOutputError(w, finalText, finalThinking, result.ContentFilter) return } respBody := openaifmt.BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText, toolNames) @@ -231,9 +233,9 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt Body: resp.Body, ThinkingEnabled: thinkingEnabled, InitialType: initialType, - KeepAliveInterval: time.Duration(deepseek.KeepAliveTimeout) * time.Second, - IdleTimeout: time.Duration(deepseek.StreamIdleTimeout) * time.Second, - MaxKeepAliveNoInput: deepseek.MaxKeepaliveCount, + KeepAliveInterval: time.Duration(dsprotocol.KeepAliveTimeout) * time.Second, + IdleTimeout: time.Duration(dsprotocol.StreamIdleTimeout) * time.Second, + MaxKeepAliveNoInput: dsprotocol.MaxKeepaliveCount, }, streamengine.ConsumeHooks{ OnKeepAlive: func() { streamRuntime.sendKeepAlive() diff --git a/internal/adapter/openai/handler_chat_auto_delete_test.go b/internal/httpapi/openai/chat/handler_chat_auto_delete_test.go similarity index 84% rename from internal/adapter/openai/handler_chat_auto_delete_test.go rename to internal/httpapi/openai/chat/handler_chat_auto_delete_test.go index 5a5577a..15645aa 100644 --- a/internal/adapter/openai/handler_chat_auto_delete_test.go +++ b/internal/httpapi/openai/chat/handler_chat_auto_delete_test.go @@ -1,4 +1,4 @@ -package openai +package chat import ( "context" @@ -8,7 +8,7 @@ import ( "testing" "ds2api/internal/auth" - "ds2api/internal/deepseek" + dsclient "ds2api/internal/deepseek/client" ) type autoDeleteModeDSStub struct { @@ -27,18 +27,18 @@ func (m 
*autoDeleteModeDSStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ return "pow", nil } -func (m *autoDeleteModeDSStub) UploadFile(_ context.Context, _ *auth.RequestAuth, _ deepseek.UploadFileRequest, _ int) (*deepseek.UploadFileResult, error) { - return &deepseek.UploadFileResult{ID: "file-id", Filename: "file.txt", Bytes: 1, Status: "uploaded"}, nil +func (m *autoDeleteModeDSStub) UploadFile(_ context.Context, _ *auth.RequestAuth, _ dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) { + return &dsclient.UploadFileResult{ID: "file-id", Filename: "file.txt", Bytes: 1, Status: "uploaded"}, nil } func (m *autoDeleteModeDSStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) { return m.resp, nil } -func (m *autoDeleteModeDSStub) DeleteSessionForToken(_ context.Context, _ string, sessionID string) (*deepseek.DeleteSessionResult, error) { +func (m *autoDeleteModeDSStub) DeleteSessionForToken(_ context.Context, _ string, sessionID string) (*dsclient.DeleteSessionResult, error) { m.singleCalls++ m.lastSessionID = sessionID - return &deepseek.DeleteSessionResult{SessionID: sessionID, Success: true}, nil + return &dsclient.DeleteSessionResult{SessionID: sessionID, Success: true}, nil } func (m *autoDeleteModeDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error { @@ -46,11 +46,11 @@ func (m *autoDeleteModeDSStub) DeleteAllSessionsForToken(_ context.Context, _ st return nil } -func (m *autoDeleteModeDSStub) DeleteSessionForTokenCtx(ctx context.Context, _ string, sessionID string) (*deepseek.DeleteSessionResult, error) { +func (m *autoDeleteModeDSStub) DeleteSessionForTokenCtx(ctx context.Context, _ string, sessionID string) (*dsclient.DeleteSessionResult, error) { m.singleCalls++ m.lastSessionID = sessionID m.lastCtxErr = ctx.Err() - return &deepseek.DeleteSessionResult{SessionID: sessionID, Success: true}, nil + return &dsclient.DeleteSessionResult{SessionID: sessionID, 
Success: true}, nil } func TestChatCompletionsAutoDeleteModes(t *testing.T) { @@ -82,7 +82,7 @@ func TestChatCompletionsAutoDeleteModes(t *testing.T) { DS: ds, } - reqBody := `{"model":"deepseek-chat","messages":[{"role":"user","content":"hi"}],"stream":false}` + reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hi"}],"stream":false}` req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -110,7 +110,7 @@ type autoDeleteCtxDSStub struct { autoDeleteModeDSStub } -func (m *autoDeleteCtxDSStub) DeleteSessionForToken(ctx context.Context, token string, sessionID string) (*deepseek.DeleteSessionResult, error) { +func (m *autoDeleteCtxDSStub) DeleteSessionForToken(ctx context.Context, token string, sessionID string) (*dsclient.DeleteSessionResult, error) { return m.DeleteSessionForTokenCtx(ctx, token, sessionID) } diff --git a/internal/adapter/openai/handler_toolcall_test.go b/internal/httpapi/openai/chat/handler_toolcall_test.go similarity index 88% rename from internal/adapter/openai/handler_toolcall_test.go rename to internal/httpapi/openai/chat/handler_toolcall_test.go index e0f11ba..f949a46 100644 --- a/internal/adapter/openai/handler_toolcall_test.go +++ b/internal/httpapi/openai/chat/handler_toolcall_test.go @@ -1,4 +1,4 @@ -package openai +package chat import ( "encoding/json" @@ -93,7 +93,7 @@ func TestHandleNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) { ) rec := httptest.NewRecorder() - h.handleNonStream(rec, resp, "cid-empty", "deepseek-chat", "prompt", false, false, nil, nil) + h.handleNonStream(rec, resp, "cid-empty", "deepseek-v4-flash", "prompt", false, false, nil, nil) if rec.Code != http.StatusTooManyRequests { t.Fatalf("expected status 429 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String()) } @@ -112,7 +112,7 @@ func 
TestHandleNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutp ) rec := httptest.NewRecorder() - h.handleNonStream(rec, resp, "cid-empty-filtered", "deepseek-chat", "prompt", false, false, nil, nil) + h.handleNonStream(rec, resp, "cid-empty-filtered", "deepseek-v4-flash", "prompt", false, false, nil, nil) if rec.Code != http.StatusBadRequest { t.Fatalf("expected status 400 for filtered upstream output, got %d body=%s", rec.Code, rec.Body.String()) } @@ -131,7 +131,7 @@ func TestHandleNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testing.T) { ) rec := httptest.NewRecorder() - h.handleNonStream(rec, resp, "cid-thinking-only", "deepseek-reasoner", "prompt", true, false, nil, nil) + h.handleNonStream(rec, resp, "cid-thinking-only", "deepseek-v4-pro", "prompt", true, false, nil, nil) if rec.Code != http.StatusTooManyRequests { t.Fatalf("expected status 429 for thinking-only upstream output, got %d body=%s", rec.Code, rec.Body.String()) } @@ -152,7 +152,7 @@ func TestHandleStreamToolsPlainTextStreamsBeforeFinish(t *testing.T) { rec := httptest.NewRecorder() req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) - h.handleStream(rec, req, resp, "cid6", "deepseek-chat", "prompt", false, false, []string{"search"}, nil) + h.handleStream(rec, req, resp, "cid6", "deepseek-v4-flash", "prompt", false, false, []string{"search"}, nil) frames, done := parseSSEDataFrames(t, rec.Body.String()) if !done { @@ -189,7 +189,7 @@ func TestHandleStreamIncompleteCapturedToolJSONFlushesAsTextOnFinalize(t *testin rec := httptest.NewRecorder() req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) - h.handleStream(rec, req, resp, "cid10", "deepseek-chat", "prompt", false, false, []string{"search"}, nil) + h.handleStream(rec, req, resp, "cid10", "deepseek-v4-flash", "prompt", false, false, []string{"search"}, nil) frames, done := parseSSEDataFrames(t, rec.Body.String()) if !done { @@ -217,14 +217,14 @@ func 
TestHandleStreamIncompleteCapturedToolJSONFlushesAsTextOnFinalize(t *testin func TestHandleStreamEmitsDistinctToolCallIDsAcrossSeparateToolBlocks(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( - `data: {"p":"response/content","v":"前置文本\n\n \n read_file\n {\"path\":\"README.MD\"}\n \n"}`, - `data: {"p":"response/content","v":"中间文本\n\n \n search\n {\"q\":\"golang\"}\n \n"}`, + `data: {"p":"response/content","v":"前置文本\n\n \n README.MD\n \n"}`, + `data: {"p":"response/content","v":"中间文本\n\n \n golang\n \n"}`, `data: [DONE]`, ) rec := httptest.NewRecorder() req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) - h.handleStream(rec, req, resp, "cid-multi", "deepseek-chat", "prompt", false, false, []string{"read_file", "search"}, nil) + h.handleStream(rec, req, resp, "cid-multi", "deepseek-v4-flash", "prompt", false, false, []string{"read_file", "search"}, nil) frames, done := parseSSEDataFrames(t, rec.Body.String()) if !done { diff --git a/internal/httpapi/openai/chat/test_helpers_test.go b/internal/httpapi/openai/chat/test_helpers_test.go new file mode 100644 index 0000000..0423f4e --- /dev/null +++ b/internal/httpapi/openai/chat/test_helpers_test.go @@ -0,0 +1,202 @@ +package chat + +import ( + "context" + "io" + "net/http" + "strings" + + "ds2api/internal/auth" + dsclient "ds2api/internal/deepseek/client" +) + +type mockOpenAIConfig struct { + aliases map[string]string + wideInput bool + autoDeleteMode string + toolMode string + earlyEmit string + responsesTTL int + embedProv string + historySplitEnabled bool + historySplitTurns int +} + +func (m mockOpenAIConfig) ModelAliases() map[string]string { return m.aliases } +func (m mockOpenAIConfig) CompatWideInputStrictOutput() bool { + return m.wideInput +} +func (m mockOpenAIConfig) CompatStripReferenceMarkers() bool { return true } +func (m mockOpenAIConfig) ToolcallMode() string { return m.toolMode } +func (m mockOpenAIConfig) ToolcallEarlyEmitConfidence() string { return m.earlyEmit } 
+func (m mockOpenAIConfig) ResponsesStoreTTLSeconds() int { return m.responsesTTL } +func (m mockOpenAIConfig) EmbeddingsProvider() string { return m.embedProv } +func (m mockOpenAIConfig) AutoDeleteMode() string { + if m.autoDeleteMode == "" { + return "none" + } + return m.autoDeleteMode +} +func (m mockOpenAIConfig) AutoDeleteSessions() bool { return false } +func (m mockOpenAIConfig) HistorySplitEnabled() bool { return m.historySplitEnabled } +func (m mockOpenAIConfig) HistorySplitTriggerAfterTurns() int { + if m.historySplitTurns <= 0 { + return 1 + } + return m.historySplitTurns +} + +type streamStatusAuthStub struct{} + +func (streamStatusAuthStub) Determine(_ *http.Request) (*auth.RequestAuth, error) { + return &auth.RequestAuth{ + UseConfigToken: false, + DeepSeekToken: "direct-token", + CallerID: "caller:test", + TriedAccounts: map[string]bool{}, + }, nil +} + +func (streamStatusAuthStub) DetermineCaller(_ *http.Request) (*auth.RequestAuth, error) { + return (&streamStatusAuthStub{}).Determine(nil) +} + +func (streamStatusAuthStub) Release(_ *auth.RequestAuth) {} + +type streamStatusManagedAuthStub struct{} + +func (streamStatusManagedAuthStub) Determine(_ *http.Request) (*auth.RequestAuth, error) { + return &auth.RequestAuth{ + UseConfigToken: true, + DeepSeekToken: "managed-token", + CallerID: "caller:test", + AccountID: "acct:test", + TriedAccounts: map[string]bool{}, + }, nil +} + +func (streamStatusManagedAuthStub) DetermineCaller(_ *http.Request) (*auth.RequestAuth, error) { + return (&streamStatusManagedAuthStub{}).Determine(nil) +} + +func (streamStatusManagedAuthStub) Release(_ *auth.RequestAuth) {} + +type streamStatusDSStub struct { + resp *http.Response +} + +func (m streamStatusDSStub) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) { + return "session-id", nil +} + +func (m streamStatusDSStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) { + return "pow", nil +} + +func (m 
streamStatusDSStub) UploadFile(_ context.Context, _ *auth.RequestAuth, _ dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) { + return &dsclient.UploadFileResult{ID: "file-id", Filename: "file.txt", Bytes: 1, Status: "uploaded"}, nil +} + +func (m streamStatusDSStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) { + return m.resp, nil +} + +func (m streamStatusDSStub) DeleteSessionForToken(_ context.Context, _ string, _ string) (*dsclient.DeleteSessionResult, error) { + return &dsclient.DeleteSessionResult{Success: true}, nil +} + +func (m streamStatusDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error { + return nil +} + +func makeOpenAISSEHTTPResponse(lines ...string) *http.Response { + body := strings.Join(lines, "\n") + if !strings.HasSuffix(body, "\n") { + body += "\n" + } + return &http.Response{ + StatusCode: http.StatusOK, + Header: make(http.Header), + Body: io.NopCloser(strings.NewReader(body)), + } +} + +type inlineUploadDSStub struct { + uploadCalls []dsclient.UploadFileRequest + lastCtx context.Context + completionReq map[string]any + createSession string + uploadErr error + completionResp *http.Response +} + +func (m *inlineUploadDSStub) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) { + if strings.TrimSpace(m.createSession) == "" { + return "session-id", nil + } + return m.createSession, nil +} + +func (m *inlineUploadDSStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) { + return "pow", nil +} + +func (m *inlineUploadDSStub) UploadFile(ctx context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) { + m.lastCtx = ctx + m.uploadCalls = append(m.uploadCalls, req) + if m.uploadErr != nil { + return nil, m.uploadErr + } + return &dsclient.UploadFileResult{ + ID: "file-inline-1", + Filename: req.Filename, + Bytes: int64(len(req.Data)), + 
Status: "uploaded", + Purpose: req.Purpose, + }, nil +} + +func (m *inlineUploadDSStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) { + m.completionReq = payload + if m.completionResp != nil { + return m.completionResp, nil + } + return makeOpenAISSEHTTPResponse( + `data: {"p":"response/content","v":"ok"}`, + `data: [DONE]`, + ), nil +} + +func (m *inlineUploadDSStub) DeleteSessionForToken(_ context.Context, _ string, _ string) (*dsclient.DeleteSessionResult, error) { + return &dsclient.DeleteSessionResult{Success: true}, nil +} + +func (m *inlineUploadDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error { + return nil +} + +func historySplitTestMessages() []any { + toolCalls := []any{ + map[string]any{ + "name": "search", + "arguments": map[string]any{"query": "docs"}, + }, + } + return []any{ + map[string]any{"role": "system", "content": "system instructions"}, + map[string]any{"role": "user", "content": "first user turn"}, + map[string]any{ + "role": "assistant", + "content": "", + "reasoning_content": "hidden reasoning", + "tool_calls": toolCalls, + }, + map[string]any{ + "role": "tool", + "name": "search", + "tool_call_id": "call-1", + "content": "tool result", + }, + map[string]any{"role": "user", "content": "latest user turn"}, + } +} diff --git a/internal/httpapi/openai/chat/vercel_prepare_test.go b/internal/httpapi/openai/chat/vercel_prepare_test.go new file mode 100644 index 0000000..8cd948f --- /dev/null +++ b/internal/httpapi/openai/chat/vercel_prepare_test.go @@ -0,0 +1,182 @@ +package chat + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "ds2api/internal/auth" + dsclient "ds2api/internal/deepseek/client" +) + +func TestIsVercelStreamPrepareRequest(t *testing.T) { + req := httptest.NewRequest("POST", "/v1/chat/completions?__stream_prepare=1", nil) + if !isVercelStreamPrepareRequest(req) { + t.Fatalf("expected 
prepare request to be detected") + } + + req2 := httptest.NewRequest("POST", "/v1/chat/completions", nil) + if isVercelStreamPrepareRequest(req2) { + t.Fatalf("expected non-prepare request") + } +} + +func TestIsVercelStreamReleaseRequest(t *testing.T) { + req := httptest.NewRequest("POST", "/v1/chat/completions?__stream_release=1", nil) + if !isVercelStreamReleaseRequest(req) { + t.Fatalf("expected release request to be detected") + } + + req2 := httptest.NewRequest("POST", "/v1/chat/completions", nil) + if isVercelStreamReleaseRequest(req2) { + t.Fatalf("expected non-release request") + } +} + +func TestVercelInternalSecret(t *testing.T) { + t.Run("prefer explicit secret", func(t *testing.T) { + t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret") + t.Setenv("DS2API_ADMIN_KEY", "admin-fallback") + if got := vercelInternalSecret(); got != "stream-secret" { + t.Fatalf("expected explicit secret, got %q", got) + } + }) + + t.Run("fallback to admin key", func(t *testing.T) { + t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "") + t.Setenv("DS2API_ADMIN_KEY", "admin-fallback") + if got := vercelInternalSecret(); got != "admin-fallback" { + t.Fatalf("expected admin key fallback, got %q", got) + } + }) + + t.Run("default admin when env missing", func(t *testing.T) { + t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "") + t.Setenv("DS2API_ADMIN_KEY", "") + if got := vercelInternalSecret(); got != "admin" { + t.Fatalf("expected default admin fallback, got %q", got) + } + }) +} + +func TestStreamLeaseLifecycle(t *testing.T) { + h := &Handler{} + leaseID := h.holdStreamLease(&auth.RequestAuth{UseConfigToken: false}) + if leaseID == "" { + t.Fatalf("expected non-empty lease id") + } + if ok := h.releaseStreamLease(leaseID); !ok { + t.Fatalf("expected lease release success") + } + if ok := h.releaseStreamLease(leaseID); ok { + t.Fatalf("expected duplicate release to fail") + } +} + +func TestStreamLeaseTTL(t *testing.T) { + t.Setenv("DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS", "120") + 
if got := streamLeaseTTL(); got != 120*time.Second { + t.Fatalf("expected ttl=120s, got %v", got) + } + t.Setenv("DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS", "invalid") + if got := streamLeaseTTL(); got != 15*time.Minute { + t.Fatalf("expected default ttl on invalid value, got %v", got) + } +} + +func TestHandleVercelStreamPrepareAppliesHistorySplit(t *testing.T) { + t.Setenv("VERCEL", "1") + t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret") + + ds := &inlineUploadDSStub{} + h := &Handler{ + Store: mockOpenAIConfig{ + wideInput: true, + historySplitEnabled: true, + historySplitTurns: 1, + }, + Auth: streamStatusAuthStub{}, + DS: ds, + } + + reqBody, _ := json.Marshal(map[string]any{ + "model": "deepseek-v4-flash", + "messages": historySplitTestMessages(), + "stream": true, + }) + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions?__stream_prepare=1", strings.NewReader(string(reqBody))) + req.Header.Set("Authorization", "Bearer direct-token") + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Ds2-Internal-Token", "stream-secret") + rec := httptest.NewRecorder() + + h.handleVercelStreamPrepare(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if len(ds.uploadCalls) != 1 { + t.Fatalf("expected 1 history upload, got %d", len(ds.uploadCalls)) + } + + var body map[string]any + if err := json.NewDecoder(rec.Body).Decode(&body); err != nil { + t.Fatalf("decode failed: %v", err) + } + payload, _ := body["payload"].(map[string]any) + if payload == nil { + t.Fatalf("expected payload object, got %#v", body["payload"]) + } + promptText, _ := payload["prompt"].(string) + if !strings.Contains(promptText, "latest user turn") { + t.Fatalf("expected latest user turn in prompt, got %s", promptText) + } + if strings.Contains(promptText, "first user turn") { + t.Fatalf("expected historical turns removed from prompt, got %s", promptText) + } + refIDs, _ := 
payload["ref_file_ids"].([]any) + if len(refIDs) == 0 || refIDs[0] != "file-inline-1" { + t.Fatalf("expected uploaded history file first in ref_file_ids, got %#v", payload["ref_file_ids"]) + } +} + +func TestHandleVercelStreamPrepareMapsHistorySplitManagedAuthFailureTo401(t *testing.T) { + t.Setenv("VERCEL", "1") + t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret") + + ds := &inlineUploadDSStub{ + uploadErr: &dsclient.RequestFailure{Op: "upload file", Kind: dsclient.FailureManagedUnauthorized, Message: "expired token"}, + } + h := &Handler{ + Store: mockOpenAIConfig{ + wideInput: true, + historySplitEnabled: true, + historySplitTurns: 1, + }, + Auth: streamStatusManagedAuthStub{}, + DS: ds, + } + + reqBody, _ := json.Marshal(map[string]any{ + "model": "deepseek-v4-flash", + "messages": historySplitTestMessages(), + "stream": true, + }) + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions?__stream_prepare=1", strings.NewReader(string(reqBody))) + req.Header.Set("Authorization", "Bearer managed-key") + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Ds2-Internal-Token", "stream-secret") + rec := httptest.NewRecorder() + + h.handleVercelStreamPrepare(rec, req) + + if rec.Code != http.StatusUnauthorized { + t.Fatalf("expected 401, got %d body=%s", rec.Code, rec.Body.String()) + } + if !strings.Contains(rec.Body.String(), "Please re-login the account in admin") { + t.Fatalf("expected managed auth error message, got %s", rec.Body.String()) + } +} diff --git a/internal/adapter/openai/vercel_stream.go b/internal/httpapi/openai/chat/vercel_stream.go similarity index 94% rename from internal/adapter/openai/vercel_stream.go rename to internal/httpapi/openai/chat/vercel_stream.go index 3e56b3e..1a3c00d 100644 --- a/internal/adapter/openai/vercel_stream.go +++ b/internal/httpapi/openai/chat/vercel_stream.go @@ -1,11 +1,8 @@ -package openai +package chat import ( - "crypto/rand" "crypto/subtle" - "encoding/hex" "encoding/json" - 
"fmt" "net/http" "os" "strconv" @@ -14,7 +11,10 @@ import ( "ds2api/internal/auth" "ds2api/internal/config" + "ds2api/internal/promptcompat" "ds2api/internal/util" + + "github.com/google/uuid" ) func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Request) { @@ -60,7 +60,7 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque writeOpenAIError(w, http.StatusBadRequest, "stream must be true") return } - stdReq, err := normalizeOpenAIChatRequest(h.Store, req, requestTraceID(r)) + stdReq, err := promptcompat.NormalizeOpenAIChatRequest(h.Store, req, requestTraceID(r)) if err != nil { writeOpenAIError(w, http.StatusBadRequest, err.Error()) return @@ -69,6 +69,12 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque writeOpenAIError(w, http.StatusBadRequest, "stream must be true") return } + stdReq, err = h.applyHistorySplit(r.Context(), a, stdReq) + if err != nil { + status, message := mapHistorySplitError(err) + writeOpenAIError(w, status, message) + return + } sessionID, err := h.DS.CreateSession(r.Context(), a, 3) if err != nil { @@ -260,9 +266,5 @@ func streamLeaseTTL() time.Duration { } func newLeaseID() string { - buf := make([]byte, 16) - if _, err := rand.Read(buf); err == nil { - return hex.EncodeToString(buf) - } - return fmt.Sprintf("lease-%d", time.Now().UnixNano()) + return strings.ReplaceAll(uuid.NewString(), "-", "") } diff --git a/internal/adapter/openai/citation_links_test.go b/internal/httpapi/openai/citation_links_test.go similarity index 100% rename from internal/adapter/openai/citation_links_test.go rename to internal/httpapi/openai/citation_links_test.go diff --git a/internal/adapter/openai/deps_injection_test.go b/internal/httpapi/openai/deps_injection_test.go similarity index 82% rename from internal/adapter/openai/deps_injection_test.go rename to internal/httpapi/openai/deps_injection_test.go index f3c9741..0d906aa 100644 --- 
a/internal/adapter/openai/deps_injection_test.go +++ b/internal/httpapi/openai/deps_injection_test.go @@ -1,6 +1,10 @@ package openai -import "testing" +import ( + "testing" + + "ds2api/internal/promptcompat" +) type mockOpenAIConfig struct { aliases map[string]string @@ -41,7 +45,7 @@ func (m mockOpenAIConfig) HistorySplitTriggerAfterTurns() int { func TestNormalizeOpenAIChatRequestWithConfigInterface(t *testing.T) { cfg := mockOpenAIConfig{ aliases: map[string]string{ - "my-model": "deepseek-chat-search", + "my-model": "deepseek-v4-flash-search", }, wideInput: true, } @@ -49,25 +53,25 @@ func TestNormalizeOpenAIChatRequestWithConfigInterface(t *testing.T) { "model": "my-model", "messages": []any{map[string]any{"role": "user", "content": "hello"}}, } - out, err := normalizeOpenAIChatRequest(cfg, req, "") + out, err := promptcompat.NormalizeOpenAIChatRequest(cfg, req, "") if err != nil { - t.Fatalf("normalizeOpenAIChatRequest error: %v", err) + t.Fatalf("promptcompat.NormalizeOpenAIChatRequest error: %v", err) } - if out.ResolvedModel != "deepseek-chat-search" { + if out.ResolvedModel != "deepseek-v4-flash-search" { t.Fatalf("resolved model mismatch: got=%q", out.ResolvedModel) } - if !out.Search || out.Thinking { + if !out.Search || !out.Thinking { t.Fatalf("unexpected model flags: thinking=%v search=%v", out.Thinking, out.Search) } } func TestNormalizeOpenAIResponsesRequestWideInputPolicyFromInterface(t *testing.T) { req := map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "input": "hi", } - _, err := normalizeOpenAIResponsesRequest(mockOpenAIConfig{ + _, err := promptcompat.NormalizeOpenAIResponsesRequest(mockOpenAIConfig{ aliases: map[string]string{}, wideInput: false, }, req, "") @@ -75,7 +79,7 @@ func TestNormalizeOpenAIResponsesRequestWideInputPolicyFromInterface(t *testing. 
t.Fatal("expected error when wide input is disabled and only input is provided") } - out, err := normalizeOpenAIResponsesRequest(mockOpenAIConfig{ + out, err := promptcompat.NormalizeOpenAIResponsesRequest(mockOpenAIConfig{ aliases: map[string]string{}, wideInput: true, }, req, "") diff --git a/internal/adapter/openai/embeddings_handler.go b/internal/httpapi/openai/embeddings/embeddings_handler.go similarity index 66% rename from internal/adapter/openai/embeddings_handler.go rename to internal/httpapi/openai/embeddings/embeddings_handler.go index 48dfdd8..8c5b340 100644 --- a/internal/adapter/openai/embeddings_handler.go +++ b/internal/httpapi/openai/embeddings/embeddings_handler.go @@ -1,4 +1,4 @@ -package openai +package embeddings import ( "crypto/sha256" @@ -9,10 +9,19 @@ import ( "strings" "ds2api/internal/auth" + "ds2api/internal/chathistory" "ds2api/internal/config" + "ds2api/internal/httpapi/openai/shared" "ds2api/internal/util" ) +type Handler struct { + Store shared.ConfigReader + Auth shared.AuthResolver + DS shared.DeepSeekCaller + ChatHistory *chathistory.Store +} + func (h *Handler) Embeddings(w http.ResponseWriter, r *http.Request) { a, err := h.Auth.Determine(r) if err != nil { @@ -21,35 +30,35 @@ func (h *Handler) Embeddings(w http.ResponseWriter, r *http.Request) { if err == auth.ErrNoAccount { status = http.StatusTooManyRequests } - writeOpenAIError(w, status, detail) + shared.WriteOpenAIError(w, status, detail) return } defer h.Auth.Release(a) - r.Body = http.MaxBytesReader(w, r.Body, openAIGeneralMaxSize) + r.Body = http.MaxBytesReader(w, r.Body, shared.GeneralMaxSize) var req map[string]any if err := json.NewDecoder(r.Body).Decode(&req); err != nil { if strings.Contains(strings.ToLower(err.Error()), "too large") { - writeOpenAIError(w, http.StatusRequestEntityTooLarge, "request body too large") + shared.WriteOpenAIError(w, http.StatusRequestEntityTooLarge, "request body too large") return } - writeOpenAIError(w, http.StatusBadRequest, "invalid 
json") + shared.WriteOpenAIError(w, http.StatusBadRequest, "invalid json") return } model, _ := req["model"].(string) model = strings.TrimSpace(model) if model == "" { - writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model'.") + shared.WriteOpenAIError(w, http.StatusBadRequest, "Request must include 'model'.") return } if _, ok := config.ResolveModel(h.Store, model); !ok { - writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model)) + shared.WriteOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model)) return } - inputs := extractEmbeddingInputs(req["input"]) + inputs := ExtractEmbeddingInputs(req["input"]) if len(inputs) == 0 { - writeOpenAIError(w, http.StatusBadRequest, "Request must include non-empty 'input'.") + shared.WriteOpenAIError(w, http.StatusBadRequest, "Request must include non-empty 'input'.") return } @@ -58,14 +67,14 @@ func (h *Handler) Embeddings(w http.ResponseWriter, r *http.Request) { provider = strings.ToLower(strings.TrimSpace(h.Store.EmbeddingsProvider())) } if provider == "" { - writeOpenAIError(w, http.StatusNotImplemented, "Embeddings provider is not configured. Set embeddings.provider in config.") + shared.WriteOpenAIError(w, http.StatusNotImplemented, "Embeddings provider is not configured. 
Set embeddings.provider in config.") return } switch provider { case "mock", "deterministic", "builtin": // supported local deterministic provider default: - writeOpenAIError(w, http.StatusNotImplemented, fmt.Sprintf("Embeddings provider '%s' is not supported.", provider)) + shared.WriteOpenAIError(w, http.StatusNotImplemented, fmt.Sprintf("Embeddings provider '%s' is not supported.", provider)) return } @@ -76,10 +85,10 @@ func (h *Handler) Embeddings(w http.ResponseWriter, r *http.Request) { data = append(data, map[string]any{ "object": "embedding", "index": i, - "embedding": deterministicEmbedding(input), + "embedding": DeterministicEmbedding(input), }) } - writeJSON(w, http.StatusOK, map[string]any{ + shared.WriteJSON(w, http.StatusOK, map[string]any{ "object": "list", "data": data, "model": model, @@ -90,7 +99,7 @@ func (h *Handler) Embeddings(w http.ResponseWriter, r *http.Request) { }) } -func extractEmbeddingInputs(raw any) []string { +func ExtractEmbeddingInputs(raw any) []string { switch v := raw.(type) { case string: s := strings.TrimSpace(v) @@ -123,7 +132,7 @@ func extractEmbeddingInputs(raw any) []string { } } -func deterministicEmbedding(input string) []float64 { +func DeterministicEmbedding(input string) []float64 { // Keep response shape stable without external dependencies. 
const dims = 64 out := make([]float64, dims) diff --git a/internal/adapter/openai/embeddings_route_test.go b/internal/httpapi/openai/embeddings_route_test.go similarity index 94% rename from internal/adapter/openai/embeddings_route_test.go rename to internal/httpapi/openai/embeddings_route_test.go index 4395d16..6962a05 100644 --- a/internal/adapter/openai/embeddings_route_test.go +++ b/internal/httpapi/openai/embeddings_route_test.go @@ -28,9 +28,9 @@ func newResolverWithConfigJSON(t *testing.T, cfgJSON string) (*config.Store, *au func TestEmbeddingsRouteContract(t *testing.T) { store, resolver := newResolverWithConfigJSON(t, `{"embeddings":{"provider":"deterministic"}}`) - h := &Handler{Store: store, Auth: resolver} + h := &openAITestSurface{Store: store, Auth: resolver} r := chi.NewRouter() - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) t.Run("unauthorized", func(t *testing.T) { body := bytes.NewBufferString(`{"model":"gpt-4o","input":"hello"}`) @@ -69,9 +69,9 @@ func TestEmbeddingsRouteContract(t *testing.T) { func TestEmbeddingsRouteProviderMissing(t *testing.T) { store, resolver := newResolverWithConfigJSON(t, `{}`) - h := &Handler{Store: store, Auth: resolver} + h := &openAITestSurface{Store: store, Auth: resolver} r := chi.NewRouter() - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) body := bytes.NewBufferString(`{"model":"gpt-4o","input":"hello"}`) req := httptest.NewRequest(http.MethodPost, "/v1/embeddings", body) diff --git a/internal/adapter/openai/error_shape_test.go b/internal/httpapi/openai/error_shape_test.go similarity index 100% rename from internal/adapter/openai/error_shape_test.go rename to internal/httpapi/openai/error_shape_test.go diff --git a/internal/adapter/openai/file_inline_upload_test.go b/internal/httpapi/openai/file_inline_upload_test.go similarity index 80% rename from internal/adapter/openai/file_inline_upload_test.go rename to internal/httpapi/openai/file_inline_upload_test.go index f1c7c81..4ea2445 100644 --- 
a/internal/adapter/openai/file_inline_upload_test.go +++ b/internal/httpapi/openai/file_inline_upload_test.go @@ -12,11 +12,11 @@ import ( "github.com/go-chi/chi/v5" "ds2api/internal/auth" - "ds2api/internal/deepseek" + dsclient "ds2api/internal/deepseek/client" ) type inlineUploadDSStub struct { - uploadCalls []deepseek.UploadFileRequest + uploadCalls []dsclient.UploadFileRequest lastCtx context.Context completionReq map[string]any createSession string @@ -35,13 +35,13 @@ func (m *inlineUploadDSStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ in return "pow", nil } -func (m *inlineUploadDSStub) UploadFile(ctx context.Context, _ *auth.RequestAuth, req deepseek.UploadFileRequest, _ int) (*deepseek.UploadFileResult, error) { +func (m *inlineUploadDSStub) UploadFile(ctx context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) { m.lastCtx = ctx m.uploadCalls = append(m.uploadCalls, req) if m.uploadErr != nil { return nil, m.uploadErr } - return &deepseek.UploadFileResult{ + return &dsclient.UploadFileResult{ ID: "file-inline-1", Filename: req.Filename, Bytes: int64(len(req.Data)), @@ -61,8 +61,8 @@ func (m *inlineUploadDSStub) CallCompletion(_ context.Context, _ *auth.RequestAu ), nil } -func (m *inlineUploadDSStub) DeleteSessionForToken(_ context.Context, _ string, _ string) (*deepseek.DeleteSessionResult, error) { - return &deepseek.DeleteSessionResult{Success: true}, nil +func (m *inlineUploadDSStub) DeleteSessionForToken(_ context.Context, _ string, _ string) (*dsclient.DeleteSessionResult, error) { + return &dsclient.DeleteSessionResult{Success: true}, nil } func (m *inlineUploadDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error { @@ -71,7 +71,7 @@ func (m *inlineUploadDSStub) DeleteAllSessionsForToken(_ context.Context, _ stri func TestPreprocessInlineFileInputsReplacesDataURLAndCollectsRefFileIDs(t *testing.T) { ds := &inlineUploadDSStub{} - h := &Handler{DS: ds} + h := 
&openAITestSurface{DS: ds} req := map[string]any{ "messages": []any{ map[string]any{ @@ -121,7 +121,7 @@ func TestPreprocessInlineFileInputsReplacesDataURLAndCollectsRefFileIDs(t *testi func TestPreprocessInlineFileInputsDeduplicatesIdenticalPayloads(t *testing.T) { ds := &inlineUploadDSStub{} - h := &Handler{DS: ds} + h := &openAITestSurface{DS: ds} req := map[string]any{ "messages": []any{ map[string]any{ @@ -148,8 +148,8 @@ func TestPreprocessInlineFileInputsDeduplicatesIdenticalPayloads(t *testing.T) { func TestChatCompletionsUploadsInlineFilesBeforeCompletion(t *testing.T) { ds := &inlineUploadDSStub{} - h := &Handler{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} - reqBody := `{"model":"deepseek-chat","messages":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"image_url","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}` + h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} + reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"image_url","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}` req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -174,10 +174,10 @@ func TestChatCompletionsUploadsInlineFilesBeforeCompletion(t *testing.T) { func TestResponsesUploadsInlineFilesBeforeCompletion(t *testing.T) { ds := &inlineUploadDSStub{} - h := &Handler{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} + h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} r := chi.NewRouter() - RegisterRoutes(r, h) - reqBody := 
`{"model":"deepseek-chat","input":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"input_image","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}` + registerOpenAITestRoutes(r, h) + reqBody := `{"model":"deepseek-v4-flash","input":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"input_image","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}` req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -199,8 +199,8 @@ func TestResponsesUploadsInlineFilesBeforeCompletion(t *testing.T) { func TestChatCompletionsInlineUploadFailureReturnsBadRequest(t *testing.T) { ds := &inlineUploadDSStub{} - h := &Handler{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} - reqBody := `{"model":"deepseek-chat","messages":[{"role":"user","content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,%%%"}}]}],"stream":false}` + h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} + reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,%%%"}}]}],"stream":false}` req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -218,10 +218,10 @@ func TestChatCompletionsInlineUploadFailureReturnsBadRequest(t *testing.T) { func TestResponsesInlineUploadFailureReturnsInternalServerError(t *testing.T) { ds := &inlineUploadDSStub{uploadErr: errors.New("boom")} - h := &Handler{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} + h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} 
r := chi.NewRouter() - RegisterRoutes(r, h) - reqBody := `{"model":"deepseek-chat","input":[{"role":"user","content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}` + registerOpenAITestRoutes(r, h) + reqBody := `{"model":"deepseek-v4-flash","input":[{"role":"user","content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}` req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -241,10 +241,10 @@ func TestVercelPrepareUploadsInlineFilesBeforeLeasePayload(t *testing.T) { t.Setenv("VERCEL", "1") t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret") ds := &inlineUploadDSStub{} - h := &Handler{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} + h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} r := chi.NewRouter() - RegisterRoutes(r, h) - reqBody := `{"model":"deepseek-chat","messages":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"image_url","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":true}` + registerOpenAITestRoutes(r, h) + reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"image_url","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":true}` req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions?__stream_prepare=1", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("X-Ds2-Internal-Token", "stream-secret") diff --git a/internal/adapter/openai/file_inline_upload.go b/internal/httpapi/openai/files/file_inline_upload.go similarity index 89% rename from internal/adapter/openai/file_inline_upload.go rename to internal/httpapi/openai/files/file_inline_upload.go index 
5955e81..c8d59a9 100644 --- a/internal/adapter/openai/file_inline_upload.go +++ b/internal/httpapi/openai/files/file_inline_upload.go @@ -1,4 +1,4 @@ -package openai +package files import ( "context" @@ -12,7 +12,9 @@ import ( "strings" "ds2api/internal/auth" - "ds2api/internal/deepseek" + dsclient "ds2api/internal/deepseek/client" + "ds2api/internal/httpapi/openai/shared" + "ds2api/internal/promptcompat" ) const maxInlineFilesPerRequest = 50 @@ -51,7 +53,7 @@ type inlineDecodedFile struct { ReplacementType string } -func (h *Handler) preprocessInlineFileInputs(ctx context.Context, a *auth.RequestAuth, req map[string]any) error { +func (h *Handler) PreprocessInlineFileInputs(ctx context.Context, a *auth.RequestAuth, req map[string]any) error { if h == nil || h.DS == nil || len(req) == 0 { return nil } @@ -70,16 +72,16 @@ func (h *Handler) preprocessInlineFileInputs(ctx context.Context, a *auth.Reques req[key] = updated } } - if refIDs := collectOpenAIRefFileIDs(req); len(refIDs) > 0 { + if refIDs := promptcompat.CollectOpenAIRefFileIDs(req); len(refIDs) > 0 { req["ref_file_ids"] = stringsToAnySlice(refIDs) } return nil } -func writeOpenAIInlineFileError(w http.ResponseWriter, err error) { +func WriteInlineFileError(w http.ResponseWriter, err error) { inlineErr, ok := err.(*inlineFileUploadError) if !ok || inlineErr == nil { - writeOpenAIError(w, http.StatusInternalServerError, "Failed to process file input.") + shared.WriteOpenAIError(w, http.StatusInternalServerError, "Failed to process file input.") return } status := inlineErr.status @@ -90,7 +92,7 @@ func writeOpenAIInlineFileError(w http.ResponseWriter, err error) { if message == "" { message = "Failed to process file input." 
} - writeOpenAIError(w, status, message) + shared.WriteOpenAIError(w, status, message) } func (s *inlineUploadState) walk(raw any) (any, error) { @@ -163,7 +165,7 @@ func (s *inlineUploadState) uploadInlineFile(file inlineDecodedFile) (string, er if contentType == "" { contentType = http.DetectContentType(file.Data) } - result, err := s.handler.DS.UploadFile(s.ctx, s.auth, deepseek.UploadFileRequest{ + result, err := s.handler.DS.UploadFile(s.ctx, s.auth, dsclient.UploadFileRequest{ Filename: file.Filename, ContentType: contentType, Data: file.Data, @@ -183,7 +185,7 @@ func decodeOpenAIInlineFileBlock(block map[string]any) (inlineDecodedFile, bool, if block == nil { return inlineDecodedFile{}, false, nil } - if strings.TrimSpace(asString(block["file_id"])) != "" { + if strings.TrimSpace(shared.AsString(block["file_id"])) != "" { return inlineDecodedFile{}, false, nil } if nested, ok := block["file"].(map[string]any); ok { @@ -196,7 +198,7 @@ func decodeOpenAIInlineFileBlock(block map[string]any) (inlineDecodedFile, bool, } return decoded, true, nil } - blockType := strings.ToLower(strings.TrimSpace(asString(block["type"]))) + blockType := strings.ToLower(strings.TrimSpace(shared.AsString(block["type"]))) if raw, matched := extractInlineImageDataURL(block); matched { data, contentType, err := decodeInlinePayload(raw, contentTypeFromMap(block)) if err != nil { @@ -232,11 +234,11 @@ func extractInlineImageDataURL(block map[string]any) (string, bool) { return strings.TrimSpace(x), true } case map[string]any: - if raw := strings.TrimSpace(asString(x["url"])); isDataURL(raw) { + if raw := strings.TrimSpace(shared.AsString(x["url"])); isDataURL(raw) { return raw, true } } - if raw := strings.TrimSpace(asString(block["url"])); isDataURL(raw) { + if raw := strings.TrimSpace(shared.AsString(block["url"])); isDataURL(raw) { return raw, true } return "", false @@ -244,7 +246,7 @@ func extractInlineImageDataURL(block map[string]any) (string, bool) { func 
extractInlineFilePayload(block map[string]any, blockType string) (string, bool) { for _, value := range []any{block["file_data"], block["base64"], block["data"]} { - if raw := strings.TrimSpace(asString(value)); raw != "" { + if raw := strings.TrimSpace(shared.AsString(value)); raw != "" { if strings.Contains(blockType, "file") || block["file_data"] != nil || block["filename"] != nil || block["file_name"] != nil || block["name"] != nil { return raw, true } @@ -319,13 +321,13 @@ func decodeBase64Flexible(raw string) ([]byte, error) { func contentTypeFromMap(block map[string]any) string { for _, value := range []any{block["mime_type"], block["mimeType"], block["content_type"], block["contentType"], block["media_type"], block["mediaType"]} { - if contentType := strings.TrimSpace(asString(value)); contentType != "" { + if contentType := strings.TrimSpace(shared.AsString(value)); contentType != "" { return contentType } } if imageURL, ok := block["image_url"].(map[string]any); ok { for _, value := range []any{imageURL["mime_type"], imageURL["mimeType"], imageURL["content_type"], imageURL["contentType"]} { - if contentType := strings.TrimSpace(asString(value)); contentType != "" { + if contentType := strings.TrimSpace(shared.AsString(value)); contentType != "" { return contentType } } @@ -335,7 +337,7 @@ func contentTypeFromMap(block map[string]any) string { func pickInlineFilename(block map[string]any, contentType string, prefix string) string { for _, value := range []any{block["filename"], block["file_name"], block["name"]} { - if name := strings.TrimSpace(asString(value)); name != "" { + if name := strings.TrimSpace(shared.AsString(value)); name != "" { return filepath.Base(name) } } diff --git a/internal/adapter/openai/handler_files.go b/internal/httpapi/openai/files/handler_files.go similarity index 66% rename from internal/adapter/openai/handler_files.go rename to internal/httpapi/openai/files/handler_files.go index f15ea3b..edfb653 100644 --- 
a/internal/adapter/openai/handler_files.go +++ b/internal/httpapi/openai/files/handler_files.go @@ -1,4 +1,4 @@ -package openai +package files import ( "io" @@ -7,11 +7,20 @@ import ( "time" "ds2api/internal/auth" - "ds2api/internal/deepseek" + "ds2api/internal/chathistory" + dsclient "ds2api/internal/deepseek/client" + "ds2api/internal/httpapi/openai/shared" ) const openAIUploadMaxMemory = 32 << 20 +type Handler struct { + Store shared.ConfigReader + Auth shared.AuthResolver + DS shared.DeepSeekCaller + ChatHistory *chathistory.Store +} + func (h *Handler) UploadFile(w http.ResponseWriter, r *http.Request) { a, err := h.Auth.Determine(r) if err != nil { @@ -20,22 +29,22 @@ func (h *Handler) UploadFile(w http.ResponseWriter, r *http.Request) { if err == auth.ErrNoAccount { status = http.StatusTooManyRequests } - writeOpenAIError(w, status, detail) + shared.WriteOpenAIError(w, status, detail) return } defer h.Auth.Release(a) if !strings.HasPrefix(strings.ToLower(strings.TrimSpace(r.Header.Get("Content-Type"))), "multipart/form-data") { - writeOpenAIError(w, http.StatusBadRequest, "content-type must be multipart/form-data") + shared.WriteOpenAIError(w, http.StatusBadRequest, "content-type must be multipart/form-data") return } // Enforce a hard cap on the total request body size to prevent OOM - r.Body = http.MaxBytesReader(w, r.Body, openAIUploadMaxSize) + r.Body = http.MaxBytesReader(w, r.Body, shared.UploadMaxSize) if err := r.ParseMultipartForm(openAIUploadMaxMemory); err != nil { if strings.Contains(strings.ToLower(err.Error()), "too large") { - writeOpenAIError(w, http.StatusRequestEntityTooLarge, "file size exceeds limit") + shared.WriteOpenAIError(w, http.StatusRequestEntityTooLarge, "file size exceeds limit") return } - writeOpenAIError(w, http.StatusBadRequest, "invalid multipart form") + shared.WriteOpenAIError(w, http.StatusBadRequest, "invalid multipart form") return } if r.MultipartForm != nil { @@ -44,36 +53,36 @@ func (h *Handler) UploadFile(w 
http.ResponseWriter, r *http.Request) { r = r.WithContext(auth.WithAuth(r.Context(), a)) file, header, err := r.FormFile("file") if err != nil { - writeOpenAIError(w, http.StatusBadRequest, "file is required") + shared.WriteOpenAIError(w, http.StatusBadRequest, "file is required") return } defer func() { _ = file.Close() }() data, err := io.ReadAll(file) if err != nil { - writeOpenAIError(w, http.StatusBadRequest, "failed to read uploaded file") + shared.WriteOpenAIError(w, http.StatusBadRequest, "failed to read uploaded file") return } contentType := strings.TrimSpace(header.Header.Get("Content-Type")) if contentType == "" && len(data) > 0 { contentType = http.DetectContentType(data) } - result, err := h.DS.UploadFile(r.Context(), a, deepseek.UploadFileRequest{ + result, err := h.DS.UploadFile(r.Context(), a, dsclient.UploadFileRequest{ Filename: header.Filename, ContentType: contentType, Purpose: strings.TrimSpace(r.FormValue("purpose")), Data: data, }, 3) if err != nil { - writeOpenAIError(w, http.StatusInternalServerError, "Failed to upload file.") + shared.WriteOpenAIError(w, http.StatusInternalServerError, "Failed to upload file.") return } if result != nil && result.AccountID == "" { result.AccountID = a.AccountID } - writeJSON(w, http.StatusOK, buildOpenAIFileObject(result)) + shared.WriteJSON(w, http.StatusOK, buildOpenAIFileObject(result)) } -func buildOpenAIFileObject(result *deepseek.UploadFileResult) map[string]any { +func buildOpenAIFileObject(result *dsclient.UploadFileResult) map[string]any { if result == nil { obj := map[string]any{ "id": "", diff --git a/internal/adapter/openai/files_route_test.go b/internal/httpapi/openai/files_route_test.go similarity index 85% rename from internal/adapter/openai/files_route_test.go rename to internal/httpapi/openai/files_route_test.go index 6c8eb0b..2b9c205 100644 --- a/internal/adapter/openai/files_route_test.go +++ b/internal/httpapi/openai/files_route_test.go @@ -13,7 +13,7 @@ import ( 
"github.com/go-chi/chi/v5" "ds2api/internal/auth" - "ds2api/internal/deepseek" + dsclient "ds2api/internal/deepseek/client" ) type managedFilesAuthStub struct{} @@ -41,8 +41,8 @@ func (managedFilesAuthStub) DetermineCaller(_ *http.Request) (*auth.RequestAuth, func (managedFilesAuthStub) Release(_ *auth.RequestAuth) {} type filesRouteDSStub struct { - lastReq deepseek.UploadFileRequest - upload *deepseek.UploadFileResult + lastReq dsclient.UploadFileRequest + upload *dsclient.UploadFileResult err error } @@ -54,7 +54,7 @@ func (m *filesRouteDSStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) return "", nil } -func (m *filesRouteDSStub) UploadFile(_ context.Context, _ *auth.RequestAuth, req deepseek.UploadFileRequest, _ int) (*deepseek.UploadFileResult, error) { +func (m *filesRouteDSStub) UploadFile(_ context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) { m.lastReq = req if m.err != nil { return nil, m.err @@ -62,15 +62,15 @@ func (m *filesRouteDSStub) UploadFile(_ context.Context, _ *auth.RequestAuth, re if m.upload != nil { return m.upload, nil } - return &deepseek.UploadFileResult{ID: "file-123", Filename: req.Filename, Bytes: int64(len(req.Data)), Purpose: req.Purpose, Status: "uploaded"}, nil + return &dsclient.UploadFileResult{ID: "file-123", Filename: req.Filename, Bytes: int64(len(req.Data)), Purpose: req.Purpose, Status: "uploaded"}, nil } func (m *filesRouteDSStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) { return nil, errors.New("not implemented") } -func (m *filesRouteDSStub) DeleteSessionForToken(_ context.Context, _ string, _ string) (*deepseek.DeleteSessionResult, error) { - return &deepseek.DeleteSessionResult{Success: true}, nil +func (m *filesRouteDSStub) DeleteSessionForToken(_ context.Context, _ string, _ string) (*dsclient.DeleteSessionResult, error) { + return &dsclient.DeleteSessionResult{Success: 
true}, nil } func (m *filesRouteDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error { @@ -104,9 +104,9 @@ func newMultipartUploadRequest(t *testing.T, purpose string, filename string, da func TestFilesRouteUploadSuccess(t *testing.T) { ds := &filesRouteDSStub{} - h := &Handler{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} + h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} r := chi.NewRouter() - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) req := newMultipartUploadRequest(t, "assistants", "notes.txt", []byte("hello world")) rec := httptest.NewRecorder() @@ -141,9 +141,9 @@ func TestFilesRouteUploadSuccess(t *testing.T) { func TestFilesRouteUploadIncludesAccountIDForManagedAccount(t *testing.T) { ds := &filesRouteDSStub{} - h := &Handler{Store: mockOpenAIConfig{wideInput: true}, Auth: managedFilesAuthStub{}, DS: ds} + h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: managedFilesAuthStub{}, DS: ds} r := chi.NewRouter() - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) req := newMultipartUploadRequest(t, "assistants", "notes.txt", []byte("hello world")) rec := httptest.NewRecorder() @@ -162,9 +162,9 @@ func TestFilesRouteUploadIncludesAccountIDForManagedAccount(t *testing.T) { } func TestFilesRouteRejectsNonMultipart(t *testing.T) { - h := &Handler{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: &filesRouteDSStub{}} + h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: &filesRouteDSStub{}} r := chi.NewRouter() - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) req := httptest.NewRequest(http.MethodPost, "/v1/files", bytes.NewBufferString(`{"purpose":"assistants"}`)) req.Header.Set("Authorization", "Bearer direct-token") @@ -178,9 +178,9 @@ func TestFilesRouteRejectsNonMultipart(t *testing.T) { } func TestFilesRouteRequiresFileField(t *testing.T) { 
- h := &Handler{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: &filesRouteDSStub{}} + h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: &filesRouteDSStub{}} r := chi.NewRouter() - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) var body bytes.Buffer writer := multipart.NewWriter(&body) diff --git a/internal/httpapi/openai/history/history_split.go b/internal/httpapi/openai/history/history_split.go new file mode 100644 index 0000000..96775ef --- /dev/null +++ b/internal/httpapi/openai/history/history_split.go @@ -0,0 +1,129 @@ +package history + +import ( + "context" + "errors" + "fmt" + "strings" + + "ds2api/internal/auth" + dsclient "ds2api/internal/deepseek/client" + "ds2api/internal/httpapi/openai/shared" + "ds2api/internal/promptcompat" +) + +const ( + historySplitFilename = "HISTORY.txt" + historySplitContentType = "text/plain; charset=utf-8" + historySplitPurpose = "assistants" +) + +type Service struct { + Store shared.ConfigReader + DS shared.DeepSeekCaller +} + +func (s Service) Apply(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) { + if s.DS == nil || s.Store == nil || a == nil { + return stdReq, nil + } + + promptMessages, historyMessages := SplitOpenAIHistoryMessages(stdReq.Messages, s.Store.HistorySplitTriggerAfterTurns()) + if len(historyMessages) == 0 { + return stdReq, nil + } + + historyText := promptcompat.BuildOpenAIHistoryTranscript(historyMessages) + if strings.TrimSpace(historyText) == "" { + return stdReq, errors.New("history split produced empty transcript") + } + + result, err := s.DS.UploadFile(ctx, a, dsclient.UploadFileRequest{ + Filename: historySplitFilename, + ContentType: historySplitContentType, + Purpose: historySplitPurpose, + Data: []byte(historyText), + }, 3) + if err != nil { + return stdReq, fmt.Errorf("upload history file: %w", err) + } + fileID := 
strings.TrimSpace(result.ID) + if fileID == "" { + return stdReq, errors.New("upload history file returned empty file id") + } + + stdReq.Messages = promptMessages + stdReq.HistoryText = historyText + stdReq.RefFileIDs = prependUniqueRefFileID(stdReq.RefFileIDs, fileID) + stdReq.FinalPrompt, stdReq.ToolNames = promptcompat.BuildOpenAIPrompt(promptMessages, stdReq.ToolsRaw, "", stdReq.ToolChoice, stdReq.Thinking) + return stdReq, nil +} + +func SplitOpenAIHistoryMessages(messages []any, triggerAfterTurns int) ([]any, []any) { + if triggerAfterTurns <= 0 { + triggerAfterTurns = 1 + } + lastUserIndex := -1 + userTurns := 0 + for i, raw := range messages { + msg, ok := raw.(map[string]any) + if !ok { + continue + } + role := strings.ToLower(strings.TrimSpace(shared.AsString(msg["role"]))) + if role != "user" { + continue + } + userTurns++ + lastUserIndex = i + } + if userTurns <= triggerAfterTurns || lastUserIndex < 0 { + return messages, nil + } + + promptMessages := make([]any, 0, len(messages)-lastUserIndex) + historyMessages := make([]any, 0, lastUserIndex) + for i, raw := range messages { + msg, ok := raw.(map[string]any) + if !ok { + if i >= lastUserIndex { + promptMessages = append(promptMessages, raw) + } else { + historyMessages = append(historyMessages, raw) + } + continue + } + role := strings.ToLower(strings.TrimSpace(shared.AsString(msg["role"]))) + switch role { + case "system", "developer": + promptMessages = append(promptMessages, raw) + default: + if i >= lastUserIndex { + promptMessages = append(promptMessages, raw) + } else { + historyMessages = append(historyMessages, raw) + } + } + } + if len(promptMessages) == 0 { + return messages, nil + } + return promptMessages, historyMessages +} + +func prependUniqueRefFileID(existing []string, fileID string) []string { + fileID = strings.TrimSpace(fileID) + if fileID == "" { + return existing + } + out := make([]string, 0, len(existing)+1) + out = append(out, fileID) + for _, id := range existing { + trimmed 
:= strings.TrimSpace(id) + if trimmed == "" || strings.EqualFold(trimmed, fileID) { + continue + } + out = append(out, trimmed) + } + return out +} diff --git a/internal/httpapi/openai/history/history_split_error.go b/internal/httpapi/openai/history/history_split_error.go new file mode 100644 index 0000000..df7c503 --- /dev/null +++ b/internal/httpapi/openai/history/history_split_error.go @@ -0,0 +1,18 @@ +package history + +import ( + "net/http" + + dsclient "ds2api/internal/deepseek/client" +) + +func MapError(err error) (int, string) { + switch { + case dsclient.IsManagedUnauthorizedError(err): + return http.StatusUnauthorized, "Account token is invalid. Please re-login the account in admin." + case dsclient.IsDirectUnauthorizedError(err): + return http.StatusUnauthorized, "Invalid token. If this should be a DS2API key, add it to config.keys first." + default: + return http.StatusInternalServerError, err.Error() + } +} diff --git a/internal/adapter/openai/history_split_test.go b/internal/httpapi/openai/history_split_test.go similarity index 52% rename from internal/adapter/openai/history_split_test.go rename to internal/httpapi/openai/history_split_test.go index 7a90049..c6059d7 100644 --- a/internal/adapter/openai/history_split_test.go +++ b/internal/httpapi/openai/history_split_test.go @@ -3,6 +3,7 @@ package openai import ( "context" "encoding/json" + "errors" "net/http" "net/http/httptest" "strings" @@ -11,7 +12,8 @@ import ( "github.com/go-chi/chi/v5" "ds2api/internal/auth" - "ds2api/internal/util" + dsclient "ds2api/internal/deepseek/client" + "ds2api/internal/promptcompat" ) func historySplitTestMessages() []any { @@ -40,96 +42,55 @@ func historySplitTestMessages() []any { } } -func TestBuildOpenAIHistoryTranscriptPreservesOrderAndToolHistory(t *testing.T) { - promptMessages, historyMessages := splitOpenAIHistoryMessages(historySplitTestMessages(), 1) - if len(promptMessages) != 2 { - t.Fatalf("expected 2 prompt messages, got %d", len(promptMessages)) - } 
- if len(historyMessages) != 3 { - t.Fatalf("expected 3 history messages, got %d", len(historyMessages)) - } +type streamStatusManagedAuthStub struct{} +func (streamStatusManagedAuthStub) Determine(_ *http.Request) (*auth.RequestAuth, error) { + return &auth.RequestAuth{ + UseConfigToken: true, + DeepSeekToken: "managed-token", + CallerID: "caller:test", + AccountID: "acct:test", + TriedAccounts: map[string]bool{}, + }, nil +} + +func (streamStatusManagedAuthStub) DetermineCaller(_ *http.Request) (*auth.RequestAuth, error) { + return (&streamStatusManagedAuthStub{}).Determine(nil) +} + +func (streamStatusManagedAuthStub) Release(_ *auth.RequestAuth) {} + +func TestBuildOpenAIHistoryTranscriptUsesInjectedFileWrapper(t *testing.T) { + _, historyMessages := splitOpenAIHistoryMessages(historySplitTestMessages(), 1) transcript := buildOpenAIHistoryTranscript(historyMessages) - if !strings.Contains(transcript, "first user turn") { - t.Fatalf("expected user history in transcript, got %s", transcript) + + if !strings.HasPrefix(transcript, "[file content end]\n\n") { + t.Fatalf("expected injected file wrapper prefix, got %q", transcript) + } + if !strings.Contains(transcript, "<|begin▁of▁sentence|>") { + t.Fatalf("expected serialized conversation markers, got %q", transcript) + } + if !strings.Contains(transcript, "first user turn") || !strings.Contains(transcript, "tool result") { + t.Fatalf("expected historical turns preserved, got %q", transcript) + } + if !strings.Contains(transcript, "[reasoning_content]") || !strings.Contains(transcript, "hidden reasoning") { + t.Fatalf("expected reasoning block preserved, got %q", transcript) } if !strings.Contains(transcript, "") { - t.Fatalf("expected assistant tool_calls in transcript, got %s", transcript) + t.Fatalf("expected tool calls preserved, got %q", transcript) } - if !strings.Contains(transcript, "tool_call_id=call-1") { - t.Fatalf("expected tool call id in transcript, got %s", transcript) - } - if 
!strings.Contains(transcript, "[reasoning_content]") { - t.Fatalf("expected reasoning block in HISTORY.txt, got %s", transcript) - } - if !strings.Contains(transcript, "hidden reasoning") { - t.Fatalf("expected reasoning text in HISTORY.txt, got %s", transcript) - } - - userIdx := strings.Index(transcript, "=== 1. USER ===") - assistantIdx := strings.Index(transcript, "=== 2. ASSISTANT ===") - toolIdx := strings.Index(transcript, "=== 3. TOOL ===") - if userIdx < 0 || assistantIdx < 0 || toolIdx < 0 { - t.Fatalf("expected ordered role sections, got %s", transcript) - } - if userIdx >= assistantIdx || assistantIdx >= toolIdx { - t.Fatalf("expected USER -> ASSISTANT -> TOOL order, got %s", transcript) - } - if reasoningIdx := strings.Index(transcript, "[reasoning_content]"); reasoningIdx < 0 || reasoningIdx > strings.Index(transcript, "") { - t.Fatalf("expected reasoning block before tool calls, got %s", transcript) - } - reasoning := extractHistorySplitReasoningContent(historyMessages) - if reasoning != "hidden reasoning" { - t.Fatalf("expected latest assistant reasoning to be extracted, got %q", reasoning) - } - - finalPrompt, _ := buildHistorySplitPrompt(promptMessages, reasoning, nil, util.DefaultToolChoicePolicy(), false) - if !strings.Contains(finalPrompt, "latest user turn") { - t.Fatalf("expected latest user turn in final prompt, got %s", finalPrompt) - } - if strings.Contains(finalPrompt, "first user turn") { - t.Fatalf("expected earlier history to be removed from final prompt, got %s", finalPrompt) - } - if !strings.Contains(finalPrompt, "[reasoning_content]") || !strings.Contains(finalPrompt, "hidden reasoning") { - t.Fatalf("expected latest assistant reasoning to be attached to prompt, got %s", finalPrompt) - } - if !strings.Contains(finalPrompt, "HISTORY.txt") { - t.Fatalf("expected history instruction in final prompt, got %s", finalPrompt) - } - if !strings.Contains(finalPrompt, "Follow the instructions in this prompt first") { - t.Fatalf("expected 
stronger prompt override in final prompt, got %s", finalPrompt) - } - if strings.Index(finalPrompt, "Follow the instructions in this prompt first") > strings.Index(finalPrompt, "Continue the conversation") { - t.Fatalf("expected history split instruction before continuity instructions, got %s", finalPrompt) + if !strings.HasSuffix(transcript, "\n[file name]: IGNORE\n[file content begin]\n") { + t.Fatalf("expected injected file wrapper suffix, got %q", transcript) } } func TestSplitOpenAIHistoryMessagesUsesLatestUserTurn(t *testing.T) { - toolCalls := []any{ - map[string]any{ - "name": "search", - "arguments": map[string]any{"query": "docs"}, - }, - } messages := []any{ map[string]any{"role": "system", "content": "system instructions"}, map[string]any{"role": "user", "content": "first user turn"}, - map[string]any{ - "role": "assistant", - "content": "", - "tool_calls": toolCalls, - }, - map[string]any{ - "role": "tool", - "name": "search", - "tool_call_id": "call-1", - "content": "tool result", - }, + map[string]any{"role": "assistant", "content": "first assistant turn"}, map[string]any{"role": "user", "content": "middle user turn"}, - map[string]any{ - "role": "assistant", - "content": "middle assistant turn", - }, + map[string]any{"role": "assistant", "content": "middle assistant turn"}, map[string]any{"role": "user", "content": "latest user turn"}, } @@ -137,31 +98,27 @@ func TestSplitOpenAIHistoryMessagesUsesLatestUserTurn(t *testing.T) { if len(promptMessages) == 0 || len(historyMessages) == 0 { t.Fatalf("expected both prompt and history messages, got prompt=%d history=%d", len(promptMessages), len(historyMessages)) } - reasoning := extractHistorySplitReasoningContent(historyMessages) - if reasoning != "" { - t.Fatalf("expected no reasoning in this fixture, got %q", reasoning) - } - promptText, _ := buildHistorySplitPrompt(promptMessages, reasoning, nil, util.DefaultToolChoicePolicy(), false) + promptText, _ := promptcompat.BuildOpenAIPrompt(promptMessages, 
nil, "", defaultToolChoicePolicy(), true) if !strings.Contains(promptText, "latest user turn") { t.Fatalf("expected latest user turn in prompt, got %s", promptText) } if strings.Contains(promptText, "middle user turn") { - t.Fatalf("expected middle user turn to be split into history, got %s", promptText) + t.Fatalf("expected middle user turn to be moved into history, got %s", promptText) } historyText := buildOpenAIHistoryTranscript(historyMessages) if !strings.Contains(historyText, "middle user turn") { - t.Fatalf("expected middle user turn in HISTORY.txt, got %s", historyText) + t.Fatalf("expected middle user turn in split history, got %s", historyText) } if strings.Contains(historyText, "latest user turn") { - t.Fatalf("expected latest user turn to remain in prompt, got %s", historyText) + t.Fatalf("expected latest user turn to remain live, got %s", historyText) } } func TestApplyHistorySplitSkipsFirstTurn(t *testing.T) { ds := &inlineUploadDSStub{} - h := &Handler{ + h := &openAITestSurface{ Store: mockOpenAIConfig{ wideInput: true, historySplitEnabled: true, @@ -170,12 +127,12 @@ func TestApplyHistorySplitSkipsFirstTurn(t *testing.T) { DS: ds, } req := map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": []any{ map[string]any{"role": "user", "content": "hello"}, }, } - stdReq, err := normalizeOpenAIChatRequest(h.Store, req, "") + stdReq, err := promptcompat.NormalizeOpenAIChatRequest(h.Store, req, "") if err != nil { t.Fatalf("normalize failed: %v", err) } @@ -190,14 +147,11 @@ func TestApplyHistorySplitSkipsFirstTurn(t *testing.T) { if out.FinalPrompt != stdReq.FinalPrompt { t.Fatalf("expected prompt unchanged on first turn") } - if len(out.RefFileIDs) != len(stdReq.RefFileIDs) { - t.Fatalf("expected ref files unchanged on first turn") - } } func TestApplyHistorySplitCarriesHistoryText(t *testing.T) { ds := &inlineUploadDSStub{} - h := &Handler{ + h := &openAITestSurface{ Store: mockOpenAIConfig{ wideInput: true, 
historySplitEnabled: true, @@ -206,10 +160,10 @@ func TestApplyHistorySplitCarriesHistoryText(t *testing.T) { DS: ds, } req := map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": historySplitTestMessages(), } - stdReq, err := normalizeOpenAIChatRequest(h.Store, req, "") + stdReq, err := promptcompat.NormalizeOpenAIChatRequest(h.Store, req, "") if err != nil { t.Fatalf("normalize failed: %v", err) } @@ -226,9 +180,9 @@ func TestApplyHistorySplitCarriesHistoryText(t *testing.T) { } } -func TestChatCompletionsHistorySplitUploadsHistoryAndKeepsLatestPrompt(t *testing.T) { +func TestChatCompletionsHistorySplitUploadsHistoryFileAndKeepsLatestPrompt(t *testing.T) { ds := &inlineUploadDSStub{} - h := &Handler{ + h := &openAITestSurface{ Store: mockOpenAIConfig{ wideInput: true, historySplitEnabled: true, @@ -238,7 +192,7 @@ func TestChatCompletionsHistorySplitUploadsHistoryAndKeepsLatestPrompt(t *testin DS: ds, } reqBody, _ := json.Marshal(map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": historySplitTestMessages(), "stream": false, }) @@ -259,18 +213,15 @@ func TestChatCompletionsHistorySplitUploadsHistoryAndKeepsLatestPrompt(t *testin if upload.Filename != "HISTORY.txt" { t.Fatalf("unexpected upload filename: %q", upload.Filename) } - if upload.ContentType != "text/plain; charset=utf-8" { - t.Fatalf("unexpected content type: %q", upload.ContentType) - } if upload.Purpose != "assistants" { t.Fatalf("unexpected purpose: %q", upload.Purpose) } historyText := string(upload.Data) - if !strings.Contains(historyText, "first user turn") || !strings.Contains(historyText, "tool result") { - t.Fatalf("expected older turns in HISTORY.txt, got %s", historyText) + if !strings.Contains(historyText, "[file content end]") || !strings.Contains(historyText, "[file name]: IGNORE") { + t.Fatalf("expected injected IGNORE wrapper, got %s", historyText) } if strings.Contains(historyText, "latest user turn") { - 
t.Fatalf("expected latest turn to remain in prompt, got %s", historyText) + t.Fatalf("expected latest turn to remain live, got %s", historyText) } if ds.completionReq == nil { t.Fatal("expected completion payload to be captured") @@ -282,18 +233,6 @@ func TestChatCompletionsHistorySplitUploadsHistoryAndKeepsLatestPrompt(t *testin if strings.Contains(promptText, "first user turn") { t.Fatalf("expected historical turns removed from completion prompt, got %s", promptText) } - if !strings.Contains(promptText, "[reasoning_content]") || !strings.Contains(promptText, "hidden reasoning") { - t.Fatalf("expected latest assistant reasoning to be attached to completion prompt, got %s", promptText) - } - if !strings.Contains(promptText, "HISTORY.txt") { - t.Fatalf("expected history instruction in completion prompt, got %s", promptText) - } - if !strings.Contains(promptText, "Follow the instructions in this prompt first") { - t.Fatalf("expected stronger prompt override in completion prompt, got %s", promptText) - } - if strings.Index(promptText, "Follow the instructions in this prompt first") > strings.Index(promptText, "Continue the conversation") { - t.Fatalf("expected history split instruction before continuity instructions, got %s", promptText) - } refIDs, _ := ds.completionReq["ref_file_ids"].([]any) if len(refIDs) == 0 || refIDs[0] != "file-inline-1" { t.Fatalf("expected uploaded history file to be first ref_file_id, got %#v", ds.completionReq["ref_file_ids"]) @@ -302,7 +241,7 @@ func TestChatCompletionsHistorySplitUploadsHistoryAndKeepsLatestPrompt(t *testin func TestResponsesHistorySplitUploadsHistoryAndKeepsLatestPrompt(t *testing.T) { ds := &inlineUploadDSStub{} - h := &Handler{ + h := &openAITestSurface{ Store: mockOpenAIConfig{ wideInput: true, historySplitEnabled: true, @@ -312,9 +251,9 @@ func TestResponsesHistorySplitUploadsHistoryAndKeepsLatestPrompt(t *testing.T) { DS: ds, } r := chi.NewRouter() - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) reqBody, _ 
:= json.Marshal(map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": historySplitTestMessages(), "stream": false, }) @@ -341,20 +280,79 @@ func TestResponsesHistorySplitUploadsHistoryAndKeepsLatestPrompt(t *testing.T) { if strings.Contains(promptText, "first user turn") { t.Fatalf("expected historical turns removed from completion prompt, got %s", promptText) } - if !strings.Contains(promptText, "[reasoning_content]") || !strings.Contains(promptText, "hidden reasoning") { - t.Fatalf("expected latest assistant reasoning to be attached to completion prompt, got %s", promptText) +} + +func TestChatCompletionsHistorySplitMapsManagedAuthFailureTo401(t *testing.T) { + ds := &inlineUploadDSStub{ + uploadErr: &dsclient.RequestFailure{Op: "upload file", Kind: dsclient.FailureManagedUnauthorized, Message: "expired token"}, } - if !strings.Contains(promptText, "Follow the instructions in this prompt first") { - t.Fatalf("expected stronger prompt override in completion prompt, got %s", promptText) + h := &openAITestSurface{ + Store: mockOpenAIConfig{ + wideInput: true, + historySplitEnabled: true, + historySplitTurns: 1, + }, + Auth: streamStatusManagedAuthStub{}, + DS: ds, } - if strings.Index(promptText, "Follow the instructions in this prompt first") > strings.Index(promptText, "Continue the conversation") { - t.Fatalf("expected history split instruction before continuity instructions, got %s", promptText) + reqBody, _ := json.Marshal(map[string]any{ + "model": "deepseek-v4-flash", + "messages": historySplitTestMessages(), + "stream": false, + }) + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(string(reqBody))) + req.Header.Set("Authorization", "Bearer managed-key") + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + + h.ChatCompletions(rec, req) + + if rec.Code != http.StatusUnauthorized { + t.Fatalf("expected 401, got %d body=%s", rec.Code, rec.Body.String()) + } 
+ if !strings.Contains(rec.Body.String(), "Please re-login the account in admin") { + t.Fatalf("expected managed auth error message, got %s", rec.Body.String()) + } +} + +func TestResponsesHistorySplitMapsDirectAuthFailureTo401(t *testing.T) { + ds := &inlineUploadDSStub{ + uploadErr: &dsclient.RequestFailure{Op: "upload file", Kind: dsclient.FailureDirectUnauthorized, Message: "invalid token"}, + } + h := &openAITestSurface{ + Store: mockOpenAIConfig{ + wideInput: true, + historySplitEnabled: true, + historySplitTurns: 1, + }, + Auth: streamStatusAuthStub{}, + DS: ds, + } + r := chi.NewRouter() + registerOpenAITestRoutes(r, h) + reqBody, _ := json.Marshal(map[string]any{ + "model": "deepseek-v4-flash", + "messages": historySplitTestMessages(), + "stream": false, + }) + req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(string(reqBody))) + req.Header.Set("Authorization", "Bearer direct-token") + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + + r.ServeHTTP(rec, req) + + if rec.Code != http.StatusUnauthorized { + t.Fatalf("expected 401, got %d body=%s", rec.Code, rec.Body.String()) + } + if !strings.Contains(rec.Body.String(), "Invalid token") { + t.Fatalf("expected direct auth error message, got %s", rec.Body.String()) } } func TestChatCompletionsHistorySplitUploadFailureReturnsInternalServerError(t *testing.T) { - ds := &inlineUploadDSStub{uploadErr: context.DeadlineExceeded} - h := &Handler{ + ds := &inlineUploadDSStub{uploadErr: errors.New("boom")} + h := &openAITestSurface{ Store: mockOpenAIConfig{ wideInput: true, historySplitEnabled: true, @@ -364,7 +362,7 @@ func TestChatCompletionsHistorySplitUploadFailureReturnsInternalServerError(t *t DS: ds, } reqBody, _ := json.Marshal(map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": historySplitTestMessages(), "stream": false, }) @@ -378,7 +376,51 @@ func 
TestChatCompletionsHistorySplitUploadFailureReturnsInternalServerError(t *t if rec.Code != http.StatusInternalServerError { t.Fatalf("expected 500, got %d body=%s", rec.Code, rec.Body.String()) } - if ds.completionReq != nil { - t.Fatalf("did not expect completion payload on upload failure") +} + +func TestHistorySplitWorksAcrossAutoDeleteModes(t *testing.T) { + for _, mode := range []string{"none", "single", "all"} { + t.Run(mode, func(t *testing.T) { + ds := &inlineUploadDSStub{} + h := &openAITestSurface{ + Store: mockOpenAIConfig{ + wideInput: true, + autoDeleteMode: mode, + historySplitEnabled: true, + historySplitTurns: 1, + }, + Auth: streamStatusAuthStub{}, + DS: ds, + } + reqBody, _ := json.Marshal(map[string]any{ + "model": "deepseek-v4-flash", + "messages": historySplitTestMessages(), + "stream": false, + }) + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(string(reqBody))) + req.Header.Set("Authorization", "Bearer direct-token") + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + + h.ChatCompletions(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if len(ds.uploadCalls) != 1 { + t.Fatalf("expected history split upload for mode=%s, got %d", mode, len(ds.uploadCalls)) + } + if ds.completionReq == nil { + t.Fatalf("expected completion payload for mode=%s", mode) + } + promptText, _ := ds.completionReq["prompt"].(string) + if !strings.Contains(promptText, "latest user turn") || strings.Contains(promptText, "first user turn") { + t.Fatalf("unexpected prompt for mode=%s: %s", mode, promptText) + } + }) } } + +func defaultToolChoicePolicy() promptcompat.ToolChoicePolicy { + return promptcompat.DefaultToolChoicePolicy() +} diff --git a/internal/adapter/openai/leaked_output_sanitize_test.go b/internal/httpapi/openai/leaked_output_sanitize_test.go similarity index 100% rename from 
internal/adapter/openai/leaked_output_sanitize_test.go rename to internal/httpapi/openai/leaked_output_sanitize_test.go diff --git a/internal/adapter/openai/models_route_test.go b/internal/httpapi/openai/models_route_test.go similarity index 90% rename from internal/adapter/openai/models_route_test.go rename to internal/httpapi/openai/models_route_test.go index 54c6b9a..9e318f9 100644 --- a/internal/adapter/openai/models_route_test.go +++ b/internal/httpapi/openai/models_route_test.go @@ -9,12 +9,12 @@ import ( ) func TestGetModelRouteDirectAndAlias(t *testing.T) { - h := &Handler{} + h := &openAITestSurface{} r := chi.NewRouter() - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) t.Run("direct", func(t *testing.T) { - req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-chat", nil) + req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-v4-flash", nil) rec := httptest.NewRecorder() r.ServeHTTP(rec, req) if rec.Code != http.StatusOK { @@ -23,7 +23,7 @@ func TestGetModelRouteDirectAndAlias(t *testing.T) { }) t.Run("direct_expert", func(t *testing.T) { - req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-expert-chat", nil) + req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-v4-pro", nil) rec := httptest.NewRecorder() r.ServeHTTP(rec, req) if rec.Code != http.StatusOK { @@ -32,7 +32,7 @@ func TestGetModelRouteDirectAndAlias(t *testing.T) { }) t.Run("direct_vision", func(t *testing.T) { - req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-vision-chat", nil) + req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-v4-vision", nil) rec := httptest.NewRecorder() r.ServeHTTP(rec, req) if rec.Code != http.StatusOK { @@ -51,9 +51,9 @@ func TestGetModelRouteDirectAndAlias(t *testing.T) { } func TestGetModelRouteNotFound(t *testing.T) { - h := &Handler{} + h := &openAITestSurface{} r := chi.NewRouter() - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) req := httptest.NewRequest(http.MethodGet, 
"/v1/models/not-exists", nil) rec := httptest.NewRecorder() diff --git a/internal/httpapi/openai/responses/handler.go b/internal/httpapi/openai/responses/handler.go new file mode 100644 index 0000000..09feb91 --- /dev/null +++ b/internal/httpapi/openai/responses/handler.go @@ -0,0 +1,108 @@ +package responses + +import ( + "context" + "net/http" + "sync" + + "ds2api/internal/auth" + "ds2api/internal/chathistory" + "ds2api/internal/httpapi/openai/files" + "ds2api/internal/httpapi/openai/history" + "ds2api/internal/httpapi/openai/shared" + "ds2api/internal/promptcompat" + "ds2api/internal/toolstream" +) + +const openAIGeneralMaxSize = shared.GeneralMaxSize + +var writeJSON = shared.WriteJSON + +type Handler struct { + Store shared.ConfigReader + Auth shared.AuthResolver + DS shared.DeepSeekCaller + ChatHistory *chathistory.Store + + responsesMu sync.Mutex + responses *responseStore +} + +func (h *Handler) compatStripReferenceMarkers() bool { + if h == nil { + return true + } + return shared.CompatStripReferenceMarkers(h.Store) +} + +func (h *Handler) applyHistorySplit(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) { + if h == nil { + return stdReq, nil + } + return history.Service{Store: h.Store, DS: h.DS}.Apply(ctx, a, stdReq) +} + +func (h *Handler) preprocessInlineFileInputs(ctx context.Context, a *auth.RequestAuth, req map[string]any) error { + if h == nil { + return nil + } + return (&files.Handler{Store: h.Store, Auth: h.Auth, DS: h.DS, ChatHistory: h.ChatHistory}).PreprocessInlineFileInputs(ctx, a, req) +} + +func (h *Handler) toolcallFeatureMatchEnabled() bool { + if h == nil { + return shared.ToolcallFeatureMatchEnabled(nil) + } + return shared.ToolcallFeatureMatchEnabled(h.Store) +} + +func (h *Handler) toolcallEarlyEmitHighConfidence() bool { + if h == nil { + return shared.ToolcallEarlyEmitHighConfidence(nil) + } + return shared.ToolcallEarlyEmitHighConfidence(h.Store) +} + +func 
writeOpenAIError(w http.ResponseWriter, status int, message string) { + shared.WriteOpenAIError(w, status, message) +} + +func writeOpenAIErrorWithCode(w http.ResponseWriter, status int, message, code string) { + shared.WriteOpenAIErrorWithCode(w, status, message, code) +} + +func openAIErrorType(status int) string { + return shared.OpenAIErrorType(status) +} + +func writeOpenAIInlineFileError(w http.ResponseWriter, err error) { + files.WriteInlineFileError(w, err) +} + +func mapHistorySplitError(err error) (int, string) { + return history.MapError(err) +} + +func requestTraceID(r *http.Request) string { + return shared.RequestTraceID(r) +} + +func cleanVisibleOutput(text string, stripReferenceMarkers bool) string { + return shared.CleanVisibleOutput(text, stripReferenceMarkers) +} + +func replaceCitationMarkersWithLinks(text string, links map[int]string) string { + return shared.ReplaceCitationMarkersWithLinks(text, links) +} + +func upstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int, string, string) { + return shared.UpstreamEmptyOutputDetail(contentFilter, text, thinking) +} + +func writeUpstreamEmptyOutputError(w http.ResponseWriter, text, thinking string, contentFilter bool) bool { + return shared.WriteUpstreamEmptyOutputError(w, text, thinking, contentFilter) +} + +func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, seenNames map[int]string) []toolstream.ToolCallDelta { + return shared.FilterIncrementalToolCallDeltasByAllowed(deltas, seenNames) +} diff --git a/internal/adapter/openai/response_store.go b/internal/httpapi/openai/responses/response_store.go similarity index 99% rename from internal/adapter/openai/response_store.go rename to internal/httpapi/openai/responses/response_store.go index 63ebbaa..8d7ec75 100644 --- a/internal/adapter/openai/response_store.go +++ b/internal/httpapi/openai/responses/response_store.go @@ -1,4 +1,4 @@ -package openai +package responses import ( "sync" diff --git 
a/internal/adapter/openai/responses_embeddings_test.go b/internal/httpapi/openai/responses/responses_embeddings_test.go similarity index 87% rename from internal/adapter/openai/responses_embeddings_test.go rename to internal/httpapi/openai/responses/responses_embeddings_test.go index a75cc3f..cfff04b 100644 --- a/internal/adapter/openai/responses_embeddings_test.go +++ b/internal/httpapi/openai/responses/responses_embeddings_test.go @@ -1,13 +1,16 @@ -package openai +package responses import ( "strings" "testing" "time" + + "ds2api/internal/httpapi/openai/embeddings" + "ds2api/internal/promptcompat" ) func TestNormalizeResponsesInputAsMessagesString(t *testing.T) { - msgs := normalizeResponsesInputAsMessages("hello") + msgs := promptcompat.NormalizeResponsesInputAsMessages("hello") if len(msgs) != 1 { t.Fatalf("expected one message, got %d", len(msgs)) } @@ -23,7 +26,7 @@ func TestResponsesMessagesFromRequestWithInstructions(t *testing.T) { "input": "ping", "instructions": "system text", } - msgs := responsesMessagesFromRequest(req) + msgs := promptcompat.ResponsesMessagesFromRequest(req) if len(msgs) != 2 { t.Fatalf("expected two messages, got %d", len(msgs)) } @@ -34,7 +37,7 @@ func TestResponsesMessagesFromRequestWithInstructions(t *testing.T) { } func TestNormalizeResponsesInputAsMessagesObjectRoleContentBlocks(t *testing.T) { - msgs := normalizeResponsesInputAsMessages(map[string]any{ + msgs := promptcompat.NormalizeResponsesInputAsMessages(map[string]any{ "role": "user", "content": []any{ map[string]any{"type": "input_text", "text": "line-1"}, @@ -48,13 +51,13 @@ func TestNormalizeResponsesInputAsMessagesObjectRoleContentBlocks(t *testing.T) if m["role"] != "user" { t.Fatalf("unexpected role: %#v", m) } - if strings.TrimSpace(normalizeOpenAIContentForPrompt(m["content"])) != "line-1\nline-2" { + if strings.TrimSpace(promptcompat.NormalizeOpenAIContentForPrompt(m["content"])) != "line-1\nline-2" { t.Fatalf("unexpected content: %#v", m["content"]) } } func 
TestNormalizeResponsesInputAsMessagesFunctionCallOutput(t *testing.T) { - msgs := normalizeResponsesInputAsMessages([]any{ + msgs := promptcompat.NormalizeResponsesInputAsMessages([]any{ map[string]any{ "type": "function_call_output", "call_id": "call_123", @@ -74,7 +77,7 @@ func TestNormalizeResponsesInputAsMessagesFunctionCallOutput(t *testing.T) { } func TestNormalizeResponsesInputAsMessagesBackfillsToolResultNameFromCallID(t *testing.T) { - msgs := normalizeResponsesInputAsMessages([]any{ + msgs := promptcompat.NormalizeResponsesInputAsMessages([]any{ map[string]any{ "type": "function_call", "call_id": "call_999", @@ -100,7 +103,7 @@ func TestNormalizeResponsesInputAsMessagesBackfillsToolResultNameFromCallID(t *t } func TestNormalizeResponsesInputAsMessagesFunctionCallItem(t *testing.T) { - msgs := normalizeResponsesInputAsMessages([]any{ + msgs := promptcompat.NormalizeResponsesInputAsMessages([]any{ map[string]any{ "type": "function_call", "call_id": "call_456", @@ -136,7 +139,7 @@ func TestNormalizeResponsesInputAsMessagesFunctionCallItem(t *testing.T) { } func TestNormalizeResponsesInputAsMessagesFunctionCallItemPreservesConcatenatedArguments(t *testing.T) { - msgs := normalizeResponsesInputAsMessages([]any{ + msgs := promptcompat.NormalizeResponsesInputAsMessages([]any{ map[string]any{ "type": "function_call", "call_id": "call_456", @@ -157,7 +160,7 @@ func TestNormalizeResponsesInputAsMessagesFunctionCallItemPreservesConcatenatedA } func TestCollectOpenAIRefFileIDs(t *testing.T) { - got := collectOpenAIRefFileIDs(map[string]any{ + got := promptcompat.CollectOpenAIRefFileIDs(map[string]any{ "ref_file_ids": []any{"file-top", "file-dup"}, "attachments": []any{ map[string]any{"file_id": "file-attachment"}, @@ -184,15 +187,15 @@ func TestCollectOpenAIRefFileIDs(t *testing.T) { } func TestExtractEmbeddingInputs(t *testing.T) { - got := extractEmbeddingInputs([]any{"a", "b"}) + got := embeddings.ExtractEmbeddingInputs([]any{"a", "b"}) if len(got) != 2 || got[0] 
!= "a" || got[1] != "b" { t.Fatalf("unexpected inputs: %#v", got) } } func TestDeterministicEmbeddingStable(t *testing.T) { - a := deterministicEmbedding("hello") - b := deterministicEmbedding("hello") + a := embeddings.DeterministicEmbedding("hello") + b := embeddings.DeterministicEmbedding("hello") if len(a) != 64 || len(b) != 64 { t.Fatalf("expected 64 dims, got %d and %d", len(a), len(b)) } diff --git a/internal/adapter/openai/responses_handler.go b/internal/httpapi/openai/responses/responses_handler.go similarity index 89% rename from internal/adapter/openai/responses_handler.go rename to internal/httpapi/openai/responses/responses_handler.go index 2994088..8913322 100644 --- a/internal/adapter/openai/responses_handler.go +++ b/internal/httpapi/openai/responses/responses_handler.go @@ -1,4 +1,4 @@ -package openai +package responses import ( "ds2api/internal/toolcall" @@ -13,11 +13,11 @@ import ( "ds2api/internal/auth" "ds2api/internal/config" - "ds2api/internal/deepseek" + dsprotocol "ds2api/internal/deepseek/protocol" openaifmt "ds2api/internal/format/openai" + "ds2api/internal/promptcompat" "ds2api/internal/sse" streamengine "ds2api/internal/stream" - "ds2api/internal/util" ) func (h *Handler) GetResponseByID(w http.ResponseWriter, r *http.Request) { @@ -80,14 +80,15 @@ func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) { return } traceID := requestTraceID(r) - stdReq, err := normalizeOpenAIResponsesRequest(h.Store, req, traceID) + stdReq, err := promptcompat.NormalizeOpenAIResponsesRequest(h.Store, req, traceID) if err != nil { writeOpenAIError(w, http.StatusBadRequest, err.Error()) return } stdReq, err = h.applyHistorySplit(r.Context(), a, stdReq) if err != nil { - writeOpenAIError(w, http.StatusInternalServerError, err.Error()) + status, message := mapHistorySplitError(err) + writeOpenAIError(w, status, message) return } @@ -120,7 +121,7 @@ func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) { h.handleResponsesNonStream(w, 
resp, owner, responseID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, stdReq.ToolChoice, traceID) } -func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice util.ToolChoicePolicy, traceID string) { +func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice promptcompat.ToolChoicePolicy, traceID string) { defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) @@ -134,7 +135,7 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res if searchEnabled { sanitizedText = replaceCitationMarkersWithLinks(sanitizedText, result.CitationLinks) } - if writeUpstreamEmptyOutputError(w, sanitizedText, result.ContentFilter) { + if writeUpstreamEmptyOutputError(w, sanitizedText, sanitizedThinking, result.ContentFilter) { return } textParsed := toolcall.ParseStandaloneToolCallsDetailed(sanitizedText, toolNames) @@ -151,7 +152,7 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res writeJSON(w, http.StatusOK, responseObj) } -func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request, resp *http.Response, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice util.ToolChoicePolicy, traceID string) { +func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request, resp *http.Response, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice promptcompat.ToolChoicePolicy, traceID string) { defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { body, _ := 
io.ReadAll(resp.Body) @@ -199,9 +200,9 @@ func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request, Body: resp.Body, ThinkingEnabled: thinkingEnabled, InitialType: initialType, - KeepAliveInterval: time.Duration(deepseek.KeepAliveTimeout) * time.Second, - IdleTimeout: time.Duration(deepseek.StreamIdleTimeout) * time.Second, - MaxKeepAliveNoInput: deepseek.MaxKeepaliveCount, + KeepAliveInterval: time.Duration(dsprotocol.KeepAliveTimeout) * time.Second, + IdleTimeout: time.Duration(dsprotocol.StreamIdleTimeout) * time.Second, + MaxKeepAliveNoInput: dsprotocol.MaxKeepaliveCount, }, streamengine.ConsumeHooks{ OnParsed: streamRuntime.onParsed, OnFinalize: func(_ streamengine.StopReason, _ error) { @@ -210,7 +211,7 @@ func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request, }) } -func logResponsesToolPolicyRejection(traceID string, policy util.ToolChoicePolicy, parsed toolcall.ToolCallParseResult, channel string) { +func logResponsesToolPolicyRejection(traceID string, policy promptcompat.ToolChoicePolicy, parsed toolcall.ToolCallParseResult, channel string) { rejected := filteredRejectedToolNamesForLog(parsed.RejectedToolNames) if !parsed.RejectedByPolicy || len(rejected) == 0 { return diff --git a/internal/adapter/openai/responses_route_test.go b/internal/httpapi/openai/responses/responses_route_test.go similarity index 99% rename from internal/adapter/openai/responses_route_test.go rename to internal/httpapi/openai/responses/responses_route_test.go index 574c6fa..1d6a847 100644 --- a/internal/adapter/openai/responses_route_test.go +++ b/internal/httpapi/openai/responses/responses_route_test.go @@ -1,4 +1,4 @@ -package openai +package responses import ( "bytes" diff --git a/internal/adapter/openai/responses_stream_runtime_core.go b/internal/httpapi/openai/responses/responses_stream_runtime_core.go similarity index 87% rename from internal/adapter/openai/responses_stream_runtime_core.go rename to 
internal/httpapi/openai/responses/responses_stream_runtime_core.go index af7eb8e..1bd81e6 100644 --- a/internal/adapter/openai/responses_stream_runtime_core.go +++ b/internal/httpapi/openai/responses/responses_stream_runtime_core.go @@ -1,4 +1,4 @@ -package openai +package responses import ( "ds2api/internal/toolcall" @@ -7,9 +7,10 @@ import ( "ds2api/internal/config" openaifmt "ds2api/internal/format/openai" + "ds2api/internal/promptcompat" "ds2api/internal/sse" streamengine "ds2api/internal/stream" - "ds2api/internal/util" + "ds2api/internal/toolstream" ) type responsesStreamRuntime struct { @@ -22,7 +23,7 @@ type responsesStreamRuntime struct { finalPrompt string toolNames []string traceID string - toolChoice util.ToolChoicePolicy + toolChoice promptcompat.ToolChoicePolicy thinkingEnabled bool searchEnabled bool @@ -33,7 +34,7 @@ type responsesStreamRuntime struct { toolCallsEmitted bool toolCallsDoneEmitted bool - sieve toolStreamSieveState + sieve toolstream.State thinking strings.Builder text strings.Builder visibleText strings.Builder @@ -68,7 +69,7 @@ func newResponsesStreamRuntime( toolNames []string, bufferToolContent bool, emitEarlyToolDeltas bool, - toolChoice util.ToolChoicePolicy, + toolChoice promptcompat.ToolChoicePolicy, traceID string, persistResponse func(obj map[string]any), ) *responsesStreamRuntime { @@ -99,7 +100,7 @@ func newResponsesStreamRuntime( } } -func (s *responsesStreamRuntime) failResponse(message, code string) { +func (s *responsesStreamRuntime) failResponse(status int, message, code string) { s.failed = true failedResp := map[string]any{ "id": s.responseID, @@ -107,11 +108,12 @@ func (s *responsesStreamRuntime) failResponse(message, code string) { "object": "response", "model": s.model, "status": "failed", + "status_code": status, "output": []any{}, "output_text": "", "error": map[string]any{ "message": message, - "type": "invalid_request_error", + "type": openAIErrorType(status), "code": code, "param": nil, }, @@ -119,7 +121,7 @@ 
func (s *responsesStreamRuntime) failResponse(message, code string) { if s.persistResponse != nil { s.persistResponse(failedResp) } - s.sendEvent("response.failed", openaifmt.BuildResponsesFailedPayload(s.responseID, s.model, message, code)) + s.sendEvent("response.failed", openaifmt.BuildResponsesFailedPayload(s.responseID, s.model, status, message, code)) s.sendDone() } @@ -128,7 +130,7 @@ func (s *responsesStreamRuntime) finalize() { finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers) if s.bufferToolContent { - s.processToolStreamEvents(flushToolSieve(&s.sieve, s.toolNames), true, true) + s.processToolStreamEvents(toolstream.Flush(&s.sieve, s.toolNames), true, true) } textParsed := toolcall.ParseStandaloneToolCallsDetailed(finalText, s.toolNames) @@ -145,16 +147,12 @@ func (s *responsesStreamRuntime) finalize() { s.closeMessageItem() if s.toolChoice.IsRequired() && len(detected) == 0 { - s.failResponse("tool_choice requires at least one valid tool call.", "tool_choice_violation") + s.failResponse(http.StatusUnprocessableEntity, "tool_choice requires at least one valid tool call.", "tool_choice_violation") return } if len(detected) == 0 && strings.TrimSpace(finalText) == "" { - code := "upstream_empty_output" - message := "Upstream model returned empty output." - if finalThinking != "" { - message = "Upstream model returned reasoning without visible output." 
- } - s.failResponse(message, code) + status, message, code := upstreamEmptyOutputDetail(false, finalText, finalThinking) + s.failResponse(status, message, code) return } s.closeIncompleteFunctionItems() @@ -224,7 +222,7 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa s.emitTextDelta(trimmed) continue } - s.processToolStreamEvents(processToolSieveChunk(&s.sieve, trimmed, s.toolNames), true, true) + s.processToolStreamEvents(toolstream.ProcessChunk(&s.sieve, trimmed, s.toolNames), true, true) } return streamengine.ParsedDecision{ContentSeen: contentSeen} diff --git a/internal/adapter/openai/responses_stream_runtime_events.go b/internal/httpapi/openai/responses/responses_stream_runtime_events.go similarity index 92% rename from internal/adapter/openai/responses_stream_runtime_events.go rename to internal/httpapi/openai/responses/responses_stream_runtime_events.go index a010236..d497f04 100644 --- a/internal/adapter/openai/responses_stream_runtime_events.go +++ b/internal/httpapi/openai/responses/responses_stream_runtime_events.go @@ -1,9 +1,10 @@ -package openai +package responses import ( "encoding/json" openaifmt "ds2api/internal/format/openai" + "ds2api/internal/toolstream" ) func (s *responsesStreamRuntime) nextSequence() int { @@ -39,7 +40,7 @@ func (s *responsesStreamRuntime) sendDone() { } } -func (s *responsesStreamRuntime) processToolStreamEvents(events []toolStreamEvent, emitContent bool, resetAfterToolCalls bool) { +func (s *responsesStreamRuntime) processToolStreamEvents(events []toolstream.Event, emitContent bool, resetAfterToolCalls bool) { for _, evt := range events { if emitContent && evt.Content != "" { s.emitTextDelta(evt.Content) diff --git a/internal/adapter/openai/responses_stream_runtime_toolcalls.go b/internal/httpapi/openai/responses/responses_stream_runtime_toolcalls.go similarity index 98% rename from internal/adapter/openai/responses_stream_runtime_toolcalls.go rename to 
internal/httpapi/openai/responses/responses_stream_runtime_toolcalls.go index 639a6d0..d3023c9 100644 --- a/internal/adapter/openai/responses_stream_runtime_toolcalls.go +++ b/internal/httpapi/openai/responses/responses_stream_runtime_toolcalls.go @@ -1,7 +1,8 @@ -package openai +package responses import ( "ds2api/internal/toolcall" + "ds2api/internal/toolstream" "encoding/json" "strings" @@ -201,7 +202,7 @@ func (s *responsesStreamRuntime) ensureFunctionItemAdded(callIndex int, name str s.toolCallsEmitted = true } -func (s *responsesStreamRuntime) emitFunctionCallDeltaEvents(deltas []toolCallDelta) { +func (s *responsesStreamRuntime) emitFunctionCallDeltaEvents(deltas []toolstream.ToolCallDelta) { for _, d := range deltas { s.ensureFunctionItemAdded(d.Index, d.Name) if strings.TrimSpace(d.Arguments) == "" { diff --git a/internal/adapter/openai/responses_stream_runtime_toolcalls_finalize.go b/internal/httpapi/openai/responses/responses_stream_runtime_toolcalls_finalize.go similarity index 99% rename from internal/adapter/openai/responses_stream_runtime_toolcalls_finalize.go rename to internal/httpapi/openai/responses/responses_stream_runtime_toolcalls_finalize.go index 249ad22..4195c80 100644 --- a/internal/adapter/openai/responses_stream_runtime_toolcalls_finalize.go +++ b/internal/httpapi/openai/responses/responses_stream_runtime_toolcalls_finalize.go @@ -1,4 +1,4 @@ -package openai +package responses import ( "ds2api/internal/toolcall" diff --git a/internal/adapter/openai/responses_stream_test.go b/internal/httpapi/openai/responses/responses_stream_test.go similarity index 88% rename from internal/adapter/openai/responses_stream_test.go rename to internal/httpapi/openai/responses/responses_stream_test.go index 078b03d..c19f311 100644 --- a/internal/adapter/openai/responses_stream_test.go +++ b/internal/httpapi/openai/responses/responses_stream_test.go @@ -1,4 +1,4 @@ -package openai +package responses import ( "bufio" @@ -9,7 +9,7 @@ import ( "strings" "testing" 
- "ds2api/internal/util" + "ds2api/internal/promptcompat" ) func TestHandleResponsesStreamDoesNotEmitReasoningTextCompatEvents(t *testing.T) { @@ -27,7 +27,7 @@ func TestHandleResponsesStreamDoesNotEmitReasoningTextCompatEvents(t *testing.T) Body: io.NopCloser(strings.NewReader(streamBody)), } - h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-reasoner", "prompt", true, false, nil, util.DefaultToolChoicePolicy(), "") + h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-v4-pro", "prompt", true, false, nil, promptcompat.DefaultToolChoicePolicy(), "") body := rec.Body.String() if !strings.Contains(body, "event: response.reasoning.delta") { @@ -57,7 +57,7 @@ func TestHandleResponsesStreamEmitsOutputTextDoneBeforeContentPartDone(t *testin Body: io.NopCloser(strings.NewReader(streamBody)), } - h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, nil, util.DefaultToolChoicePolicy(), "") + h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-v4-flash", "prompt", false, false, nil, promptcompat.DefaultToolChoicePolicy(), "") body := rec.Body.String() if !strings.Contains(body, "event: response.output_text.done") { t.Fatalf("expected response.output_text.done payload, body=%s", body) @@ -91,7 +91,7 @@ func TestHandleResponsesStreamOutputTextDeltaCarriesItemIndexes(t *testing.T) { Body: io.NopCloser(strings.NewReader(streamBody)), } - h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, nil, util.DefaultToolChoicePolicy(), "") + h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-v4-flash", "prompt", false, false, nil, promptcompat.DefaultToolChoicePolicy(), "") body := rec.Body.String() deltaPayload, ok := extractSSEEventPayload(body, "response.output_text.delta") @@ -122,15 +122,15 @@ func TestHandleResponsesStreamEmitsDistinctToolCallIDsAcrossSeparateToolBlocks(t return "data: " + 
string(b) + "\n" } - streamBody := sseLine("前置文本\n\n \n read_file\n {\"path\":\"README.MD\"}\n \n") + - sseLine("中间文本\n\n \n search\n {\"q\":\"golang\"}\n \n") + + streamBody := sseLine("前置文本\n\n \n README.MD\n \n") + + sseLine("中间文本\n\n \n golang\n \n") + "data: [DONE]\n" resp := &http.Response{ StatusCode: http.StatusOK, Body: io.NopCloser(strings.NewReader(streamBody)), } - h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"read_file", "search"}, util.DefaultToolChoicePolicy(), "") + h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-v4-flash", "prompt", false, false, []string{"read_file", "search"}, promptcompat.DefaultToolChoicePolicy(), "") body := rec.Body.String() doneEvents := extractSSEEventPayloads(body, "response.function_call_arguments.done") @@ -179,11 +179,11 @@ func TestHandleResponsesStreamRequiredToolChoiceFailure(t *testing.T) { Body: io.NopCloser(strings.NewReader(streamBody)), } - policy := util.ToolChoicePolicy{ - Mode: util.ToolChoiceRequired, + policy := promptcompat.ToolChoicePolicy{ + Mode: promptcompat.ToolChoiceRequired, Allowed: map[string]struct{}{"read_file": {}}, } - h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"read_file"}, policy, "") + h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-v4-flash", "prompt", false, false, []string{"read_file"}, policy, "") body := rec.Body.String() if !strings.Contains(body, "event: response.failed") { @@ -213,7 +213,7 @@ func TestHandleResponsesStreamFailsWhenUpstreamHasOnlyThinking(t *testing.T) { Body: io.NopCloser(strings.NewReader(streamBody)), } - h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-reasoner", "prompt", true, false, nil, util.DefaultToolChoicePolicy(), "") + h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-v4-pro", "prompt", true, false, nil, 
promptcompat.DefaultToolChoicePolicy(), "") body := rec.Body.String() if !strings.Contains(body, "event: response.failed") { @@ -242,12 +242,12 @@ func TestHandleResponsesNonStreamRequiredToolChoiceViolation(t *testing.T) { `data: [DONE]` + "\n", )), } - policy := util.ToolChoicePolicy{ - Mode: util.ToolChoiceRequired, + policy := promptcompat.ToolChoicePolicy{ + Mode: promptcompat.ToolChoiceRequired, Allowed: map[string]struct{}{"read_file": {}}, } - h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"read_file"}, policy, "") + h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-v4-flash", "prompt", false, false, []string{"read_file"}, policy, "") if rec.Code != http.StatusUnprocessableEntity { t.Fatalf("expected 422 for required tool_choice violation, got %d body=%s", rec.Code, rec.Body.String()) } @@ -269,12 +269,12 @@ func TestHandleResponsesNonStreamRequiredToolChoiceIgnoresThinkingToolPayload(t `data: [DONE]` + "\n", )), } - policy := util.ToolChoicePolicy{ - Mode: util.ToolChoiceRequired, + policy := promptcompat.ToolChoicePolicy{ + Mode: promptcompat.ToolChoiceRequired, Allowed: map[string]struct{}{"read_file": {}}, } - h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", true, false, []string{"read_file"}, policy, "") + h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-v4-flash", "prompt", true, false, []string{"read_file"}, policy, "") if rec.Code != http.StatusUnprocessableEntity { t.Fatalf("expected 422 for required tool_choice violation, got %d body=%s", rec.Code, rec.Body.String()) } @@ -296,7 +296,7 @@ func TestHandleResponsesNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) )), } - h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, nil, util.DefaultToolChoicePolicy(), "") + h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-v4-flash", 
"prompt", false, false, nil, promptcompat.DefaultToolChoicePolicy(), "") if rec.Code != http.StatusTooManyRequests { t.Fatalf("expected 429 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String()) } @@ -318,7 +318,7 @@ func TestHandleResponsesNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWi )), } - h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, nil, util.DefaultToolChoicePolicy(), "") + h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-v4-flash", "prompt", false, false, nil, promptcompat.DefaultToolChoicePolicy(), "") if rec.Code != http.StatusBadRequest { t.Fatalf("expected 400 for filtered empty upstream output, got %d body=%s", rec.Code, rec.Body.String()) } @@ -340,7 +340,7 @@ func TestHandleResponsesNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testin )), } - h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-reasoner", "prompt", true, false, nil, util.DefaultToolChoicePolicy(), "") + h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-v4-pro", "prompt", true, false, nil, promptcompat.DefaultToolChoicePolicy(), "") if rec.Code != http.StatusTooManyRequests { t.Fatalf("expected 429 for thinking-only upstream output, got %d body=%s", rec.Code, rec.Body.String()) } diff --git a/internal/httpapi/openai/responses/test_helpers_test.go b/internal/httpapi/openai/responses/test_helpers_test.go new file mode 100644 index 0000000..f239aa5 --- /dev/null +++ b/internal/httpapi/openai/responses/test_helpers_test.go @@ -0,0 +1,28 @@ +package responses + +import ( + "encoding/json" + "testing" + + "github.com/go-chi/chi/v5" + + "ds2api/internal/httpapi/openai/shared" +) + +func asString(v any) string { + return shared.AsString(v) +} + +func decodeJSONBody(t *testing.T, body string) map[string]any { + t.Helper() + var out map[string]any + if err := json.Unmarshal([]byte(body), &out); err != nil { + t.Fatalf("decode json failed: %v, 
body=%s", err, body) + } + return out +} + +func RegisterRoutes(r chi.Router, h *Handler) { + r.Post("/v1/responses", h.Responses) + r.Get("/v1/responses/{response_id}", h.GetResponseByID) +} diff --git a/internal/adapter/openai/citation_links.go b/internal/httpapi/openai/shared/citation_links.go similarity index 88% rename from internal/adapter/openai/citation_links.go rename to internal/httpapi/openai/shared/citation_links.go index 009d728..60d7408 100644 --- a/internal/adapter/openai/citation_links.go +++ b/internal/httpapi/openai/shared/citation_links.go @@ -1,4 +1,4 @@ -package openai +package shared import ( "fmt" @@ -9,7 +9,7 @@ import ( var citationMarkerPattern = regexp.MustCompile(`(?i)\[citation:\s*(\d+)\]`) -func replaceCitationMarkersWithLinks(text string, links map[int]string) string { +func ReplaceCitationMarkersWithLinks(text string, links map[int]string) string { if strings.TrimSpace(text) == "" || len(links) == 0 { return text } diff --git a/internal/adapter/openai/deps.go b/internal/httpapi/openai/shared/deps.go similarity index 58% rename from internal/adapter/openai/deps.go rename to internal/httpapi/openai/shared/deps.go index 50118ff..3db5b37 100644 --- a/internal/adapter/openai/deps.go +++ b/internal/httpapi/openai/shared/deps.go @@ -1,12 +1,21 @@ -package openai +package shared import ( "context" "net/http" "ds2api/internal/auth" + "ds2api/internal/chathistory" "ds2api/internal/config" - "ds2api/internal/deepseek" + dsclient "ds2api/internal/deepseek/client" + "ds2api/internal/util" +) + +const ( + // UploadMaxSize limits total multipart request body size (100 MiB). + UploadMaxSize = 100 << 20 + // GeneralMaxSize limits total JSON request body size (100 MiB). 
+ GeneralMaxSize = 100 << 20 ) type AuthResolver interface { @@ -18,9 +27,9 @@ type AuthResolver interface { type DeepSeekCaller interface { CreateSession(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error) GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error) - UploadFile(ctx context.Context, a *auth.RequestAuth, req deepseek.UploadFileRequest, maxAttempts int) (*deepseek.UploadFileResult, error) + UploadFile(ctx context.Context, a *auth.RequestAuth, req dsclient.UploadFileRequest, maxAttempts int) (*dsclient.UploadFileResult, error) CallCompletion(ctx context.Context, a *auth.RequestAuth, payload map[string]any, powResp string, maxAttempts int) (*http.Response, error) - DeleteSessionForToken(ctx context.Context, token string, sessionID string) (*deepseek.DeleteSessionResult, error) + DeleteSessionForToken(ctx context.Context, token string, sessionID string) (*dsclient.DeleteSessionResult, error) DeleteAllSessionsForToken(ctx context.Context, token string) error } @@ -38,6 +47,22 @@ type ConfigReader interface { HistorySplitTriggerAfterTurns() int } +type Deps struct { + Store ConfigReader + Auth AuthResolver + DS DeepSeekCaller + ChatHistory *chathistory.Store +} + +func CompatStripReferenceMarkers(store ConfigReader) bool { + if store == nil { + return true + } + return store.CompatStripReferenceMarkers() +} + +var WriteJSON = util.WriteJSON + var _ AuthResolver = (*auth.Resolver)(nil) -var _ DeepSeekCaller = (*deepseek.Client)(nil) +var _ DeepSeekCaller = (*dsclient.Client)(nil) var _ ConfigReader = (*config.Store)(nil) diff --git a/internal/adapter/openai/handler_errors.go b/internal/httpapi/openai/shared/handler_errors.go similarity index 73% rename from internal/adapter/openai/handler_errors.go rename to internal/httpapi/openai/shared/handler_errors.go index 2e60d73..52f399e 100644 --- a/internal/adapter/openai/handler_errors.go +++ b/internal/httpapi/openai/shared/handler_errors.go @@ -1,26 +1,26 @@ 
-package openai +package shared import "net/http" -func writeOpenAIError(w http.ResponseWriter, status int, message string) { - writeOpenAIErrorWithCode(w, status, message, "") +func WriteOpenAIError(w http.ResponseWriter, status int, message string) { + WriteOpenAIErrorWithCode(w, status, message, "") } -func writeOpenAIErrorWithCode(w http.ResponseWriter, status int, message, code string) { +func WriteOpenAIErrorWithCode(w http.ResponseWriter, status int, message, code string) { if code == "" { - code = openAIErrorCode(status) + code = OpenAIErrorCode(status) } - writeJSON(w, status, map[string]any{ + WriteJSON(w, status, map[string]any{ "error": map[string]any{ "message": message, - "type": openAIErrorType(status), + "type": OpenAIErrorType(status), "code": code, "param": nil, }, }) } -func openAIErrorType(status int) string { +func OpenAIErrorType(status int) string { switch status { case http.StatusBadRequest: return "invalid_request_error" @@ -40,7 +40,7 @@ func openAIErrorType(status int) string { } } -func openAIErrorCode(status int) string { +func OpenAIErrorCode(status int) string { switch status { case http.StatusBadRequest: return "invalid_request" diff --git a/internal/httpapi/openai/shared/handler_toolcall_format.go b/internal/httpapi/openai/shared/handler_toolcall_format.go new file mode 100644 index 0000000..15cd7ea --- /dev/null +++ b/internal/httpapi/openai/shared/handler_toolcall_format.go @@ -0,0 +1,101 @@ +package shared + +import ( + "ds2api/internal/toolcall" + "encoding/json" + "strings" + + "github.com/google/uuid" + + "ds2api/internal/toolstream" +) + +func FormatIncrementalStreamToolCallDeltas(deltas []toolstream.ToolCallDelta, ids map[int]string) []map[string]any { + if len(deltas) == 0 { + return nil + } + out := make([]map[string]any, 0, len(deltas)) + for _, d := range deltas { + if d.Name == "" && d.Arguments == "" { + continue + } + callID, ok := ids[d.Index] + if !ok || callID == "" { + callID = "call_" + 
strings.ReplaceAll(uuid.NewString(), "-", "") + ids[d.Index] = callID + } + item := map[string]any{ + "index": d.Index, + "id": callID, + "type": "function", + } + fn := map[string]any{} + if d.Name != "" { + fn["name"] = d.Name + } + if d.Arguments != "" { + fn["arguments"] = d.Arguments + } + if len(fn) > 0 { + item["function"] = fn + } + out = append(out, item) + } + return out +} + +func FilterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, seenNames map[int]string) []toolstream.ToolCallDelta { + if len(deltas) == 0 { + return nil + } + out := make([]toolstream.ToolCallDelta, 0, len(deltas)) + for _, d := range deltas { + if d.Name != "" { + if seenNames != nil { + seenNames[d.Index] = d.Name + } + out = append(out, d) + continue + } + if seenNames == nil { + out = append(out, d) + continue + } + name := strings.TrimSpace(seenNames[d.Index]) + if name == "" { + continue + } + out = append(out, d) + } + return out +} + +func FormatFinalStreamToolCallsWithStableIDs(calls []toolcall.ParsedToolCall, ids map[int]string) []map[string]any { + if len(calls) == 0 { + return nil + } + out := make([]map[string]any, 0, len(calls)) + for i, c := range calls { + callID := "" + if ids != nil { + callID = strings.TrimSpace(ids[i]) + } + if callID == "" { + callID = "call_" + strings.ReplaceAll(uuid.NewString(), "-", "") + if ids != nil { + ids[i] = callID + } + } + args, _ := json.Marshal(c.Input) + out = append(out, map[string]any{ + "index": i, + "id": callID, + "type": "function", + "function": map[string]any{ + "name": c.Name, + "arguments": string(args), + }, + }) + } + return out +} diff --git a/internal/httpapi/openai/shared/handler_toolcall_policy.go b/internal/httpapi/openai/shared/handler_toolcall_policy.go new file mode 100644 index 0000000..181a627 --- /dev/null +++ b/internal/httpapi/openai/shared/handler_toolcall_policy.go @@ -0,0 +1,9 @@ +package shared + +func ToolcallFeatureMatchEnabled(_ ConfigReader) bool { + return true +} + +func 
ToolcallEarlyEmitHighConfidence(_ ConfigReader) bool { + return true +} diff --git a/internal/adapter/openai/leaked_output_sanitize.go b/internal/httpapi/openai/shared/leaked_output_sanitize.go similarity index 99% rename from internal/adapter/openai/leaked_output_sanitize.go rename to internal/httpapi/openai/shared/leaked_output_sanitize.go index 70f6eeb..0b0b897 100644 --- a/internal/adapter/openai/leaked_output_sanitize.go +++ b/internal/httpapi/openai/shared/leaked_output_sanitize.go @@ -1,4 +1,4 @@ -package openai +package shared import ( "regexp" diff --git a/internal/httpapi/openai/shared/models.go b/internal/httpapi/openai/shared/models.go new file mode 100644 index 0000000..81ba607 --- /dev/null +++ b/internal/httpapi/openai/shared/models.go @@ -0,0 +1,28 @@ +package shared + +import ( + "net/http" + "strings" + + "github.com/go-chi/chi/v5" + + "ds2api/internal/config" +) + +type ModelsHandler struct { + Store ConfigReader +} + +func (h *ModelsHandler) ListModels(w http.ResponseWriter, _ *http.Request) { + WriteJSON(w, http.StatusOK, config.OpenAIModelsResponse()) +} + +func (h *ModelsHandler) GetModel(w http.ResponseWriter, r *http.Request) { + modelID := strings.TrimSpace(chi.URLParam(r, "model_id")) + model, ok := config.OpenAIModelByID(h.Store, modelID) + if !ok { + WriteOpenAIError(w, http.StatusNotFound, "Model not found.") + return + } + WriteJSON(w, http.StatusOK, model) +} diff --git a/internal/adapter/openai/output_clean.go b/internal/httpapi/openai/shared/output_clean.go similarity index 72% rename from internal/adapter/openai/output_clean.go rename to internal/httpapi/openai/shared/output_clean.go index b749876..a890565 100644 --- a/internal/adapter/openai/output_clean.go +++ b/internal/httpapi/openai/shared/output_clean.go @@ -1,8 +1,8 @@ -package openai +package shared import textclean "ds2api/internal/textclean" -func cleanVisibleOutput(text string, stripReferenceMarkers bool) string { +func CleanVisibleOutput(text string, 
stripReferenceMarkers bool) string { if text == "" { return text } diff --git a/internal/httpapi/openai/shared/string_helpers.go b/internal/httpapi/openai/shared/string_helpers.go new file mode 100644 index 0000000..2c334a9 --- /dev/null +++ b/internal/httpapi/openai/shared/string_helpers.go @@ -0,0 +1,8 @@ +package shared + +func AsString(v any) string { + if s, ok := v.(string); ok { + return s + } + return "" +} diff --git a/internal/adapter/openai/trace.go b/internal/httpapi/openai/shared/trace.go similarity index 84% rename from internal/adapter/openai/trace.go rename to internal/httpapi/openai/shared/trace.go index 8ea58f0..06dd9f9 100644 --- a/internal/adapter/openai/trace.go +++ b/internal/httpapi/openai/shared/trace.go @@ -1,4 +1,4 @@ -package openai +package shared import ( "net/http" @@ -7,7 +7,7 @@ import ( "github.com/go-chi/chi/v5/middleware" ) -func requestTraceID(r *http.Request) string { +func RequestTraceID(r *http.Request) string { if r == nil { return "" } diff --git a/internal/httpapi/openai/shared/upstream_empty.go b/internal/httpapi/openai/shared/upstream_empty.go new file mode 100644 index 0000000..a52c4b3 --- /dev/null +++ b/internal/httpapi/openai/shared/upstream_empty.go @@ -0,0 +1,27 @@ +package shared + +import "net/http" + +func ShouldWriteUpstreamEmptyOutputError(text string) bool { + return text == "" +} + +func UpstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int, string, string) { + _ = text + if contentFilter { + return http.StatusBadRequest, "Upstream content filtered the response and returned no output.", "content_filter" + } + if thinking != "" { + return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned reasoning without visible output.", "upstream_empty_output" + } + return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned empty output.", "upstream_empty_output" +} + +func WriteUpstreamEmptyOutputError(w http.ResponseWriter, text, thinking string, 
contentFilter bool) bool { + if !ShouldWriteUpstreamEmptyOutputError(text) { + return false + } + status, message, code := UpstreamEmptyOutputDetail(contentFilter, text, thinking) + WriteOpenAIErrorWithCode(w, status, message, code) + return true +} diff --git a/internal/adapter/openai/stream_status_test.go b/internal/httpapi/openai/stream_status_test.go similarity index 90% rename from internal/adapter/openai/stream_status_test.go rename to internal/httpapi/openai/stream_status_test.go index 6562ab0..3c2827f 100644 --- a/internal/adapter/openai/stream_status_test.go +++ b/internal/httpapi/openai/stream_status_test.go @@ -13,7 +13,7 @@ import ( chimw "github.com/go-chi/chi/v5/middleware" "ds2api/internal/auth" - "ds2api/internal/deepseek" + dsclient "ds2api/internal/deepseek/client" ) type streamStatusAuthStub struct{} @@ -50,16 +50,16 @@ func (m streamStatusDSStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ int return "pow", nil } -func (m streamStatusDSStub) UploadFile(_ context.Context, _ *auth.RequestAuth, _ deepseek.UploadFileRequest, _ int) (*deepseek.UploadFileResult, error) { - return &deepseek.UploadFileResult{ID: "file-id", Filename: "file.txt", Bytes: 1, Status: "uploaded"}, nil +func (m streamStatusDSStub) UploadFile(_ context.Context, _ *auth.RequestAuth, _ dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) { + return &dsclient.UploadFileResult{ID: "file-id", Filename: "file.txt", Bytes: 1, Status: "uploaded"}, nil } func (m streamStatusDSStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) { return m.resp, nil } -func (m streamStatusDSStub) DeleteSessionForToken(_ context.Context, _ string, _ string) (*deepseek.DeleteSessionResult, error) { - return &deepseek.DeleteSessionResult{Success: true}, nil +func (m streamStatusDSStub) DeleteSessionForToken(_ context.Context, _ string, _ string) (*dsclient.DeleteSessionResult, error) { + return 
&dsclient.DeleteSessionResult{Success: true}, nil } func (m streamStatusDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error { @@ -90,16 +90,16 @@ func captureStatusMiddleware(statuses *[]int) func(http.Handler) http.Handler { func TestChatCompletionsStreamStatusCapturedAs200(t *testing.T) { statuses := make([]int, 0, 1) - h := &Handler{ + h := &openAITestSurface{ Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"hello"}`, "data: [DONE]")}, } r := chi.NewRouter() r.Use(captureStatusMiddleware(&statuses)) - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) - reqBody := `{"model":"deepseek-chat","messages":[{"role":"user","content":"hi"}],"stream":true}` + reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hi"}],"stream":true}` req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -119,16 +119,16 @@ func TestChatCompletionsStreamStatusCapturedAs200(t *testing.T) { func TestResponsesStreamStatusCapturedAs200(t *testing.T) { statuses := make([]int, 0, 1) - h := &Handler{ + h := &openAITestSurface{ Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"hello"}`, "data: [DONE]")}, } r := chi.NewRouter() r.Use(captureStatusMiddleware(&statuses)) - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) - reqBody := `{"model":"deepseek-chat","input":"hi","stream":true}` + reqBody := `{"model":"deepseek-v4-flash","input":"hi","stream":true}` req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -148,7 +148,7 @@ func 
TestResponsesStreamStatusCapturedAs200(t *testing.T) { func TestChatCompletionsStreamContentFilterStopsNormallyWithoutLeak(t *testing.T) { statuses := make([]int, 0, 1) - h := &Handler{ + h := &openAITestSurface{ Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse( @@ -159,9 +159,9 @@ func TestChatCompletionsStreamContentFilterStopsNormallyWithoutLeak(t *testing.T } r := chi.NewRouter() r.Use(captureStatusMiddleware(&statuses)) - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) - reqBody := `{"model":"deepseek-chat","messages":[{"role":"user","content":"hi"}],"stream":true}` + reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hi"}],"stream":true}` req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -198,16 +198,16 @@ func TestChatCompletionsStreamContentFilterStopsNormallyWithoutLeak(t *testing.T func TestChatCompletionsStreamEmitsFailureFrameWhenUpstreamOutputEmpty(t *testing.T) { statuses := make([]int, 0, 1) - h := &Handler{ + h := &openAITestSurface{ Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse("data: [DONE]")}, } r := chi.NewRouter() r.Use(captureStatusMiddleware(&statuses)) - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) - reqBody := `{"model":"deepseek-chat","messages":[{"role":"user","content":"hi"}],"stream":true}` + reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hi"}],"stream":true}` req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -241,7 +241,7 @@ func TestChatCompletionsStreamEmitsFailureFrameWhenUpstreamOutputEmpty(t 
*testin func TestResponsesStreamUsageIgnoresBatchAccumulatedTokenUsage(t *testing.T) { statuses := make([]int, 0, 1) - h := &Handler{ + h := &openAITestSurface{ Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse( @@ -251,9 +251,9 @@ func TestResponsesStreamUsageIgnoresBatchAccumulatedTokenUsage(t *testing.T) { } r := chi.NewRouter() r.Use(captureStatusMiddleware(&statuses)) - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) - reqBody := `{"model":"deepseek-chat","input":"hi","stream":true}` + reqBody := `{"model":"deepseek-v4-flash","input":"hi","stream":true}` req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -289,7 +289,7 @@ func TestResponsesStreamUsageIgnoresBatchAccumulatedTokenUsage(t *testing.T) { func TestResponsesNonStreamUsageIgnoresPromptAndOutputTokenUsage(t *testing.T) { statuses := make([]int, 0, 1) - h := &Handler{ + h := &openAITestSurface{ Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse( @@ -299,9 +299,9 @@ func TestResponsesNonStreamUsageIgnoresPromptAndOutputTokenUsage(t *testing.T) { } r := chi.NewRouter() r.Use(captureStatusMiddleware(&statuses)) - RegisterRoutes(r, h) + registerOpenAITestRoutes(r, h) - reqBody := `{"model":"deepseek-chat","input":"hi","stream":false}` + reqBody := `{"model":"deepseek-v4-flash","input":"hi","stream":false}` req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") diff --git a/internal/httpapi/openai/test_bridge_test.go b/internal/httpapi/openai/test_bridge_test.go new file mode 100644 index 0000000..91549ce --- /dev/null +++ b/internal/httpapi/openai/test_bridge_test.go @@ 
-0,0 +1,157 @@ +package openai + +import ( + "context" + "encoding/json" + "net/http" + "strings" + "testing" + + "github.com/go-chi/chi/v5" + + "ds2api/internal/auth" + "ds2api/internal/chathistory" + "ds2api/internal/httpapi/openai/chat" + "ds2api/internal/httpapi/openai/embeddings" + "ds2api/internal/httpapi/openai/files" + "ds2api/internal/httpapi/openai/history" + "ds2api/internal/httpapi/openai/responses" + "ds2api/internal/httpapi/openai/shared" + "ds2api/internal/promptcompat" +) + +type openAITestSurface struct { + Store shared.ConfigReader + Auth shared.AuthResolver + DS shared.DeepSeekCaller + ChatHistory *chathistory.Store + + chat *chat.Handler + responses *responses.Handler + files *files.Handler + embeddings *embeddings.Handler + models *shared.ModelsHandler +} + +func (h *openAITestSurface) deps() shared.Deps { + if h == nil { + return shared.Deps{} + } + return shared.Deps{Store: h.Store, Auth: h.Auth, DS: h.DS, ChatHistory: h.ChatHistory} +} + +func (h *openAITestSurface) chatHandler() *chat.Handler { + if h.chat == nil { + deps := h.deps() + h.chat = &chat.Handler{Store: deps.Store, Auth: deps.Auth, DS: deps.DS, ChatHistory: deps.ChatHistory} + } + return h.chat +} + +func (h *openAITestSurface) responsesHandler() *responses.Handler { + if h.responses == nil { + deps := h.deps() + h.responses = &responses.Handler{Store: deps.Store, Auth: deps.Auth, DS: deps.DS, ChatHistory: deps.ChatHistory} + } + return h.responses +} + +func (h *openAITestSurface) filesHandler() *files.Handler { + if h.files == nil { + deps := h.deps() + h.files = &files.Handler{Store: deps.Store, Auth: deps.Auth, DS: deps.DS, ChatHistory: deps.ChatHistory} + } + return h.files +} + +func (h *openAITestSurface) embeddingsHandler() *embeddings.Handler { + if h.embeddings == nil { + deps := h.deps() + h.embeddings = &embeddings.Handler{Store: deps.Store, Auth: deps.Auth, DS: deps.DS, ChatHistory: deps.ChatHistory} + } + return h.embeddings +} + +func (h *openAITestSurface) 
modelsHandler() *shared.ModelsHandler { + if h.models == nil { + h.models = &shared.ModelsHandler{Store: h.Store} + } + return h.models +} + +func (h *openAITestSurface) ChatCompletions(w http.ResponseWriter, r *http.Request) { + h.chatHandler().ChatCompletions(w, r) +} + +func (h *openAITestSurface) applyHistorySplit(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) { + return history.Service{Store: h.Store, DS: h.DS}.Apply(ctx, a, stdReq) +} + +func (h *openAITestSurface) preprocessInlineFileInputs(ctx context.Context, a *auth.RequestAuth, req map[string]any) error { + return h.filesHandler().PreprocessInlineFileInputs(ctx, a, req) +} + +func registerOpenAITestRoutes(r chi.Router, h *openAITestSurface) { + r.Get("/v1/models", h.modelsHandler().ListModels) + r.Get("/v1/models/{model_id}", h.modelsHandler().GetModel) + r.Post("/v1/chat/completions", h.chatHandler().ChatCompletions) + r.Post("/v1/responses", h.responsesHandler().Responses) + r.Get("/v1/responses/{response_id}", h.responsesHandler().GetResponseByID) + r.Post("/v1/files", h.filesHandler().UploadFile) + r.Post("/v1/embeddings", h.embeddingsHandler().Embeddings) +} + +func splitOpenAIHistoryMessages(messages []any, triggerAfterTurns int) ([]any, []any) { + return history.SplitOpenAIHistoryMessages(messages, triggerAfterTurns) +} + +func buildOpenAIHistoryTranscript(messages []any) string { + return promptcompat.BuildOpenAIHistoryTranscript(messages) +} + +func writeOpenAIError(w http.ResponseWriter, status int, message string) { + shared.WriteOpenAIError(w, status, message) +} + +func replaceCitationMarkersWithLinks(text string, links map[int]string) string { + return shared.ReplaceCitationMarkersWithLinks(text, links) +} + +func sanitizeLeakedOutput(text string) string { + return shared.CleanVisibleOutput(text, false) +} + +func requestTraceID(r *http.Request) string { + return shared.RequestTraceID(r) +} + +func asString(v any) string 
{ + return shared.AsString(v) +} + +func parseSSEDataFrames(t *testing.T, body string) ([]map[string]any, bool) { + t.Helper() + lines := strings.Split(body, "\n") + frames := make([]map[string]any, 0, len(lines)) + done := false + for _, line := range lines { + line = strings.TrimSpace(line) + if !strings.HasPrefix(line, "data:") { + continue + } + payload := strings.TrimSpace(strings.TrimPrefix(line, "data:")) + if payload == "" { + continue + } + if payload == "[DONE]" { + done = true + continue + } + var frame map[string]any + if err := json.Unmarshal([]byte(payload), &frame); err != nil { + t.Fatalf("decode sse frame failed: %v, payload=%s", err, payload) + } + frames = append(frames, frame) + } + return frames, done +} diff --git a/internal/adapter/openai/trace_test.go b/internal/httpapi/openai/trace_test.go similarity index 100% rename from internal/adapter/openai/trace_test.go rename to internal/httpapi/openai/trace_test.go diff --git a/internal/js/chat-stream/cors.js b/internal/js/chat-stream/cors.js new file mode 100644 index 0000000..1a4b36a --- /dev/null +++ b/internal/js/chat-stream/cors.js @@ -0,0 +1,134 @@ +'use strict'; + +const DEFAULT_CORS_ALLOW_HEADERS = [ + 'Content-Type', + 'Authorization', + 'X-API-Key', + 'X-Ds2-Target-Account', + 'X-Ds2-Source', + 'X-Vercel-Protection-Bypass', + 'X-Goog-Api-Key', + 'Anthropic-Version', + 'Anthropic-Beta', +]; + +const BLOCKED_CORS_REQUEST_HEADERS = new Set([ + 'x-ds2-internal-token', +]); + +function setCorsHeaders(res, req) { + const origin = asString(readHeader(req, 'origin')); + res.setHeader('Access-Control-Allow-Origin', origin || '*'); + res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS, PUT, DELETE'); + res.setHeader('Access-Control-Max-Age', '600'); + res.setHeader( + 'Access-Control-Allow-Headers', + buildCORSAllowHeaders(req), + ); + addVaryHeader(res, 'Origin'); + addVaryHeader(res, 'Access-Control-Request-Headers'); + if (asString(readHeader(req, 
'access-control-request-private-network')).toLowerCase() === 'true') { + res.setHeader('Access-Control-Allow-Private-Network', 'true'); + addVaryHeader(res, 'Access-Control-Request-Private-Network'); + } +} + +function buildCORSAllowHeaders(req) { + const seen = new Set(); + const headers = []; + for (const name of DEFAULT_CORS_ALLOW_HEADERS) { + appendCORSHeaderName(headers, seen, name); + } + for (const name of splitCORSRequestHeaders(readHeader(req, 'access-control-request-headers'))) { + appendCORSHeaderName(headers, seen, name); + } + return headers.join(', '); +} + +function splitCORSRequestHeaders(raw) { + const text = asString(raw); + if (!text) { + return []; + } + return text + .split(',') + .map((part) => asString(part)) + .filter((name) => isValidCORSHeaderToken(name)) + .filter((name) => !BLOCKED_CORS_REQUEST_HEADERS.has(name.toLowerCase())); +} + +function appendCORSHeaderName(headers, seen, name) { + const text = asString(name); + if (!isValidCORSHeaderToken(text)) { + return; + } + const lower = text.toLowerCase(); + if (BLOCKED_CORS_REQUEST_HEADERS.has(lower) || seen.has(lower)) { + return; + } + seen.add(lower); + headers.push(text); +} + +function isValidCORSHeaderToken(name) { + return /^[A-Za-z0-9!#$%&'*+.^_`|~-]+$/.test(asString(name)); +} + +function addVaryHeader(res, token) { + const text = asString(token); + if (!text || typeof res.setHeader !== 'function') { + return; + } + const current = typeof res.getHeader === 'function' ? 
res.getHeader('Vary') : ''; + const seen = new Set(); + const merged = []; + const addToken = (value) => { + const trimmed = asString(value); + if (!trimmed) { + return; + } + const lower = trimmed.toLowerCase(); + if (seen.has(lower)) { + return; + } + seen.add(lower); + merged.push(trimmed); + }; + if (Array.isArray(current)) { + for (const value of current) { + for (const part of String(value).split(',')) { + addToken(part); + } + } + } else { + for (const part of String(current || '').split(',')) { + addToken(part); + } + } + addToken(text); + res.setHeader('Vary', merged.join(', ')); +} + +function readHeader(req, key) { + if (!req || !req.headers) { + return ''; + } + return req.headers[String(key).toLowerCase()]; +} + +function asString(v) { + if (typeof v === 'string') { + return v.trim(); + } + if (Array.isArray(v)) { + return asString(v[0]); + } + if (v == null) { + return ''; + } + return String(v).trim(); +} + +module.exports = { + setCorsHeaders, +}; diff --git a/internal/js/chat-stream/http_internal.js b/internal/js/chat-stream/http_internal.js index 20f24c8..01caa8d 100644 --- a/internal/js/chat-stream/http_internal.js +++ b/internal/js/chat-stream/http_internal.js @@ -3,15 +3,9 @@ const { writeOpenAIError, } = require('./error_shape'); - -function setCorsHeaders(res) { - res.setHeader('Access-Control-Allow-Origin', '*'); - res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS, PUT, DELETE'); - res.setHeader( - 'Access-Control-Allow-Headers', - 'Content-Type, Authorization, X-API-Key, X-Ds2-Target-Account, X-Vercel-Protection-Bypass', - ); -} +const { + setCorsHeaders, +} = require('./cors'); function header(req, key) { if (!req || !req.headers) { diff --git a/internal/js/chat-stream/index.js b/internal/js/chat-stream/index.js index 57740fd..398fc9b 100644 --- a/internal/js/chat-stream/index.js +++ b/internal/js/chat-stream/index.js @@ -40,7 +40,7 @@ const { } = require('./dedupe'); async function handler(req, res) { - 
setCorsHeaders(res); + setCorsHeaders(res, req); if (req.method === 'OPTIONS') { res.statusCode = 204; res.end(); diff --git a/internal/js/chat-stream/proxy_go.js b/internal/js/chat-stream/proxy_go.js index 5218df0..4e31a9c 100644 --- a/internal/js/chat-stream/proxy_go.js +++ b/internal/js/chat-stream/proxy_go.js @@ -53,7 +53,8 @@ async function proxyToGo(req, res, rawBody) { res.statusCode = upstream.status; upstream.headers.forEach((value, key) => { - if (key.toLowerCase() === 'content-length') { + const lower = key.toLowerCase(); + if (lower === 'content-length' || lower === 'content-encoding') { return; } res.setHeader(key, value); diff --git a/internal/js/chat-stream/sse_parse_impl.js b/internal/js/chat-stream/sse_parse_impl.js index 8db00ae..7c6cfae 100644 --- a/internal/js/chat-stream/sse_parse_impl.js +++ b/internal/js/chat-stream/sse_parse_impl.js @@ -54,6 +54,27 @@ function splitThinkingParts(parts) { return { parts: out, transitioned: thinkingDone }; } +function dropThinkingParts(parts) { + if (!Array.isArray(parts) || parts.length === 0) { + return parts; + } + return parts.filter((p) => p && p.type !== 'thinking'); +} + +function finalizeThinkingParts(parts, thinkingEnabled, newType) { + const splitResult = splitThinkingParts(parts); + let finalType = newType; + let finalParts = splitResult.parts; + if (splitResult.transitioned) { + finalType = 'text'; + } + if (!thinkingEnabled) { + finalParts = dropThinkingParts(finalParts); + finalType = 'text'; + } + return { parts: finalParts, newType: finalType }; +} + function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenceMarkers = true) { if (!chunk || typeof chunk !== 'object') { return { @@ -194,7 +215,9 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc let partType = 'text'; if (pathValue === 'response/thinking_content') { - if (newType === 'text') { + if (!thinkingEnabled) { + partType = 'thinking'; + } else if (newType === 'text') { partType = 
'text'; } else { partType = 'thinking'; @@ -239,20 +262,17 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc } let resolvedParts = filterLeakedContentFilterParts(parts); - const splitResult = splitThinkingParts(resolvedParts); - if (splitResult.transitioned) { - newType = 'text'; - } + const finalized = finalizeThinkingParts(resolvedParts, thinkingEnabled, newType); return { parsed: true, - parts: splitResult.parts, + parts: finalized.parts, finished: false, contentFilter: false, errorMessage: '', promptTokens, outputTokens, - newType, + newType: finalized.newType, }; } @@ -273,20 +293,17 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc parts.push(...extracted.parts); let resolvedParts = filterLeakedContentFilterParts(parts); - const splitResult = splitThinkingParts(resolvedParts); - if (splitResult.transitioned) { - newType = 'text'; - } + const finalized = finalizeThinkingParts(resolvedParts, thinkingEnabled, newType); return { parsed: true, - parts: splitResult.parts, + parts: finalized.parts, finished: false, contentFilter: false, errorMessage: '', promptTokens, outputTokens, - newType, + newType: finalized.newType, }; } @@ -316,20 +333,17 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc } let resolvedParts = filterLeakedContentFilterParts(parts); - const splitResult = splitThinkingParts(resolvedParts); - if (splitResult.transitioned) { - newType = 'text'; - } + const finalized = finalizeThinkingParts(resolvedParts, thinkingEnabled, newType); return { parsed: true, - parts: splitResult.parts, + parts: finalized.parts, finished: false, contentFilter: false, errorMessage: '', promptTokens, outputTokens, - newType, + newType: finalized.newType, }; } diff --git a/internal/js/chat-stream/vercel_stream_impl.js b/internal/js/chat-stream/vercel_stream_impl.js index b28ecb0..553af69 100644 --- a/internal/js/chat-stream/vercel_stream_impl.js +++ 
b/internal/js/chat-stream/vercel_stream_impl.js @@ -10,7 +10,7 @@ const { formatOpenAIStreamToolCalls, } = require('../helpers/stream-tool-sieve'); const { BASE_HEADERS } = require('../shared/deepseek-constants'); -const { writeOpenAIError } = require('./error_shape'); +const { writeOpenAIError, openAIErrorType } = require('./error_shape'); const { parseChunkForContent, isCitation } = require('./sse_parse'); const { buildUsage } = require('./token_usage'); const { @@ -129,6 +129,7 @@ async function handleVercelStream(req, res, rawBody, payload) { const toolSieveEnabled = toolPolicy.toolSieveEnabled; const toolSieveState = createToolSieveState(); let toolCallsEmitted = false; + let toolCallsDoneEmitted = false; const streamToolCallIDs = new Map(); const streamToolNames = new Map(); const decoder = new TextDecoder(); @@ -153,14 +154,16 @@ async function handleVercelStream(req, res, rawBody, payload) { return; } const detected = parseStandaloneToolCalls(outputText, toolNames); - if (detected.length > 0 && !toolCallsEmitted) { + if (detected.length > 0 && !toolCallsDoneEmitted) { toolCallsEmitted = true; + toolCallsDoneEmitted = true; sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(detected, streamToolCallIDs) }); } else if (toolSieveEnabled) { const tailEvents = flushToolSieve(toolSieveState, toolNames); for (const evt of tailEvents) { if (evt.type === 'tool_calls' && Array.isArray(evt.calls) && evt.calls.length > 0) { toolCallsEmitted = true; + toolCallsDoneEmitted = true; sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls, streamToolCallIDs) }); resetStreamToolCallState(streamToolCallIDs, streamToolNames); continue; @@ -173,6 +176,15 @@ async function handleVercelStream(req, res, rawBody, payload) { if (detected.length > 0 || toolCallsEmitted) { reason = 'tool_calls'; } + if (detected.length === 0 && !toolCallsEmitted && outputText.trim() === '') { + const detail = upstreamEmptyOutputDetail(reason === 'content_filter', outputText, 
thinkingText); + sendFailedChunk(res, detail.status, detail.message, detail.code); + await releaseLease(); + if (!res.writableEnded && !res.destroyed) { + res.end(); + } + return; + } sendFrame({ id: sessionID, object: 'chat.completion.chunk', @@ -234,7 +246,7 @@ async function handleVercelStream(req, res, rawBody, payload) { return; } if (parsed.contentFilter) { - await finish('stop'); + await finish(outputText.trim() === '' ? 'content_filter' : 'stop'); return; } if (parsed.finished) { @@ -284,6 +296,7 @@ async function handleVercelStream(req, res, rawBody, payload) { } if (evt.type === 'tool_calls') { toolCallsEmitted = true; + toolCallsDoneEmitted = true; sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls, streamToolCallIDs) }); resetStreamToolCallState(streamToolCallIDs, streamToolNames); continue; @@ -315,6 +328,46 @@ function toBool(v) { return v === true; } +function upstreamEmptyOutputDetail(contentFilter, _text, thinking) { + if (contentFilter) { + return { + status: 400, + message: 'Upstream content filtered the response and returned no output.', + code: 'content_filter', + }; + } + if (thinking !== '') { + return { + status: 429, + message: 'Upstream account hit a rate limit and returned reasoning without visible output.', + code: 'upstream_empty_output', + }; + } + return { + status: 429, + message: 'Upstream account hit a rate limit and returned empty output.', + code: 'upstream_empty_output', + }; +} + +function sendFailedChunk(res, status, message, code) { + res.write(`data: ${JSON.stringify({ + status_code: status, + error: { + message, + type: openAIErrorType(status), + code, + param: null, + }, + })}\n\n`); + if (!res.writableEnded && !res.destroyed) { + res.write('data: [DONE]\n\n'); + } + if (typeof res.flush === 'function') { + res.flush(); + } +} + module.exports = { handleVercelStream, }; diff --git a/internal/js/helpers/stream-tool-sieve/parse.js b/internal/js/helpers/stream-tool-sieve/parse.js index f6bb865..0e7d552 100644 
--- a/internal/js/helpers/stream-tool-sieve/parse.js +++ b/internal/js/helpers/stream-tool-sieve/parse.js @@ -8,7 +8,7 @@ const { stripFencedCodeBlocks, } = require('./parse_payload'); -const TOOL_MARKUP_PREFIXES = [']*)>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi; -const TOOL_CALL_MARKUP_SELFCLOSE_PATTERN = /<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)\/>/gi; +const TOOLS_WRAPPER_PATTERN = /]*>([\s\S]*?)<\/tool_calls>/gi; +const TOOL_CALL_MARKUP_BLOCK_PATTERN = /<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?invoke>/gi; +const PARAMETER_BLOCK_PATTERN = /<(?:[a-z0-9_:-]+:)?parameter\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?parameter>/gi; const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi; -const TOOL_CALL_MARKUP_ATTR_PATTERN = /(name|function|tool)\s*=\s*"([^"]+)"/i; -const TOOL_CALL_MARKUP_NAME_PATTERNS = [ - /<(?:[a-z0-9_:-]+:)?tool_name\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?tool_name>/i, - /<(?:[a-z0-9_:-]+:)?function_name\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?function_name>/i, - /<(?:[a-z0-9_:-]+:)?name\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?name>/i, - /<(?:[a-z0-9_:-]+:)?function\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?function>/i, -]; -const TOOL_CALL_MARKUP_ARGS_PATTERNS = [ - /<(?:[a-z0-9_:-]+:)?input\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?input>/i, - /<(?:[a-z0-9_:-]+:)?arguments\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?arguments>/i, - /<(?:[a-z0-9_:-]+:)?argument\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?argument>/i, - /<(?:[a-z0-9_:-]+:)?parameters\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?parameters>/i, - /<(?:[a-z0-9_:-]+:)?parameter\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?parameter>/i, - /<(?:[a-z0-9_:-]+:)?args\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?args>/i, - /<(?:[a-z0-9_:-]+:)?params\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?params>/i, -]; const CDATA_PATTERN = /^$/i; -const HTML_ENTITIES_PATTERN = /&[a-z0-9#]+;/gi; +const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi; const { 
toStringSafe, @@ -40,57 +25,54 @@ function parseMarkupToolCalls(text) { return []; } const out = []; - for (const m of raw.matchAll(TOOL_CALL_MARKUP_BLOCK_PATTERN)) { - const parsed = parseMarkupSingleToolCall(toStringSafe(m[2]).trim(), toStringSafe(m[3]).trim()); - if (parsed) { - out.push(parsed); - } - } - for (const m of raw.matchAll(TOOL_CALL_MARKUP_SELFCLOSE_PATTERN)) { - const parsed = parseMarkupSingleToolCall(toStringSafe(m[1]).trim(), ''); - if (parsed) { - out.push(parsed); + for (const wrapper of raw.matchAll(TOOLS_WRAPPER_PATTERN)) { + const body = toStringSafe(wrapper[1]); + for (const block of body.matchAll(TOOL_CALL_MARKUP_BLOCK_PATTERN)) { + const parsed = parseMarkupSingleToolCall(block); + if (parsed) { + out.push(parsed); + } } } return out; } -function parseMarkupSingleToolCall(attrs, inner) { - // Try inline JSON parse for the inner content. +function parseMarkupSingleToolCall(block) { + const attrs = parseTagAttributes(block[1]); + const name = toStringSafe(attrs.name).trim(); + if (!name) { + return null; + } + const inner = toStringSafe(block[2]).trim(); + if (inner) { try { const decoded = JSON.parse(inner); - if (decoded && typeof decoded === 'object' && !Array.isArray(decoded) && decoded.name) { + if (decoded && typeof decoded === 'object' && !Array.isArray(decoded)) { return { - name: toStringSafe(decoded.name), - input: decoded.input && typeof decoded.input === 'object' && !Array.isArray(decoded.input) ? decoded.input : {}, + name, + input: decoded.input && typeof decoded.input === 'object' && !Array.isArray(decoded.input) + ? decoded.input + : decoded.parameters && typeof decoded.parameters === 'object' && !Array.isArray(decoded.parameters) + ? decoded.parameters + : {}, }; } } catch (_err) { // Not JSON, continue with markup parsing. 
} } - let name = ''; - const attrMatch = attrs.match(TOOL_CALL_MARKUP_ATTR_PATTERN); - if (attrMatch && attrMatch[2]) { - name = toStringSafe(attrMatch[2]).trim(); - } - if (!name) { - name = extractRawTagValue(findMarkupTagValue(inner, TOOL_CALL_MARKUP_NAME_PATTERNS)); - } - if (!name) { - return null; - } - - let input = {}; - const argsRaw = findMarkupTagValue(inner, TOOL_CALL_MARKUP_ARGS_PATTERNS); - if (argsRaw) { - input = parseMarkupInput(argsRaw); - } else { - const kv = parseMarkupKVObject(inner); - if (Object.keys(kv).length > 0) { - input = kv; + const input = {}; + for (const match of inner.matchAll(PARAMETER_BLOCK_PATTERN)) { + const parameterAttrs = parseTagAttributes(match[1]); + const paramName = toStringSafe(parameterAttrs.name).trim(); + if (!paramName) { + continue; } + appendMarkupValue(input, paramName, parseMarkupValue(match[2])); + } + if (Object.keys(input).length === 0 && inner.trim() !== '') { + return null; } return { name, input }; } @@ -153,11 +135,14 @@ function parseMarkupValue(raw) { } } - try { - return JSON.parse(s); - } catch (_err) { - return s; + if (s.startsWith('{') || s.startsWith('[')) { + try { + return JSON.parse(s); + } catch (_err) { + return s; + } } + return s; } function extractRawTagValue(inner) { @@ -187,19 +172,20 @@ function unescapeHtml(safe) { .replace(/'/g, "'"); } -function stripTagText(text) { - return toStringSafe(text).replace(/<[^>]+>/g, ' ').trim(); -} - -function findMarkupTagValue(text, patterns) { - const source = toStringSafe(text); - for (const p of patterns) { - const m = source.match(p); - if (m && m[1] !== undefined) { - return toStringSafe(m[1]); - } +function parseTagAttributes(raw) { + const source = toStringSafe(raw); + const out = {}; + if (!source) { + return out; } - return ''; + for (const match of source.matchAll(XML_ATTR_PATTERN)) { + const key = toStringSafe(match[1]).trim().toLowerCase(); + if (!key) { + continue; + } + out[key] = match[3] || match[4] || ''; + } + return out; } 
function parseToolCallInput(v) { diff --git a/internal/js/helpers/stream-tool-sieve/sieve-xml.js b/internal/js/helpers/stream-tool-sieve/sieve-xml.js index 6442dbc..cc8ee43 100644 --- a/internal/js/helpers/stream-tool-sieve/sieve-xml.js +++ b/internal/js/helpers/stream-tool-sieve/sieve-xml.js @@ -1,21 +1,16 @@ 'use strict'; const { parseToolCalls } = require('./parse'); -// Tag pairs ordered longest-first: wrapper tags checked before inner tags. +// XML wrapper tag pair used by the streaming sieve. const XML_TOOL_TAG_PAIRS = [ { open: '' }, - { open: '' }, - { open: '' }, - { open: '' }, - { open: '' }, - { open: '' }, ]; const XML_TOOL_OPENING_TAGS = XML_TOOL_TAG_PAIRS.map(p => p.open); function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) { const lower = captured.toLowerCase(); - // Find the FIRST matching open/close pair, preferring wrapper tags. + // Find the FIRST matching open/close pair for the canonical wrapper. for (const pair of XML_TOOL_TAG_PAIRS) { const openIdx = lower.indexOf(pair.open); if (openIdx < 0) { @@ -25,7 +20,7 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) { const closeIdx = lower.lastIndexOf(pair.close); if (closeIdx < openIdx) { // Opening tag present but specific closing tag hasn't arrived. - // Return not-ready — do NOT fall through to inner pairs. + // Return not-ready so buffering continues until the wrapper closes. 
return { ready: false, prefix: '', calls: [], suffix: '' }; } const closeEnd = closeIdx + pair.close.length; diff --git a/internal/js/helpers/stream-tool-sieve/tool-keywords.js b/internal/js/helpers/stream-tool-sieve/tool-keywords.js index ed7fbe0..93efd5d 100644 --- a/internal/js/helpers/stream-tool-sieve/tool-keywords.js +++ b/internal/js/helpers/stream-tool-sieve/tool-keywords.js @@ -1,16 +1,15 @@ 'use strict'; const XML_TOOL_SEGMENT_TAGS = [ - '', '', '', '', + '', '', '', '', '', '', '', + '', ]; module.exports = { @@ -18,4 +17,3 @@ module.exports = { XML_TOOL_OPENING_TAGS, XML_TOOL_CLOSING_TAGS, }; - diff --git a/internal/prompt/tool_calls.go b/internal/prompt/tool_calls.go index 4c14f6b..d38e9fa 100644 --- a/internal/prompt/tool_calls.go +++ b/internal/prompt/tool_calls.go @@ -16,8 +16,8 @@ var promptXMLTextEscaper = strings.NewReplacer( var promptXMLNamePattern = regexp.MustCompile(`^[A-Za-z_][A-Za-z0-9_.:-]*$`) -// FormatToolCallsForPrompt renders a tool_calls slice into the canonical -// prompt-visible history block used across adapters. +// FormatToolCallsForPrompt renders a tool_calls slice into the prompt-visible +// invoke/parameter history block used across adapters. 
func FormatToolCallsForPrompt(raw any) string { calls, ok := raw.([]any) if !ok || len(calls) == 0 { @@ -93,28 +93,99 @@ func formatToolCallForPrompt(call map[string]any) string { } parameters := formatToolCallParametersForPrompt(argsRaw) + if parameters == "" { + return ` ` + } - return " \n" + - " " + escapeXMLText(name) + "\n" + + return " \n" + parameters + "\n" + - " " + " " } func formatToolCallParametersForPrompt(raw any) string { value := normalizePromptToolCallValue(raw) - body, ok := renderPromptToolXMLBody(value, " ") - if ok { - if strings.TrimSpace(body) == "" { - return " " - } - return " \n" + body + "\n " + body, ok := renderPromptToolParameters(value, " ") + if ok && strings.TrimSpace(body) != "" { + return body } fallback := StringifyToolCallArguments(raw) if strings.TrimSpace(fallback) == "" { - fallback = "{}" + return "" + } + return " " + renderPromptXMLText(fallback) + "" +} + +func renderPromptToolParameters(value any, indent string) (string, bool) { + switch v := value.(type) { + case nil: + return "", true + case map[string]any: + if len(v) == 0 { + return "", true + } + keys := make([]string, 0, len(v)) + for k := range v { + keys = append(keys, k) + } + sort.Strings(keys) + lines := make([]string, 0, len(keys)) + for _, key := range keys { + rendered, ok := renderPromptParameterNode(key, v[key], indent) + if !ok { + return "", false + } + lines = append(lines, rendered) + } + return strings.Join(lines, "\n"), true + case []any: + lines := make([]string, 0, len(v)) + for _, item := range v { + rendered, ok := renderPromptParameterNode("item", item, indent) + if !ok { + return "", false + } + lines = append(lines, rendered) + } + return strings.Join(lines, "\n"), true + case string: + return indent + `` + renderPromptXMLText(v) + ``, true + default: + return indent + `` + renderPromptXMLText(fmt.Sprint(v)) + ``, true + } +} + +func renderPromptParameterNode(name string, value any, indent string) (string, bool) { + trimmedName := 
strings.TrimSpace(name) + if trimmedName == "" { + return "", false + } + switch v := value.(type) { + case nil: + return indent + ``, true + case map[string]any: + body, ok := renderPromptToolXMLBody(v, indent+" ") + if !ok { + return "", false + } + if strings.TrimSpace(body) == "" { + return indent + ``, true + } + return indent + `\n" + body + "\n" + indent + ``, true + case []any: + body, ok := renderPromptToolXMLArray(v, indent+" ") + if !ok { + return "", false + } + if strings.TrimSpace(body) == "" { + return indent + ``, true + } + return indent + `\n" + body + "\n" + indent + ``, true + case string: + return indent + `` + renderPromptXMLText(v) + ``, true + default: + return indent + `` + renderPromptXMLText(fmt.Sprint(v)) + ``, true } - return " " + renderPromptXMLText(fallback) + "" } func normalizePromptToolCallValue(raw any) any { @@ -246,6 +317,18 @@ func isValidPromptXMLName(name string) bool { return promptXMLNamePattern.MatchString(strings.TrimSpace(name)) } +func escapeXMLAttribute(text string) string { + if text == "" { + return "" + } + return strings.NewReplacer( + "&", "&", + `"`, """, + "<", "<", + ">", ">", + ).Replace(text) +} + func normalizeToolArgumentString(raw string) string { trimmed := strings.TrimSpace(raw) if trimmed == "" { diff --git a/internal/prompt/tool_calls_test.go b/internal/prompt/tool_calls_test.go index 2d30770..b26658c 100644 --- a/internal/prompt/tool_calls_test.go +++ b/internal/prompt/tool_calls_test.go @@ -22,7 +22,7 @@ func TestFormatToolCallsForPromptXML(t *testing.T) { if got == "" { t.Fatal("expected non-empty formatted tool calls") } - if got != "\n \n search_web\n \n \n \n \n" { + if got != "\n \n \n \n" { t.Fatalf("unexpected formatted tool call XML: %q", got) } } @@ -34,7 +34,7 @@ func TestFormatToolCallsForPromptEscapesXMLEntities(t *testing.T) { "arguments": `{"q":"a < b && c > d"}`, }, }) - want := "\n \n search<&>\n \n d]]>\n \n \n" + want := "\n \n d]]>\n \n" if got != want { t.Fatalf("unexpected 
escaped tool call XML: %q", got) } @@ -50,7 +50,7 @@ func TestFormatToolCallsForPromptUsesCDATAForMultilineContent(t *testing.T) { }, }, }) - want := "\n \n write_file\n \n \n \n \n \n" + want := "\n \n \n \n \n" if got != want { t.Fatalf("unexpected multiline cdata tool call XML: %q", got) } diff --git a/internal/adapter/openai/file_refs.go b/internal/promptcompat/file_refs.go similarity index 96% rename from internal/adapter/openai/file_refs.go rename to internal/promptcompat/file_refs.go index d1cef34..86006b6 100644 --- a/internal/adapter/openai/file_refs.go +++ b/internal/promptcompat/file_refs.go @@ -1,8 +1,8 @@ -package openai +package promptcompat import "strings" -func collectOpenAIRefFileIDs(req map[string]any) []string { +func CollectOpenAIRefFileIDs(req map[string]any) []string { if len(req) == 0 { return nil } diff --git a/internal/promptcompat/history_transcript.go b/internal/promptcompat/history_transcript.go new file mode 100644 index 0000000..cd9a238 --- /dev/null +++ b/internal/promptcompat/history_transcript.go @@ -0,0 +1,19 @@ +package promptcompat + +import ( + "fmt" + "strings" + + "ds2api/internal/prompt" +) + +const historySplitInjectedFilename = "IGNORE" + +func BuildOpenAIHistoryTranscript(messages []any) string { + normalized := NormalizeOpenAIMessagesForPrompt(messages, "") + transcript := strings.TrimSpace(prompt.MessagesPrepare(normalized)) + if transcript == "" { + return "" + } + return fmt.Sprintf("[file content end]\n\n%s\n\n[file name]: %s\n[file content begin]\n", transcript, historySplitInjectedFilename) +} diff --git a/internal/adapter/openai/message_normalize.go b/internal/promptcompat/message_normalize.go similarity index 90% rename from internal/adapter/openai/message_normalize.go rename to internal/promptcompat/message_normalize.go index 906c377..2e87259 100644 --- a/internal/adapter/openai/message_normalize.go +++ b/internal/promptcompat/message_normalize.go @@ -1,4 +1,4 @@ -package openai +package promptcompat import ( 
"strings" @@ -8,7 +8,7 @@ import ( const assistantReasoningLabel = "reasoning_content" -func normalizeOpenAIMessagesForPrompt(raw []any, traceID string) []map[string]any { +func NormalizeOpenAIMessagesForPrompt(raw []any, traceID string) []map[string]any { _ = traceID out := make([]map[string]any, 0, len(raw)) for _, item := range raw { @@ -36,10 +36,10 @@ func normalizeOpenAIMessagesForPrompt(raw []any, traceID string) []map[string]an case "user", "system", "developer": out = append(out, map[string]any{ "role": normalizeOpenAIRoleForPrompt(role), - "content": normalizeOpenAIContentForPrompt(msg["content"]), + "content": NormalizeOpenAIContentForPrompt(msg["content"]), }) default: - content := normalizeOpenAIContentForPrompt(msg["content"]) + content := NormalizeOpenAIContentForPrompt(msg["content"]) if content == "" { continue } @@ -56,7 +56,7 @@ func normalizeOpenAIMessagesForPrompt(raw []any, traceID string) []map[string]an } func buildAssistantContentForPrompt(msg map[string]any) string { - content := strings.TrimSpace(normalizeOpenAIContentForPrompt(msg["content"])) + content := strings.TrimSpace(NormalizeOpenAIContentForPrompt(msg["content"])) reasoning := strings.TrimSpace(normalizeOpenAIReasoningContentForPrompt(msg["reasoning_content"])) if reasoning == "" { reasoning = strings.TrimSpace(extractOpenAIReasoningContentFromMessage(msg["content"])) @@ -149,14 +149,14 @@ func formatPromptLabeledBlock(label, text string) string { } func buildToolContentForPrompt(msg map[string]any) string { - content := normalizeOpenAIContentForPrompt(msg["content"]) + content := NormalizeOpenAIContentForPrompt(msg["content"]) if strings.TrimSpace(content) == "" { return "null" } return content } -func normalizeOpenAIContentForPrompt(v any) string { +func NormalizeOpenAIContentForPrompt(v any) string { return prompt.NormalizeContent(v) } diff --git a/internal/adapter/openai/message_normalize_test.go b/internal/promptcompat/message_normalize_test.go similarity index 90% rename 
from internal/adapter/openai/message_normalize_test.go rename to internal/promptcompat/message_normalize_test.go index 564fea7..36079d0 100644 --- a/internal/adapter/openai/message_normalize_test.go +++ b/internal/promptcompat/message_normalize_test.go @@ -1,4 +1,4 @@ -package openai +package promptcompat import ( "strings" @@ -33,7 +33,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes }, } - normalized := normalizeOpenAIMessagesForPrompt(raw, "") + normalized := NormalizeOpenAIMessagesForPrompt(raw, "") if len(normalized) != 4 { t.Fatalf("expected 4 normalized messages with assistant tool history preserved, got %d", len(normalized)) } @@ -41,7 +41,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes if !strings.Contains(assistantContent, "") { t.Fatalf("assistant tool history should be preserved in XML form, got %q", assistantContent) } - if !strings.Contains(assistantContent, "get_weather") { + if !strings.Contains(assistantContent, ``) { t.Fatalf("expected tool name in preserved history, got %q", assistantContent) } if !strings.Contains(normalized[3]["content"].(string), `"temp":18`) { @@ -67,7 +67,7 @@ func TestNormalizeOpenAIMessagesForPrompt_ToolObjectContentPreserved(t *testing. 
}, } - normalized := normalizeOpenAIMessagesForPrompt(raw, "") + normalized := NormalizeOpenAIMessagesForPrompt(raw, "") got, _ := normalized[0]["content"].(string) if !strings.Contains(got, `"temp":18`) || !strings.Contains(got, `"condition":"sunny"`) { t.Fatalf("expected serialized object in tool content, got %q", got) @@ -88,7 +88,7 @@ func TestNormalizeOpenAIMessagesForPrompt_ToolArrayBlocksJoined(t *testing.T) { }, } - normalized := normalizeOpenAIMessagesForPrompt(raw, "") + normalized := NormalizeOpenAIMessagesForPrompt(raw, "") got, _ := normalized[0]["content"].(string) if !strings.Contains(got, `line-1`) || !strings.Contains(got, `line-2`) { t.Fatalf("expected tool content blocks preserved, got %q", got) @@ -107,7 +107,7 @@ func TestNormalizeOpenAIMessagesForPrompt_FunctionRoleCompatible(t *testing.T) { }, } - normalized := normalizeOpenAIMessagesForPrompt(raw, "") + normalized := NormalizeOpenAIMessagesForPrompt(raw, "") if len(normalized) != 1 { t.Fatalf("expected one normalized message, got %d", len(normalized)) } @@ -134,7 +134,7 @@ func TestNormalizeOpenAIMessagesForPrompt_EmptyToolContentPreservedAsNull(t *tes }, } - normalized := normalizeOpenAIMessagesForPrompt(raw, "") + normalized := NormalizeOpenAIMessagesForPrompt(raw, "") if len(normalized) != 2 { t.Fatalf("expected tool completion turn to be preserved, got %#v", normalized) } @@ -172,15 +172,15 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantMultipleToolCallsRemainSepara }, } - normalized := normalizeOpenAIMessagesForPrompt(raw, "") + normalized := NormalizeOpenAIMessagesForPrompt(raw, "") if len(normalized) != 1 { t.Fatalf("expected assistant tool_call-only message preserved, got %#v", normalized) } content, _ := normalized[0]["content"].(string) - if strings.Count(content, "") != 2 { + if strings.Count(content, "search_web") || !strings.Contains(content, "eval_javascript") { + if !strings.Contains(content, ``) || !strings.Contains(content, ``) { t.Fatalf("expected both tool names in 
preserved history, got %q", content) } } @@ -201,7 +201,7 @@ func TestNormalizeOpenAIMessagesForPrompt_PreservesConcatenatedToolArguments(t * }, } - normalized := normalizeOpenAIMessagesForPrompt(raw, "") + normalized := NormalizeOpenAIMessagesForPrompt(raw, "") if len(normalized) != 1 { t.Fatalf("expected assistant tool_call-only content preserved, got %#v", normalized) } @@ -227,7 +227,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsMissingNameAreDroppe }, } - normalized := normalizeOpenAIMessagesForPrompt(raw, "") + normalized := NormalizeOpenAIMessagesForPrompt(raw, "") if len(normalized) != 0 { t.Fatalf("expected assistant tool_calls without text to be dropped when name is missing, got %#v", normalized) } @@ -250,7 +250,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantNilContentDoesNotInjectNullLi }, } - normalized := normalizeOpenAIMessagesForPrompt(raw, "") + normalized := NormalizeOpenAIMessagesForPrompt(raw, "") if len(normalized) != 1 { t.Fatalf("expected nil-content assistant tool_call-only message preserved, got %#v", normalized) } @@ -268,7 +268,7 @@ func TestNormalizeOpenAIMessagesForPrompt_DeveloperRoleMapsToSystem(t *testing.T map[string]any{"role": "developer", "content": "必须先走工具调用"}, map[string]any{"role": "user", "content": "你好"}, } - normalized := normalizeOpenAIMessagesForPrompt(raw, "") + normalized := NormalizeOpenAIMessagesForPrompt(raw, "") if len(normalized) != 2 { t.Fatalf("expected 2 normalized messages, got %d", len(normalized)) } @@ -287,7 +287,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantArrayContentFallbackWhenTextE }, } - normalized := normalizeOpenAIMessagesForPrompt(raw, "") + normalized := NormalizeOpenAIMessagesForPrompt(raw, "") if len(normalized) != 1 { t.Fatalf("expected one normalized message, got %d", len(normalized)) } @@ -306,7 +306,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantReasoningContentPreserved(t * }, } - normalized := normalizeOpenAIMessagesForPrompt(raw, "") + 
normalized := NormalizeOpenAIMessagesForPrompt(raw, "") if len(normalized) != 1 { t.Fatalf("expected one normalized assistant message, got %#v", normalized) } diff --git a/internal/promptcompat/prompt_build.go b/internal/promptcompat/prompt_build.go new file mode 100644 index 0000000..9d2ee4e --- /dev/null +++ b/internal/promptcompat/prompt_build.go @@ -0,0 +1,25 @@ +package promptcompat + +import ( + "ds2api/internal/prompt" +) + +func buildOpenAIFinalPrompt(messagesRaw []any, toolsRaw any, traceID string, thinkingEnabled bool) (string, []string) { + return BuildOpenAIPrompt(messagesRaw, toolsRaw, traceID, DefaultToolChoicePolicy(), thinkingEnabled) +} + +func BuildOpenAIPrompt(messagesRaw []any, toolsRaw any, traceID string, toolPolicy ToolChoicePolicy, thinkingEnabled bool) (string, []string) { + messages := NormalizeOpenAIMessagesForPrompt(messagesRaw, traceID) + toolNames := []string{} + if tools, ok := toolsRaw.([]any); ok && len(tools) > 0 { + messages, toolNames = injectToolPrompt(messages, tools, toolPolicy) + } + return prompt.MessagesPrepareWithThinking(messages, thinkingEnabled), toolNames +} + +// BuildOpenAIPromptForAdapter exposes the OpenAI-compatible prompt building flow so +// other protocol adapters (for example Gemini) can reuse the same tool/history +// normalization logic and remain behavior-compatible with chat/completions. 
+func BuildOpenAIPromptForAdapter(messagesRaw []any, toolsRaw any, traceID string, thinkingEnabled bool) (string, []string) { + return buildOpenAIFinalPrompt(messagesRaw, toolsRaw, traceID, thinkingEnabled) +} diff --git a/internal/adapter/openai/prompt_build_test.go b/internal/promptcompat/prompt_build_test.go similarity index 94% rename from internal/adapter/openai/prompt_build_test.go rename to internal/promptcompat/prompt_build_test.go index 0d7e1c5..82101d3 100644 --- a/internal/adapter/openai/prompt_build_test.go +++ b/internal/promptcompat/prompt_build_test.go @@ -1,4 +1,4 @@ -package openai +package promptcompat import ( "strings" @@ -50,7 +50,7 @@ func TestBuildOpenAIFinalPrompt_HandlerPathIncludesToolRoundtripSemantics(t *tes if !strings.Contains(finalPrompt, "") { t.Fatalf("handler finalPrompt should preserve assistant tool history: %q", finalPrompt) } - if !strings.Contains(finalPrompt, "get_weather") { + if !strings.Contains(finalPrompt, ``) { t.Fatalf("handler finalPrompt should include tool name history: %q", finalPrompt) } } @@ -74,7 +74,7 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t * } finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false) - if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the XML block at the end of your response.") { + if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the ... 
XML block at the end of your response.") { t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt) } if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") { diff --git a/internal/adapter/openai/standard_request.go b/internal/promptcompat/request_normalize.go similarity index 70% rename from internal/adapter/openai/standard_request.go rename to internal/promptcompat/request_normalize.go index 4270c6e..6d3f12d 100644 --- a/internal/adapter/openai/standard_request.go +++ b/internal/promptcompat/request_normalize.go @@ -1,4 +1,4 @@ -package openai +package promptcompat import ( "fmt" @@ -8,28 +8,34 @@ import ( "ds2api/internal/util" ) -func normalizeOpenAIChatRequest(store ConfigReader, req map[string]any, traceID string) (util.StandardRequest, error) { +type ConfigReader interface { + ModelAliases() map[string]string + CompatWideInputStrictOutput() bool +} + +func NormalizeOpenAIChatRequest(store ConfigReader, req map[string]any, traceID string) (StandardRequest, error) { model, _ := req["model"].(string) messagesRaw, _ := req["messages"].([]any) if strings.TrimSpace(model) == "" || len(messagesRaw) == 0 { - return util.StandardRequest{}, fmt.Errorf("request must include 'model' and 'messages'") + return StandardRequest{}, fmt.Errorf("request must include 'model' and 'messages'") } resolvedModel, ok := config.ResolveModel(store, model) if !ok { - return util.StandardRequest{}, fmt.Errorf("model %q is not available", model) + return StandardRequest{}, fmt.Errorf("model %q is not available", model) } - thinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel) + defaultThinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel) + thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled) responseModel := strings.TrimSpace(model) if responseModel == "" { responseModel = resolvedModel } - toolPolicy := util.DefaultToolChoicePolicy() - finalPrompt, toolNames := 
buildOpenAIFinalPromptWithPolicy(messagesRaw, req["tools"], traceID, toolPolicy, thinkingEnabled) + toolPolicy := DefaultToolChoicePolicy() + finalPrompt, toolNames := BuildOpenAIPrompt(messagesRaw, req["tools"], traceID, toolPolicy, thinkingEnabled) toolNames = ensureToolDetectionEnabled(toolNames, req["tools"]) passThrough := collectOpenAIChatPassThrough(req) - refFileIDs := collectOpenAIRefFileIDs(req) + refFileIDs := CollectOpenAIRefFileIDs(req) - return util.StandardRequest{ + return StandardRequest{ Surface: "openai_chat", RequestedModel: strings.TrimSpace(model), ResolvedModel: resolvedModel, @@ -47,17 +53,18 @@ func normalizeOpenAIChatRequest(store ConfigReader, req map[string]any, traceID }, nil } -func normalizeOpenAIResponsesRequest(store ConfigReader, req map[string]any, traceID string) (util.StandardRequest, error) { +func NormalizeOpenAIResponsesRequest(store ConfigReader, req map[string]any, traceID string) (StandardRequest, error) { model, _ := req["model"].(string) model = strings.TrimSpace(model) if model == "" { - return util.StandardRequest{}, fmt.Errorf("request must include 'model'") + return StandardRequest{}, fmt.Errorf("request must include 'model'") } resolvedModel, ok := config.ResolveModel(store, model) if !ok { - return util.StandardRequest{}, fmt.Errorf("model %q is not available", model) + return StandardRequest{}, fmt.Errorf("model %q is not available", model) } - thinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel) + defaultThinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel) + thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled) // Keep width-control as an explicit policy hook even if current default is true. 
allowWideInput := true @@ -66,26 +73,26 @@ func normalizeOpenAIResponsesRequest(store ConfigReader, req map[string]any, tra } var messagesRaw []any if allowWideInput { - messagesRaw = responsesMessagesFromRequest(req) + messagesRaw = ResponsesMessagesFromRequest(req) } else if msgs, ok := req["messages"].([]any); ok && len(msgs) > 0 { messagesRaw = msgs } if len(messagesRaw) == 0 { - return util.StandardRequest{}, fmt.Errorf("request must include 'input' or 'messages'") + return StandardRequest{}, fmt.Errorf("request must include 'input' or 'messages'") } toolPolicy, err := parseToolChoicePolicy(req["tool_choice"], req["tools"]) if err != nil { - return util.StandardRequest{}, err + return StandardRequest{}, err } - finalPrompt, toolNames := buildOpenAIFinalPromptWithPolicy(messagesRaw, req["tools"], traceID, toolPolicy, thinkingEnabled) + finalPrompt, toolNames := BuildOpenAIPrompt(messagesRaw, req["tools"], traceID, toolPolicy, thinkingEnabled) toolNames = ensureToolDetectionEnabled(toolNames, req["tools"]) if !toolPolicy.IsNone() { toolPolicy.Allowed = namesToSet(toolNames) } passThrough := collectOpenAIChatPassThrough(req) - refFileIDs := collectOpenAIRefFileIDs(req) + refFileIDs := CollectOpenAIRefFileIDs(req) - return util.StandardRequest{ + return StandardRequest{ Surface: "openai_responses", RequestedModel: model, ResolvedModel: resolvedModel, @@ -135,8 +142,8 @@ func collectOpenAIChatPassThrough(req map[string]any) map[string]any { return out } -func parseToolChoicePolicy(toolChoiceRaw any, toolsRaw any) (util.ToolChoicePolicy, error) { - policy := util.DefaultToolChoicePolicy() +func parseToolChoicePolicy(toolChoiceRaw any, toolsRaw any) (ToolChoicePolicy, error) { + policy := DefaultToolChoicePolicy() declaredNames := extractDeclaredToolNames(toolsRaw) declaredSet := namesToSet(declaredNames) if len(declaredNames) > 0 { @@ -151,25 +158,25 @@ func parseToolChoicePolicy(toolChoiceRaw any, toolsRaw any) (util.ToolChoicePoli case string: switch 
strings.ToLower(strings.TrimSpace(v)) { case "", "auto": - policy.Mode = util.ToolChoiceAuto + policy.Mode = ToolChoiceAuto case "none": - policy.Mode = util.ToolChoiceNone + policy.Mode = ToolChoiceNone policy.Allowed = nil case "required": - policy.Mode = util.ToolChoiceRequired + policy.Mode = ToolChoiceRequired default: - return util.ToolChoicePolicy{}, fmt.Errorf("unsupported tool_choice: %q", v) + return ToolChoicePolicy{}, fmt.Errorf("unsupported tool_choice: %q", v) } case map[string]any: allowedOverride, hasAllowedOverride, err := parseAllowedToolNames(v["allowed_tools"]) if err != nil { - return util.ToolChoicePolicy{}, err + return ToolChoicePolicy{}, err } if hasAllowedOverride { filtered := make([]string, 0, len(allowedOverride)) for _, name := range allowedOverride { if _, ok := declaredSet[name]; !ok { - return util.ToolChoicePolicy{}, fmt.Errorf("tool_choice.allowed_tools contains undeclared tool %q", name) + return ToolChoicePolicy{}, fmt.Errorf("tool_choice.allowed_tools contains undeclared tool %q", name) } filtered = append(filtered, name) } @@ -182,46 +189,46 @@ func parseToolChoicePolicy(toolChoiceRaw any, toolsRaw any) (util.ToolChoicePoli if hasFunctionSelector(v) { name, err := parseForcedToolName(v) if err != nil { - return util.ToolChoicePolicy{}, err + return ToolChoicePolicy{}, err } - policy.Mode = util.ToolChoiceForced + policy.Mode = ToolChoiceForced policy.ForcedName = name policy.Allowed = namesToSet([]string{name}) } else { - policy.Mode = util.ToolChoiceAuto + policy.Mode = ToolChoiceAuto } case "none": - policy.Mode = util.ToolChoiceNone + policy.Mode = ToolChoiceNone policy.Allowed = nil case "required": - policy.Mode = util.ToolChoiceRequired + policy.Mode = ToolChoiceRequired case "function": name, err := parseForcedToolName(v) if err != nil { - return util.ToolChoicePolicy{}, err + return ToolChoicePolicy{}, err } - policy.Mode = util.ToolChoiceForced + policy.Mode = ToolChoiceForced policy.ForcedName = name policy.Allowed = 
namesToSet([]string{name}) default: - return util.ToolChoicePolicy{}, fmt.Errorf("unsupported tool_choice.type: %q", typ) + return ToolChoicePolicy{}, fmt.Errorf("unsupported tool_choice.type: %q", typ) } default: - return util.ToolChoicePolicy{}, fmt.Errorf("tool_choice must be a string or object") + return ToolChoicePolicy{}, fmt.Errorf("tool_choice must be a string or object") } - if policy.Mode == util.ToolChoiceRequired || policy.Mode == util.ToolChoiceForced { + if policy.Mode == ToolChoiceRequired || policy.Mode == ToolChoiceForced { if len(declaredNames) == 0 { - return util.ToolChoicePolicy{}, fmt.Errorf("tool_choice=%s requires non-empty tools", policy.Mode) + return ToolChoicePolicy{}, fmt.Errorf("tool_choice=%s requires non-empty tools", policy.Mode) } } - if policy.Mode == util.ToolChoiceForced { + if policy.Mode == ToolChoiceForced { if _, ok := declaredSet[policy.ForcedName]; !ok { - return util.ToolChoicePolicy{}, fmt.Errorf("tool_choice forced function %q is not declared in tools", policy.ForcedName) + return ToolChoicePolicy{}, fmt.Errorf("tool_choice forced function %q is not declared in tools", policy.ForcedName) } } - if len(policy.Allowed) == 0 && (policy.Mode == util.ToolChoiceRequired || policy.Mode == util.ToolChoiceForced) { - return util.ToolChoicePolicy{}, fmt.Errorf("tool_choice policy resolved to empty allowed tool set") + if len(policy.Allowed) == 0 && (policy.Mode == ToolChoiceRequired || policy.Mode == ToolChoiceForced) { + return ToolChoicePolicy{}, fmt.Errorf("tool_choice policy resolved to empty allowed tool set") } return policy, nil } diff --git a/internal/adapter/openai/responses_input_items.go b/internal/promptcompat/responses_input_items.go similarity index 82% rename from internal/adapter/openai/responses_input_items.go rename to internal/promptcompat/responses_input_items.go index 6c42b38..92139d3 100644 --- a/internal/adapter/openai/responses_input_items.go +++ b/internal/promptcompat/responses_input_items.go @@ -1,4 +1,4 
@@ -package openai +package promptcompat import ( "fmt" @@ -20,25 +20,7 @@ func normalizeResponsesInputItemWithState(m map[string]any, callNameByID map[str role := strings.ToLower(strings.TrimSpace(asString(m["role"]))) if role != "" { if role == "assistant" { - out := map[string]any{ - "role": "assistant", - } - if toolCalls, ok := m["tool_calls"].([]any); ok && len(toolCalls) > 0 { - out["tool_calls"] = toolCalls - } - content := m["content"] - if content == nil { - if txt, _ := m["text"].(string); strings.TrimSpace(txt) != "" { - content = txt - } - } - if content != nil { - out["content"] = content - } - if _, hasToolCalls := out["tool_calls"]; hasToolCalls || out["content"] != nil { - return out - } - return nil + return normalizeResponsesAssistantMessage(m) } content := m["content"] if content == nil { @@ -70,6 +52,10 @@ func normalizeResponsesInputItemWithState(m map[string]any, callNameByID map[str itemType := strings.ToLower(strings.TrimSpace(asString(m["type"]))) switch itemType { case "message", "input_message": + role := strings.ToLower(strings.TrimSpace(asString(m["role"]))) + if role == "assistant" { + return normalizeResponsesAssistantMessage(m) + } content := m["content"] if content == nil { if txt, _ := m["text"].(string); strings.TrimSpace(txt) != "" { @@ -79,7 +65,6 @@ func normalizeResponsesInputItemWithState(m map[string]any, callNameByID map[str if content == nil { return nil } - role := strings.ToLower(strings.TrimSpace(asString(m["role"]))) if role == "" { role = "user" } @@ -182,7 +167,7 @@ func normalizeResponsesInputItemWithState(m map[string]any, callNameByID map[str } } if content, ok := m["content"]; ok { - if strings.TrimSpace(normalizeOpenAIContentForPrompt(content)) != "" { + if strings.TrimSpace(NormalizeOpenAIContentForPrompt(content)) != "" { return map[string]any{ "role": "user", "content": content, @@ -192,6 +177,31 @@ func normalizeResponsesInputItemWithState(m map[string]any, callNameByID map[str return nil } +func 
normalizeResponsesAssistantMessage(m map[string]any) map[string]any { + out := map[string]any{ + "role": "assistant", + } + if toolCalls, ok := m["tool_calls"].([]any); ok && len(toolCalls) > 0 { + out["tool_calls"] = toolCalls + } + content := m["content"] + if content == nil { + if txt, _ := m["text"].(string); strings.TrimSpace(txt) != "" { + content = txt + } + } + if content != nil { + out["content"] = content + } + if reasoning := strings.TrimSpace(normalizeOpenAIReasoningContentForPrompt(m["reasoning_content"])); reasoning != "" { + out["reasoning_content"] = m["reasoning_content"] + } + if _, hasToolCalls := out["tool_calls"]; hasToolCalls || out["content"] != nil || out["reasoning_content"] != nil { + return out + } + return nil +} + func normalizeResponsesFallbackPart(m map[string]any) string { if m == nil { return "" @@ -205,7 +215,7 @@ func normalizeResponsesFallbackPart(m map[string]any) string { return txt } if content, ok := m["content"]; ok { - if normalized := strings.TrimSpace(normalizeOpenAIContentForPrompt(content)); normalized != "" { + if normalized := strings.TrimSpace(NormalizeOpenAIContentForPrompt(content)); normalized != "" { return normalized } } diff --git a/internal/promptcompat/responses_input_items_test.go b/internal/promptcompat/responses_input_items_test.go new file mode 100644 index 0000000..4a782f2 --- /dev/null +++ b/internal/promptcompat/responses_input_items_test.go @@ -0,0 +1,50 @@ +package promptcompat + +import "testing" + +func TestNormalizeResponsesInputItemPreservesAssistantReasoningContent(t *testing.T) { + item := map[string]any{ + "role": "assistant", + "reasoning_content": "hidden reasoning", + "tool_calls": []any{ + map[string]any{ + "type": "function", + "function": map[string]any{ + "name": "search", + "arguments": `{"q":"docs"}`, + }, + }, + }, + } + + got := normalizeResponsesInputItem(item) + if got == nil { + t.Fatal("expected assistant item to be preserved") + } + if got["role"] != "assistant" { + 
t.Fatalf("unexpected role: %#v", got["role"]) + } + if got["reasoning_content"] != "hidden reasoning" { + t.Fatalf("expected reasoning_content preserved, got %#v", got["reasoning_content"]) + } +} + +func TestNormalizeResponsesInputItemAssistantMessageWithReasoningBlocks(t *testing.T) { + item := map[string]any{ + "type": "message", + "role": "assistant", + "content": []any{ + map[string]any{"type": "reasoning", "text": "internal chain"}, + map[string]any{"type": "output_text", "text": "visible answer"}, + }, + } + + got := normalizeResponsesInputItem(item) + if got == nil { + t.Fatal("expected assistant message item to be preserved") + } + content, _ := got["content"].([]any) + if len(content) != 2 { + t.Fatalf("expected content blocks preserved, got %#v", got["content"]) + } +} diff --git a/internal/adapter/openai/responses_input_normalize.go b/internal/promptcompat/responses_input_normalize.go similarity index 88% rename from internal/adapter/openai/responses_input_normalize.go rename to internal/promptcompat/responses_input_normalize.go index 6514669..e362d0e 100644 --- a/internal/adapter/openai/responses_input_normalize.go +++ b/internal/promptcompat/responses_input_normalize.go @@ -1,16 +1,16 @@ -package openai +package promptcompat import ( "fmt" "strings" ) -func responsesMessagesFromRequest(req map[string]any) []any { +func ResponsesMessagesFromRequest(req map[string]any) []any { if msgs, ok := req["messages"].([]any); ok && len(msgs) > 0 { return prependInstructionMessage(msgs, req["instructions"]) } if rawInput, ok := req["input"]; ok { - if msgs := normalizeResponsesInputAsMessages(rawInput); len(msgs) > 0 { + if msgs := NormalizeResponsesInputAsMessages(rawInput); len(msgs) > 0 { return prependInstructionMessage(msgs, req["instructions"]) } } @@ -29,7 +29,7 @@ func prependInstructionMessage(messages []any, instructions any) []any { return out } -func normalizeResponsesInputAsMessages(input any) []any { +func NormalizeResponsesInputAsMessages(input any) 
[]any { switch v := input.(type) { case string: if strings.TrimSpace(v) == "" { @@ -46,7 +46,7 @@ func normalizeResponsesInputAsMessages(input any) []any { return []any{map[string]any{"role": "user", "content": txt}} } if content, ok := v["content"]; ok { - if strings.TrimSpace(normalizeOpenAIContentForPrompt(content)) != "" { + if strings.TrimSpace(NormalizeOpenAIContentForPrompt(content)) != "" { return []any{map[string]any{"role": "user", "content": content}} } } diff --git a/internal/util/standard_request.go b/internal/promptcompat/standard_request.go similarity index 98% rename from internal/util/standard_request.go rename to internal/promptcompat/standard_request.go index b809dfd..9ec3781 100644 --- a/internal/util/standard_request.go +++ b/internal/promptcompat/standard_request.go @@ -1,4 +1,4 @@ -package util +package promptcompat import "ds2api/internal/config" diff --git a/internal/util/standard_request_test.go b/internal/promptcompat/standard_request_test.go similarity index 82% rename from internal/util/standard_request_test.go rename to internal/promptcompat/standard_request_test.go index f484605..7b529a6 100644 --- a/internal/util/standard_request_test.go +++ b/internal/promptcompat/standard_request_test.go @@ -1,4 +1,4 @@ -package util +package promptcompat import "testing" @@ -10,9 +10,9 @@ func TestStandardRequestCompletionPayloadSetsModelTypeFromResolvedModel(t *testi search bool modelType string }{ - {name: "default", model: "deepseek-chat", thinking: false, search: false, modelType: "default"}, - {name: "expert", model: "deepseek-expert-reasoner", thinking: true, search: false, modelType: "expert"}, - {name: "vision", model: "deepseek-vision-chat-search", thinking: false, search: true, modelType: "vision"}, + {name: "default", model: "deepseek-v4-flash", thinking: false, search: false, modelType: "default"}, + {name: "expert", model: "deepseek-v4-pro", thinking: true, search: false, modelType: "expert"}, + {name: "vision", model: 
"deepseek-v4-vision-search", thinking: false, search: true, modelType: "vision"}, } for _, tc := range tests { diff --git a/internal/promptcompat/tool_prompt.go b/internal/promptcompat/tool_prompt.go new file mode 100644 index 0000000..ba5f2cf --- /dev/null +++ b/internal/promptcompat/tool_prompt.go @@ -0,0 +1,72 @@ +package promptcompat + +import ( + "encoding/json" + "fmt" + "strings" + + "ds2api/internal/toolcall" +) + +func injectToolPrompt(messages []map[string]any, tools []any, policy ToolChoicePolicy) ([]map[string]any, []string) { + if policy.IsNone() { + return messages, nil + } + toolSchemas := make([]string, 0, len(tools)) + names := make([]string, 0, len(tools)) + isAllowed := func(name string) bool { + if strings.TrimSpace(name) == "" { + return false + } + if len(policy.Allowed) == 0 { + return true + } + _, ok := policy.Allowed[name] + return ok + } + + for _, t := range tools { + tool, ok := t.(map[string]any) + if !ok { + continue + } + fn, _ := tool["function"].(map[string]any) + if len(fn) == 0 { + fn = tool + } + name, _ := fn["name"].(string) + desc, _ := fn["description"].(string) + schema, _ := fn["parameters"].(map[string]any) + name = strings.TrimSpace(name) + if !isAllowed(name) { + continue + } + names = append(names, name) + if desc == "" { + desc = "No description available" + } + b, _ := json.Marshal(schema) + toolSchemas = append(toolSchemas, fmt.Sprintf("Tool: %s\nDescription: %s\nParameters: %s", name, desc, string(b))) + } + if len(toolSchemas) == 0 { + return messages, names + } + toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\n" + toolcall.BuildToolCallInstructions(names) + if policy.Mode == ToolChoiceRequired { + toolPrompt += "\n7) For this response, you MUST call at least one tool from the allowed list." 
+ } + if policy.Mode == ToolChoiceForced && strings.TrimSpace(policy.ForcedName) != "" { + toolPrompt += "\n7) For this response, you MUST call exactly this tool name: " + strings.TrimSpace(policy.ForcedName) + toolPrompt += "\n8) Do not call any other tool." + } + + for i := range messages { + if messages[i]["role"] == "system" { + old, _ := messages[i]["content"].(string) + messages[i]["content"] = strings.TrimSpace(old + "\n\n" + toolPrompt) + return messages, names + } + } + messages = append([]map[string]any{{"role": "system", "content": toolPrompt}}, messages...) + return messages, names +} diff --git a/internal/rawsample/rawsample_test.go b/internal/rawsample/rawsample_test.go index b70c633..e22c2cc 100644 --- a/internal/rawsample/rawsample_test.go +++ b/internal/rawsample/rawsample_test.go @@ -22,7 +22,7 @@ func TestPersistWritesSampleFilesAndMeta(t *testing.T) { SampleID: "My Sample! 01", Source: "unit-test", Request: map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "stream": true, "messages": []any{ map[string]any{"role": "user", "content": "广州天气"}, diff --git a/internal/server/router.go b/internal/server/router.go index e1bf6f4..60db26b 100644 --- a/internal/server/router.go +++ b/internal/server/router.go @@ -15,14 +15,18 @@ import ( "github.com/go-chi/chi/v5/middleware" "ds2api/internal/account" - "ds2api/internal/adapter/claude" - "ds2api/internal/adapter/gemini" - "ds2api/internal/adapter/openai" - "ds2api/internal/admin" "ds2api/internal/auth" "ds2api/internal/chathistory" "ds2api/internal/config" - "ds2api/internal/deepseek" + dsclient "ds2api/internal/deepseek/client" + "ds2api/internal/httpapi/admin" + "ds2api/internal/httpapi/claude" + "ds2api/internal/httpapi/gemini" + "ds2api/internal/httpapi/openai/chat" + "ds2api/internal/httpapi/openai/embeddings" + "ds2api/internal/httpapi/openai/files" + "ds2api/internal/httpapi/openai/responses" + "ds2api/internal/httpapi/openai/shared" "ds2api/internal/webui" ) @@ -30,7 +34,7 
@@ type App struct { Store *config.Store Pool *account.Pool Resolver *auth.Resolver - DS *deepseek.Client + DS *dsclient.Client Router http.Handler } @@ -40,11 +44,11 @@ func NewApp() (*App, error) { return nil, fmt.Errorf("load config: %w", err) } pool := account.NewPool(store) - var dsClient *deepseek.Client + var dsClient *dsclient.Client resolver := auth.NewResolver(store, pool, func(ctx context.Context, acc config.Account) (string, error) { return dsClient.Login(ctx, acc) }) - dsClient = deepseek.NewClient(store, resolver) + dsClient = dsclient.NewClient(store, resolver) if err := dsClient.PreloadPow(context.Background()); err != nil { config.Logger.Warn("[PoW] init failed", "error", err) } else { @@ -55,10 +59,14 @@ func NewApp() (*App, error) { config.Logger.Warn("[chat_history] unavailable", "path", chatHistoryStore.Path(), "error", err) } - openaiHandler := &openai.Handler{Store: store, Auth: resolver, DS: dsClient, ChatHistory: chatHistoryStore} - claudeHandler := &claude.Handler{Store: store, Auth: resolver, DS: dsClient, OpenAI: openaiHandler} - geminiHandler := &gemini.Handler{Store: store, Auth: resolver, DS: dsClient, OpenAI: openaiHandler} - adminHandler := &admin.Handler{Store: store, Pool: pool, DS: dsClient, OpenAI: openaiHandler, ChatHistory: chatHistoryStore} + modelsHandler := &shared.ModelsHandler{Store: store} + chatHandler := &chat.Handler{Store: store, Auth: resolver, DS: dsClient, ChatHistory: chatHistoryStore} + responsesHandler := &responses.Handler{Store: store, Auth: resolver, DS: dsClient, ChatHistory: chatHistoryStore} + filesHandler := &files.Handler{Store: store, Auth: resolver, DS: dsClient, ChatHistory: chatHistoryStore} + embeddingsHandler := &embeddings.Handler{Store: store, Auth: resolver, DS: dsClient, ChatHistory: chatHistoryStore} + claudeHandler := &claude.Handler{Store: store, Auth: resolver, DS: dsClient, OpenAI: chatHandler} + geminiHandler := &gemini.Handler{Store: store, Auth: resolver, DS: dsClient, OpenAI: 
chatHandler} + adminHandler := &admin.Handler{Store: store, Pool: pool, DS: dsClient, OpenAI: chatHandler, ChatHistory: chatHistoryStore} webuiHandler := webui.NewHandler() r := chi.NewRouter() @@ -83,7 +91,13 @@ func NewApp() (*App, error) { r.Head("/healthz", healthzHandler) r.Get("/readyz", readyzHandler) r.Head("/readyz", readyzHandler) - openai.RegisterRoutes(r, openaiHandler) + r.Get("/v1/models", modelsHandler.ListModels) + r.Get("/v1/models/{model_id}", modelsHandler.GetModel) + r.Post("/v1/chat/completions", chatHandler.ChatCompletions) + r.Post("/v1/responses", responsesHandler.Responses) + r.Get("/v1/responses/{response_id}", responsesHandler.GetResponseByID) + r.Post("/v1/files", filesHandler.UploadFile) + r.Post("/v1/embeddings", embeddingsHandler.Embeddings) claude.RegisterRoutes(r, claudeHandler) gemini.RegisterRoutes(r, geminiHandler) r.Route("/admin", func(ar chi.Router) { @@ -140,11 +154,25 @@ func (noopLogEntry) Write(_ int, _ int, _ http.Header, _ time.Duration, _ interf func (noopLogEntry) Panic(_ interface{}, _ []byte) {} +var defaultCORSAllowHeaders = []string{ + "Content-Type", + "Authorization", + "X-API-Key", + "X-Ds2-Target-Account", + "X-Ds2-Source", + "X-Vercel-Protection-Bypass", + "X-Goog-Api-Key", + "Anthropic-Version", + "Anthropic-Beta", +} + +var blockedCORSRequestHeaders = map[string]struct{}{ + "x-ds2-internal-token": {}, +} + func cors(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Access-Control-Allow-Origin", "*") - w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS, PUT, DELETE") - w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-API-Key, X-Ds2-Target-Account, X-Ds2-Source, X-Vercel-Protection-Bypass") + setCORSHeaders(w, r) if r.Method == http.MethodOptions { w.WriteHeader(http.StatusNoContent) return @@ -153,6 +181,125 @@ func cors(next http.Handler) http.Handler { }) } +func setCORSHeaders(w 
http.ResponseWriter, r *http.Request) { + origin := strings.TrimSpace(r.Header.Get("Origin")) + if origin == "" { + w.Header().Set("Access-Control-Allow-Origin", "*") + } else { + w.Header().Set("Access-Control-Allow-Origin", origin) + addVaryHeaderToken(w.Header(), "Origin") + } + w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS, PUT, DELETE") + w.Header().Set("Access-Control-Allow-Headers", buildCORSAllowHeaders(r)) + w.Header().Set("Access-Control-Max-Age", "600") + addVaryHeaderToken(w.Header(), "Access-Control-Request-Headers") + if strings.EqualFold(strings.TrimSpace(r.Header.Get("Access-Control-Request-Private-Network")), "true") { + w.Header().Set("Access-Control-Allow-Private-Network", "true") + addVaryHeaderToken(w.Header(), "Access-Control-Request-Private-Network") + } +} + +func buildCORSAllowHeaders(r *http.Request) string { + names := make([]string, 0, len(defaultCORSAllowHeaders)+4) + seen := make(map[string]struct{}, len(defaultCORSAllowHeaders)+4) + for _, name := range defaultCORSAllowHeaders { + appendCORSHeaderName(&names, seen, name) + } + if r == nil { + return strings.Join(names, ", ") + } + for _, name := range splitCORSRequestHeaders(r.Header.Get("Access-Control-Request-Headers")) { + appendCORSHeaderName(&names, seen, name) + } + return strings.Join(names, ", ") +} + +func splitCORSRequestHeaders(raw string) []string { + if strings.TrimSpace(raw) == "" { + return nil + } + parts := strings.Split(raw, ",") + out := make([]string, 0, len(parts)) + for _, part := range parts { + name := strings.TrimSpace(part) + if !isValidCORSHeaderToken(name) { + continue + } + if _, blocked := blockedCORSRequestHeaders[strings.ToLower(name)]; blocked { + continue + } + out = append(out, name) + } + return out +} + +func appendCORSHeaderName(dst *[]string, seen map[string]struct{}, name string) { + name = strings.TrimSpace(name) + if !isValidCORSHeaderToken(name) { + return + } + key := strings.ToLower(name) + if _, blocked := 
blockedCORSRequestHeaders[key]; blocked { + return + } + if _, ok := seen[key]; ok { + return + } + seen[key] = struct{}{} + *dst = append(*dst, name) +} + +func isValidCORSHeaderToken(v string) bool { + if v == "" { + return false + } + for i := 0; i < len(v); i++ { + c := v[i] + if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') { + continue + } + switch c { + case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '.', '^', '_', '`', '|', '~': + continue + default: + return false + } + } + return true +} + +func addVaryHeaderToken(h http.Header, token string) { + if h == nil { + return + } + token = strings.TrimSpace(token) + if token == "" { + return + } + current := h.Values("Vary") + seen := map[string]struct{}{} + merged := make([]string, 0, len(current)+1) + for _, value := range current { + for _, part := range strings.Split(value, ",") { + name := strings.TrimSpace(part) + if name == "" { + continue + } + key := strings.ToLower(name) + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + merged = append(merged, name) + } + } + key := strings.ToLower(token) + if _, ok := seen[key]; !ok { + merged = append(merged, token) + } + h.Set("Vary", strings.Join(merged, ", ")) +} + func WriteUnhandledError(w http.ResponseWriter, err error) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusInternalServerError) diff --git a/internal/server/router_cors_test.go b/internal/server/router_cors_test.go new file mode 100644 index 0000000..448b1f1 --- /dev/null +++ b/internal/server/router_cors_test.go @@ -0,0 +1,119 @@ +package server + +import ( + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +func TestCORSPreflightAllowsThirdPartyRequestedHeaders(t *testing.T) { + handler := cors(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusTeapot) + })) + + req := httptest.NewRequest(http.MethodOptions, "/v1/chat/completions", nil) + req.Header.Set("Origin", 
"app://obsidian.md") + req.Header.Set("Access-Control-Request-Headers", "authorization, x-stainless-os, x-stainless-runtime, x-ds2-internal-token") + req.Header.Set("Access-Control-Request-Private-Network", "true") + + rec := httptest.NewRecorder() + handler.ServeHTTP(rec, req) + + if rec.Code != http.StatusNoContent { + t.Fatalf("expected 204 for preflight, got %d", rec.Code) + } + if got := rec.Header().Get("Access-Control-Allow-Origin"); got != "app://obsidian.md" { + t.Fatalf("expected origin echo, got %q", got) + } + if got := rec.Header().Get("Access-Control-Allow-Private-Network"); got != "true" { + t.Fatalf("expected private network allow header, got %q", got) + } + + allowHeaders := strings.ToLower(rec.Header().Get("Access-Control-Allow-Headers")) + for _, want := range []string{"authorization", "x-stainless-os", "x-stainless-runtime"} { + if !strings.Contains(allowHeaders, want) { + t.Fatalf("expected allow headers to include %q, got %q", want, rec.Header().Get("Access-Control-Allow-Headers")) + } + } + if strings.Contains(allowHeaders, "x-ds2-internal-token") { + t.Fatalf("expected internal-only header to stay blocked, got %q", rec.Header().Get("Access-Control-Allow-Headers")) + } + + vary := strings.ToLower(rec.Header().Get("Vary")) + for _, want := range []string{"origin", "access-control-request-headers", "access-control-request-private-network"} { + if !strings.Contains(vary, want) { + t.Fatalf("expected vary to include %q, got %q", want, rec.Header().Get("Vary")) + } + } +} + +func TestBuildCORSAllowHeadersKeepsDefaultsWithoutRequest(t *testing.T) { + got := strings.ToLower(buildCORSAllowHeaders(nil)) + for _, want := range []string{"content-type", "x-goog-api-key", "anthropic-version", "x-ds2-source"} { + if !strings.Contains(got, want) { + t.Fatalf("expected default allow headers to include %q, got %q", want, got) + } + } +} + +func TestAppCORSPreflightIsUnifiedAcrossInterfaces(t *testing.T) { + t.Setenv("DS2API_CONFIG_JSON", 
`{"keys":["k1"],"accounts":[{"email":"u@example.com","password":"p"}]}`) + t.Setenv("DS2API_ENV_WRITEBACK", "0") + + app, err := NewApp() + if err != nil { + t.Fatalf("NewApp() error: %v", err) + } + + cases := []struct { + name string + path string + headers string + }{ + { + name: "openai", + path: "/v1/chat/completions", + headers: "authorization, x-stainless-os", + }, + { + name: "claude", + path: "/anthropic/v1/messages", + headers: "x-api-key, anthropic-version, x-stainless-os", + }, + { + name: "gemini", + path: "/v1beta/models/gemini-2.5-pro:generateContent", + headers: "x-goog-api-key, x-client-version", + }, + { + name: "admin", + path: "/admin/login", + headers: "content-type, x-requested-with", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + req := httptest.NewRequest(http.MethodOptions, tc.path, nil) + req.Header.Set("Origin", "app://obsidian.md") + req.Header.Set("Access-Control-Request-Headers", tc.headers) + + rec := httptest.NewRecorder() + app.Router.ServeHTTP(rec, req) + + if rec.Code != http.StatusNoContent { + t.Fatalf("expected %s preflight status 204, got %d", tc.path, rec.Code) + } + if got := rec.Header().Get("Access-Control-Allow-Origin"); got != "app://obsidian.md" { + t.Fatalf("expected origin echo for %s, got %q", tc.path, got) + } + allowHeaders := strings.ToLower(rec.Header().Get("Access-Control-Allow-Headers")) + for _, want := range splitCORSRequestHeaders(tc.headers) { + if !strings.Contains(allowHeaders, strings.ToLower(want)) { + t.Fatalf("expected allow headers for %s to include %q, got %q", tc.path, want, rec.Header().Get("Access-Control-Allow-Headers")) + } + } + }) + } +} diff --git a/internal/server/router_routes_test.go b/internal/server/router_routes_test.go new file mode 100644 index 0000000..3891c8d --- /dev/null +++ b/internal/server/router_routes_test.go @@ -0,0 +1,99 @@ +package server + +import ( + "fmt" + "net/http" + "testing" + + "github.com/go-chi/chi/v5" +) + +func 
TestAPIRoutesRemainRegistered(t *testing.T) { + t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[{"email":"u@example.com","password":"p"}]}`) + t.Setenv("DS2API_ENV_WRITEBACK", "0") + + app, err := NewApp() + if err != nil { + t.Fatalf("NewApp() error: %v", err) + } + routes, ok := app.Router.(chi.Routes) + if !ok { + t.Fatalf("app router does not expose chi routes: %T", app.Router) + } + + got := map[string]bool{} + if err := chi.Walk(routes, func(method string, route string, _ http.Handler, _ ...func(http.Handler) http.Handler) error { + got[fmt.Sprintf("%s %s", method, route)] = true + return nil + }); err != nil { + t.Fatalf("walk routes: %v", err) + } + + for _, want := range []string{ + "GET /v1/models", + "GET /v1/models/{model_id}", + "POST /v1/chat/completions", + "POST /v1/responses", + "GET /v1/responses/{response_id}", + "POST /v1/files", + "POST /v1/embeddings", + "GET /anthropic/v1/models", + "POST /anthropic/v1/messages", + "POST /anthropic/v1/messages/count_tokens", + "POST /v1/messages", + "POST /messages", + "POST /v1/messages/count_tokens", + "POST /messages/count_tokens", + "POST /v1beta/models/{model}:generateContent", + "POST /v1beta/models/{model}:streamGenerateContent", + "POST /v1/models/{model}:generateContent", + "POST /v1/models/{model}:streamGenerateContent", + "POST /admin/login", + "GET /admin/verify", + "GET /admin/config", + "POST /admin/config", + "GET /admin/settings", + "PUT /admin/settings", + "POST /admin/settings/password", + "POST /admin/config/import", + "GET /admin/config/export", + "POST /admin/keys", + "PUT /admin/keys/{key}", + "DELETE /admin/keys/{key}", + "GET /admin/proxies", + "POST /admin/proxies", + "PUT /admin/proxies/{proxyID}", + "DELETE /admin/proxies/{proxyID}", + "POST /admin/proxies/test", + "GET /admin/accounts", + "POST /admin/accounts", + "PUT /admin/accounts/{identifier}", + "DELETE /admin/accounts/{identifier}", + "PUT /admin/accounts/{identifier}/proxy", + "GET /admin/queue/status", + "POST 
/admin/accounts/test", + "POST /admin/accounts/test-all", + "POST /admin/accounts/sessions/delete-all", + "POST /admin/import", + "POST /admin/test", + "POST /admin/dev/raw-samples/capture", + "GET /admin/dev/raw-samples/query", + "POST /admin/dev/raw-samples/save", + "POST /admin/vercel/sync", + "GET /admin/vercel/status", + "POST /admin/vercel/status", + "GET /admin/export", + "GET /admin/dev/captures", + "DELETE /admin/dev/captures", + "GET /admin/chat-history", + "GET /admin/chat-history/{id}", + "DELETE /admin/chat-history", + "DELETE /admin/chat-history/{id}", + "PUT /admin/chat-history/settings", + "GET /admin/version", + } { + if !got[want] { + t.Fatalf("expected route %s to be registered", want) + } + } +} diff --git a/internal/sse/consumer.go b/internal/sse/consumer.go index 0af4746..1a9adf8 100644 --- a/internal/sse/consumer.go +++ b/internal/sse/consumer.go @@ -4,7 +4,7 @@ import ( "net/http" "strings" - "ds2api/internal/deepseek" + dsprotocol "ds2api/internal/deepseek/protocol" ) // CollectResult holds the aggregated text and thinking content from a @@ -35,7 +35,7 @@ func CollectStream(resp *http.Response, thinkingEnabled bool, closeBody bool) Co if thinkingEnabled { currentType = "thinking" } - _ = deepseek.ScanSSELines(resp, func(line []byte) bool { + _ = dsprotocol.ScanSSELines(resp, func(line []byte) bool { chunk, done, parsed := ParseDeepSeekSSELine(line) if parsed && !done { collector.ingestChunk(chunk) diff --git a/internal/sse/consumer_edge_test.go b/internal/sse/consumer_edge_test.go index 99679c5..4654ef8 100644 --- a/internal/sse/consumer_edge_test.go +++ b/internal/sse/consumer_edge_test.go @@ -56,6 +56,21 @@ func TestCollectStreamThinkingAndText(t *testing.T) { } } +func TestCollectStreamDropsThinkingWhenDisabled(t *testing.T) { + resp := makeHTTPResponse( + "data: {\"p\":\"response/thinking_content\",\"v\":\"Thinking...\"}\n" + + "data: {\"p\":\"response/content\",\"v\":\"Answer\"}\n" + + "data: [DONE]\n", + ) + result := 
CollectStream(resp, false, true) + if result.Thinking != "" { + t.Fatalf("expected disabled thinking to be dropped, got %q", result.Thinking) + } + if result.Text != "Answer" { + t.Fatalf("expected only visible answer, got %q", result.Text) + } +} + func TestCollectStreamOnlyThinking(t *testing.T) { resp := makeHTTPResponse( "data: {\"p\":\"response/thinking_content\",\"v\":\"Only thinking\"}\n" + diff --git a/internal/sse/parser.go b/internal/sse/parser.go index 34813be..3057eda 100644 --- a/internal/sse/parser.go +++ b/internal/sse/parser.go @@ -6,7 +6,7 @@ import ( "regexp" "strings" - "ds2api/internal/deepseek" + dsprotocol "ds2api/internal/deepseek/protocol" ) type ContentPart struct { @@ -34,10 +34,10 @@ func shouldSkipPath(path string) bool { if isFragmentStatusPath(path) { return true } - if _, ok := deepseek.SkipExactPathSet[path]; ok { + if _, ok := dsprotocol.SkipExactPathSet[path]; ok { return true } - for _, p := range deepseek.SkipContainsPatterns { + for _, p := range dsprotocol.SkipContainsPatterns { if strings.Contains(path, p) { return true } @@ -99,6 +99,10 @@ func ParseSSEChunkForContent(chunk map[string]any, thinkingEnabled bool, current if transitioned { newType = "text" } + if !thinkingEnabled { + parts = dropThinkingParts(parts) + newType = "text" + } return parts, false, newType } @@ -172,6 +176,9 @@ func updateTypeFromNestedResponse(path string, v any, newType *string) { func resolvePartType(path string, thinkingEnabled bool, newType string) string { switch { case path == "response/thinking_content": + if !thinkingEnabled { + return "thinking" + } if newType == "text" { return "text" } @@ -187,6 +194,20 @@ func resolvePartType(path string, thinkingEnabled bool, newType string) string { } } +func dropThinkingParts(parts []ContentPart) []ContentPart { + if len(parts) == 0 { + return parts + } + out := parts[:0] + for _, p := range parts { + if p.Type == "thinking" { + continue + } + out = append(out, p) + } + return out +} + func 
appendChunkValueContent(v any, partType string, newType *string, parts *[]ContentPart, path string) bool { switch val := v.(type) { case string: diff --git a/internal/testsuite/edge_cases.go b/internal/testsuite/edge_cases.go index 1cdf72e..a2d5d19 100644 --- a/internal/testsuite/edge_cases.go +++ b/internal/testsuite/edge_cases.go @@ -47,7 +47,7 @@ func (r *Runner) caseConcurrencyThresholdLimit(ctx context.Context, cc *caseCont "Authorization": "Bearer " + r.apiKey, }, Body: map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": []map[string]any{ {"role": "user", "content": fmt.Sprintf("并发边界测试 #%d,请输出不少于300字。", idx)}, }, @@ -92,7 +92,7 @@ func (r *Runner) caseStreamAbortRelease(ctx context.Context, cc *caseContext) er "Authorization": "Bearer " + r.apiKey, }, Body: map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": []map[string]any{ {"role": "user", "content": fmt.Sprintf("中断释放测试 #%d,请流式回复", i)}, }, @@ -184,7 +184,7 @@ func (r *Runner) caseSSEJSONIntegrity(ctx context.Context, cc *caseContext) erro "Authorization": "Bearer " + r.apiKey, }, Body: map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": []map[string]any{ {"role": "user", "content": "输出一句话"}, }, diff --git a/internal/testsuite/edge_cases_error_contract.go b/internal/testsuite/edge_cases_error_contract.go index d65ce6d..f177155 100644 --- a/internal/testsuite/edge_cases_error_contract.go +++ b/internal/testsuite/edge_cases_error_contract.go @@ -43,7 +43,7 @@ func (r *Runner) caseMissingMessages(ctx context.Context, cc *caseContext) error "Authorization": "Bearer " + r.apiKey, }, Body: map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "stream": false, }, Retryable: true, @@ -125,7 +125,7 @@ func (r *Runner) caseTokenRefreshManagedAccount(ctx context.Context, cc *caseCon "X-Ds2-Target-Account": id, }, Body: map[string]any{ - "model": "deepseek-chat", + "model": 
"deepseek-v4-flash", "messages": []map[string]any{ {"role": "user", "content": "token refresh test"}, }, @@ -165,6 +165,12 @@ func (r *Runner) caseTokenRefreshManagedAccount(ctx context.Context, cc *caseCon } } cc.assert("has_token_after_refresh", hasToken, fmt.Sprintf("config=%s", string(cfgResp.Body))) - cc.assert("token_preview_changed_from_invalid", !strings.HasPrefix(preview, invalidToken[:20]), fmt.Sprintf("preview=%s invalid_prefix=%s", preview, invalidToken[:20])) + maskedInvalid := invalidToken + if len(maskedInvalid) <= 4 { + maskedInvalid = strings.Repeat("*", len(maskedInvalid)) + } else { + maskedInvalid = maskedInvalid[:2] + "****" + maskedInvalid[len(maskedInvalid)-2:] + } + cc.assert("token_preview_changed_from_invalid", preview != maskedInvalid, fmt.Sprintf("preview=%s invalid_mask=%s", preview, maskedInvalid)) return nil } diff --git a/internal/testsuite/runner_cases_admin.go b/internal/testsuite/runner_cases_admin.go index d66adea..a908575 100644 --- a/internal/testsuite/runner_cases_admin.go +++ b/internal/testsuite/runner_cases_admin.go @@ -80,7 +80,7 @@ func (r *Runner) caseAdminAccountTest(ctx context.Context, cc *caseContext) erro }, Body: map[string]any{ "identifier": r.accountID, - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "message": "ping", }, Retryable: true, diff --git a/internal/testsuite/runner_cases_openai.go b/internal/testsuite/runner_cases_openai.go index 057a7ef..bd22971 100644 --- a/internal/testsuite/runner_cases_openai.go +++ b/internal/testsuite/runner_cases_openai.go @@ -51,12 +51,12 @@ func (r *Runner) caseModelsOpenAI(ctx context.Context, cc *caseContext) error { } cc.assert("status_200", resp.StatusCode == http.StatusOK, fmt.Sprintf("status=%d", resp.StatusCode)) ids := extractModelIDs(resp.Body) - cc.assert("has_deepseek_chat", contains(ids, "deepseek-chat"), strings.Join(ids, ",")) - cc.assert("has_deepseek_reasoner", contains(ids, "deepseek-reasoner"), strings.Join(ids, ",")) - 
cc.assert("has_deepseek_expert_chat", contains(ids, "deepseek-expert-chat"), strings.Join(ids, ",")) - cc.assert("has_deepseek_expert_reasoner", contains(ids, "deepseek-expert-reasoner"), strings.Join(ids, ",")) - cc.assert("has_deepseek_vision_chat", contains(ids, "deepseek-vision-chat"), strings.Join(ids, ",")) - cc.assert("has_deepseek_vision_reasoner", contains(ids, "deepseek-vision-reasoner"), strings.Join(ids, ",")) + cc.assert("has_deepseek_chat", contains(ids, "deepseek-v4-flash"), strings.Join(ids, ",")) + cc.assert("has_deepseek_reasoner", contains(ids, "deepseek-v4-pro"), strings.Join(ids, ",")) + cc.assert("has_deepseek_expert_chat", contains(ids, "deepseek-v4-pro"), strings.Join(ids, ",")) + cc.assert("has_deepseek_expert_reasoner", contains(ids, "deepseek-v4-pro"), strings.Join(ids, ",")) + cc.assert("has_deepseek_vision_chat", contains(ids, "deepseek-v4-vision"), strings.Join(ids, ",")) + cc.assert("has_deepseek_vision_reasoner", contains(ids, "deepseek-v4-vision"), strings.Join(ids, ",")) return nil } @@ -69,7 +69,7 @@ func (r *Runner) caseModelOpenAIByID(ctx context.Context, cc *caseContext) error var m map[string]any _ = json.Unmarshal(resp.Body, &m) cc.assert("object_model", asString(m["object"]) == "model", fmt.Sprintf("body=%s", string(resp.Body))) - cc.assert("id_deepseek_chat", asString(m["id"]) == "deepseek-chat", fmt.Sprintf("body=%s", string(resp.Body))) + cc.assert("id_deepseek_chat", asString(m["id"]) == "deepseek-v4-flash", fmt.Sprintf("body=%s", string(resp.Body))) return nil } func (r *Runner) caseChatNonstream(ctx context.Context, cc *caseContext) error { @@ -80,7 +80,7 @@ func (r *Runner) caseChatNonstream(ctx context.Context, cc *caseContext) error { "Authorization": "Bearer " + r.apiKey, }, Body: map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": []map[string]any{ {"role": "user", "content": "请简单回复一句话"}, }, @@ -108,7 +108,7 @@ func (r *Runner) caseChatStream(ctx context.Context, cc 
*caseContext) error { "Authorization": "Bearer " + r.apiKey, }, Body: map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": []map[string]any{ {"role": "user", "content": "请流式回复一句话"}, }, diff --git a/internal/testsuite/runner_cases_openai_advanced.go b/internal/testsuite/runner_cases_openai_advanced.go index 34e9f01..f0ec3cf 100644 --- a/internal/testsuite/runner_cases_openai_advanced.go +++ b/internal/testsuite/runner_cases_openai_advanced.go @@ -17,7 +17,7 @@ func (r *Runner) caseReasonerStream(ctx context.Context, cc *caseContext) error "Authorization": "Bearer " + r.apiKey, }, Body: map[string]any{ - "model": "deepseek-reasoner", + "model": "deepseek-v4-pro", "messages": []map[string]any{ {"role": "user", "content": "先思考后回答:1+1"}, }, @@ -137,7 +137,7 @@ func (r *Runner) caseConcurrencyBurst(ctx context.Context, cc *caseContext) erro "Authorization": "Bearer " + r.apiKey, }, Body: map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": []map[string]any{ {"role": "user", "content": fmt.Sprintf("并发请求 #%d,请回复ok", idx)}, }, @@ -184,7 +184,7 @@ func (r *Runner) caseInvalidKey(ctx context.Context, cc *caseContext) error { "Authorization": "Bearer invalid-testsuite-key-" + sanitizeID(r.runID), }, Body: map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": []map[string]any{ {"role": "user", "content": "hi"}, }, @@ -206,7 +206,7 @@ func (r *Runner) caseInvalidKey(ctx context.Context, cc *caseContext) error { func toolcallPayload(stream bool) map[string]any { return map[string]any{ - "model": "deepseek-chat", + "model": "deepseek-v4-flash", "messages": []map[string]any{ { "role": "user", diff --git a/internal/toolcall/regression_test.go b/internal/toolcall/regression_test.go index d268374..7615fa3 100644 --- a/internal/toolcall/regression_test.go +++ b/internal/toolcall/regression_test.go @@ -13,18 +13,18 @@ func TestRegression_RobustXMLAndCDATA(t *testing.T) { }{ { name: 
"Standard JSON parameters (Regression)", - text: `foo{"a": 1}`, - expected: []ParsedToolCall{{Name: "foo", Input: map[string]any{"a": float64(1)}}}, + text: `1`, + expected: []ParsedToolCall{{Name: "foo", Input: map[string]any{"a": "1"}}}, }, { name: "XML tags parameters (Regression)", - text: `foohello`, + text: `hello`, expected: []ParsedToolCall{{Name: "foo", Input: map[string]any{"arg1": "hello"}}}, }, { name: "CDATA parameters (New Feature)", - text: `write_file and & symbols]]>`, + text: ` and & symbols]]>`, expected: []ParsedToolCall{{ Name: "write_file", Input: map[string]any{"content": "line 1\nline 2 with and & symbols"}, @@ -32,9 +32,9 @@ line 2 with and & symbols]]>`, }, { name: "Nested XML with repeated parameters (New Feature)", - text: `write_filescript.shscript.shfirstsecond`, +]]>firstsecond`, expected: []ParsedToolCall{{ Name: "write_file", Input: map[string]any{ @@ -46,7 +46,7 @@ echo "hello" }, { name: "Dirty XML with unescaped symbols (Robustness Improvement)", - text: `bashecho "hello" > out.txt && cat out.txt`, + text: `echo "hello" > out.txt && cat out.txt`, expected: []ParsedToolCall{{ Name: "bash", Input: map[string]any{"command": "echo \"hello\" > out.txt && cat out.txt"}, @@ -54,7 +54,7 @@ echo "hello" }, { name: "Mixed JSON inside CDATA (New Hybrid Case)", - text: `foo`, + text: ``, expected: []ParsedToolCall{{ Name: "foo", Input: map[string]any{"json_param": "works"}, diff --git a/internal/toolcall/tool_prompt.go b/internal/toolcall/tool_prompt.go index 8e896e9..7f405d2 100644 --- a/internal/toolcall/tool_prompt.go +++ b/internal/toolcall/tool_prompt.go @@ -9,164 +9,227 @@ import "strings" // The toolNames slice should contain the actual tool names available in the // current request; the function picks real names for examples. func BuildToolCallInstructions(toolNames []string) string { - // Pick real tool names for examples; fall back to generic names. 
- ex1 := "read_file" - ex2 := "write_to_file" - ex3 := "ask_followup_question" - used := map[string]bool{} - for _, n := range toolNames { - switch { - // Read/query-type tools - case !used["ex1"] && matchAny(n, "read_file", "list_files", "search_files", "Read", "Glob"): - ex1 = n - used["ex1"] = true - // Write/execute-type tools - case !used["ex2"] && matchAny(n, "write_to_file", "apply_diff", "execute_command", "exec_command", "Write", "Edit", "MultiEdit", "Bash"): - ex2 = n - used["ex2"] = true - // Interactive/meta tools - case !used["ex3"] && matchAny(n, "ask_followup_question", "attempt_completion", "update_todo_list", "Task"): - ex3 = n - used["ex3"] = true - } - } - ex1Params := exampleReadParams(ex1) - ex2Params := exampleWriteOrExecParams(ex2) - ex3Params := exampleInteractiveParams(ex3) - return `TOOL CALL FORMAT — FOLLOW EXACTLY: - - TOOL_NAME_HERE - - - - + + + RULES: -1) Use the XML format only. Never emit JSON or function-call syntax. -2) Put one or more entries under a single root. -3) Parameters must be XML, not JSON. +1) Use the XML wrapper format only. +2) Put one or more entries under a single root. +3) Put the tool name in the invoke name attribute: . 4) All string values must use , even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries. -5) Objects use nested XML elements. Arrays may repeat the same tag or use children. -6) Numbers, booleans, and null stay plain text. -7) Use only the parameter names in the tool schema. Do not invent fields. -8) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue. +5) Every top-level argument must be a ... node. +6) Objects use nested XML elements inside the parameter body. Arrays may repeat children. +7) Numbers, booleans, and null stay plain text. +8) Use only the parameter names in the tool schema. Do not invent fields. +9) Do NOT wrap XML in markdown fences. 
Do NOT output explanations, role markers, or internal monologue. PARAMETER SHAPES: -- string => -- object => nested XML elements -- array => repeated tags or children -- number/bool/null => plain text +- string => +- object => ... +- array => ...... +- number/bool/null => plain_text 【WRONG — Do NOT do these】: Wrong 1 — mixed text after XML: ... I hope this helps. -Wrong 2 — function-call syntax: - Grep({"pattern": "token"}) -Wrong 3 — JSON parameters: - ` + ex1 + `{"path":"x"} -Wrong 4 — Markdown code fences: +Wrong 2 — Markdown code fences: ` + "```xml" + ` ... ` + "```" + ` -Remember: The ONLY valid way to use tools is the XML block at the end of your response. +Remember: The ONLY valid way to use tools is the ... XML block at the end of your response. -【CORRECT EXAMPLES】: - -Example A — Single tool: - - - ` + ex1 + ` - ` + ex1Params + ` - - - -Example B — Two tools in parallel: - - - ` + ex1 + ` - ` + ex1Params + ` - - - ` + ex2 + ` - ` + ex2Params + ` - - - -Example C — Tool with nested XML parameters: - - - ` + ex3 + ` - ` + ex3Params + ` - - - -Example D — Tool with long script using CDATA (RELIABLE FOR CODE/SCRIPTS): - - - ` + ex2 + ` - - ` + promptCDATA("script.sh") + ` - - - - - -` +` + buildCorrectToolExamples(toolNames) } -func matchAny(name string, candidates ...string) bool { - for _, c := range candidates { - if name == c { - return true +type promptToolExample struct { + name string + params string +} + +func buildCorrectToolExamples(toolNames []string) string { + names := uniqueToolNames(toolNames) + examples := make([]string, 0, 4) + + if single, ok := firstBasicExample(names); ok { + examples = append(examples, "Example A — Single tool:\n"+renderToolExampleBlock([]promptToolExample{single})) + } + + if parallel := firstNBasicExamples(names, 2); len(parallel) >= 2 { + examples = append(examples, "Example B — Two tools in parallel:\n"+renderToolExampleBlock(parallel)) + } + + if nested, ok := firstNestedExample(names); ok { + examples = 
append(examples, "Example C — Tool with nested XML parameters:\n"+renderToolExampleBlock([]promptToolExample{nested})) + } + + if script, ok := firstScriptExample(names); ok { + examples = append(examples, "Example D — Tool with long script using CDATA (RELIABLE FOR CODE/SCRIPTS):\n"+renderToolExampleBlock([]promptToolExample{script})) + } + + if len(examples) == 0 { + return "" + } + return "【CORRECT EXAMPLES】:\n\n" + strings.Join(examples, "\n\n") + "\n\n" +} + +func uniqueToolNames(toolNames []string) []string { + names := make([]string, 0, len(toolNames)) + seen := map[string]bool{} + for _, name := range toolNames { + name = strings.TrimSpace(name) + if name == "" || seen[name] { + continue + } + seen[name] = true + names = append(names, name) + } + return names +} + +func firstBasicExample(names []string) (promptToolExample, bool) { + for _, name := range names { + if params, ok := exampleBasicParams(name); ok { + return promptToolExample{name: name, params: params}, true } } - return false + return promptToolExample{}, false } -func exampleReadParams(name string) string { +func firstNBasicExamples(names []string, count int) []promptToolExample { + out := make([]promptToolExample, 0, count) + for _, name := range names { + if params, ok := exampleBasicParams(name); ok { + out = append(out, promptToolExample{name: name, params: params}) + if len(out) == count { + return out + } + } + } + return out +} + +func firstNestedExample(names []string) (promptToolExample, bool) { + for _, name := range names { + if params, ok := exampleNestedParams(name); ok { + return promptToolExample{name: name, params: params}, true + } + } + return promptToolExample{}, false +} + +func firstScriptExample(names []string) (promptToolExample, bool) { + for _, name := range names { + if params, ok := exampleScriptParams(name); ok { + return promptToolExample{name: name, params: params}, true + } + } + return promptToolExample{}, false +} + +func renderToolExampleBlock(calls 
[]promptToolExample) string { + var b strings.Builder + b.WriteString("\n") + for _, call := range calls { + b.WriteString(` \n") + b.WriteString(indentPromptParameters(call.params, " ")) + b.WriteString("\n \n") + } + b.WriteString("") + return b.String() +} + +func indentPromptParameters(body, indent string) string { + if strings.TrimSpace(body) == "" { + return indent + `` + } + lines := strings.Split(body, "\n") + for i, line := range lines { + if strings.TrimSpace(line) == "" { + lines[i] = line + continue + } + lines[i] = indent + line + } + return strings.Join(lines, "\n") +} + +func wrapParameter(name, inner string) string { + return `` + inner + `` +} + +func exampleBasicParams(name string) (string, bool) { switch strings.TrimSpace(name) { case "Read": - return `` + promptCDATA("README.md") + `` + return wrapParameter("file_path", promptCDATA("README.md")), true case "Glob": - return `` + promptCDATA("**/*.go") + `` + promptCDATA(".") + `` - default: - return `` + promptCDATA("src/main.go") + `` - } -} - -func exampleWriteOrExecParams(name string) string { - switch strings.TrimSpace(name) { + return wrapParameter("pattern", promptCDATA("**/*.go")) + "\n" + wrapParameter("path", promptCDATA(".")), true + case "read_file": + return wrapParameter("path", promptCDATA("src/main.go")), true + case "list_files": + return wrapParameter("path", promptCDATA(".")), true + case "search_files": + return wrapParameter("query", promptCDATA("tool call parser")), true case "Bash", "execute_command": - return `` + promptCDATA("pwd") + `` + return wrapParameter("command", promptCDATA("pwd")), true case "exec_command": - return `` + promptCDATA("pwd") + `` + return wrapParameter("cmd", promptCDATA("pwd")), true + case "Write": + return wrapParameter("file_path", promptCDATA("notes.txt")) + "\n" + wrapParameter("content", promptCDATA("Hello world")), true + case "write_to_file": + return wrapParameter("path", promptCDATA("notes.txt")) + "\n" + wrapParameter("content", 
promptCDATA("Hello world")), true case "Edit": - return `` + promptCDATA("README.md") + `` + promptCDATA("foo") + `` + promptCDATA("bar") + `` + return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + wrapParameter("old_string", promptCDATA("foo")) + "\n" + wrapParameter("new_string", promptCDATA("bar")), true case "MultiEdit": - return `` + promptCDATA("README.md") + `` + promptCDATA("foo") + `` + promptCDATA("bar") + `` - default: - return `` + promptCDATA("output.txt") + `` + promptCDATA("Hello world") + `` + return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `` + promptCDATA("foo") + `` + promptCDATA("bar") + ``, true } + return "", false } -func exampleInteractiveParams(name string) string { +func exampleNestedParams(name string) (string, bool) { switch strings.TrimSpace(name) { + case "MultiEdit": + return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `` + promptCDATA("foo") + `` + promptCDATA("bar") + ``, true case "Task": - return `` + promptCDATA("Investigate flaky tests") + `` + promptCDATA("Run targeted tests and summarize failures") + `` - default: - return `` + promptCDATA("Which approach do you prefer?") + `` + promptCDATA("Option A") + `` + promptCDATA("Option B") + `` + return wrapParameter("description", promptCDATA("Investigate flaky tests")) + "\n" + wrapParameter("prompt", promptCDATA("Run targeted tests and summarize failures")), true + case "ask_followup_question": + return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `` + promptCDATA("Option A") + `` + promptCDATA("Option B") + ``, true } + return "", false +} + +func exampleScriptParams(name string) (string, bool) { + scriptCommand := `cat > /tmp/test_escape.sh <<'EOF' +#!/bin/bash +echo 'single "double"' +echo "literal dollar: \$HOME" +EOF +bash /tmp/test_escape.sh` + scriptContent := `#!/bin/bash +echo 'single "double"' +echo "literal dollar: $HOME"` + + switch strings.TrimSpace(name) { + case "Bash": + 
return wrapParameter("command", promptCDATA(scriptCommand)) + "\n" + wrapParameter("description", promptCDATA("Test shell escaping")), true + case "execute_command": + return wrapParameter("command", promptCDATA(scriptCommand)), true + case "exec_command": + return wrapParameter("cmd", promptCDATA(scriptCommand)), true + case "Write": + return wrapParameter("file_path", promptCDATA("test_escape.sh")) + "\n" + wrapParameter("content", promptCDATA(scriptContent)), true + case "write_to_file": + return wrapParameter("path", promptCDATA("test_escape.sh")) + "\n" + wrapParameter("content", promptCDATA(scriptContent)), true + } + return "", false } func promptCDATA(text string) string { diff --git a/internal/toolcall/tool_prompt_test.go b/internal/toolcall/tool_prompt_test.go index 67aeb27..8b0e8cf 100644 --- a/internal/toolcall/tool_prompt_test.go +++ b/internal/toolcall/tool_prompt_test.go @@ -7,20 +7,124 @@ import ( func TestBuildToolCallInstructions_ExecCommandUsesCmdExample(t *testing.T) { out := BuildToolCallInstructions([]string{"exec_command"}) - if !strings.Contains(out, `exec_command`) { + if !strings.Contains(out, ``) { t.Fatalf("expected exec_command in examples, got: %s", out) } - if !strings.Contains(out, ``) { + if !strings.Contains(out, ``) { t.Fatalf("expected cmd parameter example for exec_command, got: %s", out) } } func TestBuildToolCallInstructions_ExecuteCommandUsesCommandExample(t *testing.T) { out := BuildToolCallInstructions([]string{"execute_command"}) - if !strings.Contains(out, `execute_command`) { + if !strings.Contains(out, ``) { t.Fatalf("expected execute_command in examples, got: %s", out) } - if !strings.Contains(out, ``) { + if !strings.Contains(out, ``) { t.Fatalf("expected command parameter example for execute_command, got: %s", out) } } + +func TestBuildToolCallInstructions_BashUsesCommandAndDescriptionExamples(t *testing.T) { + out := BuildToolCallInstructions([]string{"Bash"}) + blocks := findInvokeBlocks(out, "Bash") + if 
len(blocks) == 0 { + t.Fatalf("expected Bash examples, got: %s", out) + } + + sawDescription := false + for _, block := range blocks { + if !strings.Contains(block, ``) { + t.Fatalf("expected every Bash example to use command parameter, got: %s", block) + } + if strings.Contains(block, ``) || strings.Contains(block, ``) { + t.Fatalf("expected Bash examples not to use file write parameters, got: %s", block) + } + if strings.Contains(block, ``) { + sawDescription = true + } + } + if !sawDescription { + t.Fatalf("expected Bash long-script example to include description, got: %s", out) + } + if strings.Contains(out, ``) { + t.Fatalf("expected examples to avoid unavailable hard-coded Read tool, got: %s", out) + } +} + +func TestBuildToolCallInstructions_ExecuteCommandLongScriptUsesCommand(t *testing.T) { + out := BuildToolCallInstructions([]string{"execute_command"}) + blocks := findInvokeBlocks(out, "execute_command") + if len(blocks) == 0 { + t.Fatalf("expected execute_command examples, got: %s", out) + } + + for _, block := range blocks { + if !strings.Contains(block, ``) { + t.Fatalf("expected execute_command examples to use command parameter, got: %s", block) + } + if strings.Contains(block, ``) || strings.Contains(block, ``) { + t.Fatalf("expected execute_command examples not to use file write parameters, got: %s", block) + } + } + if !strings.Contains(out, `test_escape.sh`) { + t.Fatalf("expected execute_command long-script example, got: %s", out) + } +} + +func TestBuildToolCallInstructions_ExecCommandLongScriptUsesCmd(t *testing.T) { + out := BuildToolCallInstructions([]string{"exec_command"}) + blocks := findInvokeBlocks(out, "exec_command") + if len(blocks) == 0 { + t.Fatalf("expected exec_command examples, got: %s", out) + } + + for _, block := range blocks { + if !strings.Contains(block, ``) { + t.Fatalf("expected exec_command examples to use cmd parameter, got: %s", block) + } + if strings.Contains(block, ``) || strings.Contains(block, ``) || 
strings.Contains(block, ``) { + t.Fatalf("expected exec_command examples not to use command or file write parameters, got: %s", block) + } + } + if !strings.Contains(out, `test_escape.sh`) { + t.Fatalf("expected exec_command long-script example, got: %s", out) + } +} + +func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) { + out := BuildToolCallInstructions([]string{"Write"}) + blocks := findInvokeBlocks(out, "Write") + if len(blocks) == 0 { + t.Fatalf("expected Write examples, got: %s", out) + } + + for _, block := range blocks { + if !strings.Contains(block, ``) || !strings.Contains(block, ``) { + t.Fatalf("expected Write examples to use file_path and content, got: %s", block) + } + if strings.Contains(block, ``) { + t.Fatalf("expected Write examples not to use path, got: %s", block) + } + } +} + +func findInvokeBlocks(text, name string) []string { + open := `` + remaining := text + blocks := []string{} + for { + start := strings.Index(remaining, open) + if start < 0 { + return blocks + } + remaining = remaining[start:] + end := strings.Index(remaining, ``) + if end < 0 { + return blocks + } + end += len(``) + blocks = append(blocks, remaining[:end]) + remaining = remaining[end:] + } +} diff --git a/internal/toolcall/toolcalls_input_parse.go b/internal/toolcall/toolcalls_input_parse.go index b987e64..4b7ef7e 100644 --- a/internal/toolcall/toolcalls_input_parse.go +++ b/internal/toolcall/toolcalls_input_parse.go @@ -2,6 +2,7 @@ package toolcall import ( "encoding/json" + "html" "strings" "unicode" ) @@ -13,7 +14,7 @@ func parseToolCallInput(v any) map[string]any { case map[string]any: return x case string: - raw := strings.TrimSpace(x) + raw := strings.TrimSpace(html.UnescapeString(x)) if raw == "" { return map[string]any{} } diff --git a/internal/toolcall/toolcalls_markup.go b/internal/toolcall/toolcalls_markup.go index 94420dc..3d8e657 100644 --- a/internal/toolcall/toolcalls_markup.go +++ b/internal/toolcall/toolcalls_markup.go @@ 
-7,120 +7,10 @@ import ( "strings" ) -var toolCallMarkupTagNames = []string{"tool_call", "function_call", "invoke"} -var toolCallMarkupTagPatternByName = map[string]*regexp.Regexp{ - "tool_call": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?tool_call\b([^>]*)>(.*?)`), - "function_call": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?function_call\b([^>]*)>(.*?)`), - "invoke": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)>(.*?)`), -} -var toolCallMarkupSelfClosingPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)/>`) var toolCallMarkupKVPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)\b[^>]*>(.*?)`) -var toolCallMarkupAttrPattern = regexp.MustCompile(`(?is)(name|function|tool)\s*=\s*"([^"]+)"`) -var anyTagPattern = regexp.MustCompile(`(?is)<[^>]+>`) -var toolCallMarkupNameTagNames = []string{"name", "function"} -var toolCallMarkupNamePatternByTag = map[string]*regexp.Regexp{ - "name": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?name\b[^>]*>(.*?)`), - "function": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?function\b[^>]*>(.*?)`), -} // cdataPattern matches a standalone CDATA section. 
var cdataPattern = regexp.MustCompile(`(?is)^$`) -var toolCallMarkupArgsTagNames = []string{"input", "arguments", "argument", "parameters", "parameter", "args", "params"} -var toolCallMarkupArgsPatternByTag = map[string]*regexp.Regexp{ - "input": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?input\b[^>]*>(.*?)`), - "arguments": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?arguments\b[^>]*>(.*?)`), - "argument": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?argument\b[^>]*>(.*?)`), - "parameters": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?parameters\b[^>]*>(.*?)`), - "parameter": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?parameter\b[^>]*>(.*?)`), - "args": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?args\b[^>]*>(.*?)`), - "params": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?params\b[^>]*>(.*?)`), -} - -func parseMarkupToolCalls(text string) []ParsedToolCall { - trimmed := strings.TrimSpace(text) - if trimmed == "" { - return nil - } - - out := make([]ParsedToolCall, 0) - for _, tagName := range toolCallMarkupTagNames { - pattern := toolCallMarkupTagPatternByName[tagName] - for _, m := range pattern.FindAllStringSubmatch(trimmed, -1) { - if len(m) < 3 { - continue - } - attrs := strings.TrimSpace(m[1]) - inner := strings.TrimSpace(m[2]) - if parsed := parseMarkupSingleToolCall(attrs, inner); parsed.Name != "" { - out = append(out, parsed) - } - } - } - for _, m := range toolCallMarkupSelfClosingPattern.FindAllStringSubmatch(trimmed, -1) { - if len(m) < 2 { - continue - } - if parsed := parseMarkupSingleToolCall(strings.TrimSpace(m[1]), ""); parsed.Name != "" { - out = append(out, parsed) - } - } - if len(out) == 0 { - return nil - } - return out -} - -func parseMarkupSingleToolCall(attrs string, inner string) ParsedToolCall { - // Try parsing inner content as a JSON tool call object. 
- if raw := strings.TrimSpace(inner); raw != "" && strings.HasPrefix(raw, "{") { - var obj map[string]any - if err := json.Unmarshal([]byte(raw), &obj); err == nil { - name, _ := obj["name"].(string) - if name == "" { - if fn, ok := obj["function"].(map[string]any); ok { - name, _ = fn["name"].(string) - } - } - if name == "" { - if fc, ok := obj["functionCall"].(map[string]any); ok { - name, _ = fc["name"].(string) - } - } - if strings.TrimSpace(name) != "" { - input := parseToolCallInput(obj["input"]) - if len(input) == 0 { - if args, ok := obj["arguments"]; ok { - input = parseToolCallInput(args) - } - } - return ParsedToolCall{Name: strings.TrimSpace(name), Input: input} - } - } - } - - name := "" - if m := toolCallMarkupAttrPattern.FindStringSubmatch(attrs); len(m) >= 3 { - name = strings.TrimSpace(m[2]) - } - if name == "" { - name = findMarkupTagValue(inner, toolCallMarkupNameTagNames, toolCallMarkupNamePatternByTag) - } - if name == "" { - return ParsedToolCall{} - } - - input := map[string]any{} - if argsRaw := findMarkupTagValue(inner, toolCallMarkupArgsTagNames, toolCallMarkupArgsPatternByTag); argsRaw != "" { - input = parseMarkupInput(argsRaw) - } else if kv := parseMarkupKVObject(inner); len(kv) > 0 { - input = kv - } - return ParsedToolCall{Name: name, Input: input} -} - -func parseMarkupInput(raw string) map[string]any { - return parseStructuredToolCallInput(raw) -} func parseMarkupKVObject(text string) map[string]any { matches := toolCallMarkupKVPattern.FindAllStringSubmatch(strings.TrimSpace(text), -1) @@ -212,23 +102,3 @@ func extractRawTagValue(inner string) string { // but for KV objects we usually want the value. 
return html.UnescapeString(inner) } - -func stripTagText(text string) string { - return strings.TrimSpace(anyTagPattern.ReplaceAllString(text, "")) -} - -func findMarkupTagValue(text string, tagNames []string, patternByTag map[string]*regexp.Regexp) string { - for _, tag := range tagNames { - pattern := patternByTag[tag] - if pattern == nil { - continue - } - if m := pattern.FindStringSubmatch(text); len(m) >= 2 { - value := extractRawTagValue(m[1]) - if value != "" { - return value - } - } - } - return "" -} diff --git a/internal/toolcall/toolcalls_parse.go b/internal/toolcall/toolcalls_parse.go index bc61124..16743ac 100644 --- a/internal/toolcall/toolcalls_parse.go +++ b/internal/toolcall/toolcalls_parse.go @@ -46,9 +46,6 @@ func parseToolCallsDetailedXMLOnly(text string) ToolCallParseResult { } parsed := parseXMLToolCalls(trimmed) - if len(parsed) == 0 { - parsed = parseMarkupToolCalls(trimmed) - } if len(parsed) == 0 { return result } @@ -77,16 +74,7 @@ func filterToolCallsDetailed(parsed []ParsedToolCall) ([]ParsedToolCall, []strin func looksLikeToolCallSyntax(text string) bool { lower := strings.ToLower(text) - return strings.Contains(lower, "\s*(.*?)\s*`) -var functionCallPattern = regexp.MustCompile(`(?is)\s*([^<]+?)\s*`) -var functionParamPattern = regexp.MustCompile(`(?is)\s*(.*?)\s*`) -var antmlFunctionCallPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_]+:)?function_call[^>]*(?:name|function)="([^"]+)"[^>]*>\s*(.*?)\s*`) -var antmlArgumentPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_]+:)?argument\s+name="([^"]+)"\s*>\s*(.*?)\s*`) -var invokeCallPattern = regexp.MustCompile(`(?is)(.*?)`) -var invokeParamPattern = regexp.MustCompile(`(?is)\s*(.*?)\s*`) -var toolUseFunctionPattern = regexp.MustCompile(`(?is)\s*(.*?)\s*`) -var toolUseNameParametersPattern = regexp.MustCompile(`(?is)\s*\s*([^<]+?)\s*\s*\s*(.*?)\s*\s*`) -var toolUseFunctionNameParametersPattern = regexp.MustCompile(`(?is)\s*\s*([^<]+?)\s*\s*\s*(.*?)\s*\s*`) -var toolUseToolNameBodyPattern = 
regexp.MustCompile(`(?is)\s*\s*([^<]+?)\s*\s*(.*?)\s*`) -var xmlToolNamePatterns = []*regexp.Regexp{ - regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?tool_name\b[^>]*>(.*?)`), - regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?function_name\b[^>]*>(.*?)`), -} +var xmlToolCallsWrapperPattern = regexp.MustCompile(`(?is)]*>\s*(.*?)\s*`) +var xmlInvokePattern = regexp.MustCompile(`(?is)]*)>\s*(.*?)\s*`) +var xmlParameterPattern = regexp.MustCompile(`(?is)]*)>\s*(.*?)\s*`) +var xmlAttrPattern = regexp.MustCompile(`(?is)\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')`) func parseXMLToolCalls(text string) []ParsedToolCall { - matches := xmlToolCallPattern.FindAllString(text, -1) - out := make([]ParsedToolCall, 0, len(matches)+1) - for _, block := range matches { - call, ok := parseSingleXMLToolCall(block) - if !ok { - continue - } - out = append(out, call) - } - if len(out) > 0 { - return out - } - if call, ok := parseFunctionCallTagStyle(text); ok { - return []ParsedToolCall{call} - } - if calls := parseAntmlFunctionCallStyles(text); len(calls) > 0 { - return calls - } - if call, ok := parseInvokeFunctionCallStyle(text); ok { - return []ParsedToolCall{call} - } - if call, ok := parseToolUseFunctionStyle(text); ok { - return []ParsedToolCall{call} - } - if call, ok := parseToolUseNameParametersStyle(text); ok { - return []ParsedToolCall{call} - } - if call, ok := parseToolUseFunctionNameParametersStyle(text); ok { - return []ParsedToolCall{call} - } - if call, ok := parseToolUseToolNameBodyStyle(text); ok { - return []ParsedToolCall{call} - } - return nil -} - -func parseSingleXMLToolCall(block string) (ParsedToolCall, bool) { - inner := strings.TrimSpace(block) - inner = strings.TrimPrefix(inner, "") - inner = strings.TrimSuffix(inner, "") - inner = strings.TrimSpace(inner) - if strings.HasPrefix(inner, "{") { - var payload map[string]any - if err := json.Unmarshal([]byte(inner), &payload); err == nil { - name := strings.TrimSpace(asString(payload["tool"])) - if name == "" { - name 
= strings.TrimSpace(asString(payload["tool_name"])) - } - if name != "" { - input := map[string]any{} - if params, ok := payload["params"].(map[string]any); ok { - input = params - } else if params, ok := payload["parameters"].(map[string]any); ok { - input = params - } - return ParsedToolCall{Name: name, Input: input}, true - } - } - } - - name := "" - params := extractXMLToolParamsByRegex(inner) - dec := xml.NewDecoder(strings.NewReader(block)) - inTool := false - for { - tok, err := dec.Token() - if err != nil { - break - } - switch t := tok.(type) { - case xml.StartElement: - tag := strings.ToLower(t.Name.Local) - switch tag { - case "tool": - inTool = true - for _, attr := range t.Attr { - if strings.EqualFold(strings.TrimSpace(attr.Name.Local), "name") && strings.TrimSpace(name) == "" { - name = strings.TrimSpace(attr.Value) - } - } - case "parameters": - var node struct { - Inner string `xml:",innerxml"` - } - if err := dec.DecodeElement(&node, &t); err == nil { - inner := strings.TrimSpace(node.Inner) - if inner != "" { - extracted := extractRawTagValue(inner) - if parsed := parseStructuredToolCallInput(extracted); len(parsed) > 0 { - for k, vv := range parsed { - params[k] = vv - } - } - } - } - case "tool_name", "function_name", "name": - var v string - if err := dec.DecodeElement(&v, &t); err == nil && strings.TrimSpace(v) != "" { - name = strings.TrimSpace(v) - } - case "input", "arguments", "argument", "args", "params": - var v string - if err := dec.DecodeElement(&v, &t); err == nil && strings.TrimSpace(v) != "" { - if parsed := parseStructuredToolCallInput(strings.TrimSpace(v)); len(parsed) > 0 { - for k, vv := range parsed { - params[k] = vv - } - } - } - default: - if inTool { - var v string - if err := dec.DecodeElement(&v, &t); err == nil { - params[t.Name.Local] = strings.TrimSpace(html.UnescapeString(v)) - } - } - } - case xml.EndElement: - tag := strings.ToLower(t.Name.Local) - if tag == "tool" { - inTool = false - } - } - } - if 
strings.TrimSpace(name) == "" { - name = strings.TrimSpace(html.UnescapeString(extractXMLToolNameByRegex(stripTopLevelXMLParameters(inner)))) - } - if strings.TrimSpace(name) == "" { - return ParsedToolCall{}, false - } - return ParsedToolCall{Name: strings.TrimSpace(html.UnescapeString(name)), Input: params}, true -} - -func stripTopLevelXMLParameters(inner string) string { - out := strings.TrimSpace(inner) - for { - idx := strings.Index(strings.ToLower(out), "") - if openEnd < 0 { - return out - } - closeIdx := strings.Index(segmentLower, "") - if closeIdx < 0 { - return out[:idx] - } - end := idx + closeIdx + len("") - out = out[:idx] + out[end:] - } -} - -func extractXMLToolNameByRegex(inner string) string { - for _, pattern := range xmlToolNamePatterns { - if m := pattern.FindStringSubmatch(inner); len(m) >= 2 { - if v := strings.TrimSpace(stripTagText(m[1])); v != "" { - return v - } - } - } - return "" -} - -func extractXMLToolParamsByRegex(inner string) map[string]any { - raw := findMarkupTagValue(inner, toolCallMarkupArgsTagNames, toolCallMarkupArgsPatternByTag) - if raw == "" { - return map[string]any{} - } - parsed := parseMarkupInput(raw) - if parsed == nil { - return map[string]any{} - } - return parsed -} - -func parseFunctionCallTagStyle(text string) (ParsedToolCall, bool) { - m := functionCallPattern.FindStringSubmatch(text) - if len(m) < 2 { - return ParsedToolCall{}, false - } - name := strings.TrimSpace(html.UnescapeString(m[1])) - if name == "" { - return ParsedToolCall{}, false - } - input := map[string]any{} - for _, pm := range functionParamPattern.FindAllStringSubmatch(text, -1) { - if len(pm) < 3 { - continue - } - key := strings.TrimSpace(pm[1]) - val := extractRawTagValue(pm[2]) - if key != "" { - if parsed := parseStructuredToolCallInput(val); len(parsed) > 0 { - if isOnlyRawValue(parsed, val) { - input[key] = val - } else { - input[key] = parsed - } - } - } - } - return ParsedToolCall{Name: name, Input: input}, true -} - -func 
parseAntmlFunctionCallStyles(text string) []ParsedToolCall { - matches := antmlFunctionCallPattern.FindAllStringSubmatch(text, -1) - if len(matches) == 0 { + wrappers := xmlToolCallsWrapperPattern.FindAllStringSubmatch(text, -1) + if len(wrappers) == 0 { return nil } - out := make([]ParsedToolCall, 0, len(matches)) - for _, m := range matches { - if call, ok := parseSingleAntmlFunctionCallMatch(m); ok { + out := make([]ParsedToolCall, 0, len(wrappers)) + for _, wrapper := range wrappers { + if len(wrapper) < 2 { + continue + } + for _, block := range xmlInvokePattern.FindAllStringSubmatch(wrapper[1], -1) { + call, ok := parseSingleXMLToolCall(block) + if !ok { + continue + } out = append(out, call) } } @@ -249,189 +36,90 @@ func parseAntmlFunctionCallStyles(text string) []ParsedToolCall { return out } -func parseSingleAntmlFunctionCallMatch(m []string) (ParsedToolCall, bool) { - if len(m) < 3 { +func parseSingleXMLToolCall(block []string) (ParsedToolCall, bool) { + if len(block) < 3 { return ParsedToolCall{}, false } - name := strings.TrimSpace(html.UnescapeString(m[1])) + attrs := parseXMLTagAttributes(block[1]) + name := strings.TrimSpace(html.UnescapeString(attrs["name"])) if name == "" { return ParsedToolCall{}, false } - body := strings.TrimSpace(m[2]) - input := map[string]any{} - if strings.HasPrefix(body, "{") { - if err := json.Unmarshal([]byte(body), &input); err == nil { + + inner := strings.TrimSpace(block[2]) + if strings.HasPrefix(inner, "{") { + var payload map[string]any + if err := json.Unmarshal([]byte(inner), &payload); err == nil { + input := map[string]any{} + if params, ok := payload["input"].(map[string]any); ok { + input = params + } + if len(input) == 0 { + if params, ok := payload["parameters"].(map[string]any); ok { + input = params + } + } return ParsedToolCall{Name: name, Input: input}, true } } - for _, am := range antmlArgumentPattern.FindAllStringSubmatch(body, -1) { - if len(am) < 3 { - continue - } - k := strings.TrimSpace(am[1]) - 
v := extractRawTagValue(am[2]) - if k != "" { - input[k] = v - } - } - if len(input) > 0 { - return ParsedToolCall{Name: name, Input: input}, true - } - if paramsRaw := findMarkupTagValue(body, toolCallMarkupArgsTagNames, toolCallMarkupArgsPatternByTag); paramsRaw != "" { - if parsed := parseMarkupInput(paramsRaw); len(parsed) > 0 { - return ParsedToolCall{Name: name, Input: parsed}, true - } - } - if strings.Contains(body, "<") { - if parsed := parseStructuredToolCallInput(body); len(parsed) > 0 && !isOnlyRawValue(parsed, body) { - return ParsedToolCall{Name: name, Input: parsed}, true - } - } - return ParsedToolCall{Name: name, Input: input}, true -} -func parseInvokeFunctionCallStyle(text string) (ParsedToolCall, bool) { - m := invokeCallPattern.FindStringSubmatch(text) - if len(m) < 3 { - return ParsedToolCall{}, false - } - name := strings.TrimSpace(html.UnescapeString(m[1])) - if name == "" { - return ParsedToolCall{}, false - } input := map[string]any{} - for _, pm := range invokeParamPattern.FindAllStringSubmatch(m[2], -1) { - if len(pm) < 3 { + for _, paramMatch := range xmlParameterPattern.FindAllStringSubmatch(inner, -1) { + if len(paramMatch) < 3 { continue } - k := strings.TrimSpace(pm[1]) - v := extractRawTagValue(pm[2]) - if k != "" { - if parsed := parseStructuredToolCallInput(v); len(parsed) > 0 { - if isOnlyRawValue(parsed, v) { - input[k] = v - } else { - input[k] = parsed - } - } + paramAttrs := parseXMLTagAttributes(paramMatch[1]) + paramName := strings.TrimSpace(html.UnescapeString(paramAttrs["name"])) + if paramName == "" { + continue } + value := parseInvokeParameterValue(paramMatch[2]) + appendMarkupValue(input, paramName, value) } + if len(input) == 0 { - if argsRaw := findMarkupTagValue(m[2], toolCallMarkupArgsTagNames, toolCallMarkupArgsPatternByTag); argsRaw != "" { - input = parseMarkupInput(argsRaw) - } else if kv := parseMarkupKVObject(m[2]); len(kv) > 0 { - input = kv - } else if parsed := parseStructuredToolCallInput(m[2]); 
len(parsed) > 0 && !isOnlyRawValue(parsed, strings.TrimSpace(html.UnescapeString(m[2]))) { - input = parsed + if strings.TrimSpace(inner) != "" { + return ParsedToolCall{}, false } + return ParsedToolCall{Name: name, Input: map[string]any{}}, true } return ParsedToolCall{Name: name, Input: input}, true } -func parseToolUseFunctionStyle(text string) (ParsedToolCall, bool) { - m := toolUseFunctionPattern.FindStringSubmatch(text) - if len(m) < 3 { - return ParsedToolCall{}, false +func parseXMLTagAttributes(raw string) map[string]string { + if strings.TrimSpace(raw) == "" { + return map[string]string{} } - name := strings.TrimSpace(html.UnescapeString(m[1])) - if name == "" { - return ParsedToolCall{}, false - } - body := m[2] - input := map[string]any{} - for _, pm := range invokeParamPattern.FindAllStringSubmatch(body, -1) { - if len(pm) < 3 { + out := map[string]string{} + for _, m := range xmlAttrPattern.FindAllStringSubmatch(raw, -1) { + if len(m) < 5 { continue } - k := strings.TrimSpace(pm[1]) - v := extractRawTagValue(pm[2]) - if k != "" { - if parsed := parseStructuredToolCallInput(v); len(parsed) > 0 { - if isOnlyRawValue(parsed, v) { - input[k] = v - } else { - input[k] = parsed - } + key := strings.ToLower(strings.TrimSpace(m[1])) + if key == "" { + continue + } + value := m[3] + if value == "" { + value = m[4] + } + out[key] = value + } + return out +} + +func parseInvokeParameterValue(raw string) any { + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + return "" + } + if parsed := parseStructuredToolCallInput(trimmed); len(parsed) > 0 { + if len(parsed) == 1 { + if rawValue, ok := parsed["_raw"].(string); ok { + return rawValue } } + return parsed } - return ParsedToolCall{Name: name, Input: input}, true -} - -func parseToolUseNameParametersStyle(text string) (ParsedToolCall, bool) { - m := toolUseNameParametersPattern.FindStringSubmatch(text) - if len(m) < 3 { - return ParsedToolCall{}, false - } - name := 
strings.TrimSpace(html.UnescapeString(m[1])) - if name == "" { - return ParsedToolCall{}, false - } - raw := strings.TrimSpace(m[2]) - input := map[string]any{} - if raw != "" { - if parsed := parseStructuredToolCallInput(raw); len(parsed) > 0 { - input = parsed - } - } - return ParsedToolCall{Name: name, Input: input}, true -} - -func parseToolUseFunctionNameParametersStyle(text string) (ParsedToolCall, bool) { - m := toolUseFunctionNameParametersPattern.FindStringSubmatch(text) - if len(m) < 3 { - return ParsedToolCall{}, false - } - name := strings.TrimSpace(html.UnescapeString(m[1])) - if name == "" { - return ParsedToolCall{}, false - } - raw := strings.TrimSpace(m[2]) - input := map[string]any{} - if raw != "" { - if parsed := parseStructuredToolCallInput(raw); len(parsed) > 0 { - input = parsed - } - } - return ParsedToolCall{Name: name, Input: input}, true -} - -func parseToolUseToolNameBodyStyle(text string) (ParsedToolCall, bool) { - m := toolUseToolNameBodyPattern.FindStringSubmatch(text) - if len(m) < 3 { - return ParsedToolCall{}, false - } - name := strings.TrimSpace(html.UnescapeString(m[1])) - if name == "" { - return ParsedToolCall{}, false - } - body := strings.TrimSpace(m[2]) - input := map[string]any{} - if body != "" { - if kv := parseXMLChildKV(body); len(kv) > 0 { - input = kv - } else if kv := parseMarkupKVObject(body); len(kv) > 0 { - input = kv - } else if parsed := parseStructuredToolCallInput(body); len(parsed) > 0 { - input = parsed - } - } - return ParsedToolCall{Name: name, Input: input}, true -} - -func parseXMLChildKV(body string) map[string]any { - trimmed := strings.TrimSpace(body) - if trimmed == "" { - return nil - } - parsed := parseStructuredToolCallInput(trimmed) - if len(parsed) == 0 { - return nil - } - return parsed -} - -func asString(v any) string { - s, _ := v.(string) - return s + return html.UnescapeString(extractRawTagValue(trimmed)) } diff --git a/internal/toolcall/toolcalls_test.go 
b/internal/toolcall/toolcalls_test.go index ec1fa5b..13d0bef 100644 --- a/internal/toolcall/toolcalls_test.go +++ b/internal/toolcall/toolcalls_test.go @@ -16,8 +16,8 @@ func TestFormatOpenAIToolCalls(t *testing.T) { } } -func TestParseToolCallsSupportsClaudeXMLToolCall(t *testing.T) { - text := `Bashpwdshow cwd` +func TestParseToolCallsSupportsToolCallsWrapper(t *testing.T) { + text := `pwdshow cwd` calls := ParseToolCalls(text, []string{"bash"}) if len(calls) != 1 { t.Fatalf("expected 1 call, got %#v", calls) @@ -30,10 +30,10 @@ func TestParseToolCallsSupportsClaudeXMLToolCall(t *testing.T) { } } -func TestParseToolCallsSupportsMultilineCDATAAndRepeatedXMLTags(t *testing.T) { - text := `write_filescript.shscript.shfirstsecond` +]]>firstsecond` calls := ParseToolCalls(text, []string{"write_file"}) if len(calls) != 1 { t.Fatalf("expected 1 call, got %#v", calls) @@ -54,8 +54,8 @@ echo "hello" } } -func TestParseToolCallsSupportsCanonicalXMLParametersJSON(t *testing.T) { - text := `get_weather{"city":"beijing","unit":"c"}` +func TestParseToolCallsSupportsInvokeParameters(t *testing.T) { + text := `beijingc` calls := ParseToolCalls(text, []string{"get_weather"}) if len(calls) != 1 { t.Fatalf("expected 1 call, got %#v", calls) @@ -68,8 +68,8 @@ func TestParseToolCallsSupportsCanonicalXMLParametersJSON(t *testing.T) { } } -func TestParseToolCallsPreservesRawMalformedXMLParameters(t *testing.T) { - text := `execute_commandcd /root && git status` +func TestParseToolCallsPreservesRawMalformedParams(t *testing.T) { + text := `cd /root && git status` calls := ParseToolCalls(text, []string{"execute_command"}) if len(calls) != 1 { t.Fatalf("expected 1 call, got %#v", calls) @@ -77,17 +77,17 @@ func TestParseToolCallsPreservesRawMalformedXMLParameters(t *testing.T) { if calls[0].Name != "execute_command" { t.Fatalf("expected tool name execute_command, got %q", calls[0].Name) } - raw, ok := calls[0].Input["_raw"].(string) + raw, ok := calls[0].Input["command"].(string) if !ok { 
- t.Fatalf("expected raw argument tracking, got %#v", calls[0].Input) + t.Fatalf("expected raw command tracking, got %#v", calls[0].Input) } if raw != "cd /root && git status" { t.Fatalf("expected raw arguments to be preserved, got %q", raw) } } -func TestParseToolCallsSupportsXMLParametersJSONWithAmpersandCommand(t *testing.T) { - text := `execute_command{"command":"sshpass -p 'xxx' ssh -o StrictHostKeyChecking=no -p 1111 root@111.111.111.111 'cd /root && git clone https://github.com/ericc-ch/copilot-api.git'","cwd":null,"timeout":null}` +func TestParseToolCallsSupportsParamsJSONWithAmpersandCommand(t *testing.T) { + text := `sshpass -p 'xxx' ssh -o StrictHostKeyChecking=no -p 1111 root@111.111.111.111 'cd /root && git clone https://github.com/ericc-ch/copilot-api.git'` calls := ParseToolCalls(text, []string{"execute_command"}) if len(calls) != 1 { t.Fatalf("expected 1 call, got %#v", calls) @@ -101,8 +101,8 @@ func TestParseToolCallsSupportsXMLParametersJSONWithAmpersandCommand(t *testing. 
} } -func TestParseToolCallsDoesNotTreatParameterNameTagAsToolName(t *testing.T) { - text := `file.txtpwd` +func TestParseToolCallsDoesNotTreatParamsNameTagAsToolName(t *testing.T) { + text := `file.txtpwd` calls := ParseToolCalls(text, []string{"execute_command"}) if len(calls) != 1 { t.Fatalf("expected 1 call, got %#v", calls) @@ -110,13 +110,13 @@ func TestParseToolCallsDoesNotTreatParameterNameTagAsToolName(t *testing.T) { if calls[0].Name != "execute_command" { t.Fatalf("expected tool name execute_command, got %q", calls[0].Name) } - if calls[0].Input["name"] != "file.txt" { + if calls[0].Input["tool_name"] != "file.txt" { t.Fatalf("expected parameter name preserved, got %#v", calls[0].Input) } } -func TestParseToolCallsDetailedMarksXMLToolCallSyntax(t *testing.T) { - text := `Bashpwd` +func TestParseToolCallsDetailedMarksToolCallsSyntax(t *testing.T) { + text := `pwd` res := ParseToolCallsDetailed(text, []string{"bash"}) if !res.SawToolCallSyntax { t.Fatalf("expected SawToolCallSyntax=true, got %#v", res) @@ -126,8 +126,8 @@ func TestParseToolCallsDetailedMarksXMLToolCallSyntax(t *testing.T) { } } -func TestParseToolCallsSupportsClaudeXMLJSONToolCall(t *testing.T) { - text := `{"tool":"Bash","params":{"command":"pwd","description":"show cwd"}}` +func TestParseToolCallsSupportsInlineJSONToolObject(t *testing.T) { + text := `{"input":{"command":"pwd","description":"show cwd"}}` calls := ParseToolCalls(text, []string{"bash"}) if len(calls) != 1 { t.Fatalf("expected 1 call, got %#v", calls) @@ -140,170 +140,46 @@ func TestParseToolCallsSupportsClaudeXMLJSONToolCall(t *testing.T) { } } -func TestParseToolCallsSupportsFunctionCallTagStyle(t *testing.T) { - text := `Bashls -lalist` - calls := ParseToolCalls(text, []string{"bash"}) - if len(calls) != 1 { - t.Fatalf("expected 1 call, got %#v", calls) - } - if calls[0].Name != "Bash" { - t.Fatalf("expected original tool name Bash, got %q", calls[0].Name) - } - if calls[0].Input["command"] != "ls -la" { - 
t.Fatalf("expected command argument, got %#v", calls[0].Input) - } -} - -func TestParseToolCallsSupportsAntmlFunctionCallStyle(t *testing.T) { - text := `{"command":"pwd","description":"x"}` - calls := ParseToolCalls(text, []string{"bash"}) - if len(calls) != 1 { - t.Fatalf("expected 1 call, got %#v", calls) - } - if calls[0].Name != "Bash" { - t.Fatalf("expected original tool name Bash, got %q", calls[0].Name) - } - if calls[0].Input["command"] != "pwd" { - t.Fatalf("expected command argument, got %#v", calls[0].Input) - } -} - -func TestParseToolCallsSupportsAntmlArgumentStyle(t *testing.T) { - text := `pwdx` - calls := ParseToolCalls(text, []string{"bash"}) - if len(calls) != 1 { - t.Fatalf("expected 1 call, got %#v", calls) - } - if calls[0].Name != "Bash" { - t.Fatalf("expected original tool name Bash, got %q", calls[0].Name) - } - if calls[0].Input["command"] != "pwd" { - t.Fatalf("expected command argument, got %#v", calls[0].Input) - } -} - -func TestParseToolCallsSupportsInvokeFunctionCallStyle(t *testing.T) { - text := `pwdd` - calls := ParseToolCalls(text, []string{"bash"}) - if len(calls) != 1 { - t.Fatalf("expected 1 call, got %#v", calls) - } - if calls[0].Name != "Bash" { - t.Fatalf("expected original tool name Bash, got %q", calls[0].Name) - } - if calls[0].Input["command"] != "pwd" { - t.Fatalf("expected command argument, got %#v", calls[0].Input) - } -} - -func TestParseToolCallsSupportsToolUseFunctionParameterStyle(t *testing.T) { - text := `test` - calls := ParseToolCalls(text, []string{"search_web"}) - if len(calls) != 1 { - t.Fatalf("expected 1 call, got %#v", calls) - } - if calls[0].Name != "search_web" { - t.Fatalf("expected canonical tool name search_web, got %q", calls[0].Name) - } - if calls[0].Input["query"] != "test" { - t.Fatalf("expected query argument, got %#v", calls[0].Input) - } -} - -func TestParseToolCallsSupportsToolUseNameParametersStyle(t *testing.T) { - text := `write_file{"path":"/tmp/a.txt","content":"abc"}` - calls := 
ParseToolCalls(text, []string{"write_file"}) - if len(calls) != 1 { - t.Fatalf("expected 1 call, got %#v", calls) - } - if calls[0].Name != "write_file" { - t.Fatalf("expected tool name write_file, got %q", calls[0].Name) - } - if calls[0].Input["path"] != "/tmp/a.txt" { - t.Fatalf("expected path argument, got %#v", calls[0].Input) - } -} - -func TestParseToolCallsSupportsToolUseFunctionNameParametersStyle(t *testing.T) { - text := `write_file{"path":"/tmp/b.txt","content":"xyz"}` - calls := ParseToolCalls(text, []string{"write_file"}) - if len(calls) != 1 { - t.Fatalf("expected 1 call, got %#v", calls) - } - if calls[0].Name != "write_file" { - t.Fatalf("expected tool name write_file, got %q", calls[0].Name) - } - if calls[0].Input["content"] != "xyz" { - t.Fatalf("expected content argument, got %#v", calls[0].Input) - } -} - -func TestParseToolCallsSupportsToolUseToolNameBodyStyle(t *testing.T) { - text := `write_file/tmp/c.txthello` - calls := ParseToolCalls(text, []string{"write_file"}) - if len(calls) != 1 { - t.Fatalf("expected 1 call, got %#v", calls) - } - if calls[0].Name != "write_file" { - t.Fatalf("expected tool name write_file, got %q", calls[0].Name) - } - if calls[0].Input["path"] != "/tmp/c.txt" { - t.Fatalf("expected path argument, got %#v", calls[0].Input) - } -} - -func TestParseToolCallsSupportsNestedToolTagStyle(t *testing.T) { - text := `pwdshow cwd` - calls := ParseToolCalls(text, []string{"bash"}) - if len(calls) != 1 { - t.Fatalf("expected 1 call, got %#v", calls) - } - if calls[0].Name != "Bash" { - t.Fatalf("expected original tool name Bash, got %q", calls[0].Name) - } - if calls[0].Input["command"] != "pwd" { - t.Fatalf("expected command argument, got %#v", calls[0].Input) - } -} - -func TestParseToolCallsSupportsAntmlFunctionAttributeWithParametersTag(t *testing.T) { - text := `{"command":"pwd"}` - calls := ParseToolCalls(text, []string{"bash"}) - if len(calls) != 1 { - t.Fatalf("expected 1 call, got %#v", calls) - } - if calls[0].Name 
!= "Bash" { - t.Fatalf("expected original tool name Bash, got %q", calls[0].Name) - } - if calls[0].Input["command"] != "pwd" { - t.Fatalf("expected command argument, got %#v", calls[0].Input) - } -} - -func TestParseToolCallsSupportsMultipleAntmlFunctionCalls(t *testing.T) { - text := `{"command":"pwd"}{"file_path":"README.md"}` - calls := ParseToolCalls(text, []string{"bash", "read"}) - if len(calls) != 2 { - t.Fatalf("expected 2 calls, got %#v", calls) - } - if calls[0].Name != "Bash" || calls[1].Name != "Read" { - t.Fatalf("expected original names [Bash Read], got %#v", calls) - } -} - func TestParseToolCallsDoesNotAcceptMismatchedMarkupTags(t *testing.T) { - text := `read_file{"path":"README.md"}` + text := `README.md` calls := ParseToolCalls(text, []string{"read_file"}) if len(calls) != 0 { t.Fatalf("expected mismatched tags to be rejected, got %#v", calls) } } -func TestParseToolCallsDoesNotTreatParametersFunctionNameAsToolName(t *testing.T) { - text := `data_onlyREADME.md` +func TestParseToolCallsDoesNotTreatNameInsideParamsAsToolName(t *testing.T) { + text := `README.md` calls := ParseToolCalls(text, []string{"read_file"}) if len(calls) != 0 { - t.Fatalf("expected no tool call when function_name appears only under parameters, got %#v", calls) + t.Fatalf("expected no tool call when name appears only under params, got %#v", calls) + } +} + +func TestParseToolCallsRejectsLegacyToolsWrapper(t *testing.T) { + text := `read_file{"path":"README.md"}` + calls := ParseToolCalls(text, []string{"read_file"}) + if len(calls) != 0 { + t.Fatalf("expected legacy tools wrapper to be rejected, got %#v", calls) + } +} + +func TestParseToolCallsRejectsBareInvokeWithoutToolCallsWrapper(t *testing.T) { + text := `README.md` + res := ParseToolCallsDetailed(text, []string{"read_file"}) + if len(res.Calls) != 0 { + t.Fatalf("expected bare invoke to be rejected, got %#v", res.Calls) + } + if res.SawToolCallSyntax { + t.Fatalf("expected bare invoke to no longer count as supported 
syntax, got %#v", res) + } +} + +func TestParseToolCallsRejectsLegacyCanonicalBody(t *testing.T) { + text := `read_file{"path":"README.md"}` + calls := ParseToolCalls(text, []string{"read_file"}) + if len(calls) != 0 { + t.Fatalf("expected legacy canonical body to be rejected, got %#v", calls) } } @@ -445,7 +321,7 @@ func TestRepairLooseJSONWithNestedObjects(t *testing.T) { } func TestParseToolCallsUnescapesHTMLEntityArguments(t *testing.T) { - text := `Bash{"command":"echo a > out.txt"}` + text := `echo a > out.txt` calls := ParseToolCalls(text, []string{"bash"}) if len(calls) != 1 { t.Fatalf("expected one call, got %#v", calls) @@ -457,7 +333,7 @@ func TestParseToolCallsUnescapesHTMLEntityArguments(t *testing.T) { } func TestParseToolCallsIgnoresXMLInsideFencedCodeBlock(t *testing.T) { - text := "Here is an example:\n```xml\nread_file{\"path\":\"README.md\"}\n```\nDo not execute it." + text := "Here is an example:\n```xml\nREADME.md\n```\nDo not execute it." res := ParseToolCallsDetailed(text, []string{"read_file"}) if len(res.Calls) != 0 { t.Fatalf("expected no parsed calls for fenced example, got %#v", res.Calls) @@ -465,7 +341,7 @@ func TestParseToolCallsIgnoresXMLInsideFencedCodeBlock(t *testing.T) { } func TestParseToolCallsParsesOnlyNonFencedXMLToolCall(t *testing.T) { - text := "```xml\nread_file{\"path\":\"README.md\"}\n```\nsearch{\"q\":\"golang\"}" + text := "```xml\nREADME.md\n```\ngolang" res := ParseToolCallsDetailed(text, []string{"read_file", "search"}) if len(res.Calls) != 1 { t.Fatalf("expected exactly one parsed call outside fence, got %#v", res.Calls) @@ -476,7 +352,7 @@ func TestParseToolCallsParsesOnlyNonFencedXMLToolCall(t *testing.T) { } func TestParseToolCallsParsesAfterFourBacktickFence(t *testing.T) { - text := "````markdown\n```xml\nread_file{\"path\":\"README.md\"}\n```\n````\nsearch{\"q\":\"outside\"}" + text := "````markdown\n```xml\nREADME.md\n```\n````\noutside" res := ParseToolCallsDetailed(text, []string{"read_file", "search"}) 
if len(res.Calls) != 1 { t.Fatalf("expected exactly one parsed call outside four-backtick fence, got %#v", res.Calls) diff --git a/internal/adapter/openai/tool_sieve_core.go b/internal/toolstream/tool_sieve_core.go similarity index 73% rename from internal/adapter/openai/tool_sieve_core.go rename to internal/toolstream/tool_sieve_core.go index 4fbd64d..2ec0914 100644 --- a/internal/adapter/openai/tool_sieve_core.go +++ b/internal/toolstream/tool_sieve_core.go @@ -1,4 +1,4 @@ -package openai +package toolstream import ( "strings" @@ -6,16 +6,16 @@ import ( "ds2api/internal/toolcall" ) -func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames []string) []toolStreamEvent { +func ProcessChunk(state *State, chunk string, toolNames []string) []Event { if state == nil { return nil } if chunk != "" { state.pending.WriteString(chunk) } - events := make([]toolStreamEvent, 0, 2) + events := make([]Event, 0, 2) if len(state.pendingToolCalls) > 0 { - events = append(events, toolStreamEvent{ToolCalls: state.pendingToolCalls}) + events = append(events, Event{ToolCalls: state.pendingToolCalls}) state.pendingToolRaw = "" state.pendingToolCalls = nil } @@ -37,7 +37,7 @@ func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames if len(calls) > 0 { if prefix != "" { state.noteText(prefix) - events = append(events, toolStreamEvent{Content: prefix}) + events = append(events, Event{Content: prefix}) } if suffix != "" { state.pending.WriteString(suffix) @@ -48,7 +48,7 @@ func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames } if prefix != "" { state.noteText(prefix) - events = append(events, toolStreamEvent{Content: prefix}) + events = append(events, Event{Content: prefix}) } if suffix != "" { state.pending.WriteString(suffix) @@ -65,7 +65,7 @@ func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames prefix := pending[:start] if prefix != "" { state.noteText(prefix) - events = append(events, 
toolStreamEvent{Content: prefix}) + events = append(events, Event{Content: prefix}) } state.pending.Reset() state.capture.WriteString(pending[start:]) @@ -81,19 +81,19 @@ func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames state.pending.Reset() state.pending.WriteString(hold) state.noteText(safe) - events = append(events, toolStreamEvent{Content: safe}) + events = append(events, Event{Content: safe}) } return events } -func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStreamEvent { +func Flush(state *State, toolNames []string) []Event { if state == nil { return nil } - events := processToolSieveChunk(state, "", toolNames) + events := ProcessChunk(state, "", toolNames) if len(state.pendingToolCalls) > 0 { - events = append(events, toolStreamEvent{ToolCalls: state.pendingToolCalls}) + events = append(events, Event{ToolCalls: state.pendingToolCalls}) state.pendingToolRaw = "" state.pendingToolCalls = nil } @@ -102,14 +102,14 @@ func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStrea if ready { if consumedPrefix != "" { state.noteText(consumedPrefix) - events = append(events, toolStreamEvent{Content: consumedPrefix}) + events = append(events, Event{Content: consumedPrefix}) } if len(consumedCalls) > 0 { - events = append(events, toolStreamEvent{ToolCalls: consumedCalls}) + events = append(events, Event{ToolCalls: consumedCalls}) } if consumedSuffix != "" { state.noteText(consumedSuffix) - events = append(events, toolStreamEvent{Content: consumedSuffix}) + events = append(events, Event{Content: consumedSuffix}) } } else { content := state.capture.String() @@ -117,7 +117,7 @@ func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStrea // If capture never resolved into a real tool call, release the // buffered text instead of swallowing it. 
state.noteText(content) - events = append(events, toolStreamEvent{Content: content}) + events = append(events, Event{Content: content}) } } state.capture.Reset() @@ -128,13 +128,13 @@ func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStrea content := state.pending.String() // If pending never resolved into a real tool call, release it as text. state.noteText(content) - events = append(events, toolStreamEvent{Content: content}) + events = append(events, Event{Content: content}) state.pending.Reset() } return events } -func splitSafeContentForToolDetection(state *toolStreamSieveState, s string) (safe, hold string) { +func splitSafeContentForToolDetection(state *State, s string) (safe, hold string) { if s == "" { return "", "" } @@ -150,7 +150,7 @@ func splitSafeContentForToolDetection(state *toolStreamSieveState, s string) (sa return s, "" } -func findToolSegmentStart(state *toolStreamSieveState, s string) int { +func findToolSegmentStart(state *State, s string) int { if s == "" { return -1 } @@ -179,7 +179,7 @@ func findToolSegmentStart(state *toolStreamSieveState, s string) int { } } -func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix string, calls []toolcall.ParsedToolCall, suffix string, ready bool) { +func consumeToolCapture(state *State, toolNames []string) (prefix string, calls []toolcall.ParsedToolCall, suffix string, ready bool) { captured := state.capture.String() if captured == "" { return "", nil, "", false diff --git a/internal/adapter/openai/tool_sieve_jsonscan.go b/internal/toolstream/tool_sieve_jsonscan.go similarity index 97% rename from internal/adapter/openai/tool_sieve_jsonscan.go rename to internal/toolstream/tool_sieve_jsonscan.go index 6568721..d9e9593 100644 --- a/internal/adapter/openai/tool_sieve_jsonscan.go +++ b/internal/toolstream/tool_sieve_jsonscan.go @@ -1,4 +1,4 @@ -package openai +package toolstream import "strings" diff --git a/internal/adapter/openai/tool_sieve_state.go 
b/internal/toolstream/tool_sieve_state.go similarity index 87% rename from internal/adapter/openai/tool_sieve_state.go rename to internal/toolstream/tool_sieve_state.go index 8128f8c..1d709bd 100644 --- a/internal/adapter/openai/tool_sieve_state.go +++ b/internal/toolstream/tool_sieve_state.go @@ -1,11 +1,11 @@ -package openai +package toolstream import ( "ds2api/internal/toolcall" "strings" ) -type toolStreamSieveState struct { +type State struct { pending strings.Builder capture strings.Builder capturing bool @@ -23,19 +23,19 @@ type toolStreamSieveState struct { toolArgsDone bool } -type toolStreamEvent struct { +type Event struct { Content string ToolCalls []toolcall.ParsedToolCall - ToolCallDeltas []toolCallDelta + ToolCallDeltas []ToolCallDelta } -type toolCallDelta struct { +type ToolCallDelta struct { Index int Name string Arguments string } -func (s *toolStreamSieveState) resetIncrementalToolState() { +func (s *State) resetIncrementalToolState() { s.disableDeltas = false s.toolNameSent = false s.toolName = "" @@ -45,7 +45,7 @@ func (s *toolStreamSieveState) resetIncrementalToolState() { s.toolArgsDone = false } -func (s *toolStreamSieveState) noteText(content string) { +func (s *State) noteText(content string) { if !hasMeaningfulText(content) { return } @@ -56,7 +56,7 @@ func hasMeaningfulText(text string) bool { return strings.TrimSpace(text) != "" } -func insideCodeFenceWithState(state *toolStreamSieveState, text string) bool { +func insideCodeFenceWithState(state *State, text string) bool { if state == nil { return insideCodeFence(text) } @@ -76,7 +76,7 @@ func insideCodeFence(text string) bool { return len(simulateCodeFenceState(nil, 0, true, text).stack) > 0 } -func updateCodeFenceState(state *toolStreamSieveState, text string) { +func updateCodeFenceState(state *State, text string) { if state == nil || !hasMeaningfulText(text) { return } diff --git a/internal/adapter/openai/tool_sieve_xml.go b/internal/toolstream/tool_sieve_xml.go similarity index 
58% rename from internal/adapter/openai/tool_sieve_xml.go rename to internal/toolstream/tool_sieve_xml.go index b019b93..87fb075 100644 --- a/internal/adapter/openai/tool_sieve_xml.go +++ b/internal/toolstream/tool_sieve_xml.go @@ -1,4 +1,4 @@ -package openai +package toolstream import ( "ds2api/internal/toolcall" @@ -9,47 +9,27 @@ import ( // --- XML tool call support for the streaming sieve --- //nolint:unused // kept as explicit tag inventory for future XML sieve refinements. -var xmlToolCallClosingTags = []string{"", "", "", "", "", "", - // Agent-style XML tags (Roo Code, Cline, etc.) - "", "", "", ""} -var xmlToolCallOpeningTags = []string{""} +var xmlToolCallOpeningTags = []string{""}, - {""}, - {""}, - {""}, - {""}, - {""}, - // Agent-style: these are XML "tool call" patterns from coding agents. - // They get captured → parsed. If parsing fails, the raw XML is preserved - // so the caller can still see the original text. - {""}, - {""}, - {""}, } -// xmlToolCallBlockPattern matches a complete XML tool call block (wrapper or standalone). +// xmlToolCallBlockPattern matches a complete canonical XML tool call block. // //nolint:unused // reserved for future fast-path XML block detection. -var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(\s*(?:.*?)\s*|\s*(?:.*?)\s*|]*>(?:.*?)|]*>(?:.*?)|(?:.*?)|(?:.*?)|(?:.*?)|(?:.*?))`) +var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(]*>\s*(?:.*?)\s*)`) // xmlToolTagsToDetect is the set of XML tag prefixes used by findToolSegmentStart. -var xmlToolTagsToDetect = []string{"", "", "", "", - // Agent-style tags - "", "", ""} +var xmlToolTagsToDetect = []string{"", "\n", - " \n", - " read_file\n", - ` {"path":"README.MD"}` + "\n", - " \n", + ` ` + "\n", + ` README.MD` + "\n", + " \n", "", } - var events []toolStreamEvent + var events []Event for _, c := range chunks { - events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...) 
+ events = append(events, ProcessChunk(&state, c, []string{"read_file"})...) } - events = append(events, flushToolSieve(&state, []string{"read_file"})...) + events = append(events, Flush(&state, []string{"read_file"})...) var textContent string var toolCalls int @@ -31,7 +30,7 @@ func TestProcessToolSieveInterceptsXMLToolCallWithoutLeak(t *testing.T) { toolCalls += len(evt.ToolCalls) } - if strings.Contains(textContent, "\n \n " + toolName + "\n \n \n \n \n \n \n", + "]]>\n \n", } - var events []toolStreamEvent + var events []Event for _, c := range chunks { - events = append(events, processToolSieveChunk(&state, c, []string{toolName})...) + events = append(events, ProcessChunk(&state, c, []string{toolName})...) } - events = append(events, flushToolSieve(&state, []string{toolName})...) + events = append(events, Flush(&state, []string{toolName})...) var textContent strings.Builder toolCalls := 0 @@ -86,18 +85,18 @@ func TestProcessToolSieveHandlesLongXMLToolCall(t *testing.T) { } func TestProcessToolSieveXMLWithLeadingText(t *testing.T) { - var state toolStreamSieveState + var state State // Model outputs some prose then an XML tool call. chunks := []string{ "Let me check the file.\n", - "\n \n read_file\n", - ` {"path":"go.mod"}` + "\n \n", + "\n \n", + ` go.mod` + "\n \n", } - var events []toolStreamEvent + var events []Event for _, c := range chunks { - events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...) + events = append(events, ProcessChunk(&state, c, []string{"read_file"})...) } - events = append(events, flushToolSieve(&state, []string{"read_file"})...) + events = append(events, Flush(&state, []string{"read_file"})...) var textContent string var toolCalls int @@ -113,7 +112,7 @@ func TestProcessToolSieveXMLWithLeadingText(t *testing.T) { t.Fatalf("expected leading text to be emitted, got %q", textContent) } // The XML itself should NOT leak. 
- if strings.Contains(textContent, "示例 XMLplain text xml payload` - events := processToolSieveChunk(&state, chunk, []string{"read_file"}) - events = append(events, flushToolSieve(&state, []string{"read_file"})...) + var state State + chunk := `示例 XMLplain text xml payload` + events := ProcessChunk(&state, chunk, []string{"read_file"}) + events = append(events, Flush(&state, []string{"read_file"})...) var textContent strings.Builder toolCalls := 0 @@ -142,10 +141,10 @@ func TestProcessToolSievePassesThroughNonToolXMLBlock(t *testing.T) { } func TestProcessToolSieveNonToolXMLKeepsSuffixForToolParsing(t *testing.T) { - var state toolStreamSieveState - chunk := `plain xml{"path":"README.MD"}` - events := processToolSieveChunk(&state, chunk, []string{"read_file"}) - events = append(events, flushToolSieve(&state, []string{"read_file"})...) + var state State + chunk := `plain xmlREADME.MD` + events := ProcessChunk(&state, chunk, []string{"read_file"}) + events = append(events, Flush(&state, []string{"read_file"})...) var textContent strings.Builder toolCalls := 0 @@ -153,10 +152,10 @@ func TestProcessToolSieveNonToolXMLKeepsSuffixForToolParsing(t *testing.T) { textContent.WriteString(evt.Content) toolCalls += len(evt.ToolCalls) } - if !strings.Contains(textContent.String(), `plain xml`) { + if !strings.Contains(textContent.String(), `plain xml`) { t.Fatalf("expected leading non-tool XML to be preserved, got %q", textContent.String()) } - if strings.Contains(textContent.String(), ``) { + if strings.Contains(textContent.String(), `{"path":"README.md"}` - events := processToolSieveChunk(&state, chunk, []string{"read_file"}) - events = append(events, flushToolSieve(&state, []string{"read_file"})...) + var state State + chunk := `{"path":"README.md"}` + events := ProcessChunk(&state, chunk, []string{"read_file"}) + events = append(events, Flush(&state, []string{"read_file"})...) 
var textContent strings.Builder toolCalls := 0 @@ -186,28 +185,28 @@ func TestProcessToolSievePassesThroughMalformedExecutableXMLBlock(t *testing.T) } func TestProcessToolSievePassesThroughFencedXMLToolCallExamples(t *testing.T) { - var state toolStreamSieveState + var state State input := strings.Join([]string{ "Before first example.\n```", - "xml\nread_file{\"path\":\"README.md\"}\n```\n", + "xml\nREADME.md\n```\n", "Between examples.\n```xml\n", - "search{\"q\":\"golang\"}\n", + "golang\n", "```\nAfter examples.", }, "") chunks := []string{ "Before first example.\n```", - "xml\nread_file{\"path\":\"README.md\"}\n```\n", + "xml\nREADME.md\n```\n", "Between examples.\n```xml\n", - "search{\"q\":\"golang\"}\n", + "golang\n", "```\nAfter examples.", } - var events []toolStreamEvent + var events []Event for _, c := range chunks { - events = append(events, processToolSieveChunk(&state, c, []string{"read_file", "search"})...) + events = append(events, ProcessChunk(&state, c, []string{"read_file", "search"})...) } - events = append(events, flushToolSieve(&state, []string{"read_file", "search"})...) + events = append(events, Flush(&state, []string{"read_file", "search"})...) var textContent strings.Builder toolCalls := 0 @@ -227,24 +226,24 @@ func TestProcessToolSievePassesThroughFencedXMLToolCallExamples(t *testing.T) { } func TestProcessToolSieveKeepsPartialXMLTagInsideFencedExample(t *testing.T) { - var state toolStreamSieveState + var state State input := strings.Join([]string{ "Example:\n```xml\nread_file{\"path\":\"README.md\"}\n```\n", + "lls>README.md\n```\n", "Done.", }, "") chunks := []string{ "Example:\n```xml\nread_file{\"path\":\"README.md\"}\n```\n", + "lls>README.md\n```\n", "Done.", } - var events []toolStreamEvent + var events []Event for _, c := range chunks { - events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...) + events = append(events, ProcessChunk(&state, c, []string{"read_file"})...) 
} - events = append(events, flushToolSieve(&state, []string{"read_file"})...) + events = append(events, Flush(&state, []string{"read_file"})...) var textContent strings.Builder toolCalls := 0 @@ -264,17 +263,17 @@ func TestProcessToolSieveKeepsPartialXMLTagInsideFencedExample(t *testing.T) { } func TestProcessToolSievePartialXMLTagHeldBack(t *testing.T) { - var state toolStreamSieveState + var state State // Chunk ends with a partial XML tool tag. - events := processToolSieveChunk(&state, "Hello \n", 10}, - {"tool_call_tag", "prefix \n", 7}, - {"invoke_tag", "text body", 5}, - {"xml_inside_code_fence", "```xml\nread_file\n```", -1}, - {"function_call_tag", "body", 0}, + {"bare_tool_call_text", "prefix \n", -1}, + {"xml_inside_code_fence", "```xml\n\n```", -1}, {"no_xml", "just plain text", -1}, {"gemini_json_no_detect", `some text {"functionCall":{"name":"search"}}`, -1}, } @@ -312,10 +309,10 @@ func TestFindPartialXMLToolTagStart(t *testing.T) { input string want int }{ - {"partial_tool_call", "Hello done", -1}, + {"complete_tag", "Text done", -1}, {"no_lt", "plain text", -1}, {"closed_lt", "a < b > c", -1}, } @@ -330,10 +327,10 @@ func TestFindPartialXMLToolTagStart(t *testing.T) { } func TestHasOpenXMLToolTag(t *testing.T) { - if !hasOpenXMLToolTag("\nfoo") { + if !hasOpenXMLToolTag("\n") { t.Fatal("should detect open XML tool tag without closing tag") } - if hasOpenXMLToolTag("\nfoo") { + if hasOpenXMLToolTag("\n\n") { t.Fatal("should return false when closing tag is present") } if hasOpenXMLToolTag("plain text without any XML") { @@ -344,50 +341,34 @@ func TestHasOpenXMLToolTag(t *testing.T) { // Test the EXACT scenario the user reports: token-by-token streaming where // tag arrives in small pieces. func TestProcessToolSieveTokenByTokenXMLNoLeak(t *testing.T) { - var state toolStreamSieveState + var state State // Simulate DeepSeek model generating tokens one at a time. 
chunks := []string{ "<", "tool", - "_calls", + "_ca", + "lls", ">\n", - " <", - "tool", - "_call", - ">\n", - " <", - "tool", - "_name", - ">", + " ` + "\n", + " `, + "README.MD", + "\n", + " \n", "\n", - " <", - "parameters", - ">", - `{"path"`, - `: "README.MD"`, - `}`, - "\n", - " \n", - "", } - var events []toolStreamEvent + var events []Event for _, c := range chunks { - events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...) + events = append(events, ProcessChunk(&state, c, []string{"read_file"})...) } - events = append(events, flushToolSieve(&state, []string{"read_file"})...) + events = append(events, Flush(&state, []string{"read_file"})...) var textContent string var toolCalls int @@ -398,7 +379,7 @@ func TestProcessToolSieveTokenByTokenXMLNoLeak(t *testing.T) { toolCalls += len(evt.ToolCalls) } - if strings.Contains(textContent, "") { @@ -412,21 +393,20 @@ func TestProcessToolSieveTokenByTokenXMLNoLeak(t *testing.T) { } } -// Test that flushToolSieve on incomplete XML falls back to raw text. +// Test that Flush on incomplete XML falls back to raw text. func TestFlushToolSieveIncompleteXMLFallsBackToText(t *testing.T) { - var state toolStreamSieveState + var state State // XML block starts but stream ends before completion. chunks := []string{ "\n", - " \n", - " read_file\n", + " \n", } - var events []toolStreamEvent + var events []Event for _, c := range chunks { - events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...) + events = append(events, ProcessChunk(&state, c, []string{"read_file"})...) } // Stream ends abruptly - flush should NOT dump raw XML. - events = append(events, flushToolSieve(&state, []string{"read_file"})...) + events = append(events, Flush(&state, []string{"read_file"})...) var textContent string for _, evt := range events { @@ -442,9 +422,9 @@ func TestFlushToolSieveIncompleteXMLFallsBackToText(t *testing.T) { // Test that the opening tag "\n " is NOT emitted as text content. 
func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) { - var state toolStreamSieveState + var state State // First chunk is the opening tag - should be held, not emitted. - evts1 := processToolSieveChunk(&state, "\n ", []string{"read_file"}) + evts1 := ProcessChunk(&state, "\n ", []string{"read_file"}) for _, evt := range evts1 { if strings.Contains(evt.Content, "") { t.Fatalf("opening tag leaked on first chunk: %q", evt.Content) @@ -452,8 +432,8 @@ func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) { } // Remaining content arrives. - evts2 := processToolSieveChunk(&state, "\n read_file\n {\"path\":\"README.MD\"}\n \n", []string{"read_file"}) - evts2 = append(evts2, flushToolSieve(&state, []string{"read_file"})...) + evts2 := ProcessChunk(&state, "\n README.MD\n \n", []string{"read_file"}) + evts2 = append(evts2, Flush(&state, []string{"read_file"})...) var textContent string var toolCalls int @@ -465,7 +445,7 @@ func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) { toolCalls += len(evt.ToolCalls) } - if strings.Contains(textContent, "Here is the answer\n", "", } - var events []toolStreamEvent + var events []Event for _, c := range chunks { - events = append(events, processToolSieveChunk(&state, c, []string{"attempt_completion"})...) + events = append(events, ProcessChunk(&state, c, []string{"attempt_completion"})...) } - events = append(events, flushToolSieve(&state, []string{"attempt_completion"})...) + events = append(events, Flush(&state, []string{"attempt_completion"})...) var textContent string for _, evt := range events { @@ -504,3 +484,24 @@ func TestProcessToolSieveFallsBackToRawAttemptCompletion(t *testing.T) { t.Fatalf("expected agent XML to fall back to raw text, got %q", textContent) } } + +func TestProcessToolSievePassesThroughBareToolCallAsText(t *testing.T) { + var state State + chunk := `README.md` + events := ProcessChunk(&state, chunk, []string{"read_file"}) + events = append(events, Flush(&state, []string{"read_file"})...) 
+ + var textContent strings.Builder + toolCalls := 0 + for _, evt := range events { + textContent.WriteString(evt.Content) + toolCalls += len(evt.ToolCalls) + } + + if toolCalls != 0 { + t.Fatalf("expected bare invoke to remain text, got %d events=%#v", toolCalls, events) + } + if textContent.String() != chunk { + t.Fatalf("expected bare invoke to pass through unchanged, got %q", textContent.String()) + } +} diff --git a/internal/translatorcliproxy/bridge_test.go b/internal/translatorcliproxy/bridge_test.go index 9dbfe30..3370360 100644 --- a/internal/translatorcliproxy/bridge_test.go +++ b/internal/translatorcliproxy/bridge_test.go @@ -16,6 +16,14 @@ func TestToOpenAIClaude(t *testing.T) { } } +func TestToOpenAIGeminiThinkingBudgetZeroDisablesReasoning(t *testing.T) { + raw := []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}],"generationConfig":{"thinkingConfig":{"thinkingBudget":0}}}`) + got := string(ToOpenAI(sdktranslator.FormatGemini, "gemini-2.5-flash", raw, false)) + if !strings.Contains(got, `"reasoning_effort":"none"`) { + t.Fatalf("expected Gemini thinkingBudget=0 to translate to reasoning_effort none, got: %s", got) + } +} + func TestFromOpenAINonStreamClaude(t *testing.T) { original := []byte(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":false}`) translatedReq := []byte(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":false}`) diff --git a/internal/util/messages.go b/internal/util/messages.go index b6920c0..3a43f24 100644 --- a/internal/util/messages.go +++ b/internal/util/messages.go @@ -6,7 +6,7 @@ import ( "ds2api/internal/prompt" ) -const ClaudeDefaultModel = "claude-sonnet-4-5" +const ClaudeDefaultModel = "claude-sonnet-4-6" type Message struct { Role string `json:"role"` diff --git a/internal/util/messages_test.go b/internal/util/messages_test.go index e7fd822..077e903 100644 --- a/internal/util/messages_test.go +++ b/internal/util/messages_test.go @@ -104,6 +104,18 
@@ func TestConvertClaudeToDeepSeek(t *testing.T) { } } +func TestConvertClaudeToDeepSeekUsesGlobalAliasResolution(t *testing.T) { + store := config.LoadStore() + req := map[string]any{ + "model": "claude-3-5-sonnet-latest", + "messages": []any{map[string]any{"role": "user", "content": "Hi"}}, + } + out := ConvertClaudeToDeepSeek(req, store) + if out["model"] != "deepseek-v4-flash" { + t.Fatalf("expected global alias resolution, got model=%q", out["model"]) + } +} + func contains(s, sub string) bool { return len(s) >= len(sub) && (s == sub || len(sub) == 0 || (len(s) > 0 && (indexOf(s, sub) >= 0))) } diff --git a/internal/util/thinking.go b/internal/util/thinking.go new file mode 100644 index 0000000..6fa101c --- /dev/null +++ b/internal/util/thinking.go @@ -0,0 +1,92 @@ +package util + +import "strings" + +func ResolveThinkingEnabled(req map[string]any, defaultEnabled bool) bool { + if enabled, ok := ResolveThinkingOverride(req); ok { + return enabled + } + return defaultEnabled +} + +func ResolveThinkingOverride(req map[string]any) (bool, bool) { + if req == nil { + return false, false + } + if enabled, ok := parseThinkingSetting(req["thinking"]); ok { + return enabled, true + } + if enabled, ok := parseReasoningSetting(req["reasoning"]); ok { + return enabled, true + } + if extraBody, ok := req["extra_body"].(map[string]any); ok { + if enabled, ok := parseThinkingSetting(extraBody["thinking"]); ok { + return enabled, true + } + if enabled, ok := parseReasoningSetting(extraBody["reasoning"]); ok { + return enabled, true + } + if enabled, ok := parseReasoningEffort(extraBody["reasoning_effort"]); ok { + return enabled, true + } + } + if enabled, ok := parseReasoningEffort(req["reasoning_effort"]); ok { + return enabled, true + } + return false, false +} + +func parseThinkingSetting(raw any) (bool, bool) { + switch v := raw.(type) { + case bool: + return v, true + case string: + switch strings.ToLower(strings.TrimSpace(v)) { + case "enabled", "enable", "on", 
"true": + return true, true + case "disabled", "disable", "off", "false", "none": + return false, true + default: + return false, false + } + case map[string]any: + if typ, ok := v["type"]; ok { + return parseThinkingSetting(typ) + } + } + return false, false +} + +func parseReasoningSetting(raw any) (bool, bool) { + switch v := raw.(type) { + case bool: + return v, true + case string: + return parseReasoningEffort(v) + case map[string]any: + for _, key := range []string{"effort", "type", "enabled"} { + if enabled, ok := parseReasoningSetting(v[key]); ok { + return enabled, true + } + } + } + return false, false +} + +func parseReasoningEffort(raw any) (bool, bool) { + switch strings.ToLower(strings.TrimSpace(toString(raw))) { + case "minimal", "low", "medium", "high", "xhigh": + return true, true + case "none", "disabled", "disable", "off", "false": + return false, true + default: + return false, false + } +} + +func toString(raw any) string { + if s, ok := raw.(string); ok { + return s + } + return "" +} diff --git a/internal/util/thinking_test.go b/internal/util/thinking_test.go new file mode 100644 index 0000000..003fb5b --- /dev/null +++ b/internal/util/thinking_test.go @@ -0,0 +1,55 @@ +package util + +import "testing" + +func TestResolveThinkingEnabledPriority(t *testing.T) { + req := map[string]any{ + "thinking": map[string]any{"type": "disabled"}, + "extra_body": map[string]any{ + "thinking": map[string]any{"type": "enabled"}, + }, + "reasoning_effort": "high", + } + if got := ResolveThinkingEnabled(req, true); got { + t.Fatalf("expected top-level thinking to win, got enabled=%v", got) + } +} + +func TestResolveThinkingEnabledUsesExtraBodyFallback(t *testing.T) { + req := map[string]any{ + "extra_body": map[string]any{ + "thinking": map[string]any{"type": "disabled"}, + }, + } + if got := ResolveThinkingEnabled(req, true); got { + t.Fatalf("expected extra_body thinking to disable, got enabled=%v", got) + } +} + +func 
TestResolveThinkingEnabledMapsReasoningEffortToEnabled(t *testing.T) { + for _, effort := range []string{"minimal", "low", "medium", "high", "xhigh"} { + if got := ResolveThinkingEnabled(map[string]any{"reasoning_effort": effort}, false); !got { + t.Fatalf("expected reasoning_effort=%s to enable thinking", effort) + } + } +} + +func TestResolveThinkingEnabledMapsReasoningObject(t *testing.T) { + req := map[string]any{"reasoning": map[string]any{"effort": "none"}} + if got := ResolveThinkingEnabled(req, true); got { + t.Fatalf("expected reasoning.effort=none to disable thinking") + } + req = map[string]any{"reasoning": map[string]any{"effort": "medium"}} + if got := ResolveThinkingEnabled(req, false); !got { + t.Fatalf("expected reasoning.effort=medium to enable thinking") + } +} + +func TestResolveThinkingEnabledDefaultsWhenUnset(t *testing.T) { + if !ResolveThinkingEnabled(nil, true) { + t.Fatal("expected default thinking=true when unset") + } + if ResolveThinkingEnabled(nil, false) { + t.Fatal("expected default thinking=false when unset") + } +} diff --git a/internal/util/util_edge_test.go b/internal/util/util_edge_test.go index e7bfef8..6084d9c 100644 --- a/internal/util/util_edge_test.go +++ b/internal/util/util_edge_test.go @@ -348,15 +348,27 @@ func TestConvertClaudeToDeepSeekNoSystem(t *testing.T) { } } -func TestConvertClaudeToDeepSeekOpusUsesSlowMapping(t *testing.T) { - t.Setenv("DS2API_CONFIG_JSON", `{"keys":[],"accounts":[],"claude_mapping":{"fast":"deepseek-chat","slow":"deepseek-reasoner"}}`) +func TestConvertClaudeToDeepSeekOpusUsesGlobalAlias(t *testing.T) { store := config.LoadStore() req := map[string]any{ "model": "claude-opus-4-6", "messages": []any{map[string]any{"role": "user", "content": "Hi"}}, } out := ConvertClaudeToDeepSeek(req, store) - if out["model"] != "deepseek-reasoner" { - t.Fatalf("expected opus to use slow mapping, got %q", out["model"]) + if out["model"] != "deepseek-v4-pro" { + t.Fatalf("expected opus to use global alias, got 
%q", out["model"]) + } +} + +func TestConvertClaudeToDeepSeekUsesExplicitModelAlias(t *testing.T) { + t.Setenv("DS2API_CONFIG_JSON", `{"keys":[],"accounts":[],"model_aliases":{"claude-sonnet-4-6":"deepseek-v4-pro-search"}}`) + store := config.LoadStore() + req := map[string]any{ + "model": "claude-sonnet-4-6", + "messages": []any{map[string]any{"role": "user", "content": "Hi"}}, + } + out := ConvertClaudeToDeepSeek(req, store) + if out["model"] != "deepseek-v4-pro-search" { + t.Fatalf("expected explicit alias override, got %q", out["model"]) } } diff --git a/plans/refactor-line-gate-targets.txt b/plans/refactor-line-gate-targets.txt index e144b75..9cdbcbb 100644 --- a/plans/refactor-line-gate-targets.txt +++ b/plans/refactor-line-gate-targets.txt @@ -13,70 +13,70 @@ internal/config/store_index.go internal/config/store_accessors.go internal/config/account.go -internal/admin/handler_config_read.go -internal/admin/handler_config_write.go -internal/admin/handler_config_import.go -internal/admin/handler_settings_read.go -internal/admin/handler_settings_write.go -internal/admin/handler_settings_parse.go -internal/admin/handler_settings_runtime.go -internal/admin/handler_accounts_crud.go -internal/admin/handler_accounts_testing.go -internal/admin/handler_accounts_queue.go +internal/httpapi/admin/configmgmt/handler_config_read.go +internal/httpapi/admin/configmgmt/handler_config_write.go +internal/httpapi/admin/configmgmt/handler_config_import.go +internal/httpapi/admin/settings/handler_settings_read.go +internal/httpapi/admin/settings/handler_settings_write.go +internal/httpapi/admin/settings/handler_settings_parse.go +internal/httpapi/admin/settings/handler_settings_runtime.go +internal/httpapi/admin/accounts/handler_accounts_crud.go +internal/httpapi/admin/accounts/handler_accounts_testing.go +internal/httpapi/admin/accounts/handler_accounts_queue.go internal/account/pool_core.go internal/account/pool_acquire.go internal/account/pool_waiters.go 
internal/account/pool_limits.go -internal/deepseek/client_core.go -internal/deepseek/client_auth.go -internal/deepseek/client_completion.go -internal/deepseek/client_http_json.go -internal/deepseek/client_http_helpers.go +internal/deepseek/client/client_core.go +internal/deepseek/client/client_auth.go +internal/deepseek/client/client_completion.go +internal/deepseek/client/client_http_json.go +internal/deepseek/client/client_http_helpers.go internal/format/openai/render_chat.go internal/format/openai/render_responses.go internal/format/openai/render_stream_events.go internal/format/openai/render_usage.go -internal/adapter/openai/handler_routes.go -internal/adapter/openai/handler_chat.go -internal/adapter/openai/handler_errors.go -internal/adapter/openai/handler_toolcall_policy.go -internal/adapter/openai/handler_toolcall_format.go -internal/adapter/openai/responses_handler.go -internal/adapter/openai/responses_input_normalize.go -internal/adapter/openai/responses_input_items.go -internal/adapter/openai/responses_stream_runtime_core.go -internal/adapter/openai/responses_stream_runtime_events.go -internal/adapter/openai/responses_stream_runtime_toolcalls.go -internal/adapter/openai/tool_sieve_state.go -internal/adapter/openai/tool_sieve_core.go -internal/adapter/openai/tool_sieve_xml.go -internal/adapter/openai/tool_sieve_jsonscan.go +internal/httpapi/openai/shared/models.go +internal/httpapi/openai/chat/handler_chat.go +internal/httpapi/openai/shared/handler_errors.go +internal/httpapi/openai/shared/handler_toolcall_policy.go +internal/httpapi/openai/shared/handler_toolcall_format.go +internal/httpapi/openai/responses/responses_handler.go +internal/promptcompat/responses_input_normalize.go +internal/promptcompat/responses_input_items.go +internal/httpapi/openai/responses/responses_stream_runtime_core.go +internal/httpapi/openai/responses/responses_stream_runtime_events.go +internal/httpapi/openai/responses/responses_stream_runtime_toolcalls.go 
+internal/toolstream/tool_sieve_state.go +internal/toolstream/tool_sieve_core.go +internal/toolstream/tool_sieve_xml.go +internal/toolstream/tool_sieve_jsonscan.go internal/toolcall/toolcalls_parse.go internal/toolcall/toolcalls_candidates.go internal/toolcall/toolcalls_format.go -internal/adapter/claude/handler_routes.go -internal/adapter/claude/handler_messages.go -internal/adapter/claude/handler_tokens.go -internal/adapter/claude/handler_errors.go -internal/adapter/claude/handler_utils.go -internal/adapter/claude/stream_runtime_core.go -internal/adapter/claude/stream_runtime_emit.go -internal/adapter/claude/stream_runtime_finalize.go +internal/httpapi/claude/handler_routes.go +internal/httpapi/claude/handler_messages.go +internal/httpapi/claude/handler_tokens.go +internal/httpapi/claude/handler_errors.go +internal/httpapi/claude/handler_utils.go +internal/httpapi/claude/stream_runtime_core.go +internal/httpapi/claude/stream_runtime_emit.go +internal/httpapi/claude/stream_runtime_finalize.go -internal/adapter/gemini/handler_routes.go -internal/adapter/gemini/handler_generate.go -internal/adapter/gemini/handler_stream_runtime.go -internal/adapter/gemini/handler_errors.go -internal/adapter/gemini/convert_request.go -internal/adapter/gemini/convert_messages.go -internal/adapter/gemini/convert_tools.go -internal/adapter/gemini/convert_passthrough.go +internal/httpapi/gemini/handler_routes.go +internal/httpapi/gemini/handler_generate.go +internal/httpapi/gemini/handler_stream_runtime.go +internal/httpapi/gemini/handler_errors.go +internal/httpapi/gemini/convert_request.go +internal/httpapi/gemini/convert_messages.go +internal/httpapi/gemini/convert_tools.go +internal/httpapi/gemini/convert_passthrough.go internal/testsuite/runner_core.go internal/testsuite/runner_env.go diff --git a/pow/README.md b/pow/README.md index 7467fd2..85c74e1 100644 --- a/pow/README.md +++ b/pow/README.md @@ -1,6 +1,6 @@ # DeepSeek PoW 纯算实现 -替代 `internal/deepseek/assets/sha3_wasm_bg.*.wasm` + 
wazero 运行时。 +当前服务端 PoW 已走纯 Go 实现:`internal/deepseek/pow.go` 负责从上游 challenge map 中取字段,调用 `ds2api/pow` 求解 nonce,并组装 `x-ds-pow-response` header。 ## 算法 @@ -17,48 +17,14 @@ hash = DeepSeekHashV1(input) → 32 bytes header = base64(json({algorithm, challenge, salt, answer, signature, target_path})) ``` -## 性能 (Apple M4, Go 1.25) +## 主要入口 -``` -BenchmarkHash 187.5 ns/op 0 alloc → 5.33M hash/s -BenchmarkSolve 13.4 ms/op 2 alloc → 75 道/秒/核 (difficulty=144000) -``` - -对比 wazero 调 WASM: hash 快 **5×**, solve 快 **2.8×**。 +- `pow/deepseek_hash.go`:DeepSeekHashV1 / Keccak-f[1600] rounds 1..23。 +- `pow/deepseek_pow.go`:`SolvePow`、`BuildPowHeader`、`SolveAndBuildHeader`。 +- `internal/deepseek/pow.go`:服务侧适配层,校验 `algorithm == DeepSeekHashV1` 并调用 `pow.SolvePow`。 ## 测试 ```bash cd pow && go test -v ./... && go test -bench=. -benchmem ``` - -## 替换 WASM - -替换 `internal/deepseek/pow.go` 中 `PowSolver.Compute`: - -```go -// 原: 调 wasm_solve(retptr, chPtr, chLen, prefixPtr, prefixLen, difficulty) -// 新: -import "ds2api/pow" - -func (c *Client) GetPow(ctx context.Context, a *auth.RequestAuth, ...) (string, error) { - // ... 省略 token/retry 逻辑,只改 compute 部分 ... 
- challenge, _ := bizData["challenge"].(map[string]any) - ch := &pow.Challenge{ - Algorithm: challenge["algorithm"].(string), - Challenge: challenge["challenge"].(string), - Salt: challenge["salt"].(string), - ExpireAt: int64(challenge["expire_at"].(float64)), - Difficulty: int64(challenge["difficulty"].(float64)), - Signature: challenge["signature"].(string), - TargetPath: challenge["target_path"].(string), - } - return pow.SolveAndBuildHeader(ch) -} -``` - -可删除: -- `internal/deepseek/assets/sha3_wasm_bg.*.wasm` -- `internal/deepseek/embedded_pow.go` -- `internal/deepseek/pow.go` 中 `PowSolver` 结构体、wazero 相关池化代码 -- `go.mod` 中 `github.com/tetratelabs/wazero` 依赖 diff --git a/scripts/lint.sh b/scripts/lint.sh index cf8e14a..32eea6a 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -8,6 +8,10 @@ LINT_BIN="${GOLANGCI_LINT_BIN:-golangci-lint}" BOOTSTRAP_VERSION="${GOLANGCI_LINT_VERSION:-v2.11.4}" BOOTSTRAP_BIN="${ROOT_DIR}/.tmp/golangci-lint-${BOOTSTRAP_VERSION}" +export GOCACHE="${GOCACHE:-${ROOT_DIR}/.tmp/go-build-cache}" +export GOLANGCI_LINT_CACHE="${GOLANGCI_LINT_CACHE:-${ROOT_DIR}/.tmp/golangci-lint-cache}" +mkdir -p "$GOCACHE" "$GOLANGCI_LINT_CACHE" + bootstrap_golangci_lint() { local version_no_v os arch artifact archive_url tmp_dir version_no_v="${BOOTSTRAP_VERSION#v}" @@ -49,9 +53,9 @@ bootstrap_golangci_lint() { run_lint() { local bin="$1" if [[ "$bin" == *" "* ]]; then - eval "$bin fmt --diff -c .golangci.yml" && eval "$bin run -c .golangci.yml" + eval "$bin fmt --diff -c .golangci.yml" && eval "$bin run -c .golangci.yml ./..." else - "$bin" fmt --diff -c .golangci.yml && "$bin" run -c .golangci.yml + "$bin" fmt --diff -c .golangci.yml && "$bin" run -c .golangci.yml ./... 
fi } diff --git a/tests/compat/expected/toolcalls_xml_tool_name_parameters_json.json b/tests/compat/expected/toolcalls_canonical_nested_param.json similarity index 100% rename from tests/compat/expected/toolcalls_xml_tool_name_parameters_json.json rename to tests/compat/expected/toolcalls_canonical_nested_param.json diff --git a/tests/compat/expected/toolcalls_xml_tool_call.json b/tests/compat/expected/toolcalls_canonical_tool_call.json similarity index 98% rename from tests/compat/expected/toolcalls_xml_tool_call.json rename to tests/compat/expected/toolcalls_canonical_tool_call.json index 5bcd9ce..124de59 100644 --- a/tests/compat/expected/toolcalls_xml_tool_call.json +++ b/tests/compat/expected/toolcalls_canonical_tool_call.json @@ -10,4 +10,4 @@ "sawToolCallSyntax": true, "rejectedByPolicy": false, "rejectedToolNames": [] -} \ No newline at end of file +} diff --git a/tests/compat/expected/toolcalls_function_call_tag.json b/tests/compat/expected/toolcalls_function_call_tag.json deleted file mode 100644 index 5bcd9ce..0000000 --- a/tests/compat/expected/toolcalls_function_call_tag.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "calls": [ - { - "name": "read_file", - "input": { - "path": "README.MD" - } - } - ], - "sawToolCallSyntax": true, - "rejectedByPolicy": false, - "rejectedToolNames": [] -} \ No newline at end of file diff --git a/tests/compat/expected/toolcalls_invoke_attr.json b/tests/compat/expected/toolcalls_invoke_attr.json deleted file mode 100644 index 5bcd9ce..0000000 --- a/tests/compat/expected/toolcalls_invoke_attr.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "calls": [ - { - "name": "read_file", - "input": { - "path": "README.MD" - } - } - ], - "sawToolCallSyntax": true, - "rejectedByPolicy": false, - "rejectedToolNames": [] -} \ No newline at end of file diff --git a/tests/compat/fixtures/toolcalls/canonical_nested_param.json b/tests/compat/fixtures/toolcalls/canonical_nested_param.json new file mode 100644 index 0000000..5dd0f9b --- /dev/null +++ 
b/tests/compat/fixtures/toolcalls/canonical_nested_param.json @@ -0,0 +1,6 @@ +{ + "text": "", + "tool_names": [ + "get_weather" + ] +} diff --git a/tests/compat/fixtures/toolcalls/canonical_tool_call.json b/tests/compat/fixtures/toolcalls/canonical_tool_call.json new file mode 100644 index 0000000..6d80e9b --- /dev/null +++ b/tests/compat/fixtures/toolcalls/canonical_tool_call.json @@ -0,0 +1,6 @@ +{ + "text": "README.MD", + "tool_names": [ + "read_file" + ] +} diff --git a/tests/compat/fixtures/toolcalls/function_call_tag.json b/tests/compat/fixtures/toolcalls/function_call_tag.json deleted file mode 100644 index 0f35956..0000000 --- a/tests/compat/fixtures/toolcalls/function_call_tag.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "text": "read_file{\"path\":\"README.MD\"}", - "tool_names": [ - "read_file" - ] -} \ No newline at end of file diff --git a/tests/compat/fixtures/toolcalls/invoke_attr.json b/tests/compat/fixtures/toolcalls/invoke_attr.json deleted file mode 100644 index 70c77fc..0000000 --- a/tests/compat/fixtures/toolcalls/invoke_attr.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "text": "{\"path\":\"README.MD\"}", - "tool_names": [ - "read_file" - ] -} \ No newline at end of file diff --git a/tests/compat/fixtures/toolcalls/xml_tool_call.json b/tests/compat/fixtures/toolcalls/xml_tool_call.json deleted file mode 100644 index 279f1a2..0000000 --- a/tests/compat/fixtures/toolcalls/xml_tool_call.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "text": "read_file{\"path\":\"README.MD\"}", - "tool_names": [ - "read_file" - ] -} \ No newline at end of file diff --git a/tests/compat/fixtures/toolcalls/xml_tool_name_parameters_json.json b/tests/compat/fixtures/toolcalls/xml_tool_name_parameters_json.json deleted file mode 100644 index 6ccd51e..0000000 --- a/tests/compat/fixtures/toolcalls/xml_tool_name_parameters_json.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "text": "get_weather{\"city\":\"beijing\",\"unit\":\"c\"}", - "tool_names": [ - "get_weather" - ] -} diff --git 
a/tests/node/chat-stream.test.js b/tests/node/chat-stream.test.js index 4f78374..50e94ee 100644 --- a/tests/node/chat-stream.test.js +++ b/tests/node/chat-stream.test.js @@ -2,13 +2,18 @@ const test = require('node:test'); const assert = require('node:assert/strict'); +const { EventEmitter } = require('node:events'); const handler = require('../../api/chat-stream.js'); +const { handleVercelStream } = require('../../internal/js/chat-stream/vercel_stream.js'); const { createToolSieveState, processToolSieveChunk, flushToolSieve, } = require('../../internal/js/helpers/stream-tool-sieve.js'); +const { + setCorsHeaders, +} = require('../../internal/js/chat-stream/http_internal.js'); const { parseChunkForContent, @@ -26,11 +31,170 @@ const { trimContinuationOverlap, } = handler.__test; +function createMockResponse() { + const headers = new Map(); + return { + setHeader(key, value) { + headers.set(String(key).toLowerCase(), value); + }, + getHeader(key) { + return headers.get(String(key).toLowerCase()); + }, + }; +} + +class MockStreamRequest extends EventEmitter { + constructor() { + super(); + this.url = '/v1/chat/completions'; + this.headers = { host: 'example.test', 'content-type': 'application/json' }; + } +} + +class MockStreamResponse extends EventEmitter { + constructor() { + super(); + this.headers = new Map(); + this.statusCode = 0; + this.chunks = []; + this.writableEnded = false; + this.destroyed = false; + } + + setHeader(key, value) { + this.headers.set(String(key).toLowerCase(), value); + } + + getHeader(key) { + return this.headers.get(String(key).toLowerCase()); + } + + write(chunk) { + this.chunks.push(Buffer.isBuffer(chunk) ? 
chunk.toString('utf8') : String(chunk)); + return true; + } + + end(chunk) { + if (chunk) { + this.write(chunk); + } + this.writableEnded = true; + } + + flushHeaders() {} + + flush() {} + + bodyText() { + return this.chunks.join(''); + } +} + +function jsonResponse(body, status = 200) { + return new Response(JSON.stringify(body), { + status, + headers: { 'content-type': 'application/json' }, + }); +} + +function sseResponse(lines) { + const encoder = new TextEncoder(); + return new Response(new ReadableStream({ + start(controller) { + for (const line of lines) { + controller.enqueue(encoder.encode(line)); + } + controller.close(); + }, + }), { + status: 200, + headers: { 'content-type': 'text/event-stream' }, + }); +} + +function parseSSEDataFrames(body) { + return body + .split('\n\n') + .map((frame) => frame.trim()) + .filter((frame) => frame.startsWith('data:')) + .map((frame) => frame.slice(5).trim()); +} + +async function runMockVercelStream(upstreamLines, prepareOverrides = {}) { + const originalFetch = global.fetch; + const fetchURLs = []; + const prepareBody = { + session_id: 'chatcmpl-test', + lease_id: 'lease-test', + model: 'gpt-test', + final_prompt: 'hello', + thinking_enabled: false, + search_enabled: false, + compat: { strip_reference_markers: true }, + tool_names: [], + deepseek_token: 'deepseek-token', + pow_header: 'pow-header', + payload: { prompt: 'hello' }, + ...prepareOverrides, + }; + global.fetch = async (url) => { + const textURL = String(url); + fetchURLs.push(textURL); + if (textURL.includes('__stream_prepare=1')) { + return jsonResponse(prepareBody); + } + if (textURL.includes('__stream_release=1')) { + return jsonResponse({ success: true }); + } + return sseResponse(upstreamLines); + }; + try { + const req = new MockStreamRequest(); + const res = new MockStreamResponse(); + const payload = { model: 'gpt-test', stream: true }; + await handleVercelStream(req, res, Buffer.from(JSON.stringify(payload)), payload); + return { res, frames: 
parseSSEDataFrames(res.bodyText()), fetchURLs }; + } finally { + global.fetch = originalFetch; + } +} + test('chat-stream exposes parser test hooks', () => { assert.equal(typeof parseChunkForContent, 'function'); assert.equal(typeof resolveToolcallPolicy, 'function'); }); +test('vercel stream emits Go-parity empty-output failure on DONE', async () => { + const { frames } = await runMockVercelStream(['data: [DONE]\n\n']); + assert.equal(frames.length, 2); + const failed = JSON.parse(frames[0]); + assert.equal(failed.status_code, 429); + assert.equal(failed.error.type, 'rate_limit_error'); + assert.equal(failed.error.code, 'upstream_empty_output'); + assert.equal(frames[1], '[DONE]'); +}); + +test('vercel stream emits content_filter failure when upstream filters empty output', async () => { + const { frames } = await runMockVercelStream(['data: {"code":"content_filter"}\n\n']); + assert.equal(frames.length, 2); + const failed = JSON.parse(frames[0]); + assert.equal(failed.status_code, 400); + assert.equal(failed.error.type, 'invalid_request_error'); + assert.equal(failed.error.code, 'content_filter'); + assert.equal(frames[1], '[DONE]'); +}); + +test('vercel stream keeps stop finish when content_filter arrives after visible text', async () => { + const { frames } = await runMockVercelStream([ + 'data: {"p":"response/content","v":"hello"}\n\n', + 'data: {"code":"content_filter"}\n\n', + ]); + const parsed = frames.filter((frame) => frame !== '[DONE]').map((frame) => JSON.parse(frame)); + assert.equal(parsed[0].choices[0].delta.content, 'hello'); + assert.equal(parsed[1].choices[0].finish_reason, 'stop'); + assert.equal(parsed[1].usage.completion_tokens, 1); +}); + test('resolveToolcallPolicy defaults to feature-match + early emit when prepare flags missing', () => { const policy = resolveToolcallPolicy( {}, @@ -218,6 +382,24 @@ test('parseChunkForContent handles response/fragments APPEND with thinking and r ]); }); +test('parseChunkForContent drops thinking content 
when thinking is disabled', () => { + const thinking = parseChunkForContent( + { p: 'response/thinking_content', v: 'hidden thought' }, + false, + 'text', + ); + assert.equal(thinking.finished, false); + assert.equal(thinking.newType, 'text'); + assert.deepEqual(thinking.parts, []); + + const answer = parseChunkForContent( + { p: 'response/content', v: 'visible answer' }, + false, + thinking.newType, + ); + assert.deepEqual(answer.parts, [{ text: 'visible answer', type: 'text' }]); +}); + test('parseChunkForContent supports wrapped response.fragments object shape', () => { const chunk = { p: 'response', @@ -400,6 +582,32 @@ test('extractPathname strips query only', () => { assert.equal(extractPathname('/v1beta/models/gemini-2.5-flash:streamGenerateContent?key=1'), '/v1beta/models/gemini-2.5-flash:streamGenerateContent'); }); +test('setCorsHeaders reflects requested third-party headers and blocks internal-only headers', () => { + const res = createMockResponse(); + setCorsHeaders(res, { + headers: { + origin: 'app://obsidian.md', + 'access-control-request-headers': 'authorization, x-stainless-os, x-stainless-runtime, x-ds2-internal-token', + 'access-control-request-private-network': 'true', + }, + }); + + assert.equal(res.getHeader('access-control-allow-origin'), 'app://obsidian.md'); + assert.equal(res.getHeader('access-control-allow-private-network'), 'true'); + assert.equal(res.getHeader('access-control-max-age'), '600'); + + const allowHeaders = String(res.getHeader('access-control-allow-headers') || '').toLowerCase(); + assert.equal(allowHeaders.includes('authorization'), true); + assert.equal(allowHeaders.includes('x-stainless-os'), true); + assert.equal(allowHeaders.includes('x-stainless-runtime'), true); + assert.equal(allowHeaders.includes('x-ds2-internal-token'), false); + + const vary = String(res.getHeader('vary') || '').toLowerCase(); + assert.equal(vary.includes('origin'), true); + assert.equal(vary.includes('access-control-request-headers'), true); + 
assert.equal(vary.includes('access-control-request-private-network'), true); +}); + test('trimContinuationOverlap preserves short normal tokens and trims long snapshots', () => { assert.equal(trimContinuationOverlap('我们被问到', '我们'), '我们'); const existing = '我们被问到:这是一个很长的续答快照前缀,用来验证去重逻辑不会误伤正常 token。'; diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index a5f29ac..1e5012a 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -42,7 +42,7 @@ test('extractToolNames keeps only declared tool names (Go parity)', () => { }); test('parseToolCalls parses XML markup tool call', () => { - const payload = 'read_file{"path":"README.MD"}'; + const payload = 'README.MD'; const calls = parseToolCalls(payload, ['read_file']); assert.equal(calls.length, 1); assert.equal(calls[0].name, 'read_file'); @@ -61,7 +61,7 @@ test('parseToolCalls ignores tool_call payloads that exist only inside fenced co const text = [ 'I will call a tool now.', '```xml', - 'read_file{"path":"README.md"}', + 'README.md', '```', ].join('\n'); const calls = parseToolCalls(text, ['read_file']); @@ -69,7 +69,7 @@ test('parseToolCalls ignores tool_call payloads that exist only inside fenced co }); test('parseToolCalls keeps unknown schema names when toolNames is provided', () => { - const payload = 'not_in_schema{"q":"go"}'; + const payload = 'go'; const calls = parseToolCalls(payload, ['search']); assert.equal(calls.length, 1); assert.equal(calls[0].name, 'not_in_schema'); @@ -77,7 +77,7 @@ test('parseToolCalls keeps unknown schema names when toolNames is provided', () test('sieve emits tool_calls for XML tool call payload', () => { const events = runSieve( - ['read_file{"path":"README.MD"}'], + ['README.MD'], ['read_file'], ); const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); @@ -88,8 +88,8 @@ test('sieve emits tool_calls for XML tool call payload', () => { test('sieve emits 
tool_calls when XML tag spans multiple chunks', () => { const events = runSieve( [ - 'read_file', - '{"path":"README.MD"}', + '', + 'README.MD', ], ['read_file'], ); @@ -103,10 +103,10 @@ test('sieve keeps long XML tool calls buffered until the closing tag arrives', ( const splitAt = longContent.length / 2; const events = runSieve( [ - '\n \n write_to_file\n \n \n \n \n \n \n', + ']]>\n \n', ], ['write_to_file'], ); @@ -147,7 +147,16 @@ test('sieve keeps embedded invalid tool-like json as normal text to avoid stream }); test('sieve passes malformed executable-looking XML through as text', () => { - const chunk = '{"path":"README.MD"}'; + const chunk = '{"path":"README.MD"}'; + const events = runSieve([chunk], ['read_file']); + const leakedText = collectText(events); + const hasToolCalls = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0); + assert.equal(hasToolCalls, false); + assert.equal(leakedText, chunk); +}); + +test('sieve keeps bare tool_call XML as plain text without wrapper', () => { + const chunk = 'README.MD'; const events = runSieve([chunk], ['read_file']); const leakedText = collectText(events); const hasToolCalls = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0); @@ -160,13 +169,12 @@ test('sieve flushes incomplete captured XML tool blocks by falling back to raw t [ '前置正文G。', '\n', - ' \n', - ' read_file\n', + ' \n', ], ['read_file'], ); const leakedText = collectText(events); - const expected = ['前置正文G。', '\n', ' \n', ' read_file\n'].join(''); + const expected = ['前置正文G。', '\n', ' \n'].join(''); const hasToolCalls = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0); assert.equal(hasToolCalls, false); assert.equal(leakedText, expected); @@ -176,7 +184,7 @@ test('sieve captures XML wrapper tags with attributes without leaking wrapper te const events = runSieve( [ '前置正文H。', - 'read_file{"path":"README.MD"}', + 'README.MD', '后置正文I。', ], ['read_file'], @@ -270,7 +278,7 @@ 
test('formatOpenAIStreamToolCalls reuses ids with the same idStore', () => { }); test('parseToolCalls rejects mismatched markup tags', () => { - const payload = 'read_file{"path":"README.md"}'; + const payload = 'README.md'; const calls = parseToolCalls(payload, ['read_file']); assert.equal(calls.length, 0); }); diff --git a/tests/raw_stream_samples/README.md b/tests/raw_stream_samples/README.md index 9b2957e..5f89183 100644 --- a/tests/raw_stream_samples/README.md +++ b/tests/raw_stream_samples/README.md @@ -87,7 +87,7 @@ for d in tests/raw_stream_samples/*; do done ``` -回放输出会显示 `tokens=/`,并在不一致时判定失败;`report.json` 中也会包含: +回放输出会显示 `tokens=/`;默认只记录 token 差异,不因 token 不一致失败。如需把 token 差异作为失败条件,给模拟器增加 `--fail-on-token-mismatch`。`report.json` 中也会包含: - `raw_expected_output_tokens` - `raw_parsed_output_tokens` diff --git a/tests/raw_stream_samples/content-filter-trigger-20260405-jwt3/meta.json b/tests/raw_stream_samples/content-filter-trigger-20260405-jwt3/meta.json index 59bde11..7719a71 100644 --- a/tests/raw_stream_samples/content-filter-trigger-20260405-jwt3/meta.json +++ b/tests/raw_stream_samples/content-filter-trigger-20260405-jwt3/meta.json @@ -2,7 +2,7 @@ "sample_id": "content-filter-trigger-20260405-jwt3", "captured_at_utc": "2026-04-04T16:28:52Z", "request": { - "model": "deepseek-reasoner-search", + "model": "deepseek-v4-pro-search", "stream": true, "messages": [ { diff --git a/tests/raw_stream_samples/continue-thinking-snapshot-replay-20260405/meta.json b/tests/raw_stream_samples/continue-thinking-snapshot-replay-20260405/meta.json index e878836..02d9cd4 100644 --- a/tests/raw_stream_samples/continue-thinking-snapshot-replay-20260405/meta.json +++ b/tests/raw_stream_samples/continue-thinking-snapshot-replay-20260405/meta.json @@ -5,7 +5,7 @@ "request": { "chat_session_id": "0a3c904d-5761-4cf0-ae51-9b41c1c78f1e", "parent_message_id": null, - "prompt": "<|System|>\n**Memories**\nThese are memories stored via the memory_tool that you can reference in future 
conversations.\n[]\n\n\n**Recent Chats**\nThese are some of the user's recent conversations. You can use them to understand user preferences:\n[\n {\n \"title\": \"\",\n \"last_chat\": \"2026年4月6日\"\n },\n {\n \"title\": \"\",\n \"last_chat\": \"2026年4月6日\"\n },\n {\n \"title\": \"江青判刑原因\",\n \"last_chat\": \"2026年4月5日\"\n },\n {\n \"title\": \"GitHub個人檔案\",\n \"last_chat\": \"2026年4月4日\"\n },\n {\n \"title\": \"DS2API架構圖\",\n \"last_chat\": \"2026年4月4日\"\n },\n {\n \"title\": \"Markdown範例\",\n \"last_chat\": \"2026年4月4日\"\n },\n {\n \"title\": \"廣州天氣概況\",\n \"last_chat\": \"2026年4月4日\"\n },\n {\n \"title\": \"Xbox手把SVG\",\n \"last_chat\": \"2026年4月4日\"\n },\n {\n \"title\": \"清除记忆\",\n \"last_chat\": \"2026年4月4日\"\n },\n {\n \"title\": \"SVG與安卓XML示例\",\n \"last_chat\": \"2026年4月4日\"\n }\n]\n\n\n\n\n\n\n\n\n\n\nYou have access to these tools:\n\nTool: memory_tool\nDescription: The memory tool stores long-term information across conversations.\nUse `action` to control the operation: `create` (add), `edit` (update), `delete` (remove).\n- No relevant record: `create` + `content`\n- Existing relevant record: `edit` + `id` + `content`\n- Outdated/irrelevant record: `delete` + `id`\nMemories will automatically appear in the tag in later conversations.\nDo not store sensitive information (e.g., ethnicity, religion, sexual orientation, political views, sex life, criminal records).\nYou may store: preferred name, preferences, plans, work-related notes, chat style preferences, first chat time, etc.\nDo not show memory content directly in the conversation unless the user explicitly asks.\nToday is 2026年4月6日.\nSimilar memories should be merged; prefer updating existing records.\n\nExamples:\n{\"action\":\"create\",\"content\":\"User prefers brief replies and is more active on weekends.\"}\n{\"action\":\"edit\",\"id\":12,\"content\":\"User’s preferred name updated to “A-Xing”, prefers Chinese replies.\"}\n{\"action\":\"delete\",\"id\":7}\nParameters: 
{\"properties\":{\"action\":{\"description\":\"Operation to perform: create, edit, or delete\",\"enum\":[\"create\",\"edit\",\"delete\"],\"type\":\"string\"},\"content\":{\"description\":\"The content of the memory record (required for create/edit)\",\"type\":\"string\"},\"id\":{\"description\":\"The id of the memory record (required for edit/delete)\",\"type\":\"integer\"}},\"required\":[\"action\"],\"type\":\"object\"}\n\nTool: search_web\nDescription: Search the web for up-to-date or specific information.\nUse this when the user asks for the latest news, current facts, or needs verification.\nGenerate focused keywords and run multiple searches if needed.\nToday is 2026年4月6日.\n\nResponse format:\n- items[].id (short id), title, url, text\n\nCitations:\n- After using results, add `[citation,domain](id)` after the sentence.\n- Multiple citations are allowed.\n- If no results are cited, omit citations.\n\nExample:\nThe capital of France is Paris. [citation,example.com](abc123)\nThe population is about 2.1 million. [citation,example.com](abc123) [citation,example2.com](def456)\nParameters: {\"properties\":{\"query\":{\"description\":\"search keyword\",\"type\":\"string\"},\"topic\":{\"description\":\"search topic (one of `general`, `news`, `finance`)\",\"enum\":[\"general\",\"news\",\"finance\"],\"type\":\"string\"}},\"required\":[\"query\"],\"type\":\"object\"}\n\nTool: scrape_web\nDescription: Scrape a URL for detailed page content.\nUse this when the user requests content from a specific page or when search snippets are insufficient.\nAvoid using it for common questions unless the user asks.\nParameters: {\"properties\":{\"url\":{\"description\":\"url to scrape\",\"type\":\"string\"}},\"required\":[\"url\"],\"type\":\"object\"}\n\nTool: eval_javascript\nDescription: Execute JavaScript code using QuickJS engine (ES2020). The result is the value of the last expression in the code. For calculations with decimals, use toFixed() to control precision. 
Console output (log/info/warn/error) is captured and returned in 'logs' field. No DOM or Node.js APIs available. Example: '1 + 2' returns 3; 'const x = 5; x * 2' returns 10.\nParameters: {\"properties\":{\"code\":{\"description\":\"The JavaScript code to execute\",\"type\":\"string\"}},\"required\":[\"code\"],\"type\":\"object\"}\n\nTool: get_time_info\nDescription: Get the current local date and time info from the device. Returns year/month/day, weekday, ISO date/time strings, timezone, and timestamp.\nParameters: {\"properties\":{},\"type\":\"object\"}\n\nTool: clipboard_tool\nDescription: Read or write plain text from the device clipboard. Use action: read or write. For write, provide text. Do NOT write to the clipboard unless the user has explicitly requested it.\nParameters: {\"properties\":{\"action\":{\"description\":\"Operation to perform: read or write\",\"enum\":[\"read\",\"write\"],\"type\":\"string\"},\"text\":{\"description\":\"Text to write to the clipboard (required for write)\",\"type\":\"string\"}},\"required\":[\"action\"],\"type\":\"object\"}\n\nTool: text_to_speech\nDescription: Speak text aloud to the user using the device's text-to-speech engine. Use this when the user asks you to read something aloud, or when audio output is appropriate. The tool returns immediately; audio plays in the background on the device. Provide natural, readable text without markdown formatting.\nParameters: {\"properties\":{\"text\":{\"description\":\"The text to speak aloud\",\"type\":\"string\"}},\"required\":[\"text\"],\"type\":\"object\"}\n\nTool: ask_user\nDescription: Ask the user one or more questions when you need clarification, additional information, or confirmation. Each question can optionally provide a list of suggested options for the user to choose from. The user may select an option or provide their own free-text answer for each question. 
The answers will be returned as a JSON object mapping question IDs to the user's responses.\nParameters: {\"properties\":{\"questions\":{\"description\":\"List of questions to ask the user\",\"items\":{\"properties\":{\"id\":{\"description\":\"Unique identifier for this question\",\"type\":\"string\"},\"options\":{\"description\":\"Optional list of suggested options for the user to choose from\",\"items\":{\"type\":\"string\"},\"type\":\"array\"},\"question\":{\"description\":\"The question text to display to the user\",\"type\":\"string\"},\"selection_type\":{\"description\":\"Answer type: text (free text input, default), single (select exactly one option), multi (select one or more options)\",\"enum\":[\"text\",\"single\",\"multi\"],\"type\":\"string\"}},\"required\":[\"id\",\"question\"],\"type\":\"object\"},\"type\":\"array\"}},\"required\":[\"questions\"],\"type\":\"object\"}\n\nTOOL CALL FORMAT — FOLLOW EXACTLY:\n\nWhen calling tools, emit ONLY raw XML at the very end of your response. No text before, no text after, no markdown fences.\n\n\n \n TOOL_NAME_HERE\n {\"key\":\"value\"}\n \n\n\nRULES:\n1) Output ONLY the XML above when calling tools. Do NOT mix tool XML with regular text.\n2) MUST contain a strict JSON object. All JSON keys and strings use double quotes.\n3) Multiple tools → multiple blocks inside ONE root.\n4) Do NOT wrap the XML in markdown code fences (no triple backticks).\n5) After receiving a tool result, use it directly. Only call another tool if the result is insufficient.\n6) Parameters MUST use the exact field names from the selected tool schema.\n7) CRITICAL: Do NOT invent or add any extra fields (such as \"_raw\", \"_xml\"). Use ONLY the fields strictly defined in the schema. Extra fields will cause execution failure.\n\n❌ WRONG — Do NOT do these:\nWrong 1 — mixed text and XML:\n I'll read the file for you. 
...\nWrong 2 — describing tool calls in text:\n [调用 Bash] {\"command\": \"ls\"}\nWrong 3 — missing wrapper:\n read_file{}\nWrong 4 — extra/invented fields:\n {\"_raw\": \"...\", \"command\": \"ls\"}\n\n\n✅ CORRECT EXAMPLES:\n\nExample A — Single tool:\n\n \n read_file\n {\"path\":\"src/main.go\"}\n \n\n\nExample B — Two tools in parallel:\n\n \n read_file\n {\"path\":\"src/main.go\"}\n \n \n write_to_file\n {\"path\":\"output.txt\",\"content\":\"Hello world\"}\n \n\n\nExample C — Tool with complex nested JSON parameters:\n\n \n ask_followup_question\n {\"question\":\"Which approach do you prefer?\",\"follow_up\":[{\"text\":\"Option A\"},{\"text\":\"Option B\"}]}\n \n\n\nRemember: Output ONLY the ... XML block when calling tools.<|end▁of▁instructions|>\n\n<|User|>\n<|User|>\n在一个类似2022×2022的花园的每个方格中,最初都有一个高度为0的树,园丁和伐木工交替进行以下游戏,园丁首先开始:园丁选择花园中的一个方格,该方格上的每棵树以及周围至多八个方格中的所有树都会增长一单位,伐木工随后选择板上的四个不同方格,这些方格上正高的树都会减少一单位,称一棵树为雄伟的,如果其高度至少为10的六次方.确定园丁能够确保板上最终有K棵雄伟的树,无论伐木工如何操作,求最大的K<|end▁of▁sentence|><|end▁of▁sentence|>", + "prompt": "<|System|>\n**Memories**\nThese are memories stored via the memory_tool that you can reference in future conversations.\n[]\n\n\n**Recent Chats**\nThese are some of the user's recent conversations. 
You can use them to understand user preferences:\n[\n {\n \"title\": \"\",\n \"last_chat\": \"2026年4月6日\"\n },\n {\n \"title\": \"\",\n \"last_chat\": \"2026年4月6日\"\n },\n {\n \"title\": \"江青判刑原因\",\n \"last_chat\": \"2026年4月5日\"\n },\n {\n \"title\": \"GitHub個人檔案\",\n \"last_chat\": \"2026年4月4日\"\n },\n {\n \"title\": \"DS2API架構圖\",\n \"last_chat\": \"2026年4月4日\"\n },\n {\n \"title\": \"Markdown範例\",\n \"last_chat\": \"2026年4月4日\"\n },\n {\n \"title\": \"廣州天氣概況\",\n \"last_chat\": \"2026年4月4日\"\n },\n {\n \"title\": \"Xbox手把SVG\",\n \"last_chat\": \"2026年4月4日\"\n },\n {\n \"title\": \"清除记忆\",\n \"last_chat\": \"2026年4月4日\"\n },\n {\n \"title\": \"SVG與安卓XML示例\",\n \"last_chat\": \"2026年4月4日\"\n }\n]\n\n\n\n\n\n\n\n\n\n\nYou have access to these tools:\n\nTool: memory_tool\nDescription: The memory tool stores long-term information across conversations.\nUse `action` to control the operation: `create` (add), `edit` (update), `delete` (remove).\n- No relevant record: `create` + `content`\n- Existing relevant record: `edit` + `id` + `content`\n- Outdated/irrelevant record: `delete` + `id`\nMemories will automatically appear in the tag in later conversations.\nDo not store sensitive information (e.g., ethnicity, religion, sexual orientation, political views, sex life, criminal records).\nYou may store: preferred name, preferences, plans, work-related notes, chat style preferences, first chat time, etc.\nDo not show memory content directly in the conversation unless the user explicitly asks.\nToday is 2026年4月6日.\nSimilar memories should be merged; prefer updating existing records.\n\nExamples:\n{\"action\":\"create\",\"content\":\"User prefers brief replies and is more active on weekends.\"}\n{\"action\":\"edit\",\"id\":12,\"content\":\"User’s preferred name updated to “A-Xing”, prefers Chinese replies.\"}\n{\"action\":\"delete\",\"id\":7}\nParameters: {\"properties\":{\"action\":{\"description\":\"Operation to perform: create, edit, or 
delete\",\"enum\":[\"create\",\"edit\",\"delete\"],\"type\":\"string\"},\"content\":{\"description\":\"The content of the memory record (required for create/edit)\",\"type\":\"string\"},\"id\":{\"description\":\"The id of the memory record (required for edit/delete)\",\"type\":\"integer\"}},\"required\":[\"action\"],\"type\":\"object\"}\n\nTool: search_web\nDescription: Search the web for up-to-date or specific information.\nUse this when the user asks for the latest news, current facts, or needs verification.\nGenerate focused keywords and run multiple searches if needed.\nToday is 2026年4月6日.\n\nResponse format:\n- items[].id (short id), title, url, text\n\nCitations:\n- After using results, add `[citation,domain](id)` after the sentence.\n- Multiple citations are allowed.\n- If no results are cited, omit citations.\n\nExample:\nThe capital of France is Paris. [citation,example.com](abc123)\nThe population is about 2.1 million. [citation,example.com](abc123) [citation,example2.com](def456)\nParameters: {\"properties\":{\"query\":{\"description\":\"search keyword\",\"type\":\"string\"},\"topic\":{\"description\":\"search topic (one of `general`, `news`, `finance`)\",\"enum\":[\"general\",\"news\",\"finance\"],\"type\":\"string\"}},\"required\":[\"query\"],\"type\":\"object\"}\n\nTool: scrape_web\nDescription: Scrape a URL for detailed page content.\nUse this when the user requests content from a specific page or when search snippets are insufficient.\nAvoid using it for common questions unless the user asks.\nParameters: {\"properties\":{\"url\":{\"description\":\"url to scrape\",\"type\":\"string\"}},\"required\":[\"url\"],\"type\":\"object\"}\n\nTool: eval_javascript\nDescription: Execute JavaScript code using QuickJS engine (ES2020). The result is the value of the last expression in the code. For calculations with decimals, use toFixed() to control precision. Console output (log/info/warn/error) is captured and returned in 'logs' field. 
No DOM or Node.js APIs available. Example: '1 + 2' returns 3; 'const x = 5; x * 2' returns 10.\nParameters: {\"properties\":{\"code\":{\"description\":\"The JavaScript code to execute\",\"type\":\"string\"}},\"required\":[\"code\"],\"type\":\"object\"}\n\nTool: get_time_info\nDescription: Get the current local date and time info from the device. Returns year/month/day, weekday, ISO date/time strings, timezone, and timestamp.\nParameters: {\"properties\":{},\"type\":\"object\"}\n\nTool: clipboard_tool\nDescription: Read or write plain text from the device clipboard. Use action: read or write. For write, provide text. Do NOT write to the clipboard unless the user has explicitly requested it.\nParameters: {\"properties\":{\"action\":{\"description\":\"Operation to perform: read or write\",\"enum\":[\"read\",\"write\"],\"type\":\"string\"},\"text\":{\"description\":\"Text to write to the clipboard (required for write)\",\"type\":\"string\"}},\"required\":[\"action\"],\"type\":\"object\"}\n\nTool: text_to_speech\nDescription: Speak text aloud to the user using the device's text-to-speech engine. Use this when the user asks you to read something aloud, or when audio output is appropriate. The tool returns immediately; audio plays in the background on the device. Provide natural, readable text without markdown formatting.\nParameters: {\"properties\":{\"text\":{\"description\":\"The text to speak aloud\",\"type\":\"string\"}},\"required\":[\"text\"],\"type\":\"object\"}\n\nTool: ask_user\nDescription: Ask the user one or more questions when you need clarification, additional information, or confirmation. Each question can optionally provide a list of suggested options for the user to choose from. The user may select an option or provide their own free-text answer for each question. 
The answers will be returned as a JSON object mapping question IDs to the user's responses.\nParameters: {\"properties\":{\"questions\":{\"description\":\"List of questions to ask the user\",\"items\":{\"properties\":{\"id\":{\"description\":\"Unique identifier for this question\",\"type\":\"string\"},\"options\":{\"description\":\"Optional list of suggested options for the user to choose from\",\"items\":{\"type\":\"string\"},\"type\":\"array\"},\"question\":{\"description\":\"The question text to display to the user\",\"type\":\"string\"},\"selection_type\":{\"description\":\"Answer type: text (free text input, default), single (select exactly one option), multi (select one or more options)\",\"enum\":[\"text\",\"single\",\"multi\"],\"type\":\"string\"}},\"required\":[\"id\",\"question\"],\"type\":\"object\"},\"type\":\"array\"}},\"required\":[\"questions\"],\"type\":\"object\"}\n\nTOOL CALL FORMAT — FOLLOW EXACTLY:\n\n\n \n \n \n\n\nRULES:\n1) Use the XML wrapper format only.\n2) Put one or more entries under a single root.\n3) Use for the tool name and for each argument.\n4) All string values should use when they may contain code, markup, JSON, paths, prompts, or other special characters.\n5) Objects use nested XML inside a ; arrays may repeat children.\n6) Numbers, booleans, and null stay plain text.\n7) Use only the parameter names in the tool schema. Do not invent fields.\n8) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.\n\nPARAMETER SHAPES:\n- string => \n- object => ...\n- array => ...\n- number/bool/null => plain text\n\n【WRONG — Do NOT do these】:\n\nWrong 1 — mixed text after XML:\n ... I hope this helps.\nWrong 2 — old canonical tags or raw payloads:\n read_file{\"path\":\"x\"}\nWrong 3 — Markdown code fences:\n ```xml\n ...\n ```\n\nRemember: The ONLY valid way to use tools is the ... 
XML block at the end of your response.\n\n【CORRECT EXAMPLES】:\n\nExample A — Single tool:\n\n \n \n \n\n\nExample B — Two tools in parallel:\n\n \n \n \n \n \n \n \n\n\nExample C — Tool with nested XML parameters:\n\n \n \n \n \n\n<|end▁of▁instructions|>\n\n<|User|>\n<|User|>\n在一个类似2022×2022的花园的每个方格中,最初都有一个高度为0的树,园丁和伐木工交替进行以下游戏,园丁首先开始:园丁选择花园中的一个方格,该方格上的每棵树以及周围至多八个方格中的所有树都会增长一单位,伐木工随后选择板上的四个不同方格,这些方格上正高的树都会减少一单位,称一棵树为雄伟的,如果其高度至少为10的六次方.确定园丁能够确保板上最终有K棵雄伟的树,无论伐木工如何操作,求最大的K<|end▁of▁sentence|><|end▁of▁sentence|>", "ref_file_ids": [], "search_enabled": false, "thinking_enabled": true diff --git a/tests/raw_stream_samples/guangzhou-weather-reasoner-search-20260404/meta.json b/tests/raw_stream_samples/guangzhou-weather-reasoner-search-20260404/meta.json index 95848d7..8f14549 100644 --- a/tests/raw_stream_samples/guangzhou-weather-reasoner-search-20260404/meta.json +++ b/tests/raw_stream_samples/guangzhou-weather-reasoner-search-20260404/meta.json @@ -2,7 +2,7 @@ "sample_id": "guangzhou-weather-reasoner-search-20260404", "captured_at_utc": "2026-04-04T16:01:27Z", "request": { - "model": "deepseek-reasoner-search", + "model": "deepseek-v4-pro-search", "stream": true, "messages": [ { diff --git a/tests/raw_stream_samples/markdown-format-example-20260405-spacefix/meta.json b/tests/raw_stream_samples/markdown-format-example-20260405-spacefix/meta.json index 6e9d23e..5c00cbf 100644 --- a/tests/raw_stream_samples/markdown-format-example-20260405-spacefix/meta.json +++ b/tests/raw_stream_samples/markdown-format-example-20260405-spacefix/meta.json @@ -9,7 +9,7 @@ "role": "user" } ], - "model": "deepseek-reasoner-search", + "model": "deepseek-v4-pro-search", "stream": true }, "capture": { diff --git a/tests/raw_stream_samples/markdown-format-example-20260405/meta.json b/tests/raw_stream_samples/markdown-format-example-20260405/meta.json index c8df46d..b7421e7 100644 --- a/tests/raw_stream_samples/markdown-format-example-20260405/meta.json +++ 
b/tests/raw_stream_samples/markdown-format-example-20260405/meta.json @@ -9,7 +9,7 @@ "role": "user" } ], - "model": "deepseek-reasoner-search", + "model": "deepseek-v4-pro-search", "stream": true }, "capture": { diff --git a/tests/scripts/capture-raw-stream-sample.sh b/tests/scripts/capture-raw-stream-sample.sh index 6d1cce0..6e8ed7b 100755 --- a/tests/scripts/capture-raw-stream-sample.sh +++ b/tests/scripts/capture-raw-stream-sample.sh @@ -7,7 +7,7 @@ cd "$ROOT_DIR" CONFIG_PATH="${1:-config.json}" SAMPLE_ID="${2:-capture-$(date -u +%Y%m%dT%H%M%SZ)}" QUESTION="${3:-广州天气}" -MODEL="${4:-deepseek-reasoner-search}" +MODEL="${4:-deepseek-v4-pro-search}" API_KEY="${5:-}" ADMIN_KEY="${DS2API_ADMIN_KEY:-admin}" diff --git a/tests/scripts/run-unit-go.sh b/tests/scripts/run-unit-go.sh index 38a11b8..c9ae5b9 100755 --- a/tests/scripts/run-unit-go.sh +++ b/tests/scripts/run-unit-go.sh @@ -4,4 +4,7 @@ set -euo pipefail ROOT_DIR="$(cd "$(dirname "$0")/../.." && pwd)" cd "$ROOT_DIR" +export GOCACHE="${GOCACHE:-${ROOT_DIR}/.tmp/go-build-cache}" +mkdir -p "$GOCACHE" + go test ./... 
"$@" diff --git a/webui/package-lock.json b/webui/package-lock.json index 55ac557..63f1eed 100644 --- a/webui/package-lock.json +++ b/webui/package-lock.json @@ -13,7 +13,8 @@ "react": "^18.2.0", "react-dom": "^18.2.0", "react-router-dom": "^7.13.0", - "tailwind-merge": "^3.4.0" + "tailwind-merge": "^3.4.0", + "uuid": "^14.0.0" }, "devDependencies": { "@vitejs/plugin-react": "^6.0.1", @@ -2021,6 +2022,19 @@ "dev": true, "license": "MIT" }, + "node_modules/uuid": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-14.0.0.tgz", + "integrity": "sha512-Qo+uWgilfSmAhXCMav1uYFynlQO7fMFiMVZsQqZRMIXp0O7rR7qjkj+cPvBHLgBqi960QCoo/PH2/6ZtVqKvrg==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist-node/bin/uuid" + } + }, "node_modules/vite": { "version": "8.0.5", "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.5.tgz", diff --git a/webui/package.json b/webui/package.json index 3f41ca9..bc69afb 100644 --- a/webui/package.json +++ b/webui/package.json @@ -14,7 +14,8 @@ "react": "^18.2.0", "react-dom": "^18.2.0", "react-router-dom": "^7.13.0", - "tailwind-merge": "^3.4.0" + "tailwind-merge": "^3.4.0", + "uuid": "^14.0.0" }, "devDependencies": { "@vitejs/plugin-react": "^6.0.1", diff --git a/webui/src/features/account/AddKeyModal.jsx b/webui/src/features/account/AddKeyModal.jsx index 875101a..a79e705 100644 --- a/webui/src/features/account/AddKeyModal.jsx +++ b/webui/src/features/account/AddKeyModal.jsx @@ -1,4 +1,7 @@ import { X } from 'lucide-react' +import { v4 as uuidv4 } from 'uuid' + +import { maskSecret } from '../../utils/maskSecret' export default function AddKeyModal({ show, t, editingKey, newKey, setNewKey, loading, onClose, onAdd }) { if (!show) { @@ -6,6 +9,7 @@ export default function AddKeyModal({ show, t, editingKey, newKey, setNewKey, lo } const isEditing = Boolean(editingKey?.key) + const displayKey = isEditing ? 
maskSecret(editingKey?.key || newKey.key) : newKey.key return (
@@ -24,7 +28,7 @@ export default function AddKeyModal({ show, t, editingKey, newKey, setNewKey, lo type="text" className={isEditing ? "input-field bg-muted/30 flex-1 cursor-not-allowed" : "input-field bg-[#09090b] flex-1"} placeholder={isEditing ? t('accountManager.keyReadonlyPlaceholder') : t('accountManager.newKeyPlaceholder')} - value={newKey.key} + value={displayKey} onChange={e => setNewKey({ ...newKey, key: e.target.value })} autoFocus={!isEditing} readOnly={isEditing} @@ -32,7 +36,7 @@ export default function AddKeyModal({ show, t, editingKey, newKey, setNewKey, lo {!isEditing && (
{item.remark || '-'}
{copiedKey === item.key && ( diff --git a/webui/src/features/apiTester/ApiTesterContainer.jsx b/webui/src/features/apiTester/ApiTesterContainer.jsx index 96e824a..bf70d22 100644 --- a/webui/src/features/apiTester/ApiTesterContainer.jsx +++ b/webui/src/features/apiTester/ApiTesterContainer.jsx @@ -50,18 +50,12 @@ export default function ApiTesterContainer({ config, onMessage, authFetch }) { const customKeyManaged = customKeyActive && configuredKeys.includes(trimmedApiKey) const models = [ - { id: 'deepseek-chat', name: 'deepseek-chat', icon: 'MessageSquare', desc: t('apiTester.models.chat'), color: 'text-amber-500' }, - { id: 'deepseek-reasoner', name: 'deepseek-reasoner', icon: 'Cpu', desc: t('apiTester.models.reasoner'), color: 'text-amber-600' }, - { id: 'deepseek-chat-search', name: 'deepseek-chat-search', icon: 'SearchIcon', desc: t('apiTester.models.chatSearch'), color: 'text-cyan-500' }, - { id: 'deepseek-reasoner-search', name: 'deepseek-reasoner-search', icon: 'SearchIcon', desc: t('apiTester.models.reasonerSearch'), color: 'text-cyan-600' }, - { id: 'deepseek-expert-chat', name: 'deepseek-expert-chat', icon: 'MessageSquare', desc: t('apiTester.models.expertChat'), color: 'text-emerald-500' }, - { id: 'deepseek-expert-reasoner', name: 'deepseek-expert-reasoner', icon: 'Cpu', desc: t('apiTester.models.expertReasoner'), color: 'text-emerald-600' }, - { id: 'deepseek-expert-chat-search', name: 'deepseek-expert-chat-search', icon: 'SearchIcon', desc: t('apiTester.models.expertChatSearch'), color: 'text-teal-500' }, - { id: 'deepseek-expert-reasoner-search', name: 'deepseek-expert-reasoner-search', icon: 'SearchIcon', desc: t('apiTester.models.expertReasonerSearch'), color: 'text-teal-600' }, - { id: 'deepseek-vision-chat', name: 'deepseek-vision-chat', icon: 'MessageSquare', desc: t('apiTester.models.visionChat'), color: 'text-violet-500' }, - { id: 'deepseek-vision-reasoner', name: 'deepseek-vision-reasoner', icon: 'Cpu', desc: 
t('apiTester.models.visionReasoner'), color: 'text-violet-600' }, - { id: 'deepseek-vision-chat-search', name: 'deepseek-vision-chat-search', icon: 'SearchIcon', desc: t('apiTester.models.visionChatSearch'), color: 'text-fuchsia-500' }, - { id: 'deepseek-vision-reasoner-search', name: 'deepseek-vision-reasoner-search', icon: 'SearchIcon', desc: t('apiTester.models.visionReasonerSearch'), color: 'text-fuchsia-600' }, + { id: 'deepseek-v4-flash', name: 'deepseek-v4-flash', icon: 'MessageSquare', desc: t('apiTester.models.flash'), color: 'text-amber-500' }, + { id: 'deepseek-v4-pro', name: 'deepseek-v4-pro', icon: 'Cpu', desc: t('apiTester.models.pro'), color: 'text-amber-600' }, + { id: 'deepseek-v4-flash-search', name: 'deepseek-v4-flash-search', icon: 'SearchIcon', desc: t('apiTester.models.flashSearch'), color: 'text-cyan-500' }, + { id: 'deepseek-v4-pro-search', name: 'deepseek-v4-pro-search', icon: 'SearchIcon', desc: t('apiTester.models.proSearch'), color: 'text-cyan-600' }, + { id: 'deepseek-v4-vision', name: 'deepseek-v4-vision', icon: 'ImageIcon', desc: t('apiTester.models.vision'), color: 'text-violet-500' }, + { id: 'deepseek-v4-vision-search', name: 'deepseek-v4-vision-search', icon: 'SearchIcon', desc: t('apiTester.models.visionSearch'), color: 'text-fuchsia-600' }, ] const { runTest, stopGeneration } = useChatStreamClient({ diff --git a/webui/src/features/apiTester/ConfigPanel.jsx b/webui/src/features/apiTester/ConfigPanel.jsx index ddf9848..2dbbfdd 100644 --- a/webui/src/features/apiTester/ConfigPanel.jsx +++ b/webui/src/features/apiTester/ConfigPanel.jsx @@ -10,6 +10,8 @@ import { } from 'lucide-react' import clsx from 'clsx' +import { maskSecret } from '../../utils/maskSecret' + export default function ConfigPanel({ t, configExpanded, @@ -40,6 +42,7 @@ export default function ConfigPanel({ } const selectedModel = models.find(m => m.id === model) || models[0] const SelectedModelIcon = selectedModel ? 
(iconMap[selectedModel.icon] || MessageSquare) : MessageSquare + const defaultKeyPreview = maskSecret(config.keys?.[0]) return (
setApiKey(e.target.value)} /> diff --git a/webui/src/features/apiTester/useApiTesterState.js b/webui/src/features/apiTester/useApiTesterState.js index 96f168b..e89b667 100644 --- a/webui/src/features/apiTester/useApiTesterState.js +++ b/webui/src/features/apiTester/useApiTesterState.js @@ -1,7 +1,7 @@ import { useEffect, useRef, useState } from 'react' export function useApiTesterState({ t }) { - const [model, setModel] = useState('deepseek-chat') + const [model, setModel] = useState('deepseek-v4-flash') const defaultMessage = t('apiTester.defaultMessage') const [message, setMessage] = useState(defaultMessage) const [apiKey, setApiKey] = useState('') diff --git a/webui/src/features/settings/HistorySplitSection.jsx b/webui/src/features/settings/HistorySplitSection.jsx index d9db63c..242d687 100644 --- a/webui/src/features/settings/HistorySplitSection.jsx +++ b/webui/src/features/settings/HistorySplitSection.jsx @@ -9,15 +9,10 @@ export default function HistorySplitSection({ t, form, setForm }) {