diff --git a/.github/workflows/quality-gates.yml b/.github/workflows/quality-gates.yml
index 0365672..6d6a9d5 100644
--- a/.github/workflows/quality-gates.yml
+++ b/.github/workflows/quality-gates.yml
@@ -5,12 +5,23 @@ on:
   push:
     branches:
       - dev
+      - main
 
 permissions:
   contents: read
 
+concurrency:
+  group: quality-gates-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  GO_VERSION: "1.26.x"
+  NODE_VERSION: "24"
+  GOLANGCI_LINT_VERSION: "v2.11.4"
+
 jobs:
-  quality-gates:
+  lint-and-refactor:
+    name: Lint and Refactor Gate
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
@@ -19,19 +30,13 @@ jobs:
       - name: Setup Go
         uses: actions/setup-go@v5
         with:
-          go-version: "1.26.x"
-
-      - name: Setup Node
-        uses: actions/setup-node@v4
-        with:
-          node-version: "24"
-          cache: "npm"
-          cache-dependency-path: webui/package-lock.json
+          go-version: ${{ env.GO_VERSION }}
+          cache-dependency-path: go.sum
 
       - name: Setup golangci-lint
         uses: golangci/golangci-lint-action@v8
         with:
-          version: v2.11.4
+          version: ${{ env.GOLANGCI_LINT_VERSION }}
           install-mode: binary
           verify: true
 
@@ -41,10 +46,88 @@ jobs:
       - name: Refactor Line Gate
         run: ./tests/scripts/check-refactor-line-gate.sh
 
+  go-unit:
+    name: Go Unit (${{ matrix.os }})
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - macos-latest
+          - windows-latest
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache-dependency-path: go.sum
+
+      - name: Go Unit Gate
+        run: ./tests/scripts/run-unit-go.sh
+
+  unit-all:
+    name: Unit Gates (Go + Node)
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache-dependency-path: go.sum
+
+      - name: Setup Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: npm
+          cache-dependency-path: webui/package-lock.json
+
       - name: Unit Gates (Go + Node)
         run: ./tests/scripts/run-unit-all.sh
 
+  webui-build:
+    name: WebUI Build
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: npm
+          cache-dependency-path: webui/package-lock.json
+
       - name: WebUI Build Gate
         run: |
-          npm ci --prefix webui
+          npm ci --prefix webui --prefer-offline --no-audit
           npm run build --prefix webui
+
+  cross-build:
+    name: Release Target Cross-Build
+    if: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/main') }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache-dependency-path: go.sum
+
+      - name: Cross-Build Release Targets
+        env:
+          CROSS_BUILD_JOBS: "3"
+        run: ./tests/scripts/check-cross-build.sh
diff --git a/.github/workflows/release-artifacts.yml b/.github/workflows/release-artifacts.yml
index 17b3b74..09ec700 100644
--- a/.github/workflows/release-artifacts.yml
+++ b/.github/workflows/release-artifacts.yml
@@ -15,6 +15,14 @@ permissions:
   contents: write
   packages: write
 
+concurrency:
+  group: release-artifacts-${{ github.event.release.tag_name || github.event.inputs.release_tag }}
+  cancel-in-progress: false
+
+env:
+  GO_VERSION: "1.26.x"
+  NODE_VERSION: "24"
+
 jobs:
   build-and-upload:
     runs-on: ubuntu-latest
@@ -27,12 +35,13 @@ jobs:
       - name: Setup Go
         uses: actions/setup-go@v5
         with:
-          go-version: "1.26.x"
+          go-version: ${{ env.GO_VERSION }}
+          cache-dependency-path: go.sum
 
       - name: Setup Node
         uses: actions/setup-node@v4
         with:
-          node-version: "24"
+          node-version: ${{ env.NODE_VERSION }}
           cache: "npm"
           cache-dependency-path: webui/package-lock.json
 
@@ -44,52 +53,13 @@ jobs:
 
       - name: Build WebUI
         run: |
-          npm ci --prefix webui
+          npm ci --prefix webui --prefer-offline --no-audit
           npm run build --prefix webui
 
       - name: Build Multi-Platform Archives
-        run: |
-          set -euo pipefail
-          TAG="${RELEASE_TAG}"
-          BUILD_VERSION="${TAG}"
-          if [ -z "${BUILD_VERSION}" ] && [ -f VERSION ]; then
-            BUILD_VERSION="$(cat VERSION | tr -d '[:space:]')"
-          fi
-          mkdir -p dist
-
-          targets=(
-            "linux/amd64"
-            "linux/arm64"
-            "darwin/amd64"
-            "darwin/arm64"
-            "windows/amd64"
-          )
-
-          for target in "${targets[@]}"; do
-            GOOS="${target%/*}"
-            GOARCH="${target#*/}"
-            PKG="ds2api_${TAG}_${GOOS}_${GOARCH}"
-            STAGE="dist/${PKG}"
-            BIN="ds2api"
-            if [ "${GOOS}" = "windows" ]; then
-              BIN="ds2api.exe"
-            fi
-
-            mkdir -p "${STAGE}/static"
-            CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" \
-              go build -trimpath -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION}" -o "${STAGE}/${BIN}" ./cmd/ds2api
-
-            cp config.example.json .env.example LICENSE README.MD README.en.md "${STAGE}/"
-            cp -R static/admin "${STAGE}/static/admin"
-
-            if [ "${GOOS}" = "windows" ]; then
-              (cd dist && zip -rq "${PKG}.zip" "${PKG}")
-            else
-              tar -C dist -czf "dist/${PKG}.tar.gz" "${PKG}"
-            fi
-
-            rm -rf "${STAGE}"
-          done
+        env:
+          RELEASE_BUILD_JOBS: "3"
+        run: ./scripts/build-release-archives.sh
 
       - name: Prepare Docker release inputs
         run: |
@@ -153,6 +123,8 @@ jobs:
           platforms: linux/amd64,linux/arm64
           tags: ${{ steps.meta_release.outputs.tags }}
           labels: ${{ steps.meta_release.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
 
       - name: Export Docker image archives for release assets
         run: |
@@ -162,12 +134,14 @@ jobs:
           docker buildx build \
             --platform linux/amd64 \
             --target runtime-from-dist \
+            --cache-from type=gha \
             --output type=docker,dest="dist/ds2api_${TAG}_docker_linux_amd64.tar" \
             .
 
           docker buildx build \
             --platform linux/arm64 \
             --target runtime-from-dist \
+            --cache-from type=gha \
             --output type=docker,dest="dist/ds2api_${TAG}_docker_linux_arm64.tar" \
             .
 
diff --git a/API.en.md b/API.en.md
index ca1e7a9..04a26fb 100644
--- a/API.en.md
+++ b/API.en.md
@@ -37,7 +37,7 @@ Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Depl
 
 - OpenAI / Claude / Gemini protocols are now mounted on one shared `chi` router tree assembled in `internal/server/router.go`.
 - Adapter responsibilities are streamlined to: **request normalization → DeepSeek invocation → protocol-shaped rendering**, reducing legacy split-logic paths.
-- Tool-calling semantics are aligned between Go and Node runtime: the only executable model-output syntax is the canonical XML tool block `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`, plus stream-time anti-leak filtering.
+- Tool-calling semantics are aligned between Go and Node runtime: models should output the DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts legacy canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`. DSML is normalized back to XML at the parser entry, so internal parsing remains XML-based, with stream-time anti-leak filtering.
 - `Admin API` separates static config from runtime policy: `/admin/config*` for configuration state, `/admin/settings*` for runtime behavior.
 
 ---
@@ -334,7 +334,8 @@ When `tools` is present, DS2API performs anti-leak handling:
 
 Additional notes:
 
-- The parser currently treats only canonical XML tool blocks (`<tool_calls>` / `<invoke name="...">` / `<parameter name="...">`) as executable tool calls. Legacy `<tools>`, `<tool_call>`, `<tool_name>`, `<param>`, `<function_call>`, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text.
+- The parser treats DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`) and legacy canonical XML tool blocks (`<tool_calls>` / `<invoke name="...">` / `<parameter name="...">`) as executable tool calls. DSML is normalized back to XML at the parser entry; internal parsing remains XML-based. Legacy `<tools>`, `<tool_call>`, `<tool_name>`, `<param>`, `<function_call>`, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text.
+- If the final visible response text is empty but the reasoning stream contains an executable tool call, the Chat / Responses endpoints emit a standard OpenAI `tool_calls` / `function_call` output during finalization. If the client did not enable thinking/reasoning, that reasoning text is used only for detection and is not exposed as visible text or `reasoning_content`.
 - `tool_calls` shown inside fenced markdown code blocks (for example, ```json ... ```) are treated as examples, not executable calls.
 
 ---
diff --git a/API.md b/API.md
index 35d97d4..a045b6c 100644
--- a/API.md
+++ b/API.md
@@ -37,7 +37,7 @@
 
 - OpenAI / Claude / Gemini 三套协议已统一挂在同一 `chi` 路由树上，由 `internal/server/router.go` 负责装配。
 - 适配器层职责收敛为：**请求归一化 → DeepSeek 调用 → 协议形态渲染**，减少历史版本中“同能力多处实现”的分叉。
-- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致：当前唯一可执行的模型输出语法是 canonical XML 工具块 `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`，并在流式场景执行防泄漏筛分。
+- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致：推荐模型输出 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`；兼容层也接受 DSML wrapper 别名 `<dsml|tool_calls>`、`<|tool_calls>`、`<｜tool_calls>`、常见 DSML 分隔符漏写形态（如 `<|DSML tool_calls>`）、`DSML` 与工具标签名黏连的常见 typo（如 `<DSMLtool_calls>`），以及旧式 canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`。实现上采用窄容错结构扫描：只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径，裸 `<invoke>` 不计为已支持语法；流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量（如 `123`、`true`、`null`、数组或对象），会按结构化值输出，不再一律当作字符串；若 CDATA 偶发漏闭合，则会在最终 parse / flush 恢复阶段做窄修复，尽量保住已完整包裹的外层工具调用。
 - `Admin API` 将配置与运行时策略分开：`/admin/config*` 管静态配置，`/admin/settings*` 管运行时行为。
 
 ---
@@ -196,16 +196,22 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=`
   "object": "list",
   "data": [
     {"id": "deepseek-v4-flash", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-flash-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
     {"id": "deepseek-v4-pro", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-pro-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
     {"id": "deepseek-v4-flash-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-flash-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
     {"id": "deepseek-v4-pro-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-pro-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
     {"id": "deepseek-v4-vision", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
-    {"id": "deepseek-v4-vision-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}
+    {"id": "deepseek-v4-vision-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-vision-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-vision-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}
   ]
 }
 ```
 
-> 说明：`/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID；常见 alias 仅用于请求入参解析，不会在该接口中单独展开返回。
+> 说明：`/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID；常见 alias 仅用于请求入参解析，不会在该接口中单独展开返回。带 `-nothinking` 后缀的模型表示无论请求里是否显式开启 thinking / reasoning，都会强制关闭思考输出。
 
 ### 模型 alias 解析策略
 
@@ -213,8 +219,9 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=`
 
 1. 先匹配 DeepSeek 原生模型。
 2. 再匹配 `model_aliases` 精确映射。
-3. 未命中时按模型家族规则回退（如 `o*`、`gpt-*`、`claude-*`）。
-4. 仍未命中则返回 `invalid_request_error`。
+3. 如果请求名以 `-nothinking` 结尾，则在最终解析出的规范模型上追加对应的无思考变体。
+4. 未命中时按模型家族规则回退（如 `o*`、`gpt-*`、`claude-*`）。
+5. 仍未命中则返回 `invalid_request_error`。
 
 当前内置默认 alias 来自 `internal/config/models.go`，`config.model_aliases` 会在运行时覆盖或补充同名映射。节选：
 
@@ -224,6 +231,8 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=`
 - Gemini：`gemini-2.5-pro`、`gemini-2.5-flash`、`gemini-pro-vision`
 - 其他兼容族：`llama-*`、`qwen-*`、`mistral-*`、`command-*` 会按家族启发式回退
 
+上述 alias 若在请求名后追加 `-nothinking` 后缀，也会映射到对应的强制关闭 thinking 版本。
+
 退役历史模型（如 `claude-1.*`、`claude-2.*`、`claude-instant-*`、`gpt-3.5*`）会被显式拒绝。
 
 ### `POST /v1/chat/completions`
@@ -239,7 +248,7 @@ Content-Type: application/json
 
 | 字段 | 类型 | 必填 | 说明 |
 | --- | --- | --- | --- |
-| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias（如 `gpt-5.5`、`gpt-5.4-mini`、`gpt-5.3-codex`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-6`、`gemini-2.5-pro`、`gemini-2.5-flash` 等） |
+| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias（如 `gpt-5.5`、`gpt-5.4-mini`、`gpt-5.3-codex`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-6`、`gemini-2.5-pro`、`gemini-2.5-flash` 等）；若模型名带 `-nothinking` 后缀，则强制关闭 thinking / reasoning |
 | `messages` | array | ✅ | OpenAI 风格消息数组 |
 | `stream` | boolean | ❌ | 默认 `false` |
 | `tools` | array | ❌ | Function Calling 定义 |
@@ -335,7 +344,8 @@ data: [DONE]
 补充说明：
 
 - **非代码块上下文**下，工具负载即使与普通文本混合，也会按特征识别并产出可执行 tool call（前后普通文本仍可透传）。
-- 解析器当前只把 canonical XML 工具块（`<tool_calls>` / `<invoke name="...">` / `<parameter name="...">`）作为可执行调用解析；旧式 `<tools>`、`<tool_call>`、`<tool_name>`、`<param>`、`<function_call>`、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理。
+- 解析器当前把 DSML 外壳（`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`）、DSML wrapper 别名（`<dsml|tool_calls>`、`<|tool_calls>`、`<｜tool_calls>`）、常见 DSML 分隔符漏写形态（如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`）、`DSML` 与工具标签名黏连的常见 typo（如 `<DSMLtool_calls>` / `<DSMLinvoke>` / `<DSMLparameter>`）和旧式 canonical XML 工具块（`<tool_calls>` / `<invoke name="...">` / `<parameter name="...">`）作为可执行调用解析；DSML 会先归一化回 XML，内部仍以 XML 解析语义为准。旧式 `<tools>`、`<tool_call>`、`<tool_name>`、`<param>`、`<function_call>`、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理。
+- 当最终可见正文为空但思维链里包含可执行工具调用时，Chat / Responses 会在收尾阶段补发标准 OpenAI `tool_calls` / `function_call` 输出；如果客户端未开启 thinking / reasoning，该思维链只用于检测，不会作为可见正文或 `reasoning_content` 暴露。
 - Markdown fenced code block（例如 ```json ... ```）中的 `tool_calls` 仅视为示例文本，不会被执行。
 
 ---
@@ -448,16 +458,19 @@ data: [DONE]
   "object": "list",
   "data": [
     {"id": "claude-sonnet-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
+    {"id": "claude-sonnet-4-6-nothinking", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
     {"id": "claude-haiku-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
-    {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"}
+    {"id": "claude-haiku-4-5-nothinking", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
+    {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
+    {"id": "claude-opus-4-6-nothinking", "object": "model", "created": 1715635200, "owned_by": "anthropic"}
   ],
   "first_id": "claude-opus-4-6",
-  "last_id": "claude-3-haiku-20240307",
+  "last_id": "claude-3-haiku-20240307-nothinking",
   "has_more": false
 }
 ```
 
-> 说明：示例仅展示部分模型；实际返回除当前主别名外，还包含 Claude 4.x snapshots，以及 3.x 历史模型 ID 与常见别名。
+> 说明：示例仅展示部分模型；实际返回除当前主别名外，还包含 Claude 4.x snapshots、3.x 历史模型 ID 与常见别名，并为这些可映射模型额外提供 `-nothinking` 变体。
 
 ### `POST /anthropic/v1/messages`
 
@@ -475,7 +488,7 @@ anthropic-version: 2023-06-01
 
 | 字段 | 类型 | 必填 | 说明 |
 | --- | --- | --- | --- |
-| `model` | string | ✅ | 例如 `claude-sonnet-4-6` / `claude-opus-4-6` / `claude-haiku-4-5`（兼容 `claude-sonnet-4-5`、`claude-3-5-haiku-latest`），并支持历史 Claude 模型 ID |
+| `model` | string | ✅ | 例如 `claude-sonnet-4-6` / `claude-opus-4-6` / `claude-haiku-4-5`（兼容 `claude-sonnet-4-5`、`claude-3-5-haiku-latest`），并支持历史 Claude 模型 ID；若模型名带 `-nothinking` 后缀，则强制关闭 thinking / reasoning |
 | `messages` | array | ✅ | Claude 风格消息数组 |
 | `max_tokens` | number | ❌ | 缺省自动补 `8192`；当前实现不会硬性截断上游输出 |
 | `stream` | boolean | ❌ | 默认 `false` |
@@ -533,7 +546,8 @@ data: {"type":"message_stop"}
 
 **说明**：
 
-- 名称中包含 `opus` / `reasoner` / `slow` 的模型会输出 `thinking_delta`
+- 默认模型会按各 surface 的既有规则输出 thinking / reasoning 相关增量
+- 带 `-nothinking` 后缀的模型会强制关闭 thinking，即使请求显式传了 `thinking` / `reasoning` / `reasoning_effort` 也不会输出 `thinking_delta`
 - 不会输出 `signature_delta`（上游 DeepSeek 未提供可验证签名）
 - `tools` 场景优先避免泄露原始工具 JSON，不强制发送 `input_json_delta`
 
@@ -574,7 +588,7 @@ data: {"type":"message_stop"}
 
 ### `POST /v1beta/models/{model}:generateContent`
 
-请求体兼容 Gemini `contents` / `tools` 字段，模型名可用 alias 自动映射到 DeepSeek 模型。
+请求体兼容 Gemini `contents` / `tools` 字段，模型名可用 alias 自动映射到 DeepSeek 模型；若路径中的模型名带 `-nothinking` 后缀，则最终会映射到对应的无思考模型。
 
 响应为 Gemini 兼容结构，核心字段包括：
 
diff --git a/Dockerfile b/Dockerfile
index be25b95..ac062f7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,7 +20,7 @@ RUN set -eux; \
     GOARCH="${TARGETARCH:-$(go env GOARCH)}"; \
     BUILD_VERSION_RESOLVED="${BUILD_VERSION:-}"; \
     if [ -z "${BUILD_VERSION_RESOLVED}" ] && [ -f VERSION ]; then BUILD_VERSION_RESOLVED="$(cat VERSION | tr -d "[:space:]")"; fi; \
-    CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" go build -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION_RESOLVED}" -o /out/ds2api ./cmd/ds2api
+    CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" go build -buildvcs=false -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION_RESOLVED}" -o /out/ds2api ./cmd/ds2api
 
 FROM busybox:1.36.1-musl AS busybox-tools
 
@@ -54,7 +54,6 @@ RUN set -eux; \
     test -n "${PKG_DIR}"; \
     mkdir -p /out/static; \
     cp "${PKG_DIR}/ds2api" /out/ds2api; \
-
     cp "${PKG_DIR}/config.example.json" /out/config.example.json; \
     cp -R "${PKG_DIR}/static/admin" /out/static/admin
 
diff --git a/README.MD b/README.MD
index 412596e..fd975bf 100644
--- a/README.MD
+++ b/README.MD
@@ -4,11 +4,14 @@
 
 # DS2API
 
+<a href="https://trendshift.io/repositories/24508" target="_blank"><img src="https://trendshift.io/api/badge/repositories/24508" alt="CJackHwang/ds2api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+
 [![License](https://img.shields.io/github/license/CJackHwang/ds2api.svg)](LICENSE)
 ![Stars](https://img.shields.io/github/stars/CJackHwang/ds2api.svg)
 ![Forks](https://img.shields.io/github/forks/CJackHwang/ds2api.svg)
 [![Release](https://img.shields.io/github/v/release/CJackHwang/ds2api?display_name=tag)](https://github.com/CJackHwang/ds2api/releases)
 [![Docker](https://img.shields.io/badge/docker-ready-blue.svg)](docs/DEPLOY.md)
+
 [![Deploy on Zeabur](https://zeabur.com/button.svg)](https://zeabur.com/templates/L4CFHP)
 [![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https://github.com/CJackHwang/ds2api)
 
@@ -122,23 +125,32 @@ flowchart LR
 | 模型类型 | 模型 ID | thinking | search |
 | --- | --- | --- | --- |
 | default | `deepseek-v4-flash` | 默认开启，可由请求参数控制 | ❌ |
+| default | `deepseek-v4-flash-nothinking` | 永久关闭，不受请求参数影响 | ❌ |
 | expert | `deepseek-v4-pro` | 默认开启，可由请求参数控制 | ❌ |
+| expert | `deepseek-v4-pro-nothinking` | 永久关闭，不受请求参数影响 | ❌ |
 | default | `deepseek-v4-flash-search` | 默认开启，可由请求参数控制 | ✅ |
+| default | `deepseek-v4-flash-search-nothinking` | 永久关闭，不受请求参数影响 | ✅ |
 | expert | `deepseek-v4-pro-search` | 默认开启，可由请求参数控制 | ✅ |
+| expert | `deepseek-v4-pro-search-nothinking` | 永久关闭，不受请求参数影响 | ✅ |
 | vision | `deepseek-v4-vision` | 默认开启，可由请求参数控制 | ❌ |
+| vision | `deepseek-v4-vision-nothinking` | 永久关闭，不受请求参数影响 | ❌ |
 | vision | `deepseek-v4-vision-search` | 默认开启，可由请求参数控制 | ✅ |
+| vision | `deepseek-v4-vision-search-nothinking` | 永久关闭，不受请求参数影响 | ✅ |
 
-除原生模型外，也支持常见 alias 输入（如 `gpt-4.1`、`gpt-5`、`gpt-5-codex`、`o3`、`claude-*`、`gemini-*` 等），但 `/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID。完整 alias 行为以 [API.md](API.md#模型-alias-解析策略) 和 `config.example.json` 为准。
+除原生模型外，也支持常见 alias 输入（如 `gpt-4.1`、`gpt-5`、`gpt-5-codex`、`o3`、`claude-*`、`gemini-*` 等），但 `/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID。若 alias 名本身追加 `-nothinking` 后缀，也会映射到对应的强制关闭思考模型。完整 alias 行为以 [API.md](API.md#模型-alias-解析策略) 和 `config.example.json` 为准。
 
 ### Claude 接口（`GET /anthropic/v1/models`）
 
 | 当前常用模型 | 默认映射 |
 | --- | --- |
 | `claude-sonnet-4-6` | `deepseek-v4-flash` |
+| `claude-sonnet-4-6-nothinking` | `deepseek-v4-flash-nothinking` |
 | `claude-haiku-4-5`（兼容 `claude-3-5-haiku-latest`） | `deepseek-v4-flash` |
+| `claude-haiku-4-5-nothinking` | `deepseek-v4-flash-nothinking` |
 | `claude-opus-4-6` | `deepseek-v4-pro` |
+| `claude-opus-4-6-nothinking` | `deepseek-v4-pro-nothinking` |
 
-可通过配置中的 `model_aliases` 覆盖映射关系。
+可通过配置中的 `model_aliases` 覆盖映射关系；若请求模型名带 `-nothinking`，会在最终映射结果上强制追加无思考语义。
 `/anthropic/v1/models` 除上述主别名外，还会返回 Claude 4.x snapshots、3.x 历史模型 ID 与常见 alias，便于旧客户端直接兼容。
 
 #### Claude Code 接入避坑（实测）
@@ -146,11 +158,11 @@ flowchart LR
 - `ANTHROPIC_BASE_URL` 推荐直接指向 DS2API 根地址（例如 `http://127.0.0.1:5001`），Claude Code 会请求 `/v1/messages?beta=true`。
 - `ANTHROPIC_API_KEY` 需要与 `config.json` 中 `keys` 一致；建议同时保留常规 key 与 `sk-ant-*` 形态 key，兼容不同客户端校验习惯。
 - 若系统设置了代理，建议对 DS2API 地址配置 `NO_PROXY=127.0.0.1,localhost,<你的主机IP>`，避免本地回环请求被代理拦截。
-- 如遇“工具调用输出成文本、未执行”问题，请优先检查模型输出是否为当前唯一受支持的 XML 工具块：`<tool_calls><invoke name="..."><parameter name="...">...`，而不是旧式 `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`、`<function_call>`、`tool_use` 或纯 JSON `tool_calls` 片段。
+- 如遇“工具调用输出成文本、未执行”问题，请优先检查模型输出是否为推荐的 DSML 工具块：`<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`。兼容层也接受旧式 canonical XML：`<tool_calls><invoke name="..."><parameter name="...">...`；旧式 `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`、`<function_call>`、`tool_use` 或纯 JSON `tool_calls` 片段不会执行。
 
 ### Gemini 接口
 
-Gemini 适配器将模型名通过 `model_aliases` 或内置规则映射到 DeepSeek 原生模型，支持 `generateContent` 和 `streamGenerateContent` 两种调用方式，并完整支持 Tool Calling（`functionDeclarations` → `functionCall` 输出）。
+Gemini 适配器将模型名通过 `model_aliases` 或内置规则映射到 DeepSeek 原生模型，支持 `generateContent` 和 `streamGenerateContent` 两种调用方式，并完整支持 Tool Calling（`functionDeclarations` → `functionCall` 输出）。若 Gemini 模型名带 `-nothinking` 后缀，例如 `gemini-2.5-pro-nothinking`，会映射到对应的强制关闭思考模型。
 
 ## 快速开始
 
@@ -278,7 +290,10 @@ go run ./cmd/ds2api
 - `model_aliases`：OpenAI / Claude / Gemini 共用的模型 alias 映射。
 - `runtime`：账号并发、队列与 token 刷新策略，可通过 Admin Settings 热更新。
 - `auto_delete.mode`：请求结束后的远端会话清理策略，支持 `none` / `single` / `all`。
-- `history_split`：多轮历史拆分策略，已全局强制开启；可调整触发阈值，避免长历史全部内联进 prompt。
+- `history_split`：轮次拆分策略；默认关闭，开启后默认从第二轮开始将旧历史上传为 `HISTORY.txt`。
+- `current_input_file`：独立拆分策略；默认开启且阈值为 `0`，触发时将完整上下文合并上传为隐藏上下文文件，并跳过 `HISTORY.txt`。
+- `history_split` 与 `current_input_file` 互斥，最多启用一个；两者都关闭时请求直接透传。
+- `thinking_injection`：默认开启；在最新 user 消息末尾追加思考增强提示词，提高高强度推理与工具调用前的思考稳定性；`prompt` 留空时使用内置默认提示词。
 
 环境变量完整列表见 [部署指南](docs/DEPLOY.md)，接口鉴权规则见 [API.md](API.md#鉴权规则)。
 
@@ -312,14 +327,14 @@ Gemini 路由还可以使用 `x-goog-api-key`，或在没有认证头时使用 `
 当请求中带 `tools` 时，DS2API 会做防泄漏处理与结构化转译：
 
 1. 只在**非代码块上下文**启用执行型 toolcall 识别（代码块示例默认不触发）
-2. 解析层当前只把 canonical XML 工具块视为可执行调用：`<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`；旧式 `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`、`<function_call>`、`tool_use` / antml 变体与纯 JSON `tool_calls` 片段都会按普通文本处理
+2. 解析层当前把 DSML 外壳视为推荐可执行调用：`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`；兼容旧式 canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`。DSML 只是外壳别名，内部仍以 XML 解析语义为准；旧式 `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`、`<function_call>`、`tool_use` / antml 变体与纯 JSON `tool_calls` 片段都会按普通文本处理
 3. `responses` 流式严格使用官方 item 生命周期事件（`response.output_item.*`、`response.content_part.*`、`response.function_call_arguments.*`）
 4. `responses` 支持并执行 `tool_choice`（`auto`/`none`/`required`/强制函数）；`required` 违规时非流式返回 `422`，流式返回 `response.failed`
 5. 客户端请求哪种协议，就按该协议返回工具调用（OpenAI/Claude/Gemini 各自原生结构）；模型侧优先约束输出规范 XML，再由兼容层转译
 
-> 说明：当前版本在 parser 层仍以“尽量解析成功”为优先，未启用基于 allow-list 的工具名硬拒绝。
+> 说明：当前版本 parser 层以“尽量解析成功”为优先，所有格式合法的 XML 工具调用都会通过，不做工具名 allow-list 过滤。
 >
-> 想评估“把工具调用封装成 XML 再输入模型”的方案，可参考：`docs/toolcall-semantics.md`。
+> 想评估“把工具调用封装成 XML 再输入模型”的方案，可参考：`docs/toolcall-semantics.md`。
 
 ## 本地开发抓包工具
 
@@ -383,7 +398,7 @@ npm run build --prefix webui
 工作流文件：`.github/workflows/release-artifacts.yml`
 
 - **触发条件**：仅在 GitHub Release `published` 时触发（普通 push 不会触发）
-- **构建产物**：多平台二进制包（`linux/amd64`、`linux/arm64`、`darwin/amd64`、`darwin/arm64`、`windows/amd64`）+ `sha256sums.txt`
+- **构建产物**：多平台二进制包（`linux/amd64`、`linux/arm64`、`linux/armv7`、`darwin/amd64`、`darwin/arm64`、`windows/amd64`、`windows/arm64`）+ `sha256sums.txt`
 - **容器镜像发布**：仅推送到 GHCR（`ghcr.io/cjackhwang/ds2api`）
 - **每个压缩包包含**：`ds2api` 可执行文件、`static/admin`、WASM 文件（同时支持内置 fallback）、`config.example.json` 配置示例、README、LICENSE
 
diff --git a/README.en.md b/README.en.md
index 747993d..13b6982 100644
--- a/README.en.md
+++ b/README.en.md
@@ -4,6 +4,8 @@
 
 # DS2API
 
+<a href="https://trendshift.io/repositories/24508" target="_blank"><img src="https://trendshift.io/api/badge/repositories/24508" alt="CJackHwang/ds2api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+
 [![License](https://img.shields.io/github/license/CJackHwang/ds2api.svg)](LICENSE)
 ![Stars](https://img.shields.io/github/stars/CJackHwang/ds2api.svg)
 ![Forks](https://img.shields.io/github/forks/CJackHwang/ds2api.svg)
@@ -144,7 +146,7 @@ Besides the primary aliases above, `/anthropic/v1/models` also returns Claude 4.
 - Set `ANTHROPIC_BASE_URL` to the DS2API root URL (for example `http://127.0.0.1:5001`). Claude Code sends requests to `/v1/messages?beta=true`.
 - `ANTHROPIC_API_KEY` must match an entry in `keys` from `config.json`. Keeping both a regular key and an `sk-ant-*` style key improves client compatibility.
 - If your environment has proxy variables, set `NO_PROXY=127.0.0.1,localhost,<your_host_ip>` for DS2API to avoid proxy interception of local traffic.
-- If tool calls are rendered as plain text and not executed, first verify the model output uses the only supported XML block: `<tool_calls><invoke name="..."><parameter name="...">...`, not legacy `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`, `<function_call>`, `tool_use`, or standalone JSON `tool_calls`.
+- If tool calls are rendered as plain text and not executed, first verify the model output uses the recommended DSML block: `<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`. DS2API also accepts legacy canonical XML: `<tool_calls><invoke name="..."><parameter name="...">...`; legacy `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`, `<function_call>`, `tool_use`, or standalone JSON `tool_calls` are not executed.
 
 ### Gemini Endpoint
 
@@ -310,7 +312,7 @@ Queue limit = DS2API_ACCOUNT_MAX_QUEUE (default = recommended concurrency)
 When `tools` is present in the request, DS2API performs anti-leak handling:
 
 1. Toolcall feature matching is enabled only in **non-code-block context** (fenced examples are ignored)
-2. The parser now treats only the canonical XML wrapper as executable tool-calling syntax: `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`; legacy `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`, `<function_call>`, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text
+2. The parser now treats the DSML shell as the recommended executable tool-calling syntax: `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; it also accepts legacy canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`. DSML is a shell alias and internal parsing remains XML-based; legacy `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`, `<function_call>`, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text
 3. `responses` streaming strictly uses official item lifecycle events (`response.output_item.*`, `response.content_part.*`, `response.function_call_arguments.*`)
 4. `responses` supports and enforces `tool_choice` (`auto`/`none`/`required`/forced function); `required` violations return `422` for non-stream and `response.failed` for stream
 5. The output protocol follows the client request (OpenAI / Claude / Gemini native shapes); model-side prompting can prefer XML, and the compatibility layer handles the protocol-specific translation
@@ -379,7 +381,7 @@ npm run build --prefix webui
 Workflow: `.github/workflows/release-artifacts.yml`
 
 - **Trigger**: only on GitHub Release `published` (normal pushes do not trigger builds)
-- **Outputs**: multi-platform archives (`linux/amd64`, `linux/arm64`, `darwin/amd64`, `darwin/arm64`, `windows/amd64`) + `sha256sums.txt`
+- **Outputs**: multi-platform archives (`linux/amd64`, `linux/arm64`, `linux/armv7`, `darwin/amd64`, `darwin/arm64`, `windows/amd64`, `windows/arm64`) + `sha256sums.txt`
 - **Container publishing**: GHCR only (`ghcr.io/cjackhwang/ds2api`)
 - **Each archive includes**: `ds2api` executable, `static/admin`, WASM file (with embedded fallback support), `config.example.json`-based config template, README, LICENSE
 
diff --git a/VERSION b/VERSION
index fcdb2e1..ee74734 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-4.0.0
+4.1.0
diff --git a/config.example.json b/config.example.json
index f93a2c3..14a25c5 100644
--- a/config.example.json
+++ b/config.example.json
@@ -51,9 +51,17 @@
     "store_ttl_seconds": 900
   },
   "history_split": {
-    "enabled": true,
+    "enabled": false,
     "trigger_after_turns": 1
   },
+  "current_input_file": {
+    "enabled": true,
+    "min_chars": 0
+  },
+  "thinking_injection": {
+    "enabled": true,
+    "prompt": ""
+  },
   "embeddings": {
     "provider": "deterministic"
   },
diff --git a/docs/ARCHITECTURE.en.md b/docs/ARCHITECTURE.en.md
index 1f6b24a..3ba24fa 100644
--- a/docs/ARCHITECTURE.en.md
+++ b/docs/ARCHITECTURE.en.md
@@ -175,7 +175,7 @@ flowchart LR
 - `internal/deepseek/{client,protocol,transport}`: upstream requests, sessions, PoW adaptation, protocol constants, and transport details.
 - `internal/js/chat-stream` + `api/chat-stream.js`: Vercel Node streaming bridge; Go prepare/release owns auth, account lease, and completion payload assembly, while Node relays real-time SSE with Go-aligned finalization and tool sieve semantics.
 - `internal/stream` + `internal/sse`: Go stream parsing and incremental assembly.
-- `internal/toolcall` + `internal/toolstream`: canonical XML tool-call parsing + anti-leak sieve (the only executable format is `<tool_calls>` / `<invoke name="...">` / `<parameter name="...">`).
+- `internal/toolcall` + `internal/toolstream`: DSML shell compatibility plus canonical XML tool-call parsing and anti-leak sieve; DSML is normalized back to XML at the entrypoint, and internal parsing remains XML-based.
 - `internal/httpapi/admin/*`: Admin API root assembly plus auth/accounts/config/settings/proxies/rawsamples/vercel/history/devcapture/version resource packages.
 - `internal/chathistory`: server-side conversation history persistence, pagination, detail lookup, and retention policy.
 - `internal/config`: config loading/validation + runtime settings hot-reload.
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 5094ea4..d5b8baf 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -175,7 +175,7 @@ flowchart LR
 - `internal/deepseek/{client,protocol,transport}`：上游请求、会话、PoW 适配、协议常量与传输层。
 - `internal/js/chat-stream` + `api/chat-stream.js`：Vercel Node 流式桥；Go prepare/release 管理鉴权、账号租约和 completion payload，Node 侧负责实时 SSE 转发并保持 Go 对齐的终结态和 tool sieve 语义。
 - `internal/stream` + `internal/sse`：Go 流式解析与增量处理。
-- `internal/toolcall` + `internal/toolstream`：canonical XML 工具调用解析与防泄漏筛分（唯一可执行格式：`<tool_calls>` / `<invoke name="...">` / `<parameter name="...">`）。
+- `internal/toolcall` + `internal/toolstream`：DSML 外壳兼容与 canonical XML 工具调用解析、防泄漏筛分；DSML 会在入口归一化回 XML，内部仍按 XML 语义解析。
 - `internal/httpapi/admin/*`：Admin API 根装配与 auth/accounts/config/settings/proxies/rawsamples/vercel/history/devcapture/version 等资源子包。
 - `internal/chathistory`：服务器端对话记录持久化、分页、单条详情和保留策略。
 - `internal/config`：配置加载、校验、运行时 settings 热更新。
diff --git a/docs/DEPLOY.en.md b/docs/DEPLOY.en.md
index de52b4c..f81de01 100644
--- a/docs/DEPLOY.en.md
+++ b/docs/DEPLOY.en.md
@@ -70,9 +70,9 @@ Built-in GitHub Actions workflow: `.github/workflows/release-artifacts.yml`
 
 | Platform | Architecture | Format |
 | --- | --- | --- |
-| Linux | amd64, arm64 | `.tar.gz` |
+| Linux | amd64, arm64, armv7 | `.tar.gz` |
 | macOS | amd64, arm64 | `.tar.gz` |
-| Windows | amd64 | `.zip` |
+| Windows | amd64, arm64 | `.zip` |
 
 Each archive includes:
 
@@ -538,7 +538,7 @@ curl -s http://127.0.0.1:5001/readyz
 
 # 3. Model list
 curl -s http://127.0.0.1:5001/v1/models
-# Expected: {"object":"list","data":[...]}
+# Expected: {"object":"list","data":[...]} (including `*-nothinking` variants)
 
 # 4. Admin panel (if WebUI is built)
 curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin
diff --git a/docs/DEPLOY.md b/docs/DEPLOY.md
index 7509cb3..0f91fdf 100644
--- a/docs/DEPLOY.md
+++ b/docs/DEPLOY.md
@@ -70,9 +70,9 @@ cp config.example.json config.json
 
 | 平台 | 架构 | 文件格式 |
 | --- | --- | --- |
-| Linux | amd64, arm64 | `.tar.gz` |
+| Linux | amd64, arm64, armv7 | `.tar.gz` |
 | macOS | amd64, arm64 | `.tar.gz` |
-| Windows | amd64 | `.zip` |
+| Windows | amd64, arm64 | `.zip` |
 
 每个压缩包包含：
 
@@ -548,7 +548,7 @@ curl -s http://127.0.0.1:5001/readyz
 
 # 3. 模型列表
 curl -s http://127.0.0.1:5001/v1/models
-# 预期: {"object":"list","data":[...]}
+# 预期: {"object":"list","data":[...]}（包含 `*-nothinking` 变体）
 
 # 4. 管理台页面（如果已构建 WebUI）
 curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin
diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md
new file mode 100644
index 0000000..4002e13
--- /dev/null
+++ b/docs/DEVELOPMENT.md
@@ -0,0 +1,112 @@
+# DS2API 开发者速查
+
+语言 / Language: 中文
+
+本文面向维护者和贡献者，用于快速判断“从哪里看、改哪里、跑什么”。架构细节仍以 [ARCHITECTURE.md](./ARCHITECTURE.md) 为准，接口行为以 [API.md](../API.md) 为准。
+
+## 1. 本地入口
+
+常用启动与检查：
+
+```bash
+# 后端
+go run ./cmd/ds2api
+
+# WebUI 开发服务器
+npm run dev --prefix webui
+
+# WebUI 生产构建
+npm run build --prefix webui
+```
+
+PR 前固定门禁：
+
+```bash
+./scripts/lint.sh
+./tests/scripts/check-refactor-line-gate.sh
+./tests/scripts/run-unit-all.sh
+npm run build --prefix webui
+```
+
+修改 Go 文件后先运行：
+
+```bash
+gofmt -w <changed-go-files>
+```
+
+## 2. 代码定位
+
+优先从这些入口顺着调用链看：
+
+| 目标 | 入口 |
+| --- | --- |
+| 总路由、CORS、健康检查 | `internal/server/router.go` |
+| OpenAI Chat / Responses | `internal/httpapi/openai/chat`、`internal/httpapi/openai/responses` |
+| Claude / Gemini 兼容入口 | `internal/httpapi/claude`、`internal/httpapi/gemini` |
+| API 请求归一到网页纯文本上下文 | `internal/promptcompat`、`docs/prompt-compatibility.md` |
+| 工具调用解析与流式防泄漏 | `internal/toolcall`、`internal/toolstream`、`docs/toolcall-semantics.md` |
+| DeepSeek 上游调用、登录、PoW、代理 | `internal/deepseek/client`、`internal/deepseek/transport` |
+| 账号池、并发槽位、等待队列 | `internal/account` |
+| Admin API | `internal/httpapi/admin` |
+| WebUI 页面 | `webui/src/layout/DashboardShell.jsx`、`webui/src/features/*` |
+| 服务器端对话记录 | `internal/chathistory`、`internal/httpapi/admin/history` |
+
+## 3. 常见改动建议
+
+- 改接口行为时，同时检查 `API.md` / `API.en.md` 是否需要同步。
+- 改 prompt 兼容链路时，必须同步 `docs/prompt-compatibility.md`。
+- 改 tool call 语义时，同时检查 Go、Node sieve 和 `docs/toolcall-semantics.md`。
+- 改 WebUI 配置项时，同时检查 `webui/src/features/settings`、语言包和 `config.example.json`。
+- 拆分大文件时，保持对外函数签名稳定，并跑 `./tests/scripts/check-refactor-line-gate.sh`。
+
+## 4. 故障定位
+
+接口请求先看路由入口，再看协议适配层，最后看共享 runtime：
+
+1. 路由是否命中：`internal/server/router.go` 和对应 `RegisterRoutes`。
+2. 鉴权与账号选择：`internal/auth`、`internal/account`。
+3. 请求归一化：`internal/promptcompat` 或协议转换包。
+4. 上游请求：`internal/deepseek/client`。
+5. 流式输出：`internal/stream`、`internal/sse`、`internal/toolstream`。
+6. 响应格式：`internal/format/*` 或 `internal/translatorcliproxy`。
+
+对话记录页面问题优先检查：
+
+- Admin API：`/admin/chat-history`、`/admin/chat-history/{id}`。
+- 后端存储：`internal/chathistory/store.go`。
+- 前端轮询和 ETag：`webui/src/features/chatHistory/ChatHistoryContainer.jsx`。
+
+Tool call 问题优先跑：
+
+```bash
+go test -v ./internal/toolcall ./internal/toolstream -count=1
+node --test tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js
+```
+
+## 5. 测试选择
+
+小范围 Go 改动：
+
+```bash
+go test ./internal/<package> -count=1
+```
+
+前端改动：
+
+```bash
+npm run build --prefix webui
+```
+
+高风险协议或流式改动：
+
+```bash
+./tests/scripts/run-unit-all.sh
+```
+
+发布或真实账号链路验证：
+
+```bash
+./tests/scripts/run-live.sh
+```
+
+端到端测试产物默认写入 `artifacts/testsuite/`。分享日志前需要清理 token、密码、cookie 和原始请求响应内容。
diff --git a/docs/README.md b/docs/README.md
index a80093c..b3556eb 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -11,7 +11,8 @@
 3. [接口文档（API）](../API.md)
 4. [部署指南](./DEPLOY.md)
 5. [测试指南](./TESTING.md)
-6. [贡献指南](./CONTRIBUTING.md)
+6. [开发者速查](./DEVELOPMENT.md)
+7. [贡献指南](./CONTRIBUTING.md)
 
 ### 专题文档
 
@@ -41,7 +42,8 @@ Recommended reading order:
 3. [API reference](../API.en.md)
 4. [Deployment guide](./DEPLOY.en.md)
 5. [Testing guide](./TESTING.md)
-6. [Contributing guide](./CONTRIBUTING.en.md)
+6. [Developer quick reference (Chinese)](./DEVELOPMENT.md)
+7. [Contributing guide](./CONTRIBUTING.en.md)
 
 ### Topical docs
 
diff --git a/docs/TESTING.md b/docs/TESTING.md
index 40c3501..dd16142 100644
--- a/docs/TESTING.md
+++ b/docs/TESTING.md
@@ -13,6 +13,7 @@ DS2API 提供两个层级的测试：
 | 单元测试（Go） | `./tests/scripts/run-unit-go.sh` | 不需要真实账号 |
 | 单元测试（Node） | `./tests/scripts/run-unit-node.sh` | 不需要真实账号 |
 | 单元测试（全部） | `./tests/scripts/run-unit-all.sh` | 不需要真实账号 |
+| Release 目标交叉编译 | `./tests/scripts/check-cross-build.sh` | 覆盖发布包支持的 GOOS/GOARCH |
 | 端到端测试 | `./tests/scripts/run-live.sh` | 使用真实账号执行全链路测试 |
 
 端到端测试集会录制完整的请求/响应日志，用于故障排查。
@@ -35,6 +36,7 @@ npm run build --prefix webui
 
 - `./scripts/lint.sh` 会运行 Go 格式化检查和 `golangci-lint`；修改 Go 文件后仍建议先执行 `gofmt -w <files>`。
 - `run-unit-all.sh` 串行调用 Go 与 Node 单元测试入口。
+- CI 还会额外在 macOS/Windows 跑 Go 单测，并执行 release 目标交叉编译检查。
 - `run-live.sh` 是真实账号端到端测试，适合作为发布或高风险改动后的补充验证，不属于每次 PR 的固定本地门禁。
 
 ---
@@ -57,6 +59,7 @@ npm run build --prefix webui
 # 结构与流程门禁
 ./tests/scripts/check-refactor-line-gate.sh
 ./tests/scripts/check-node-split-syntax.sh
+./tests/scripts/check-cross-build.sh
 
 # 历史阶段门禁：阶段 6 手工烟测签字检查（默认读取 plans/stage6-manual-smoke.md）
 ./tests/scripts/check-stage6-manual-smoke.sh
diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md
index 495d1cc..6039e34 100644
--- a/docs/prompt-compatibility.md
+++ b/docs/prompt-compatibility.md
@@ -68,6 +68,8 @@ DS2API 当前的核心思路，不是把客户端传来的 `messages`、`tools`
   [internal/prompt/messages.go](../internal/prompt/messages.go)
 - prompt 可见 tool history XML：
   [internal/prompt/tool_calls.go](../internal/prompt/tool_calls.go)
+- 最新 user 思考格式注入：
+  [internal/promptcompat/thinking_injection.go](../internal/promptcompat/thinking_injection.go)
 - completion payload：
   [internal/promptcompat/standard_request.go](../internal/promptcompat/standard_request.go)
 
@@ -96,11 +98,23 @@ DS2API 当前的核心思路，不是把客户端传来的 `messages`、`tools`
 - `prompt` 才是对话上下文主载体。
 - `ref_file_ids` 只承载文件引用，不承载普通文本消息。
 - `tools` 不会作为“原生工具 schema”直接下发给下游，而是被改写进 `prompt`。
+- 当前 `/v1/chat/completions` 业务路径仍是“每次请求新建一个远端 `chat_session_id`，并默认发送 `parent_message_id: null`”；因此 DS2API 对外默认表现为“新会话 + prompt 拼历史”，而不是复用 DeepSeek 原生会话树。
+- 但 DeepSeek 远端本身支持同一 `chat_session_id` 的跨轮次持续对话。2026-04-27 已用项目内现有 DeepSeek client 做过一次不改业务代码的双轮实测：同一 `chat_session_id` 下，第 1 轮返回 `request_message_id=1` / `response_message_id=2` / 文本 `SESSION_TEST_ONE`；第 2 轮重新获取一次 PoW，并发送 `parent_message_id=2` 后，成功返回 `request_message_id=3` / `response_message_id=4` / 文本 `SESSION_TEST_TWO`。这说明“同远端会话持续聊天”能力存在，且每轮需要携带正确的 parent/message 链接信息，同时重新获取对应轮次可用的 PoW。
 - OpenAI Chat / Responses 原生走统一 OpenAI 标准化与 DeepSeek payload 组装；Claude / Gemini 会尽量复用 OpenAI prompt/tool 语义，其中 Gemini 直接复用 `promptcompat.BuildOpenAIPromptForAdapter`，Claude 消息接口在可代理场景会转换为 OpenAI chat 形态再执行。
-- 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Claude surface 没有 `thinking` 字段时按 Anthropic 语义视为关闭；Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关；关闭时即使上游返回 `response/thinking_content`，兼容层也不会把它当作可见正文输出。
+- 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关；关闭时即使上游返回 `response/thinking_content`，兼容层也不会把它当作可见正文输出。若最终解析出的模型名带 `-nothinking` 后缀，则会无条件强制关闭 thinking，优先级高于请求体中的 `thinking` / `reasoning` / `reasoning_effort`。Claude surface 在流式请求且未显式声明 `thinking` 时，仍按 Anthropic 语义默认关闭；但在非流式代理场景，兼容层会内部开启一次下游 thinking，用于捕获“正文为空、工具调用落在 thinking 里”的情况，随后在回包前剥离用户不可见的 thinking block。
+- 对 OpenAI Chat / Responses 的非流式收尾，如果最终可见正文为空，兼容层会优先尝试把思维链中的独立 DSML / XML 工具块当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测，但不会因为思维链内容去中途拦截或改写流式输出；thinking / reasoning 增量仍按原样先发，只有在结束收尾时才可能补发最终工具调用结果。补发结果会作为本轮 assistant 的结构化 `tool_calls` / `function_call` 输出返回，而不是塞进 `content` 文本；如果客户端没有开启 thinking / reasoning，思维链只用于检测，不会作为 `reasoning_content` 或可见正文暴露。只有正文为空且思维链里也没有可执行工具调用时，才继续按空回复错误处理。
+- OpenAI Chat / Responses 的空回复错误处理之前会默认做一次内部补偿重试：第一次上游完整结束后，如果最终可见正文为空、没有解析到工具调用、也没有已经向客户端流式发出工具调用，并且终止原因不是 `content_filter`，兼容层会复用同一个 `chat_session_id`、账号、token 与工具策略，把原始 completion `prompt` 追加固定后缀 `Previous reply had no visible output. Please regenerate the visible final answer or tool call now.` 后重新提交一次。重试遵循 DeepSeek 多轮对话协议：从第一次上游 SSE 流中提取 `response_message_id`，并在重试 payload 中设置 `parent_message_id` 为该值，使重试成为同一会话的后续轮次而非断裂的根消息；同时重新获取一次 PoW（若 PoW 获取失败则回退到原始 PoW）。该重试不会重新标准化消息、不会新建 session、不会切换账号，也不会向流式客户端插入重试标记；第二次 thinking / reasoning 会按正常增量直接接到第一次之后，并继续使用 overlap trim 去重。若第二次仍为空，终端错误码仍保持现有 `upstream_empty_output`；若任一尝试触发空 `content_filter`，不做补偿重试并保持 `content_filter` 错误。JS Vercel 运行时同样设置 `parent_message_id`，但因无法直接调用 PoW API 而复用原始 PoW。
 
 ## 5. prompt 是怎么拼出来的
 
+OpenAI Chat / Responses 在标准化后、history split / current input file 之前，会默认执行 `thinking_injection` 增强。它参考 DeepSeek V4 “把控制指令放在 user 消息末尾更稳定”的用法，在最新 user message 后追加思考增强提示词。当前内置默认提示词以 `Reasoning Effort: Absolute maximum with no shortcuts permitted.` 开头，并继续要求模型充分分解问题、覆盖潜在路径与边界条件、把完整推演过程显式写出。该开关默认启用，可通过 `thinking_injection.enabled=false` 关闭；也可以通过 `thinking_injection.prompt` 自定义提示词，留空时使用内置默认提示词。
+
+这段增强属于 prompt 可见上下文：
+
+- 普通请求会直接出现在最终 `prompt` 的最新 user block 末尾。
+- 如果触发 `HISTORY.txt`，它会保留在 live context 的最新 user turn 中。
+- 如果触发 current input file，它会进入完整上下文文件中。
+
 ### 5.1 角色标记
 
 最终 prompt 使用 DeepSeek 风格角色标记：
@@ -117,17 +131,7 @@ DS2API 当前的核心思路，不是把客户端传来的 `messages`、`tools`
 实现位置：
 [internal/prompt/messages.go](../internal/prompt/messages.go)
 
-### 5.2 thinking continuity 说明
-
-如果启用了 thinking，会在最前面额外插入一个 system block，提醒模型：
-
-- 继续既有会话，不要重开
-- earlier messages 是 binding context
-- 不要把最终回答只留在 reasoning 里
-
-这部分不是客户端原始消息，而是兼容层主动补进去的连续性契约。
-
-### 5.3 相邻同角色消息会合并
+### 5.2 相邻同角色消息会合并
 
 在最终 `MessagesPrepareWithThinking` 中，相邻同 role 的消息会被合并成一个块，中间插入空行。
 
@@ -144,11 +148,11 @@ DS2API 当前的核心思路，不是把客户端传来的 `messages`、`tools`
 
 1. 把每个 tool 的名称、描述、参数 schema 序列化成文本。
 2. 拼成 `You have access to these tools:` 大段说明。
-3. 再附上统一的 XML tool call 格式约束。
+3. 再附上统一的 DSML tool call 外壳格式约束。
 4. 把这整段内容并入 system prompt。
 
-工具调用正例仍只示范 canonical XML：`<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`。
-提示词会额外强调：如果要调用工具，工具块的首个非空白字符必须就是 `<tool_calls>`，不能只输出 `</tool_calls>` 而漏掉 opening tag。
+工具调用正例现在优先示范官方 DSML 风格：`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。
+兼容层仍接受旧式纯 `<tool_calls>` wrapper，但提示词会优先要求模型输出官方 DSML 标签，并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意：这是“兼容 DSML 外壳，内部仍以 XML 解析语义为准”，不是原生 DSML 全链路实现；DSML 标签会在解析入口归一化回现有 XML 标签后继续走同一套 parser。
 正例中的工具名只会来自当前请求实际声明的工具；如果当前请求没有足够的已知工具形态，就省略对应的单工具、多工具或嵌套示例，避免把不可用工具名写进 prompt。
 对执行类工具，脚本内容必须进入执行参数本身：`Bash` / `execute_command` 使用 `command`，`exec_command` 使用 `cmd`；不要把脚本示范成 `path` / `content` 文件写入参数。
 
@@ -182,18 +186,18 @@ assistant 的 reasoning 会变成一个显式标签块：
 
 ### 7.2 历史 tool_calls 保留方式
 
-assistant 历史 `tool_calls` 不会保留成 OpenAI 原生 JSON，而会转成 prompt 可见的 XML：
+assistant 历史 `tool_calls` 不会保留成 OpenAI 原生 JSON，而会转成 prompt 可见的 DSML 外壳：
 
 ```xml
-<tool_calls>
-  <invoke name="read_file">
-    <parameter name="path"><![CDATA[src/main.go]]></parameter>
-  </invoke>
-</tool_calls>
+<|DSML|tool_calls>
+  <|DSML|invoke name="read_file">
+    <|DSML|parameter name="path"><![CDATA[src/main.go]]></|DSML|parameter>
+  </|DSML|invoke>
+</|DSML|tool_calls>
 ```
 
-这也是当前项目里唯一受支持的 canonical tool-calling 形态；其他形态都会作为普通文本保留，不会作为可执行调用语法。
-例外是 parser 会对一个非常窄的模型失误做修复：如果 assistant 输出了 `<invoke ...>` ... `</tool_calls>`，但漏掉最前面的 opening `<tool_calls>`，解析阶段会补回 wrapper 后再尝试识别。
+解析层同时兼容旧式纯 XML 形态：`<tool_calls>` / `<invoke>` / `<parameter>`。两者都会先归一到现有 XML 解析语义；其他旧格式都会作为普通文本保留，不会作为可执行调用语法。
+例外是 parser 会对一个非常窄的模型失误做修复：如果 assistant 输出了 `<invoke ...>` ... `</tool_calls>`（或 DSML 对应标签），但漏掉最前面的 opening wrapper，解析阶段会补回 wrapper 后再尝试识别。
 
 这件事很重要，因为它决定了：
 
@@ -236,7 +240,12 @@ OpenAI 文件相关实现：
 
 ## 9. 多轮历史为什么不会一直完整内联在 prompt
 
-history split 现在全局强制开启；旧配置中的 `history_split.enabled=false` 会被忽略。默认从第 2 个 user turn 起就可能触发，仍可通过 `history_split.trigger_after_turns` 调整触发阈值。
+兼容层提供两种拆分策略：
+
+- `history_split` 是轮次拆分，默认关闭；开启后默认从第 2 个 user turn 起触发，可通过 `history_split.trigger_after_turns` 调整阈值。
+- `current_input_file` 是独立拆分，默认开启；它用于把“完整上下文”合并进隐藏上下文文件。当最新 user turn 的纯文本长度达到 `current_input_file.min_chars`（默认 `0`）时，兼容层会上传一个文件名为 `IGNORE.txt` 的上下文文件，并在 live prompt 中只保留一个中性的 user 消息要求模型直接回答最新请求，不再暴露文件名或要求模型读取本地文件。
+
+两个策略互斥，最多只能启用一个。如果两个开关都关闭，请求会直接透传，不上传 `HISTORY.txt` 或 current input file。
 
 相关实现：
 
@@ -244,8 +253,10 @@ history split 现在全局强制开启；旧配置中的 `history_split.enabled=
   [internal/config/store_accessors.go](../internal/config/store_accessors.go)
 - 历史拆分：
   [internal/httpapi/openai/history/history_split.go](../internal/httpapi/openai/history/history_split.go)
+- 当前输入转文件：
+  [internal/httpapi/openai/history/current_input_file.go](../internal/httpapi/openai/history/current_input_file.go)
 
-触发后行为：
+history split 触发后行为：
 
 1. 旧历史消息被切出去。
 2. 旧历史会被重新序列化成一个文本文件。
@@ -273,6 +284,20 @@ history split 现在全局强制开启；旧配置中的 `history_split.enabled=
 - `prompt` 里的 live context
 - `ref_file_ids` 指向的 history transcript file
 
+当前输入转文件启用并触发时，不会同时启用 history split，也不会上传 `HISTORY.txt`。上传文件的真实文件名是 `IGNORE.txt`，文件内容是完整 `messages` 上下文；它仍会先用 OpenAI 消息标准化和 DeepSeek 角色标记序列化，再包进 `IGNORE` 文件边界里：
+
+```text
+[uploaded filename]: IGNORE.txt
+[file content end]
+
+<｜begin▁of▁sentence｜><｜System｜>...<｜User｜>...<｜Assistant｜>...<｜Tool｜>...<｜User｜>...
+
+[file name]: IGNORE
+[file content begin]
+```
+
+开启后，请求的 live prompt 不再直接内联完整上下文，而是保留一个 user role 的短提示，提示模型基于已提供上下文直接回答最新请求；上传后的 `file_id` 会进入 `ref_file_ids`。
+
 ## 10. 各协议入口的差异
 
 ### 10.1 OpenAI Chat / Responses
@@ -320,7 +345,7 @@ history split 现在全局强制开启；旧配置中的 `history_split.enabled=
 
 ```json
 {
-  "prompt": "<｜begin▁of▁sentence｜><｜System｜>continuity instructions...\\n\\n原 system / developer\\n\\nYou have access to these tools: ...<｜end▁of▁instructions｜><｜User｜>最新问题<｜Assistant｜>",
+  "prompt": "<｜begin▁of▁sentence｜><｜System｜>原 system / developer\n\nYou have access to these tools: ...<｜end▁of▁instructions｜><｜User｜>最新问题<｜Assistant｜>",
   "ref_file_ids": [
     "file-history-ignore",
     "file-systemprompt",
diff --git a/docs/toolcall-semantics.md b/docs/toolcall-semantics.md
index ea5c456..5529a4b 100644
--- a/docs/toolcall-semantics.md
+++ b/docs/toolcall-semantics.md
@@ -4,9 +4,19 @@
 
 文档导航：[总览](../README.MD) / [架构说明](./ARCHITECTURE.md) / [测试指南](./TESTING.md)
 
-## 1) 当前唯一可执行格式
+## 1) 当前可执行格式
 
-当前版本只把下面这类 canonical XML 视为可执行工具调用：
+当前版本推荐模型输出 DSML 外壳：
+
+```xml
+<|DSML|tool_calls>
+  <|DSML|invoke name="read_file">
+    <|DSML|parameter name="path"><![CDATA[README.MD]]></|DSML|parameter>
+  </|DSML|invoke>
+</|DSML|tool_calls>
+```
+
+兼容层仍接受旧式 canonical XML：
 
 ```xml
 <tool_calls>
@@ -16,21 +26,26 @@
 </tool_calls>
 ```
 
+这不是原生 DSML 全链路实现。DSML 只作为 prompt 外壳和解析入口别名；进入 parser 前会被归一化成 `<tool_calls>` / `<invoke>` / `<parameter>`，内部仍以现有 XML 解析语义为准。
+
 约束：
 
-- 必须有 `<tool_calls>...</tool_calls>` wrapper
-- 每个调用必须在 `<invoke name="...">...</invoke>` 内
+- 必须有 `<|DSML|tool_calls>...</|DSML|tool_calls>` 或 `<tool_calls>...</tool_calls>` wrapper
+- 每个调用必须在 `<|DSML|invoke name="...">...</|DSML|invoke>` 或 `<invoke name="...">...</invoke>` 内
 - 工具名必须放在 `invoke` 的 `name` 属性
-- 参数必须使用 `<parameter name="...">...</parameter>`
+- 参数必须使用 `<|DSML|parameter name="...">...</|DSML|parameter>` 或 `<parameter name="...">...</parameter>`
+- 同一个工具块内不要混用 DSML 标签和旧 XML 工具标签；混搭会被视为非法工具块
 
 兼容修复：
 
-- 如果模型漏掉 opening `<tool_calls>`，但后面仍输出了一个或多个 `<invoke ...>` 并以 `</tool_calls>` 收尾，Go 解析链路会在解析前补回缺失的 opening wrapper。
-- 这是一个针对常见模型失误的窄修复，不改变推荐输出格式；prompt 仍要求模型直接输出完整 canonical XML。
+- 如果模型漏掉 opening wrapper，但后面仍输出了一个或多个 invoke 并以 closing wrapper 收尾，Go 解析链路会在解析前补回缺失的 opening wrapper。
+- 如果模型把 DSML 标签里的分隔符 `|` 写漏成空格（例如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`，或无 leading pipe 的 `<DSML tool_calls>` 形态），或把 `DSML` 与工具标签名直接黏连（例如 `<DSMLtool_calls>` / `<DSMLinvoke>` / `<DSMLparameter>`），Go / Node 会在固定工具标签名范围内归一化；相似但非工具标签名（如 `tool_calls_extra`）仍按普通文本处理。
+- 这是一个针对常见模型失误的窄修复，不改变推荐输出格式；prompt 仍要求模型直接输出完整 DSML 外壳。
+- 裸 `<invoke ...>` / `<parameter ...>` 不会被当成“已支持的工具语法”；只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 才会进入工具调用路径。
 
-## 2) 非 canonical 内容
+## 2) 非兼容内容
 
-任何不满足上述 canonical XML 形态的内容，都会保留为普通文本，不会执行。一个例外是上一节提到的“缺失 opening `<tool_calls>`、但 closing `</tool_calls>` 仍存在”的窄修复场景。
+任何不满足上述 DSML / canonical XML 形态的内容，都会保留为普通文本，不会执行。一个例外是上一节提到的“缺失 opening wrapper、但 closing wrapper 仍存在”的窄修复场景。
 
 当前 parser 不把 allow-list 当作硬安全边界：即使传入了已声明工具名列表，XML 里出现未声明工具名时也会尽量解析并交给上层协议输出；真正的执行侧仍必须自行校验工具名和参数。
 
@@ -38,25 +53,30 @@
 
 在流式链路中（Go / Node 一致）：
 
-- canonical `<tool_calls>` wrapper 会进入结构化捕获
-- 如果流里直接从 `<invoke ...>` 开始，但后面补上了 `</tool_calls>`，Go 流式筛分也会按缺失 opening wrapper 的修复路径尝试恢复
+- DSML `<|DSML|tool_calls>` wrapper、兼容变体（`<dsml|tool_calls>`、`<｜tool_calls>`、`<|tool_calls>`）、窄容错空格分隔形态（如 `<|DSML tool_calls>`）、黏连形态（如 `<DSMLtool_calls>`）和 canonical `<tool_calls>` wrapper 都会进入结构化捕获
+- 如果流里直接从 invoke 开始，但后面补上了 closing wrapper，Go 流式筛分也会按缺失 opening wrapper 的修复路径尝试恢复
 - 已识别成功的工具调用不会再次回流到普通文本
 - 不符合新格式的块不会执行，并继续按原样文本透传
-- fenced code block 中的 XML 示例始终按普通文本处理
+- fenced code block（反引号 `` ``` `` 和波浪线 `~~~`）中的 XML 示例始终按普通文本处理
+- 支持嵌套围栏（如 4 反引号嵌套 3 反引号）和 CDATA 内围栏保护
+- 如果模型把 `<![CDATA[` 打开后却没有闭合，流式扫描阶段仍会保守地继续缓冲，不会误把 CDATA 里的示例 XML 当成真实工具调用；在最终 parse / flush 恢复阶段，会对这类 loose CDATA 做窄修复，尽量保住外层已完整包裹的真实工具调用
+- 当文本中 mention 了某种标签名（如 `<dsml|tool_calls>` 或 Markdown inline code 里的 `<|DSML|tool_calls>`）而后面紧跟真正工具调用时，sieve 会跳过不可解析的 mention 候选并继续匹配后续真实工具块，不会因 mention 导致工具调用丢失，也不会截断 mention 后的正文
+
+另外，`<parameter>` 的值如果本身是合法 JSON 字面量，也会按结构化值解析，而不是一律保留为字符串。例如 `123`、`true`、`null`、`[1,2]`、`{"a":1}` 都会还原成对应的 number / boolean / null / array / object。
 
 ## 4) 输出结构
 
 `ParseToolCallsDetailed` / `parseToolCallsDetailed` 返回：
 
 - `calls`：解析出的工具调用列表（`name` + `input`）
-- `sawToolCallSyntax`：检测到 canonical wrapper，或命中“缺失 opening wrapper 但可修复”的形态时会为 `true`
+- `sawToolCallSyntax`：检测到 DSML / canonical wrapper，或命中“缺失 opening wrapper 但可修复”的形态时会为 `true`；裸 `invoke` 不计入该标记
 - `rejectedByPolicy`：当前固定为 `false`
 - `rejectedToolNames`：当前固定为空数组
 
 ## 5) 落地建议
 
-1. Prompt 里只示范 canonical XML 语法。
-2. 上游客户端仍应直接输出 canonical XML；DS2API 只对“closing tag 在、opening tag 漏掉”的常见失误做窄修复，不会泛化接受其他旧格式。
+1. Prompt 里只示范 DSML 外壳语法。
+2. 上游客户端应直接输出完整 DSML 外壳；DS2API 兼容旧式 canonical XML，并只对“closing tag 在、opening tag 漏掉”的常见失误做窄修复，不会泛化接受其他旧格式。
 3. 不要依赖 parser 做安全控制；执行器侧仍应做工具名和参数校验。
 
 ## 6) 回归验证
@@ -70,6 +90,12 @@ node --test tests/node/stream-tool-sieve.test.js
 
 重点覆盖：
 
-- canonical `<tool_calls>` wrapper 正常解析
-- 非 canonical 内容按普通文本透传
+- DSML `<|DSML|tool_calls>` wrapper 正常解析
+- legacy canonical `<tool_calls>` wrapper 正常解析
+- 别名变体（`<dsml|tool_calls>`、`<｜tool_calls>`、`<|tool_calls>`）、DSML 空格分隔 typo（如 `<|DSML tool_calls>`）和黏连 typo（如 `<DSMLtool_calls>`）正常解析
+- 混搭标签（DSML wrapper + canonical inner）归一化后正常解析
+- 波浪线围栏 `~~~` 内的示例不执行
+- 嵌套围栏（4 反引号嵌套 3 反引号）内的示例不执行
+- 文本 mention 标签名后紧跟真正工具调用的场景（含同一 wrapper 变体）
+- 非兼容内容按普通文本透传
 - 代码块示例不执行
diff --git a/internal/chathistory/store.go b/internal/chathistory/store.go
index faa1818..8f215a1 100644
--- a/internal/chathistory/store.go
+++ b/internal/chathistory/store.go
@@ -192,6 +192,18 @@ func (s *Store) Snapshot() (File, error) {
 	return cloneFile(s.state), nil
 }
 
+func (s *Store) Revision() (int64, error) { // Revision reports s.state.Revision, or an error when the store is nil or s.err is set.
+	if s == nil { // nil receiver: return an error instead of dereferencing s
+		return 0, errors.New("chat history store is nil")
+	}
+	s.mu.Lock() // s.mu stays held until return, so s.err and s.state are read under the lock
+	defer s.mu.Unlock()
+	if s.err != nil { // a recorded store error takes precedence over the revision value
+		return 0, s.err
+	}
+	return s.state.Revision, nil
+}
+
 func (s *Store) Enabled() bool {
 	if s == nil {
 		return false
@@ -220,6 +232,22 @@ func (s *Store) Get(id string) (Entry, error) {
 	return cloneEntry(item), nil
 }
 
+func (s *Store) DetailRevision(id string) (int64, error) { // DetailRevision reports the Revision of the s.details entry keyed by id.
+	if s == nil { // nil receiver: return an error instead of dereferencing s
+		return 0, errors.New("chat history store is nil")
+	}
+	s.mu.Lock() // s.mu stays held until return, so s.err and s.details are read under the lock
+	defer s.mu.Unlock()
+	if s.err != nil { // a recorded store error is returned before any lookup
+		return 0, s.err
+	}
+	item, ok := s.details[strings.TrimSpace(id)] // lookup key is id with surrounding whitespace trimmed
+	if !ok { // unknown id is reported as an error, not as revision 0 with nil error
+		return 0, errors.New("chat history entry not found")
+	}
+	return item.Revision, nil
+}
+
 func (s *Store) Start(params StartParams) (Entry, error) {
 	if s == nil {
 		return Entry{}, errors.New("chat history store is nil")
diff --git a/internal/config/codec.go b/internal/config/codec.go
index 246df9b..1cf078b 100644
--- a/internal/config/codec.go
+++ b/internal/config/codec.go
@@ -48,6 +48,12 @@ func (c Config) MarshalJSON() ([]byte, error) {
 	if c.HistorySplit.Enabled != nil || c.HistorySplit.TriggerAfterTurns != nil {
 		m["history_split"] = c.HistorySplit
 	}
+	if c.CurrentInputFile.Enabled != nil || c.CurrentInputFile.MinChars != 0 {
+		m["current_input_file"] = c.CurrentInputFile
+	}
+	if c.ThinkingInjection.Enabled != nil || strings.TrimSpace(c.ThinkingInjection.Prompt) != "" {
+		m["thinking_injection"] = c.ThinkingInjection
+	}
 	if c.VercelSyncHash != "" {
 		m["_vercel_sync_hash"] = c.VercelSyncHash
 	}
@@ -118,6 +124,14 @@ func (c *Config) UnmarshalJSON(b []byte) error {
 			if err := json.Unmarshal(v, &c.HistorySplit); err != nil {
 				return fmt.Errorf("invalid field %q: %w", k, err)
 			}
+		case "current_input_file":
+			if err := json.Unmarshal(v, &c.CurrentInputFile); err != nil {
+				return fmt.Errorf("invalid field %q: %w", k, err)
+			}
+		case "thinking_injection":
+			if err := json.Unmarshal(v, &c.ThinkingInjection); err != nil {
+				return fmt.Errorf("invalid field %q: %w", k, err)
+			}
 		case "_vercel_sync_hash":
 			if err := json.Unmarshal(v, &c.VercelSyncHash); err != nil {
 				return fmt.Errorf("invalid field %q: %w", k, err)
@@ -157,6 +171,14 @@ func (c Config) Clone() Config {
 			Enabled:           cloneBoolPtr(c.HistorySplit.Enabled),
 			TriggerAfterTurns: cloneIntPtr(c.HistorySplit.TriggerAfterTurns),
 		},
+		CurrentInputFile: CurrentInputFileConfig{
+			Enabled:  cloneBoolPtr(c.CurrentInputFile.Enabled),
+			MinChars: c.CurrentInputFile.MinChars,
+		},
+		ThinkingInjection: ThinkingInjectionConfig{
+			Enabled: cloneBoolPtr(c.ThinkingInjection.Enabled),
+			Prompt:  c.ThinkingInjection.Prompt,
+		},
 		VercelSyncHash:   c.VercelSyncHash,
 		VercelSyncTime:   c.VercelSyncTime,
 		AdditionalFields: map[string]any{},
diff --git a/internal/config/config.go b/internal/config/config.go
index 4053798..cd0ae1c 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -8,21 +8,23 @@ import (
 )
 
 type Config struct {
-	Keys             []string           `json:"keys,omitempty"`
-	APIKeys          []APIKey           `json:"api_keys,omitempty"`
-	Accounts         []Account          `json:"accounts,omitempty"`
-	Proxies          []Proxy            `json:"proxies,omitempty"`
-	ModelAliases     map[string]string  `json:"model_aliases,omitempty"`
-	Admin            AdminConfig        `json:"admin,omitempty"`
-	Runtime          RuntimeConfig      `json:"runtime,omitempty"`
-	Compat           CompatConfig       `json:"compat,omitempty"`
-	Responses        ResponsesConfig    `json:"responses,omitempty"`
-	Embeddings       EmbeddingsConfig   `json:"embeddings,omitempty"`
-	AutoDelete       AutoDeleteConfig   `json:"auto_delete"`
-	HistorySplit     HistorySplitConfig `json:"history_split"`
-	VercelSyncHash   string             `json:"_vercel_sync_hash,omitempty"`
-	VercelSyncTime   int64              `json:"_vercel_sync_time,omitempty"`
-	AdditionalFields map[string]any     `json:"-"`
+	Keys              []string                `json:"keys,omitempty"`
+	APIKeys           []APIKey                `json:"api_keys,omitempty"`
+	Accounts          []Account               `json:"accounts,omitempty"`
+	Proxies           []Proxy                 `json:"proxies,omitempty"`
+	ModelAliases      map[string]string       `json:"model_aliases,omitempty"`
+	Admin             AdminConfig             `json:"admin,omitempty"`
+	Runtime           RuntimeConfig           `json:"runtime,omitempty"`
+	Compat            CompatConfig            `json:"compat,omitempty"`
+	Responses         ResponsesConfig         `json:"responses,omitempty"`
+	Embeddings        EmbeddingsConfig        `json:"embeddings,omitempty"`
+	AutoDelete        AutoDeleteConfig        `json:"auto_delete"`
+	HistorySplit      HistorySplitConfig      `json:"history_split"`
+	CurrentInputFile  CurrentInputFileConfig  `json:"current_input_file,omitempty"`
+	ThinkingInjection ThinkingInjectionConfig `json:"thinking_injection,omitempty"`
+	VercelSyncHash    string                  `json:"_vercel_sync_hash,omitempty"`
+	VercelSyncTime    int64                   `json:"_vercel_sync_time,omitempty"`
+	AdditionalFields  map[string]any          `json:"-"`
 }
 
 type Account struct {
@@ -100,7 +102,6 @@ func (c *Config) NormalizeCredentials() {
 	}
 
 	c.normalizeModelAliases()
-	c.forceHistorySplitEnabled()
 }
 
 // DropInvalidAccounts removes accounts that cannot be addressed by admin APIs
@@ -141,14 +142,6 @@ func (c *Config) normalizeModelAliases() {
 	}
 }
 
-func (c *Config) forceHistorySplitEnabled() {
-	if c == nil {
-		return
-	}
-	enabled := true
-	c.HistorySplit.Enabled = &enabled
-}
-
 type CompatConfig struct {
 	WideInputStrictOutput *bool `json:"wide_input_strict_output,omitempty"`
 	StripReferenceMarkers *bool `json:"strip_reference_markers,omitempty"`
@@ -184,3 +177,13 @@ type HistorySplitConfig struct {
 	Enabled           *bool `json:"enabled,omitempty"`
 	TriggerAfterTurns *int  `json:"trigger_after_turns,omitempty"`
 }
+
+type CurrentInputFileConfig struct { // CurrentInputFileConfig is the type of Config.CurrentInputFile (JSON key "current_input_file").
+	Enabled  *bool `json:"enabled,omitempty"` // pointer so nil (key absent) is distinguishable from an explicit false
+	MinChars int   `json:"min_chars,omitempty"` // zero is omitted on marshal; docs/prompt-compatibility.md describes it as a length threshold defaulting to 0
+}
+
+type ThinkingInjectionConfig struct { // ThinkingInjectionConfig is the type of Config.ThinkingInjection (JSON key "thinking_injection").
+	Enabled *bool  `json:"enabled,omitempty"` // pointer so nil (key absent) is distinguishable from an explicit false
+	Prompt  string `json:"prompt,omitempty"` // Config.MarshalJSON treats a whitespace-only value as unset; docs say an empty prompt uses the built-in default
+}
diff --git a/internal/config/config_edge_test.go b/internal/config/config_edge_test.go
index 7741777..55b928d 100644
--- a/internal/config/config_edge_test.go
+++ b/internal/config/config_edge_test.go
@@ -19,6 +19,16 @@ func TestGetModelConfigDeepSeekChat(t *testing.T) {
 	}
 }
 
+func TestGetModelConfigDeepSeekChatNoThinking(t *testing.T) { // Checks GetModelConfig for the "-nothinking" flash model ID.
+	thinking, search, ok := GetModelConfig("deepseek-v4-flash-nothinking")
+	if !ok { // the ID must be recognized at all
+		t.Fatal("expected ok for deepseek-v4-flash-nothinking")
+	}
+	if thinking || search { // both flags are expected to be false for this ID
+		t.Fatalf("expected thinking=false search=false for deepseek-v4-flash-nothinking, got thinking=%v search=%v", thinking, search)
+	}
+}
+
 func TestGetModelConfigDeepSeekReasoner(t *testing.T) {
 	thinking, search, ok := GetModelConfig("deepseek-v4-pro")
 	if !ok {
@@ -84,6 +94,10 @@ func TestGetModelTypeDefaultExpertAndVision(t *testing.T) {
 	if !ok || defaultType != "default" {
 		t.Fatalf("expected default model_type, got ok=%v model_type=%q", ok, defaultType)
 	}
+	defaultNoThinkingType, ok := GetModelType("deepseek-v4-flash-nothinking")
+	if !ok || defaultNoThinkingType != "default" {
+		t.Fatalf("expected default model_type for nothinking, got ok=%v model_type=%q", ok, defaultNoThinkingType)
+	}
 	expertType, ok := GetModelType("deepseek-v4-pro")
 	if !ok || expertType != "expert" {
 		t.Fatalf("expected expert model_type, got ok=%v model_type=%q", ok, expertType)
@@ -734,12 +748,18 @@ func TestOpenAIModelsResponse(t *testing.T) {
 		t.Fatal("expected non-empty models list")
 	}
 	expected := map[string]bool{
-		"deepseek-v4-flash":         false,
-		"deepseek-v4-pro":           false,
-		"deepseek-v4-flash-search":  false,
-		"deepseek-v4-pro-search":    false,
-		"deepseek-v4-vision":        false,
-		"deepseek-v4-vision-search": false,
+		"deepseek-v4-flash":                    false,
+		"deepseek-v4-flash-nothinking":         false,
+		"deepseek-v4-pro":                      false,
+		"deepseek-v4-pro-nothinking":           false,
+		"deepseek-v4-flash-search":             false,
+		"deepseek-v4-flash-search-nothinking":  false,
+		"deepseek-v4-pro-search":               false,
+		"deepseek-v4-pro-search-nothinking":    false,
+		"deepseek-v4-vision":                   false,
+		"deepseek-v4-vision-nothinking":        false,
+		"deepseek-v4-vision-search":            false,
+		"deepseek-v4-vision-search-nothinking": false,
 	}
 	for _, model := range data {
 		if _, ok := expected[model.ID]; ok {
diff --git a/internal/config/model_alias_test.go b/internal/config/model_alias_test.go
index f537b21..64cbda8 100644
--- a/internal/config/model_alias_test.go
+++ b/internal/config/model_alias_test.go
@@ -13,6 +13,13 @@ func TestResolveModelDirectDeepSeek(t *testing.T) {
 	}
 }
 
+func TestResolveModelDirectDeepSeekNoThinking(t *testing.T) { // A "-nothinking" DeepSeek model ID is expected to resolve to itself.
+	got, ok := ResolveModel(nil, "deepseek-v4-flash-nothinking") // nil first argument, as in the neighboring ResolveModel tests
+	if !ok || got != "deepseek-v4-flash-nothinking" {
+		t.Fatalf("expected deepseek-v4-flash-nothinking, got ok=%v model=%q", ok, got)
+	}
+}
+
 func TestResolveModelAlias(t *testing.T) {
 	got, ok := ResolveModel(nil, "gpt-4.1")
 	if !ok || got != "deepseek-v4-flash" {
@@ -34,6 +41,13 @@ func TestResolveLatestClaudeAlias(t *testing.T) {
 	}
 }
 
+// TestResolveLatestClaudeAliasNoThinking: the suffix on a Claude alias carries over to the mapped model.
+func TestResolveLatestClaudeAliasNoThinking(t *testing.T) {
+	if resolved, found := ResolveModel(nil, "claude-sonnet-4-6-nothinking"); !found || resolved != "deepseek-v4-flash-nothinking" {
+		t.Fatalf("expected alias claude-sonnet-4-6-nothinking -> deepseek-v4-flash-nothinking, got ok=%v model=%q", found, resolved)
+	}
+}
+
 func TestResolveExpandedHistoricalAliases(t *testing.T) {
 	cases := []struct {
 		name  string
@@ -68,6 +82,13 @@ func TestResolveModelHeuristicReasoner(t *testing.T) {
 	}
 }
 
+// TestResolveModelHeuristicReasonerNoThinking: a heuristic "o3" match keeps the "-nothinking" suffix.
+func TestResolveModelHeuristicReasonerNoThinking(t *testing.T) {
+	if resolved, found := ResolveModel(nil, "o3-super-nothinking"); !found || resolved != "deepseek-v4-pro-nothinking" {
+		t.Fatalf("expected heuristic reasoner nothinking, got ok=%v model=%q", found, resolved)
+	}
+}
+
 func TestResolveModelUnknown(t *testing.T) {
 	_, ok := ResolveModel(nil, "totally-custom-model")
 	if ok {
diff --git a/internal/config/models.go b/internal/config/models.go
index 7b28ec3..1349ef1 100644
--- a/internal/config/models.go
+++ b/internal/config/models.go
@@ -14,7 +14,9 @@ type ModelAliasReader interface {
 	ModelAliases() map[string]string
 }
 
-var DeepSeekModels = []ModelInfo{
+const noThinkingModelSuffix = "-nothinking"
+
+var deepSeekBaseModels = []ModelInfo{
 	{ID: "deepseek-v4-flash", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
 	{ID: "deepseek-v4-pro", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
 	{ID: "deepseek-v4-flash-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
@@ -23,7 +25,9 @@ var DeepSeekModels = []ModelInfo{
 	{ID: "deepseek-v4-vision-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
 }
 
-var ClaudeModels = []ModelInfo{
+var DeepSeekModels = appendNoThinkingVariants(deepSeekBaseModels)
+
+var claudeBaseModels = []ModelInfo{
 	// Current aliases
 	{ID: "claude-opus-4-6", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
 	{ID: "claude-sonnet-4-6", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
@@ -53,19 +57,26 @@ var ClaudeModels = []ModelInfo{
 	{ID: "claude-3-haiku-20240307", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
 }
 
+var ClaudeModels = appendNoThinkingVariants(claudeBaseModels)
+
 func GetModelConfig(model string) (thinking bool, search bool, ok bool) {
-	switch lower(model) {
+	baseModel, noThinking := splitNoThinkingModel(model)
+	if baseModel == "" {
+		return false, false, false
+	}
+	switch baseModel {
 	case "deepseek-v4-flash", "deepseek-v4-pro", "deepseek-v4-vision":
-		return true, false, true
+		return !noThinking, false, true
 	case "deepseek-v4-flash-search", "deepseek-v4-pro-search", "deepseek-v4-vision-search":
-		return true, true, true
+		return !noThinking, true, true
 	default:
 		return false, false, false
 	}
 }
 
 func GetModelType(model string) (modelType string, ok bool) {
-	switch lower(model) {
+	baseModel, _ := splitNoThinkingModel(model)
+	switch baseModel {
 	case "deepseek-v4-flash", "deepseek-v4-flash-search":
 		return "default", true
 	case "deepseek-v4-pro", "deepseek-v4-pro-search":
@@ -82,6 +93,11 @@ func IsSupportedDeepSeekModel(model string) bool {
 	return ok
 }
 
+func IsNoThinkingModel(model string) bool {
+	_, hasSuffix := splitNoThinkingModel(model) // only the suffix flag matters; the base name is discarded
+	return hasSuffix
+}
+
 func DefaultModelAliases() map[string]string {
 	return map[string]string{
 		// OpenAI GPT / ChatGPT families
@@ -191,62 +207,19 @@ func ResolveModel(store ModelAliasReader, requested string) (string, bool) {
 	if model == "" {
 		return "", false
 	}
-	if isRetiredHistoricalModel(model) {
-		return "", false
-	}
+	aliases := loadModelAliases(store)
 	if IsSupportedDeepSeekModel(model) {
 		return model, true
 	}
-	aliases := DefaultModelAliases()
-	if store != nil {
-		for k, v := range store.ModelAliases() {
-			aliases[lower(strings.TrimSpace(k))] = lower(strings.TrimSpace(v))
-		}
-	}
 	if mapped, ok := aliases[model]; ok && IsSupportedDeepSeekModel(mapped) {
 		return mapped, true
 	}
-	if strings.HasPrefix(model, "deepseek-") {
+	baseModel, noThinking := splitNoThinkingModel(model)
+	resolvedModel, ok := resolveCanonicalModel(aliases, baseModel)
+	if !ok {
 		return "", false
 	}
-
-	knownFamily := false
-	for _, prefix := range []string{
-		"gpt-", "o1", "o3", "claude-", "gemini-", "llama-", "qwen-", "mistral-", "command-",
-	} {
-		if strings.HasPrefix(model, prefix) {
-			knownFamily = true
-			break
-		}
-	}
-	if !knownFamily {
-		return "", false
-	}
-
-	useVision := strings.Contains(model, "vision")
-	useReasoner := strings.Contains(model, "reason") ||
-		strings.Contains(model, "reasoner") ||
-		strings.HasPrefix(model, "o1") ||
-		strings.HasPrefix(model, "o3") ||
-		strings.Contains(model, "opus") ||
-		strings.Contains(model, "slow") ||
-		strings.Contains(model, "r1")
-	useSearch := strings.Contains(model, "search")
-
-	switch {
-	case useVision && useSearch:
-		return "deepseek-v4-vision-search", true
-	case useVision:
-		return "deepseek-v4-vision", true
-	case useReasoner && useSearch:
-		return "deepseek-v4-pro-search", true
-	case useReasoner:
-		return "deepseek-v4-pro", true
-	case useSearch:
-		return "deepseek-v4-flash-search", true
-	default:
-		return "deepseek-v4-flash", true
-	}
+	return withNoThinkingVariant(resolvedModel, noThinking), true
 }
 
 func isRetiredHistoricalModel(model string) bool {
@@ -303,3 +276,100 @@ func ClaudeModelsResponse() map[string]any {
 	resp["has_more"] = false
 	return resp
 }
+
+// appendNoThinkingVariants returns every model followed by its "-nothinking" twin.
+func appendNoThinkingVariants(models []ModelInfo) []ModelInfo {
+	expanded := make([]ModelInfo, 0, 2*len(models))
+	for _, base := range models {
+		noThinking := base
+		noThinking.ID = withNoThinkingVariant(base.ID, true)
+		expanded = append(expanded, base, noThinking)
+	}
+	return expanded
+}
+
+// splitNoThinkingModel trims model, passes it through lower, and strips one
+// trailing "-nothinking" suffix. The boolean reports whether the suffix was
+// present; without it the normalized model is returned unchanged. An input
+// that is only the suffix yields ("", true).
+func splitNoThinkingModel(model string) (string, bool) {
+	return strings.CutSuffix(lower(strings.TrimSpace(model)), noThinkingModelSuffix)
+}
+
+// withNoThinkingVariant returns the normalized base of model, with the
+// "-nothinking" suffix appended when enabled is true. One suffix already on
+// model is stripped first, and an empty base is never given a suffix.
+func withNoThinkingVariant(model string, enabled bool) string {
+	base, _ := splitNoThinkingModel(model)
+	if enabled && base != "" {
+		return base + noThinkingModelSuffix
+	}
+	return base
+}
+
+func loadModelAliases(store ModelAliasReader) map[string]string {
+	merged := DefaultModelAliases() // defaults first; store entries below overwrite matching keys
+	if store != nil {
+		for alias, target := range store.ModelAliases() {
+			merged[lower(strings.TrimSpace(alias))] = lower(strings.TrimSpace(target))
+		}
+	}
+	return merged
+}
+
+// resolveCanonicalModel maps a suffix-free model name to a supported DeepSeek
+// model ID. In order: empty and retired names are rejected, supported IDs are
+// returned as-is, an alias is followed when it targets a supported ID, other
+// "deepseek-" names are rejected, and names with a known family prefix fall
+// back to a keyword heuristic. Anything else is unresolved.
+func resolveCanonicalModel(aliases map[string]string, model string) (string, bool) {
+	name := lower(strings.TrimSpace(model))
+	if name == "" || isRetiredHistoricalModel(name) {
+		return "", false
+	}
+	if IsSupportedDeepSeekModel(name) {
+		return name, true
+	}
+	if target, found := aliases[name]; found && IsSupportedDeepSeekModel(target) {
+		return target, true
+	}
+	if strings.HasPrefix(name, "deepseek-") {
+		return "", false
+	}
+
+	// Only names starting with one of these prefixes reach the heuristic.
+	inKnownFamily := false
+	for _, family := range []string{
+		"gpt-", "o1", "o3", "claude-", "gemini-", "llama-", "qwen-", "mistral-", "command-",
+	} {
+		if inKnownFamily = strings.HasPrefix(name, family); inKnownFamily {
+			break
+		}
+	}
+	if !inKnownFamily {
+		return "", false
+	}
+
+	has := func(keyword string) bool { return strings.Contains(name, keyword) }
+	vision, search := has("vision"), has("search")
+	reasoner := has("reason") || has("reasoner") || has("opus") || has("slow") || has("r1") ||
+		strings.HasPrefix(name, "o1") || strings.HasPrefix(name, "o3")
+
+	// Vision outranks reasoner; search only selects the "-search" sibling.
+	if vision {
+		if search {
+			return "deepseek-v4-vision-search", true
+		}
+		return "deepseek-v4-vision", true
+	}
+	if reasoner {
+		if search {
+			return "deepseek-v4-pro-search", true
+		}
+		return "deepseek-v4-pro", true
+	}
+	if search {
+		return "deepseek-v4-flash-search", true
+	}
+	return "deepseek-v4-flash", true
+}
diff --git a/internal/config/store_accessors.go b/internal/config/store_accessors.go
index 4b25284..f5b8369 100644
--- a/internal/config/store_accessors.go
+++ b/internal/config/store_accessors.go
@@ -164,7 +164,12 @@ func (s *Store) AutoDeleteSessions() bool {
 }
 
 func (s *Store) HistorySplitEnabled() bool {
-	return true
+	// Reports the configured history_split.enabled flag; an unset flag counts
+	// as disabled.
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	flag := s.cfg.HistorySplit.Enabled
+	return flag != nil && *flag
 }
 
 func (s *Store) HistorySplitTriggerAfterTurns() int {
@@ -175,3 +180,37 @@ func (s *Store) HistorySplitTriggerAfterTurns() int {
 	}
 	return *s.cfg.HistorySplit.TriggerAfterTurns
 }
+
+// CurrentInputFileEnabled reports whether current-input-file mode is active.
+// An enabled history split always suppresses it; otherwise the configured
+// flag is used, and an unset flag counts as enabled.
+func (s *Store) CurrentInputFileEnabled() bool {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	if split := s.cfg.HistorySplit.Enabled; split != nil && *split {
+		return false
+	}
+	own := s.cfg.CurrentInputFile.Enabled
+	return own == nil || *own
+}
+
+func (s *Store) CurrentInputFileMinChars() int {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	return s.cfg.CurrentInputFile.MinChars // raw configured value; the int zero value when never set
+}
+
+// ThinkingInjectionEnabled reports the thinking_injection.enabled flag; an
+// unset flag counts as enabled.
+func (s *Store) ThinkingInjectionEnabled() bool {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	flag := s.cfg.ThinkingInjection.Enabled
+	return flag == nil || *flag
+}
+
+func (s *Store) ThinkingInjectionPrompt() string {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	return strings.TrimSpace(s.cfg.ThinkingInjection.Prompt) // configured prompt without surrounding whitespace
+}
diff --git a/internal/config/store_accessors_test.go b/internal/config/store_accessors_test.go
index af197ce..9b88e15 100644
--- a/internal/config/store_accessors_test.go
+++ b/internal/config/store_accessors_test.go
@@ -4,14 +4,14 @@ import "testing"
 
 func TestStoreHistorySplitAccessors(t *testing.T) {
 	store := &Store{cfg: Config{}}
-	if !store.HistorySplitEnabled() {
-		t.Fatal("expected history split enabled by default")
+	if store.HistorySplitEnabled() {
+		t.Fatal("expected history split disabled by default")
 	}
 	if got := store.HistorySplitTriggerAfterTurns(); got != 1 {
 		t.Fatalf("default history split trigger_after_turns=%d want=1", got)
 	}
 
-	enabled := false
+	enabled := true
 	turns := 3
 	store.cfg.HistorySplit = HistorySplitConfig{
 		Enabled:           &enabled,
@@ -19,24 +19,73 @@ func TestStoreHistorySplitAccessors(t *testing.T) {
 	}
 
 	if !store.HistorySplitEnabled() {
-		t.Fatal("expected history split to stay enabled after legacy disabled override")
+		t.Fatal("expected history split enabled")
 	}
 	if got := store.HistorySplitTriggerAfterTurns(); got != 3 {
 		t.Fatalf("history split trigger_after_turns=%d want=3", got)
 	}
 }
 
-func TestStoreHistorySplitLegacyDisabledConfigNormalizesToEnabled(t *testing.T) {
+func TestStoreHistorySplitDisabledConfigStaysDisabled(t *testing.T) {
 	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"history_split":{"enabled":false,"trigger_after_turns":2}}`)
 	store := LoadStore()
-	if !store.HistorySplitEnabled() {
-		t.Fatal("expected history split enabled when legacy config disables it")
+	if store.HistorySplitEnabled() {
+		t.Fatal("expected history split disabled when config disables it")
 	}
 	snap := store.Snapshot()
-	if snap.HistorySplit.Enabled == nil || !*snap.HistorySplit.Enabled {
-		t.Fatalf("expected normalized history_split.enabled=true, got %#v", snap.HistorySplit.Enabled)
+	if snap.HistorySplit.Enabled == nil || *snap.HistorySplit.Enabled {
+		t.Fatalf("expected history_split.enabled=false, got %#v", snap.HistorySplit.Enabled)
 	}
 	if got := store.HistorySplitTriggerAfterTurns(); got != 2 {
 		t.Fatalf("history split trigger_after_turns=%d want=2", got)
 	}
 }
+
+// TestStoreCurrentInputFileAccessors walks the default, explicit off/on, and
+// history-split-suppressed states of the current input file accessors.
+func TestStoreCurrentInputFileAccessors(t *testing.T) {
+	store := new(Store)
+	if !store.CurrentInputFileEnabled() {
+		t.Fatal("expected current input file enabled by default")
+	}
+	if got := store.CurrentInputFileMinChars(); got != 0 {
+		t.Fatalf("default current input file min_chars=%d want=0", got)
+	}
+	flag := false
+	store.cfg.CurrentInputFile = CurrentInputFileConfig{Enabled: &flag, MinChars: 12345}
+	if store.CurrentInputFileEnabled() {
+		t.Fatal("expected current input file disabled")
+	}
+
+	flag = true // Enabled still points at flag, so this flips the stored setting
+	if !store.CurrentInputFileEnabled() {
+		t.Fatal("expected current input file enabled")
+	}
+	if got := store.CurrentInputFileMinChars(); got != 12345 {
+		t.Fatalf("current input file min_chars=%d want=12345", got)
+	}
+
+	splitOn := true
+	store.cfg.HistorySplit.Enabled = &splitOn
+	if store.CurrentInputFileEnabled() {
+		t.Fatal("expected history split to suppress current input file mode")
+	}
+}
+
+// TestStoreThinkingInjectionAccessors covers the default-on switch, an explicit
+// disable, and whitespace trimming of the custom prompt.
+func TestStoreThinkingInjectionAccessors(t *testing.T) {
+	store := new(Store)
+	if !store.ThinkingInjectionEnabled() {
+		t.Fatal("expected thinking injection enabled by default")
+	}
+
+	store.cfg.ThinkingInjection.Enabled = new(bool) // points at false
+	if store.ThinkingInjectionEnabled() {
+		t.Fatal("expected thinking injection disabled by explicit config")
+	}
+	store.cfg.ThinkingInjection.Prompt = "  custom thinking prompt  "
+	if got := store.ThinkingInjectionPrompt(); got != "custom thinking prompt" {
+		t.Fatalf("thinking injection prompt=%q want custom thinking prompt", got)
+	}
+}
diff --git a/internal/config/validation.go b/internal/config/validation.go
index 3e8954c..d7bcb28 100644
--- a/internal/config/validation.go
+++ b/internal/config/validation.go
@@ -27,6 +27,12 @@ func ValidateConfig(c Config) error {
 	if err := ValidateHistorySplitConfig(c.HistorySplit); err != nil {
 		return err
 	}
+	if err := ValidateCurrentInputFileConfig(c.CurrentInputFile); err != nil {
+		return err
+	}
+	if c.HistorySplit.Enabled != nil && *c.HistorySplit.Enabled && c.CurrentInputFile.Enabled != nil && *c.CurrentInputFile.Enabled {
+		return fmt.Errorf("history_split.enabled and current_input_file.enabled cannot both be true")
+	}
 	if err := ValidateAccountProxyReferences(c.Accounts, c.Proxies); err != nil {
 		return err
 	}
@@ -123,6 +129,13 @@ func ValidateHistorySplitConfig(historySplit HistorySplitConfig) error {
 	return nil
 }
 
+func ValidateCurrentInputFileConfig(currentInputFile CurrentInputFileConfig) error {
+	if currentInputFile.MinChars == 0 {
+		return nil // zero is accepted without a range check
+	}
+	return ValidateIntRange("current_input_file.min_chars", currentInputFile.MinChars, 1, 100000000, true)
+}
+
 func ValidateIntRange(name string, value, min, max int, required bool) error {
 	if value == 0 && !required {
 		return nil
diff --git a/internal/config/validation_test.go b/internal/config/validation_test.go
index cf4a68e..67b80a1 100644
--- a/internal/config/validation_test.go
+++ b/internal/config/validation_test.go
@@ -46,6 +46,19 @@ func TestValidateConfigRejectsInvalidValues(t *testing.T) {
 			}},
 			want: "history_split.trigger_after_turns",
 		},
+		{
+			name: "current input file",
+			cfg:  Config{CurrentInputFile: CurrentInputFileConfig{MinChars: -1}},
+			want: "current_input_file.min_chars",
+		},
+		{
+			name: "split modes mutually exclusive",
+			cfg: Config{
+				HistorySplit:     HistorySplitConfig{Enabled: boolPtr(true)},
+				CurrentInputFile: CurrentInputFileConfig{Enabled: boolPtr(true)},
+			},
+			want: "cannot both be true",
+		},
 	}
 
 	for _, tc := range tests {
@@ -68,3 +81,5 @@ func TestValidateConfigAcceptsLegacyAutoDeleteSessions(t *testing.T) {
 }
 
 func intPtr(v int) *int { return &v }
+
+func boolPtr(v bool) *bool { return &v }
diff --git a/internal/deepseek/protocol/constants.go b/internal/deepseek/protocol/constants.go
index 79e218e..3cb6c4d 100644
--- a/internal/deepseek/protocol/constants.go
+++ b/internal/deepseek/protocol/constants.go
@@ -3,6 +3,7 @@ package protocol
 import (
 	_ "embed"
 	"encoding/json"
+	"fmt"
 )
 
 const (
@@ -21,15 +22,11 @@ const (
 	DeepSeekUploadTargetPath     = "/api/v0/file/upload_file"
 )
 
-var defaultBaseHeaders = map[string]string{
-	"Host":              "chat.deepseek.com",
-	"User-Agent":        "DeepSeek/1.8.0 Android/35",
-	"Accept":            "application/json",
-	"Content-Type":      "application/json",
-	"x-client-platform": "android",
-	"x-client-version":  "1.8.0",
-	"x-client-locale":   "zh_CN",
-	"accept-charset":    "UTF-8",
+var defaultStaticBaseHeaders = map[string]string{
+	"Host":           "chat.deepseek.com",
+	"Accept":         "application/json",
+	"Content-Type":   "application/json",
+	"accept-charset": "UTF-8",
 }
 
 var defaultSkipContainsPatterns = []string{
@@ -47,11 +44,21 @@ var defaultSkipExactPaths = []string{
 	"response/search_status",
 }
 
-var BaseHeaders = cloneStringMap(defaultBaseHeaders)
+var ClientVersion string
+var BaseHeaders = map[string]string{}
 var SkipContainsPatterns = cloneStringSlice(defaultSkipContainsPatterns)
 var SkipExactPathSet = toStringSet(defaultSkipExactPaths)
 
+type clientConstants struct {
+	Name            string `json:"name"`              // first token of the derived User-Agent ("Name/Version")
+	Platform        string `json:"platform"`          // sent as x-client-platform
+	Version         string `json:"version"`           // sent as x-client-version and embedded in User-Agent
+	AndroidAPILevel string `json:"android_api_level"` // appended as " Android/<level>" when Platform is "android"
+	Locale          string `json:"locale"`            // sent as x-client-locale
+}
+
 type sharedConstants struct {
+	Client              clientConstants   `json:"client"`
 	BaseHeaders         map[string]string `json:"base_headers"`
 	SkipContainsPattern []string          `json:"skip_contains_patterns"`
 	SkipExactPaths      []string          `json:"skip_exact_paths"`
@@ -63,19 +70,68 @@ var sharedConstantsJSON []byte
 func init() {
 	cfg := sharedConstants{}
 	if err := json.Unmarshal(sharedConstantsJSON, &cfg); err != nil {
-		return
-	}
-	if len(cfg.BaseHeaders) > 0 {
-		BaseHeaders = cloneStringMap(cfg.BaseHeaders)
+		panic(fmt.Errorf("load DeepSeek shared constants: %w", err))
 	}
+	applySharedConstants(cfg)
+}
+
+func applySharedConstants(cfg sharedConstants) {
+	client := normalizeClientConstants(cfg.Client)
+	ClientVersion = client.Version
+	BaseHeaders = buildBaseHeaders(client, cfg.BaseHeaders)
+	SkipContainsPatterns = cloneStringSlice(defaultSkipContainsPatterns)
 	if len(cfg.SkipContainsPattern) > 0 {
 		SkipContainsPatterns = cloneStringSlice(cfg.SkipContainsPattern)
 	}
+	SkipExactPathSet = toStringSet(defaultSkipExactPaths)
 	if len(cfg.SkipExactPaths) > 0 {
 		SkipExactPathSet = toStringSet(cfg.SkipExactPaths)
 	}
 }
 
+// normalizeClientConstants returns in with empty Name, Platform,
+// AndroidAPILevel and Locale replaced by their built-in defaults. Version has
+// no default here and is returned exactly as supplied.
+func normalizeClientConstants(in clientConstants) clientConstants {
+	fill := func(field *string, fallback string) {
+		if *field == "" {
+			*field = fallback
+		}
+	}
+	fill(&in.Name, "DeepSeek")
+	fill(&in.Platform, "android")
+	fill(&in.AndroidAPILevel, "35")
+	fill(&in.Locale, "zh_CN")
+	return in
+}
+
+// buildBaseHeaders layers non-empty overrides on the static defaults, then writes
+// the client-derived headers last so they win over any override of the same key.
+func buildBaseHeaders(client clientConstants, overrides map[string]string) map[string]string {
+	headers := cloneStringMap(defaultStaticBaseHeaders)
+	for key, value := range overrides {
+		if key != "" && value != "" {
+			headers[key] = value
+		}
+	}
+	setIfPresent := func(key, value string) {
+		if value != "" {
+			headers[key] = value
+		}
+	}
+	if client.Name != "" && client.Version != "" {
+		agent := client.Name + "/" + client.Version
+		if client.Platform == "android" && client.AndroidAPILevel != "" {
+			agent += " Android/" + client.AndroidAPILevel
+		}
+		headers["User-Agent"] = agent
+	}
+	setIfPresent("x-client-platform", client.Platform)
+	setIfPresent("x-client-version", client.Version)
+	setIfPresent("x-client-locale", client.Locale)
+	return headers
+}
+
 func cloneStringMap(in map[string]string) map[string]string {
 	out := make(map[string]string, len(in))
 	for k, v := range in {
@@ -103,6 +159,6 @@ func toStringSet(in []string) map[string]struct{} {
 
 const (
 	KeepAliveTimeout  = 5
-	StreamIdleTimeout = 30
+	StreamIdleTimeout = 90
 	MaxKeepaliveCount = 10
 )
diff --git a/internal/deepseek/protocol/constants_shared.json b/internal/deepseek/protocol/constants_shared.json
index fb58d0e..353f03d 100644
--- a/internal/deepseek/protocol/constants_shared.json
+++ b/internal/deepseek/protocol/constants_shared.json
@@ -1,11 +1,15 @@
 {
+  "client": {
+    "name": "DeepSeek",
+    "platform": "android",
+    "version": "2.0.1",
+    "android_api_level": "35",
+    "locale": "zh_CN"
+  },
   "base_headers": {
     "Host": "chat.deepseek.com",
-    "User-Agent": "DeepSeek/1.8.0 Android/35",
     "Accept": "application/json",
-    "x-client-platform": "android",
-    "x-client-version": "1.8.0",
-    "x-client-locale": "zh_CN",
+    "Content-Type": "application/json",
     "accept-charset": "UTF-8"
   },
   "skip_contains_patterns": [
diff --git a/internal/deepseek/protocol/constants_test.go b/internal/deepseek/protocol/constants_test.go
index b64e579..1f278f1 100644
--- a/internal/deepseek/protocol/constants_test.go
+++ b/internal/deepseek/protocol/constants_test.go
@@ -1,11 +1,32 @@
 package protocol
 
-import "testing"
+import (
+	"encoding/json"
+	"testing"
+)
 
 func TestSharedConstantsLoaded(t *testing.T) {
+	cfg := sharedConstants{}
+	if err := json.Unmarshal(sharedConstantsJSON, &cfg); err != nil {
+		t.Fatalf("failed to parse shared constants: %v", err)
+	}
+	client := normalizeClientConstants(cfg.Client)
+	if ClientVersion != client.Version {
+		t.Fatalf("unexpected client version=%q", ClientVersion)
+	}
+	wantUserAgent := client.Name + "/" + client.Version + " Android/" + client.AndroidAPILevel
+	if BaseHeaders["User-Agent"] != wantUserAgent {
+		t.Fatalf("unexpected user agent=%q", BaseHeaders["User-Agent"])
+	}
 	if BaseHeaders["x-client-platform"] != "android" {
 		t.Fatalf("unexpected base header x-client-platform=%q", BaseHeaders["x-client-platform"])
 	}
+	if BaseHeaders["x-client-version"] != ClientVersion {
+		t.Fatalf("unexpected base header x-client-version=%q", BaseHeaders["x-client-version"])
+	}
+	if BaseHeaders["Content-Type"] != "application/json" {
+		t.Fatalf("unexpected base header Content-Type=%q", BaseHeaders["Content-Type"])
+	}
 	if len(SkipContainsPatterns) == 0 {
 		t.Fatal("expected skip contains patterns to be loaded")
 	}
@@ -13,3 +34,23 @@ func TestSharedConstantsLoaded(t *testing.T) {
 		t.Fatal("expected response/search_status in exact skip path set")
 	}
 }
+
+// TestClientHeadersDerivedFromSharedVersion checks that headers derived from
+// the client block replace stale overrides of the same keys.
+func TestClientHeadersDerivedFromSharedVersion(t *testing.T) {
+	stale := map[string]string{"User-Agent": "stale", "x-client-version": "stale"}
+	client := normalizeClientConstants(clientConstants{
+		Name:            "DeepSeek",
+		Platform:        "android",
+		Version:         "9.8.7",
+		AndroidAPILevel: "35",
+		Locale:          "zh_CN",
+	})
+	headers := buildBaseHeaders(client, stale)
+	if got := headers["User-Agent"]; got != "DeepSeek/9.8.7 Android/35" {
+		t.Fatalf("unexpected derived user agent=%q", got)
+	}
+	if got := headers["x-client-version"]; got != "9.8.7" {
+		t.Fatalf("unexpected derived client version=%q", got)
+	}
+}
diff --git a/internal/format/openai/render_chat.go b/internal/format/openai/render_chat.go
index c09e870..24b6fa1 100644
--- a/internal/format/openai/render_chat.go
+++ b/internal/format/openai/render_chat.go
@@ -7,15 +7,19 @@ import (
 )
 
 func BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
-	detected := toolcall.ParseStandaloneToolCallsDetailed(finalText, toolNames)
+	detected := toolcall.ParseAssistantToolCallsDetailed(finalText, finalThinking, toolNames)
+	return BuildChatCompletionWithToolCalls(completionID, model, finalPrompt, finalThinking, finalText, detected.Calls)
+}
+
+func BuildChatCompletionWithToolCalls(completionID, model, finalPrompt, finalThinking, finalText string, detected []toolcall.ParsedToolCall) map[string]any {
 	finishReason := "stop"
 	messageObj := map[string]any{"role": "assistant", "content": finalText}
 	if strings.TrimSpace(finalThinking) != "" {
 		messageObj["reasoning_content"] = finalThinking
 	}
-	if len(detected.Calls) > 0 {
+	if len(detected) > 0 {
 		finishReason = "tool_calls"
-		messageObj["tool_calls"] = toolcall.FormatOpenAIToolCalls(detected.Calls)
+		messageObj["tool_calls"] = toolcall.FormatOpenAIToolCalls(detected)
 		messageObj["content"] = nil
 	}
 
diff --git a/internal/format/openai/render_responses.go b/internal/format/openai/render_responses.go
index 8fc4dbe..7d8e78b 100644
--- a/internal/format/openai/render_responses.go
+++ b/internal/format/openai/render_responses.go
@@ -12,12 +12,16 @@ import (
 func BuildResponseObject(responseID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
 	// Strict mode: only standalone, structured tool-call payloads are treated
 	// as executable tool calls.
-	detected := toolcall.ParseStandaloneToolCallsDetailed(finalText, toolNames)
+	detected := toolcall.ParseAssistantToolCallsDetailed(finalText, finalThinking, toolNames)
+	return BuildResponseObjectWithToolCalls(responseID, model, finalPrompt, finalThinking, finalText, detected.Calls)
+}
+
+func BuildResponseObjectWithToolCalls(responseID, model, finalPrompt, finalThinking, finalText string, detected []toolcall.ParsedToolCall) map[string]any {
 	exposedOutputText := finalText
 	output := make([]any, 0, 2)
-	if len(detected.Calls) > 0 {
+	if len(detected) > 0 {
 		exposedOutputText = ""
-		output = append(output, toResponsesFunctionCallItems(detected.Calls)...)
+		output = append(output, toResponsesFunctionCallItems(detected)...)
 	} else {
 		content := make([]any, 0, 2)
 		if finalThinking != "" {
diff --git a/internal/format/openai/render_test.go b/internal/format/openai/render_test.go
index 8a252de..2f22a98 100644
--- a/internal/format/openai/render_test.go
+++ b/internal/format/openai/render_test.go
@@ -67,22 +67,22 @@ func TestBuildResponseObjectReasoningOnlyFallsBackToOutputText(t *testing.T) {
 	}
 }
 
-func TestBuildResponseObjectIgnoresToolCallFromThinkingChannel(t *testing.T) {
+func TestBuildResponseObjectPromotesToolCallFromThinkingWhenTextEmpty(t *testing.T) {
 	obj := BuildResponseObject(
 		"resp_test",
 		"gpt-4o",
 		"prompt",
-		`{"tool_calls":[{"name":"search","input":{"q":"from-thinking"}}]}`,
+		`<tool_calls><invoke name="search"><parameter name="q">from-thinking</parameter></invoke></tool_calls>`,
 		"",
 		[]string{"search"},
 	)
 
 	output, _ := obj["output"].([]any)
 	if len(output) != 1 {
-		t.Fatalf("expected one message output item, got %#v", obj["output"])
+		t.Fatalf("expected one output item, got %#v", obj["output"])
 	}
 	first, _ := output[0].(map[string]any)
-	if first["type"] != "message" {
-		t.Fatalf("expected output message, got %#v", first["type"])
+	if first["type"] != "function_call" {
+		t.Fatalf("expected function_call output, got %#v", first["type"])
 	}
 }
diff --git a/internal/httpapi/admin/handler_settings_test.go b/internal/httpapi/admin/handler_settings_test.go
index aefc1bd..fba6bd1 100644
--- a/internal/httpapi/admin/handler_settings_test.go
+++ b/internal/httpapi/admin/handler_settings_test.go
@@ -58,12 +58,29 @@ func TestGetSettingsIncludesHistorySplitDefaults(t *testing.T) {
 	var body map[string]any
 	_ = json.Unmarshal(rec.Body.Bytes(), &body)
 	historySplit, _ := body["history_split"].(map[string]any)
-	if got := boolFrom(historySplit["enabled"]); !got {
-		t.Fatalf("expected history_split.enabled=true, body=%v", body)
+	if got := boolFrom(historySplit["enabled"]); got {
+		t.Fatalf("expected history_split.enabled=false, body=%v", body)
 	}
 	if got := intFrom(historySplit["trigger_after_turns"]); got != 1 {
 		t.Fatalf("expected history_split.trigger_after_turns=1, got %d body=%v", got, body)
 	}
+	currentInputFile, _ := body["current_input_file"].(map[string]any)
+	if got := boolFrom(currentInputFile["enabled"]); !got {
+		t.Fatalf("expected current_input_file.enabled=true, body=%v", body)
+	}
+	if got := intFrom(currentInputFile["min_chars"]); got != 0 {
+		t.Fatalf("expected current_input_file.min_chars=0, got %d body=%v", got, body)
+	}
+	thinkingInjection, _ := body["thinking_injection"].(map[string]any)
+	if got := boolFrom(thinkingInjection["enabled"]); !got {
+		t.Fatalf("expected thinking_injection.enabled=true, body=%v", body)
+	}
+	if got, _ := thinkingInjection["prompt"].(string); got != "" {
+		t.Fatalf("expected empty custom thinking prompt, got %q body=%v", got, body)
+	}
+	if got, _ := thinkingInjection["default_prompt"].(string); got == "" {
+		t.Fatalf("expected default thinking prompt, body=%v", body)
+	}
 }
 
 func TestUpdateSettingsValidation(t *testing.T) {
@@ -177,7 +194,7 @@ func TestUpdateSettingsHistorySplit(t *testing.T) {
 	h := newAdminTestHandler(t, `{"keys":["k1"]}`)
 	payload := map[string]any{
 		"history_split": map[string]any{
-			"enabled":             false,
+			"enabled":             true,
 			"trigger_after_turns": 3,
 		},
 	}
@@ -190,11 +207,181 @@ func TestUpdateSettingsHistorySplit(t *testing.T) {
 	}
 	snap := h.Store.Snapshot()
 	if snap.HistorySplit.Enabled == nil || !*snap.HistorySplit.Enabled {
-		t.Fatalf("expected history_split.enabled to be forced true, got %#v", snap.HistorySplit.Enabled)
+		t.Fatalf("expected history_split.enabled=true, got %#v", snap.HistorySplit.Enabled)
 	}
 	if snap.HistorySplit.TriggerAfterTurns == nil || *snap.HistorySplit.TriggerAfterTurns != 3 {
 		t.Fatalf("expected history_split.trigger_after_turns=3, got %#v", snap.HistorySplit.TriggerAfterTurns)
 	}
+	if snap.CurrentInputFile.Enabled == nil || *snap.CurrentInputFile.Enabled {
+		t.Fatalf("expected history split to disable current_input_file, got %#v", snap.CurrentInputFile.Enabled)
+	}
+}
+
+func TestUpdateSettingsCurrentInputFile(t *testing.T) {
+	h := newAdminTestHandler(t, `{"keys":["k1"],"history_split":{"enabled":true,"trigger_after_turns":2}}`)
+	payload := map[string]any{
+		"current_input_file": map[string]any{
+			"enabled":   true,
+			"min_chars": 12345,
+		},
+	}
+	b, _ := json.Marshal(payload)
+	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
+	rec := httptest.NewRecorder()
+	h.updateSettings(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	snap := h.Store.Snapshot()
+	if snap.CurrentInputFile.Enabled == nil || !*snap.CurrentInputFile.Enabled {
+		t.Fatalf("expected current_input_file.enabled=true, got %#v", snap.CurrentInputFile)
+	}
+	if snap.CurrentInputFile.MinChars != 12345 {
+		t.Fatalf("expected current_input_file.min_chars=12345, got %#v", snap.CurrentInputFile)
+	}
+	if snap.HistorySplit.Enabled == nil || *snap.HistorySplit.Enabled {
+		t.Fatalf("expected current input file to disable history_split, got %#v", snap.HistorySplit.Enabled)
+	}
+}
+
+func TestUpdateSettingsCurrentInputFilePartialUpdatePreservesEnabled(t *testing.T) {
+	h := newAdminTestHandler(t, `{"keys":["k1"],"current_input_file":{"enabled":false,"min_chars":777}}`)
+	payload := map[string]any{
+		"current_input_file": map[string]any{
+			"min_chars": 5000,
+		},
+	}
+	b, _ := json.Marshal(payload)
+	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
+	rec := httptest.NewRecorder()
+	h.updateSettings(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	snap := h.Store.Snapshot()
+	if snap.CurrentInputFile.Enabled == nil || *snap.CurrentInputFile.Enabled {
+		t.Fatalf("expected current_input_file.enabled to remain false, got %#v", snap.CurrentInputFile.Enabled)
+	}
+	if snap.CurrentInputFile.MinChars != 5000 {
+		t.Fatalf("expected current_input_file.min_chars=5000, got %#v", snap.CurrentInputFile)
+	}
+}
+
+func TestUpdateSettingsCurrentInputFilePartialUpdatePreservesMinChars(t *testing.T) {
+	h := newAdminTestHandler(t, `{"keys":["k1"],"current_input_file":{"enabled":false,"min_chars":777}}`)
+	payload := map[string]any{
+		"current_input_file": map[string]any{
+			"enabled": true,
+		},
+	}
+	b, _ := json.Marshal(payload)
+	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
+	rec := httptest.NewRecorder()
+	h.updateSettings(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	snap := h.Store.Snapshot()
+	if snap.CurrentInputFile.Enabled == nil || !*snap.CurrentInputFile.Enabled {
+		t.Fatalf("expected current_input_file.enabled=true, got %#v", snap.CurrentInputFile.Enabled)
+	}
+	if snap.CurrentInputFile.MinChars != 777 {
+		t.Fatalf("expected current_input_file.min_chars to remain 777, got %#v", snap.CurrentInputFile)
+	}
+}
+
+func TestUpdateSettingsRejectsTwoSplitModesEnabled(t *testing.T) {
+	h := newAdminTestHandler(t, `{"keys":["k1"]}`)
+	payload := map[string]any{
+		"history_split": map[string]any{
+			"enabled":             true,
+			"trigger_after_turns": 3,
+		},
+		"current_input_file": map[string]any{
+			"enabled":   true,
+			"min_chars": 0,
+		},
+	}
+	b, _ := json.Marshal(payload)
+	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
+	rec := httptest.NewRecorder()
+	h.updateSettings(rec, req)
+	if rec.Code != http.StatusBadRequest {
+		t.Fatalf("expected 400, got %d body=%s", rec.Code, rec.Body.String())
+	}
+}
+
+func TestUpdateSettingsThinkingInjection(t *testing.T) {
+	h := newAdminTestHandler(t, `{"keys":["k1"]}`)
+	payload := map[string]any{
+		"thinking_injection": map[string]any{
+			"enabled": false,
+			"prompt":  " custom thinking prompt ",
+		},
+	}
+	b, _ := json.Marshal(payload)
+	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
+	rec := httptest.NewRecorder()
+	h.updateSettings(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	snap := h.Store.Snapshot()
+	if snap.ThinkingInjection.Enabled == nil || *snap.ThinkingInjection.Enabled {
+		t.Fatalf("expected thinking_injection.enabled=false, got %#v", snap.ThinkingInjection.Enabled)
+	}
+	if h.Store.ThinkingInjectionEnabled() {
+		t.Fatal("expected thinking injection accessor to reflect disabled config")
+	}
+	if got := h.Store.ThinkingInjectionPrompt(); got != "custom thinking prompt" {
+		t.Fatalf("expected custom thinking prompt, got %q", got)
+	}
+}
+
+func TestUpdateSettingsThinkingInjectionPartialPromptPreservesEnabled(t *testing.T) {
+	h := newAdminTestHandler(t, `{"keys":["k1"],"thinking_injection":{"enabled":false,"prompt":"original prompt"}}`)
+	payload := map[string]any{
+		"thinking_injection": map[string]any{
+			"prompt": " updated prompt ",
+		},
+	}
+	b, _ := json.Marshal(payload)
+	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
+	rec := httptest.NewRecorder()
+	h.updateSettings(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	snap := h.Store.Snapshot()
+	if snap.ThinkingInjection.Enabled == nil || *snap.ThinkingInjection.Enabled {
+		t.Fatalf("expected thinking_injection.enabled to remain false, got %#v", snap.ThinkingInjection.Enabled)
+	}
+	if got := h.Store.ThinkingInjectionPrompt(); got != "updated prompt" {
+		t.Fatalf("expected updated prompt, got %q", got)
+	}
+}
+
+func TestUpdateSettingsThinkingInjectionPartialEnabledPreservesPrompt(t *testing.T) {
+	h := newAdminTestHandler(t, `{"keys":["k1"],"thinking_injection":{"enabled":false,"prompt":"original prompt"}}`)
+	payload := map[string]any{
+		"thinking_injection": map[string]any{
+			"enabled": true,
+		},
+	}
+	b, _ := json.Marshal(payload)
+	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
+	rec := httptest.NewRecorder()
+	h.updateSettings(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	snap := h.Store.Snapshot()
+	if snap.ThinkingInjection.Enabled == nil || !*snap.ThinkingInjection.Enabled {
+		t.Fatalf("expected thinking_injection.enabled=true, got %#v", snap.ThinkingInjection.Enabled)
+	}
+	if got := h.Store.ThinkingInjectionPrompt(); got != "original prompt" {
+		t.Fatalf("expected original prompt to be preserved, got %q", got)
+	}
 }
 
 func TestUpdateSettingsAutoDeleteMode(t *testing.T) {
diff --git a/internal/httpapi/admin/history/handler_chat_history.go b/internal/httpapi/admin/history/handler_chat_history.go
index e05a9e3..8072a2a 100644
--- a/internal/httpapi/admin/history/handler_chat_history.go
+++ b/internal/httpapi/admin/history/handler_chat_history.go
@@ -16,6 +16,24 @@ func (h *Handler) getChatHistory(w http.ResponseWriter, r *http.Request) {
 		writeJSON(w, http.StatusServiceUnavailable, map[string]any{"detail": "chat history store is not configured"})
 		return
 	}
+	ifNoneMatch := strings.TrimSpace(r.Header.Get("If-None-Match"))
+	if ifNoneMatch != "" {
+		revision, err := store.Revision()
+		if err != nil {
+			writeJSON(w, http.StatusServiceUnavailable, map[string]any{
+				"detail": err.Error(),
+				"path":   store.Path(),
+			})
+			return
+		}
+		// Revision-only probe: validators are written only for a 304, so a later Snapshot failure carries no ETag.
+		if etag := chathistory.ListETag(revision); ifNoneMatch == etag {
+			w.Header().Set("ETag", etag)
+			w.Header().Set("Cache-Control", "no-cache")
+			w.WriteHeader(http.StatusNotModified)
+			return
+		}
+	}
 	snapshot, err := store.Snapshot()
 	if err != nil {
 		writeJSON(w, http.StatusServiceUnavailable, map[string]any{
@@ -27,7 +45,7 @@ func (h *Handler) getChatHistory(w http.ResponseWriter, r *http.Request) {
 	etag := chathistory.ListETag(snapshot.Revision)
 	w.Header().Set("ETag", etag)
 	w.Header().Set("Cache-Control", "no-cache")
-	if strings.TrimSpace(r.Header.Get("If-None-Match")) == etag {
+	if ifNoneMatch == etag {
 		w.WriteHeader(http.StatusNotModified)
 		return
 	}
@@ -51,6 +69,25 @@ func (h *Handler) getChatHistoryItem(w http.ResponseWriter, r *http.Request) {
 		writeJSON(w, http.StatusBadRequest, map[string]any{"detail": "history id is required"})
 		return
 	}
+	ifNoneMatch := strings.TrimSpace(r.Header.Get("If-None-Match"))
+	if ifNoneMatch != "" {
+		revision, err := store.DetailRevision(id)
+		if err != nil {
+			status := http.StatusInternalServerError
+			if strings.Contains(strings.ToLower(err.Error()), "not found") {
+				status = http.StatusNotFound
+			}
+			writeJSON(w, status, map[string]any{"detail": err.Error()})
+			return
+		}
+		// Revision-only probe: validators are written only for a 304, so a later store.Get failure carries no ETag.
+		if etag := chathistory.DetailETag(id, revision); ifNoneMatch == etag {
+			w.Header().Set("ETag", etag)
+			w.Header().Set("Cache-Control", "no-cache")
+			w.WriteHeader(http.StatusNotModified)
+			return
+		}
+	}
 	item, err := store.Get(id)
 	if err != nil {
 		status := http.StatusInternalServerError
@@ -63,7 +100,7 @@ func (h *Handler) getChatHistoryItem(w http.ResponseWriter, r *http.Request) {
 	etag := chathistory.DetailETag(item.ID, item.Revision)
 	w.Header().Set("ETag", etag)
 	w.Header().Set("Cache-Control", "no-cache")
-	if strings.TrimSpace(r.Header.Get("If-None-Match")) == etag {
+	if ifNoneMatch == etag {
 		w.WriteHeader(http.StatusNotModified)
 		return
 	}
diff --git a/internal/httpapi/admin/history/handler_chat_history_test.go b/internal/httpapi/admin/history/handler_chat_history_test.go
index 1397bae..4d3e32f 100644
--- a/internal/httpapi/admin/history/handler_chat_history_test.go
+++ b/internal/httpapi/admin/history/handler_chat_history_test.go
@@ -95,6 +95,15 @@ func TestGetChatHistoryAndUpdateSettings(t *testing.T) {
 		t.Fatalf("expected detail etag header")
 	}
 
+	notModifiedItemReq := httptest.NewRequest(http.MethodGet, "/chat-history/"+entry.ID, nil)
+	notModifiedItemReq.Header.Set("Authorization", "Bearer admin")
+	notModifiedItemReq.Header.Set("If-None-Match", itemRec.Header().Get("ETag"))
+	notModifiedItemRec := httptest.NewRecorder()
+	r.ServeHTTP(notModifiedItemRec, notModifiedItemReq)
+	if notModifiedItemRec.Code != http.StatusNotModified {
+		t.Fatalf("expected detail 304, got %d body=%s", notModifiedItemRec.Code, notModifiedItemRec.Body.String())
+	}
+
 	updateReq := httptest.NewRequest(http.MethodPut, "/chat-history/settings", bytes.NewReader([]byte(`{"limit":10}`)))
 	updateReq.Header.Set("Authorization", "Bearer admin")
 	updateRec := httptest.NewRecorder()
diff --git a/internal/httpapi/admin/settings/handler_settings_parse.go b/internal/httpapi/admin/settings/handler_settings_parse.go
index 14fb92d..bd26c7f 100644
--- a/internal/httpapi/admin/settings/handler_settings_parse.go
+++ b/internal/httpapi/admin/settings/handler_settings_parse.go
@@ -21,7 +21,7 @@ func boolFrom(v any) bool {
 	}
 }
 
-func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, *config.CompatConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, *config.AutoDeleteConfig, *config.HistorySplitConfig, map[string]string, error) {
+func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, *config.CompatConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, *config.AutoDeleteConfig, *config.HistorySplitConfig, *config.CurrentInputFileConfig, *config.ThinkingInjectionConfig, map[string]string, error) {
 	var (
 		adminCfg        *config.AdminConfig
 		runtimeCfg      *config.RuntimeConfig
@@ -30,6 +30,8 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		embCfg          *config.EmbeddingsConfig
 		autoDeleteCfg   *config.AutoDeleteConfig
 		historySplitCfg *config.HistorySplitConfig
+		currentInputCfg *config.CurrentInputFileConfig
+		thinkingInjCfg  *config.ThinkingInjectionConfig
 		aliasMap        map[string]string
 	)
 
@@ -38,7 +40,7 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		if v, exists := raw["jwt_expire_hours"]; exists {
 			n := intFrom(v)
 			if err := config.ValidateIntRange("admin.jwt_expire_hours", n, 1, 720, true); err != nil {
-				return nil, nil, nil, nil, nil, nil, nil, nil, err
+				return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, err
 			}
 			cfg.JWTExpireHours = n
 		}
@@ -50,33 +52,33 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		if v, exists := raw["account_max_inflight"]; exists {
 			n := intFrom(v)
 			if err := config.ValidateIntRange("runtime.account_max_inflight", n, 1, 256, true); err != nil {
-				return nil, nil, nil, nil, nil, nil, nil, nil, err
+				return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, err
 			}
 			cfg.AccountMaxInflight = n
 		}
 		if v, exists := raw["account_max_queue"]; exists {
 			n := intFrom(v)
 			if err := config.ValidateIntRange("runtime.account_max_queue", n, 1, 200000, true); err != nil {
-				return nil, nil, nil, nil, nil, nil, nil, nil, err
+				return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, err
 			}
 			cfg.AccountMaxQueue = n
 		}
 		if v, exists := raw["global_max_inflight"]; exists {
 			n := intFrom(v)
 			if err := config.ValidateIntRange("runtime.global_max_inflight", n, 1, 200000, true); err != nil {
-				return nil, nil, nil, nil, nil, nil, nil, nil, err
+				return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, err
 			}
 			cfg.GlobalMaxInflight = n
 		}
 		if v, exists := raw["token_refresh_interval_hours"]; exists {
 			n := intFrom(v)
 			if err := config.ValidateIntRange("runtime.token_refresh_interval_hours", n, 1, 720, true); err != nil {
-				return nil, nil, nil, nil, nil, nil, nil, nil, err
+				return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, err
 			}
 			cfg.TokenRefreshIntervalHours = n
 		}
 		if cfg.AccountMaxInflight > 0 && cfg.GlobalMaxInflight > 0 && cfg.GlobalMaxInflight < cfg.AccountMaxInflight {
-			return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
+			return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
 		}
 		runtimeCfg = cfg
 	}
@@ -99,7 +101,7 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		if v, exists := raw["store_ttl_seconds"]; exists {
 			n := intFrom(v)
 			if err := config.ValidateIntRange("responses.store_ttl_seconds", n, 30, 86400, true); err != nil {
-				return nil, nil, nil, nil, nil, nil, nil, nil, err
+				return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, err
 			}
 			cfg.StoreTTLSeconds = n
 		}
@@ -111,7 +113,7 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		if v, exists := raw["provider"]; exists {
 			p := strings.TrimSpace(fmt.Sprintf("%v", v))
 			if err := config.ValidateTrimmedString("embeddings.provider", p, false); err != nil {
-				return nil, nil, nil, nil, nil, nil, nil, nil, err
+				return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, err
 			}
 			cfg.Provider = p
 		}
@@ -137,7 +139,7 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		if v, exists := raw["mode"]; exists {
 			mode := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", v)))
 			if err := config.ValidateAutoDeleteMode(mode); err != nil {
-				return nil, nil, nil, nil, nil, nil, nil, nil, err
+				return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, err
 			}
 			if mode == "" {
 				mode = "none"
@@ -152,20 +154,74 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 
 	if raw, ok := req["history_split"].(map[string]any); ok {
 		cfg := &config.HistorySplitConfig{}
-		enabled := true
-		cfg.Enabled = &enabled
+		if v, exists := raw["enabled"]; exists {
+			enabled := boolFrom(v)
+			cfg.Enabled = &enabled
+		}
 		if v, exists := raw["trigger_after_turns"]; exists {
 			n := intFrom(v)
 			if err := config.ValidateIntRange("history_split.trigger_after_turns", n, 1, 1000, true); err != nil {
-				return nil, nil, nil, nil, nil, nil, nil, nil, err
+				return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, err
 			}
 			cfg.TriggerAfterTurns = &n
 		}
 		if err := config.ValidateHistorySplitConfig(*cfg); err != nil {
-			return nil, nil, nil, nil, nil, nil, nil, nil, err
+			return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, err
 		}
 		historySplitCfg = cfg
 	}
 
-	return adminCfg, runtimeCfg, compatCfg, respCfg, embCfg, autoDeleteCfg, historySplitCfg, aliasMap, nil
+	if raw, ok := req["current_input_file"].(map[string]any); ok {
+		cfg := &config.CurrentInputFileConfig{}
+		if v, exists := raw["enabled"]; exists {
+			enabled := boolFrom(v)
+			cfg.Enabled = &enabled
+		}
+		if v, exists := raw["min_chars"]; exists {
+			n := intFrom(v)
+			if err := config.ValidateIntRange("current_input_file.min_chars", n, 0, 100000000, true); err != nil {
+				return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, err
+			}
+			cfg.MinChars = n
+		}
+		if err := config.ValidateCurrentInputFileConfig(*cfg); err != nil {
+			return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, err
+		}
+		currentInputCfg = cfg
+	}
+	if boolPtrValue(historySplitCfgEnabled(historySplitCfg)) && boolPtrValue(currentInputCfgEnabled(currentInputCfg)) {
+		return nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("history_split.enabled and current_input_file.enabled cannot both be true")
+	}
+
+	if raw, ok := req["thinking_injection"].(map[string]any); ok {
+		thinkingInjCfg = &config.ThinkingInjectionConfig{}
+		if v, exists := raw["enabled"]; exists {
+			enabled := boolFrom(v)
+			thinkingInjCfg.Enabled = &enabled
+		}
+		// A JSON null prompt leaves Prompt empty; formatting a nil value with %v would store the literal "<nil>".
+		if v, exists := raw["prompt"]; exists && v != nil {
+			thinkingInjCfg.Prompt = strings.TrimSpace(fmt.Sprintf("%v", v))
+		}
+	}
+
+	return adminCfg, runtimeCfg, compatCfg, respCfg, embCfg, autoDeleteCfg, historySplitCfg, currentInputCfg, thinkingInjCfg, aliasMap, nil
+}
+
+func historySplitCfgEnabled(cfg *config.HistorySplitConfig) *bool {
+	if cfg != nil { // nil-safe read of the optional Enabled pointer
+		return cfg.Enabled
+	}
+	return nil
+}
+
+func currentInputCfgEnabled(cfg *config.CurrentInputFileConfig) *bool {
+	if cfg != nil { // nil-safe read of the optional Enabled pointer
+		return cfg.Enabled
+	}
+	return nil
+}
+
+func boolPtrValue(v *bool) bool {
+	return v != nil && *v // a nil pointer counts as false
 }
diff --git a/internal/httpapi/admin/settings/handler_settings_read.go b/internal/httpapi/admin/settings/handler_settings_read.go
index 7587004..6944d3d 100644
--- a/internal/httpapi/admin/settings/handler_settings_read.go
+++ b/internal/httpapi/admin/settings/handler_settings_read.go
@@ -6,6 +6,7 @@ import (
 
 	authn "ds2api/internal/auth"
 	"ds2api/internal/config"
+	"ds2api/internal/promptcompat"
 )
 
 func (h *Handler) getSettings(w http.ResponseWriter, _ *http.Request) {
@@ -34,6 +35,15 @@ func (h *Handler) getSettings(w http.ResponseWriter, _ *http.Request) {
 			"enabled":             h.Store.HistorySplitEnabled(),
 			"trigger_after_turns": h.Store.HistorySplitTriggerAfterTurns(),
 		},
+		"current_input_file": map[string]any{
+			"enabled":   h.Store.CurrentInputFileEnabled(),
+			"min_chars": h.Store.CurrentInputFileMinChars(),
+		},
+		"thinking_injection": map[string]any{
+			"enabled":        h.Store.ThinkingInjectionEnabled(),
+			"prompt":         h.Store.ThinkingInjectionPrompt(),
+			"default_prompt": promptcompat.DefaultThinkingInjectionPrompt,
+		},
 		"model_aliases":     snap.ModelAliases,
 		"env_backed":        h.Store.IsEnvBacked(),
 		"needs_vercel_sync": needsSync,
diff --git a/internal/httpapi/admin/settings/handler_settings_write.go b/internal/httpapi/admin/settings/handler_settings_write.go
index 11ac6b4..1958d5f 100644
--- a/internal/httpapi/admin/settings/handler_settings_write.go
+++ b/internal/httpapi/admin/settings/handler_settings_write.go
@@ -17,7 +17,7 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	adminCfg, runtimeCfg, compatCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, historySplitCfg, aliasMap, err := parseSettingsUpdateRequest(req)
+	adminCfg, runtimeCfg, compatCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, historySplitCfg, currentInputCfg, thinkingInjCfg, aliasMap, err := parseSettingsUpdateRequest(req)
 	if err != nil {
 		writeJSON(w, http.StatusBadRequest, map[string]any{"detail": err.Error()})
 		return
@@ -28,6 +28,10 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
 			return
 		}
 	}
+	currentInputEnabledSet := hasNestedSettingsKey(req, "current_input_file", "enabled")
+	currentInputMinCharsSet := hasNestedSettingsKey(req, "current_input_file", "min_chars")
+	thinkingInjectionEnabledSet := hasNestedSettingsKey(req, "thinking_injection", "enabled")
+	thinkingInjectionPromptSet := hasNestedSettingsKey(req, "thinking_injection", "prompt")
 
 	if err := h.Store.Update(func(c *config.Config) error {
 		if adminCfg != nil {
@@ -70,11 +74,35 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
 		if historySplitCfg != nil {
 			if historySplitCfg.Enabled != nil {
 				c.HistorySplit.Enabled = historySplitCfg.Enabled
+				if *historySplitCfg.Enabled {
+					disabled := false
+					c.CurrentInputFile.Enabled = &disabled
+				}
 			}
 			if historySplitCfg.TriggerAfterTurns != nil {
 				c.HistorySplit.TriggerAfterTurns = historySplitCfg.TriggerAfterTurns
 			}
 		}
+		if currentInputCfg != nil {
+			if currentInputEnabledSet {
+				c.CurrentInputFile.Enabled = currentInputCfg.Enabled
+				if on := currentInputCfg.Enabled; on != nil && *on { // turning this mode on switches history split off
+					off := false
+					c.HistorySplit.Enabled = &off
+				}
+			}
+			if currentInputMinCharsSet {
+				c.CurrentInputFile.MinChars = currentInputCfg.MinChars
+			}
+		}
+		if thinkingInjCfg != nil {
+			if thinkingInjectionEnabledSet {
+				c.ThinkingInjection.Enabled = thinkingInjCfg.Enabled
+			}
+			if thinkingInjectionPromptSet {
+				c.ThinkingInjection.Prompt = thinkingInjCfg.Prompt
+			}
+		}
 		if aliasMap != nil {
 			c.ModelAliases = aliasMap
 		}
@@ -128,3 +156,12 @@ func (h *Handler) updateSettingsPassword(w http.ResponseWriter, r *http.Request)
 		"jwt_valid_after_unix": now,
 	})
 }
+
+// hasNestedSettingsKey reports whether req[section] is a JSON object that contains key (a null value still counts).
+func hasNestedSettingsKey(req map[string]any, section, key string) bool {
+	if nested, isObject := req[section].(map[string]any); isObject {
+		_, present := nested[key]
+		return present
+	}
+	return false
+}
diff --git a/internal/httpapi/admin/shared/deps.go b/internal/httpapi/admin/shared/deps.go
index 9adc755..8ae8bf7 100644
--- a/internal/httpapi/admin/shared/deps.go
+++ b/internal/httpapi/admin/shared/deps.go
@@ -35,6 +35,10 @@ type ConfigStore interface {
 	AutoDeleteMode() string
 	HistorySplitEnabled() bool
 	HistorySplitTriggerAfterTurns() int
+	CurrentInputFileEnabled() bool
+	CurrentInputFileMinChars() int
+	ThinkingInjectionEnabled() bool
+	ThinkingInjectionPrompt() string
 	CompatStripReferenceMarkers() bool
 	AutoDeleteSessions() bool
 }
diff --git a/internal/httpapi/claude/deps_injection_test.go b/internal/httpapi/claude/deps_injection_test.go
index e30ec2f..41afd68 100644
--- a/internal/httpapi/claude/deps_injection_test.go
+++ b/internal/httpapi/claude/deps_injection_test.go
@@ -53,6 +53,26 @@ func TestNormalizeClaudeRequestEnablesThinkingWhenRequested(t *testing.T) {
 	}
 }
 
+func TestNormalizeClaudeRequestNoThinkingAliasForcesThinkingOff(t *testing.T) {
+	req := map[string]any{
+		"model": "claude-opus-4-6-nothinking",
+		"messages": []any{
+			map[string]any{"role": "user", "content": "hello"},
+		},
+		"thinking": map[string]any{"type": "enabled", "budget_tokens": 1024},
+	}
+	out, err := normalizeClaudeRequest(mockClaudeConfig{}, req)
+	if err != nil {
+		t.Fatalf("normalizeClaudeRequest error: %v", err)
+	}
+	if out.Standard.ResolvedModel != "deepseek-v4-pro-nothinking" {
+		t.Fatalf("resolved model mismatch: got=%q", out.Standard.ResolvedModel)
+	}
+	if out.Standard.Thinking {
+		t.Fatalf("expected nothinking alias to force downstream thinking off")
+	}
+}
+
 func TestNormalizeClaudeRequestPrefersGlobalAliasMapping(t *testing.T) {
 	req := map[string]any{
 		"model": "claude-sonnet-4-6",
diff --git a/internal/httpapi/claude/handler_messages.go b/internal/httpapi/claude/handler_messages.go
index e424503..ad8f54e 100644
--- a/internal/httpapi/claude/handler_messages.go
+++ b/internal/httpapi/claude/handler_messages.go
@@ -52,7 +52,7 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store C
 		}
 	}
 	translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatClaude, translateModel, raw, stream)
-	translatedReq = applyClaudeThinkingPolicyToOpenAIRequest(translatedReq, req)
+	translatedReq, exposeThinking := applyClaudeThinkingPolicyToOpenAIRequest(translatedReq, req, stream)
 
 	isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1"
 	isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
@@ -118,23 +118,26 @@ func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store C
 		return true
 	}
 	converted := translatorcliproxy.FromOpenAINonStream(sdktranslator.FormatClaude, model, raw, translatedReq, body)
+	if !exposeThinking {
+		converted = stripClaudeThinkingBlocks(converted)
+	}
 	w.Header().Set("Content-Type", "application/json")
 	w.WriteHeader(http.StatusOK)
 	_, _ = w.Write(converted)
 	return true
 }
 
-func applyClaudeThinkingPolicyToOpenAIRequest(translated []byte, original map[string]any) []byte {
+func applyClaudeThinkingPolicyToOpenAIRequest(translated []byte, original map[string]any, stream bool) ([]byte, bool) {
 	req := map[string]any{}
 	if err := json.Unmarshal(translated, &req); err != nil {
-		return translated
+		return translated, false
 	}
 	enabled, ok := util.ResolveThinkingOverride(original)
 	if !ok {
 		if _, translatedHasOverride := util.ResolveThinkingOverride(req); translatedHasOverride {
-			return translated
+			return translated, false
 		}
-		enabled = false
+		enabled = !stream
 	}
 	typ := "disabled"
 	if enabled {
@@ -143,7 +146,33 @@ func applyClaudeThinkingPolicyToOpenAIRequest(translated []byte, original map[st
 	req["thinking"] = map[string]any{"type": typ}
 	out, err := json.Marshal(req)
 	if err != nil {
-		return translated
+		return translated, ok && enabled
+	}
+	return out, ok && enabled
+}
+
+// stripClaudeThinkingBlocks removes "thinking" content blocks from a Claude message body; raw is returned on JSON errors.
+func stripClaudeThinkingBlocks(raw []byte) []byte {
+	var payload map[string]any
+	if err := json.Unmarshal(raw, &payload); err != nil {
+		return raw
+	}
+	content, _ := payload["content"].([]any)
+	filtered := make([]any, 0, len(content))
+	for _, item := range content {
+		block, _ := item.(map[string]any)
+		if kind, _ := block["type"].(string); strings.TrimSpace(kind) == "thinking" {
+			continue
+		}
+		filtered = append(filtered, item)
+	}
+	if len(filtered) == len(content) { // nothing stripped (or no content): keep the original bytes, skip the re-encode
+		return raw
+	}
+	payload["content"] = filtered
+	out, err := json.Marshal(payload)
+	if err != nil {
+		return raw
 	}
 	return out
 }
diff --git a/internal/httpapi/claude/handler_util_test.go b/internal/httpapi/claude/handler_util_test.go
index 68f68ca..7b83c88 100644
--- a/internal/httpapi/claude/handler_util_test.go
+++ b/internal/httpapi/claude/handler_util_test.go
@@ -93,10 +93,10 @@ func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) {
 		t.Fatalf("expected call id preserved, got %#v", call)
 	}
 	content, _ := m["content"].(string)
-	if !containsStr(content, "<tool_calls>") || !containsStr(content, `<invoke name="search_web">`) {
-		t.Fatalf("expected assistant content to include XML tool call history, got %q", content)
+	if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) {
+		t.Fatalf("expected assistant content to include DSML tool call history, got %q", content)
 	}
-	if !containsStr(content, `<parameter name="query"><![CDATA[latest]]></parameter>`) {
+	if !containsStr(content, `<|DSML|parameter name="query"><![CDATA[latest]]></|DSML|parameter>`) {
 		t.Fatalf("expected assistant content to include serialized parameters, got %q", content)
 	}
 }
@@ -292,8 +292,8 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) {
 	if !containsStr(prompt, "Search the web") {
 		t.Fatalf("expected description in prompt")
 	}
-	if !containsStr(prompt, "<tool_calls>") {
-		t.Fatalf("expected XML tool_calls format in prompt")
+	if !containsStr(prompt, "<|DSML|tool_calls>") {
+		t.Fatalf("expected DSML tool_calls format in prompt")
 	}
 	if !containsStr(prompt, "TOOL CALL FORMAT") {
 		t.Fatalf("expected tool call format header in prompt")
diff --git a/internal/httpapi/claude/proxy_vercel_test.go b/internal/httpapi/claude/proxy_vercel_test.go
index 2eff38b..a8a9cd4 100644
--- a/internal/httpapi/claude/proxy_vercel_test.go
+++ b/internal/httpapi/claude/proxy_vercel_test.go
@@ -126,7 +126,7 @@ func TestClaudeProxyViaOpenAIPreservesThinkingOverride(t *testing.T) {
 	}
 }
 
-func TestClaudeProxyViaOpenAIDisablesThinkingByDefault(t *testing.T) {
+func TestClaudeProxyViaOpenAIEnablesThinkingInternallyByDefaultForNonStream(t *testing.T) {
 	openAI := &openAIProxyCaptureStub{}
 	h := &Handler{
 		Store:  claudeProxyStoreStub{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
@@ -141,8 +141,8 @@ func TestClaudeProxyViaOpenAIDisablesThinkingByDefault(t *testing.T) {
 		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
 	}
 	thinking, _ := openAI.seenReq["thinking"].(map[string]any)
-	if thinking["type"] != "disabled" {
-		t.Fatalf("expected Claude default to disable downstream thinking, got %#v", openAI.seenReq)
+	if thinking["type"] != "enabled" {
+		t.Fatalf("expected Claude non-stream default to enable downstream thinking internally, got %#v", openAI.seenReq)
 	}
 }
 
@@ -166,6 +166,43 @@ func TestClaudeProxyViaOpenAIEnablesThinkingWhenRequested(t *testing.T) {
 	}
 }
 
+func TestClaudeProxyViaOpenAIKeepsStreamDefaultThinkingDisabled(t *testing.T) {
+	openAI := &openAIProxyCaptureStub{}
+	h := &Handler{
+		Store:  claudeProxyStoreStub{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
+		OpenAI: openAI,
+	}
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(`{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hi"}],"stream":true}`))
+	rec := httptest.NewRecorder()
+
+	h.Messages(rec, req)
+
+	thinking, _ := openAI.seenReq["thinking"].(map[string]any)
+	if thinking["type"] != "disabled" {
+		t.Fatalf("expected Claude stream default to keep downstream thinking disabled, got %#v", openAI.seenReq)
+	}
+}
+
+func TestClaudeProxyViaOpenAIStripsThinkingBlocksFromNonStreamResponse(t *testing.T) {
+	body := `{"id":"chatcmpl_1","object":"chat.completion","created":1,"model":"claude-sonnet-4-5","choices":[{"index":0,"message":{"role":"assistant","content":null,"reasoning_content":"internal reasoning","tool_calls":[{"id":"call_1","type":"function","function":{"name":"search","arguments":"{\"q\":\"x\"}"}}]},"finish_reason":"tool_calls"}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`
+	h := &Handler{OpenAI: openAIProxyStub{status: 200, body: body}}
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":false}`))
+	rec := httptest.NewRecorder()
+
+	h.Messages(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
+	}
+	got := rec.Body.String()
+	if strings.Contains(got, `"type":"thinking"`) {
+		t.Fatalf("expected converted Claude response to strip thinking block, got %s", got)
+	}
+	if !strings.Contains(got, `"tool_use"`) {
+		t.Fatalf("expected converted Claude response to preserve tool_use, got %s", got)
+	}
+}
+
 func TestClaudeProxyTranslatesInlineImageToOpenAIDataURL(t *testing.T) {
 	openAI := &openAIProxyCaptureStub{}
 	h := &Handler{OpenAI: openAI}
diff --git a/internal/httpapi/claude/standard_request.go b/internal/httpapi/claude/standard_request.go
index 26c6fda..3f3e238 100644
--- a/internal/httpapi/claude/standard_request.go
+++ b/internal/httpapi/claude/standard_request.go
@@ -37,6 +37,9 @@ func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNorma
 		searchEnabled = false
 	}
 	thinkingEnabled := util.ResolveThinkingEnabled(req, false)
+	if config.IsNoThinkingModel(dsModel) {
+		thinkingEnabled = false
+	}
 	finalPrompt := prompt.MessagesPrepareWithThinking(toMessageMaps(dsPayload["messages"]), thinkingEnabled)
 	toolNames := extractClaudeToolNames(toolsRequested)
 	if len(toolNames) == 0 && len(toolsRequested) > 0 {
diff --git a/internal/httpapi/gemini/convert_request.go b/internal/httpapi/gemini/convert_request.go
index 1d32105..ca1497a 100644
--- a/internal/httpapi/gemini/convert_request.go
+++ b/internal/httpapi/gemini/convert_request.go
@@ -22,6 +22,9 @@ func normalizeGeminiRequest(store ConfigReader, routeModel string, req map[strin
 	}
 	defaultThinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)
 	thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled)
+	if config.IsNoThinkingModel(resolvedModel) {
+		thinkingEnabled = false
+	}
 
 	messagesRaw := geminiMessagesFromRequest(req)
 	if len(messagesRaw) == 0 {
diff --git a/internal/httpapi/gemini/convert_request_test.go b/internal/httpapi/gemini/convert_request_test.go
new file mode 100644
index 0000000..797f83c
--- /dev/null
+++ b/internal/httpapi/gemini/convert_request_test.go
@@ -0,0 +1,28 @@
+package gemini
+
+import "testing"
+
+func TestNormalizeGeminiRequestNoThinkingModelForcesThinkingOff(t *testing.T) { // a "-nothinking" route model must normalize to Thinking=false and Search=false even though the request sets reasoning_effort
+	req := map[string]any{
+		"contents": []any{
+			map[string]any{
+				"role":  "user",
+				"parts": []any{map[string]any{"text": "hello"}},
+			},
+		},
+		"reasoning_effort": "high", // presumably what would otherwise switch thinking on — confirm against util.ResolveThinkingEnabled
+	}
+	out, err := normalizeGeminiRequest(testGeminiConfig{}, "gemini-2.5-pro-nothinking", req, false)
+	if err != nil {
+		t.Fatalf("normalizeGeminiRequest error: %v", err)
+	}
+	if out.ResolvedModel != "deepseek-v4-pro-nothinking" {
+		t.Fatalf("resolved model mismatch: got=%q", out.ResolvedModel)
+	}
+	if out.Thinking {
+		t.Fatalf("expected nothinking model to force thinking off")
+	}
+	if out.Search {
+		t.Fatalf("expected search=false, got=%v", out.Search)
+	}
+}
diff --git a/internal/httpapi/openai/chat/chat_stream_runtime.go b/internal/httpapi/openai/chat/chat_stream_runtime.go
index 0f65fd0..e83a488 100644
--- a/internal/httpapi/openai/chat/chat_stream_runtime.go
+++ b/internal/httpapi/openai/chat/chat_stream_runtime.go
@@ -1,7 +1,6 @@
 package chat
 
 import (
-	"ds2api/internal/toolcall"
 	"encoding/json"
 	"net/http"
 	"strings"
@@ -33,11 +32,13 @@ type chatStreamRuntime struct {
 	toolCallsEmitted     bool
 	toolCallsDoneEmitted bool
 
-	toolSieve         toolstream.State
-	streamToolCallIDs map[int]string
-	streamToolNames   map[int]string
-	thinking          strings.Builder
-	text              strings.Builder
+	toolSieve             toolstream.State
+	streamToolCallIDs     map[int]string
+	streamToolNames       map[int]string
+	thinking              strings.Builder
+	toolDetectionThinking strings.Builder
+	text                  strings.Builder
+	responseMessageID     int
 
 	finalThinking     string
 	finalText         string
@@ -128,12 +129,16 @@ func (s *chatStreamRuntime) resetStreamToolCallState() {
 	s.streamToolNames = map[int]string{}
 }
 
-func (s *chatStreamRuntime) finalize(finishReason string) {
+func (s *chatStreamRuntime) finalize(finishReason string, deferEmptyOutput bool) bool {
+	s.finalErrorStatus = 0
+	s.finalErrorMessage = ""
+	s.finalErrorCode = ""
 	finalThinking := s.thinking.String()
+	finalToolDetectionThinking := s.toolDetectionThinking.String()
 	finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
 	s.finalThinking = finalThinking
 	s.finalText = finalText
-	detected := toolcall.ParseStandaloneToolCallsDetailed(finalText, s.toolNames)
+	detected := detectAssistantToolCalls(finalText, finalThinking, finalToolDetectionThinking, s.toolNames)
 	if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted {
 		finishReason = "tool_calls"
 		delta := map[string]any{
@@ -203,8 +208,14 @@ func (s *chatStreamRuntime) finalize(finishReason string) {
 	}
 	if len(detected.Calls) == 0 && !s.toolCallsEmitted && strings.TrimSpace(finalText) == "" {
 		status, message, code := upstreamEmptyOutputDetail(finishReason == "content_filter", finalText, finalThinking)
+		if deferEmptyOutput {
+			s.finalErrorStatus = status
+			s.finalErrorMessage = message
+			s.finalErrorCode = code
+			return false
+		}
 		s.sendFailedChunk(status, message, code)
-		return
+		return true
 	}
 	usage := openaifmt.BuildChatUsage(s.finalPrompt, finalThinking, finalText)
 	s.finalFinishReason = finishReason
@@ -217,12 +228,16 @@ func (s *chatStreamRuntime) finalize(finishReason string) {
 		usage,
 	))
 	s.sendDone()
+	return true
 }
 
 func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision {
 	if !parsed.Parsed {
 		return streamengine.ParsedDecision{}
 	}
+	if parsed.ResponseMessageID > 0 {
+		s.responseMessageID = parsed.ResponseMessageID
+	}
 	if parsed.ContentFilter {
 		if strings.TrimSpace(s.text.String()) == "" {
 			return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("content_filter")}
@@ -238,6 +253,12 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
 
 	newChoices := make([]map[string]any, 0, len(parsed.Parts))
 	contentSeen := false
+	for _, p := range parsed.ToolDetectionThinkingParts {
+		trimmed := sse.TrimContinuationOverlap(s.toolDetectionThinking.String(), p.Text)
+		if trimmed != "" {
+			s.toolDetectionThinking.WriteString(trimmed)
+		}
+	}
 	for _, p := range parsed.Parts {
 		cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
 		if s.searchEnabled && sse.IsCitation(cleanedText) {
diff --git a/internal/httpapi/openai/chat/empty_retry_runtime.go b/internal/httpapi/openai/chat/empty_retry_runtime.go
new file mode 100644
index 0000000..28e67e9
--- /dev/null
+++ b/internal/httpapi/openai/chat/empty_retry_runtime.go
@@ -0,0 +1,283 @@
+package chat
+
+import (
+	"context"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"ds2api/internal/auth"
+	"ds2api/internal/config"
+	dsprotocol "ds2api/internal/deepseek/protocol"
+	openaifmt "ds2api/internal/format/openai"
+	"ds2api/internal/sse"
+	streamengine "ds2api/internal/stream"
+)
+
+type chatNonStreamResult struct { // outcome of one non-stream upstream attempt, as filled in by collectChatNonStreamAttempt
+	thinking              string
+	toolDetectionThinking string
+	text                  string
+	contentFilter         bool
+	detectedCalls         int
+	body                  map[string]any
+	finishReason          string
+	responseMessageID     int
+}
+
+func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Context, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, historySession *chatHistorySession) { // non-stream chat completion: re-asks upstream for as long as shouldRetryChatNonStream reports an empty attempt
+	attempts := 0
+	currentResp := resp
+	usagePrompt := finalPrompt
+	accumulatedThinking := "" // thinking is carried across attempts; text is not
+	accumulatedToolDetectionThinking := ""
+	for {
+		result, ok := h.collectChatNonStreamAttempt(w, currentResp, completionID, model, usagePrompt, thinkingEnabled, searchEnabled, toolNames)
+		if !ok { // collectChatNonStreamAttempt has already written the upstream error to w
+			return
+		}
+		accumulatedThinking += sse.TrimContinuationOverlap(accumulatedThinking, result.thinking)
+		accumulatedToolDetectionThinking += sse.TrimContinuationOverlap(accumulatedToolDetectionThinking, result.toolDetectionThinking)
+		result.thinking = accumulatedThinking
+		result.toolDetectionThinking = accumulatedToolDetectionThinking
+		detected := detectAssistantToolCalls(result.text, result.thinking, result.toolDetectionThinking, toolNames) // re-detect over the accumulated thinking; overrides the per-attempt detection
+		result.detectedCalls = len(detected.Calls)
+		result.body = openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, result.thinking, result.text, detected.Calls)
+		result.finishReason = chatFinishReason(result.body)
+		if !shouldRetryChatNonStream(result, attempts) {
+			h.finishChatNonStreamResult(w, result, attempts, usagePrompt, historySession)
+			return
+		}
+
+		attempts++
+		config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "parent_message_id", result.responseMessageID)
+		retryPow, powErr := h.DS.GetPow(ctx, a, 3)
+		if powErr != nil {
+			config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", powErr)
+			retryPow = pow // best effort: reuse the PoW passed in by the caller
+		}
+		retryPayload := clonePayloadForEmptyOutputRetry(payload, result.responseMessageID)
+		nextResp, err := h.DS.CallCompletion(ctx, a, retryPayload, retryPow, 3)
+		if err != nil {
+			if historySession != nil {
+				historySession.error(http.StatusInternalServerError, "Failed to get completion.", "error", result.thinking, result.text)
+			}
+			writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
+			config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", err)
+			return
+		}
+		usagePrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts) // used for the next attempt's response body and usage
+		currentResp = nextResp
+	}
+}
+
+func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http.Response, completionID, model, usagePrompt string, thinkingEnabled, searchEnabled bool, toolNames []string) (chatNonStreamResult, bool) { // reads one upstream response into a chatNonStreamResult; false means an error response was already written to w
+	if resp.StatusCode != http.StatusOK {
+		defer func() { _ = resp.Body.Close() }()
+		body, _ := io.ReadAll(resp.Body)
+		writeOpenAIError(w, resp.StatusCode, string(body))
+		return chatNonStreamResult{}, false
+	}
+	result := sse.CollectStream(resp, thinkingEnabled, true) // NOTE(review): no Body.Close on this path — presumably the third argument makes CollectStream close it; confirm
+	stripReferenceMarkers := h.compatStripReferenceMarkers()
+	finalThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
+	finalToolDetectionThinking := cleanVisibleOutput(result.ToolDetectionThinking, stripReferenceMarkers)
+	finalText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
+	if searchEnabled { // citation markers are rewritten to links only when search is on
+		finalText = replaceCitationMarkersWithLinks(finalText, result.CitationLinks)
+	}
+	detected := detectAssistantToolCalls(finalText, finalThinking, finalToolDetectionThinking, toolNames)
+	respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, finalThinking, finalText, detected.Calls)
+	return chatNonStreamResult{
+		thinking:              finalThinking,
+		toolDetectionThinking: finalToolDetectionThinking,
+		text:                  finalText,
+		contentFilter:         result.ContentFilter,
+		detectedCalls:         len(detected.Calls),
+		body:                  respBody,
+		finishReason:          chatFinishReason(respBody),
+		responseMessageID:     result.ResponseMessageID,
+	}, true
+}
+
+func (h *Handler) finishChatNonStreamResult(w http.ResponseWriter, result chatNonStreamResult, attempts int, usagePrompt string, historySession *chatHistorySession) { // writes the terminal non-stream response (empty-output error or the 200 body), records history when a session exists, and logs the outcome
+	if result.detectedCalls == 0 && shouldWriteUpstreamEmptyOutputError(result.text) {
+		status, message, code := upstreamEmptyOutputDetail(result.contentFilter, result.text, result.thinking)
+		if historySession != nil {
+			historySession.error(status, message, code, result.thinking, result.text)
+		}
+		writeUpstreamEmptyOutputError(w, result.text, result.thinking, result.contentFilter)
+		config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", false, "retry_attempts", attempts, "success_source", "none", "content_filter", result.contentFilter)
+		return
+	}
+	if historySession != nil {
+		historySession.success(http.StatusOK, result.thinking, result.text, result.finishReason, openaifmt.BuildChatUsage(usagePrompt, result.thinking, result.text))
+	}
+	writeJSON(w, http.StatusOK, result.body)
+	source := "first_attempt"
+	if attempts > 0 { // at least one retry was issued before this result
+		source = "synthetic_retry"
+	}
+	config.Logger.Info("[openai_empty_retry] completed", "surface", "chat.completions", "stream", false, "retry_attempts", attempts, "success_source", source)
+}
+
+func chatFinishReason(respBody map[string]any) string { // finish_reason of the first choice, or "stop" when choices is missing, empty, not a []map[string]any, or the value is blank
+	if choices, ok := respBody["choices"].([]map[string]any); ok && len(choices) > 0 {
+		if fr, _ := choices[0]["finish_reason"].(string); strings.TrimSpace(fr) != "" {
+			return fr
+		}
+	}
+	return "stop"
+}
+
+func shouldRetryChatNonStream(result chatNonStreamResult, attempts int) bool { // retry only when the feature is enabled, the attempt cap is not reached, the result is not content-filtered, and it has neither tool calls nor non-blank text
+	return emptyOutputRetryEnabled() &&
+		attempts < emptyOutputRetryMaxAttempts() &&
+		!result.contentFilter &&
+		result.detectedCalls == 0 &&
+		strings.TrimSpace(result.text) == ""
+}
+
+func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, historySession *chatHistorySession) { // streaming chat completion: a single stream runtime is fed by the first upstream response and by any empty-output retries
+	streamRuntime, initialType, ok := h.prepareChatStreamRuntime(w, resp, completionID, model, finalPrompt, thinkingEnabled, searchEnabled, toolNames, historySession)
+	if !ok {
+		return
+	}
+	attempts := 0
+	currentResp := resp
+	for {
+		terminalWritten, retryable := h.consumeChatStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, historySession, attempts < emptyOutputRetryMaxAttempts()) // last argument: the empty-output error may be deferred only while attempts remain
+		if terminalWritten {
+			logChatStreamTerminal(streamRuntime, attempts)
+			return
+		}
+		if !retryable || !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() {
+			streamRuntime.finalize("stop", false) // not retrying: finalize again without deferral so the terminal frame is written
+			recordChatStreamHistory(streamRuntime, historySession)
+			config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", true, "retry_attempts", attempts, "success_source", "none")
+			return
+		}
+		attempts++
+		config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID)
+		retryPow, powErr := h.DS.GetPow(r.Context(), a, 3)
+		if powErr != nil {
+			config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", powErr)
+			retryPow = pow
+		}
+		nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3)
+		if err != nil {
+			failChatStreamRetry(streamRuntime, historySession, http.StatusInternalServerError, "Failed to get completion.", "error")
+			config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", err)
+			return
+		}
+		if nextResp.StatusCode != http.StatusOK {
+			defer func() { _ = nextResp.Body.Close() }() // defer inside the loop is harmless here: this branch returns right below
+			body, _ := io.ReadAll(nextResp.Body)
+			failChatStreamRetry(streamRuntime, historySession, nextResp.StatusCode, string(body), "error")
+			return
+		}
+		streamRuntime.finalPrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts) // the runtime's finalPrompt is replaced before the next attempt is consumed
+		currentResp = nextResp
+	}
+}
+
+func (h *Handler) prepareChatStreamRuntime(w http.ResponseWriter, resp *http.Response, completionID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, historySession *chatHistorySession) (*chatStreamRuntime, string, bool) { // rejects a non-200 upstream response; otherwise sets the SSE headers and builds the stream runtime. false means an error was already written to w
+	if resp.StatusCode != http.StatusOK {
+		defer func() { _ = resp.Body.Close() }()
+		body, _ := io.ReadAll(resp.Body)
+		if historySession != nil {
+			historySession.error(resp.StatusCode, string(body), "error", "", "")
+		}
+		writeOpenAIError(w, resp.StatusCode, string(body))
+		return nil, "", false
+	}
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache, no-transform")
+	w.Header().Set("Connection", "keep-alive")
+	w.Header().Set("X-Accel-Buffering", "no")
+	rc := http.NewResponseController(w)
+	_, canFlush := w.(http.Flusher)
+	if !canFlush { // only a warning: the runtime is still created and canFlush is passed on to it
+		config.Logger.Warn("[stream] response writer does not support flush; streaming may be buffered")
+	}
+	initialType := "text"
+	if thinkingEnabled { // returned to the caller, which hands it to ConsumeSSE as InitialType
+		initialType = "thinking"
+	}
+	streamRuntime := newChatStreamRuntime(
+		w, rc, canFlush, completionID, time.Now().Unix(), model, finalPrompt,
+		thinkingEnabled, searchEnabled, h.compatStripReferenceMarkers(), toolNames,
+		len(toolNames) > 0, h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence(),
+	)
+	return streamRuntime, initialType, true
+}
+
+func (h *Handler) consumeChatStreamAttempt(r *http.Request, resp *http.Response, streamRuntime *chatStreamRuntime, initialType string, thinkingEnabled bool, historySession *chatHistorySession, allowDeferEmpty bool) (bool, bool) { // drains one upstream SSE body into streamRuntime and finalizes; returns (terminalWritten, retryable)
+	defer func() { _ = resp.Body.Close() }()
+	finalReason := "stop"
+	streamengine.ConsumeSSE(streamengine.ConsumeConfig{
+		Context:             r.Context(),
+		Body:                resp.Body,
+		ThinkingEnabled:     thinkingEnabled,
+		InitialType:         initialType,
+		KeepAliveInterval:   time.Duration(dsprotocol.KeepAliveTimeout) * time.Second,
+		IdleTimeout:         time.Duration(dsprotocol.StreamIdleTimeout) * time.Second,
+		MaxKeepAliveNoInput: dsprotocol.MaxKeepaliveCount,
+	}, streamengine.ConsumeHooks{
+		OnKeepAlive: streamRuntime.sendKeepAlive,
+		OnParsed: func(parsed sse.LineResult) streamengine.ParsedDecision {
+			decision := streamRuntime.onParsed(parsed)
+			if historySession != nil {
+				historySession.progress(streamRuntime.thinking.String(), streamRuntime.text.String())
+			}
+			return decision
+		},
+		OnFinalize: func(reason streamengine.StopReason, _ error) {
+			if string(reason) == "content_filter" { // every other stop reason finalizes as "stop"
+				finalReason = "content_filter"
+			}
+		},
+		OnContextDone: func() { // NOTE(review): finalize below is called unconditionally once ConsumeSSE returns, so presumably also after cancellation — confirm that is intended
+			if historySession != nil {
+				historySession.stopped(streamRuntime.thinking.String(), streamRuntime.text.String(), string(streamengine.StopReasonContextCancelled))
+			}
+		},
+	})
+	terminalWritten := streamRuntime.finalize(finalReason, allowDeferEmpty && finalReason != "content_filter") // content_filter is never deferred, so it is never retried
+	if terminalWritten {
+		recordChatStreamHistory(streamRuntime, historySession)
+		return true, false
+	}
+	return false, true // finalize returned false (deferred empty output): the caller decides whether to retry
+}
+
+func recordChatStreamHistory(streamRuntime *chatStreamRuntime, historySession *chatHistorySession) { // copies the runtime's final state into history: error when finalErrorMessage is set, success otherwise; no-op without a session
+	if historySession == nil {
+		return
+	}
+	if streamRuntime.finalErrorMessage != "" {
+		historySession.error(streamRuntime.finalErrorStatus, streamRuntime.finalErrorMessage, streamRuntime.finalErrorCode, streamRuntime.thinking.String(), streamRuntime.text.String())
+		return
+	}
+	historySession.success(http.StatusOK, streamRuntime.finalThinking, streamRuntime.finalText, streamRuntime.finalFinishReason, streamRuntime.finalUsage)
+}
+
+func failChatStreamRetry(streamRuntime *chatStreamRuntime, historySession *chatHistorySession, status int, message, code string) { // sends a failure chunk through the stream runtime and, when a session exists, records the same error in history
+	streamRuntime.sendFailedChunk(status, message, code)
+	if historySession != nil {
+		historySession.error(status, message, code, streamRuntime.thinking.String(), streamRuntime.text.String())
+	}
+}
+
+func logChatStreamTerminal(streamRuntime *chatStreamRuntime, attempts int) { // logs the stream outcome: "terminal empty output" when finalErrorMessage is set, otherwise "completed" with its success source
+	source := "first_attempt"
+	if attempts > 0 {
+		source = "synthetic_retry"
+	}
+	if streamRuntime.finalErrorMessage != "" {
+		config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", true, "retry_attempts", attempts, "success_source", "none", "error_code", streamRuntime.finalErrorCode)
+		return
+	}
+	config.Logger.Info("[openai_empty_retry] completed", "surface", "chat.completions", "stream", true, "retry_attempts", attempts, "success_source", source)
+}
diff --git a/internal/httpapi/openai/chat/handler.go b/internal/httpapi/openai/chat/handler.go
index 81d1d22..da6c2ab 100644
--- a/internal/httpapi/openai/chat/handler.go
+++ b/internal/httpapi/openai/chat/handler.go
@@ -46,7 +46,16 @@ func (h *Handler) applyHistorySplit(ctx context.Context, a *auth.RequestAuth, st
 	if h == nil {
 		return stdReq, nil
 	}
-	return history.Service{Store: h.Store, DS: h.DS}.Apply(ctx, a, stdReq)
+	stdReq = shared.ApplyThinkingInjection(h.Store, stdReq)
+	svc := history.Service{Store: h.Store, DS: h.DS}
+	out, err := svc.ApplyCurrentInputFile(ctx, a, stdReq)
+	if err != nil {
+		return stdReq, err
+	}
+	if out.CurrentInputFileApplied {
+		return out, nil
+	}
+	return svc.Apply(ctx, a, out)
 }
 
 func (h *Handler) preprocessInlineFileInputs(ctx context.Context, a *auth.RequestAuth, req map[string]any) error {
@@ -114,6 +123,22 @@ func writeUpstreamEmptyOutputError(w http.ResponseWriter, text, thinking string,
 	return shared.WriteUpstreamEmptyOutputError(w, text, thinking, contentFilter)
 }
 
+func emptyOutputRetryEnabled() bool { // package-local alias for shared.EmptyOutputRetryEnabled
+	return shared.EmptyOutputRetryEnabled()
+}
+
+func emptyOutputRetryMaxAttempts() int { // package-local alias for shared.EmptyOutputRetryMaxAttempts
+	return shared.EmptyOutputRetryMaxAttempts()
+}
+
+func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any { // package-local alias for shared.ClonePayloadForEmptyOutputRetry
+	return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID)
+}
+
+func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string { // package-local alias for shared.UsagePromptWithEmptyOutputRetry
+	return shared.UsagePromptWithEmptyOutputRetry(originalPrompt, retryAttempts)
+}
+
 func formatIncrementalStreamToolCallDeltas(deltas []toolstream.ToolCallDelta, ids map[int]string) []map[string]any {
 	return shared.FormatIncrementalStreamToolCallDeltas(deltas, ids)
 }
@@ -125,3 +150,7 @@ func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta,
 func formatFinalStreamToolCallsWithStableIDs(calls []toolcall.ParsedToolCall, ids map[int]string) []map[string]any {
 	return shared.FormatFinalStreamToolCallsWithStableIDs(calls, ids)
 }
+
+func detectAssistantToolCalls(text, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult { // package-local alias for shared.DetectAssistantToolCalls
+	return shared.DetectAssistantToolCalls(text, exposedThinking, detectionThinking, toolNames)
+}
diff --git a/internal/httpapi/openai/chat/handler_chat.go b/internal/httpapi/openai/chat/handler_chat.go
index 4a6d01a..4ee77dc 100644
--- a/internal/httpapi/openai/chat/handler_chat.go
+++ b/internal/httpapi/openai/chat/handler_chat.go
@@ -22,6 +22,10 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
 		h.handleVercelStreamRelease(w, r)
 		return
 	}
+	if isVercelStreamPowRequest(r) {
+		h.handleVercelStreamPow(w, r)
+		return
+	}
 	if isVercelStreamPrepareRequest(r) {
 		h.handleVercelStreamPrepare(w, r)
 		return
@@ -105,10 +109,10 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 	if stdReq.Stream {
-		h.handleStream(w, r, resp, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, historySession)
+		h.handleStreamWithRetry(w, r, a, resp, payload, pow, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, historySession)
 		return
 	}
-	h.handleNonStream(w, resp, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, historySession)
+	h.handleNonStreamWithRetry(w, r.Context(), a, resp, payload, pow, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, historySession)
 }
 
 func (h *Handler) autoDeleteRemoteSession(ctx context.Context, a *auth.RequestAuth, sessionID string) {
@@ -158,11 +162,13 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co
 
 	stripReferenceMarkers := h.compatStripReferenceMarkers()
 	finalThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
+	finalToolDetectionThinking := cleanVisibleOutput(result.ToolDetectionThinking, stripReferenceMarkers)
 	finalText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
 	if searchEnabled {
 		finalText = replaceCitationMarkersWithLinks(finalText, result.CitationLinks)
 	}
-	if shouldWriteUpstreamEmptyOutputError(finalText) {
+	detected := detectAssistantToolCalls(finalText, finalThinking, finalToolDetectionThinking, toolNames)
+	if shouldWriteUpstreamEmptyOutputError(finalText) && len(detected.Calls) == 0 {
 		status, message, code := upstreamEmptyOutputDetail(result.ContentFilter, finalText, finalThinking)
 		if historySession != nil {
 			historySession.error(status, message, code, finalThinking, finalText)
@@ -170,7 +176,7 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co
 		writeUpstreamEmptyOutputError(w, finalText, finalThinking, result.ContentFilter)
 		return
 	}
-	respBody := openaifmt.BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText, toolNames)
+	respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, finalPrompt, finalThinking, finalText, detected.Calls)
 	finishReason := "stop"
 	if choices, ok := respBody["choices"].([]map[string]any); ok && len(choices) > 0 {
 		if fr, _ := choices[0]["finish_reason"].(string); strings.TrimSpace(fr) != "" {
@@ -249,9 +255,9 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
 		},
 		OnFinalize: func(reason streamengine.StopReason, _ error) {
 			if string(reason) == "content_filter" {
-				streamRuntime.finalize("content_filter")
+				streamRuntime.finalize("content_filter", false)
 			} else {
-				streamRuntime.finalize("stop")
+				streamRuntime.finalize("stop", false)
 			}
 			if historySession == nil {
 				return
diff --git a/internal/httpapi/openai/chat/handler_toolcall_test.go b/internal/httpapi/openai/chat/handler_toolcall_test.go
index f949a46..cf74192 100644
--- a/internal/httpapi/openai/chat/handler_toolcall_test.go
+++ b/internal/httpapi/openai/chat/handler_toolcall_test.go
@@ -142,6 +142,65 @@ func TestHandleNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
 	}
 }
 
+func TestHandleNonStreamPromotesThinkingToolCallsWhenTextEmpty(t *testing.T) { // upstream sends tool-call markup only in thinking_content (thinking on, no text): expect 200, one tool call, finish_reason=tool_calls and nil content
+	h := &Handler{}
+	resp := makeSSEHTTPResponse(
+		`data: {"p":"response/thinking_content","v":"<tool_calls><invoke name=\"search\"><parameter name=\"q\">from-thinking</parameter></invoke></tool_calls>"}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+
+	h.handleNonStream(rec, resp, "cid-thinking-tool", "deepseek-v4-pro", "prompt", true, false, []string{"search"}, nil)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200 for thinking tool calls, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	out := decodeJSONBody(t, rec.Body.String())
+	choices, _ := out["choices"].([]any)
+	if len(choices) == 0 {
+		t.Fatalf("expected choices, got %#v", out)
+	}
+	choice, _ := choices[0].(map[string]any)
+	if got := asString(choice["finish_reason"]); got != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
+	}
+	message, _ := choice["message"].(map[string]any)
+	toolCalls, _ := message["tool_calls"].([]any)
+	if len(toolCalls) != 1 {
+		t.Fatalf("expected one tool call, got %#v", message["tool_calls"])
+	}
+	if content, exists := message["content"]; !exists || content != nil { // the key must be present and decode to nil
+		t.Fatalf("expected content nil when tool call promoted, got %#v", message["content"])
+	}
+}
+
+func TestHandleNonStreamPromotesHiddenThinkingDSMLToolCallsWhenTextEmpty(t *testing.T) { // DSML tool-call markup in thinking_content with thinking disabled: the tool call is still returned while reasoning_content stays absent
+	h := &Handler{}
+	resp := makeSSEHTTPResponse(
+		`data: {"p":"response/thinking_content","v":"<|DSML|tool_calls><|DSML|invoke name=\"search\"><|DSML|parameter name=\"q\">from-hidden-thinking</|DSML|parameter></|DSML|invoke></|DSML|tool_calls>"}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+
+	h.handleNonStream(rec, resp, "cid-hidden-thinking-tool", "deepseek-v4-pro", "prompt", false, false, []string{"search"}, nil)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200 for hidden thinking tool calls, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	out := decodeJSONBody(t, rec.Body.String())
+	choices, _ := out["choices"].([]any)
+	choice, _ := choices[0].(map[string]any)
+	message, _ := choice["message"].(map[string]any)
+	if _, ok := message["reasoning_content"]; ok { // thinking was not requested, so the key must not appear at all
+		t.Fatalf("expected hidden thinking not to be exposed, got %#v", message)
+	}
+	toolCalls, _ := message["tool_calls"].([]any)
+	if len(toolCalls) != 1 {
+		t.Fatalf("expected one hidden-thinking tool call, got %#v", message["tool_calls"])
+	}
+	if got := asString(choice["finish_reason"]); got != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
+	}
+}
+
 func TestHandleStreamToolsPlainTextStreamsBeforeFinish(t *testing.T) {
 	h := &Handler{}
 	resp := makeSSEHTTPResponse(
@@ -214,6 +273,76 @@ func TestHandleStreamIncompleteCapturedToolJSONFlushesAsTextOnFinalize(t *testin
 	}
 }
 
+func TestHandleStreamPromotesThinkingToolCallsOnFinalizeWithoutMidstreamIntercept(t *testing.T) { // streaming with thinking on: a reasoning_content delta must be present, plus a tool_calls delta and finish_reason=tool_calls
+	h := &Handler{}
+	resp := makeSSEHTTPResponse(
+		`data: {"p":"response/thinking_content","v":"<tool_calls><invoke name=\"search\"><parameter name=\"q\">from-thinking</parameter></invoke></tool_calls>"}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
+
+	h.handleStream(rec, req, resp, "cid-thinking-stream", "deepseek-v4-pro", "prompt", true, false, []string{"search"}, nil)
+
+	frames, done := parseSSEDataFrames(t, rec.Body.String())
+	if !done {
+		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
+	}
+	if !streamHasToolCallsDelta(frames) {
+		t.Fatalf("expected tool_calls delta from finalize fallback, body=%s", rec.Body.String())
+	}
+	reasoningSeen := false
+	for _, frame := range frames { // scan every choice delta for a non-empty reasoning_content
+		choices, _ := frame["choices"].([]any)
+		for _, item := range choices {
+			choice, _ := item.(map[string]any)
+			delta, _ := choice["delta"].(map[string]any)
+			if asString(delta["reasoning_content"]) != "" {
+				reasoningSeen = true
+			}
+		}
+	}
+	if !reasoningSeen {
+		t.Fatalf("expected reasoning_content to stream before finalize fallback, body=%s", rec.Body.String())
+	}
+	if streamFinishReason(frames) != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
+	}
+}
+
+func TestHandleStreamPromotesHiddenThinkingDSMLToolCallsOnFinalize(t *testing.T) { // streaming with thinking off: no reasoning_content delta may appear, yet a tool_calls delta and finish_reason=tool_calls are expected
+	h := &Handler{}
+	resp := makeSSEHTTPResponse(
+		`data: {"p":"response/thinking_content","v":"<|DSML|tool_calls><|DSML|invoke name=\"search\"><|DSML|parameter name=\"q\">from-hidden-thinking</|DSML|parameter></|DSML|invoke></|DSML|tool_calls>"}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
+
+	h.handleStream(rec, req, resp, "cid-hidden-thinking-stream", "deepseek-v4-pro", "prompt", false, false, []string{"search"}, nil)
+
+	frames, done := parseSSEDataFrames(t, rec.Body.String())
+	if !done {
+		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
+	}
+	if !streamHasToolCallsDelta(frames) {
+		t.Fatalf("expected tool_calls delta from hidden thinking fallback, body=%s", rec.Body.String())
+	}
+	for _, frame := range frames { // any non-empty reasoning_content delta fails the test
+		choices, _ := frame["choices"].([]any)
+		for _, item := range choices {
+			choice, _ := item.(map[string]any)
+			delta, _ := choice["delta"].(map[string]any)
+			if asString(delta["reasoning_content"]) != "" {
+				t.Fatalf("did not expect hidden reasoning_content delta, body=%s", rec.Body.String())
+			}
+		}
+	}
+	if streamFinishReason(frames) != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
+	}
+}
+
 func TestHandleStreamEmitsDistinctToolCallIDsAcrossSeparateToolBlocks(t *testing.T) {
 	h := &Handler{}
 	resp := makeSSEHTTPResponse(
diff --git a/internal/httpapi/openai/chat/test_helpers_test.go b/internal/httpapi/openai/chat/test_helpers_test.go
index 0423f4e..e382a37 100644
--- a/internal/httpapi/openai/chat/test_helpers_test.go
+++ b/internal/httpapi/openai/chat/test_helpers_test.go
@@ -20,6 +20,10 @@ type mockOpenAIConfig struct {
 	embedProv           string
 	historySplitEnabled bool
 	historySplitTurns   int
+	currentInputEnabled bool
+	currentInputMin     int
+	thinkingInjection   *bool
+	thinkingPrompt      string
 }
 
 func (m mockOpenAIConfig) ModelAliases() map[string]string { return m.aliases }
@@ -45,6 +49,17 @@ func (m mockOpenAIConfig) HistorySplitTriggerAfterTurns() int {
 	}
 	return m.historySplitTurns
 }
+func (m mockOpenAIConfig) CurrentInputFileEnabled() bool { return m.currentInputEnabled }
+func (m mockOpenAIConfig) CurrentInputFileMinChars() int { return m.currentInputMin }
+
+// ThinkingInjectionEnabled reports the configured flag; a nil pointer
+// (flag never set on the mock) counts as disabled.
+func (m mockOpenAIConfig) ThinkingInjectionEnabled() bool {
+	return m.thinkingInjection != nil && *m.thinkingInjection
+}
+
+// ThinkingInjectionPrompt returns the prompt text stored on the mock.
+func (m mockOpenAIConfig) ThinkingInjectionPrompt() string { return m.thinkingPrompt }
 
 type streamStatusAuthStub struct{}
 
diff --git a/internal/httpapi/openai/chat/vercel_stream.go b/internal/httpapi/openai/chat/vercel_stream.go
index 1a3c00d..2a59410 100644
--- a/internal/httpapi/openai/chat/vercel_stream.go
+++ b/internal/httpapi/openai/chat/vercel_stream.go
@@ -150,6 +150,44 @@ func (h *Handler) handleVercelStreamRelease(w http.ResponseWriter, r *http.Reque
 	writeJSON(w, http.StatusOK, map[string]any{"success": true})
 }
 
+// handleVercelStreamPow answers the internal Vercel "stream pow" call: after checking X-Ds2-Internal-Token against the
+// internal secret it resolves the lease named by lease_id in the JSON body and returns the PoW header from h.DS.GetPow.
+func (h *Handler) handleVercelStreamPow(w http.ResponseWriter, r *http.Request) {
+	if !config.IsVercel() {
+		http.NotFound(w, r)
+		return
+	}
+	// An empty secret rejects every caller; the token comparison itself is constant-time.
+	secret, token := vercelInternalSecret(), strings.TrimSpace(r.Header.Get("X-Ds2-Internal-Token"))
+	if authorized := secret != "" && subtle.ConstantTimeCompare([]byte(token), []byte(secret)) == 1; !authorized {
+		writeOpenAIError(w, http.StatusUnauthorized, "unauthorized internal request")
+		return
+	}
+
+	var payload map[string]any
+	if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
+		writeOpenAIError(w, http.StatusBadRequest, "invalid json")
+		return
+	}
+	rawLeaseID, _ := payload["lease_id"].(string)
+	leaseID := strings.TrimSpace(rawLeaseID)
+	if leaseID == "" {
+		writeOpenAIError(w, http.StatusBadRequest, "lease_id is required")
+		return
+	}
+	leaseAuth := h.lookupStreamLeaseAuth(leaseID)
+	if leaseAuth == nil {
+		writeOpenAIError(w, http.StatusNotFound, "stream lease not found or expired")
+		return
+	}
+	powHeader, err := h.DS.GetPow(r.Context(), leaseAuth, 3)
+	if err != nil {
+		writeOpenAIError(w, http.StatusInternalServerError, "Failed to get PoW.")
+		return
+	}
+	writeJSON(w, http.StatusOK, map[string]any{"pow_header": powHeader})
+}
+
 func isVercelStreamPrepareRequest(r *http.Request) bool {
 	if r == nil {
 		return false
@@ -164,6 +202,13 @@ func isVercelStreamReleaseRequest(r *http.Request) bool {
 	return strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
 }
 
+// isVercelStreamPowRequest reports whether the request's query string carries
+// __stream_pow=1 (whitespace around the value is ignored).
+// A nil request never matches.
+func isVercelStreamPowRequest(r *http.Request) bool {
+	return r != nil && strings.TrimSpace(r.URL.Query().Get("__stream_pow")) == "1"
+}
+
 func vercelInternalSecret() string {
 	if v := strings.TrimSpace(os.Getenv("DS2API_VERCEL_INTERNAL_SECRET")); v != "" {
 		return v
@@ -199,6 +244,20 @@ func (h *Handler) holdStreamLease(a *auth.RequestAuth) string {
 	return leaseID
 }
 
+func (h *Handler) lookupStreamLeaseAuth(leaseID string) *auth.RequestAuth {
+	key := strings.TrimSpace(leaseID)
+	if key == "" {
+		return nil // a blank id never names a lease
+	}
+	h.leaseMu.Lock() // the mutex is held only for the map read
+	lease, found := h.streamLeases[key]
+	h.leaseMu.Unlock()
+	if found && !time.Now().After(lease.ExpiresAt) {
+		return lease.Auth // the lease entry itself is left in the map
+	}
+	return nil // unknown or expired lease
+}
+
 func (h *Handler) releaseStreamLease(leaseID string) bool {
 	leaseID = strings.TrimSpace(leaseID)
 	if leaseID == "" {
diff --git a/internal/httpapi/openai/deps_injection_test.go b/internal/httpapi/openai/deps_injection_test.go
index 0d906aa..1f199bb 100644
--- a/internal/httpapi/openai/deps_injection_test.go
+++ b/internal/httpapi/openai/deps_injection_test.go
@@ -16,6 +16,10 @@ type mockOpenAIConfig struct {
 	embedProv           string
 	historySplitEnabled bool
 	historySplitTurns   int
+	currentInputEnabled bool
+	currentInputMin     int
+	thinkingInjection   *bool
+	thinkingPrompt      string
 }
 
 func (m mockOpenAIConfig) ModelAliases() map[string]string { return m.aliases }
@@ -41,6 +45,17 @@ func (m mockOpenAIConfig) HistorySplitTriggerAfterTurns() int {
 	}
 	return m.historySplitTurns
 }
+func (m mockOpenAIConfig) CurrentInputFileEnabled() bool { return m.currentInputEnabled }
+func (m mockOpenAIConfig) CurrentInputFileMinChars() int { return m.currentInputMin }
+
+// ThinkingInjectionEnabled reports the configured flag; a nil pointer
+// (flag never set on the mock) counts as disabled.
+func (m mockOpenAIConfig) ThinkingInjectionEnabled() bool {
+	return m.thinkingInjection != nil && *m.thinkingInjection
+}
+
+// ThinkingInjectionPrompt returns the prompt text stored on the mock.
+func (m mockOpenAIConfig) ThinkingInjectionPrompt() string { return m.thinkingPrompt }
 
 func TestNormalizeOpenAIChatRequestWithConfigInterface(t *testing.T) {
 	cfg := mockOpenAIConfig{
@@ -65,6 +80,28 @@ func TestNormalizeOpenAIChatRequestWithConfigInterface(t *testing.T) {
 	}
 }
 
+// TestNormalizeOpenAIChatRequestDisablesThinkingForNoThinkingModel: a "-nothinking" model keeps its name and ends up with Thinking and Search off even though reasoning_effort is "high".
+func TestNormalizeOpenAIChatRequestDisablesThinkingForNoThinkingModel(t *testing.T) {
+	body := map[string]any{
+		"model":            "deepseek-v4-pro-nothinking",
+		"messages":         []any{map[string]any{"role": "user", "content": "hello"}},
+		"reasoning_effort": "high",
+	}
+	std, err := promptcompat.NormalizeOpenAIChatRequest(mockOpenAIConfig{wideInput: true}, body, "")
+	if err != nil {
+		t.Fatalf("promptcompat.NormalizeOpenAIChatRequest error: %v", err)
+	}
+	if std.ResolvedModel != "deepseek-v4-pro-nothinking" {
+		t.Fatalf("resolved model mismatch: got=%q", std.ResolvedModel)
+	}
+	if std.Thinking {
+		t.Fatalf("expected nothinking model to force thinking off")
+	}
+	if std.Search {
+		t.Fatalf("expected search=false for deepseek-v4-pro-nothinking, got=%v", std.Search)
+	}
+}
+
 func TestNormalizeOpenAIResponsesRequestWideInputPolicyFromInterface(t *testing.T) {
 	req := map[string]any{
 		"model": "deepseek-v4-flash",
diff --git a/internal/httpapi/openai/history/current_input_file.go b/internal/httpapi/openai/history/current_input_file.go
new file mode 100644
index 0000000..d0cf990
--- /dev/null
+++ b/internal/httpapi/openai/history/current_input_file.go
@@ -0,0 +1,94 @@
+package history
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"strings"
+
+	"ds2api/internal/auth"
+	dsclient "ds2api/internal/deepseek/client"
+	"ds2api/internal/httpapi/openai/shared"
+	"ds2api/internal/promptcompat"
+)
+
+const ( // fixed metadata sent with the upload in ApplyCurrentInputFile
+	currentInputFilename    = "IGNORE.txt"                // Filename of the uploaded context file
+	currentInputContentType = "text/plain; charset=utf-8" // ContentType of the upload
+	currentInputPurpose     = "assistants"                // Purpose field of the upload
+)
+
+// ApplyCurrentInputFile uploads the transcript built from stdReq.Messages as a context file when the feature is enabled and
+// either the latest user input has at least CurrentInputFileMinChars runes or history splitting would trigger. On success
+// the live messages collapse to one neutral user prompt and the file id is added to RefFileIDs via prependUniqueRefFileID.
+func (s Service) ApplyCurrentInputFile(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) {
+	if s.DS == nil || s.Store == nil || a == nil || !s.Store.CurrentInputFileEnabled() {
+		return stdReq, nil
+	}
+	minChars := s.Store.CurrentInputFileMinChars()
+	// Without a non-blank latest user message the request passes through untouched.
+	userIndex, userText := latestUserInputForFile(stdReq.Messages)
+	if userIndex < 0 {
+		return stdReq, nil
+	}
+	// Offload when the input is long enough, or when history splitting is enabled and would split these messages.
+	splitDue := s.Store.HistorySplitEnabled() && wouldSplitHistory(stdReq.Messages, s.Store.HistorySplitTriggerAfterTurns())
+	if longEnough := len([]rune(userText)) >= minChars; !longEnough && !splitDue {
+		return stdReq, nil
+	}
+
+	transcript := promptcompat.BuildOpenAICurrentInputContextTranscript(stdReq.Messages)
+	if strings.TrimSpace(transcript) == "" {
+		return stdReq, errors.New("current user input file produced empty transcript")
+	}
+	upload := dsclient.UploadFileRequest{
+		Filename:    currentInputFilename,
+		ContentType: currentInputContentType,
+		Purpose:     currentInputPurpose,
+		Data:        []byte(transcript),
+	}
+	uploaded, err := s.DS.UploadFile(ctx, a, upload, 3)
+	if err != nil {
+		return stdReq, fmt.Errorf("upload current user input file: %w", err)
+	}
+	fileID := strings.TrimSpace(uploaded.ID)
+	if fileID == "" {
+		return stdReq, errors.New("upload current user input file returned empty file id")
+	}
+
+	// Only the neutral continuation prompt stays inline; the final prompt is rebuilt from that single message.
+	liveMessages := []any{map[string]any{"role": "user", "content": currentInputFilePrompt()}}
+	stdReq.Messages = liveMessages
+	stdReq.CurrentInputFileApplied = true
+	stdReq.RefFileIDs = prependUniqueRefFileID(stdReq.RefFileIDs, fileID)
+	stdReq.FinalPrompt, stdReq.ToolNames = promptcompat.BuildOpenAIPrompt(liveMessages, stdReq.ToolsRaw, "", stdReq.ToolChoice, stdReq.Thinking)
+	return stdReq, nil
+}
+
+// latestUserInputForFile scans messages from the end for the most recent entry whose role is "user"
+// (case-insensitive, trimmed) and returns its index together with its normalized content.
+// It returns (-1, "") when no user message exists or when the newest one is blank.
+func latestUserInputForFile(messages []any) (int, string) {
+	for idx := len(messages) - 1; idx >= 0; idx-- {
+		entry, isMap := messages[idx].(map[string]any)
+		if !isMap || strings.ToLower(strings.TrimSpace(shared.AsString(entry["role"]))) != "user" {
+			continue
+		}
+		// Only the newest user message counts; a blank one is not replaced by an older turn.
+		content := promptcompat.NormalizeOpenAIContentForPrompt(entry["content"])
+		if strings.TrimSpace(content) == "" {
+			break
+		}
+		return idx, content
+	}
+	return -1, ""
+}
+
+func wouldSplitHistory(messages []any, triggerAfterTurns int) bool {
+	_, older := SplitOpenAIHistoryMessages(messages, triggerAfterTurns)
+	return len(older) != 0 // true when the splitter returns a non-empty history part
+}
+
+// currentInputFilePrompt returns the fixed user-message text that ApplyCurrentInputFile
+// leaves in the live prompt after uploading the conversation transcript.
+func currentInputFilePrompt() string { return "The current request and prior conversation context have already been provided. Answer the latest user request directly." }
diff --git a/internal/httpapi/openai/history/history_split.go b/internal/httpapi/openai/history/history_split.go
index 96775ef..de7bf51 100644
--- a/internal/httpapi/openai/history/history_split.go
+++ b/internal/httpapi/openai/history/history_split.go
@@ -24,7 +24,7 @@ type Service struct {
 }
 
 func (s Service) Apply(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) {
-	if s.DS == nil || s.Store == nil || a == nil {
+	if s.DS == nil || s.Store == nil || a == nil || !s.Store.HistorySplitEnabled() {
 		return stdReq, nil
 	}
 
diff --git a/internal/httpapi/openai/history_split_test.go b/internal/httpapi/openai/history_split_test.go
index c6059d7..2fa6080 100644
--- a/internal/httpapi/openai/history_split_test.go
+++ b/internal/httpapi/openai/history_split_test.go
@@ -76,7 +76,7 @@ func TestBuildOpenAIHistoryTranscriptUsesInjectedFileWrapper(t *testing.T) {
 	if !strings.Contains(transcript, "[reasoning_content]") || !strings.Contains(transcript, "hidden reasoning") {
 		t.Fatalf("expected reasoning block preserved, got %q", transcript)
 	}
-	if !strings.Contains(transcript, "<tool_calls>") {
+	if !strings.Contains(transcript, "<|DSML|tool_calls>") {
 		t.Fatalf("expected tool calls preserved, got %q", transcript)
 	}
 	if !strings.HasSuffix(transcript, "\n[file name]: IGNORE\n[file content begin]\n") {
@@ -149,6 +149,219 @@ func TestApplyHistorySplitSkipsFirstTurn(t *testing.T) {
 	}
 }
 
+// TestApplyThinkingInjectionAppendsLatestUserPrompt: with thinking injection on and a single short user turn,
+// nothing is uploaded and the injection marker follows the user text in the final prompt.
+func TestApplyThinkingInjectionAppendsLatestUserPrompt(t *testing.T) {
+	stub := &inlineUploadDSStub{}
+	surface := &openAITestSurface{
+		Store: mockOpenAIConfig{
+			wideInput:           true,
+			historySplitEnabled: true,
+			historySplitTurns:   1,
+			thinkingInjection:   boolPtr(true),
+		},
+		DS: stub,
+	}
+	req := map[string]any{
+		"model":    "deepseek-v4-flash",
+		"messages": []any{map[string]any{"role": "user", "content": "hello"}},
+	}
+	stdReq, err := promptcompat.NormalizeOpenAIChatRequest(surface.Store, req, "")
+	if err != nil {
+		t.Fatalf("normalize failed: %v", err)
+	}
+
+	got, err := surface.applyHistorySplit(context.Background(), &auth.RequestAuth{DeepSeekToken: "token"}, stdReq)
+	if err != nil {
+		t.Fatalf("apply thinking injection failed: %v", err)
+	}
+	if uploads := len(stub.uploadCalls); uploads != 0 {
+		t.Fatalf("expected no upload for first short turn, got %d", uploads)
+	}
+	if !strings.Contains(got.FinalPrompt, "hello\n\n"+promptcompat.ThinkingInjectionMarker) {
+		t.Fatalf("expected thinking injection after latest user message, got %s", got.FinalPrompt)
+	}
+}
+
+// TestApplyThinkingInjectionUsesCustomPrompt: when the config supplies its own thinking prompt,
+// that text must follow the latest user message in the final prompt.
+func TestApplyThinkingInjectionUsesCustomPrompt(t *testing.T) {
+	stub := &inlineUploadDSStub{}
+	surface := &openAITestSurface{
+		Store: mockOpenAIConfig{
+			wideInput:         true,
+			thinkingInjection: boolPtr(true),
+			thinkingPrompt:    "custom thinking format",
+		},
+		DS: stub,
+	}
+	req := map[string]any{
+		"model":    "deepseek-v4-flash",
+		"messages": []any{map[string]any{"role": "user", "content": "hello"}},
+	}
+	stdReq, err := promptcompat.NormalizeOpenAIChatRequest(surface.Store, req, "")
+	if err != nil {
+		t.Fatalf("normalize failed: %v", err)
+	}
+
+	got, err := surface.applyHistorySplit(context.Background(), &auth.RequestAuth{DeepSeekToken: "token"}, stdReq)
+	if err != nil {
+		t.Fatalf("apply thinking injection failed: %v", err)
+	}
+	if !strings.Contains(got.FinalPrompt, "hello\n\ncustom thinking format") {
+		t.Fatalf("expected custom thinking injection after latest user message, got %s", got.FinalPrompt)
+	}
+}
+
+// TestApplyHistorySplitDirectPassThroughWhenBothSplitsDisabled: with history split and current-input file both off,
+// a multi-turn request triggers no upload and its turns stay inline in the final prompt.
+func TestApplyHistorySplitDirectPassThroughWhenBothSplitsDisabled(t *testing.T) {
+	stub := &inlineUploadDSStub{}
+	surface := &openAITestSurface{
+		Store: mockOpenAIConfig{
+			wideInput:           true,
+			historySplitEnabled: false,
+			currentInputEnabled: false,
+		},
+		DS: stub,
+	}
+	req := map[string]any{"model": "deepseek-v4-flash", "messages": historySplitTestMessages()}
+	stdReq, err := promptcompat.NormalizeOpenAIChatRequest(surface.Store, req, "")
+	if err != nil {
+		t.Fatalf("normalize failed: %v", err)
+	}
+
+	got, err := surface.applyHistorySplit(context.Background(), &auth.RequestAuth{DeepSeekToken: "token"}, stdReq)
+	if err != nil {
+		t.Fatalf("apply history split failed: %v", err)
+	}
+	if uploads := len(stub.uploadCalls); uploads != 0 {
+		t.Fatalf("expected no uploads when both split modes are disabled, got %d", uploads)
+	}
+	if got.CurrentInputFileApplied || got.HistoryText != "" {
+		t.Fatalf("expected direct pass-through, got current_input=%v history=%q", got.CurrentInputFileApplied, got.HistoryText)
+	}
+	// Both the oldest and the newest user turn must still be part of the prompt.
+	if !strings.Contains(got.FinalPrompt, "first user turn") || !strings.Contains(got.FinalPrompt, "latest user turn") {
+		t.Fatalf("expected original prompt context to stay inline, got %s", got.FinalPrompt)
+	}
+}
+
+// TestApplyCurrentInputFileUploadsFirstTurnWithInjectedWrapper: a first user turn longer than currentInputMin is uploaded as
+// IGNORE.txt (wrapper, turn markers and thinking injection included) and the live prompt keeps only the neutral instruction.
+func TestApplyCurrentInputFileUploadsFirstTurnWithInjectedWrapper(t *testing.T) {
+	stub := &inlineUploadDSStub{}
+	surface := &openAITestSurface{
+		Store: mockOpenAIConfig{
+			wideInput:           true,
+			historySplitEnabled: true,
+			historySplitTurns:   1,
+			currentInputEnabled: true,
+			currentInputMin:     10,
+			thinkingInjection:   boolPtr(true),
+		},
+		DS: stub,
+	}
+	req := map[string]any{
+		"model":    "deepseek-v4-flash",
+		"messages": []any{map[string]any{"role": "user", "content": "first turn content that is long enough"}},
+	}
+	stdReq, err := promptcompat.NormalizeOpenAIChatRequest(surface.Store, req, "")
+	if err != nil {
+		t.Fatalf("normalize failed: %v", err)
+	}
+
+	got, err := surface.applyHistorySplit(context.Background(), &auth.RequestAuth{DeepSeekToken: "token"}, stdReq)
+	if err != nil {
+		t.Fatalf("apply current input file failed: %v", err)
+	}
+	if uploads := len(stub.uploadCalls); uploads != 1 {
+		t.Fatalf("expected 1 current input upload, got %d", uploads)
+	}
+	first := stub.uploadCalls[0]
+	if first.Filename != "IGNORE.txt" {
+		t.Fatalf("unexpected upload filename: %q", first.Filename)
+	}
+	fileBody := string(first.Data)
+	if !strings.HasPrefix(fileBody, "[file content end]\n\n") {
+		t.Fatalf("expected injected file wrapper prefix, got %q", fileBody)
+	}
+	if !strings.Contains(fileBody, "<｜begin▁of▁sentence｜><｜User｜>first turn content that is long enough") {
+		t.Fatalf("expected serialized current user turn markers, got %q", fileBody)
+	}
+	if !strings.Contains(fileBody, promptcompat.ThinkingInjectionMarker) {
+		t.Fatalf("expected thinking injection in current input file, got %q", fileBody)
+	}
+	if !strings.HasSuffix(fileBody, "\n[file name]: IGNORE\n[file content begin]\n") {
+		t.Fatalf("expected injected file wrapper suffix, got %q", fileBody)
+	}
+	if strings.Contains(got.FinalPrompt, "first turn content that is long enough") {
+		t.Fatalf("expected current input text to be replaced in live prompt, got %s", got.FinalPrompt)
+	}
+	if strings.Contains(got.FinalPrompt, "CURRENT_USER_INPUT.txt") || strings.Contains(got.FinalPrompt, "IGNORE.txt") || strings.Contains(got.FinalPrompt, "Read that file") {
+		t.Fatalf("expected live prompt not to instruct file reads, got %s", got.FinalPrompt)
+	}
+	if !strings.Contains(got.FinalPrompt, "Answer the latest user request directly.") {
+		t.Fatalf("expected neutral continuation instruction in live prompt, got %s", got.FinalPrompt)
+	}
+	if len(got.RefFileIDs) != 1 || got.RefFileIDs[0] != "file-inline-1" {
+		t.Fatalf("expected current input file id in ref_file_ids, got %#v", got.RefFileIDs)
+	}
+}
+
+// TestApplyCurrentInputFileReplacesHistorySplitWithFullContextFile: when history splitting would trigger, the current-input
+// file takes over even though the latest turn is below currentInputMin: one IGNORE.txt upload, no HISTORY transcript.
+func TestApplyCurrentInputFileReplacesHistorySplitWithFullContextFile(t *testing.T) {
+	stub := &inlineUploadDSStub{}
+	surface := &openAITestSurface{
+		Store: mockOpenAIConfig{
+			wideInput:           true,
+			historySplitEnabled: true,
+			historySplitTurns:   1,
+			currentInputEnabled: true,
+			currentInputMin:     1000,
+			thinkingInjection:   boolPtr(true),
+		},
+		DS: stub,
+	}
+	req := map[string]any{"model": "deepseek-v4-flash", "messages": historySplitTestMessages()}
+	stdReq, err := promptcompat.NormalizeOpenAIChatRequest(surface.Store, req, "")
+	if err != nil {
+		t.Fatalf("normalize failed: %v", err)
+	}
+
+	got, err := surface.applyHistorySplit(context.Background(), &auth.RequestAuth{DeepSeekToken: "token"}, stdReq)
+	if err != nil {
+		t.Fatalf("apply current input file failed: %v", err)
+	}
+	if !got.CurrentInputFileApplied {
+		t.Fatalf("expected current input file to replace history split")
+	}
+	if uploads := len(stub.uploadCalls); uploads != 1 {
+		t.Fatalf("expected one current input upload, got %d", uploads)
+	}
+	first := stub.uploadCalls[0]
+	if first.Filename != "IGNORE.txt" {
+		t.Fatalf("expected IGNORE.txt upload, got %q", first.Filename)
+	}
+	fileBody := string(first.Data)
+	// Every turn of the conversation, plus the thinking injection, must be inside the uploaded file.
+	for _, want := range []string{"system instructions", "first user turn", "hidden reasoning", "tool result", "latest user turn", promptcompat.ThinkingInjectionMarker} {
+		if !strings.Contains(fileBody, want) {
+			t.Fatalf("expected full context file to contain %q, got %q", want, fileBody)
+		}
+	}
+	if got.HistoryText != "" {
+		t.Fatalf("expected no HISTORY transcript when current input file replaces split, got %q", got.HistoryText)
+	}
+	if strings.Contains(got.FinalPrompt, "first user turn") || strings.Contains(got.FinalPrompt, "latest user turn") || strings.Contains(got.FinalPrompt, "CURRENT_USER_INPUT.txt") || strings.Contains(got.FinalPrompt, "IGNORE.txt") || strings.Contains(got.FinalPrompt, "Read that file") {
+		t.Fatalf("expected live prompt to use only a neutral continuation instruction, got %s", got.FinalPrompt)
+	}
+	if !strings.Contains(got.FinalPrompt, "Answer the latest user request directly.") {
+		t.Fatalf("expected neutral continuation instruction in live prompt, got %s", got.FinalPrompt)
+	}
+}
+
 func TestApplyHistorySplitCarriesHistoryText(t *testing.T) {
 	ds := &inlineUploadDSStub{}
 	h := &openAITestSurface{
@@ -424,3 +637,7 @@ func TestHistorySplitWorksAcrossAutoDeleteModes(t *testing.T) {
 func defaultToolChoicePolicy() promptcompat.ToolChoicePolicy {
 	return promptcompat.DefaultToolChoicePolicy()
 }
+
+// boolPtr returns a pointer to a copy of v, for filling optional *bool
+// fixture fields such as mockOpenAIConfig.thinkingInjection.
+func boolPtr(v bool) *bool { return &v }
diff --git a/internal/httpapi/openai/models_route_test.go b/internal/httpapi/openai/models_route_test.go
index 9e318f9..60b014d 100644
--- a/internal/httpapi/openai/models_route_test.go
+++ b/internal/httpapi/openai/models_route_test.go
@@ -22,6 +22,15 @@ func TestGetModelRouteDirectAndAlias(t *testing.T) {
 		}
 	})
 
+	t.Run("direct_nothinking", func(t *testing.T) {
+		req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-v4-flash-nothinking", nil)
+		rec := httptest.NewRecorder()
+		r.ServeHTTP(rec, req)
+		if rec.Code != http.StatusOK {
+			t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+		}
+	})
+
 	t.Run("direct_expert", func(t *testing.T) {
 		req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-v4-pro", nil)
 		rec := httptest.NewRecorder()
@@ -48,6 +57,15 @@ func TestGetModelRouteDirectAndAlias(t *testing.T) {
 			t.Fatalf("expected 200 for alias, got %d body=%s", rec.Code, rec.Body.String())
 		}
 	})
+
+	t.Run("alias_nothinking", func(t *testing.T) {
+		req := httptest.NewRequest(http.MethodGet, "/v1/models/claude-sonnet-4-6-nothinking", nil)
+		rec := httptest.NewRecorder()
+		r.ServeHTTP(rec, req)
+		if rec.Code != http.StatusOK {
+			t.Fatalf("expected 200 for nothinking alias, got %d body=%s", rec.Code, rec.Body.String())
+		}
+	})
 }
 
 func TestGetModelRouteNotFound(t *testing.T) {
diff --git a/internal/httpapi/openai/responses/empty_retry_runtime.go b/internal/httpapi/openai/responses/empty_retry_runtime.go
new file mode 100644
index 0000000..adad24a
--- /dev/null
+++ b/internal/httpapi/openai/responses/empty_retry_runtime.go
@@ -0,0 +1,233 @@
+package responses
+
+import (
+	"context"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"ds2api/internal/auth"
+	"ds2api/internal/config"
+	dsprotocol "ds2api/internal/deepseek/protocol"
+	openaifmt "ds2api/internal/format/openai"
+	"ds2api/internal/promptcompat"
+	"ds2api/internal/sse"
+	streamengine "ds2api/internal/stream"
+	"ds2api/internal/toolcall"
+)
+
+type responsesNonStreamResult struct { // outcome of one collected non-stream Responses attempt
+	thinking              string                       // cleaned thinking text placed in the response object
+	toolDetectionThinking string                       // cleaned thinking text passed to tool-call detection
+	text                  string                       // cleaned answer text (citation links applied when search is on)
+	contentFilter         bool                         // ContentFilter flag copied from the collected stream
+	parsed                toolcall.ToolCallParseResult // result of detectAssistantToolCalls
+	body                  map[string]any               // response object written to the client and stored
+	responseMessageID     int                          // collected ResponseMessageID; passed on when building a retry payload
+}
+
+// handleResponsesNonStreamWithRetry collects a non-stream Responses result and, while shouldRetryResponsesNonStream allows it,
+// re-issues the completion with a payload cloned around the previous response message id. Thinking text is accumulated across
+// attempts (overlap trimmed) so tool-call detection and the final response object see the reasoning of every attempt.
+func (h *Handler) handleResponsesNonStreamWithRetry(w http.ResponseWriter, ctx context.Context, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice promptcompat.ToolChoicePolicy, traceID string) {
+	var thinkingSoFar, detectionThinkingSoFar string
+	attempts, currentResp, usagePrompt := 0, resp, finalPrompt
+	for {
+		result, ok := h.collectResponsesNonStreamAttempt(w, currentResp, responseID, model, usagePrompt, thinkingEnabled, searchEnabled, toolNames)
+		if !ok {
+			return
+		}
+		// Re-derive tool calls and the response body from the thinking gathered over all attempts so far.
+		thinkingSoFar += sse.TrimContinuationOverlap(thinkingSoFar, result.thinking)
+		detectionThinkingSoFar += sse.TrimContinuationOverlap(detectionThinkingSoFar, result.toolDetectionThinking)
+		result.thinking, result.toolDetectionThinking = thinkingSoFar, detectionThinkingSoFar
+		result.parsed = detectAssistantToolCalls(result.text, result.thinking, result.toolDetectionThinking, toolNames)
+		result.body = openaifmt.BuildResponseObjectWithToolCalls(responseID, model, usagePrompt, result.thinking, result.text, result.parsed.Calls)
+		if !shouldRetryResponsesNonStream(result, attempts) {
+			h.finishResponsesNonStreamResult(w, result, attempts, owner, responseID, toolChoice, traceID)
+			return
+		}
+
+		attempts++
+		config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", false, "retry_attempt", attempts, "parent_message_id", result.responseMessageID)
+		// A failed PoW refresh is not fatal: the PoW of the original request is reused.
+		retryPow, powErr := h.DS.GetPow(ctx, a, 3)
+		if powErr != nil {
+			config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "responses", "stream", false, "retry_attempt", attempts, "error", powErr)
+			retryPow = pow
+		}
+		retryPayload := clonePayloadForEmptyOutputRetry(payload, result.responseMessageID)
+		nextResp, err := h.DS.CallCompletion(ctx, a, retryPayload, retryPow, 3)
+		if err != nil {
+			writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
+			config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "responses", "stream", false, "retry_attempt", attempts, "error", err)
+			return
+		}
+		currentResp, usagePrompt = nextResp, usagePromptWithEmptyOutputRetry(finalPrompt, attempts)
+	}
+}
+
+// collectResponsesNonStreamAttempt drains one upstream response (always closing its body) into a cleaned result; on a non-200 status it writes the upstream error and returns ok=false.
+func (h *Handler) collectResponsesNonStreamAttempt(w http.ResponseWriter, resp *http.Response, responseID, model, usagePrompt string, thinkingEnabled, searchEnabled bool, toolNames []string) (responsesNonStreamResult, bool) {
+	defer func() { _ = resp.Body.Close() }()
+	if resp.StatusCode != http.StatusOK {
+		upstreamErr, _ := io.ReadAll(resp.Body)
+		writeOpenAIError(w, resp.StatusCode, strings.TrimSpace(string(upstreamErr)))
+		return responsesNonStreamResult{}, false
+	}
+	collected := sse.CollectStream(resp, thinkingEnabled, false)
+	stripRefs := h.compatStripReferenceMarkers()
+	thinking := cleanVisibleOutput(collected.Thinking, stripRefs)
+	detectionThinking := cleanVisibleOutput(collected.ToolDetectionThinking, stripRefs)
+	text := cleanVisibleOutput(collected.Text, stripRefs)
+	if searchEnabled {
+		text = replaceCitationMarkersWithLinks(text, collected.CitationLinks)
+	}
+	calls := detectAssistantToolCalls(text, thinking, detectionThinking, toolNames)
+	return responsesNonStreamResult{
+		thinking:              thinking,
+		toolDetectionThinking: detectionThinking,
+		text:                  text,
+		contentFilter:         collected.ContentFilter,
+		parsed:                calls,
+		body:                  openaifmt.BuildResponseObjectWithToolCalls(responseID, model, usagePrompt, thinking, text, calls.Calls),
+		responseMessageID:     collected.ResponseMessageID,
+	}, true
+}
+
+func (h *Handler) finishResponsesNonStreamResult(w http.ResponseWriter, result responsesNonStreamResult, attempts int, owner, responseID string, toolChoice promptcompat.ToolChoicePolicy, traceID string) { // writes the terminal non-stream reply
+	if len(result.parsed.Calls) == 0 && writeUpstreamEmptyOutputError(w, result.text, result.thinking, result.contentFilter) {
+		config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "responses", "stream", false, "retry_attempts", attempts, "success_source", "none", "content_filter", result.contentFilter)
+		return // presumably the helper has already written the error response
+	}
+	logResponsesToolPolicyRejection(traceID, toolChoice, result.parsed, "text")
+	if toolChoice.IsRequired() && len(result.parsed.Calls) == 0 {
+		writeOpenAIErrorWithCode(w, http.StatusUnprocessableEntity, "tool_choice requires at least one valid tool call.", "tool_choice_violation")
+		return // a required tool call is missing: nothing is stored
+	}
+	h.getResponseStore().put(owner, responseID, result.body) // store first, then reply with the same object
+	writeJSON(w, http.StatusOK, result.body)
+	source := "synthetic_retry"
+	if attempts <= 0 {
+		source = "first_attempt"
+	}
+	config.Logger.Info("[openai_empty_retry] completed", "surface", "responses", "stream", false, "retry_attempts", attempts, "success_source", source)
+}
+
+// shouldRetryResponsesNonStream reports whether retries are enabled with budget left and the attempt produced no text, no tool calls and no content-filter stop.
+func shouldRetryResponsesNonStream(result responsesNonStreamResult, attempts int) bool {
+	if !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() {
+		return false
+	}
+	return !result.contentFilter && len(result.parsed.Calls) == 0 && strings.TrimSpace(result.text) == ""
+}
+
+// handleResponsesStreamWithRetry streams a Responses reply and, when an attempt ends without a terminal event and retries
+// remain, re-issues the completion on the same stream runtime; once retrying is not possible it finalizes with "stop".
+func (h *Handler) handleResponsesStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice promptcompat.ToolChoicePolicy, traceID string) {
+	rt, initialType, ok := h.prepareResponsesStreamRuntime(w, resp, owner, responseID, model, finalPrompt, thinkingEnabled, searchEnabled, toolNames, toolChoice, traceID)
+	if !ok {
+		return
+	}
+	attempts, currentResp := 0, resp
+	for {
+		terminalWritten, retryable := h.consumeResponsesStreamAttempt(r, currentResp, rt, initialType, thinkingEnabled, attempts < emptyOutputRetryMaxAttempts())
+		if terminalWritten {
+			logResponsesStreamTerminal(rt, attempts)
+			return
+		}
+		if !retryable || !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() {
+			rt.finalize("stop", false)
+			config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "responses", "stream", true, "retry_attempts", attempts, "success_source", "none", "error_code", rt.finalErrorCode)
+			return
+		}
+		attempts++
+		config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", true, "retry_attempt", attempts, "parent_message_id", rt.responseMessageID)
+		retryPow, powErr := h.DS.GetPow(r.Context(), a, 3)
+		if powErr != nil {
+			config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", powErr)
+			retryPow = pow
+		}
+		nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, rt.responseMessageID), retryPow, 3)
+		if err != nil {
+			rt.failResponse(http.StatusInternalServerError, "Failed to get completion.", "error")
+			config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", err)
+			return
+		}
+		if nextResp.StatusCode != http.StatusOK {
+			defer func() { _ = nextResp.Body.Close() }()
+			upstreamErr, _ := io.ReadAll(nextResp.Body)
+			rt.failResponse(nextResp.StatusCode, strings.TrimSpace(string(upstreamErr)), "error")
+			return
+		}
+		rt.finalPrompt, currentResp = usagePromptWithEmptyOutputRetry(finalPrompt, attempts), nextResp
+	}
+}
+
+// prepareResponsesStreamRuntime rejects a non-200 upstream response (writing its body as the error), otherwise sets the
+// SSE headers, builds the stream runtime, sends the created event and returns the runtime with the initial content type.
+func (h *Handler) prepareResponsesStreamRuntime(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice promptcompat.ToolChoicePolicy, traceID string) (*responsesStreamRuntime, string, bool) {
+	if resp.StatusCode != http.StatusOK {
+		defer func() { _ = resp.Body.Close() }()
+		upstreamErr, _ := io.ReadAll(resp.Body)
+		writeOpenAIError(w, resp.StatusCode, strings.TrimSpace(string(upstreamErr)))
+		return nil, "", false
+	}
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache, no-transform")
+	w.Header().Set("Connection", "keep-alive")
+	w.Header().Set("X-Accel-Buffering", "no")
+	controller := http.NewResponseController(w)
+	_, flushable := w.(http.Flusher)
+	initialType := "text"
+	if thinkingEnabled {
+		initialType = "thinking"
+	}
+	rt := newResponsesStreamRuntime(
+		w, controller, flushable, responseID, model, finalPrompt, thinkingEnabled, searchEnabled,
+		h.compatStripReferenceMarkers(), toolNames, len(toolNames) > 0,
+		h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence(),
+		toolChoice, traceID, func(obj map[string]any) { h.getResponseStore().put(owner, responseID, obj) },
+	)
+	rt.sendCreated()
+	return rt, initialType, true
+}
+
+// consumeResponsesStreamAttempt pumps one upstream SSE body through the stream runtime (closing the body afterwards) and
+// finalizes it. It returns (terminalWritten, retryable); retryable is always the negation of terminalWritten.
+func (h *Handler) consumeResponsesStreamAttempt(r *http.Request, resp *http.Response, streamRuntime *responsesStreamRuntime, initialType string, thinkingEnabled bool, allowDeferEmpty bool) (bool, bool) {
+	defer func() { _ = resp.Body.Close() }()
+	finishReason := "stop"
+	streamengine.ConsumeSSE(streamengine.ConsumeConfig{
+		Context:             r.Context(),
+		Body:                resp.Body,
+		ThinkingEnabled:     thinkingEnabled,
+		InitialType:         initialType,
+		KeepAliveInterval:   time.Duration(dsprotocol.KeepAliveTimeout) * time.Second,
+		IdleTimeout:         time.Duration(dsprotocol.StreamIdleTimeout) * time.Second,
+		MaxKeepAliveNoInput: dsprotocol.MaxKeepaliveCount,
+	}, streamengine.ConsumeHooks{
+		OnParsed: streamRuntime.onParsed,
+		OnFinalize: func(stop streamengine.StopReason, _ error) {
+			if string(stop) == "content_filter" {
+				finishReason = "content_filter"
+			}
+		},
+	})
+	// Deferring an empty result is never allowed for a content-filter stop.
+	done := streamRuntime.finalize(finishReason, allowDeferEmpty && finishReason != "content_filter")
+	return done, !done
+}
+
+func logResponsesStreamTerminal(streamRuntime *responsesStreamRuntime, attempts int) {
+	if streamRuntime.failed { // the runtime is marked failed: log it as an unsuccessful outcome
+		config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "responses", "stream", true, "retry_attempts", attempts, "success_source", "none", "error_code", streamRuntime.finalErrorCode)
+		return
+	}
+	source := "first_attempt" // "synthetic_retry" once at least one retry has been made
+	if attempts > 0 {
+		source = "synthetic_retry"
+	}
+	config.Logger.Info("[openai_empty_retry] completed", "surface", "responses", "stream", true, "retry_attempts", attempts, "success_source", source)
+}
diff --git a/internal/httpapi/openai/responses/handler.go b/internal/httpapi/openai/responses/handler.go
index 09feb91..fc00da4 100644
--- a/internal/httpapi/openai/responses/handler.go
+++ b/internal/httpapi/openai/responses/handler.go
@@ -11,6 +11,7 @@ import (
 	"ds2api/internal/httpapi/openai/history"
 	"ds2api/internal/httpapi/openai/shared"
 	"ds2api/internal/promptcompat"
+	"ds2api/internal/toolcall"
 	"ds2api/internal/toolstream"
 )
 
@@ -39,7 +40,16 @@ func (h *Handler) applyHistorySplit(ctx context.Context, a *auth.RequestAuth, st
 	if h == nil {
 		return stdReq, nil
 	}
-	return history.Service{Store: h.Store, DS: h.DS}.Apply(ctx, a, stdReq)
+	stdReq = shared.ApplyThinkingInjection(h.Store, stdReq)
+	svc := history.Service{Store: h.Store, DS: h.DS}
+	out, err := svc.ApplyCurrentInputFile(ctx, a, stdReq)
+	if err != nil {
+		return stdReq, err
+	}
+	if out.CurrentInputFileApplied {
+		return out, nil
+	}
+	return svc.Apply(ctx, a, out)
 }
 
 func (h *Handler) preprocessInlineFileInputs(ctx context.Context, a *auth.RequestAuth, req map[string]any) error {
@@ -103,6 +113,26 @@ func writeUpstreamEmptyOutputError(w http.ResponseWriter, text, thinking string,
 	return shared.WriteUpstreamEmptyOutputError(w, text, thinking, contentFilter)
 }
 
+func emptyOutputRetryEnabled() bool { // package-local wrapper over shared.EmptyOutputRetryEnabled
+	return shared.EmptyOutputRetryEnabled()
+}
+
+func emptyOutputRetryMaxAttempts() int { // package-local wrapper over shared.EmptyOutputRetryMaxAttempts
+	return shared.EmptyOutputRetryMaxAttempts()
+}
+
+func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any { // package-local wrapper; forwards both arguments unchanged
+	return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID)
+}
+
+func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string { // package-local wrapper; forwards both arguments unchanged
+	return shared.UsagePromptWithEmptyOutputRetry(originalPrompt, retryAttempts)
+}
+
 func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, seenNames map[int]string) []toolstream.ToolCallDelta {
 	return shared.FilterIncrementalToolCallDeltasByAllowed(deltas, seenNames)
 }
+
+func detectAssistantToolCalls(text, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult { // package-local wrapper over shared.DetectAssistantToolCalls
+	return shared.DetectAssistantToolCalls(text, exposedThinking, detectionThinking, toolNames)
+}
diff --git a/internal/httpapi/openai/responses/responses_handler.go b/internal/httpapi/openai/responses/responses_handler.go
index 8913322..f32e3ec 100644
--- a/internal/httpapi/openai/responses/responses_handler.go
+++ b/internal/httpapi/openai/responses/responses_handler.go
@@ -115,10 +115,10 @@ func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) {
 
 	responseID := "resp_" + strings.ReplaceAll(uuid.NewString(), "-", "")
 	if stdReq.Stream {
-		h.handleResponsesStream(w, r, resp, owner, responseID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, stdReq.ToolChoice, traceID)
+		h.handleResponsesStreamWithRetry(w, r, a, resp, payload, pow, owner, responseID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, stdReq.ToolChoice, traceID)
 		return
 	}
-	h.handleResponsesNonStream(w, resp, owner, responseID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, stdReq.ToolChoice, traceID)
+	h.handleResponsesNonStreamWithRetry(w, r.Context(), a, resp, payload, pow, owner, responseID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames, stdReq.ToolChoice, traceID)
 }
 
 func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolChoice promptcompat.ToolChoicePolicy, traceID string) {
@@ -131,14 +131,15 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
 	result := sse.CollectStream(resp, thinkingEnabled, true)
 	stripReferenceMarkers := h.compatStripReferenceMarkers()
 	sanitizedThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
+	toolDetectionThinking := cleanVisibleOutput(result.ToolDetectionThinking, stripReferenceMarkers)
 	sanitizedText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
 	if searchEnabled {
 		sanitizedText = replaceCitationMarkersWithLinks(sanitizedText, result.CitationLinks)
 	}
-	if writeUpstreamEmptyOutputError(w, sanitizedText, sanitizedThinking, result.ContentFilter) {
+	textParsed := detectAssistantToolCalls(sanitizedText, sanitizedThinking, toolDetectionThinking, toolNames)
+	if len(textParsed.Calls) == 0 && writeUpstreamEmptyOutputError(w, sanitizedText, sanitizedThinking, result.ContentFilter) {
 		return
 	}
-	textParsed := toolcall.ParseStandaloneToolCallsDetailed(sanitizedText, toolNames)
 	logResponsesToolPolicyRejection(traceID, toolChoice, textParsed, "text")
 
 	callCount := len(textParsed.Calls)
@@ -147,7 +148,7 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
 		return
 	}
 
-	responseObj := openaifmt.BuildResponseObject(responseID, model, finalPrompt, sanitizedThinking, sanitizedText, toolNames)
+	responseObj := openaifmt.BuildResponseObjectWithToolCalls(responseID, model, finalPrompt, sanitizedThinking, sanitizedText, textParsed.Calls)
 	h.getResponseStore().put(owner, responseID, responseObj)
 	writeJSON(w, http.StatusOK, responseObj)
 }
@@ -205,8 +206,12 @@ func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request,
 		MaxKeepAliveNoInput: dsprotocol.MaxKeepaliveCount,
 	}, streamengine.ConsumeHooks{
 		OnParsed: streamRuntime.onParsed,
-		OnFinalize: func(_ streamengine.StopReason, _ error) {
-			streamRuntime.finalize()
+		OnFinalize: func(reason streamengine.StopReason, _ error) {
+			if string(reason) == "content_filter" {
+				streamRuntime.finalize("content_filter", false)
+				return
+			}
+			streamRuntime.finalize("stop", false)
 		},
 	})
 }
diff --git a/internal/httpapi/openai/responses/responses_stream_runtime_core.go b/internal/httpapi/openai/responses/responses_stream_runtime_core.go
index 1bd81e6..984593d 100644
--- a/internal/httpapi/openai/responses/responses_stream_runtime_core.go
+++ b/internal/httpapi/openai/responses/responses_stream_runtime_core.go
@@ -34,24 +34,29 @@ type responsesStreamRuntime struct {
 	toolCallsEmitted     bool
 	toolCallsDoneEmitted bool
 
-	sieve             toolstream.State
-	thinking          strings.Builder
-	text              strings.Builder
-	visibleText       strings.Builder
-	streamToolCallIDs map[int]string
-	functionItemIDs   map[int]string
-	functionOutputIDs map[int]int
-	functionArgs      map[int]string
-	functionDone      map[int]bool
-	functionAdded     map[int]bool
-	functionNames     map[int]string
-	messageItemID     string
-	messageOutputID   int
-	nextOutputID      int
-	messageAdded      bool
-	messagePartAdded  bool
-	sequence          int
-	failed            bool
+	sieve                 toolstream.State
+	thinking              strings.Builder
+	toolDetectionThinking strings.Builder
+	text                  strings.Builder
+	visibleText           strings.Builder
+	responseMessageID     int
+	streamToolCallIDs     map[int]string
+	functionItemIDs       map[int]string
+	functionOutputIDs     map[int]int
+	functionArgs          map[int]string
+	functionDone          map[int]bool
+	functionAdded         map[int]bool
+	functionNames         map[int]string
+	messageItemID         string
+	messageOutputID       int
+	nextOutputID          int
+	messageAdded          bool
+	messagePartAdded      bool
+	sequence              int
+	failed                bool
+	finalErrorStatus      int
+	finalErrorMessage     string
+	finalErrorCode        string
 
 	persistResponse func(obj map[string]any)
 }
@@ -102,6 +107,9 @@ func newResponsesStreamRuntime(
 
 func (s *responsesStreamRuntime) failResponse(status int, message, code string) {
 	s.failed = true
+	s.finalErrorStatus = status
+	s.finalErrorMessage = message
+	s.finalErrorCode = code
 	failedResp := map[string]any{
 		"id":          s.responseID,
 		"type":        "response",
@@ -125,15 +133,20 @@ func (s *responsesStreamRuntime) failResponse(status int, message, code string)
 	s.sendDone()
 }
 
-func (s *responsesStreamRuntime) finalize() {
+func (s *responsesStreamRuntime) finalize(finishReason string, deferEmptyOutput bool) bool {
+	s.failed = false
+	s.finalErrorStatus = 0
+	s.finalErrorMessage = ""
+	s.finalErrorCode = ""
 	finalThinking := s.thinking.String()
+	finalToolDetectionThinking := s.toolDetectionThinking.String()
 	finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
 
 	if s.bufferToolContent {
 		s.processToolStreamEvents(toolstream.Flush(&s.sieve, s.toolNames), true, true)
 	}
 
-	textParsed := toolcall.ParseStandaloneToolCallsDetailed(finalText, s.toolNames)
+	textParsed := detectAssistantToolCalls(finalText, finalThinking, finalToolDetectionThinking, s.toolNames)
 	detected := textParsed.Calls
 	s.logToolPolicyRejections(textParsed)
 
@@ -148,12 +161,18 @@ func (s *responsesStreamRuntime) finalize() {
 
 	if s.toolChoice.IsRequired() && len(detected) == 0 {
 		s.failResponse(http.StatusUnprocessableEntity, "tool_choice requires at least one valid tool call.", "tool_choice_violation")
-		return
+		return true
 	}
 	if len(detected) == 0 && strings.TrimSpace(finalText) == "" {
-		status, message, code := upstreamEmptyOutputDetail(false, finalText, finalThinking)
+		status, message, code := upstreamEmptyOutputDetail(finishReason == "content_filter", finalText, finalThinking)
+		if deferEmptyOutput {
+			s.finalErrorStatus = status
+			s.finalErrorMessage = message
+			s.finalErrorCode = code
+			return false
+		}
 		s.failResponse(status, message, code)
-		return
+		return true
 	}
 	s.closeIncompleteFunctionItems()
 
@@ -163,6 +182,7 @@ func (s *responsesStreamRuntime) finalize() {
 	}
 	s.sendEvent("response.completed", openaifmt.BuildResponsesCompletedPayload(obj))
 	s.sendDone()
+	return true
 }
 
 func (s *responsesStreamRuntime) logToolPolicyRejections(textParsed toolcall.ToolCallParseResult) {
@@ -186,11 +206,23 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
 	if !parsed.Parsed {
 		return streamengine.ParsedDecision{}
 	}
-	if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
+	if parsed.ResponseMessageID > 0 {
+		s.responseMessageID = parsed.ResponseMessageID
+	}
+	if parsed.ContentFilter || parsed.ErrorMessage != "" {
+		return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("content_filter")}
+	}
+	if parsed.Stop {
 		return streamengine.ParsedDecision{Stop: true}
 	}
 
 	contentSeen := false
+	for _, p := range parsed.ToolDetectionThinkingParts {
+		trimmed := sse.TrimContinuationOverlap(s.toolDetectionThinking.String(), p.Text)
+		if trimmed != "" {
+			s.toolDetectionThinking.WriteString(trimmed)
+		}
+	}
 	for _, p := range parsed.Parts {
 		cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
 		if cleanedText == "" {
diff --git a/internal/httpapi/openai/responses/responses_stream_test.go b/internal/httpapi/openai/responses/responses_stream_test.go
index c19f311..c9316e4 100644
--- a/internal/httpapi/openai/responses/responses_stream_test.go
+++ b/internal/httpapi/openai/responses/responses_stream_test.go
@@ -232,6 +232,76 @@ func TestHandleResponsesStreamFailsWhenUpstreamHasOnlyThinking(t *testing.T) {
 	}
 }
 
+func TestHandleResponsesStreamPromotesThinkingToolCallsOnFinalizeWithoutMidstreamIntercept(t *testing.T) {
+	h := &Handler{}
+	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
+	rec := httptest.NewRecorder()
+
+	sseLine := func(path, value string) string {
+		b, _ := json.Marshal(map[string]any{
+			"p": path,
+			"v": value,
+		})
+		return "data: " + string(b) + "\n"
+	}
+
+	streamBody := sseLine("response/thinking_content", `<tool_calls><invoke name="read_file"><parameter name="path">README.MD</parameter></invoke></tool_calls>`) + "data: [DONE]\n"
+	resp := &http.Response{
+		StatusCode: http.StatusOK,
+		Body:       io.NopCloser(strings.NewReader(streamBody)),
+	}
+
+	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-v4-pro", "prompt", true, false, []string{"read_file"}, promptcompat.DefaultToolChoicePolicy(), "")
+
+	body := rec.Body.String()
+	if !strings.Contains(body, "event: response.reasoning.delta") {
+		t.Fatalf("expected reasoning delta in stream body, got %s", body)
+	}
+	if !strings.Contains(body, "event: response.function_call_arguments.done") {
+		t.Fatalf("expected finalize fallback function call event, got %s", body)
+	}
+	if strings.Contains(body, "event: response.failed") {
+		t.Fatalf("did not expect response.failed, body=%s", body)
+	}
+}
+
+func TestHandleResponsesStreamPromotesHiddenThinkingDSMLToolCallsOnFinalize(t *testing.T) {
+	h := &Handler{}
+	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
+	rec := httptest.NewRecorder()
+
+	sseLine := func(path, value string) string {
+		b, _ := json.Marshal(map[string]any{
+			"p": path,
+			"v": value,
+		})
+		return "data: " + string(b) + "\n"
+	}
+
+	streamBody := sseLine("response/thinking_content", `<|DSML|tool_calls><|DSML|invoke name="read_file"><|DSML|parameter name="path">README.MD</|DSML|parameter></|DSML|invoke></|DSML|tool_calls>`) + "data: [DONE]\n"
+	resp := &http.Response{
+		StatusCode: http.StatusOK,
+		Body:       io.NopCloser(strings.NewReader(streamBody)),
+	}
+
+	policy := promptcompat.ToolChoicePolicy{
+		Mode:    promptcompat.ToolChoiceRequired,
+		Allowed: map[string]struct{}{"read_file": {}},
+	}
+	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_hidden", "deepseek-v4-pro", "prompt", false, false, []string{"read_file"}, policy, "")
+
+	body := rec.Body.String()
+	if strings.Contains(body, "event: response.reasoning.delta") {
+		t.Fatalf("did not expect hidden reasoning delta in stream body, got %s", body)
+	}
+	if !strings.Contains(body, "event: response.function_call_arguments.done") {
+		t.Fatalf("expected hidden-thinking fallback function call event, got %s", body)
+	}
+	if strings.Contains(body, "event: response.failed") {
+		t.Fatalf("did not expect response.failed, body=%s", body)
+	}
+}
+
 func TestHandleResponsesNonStreamRequiredToolChoiceViolation(t *testing.T) {
 	h := &Handler{}
 	rec := httptest.NewRecorder()
@@ -258,7 +328,7 @@ func TestHandleResponsesNonStreamRequiredToolChoiceViolation(t *testing.T) {
 	}
 }
 
-func TestHandleResponsesNonStreamRequiredToolChoiceIgnoresThinkingToolPayload(t *testing.T) {
+func TestHandleResponsesNonStreamRequiredToolChoiceIgnoresThinkingToolPayloadWhenTextExists(t *testing.T) {
 	h := &Handler{}
 	rec := httptest.NewRecorder()
 	resp := &http.Response{
@@ -351,6 +421,65 @@ func TestHandleResponsesNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testin
 	}
 }
 
+func TestHandleResponsesNonStreamPromotesThinkingToolCallsWhenTextEmpty(t *testing.T) {
+	h := &Handler{}
+	rec := httptest.NewRecorder()
+	resp := &http.Response{
+		StatusCode: http.StatusOK,
+		Body: io.NopCloser(strings.NewReader(
+			`data: {"p":"response/thinking_content","v":"<tool_calls><invoke name=\"read_file\"><parameter name=\"path\">README.MD</parameter></invoke></tool_calls>"}` + "\n" +
+				`data: [DONE]` + "\n",
+		)),
+	}
+
+	h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-v4-pro", "prompt", true, false, []string{"read_file"}, promptcompat.DefaultToolChoicePolicy(), "")
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200 for thinking tool calls, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	out := decodeJSONBody(t, rec.Body.String())
+	output, _ := out["output"].([]any)
+	if len(output) != 1 {
+		t.Fatalf("expected one output item, got %#v", out["output"])
+	}
+	first, _ := output[0].(map[string]any)
+	if got := asString(first["type"]); got != "function_call" {
+		t.Fatalf("expected function_call output, got %#v", first["type"])
+	}
+}
+
+func TestHandleResponsesNonStreamPromotesHiddenThinkingDSMLToolCallsWhenTextEmpty(t *testing.T) {
+	h := &Handler{}
+	rec := httptest.NewRecorder()
+	resp := &http.Response{
+		StatusCode: http.StatusOK,
+		Body: io.NopCloser(strings.NewReader(
+			`data: {"p":"response/thinking_content","v":"<|DSML|tool_calls><|DSML|invoke name=\"read_file\"><|DSML|parameter name=\"path\">README.MD</|DSML|parameter></|DSML|invoke></|DSML|tool_calls>"}` + "\n" +
+				`data: [DONE]` + "\n",
+		)),
+	}
+
+	policy := promptcompat.ToolChoicePolicy{
+		Mode:    promptcompat.ToolChoiceRequired,
+		Allowed: map[string]struct{}{"read_file": {}},
+	}
+	h.handleResponsesNonStream(rec, resp, "owner-a", "resp_hidden", "deepseek-v4-pro", "prompt", false, false, []string{"read_file"}, policy, "")
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200 for hidden thinking tool calls, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	out := decodeJSONBody(t, rec.Body.String())
+	output, _ := out["output"].([]any)
+	if len(output) != 1 {
+		t.Fatalf("expected one output item, got %#v", out["output"])
+	}
+	first, _ := output[0].(map[string]any)
+	if got := asString(first["type"]); got != "function_call" {
+		t.Fatalf("expected function_call output, got %#v", first["type"])
+	}
+	if strings.Contains(rec.Body.String(), "reasoning") {
+		t.Fatalf("did not expect hidden reasoning in response body, got %s", rec.Body.String())
+	}
+}
+
 func extractSSEEventPayload(body, targetEvent string) (map[string]any, bool) {
 	scanner := bufio.NewScanner(strings.NewReader(body))
 	matched := false
diff --git a/internal/httpapi/openai/shared/assistant_toolcalls.go b/internal/httpapi/openai/shared/assistant_toolcalls.go
new file mode 100644
index 0000000..25f930b
--- /dev/null
+++ b/internal/httpapi/openai/shared/assistant_toolcalls.go
@@ -0,0 +1,26 @@
+package shared
+
+import (
+	"strings"
+
+	"ds2api/internal/toolcall"
+)
+
+// DetectAssistantToolCalls parses tool calls from the visible text first. The
+// thinking stream is consulted only when text is blank and produced no calls:
+// detectionThinking is parsed, or exposedThinking when detectionThinking is
+// blank. If the thinking yields no calls either, the text result is returned.
+func DetectAssistantToolCalls(text, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult {
+	fromText := toolcall.ParseStandaloneToolCallsDetailed(text, toolNames)
+	if len(fromText.Calls) > 0 || strings.TrimSpace(text) != "" {
+		return fromText
+	}
+	source := detectionThinking
+	if strings.TrimSpace(source) == "" {
+		source = exposedThinking
+	}
+	if fromThinking := toolcall.ParseStandaloneToolCallsDetailed(source, toolNames); len(fromThinking.Calls) > 0 {
+		return fromThinking
+	}
+	return fromText
+}
diff --git a/internal/httpapi/openai/shared/deps.go b/internal/httpapi/openai/shared/deps.go
index 3db5b37..6315541 100644
--- a/internal/httpapi/openai/shared/deps.go
+++ b/internal/httpapi/openai/shared/deps.go
@@ -45,6 +45,10 @@ type ConfigReader interface {
 	AutoDeleteSessions() bool
 	HistorySplitEnabled() bool
 	HistorySplitTriggerAfterTurns() int
+	CurrentInputFileEnabled() bool
+	CurrentInputFileMinChars() int
+	ThinkingInjectionEnabled() bool
+	ThinkingInjectionPrompt() string
 }
 
 type Deps struct {
diff --git a/internal/httpapi/openai/shared/empty_retry.go b/internal/httpapi/openai/shared/empty_retry.go
new file mode 100644
index 0000000..a84e93e
--- /dev/null
+++ b/internal/httpapi/openai/shared/empty_retry.go
@@ -0,0 +1,56 @@
+package shared
+
+import "strings"
+
+const EmptyOutputRetrySuffix = "Previous reply had no visible output. Please regenerate the visible final answer or tool call now."
+
+// EmptyOutputRetryEnabled reports whether the empty-output retry is enabled;
+// it currently always returns true.
+func EmptyOutputRetryEnabled() bool { return true }
+
+// EmptyOutputRetryMaxAttempts returns the empty-output retry attempt limit;
+// it currently always returns 1.
+func EmptyOutputRetryMaxAttempts() int { return 1 }
+
+func ClonePayloadWithEmptyOutputRetryPrompt(payload map[string]any) map[string]any { // same as ClonePayloadForEmptyOutputRetry with parentMessageID 0, so no parent_message_id is set
+	return ClonePayloadForEmptyOutputRetry(payload, 0)
+}
+
+// ClonePayloadForEmptyOutputRetry shallow-copies payload and appends the retry
+// suffix to its "prompt" (a missing or non-string prompt counts as empty).
+// A positive parentMessageID is stored as "parent_message_id" so the retry is
+// sent as a follow-up turn of the same DeepSeek session, not a new root message.
+func ClonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
+	prompt, _ := payload["prompt"].(string)
+	retry := make(map[string]any, len(payload))
+	for key, value := range payload {
+		retry[key] = value
+	}
+	retry["prompt"] = AppendEmptyOutputRetrySuffix(prompt)
+	if parentMessageID > 0 {
+		retry["parent_message_id"] = parentMessageID
+	}
+	return retry
+}
+
+// AppendEmptyOutputRetrySuffix drops trailing spaces, tabs and line breaks from prompt and appends the suffix after a blank line; an empty result yields the suffix alone.
+func AppendEmptyOutputRetrySuffix(prompt string) string {
+	if trimmed := strings.TrimRight(prompt, "\r\n\t "); trimmed != "" {
+		return trimmed + "\n\n" + EmptyOutputRetrySuffix
+	}
+	return EmptyOutputRetrySuffix
+}
+
+// UsagePromptWithEmptyOutputRetry returns originalPrompt unchanged when
+// retryAttempts <= 0. Otherwise it joins, with "\n", the original prompt and
+// each successive retry prompt (the previous one plus the retry suffix).
+func UsagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {
+	if retryAttempts <= 0 {
+		return originalPrompt
+	}
+	prompts := append(make([]string, 0, retryAttempts+1), originalPrompt)
+	for len(prompts) <= retryAttempts {
+		prompts = append(prompts, AppendEmptyOutputRetrySuffix(prompts[len(prompts)-1]))
+	}
+	return strings.Join(prompts, "\n")
+}
diff --git a/internal/httpapi/openai/shared/thinking_injection.go b/internal/httpapi/openai/shared/thinking_injection.go
new file mode 100644
index 0000000..13cb7b4
--- /dev/null
+++ b/internal/httpapi/openai/shared/thinking_injection.go
@@ -0,0 +1,21 @@
+package shared
+
+import "ds2api/internal/promptcompat"
+
+// ApplyThinkingInjection appends the configured thinking-injection prompt to the latest user
+// message and rebuilds FinalPrompt/ToolNames; stdReq is returned untouched when nothing applies.
+func ApplyThinkingInjection(store ConfigReader, stdReq promptcompat.StandardRequest) promptcompat.StandardRequest {
+	if store == nil || !store.ThinkingInjectionEnabled() || !stdReq.Thinking {
+		return stdReq
+	}
+	injected, ok := promptcompat.AppendThinkingInjectionPromptToLatestUser(stdReq.Messages, store.ThinkingInjectionPrompt())
+	if !ok {
+		return stdReq
+	}
+	prompt, names := promptcompat.BuildOpenAIPrompt(injected, stdReq.ToolsRaw, "", stdReq.ToolChoice, stdReq.Thinking)
+	if len(names) == 0 && len(stdReq.ToolNames) > 0 {
+		names = stdReq.ToolNames
+	}
+	stdReq.Messages, stdReq.FinalPrompt, stdReq.ToolNames = injected, prompt, names
+	return stdReq
+}
diff --git a/internal/httpapi/openai/stream_status_test.go b/internal/httpapi/openai/stream_status_test.go
index 3c2827f..f34c11f 100644
--- a/internal/httpapi/openai/stream_status_test.go
+++ b/internal/httpapi/openai/stream_status_test.go
@@ -66,6 +66,44 @@ func (m streamStatusDSStub) DeleteAllSessionsForToken(_ context.Context, _ strin
 	return nil
 }
 
+type streamStatusDSSeqStub struct { // test stub that hands out scripted responses in call order and records payloads
+	resps    []*http.Response // responses returned by successive CallCompletion calls
+	payloads []map[string]any // shallow copy of the payload from every CallCompletion call
+}
+
+func (m *streamStatusDSSeqStub) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) { // always succeeds with the fixed ID "session-id"
+	return "session-id", nil
+}
+
+func (m *streamStatusDSSeqStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) { // always succeeds with the fixed value "pow"
+	return "pow", nil
+}
+
+func (m *streamStatusDSSeqStub) UploadFile(_ context.Context, _ *auth.RequestAuth, _ dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) { // ignores the request and reports a fixed uploaded file
+	return &dsclient.UploadFileResult{ID: "file-id", Filename: "file.txt", Bytes: 1, Status: "uploaded"}, nil
+}
+
+func (m *streamStatusDSSeqStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) {
+	recorded := make(map[string]any, len(payload)) // shallow copy of the caller's map, kept for later assertions
+	for key, value := range payload {
+		recorded[key] = value
+	}
+	m.payloads = append(m.payloads, recorded)
+	idx := len(m.resps) - 1 // once the scripted responses run out, keep replaying the last one
+	if call := len(m.payloads) - 1; call < idx {
+		idx = call
+	}
+	return m.resps[idx], nil
+}
+
+func (m *streamStatusDSSeqStub) DeleteSessionForToken(_ context.Context, _ string, _ string) (*dsclient.DeleteSessionResult, error) { // always reports success
+	return &dsclient.DeleteSessionResult{Success: true}, nil
+}
+
+func (m *streamStatusDSSeqStub) DeleteAllSessionsForToken(_ context.Context, _ string) error { // no-op that never fails
+	return nil
+}
+
 func makeOpenAISSEHTTPResponse(lines ...string) *http.Response {
 	body := strings.Join(lines, "\n")
 	if !strings.HasSuffix(body, "\n") {
@@ -78,6 +116,12 @@ func makeOpenAISSEHTTPResponse(lines ...string) *http.Response {
 	}
 }
 
+func newOpenAITestRouter(h *openAITestSurface) http.Handler { // fresh chi router with the OpenAI test routes registered for h
+	router := chi.NewRouter()
+	registerOpenAITestRoutes(router, h)
+	return router
+}
+
 func captureStatusMiddleware(statuses *[]int) func(http.Handler) http.Handler {
 	return func(next http.Handler) http.Handler {
 		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -239,6 +283,133 @@ func TestChatCompletionsStreamEmitsFailureFrameWhenUpstreamOutputEmpty(t *testin
 	}
 }
 
+// TestChatCompletionsStreamRetriesEmptyOutputOnSameSession checks that a thinking-only first
+// attempt is retried once on the same session and that both attempts form a single SSE stream.
+func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) {
+	ds := &streamStatusDSSeqStub{resps: []*http.Response{
+		makeOpenAISSEHTTPResponse(`data: {"response_message_id":42,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
+		makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
+	}}
+	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds}
+	reqBody := `{"model":"deepseek-v4-pro","messages":[{"role":"user","content":"hi"}],"stream":true}`
+	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody))
+	req.Header.Set("Authorization", "Bearer direct-token")
+	req.Header.Set("Content-Type", "application/json")
+	rec := httptest.NewRecorder()
+	newOpenAITestRouter(h).ServeHTTP(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	if len(ds.payloads) != 2 {
+		t.Fatalf("expected one synthetic retry call, got %d", len(ds.payloads))
+	}
+	if ds.payloads[0]["chat_session_id"] != ds.payloads[1]["chat_session_id"] {
+		t.Fatalf("expected retry to reuse session, payloads=%#v", ds.payloads)
+	}
+	retryPrompt := asString(ds.payloads[1]["prompt"])
+	if !strings.Contains(retryPrompt, "Previous reply had no visible output. Please regenerate the visible final answer or tool call now.") {
+		t.Fatalf("expected retry suffix in prompt, got %q", retryPrompt)
+	}
+	// Verify multi-turn chaining: retry must set parent_message_id from first call's response_message_id.
+	if parentID, ok := ds.payloads[1]["parent_message_id"].(int); !ok || parentID != 42 {
+		t.Fatalf("expected retry parent_message_id=42, got %#v", ds.payloads[1]["parent_message_id"])
+	}
+
+	frames, done := parseSSEDataFrames(t, rec.Body.String())
+	if !done {
+		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
+	}
+	if doneCount := strings.Count(rec.Body.String(), "data: [DONE]"); doneCount != 1 {
+		t.Fatalf("expected one [DONE], got %d body=%s", doneCount, rec.Body.String())
+	}
+	if len(frames) != 3 {
+		t.Fatalf("expected reasoning, content, finish frames, got %#v body=%s", frames, rec.Body.String())
+	}
+	id := asString(frames[0]["id"])
+	for _, frame := range frames[1:] {
+		if asString(frame["id"]) != id {
+			t.Fatalf("expected same completion id across retry stream, frames=%#v", frames)
+		}
+	}
+	choices, _ := frames[1]["choices"].([]any)
+	if len(choices) == 0 {
+		t.Fatalf("expected choices in retry content frame, got %#v body=%s", frames[1], rec.Body.String())
+	}
+	choice, _ := choices[0].(map[string]any)
+	delta, _ := choice["delta"].(map[string]any)
+	if asString(delta["content"]) != "visible" {
+		t.Fatalf("expected retry content delta, got %#v body=%s", delta, rec.Body.String())
+	}
+}
+
+// TestChatCompletionsNonStreamRetriesThinkingOnlyOutput checks that a thinking-only reply is retried once and the first attempt's reasoning is kept.
+func TestChatCompletionsNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
+	ds := &streamStatusDSSeqStub{resps: []*http.Response{
+		makeOpenAISSEHTTPResponse(`data: {"response_message_id":99,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
+		makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
+	}}
+	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds}
+	reqBody := `{"model":"deepseek-v4-pro","messages":[{"role":"user","content":"hi"}],"stream":false}`
+	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody))
+	req.Header.Set("Authorization", "Bearer direct-token")
+	req.Header.Set("Content-Type", "application/json")
+	rec := httptest.NewRecorder()
+	newOpenAITestRouter(h).ServeHTTP(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200 after retry, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	if len(ds.payloads) != 2 {
+		t.Fatalf("expected one synthetic retry call, got %d", len(ds.payloads))
+	}
+	// Verify multi-turn chaining.
+	if parentID, ok := ds.payloads[1]["parent_message_id"].(int); !ok || parentID != 99 {
+		t.Fatalf("expected retry parent_message_id=99, got %#v", ds.payloads[1]["parent_message_id"])
+	}
+	var out map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
+		t.Fatalf("decode response failed: %v body=%s", err, rec.Body.String())
+	}
+	choices, _ := out["choices"].([]any)
+	if len(choices) == 0 {
+		t.Fatalf("expected at least one choice, got %#v", out)
+	}
+	choice, _ := choices[0].(map[string]any)
+	message, _ := choice["message"].(map[string]any)
+	if asString(message["content"]) != "visible" {
+		t.Fatalf("expected retry visible content, got %#v", message)
+	}
+	if !strings.Contains(asString(message["reasoning_content"]), "plan") {
+		t.Fatalf("expected first-attempt reasoning to be preserved, got %#v", message)
+	}
+}
+
+// TestChatCompletionsContentFilterDoesNotRetry checks that a content_filter
+// reply from upstream is answered with 400 and that the upstream is called
+// only once, i.e. no synthetic retry is issued.
+func TestChatCompletionsContentFilterDoesNotRetry(t *testing.T) {
+	// The second scripted response would succeed, but it must never be requested.
+	ds := &streamStatusDSSeqStub{resps: []*http.Response{
+		makeOpenAISSEHTTPResponse(`data: {"code":"content_filter"}`),
+		makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
+	}}
+	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds}
+	reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hi"}],"stream":false}`
+	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody))
+	req.Header.Set("Authorization", "Bearer direct-token")
+	req.Header.Set("Content-Type", "application/json")
+	rec := httptest.NewRecorder()
+	newOpenAITestRouter(h).ServeHTTP(rec, req)
+
+	if rec.Code != http.StatusBadRequest {
+		t.Fatalf("expected content_filter 400, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	if len(ds.payloads) != 1 {
+		t.Fatalf("expected no retry on content_filter, got %d calls", len(ds.payloads))
+	}
+}
+
 func TestResponsesStreamUsageIgnoresBatchAccumulatedTokenUsage(t *testing.T) {
 	statuses := make([]int, 0, 1)
 	h := &openAITestSurface{
@@ -287,6 +458,94 @@ func TestResponsesStreamUsageIgnoresBatchAccumulatedTokenUsage(t *testing.T) {
 	}
 }
 
+// TestResponsesStreamRetriesThinkingOnlyOutput drives the streaming /v1/responses
+// route with a thinking-only first upstream reply and expects exactly one retry,
+// chained to message 77, ending in a single completed SSE stream.
+func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) {
+	upstream := &streamStatusDSSeqStub{resps: []*http.Response{
+		makeOpenAISSEHTTPResponse(`data: {"response_message_id":77,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
+		makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
+	}}
+	surface := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: upstream}
+	const payload = `{"model":"deepseek-v4-pro","input":"hi","stream":true}`
+	request := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(payload))
+	request.Header.Set("Authorization", "Bearer direct-token")
+	request.Header.Set("Content-Type", "application/json")
+	recorder := httptest.NewRecorder()
+	newOpenAITestRouter(surface).ServeHTTP(recorder, request)
+
+	if got := recorder.Code; got != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", got, recorder.Body.String())
+	}
+	if calls := len(upstream.payloads); calls != 2 {
+		t.Fatalf("expected one synthetic retry call, got %d", calls)
+	}
+	// Verify multi-turn chaining.
+	retryParent := upstream.payloads[1]["parent_message_id"]
+	if id, isInt := retryParent.(int); !isInt || id != 77 {
+		t.Fatalf("expected retry parent_message_id=77, got %#v", retryParent)
+	}
+	sse := recorder.Body.String()
+	if strings.Contains(sse, "response.failed") {
+		t.Fatalf("did not expect premature response.failed, body=%s", sse)
+	}
+	if !(strings.Contains(sse, "response.reasoning.delta") && strings.Contains(sse, "response.output_text.delta") && strings.Contains(sse, "response.completed")) {
+		t.Fatalf("expected reasoning, text delta, and completed events, body=%s", sse)
+	}
+	if strings.Count(sse, "data: [DONE]") != 1 {
+		t.Fatalf("expected one [DONE], body=%s", sse)
+	}
+}
+
+// TestResponsesNonStreamRetriesThinkingOnlyOutput is the non-streaming variant:
+// after a thinking-only first reply it expects one retry chained to message 88,
+// the retry's visible text in output_text, and the first reasoning kept in output.
+func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
+	upstream := &streamStatusDSSeqStub{resps: []*http.Response{
+		makeOpenAISSEHTTPResponse(`data: {"response_message_id":88,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
+		makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
+	}}
+	surface := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: upstream}
+	const payload = `{"model":"deepseek-v4-pro","input":"hi","stream":false}`
+	request := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(payload))
+	request.Header.Set("Authorization", "Bearer direct-token")
+	request.Header.Set("Content-Type", "application/json")
+	recorder := httptest.NewRecorder()
+	newOpenAITestRouter(surface).ServeHTTP(recorder, request)
+
+	if got := recorder.Code; got != http.StatusOK {
+		t.Fatalf("expected 200 after retry, got %d body=%s", got, recorder.Body.String())
+	}
+	if calls := len(upstream.payloads); calls != 2 {
+		t.Fatalf("expected one synthetic retry call, got %d", calls)
+	}
+	// Verify multi-turn chaining.
+	retryParent := upstream.payloads[1]["parent_message_id"]
+	if id, isInt := retryParent.(int); !isInt || id != 88 {
+		t.Fatalf("expected retry parent_message_id=88, got %#v", retryParent)
+	}
+	var decoded map[string]any
+	if err := json.Unmarshal(recorder.Body.Bytes(), &decoded); err != nil {
+		t.Fatalf("decode response failed: %v body=%s", err, recorder.Body.String())
+	}
+	if text := asString(decoded["output_text"]); text != "visible" {
+		t.Fatalf("expected retry visible output_text, got %#v", decoded["output_text"])
+	}
+	items, _ := decoded["output"].([]any)
+	if len(items) == 0 {
+		t.Fatalf("expected output items, got %#v", decoded)
+	}
+	firstItem, _ := items[0].(map[string]any)
+	entries, _ := firstItem["content"].([]any)
+	if len(entries) == 0 {
+		t.Fatalf("expected content entries, got %#v", firstItem)
+	}
+	firstEntry, _ := entries[0].(map[string]any)
+	if kind, text := asString(firstEntry["type"]), asString(firstEntry["text"]); kind != "reasoning" || !strings.Contains(text, "plan") {
+		t.Fatalf("expected preserved reasoning entry, got %#v", entries)
+	}
+}
+
 func TestResponsesNonStreamUsageIgnoresPromptAndOutputTokenUsage(t *testing.T) {
 	statuses := make([]int, 0, 1)
 	h := &openAITestSurface{
diff --git a/internal/httpapi/openai/test_bridge_test.go b/internal/httpapi/openai/test_bridge_test.go
index 91549ce..6815589 100644
--- a/internal/httpapi/openai/test_bridge_test.go
+++ b/internal/httpapi/openai/test_bridge_test.go
@@ -84,7 +84,16 @@ func (h *openAITestSurface) ChatCompletions(w http.ResponseWriter, r *http.Reque
 }
 
 func (h *openAITestSurface) applyHistorySplit(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) {
-	return history.Service{Store: h.Store, DS: h.DS}.Apply(ctx, a, stdReq)
+	stdReq = shared.ApplyThinkingInjection(h.Store, stdReq) // thinking injection runs before any history handling
+	svc := history.Service{Store: h.Store, DS: h.DS}
+	out, err := svc.ApplyCurrentInputFile(ctx, a, stdReq) // first pass: current-input-file handling
+	if err != nil {
+		return stdReq, err // on failure, hand back the injected request rather than out
+	}
+	if out.CurrentInputFileApplied {
+		return out, nil // already handled by the first pass; svc.Apply is skipped
+	}
+	return svc.Apply(ctx, a, out) // otherwise continue with svc.Apply on the first-pass result
 }
 
 func (h *openAITestSurface) preprocessInlineFileInputs(ctx context.Context, a *auth.RequestAuth, req map[string]any) error {
diff --git a/internal/js/chat-stream/http_internal.js b/internal/js/chat-stream/http_internal.js
index 01caa8d..247e38c 100644
--- a/internal/js/chat-stream/http_internal.js
+++ b/internal/js/chat-stream/http_internal.js
@@ -58,6 +58,33 @@ async function fetchStreamPrepare(req, rawBody) {
   };
 }
 
+// POSTs { lease_id } to the internal Go URL with __stream_pow=1 and resolves to
+// { ok, status, contentType, text, body }, where body is the parsed JSON reply.
+async function fetchStreamPow(req, leaseID) {
+  const target = buildInternalGoURL(req);
+  target.searchParams.set('__stream_pow', '1');
+  const endpoint = target.toString();
+  const requestInit = {
+    method: 'POST',
+    headers: buildInternalGoHeaders(req, { withInternalToken: true, withContentType: true }),
+    body: Buffer.from(JSON.stringify({ lease_id: leaseID })),
+  };
+  const upstream = await fetch(endpoint, requestInit);
+  const text = await upstream.text();
+  // A non-JSON reply degrades to an empty body; the raw text is still returned.
+  const parseBody = () => {
+    try {
+      return JSON.parse(text || '{}');
+    } catch (_err) {
+      return {};
+    }
+  };
+  const body = parseBody();
+  const { ok, status, headers } = upstream;
+  const contentType = headers.get('content-type') || 'application/json';
+  return { ok, status, contentType, text, body };
+}
+
 function relayPreparedFailure(res, prep) {
   if (prep.status === 401 && looksLikeVercelAuthPage(prep.text)) {
     writeOpenAIError(
@@ -195,6 +222,7 @@ module.exports = {
   header,
   readRawBody,
   fetchStreamPrepare,
+  fetchStreamPow,
   relayPreparedFailure,
   safeReadText,
   buildInternalGoURL,
diff --git a/internal/js/chat-stream/vercel_stream_impl.js b/internal/js/chat-stream/vercel_stream_impl.js
index 553af69..dfd6aad 100644
--- a/internal/js/chat-stream/vercel_stream_impl.js
+++ b/internal/js/chat-stream/vercel_stream_impl.js
@@ -25,6 +25,7 @@ const {
   asString,
   isAbortError,
   fetchStreamPrepare,
+  fetchStreamPow,
   relayPreparedFailure,
   createLeaseReleaser,
 } = require('./http_internal');
@@ -33,6 +34,10 @@ const {
 } = require('./dedupe');
 
 const DEEPSEEK_COMPLETION_URL = 'https://chat.deepseek.com/api/v0/chat/completion';
+const DEEPSEEK_CONTINUE_URL = 'https://chat.deepseek.com/api/v0/chat/continue';
+const EMPTY_OUTPUT_RETRY_SUFFIX = 'Previous reply had no visible output. Please regenerate the visible final answer or tool call now.';
+const EMPTY_OUTPUT_RETRY_MAX_ATTEMPTS = 1;
+const AUTO_CONTINUE_MAX_ROUNDS = 8;
 
 async function handleVercelStream(req, res, rawBody, payload) {
   const prep = await fetchStreamPrepare(req, rawBody);
@@ -45,7 +50,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
   const sessionID = asString(prep.body.session_id) || `chatcmpl-${Date.now()}`;
   const leaseID = asString(prep.body.lease_id);
   const deepseekToken = asString(prep.body.deepseek_token);
-  const powHeader = asString(prep.body.pow_header);
+  const initialPowHeader = asString(prep.body.pow_header);
   const completionPayload = prep.body.payload && typeof prep.body.payload === 'object' ? prep.body.payload : null;
   const finalPrompt = asString(prep.body.final_prompt);
   const thinkingEnabled = toBool(prep.body.thinking_enabled);
@@ -55,7 +60,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
   const emitEarlyToolDeltas = toolPolicy.emitEarlyToolDeltas;
   const stripReferenceMarkers = boolDefaultTrue(prep.body.compat && prep.body.compat.strip_reference_markers);
 
-  if (!model || !leaseID || !deepseekToken || !powHeader || !completionPayload) {
+  if (!model || !leaseID || !deepseekToken || !initialPowHeader || !completionPayload) {
     writeOpenAIError(res, 500, 'invalid vercel prepare response');
     return;
   }
@@ -84,23 +89,66 @@ async function handleVercelStream(req, res, rawBody, payload) {
   res.on('close', onResClose);
 
   try {
-    let completionRes;
-    try {
-      completionRes = await fetch(DEEPSEEK_COMPLETION_URL, {
-        method: 'POST',
-        headers: {
-          ...BASE_HEADERS,
-          authorization: `Bearer ${deepseekToken}`,
-          'x-ds-pow-response': powHeader,
-        },
-        body: JSON.stringify(completionPayload),
-        signal: upstreamController.signal,
-      });
-    } catch (err) {
-      if (clientClosed || isAbortError(err)) {
-        return;
+    let currentPowHeader = initialPowHeader;
+    const refreshPowHeader = async (roundType) => {
+      try {
+        const pow = await fetchStreamPow(req, leaseID);
+        const nextPowHeader = asString(pow.body && pow.body.pow_header);
+        if (pow.ok && nextPowHeader) {
+          currentPowHeader = nextPowHeader;
+          return currentPowHeader;
+        }
+        console.warn('[vercel_stream_pow] refresh failed, reusing previous PoW', {
+          round_type: roundType,
+          status: pow.status || 0,
+        });
+      } catch (err) {
+        if (clientClosed || isAbortError(err)) {
+          return '';
+        }
+        console.warn('[vercel_stream_pow] refresh failed, reusing previous PoW', {
+          round_type: roundType,
+          error: err,
+        });
       }
-      throw err;
+      return currentPowHeader;
+    };
+
+    const fetchDeepSeekStream = async (url, bodyPayload, powHeader) => {
+      try {
+        return await fetch(url, {
+          method: 'POST',
+          headers: {
+            ...BASE_HEADERS,
+            authorization: `Bearer ${deepseekToken}`,
+            'x-ds-pow-response': powHeader,
+          },
+          body: JSON.stringify(bodyPayload),
+          signal: upstreamController.signal,
+        });
+      } catch (err) {
+        if (clientClosed || isAbortError(err)) {
+          return null;
+        }
+        throw err;
+      }
+    };
+    const fetchCompletion = (bodyPayload) => fetchDeepSeekStream(DEEPSEEK_COMPLETION_URL, bodyPayload, currentPowHeader);
+    const fetchContinue = async (messageID) => {
+      const powHeader = await refreshPowHeader('continue');
+      if (!powHeader) {
+        return null;
+      }
+      return fetchDeepSeekStream(DEEPSEEK_CONTINUE_URL, {
+        chat_session_id: sessionID,
+        message_id: messageID,
+        fallback_to_resume: true,
+      }, powHeader);
+    };
+
+    let completionRes = await fetchCompletion(completionPayload);
+    if (completionRes === null) {
+      return;
     }
     if (clientClosed) {
       return;
@@ -126,6 +174,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
     let currentType = thinkingEnabled ? 'thinking' : 'text';
     let thinkingText = '';
     let outputText = '';
+    let usagePrompt = finalPrompt;
     const toolSieveEnabled = toolPolicy.toolSieveEnabled;
     const toolSieveState = createToolSieveState();
     let toolCallsEmitted = false;
@@ -133,7 +182,6 @@ async function handleVercelStream(req, res, rawBody, payload) {
     const streamToolCallIDs = new Map();
     const streamToolNames = new Map();
     const decoder = new TextDecoder();
-    reader = completionRes.body.getReader();
     let buffered = '';
     let ended = false;
     const { sendFrame, sendDeltaFrame } = createChatCompletionEmitter({
@@ -144,14 +192,14 @@ async function handleVercelStream(req, res, rawBody, payload) {
       isClosed: () => clientClosed,
     });
 
-    const finish = async (reason) => {
+    const finish = async (reason, options = {}) => {
       if (ended) {
-        return;
+        return true;
       }
-      ended = true;
       if (clientClosed || res.writableEnded || res.destroyed) {
+        ended = true;
         await releaseLease();
-        return;
+        return true;
       }
       const detected = parseStandaloneToolCalls(outputText, toolNames);
       if (detected.length > 0 && !toolCallsDoneEmitted) {
@@ -177,21 +225,26 @@ async function handleVercelStream(req, res, rawBody, payload) {
         reason = 'tool_calls';
       }
       if (detected.length === 0 && !toolCallsEmitted && outputText.trim() === '') {
+        if (options.deferEmpty && reason !== 'content_filter') {
+          return false;
+        }
+        ended = true;
         const detail = upstreamEmptyOutputDetail(reason === 'content_filter', outputText, thinkingText);
         sendFailedChunk(res, detail.status, detail.message, detail.code);
         await releaseLease();
         if (!res.writableEnded && !res.destroyed) {
           res.end();
         }
-        return;
+        return true;
       }
+      ended = true;
       sendFrame({
         id: sessionID,
         object: 'chat.completion.chunk',
         created,
         model,
         choices: [{ delta: {}, index: 0, finish_reason: reason }],
-        usage: buildUsage(finalPrompt, thinkingText, outputText),
+        usage: buildUsage(usagePrompt, thinkingText, outputText),
       });
       if (!res.writableEnded && !res.destroyed) {
         res.write('data: [DONE]\n\n');
@@ -200,122 +253,194 @@ async function handleVercelStream(req, res, rawBody, payload) {
       if (!res.writableEnded && !res.destroyed) {
         res.end();
       }
+      return true;
     };
 
-    try {
+    const processStream = async (initialResponse, allowDeferEmpty) => {
+      let currentResponse = initialResponse;
+      let continueState = createContinueState(sessionID);
+      let continueRounds = 0;
       // eslint-disable-next-line no-constant-condition
       while (true) {
-        if (clientClosed) {
-          await finish('stop');
-          return;
-        }
-        const { value, done } = await reader.read();
-        if (done) {
-          break;
-        }
-        buffered += decoder.decode(value, { stream: true });
-        const lines = buffered.split('\n');
-        buffered = lines.pop() || '';
-
-        for (const rawLine of lines) {
-          const line = rawLine.trim();
-          if (!line.startsWith('data:')) {
-            continue;
-          }
-          const dataStr = line.slice(5).trim();
-          if (!dataStr) {
-            continue;
-          }
-          if (dataStr === '[DONE]') {
-            await finish('stop');
-            return;
-          }
-          let chunk;
-          try {
-            chunk = JSON.parse(dataStr);
-          } catch (_err) {
-            continue;
-          }
-          const parsed = parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenceMarkers);
-          if (!parsed.parsed) {
-            continue;
-          }
-          currentType = parsed.newType;
-          if (parsed.errorMessage) {
-            await finish('content_filter');
-            return;
-          }
-          if (parsed.contentFilter) {
-            await finish(outputText.trim() === '' ? 'content_filter' : 'stop');
-            return;
-          }
-          if (parsed.finished) {
-            await finish('stop');
-            return;
-          }
-
-          for (const p of parsed.parts) {
-            if (!p.text) {
-              continue;
+        reader = currentResponse.body.getReader();
+        buffered = '';
+        let streamEnded = false;
+        try {
+          // eslint-disable-next-line no-constant-condition
+          while (true) {
+            if (clientClosed) {
+              await finish('stop');
+              return { terminal: true, retryable: false };
             }
-            if (p.type === 'thinking') {
-              if (thinkingEnabled) {
-                const trimmed = trimContinuationOverlap(thinkingText, p.text);
-                if (!trimmed) {
+            const { value, done } = await reader.read();
+            if (done) {
+              break;
+            }
+            buffered += decoder.decode(value, { stream: true });
+            const lines = buffered.split('\n');
+            buffered = lines.pop() || '';
+
+            for (const rawLine of lines) {
+              const line = rawLine.trim();
+              if (!line.startsWith('data:')) {
+                continue;
+              }
+              const dataStr = line.slice(5).trim();
+              if (!dataStr) {
+                continue;
+              }
+              if (dataStr === '[DONE]') {
+                streamEnded = true;
+                break;
+              }
+              let chunk;
+              try {
+                chunk = JSON.parse(dataStr);
+              } catch (_err) {
+                continue;
+              }
+              observeContinueState(continueState, chunk);
+              const parsed = parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenceMarkers);
+              if (!parsed.parsed) {
+                continue;
+              }
+              currentType = parsed.newType;
+              if (parsed.errorMessage) {
+                return { terminal: await finish('content_filter'), retryable: false };
+              }
+              if (parsed.contentFilter) {
+                return { terminal: await finish(outputText.trim() === '' ? 'content_filter' : 'stop'), retryable: false };
+              }
+              if (parsed.finished) {
+                streamEnded = true;
+                break;
+              }
+
+              for (const p of parsed.parts) {
+                if (!p.text) {
                   continue;
                 }
-                thinkingText += trimmed;
-                sendDeltaFrame({ reasoning_content: trimmed });
-              }
-            } else {
-              const trimmed = trimContinuationOverlap(outputText, p.text);
-              if (!trimmed) {
-                continue;
-              }
-              if (searchEnabled && isCitation(trimmed)) {
-                continue;
-              }
-              outputText += trimmed;
-              if (!toolSieveEnabled) {
-                sendDeltaFrame({ content: trimmed });
-                continue;
-              }
-              const events = processToolSieveChunk(toolSieveState, trimmed, toolNames);
-              for (const evt of events) {
-                if (evt.type === 'tool_call_deltas') {
-                  if (!emitEarlyToolDeltas) {
+                if (p.type === 'thinking') {
+                  if (thinkingEnabled) {
+                    const trimmed = trimContinuationOverlap(thinkingText, p.text);
+                    if (!trimmed) {
+                      continue;
+                    }
+                    thinkingText += trimmed;
+                    sendDeltaFrame({ reasoning_content: trimmed });
+                  }
+                } else {
+                  const trimmed = trimContinuationOverlap(outputText, p.text);
+                  if (!trimmed) {
                     continue;
                   }
-                  const filtered = filterIncrementalToolCallDeltasByAllowed(evt.deltas, toolNames, streamToolNames);
-                  const formatted = formatIncrementalToolCallDeltas(filtered, streamToolCallIDs);
-                  if (formatted.length > 0) {
-                    toolCallsEmitted = true;
-                    sendDeltaFrame({ tool_calls: formatted });
+                  if (searchEnabled && isCitation(trimmed)) {
+                    continue;
+                  }
+                  outputText += trimmed;
+                  if (!toolSieveEnabled) {
+                    sendDeltaFrame({ content: trimmed });
+                    continue;
+                  }
+                  const events = processToolSieveChunk(toolSieveState, trimmed, toolNames);
+                  for (const evt of events) {
+                    if (evt.type === 'tool_call_deltas') {
+                      if (!emitEarlyToolDeltas) {
+                        continue;
+                      }
+                      const filtered = filterIncrementalToolCallDeltasByAllowed(evt.deltas, toolNames, streamToolNames);
+                      const formatted = formatIncrementalToolCallDeltas(filtered, streamToolCallIDs);
+                      if (formatted.length > 0) {
+                        toolCallsEmitted = true;
+                        sendDeltaFrame({ tool_calls: formatted });
+                      }
+                      continue;
+                    }
+                    if (evt.type === 'tool_calls') {
+                      toolCallsEmitted = true;
+                      toolCallsDoneEmitted = true;
+                      sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls, streamToolCallIDs) });
+                      resetStreamToolCallState(streamToolCallIDs, streamToolNames);
+                      continue;
+                    }
+                    if (evt.text) {
+                      sendDeltaFrame({ content: evt.text });
+                    }
                   }
-                  continue;
-                }
-                if (evt.type === 'tool_calls') {
-                  toolCallsEmitted = true;
-                  toolCallsDoneEmitted = true;
-                  sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls, streamToolCallIDs) });
-                  resetStreamToolCallState(streamToolCallIDs, streamToolNames);
-                  continue;
-                }
-                if (evt.text) {
-                  sendDeltaFrame({ content: evt.text });
                 }
               }
+              if (streamEnded) {
+                break;
+              }
+            }
+            if (streamEnded) {
+              break;
             }
           }
+        } catch (err) {
+          if (clientClosed || isAbortError(err)) {
+            await finish('stop');
+            return { terminal: true, retryable: false };
+          }
+          await finish('stop');
+          return { terminal: true, retryable: false };
         }
+
+        if (shouldAutoContinue(continueState) && continueRounds < AUTO_CONTINUE_MAX_ROUNDS) {
+          continueRounds += 1;
+          const nextRes = await fetchContinue(continueState.responseMessageID);
+          if (nextRes === null) {
+            return { terminal: true, retryable: false };
+          }
+          if (!nextRes.ok || !nextRes.body) {
+            return { terminal: await finish('stop'), retryable: false };
+          }
+          continueState = prepareContinueStateForNextRound(continueState);
+          currentResponse = nextRes;
+          continue;
+        }
+        break;
       }
-      await finish('stop');
-    } catch (err) {
-      if (clientClosed || isAbortError(err)) {
+
+      const terminal = await finish('stop', { deferEmpty: allowDeferEmpty });
+      return { terminal, retryable: !terminal && allowDeferEmpty, responseMessageID: continueState.responseMessageID };
+    };
+
+    let retryAttempts = 0;
+    // eslint-disable-next-line no-constant-condition
+    while (true) {
+      const processed = await processStream(completionRes, retryAttempts < EMPTY_OUTPUT_RETRY_MAX_ATTEMPTS);
+      if (processed.terminal) {
+        return;
+      }
+      if (!processed.retryable || retryAttempts >= EMPTY_OUTPUT_RETRY_MAX_ATTEMPTS) {
+        await finish('stop');
+        return;
+      }
+      retryAttempts += 1;
+      console.info('[openai_empty_retry] attempting synthetic retry', {
+        surface: 'chat.completions',
+        stream: true,
+        retry_attempt: retryAttempts,
+        parent_message_id: processed.responseMessageID || 0,
+      });
+      usagePrompt = usagePromptWithEmptyOutputRetry(finalPrompt, retryAttempts);
+      const retryPowHeader = await refreshPowHeader('retry');
+      if (!retryPowHeader) {
+        return;
+      }
+      completionRes = await fetchDeepSeekStream(
+        DEEPSEEK_COMPLETION_URL,
+        clonePayloadForEmptyOutputRetry(completionPayload, processed.responseMessageID),
+        retryPowHeader,
+      );
+      if (completionRes === null) {
+        return;
+      }
+      if (!completionRes.ok || !completionRes.body) {
         await finish('stop');
         return;
       }
-      await finish('stop');
     }
   } finally {
     req.removeListener('aborted', onReqAborted);
@@ -328,6 +453,113 @@ function toBool(v) {
   return v === true;
 }
 
+// Shallow-copy payload with the retry suffix on its prompt; set parent_message_id for a positive id.
+function clonePayloadForEmptyOutputRetry(payload, parentMessageID) {
+  const basePrompt = asString(payload && payload.prompt);
+  const retryPayload = { ...(payload || {}), prompt: appendEmptyOutputRetrySuffix(basePrompt) };
+  if (!parentMessageID || !(parentMessageID > 0)) {
+    return retryPayload;
+  }
+  retryPayload.parent_message_id = parentMessageID;
+  return retryPayload;
+}
+
+// Returns the retry suffix alone for an empty or whitespace-only prompt; otherwise
+// the prompt with trailing whitespace removed, a blank line, then the suffix.
+function appendEmptyOutputRetrySuffix(prompt) {
+  const trimmedPrompt = asString(prompt).trimEnd();
+  const pieces = trimmedPrompt === '' ? [] : [trimmedPrompt];
+  return pieces.concat(EMPTY_OUTPUT_RETRY_SUFFIX).join('\n\n');
+}
+
+// Prompt text for usage calculation once retries have happened: the original
+// prompt followed by each successive retry prompt, joined with newlines.
+// With no (or non-positive) attempts the original prompt is returned untouched.
+function usagePromptWithEmptyOutputRetry(originalPrompt, attempts) {
+  const prompts = [originalPrompt];
+  // Non-positive or missing attempts mean there is nothing to append.
+  const rounds = !attempts || attempts <= 0 ? 0 : attempts;
+  for (let round = 0; round < rounds; round += 1) {
+    prompts.push(appendEmptyOutputRetrySuffix(prompts[round]));
+  }
+  return rounds === 0 ? originalPrompt : prompts.join('\n');
+}
+
+// Fresh auto-continue tracking state: no response message id seen, no status
+// observed, not finished. The session id is normalised through asString.
+function createContinueState(sessionID) {
+  const state = { sessionID: asString(sessionID), responseMessageID: 0 };
+  state.lastStatus = '';
+  state.finished = false;
+  return state;
+}
+
+// Copy of state for the next continue round: lastStatus and finished are
+// cleared, every other field (e.g. responseMessageID) carries over.
+function prepareContinueStateForNextRound(state) {
+  const carried = { ...state, lastStatus: '' };
+  carried.finished = false;
+  return carried;
+}
+
+// Updates state from one parsed SSE chunk: picks up response message ids and
+// status values from the top level, from chunk.v.response and chunk.message.response.
+function observeContinueState(state, chunk) {
+  if (!state || !chunk || typeof chunk !== 'object') {
+    return;
+  }
+  const isObject = (value) => Boolean(value) && typeof value === 'object';
+  const rememberID = (rawID) => {
+    const id = numberValue(rawID);
+    if (id > 0) {
+      state.responseMessageID = id;
+    }
+  };
+  rememberID(chunk.response_message_id);
+  if (chunk.p === 'response/status') {
+    setContinueStatus(state, asString(chunk.v));
+  }
+  const nested = isObject(chunk.v) ? chunk.v.response : null;
+  if (isObject(nested)) {
+    rememberID(nested.message_id);
+    setContinueStatus(state, asString(nested.status));
+    if (nested.auto_continue === true) {
+      state.lastStatus = 'AUTO_CONTINUE';
+    }
+  }
+  const fromMessage = isObject(chunk.message) ? chunk.message.response : undefined;
+  if (isObject(fromMessage)) {
+    rememberID(fromMessage.message_id);
+    setContinueStatus(state, asString(fromMessage.status));
+  }
+}
+
+// Store a non-blank trimmed status; FINISHED (any letter case) also sets state.finished.
+function setContinueStatus(state, status) {
+  const label = asString(status).trim();
+  if (label !== '') {
+    state.lastStatus = label;
+    if (label.toUpperCase() === 'FINISHED') {
+      state.finished = true;
+    }
+  }
+}
+
+// True only for an unfinished state with a session id, a usable message id and a WIP/INCOMPLETE/AUTO_CONTINUE status.
+function shouldAutoContinue(state) {
+  const blocked = !state || state.finished || !state.sessionID || state.responseMessageID <= 0;
+  const status = blocked ? '' : asString(state.lastStatus).trim().toUpperCase();
+  return !blocked && ['WIP', 'INCOMPLETE', 'AUTO_CONTINUE'].includes(status);
+}
+
+// Integer view of v: finite numbers are truncated toward zero; anything else
+// goes through asString and base-10 parseInt, yielding 0 when that is NaN.
+function numberValue(v) {
+  const isFiniteNumber = typeof v === 'number' && Number.isFinite(v);
+  const candidate = isFiniteNumber ? Math.trunc(v) : Number.parseInt(asString(v), 10);
+  return Number.isFinite(candidate) ? candidate : 0;
+}
+
 function upstreamEmptyOutputDetail(contentFilter, _text, thinking) {
   if (contentFilter) {
     return {
diff --git a/internal/js/helpers/stream-tool-sieve/parse.js b/internal/js/helpers/stream-tool-sieve/parse.js
index 0e7d552..82f8f94 100644
--- a/internal/js/helpers/stream-tool-sieve/parse.js
+++ b/internal/js/helpers/stream-tool-sieve/parse.js
@@ -6,10 +6,10 @@ const {
 const {
   parseMarkupToolCalls,
   stripFencedCodeBlocks,
+  containsToolCallWrapperSyntaxOutsideIgnored,
+  sanitizeLooseCDATA,
 } = require('./parse_payload');
 
-const TOOL_MARKUP_PREFIXES = ['<tool_calls'];
-
 function extractToolNames(tools) {
   if (!Array.isArray(tools) || tools.length === 0) {
     return [];
@@ -46,7 +46,13 @@ function parseToolCallsDetailed(text, toolNames) {
     return result;
   }
   // XML markup parsing only.
-  const parsed = parseMarkupToolCalls(normalized);
+  let parsed = parseMarkupToolCalls(normalized);
+  if (parsed.length === 0 && normalized.toLowerCase().includes('<![cdata[')) {
+    const recovered = sanitizeLooseCDATA(normalized);
+    if (recovered !== normalized) {
+      parsed = parseMarkupToolCalls(recovered);
+    }
+  }
   if (parsed.length === 0) {
     return result;
   }
@@ -73,7 +79,13 @@ function parseStandaloneToolCallsDetailed(text, toolNames) {
     return result;
   }
   // XML markup parsing only.
-  const parsed = parseMarkupToolCalls(trimmed);
+  let parsed = parseMarkupToolCalls(trimmed);
+  if (parsed.length === 0 && trimmed.toLowerCase().includes('<![cdata[')) {
+    const recovered = sanitizeLooseCDATA(trimmed);
+    if (recovered !== trimmed) {
+      parsed = parseMarkupToolCalls(recovered);
+    }
+  }
   if (parsed.length === 0) {
     return result;
   }
@@ -110,8 +122,8 @@ function filterToolCallsDetailed(parsed, toolNames) {
 }
 
 function looksLikeToolCallSyntax(text) {
-  const lower = toStringSafe(text).toLowerCase();
-  return TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
+  const styles = containsToolCallWrapperSyntaxOutsideIgnored(text); // detection is delegated to the parse_payload helper
+  return styles.dsml || styles.canonical; // truthy when either the dsml or the canonical flag is set
 }
 
 function shouldSkipToolCallParsingForCodeFenceExample(text) {
diff --git a/internal/js/helpers/stream-tool-sieve/parse_payload.js b/internal/js/helpers/stream-tool-sieve/parse_payload.js
index 05334d3..185ed4d 100644
--- a/internal/js/helpers/stream-tool-sieve/parse_payload.js
+++ b/internal/js/helpers/stream-tool-sieve/parse_payload.js
@@ -3,6 +3,7 @@
 const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
 const CDATA_PATTERN = /^<!\[CDATA\[([\s\S]*?)]]>$/i;
 const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi;
+const TOOL_MARKUP_NAMES = ['tool_calls', 'invoke', 'parameter'];
 
 const {
   toStringSafe,
@@ -13,11 +14,110 @@ function stripFencedCodeBlocks(text) {
   if (!t) {
     return '';
   }
-  return t.replace(/```[\s\S]*?```/g, ' ');
+  const lines = t.split('\n');
+  const out = [];
+  let inFence = false;
+  let fenceChar = '';
+  let fenceLen = 0;
+  let inCDATA = false;
+  let beforeFenceIdx = 0;
+
+  for (let li = 0; li < lines.length; li += 1) {
+    const line = lines[li];
+    const lineWithNL = li < lines.length - 1 ? line + '\n' : line;
+
+    // CDATA protection
+    if (inCDATA || cdataStartsBeforeFence(line)) {
+      out.push(lineWithNL);
+      inCDATA = updateCDATAStateLine(inCDATA, line);
+      continue;
+    }
+
+    const trimmed = line.replace(/^[ \t]+/, '');
+    if (!inFence) {
+      const fence = parseFenceOpenLine(trimmed);
+      if (fence) {
+        inFence = true;
+        fenceChar = fence.ch;
+        fenceLen = fence.count;
+        beforeFenceIdx = out.length;
+        continue;
+      }
+      out.push(lineWithNL);
+      continue;
+    }
+
+    if (isFenceCloseLine(trimmed, fenceChar, fenceLen)) {
+      inFence = false;
+      fenceChar = '';
+      fenceLen = 0;
+    }
+  }
+
+  if (inFence) {
+    // Unclosed fence: keep content before the fence started.
+    if (beforeFenceIdx > 0) {
+      return out.slice(0, beforeFenceIdx).join('');
+    }
+    return '';
+  }
+  return out.join('');
+}
+
+function parseFenceOpenLine(trimmed) {
+  if (trimmed.length < 3) return null;
+  const ch = trimmed[0];
+  if (ch !== '`' && ch !== '~') return null;
+  let count = 0;
+  while (count < trimmed.length && trimmed[count] === ch) count++;
+  if (count < 3) return null;
+  return { ch, count };
+}
+
+function isFenceCloseLine(trimmed, fenceChar, fenceLen) {
+  if (!fenceChar || !trimmed || trimmed[0] !== fenceChar) return false;
+  let count = 0;
+  while (count < trimmed.length && trimmed[count] === fenceChar) count++;
+  if (count < fenceLen) return false;
+  return trimmed.slice(count).trim() === '';
+}
+
+function cdataStartsBeforeFence(line) {
+  const cdataIdx = line.toLowerCase().indexOf('<![cdata[');
+  if (cdataIdx < 0) return false;
+  const fenceIdx = Math.min(
+    line.indexOf('```') >= 0 ? line.indexOf('```') : Infinity,
+    line.indexOf('~~~') >= 0 ? line.indexOf('~~~') : Infinity,
+  );
+  return fenceIdx === Infinity || cdataIdx < fenceIdx;
+}
+
+function updateCDATAStateLine(inCDATA, line) {
+  const lower = line.toLowerCase();
+  let pos = 0;
+  let state = inCDATA;
+  while (pos < lower.length) {
+    if (state) {
+      const end = lower.indexOf(']]>', pos);
+      if (end < 0) return true;
+      pos = end + ']]>'.length;
+      state = false;
+      continue;
+    }
+    const start = lower.indexOf('<![cdata[', pos);
+    if (start < 0) return false;
+    pos = start + '<![cdata['.length;
+    state = true;
+  }
+  return state;
 }
 
 function parseMarkupToolCalls(text) {
-  const raw = toStringSafe(text).trim();
+  const normalized = normalizeDSMLToolCallMarkup(toStringSafe(text));
+  if (!normalized.ok) {
+    return [];
+  }
+  const raw = normalized.text.trim();
   if (!raw) {
     return [];
   }
@@ -34,6 +134,133 @@ function parseMarkupToolCalls(text) {
   return out;
 }
 
+function normalizeDSMLToolCallMarkup(text) {
+  const raw = toStringSafe(text);
+  if (!raw) {
+    return { text: '', ok: true };
+  }
+  const styles = containsToolMarkupSyntaxOutsideIgnored(raw);
+  if (!styles.dsml) {
+    return { text: raw, ok: true };
+  }
+  return {
+    text: replaceDSMLToolMarkupOutsideIgnored(raw),
+    ok: true,
+  };
+}
+
+function containsDSMLToolMarkup(text) {
+  return containsToolMarkupSyntaxOutsideIgnored(text).dsml;
+}
+
+function containsCanonicalToolMarkup(text) {
+  return containsToolMarkupSyntaxOutsideIgnored(text).canonical;
+}
+
+function containsToolCallWrapperSyntaxOutsideIgnored(text) {
+  const raw = toStringSafe(text);
+  const styles = { dsml: false, canonical: false };
+  if (!raw) {
+    return styles;
+  }
+  const lower = raw.toLowerCase();
+  for (let i = 0; i < raw.length;) {
+    const skipped = skipXmlIgnoredSection(lower, i);
+    if (skipped.blocked) {
+      return styles;
+    }
+    if (skipped.advanced) {
+      i = skipped.next;
+      continue;
+    }
+    const tag = scanToolMarkupTagAt(raw, i);
+    if (tag) {
+      if (tag.name !== 'tool_calls') {
+        i = tag.end + 1;
+        continue;
+      }
+      if (tag.dsmlLike) {
+        styles.dsml = true;
+      } else {
+        styles.canonical = true;
+      }
+      if (styles.dsml && styles.canonical) {
+        return styles;
+      }
+      i = tag.end + 1;
+      continue;
+    }
+    i += 1;
+  }
+  return styles;
+}
+function containsToolMarkupSyntaxOutsideIgnored(text) {
+  const raw = toStringSafe(text);
+  const styles = { dsml: false, canonical: false };
+  if (!raw) {
+    return styles;
+  }
+  for (let i = 0; i < raw.length;) {
+    const skipped = skipXmlIgnoredSection(raw.toLowerCase(), i);
+    if (skipped.blocked) {
+      return styles;
+    }
+    if (skipped.advanced) {
+      i = skipped.next;
+      continue;
+    }
+    const tag = scanToolMarkupTagAt(raw, i);
+    if (tag) {
+      if (tag.dsmlLike) {
+        styles.dsml = true;
+      } else {
+        styles.canonical = true;
+      }
+      if (styles.dsml && styles.canonical) {
+        return styles;
+      }
+      i = tag.end + 1;
+      continue;
+    }
+    i += 1;
+  }
+  return styles;
+}
+
+function replaceDSMLToolMarkupOutsideIgnored(text) {
+  const raw = toStringSafe(text);
+  if (!raw) {
+    return '';
+  }
+  const lower = raw.toLowerCase();
+  let out = '';
+  for (let i = 0; i < raw.length;) {
+    const skipped = skipXmlIgnoredSection(lower, i);
+    if (skipped.blocked) {
+      out += raw.slice(i);
+      break;
+    }
+    if (skipped.advanced) {
+      out += raw.slice(i, skipped.next);
+      i = skipped.next;
+      continue;
+    }
+    const tag = scanToolMarkupTagAt(raw, i);
+    if (tag) {
+      if (tag.dsmlLike) {
+        out += `<${tag.closing ? '/' : ''}${tag.name}${raw.slice(tag.nameEnd, tag.end + 1)}`;
+      } else {
+        out += raw.slice(tag.start, tag.end + 1);
+      }
+      i = tag.end + 1;
+      continue;
+    }
+    out += raw[i];
+    i += 1;
+  }
+  return out;
+}
+
 function parseMarkupSingleToolCall(block) {
   const attrs = parseTagAttributes(block.attrs);
   const name = toStringSafe(attrs.name).trim();
@@ -89,7 +316,8 @@ function findXmlElementBlocks(text, tag) {
     }
     const end = findMatchingXmlEndTagOutsideCDATA(source, name, start.bodyStart);
     if (!end) {
-      break;
+      pos = start.bodyStart;
+      continue;
     }
     out.push({
       attrs: start.attrs,
@@ -190,6 +418,150 @@ function skipXmlIgnoredSection(lower, i) {
   return { advanced: false, blocked: false, next: i };
 }
 
+function scanToolMarkupTagAt(text, start) {
+  const raw = toStringSafe(text);
+  if (!raw || start < 0 || start >= raw.length || raw[start] !== '<') {
+    return null;
+  }
+  const lower = raw.toLowerCase();
+  let i = start + 1;
+  const closing = raw[i] === '/';
+  if (closing) {
+    i += 1;
+  }
+  let dsmlLike = false;
+  if (i < raw.length && isToolMarkupPipe(raw[i])) {
+    dsmlLike = true;
+    i += 1;
+  }
+  if (lower.startsWith('dsml', i)) {
+    dsmlLike = true;
+    i += 'dsml'.length;
+    while (i < raw.length && isToolMarkupSeparator(raw[i])) {
+      i += 1;
+    }
+  }
+  const { name, len } = matchToolMarkupName(lower, i);
+  if (!name) {
+    return null;
+  }
+  const nameEnd = i + len;
+  if (!hasXmlTagBoundary(raw, nameEnd)) {
+    return null;
+  }
+  const end = findXmlTagEnd(raw, nameEnd);
+  if (end < 0) {
+    return null;
+  }
+  return {
+    start,
+    end,
+    nameStart: i,
+    nameEnd,
+    name,
+    closing,
+    selfClosing: raw.slice(start, end + 1).trim().endsWith('/>'),
+    dsmlLike,
+    canonical: !dsmlLike,
+  };
+}
+
+function findToolMarkupTagOutsideIgnored(text, from) {
+  const raw = toStringSafe(text);
+  const lower = raw.toLowerCase();
+  for (let i = Math.max(0, from || 0); i < raw.length;) {
+    const skipped = skipXmlIgnoredSection(lower, i);
+    if (skipped.blocked) {
+      return null;
+    }
+    if (skipped.advanced) {
+      i = skipped.next;
+      continue;
+    }
+    const tag = scanToolMarkupTagAt(raw, i);
+    if (tag) {
+      return tag;
+    }
+    i += 1;
+  }
+  return null;
+}
+
+function findMatchingToolMarkupClose(text, openTag) {
+  const raw = toStringSafe(text);
+  if (!raw || !openTag || !openTag.name || openTag.closing) {
+    return null;
+  }
+  let depth = 1;
+  for (let pos = openTag.end + 1; pos < raw.length;) {
+    const tag = findToolMarkupTagOutsideIgnored(raw, pos);
+    if (!tag) {
+      return null;
+    }
+    if (tag.name !== openTag.name) {
+      pos = tag.end + 1;
+      continue;
+    }
+    if (tag.closing) {
+      depth -= 1;
+      if (depth === 0) {
+        return tag;
+      }
+    } else if (!tag.selfClosing) {
+      depth += 1;
+    }
+    pos = tag.end + 1;
+  }
+  return null;
+}
+
+function findPartialToolMarkupStart(text) {
+  const raw = toStringSafe(text);
+  const lastLT = raw.lastIndexOf('<');
+  if (lastLT < 0) {
+    return -1;
+  }
+  const tail = raw.slice(lastLT);
+  if (tail.includes('>')) {
+    return -1;
+  }
+  const lowerTail = tail.toLowerCase();
+  const candidates = [
+    '<tool_calls', '<invoke', '<parameter',
+    '<|tool_calls', '<|invoke', '<|parameter',
+    '<｜tool_calls', '<｜invoke', '<｜parameter',
+    '<|dsml|tool_calls', '<|dsml|invoke', '<|dsml|parameter',
+    '<dsmltool_calls', '<dsmlinvoke', '<dsmlparameter',
+    '<dsml tool_calls', '<dsml invoke', '<dsml parameter',
+    '<dsml|tool_calls', '<dsml|invoke', '<dsml|parameter',
+    '<|dsmltool_calls', '<|dsmlinvoke', '<|dsmlparameter',
+    '<|dsml tool_calls', '<|dsml invoke', '<|dsml parameter',
+  ];
+  for (const candidate of candidates) {
+    if (candidate.startsWith(lowerTail)) {
+      return lastLT;
+    }
+  }
+  return -1;
+}
+
+function isToolMarkupPipe(ch) {
+  return ch === '|' || ch === '｜';
+}
+
+function isToolMarkupSeparator(ch) {
+  return ch === ' ' || ch === '\t' || ch === '\r' || ch === '\n' || isToolMarkupPipe(ch);
+}
+
+function matchToolMarkupName(lower, start) {
+  for (const name of TOOL_MARKUP_NAMES) {
+    if (lower.startsWith(name, start)) {
+      return { name, len: name.length };
+    }
+  }
+  return { name: '', len: 0 };
+}
+
 function findXmlTagEnd(text, from) {
   let quote = '';
   for (let i = Math.max(0, from || 0); i < text.length; i += 1) {
@@ -267,7 +639,8 @@ function parseMarkupKVObject(text) {
 function parseMarkupValue(raw) {
   const cdata = extractStandaloneCDATA(raw);
   if (cdata.ok) {
-    return cdata.value;
+    const literal = parseJSONLiteralValue(cdata.value);
+    return literal.ok ? literal.value : cdata.value;
   }
   const s = toStringSafe(extractRawTagValue(raw)).trim();
   if (!s) {
@@ -284,12 +657,9 @@ function parseMarkupValue(raw) {
     }
   }
 
-  if (s.startsWith('{') || s.startsWith('[')) {
-    try {
-      return JSON.parse(s);
-    } catch (_err) {
-      return s;
-    }
+  const literal = parseJSONLiteralValue(s);
+  if (literal.ok) {
+    return literal.value;
   }
   return s;
 }
@@ -327,9 +697,65 @@ function extractStandaloneCDATA(inner) {
   if (cdataMatch && cdataMatch[1] !== undefined) {
     return { ok: true, value: cdataMatch[1] };
   }
+  if (s.toLowerCase().startsWith('<![cdata[')) {
+    return { ok: true, value: s.slice('<![CDATA['.length) };
+  }
   return { ok: false, value: '' };
 }
 
+function parseJSONLiteralValue(raw) {
+  const s = toStringSafe(raw).trim();
+  if (!s) {
+    return { ok: false, value: null };
+  }
+  if (!['{', '[', '"', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 't', 'f', 'n'].includes(s[0])) {
+    return { ok: false, value: null };
+  }
+  try {
+    return { ok: true, value: JSON.parse(s) };
+  } catch (_err) {
+    return { ok: false, value: null };
+  }
+}
+
+function sanitizeLooseCDATA(text) {
+  const raw = toStringSafe(text);
+  if (!raw) {
+    return '';
+  }
+  const lower = raw.toLowerCase();
+  const openMarker = '<![cdata[';
+  const closeMarker = ']]>';
+
+  let out = '';
+  let pos = 0;
+  let changed = false;
+  while (pos < raw.length) {
+    const startRel = lower.indexOf(openMarker, pos);
+    if (startRel < 0) {
+      out += raw.slice(pos);
+      break;
+    }
+    const start = startRel;
+    const contentStart = start + openMarker.length;
+    out += raw.slice(pos, start);
+
+    const endRel = lower.indexOf(closeMarker, contentStart);
+    if (endRel >= 0) {
+      const end = endRel + closeMarker.length;
+      out += raw.slice(start, end);
+      pos = end;
+      continue;
+    }
+
+    changed = true;
+    out += raw.slice(contentStart);
+    pos = raw.length;
+  }
+
+  return changed ? out : raw;
+}
+
 function parseTagAttributes(raw) {
   const source = toStringSafe(raw);
   const out = {};
@@ -403,4 +829,11 @@ function isOnlyRawValue(obj) {
 module.exports = {
   stripFencedCodeBlocks,
   parseMarkupToolCalls,
+  normalizeDSMLToolCallMarkup,
+  containsToolMarkupSyntaxOutsideIgnored,
+  containsToolCallWrapperSyntaxOutsideIgnored,
+  findToolMarkupTagOutsideIgnored,
+  findMatchingToolMarkupClose,
+  findPartialToolMarkupStart,
+  sanitizeLooseCDATA,
 };
diff --git a/internal/js/helpers/stream-tool-sieve/sieve-xml.js b/internal/js/helpers/stream-tool-sieve/sieve-xml.js
index 90ea280..463e4db 100644
--- a/internal/js/helpers/stream-tool-sieve/sieve-xml.js
+++ b/internal/js/helpers/stream-tool-sieve/sieve-xml.js
@@ -1,115 +1,121 @@
 'use strict';
 const { parseToolCalls } = require('./parse');
-
-// XML wrapper tag pair used by the streaming sieve.
-const XML_TOOL_TAG_PAIRS = [
-  { open: '<tool_calls', close: '</tool_calls>' },
-];
-
-const XML_TOOL_OPENING_TAGS = XML_TOOL_TAG_PAIRS.map(p => p.open);
+const {
+  findToolMarkupTagOutsideIgnored,
+  findMatchingToolMarkupClose,
+  findPartialToolMarkupStart,
+} = require('./parse_payload');
 
 function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
-  const lower = captured.toLowerCase();
-  // Find the FIRST matching open/close pair for the canonical wrapper.
-  for (const pair of XML_TOOL_TAG_PAIRS) {
-    const openIdx = lower.indexOf(pair.open);
-    if (openIdx < 0) {
+  let anyOpenFound = false;
+  let best = null;
+  let rejected = null;
+
+  // Scan every recognized wrapper occurrence. Prose can mention a wrapper tag
+  // before the actual tool block, including the same variant as the real block.
+  for (let searchFrom = 0; searchFrom < captured.length;) {
+    const openTag = findFirstToolTag(captured, searchFrom, 'tool_calls', false);
+    if (!openTag) {
+      break;
+    }
+    const closeTag = findMatchingToolMarkupClose(captured, openTag);
+    if (!closeTag) {
+      anyOpenFound = true;
+      searchFrom = openTag.end + 1;
       continue;
     }
-    // Ignore closing tags that appear inside CDATA payloads, such as
-    // write-file content containing tool-call documentation examples.
-    const closeIdx = findXMLCloseOutsideCDATA(captured, pair.close, openIdx + pair.open.length);
-    if (closeIdx < 0) {
-      // Opening tag present but specific closing tag hasn't arrived.
-      // Return not-ready so buffering continues until the wrapper closes.
-      return { ready: false, prefix: '', calls: [], suffix: '' };
-    }
-    const closeEnd = closeIdx + pair.close.length;
-    const xmlBlock = captured.slice(openIdx, closeEnd);
-    let prefixPart = captured.slice(0, openIdx);
-    let suffixPart = captured.slice(closeEnd);
+    const xmlBlock = captured.slice(openTag.start, closeTag.end + 1);
+    const prefixPart = captured.slice(0, openTag.start);
+    const suffixPart = captured.slice(closeTag.end + 1);
     const parsed = parseToolCalls(xmlBlock, toolNames);
     if (Array.isArray(parsed) && parsed.length > 0) {
       const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
-      return {
-        ready: true,
-        prefix: trimmedFence.prefix,
-        calls: parsed,
-        suffix: trimmedFence.suffix,
+      if (!best || openTag.start < best.start) {
+        best = {
+          start: openTag.start,
+          prefix: trimmedFence.prefix,
+          calls: parsed,
+          suffix: trimmedFence.suffix,
+        };
+      }
+      break;
+    }
+    if (!rejected || openTag.start < rejected.start) {
+      rejected = {
+        start: openTag.start,
+        prefix: prefixPart + xmlBlock,
+        suffix: suffixPart,
       };
     }
+    searchFrom = openTag.end + 1;
+  }
+  if (best) {
+    return { ready: true, prefix: best.prefix, calls: best.calls, suffix: best.suffix };
+  }
+  if (anyOpenFound) {
+    // At least one opening tag was found but none had a matching close tag.
+    return { ready: false, prefix: '', calls: [], suffix: '' };
+  }
+  if (rejected) {
     // If this block failed to become a tool call, pass it through as text.
-    return { ready: true, prefix: prefixPart + xmlBlock, calls: [], suffix: suffixPart };
+    return { ready: true, prefix: rejected.prefix, calls: [], suffix: rejected.suffix };
+  }
+  const invokeTag = findFirstToolTag(captured, 0, 'invoke', false);
+  if (invokeTag) {
+    const wrapperOpen = findFirstToolTag(captured, 0, 'tool_calls', false);
+    if (!wrapperOpen || wrapperOpen.start > invokeTag.start) {
+      const closeTag = findFirstToolTag(captured, invokeTag.start + 1, 'tool_calls', true);
+      if (closeTag && closeTag.start > invokeTag.start) {
+        const xmlBlock = '<tool_calls>' + captured.slice(invokeTag.start, closeTag.end + 1);
+        const prefixPart = captured.slice(0, invokeTag.start);
+        const suffixPart = captured.slice(closeTag.end + 1);
+        const parsed = parseToolCalls(xmlBlock, toolNames);
+        if (Array.isArray(parsed) && parsed.length > 0) {
+          const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
+          return {
+            ready: true,
+            prefix: trimmedFence.prefix,
+            calls: parsed,
+            suffix: trimmedFence.suffix,
+          };
+        }
+        return { ready: true, prefix: prefixPart + captured.slice(invokeTag.start, closeTag.end + 1), calls: [], suffix: suffixPart };
+      }
+    }
   }
   return { ready: false, prefix: '', calls: [], suffix: '' };
 }
 
 function hasOpenXMLToolTag(captured) {
-  const lower = captured.toLowerCase();
-  for (const pair of XML_TOOL_TAG_PAIRS) {
-    const openIdx = lower.indexOf(pair.open);
-    if (openIdx >= 0) {
-      if (findXMLCloseOutsideCDATA(captured, pair.close, openIdx + pair.open.length) < 0) {
-        return true;
-      }
+  for (let pos = 0; pos < captured.length;) {
+    const tag = findFirstToolTag(captured, pos, 'tool_calls', false);
+    if (!tag) {
+      return false;
     }
+    if (!findMatchingToolMarkupClose(captured, tag)) {
+      return true;
+    }
+    pos = tag.end + 1;
   }
   return false;
 }
 
-function findPartialXMLToolTagStart(s) {
-  const lastLT = s.lastIndexOf('<');
-  if (lastLT < 0) {
-    return -1;
-  }
-  const tail = s.slice(lastLT);
-  if (tail.includes('>')) {
-    return -1;
-  }
-  const lowerTail = tail.toLowerCase();
-  for (const tag of XML_TOOL_OPENING_TAGS) {
-    const tagWithLT = tag.startsWith('<') ? tag : '<' + tag;
-    if (tagWithLT.startsWith(lowerTail)) {
-      return lastLT;
+function findFirstToolTag(text, from, name, closing) {
+  for (let pos = Math.max(0, from || 0); pos < text.length;) {
+    const tag = findToolMarkupTagOutsideIgnored(text, pos);
+    if (!tag) {
+      return null;
     }
-  }
-  return -1;
-}
-
-function findXMLCloseOutsideCDATA(s, closeTag, start) {
-  const text = typeof s === 'string' ? s : '';
-  const target = String(closeTag || '').toLowerCase();
-  if (!text || !target) {
-    return -1;
-  }
-  const lower = text.toLowerCase();
-  for (let i = Math.max(0, start || 0); i < text.length;) {
-    if (lower.startsWith('<![cdata[', i)) {
-      const end = lower.indexOf(']]>', i + '<![cdata['.length);
-      if (end < 0) {
-        return -1;
-      }
-      i = end + ']]>'.length;
-      continue;
+    if (tag.name === name && tag.closing === closing) {
+      return tag;
     }
-    if (lower.startsWith('<!--', i)) {
-      const end = lower.indexOf('-->', i + '<!--'.length);
-      if (end < 0) {
-        return -1;
-      }
-      i = end + '-->'.length;
-      continue;
-    }
-    if (lower.startsWith(target, i)) {
-      return i;
-    }
-    i += 1;
+    pos = tag.end + 1;
   }
-  return -1;
+  return null;
 }
 
 module.exports = {
   consumeXMLToolCapture,
   hasOpenXMLToolTag,
-  findPartialXMLToolTagStart,
+  findPartialXMLToolTagStart: findPartialToolMarkupStart,
 };
diff --git a/internal/js/helpers/stream-tool-sieve/sieve.js b/internal/js/helpers/stream-tool-sieve/sieve.js
index 6ae85f7..a90a662 100644
--- a/internal/js/helpers/stream-tool-sieve/sieve.js
+++ b/internal/js/helpers/stream-tool-sieve/sieve.js
@@ -6,8 +6,9 @@ const {
 } = require('./state');
 const { trimWrappingJSONFence } = require('./jsonscan');
 const {
-  XML_TOOL_SEGMENT_TAGS,
-} = require('./tool-keywords');
+  findToolMarkupTagOutsideIgnored,
+  sanitizeLooseCDATA,
+} = require('./parse_payload');
 const {
   consumeXMLToolCapture: consumeXMLToolCaptureImpl,
   hasOpenXMLToolTag,
@@ -43,6 +44,10 @@ function processToolSieveChunk(state, chunk, toolNames) {
       resetIncrementalToolState(state);
 
       if (Array.isArray(consumed.calls) && consumed.calls.length > 0) {
+        if (consumed.prefix) {
+          noteText(state, consumed.prefix);
+          events.push({ type: 'text', text: consumed.prefix });
+        }
         state.pendingToolRaw = captured;
         state.pendingToolCalls = consumed.calls;
         if (consumed.suffix) {
@@ -113,8 +118,27 @@ function flushToolSieve(state, toolNames) {
       }
     } else if (state.capture) {
       const content = state.capture;
-      noteText(state, content);
-      events.push({ type: 'text', text: content });
+      const recovered = sanitizeLooseCDATA(content);
+      if (recovered !== content) {
+        const recoveredResult = consumeXMLToolCaptureImpl(recovered, toolNames, trimWrappingJSONFence);
+        if (recoveredResult.ready && Array.isArray(recoveredResult.calls) && recoveredResult.calls.length > 0) {
+          if (recoveredResult.prefix) {
+            noteText(state, recoveredResult.prefix);
+            events.push({ type: 'text', text: recoveredResult.prefix });
+          }
+          events.push({ type: 'tool_calls', calls: recoveredResult.calls });
+          if (recoveredResult.suffix) {
+            noteText(state, recoveredResult.suffix);
+            events.push({ type: 'text', text: recoveredResult.suffix });
+          }
+        } else {
+          noteText(state, content);
+          events.push({ type: 'text', text: content });
+        }
+      } else {
+        noteText(state, content);
+        events.push({ type: 'text', text: content });
+      }
     }
     state.capture = '';
     state.capturing = false;
@@ -151,26 +175,16 @@ function findToolSegmentStart(state, s) {
   if (!s) {
     return -1;
   }
-  const lower = s.toLowerCase();
   let offset = 0;
   while (true) {
-    // Only check XML tool tags.
-    let bestIdx = -1;
-    let matchedTag = '';
-    for (const tag of XML_TOOL_SEGMENT_TAGS) {
-      const idx = lower.indexOf(tag, offset);
-      if (idx >= 0 && (bestIdx < 0 || idx < bestIdx)) {
-        bestIdx = idx;
-        matchedTag = tag;
-      }
-    }
-    if (bestIdx < 0) {
+    const tag = findToolMarkupTagOutsideIgnored(s, offset);
+    if (!tag) {
       return -1;
     }
-    if (!insideCodeFenceWithState(state, s.slice(0, bestIdx))) {
-      return bestIdx;
+    if (!insideCodeFenceWithState(state, s.slice(0, tag.start))) {
+      return tag.start;
     }
-    offset = bestIdx + matchedTag.length;
+    offset = tag.end + 1;
   }
 }
 
diff --git a/internal/js/helpers/stream-tool-sieve/state.js b/internal/js/helpers/stream-tool-sieve/state.js
index 447ecdf..f9fb2b5 100644
--- a/internal/js/helpers/stream-tool-sieve/state.js
+++ b/internal/js/helpers/stream-tool-sieve/state.js
@@ -7,6 +7,7 @@ function createToolSieveState() {
     capturing: false,
     codeFenceStack: [],
     codeFencePendingTicks: 0,
+    codeFencePendingTildes: 0,
     codeFenceLineStart: true,
     pendingToolRaw: '',
     pendingToolCalls: [],
@@ -46,8 +47,7 @@ function insideCodeFence(text) {
   if (!t) {
     return false;
   }
-  const ticks = (t.match(/```/g) || []).length;
-  return ticks % 2 === 1;
+  return simulateCodeFenceState([], 0, 0, true, t).stack.length > 0;
 }
 
 function insideCodeFenceWithState(state, text) {
@@ -57,6 +57,7 @@ function insideCodeFenceWithState(state, text) {
   const simulated = simulateCodeFenceState(
     Array.isArray(state.codeFenceStack) ? state.codeFenceStack : [],
     Number.isInteger(state.codeFencePendingTicks) ? state.codeFencePendingTicks : 0,
+    Number.isInteger(state.codeFencePendingTildes) ? state.codeFencePendingTildes : 0,
     state.codeFenceLineStart !== false,
     text,
   );
@@ -70,37 +71,57 @@ function updateCodeFenceState(state, text) {
   const next = simulateCodeFenceState(
     Array.isArray(state.codeFenceStack) ? state.codeFenceStack : [],
     Number.isInteger(state.codeFencePendingTicks) ? state.codeFencePendingTicks : 0,
+    Number.isInteger(state.codeFencePendingTildes) ? state.codeFencePendingTildes : 0,
     state.codeFenceLineStart !== false,
     text,
   );
   state.codeFenceStack = next.stack;
   state.codeFencePendingTicks = next.pendingTicks;
+  state.codeFencePendingTildes = next.pendingTildes;
   state.codeFenceLineStart = next.lineStart;
 }
 
-function simulateCodeFenceState(stack, pendingTicks, lineStart, text) {
+function simulateCodeFenceState(stack, pendingTicks, pendingTildes, lineStart, text) {
   const chunk = typeof text === 'string' ? text : '';
   const nextStack = Array.isArray(stack) ? [...stack] : [];
   let ticks = Number.isInteger(pendingTicks) ? pendingTicks : 0;
+  let tildes = Number.isInteger(pendingTildes) ? pendingTildes : 0;
   let atLineStart = lineStart !== false;
 
-  const flushTicks = () => {
+  const flushPending = () => {
     if (ticks > 0) {
       if (atLineStart && ticks >= 3) {
-        applyFenceMarker(nextStack, ticks);
+        applyFenceMarker(nextStack, ticks); // positive = backtick
       }
       atLineStart = false;
       ticks = 0;
     }
+    if (tildes > 0) {
+      if (atLineStart && tildes >= 3) {
+        applyFenceMarker(nextStack, -tildes); // negative = tilde
+      }
+      atLineStart = false;
+      tildes = 0;
+    }
   };
 
   for (let i = 0; i < chunk.length; i += 1) {
     const ch = chunk[i];
     if (ch === '`') {
+      if (tildes > 0) {
+        flushPending();
+      }
       ticks += 1;
       continue;
     }
-    flushTicks();
+    if (ch === '~') {
+      if (ticks > 0) {
+        flushPending();
+      }
+      tildes += 1;
+      continue;
+    }
+    flushPending();
     if (ch === '\n' || ch === '\r') {
       atLineStart = true;
       continue;
@@ -110,29 +131,37 @@ function simulateCodeFenceState(stack, pendingTicks, lineStart, text) {
     }
     atLineStart = false;
   }
-  // keep ticks for cross-chunk continuation.
   return {
     stack: nextStack,
     pendingTicks: ticks,
+    pendingTildes: tildes,
     lineStart: atLineStart,
   };
 }
 
-function applyFenceMarker(stack, ticks) {
+// Positive values = backtick fences, negative = tilde fences.
+// Closing must match fence type.
+function applyFenceMarker(stack, marker) {
   if (!Array.isArray(stack)) {
     return;
   }
   if (stack.length === 0) {
-    stack.push(ticks);
+    stack.push(marker);
     return;
   }
   const top = stack[stack.length - 1];
-  if (ticks >= top) {
+  const sameType = (top > 0 && marker > 0) || (top < 0 && marker < 0);
+  if (!sameType) {
+    stack.push(marker);
+    return;
+  }
+  const absMarker = Math.abs(marker);
+  const absTop = Math.abs(top);
+  if (absMarker >= absTop) {
     stack.pop();
     return;
   }
-  // nested/open inner fence using longer marker for robustness.
-  stack.push(ticks);
+  stack.push(marker);
 }
 
 function hasMeaningfulText(text) {
diff --git a/internal/js/helpers/stream-tool-sieve/tool-keywords.js b/internal/js/helpers/stream-tool-sieve/tool-keywords.js
index 93efd5d..382e5a2 100644
--- a/internal/js/helpers/stream-tool-sieve/tool-keywords.js
+++ b/internal/js/helpers/stream-tool-sieve/tool-keywords.js
@@ -1,14 +1,47 @@
 'use strict';
 
 const XML_TOOL_SEGMENT_TAGS = [
+  '<|dsml|tool_calls>', '<|dsml|tool_calls\n', '<|dsml|tool_calls ',
+  '<|dsml|invoke ', '<|dsml|invoke\n', '<|dsml|invoke\t', '<|dsml|invoke\r',
+  '<|dsmltool_calls>', '<|dsmltool_calls\n', '<|dsmltool_calls ',
+  '<|dsmlinvoke ', '<|dsmlinvoke\n', '<|dsmlinvoke\t', '<|dsmlinvoke\r',
+  '<|dsml tool_calls>', '<|dsml tool_calls\n', '<|dsml tool_calls ',
+  '<|dsml invoke ', '<|dsml invoke\n', '<|dsml invoke\t', '<|dsml invoke\r',
+  '<dsml|tool_calls>', '<dsml|tool_calls\n', '<dsml|tool_calls ',
+  '<dsml|invoke ', '<dsml|invoke\n', '<dsml|invoke\t', '<dsml|invoke\r',
+  '<dsmltool_calls>', '<dsmltool_calls\n', '<dsmltool_calls ',
+  '<dsmlinvoke ', '<dsmlinvoke\n', '<dsmlinvoke\t', '<dsmlinvoke\r',
+  '<dsml tool_calls>', '<dsml tool_calls\n', '<dsml tool_calls ',
+  '<dsml invoke ', '<dsml invoke\n', '<dsml invoke\t', '<dsml invoke\r',
+  '<｜tool_calls>', '<｜tool_calls\n', '<｜tool_calls ',
+  '<｜invoke ', '<｜invoke\n', '<｜invoke\t', '<｜invoke\r',
+  '<|tool_calls>', '<|tool_calls\n', '<|tool_calls ',
+  '<|invoke ', '<|invoke\n', '<|invoke\t', '<|invoke\r',
   '<tool_calls>', '<tool_calls\n', '<tool_calls ',
+  '<invoke ', '<invoke\n', '<invoke\t', '<invoke\r',
 ];
 
 const XML_TOOL_OPENING_TAGS = [
+  '<|dsml|tool_calls',
+  '<|dsmltool_calls',
+  '<|dsml tool_calls',
+  '<dsml|tool_calls',
+  '<dsmltool_calls',
+  '<dsml tool_calls',
+  '<｜tool_calls',
+  '<|tool_calls',
   '<tool_calls',
 ];
 
 const XML_TOOL_CLOSING_TAGS = [
+  '</|dsml|tool_calls>',
+  '</|dsmltool_calls>',
+  '</|dsml tool_calls>',
+  '</dsml|tool_calls>',
+  '</dsmltool_calls>',
+  '</dsml tool_calls>',
+  '</｜tool_calls>',
+  '</|tool_calls>',
   '</tool_calls>',
 ];
 
diff --git a/internal/js/shared/deepseek-constants.js b/internal/js/shared/deepseek-constants.js
index e24cfb1..b142c9e 100644
--- a/internal/js/shared/deepseek-constants.js
+++ b/internal/js/shared/deepseek-constants.js
@@ -3,14 +3,17 @@
 const fs = require('fs');
 const path = require('path');
 
+const DEFAULT_CLIENT = Object.freeze({
+  name: 'DeepSeek',
+  platform: 'android',
+  androidApiLevel: '35',
+  locale: 'zh_CN',
+});
+
 const DEFAULT_BASE_HEADERS = Object.freeze({
   Host: 'chat.deepseek.com',
-  'User-Agent': 'DeepSeek/1.8.0 Android/35',
   Accept: 'application/json',
   'Content-Type': 'application/json',
-  'x-client-platform': 'android',
-  'x-client-version': '1.8.0',
-  'x-client-locale': 'zh_CN',
   'accept-charset': 'UTF-8',
 });
 
@@ -29,38 +32,96 @@ const DEFAULT_SKIP_EXACT_PATHS = Object.freeze([
   'response/search_status',
 ]);
 
-function loadSharedConstants() {
-  const sharedPath = path.resolve(__dirname, '../../internal/deepseek/constants_shared.json');
-  try {
-    const raw = fs.readFileSync(sharedPath, 'utf8');
-    const parsed = JSON.parse(raw);
-    const baseHeaders = parsed && typeof parsed.base_headers === 'object' && !Array.isArray(parsed.base_headers)
-      ? { ...DEFAULT_BASE_HEADERS, ...parsed.base_headers }
-      : { ...DEFAULT_BASE_HEADERS };
-    const skipPatterns = Array.isArray(parsed && parsed.skip_contains_patterns)
-      ? parsed.skip_contains_patterns.filter((v) => typeof v === 'string' && v !== '')
-      : [...DEFAULT_SKIP_PATTERNS];
-    const skipExactPaths = Array.isArray(parsed && parsed.skip_exact_paths)
-      ? parsed.skip_exact_paths.filter((v) => typeof v === 'string' && v !== '')
-      : [...DEFAULT_SKIP_EXACT_PATHS];
-    return {
-      baseHeaders,
-      skipPatterns,
-      skipExactPaths,
-    };
-  } catch (_err) {
-    return {
-      baseHeaders: { ...DEFAULT_BASE_HEADERS },
-      skipPatterns: [...DEFAULT_SKIP_PATTERNS],
-      skipExactPaths: [...DEFAULT_SKIP_EXACT_PATHS],
-    };
+function asNonEmptyString(value) { // returns value when it is a non-empty string, otherwise ''
+  return typeof value === 'string' && value !== '' ? value : '';
+}
+
+function normalizeClient(raw) { // builds a client descriptor from raw, filling gaps from DEFAULT_CLIENT
+  const client = raw && typeof raw === 'object' && !Array.isArray(raw) ? raw : {}; // non-object or array input is treated as empty
+  return {
+    name: asNonEmptyString(client.name) || DEFAULT_CLIENT.name,
+    platform: asNonEmptyString(client.platform) || DEFAULT_CLIENT.platform,
+    version: asNonEmptyString(client.version), // no fallback: stays '' when absent
+    androidApiLevel: asNonEmptyString(client.android_api_level) || DEFAULT_CLIENT.androidApiLevel, // input key is snake_case
+    locale: asNonEmptyString(client.locale) || DEFAULT_CLIENT.locale,
+  };
+}
+
+function buildBaseHeaders(parsed, client) { // builds headers from defaults, then parsed.base_headers, then client-derived values
+  const rawBaseHeaders = parsed && typeof parsed.base_headers === 'object' && !Array.isArray(parsed.base_headers)
+    ? parsed.base_headers
+    : {};
+  const baseHeaders = { ...DEFAULT_BASE_HEADERS, ...rawBaseHeaders }; // later spread wins, so base_headers entries override the defaults
+  if (client.name && client.version) { // User-Agent is derived only when both name and version are set
+    const androidSuffix = client.platform === 'android' && client.androidApiLevel
+      ? ` Android/${client.androidApiLevel}`
+      : '';
+    baseHeaders['User-Agent'] = `${client.name}/${client.version}${androidSuffix}`;
   }
+  if (client.platform) { // each x-client-* assignment below overwrites a same-named base_headers entry
+    baseHeaders['x-client-platform'] = client.platform;
+  }
+  if (client.version) {
+    baseHeaders['x-client-version'] = client.version;
+  }
+  if (client.locale) {
+    baseHeaders['x-client-locale'] = client.locale;
+  }
+  return baseHeaders;
+}
+
+function sharedConstantsPaths() { // candidate file locations, in the order readSharedConstants tries them
+  return [
+    path.resolve(__dirname, '../../deepseek/protocol/constants_shared.json'), // relative to this module
+    path.resolve(process.cwd(), 'internal/deepseek/protocol/constants_shared.json'), // relative to the process working directory
+  ];
+}
+
+function readSharedConstants() { // returns the parsed shared constants, or {} when no source can be loaded
+  try {
+    return require('../../deepseek/protocol/constants_shared.json');
+  } catch (_err) {
+    // Fall through to filesystem candidates for test and local execution variants.
+  }
+  for (const sharedPath of sharedConstantsPaths()) { // first candidate that reads and parses wins
+    try {
+      const raw = fs.readFileSync(sharedPath, 'utf8');
+      return JSON.parse(raw);
+    } catch (_err) {
+      // Try the next candidate path; fall back to in-file structural defaults below.
+    }
+  }
+  return {};
+}
+
+function loadSharedConstants() { // assembles client, base headers and skip lists from the shared constants
+  const parsed = readSharedConstants();
+  const client = normalizeClient(parsed && parsed.client);
+  const skipPatterns = Array.isArray(parsed && parsed.skip_contains_patterns)
+    ? parsed.skip_contains_patterns.filter((v) => typeof v === 'string' && v !== '') // keep only non-empty strings
+    : [...DEFAULT_SKIP_PATTERNS]; // used only when the key is missing or not an array
+  const skipExactPaths = Array.isArray(parsed && parsed.skip_exact_paths)
+    ? parsed.skip_exact_paths.filter((v) => typeof v === 'string' && v !== '')
+    : [...DEFAULT_SKIP_EXACT_PATHS];
+  return {
+    client,
+    baseHeaders: buildBaseHeaders(parsed, client),
+    skipPatterns,
+    skipExactPaths,
+  };
 }
 
 const shared = loadSharedConstants();
 
 module.exports = {
+  CLIENT: Object.freeze({ ...shared.client }), // frozen shallow copy of the normalized client
+  CLIENT_VERSION: shared.client.version, // '' when the shared constants supply no version
   BASE_HEADERS: Object.freeze(shared.baseHeaders),
   SKIP_PATTERNS: Object.freeze(shared.skipPatterns),
   SKIP_EXACT_PATHS: new Set(shared.skipExactPaths),
+  __test: { // internal helpers exposed on the module; presumably for unit tests only
+    buildBaseHeaders,
+    normalizeClient,
+    sharedConstantsPaths,
+  },
 };
diff --git a/internal/prompt/messages.go b/internal/prompt/messages.go
index 993eeef..d882f34 100644
--- a/internal/prompt/messages.go
+++ b/internal/prompt/messages.go
@@ -30,11 +30,6 @@ func MessagesPrepareWithThinking(messages []map[string]any, thinkingEnabled bool
 		Text string
 	}
 	processed := make([]block, 0, len(messages))
-	if thinkingEnabled {
-		if instruction := buildConversationContinuityInstructions(thinkingEnabled); strings.TrimSpace(instruction) != "" {
-			processed = append(processed, block{Role: "system", Text: instruction})
-		}
-	}
 	for _, m := range messages {
 		role, _ := m["role"].(string)
 		text := NormalizeContent(m["content"])
@@ -93,17 +88,6 @@ func formatRoleBlock(marker, text, endMarker string) string {
 	return out
 }
 
-func buildConversationContinuityInstructions(thinkingEnabled bool) string {
-	lines := []string{
-		"Continue the conversation from the full prior context and the latest tool results.",
-		"Treat earlier messages as binding context; answer the user's current request as a continuation, not a restart.",
-	}
-	if thinkingEnabled {
-		lines = append(lines, "Keep reasoning internal. Do not leave the final user-facing answer only in reasoning; always provide the answer in visible assistant content.")
-	}
-	return strings.Join(lines, "\n")
-}
-
 func NormalizeContent(v any) string {
 	if v == nil {
 		return ""
diff --git a/internal/prompt/messages_test.go b/internal/prompt/messages_test.go
index 8be34b2..a992ae6 100644
--- a/internal/prompt/messages_test.go
+++ b/internal/prompt/messages_test.go
@@ -58,23 +58,14 @@ func TestNormalizeContentArrayFallsBackToContentWhenTextEmpty(t *testing.T) {
 	}
 }
 
-func TestMessagesPrepareWithThinkingAddsContinuityContract(t *testing.T) {
+func TestMessagesPrepareWithThinkingPreservesPromptShape(t *testing.T) { // thinking on/off must yield the same prepared prompt
 	messages := []map[string]any{{"role": "user", "content": "Question"}}
 	gotThinking := MessagesPrepareWithThinking(messages, true)
 	gotPlain := MessagesPrepareWithThinking(messages, false)
-	if gotThinking == gotPlain {
-		t.Fatalf("expected thinking-enabled prompt to include extra continuity instructions")
+	if gotThinking != gotPlain {
+		t.Fatalf("expected thinking flag not to add extra continuity instructions, got thinking=%q plain=%q", gotThinking, gotPlain)
 	}
 	if !strings.HasSuffix(gotThinking, "<｜Assistant｜>") {
 		t.Fatalf("expected assistant suffix, got %q", gotThinking)
 	}
-	if !strings.Contains(gotThinking, "Continue the conversation from the full prior context") {
-		t.Fatalf("expected continuity instruction in thinking prompt, got %q", gotThinking)
-	}
-	if !strings.Contains(gotThinking, "final user-facing answer only in reasoning") {
-		t.Fatalf("expected visible-answer instruction in thinking prompt, got %q", gotThinking)
-	}
-	if strings.Contains(gotPlain, "Continue the conversation from the full prior context") {
-		t.Fatalf("did not expect thinking-only instruction in plain prompt, got %q", gotPlain)
-	}
 }
diff --git a/internal/prompt/tool_calls.go b/internal/prompt/tool_calls.go
index d38e9fa..da52920 100644
--- a/internal/prompt/tool_calls.go
+++ b/internal/prompt/tool_calls.go
@@ -38,7 +38,7 @@ func FormatToolCallsForPrompt(raw any) string {
 	if len(blocks) == 0 {
 		return ""
 	}
-	return "<tool_calls>\n" + strings.Join(blocks, "\n") + "\n</tool_calls>"
+	return "<|DSML|tool_calls>\n" + strings.Join(blocks, "\n") + "\n</|DSML|tool_calls>"
 }
 
 // StringifyToolCallArguments normalizes tool arguments into a compact string
@@ -94,12 +94,12 @@ func formatToolCallForPrompt(call map[string]any) string {
 
 	parameters := formatToolCallParametersForPrompt(argsRaw)
 	if parameters == "" {
-		return `  <invoke name="` + escapeXMLAttribute(name) + `"></invoke>`
+		return `  <|DSML|invoke name="` + escapeXMLAttribute(name) + `"></|DSML|invoke>`
 	}
 
-	return "  <invoke name=\"" + escapeXMLAttribute(name) + "\">\n" +
+	return "  <|DSML|invoke name=\"" + escapeXMLAttribute(name) + "\">\n" +
 		parameters + "\n" +
-		"  </invoke>"
+		"  </|DSML|invoke>"
 }
 
 func formatToolCallParametersForPrompt(raw any) string {
@@ -113,7 +113,7 @@ func formatToolCallParametersForPrompt(raw any) string {
 	if strings.TrimSpace(fallback) == "" {
 		return ""
 	}
-	return "    <parameter name=\"content\">" + renderPromptXMLText(fallback) + "</parameter>"
+	return "    <|DSML|parameter name=\"content\">" + renderPromptXMLText(fallback) + "</|DSML|parameter>"
 }
 
 func renderPromptToolParameters(value any, indent string) (string, bool) {
@@ -149,9 +149,9 @@ func renderPromptToolParameters(value any, indent string) (string, bool) {
 		}
 		return strings.Join(lines, "\n"), true
 	case string:
-		return indent + `<parameter name="content">` + renderPromptXMLText(v) + `</parameter>`, true
+		return indent + `<|DSML|parameter name="content">` + renderPromptXMLText(v) + `</|DSML|parameter>`, true
 	default:
-		return indent + `<parameter name="value">` + renderPromptXMLText(fmt.Sprint(v)) + `</parameter>`, true
+		return indent + `<|DSML|parameter name="value">` + renderPromptXMLText(fmt.Sprint(v)) + `</|DSML|parameter>`, true
 	}
 }
 
@@ -162,29 +162,29 @@ func renderPromptParameterNode(name string, value any, indent string) (string, b
 	}
 	switch v := value.(type) {
 	case nil:
-		return indent + `<parameter name="` + escapeXMLAttribute(trimmedName) + `"></parameter>`, true
+		return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `"></|DSML|parameter>`, true
 	case map[string]any:
 		body, ok := renderPromptToolXMLBody(v, indent+"  ")
 		if !ok {
 			return "", false
 		}
 		if strings.TrimSpace(body) == "" {
-			return indent + `<parameter name="` + escapeXMLAttribute(trimmedName) + `"></parameter>`, true
+			return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `"></|DSML|parameter>`, true
 		}
-		return indent + `<parameter name="` + escapeXMLAttribute(trimmedName) + "\">\n" + body + "\n" + indent + `</parameter>`, true
+		return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + "\">\n" + body + "\n" + indent + `</|DSML|parameter>`, true
 	case []any:
 		body, ok := renderPromptToolXMLArray(v, indent+"  ")
 		if !ok {
 			return "", false
 		}
 		if strings.TrimSpace(body) == "" {
-			return indent + `<parameter name="` + escapeXMLAttribute(trimmedName) + `"></parameter>`, true
+			return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `"></|DSML|parameter>`, true
 		}
-		return indent + `<parameter name="` + escapeXMLAttribute(trimmedName) + "\">\n" + body + "\n" + indent + `</parameter>`, true
+		return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + "\">\n" + body + "\n" + indent + `</|DSML|parameter>`, true
 	case string:
-		return indent + `<parameter name="` + escapeXMLAttribute(trimmedName) + `">` + renderPromptXMLText(v) + `</parameter>`, true
+		return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `">` + renderPromptXMLText(v) + `</|DSML|parameter>`, true
 	default:
-		return indent + `<parameter name="` + escapeXMLAttribute(trimmedName) + `">` + renderPromptXMLText(fmt.Sprint(v)) + `</parameter>`, true
+		return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `">` + renderPromptXMLText(fmt.Sprint(v)) + `</|DSML|parameter>`, true
 	}
 }
 
diff --git a/internal/prompt/tool_calls_test.go b/internal/prompt/tool_calls_test.go
index b26658c..8a5a369 100644
--- a/internal/prompt/tool_calls_test.go
+++ b/internal/prompt/tool_calls_test.go
@@ -9,7 +9,7 @@ func TestStringifyToolCallArgumentsPreservesConcatenatedJSON(t *testing.T) {
 	}
 }
 
-func TestFormatToolCallsForPromptXML(t *testing.T) {
+func TestFormatToolCallsForPromptDSML(t *testing.T) {
 	got := FormatToolCallsForPrompt([]any{
 		map[string]any{
 			"id": "call_1",
@@ -22,8 +22,8 @@ func TestFormatToolCallsForPromptXML(t *testing.T) {
 	if got == "" {
 		t.Fatal("expected non-empty formatted tool calls")
 	}
-	if got != "<tool_calls>\n  <invoke name=\"search_web\">\n    <parameter name=\"query\"><![CDATA[latest]]></parameter>\n  </invoke>\n</tool_calls>" {
-		t.Fatalf("unexpected formatted tool call XML: %q", got)
+	if got != "<|DSML|tool_calls>\n  <|DSML|invoke name=\"search_web\">\n    <|DSML|parameter name=\"query\"><![CDATA[latest]]></|DSML|parameter>\n  </|DSML|invoke>\n</|DSML|tool_calls>" {
+		t.Fatalf("unexpected formatted tool call DSML: %q", got)
 	}
 }
 
@@ -34,7 +34,7 @@ func TestFormatToolCallsForPromptEscapesXMLEntities(t *testing.T) {
 			"arguments": `{"q":"a < b && c > d"}`,
 		},
 	})
-	want := "<tool_calls>\n  <invoke name=\"search&lt;&amp;&gt;\">\n    <parameter name=\"q\"><![CDATA[a < b && c > d]]></parameter>\n  </invoke>\n</tool_calls>"
+	want := "<|DSML|tool_calls>\n  <|DSML|invoke name=\"search&lt;&amp;&gt;\">\n    <|DSML|parameter name=\"q\"><![CDATA[a < b && c > d]]></|DSML|parameter>\n  </|DSML|invoke>\n</|DSML|tool_calls>"
 	if got != want {
 		t.Fatalf("unexpected escaped tool call XML: %q", got)
 	}
@@ -50,7 +50,7 @@ func TestFormatToolCallsForPromptUsesCDATAForMultilineContent(t *testing.T) {
 			},
 		},
 	})
-	want := "<tool_calls>\n  <invoke name=\"write_file\">\n    <parameter name=\"content\"><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></parameter>\n    <parameter name=\"path\"><![CDATA[script.sh]]></parameter>\n  </invoke>\n</tool_calls>"
+	want := "<|DSML|tool_calls>\n  <|DSML|invoke name=\"write_file\">\n    <|DSML|parameter name=\"content\"><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></|DSML|parameter>\n    <|DSML|parameter name=\"path\"><![CDATA[script.sh]]></|DSML|parameter>\n  </|DSML|invoke>\n</|DSML|tool_calls>"
 	if got != want {
 		t.Fatalf("unexpected multiline cdata tool call XML: %q", got)
 	}
diff --git a/internal/promptcompat/history_transcript.go b/internal/promptcompat/history_transcript.go
index cd9a238..93bf4ba 100644
--- a/internal/promptcompat/history_transcript.go
+++ b/internal/promptcompat/history_transcript.go
@@ -10,6 +10,23 @@ import (
 const historySplitInjectedFilename = "IGNORE"
 
 func BuildOpenAIHistoryTranscript(messages []any) string {
+	return buildOpenAIInjectedFileTranscript(messages) // same builder that BuildOpenAICurrentInputContextTranscript uses
+}
+
+func BuildOpenAICurrentUserInputTranscript(text string) string { // wraps text as a single user message and builds its transcript
+	if strings.TrimSpace(text) == "" {
+		return "" // whitespace-only input produces no transcript
+	}
+	return BuildOpenAICurrentInputContextTranscript([]any{
+		map[string]any{"role": "user", "content": text},
+	})
+}
+
+func BuildOpenAICurrentInputContextTranscript(messages []any) string { // exported entry point over the shared injected-file transcript builder
+	return buildOpenAIInjectedFileTranscript(messages)
+}
+
+func buildOpenAIInjectedFileTranscript(messages []any) string {
 	normalized := NormalizeOpenAIMessagesForPrompt(messages, "")
 	transcript := strings.TrimSpace(prompt.MessagesPrepare(normalized))
 	if transcript == "" {
diff --git a/internal/promptcompat/message_normalize_test.go b/internal/promptcompat/message_normalize_test.go
index 36079d0..df41997 100644
--- a/internal/promptcompat/message_normalize_test.go
+++ b/internal/promptcompat/message_normalize_test.go
@@ -38,10 +38,10 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes
 		t.Fatalf("expected 4 normalized messages with assistant tool history preserved, got %d", len(normalized))
 	}
 	assistantContent, _ := normalized[2]["content"].(string)
-	if !strings.Contains(assistantContent, "<tool_calls>") {
-		t.Fatalf("assistant tool history should be preserved in XML form, got %q", assistantContent)
+	if !strings.Contains(assistantContent, "<|DSML|tool_calls>") {
+		t.Fatalf("assistant tool history should be preserved in DSML form, got %q", assistantContent)
 	}
-	if !strings.Contains(assistantContent, `<invoke name="get_weather">`) {
+	if !strings.Contains(assistantContent, `<|DSML|invoke name="get_weather">`) {
 		t.Fatalf("expected tool name in preserved history, got %q", assistantContent)
 	}
 	if !strings.Contains(normalized[3]["content"].(string), `"temp":18`) {
@@ -49,7 +49,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes
 	}
 
 	prompt := util.MessagesPrepare(normalized)
-	if !strings.Contains(prompt, "<tool_calls>") {
+	if !strings.Contains(prompt, "<|DSML|tool_calls>") {
 		t.Fatalf("expected preserved assistant tool history in prompt: %q", prompt)
 	}
 }
@@ -177,10 +177,10 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantMultipleToolCallsRemainSepara
 		t.Fatalf("expected assistant tool_call-only message preserved, got %#v", normalized)
 	}
 	content, _ := normalized[0]["content"].(string)
-	if strings.Count(content, "<invoke name=") != 2 {
+	if strings.Count(content, "<|DSML|invoke name=") != 2 {
 		t.Fatalf("expected two preserved tool call blocks, got %q", content)
 	}
-	if !strings.Contains(content, `<invoke name="search_web">`) || !strings.Contains(content, `<invoke name="eval_javascript">`) {
+	if !strings.Contains(content, `<|DSML|invoke name="search_web">`) || !strings.Contains(content, `<|DSML|invoke name="eval_javascript">`) {
 		t.Fatalf("expected both tool names in preserved history, got %q", content)
 	}
 }
@@ -258,7 +258,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantNilContentDoesNotInjectNullLi
 	if strings.Contains(content, "null") {
 		t.Fatalf("expected no null literal injection, got %q", content)
 	}
-	if !strings.Contains(content, "<tool_calls>") {
+	if !strings.Contains(content, "<|DSML|tool_calls>") {
 		t.Fatalf("expected assistant tool history in normalized content, got %q", content)
 	}
 }
diff --git a/internal/promptcompat/prompt_build_test.go b/internal/promptcompat/prompt_build_test.go
index 82101d3..b649fea 100644
--- a/internal/promptcompat/prompt_build_test.go
+++ b/internal/promptcompat/prompt_build_test.go
@@ -47,10 +47,10 @@ func TestBuildOpenAIFinalPrompt_HandlerPathIncludesToolRoundtripSemantics(t *tes
 	if !strings.Contains(finalPrompt, `"condition":"sunny"`) {
 		t.Fatalf("handler finalPrompt should preserve tool output content: %q", finalPrompt)
 	}
-	if !strings.Contains(finalPrompt, "<tool_calls>") {
+	if !strings.Contains(finalPrompt, "<|DSML|tool_calls>") {
 		t.Fatalf("handler finalPrompt should preserve assistant tool history: %q", finalPrompt)
 	}
-	if !strings.Contains(finalPrompt, `<invoke name="get_weather">`) {
+	if !strings.Contains(finalPrompt, `<|DSML|invoke name="get_weather">`) {
 		t.Fatalf("handler finalPrompt should include tool name history: %q", finalPrompt)
 	}
 }
@@ -74,7 +74,7 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
 	}
 
 	finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false)
-	if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <tool_calls>...</tool_calls> XML block at the end of your response.") {
+	if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.") {
 		t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt)
 	}
 	if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") {
@@ -88,16 +88,14 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
 	}
 }
 
-func TestBuildOpenAIFinalPromptWithThinkingAddsContinuationContract(t *testing.T) {
+func TestBuildOpenAIFinalPromptWithThinkingKeepsPromptUnchanged(t *testing.T) { // final prompt must be identical with thinking on and off
 	messages := []any{
 		map[string]any{"role": "user", "content": "继续回答上一个问题"},
 	}
 
-	finalPrompt, _ := buildOpenAIFinalPrompt(messages, nil, "", true)
-	if !strings.Contains(finalPrompt, "Continue the conversation from the full prior context") {
-		t.Fatalf("expected continuation contract in thinking prompt, got=%q", finalPrompt)
-	}
-	if !strings.Contains(finalPrompt, "final user-facing answer only in reasoning") {
-		t.Fatalf("expected visible-answer contract in thinking prompt, got=%q", finalPrompt)
+	finalPromptThinking, _ := buildOpenAIFinalPrompt(messages, nil, "", true)
+	finalPromptPlain, _ := buildOpenAIFinalPrompt(messages, nil, "", false)
+	if finalPromptThinking != finalPromptPlain {
+		t.Fatalf("expected thinking flag not to prepend continuation contract, thinking=%q plain=%q", finalPromptThinking, finalPromptPlain)
 	}
 }
diff --git a/internal/promptcompat/request_normalize.go b/internal/promptcompat/request_normalize.go
index 6d3f12d..8efa772 100644
--- a/internal/promptcompat/request_normalize.go
+++ b/internal/promptcompat/request_normalize.go
@@ -25,6 +25,9 @@ func NormalizeOpenAIChatRequest(store ConfigReader, req map[string]any, traceID
 	}
 	defaultThinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)
 	thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled)
+	if config.IsNoThinkingModel(resolvedModel) {
+		thinkingEnabled = false
+	}
 	responseModel := strings.TrimSpace(model)
 	if responseModel == "" {
 		responseModel = resolvedModel
@@ -65,6 +68,9 @@ func NormalizeOpenAIResponsesRequest(store ConfigReader, req map[string]any, tra
 	}
 	defaultThinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)
 	thinkingEnabled := util.ResolveThinkingEnabled(req, defaultThinkingEnabled)
+	if config.IsNoThinkingModel(resolvedModel) {
+		thinkingEnabled = false
+	}
 
 	// Keep width-control as an explicit policy hook even if current default is true.
 	allowWideInput := true
diff --git a/internal/promptcompat/standard_request.go b/internal/promptcompat/standard_request.go
index 9ec3781..6480d9b 100644
--- a/internal/promptcompat/standard_request.go
+++ b/internal/promptcompat/standard_request.go
@@ -3,21 +3,22 @@ package promptcompat
 import "ds2api/internal/config"
 
 type StandardRequest struct {
-	Surface        string
-	RequestedModel string
-	ResolvedModel  string
-	ResponseModel  string
-	Messages       []any
-	HistoryText    string
-	ToolsRaw       any
-	FinalPrompt    string
-	ToolNames      []string
-	ToolChoice     ToolChoicePolicy
-	Stream         bool
-	Thinking       bool
-	Search         bool
-	RefFileIDs     []string
-	PassThrough    map[string]any
+	Surface                 string
+	RequestedModel          string
+	ResolvedModel           string
+	ResponseModel           string
+	Messages                []any
+	HistoryText             string
+	CurrentInputFileApplied bool // NOTE(review): no writer visible here; presumably set once the current input was moved into an injected file - confirm
+	ToolsRaw                any
+	FinalPrompt             string
+	ToolNames               []string
+	ToolChoice              ToolChoicePolicy
+	Stream                  bool
+	Thinking                bool
+	Search                  bool
+	RefFileIDs              []string
+	PassThrough             map[string]any
 }
 
 type ToolChoiceMode string
diff --git a/internal/promptcompat/standard_request_test.go b/internal/promptcompat/standard_request_test.go
index 7b529a6..437888d 100644
--- a/internal/promptcompat/standard_request_test.go
+++ b/internal/promptcompat/standard_request_test.go
@@ -11,6 +11,7 @@ func TestStandardRequestCompletionPayloadSetsModelTypeFromResolvedModel(t *testi
 		modelType string
 	}{
 		{name: "default", model: "deepseek-v4-flash", thinking: false, search: false, modelType: "default"},
+		{name: "default_nothinking", model: "deepseek-v4-flash-nothinking", thinking: false, search: false, modelType: "default"},
 		{name: "expert", model: "deepseek-v4-pro", thinking: true, search: false, modelType: "expert"},
 		{name: "vision", model: "deepseek-v4-vision-search", thinking: false, search: true, modelType: "vision"},
 	}
diff --git a/internal/promptcompat/thinking_injection.go b/internal/promptcompat/thinking_injection.go
new file mode 100644
index 0000000..6daa454
--- /dev/null
+++ b/internal/promptcompat/thinking_injection.go
@@ -0,0 +1,73 @@
+package promptcompat
+
+import "strings"
+
+const (
+	ThinkingInjectionMarker        = "Reasoning Effort: Absolute maximum with no shortcuts permitted." // first line of the default prompt; also used to detect an existing injection
+	DefaultThinkingInjectionPrompt = ThinkingInjectionMarker + "\n" +
+		"You MUST be very thorough in your thinking and comprehensively decompose the problem to resolve the root cause, rigorously stress-testing your logic against all potential paths, edge cases, and adversarial scenarios.\n" +
+		"Explicitly write out your entire deliberation process, documenting every intermediate step, considered alternative, and rejected hypothesis to ensure absolutely no assumption is left unchecked."
+)
+
+func AppendThinkingInjectionToLatestUser(messages []any) ([]any, bool) {
+	return AppendThinkingInjectionPromptToLatestUser(messages, "") // empty prompt selects DefaultThinkingInjectionPrompt
+}
+
+func AppendThinkingInjectionPromptToLatestUser(messages []any, injectionPrompt string) ([]any, bool) { // appends the prompt to the last user message; bool reports whether anything changed
+	if len(messages) == 0 {
+		return messages, false
+	}
+	injectionPrompt = strings.TrimSpace(injectionPrompt)
+	if injectionPrompt == "" {
+		injectionPrompt = DefaultThinkingInjectionPrompt
+	}
+	for i := len(messages) - 1; i >= 0; i-- { // scan from the end so only the most recent user message is touched
+		msg, ok := messages[i].(map[string]any)
+		if !ok {
+			continue
+		}
+		if strings.ToLower(strings.TrimSpace(asString(msg["role"]))) != "user" {
+			continue
+		}
+		content := msg["content"]
+		normalizedContent := NormalizeOpenAIContentForPrompt(content)
+		if strings.Contains(normalizedContent, ThinkingInjectionMarker) || strings.Contains(normalizedContent, injectionPrompt) {
+			return messages, false // default marker or this prompt already present: input returned as-is
+		}
+		updatedContent := appendThinkingInjectionToContent(content, injectionPrompt)
+		out := append([]any(nil), messages...) // copy the slice and the message map so the caller's values are not modified
+		cloned := make(map[string]any, len(msg))
+		for k, v := range msg {
+			cloned[k] = v
+		}
+		cloned["content"] = updatedContent
+		out[i] = cloned
+		return out, true
+	}
+	return messages, false // no user message found
+}
+
+func appendThinkingInjectionToContent(content any, injectionPrompt string) any { // returns content with the prompt added, keeping string or array shape
+	switch x := content.(type) {
+	case string:
+		return appendTextBlock(x, injectionPrompt)
+	case []any:
+		out := append([]any(nil), x...) // copy first so the input slice is not modified
+		out = append(out, map[string]any{
+			"type": "text",
+			"text": injectionPrompt,
+		})
+		return out
+	default:
+		text := NormalizeOpenAIContentForPrompt(content) // any other shape is flattened to text, so the result is a string
+		return appendTextBlock(text, injectionPrompt)
+	}
+}
+
+func appendTextBlock(base, addition string) string { // joins base and addition with a blank line between them
+	base = strings.TrimSpace(base) // only base is trimmed; addition is used as given
+	if base == "" {
+		return addition
+	}
+	return base + "\n\n" + addition
+}
diff --git a/internal/promptcompat/thinking_injection_test.go b/internal/promptcompat/thinking_injection_test.go
new file mode 100644
index 0000000..542dbe8
--- /dev/null
+++ b/internal/promptcompat/thinking_injection_test.go
@@ -0,0 +1,81 @@
+package promptcompat
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestAppendThinkingInjectionToLatestUserStringContent(t *testing.T) { // injection lands on the last user message; the earlier one stays unchanged
+	messages := []any{
+		map[string]any{"role": "user", "content": "older"},
+		map[string]any{"role": "assistant", "content": "ok"},
+		map[string]any{"role": "user", "content": "latest"},
+	}
+
+	out, changed := AppendThinkingInjectionToLatestUser(messages)
+	if !changed {
+		t.Fatal("expected thinking injection to be appended")
+	}
+	latest := out[2].(map[string]any)
+	content, _ := latest["content"].(string)
+	if !strings.Contains(content, "latest\n\n"+ThinkingInjectionMarker) {
+		t.Fatalf("expected injection after latest user text, got %q", content)
+	}
+	older := out[0].(map[string]any)
+	if older["content"] != "older" {
+		t.Fatalf("expected older user message unchanged, got %#v", older["content"])
+	}
+}
+
+func TestAppendThinkingInjectionToLatestUserArrayContent(t *testing.T) { // array content gains one extra text part carrying the injection
+	messages := []any{
+		map[string]any{
+			"role": "user",
+			"content": []any{
+				map[string]any{"type": "text", "text": "latest"},
+			},
+		},
+	}
+
+	out, changed := AppendThinkingInjectionToLatestUser(messages)
+	if !changed {
+		t.Fatal("expected thinking injection to be appended")
+	}
+	content, _ := out[0].(map[string]any)["content"].([]any)
+	if len(content) != 2 {
+		t.Fatalf("expected appended text block, got %#v", content)
+	}
+	block, _ := content[1].(map[string]any)
+	if block["type"] != "text" || !strings.Contains(block["text"].(string), ThinkingInjectionMarker) {
+		t.Fatalf("unexpected appended block: %#v", block)
+	}
+}
+
+func TestAppendThinkingInjectionToLatestUserCustomPrompt(t *testing.T) { // a caller-supplied prompt is appended after the user text
+	messages := []any{
+		map[string]any{"role": "user", "content": "latest"},
+	}
+
+	out, changed := AppendThinkingInjectionPromptToLatestUser(messages, "custom thinking format")
+	if !changed {
+		t.Fatal("expected custom thinking injection to be appended")
+	}
+	content, _ := out[0].(map[string]any)["content"].(string)
+	if !strings.Contains(content, "latest\n\ncustom thinking format") {
+		t.Fatalf("expected custom injection after latest user text, got %q", content)
+	}
+}
+
+func TestAppendThinkingInjectionToLatestUserSkipsDuplicate(t *testing.T) { // content already holding the default prompt is not injected again
+	messages := []any{
+		map[string]any{"role": "user", "content": "latest\n\n" + DefaultThinkingInjectionPrompt},
+	}
+
+	out, changed := AppendThinkingInjectionToLatestUser(messages)
+	if changed {
+		t.Fatal("expected duplicate injection to be skipped")
+	}
+	if len(out) != 1 {
+		t.Fatalf("unexpected messages: %#v", out)
+	}
+}
diff --git a/internal/sse/consumer.go b/internal/sse/consumer.go
index 1a9adf8..db42bf5 100644
--- a/internal/sse/consumer.go
+++ b/internal/sse/consumer.go
@@ -5,15 +5,18 @@ import (
 	"strings"
 
 	dsprotocol "ds2api/internal/deepseek/protocol"
+	"ds2api/internal/util"
 )
 
 // CollectResult holds the aggregated text and thinking content from a
 // DeepSeek SSE stream, consumed to completion (non-streaming use case).
 type CollectResult struct {
-	Text          string
-	Thinking      string
-	ContentFilter bool
-	CitationLinks map[int]string
+	Text                  string
+	Thinking              string
+	ToolDetectionThinking string // concatenated text of ToolDetectionThinkingParts, overlap-trimmed
+	ContentFilter         bool
+	CitationLinks         map[int]string
+	ResponseMessageID     int // last positive id recorded by observeResponseMessageID; 0 if none
 }
 
 // CollectStream fully consumes a DeepSeek SSE response and separates
@@ -28,9 +31,11 @@ func CollectStream(resp *http.Response, thinkingEnabled bool, closeBody bool) Co
 	}
 	text := strings.Builder{}
 	thinking := strings.Builder{}
+	toolDetectionThinking := strings.Builder{}
 	contentFilter := false
 	stopped := false
 	collector := newCitationLinkCollector()
+	responseMessageID := 0
 	currentType := "text"
 	if thinkingEnabled {
 		currentType = "thinking"
@@ -39,6 +44,7 @@ func CollectStream(resp *http.Response, thinkingEnabled bool, closeBody bool) Co
 		chunk, done, parsed := ParseDeepSeekSSELine(line)
 		if parsed && !done {
 			collector.ingestChunk(chunk)
+			observeResponseMessageID(chunk, &responseMessageID)
 		}
 		if done {
 			return false
@@ -70,12 +76,44 @@ func CollectStream(resp *http.Response, thinkingEnabled bool, closeBody bool) Co
 				text.WriteString(trimmed)
 			}
 		}
+		for _, p := range result.ToolDetectionThinkingParts {
+			trimmed := TrimContinuationOverlap(toolDetectionThinking.String(), p.Text)
+			toolDetectionThinking.WriteString(trimmed)
+		}
 		return true
 	})
 	return CollectResult{
-		Text:          text.String(),
-		Thinking:      thinking.String(),
-		ContentFilter: contentFilter,
-		CitationLinks: collector.build(),
+		Text:                  text.String(),
+		Thinking:              thinking.String(),
+		ToolDetectionThinking: toolDetectionThinking.String(),
+		ContentFilter:         contentFilter,
+		CitationLinks:         collector.build(),
+		ResponseMessageID:     responseMessageID,
+	}
+}
+
+// observeResponseMessageID extracts the response_message_id from a parsed SSE
+// chunk. It mirrors the extraction logic in client_continue.go's observe
+// method, checking top-level response_message_id, v.response.message_id, and
+// message.response.message_id.
+func observeResponseMessageID(chunk map[string]any, out *int) {
+	if chunk == nil || out == nil {
+		return
+	}
+	if id := util.IntFrom(chunk["response_message_id"]); id > 0 { // checks run in order and each positive hit overwrites *out, so the last match wins
+		*out = id
+	}
+	v, _ := chunk["v"].(map[string]any) // v is nil when "v" is absent or not a map; indexing a nil map below is safe
+	if response, _ := v["response"].(map[string]any); response != nil {
+		if id := util.IntFrom(response["message_id"]); id > 0 {
+			*out = id
+		}
+	}
+	if message, _ := chunk["message"].(map[string]any); message != nil {
+		if response, _ := message["response"].(map[string]any); response != nil {
+			if id := util.IntFrom(response["message_id"]); id > 0 {
+				*out = id
+			}
+		}
 	}
 }
diff --git a/internal/sse/line.go b/internal/sse/line.go
index fbd2939..a52a9ab 100644
--- a/internal/sse/line.go
+++ b/internal/sse/line.go
@@ -1,15 +1,19 @@
 package sse
 
-import "fmt"
+import (
+	"fmt"
+)
 
 // LineResult is the normalized parse result for one DeepSeek SSE line.
 type LineResult struct {
-	Parsed        bool
-	Stop          bool
-	ContentFilter bool
-	ErrorMessage  string
-	Parts         []ContentPart
-	NextType      string
+	Parsed                     bool
+	Stop                       bool
+	ContentFilter              bool
+	ErrorMessage               string
+	Parts                      []ContentPart
+	ToolDetectionThinkingParts []ContentPart
+	NextType                   string
+	ResponseMessageID          int
 }
 
 // ParseDeepSeekContentLine centralizes one-line DeepSeek SSE parsing for both
@@ -46,12 +50,17 @@ func ParseDeepSeekContentLine(raw []byte, thinkingEnabled bool, currentType stri
 			NextType:      currentType,
 		}
 	}
-	parts, finished, nextType := ParseSSEChunkForContent(chunk, thinkingEnabled, currentType)
+	parts, detectionThinkingParts, finished, nextType := ParseSSEChunkForContentDetailed(chunk, thinkingEnabled, currentType)
 	parts = filterLeakedContentFilterParts(parts)
+	detectionThinkingParts = filterLeakedContentFilterParts(detectionThinkingParts)
+	var respMsgID int
+	observeResponseMessageID(chunk, &respMsgID)
 	return LineResult{
-		Parsed:   true,
-		Stop:     finished,
-		Parts:    parts,
-		NextType: nextType,
+		Parsed:                     true,
+		Stop:                       finished,
+		Parts:                      parts,
+		ToolDetectionThinkingParts: detectionThinkingParts,
+		NextType:                   nextType,
+		ResponseMessageID:          respMsgID,
 	}
 }
diff --git a/internal/sse/parser.go b/internal/sse/parser.go
index 3057eda..abb2eb6 100644
--- a/internal/sse/parser.go
+++ b/internal/sse/parser.go
@@ -69,20 +69,25 @@ func isFragmentStatusPath(path string) bool {
 }
 
 func ParseSSEChunkForContent(chunk map[string]any, thinkingEnabled bool, currentFragmentType string) ([]ContentPart, bool, string) {
+	parts, _, finished, nextType := ParseSSEChunkForContentDetailed(chunk, thinkingEnabled, currentFragmentType) // second result (detection-only thinking parts) is discarded
+	return parts, finished, nextType
+}
+
+func ParseSSEChunkForContentDetailed(chunk map[string]any, thinkingEnabled bool, currentFragmentType string) ([]ContentPart, []ContentPart, bool, string) {
 	v, ok := chunk["v"]
 	if !ok {
-		return nil, false, currentFragmentType
+		return nil, nil, false, currentFragmentType
 	}
 	path, _ := chunk["p"].(string)
 	if shouldSkipPath(path) {
-		return nil, false, currentFragmentType
+		return nil, nil, false, currentFragmentType
 	}
 	if isStatusPath(path) {
 		if s, ok := v.(string); ok {
 			if strings.EqualFold(strings.TrimSpace(s), "FINISHED") {
-				return nil, true, currentFragmentType
+				return nil, nil, true, currentFragmentType
 			}
-			return nil, false, currentFragmentType
+			return nil, nil, false, currentFragmentType
 		}
 	}
 	newType := currentFragmentType
@@ -92,18 +97,32 @@ func ParseSSEChunkForContent(chunk map[string]any, thinkingEnabled bool, current
 	partType := resolvePartType(path, thinkingEnabled, newType)
 	finished := appendChunkValueContent(v, partType, &newType, &parts, path)
 	if finished {
-		return nil, true, newType
+		return nil, nil, true, newType
 	}
 	var transitioned bool
 	parts, transitioned = splitThinkingParts(parts)
 	if transitioned {
 		newType = "text"
 	}
+	detectionThinkingParts := selectThinkingParts(parts)
 	if !thinkingEnabled {
 		parts = dropThinkingParts(parts)
 		newType = "text"
 	}
-	return parts, false, newType
+	return parts, detectionThinkingParts, false, newType
+}
+
+func selectThinkingParts(parts []ContentPart) []ContentPart { // returns only the parts whose Type is "thinking", in input order
+	if len(parts) == 0 {
+		return nil
+	}
+	out := make([]ContentPart, 0, len(parts)) // non-nil (possibly empty) whenever parts is non-empty
+	for _, p := range parts {
+		if p.Type == "thinking" {
+			out = append(out, p)
+		}
+	}
+	return out
 }
 
 func collectDirectFragments(path string, chunk map[string]any, v any, newType *string, parts *[]ContentPart) {
diff --git a/internal/toolcall/fence_edge_test.go b/internal/toolcall/fence_edge_test.go
new file mode 100644
index 0000000..5faff0b
--- /dev/null
+++ b/internal/toolcall/fence_edge_test.go
@@ -0,0 +1,66 @@
+package toolcall
+
+import (
+	"strings"
+	"testing"
+)
+
+// A four-backtick fence whose body contains three-backtick runs.
+func TestStripFencedCodeBlocks_NestedFourBackticks(t *testing.T) {
+	text := "Before\n\x60\x60\x60\x60markdown\nHere is \x60\x60\x60 nested \x60\x60\x60 example\n\x60\x60\x60\x60\nAfter"
+	got := stripFencedCodeBlocks(text)
+	if !strings.Contains(got, "Before") || !strings.Contains(got, "After") {
+		t.Fatalf("expected Before and After preserved, got %q", got)
+	}
+	if strings.Contains(got, "nested") {
+		t.Fatalf("expected nested content stripped, got %q", got)
+	}
+}
+
+// A tilde (~~~) fence.
+func TestStripFencedCodeBlocks_TildeFence(t *testing.T) {
+	text := "Before\n~~~python\ncode here\n~~~\nAfter"
+	got := stripFencedCodeBlocks(text)
+	if !strings.Contains(got, "Before") || !strings.Contains(got, "After") {
+		t.Fatalf("expected Before/After, got %q", got)
+	}
+	if strings.Contains(got, "code here") {
+		t.Fatalf("expected code stripped, got %q", got)
+	}
+}
+
+// An unclosed fence followed by a real tool call: the result must not be an empty string.
+func TestStripFencedCodeBlocks_UnclosedFencePreservesToolCall(t *testing.T) {
+	text := "Example:\n\x60\x60\x60xml\n<tool_calls><invoke name=\"read_file\"><parameter name=\"path\">README.md</parameter></invoke></tool_calls>\n\n<tool_calls><invoke name=\"search\"><parameter name=\"q\">go</parameter></invoke></tool_calls>"
+	got := stripFencedCodeBlocks(text)
+	if got == "" {
+		t.Fatalf("unclosed fence should not truncate everything — real tool call after the fence is lost")
+	}
+}
+
+// A fence that appears inside CDATA must not be stripped.
+func TestStripFencedCodeBlocks_FenceInsideCDATA(t *testing.T) {
+	text := "<tool_calls><invoke name=\"write\">\n<parameter name=\"content\"><![CDATA[\n\x60\x60\x60python\nprint('hello')\n\x60\x60\x60\n]]></parameter>\n</invoke></tool_calls>"
+	got := stripFencedCodeBlocks(text)
+	if !strings.Contains(got, "\x60\x60\x60python") {
+		t.Fatalf("fenced code inside CDATA should be preserved, got %q", got)
+	}
+}
+
+// Several fences in a row; only the text outside them is asserted here.
+func TestStripFencedCodeBlocks_MultipleFences(t *testing.T) {
+	text := "Before\n\x60\x60\x60\nfence1\n\x60\x60\x60\nMiddle\n\x60\x60\x60\nfence2\n\x60\x60\x60\nAfter"
+	got := stripFencedCodeBlocks(text)
+	if !strings.Contains(got, "Before") || !strings.Contains(got, "Middle") || !strings.Contains(got, "After") {
+		t.Fatalf("expected non-fenced content preserved, got %q", got)
+	}
+}
+
+// The fence body contains an inline ``` run that is not on a line of its own.
+func TestStripFencedCodeBlocks_InlineBackticksNotFence(t *testing.T) {
+	text := "Before\n\x60\x60\x60go\nfmt.Println(\x60\x60\x60hello\x60\x60\x60)\n\x60\x60\x60\nAfter"
+	got := stripFencedCodeBlocks(text)
+	if !strings.Contains(got, "Before") || !strings.Contains(got, "After") {
+		t.Fatalf("expected Before/After, got %q", got)
+	}
+}
diff --git a/internal/toolcall/regression_test.go b/internal/toolcall/regression_test.go
index 7615fa3..fc88db0 100644
--- a/internal/toolcall/regression_test.go
+++ b/internal/toolcall/regression_test.go
@@ -12,9 +12,9 @@ func TestRegression_RobustXMLAndCDATA(t *testing.T) {
 		expected []ParsedToolCall
 	}{
 		{
-			name:     "Standard JSON parameters (Regression)",
+			name:     "Standard JSON scalar parameters (Regression)",
 			text:     `<tool_calls><invoke name="foo"><parameter name="a">1</parameter></invoke></tool_calls>`,
-			expected: []ParsedToolCall{{Name: "foo", Input: map[string]any{"a": "1"}}},
+			expected: []ParsedToolCall{{Name: "foo", Input: map[string]any{"a": float64(1)}}},
 		},
 		{
 			name:     "XML tags parameters (Regression)",
diff --git a/internal/toolcall/tool_prompt.go b/internal/toolcall/tool_prompt.go
index aa556e8..6844eb4 100644
--- a/internal/toolcall/tool_prompt.go
+++ b/internal/toolcall/tool_prompt.go
@@ -11,44 +11,45 @@ import "strings"
 func BuildToolCallInstructions(toolNames []string) string {
 	return `TOOL CALL FORMAT — FOLLOW EXACTLY:
 
-<tool_calls>
-  <invoke name="TOOL_NAME_HERE">
-    <parameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></parameter>
-  </invoke>
-</tool_calls>
+<|DSML|tool_calls>
+  <|DSML|invoke name="TOOL_NAME_HERE">
+    <|DSML|parameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></|DSML|parameter>
+  </|DSML|invoke>
+</|DSML|tool_calls>
 
 RULES:
-1) Use the <tool_calls> XML wrapper format only.
-2) Put one or more <invoke> entries under a single <tool_calls> root.
-3) Put the tool name in the invoke name attribute: <invoke name="TOOL_NAME">.
+1) Use the <|DSML|tool_calls> wrapper format.
+2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root.
+3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">.
 4) All string values must use <![CDATA[...]]>, even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries.
-5) Every top-level argument must be a <parameter name="ARG_NAME">...</parameter> node.
+5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...</|DSML|parameter> node.
 6) Objects use nested XML elements inside the parameter body. Arrays may repeat <item> children.
 7) Numbers, booleans, and null stay plain text.
 8) Use only the parameter names in the tool schema. Do not invent fields.
 9) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
-10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <tool_calls>.
-11) Never omit the opening <tool_calls> tag, even if you already plan to close with </tool_calls>.
+10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
+11) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with </|DSML|tool_calls>.
+12) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
 
 PARAMETER SHAPES:
-- string => <parameter name="x"><![CDATA[value]]></parameter>
-- object => <parameter name="x"><field>...</field></parameter>
-- array => <parameter name="x"><item>...</item><item>...</item></parameter>
-- number/bool/null => <parameter name="x">plain_text</parameter>
+- string => <|DSML|parameter name="x"><![CDATA[value]]></|DSML|parameter>
+- object => <|DSML|parameter name="x"><field>...</field></|DSML|parameter>
+- array => <|DSML|parameter name="x"><item>...</item><item>...</item></|DSML|parameter>
+- number/bool/null => <|DSML|parameter name="x">plain_text</|DSML|parameter>
 
 【WRONG — Do NOT do these】:
 
 Wrong 1 — mixed text after XML:
-  <tool_calls>...</tool_calls> I hope this helps.
+  <|DSML|tool_calls>...</|DSML|tool_calls> I hope this helps.
 Wrong 2 — Markdown code fences:
   ` + "```xml" + `
-  <tool_calls>...</tool_calls>
+  <|DSML|tool_calls>...</|DSML|tool_calls>
   ` + "```" + `
 Wrong 3 — missing opening wrapper:
-  <invoke name="TOOL_NAME">...</invoke>
-  </tool_calls>
+  <|DSML|invoke name="TOOL_NAME">...</|DSML|invoke>
+  </|DSML|tool_calls>
 
-Remember: The ONLY valid way to use tools is the <tool_calls>...</tool_calls> XML block at the end of your response.
+Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.
 
 ` + buildCorrectToolExamples(toolNames)
 }
@@ -140,21 +141,21 @@ func firstScriptExample(names []string) (promptToolExample, bool) {
 
 func renderToolExampleBlock(calls []promptToolExample) string {
 	var b strings.Builder
-	b.WriteString("<tool_calls>\n")
+	b.WriteString("<|DSML|tool_calls>\n")
 	for _, call := range calls {
-		b.WriteString(`  <invoke name="`)
+		b.WriteString(`  <|DSML|invoke name="`)
 		b.WriteString(call.name)
-		b.WriteString("\">\n")
+		b.WriteString(`">` + "\n")
 		b.WriteString(indentPromptParameters(call.params, "    "))
-		b.WriteString("\n  </invoke>\n")
+		b.WriteString("\n  </|DSML|invoke>\n")
 	}
-	b.WriteString("</tool_calls>")
+	b.WriteString("</|DSML|tool_calls>")
 	return b.String()
 }
 
 func indentPromptParameters(body, indent string) string {
 	if strings.TrimSpace(body) == "" {
-		return indent + `<parameter name="content"></parameter>`
+		return indent + `<|DSML|parameter name="content"></|DSML|parameter>`
 	}
 	lines := strings.Split(body, "\n")
 	for i, line := range lines {
@@ -168,7 +169,7 @@ func indentPromptParameters(body, indent string) string {
 }
 
 func wrapParameter(name, inner string) string {
-	return `<parameter name="` + name + `">` + inner + `</parameter>`
+	return `<|DSML|parameter name="` + name + `">` + inner + `</|DSML|parameter>`
 }
 
 func exampleBasicParams(name string) (string, bool) {
@@ -194,7 +195,7 @@ func exampleBasicParams(name string) (string, bool) {
 	case "Edit":
 		return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + wrapParameter("old_string", promptCDATA("foo")) + "\n" + wrapParameter("new_string", promptCDATA("bar")), true
 	case "MultiEdit":
-		return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></parameter>`, true
+		return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
 	}
 	return "", false
 }
@@ -202,11 +203,11 @@ func exampleBasicParams(name string) (string, bool) {
 func exampleNestedParams(name string) (string, bool) {
 	switch strings.TrimSpace(name) {
 	case "MultiEdit":
-		return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></parameter>`, true
+		return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
 	case "Task":
 		return wrapParameter("description", promptCDATA("Investigate flaky tests")) + "\n" + wrapParameter("prompt", promptCDATA("Run targeted tests and summarize failures")), true
 	case "ask_followup_question":
-		return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `<parameter name="follow_up"><item><text>` + promptCDATA("Option A") + `</text></item><item><text>` + promptCDATA("Option B") + `</text></item></parameter>`, true
+		return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `<|DSML|parameter name="follow_up"><item><text>` + promptCDATA("Option A") + `</text></item><item><text>` + promptCDATA("Option B") + `</text></item></|DSML|parameter>`, true
 	}
 	return "", false
 }
diff --git a/internal/toolcall/tool_prompt_test.go b/internal/toolcall/tool_prompt_test.go
index d482d52..f153e43 100644
--- a/internal/toolcall/tool_prompt_test.go
+++ b/internal/toolcall/tool_prompt_test.go
@@ -7,20 +7,20 @@ import (
 
 func TestBuildToolCallInstructions_ExecCommandUsesCmdExample(t *testing.T) {
 	out := BuildToolCallInstructions([]string{"exec_command"})
-	if !strings.Contains(out, `<invoke name="exec_command">`) {
+	if !strings.Contains(out, `<|DSML|invoke name="exec_command">`) {
 		t.Fatalf("expected exec_command in examples, got: %s", out)
 	}
-	if !strings.Contains(out, `<parameter name="cmd"><![CDATA[pwd]]></parameter>`) {
+	if !strings.Contains(out, `<|DSML|parameter name="cmd"><![CDATA[pwd]]></|DSML|parameter>`) {
 		t.Fatalf("expected cmd parameter example for exec_command, got: %s", out)
 	}
 }
 
 func TestBuildToolCallInstructions_ExecuteCommandUsesCommandExample(t *testing.T) {
 	out := BuildToolCallInstructions([]string{"execute_command"})
-	if !strings.Contains(out, `<invoke name="execute_command">`) {
+	if !strings.Contains(out, `<|DSML|invoke name="execute_command">`) {
 		t.Fatalf("expected execute_command in examples, got: %s", out)
 	}
-	if !strings.Contains(out, `<parameter name="command"><![CDATA[pwd]]></parameter>`) {
+	if !strings.Contains(out, `<|DSML|parameter name="command"><![CDATA[pwd]]></|DSML|parameter>`) {
 		t.Fatalf("expected command parameter example for execute_command, got: %s", out)
 	}
 }
@@ -34,20 +34,20 @@ func TestBuildToolCallInstructions_BashUsesCommandAndDescriptionExamples(t *test
 
 	sawDescription := false
 	for _, block := range blocks {
-		if !strings.Contains(block, `<parameter name="command">`) {
+		if !strings.Contains(block, `<|DSML|parameter name="command">`) {
 			t.Fatalf("expected every Bash example to use command parameter, got: %s", block)
 		}
-		if strings.Contains(block, `<parameter name="path">`) || strings.Contains(block, `<parameter name="content">`) {
+		if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
 			t.Fatalf("expected Bash examples not to use file write parameters, got: %s", block)
 		}
-		if strings.Contains(block, `<parameter name="description">`) {
+		if strings.Contains(block, `<|DSML|parameter name="description">`) {
 			sawDescription = true
 		}
 	}
 	if !sawDescription {
 		t.Fatalf("expected Bash long-script example to include description, got: %s", out)
 	}
-	if strings.Contains(out, `<invoke name="Read">`) {
+	if strings.Contains(out, `<|DSML|invoke name="Read">`) {
 		t.Fatalf("expected examples to avoid unavailable hard-coded Read tool, got: %s", out)
 	}
 }
@@ -60,10 +60,10 @@ func TestBuildToolCallInstructions_ExecuteCommandLongScriptUsesCommand(t *testin
 	}
 
 	for _, block := range blocks {
-		if !strings.Contains(block, `<parameter name="command">`) {
+		if !strings.Contains(block, `<|DSML|parameter name="command">`) {
 			t.Fatalf("expected execute_command examples to use command parameter, got: %s", block)
 		}
-		if strings.Contains(block, `<parameter name="path">`) || strings.Contains(block, `<parameter name="content">`) {
+		if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
 			t.Fatalf("expected execute_command examples not to use file write parameters, got: %s", block)
 		}
 	}
@@ -80,10 +80,10 @@ func TestBuildToolCallInstructions_ExecCommandLongScriptUsesCmd(t *testing.T) {
 	}
 
 	for _, block := range blocks {
-		if !strings.Contains(block, `<parameter name="cmd">`) {
+		if !strings.Contains(block, `<|DSML|parameter name="cmd">`) {
 			t.Fatalf("expected exec_command examples to use cmd parameter, got: %s", block)
 		}
-		if strings.Contains(block, `<parameter name="command">`) || strings.Contains(block, `<parameter name="path">`) || strings.Contains(block, `<parameter name="content">`) {
+		if strings.Contains(block, `<|DSML|parameter name="command">`) || strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
 			t.Fatalf("expected exec_command examples not to use command or file write parameters, got: %s", block)
 		}
 	}
@@ -100,10 +100,10 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) {
 	}
 
 	for _, block := range blocks {
-		if !strings.Contains(block, `<parameter name="file_path">`) || !strings.Contains(block, `<parameter name="content">`) {
+		if !strings.Contains(block, `<|DSML|parameter name="file_path">`) || !strings.Contains(block, `<|DSML|parameter name="content">`) {
 			t.Fatalf("expected Write examples to use file_path and content, got: %s", block)
 		}
-		if strings.Contains(block, `<parameter name="path">`) {
+		if strings.Contains(block, `<|DSML|parameter name="path">`) {
 			t.Fatalf("expected Write examples not to use path, got: %s", block)
 		}
 	}
@@ -111,7 +111,7 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) {
 
 func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *testing.T) {
 	out := BuildToolCallInstructions([]string{"read_file"})
-	if !strings.Contains(out, "Never omit the opening <tool_calls> tag") {
+	if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") {
 		t.Fatalf("expected explicit missing-opening-tag warning, got: %s", out)
 	}
 	if !strings.Contains(out, "Wrong 3 — missing opening wrapper") {
@@ -120,7 +120,7 @@ func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *te
 }
 
 func findInvokeBlocks(text, name string) []string {
-	open := `<invoke name="` + name + `">`
+	open := `<|DSML|invoke name="` + name + `">`
 	remaining := text
 	blocks := []string{}
 	for {
@@ -129,11 +129,11 @@ func findInvokeBlocks(text, name string) []string {
 			return blocks
 		}
 		remaining = remaining[start:]
-		end := strings.Index(remaining, `</invoke>`)
+		end := strings.Index(remaining, `</|DSML|invoke>`)
 		if end < 0 {
 			return blocks
 		}
-		end += len(`</invoke>`)
+		end += len(`</|DSML|invoke>`)
 		blocks = append(blocks, remaining[:end])
 		remaining = remaining[end:]
 	}
diff --git a/internal/toolcall/toolcalls_dsml.go b/internal/toolcall/toolcalls_dsml.go
new file mode 100644
index 0000000..c93e04c
--- /dev/null
+++ b/internal/toolcall/toolcalls_dsml.go
@@ -0,0 +1,54 @@
+package toolcall
+
+import "strings"
+
+func normalizeDSMLToolCallMarkup(text string) (string, bool) { // the bool result is true on every path of this body
+	if text == "" {
+		return "", true
+	}
+	hasAliasLikeMarkup, _ := ContainsToolMarkupSyntaxOutsideIgnored(text) // first result: a DSML-like tool tag was seen
+	if !hasAliasLikeMarkup {
+		return text, true // nothing DSML-like to rewrite; input is returned as-is
+	}
+	return rewriteDSMLToolMarkupOutsideIgnored(text), true
+}
+
+func rewriteDSMLToolMarkupOutsideIgnored(text string) string { // rewrites DSML-like tool tags to plain <name ...> form; all other bytes are copied through
+	if text == "" {
+		return ""
+	}
+	lower := strings.ToLower(text) // NOTE(review): indexed with offsets from text; ToLower can change byte length for some runes — confirm
+	var b strings.Builder
+	b.Grow(len(text))
+	for i := 0; i < len(text); {
+		next, advanced, blocked := skipXMLIgnoredSection(lower, i)
+		if blocked {
+			b.WriteString(text[i:]) // copy the remainder verbatim and stop scanning
+			break
+		}
+		if advanced {
+			b.WriteString(text[i:next]) // the skipped section is emitted unchanged
+			i = next
+			continue
+		}
+		tag, ok := scanToolMarkupTagAt(text, i)
+		if !ok {
+			b.WriteByte(text[i]) // not a tool tag here: pass this byte through
+			i++
+			continue
+		}
+		if tag.DSMLLike {
+			b.WriteByte('<')
+			if tag.Closing {
+				b.WriteByte('/')
+			}
+			b.WriteString(tag.Name) // matched name replaces the prefixed spelling
+			b.WriteString(text[tag.NameEnd : tag.End+1])
+			i = tag.End + 1
+			continue
+		}
+		b.WriteString(text[tag.Start : tag.End+1]) // non-DSML tool tag: copied as written
+		i = tag.End + 1
+	}
+	return b.String()
+}
diff --git a/internal/toolcall/toolcalls_markup.go b/internal/toolcall/toolcalls_markup.go
index b01ba21..f9f2b4f 100644
--- a/internal/toolcall/toolcalls_markup.go
+++ b/internal/toolcall/toolcalls_markup.go
@@ -111,5 +111,72 @@ func extractStandaloneCDATA(inner string) (string, bool) {
 	if cdataMatches := cdataPattern.FindStringSubmatch(trimmed); len(cdataMatches) >= 2 {
 		return cdataMatches[1], true
 	}
+	if strings.HasPrefix(strings.ToLower(trimmed), "<![cdata[") {
+		return trimmed[len("<![CDATA["):], true
+	}
 	return "", false
 }
+
+func parseJSONLiteralValue(raw string) (any, bool) { // decodes raw as a single JSON value; ok is false for blank or non-JSON input
+	trimmed := strings.TrimSpace(raw)
+	if trimmed == "" {
+		return nil, false
+	}
+
+	switch trimmed[0] { // cheap first-byte filter before calling json.Unmarshal
+	case '{', '[', '"', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 't', 'f', 'n':
+	default:
+		return nil, false
+	}
+
+	var parsed any // encoding/json stores JSON numbers as float64 when decoding into an interface value
+	if err := json.Unmarshal([]byte(trimmed), &parsed); err != nil {
+		return nil, false
+	}
+	return parsed, true
+}
+
+// SanitizeLooseCDATA repairs malformed trailing CDATA openings just enough for
+// final parsing and flush-time recovery. Properly closed CDATA blocks are left
+// untouched; an unclosed opener is stripped so the remaining text can still be
+// parsed as part of the surrounding tool markup.
+//
+// The opener is matched ASCII case-insensitively on text itself. Offsets are
+// never taken from strings.ToLower(text): its byte length can differ from text
+// for some runes (e.g. U+0130, U+212A), which misaligns slices or panics.
+func SanitizeLooseCDATA(text string) string {
+	const openMarker = "<![cdata["
+	const closeMarker = "]]>"
+
+	var b strings.Builder
+	b.Grow(len(text))
+	pos := 0
+	for pos < len(text) {
+		// Find the next literal "<![" and then confirm the "cdata[" tail.
+		rel := strings.Index(text[pos:], "<![")
+		if rel < 0 {
+			break
+		}
+		start := pos + rel
+		contentStart := start + len(openMarker)
+		if contentStart > len(text) || !strings.EqualFold(text[start:contentStart], openMarker) {
+			b.WriteString(text[pos : start+1])
+			pos = start + 1
+			continue
+		}
+
+		endRel := strings.Index(text[contentStart:], closeMarker)
+		if endRel < 0 {
+			// Unclosed opener: drop the marker itself but keep its payload.
+			b.WriteString(text[pos:start])
+			b.WriteString(text[contentStart:])
+			return b.String()
+		}
+		end := contentStart + endRel + len(closeMarker)
+		b.WriteString(text[pos:end])
+		pos = end
+	}
+
+	// Every opener was closed (or none exists): return the input unchanged.
+	return text
+}
diff --git a/internal/toolcall/toolcalls_parse.go b/internal/toolcall/toolcalls_parse.go
index a950c2c..f5f9d39 100644
--- a/internal/toolcall/toolcalls_parse.go
+++ b/internal/toolcall/toolcalls_parse.go
@@ -32,6 +32,21 @@ func ParseStandaloneToolCallsDetailed(text string, availableToolNames []string)
 	return parseToolCallsDetailedXMLOnly(text)
 }
 
+func ParseAssistantToolCallsDetailed(text, thinking string, availableToolNames []string) ToolCallParseResult { // parses text first; thinking is only a fallback
+	textParsed := ParseStandaloneToolCallsDetailed(text, availableToolNames)
+	if len(textParsed.Calls) > 0 {
+		return textParsed
+	}
+	if strings.TrimSpace(text) != "" { // visible text without calls wins: thinking is not consulted
+		return textParsed
+	}
+	thinkingParsed := ParseStandaloneToolCallsDetailed(thinking, availableToolNames)
+	if len(thinkingParsed.Calls) > 0 {
+		return thinkingParsed
+	}
+	return textParsed // neither source produced calls: report the text result
+}
+
 func parseToolCallsDetailedXMLOnly(text string) ToolCallParseResult {
 	result := ToolCallParseResult{}
 	trimmed := strings.TrimSpace(text)
@@ -45,7 +60,17 @@ func parseToolCallsDetailedXMLOnly(text string) ToolCallParseResult {
 		return result
 	}
 
-	parsed := parseXMLToolCalls(trimmed)
+	normalized, ok := normalizeDSMLToolCallMarkup(trimmed)
+	if !ok {
+		return result
+	}
+	parsed := parseXMLToolCalls(normalized)
+	if len(parsed) == 0 && strings.Contains(strings.ToLower(normalized), "<![cdata[") {
+		recovered := SanitizeLooseCDATA(normalized)
+		if recovered != normalized {
+			parsed = parseXMLToolCalls(recovered)
+		}
+	}
 	if len(parsed) == 0 {
 		return result
 	}
@@ -73,8 +98,8 @@ func filterToolCallsDetailed(parsed []ParsedToolCall) ([]ParsedToolCall, []strin
 }
 
 func looksLikeToolCallSyntax(text string) bool {
-	lower := strings.ToLower(text)
-	return strings.Contains(lower, "<tool_calls")
+	hasDSML, hasCanonical := ContainsToolCallWrapperSyntaxOutsideIgnored(text)
+	return hasDSML || hasCanonical
 }
 
 func stripFencedCodeBlocks(text string) string {
@@ -88,6 +113,9 @@ func stripFencedCodeBlocks(text string) string {
 	inFence := false
 	fenceMarker := ""
 	inCDATA := false
+	// Track builder length when a fence opens so we can preserve content
+	// collected before the unclosed fence.
+	beforeFenceLen := 0
 	for _, line := range lines {
 		if inCDATA || cdataStartsBeforeFence(line) {
 			b.WriteString(line)
@@ -99,6 +127,7 @@ func stripFencedCodeBlocks(text string) string {
 			if marker, ok := parseFenceOpen(trimmed); ok {
 				inFence = true
 				fenceMarker = marker
+				beforeFenceLen = b.Len()
 				continue
 			}
 			b.WriteString(line)
@@ -112,6 +141,12 @@ func stripFencedCodeBlocks(text string) string {
 	}
 
 	if inFence {
+		// Unclosed fence: preserve content that was collected before the
+		// fence started rather than dropping everything.
+		result := b.String()
+		if beforeFenceLen > 0 && beforeFenceLen <= len(result) {
+			return result[:beforeFenceLen]
+		}
 		return ""
 	}
 	return b.String()
diff --git a/internal/toolcall/toolcalls_parse_markup.go b/internal/toolcall/toolcalls_parse_markup.go
index 9c4edd3..8633ad0 100644
--- a/internal/toolcall/toolcalls_parse_markup.go
+++ b/internal/toolcall/toolcalls_parse_markup.go
@@ -124,7 +124,8 @@ func findXMLElementBlocks(text, tag string) []xmlElementBlock {
 		}
 		closeStart, closeEnd, ok := findMatchingXMLEndTagOutsideCDATA(text, tag, bodyStart)
 		if !ok {
-			break
+			pos = bodyStart
+			continue
 		}
 		out = append(out, xmlElementBlock{
 			Attrs: attrs,
@@ -294,15 +295,24 @@ func parseInvokeParameterValue(raw string) any {
 		return ""
 	}
 	if value, ok := extractStandaloneCDATA(trimmed); ok {
+		if parsed, ok := parseJSONLiteralValue(value); ok {
+			return parsed
+		}
 		return value
 	}
-	if parsed := parseStructuredToolCallInput(trimmed); len(parsed) > 0 {
-		if len(parsed) == 1 {
-			if rawValue, ok := parsed["_raw"].(string); ok {
-				return rawValue
+	decoded := html.UnescapeString(extractRawTagValue(trimmed))
+	if strings.Contains(decoded, "<") && strings.Contains(decoded, ">") {
+		if parsed := parseStructuredToolCallInput(decoded); len(parsed) > 0 {
+			if len(parsed) == 1 {
+				if rawValue, ok := parsed["_raw"].(string); ok {
+					return rawValue
+				}
 			}
+			return parsed
 		}
+	}
+	if parsed, ok := parseJSONLiteralValue(decoded); ok {
 		return parsed
 	}
-	return html.UnescapeString(extractRawTagValue(trimmed))
+	return decoded
 }
diff --git a/internal/toolcall/toolcalls_scan.go b/internal/toolcall/toolcalls_scan.go
new file mode 100644
index 0000000..099f73b
--- /dev/null
+++ b/internal/toolcall/toolcalls_scan.go
@@ -0,0 +1,219 @@
+package toolcall
+
+import "strings"
+
+var toolMarkupNames = []string{"tool_calls", "invoke", "parameter"}
+
+type ToolMarkupTag struct {
+	Start       int    // byte index of the tag's '<'
+	End         int    // index returned by findXMLTagEnd; callers treat it as inclusive (they slice through End+1)
+	NameStart   int    // byte index where the tool name begins, after any '/', pipe, or dsml prefix
+	NameEnd     int    // byte index just past the tool name
+	Name        string // matched entry of toolMarkupNames
+	Closing     bool   // '/' directly follows '<'
+	SelfClosing bool   // trimmed tag text ends with "/>"
+	DSMLLike    bool   // a pipe or "dsml" prefix preceded the name
+	Canonical   bool   // always !DSMLLike
+}
+
+func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) { // reports which kinds of tool tag occur outside skipped sections
+	lower := strings.ToLower(text) // NOTE(review): indexed with offsets from text; ToLower can change byte length for some runes — confirm
+	for i := 0; i < len(text); {
+		next, advanced, blocked := skipXMLIgnoredSection(lower, i)
+		if blocked {
+			return hasDSML, hasCanonical // stop scanning; report what was seen so far
+		}
+		if advanced {
+			i = next
+			continue
+		}
+		if tag, ok := scanToolMarkupTagAt(text, i); ok {
+			if tag.DSMLLike {
+				hasDSML = true
+			} else {
+				hasCanonical = true
+			}
+			if hasDSML && hasCanonical {
+				return true, true // both kinds seen; nothing more to learn
+			}
+			i = tag.End + 1
+			continue
+		}
+		i++
+	}
+	return hasDSML, hasCanonical
+}
+
+func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) { // like ContainsToolMarkupSyntaxOutsideIgnored, but counts only tool_calls tags
+	lower := strings.ToLower(text) // NOTE(review): indexed with offsets from text; ToLower can change byte length for some runes — confirm
+	for i := 0; i < len(text); {
+		next, advanced, blocked := skipXMLIgnoredSection(lower, i)
+		if blocked {
+			return hasDSML, hasCanonical // stop scanning; report what was seen so far
+		}
+		if advanced {
+			i = next
+			continue
+		}
+		if tag, ok := scanToolMarkupTagAt(text, i); ok {
+			if tag.Name != "tool_calls" { // invoke/parameter tags are stepped over without being counted
+				i = tag.End + 1
+				continue
+			}
+			if tag.DSMLLike {
+				hasDSML = true
+			} else {
+				hasCanonical = true
+			}
+			if hasDSML && hasCanonical {
+				return true, true // both kinds seen; nothing more to learn
+			}
+			i = tag.End + 1
+			continue
+		}
+		i++
+	}
+	return hasDSML, hasCanonical
+}
+
+func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) { // returns the first tool tag at or after start that is outside skipped sections
+	lower := strings.ToLower(text) // NOTE(review): indexed with offsets from text; ToLower can change byte length for some runes — confirm
+	for i := maxInt(start, 0); i < len(text); { // a negative start is clamped to 0
+		next, advanced, blocked := skipXMLIgnoredSection(lower, i)
+		if blocked {
+			return ToolMarkupTag{}, false
+		}
+		if advanced {
+			i = next
+			continue
+		}
+		if tag, ok := scanToolMarkupTagAt(text, i); ok {
+			return tag, true
+		}
+		i++
+	}
+	return ToolMarkupTag{}, false
+}
+
+func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag, bool) { // finds the closing tag that balances open, honoring same-name nesting
+	if text == "" || open.Name == "" || open.Closing {
+		return ToolMarkupTag{}, false
+	}
+	depth := 1 // open itself counts as one unmatched opener
+	for pos := open.End + 1; pos < len(text); {
+		tag, ok := FindToolMarkupTagOutsideIgnored(text, pos)
+		if !ok {
+			return ToolMarkupTag{}, false
+		}
+		if tag.Name != open.Name { // tags with a different name never affect depth
+			pos = tag.End + 1
+			continue
+		}
+		if tag.Closing {
+			depth--
+			if depth == 0 {
+				return tag, true
+			}
+		} else if !tag.SelfClosing { // a self-closing same-name tag neither opens nor closes a level
+			depth++
+		}
+		pos = tag.End + 1
+	}
+	return ToolMarkupTag{}, false
+}
+
+// scanToolMarkupTagAt parses a tool_calls/invoke/parameter tag that starts at
+// text[start], allowing an optional '/', pipe, and "dsml" prefix before the
+// name. Case is matched on text itself because strings.ToLower(text) can change
+// byte length (e.g. U+0130), making offsets into it misalign or panic.
+func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
+	if start < 0 || start >= len(text) || text[start] != '<' {
+		return ToolMarkupTag{}, false
+	}
+	i := start + 1
+	closing := i < len(text) && text[i] == '/'
+	if closing {
+		i++
+	}
+	dsmlLike := false
+	if next, ok := consumeToolMarkupPipe(text, i); ok {
+		dsmlLike = true
+		i = next
+	}
+	if len(text)-i >= len("dsml") && strings.EqualFold(text[i:i+len("dsml")], "dsml") {
+		dsmlLike = true
+		i += len("dsml")
+		for next, ok := consumeToolMarkupSeparator(text, i); ok; next, ok = consumeToolMarkupSeparator(text, i) {
+			i = next
+		}
+	}
+	name, nameLen := matchToolMarkupName(text, i)
+	nameEnd := i + nameLen
+	if nameLen == 0 || !hasToolMarkupBoundary(text, nameEnd) {
+		return ToolMarkupTag{}, false
+	}
+	end := findXMLTagEnd(text, nameEnd)
+	if end < 0 {
+		return ToolMarkupTag{}, false
+	}
+	return ToolMarkupTag{
+		Start:       start,
+		End:         end,
+		NameStart:   i,
+		NameEnd:     nameEnd,
+		Name:        name,
+		Closing:     closing,
+		SelfClosing: strings.HasSuffix(strings.TrimSpace(text[start:end+1]), "/>"),
+		DSMLLike:    dsmlLike,
+		Canonical:   !dsmlLike,
+	}, true
+}
+
+// matchToolMarkupName returns the known tool markup name found at
+// lower[start:], compared ASCII case-insensitively, plus its byte length.
+func matchToolMarkupName(lower string, start int) (string, int) {
+	for _, name := range toolMarkupNames {
+		if end := start + len(name); start >= 0 && end <= len(lower) && strings.EqualFold(lower[start:end], name) {
+			return name, len(name)
+		}
+	}
+	return "", 0
+}
+
+func consumeToolMarkupPipe(text string, idx int) (int, bool) { // consumes one pipe at idx and returns the index just past it
+	if idx >= len(text) {
+		return idx, false
+	}
+	if text[idx] == '|' {
+		return idx + 1, true
+	}
+	if strings.HasPrefix(text[idx:], "｜") { // fullwidth vertical line (U+FF5C), a multi-byte rune in UTF-8
+		return idx + len("｜"), true
+	}
+	return idx, false
+}
+
+func consumeToolMarkupSeparator(text string, idx int) (int, bool) { // consumes one ASCII whitespace byte or one pipe at idx
+	if idx >= len(text) {
+		return idx, false
+	}
+	if text[idx] == ' ' || text[idx] == '\t' || text[idx] == '\r' || text[idx] == '\n' {
+		return idx + 1, true
+	}
+	if next, ok := consumeToolMarkupPipe(text, idx); ok {
+		return next, true
+	}
+	return idx, false // idx is returned unchanged when nothing was consumed
+}
+
+func hasToolMarkupBoundary(text string, idx int) bool { // true when the byte at idx cannot extend a tag name
+	if idx >= len(text) {
+		return true // end of input counts as a boundary
+	}
+	switch text[idx] {
+	case ' ', '\t', '\n', '\r', '>', '/':
+		return true
+	default:
+		return false
+	}
+}
diff --git a/internal/toolcall/toolcalls_test.go b/internal/toolcall/toolcalls_test.go
index c4bfe51..b48f88c 100644
--- a/internal/toolcall/toolcalls_test.go
+++ b/internal/toolcall/toolcalls_test.go
@@ -30,6 +30,54 @@ func TestParseToolCallsSupportsToolCallsWrapper(t *testing.T) {
 	}
 }
 
+func TestParseToolCallsSupportsDSMLShell(t *testing.T) {
+	input := `<|DSML|tool_calls><|DSML|invoke name="Bash"><|DSML|parameter name="command"><![CDATA[pwd]]></|DSML|parameter></|DSML|invoke></|DSML|tool_calls>`
+	parsed := ParseToolCalls(input, []string{"Bash"})
+	if len(parsed) != 1 {
+		t.Fatalf("expected 1 DSML call, got %#v", parsed)
+	}
+	if first := parsed[0]; first.Name != "Bash" || first.Input["command"] != "pwd" {
+		t.Fatalf("unexpected DSML parse result: %#v", first)
+	}
+}
+
+func TestParseToolCallsSupportsDSMLShellWithCanonicalExampleInCDATA(t *testing.T) {
+	embedded := `<tool_calls><invoke name="demo"><parameter name="value">x</parameter></invoke></tool_calls>`
+	input := `<|DSML|tool_calls><|DSML|invoke name="Write"><|DSML|parameter name="file_path">notes.md</|DSML|parameter><|DSML|parameter name="content"><![CDATA[` + embedded + `]]></|DSML|parameter></|DSML|invoke></|DSML|tool_calls>`
+	parsed := ParseToolCalls(input, []string{"Write"})
+	if len(parsed) != 1 {
+		t.Fatalf("expected 1 DSML call with XML-looking CDATA, got %#v", parsed)
+	}
+	if first := parsed[0]; first.Name != "Write" || first.Input["content"] != embedded {
+		t.Fatalf("unexpected DSML CDATA parse result: %#v", first)
+	}
+}
+
+func TestParseToolCallsTreatsUnclosedCDATAAsText(t *testing.T) {
+	input := `<tool_calls><invoke name="Write"><parameter name="content"><![CDATA[hello world</parameter></invoke></tool_calls>`
+	detailed := ParseToolCallsDetailed(input, []string{"Write"})
+	if len(detailed.Calls) != 1 {
+		t.Fatalf("expected unclosed CDATA to still parse via outer wrapper, got %#v", detailed.Calls)
+	}
+	// The CDATA section is never closed, yet its payload must still be recovered.
+	if payload, _ := detailed.Calls[0].Input["content"].(string); payload != "hello world" {
+		t.Fatalf("expected recovered CDATA payload, got %q", payload)
+	}
+}
+
+func TestParseToolCallsNormalizesMixedDSMLAndCanonicalToolTags(t *testing.T) {
+	// The wrapper and parameter tags are DSML while the invoke tag is canonical;
+	// such a mixture is expected to be normalized and parsed rather than rejected.
+	input := `<|DSML|tool_calls><invoke name="Bash"><|DSML|parameter name="command">pwd</|DSML|parameter></invoke></|DSML|tool_calls>`
+	parsed := ParseToolCalls(input, []string{"Bash"})
+	if len(parsed) != 1 {
+		t.Fatalf("expected mixed DSML/XML tool tags to be normalized and parsed, got %#v", parsed)
+	}
+	if first := parsed[0]; first.Name != "Bash" || first.Input["command"] != "pwd" {
+		t.Fatalf("unexpected mixed DSML parse result: %#v", first)
+	}
+}
+
 func TestParseToolCallsSupportsStandaloneToolWithMultilineCDATAAndRepeatedXMLTags(t *testing.T) {
 	text := `<tool_calls><invoke name="write_file"><parameter name="path">script.sh</parameter><parameter name="content"><![CDATA[#!/bin/bash
 echo "hello"
@@ -94,6 +142,23 @@ func TestParseToolCallsSupportsInvokeParameters(t *testing.T) {
 	}
 }
 
+func TestParseToolCallsSupportsJSONScalarParameters(t *testing.T) {
+	input := `<tool_calls><invoke name="configure"><parameter name="count">123</parameter><parameter name="max_tokens"><![CDATA[256]]></parameter><parameter name="enabled">true</parameter></invoke></tool_calls>`
+	parsed := ParseToolCalls(input, []string{"configure"})
+	if len(parsed) != 1 {
+		t.Fatalf("expected 1 call, got %#v", parsed)
+	}
+	if n, isNum := parsed[0].Input["count"].(float64); !(isNum && n == 123) {
+		t.Fatalf("expected numeric count, got %#v", parsed[0].Input["count"])
+	}
+	if n, isNum := parsed[0].Input["max_tokens"].(float64); !(isNum && n == 256) {
+		t.Fatalf("expected numeric max_tokens, got %#v", parsed[0].Input["max_tokens"])
+	}
+	if flag, isBool := parsed[0].Input["enabled"].(bool); !(isBool && flag) {
+		t.Fatalf("expected boolean enabled, got %#v", parsed[0].Input["enabled"])
+	}
+}
+
 func TestParseToolCallsPreservesRawMalformedParams(t *testing.T) {
 	text := `<tool_calls><invoke name="execute_command"><parameter name="command">cd /root && git status</parameter></invoke></tool_calls>`
 	calls := ParseToolCalls(text, []string{"execute_command"})
@@ -407,3 +472,102 @@ func TestParseToolCallsParsesAfterFourBacktickFence(t *testing.T) {
 		t.Fatalf("expected non-fenced tool call to be parsed, got %#v", res.Calls[0])
 	}
 }
+
+func TestParseToolCallsToleratesDSMLSpaceSeparatorTypo(t *testing.T) {
+	input := strings.Join([]string{
+		"<|DSML tool_calls>",
+		"<|DSML invoke name=\"Read\">",
+		"<|DSML parameter name=\"file_path\"><![CDATA[/tmp/input.txt]]></|DSML parameter>",
+		"</|DSML invoke>",
+		"</|DSML tool_calls>",
+	}, "\n")
+	parsed := ParseToolCalls(input, []string{"Read"})
+	if len(parsed) != 1 {
+		t.Fatalf("expected one call from DSML space-separator typo, got %#v", parsed)
+	}
+	if name := parsed[0].Name; name != "Read" {
+		t.Fatalf("expected Read call, got %#v", parsed[0])
+	}
+	if path, _ := parsed[0].Input["file_path"].(string); path != "/tmp/input.txt" {
+		t.Fatalf("expected file_path to parse, got %q", path)
+	}
+}
+
+func TestParseToolCallsDoesNotAcceptDSMLSpaceLookalikeTagName(t *testing.T) {
+	input := strings.Join([]string{
+		"<|DSML tool_calls_extra>",
+		"<|DSML invoke name=\"Read\">",
+		"<|DSML parameter name=\"file_path\">/tmp/input.txt</|DSML parameter>",
+		"</|DSML invoke>",
+		"</|DSML tool_calls_extra>",
+	}, "\n")
+	parsed := ParseToolCalls(input, []string{"Read"})
+	if len(parsed) > 0 {
+		t.Fatalf("expected no calls from lookalike tag, got %#v", parsed)
+	}
+}
+
+func TestParseToolCallsToleratesDSMLCollapsedTagNames(t *testing.T) {
+	checklist := `[x] 检查 toolcalls_format.go 格式化逻辑
+[x] 检查 toolcalls_parse.go 解析逻辑
+[x] 检查 toolcalls_xml.go 和 toolcalls_dsml.go
+[x] 检查 toolcalls_markup.go 和 toolcalls_json_repair.go
+[x] 检查 prompt/tool_calls.go 注入逻辑
+[x] 检查 toolstream 流式解析
+[x] 查看测试文件确认预期行为
+[x] 给出调查结论`
+	input := strings.Join([]string{
+		"[]",
+		"<DSMLtool_calls>",
+		"<DSMLinvoke name=\"update_todo_list\">",
+		"<DSMLparameter name=\"todos\"><![CDATA[" + checklist + "]]></DSMLparameter>",
+		"</DSMLinvoke>",
+		"</DSMLtool_calls>",
+	}, "\n")
+	parsed := ParseToolCalls(input, []string{"update_todo_list"})
+	if len(parsed) != 1 {
+		t.Fatalf("expected one call from collapsed DSML tags, got %#v", parsed)
+	}
+	if name := parsed[0].Name; name != "update_todo_list" {
+		t.Fatalf("expected update_todo_list call, got %#v", parsed[0])
+	}
+	if roundTrip, _ := parsed[0].Input["todos"].(string); roundTrip != checklist {
+		t.Fatalf("expected todos to round-trip, got %q", roundTrip)
+	}
+}
+
+func TestParseToolCallsDoesNotAcceptDSMLCollapsedLookalikeTagName(t *testing.T) {
+	input := strings.Join([]string{
+		"<DSMLtool_calls_extra>",
+		"<DSMLinvoke name=\"update_todo_list\">",
+		"<DSMLparameter name=\"todos\">x</DSMLparameter>",
+		"</DSMLinvoke>",
+		"</DSMLtool_calls_extra>",
+	}, "\n")
+	parsed := ParseToolCalls(input, []string{"update_todo_list"})
+	if len(parsed) > 0 {
+		t.Fatalf("expected no calls from collapsed lookalike tag, got %#v", parsed)
+	}
+}
+
+func TestParseToolCallsSkipsProseMentionOfSameWrapperVariant(t *testing.T) {
+	input := strings.Join([]string{
+		"Summary: support canonical <tool_calls> and DSML <|DSML|tool_calls> wrappers.",
+		"",
+		"<|DSML|tool_calls>",
+		"<|DSML|invoke name=\"Bash\">",
+		"<|DSML|parameter name=\"command\"><![CDATA[git status]]></|DSML|parameter>",
+		"</|DSML|invoke>",
+		"</|DSML|tool_calls>",
+	}, "\n")
+	detailed := ParseToolCallsDetailed(input, []string{"Bash"})
+	if len(detailed.Calls) != 1 {
+		t.Fatalf("expected one parsed call after prose mention, got %#v", detailed.Calls)
+	}
+	if name := detailed.Calls[0].Name; name != "Bash" {
+		t.Fatalf("expected Bash call, got %#v", detailed.Calls[0])
+	}
+	if cmd, _ := detailed.Calls[0].Input["command"].(string); cmd != "git status" {
+		t.Fatalf("expected command to parse, got %q", cmd)
+	}
+}
diff --git a/internal/toolstream/complex_edge_test.go b/internal/toolstream/complex_edge_test.go
new file mode 100644
index 0000000..759a80f
--- /dev/null
+++ b/internal/toolstream/complex_edge_test.go
@@ -0,0 +1,682 @@
+package toolstream
+
+import (
+	"strings"
+	"testing"
+)
+
+// ---- Mismatched tool blocks ----
+
+// Only a closing </tool_calls> appears; there is no opening <tool_calls>.
+func TestSieve_MismatchedClose_OnlyClosingTag(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"一些正文内容\n",
+		"</tool_calls>\n",
+		"后续内容",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"read_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file"})...)
+
+	var body strings.Builder
+	callCount := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+	if callCount > 0 {
+		t.Fatalf("孤立闭合标签不应触发工具调用，got %d", callCount)
+	}
+	if !(strings.Contains(body.String(), "一些正文") && strings.Contains(body.String(), "后续内容")) {
+		t.Fatalf("应保留所有文本, got %q", body.String())
+	}
+}
+
+// <tool_calls> is opened but followed by plain text instead of an <invoke>.
+func TestSieve_ToolCallsWrapperWithNoInvoke(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"<tool_calls>\n",
+		"这里没有 invoke 标签\n",
+		"</tool_calls>\n",
+		"后续内容",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"read_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file"})...)
+
+	var body strings.Builder
+	callCount := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+	if callCount > 0 {
+		t.Fatalf("无 invoke 不应触发工具调用，got %d", callCount)
+	}
+}
+
+// Two consecutive tool-call blocks.
+func TestSieve_TwoConsecutiveToolCallBlocks(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		`<tool_calls><invoke name="read_file"><parameter name="path">a.txt</parameter></invoke></tool_calls>`,
+		"\n",
+		`<tool_calls><invoke name="read_file"><parameter name="path">b.txt</parameter></invoke></tool_calls>`,
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"read_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file"})...)
+
+	callCount := 0
+	for _, ev := range emitted {
+		callCount += len(ev.ToolCalls)
+	}
+	if callCount != 2 {
+		t.Fatalf("应解析出两个工具调用，got %d, events=%#v", callCount, emitted)
+	}
+}
+
+// ---- Tool calls inside fences must not trigger ----
+
+// A complete tool call inside a backtick fence, plus a real tool call outside the fence.
+func TestSieve_FencedExampleThenRealToolCall(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"示例：\n```xml\n",
+		`<tool_calls><invoke name="fake"><parameter name="x">1</parameter></invoke></tool_calls>`,
+		"\n```\n",
+		`<tool_calls><invoke name="read_file"><parameter name="path">real.txt</parameter></invoke></tool_calls>`,
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"read_file", "fake"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file", "fake"})...)
+
+	var body strings.Builder
+	callCount := 0
+	var invoked []string
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		for _, invocation := range ev.ToolCalls {
+			callCount++
+			invoked = append(invoked, invocation.Name)
+		}
+	}
+	if callCount != 1 {
+		t.Fatalf("应只触发围栏外的工具调用，got %d, names=%v", callCount, invoked)
+	}
+	if invoked[0] != "read_file" {
+		t.Fatalf("应触发 read_file，got %v", invoked)
+	}
+	if !strings.Contains(body.String(), "示例") {
+		t.Fatalf("围栏前文本应保留, got %q", body.String())
+	}
+}
+
+// A tool call wrapped in a tilde fence.
+func TestSieve_TildeFencedToolCallIgnored(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"~~~\n",
+		`<tool_calls><invoke name="read_file"><parameter name="path">x</parameter></invoke></tool_calls>`,
+		"\n~~~\n",
+		"结束",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"read_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file"})...)
+
+	callCount := 0
+	var body strings.Builder
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+	if callCount > 0 {
+		t.Fatalf("波浪线围栏内工具调用不应触发，got %d", callCount)
+	}
+	if !strings.Contains(body.String(), "结束") {
+		t.Fatalf("围栏后文本应保留, got %q", body.String())
+	}
+}
+
+// A 4-backtick fence nesting a 3-backtick fence, with tool tags inside.
+func TestSieve_FourBacktickNestedThreeWithToolCall(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"````markdown\n",
+		"```xml\n",
+		`<tool_calls><invoke name="read_file"><parameter name="path">x</parameter></invoke></tool_calls>`,
+		"\n```\n",
+		"````\n",
+		"外部文本",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"read_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file"})...)
+
+	callCount := 0
+	var body strings.Builder
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+	if callCount > 0 {
+		t.Fatalf("4反引号嵌套内的工具调用不应触发，got %d", callCount)
+	}
+	if !strings.Contains(body.String(), "外部文本") {
+		t.Fatalf("围栏外文本应保留, got %q", body.String())
+	}
+}
+
+// ---- DSML variants inside fences do not trigger ----
+
+// A DSML tool-call block streamed inside a ``` fence must not produce tool calls.
+func TestSieve_DSMLInsideFenceIgnored(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"```\n",
+		"<|DSML|tool_calls>\n",
+		`<|DSML|invoke name="read_file">`,
+		`<|DSML|parameter name="path">x</|DSML|parameter>`,
+		"</|DSML|invoke>\n",
+		"</|DSML|tool_calls>\n",
+		"```\n",
+		"结束",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"read_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file"})...)
+	callCount := 0
+	for _, ev := range emitted {
+		callCount += len(ev.ToolCalls)
+	}
+	if callCount > 0 {
+		t.Fatalf("围栏内的 DSML 工具调用不应触发，got %d", callCount)
+	}
+}
+
+// ---- Rich text before and after a tool call ----
+
+// Prose preceding a tool call must be kept, and the tool markup must not leak into the text.
+func TestSieve_RichTextAroundToolCall(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"我来帮你查看文件内容。\n\n",
+		"首先读取 README：\n",
+		`<tool_calls><invoke name="read_file"><parameter name="path">README.md</parameter></invoke></tool_calls>`,
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"read_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file"})...)
+	var body strings.Builder
+	callCount := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+	if callCount != 1 {
+		t.Fatalf("应有一个工具调用，got %d", callCount)
+	}
+	if !strings.Contains(body.String(), "帮你查看") {
+		t.Fatalf("前置文本丢失, got %q", body.String())
+	}
+	if strings.Contains(body.String(), "<invoke") {
+		t.Fatalf("工具标签泄漏, got %q", body.String())
+	}
+}
+
+// ---- Tool call whose CDATA contains a code fence ----
+
+// A code fence inside a CDATA parameter must reach the tool input intact, with no text emitted.
+func TestSieve_ToolCallWithCDATAContainingFence(t *testing.T) {
+	var sieve State
+	fenced := "```python\nprint('hello')\n```"
+	pieces := []string{
+		"<tool_calls>\n",
+		`<invoke name="write_file">` + "\n",
+		`<parameter name="path">test.md</parameter>` + "\n",
+		`<parameter name="content"><![CDATA[` + fenced + `]]></parameter>` + "\n",
+		"</invoke>\n",
+		"</tool_calls>",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"write_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"write_file"})...)
+	var body strings.Builder
+	callCount := 0
+	var rawContent any
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		if len(ev.ToolCalls) > 0 {
+			callCount += len(ev.ToolCalls)
+			rawContent = ev.ToolCalls[0].Input["content"]
+		}
+	}
+	if callCount != 1 {
+		t.Fatalf("应有一个工具调用，got %d", callCount)
+	}
+	gotPayload, _ := rawContent.(string)
+	if gotPayload != fenced {
+		t.Fatalf("CDATA 内围栏内容应完整保留，got %q want %q", gotPayload, fenced)
+	}
+	if body.Len() != 0 {
+		t.Fatalf("不应有文本泄漏, got %q", body.String())
+	}
+}
+
+// ---- Extreme token splitting ----
+
+// The tool tags arrive split into single-character chunks.
+func TestSieve_CharByCharToolCall(t *testing.T) {
+	var sieve State
+	whole := `<tool_calls><invoke name="read_file"><parameter name="path">go.mod</parameter></invoke></tool_calls>`
+	var emitted []Event
+	for _, r := range whole {
+		emitted = append(emitted, ProcessChunk(&sieve, string(r), []string{"read_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file"})...)
+
+	var body strings.Builder
+	callCount := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+	if callCount != 1 {
+		t.Fatalf("单字符流式应解析出工具调用，got %d", callCount)
+	}
+	if strings.Contains(body.String(), "invoke") {
+		t.Fatalf("标签泄漏, got %q", body.String())
+	}
+}
+
+// ---- Mixed format variants ----
+
+// A fullwidth-pipe wrapper combined with DSML invoke tags.
+func TestSieve_FullwidthPipeWrapperDSMLInvoke(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"<｜tool_calls>\n",
+		"<|DSML|invoke name=\"read_file\">\n",
+		"<|DSML|parameter name=\"path\">README.md</|DSML|parameter>\n",
+		"</|DSML|invoke>\n",
+		"</｜tool_calls>",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"read_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file"})...)
+
+	var body strings.Builder
+	callCount := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+	if callCount != 1 {
+		t.Fatalf("全宽+DSML混合应解析成功，got %d", callCount)
+	}
+	if strings.Contains(strings.ToLower(body.String()), "dsml") {
+		t.Fatalf("DSML 标签泄漏, got %q", body.String())
+	}
+}
+
+// ---- Unclosed tool blocks should fall back to text ----
+
+// A tool block that is never closed must come back as text instead of a tool call.
+func TestSieve_UnclosedToolCallBlockFallsBack(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"<tool_calls>\n",
+		`<invoke name="read_file">` + "\n",
+		`<parameter name="path">README.md</parameter>` + "\n",
+		// </invoke> and </tool_calls> are deliberately missing.
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"read_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file"})...)
+	var body strings.Builder
+	callCount := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+	// The unclosed block should fall back to text and must not be lost.
+	if body.Len() == 0 {
+		t.Fatalf("未闭合工具块不应丢失所有内容")
+	}
+	if callCount > 0 {
+		t.Fatalf("未闭合工具块不应解析出工具调用，got %d", callCount)
+	}
+}
+
+// ---- Tag variant names mentioned in text, followed by a real tool call ----
+
+// The model's commit-message text mentions tags such as <dsml|tool_calls> and is
+// immediately followed by a real DSML tool call that uses a different wrapper variant.
+func TestSieve_TagMentionInTextThenRealToolCall(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"建议的 commit message：\n\nfeat: expand DSML alias support\n\n",
+		"Add support for <dsml|tool_calls>, ",
+		"<｜tool_calls> (fullwidth pipe),\n",
+		"and <|tool_calls> wrapper variants.\n\n",
+		"<|DSML|tool_calls>\n",
+		"<|DSML|invoke name=\"Bash\">\n",
+		"<|DSML|parameter name=\"command\"><![CDATA[git status]]></|DSML|parameter>\n",
+		"</|DSML|invoke>\n",
+		"</|DSML|tool_calls>",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"Bash"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"Bash"})...)
+
+	var body strings.Builder
+	callCount := 0
+	var invoked []string
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		for _, invocation := range ev.ToolCalls {
+			callCount++
+			invoked = append(invoked, invocation.Name)
+		}
+	}
+
+	if callCount != 1 {
+		t.Fatalf("应解析出 1 个工具调用，got %d, text=%q", callCount, body.String())
+	}
+	if invoked[0] != "Bash" {
+		t.Fatalf("应解析出 Bash，got %v", invoked)
+	}
+	if !strings.Contains(body.String(), "commit message") {
+		t.Fatalf("前置文本应保留, got %q", body.String())
+	}
+}
+
+// Prose mentions the same <|DSML|tool_calls> variant that the real call then uses.
+func TestSieve_SameVariantTagMentionInTextThenRealToolCall(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"Summary: support canonical <tool_calls> and DSML <|DSML|tool_calls> wrappers.\n\n",
+		"<|DSML|tool_calls>\n",
+		"<|DSML|invoke name=\"Bash\">\n",
+		"<|DSML|parameter name=\"command\"><![CDATA[git status]]></|DSML|parameter>\n",
+		"</|DSML|invoke>\n",
+		"</|DSML|tool_calls>",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"Bash"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"Bash"})...)
+	var body strings.Builder
+	var lastName string
+	var lastCommand string
+	total := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		for _, invocation := range ev.ToolCalls {
+			total++
+			lastName = invocation.Name
+			lastCommand, _ = invocation.Input["command"].(string)
+		}
+	}
+
+	if total != 1 {
+		t.Fatalf("应解析出 1 个工具调用，got %d, text=%q", total, body.String())
+	}
+	if lastName != "Bash" {
+		t.Fatalf("应解析出 Bash，got %q", lastName)
+	}
+	if lastCommand != "git status" {
+		t.Fatalf("应解析出 command，got %q", lastCommand)
+	}
+	if !strings.Contains(body.String(), "Summary:") {
+		t.Fatalf("前置文本应保留, got %q", body.String())
+	}
+}
+
+// A review-style answer mentions wrapper aliases in prose before two real Bash calls.
+func TestSieve_ReviewSampleWithAliasMentionsPreservesBodyAndToolCalls(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"Done reviewing the diff. Here's my analysis before we commit:\n\n",
+		"Summary of Changes\n",
+		"DSML wrapper variant support — recognize aliases (<dsml|tool_calls>, <|tool_calls>, <｜tool_calls>) alongside canonical <tool_calls> and <|DSML|tool_calls> wrappers.\n\n",
+		"<|DSML|tool_calls>\n",
+		"<|DSML|invoke name=\"Bash\">\n",
+		"<|DSML|parameter name=\"command\"><![CDATA[git add docs/toolcall-semantics.md internal/toolstream/tool_sieve_xml.go]]></|DSML|parameter>\n",
+		"<|DSML|parameter name=\"description\"><![CDATA[Stage all relevant changed files]]></|DSML|parameter>\n",
+		"</|DSML|invoke>\n",
+		"<|DSML|invoke name=\"Bash\">\n",
+		"<|DSML|parameter name=\"command\"><![CDATA[git commit -m \"$(cat <<'EOF'\nfeat(toolstream): expand DSML wrapper detection\n\nSupport DSML wrapper aliases: <dsml|tool_calls>, <|tool_calls>, <｜tool_calls> alongside existing canonical wrappers.\nEOF\n)\"]]></|DSML|parameter>\n",
+		"<|DSML|parameter name=\"description\"><![CDATA[Create commit with all staged changes]]></|DSML|parameter>\n",
+		"</|DSML|invoke>\n",
+		"</|DSML|tool_calls>",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"Bash"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"Bash"})...)
+	var body strings.Builder
+	var cmds []string
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		for _, invocation := range ev.ToolCalls {
+			if invocation.Name == "Bash" {
+				oneCmd, _ := invocation.Input["command"].(string)
+				cmds = append(cmds, oneCmd)
+			}
+		}
+	}
+
+	if len(cmds) != 2 {
+		t.Fatalf("应解析出 2 个 Bash 工具调用，got %d, text=%q", len(cmds), body.String())
+	}
+	if !strings.Contains(body.String(), "<|DSML|tool_calls> wrappers") {
+		t.Fatalf("正文中的 DSML mention 应保留, got %q", body.String())
+	}
+	if !strings.Contains(body.String(), "Summary of Changes") {
+		t.Fatalf("前置正文应完整保留, got %q", body.String())
+	}
+	if strings.Contains(body.String(), "git add docs/toolcall-semantics.md") {
+		t.Fatalf("真实工具参数不应泄漏到正文, got %q", body.String())
+	}
+	if !(strings.Contains(cmds[0], "git add") && strings.Contains(cmds[1], "git commit")) {
+		t.Fatalf("工具参数解析不符合预期, got %#v", cmds)
+	}
+}
+
+// An inline-code DSML mention split across chunks must not truncate the surrounding prose.
+func TestSieve_ChineseReviewSamplePreservesInlineDSMLMention(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"# Context from my IDE setup:\n\n## My request for Codex:\n",
+		"基于我的审查，这是工作区更改的总结和提交。\n\n## 审查报告\n\n### 文档\n\nAPI.md 中的工具调用部分缺少针对新 DSML 别名的更新——它只提到了 `",
+		"<|DSML|tool_calls>` 和 canonical `<tool_calls>`。由于这涉及 API 兼容性和文档准确性，需要在下游进行记录。\n\n",
+		"### 代码\n\n所有更改现在一致地处理四个 DSML wrapper 变体。\n\n现在提交已暂存的更改。\n\n",
+		"<|DSML|tool_calls>\n",
+		"  <|DSML|invoke name=\"Bash\">\n",
+		"    <|DSML|parameter name=\"command\"><![CDATA[git commit -m \"$(cat <<'EOF'\nfeat: expand DSML tool-call alias and fence handling\nEOF\n)\"]]></|DSML|parameter>\n",
+		"    <|DSML|parameter name=\"description\"><![CDATA[Commit staged changes]]></|DSML|parameter>\n",
+		"  </|DSML|invoke>\n",
+		"</|DSML|tool_calls>\n\n补充",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"Bash"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"Bash"})...)
+	var body strings.Builder
+	total := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		total += len(ev.ToolCalls)
+	}
+
+	if total != 1 {
+		t.Fatalf("应解析出 1 个工具调用，got %d, text=%q", total, body.String())
+	}
+	mention := "它只提到了 `<|DSML|tool_calls>` 和 canonical `<tool_calls>`。由于这涉及 API 兼容性"
+	if !strings.Contains(body.String(), mention) {
+		t.Fatalf("正文不应在 inline DSML mention 处截断, want contains %q, got %q", mention, body.String())
+	}
+	if !strings.Contains(body.String(), "补充") {
+		t.Fatalf("工具块后的正文应保留, got %q", body.String())
+	}
+	if strings.Contains(body.String(), "<|DSML|invoke") {
+		t.Fatalf("真实工具块不应泄漏到正文, got %q", body.String())
+	}
+}
+
+// "<|DSML tool_calls>" style tags (a space where the second pipe belongs) must still parse.
+func TestSieve_ToleratesDSMLSpaceSeparatorTypo(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"准备读取文件。\n",
+		"<|DSML tool_calls>\n",
+		"<|DSML invoke name=\"Read\">\n",
+		"<|DSML parameter name=\"file_path\"><![CDATA[/tmp/input.txt]]></|DSML parameter>\n",
+		"</|DSML invoke>\n",
+		"</|DSML tool_calls>",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"Read"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"Read"})...)
+	var body strings.Builder
+	var gotPath string
+	total := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		for _, invocation := range ev.ToolCalls {
+			total++
+			gotPath, _ = invocation.Input["file_path"].(string)
+		}
+	}
+
+	if total != 1 {
+		t.Fatalf("应解析出 1 个工具调用，got %d, text=%q", total, body.String())
+	}
+	if gotPath != "/tmp/input.txt" {
+		t.Fatalf("应解析出 file_path，got %q", gotPath)
+	}
+	if !strings.Contains(body.String(), "准备读取文件") {
+		t.Fatalf("前置正文应保留, got %q", body.String())
+	}
+	if strings.Contains(body.String(), "<|DSML invoke") {
+		t.Fatalf("真实工具块不应泄漏到正文, got %q", body.String())
+	}
+}
+
+// A look-alike name (tool_calls_extra) after "<|DSML " must pass through as plain text.
+func TestSieve_DSMLSpaceLookalikeTagNameStaysText(t *testing.T) {
+	var sieve State
+	raw := "<|DSML tool_calls_extra><|DSML invoke name=\"Read\"><|DSML parameter name=\"file_path\">/tmp/input.txt</|DSML parameter></|DSML invoke></|DSML tool_calls_extra>"
+	emitted := ProcessChunk(&sieve, raw, []string{"Read"})
+	emitted = append(emitted, Flush(&sieve, []string{"Read"})...)
+	var body strings.Builder
+	total := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		total += len(ev.ToolCalls)
+	}
+	if total > 0 {
+		t.Fatalf("相似标签名不应触发工具调用，got %d", total)
+	}
+	if body.String() != raw {
+		t.Fatalf("相似标签名应作为正文透传, got %q", body.String())
+	}
+}
+
+// Collapsed "<DSMLtool_calls>" style tags must parse, leaving only the "[]\n" prefix as text.
+func TestSieve_DSMLCollapsedTagNamesWithPrefixText(t *testing.T) {
+	var sieve State
+	checklist := `[x] 检查 toolcalls_format.go 格式化逻辑
+[x] 检查 toolcalls_parse.go 解析逻辑
+[x] 检查 toolcalls_xml.go 和 toolcalls_dsml.go
+[x] 检查 toolcalls_markup.go 和 toolcalls_json_repair.go
+[x] 检查 prompt/tool_calls.go 注入逻辑
+[x] 检查 toolstream 流式解析
+[x] 查看测试文件确认预期行为
+[x] 给出调查结论`
+	pieces := []string{
+		"[]\n",
+		"<DSMLtool_calls>\n",
+		"<DSMLinvoke name=\"update_todo_list\">\n",
+		"<DSMLparameter name=\"todos\"><![CDATA[" + checklist + "]]></DSMLparameter>\n",
+		"</DSMLinvoke>\n",
+		"</DSMLtool_calls>",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"update_todo_list"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"update_todo_list"})...)
+	var body strings.Builder
+	var roundTrip string
+	total := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		for _, invocation := range ev.ToolCalls {
+			total++
+			roundTrip, _ = invocation.Input["todos"].(string)
+		}
+	}
+	if total != 1 {
+		t.Fatalf("应解析出 1 个工具调用，got %d, text=%q", total, body.String())
+	}
+	if roundTrip != checklist {
+		t.Fatalf("todos 应完整保留，got %q", roundTrip)
+	}
+	if body.String() != "[]\n" {
+		t.Fatalf("前置正文应完整保留且不泄漏工具块, got %q", body.String())
+	}
+}
+
+// A collapsed look-alike name (<DSMLtool_calls_extra>) must pass through as plain text.
+func TestSieve_DSMLCollapsedLookalikeTagNameStaysText(t *testing.T) {
+	var sieve State
+	raw := "<DSMLtool_calls_extra><DSMLinvoke name=\"update_todo_list\"><DSMLparameter name=\"todos\">x</DSMLparameter></DSMLinvoke></DSMLtool_calls_extra>"
+	emitted := ProcessChunk(&sieve, raw, []string{"update_todo_list"})
+	emitted = append(emitted, Flush(&sieve, []string{"update_todo_list"})...)
+	var body strings.Builder
+	total := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		total += len(ev.ToolCalls)
+	}
+	if total > 0 {
+		t.Fatalf("相似 collapsed 标签名不应触发工具调用，got %d", total)
+	}
+	if body.String() != raw {
+		t.Fatalf("相似 collapsed 标签名应作为正文透传, got %q", body.String())
+	}
+}
diff --git a/internal/toolstream/fence_edge_sieve_test.go b/internal/toolstream/fence_edge_sieve_test.go
new file mode 100644
index 0000000..d56335f
--- /dev/null
+++ b/internal/toolstream/fence_edge_sieve_test.go
@@ -0,0 +1,59 @@
+package toolstream
+
+import (
+	"strings"
+	"testing"
+)
+
+// Tool-call tags inside a tilde fence must not trigger a tool call.
+func TestProcessToolSieveTildeFenceDoesNotTriggerToolCall(t *testing.T) {
+	var sieve State
+	pieces := []string{
+		"示例：\n~~~xml\n",
+		"<tool_calls><invoke name=\"read_file\"><parameter name=\"path\">README.md</parameter></invoke></tool_calls>\n",
+		"~~~\n",
+		"完毕。",
+	}
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"read_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file"})...)
+
+	var body strings.Builder
+	callCount := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+
+	if callCount > 0 {
+		t.Fatalf("expected tilde-fenced tool example to stay text, got %d tool calls", callCount)
+	}
+	if !(strings.Contains(body.String(), "示例") && strings.Contains(body.String(), "完毕")) {
+		t.Fatalf("expected surrounding text preserved, got %q", body.String())
+	}
+}
+
+// A 4-backtick fence nesting a 3-backtick fence (with tool tags inside) must not trigger.
+func TestProcessToolSieveNestedFourBacktickFenceDoesNotTrigger(t *testing.T) {
+	var sieve State
+	raw := "说明：\n````xml\n```\n<tool_calls><invoke name=\"read_file\"><parameter name=\"path\">x</parameter></invoke></tool_calls>\n```\n````\n结束。"
+	pieces := strings.SplitAfter(raw, "\n")
+	var emitted []Event
+	for _, piece := range pieces {
+		emitted = append(emitted, ProcessChunk(&sieve, piece, []string{"read_file"})...)
+	}
+	emitted = append(emitted, Flush(&sieve, []string{"read_file"})...)
+
+	var body strings.Builder
+	callCount := 0
+	for _, ev := range emitted {
+		body.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+
+	if callCount > 0 {
+		t.Fatalf("expected 4-backtick fenced example to stay text, got %d tool calls", callCount)
+	}
+}
diff --git a/internal/toolstream/tool_sieve_core.go b/internal/toolstream/tool_sieve_core.go
index 2ec0914..a228c13 100644
--- a/internal/toolstream/tool_sieve_core.go
+++ b/internal/toolstream/tool_sieve_core.go
@@ -114,10 +114,30 @@ func Flush(state *State, toolNames []string) []Event {
 		} else {
 			content := state.capture.String()
 			if content != "" {
-				// If capture never resolved into a real tool call, release the
-				// buffered text instead of swallowing it.
-				state.noteText(content)
-				events = append(events, Event{Content: content})
+				recovered := toolcall.SanitizeLooseCDATA(content)
+				if recovered != content {
+					if prefix, calls, suffix, recoveredReady := consumeXMLToolCapture(recovered, toolNames); recoveredReady && len(calls) > 0 {
+						if prefix != "" {
+							state.noteText(prefix)
+							events = append(events, Event{Content: prefix})
+						}
+						events = append(events, Event{ToolCalls: calls})
+						if suffix != "" {
+							state.noteText(suffix)
+							events = append(events, Event{Content: suffix})
+						}
+					} else {
+						// If capture never resolved into a real tool call, release
+						// the buffered text instead of swallowing it.
+						state.noteText(content)
+						events = append(events, Event{Content: content})
+					}
+				} else {
+					// If capture never resolved into a real tool call, release the
+					// buffered text instead of swallowing it.
+					state.noteText(content)
+					events = append(events, Event{Content: content})
+				}
 			}
 		}
 		state.capture.Reset()
@@ -193,5 +213,8 @@ func consumeToolCapture(state *State, toolNames []string) (prefix string, calls
 	if hasOpenXMLToolTag(captured) {
 		return "", nil, "", false
 	}
-	return "", nil, "", false
+	if shouldKeepBareInvokeCapture(captured) {
+		return "", nil, "", false
+	}
+	return captured, nil, "", true
 }
diff --git a/internal/toolstream/tool_sieve_state.go b/internal/toolstream/tool_sieve_state.go
index 1d709bd..cdc70d5 100644
--- a/internal/toolstream/tool_sieve_state.go
+++ b/internal/toolstream/tool_sieve_state.go
@@ -6,21 +6,22 @@ import (
 )
 
 type State struct {
-	pending               strings.Builder
-	capture               strings.Builder
-	capturing             bool
-	codeFenceStack        []int
-	codeFencePendingTicks int
-	codeFenceLineStart    bool
-	pendingToolRaw        string
-	pendingToolCalls      []toolcall.ParsedToolCall
-	disableDeltas         bool
-	toolNameSent          bool
-	toolName              string
-	toolArgsStart         int
-	toolArgsSent          int
-	toolArgsString        bool
-	toolArgsDone          bool
+	pending                strings.Builder
+	capture                strings.Builder
+	capturing              bool
+	codeFenceStack         []int
+	codeFencePendingTicks  int
+	codeFencePendingTildes int
+	codeFenceNotLineStart  bool // inverted: zero-value false means "at line start"
+	pendingToolRaw         string
+	pendingToolCalls       []toolcall.ParsedToolCall
+	disableDeltas          bool
+	toolNameSent           bool
+	toolName               string
+	toolArgsStart          int
+	toolArgsSent           int
+	toolArgsString         bool
+	toolArgsDone           bool
 }
 
 type Event struct {
@@ -63,7 +64,8 @@ func insideCodeFenceWithState(state *State, text string) bool {
 	simulated := simulateCodeFenceState(
 		state.codeFenceStack,
 		state.codeFencePendingTicks,
-		state.codeFenceLineStart,
+		state.codeFencePendingTildes,
+		!state.codeFenceNotLineStart,
 		text,
 	)
 	return len(simulated.stack) > 0
@@ -73,7 +75,7 @@ func insideCodeFence(text string) bool {
 	if text == "" {
 		return false
 	}
-	return len(simulateCodeFenceState(nil, 0, true, text).stack) > 0
+	return len(simulateCodeFenceState(nil, 0, 0, true, text).stack) > 0
 }
 
 func updateCodeFenceState(state *State, text string) {
@@ -83,43 +85,65 @@ func updateCodeFenceState(state *State, text string) {
 	next := simulateCodeFenceState(
 		state.codeFenceStack,
 		state.codeFencePendingTicks,
-		state.codeFenceLineStart,
+		state.codeFencePendingTildes,
+		!state.codeFenceNotLineStart,
 		text,
 	)
 	state.codeFenceStack = next.stack
 	state.codeFencePendingTicks = next.pendingTicks
-	state.codeFenceLineStart = next.lineStart
+	state.codeFencePendingTildes = next.pendingTildes
+	state.codeFenceNotLineStart = !next.lineStart
 }
 
 type codeFenceSimulation struct {
-	stack        []int
-	pendingTicks int
-	lineStart    bool
+	stack         []int
+	pendingTicks  int
+	pendingTildes int
+	lineStart     bool
 }
 
-func simulateCodeFenceState(stack []int, pendingTicks int, lineStart bool, text string) codeFenceSimulation {
+func simulateCodeFenceState(stack []int, pendingTicks, pendingTildes int, lineStart bool, text string) codeFenceSimulation {
 	chunk := text
 	nextStack := append([]int(nil), stack...)
 	ticks := pendingTicks
+	tildes := pendingTildes
 	atLineStart := lineStart
 
-	flushTicks := func() {
+	flushPending := func() {
 		if ticks > 0 {
 			if atLineStart && ticks >= 3 {
-				applyFenceMarker(&nextStack, ticks)
+				applyFenceMarker(&nextStack, ticks) // positive = backtick
 			}
 			atLineStart = false
 			ticks = 0
 		}
+		if tildes > 0 {
+			if atLineStart && tildes >= 3 {
+				applyFenceMarker(&nextStack, -tildes) // negative = tilde
+			}
+			atLineStart = false
+			tildes = 0
+		}
 	}
 
 	for i := 0; i < len(chunk); i++ {
 		ch := chunk[i]
 		if ch == '`' {
+			if tildes > 0 {
+				// Mixed chars — flush tildes first.
+				flushPending()
+			}
 			ticks++
 			continue
 		}
-		flushTicks()
+		if ch == '~' {
+			if ticks > 0 {
+				flushPending()
+			}
+			tildes++
+			continue
+		}
+		flushPending()
 		switch ch {
 		case '\n', '\r':
 			atLineStart = true
@@ -134,24 +158,43 @@ func simulateCodeFenceState(stack []int, pendingTicks int, lineStart bool, text
 	}
 
 	return codeFenceSimulation{
-		stack:        nextStack,
-		pendingTicks: ticks,
-		lineStart:    atLineStart,
+		stack:         nextStack,
+		pendingTicks:  ticks,
+		pendingTildes: tildes,
+		lineStart:     atLineStart,
 	}
 }
 
-func applyFenceMarker(stack *[]int, ticks int) {
-	if stack == nil || ticks <= 0 {
+// applyFenceMarker pushes or pops a fence marker on the stack.
+// Positive values represent backtick fences, negative represent tilde fences.
+// A closing marker must match the sign (type) of the opening marker.
+func applyFenceMarker(stack *[]int, marker int) {
+	if stack == nil || marker == 0 {
 		return
 	}
 	if len(*stack) == 0 {
-		*stack = append(*stack, ticks)
+		*stack = append(*stack, marker)
 		return
 	}
 	top := (*stack)[len(*stack)-1]
-	if ticks >= top {
+	// Signs must match: backtick closes backtick, tilde closes tilde.
+	sameType := (top > 0 && marker > 0) || (top < 0 && marker < 0)
+	if !sameType {
+		// Different fence type — treat as nested.
+		*stack = append(*stack, marker)
+		return
+	}
+	absMarker := marker
+	absTop := top
+	if absMarker < 0 {
+		absMarker = -absMarker
+	}
+	if absTop < 0 {
+		absTop = -absTop
+	}
+	if absMarker >= absTop {
 		*stack = (*stack)[:len(*stack)-1]
 		return
 	}
-	*stack = append(*stack, ticks)
+	*stack = append(*stack, marker)
 }
diff --git a/internal/toolstream/tool_sieve_xml.go b/internal/toolstream/tool_sieve_xml.go
index 72cbbaa..9a6789e 100644
--- a/internal/toolstream/tool_sieve_xml.go
+++ b/internal/toolstream/tool_sieve_xml.go
@@ -2,74 +2,86 @@ package toolstream
 
 import (
 	"ds2api/internal/toolcall"
-	"regexp"
 	"strings"
 )
 
-// --- XML tool call support for the streaming sieve ---
-
-//nolint:unused // kept as explicit tag inventory for future XML sieve refinements.
-var xmlToolCallClosingTags = []string{"</tool_calls>"}
-var xmlToolCallOpeningTags = []string{"<tool_calls", "<invoke"}
-
-// xmlToolCallTagPairs maps each opening tag to its expected closing tag.
-// Order matters: longer/wrapper tags must be checked first.
-var xmlToolCallTagPairs = []struct{ open, close string }{
-	{"<tool_calls", "</tool_calls>"},
-}
-
-// xmlToolCallBlockPattern matches a complete canonical XML tool call block.
-//
-//nolint:unused // reserved for future fast-path XML block detection.
-var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(<tool_calls\b[^>]*>\s*(?:.*?)\s*</tool_calls>)`)
-
-// xmlToolTagsToDetect is the set of XML tag prefixes used by findToolSegmentStart.
-var xmlToolTagsToDetect = []string{"<tool_calls>", "<tool_calls\n", "<tool_calls ", "<invoke ", "<invoke\n", "<invoke\t", "<invoke\r"}
-
 // consumeXMLToolCapture tries to extract complete XML tool call blocks from captured text.
 func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, calls []toolcall.ParsedToolCall, suffix string, ready bool) {
-	lower := strings.ToLower(captured)
-	// Find the FIRST matching open/close pair for the canonical wrapper.
-	for _, pair := range xmlToolCallTagPairs {
-		openIdx := strings.Index(lower, pair.open)
-		if openIdx < 0 {
+	anyOpenFound := false
+	type candidate struct {
+		start  int
+		prefix string
+		calls  []toolcall.ParsedToolCall
+		suffix string
+	}
+	type rejectedBlock struct {
+		start  int
+		prefix string
+		suffix string
+	}
+	var best *candidate
+	var rejected *rejectedBlock
+
+	// Scan every recognized tool tag occurrence. Prose can mention a wrapper
+	// tag before the actual tool block, including the same variant as the real
+	// block. We only accept complete tool_calls wrappers that parse cleanly.
+	for searchFrom := 0; searchFrom < len(captured); {
+		tag, ok := toolcall.FindToolMarkupTagOutsideIgnored(captured, searchFrom)
+		if !ok {
+			break
+		}
+		if tag.Closing || tag.Name != "tool_calls" {
+			searchFrom = tag.End + 1
 			continue
 		}
-		// Find the matching closing tag outside CDATA. Long write-file tool
-		// calls often contain XML examples in CDATA, including </tool_calls>.
-		closeIdx := findXMLCloseOutsideCDATA(captured, pair.close, openIdx+len(pair.open))
-		if closeIdx < 0 {
-			// Opening tag is present but its specific closing tag hasn't arrived.
-			// Return not-ready so we keep buffering until the canonical wrapper closes.
-			return "", nil, "", false
+		closeTag, ok := toolcall.FindMatchingToolMarkupClose(captured, tag)
+		if !ok {
+			anyOpenFound = true
+			searchFrom = tag.End + 1
+			continue
 		}
-		closeEnd := closeIdx + len(pair.close)
 
-		xmlBlock := captured[openIdx:closeEnd]
-		prefixPart := captured[:openIdx]
-		suffixPart := captured[closeEnd:]
+		xmlBlock := captured[tag.Start : closeTag.End+1]
+		prefixPart := captured[:tag.Start]
+		suffixPart := captured[closeTag.End+1:]
 		parsed := toolcall.ParseToolCalls(xmlBlock, toolNames)
 		if len(parsed) > 0 {
 			prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
-			return prefixPart, parsed, suffixPart, true
-		}
-		// If this block failed to become a tool call, pass it through as text.
-		return prefixPart + xmlBlock, nil, suffixPart, true
-	}
-	if !strings.Contains(lower, "<tool_calls") {
-		invokeIdx := strings.Index(lower, "<invoke")
-		closeIdx := findXMLCloseOutsideCDATA(captured, "</tool_calls>", invokeIdx)
-		if invokeIdx >= 0 && closeIdx > invokeIdx {
-			closeEnd := closeIdx + len("</tool_calls>")
-			xmlBlock := "<tool_calls>" + captured[invokeIdx:closeIdx] + "</tool_calls>"
-			prefixPart := captured[:invokeIdx]
-			suffixPart := captured[closeEnd:]
-			parsed := toolcall.ParseToolCalls(xmlBlock, toolNames)
-			if len(parsed) > 0 {
-				prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
-				return prefixPart, parsed, suffixPart, true
+			if best == nil || tag.Start < best.start {
+				best = &candidate{start: tag.Start, prefix: prefixPart, calls: parsed, suffix: suffixPart}
+			}
+			break
+		}
+		if rejected == nil || tag.Start < rejected.start {
+			rejected = &rejectedBlock{start: tag.Start, prefix: prefixPart + xmlBlock, suffix: suffixPart}
+		}
+		searchFrom = tag.End + 1
+	}
+	if best != nil {
+		return best.prefix, best.calls, best.suffix, true
+	}
+	if anyOpenFound {
+		// At least one wrapper opening tag has no matching close tag yet and no
+		// complete block parsed; keep buffering until a closing tag arrives.
+		return "", nil, "", false
+	}
+	if rejected != nil {
+		// If this block failed to become a tool call, pass it through as text.
+		return rejected.prefix, nil, rejected.suffix, true
+	}
+	if invokeTag, ok := findFirstToolMarkupTagByName(captured, 0, "invoke"); ok {
+		if wrapperOpen, ok := findFirstToolMarkupTagByName(captured, 0, "tool_calls"); !ok || wrapperOpen.Start > invokeTag.Start {
+			if closeTag, ok := findFirstToolMarkupTagByNameFrom(captured, invokeTag.Start+1, "tool_calls", true); ok && closeTag.Start > invokeTag.Start {
+				xmlBlock := "<tool_calls>" + captured[invokeTag.Start:closeTag.End+1]
+				prefixPart := captured[:invokeTag.Start]
+				suffixPart := captured[closeTag.End+1:]
+				parsed := toolcall.ParseToolCalls(xmlBlock, toolNames)
+				if len(parsed) > 0 {
+					prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
+					return prefixPart, parsed, suffixPart, true
+				}
+				return prefixPart + captured[invokeTag.Start:closeTag.End+1], nil, suffixPart, true
 			}
-			return prefixPart + captured[invokeIdx:closeEnd], nil, suffixPart, true
 		}
 	}
 	return "", nil, "", false
@@ -78,52 +90,54 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
 // hasOpenXMLToolTag returns true if captured text contains an XML tool opening tag
 // whose SPECIFIC closing tag has not appeared yet.
 func hasOpenXMLToolTag(captured string) bool {
-	lower := strings.ToLower(captured)
-	for _, pair := range xmlToolCallTagPairs {
-		openIdx := strings.Index(lower, pair.open)
-		if openIdx >= 0 {
-			if findXMLCloseOutsideCDATA(captured, pair.close, openIdx+len(pair.open)) < 0 {
-				return true
-			}
+	for searchFrom := 0; searchFrom < len(captured); {
+		tag, ok := toolcall.FindToolMarkupTagOutsideIgnored(captured, searchFrom)
+		if !ok {
+			return false
 		}
+		if tag.Closing || tag.Name != "tool_calls" {
+			searchFrom = tag.End + 1
+			continue
+		}
+		if _, ok := toolcall.FindMatchingToolMarkupClose(captured, tag); !ok {
+			return true
+		}
+		searchFrom = tag.End + 1
 	}
 	return false
 }
 
-func findXMLCloseOutsideCDATA(s, closeTag string, start int) int {
-	if s == "" || closeTag == "" {
-		return -1
+func shouldKeepBareInvokeCapture(captured string) bool { // true = a wrapper-less <invoke> capture is still held back (consumeToolCapture reports it as not ready)
+	invokeTag, ok := findFirstToolMarkupTagByName(captured, 0, "invoke")
+	if !ok {
+		return false // no opening invoke tag in the capture
 	}
-	if start < 0 {
-		start = 0
+	if wrapperOpen, ok := findFirstToolMarkupTagByName(captured, 0, "tool_calls"); ok && wrapperOpen.Start <= invokeTag.Start {
+		return false // a tool_calls wrapper opens at or before the invoke, so this is not a bare invoke
 	}
-	lower := strings.ToLower(s)
-	target := strings.ToLower(closeTag)
-	for i := start; i < len(s); {
-		switch {
-		case strings.HasPrefix(lower[i:], "<![cdata["):
-			end := strings.Index(lower[i+len("<![cdata["):], "]]>")
-			if end < 0 {
-				return -1
-			}
-			i += len("<![cdata[") + end + len("]]>")
-		case strings.HasPrefix(lower[i:], "<!--"):
-			end := strings.Index(lower[i+len("<!--"):], "-->")
-			if end < 0 {
-				return -1
-			}
-			i += len("<!--") + end + len("-->")
-		case strings.HasPrefix(lower[i:], target):
-			return i
-		default:
-			i++
-		}
+	if closeTag, ok := findFirstToolMarkupTagByNameFrom(captured, invokeTag.Start+1, "tool_calls", true); ok && closeTag.Start > invokeTag.Start {
+		return true // a closing tool_calls tag follows the invoke
 	}
-	return -1
+	startEnd := invokeTag.End
+	if startEnd < 0 {
+		return true
+	}
+	body := captured[startEnd+1:]
+	trimmedBody := strings.TrimLeft(body, " \t\r\n")
+	if trimmedBody == "" {
+		return true // nothing but whitespace after the invoke tag so far
+	}
+
+	if invokeCloseTag, ok := findFirstToolMarkupTagByNameFrom(captured, startEnd+1, "invoke", true); ok {
+		return strings.TrimSpace(captured[invokeCloseTag.End+1:]) == "" // closed invoke: keep only while no non-whitespace text follows it
+	}
+
+	trimmedLower := strings.ToLower(trimmedBody) // invoke not closed yet: keep only if its body starts with <parameter, '{' or '['
+	return strings.HasPrefix(trimmedLower, "<parameter") ||
+		strings.HasPrefix(trimmedLower, "{") ||
+		strings.HasPrefix(trimmedLower, "[")
 }
 
-// findPartialXMLToolTagStart checks if the string ends with a partial canonical
-// XML wrapper tag (e.g., "<too") and returns the position of the '<'.
 func findPartialXMLToolTagStart(s string) int {
 	lastLT := strings.LastIndex(s, "<")
 	if lastLT < 0 {
@@ -135,13 +149,18 @@ func findPartialXMLToolTagStart(s string) int {
 		return -1
 	}
 	lowerTail := strings.ToLower(tail)
-	// Check if the tail is a prefix of any known XML tool tag.
-	for _, tag := range xmlToolCallOpeningTags {
-		tagWithLT := tag
-		if !strings.HasPrefix(tagWithLT, "<") {
-			tagWithLT = "<" + tagWithLT
-		}
-		if strings.HasPrefix(tagWithLT, lowerTail) {
+	for _, tag := range []string{
+		"<tool_calls", "<invoke", "<parameter",
+		"<|tool_calls", "<|invoke", "<|parameter",
+		"<｜tool_calls", "<｜invoke", "<｜parameter",
+		"<|dsml|tool_calls", "<|dsml|invoke", "<|dsml|parameter",
+		"<dsmltool_calls", "<dsmlinvoke", "<dsmlparameter",
+		"<dsml tool_calls", "<dsml invoke", "<dsml parameter",
+		"<dsml|tool_calls", "<dsml|invoke", "<dsml|parameter",
+		"<|dsmltool_calls", "<|dsmlinvoke", "<|dsmlparameter",
+		"<|dsml tool_calls", "<|dsml invoke", "<|dsml parameter",
+	} {
+		if strings.HasPrefix(tag, lowerTail) {
 			return lastLT
 		}
 	}
diff --git a/internal/toolstream/tool_sieve_xml_scan.go b/internal/toolstream/tool_sieve_xml_scan.go
new file mode 100644
index 0000000..faaea84
--- /dev/null
+++ b/internal/toolstream/tool_sieve_xml_scan.go
@@ -0,0 +1,28 @@
+package toolstream
+
+import "ds2api/internal/toolcall"
+
+func findFirstToolMarkupTagByName(s string, start int, name string) (toolcall.ToolMarkupTag, bool) { // first opening (non-closing) tag called name found from start onwards
+	return findFirstToolMarkupTagByNameFrom(s, start, name, false)
+}
+
+func findFirstToolMarkupTagByNameFrom(s string, start int, name string, closing bool) (toolcall.ToolMarkupTag, bool) { // first tag from start whose Name and Closing flag both match; ok=false if none
+	for pos := maxInt(start, 0); pos < len(s); { // a negative start is clamped to 0
+		tag, ok := toolcall.FindToolMarkupTagOutsideIgnored(s, pos)
+		if !ok {
+			return toolcall.ToolMarkupTag{}, false // scanner found no further tag
+		}
+		if tag.Name == name && tag.Closing == closing {
+			return tag, true
+		}
+		pos = tag.End + 1 // resume just past the tag that did not match
+	}
+	return toolcall.ToolMarkupTag{}, false
+}
+
+func maxInt(a, b int) int { // returns the larger of a and b
+	if a > b {
+		return a
+	}
+	return b
+}
diff --git a/internal/toolstream/tool_sieve_xml_tags.go b/internal/toolstream/tool_sieve_xml_tags.go
new file mode 100644
index 0000000..d4179bd
--- /dev/null
+++ b/internal/toolstream/tool_sieve_xml_tags.go
@@ -0,0 +1,34 @@
+package toolstream
+
+import "regexp"
+
+// --- XML tool call support for the streaming sieve ---
+
+//nolint:unused // kept as explicit tag inventory for future XML sieve refinements.
+var xmlToolCallClosingTags = []string{"</tool_calls>", "</|dsml|tool_calls>", "</|dsmltool_calls>", "</|dsml tool_calls>", "</dsml|tool_calls>", "</dsmltool_calls>", "</dsml tool_calls>", "</｜tool_calls>", "</|tool_calls>"}
+
+// xmlToolCallBlockPattern matches, case-insensitively, a complete tool call block opened by <tool_calls or <|dsml|tool_calls and closed by </tool_calls> or </|dsml|tool_calls>; the open and close spellings are not required to agree.
+//
+//nolint:unused // reserved for future fast-path XML block detection.
+var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)((?:<tool_calls\b|<\|dsml\|tool_calls\b)[^>]*>\s*(?:.*?)\s*(?:</tool_calls>|</\|dsml\|tool_calls>))`)
+
+// xmlToolTagsToDetect is the set of XML tag prefixes used by findToolSegmentStart.
+var xmlToolTagsToDetect = []string{
+	"<|dsml|tool_calls>", "<|dsml|tool_calls\n", "<|dsml|tool_calls ",
+	"<|dsml|invoke ", "<|dsml|invoke\n", "<|dsml|invoke\t", "<|dsml|invoke\r",
+	"<|dsmltool_calls>", "<|dsmltool_calls\n", "<|dsmltool_calls ",
+	"<|dsmlinvoke ", "<|dsmlinvoke\n", "<|dsmlinvoke\t", "<|dsmlinvoke\r",
+	"<|dsml tool_calls>", "<|dsml tool_calls\n", "<|dsml tool_calls ",
+	"<|dsml invoke ", "<|dsml invoke\n", "<|dsml invoke\t", "<|dsml invoke\r",
+	"<dsml|tool_calls>", "<dsml|tool_calls\n", "<dsml|tool_calls ",
+	"<dsml|invoke ", "<dsml|invoke\n", "<dsml|invoke\t", "<dsml|invoke\r",
+	"<dsmltool_calls>", "<dsmltool_calls\n", "<dsmltool_calls ",
+	"<dsmlinvoke ", "<dsmlinvoke\n", "<dsmlinvoke\t", "<dsmlinvoke\r",
+	"<dsml tool_calls>", "<dsml tool_calls\n", "<dsml tool_calls ",
+	"<dsml invoke ", "<dsml invoke\n", "<dsml invoke\t", "<dsml invoke\r",
+	"<｜tool_calls>", "<｜tool_calls\n", "<｜tool_calls ",
+	"<｜invoke ", "<｜invoke\n", "<｜invoke\t", "<｜invoke\r",
+	"<|tool_calls>", "<|tool_calls\n", "<|tool_calls ",
+	"<|invoke ", "<|invoke\n", "<|invoke\t", "<|invoke\r",
+	"<tool_calls>", "<tool_calls\n", "<tool_calls ", "<invoke ", "<invoke\n", "<invoke\t", "<invoke\r",
+}
diff --git a/internal/toolstream/tool_sieve_xml_test.go b/internal/toolstream/tool_sieve_xml_test.go
index 87e79b5..efcf56d 100644
--- a/internal/toolstream/tool_sieve_xml_test.go
+++ b/internal/toolstream/tool_sieve_xml_test.go
@@ -41,6 +41,37 @@ func TestProcessToolSieveInterceptsXMLToolCallWithoutLeak(t *testing.T) {
 	}
 }
 
+func TestProcessToolSieveInterceptsDSMLToolCallWithoutLeak(t *testing.T) { // <|DSML|-prefixed call whose wrapper tag is split across two chunks
+	var state State
+	chunks := []string{
+		"<|DSML|tool",
+		"_calls>\n",
+		`  <|DSML|invoke name="read_file">` + "\n",
+		`    <|DSML|parameter name="path">README.MD</|DSML|parameter>` + "\n",
+		"  </|DSML|invoke>\n",
+		"</|DSML|tool_calls>",
+	}
+	var events []Event
+	for _, c := range chunks {
+		events = append(events, ProcessChunk(&state, c, []string{"read_file"})...)
+	}
+	events = append(events, Flush(&state, []string{"read_file"})...)
+
+	var textContent string
+	var toolCalls int
+	for _, evt := range events {
+		textContent += evt.Content
+		toolCalls += len(evt.ToolCalls)
+	}
+
+	if strings.Contains(strings.ToLower(textContent), "dsml") || strings.Contains(textContent, "read_file") { // neither the DSML markup nor the tool name may reach the text stream
+		t.Fatalf("DSML tool call content leaked to text: %q", textContent)
+	}
+	if toolCalls != 1 {
+		t.Fatalf("expected one DSML tool call, got %d events=%#v", toolCalls, events)
+	}
+}
+
 func TestProcessToolSieveHandlesLongXMLToolCall(t *testing.T) {
 	var state State
 	const toolName = "write_to_file"
@@ -143,6 +174,41 @@ func TestProcessToolSieveKeepsCDATAEmbeddedToolClosingBuffered(t *testing.T) {
 	}
 }
 
+func TestProcessToolSieveFallsBackWhenCDATANeverCloses(t *testing.T) { // the CDATA section is opened but never terminated with ]]>
+	var state State
+	chunks := []string{
+		"<tool_calls>\n  <invoke name=\"Write\">\n    <parameter name=\"content\"><![CDATA[",
+		"hello world",
+		"</parameter>\n  </invoke>\n</tool_calls>",
+	}
+	var events []Event
+	for _, c := range chunks {
+		events = append(events, ProcessChunk(&state, c, []string{"Write"})...)
+	}
+	events = append(events, Flush(&state, []string{"Write"})...)
+
+	var textContent strings.Builder
+	toolCalls := 0
+	for _, evt := range events {
+		if evt.Content != "" {
+			textContent.WriteString(evt.Content)
+		}
+		toolCalls += len(evt.ToolCalls)
+		if len(evt.ToolCalls) > 0 {
+			if got, _ := evt.ToolCalls[0].Input["content"].(string); got != "hello world" { // payload must come back without the CDATA opener
+				t.Fatalf("expected recovered CDATA payload, got %q", got)
+			}
+		}
+	}
+
+	if toolCalls != 1 {
+		t.Fatalf("expected unclosed CDATA payload to still parse, got %d tool calls events=%#v", toolCalls, events)
+	}
+	if textContent.Len() != 0 {
+		t.Fatalf("expected no leaked text, got %q", textContent.String())
+	}
+}
+
 func TestProcessToolSieveXMLWithLeadingText(t *testing.T) {
 	var state State
 	// Model outputs some prose then an XML tool call.
@@ -567,6 +633,58 @@ func TestProcessToolSievePassesThroughBareToolCallAsText(t *testing.T) {
 	}
 }
 
+func TestProcessToolSieveBareInvokeInlineProseDoesNotStall(t *testing.T) { // an <invoke> mention inside prose must be emitted by ProcessChunk itself; Flush is never called here
+	var state State
+	chunk := "Use `<invoke name=\"read_file\">` as plain documentation text."
+	events := ProcessChunk(&state, chunk, []string{"read_file"})
+
+	var textContent strings.Builder
+	toolCalls := 0
+	for _, evt := range events {
+		textContent.WriteString(evt.Content)
+		toolCalls += len(evt.ToolCalls)
+	}
+
+	if toolCalls != 0 {
+		t.Fatalf("expected inline invoke prose to remain text, got %d events=%#v", toolCalls, events)
+	}
+	if textContent.String() != chunk { // the whole chunk must come back unchanged
+		t.Fatalf("expected inline invoke prose to stream immediately, got %q", textContent.String())
+	}
+	if state.capturing {
+		t.Fatal("expected inline invoke prose not to leave stream capture open")
+	}
+}
+
+func TestProcessToolSieveBareInvokeExampleReleasesWhenNotRepairable(t *testing.T) { // wrapper-less <invoke>...</invoke> followed by prose must pass through as text without Flush
+	var state State
+	chunks := []string{
+		`Example: <invoke name="read_file"><parameter name="path">README.md</parameter>`,
+		"</invoke> then continue.",
+	}
+	var events []Event
+	for _, c := range chunks {
+		events = append(events, ProcessChunk(&state, c, []string{"read_file"})...)
+	}
+
+	var textContent strings.Builder
+	toolCalls := 0
+	for _, evt := range events {
+		textContent.WriteString(evt.Content)
+		toolCalls += len(evt.ToolCalls)
+	}
+
+	if toolCalls != 0 {
+		t.Fatalf("expected non-repairable bare invoke to remain text, got %d events=%#v", toolCalls, events)
+	}
+	if textContent.String() != strings.Join(chunks, "") { // output must equal the concatenated input exactly
+		t.Fatalf("expected non-repairable bare invoke to pass through, got %q", textContent.String())
+	}
+	if state.capturing {
+		t.Fatal("expected non-repairable bare invoke not to leave stream capture open")
+	}
+}
+
 func TestProcessToolSieveRepairsMissingOpeningWrapperWithoutLeakingInvokeText(t *testing.T) {
 	var state State
 	chunks := []string{
@@ -595,3 +713,96 @@ func TestProcessToolSieveRepairsMissingOpeningWrapperWithoutLeakingInvokeText(t
 		t.Fatalf("expected repaired missing-wrapper stream not to leak xml text, got %q", textContent.String())
 	}
 }
+
+// TestProcessToolSieveFullwidthPipeVariantDoesNotLeak checks that a <｜tool_calls> wrapper (fullwidth pipe, U+FF5C) around plain <invoke> tags is buffered and parsed.
+func TestProcessToolSieveFullwidthPipeVariantDoesNotLeak(t *testing.T) {
+	var state State
+	chunks := []string{
+		"<\uff5ctool_calls>\n",
+		"<invoke name=\"execute_command\">\n",
+		"<parameter name=\"command\">git status</parameter>\n",
+		"</invoke>\n",
+		"</\uff5ctool_calls>",
+	}
+	var events []Event
+	for _, c := range chunks {
+		events = append(events, ProcessChunk(&state, c, []string{"execute_command"})...)
+	}
+	events = append(events, Flush(&state, []string{"execute_command"})...)
+
+	var textContent string
+	var toolCalls int
+	for _, evt := range events {
+		textContent += evt.Content
+		toolCalls += len(evt.ToolCalls)
+	}
+
+	if strings.Contains(textContent, "invoke") || strings.Contains(textContent, "execute_command") {
+		t.Fatalf("fullwidth pipe variant leaked to text: %q", textContent)
+	}
+	if toolCalls != 1 {
+		t.Fatalf("expected one tool call from fullwidth pipe variant, got %d events=%#v", toolCalls, events)
+	}
+}
+
+// TestProcessToolSieveDSMLPrefixVariantDoesNotLeak checks a <DSML|tool_calls> wrapper (no leading pipe) whose inner tags use the <|DSML|...> form.
+func TestProcessToolSieveDSMLPrefixVariantDoesNotLeak(t *testing.T) {
+	var state State
+	chunks := []string{
+		"<DSML|tool_calls>\n",
+		"  <|DSML|invoke name=\"execute_command\">\n",
+		"    <|DSML|parameter name=\"command\"><![CDATA[git status]]></|DSML|parameter>\n",
+		"  </|DSML|invoke>\n",
+		"</DSML|tool_calls>",
+	}
+	var events []Event
+	for _, c := range chunks {
+		events = append(events, ProcessChunk(&state, c, []string{"execute_command"})...)
+	}
+	events = append(events, Flush(&state, []string{"execute_command"})...)
+
+	var textContent string
+	var toolCalls int
+	for _, evt := range events {
+		textContent += evt.Content
+		toolCalls += len(evt.ToolCalls)
+	}
+
+	if strings.Contains(strings.ToLower(textContent), "dsml") || strings.Contains(textContent, "execute_command") {
+		t.Fatalf("DSML prefix variant leaked to text: %q", textContent)
+	}
+	if toolCalls != 1 {
+		t.Fatalf("expected one tool call from DSML prefix variant, got %d events=%#v", toolCalls, events)
+	}
+}
+
+// TestProcessToolSieveDSMLBarePrefixVariantDoesNotLeak checks <DSML|tool_calls> with <DSML|invoke> (no leading pipe on any tag); the block should be buffered and parsed.
+func TestProcessToolSieveDSMLBarePrefixVariantDoesNotLeak(t *testing.T) {
+	var state State
+	chunks := []string{
+		"<DSML|tool_calls>\n",
+		"<DSML|invoke name=\"execute_command\">\n",
+		"<DSML|parameter name=\"command\"><![CDATA[git status]]></DSML|parameter>\n",
+		"</DSML|invoke>\n",
+		"</DSML|tool_calls>",
+	}
+	var events []Event
+	for _, c := range chunks {
+		events = append(events, ProcessChunk(&state, c, []string{"execute_command"})...)
+	}
+	events = append(events, Flush(&state, []string{"execute_command"})...)
+
+	var textContent string
+	var toolCalls int
+	for _, evt := range events {
+		textContent += evt.Content
+		toolCalls += len(evt.ToolCalls)
+	}
+
+	if strings.Contains(strings.ToLower(textContent), "dsml") || strings.Contains(textContent, "execute_command") {
+		t.Fatalf("DSML bare prefix variant leaked to text: %q", textContent)
+	}
+	if toolCalls != 1 {
+		t.Fatalf("expected one tool call from DSML bare prefix variant, got %d events=%#v", toolCalls, events)
+	}
+}
diff --git a/internal/util/messages_test.go b/internal/util/messages_test.go
index 077e903..9ddafd6 100644
--- a/internal/util/messages_test.go
+++ b/internal/util/messages_test.go
@@ -116,6 +116,18 @@ func TestConvertClaudeToDeepSeekUsesGlobalAliasResolution(t *testing.T) {
 	}
 }
 
+func TestConvertClaudeToDeepSeekUsesNoThinkingAliasResolution(t *testing.T) { // a "-nothinking" model name must resolve to a target that keeps the suffix
+	store := config.LoadStore()
+	req := map[string]any{
+		"model":    "claude-sonnet-4-6-nothinking",
+		"messages": []any{map[string]any{"role": "user", "content": "Hi"}},
+	}
+	out := ConvertClaudeToDeepSeek(req, store)
+	if out["model"] != "deepseek-v4-flash-nothinking" {
+		t.Fatalf("expected noThinking alias resolution, got model=%q", out["model"])
+	}
+}
+
 func contains(s, sub string) bool {
 	return len(s) >= len(sub) && (s == sub || len(sub) == 0 || (len(s) > 0 && (indexOf(s, sub) >= 0)))
 }
diff --git a/internal/util/util_edge_test.go b/internal/util/util_edge_test.go
index 6084d9c..463df1a 100644
--- a/internal/util/util_edge_test.go
+++ b/internal/util/util_edge_test.go
@@ -372,3 +372,16 @@ func TestConvertClaudeToDeepSeekUsesExplicitModelAlias(t *testing.T) {
 		t.Fatalf("expected explicit alias override, got %q", out["model"])
 	}
 }
+
+func TestConvertClaudeToDeepSeekUsesExplicitNoThinkingModelAlias(t *testing.T) { // the alias is configured for the base name only; the request adds "-nothinking"
+	t.Setenv("DS2API_CONFIG_JSON", `{"keys":[],"accounts":[],"model_aliases":{"claude-sonnet-4-6":"deepseek-v4-pro-search"}}`)
+	store := config.LoadStore()
+	req := map[string]any{
+		"model":    "claude-sonnet-4-6-nothinking",
+		"messages": []any{map[string]any{"role": "user", "content": "Hi"}},
+	}
+	out := ConvertClaudeToDeepSeek(req, store)
+	if out["model"] != "deepseek-v4-pro-search-nothinking" { // aliased target plus the carried-over suffix
+		t.Fatalf("expected explicit alias override with nothinking suffix, got %q", out["model"])
+	}
+}
diff --git a/scripts/build-release-archives.sh b/scripts/build-release-archives.sh
new file mode 100755
index 0000000..415aab8
--- /dev/null
+++ b/scripts/build-release-archives.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+source "${ROOT_DIR}/scripts/release-targets.sh"
+
+build_one() {  # build, stage and archive one release target; args: tag build_version goos goarch goarm label
+  local tag="$1" build_version="$2" goos="$3" goarch="$4" goarm="$5" label="$6"
+  local pkg stage bin
+
+  pkg="ds2api_${tag}_${label}"
+  stage="dist/${pkg}"
+  bin="ds2api"
+  if [[ "$goos" == "windows" ]]; then
+    bin="ds2api.exe"
+  fi
+
+  echo "[release-archives] building ${label}"
+  rm -rf "$stage"  # start from a clean staging directory
+  mkdir -p "${stage}/static"
+
+  if [[ "$goarm" == "-" ]]; then  # "-" means the target has no GOARM value, so GOARM is left unset
+    CGO_ENABLED=0 GOOS="$goos" GOARCH="$goarch" \
+      go build -buildvcs=false -trimpath -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${build_version}" -o "${stage}/${bin}" ./cmd/ds2api
+  else
+    CGO_ENABLED=0 GOOS="$goos" GOARCH="$goarch" GOARM="$goarm" \
+      go build -buildvcs=false -trimpath -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${build_version}" -o "${stage}/${bin}" ./cmd/ds2api
+  fi
+
+  cp config.example.json .env.example LICENSE README.MD README.en.md "${stage}/"
+  cp -R static/admin "${stage}/static/admin"
+
+  if [[ "$goos" == "windows" ]]; then  # Windows targets ship as .zip, all others as .tar.gz
+    (cd dist && zip -rq "${pkg}.zip" "${pkg}")
+  else
+    tar -C dist -czf "dist/${pkg}.tar.gz" "${pkg}"
+  fi
+
+  rm -rf "$stage"  # only the archive is kept
+}
+
+if [[ "${1:-}" == "--build-one" ]]; then  # worker mode: build exactly one target from the remaining args, then exit
+  shift
+  build_one "$@"
+  exit 0
+fi
+
+tag="${RELEASE_TAG:-}"
+if [[ -z "$tag" && -f VERSION ]]; then  # no RELEASE_TAG: fall back to the VERSION file with whitespace stripped
+  tag="$(tr -d '[:space:]' < VERSION)"
+fi
+if [[ -z "$tag" ]]; then
+  echo "release tag is empty; set RELEASE_TAG or provide VERSION." >&2
+  exit 1
+fi
+
+build_version="${BUILD_VERSION:-$tag}"  # defaults to the release tag
+jobs="${RELEASE_BUILD_JOBS:-}"
+if [[ -z "$jobs" ]]; then  # no override: use nproc, else sysctl hw.ncpu, else 2
+  if command -v nproc >/dev/null 2>&1; then
+    jobs="$(nproc)"
+  elif command -v sysctl >/dev/null 2>&1; then
+    jobs="$(sysctl -n hw.ncpu)"
+  else
+    jobs="2"
+  fi
+fi
+
+mkdir -p dist
+
+if [[ "$jobs" -le 1 ]]; then  # serial: build every target in this shell
+  for target in "${DS2API_RELEASE_TARGETS[@]}"; do
+    read -r goos goarch goarm label <<< "$target"
+    build_one "$tag" "$build_version" "$goos" "$goarch" "$goarm" "$label"
+  done
+else  # parallel: xargs re-invokes this script in --build-one mode once per target line, up to $jobs at a time
+  printf '%s\n' "${DS2API_RELEASE_TARGETS[@]}" \
+    | xargs -L 1 -P "$jobs" bash "${ROOT_DIR}/scripts/build-release-archives.sh" --build-one "$tag" "$build_version"
+fi
diff --git a/scripts/build-webui.sh b/scripts/build-webui.sh
index 485f4cb..bde077e 100755
--- a/scripts/build-webui.sh
+++ b/scripts/build-webui.sh
@@ -11,7 +11,7 @@ cd "$(dirname "$0")/../webui"
 # 检查 node_modules
 if [ ! -d "node_modules" ]; then
     echo "📦 Installing dependencies..."
-    npm install
+    npm ci --prefer-offline --no-audit
 fi
 
 # 构建
diff --git a/scripts/release-targets.sh b/scripts/release-targets.sh
new file mode 100755
index 0000000..63a5a7e
--- /dev/null
+++ b/scripts/release-targets.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+# Each entry: "goos goarch goarm package-label"; goarm is "-" for targets built without a GOARM value.
+DS2API_RELEASE_TARGETS=(
+  "linux amd64 - linux_amd64"
+  "linux arm64 - linux_arm64"
+  "linux arm 7 linux_armv7"
+  "darwin amd64 - darwin_amd64"
+  "darwin arm64 - darwin_arm64"
+  "windows amd64 - windows_amd64"
+  "windows arm64 - windows_arm64"
+)
diff --git a/tests/node/chat-stream.test.js b/tests/node/chat-stream.test.js
index 50e94ee..dbfbe3e 100644
--- a/tests/node/chat-stream.test.js
+++ b/tests/node/chat-stream.test.js
@@ -121,8 +121,15 @@ function parseSSEDataFrames(body) {
 }
 
+// Single-response convenience wrapper: passes a one-element sequence, so every
+// upstream completion/continue call replays `upstreamLines`.
 async function runMockVercelStream(upstreamLines, prepareOverrides = {}) {
+  return runMockVercelStreamSequence([upstreamLines], prepareOverrides);
+}
+
+async function runMockVercelStreamSequence(upstreamSequences, prepareOverrides = {}) {
   const originalFetch = global.fetch;
   const fetchURLs = [];
+  const fetchBodies = [];
+  let completionCalls = 0;
+  let continueCalls = 0;
   const prepareBody = {
     session_id: 'chatcmpl-test',
     lease_id: 'lease-test',
@@ -137,23 +144,36 @@ async function runMockVercelStream(upstreamLines, prepareOverrides = {}) {
     payload: { prompt: 'hello' },
     ...prepareOverrides,
   };
-  global.fetch = async (url) => {
+  // Mock fetch: records every requested URL (plus the parsed JSON body when one
+  // is sent) and picks the canned response from markers in the URL.
+  global.fetch = async (url, init = {}) => {
     const textURL = String(url);
     fetchURLs.push(textURL);
+    if (init && init.body) {
+      fetchBodies.push(JSON.parse(String(init.body)));
+    }
     if (textURL.includes('__stream_prepare=1')) {
       return jsonResponse(prepareBody);
     }
+    if (textURL.includes('__stream_pow=1')) {
+      return jsonResponse({ pow_header: 'pow-header-refreshed' });
+    }
     if (textURL.includes('__stream_release=1')) {
       return jsonResponse({ success: true });
     }
-    return sseResponse(upstreamLines);
+    // Continue calls read the sequence from index 1 onward, clamped to the last entry.
+    if (textURL.includes('/continue')) {
+      const idx = Math.min(continueCalls + 1, upstreamSequences.length - 1);
+      continueCalls += 1;
+      return sseResponse(upstreamSequences[idx]);
+    }
+    // Every other URL counts as a completion call and reads from index 0 onward, clamped the same way.
+    const idx = Math.min(completionCalls, upstreamSequences.length - 1);
+    completionCalls += 1;
+    return sseResponse(upstreamSequences[idx]);
   };
   try {
     const req = new MockStreamRequest();
     const res = new MockStreamResponse();
     const payload = { model: 'gpt-test', stream: true };
     await handleVercelStream(req, res, Buffer.from(JSON.stringify(payload)), payload);
-    return { res, frames: parseSSEDataFrames(res.bodyText()), fetchURLs };
+    return { res, frames: parseSSEDataFrames(res.bodyText()), fetchURLs, fetchBodies };
   } finally {
     global.fetch = originalFetch;
   }
@@ -174,6 +194,94 @@ test('vercel stream emits Go-parity empty-output failure on DONE', async () => {
   assert.equal(frames[1], '[DONE]');
 });
 
+// An empty first completion is expected to cause exactly one more completion
+// request and one PoW refresh request, while the client still sees a single
+// stream: one shared chunk id and one [DONE].
+test('vercel stream retries empty output once and keeps one terminal frame', async () => {
+  const emptyAttempt = ['data: [DONE]\n\n'];
+  const visibleAttempt = ['data: {"p":"response/content","v":"visible"}\n\n', 'data: [DONE]\n\n'];
+  const { frames, fetchURLs, fetchBodies } = await runMockVercelStreamSequence([emptyAttempt, visibleAttempt]);
+
+  const countFetches = (matches) => fetchURLs.filter(matches).length;
+  const [contentChunk, finishChunk] = frames
+    .filter((frame) => frame !== '[DONE]')
+    .map((frame) => JSON.parse(frame));
+  const promptBodies = fetchBodies.filter((body) => Object.hasOwn(body, 'prompt'));
+
+  assert.equal(countFetches((url) => url === 'https://chat.deepseek.com/api/v0/chat/completion'), 2);
+  assert.equal(countFetches((url) => url.includes('__stream_pow=1')), 1);
+  assert.equal(frames.filter((frame) => frame === '[DONE]').length, 1);
+  assert.equal(contentChunk.choices[0].delta.content, 'visible');
+  assert.equal(finishChunk.choices[0].finish_reason, 'stop');
+  assert.equal(contentChunk.id, finishChunk.id);
+  assert.match(promptBodies[1].prompt, /Previous reply had no visible output\. Please regenerate the visible final answer or tool call now\.$/);
+});
+
+// When the first reply ends in a WIP / auto_continue state, the handler is
+// expected to call /continue once and never send the synthetic
+// "no visible output" retry prompt.
+test('vercel stream exhausts DeepSeek continue before synthetic retry', async () => {
+  const wipThenDone = [
+    'data: {"response_message_id":7,"v":{"response":{"message_id":7,"status":"WIP","auto_continue":true}}}\n\n',
+    'data: [DONE]\n\n',
+  ];
+  const continuedReply = ['data: {"p":"response/content","v":"continued"}\n\n', 'data: [DONE]\n\n'];
+  const { frames, fetchURLs, fetchBodies } = await runMockVercelStreamSequence([wipThenDone, continuedReply]);
+
+  const countFetches = (matches) => fetchURLs.filter(matches).length;
+  const [contentChunk, finishChunk] = frames
+    .filter((frame) => frame !== '[DONE]')
+    .map((frame) => JSON.parse(frame));
+  const sentSyntheticRetry = fetchBodies.some((body) => String(body.prompt || '').includes('Previous reply had no visible output'));
+
+  assert.equal(countFetches((url) => url === 'https://chat.deepseek.com/api/v0/chat/completion'), 1);
+  assert.equal(countFetches((url) => url === 'https://chat.deepseek.com/api/v0/chat/continue'), 1);
+  assert.equal(countFetches((url) => url.includes('__stream_pow=1')), 1);
+  assert.equal(contentChunk.choices[0].delta.content, 'continued');
+  assert.equal(finishChunk.choices[0].finish_reason, 'stop');
+  assert.equal(sentSyntheticRetry, false);
+});
+
+// The PoW refresh endpoint answers 500 here; the retry is still expected to be
+// sent, carrying the PoW header that the prepare step returned.
+test('vercel stream reuses prior PoW when refresh fails', async () => {
+  const COMPLETION_URL = 'https://chat.deepseek.com/api/v0/chat/completion';
+  const buildPrepareBody = () => ({
+    session_id: 'chatcmpl-test',
+    lease_id: 'lease-test',
+    model: 'gpt-test',
+    final_prompt: 'hello',
+    thinking_enabled: false,
+    search_enabled: false,
+    compat: { strip_reference_markers: true },
+    tool_names: [],
+    deepseek_token: 'deepseek-token',
+    pow_header: 'pow-header-initial',
+    payload: { prompt: 'hello' },
+  });
+  const realFetch = global.fetch;
+  const requestedURLs = [];
+  const sentPowHeaders = [];
+  let completionAttempts = 0;
+  global.fetch = async (url, init = {}) => {
+    const target = String(url);
+    requestedURLs.push(target);
+    if (target.includes('__stream_prepare=1')) {
+      return jsonResponse(buildPrepareBody());
+    }
+    if (target.includes('__stream_pow=1')) {
+      return jsonResponse({}, 500);
+    }
+    if (target.includes('__stream_release=1')) {
+      return jsonResponse({ success: true });
+    }
+    if (target !== COMPLETION_URL) {
+      throw new Error(`unexpected fetch url: ${target}`);
+    }
+    sentPowHeaders.push(init.headers['x-ds-pow-response']);
+    completionAttempts += 1;
+    // The first attempt ends with no visible output; later attempts return content.
+    return completionAttempts === 1
+      ? sseResponse(['data: [DONE]\n\n'])
+      : sseResponse(['data: {"p":"response/content","v":"visible"}\n\n', 'data: [DONE]\n\n']);
+  };
+  try {
+    const req = new MockStreamRequest();
+    const res = new MockStreamResponse();
+    const payload = { model: 'gpt-test', stream: true };
+    await handleVercelStream(req, res, Buffer.from(JSON.stringify(payload)), payload);
+    const [contentChunk, finishChunk] = parseSSEDataFrames(res.bodyText())
+      .filter((frame) => frame !== '[DONE]')
+      .map((frame) => JSON.parse(frame));
+    assert.deepEqual(sentPowHeaders, ['pow-header-initial', 'pow-header-initial']);
+    assert.equal(requestedURLs.filter((requested) => requested.includes('__stream_pow=1')).length, 1);
+    assert.equal(contentChunk.choices[0].delta.content, 'visible');
+    assert.equal(finishChunk.choices[0].finish_reason, 'stop');
+  } finally {
+    global.fetch = realFetch;
+  }
+});
+
 test('vercel stream emits content_filter failure when upstream filters empty output', async () => {
   const { frames } = await runMockVercelStream(['data: {"code":"content_filter"}\n\n']);
   assert.equal(frames.length, 2);
diff --git a/tests/node/js_compat_test.js b/tests/node/js_compat_test.js
index ba35d55..01305dc 100644
--- a/tests/node/js_compat_test.js
+++ b/tests/node/js_compat_test.js
@@ -6,6 +6,7 @@ const fs = require('node:fs');
 const path = require('node:path');
 
 const chatStream = require('../../api/chat-stream.js');
+const deepseekConstants = require('../../internal/js/shared/deepseek-constants.js');
 const { parseToolCallsDetailed, parseStandaloneToolCallsDetailed } = require('../../internal/js/helpers/stream-tool-sieve.js');
 
 const { parseChunkForContent, estimateTokens } = chatStream.__test;
@@ -16,6 +17,15 @@ function readJSON(filePath) {
   return JSON.parse(fs.readFileSync(filePath, 'utf8'));
 }
 
+// Checks the JS constants module against the `client` section of constants_shared.json.
+test('js shared constants derive client headers from shared json', () => {
+  const sharedPath = path.resolve(__dirname, '../../internal/deepseek/protocol/constants_shared.json');
+  const { client } = readJSON(sharedPath);
+  const { CLIENT_VERSION, BASE_HEADERS } = deepseekConstants;
+  const expectedUserAgent = `${client.name}/${client.version} Android/${client.android_api_level}`;
+
+  assert.equal(CLIENT_VERSION, client.version);
+  assert.equal(BASE_HEADERS['x-client-version'], client.version);
+  assert.equal(BASE_HEADERS['User-Agent'], expectedUserAgent);
+  assert.equal(BASE_HEADERS['Content-Type'], 'application/json');
+});
+
 test('js compat: sse fixtures', () => {
   const fixtureDir = path.join(compatRoot, 'fixtures', 'sse_chunks');
   const expectedDir = path.join(compatRoot, 'expected');
diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js
index cc6ae93..1938984 100644
--- a/tests/node/stream-tool-sieve.test.js
+++ b/tests/node/stream-tool-sieve.test.js
@@ -49,6 +49,226 @@ test('parseToolCalls parses XML markup tool call', () => {
   assert.deepEqual(calls[0].input, { path: 'README.MD' });
 });
 
+// A pipe-delimited DSML wrapper is expected to yield one read_file call with its path parameter.
+test('parseToolCalls parses DSML shell as XML-compatible tool call', () => {
+  const markup = [
+    '<|DSML|tool_calls>',
+    '<|DSML|invoke name="read_file">',
+    '<|DSML|parameter name="path">README.MD</|DSML|parameter>',
+    '</|DSML|invoke>',
+    '</|DSML|tool_calls>',
+  ].join('');
+  const parsed = parseToolCalls(markup, ['read_file']);
+  assert.equal(parsed.length, 1);
+  const [call] = parsed;
+  assert.equal(call.name, 'read_file');
+  assert.deepEqual(call.input, { path: 'README.MD' });
+});
+
+// Same shape as the DSML wrapper, but with a space where the second pipe belongs.
+test('parseToolCalls tolerates DSML space-separator typo', () => {
+  const markup = [
+    '<|DSML tool_calls>',
+    '<|DSML invoke name="Read">',
+    '<|DSML parameter name="file_path"><![CDATA[/tmp/input.txt]]></|DSML parameter>',
+    '</|DSML invoke>',
+    '</|DSML tool_calls>',
+  ].join('');
+  const parsed = parseToolCalls(markup, ['Read']);
+  assert.equal(parsed.length, 1);
+  const [call] = parsed;
+  assert.equal(call.name, 'Read');
+  assert.deepEqual(call.input, { file_path: '/tmp/input.txt' });
+});
+
+// "tool_calls_extra" only starts with the wrapper name, so nothing may be parsed from it.
+test('parseToolCalls ignores DSML space lookalike tag names', () => {
+  const markup = [
+    '<|DSML tool_calls_extra>',
+    '<|DSML invoke name="Read">',
+    '<|DSML parameter name="file_path">/tmp/input.txt</|DSML parameter>',
+    '</|DSML invoke>',
+    '</|DSML tool_calls_extra>',
+  ].join('');
+  const parsed = parseToolCalls(markup, ['Read']);
+  assert.equal(parsed.length, 0);
+});
+
+// "Collapsed" form: the DSML prefix is glued to the tag name with no pipes at all.
+// The multi-line CDATA payload is expected to come back unchanged.
+test('parseToolCalls tolerates collapsed DSML tag names', () => {
+  const todoText = [
+    '[x] 检查 toolcalls_format.go 格式化逻辑',
+    '[x] 检查 toolcalls_parse.go 解析逻辑',
+    '[x] 检查 toolcalls_xml.go 和 toolcalls_dsml.go',
+    '[x] 检查 toolcalls_markup.go 和 toolcalls_json_repair.go',
+    '[x] 检查 prompt/tool_calls.go 注入逻辑',
+    '[x] 检查 toolstream 流式解析',
+    '[x] 查看测试文件确认预期行为',
+    '[x] 给出调查结论',
+  ].join('\n');
+  const markup = [
+    '<DSMLtool_calls>',
+    '<DSMLinvoke name="update_todo_list">',
+    `<DSMLparameter name="todos"><![CDATA[${todoText}]]></DSMLparameter>`,
+    '</DSMLinvoke>',
+    '</DSMLtool_calls>',
+  ].join('');
+  const parsed = parseToolCalls(markup, ['update_todo_list']);
+  assert.equal(parsed.length, 1);
+  const [call] = parsed;
+  assert.equal(call.name, 'update_todo_list');
+  assert.equal(call.input.todos, todoText);
+});
+
+// The collapsed form with a longer wrapper name ("tool_calls_extra") must not be parsed.
+test('parseToolCalls ignores collapsed DSML lookalike tag names', () => {
+  const markup = [
+    '<DSMLtool_calls_extra>',
+    '<DSMLinvoke name="update_todo_list">',
+    '<DSMLparameter name="todos">x</DSMLparameter>',
+    '</DSMLinvoke>',
+    '</DSMLtool_calls_extra>',
+  ].join('');
+  const parsed = parseToolCalls(markup, ['update_todo_list']);
+  assert.equal(parsed.length, 0);
+});
+
+// Canonical tool-call markup inside a CDATA section is parameter content, not a nested call.
+test('parseToolCalls keeps canonical XML examples inside DSML CDATA', () => {
+  const embeddedExample = '<tool_calls><invoke name="demo"><parameter name="value">x</parameter></invoke></tool_calls>';
+  const markup = [
+    '<|DSML|tool_calls>',
+    '<|DSML|invoke name="write_file">',
+    '<|DSML|parameter name="path">notes.md</|DSML|parameter>',
+    `<|DSML|parameter name="content"><![CDATA[${embeddedExample}]]></|DSML|parameter>`,
+    '</|DSML|invoke>',
+    '</|DSML|tool_calls>',
+  ].join('');
+  const parsed = parseToolCalls(markup, ['write_file']);
+  assert.equal(parsed.length, 1);
+  const [call] = parsed;
+  assert.equal(call.name, 'write_file');
+  assert.deepEqual(call.input, { path: 'notes.md', content: embeddedExample });
+});
+
+// The CDATA section is opened but never closed; the parameter text is still expected back.
+test('parseToolCalls recovers when CDATA never closes inside a valid wrapper', () => {
+  const markup = [
+    '<tool_calls>',
+    '<invoke name="Write">',
+    '<parameter name="content"><![CDATA[hello world</parameter>',
+    '</invoke>',
+    '</tool_calls>',
+  ].join('');
+  const parsed = parseToolCalls(markup, ['Write']);
+  assert.equal(parsed.length, 1);
+  const [call] = parsed;
+  assert.equal(call.name, 'Write');
+  assert.equal(call.input.content, 'hello world');
+});
+
+// Numeric and boolean parameter text, with or without CDATA, is expected as JSON scalars rather than strings.
+test('parseToolCalls supports JSON scalar parameters', () => {
+  const markup = [
+    '<tool_calls>',
+    '<invoke name="configure">',
+    '<parameter name="count">123</parameter>',
+    '<parameter name="max_tokens"><![CDATA[256]]></parameter>',
+    '<parameter name="enabled">true</parameter>',
+    '</invoke>',
+    '</tool_calls>',
+  ].join('');
+  const parsed = parseToolCalls(markup, ['configure']);
+  assert.equal(parsed.length, 1);
+  const [call] = parsed;
+  assert.equal(call.name, 'configure');
+  assert.equal(call.input.count, 123);
+  assert.equal(call.input.max_tokens, 256);
+  assert.equal(call.input.enabled, true);
+});
+
+// Models commonly mix DSML wrapper tags with canonical inner tags; here the
+// wrapper and parameter are DSML while the invoke tag is canonical.
+test('parseToolCalls normalizes mixed DSML and XML tool tags', () => {
+  const markup = [
+    '<|DSML|tool_calls>',
+    '<invoke name="read_file">',
+    '<|DSML|parameter name="path">README.MD</|DSML|parameter>',
+    '</invoke>',
+    '</|DSML|tool_calls>',
+  ].join('');
+  const parsed = parseToolCalls(markup, ['read_file']);
+  assert.equal(parsed.length, 1);
+  const [call] = parsed;
+  assert.equal(call.name, 'read_file');
+  assert.deepEqual(call.input, { path: 'README.MD' });
+});
+
+// The first line only mentions the wrapper tags in prose; the real block follows it.
+test('parseToolCalls skips prose mention of same wrapper variant', () => {
+  const lines = [
+    'Summary: support canonical <tool_calls> and DSML <|DSML|tool_calls> wrappers.',
+    '',
+    '<|DSML|tool_calls>',
+    '<|DSML|invoke name="Bash">',
+    '<|DSML|parameter name="command"><![CDATA[git status]]></|DSML|parameter>',
+    '</|DSML|invoke>',
+    '</|DSML|tool_calls>',
+  ];
+  const parsed = parseToolCalls(lines.join('\n'), ['Bash']);
+  assert.equal(parsed.length, 1);
+  const [call] = parsed;
+  assert.equal(call.name, 'Bash');
+  assert.equal(call.input.command, 'git status');
+});
+
+// Streaming variant: the prose chunk that mentions the wrapper must reach the
+// text output, and the real block after it must still become a tool call.
+test('sieve emits tool_calls after prose mentions same wrapper variant', () => {
+  const chunks = [
+    'Summary: support canonical <tool_calls> and DSML <|DSML|tool_calls> wrappers.\n\n',
+    '<|DSML|tool_calls>\n',
+    '<|DSML|invoke name="Bash">\n',
+    '<|DSML|parameter name="command"><![CDATA[git status]]></|DSML|parameter>\n',
+    '</|DSML|invoke>\n',
+    '</|DSML|tool_calls>',
+  ];
+  const emitted = runSieve(chunks, ['Bash']);
+  const toolCalls = emitted
+    .filter((evt) => evt.type === 'tool_calls')
+    .flatMap((evt) => evt.calls || []);
+  assert.equal(toolCalls.length, 1);
+  assert.equal(toolCalls[0].name, 'Bash');
+  assert.equal(toolCalls[0].input.command, 'git status');
+  assert.equal(collectText(emitted).includes('Summary:'), true);
+});
+
+// Streaming variant of the space-separator typo: the leading prose stays in the
+// text output and none of the DSML markup leaks into it.
+test('sieve emits tool_calls for DSML space-separator typo', () => {
+  const chunks = [
+    '准备读取文件。\n',
+    '<|DSML tool_calls>\n',
+    '<|DSML invoke name="Read">\n',
+    '<|DSML parameter name="file_path"><![CDATA[/tmp/input.txt]]></|DSML parameter>\n',
+    '</|DSML invoke>\n',
+    '</|DSML tool_calls>',
+  ];
+  const emitted = runSieve(chunks, ['Read']);
+  const visibleText = collectText(emitted);
+  const toolCalls = emitted
+    .filter((evt) => evt.type === 'tool_calls')
+    .flatMap((evt) => evt.calls || []);
+  assert.equal(toolCalls.length, 1);
+  assert.equal(toolCalls[0].name, 'Read');
+  assert.equal(toolCalls[0].input.file_path, '/tmp/input.txt');
+  assert.equal(visibleText.includes('准备读取文件'), true);
+  assert.equal(visibleText.includes('<|DSML invoke'), false);
+});
+
+// A lookalike wrapper name is not a tool call: the input must pass through as text, unchanged.
+test('sieve keeps DSML space lookalike tag names as text', () => {
+  const lookalike = '<|DSML tool_calls_extra><|DSML invoke name="Read"><|DSML parameter name="file_path">/tmp/input.txt</|DSML parameter></|DSML invoke></|DSML tool_calls_extra>';
+  const emitted = runSieve([lookalike], ['Read']);
+  const toolCalls = emitted
+    .filter((evt) => evt.type === 'tool_calls')
+    .flatMap((evt) => evt.calls || []);
+  assert.equal(toolCalls.length, 0);
+  assert.equal(collectText(emitted), lookalike);
+});
+
+// Streaming variant of the collapsed DSML form: only the "[]\n" prefix may
+// appear as text, and the CDATA payload is expected to arrive intact.
+test('sieve emits tool_calls for collapsed DSML tag names and preserves prefix text', () => {
+  const todoText = [
+    '[x] 检查 toolcalls_format.go 格式化逻辑',
+    '[x] 检查 toolcalls_parse.go 解析逻辑',
+    '[x] 检查 toolcalls_xml.go 和 toolcalls_dsml.go',
+    '[x] 检查 toolcalls_markup.go 和 toolcalls_json_repair.go',
+    '[x] 检查 prompt/tool_calls.go 注入逻辑',
+    '[x] 检查 toolstream 流式解析',
+    '[x] 查看测试文件确认预期行为',
+    '[x] 给出调查结论',
+  ].join('\n');
+  const chunks = [
+    '[]\n',
+    '<DSMLtool_calls>\n',
+    '<DSMLinvoke name="update_todo_list">\n',
+    `<DSMLparameter name="todos"><![CDATA[${todoText}]]></DSMLparameter>\n`,
+    '</DSMLinvoke>\n',
+    '</DSMLtool_calls>',
+  ];
+  const emitted = runSieve(chunks, ['update_todo_list']);
+  const visibleText = collectText(emitted);
+  const toolCalls = emitted
+    .filter((evt) => evt.type === 'tool_calls')
+    .flatMap((evt) => evt.calls || []);
+  assert.equal(toolCalls.length, 1);
+  assert.equal(toolCalls[0].name, 'update_todo_list');
+  assert.equal(toolCalls[0].input.todos, todoText);
+  assert.equal(visibleText, '[]\n');
+});
+
+// A collapsed lookalike wrapper name must pass through the sieve as plain text.
+test('sieve keeps collapsed DSML lookalike tag names as text', () => {
+  const lookalike = '<DSMLtool_calls_extra><DSMLinvoke name="update_todo_list"><DSMLparameter name="todos">x</DSMLparameter></DSMLinvoke></DSMLtool_calls_extra>';
+  const emitted = runSieve([lookalike], ['update_todo_list']);
+  const toolCalls = emitted
+    .filter((evt) => evt.type === 'tool_calls')
+    .flatMap((evt) => evt.calls || []);
+  assert.equal(toolCalls.length, 0);
+  assert.equal(collectText(emitted), lookalike);
+});
+
+// A review write-up that names several wrapper aliases in prose, followed by a
+// real DSML block with two invokes. The prose must stay visible, both invokes
+// must be emitted, and the command text must not leak into the visible output.
+test('sieve preserves review body with alias mentions before real DSML tool calls', () => {
+  const chunks = [
+    "Done reviewing the diff. Here's my analysis before we commit:\n\n",
+    'Summary of Changes\n',
+    'DSML wrapper variant support — recognize aliases (<dsml|tool_calls>, <|tool_calls>, <｜tool_calls>) alongside canonical <tool_calls> and <|DSML|tool_calls> wrappers.\n\n',
+    '<|DSML|tool_calls>\n',
+    '<|DSML|invoke name="Bash">\n',
+    '<|DSML|parameter name="command"><![CDATA[git add docs/toolcall-semantics.md internal/toolstream/tool_sieve_xml.go]]></|DSML|parameter>\n',
+    '<|DSML|parameter name="description"><![CDATA[Stage all relevant changed files]]></|DSML|parameter>\n',
+    '</|DSML|invoke>\n',
+    '<|DSML|invoke name="Bash">\n',
+    '<|DSML|parameter name="command"><![CDATA[git commit -m "$(cat <<\'EOF\'\nfeat(toolstream): expand DSML wrapper detection\n\nSupport DSML wrapper aliases: <dsml|tool_calls>, <|tool_calls>, <｜tool_calls> alongside existing canonical wrappers.\nEOF\n)"]]></|DSML|parameter>\n',
+    '<|DSML|parameter name="description"><![CDATA[Create commit with all staged changes]]></|DSML|parameter>\n',
+    '</|DSML|invoke>\n',
+    '</|DSML|tool_calls>',
+  ];
+  const emitted = runSieve(chunks, ['Bash']);
+  const visibleText = collectText(emitted);
+  const toolCalls = emitted
+    .filter((evt) => evt.type === 'tool_calls')
+    .flatMap((evt) => evt.calls || []);
+  assert.equal(toolCalls.length, 2);
+  assert.equal(visibleText.includes('<|DSML|tool_calls> wrappers'), true);
+  assert.equal(visibleText.includes('Summary of Changes'), true);
+  assert.equal(visibleText.includes('git add docs/toolcall-semantics.md'), false);
+});
+
+// The inline wrapper mention is split across two chunks, inside backticks. It
+// must survive as text, as must the trailing text after the real block, while
+// the block's own markup must not appear in the visible output.
+test('sieve preserves Chinese review body with inline DSML mention before real tool call', () => {
+  const chunks = [
+    '# Context from my IDE setup:\n\n## My request for Codex:\n',
+    '基于我的审查，这是工作区更改的总结和提交。\n\n## 审查报告\n\n### 文档\n\nAPI.md 中的工具调用部分缺少针对新 DSML 别名的更新——它只提到了 `',
+    '<|DSML|tool_calls>` 和 canonical `<tool_calls>`。由于这涉及 API 兼容性和文档准确性，需要在下游进行记录。\n\n',
+    '### 代码\n\n所有更改现在一致地处理四个 DSML wrapper 变体。\n\n现在提交已暂存的更改。\n\n',
+    '<|DSML|tool_calls>\n',
+    '  <|DSML|invoke name="Bash">\n',
+    '    <|DSML|parameter name="command"><![CDATA[git commit -m "$(cat <<\'EOF\'\nfeat: expand DSML tool-call alias and fence handling\nEOF\n)"]]></|DSML|parameter>\n',
+    '    <|DSML|parameter name="description"><![CDATA[Commit staged changes]]></|DSML|parameter>\n',
+    '  </|DSML|invoke>\n',
+    '</|DSML|tool_calls>\n\n补充',
+  ];
+  const emitted = runSieve(chunks, ['Bash']);
+  const visibleText = collectText(emitted);
+  const toolCalls = emitted
+    .filter((evt) => evt.type === 'tool_calls')
+    .flatMap((evt) => evt.calls || []);
+  assert.equal(toolCalls.length, 1);
+  assert.equal(visibleText.includes('它只提到了 `<|DSML|tool_calls>` 和 canonical `<tool_calls>`。由于这涉及 API 兼容性'), true);
+  assert.equal(visibleText.includes('补充'), true);
+  assert.equal(visibleText.includes('<|DSML|invoke'), false);
+});
+
 test('parseToolCalls ignores JSON tool_calls payload (XML-only)', () => {
   const payload = JSON.stringify({
     tool_calls: [{ name: 'read_file', input: { path: 'README.MD' } }],
@@ -98,6 +318,22 @@ test('sieve emits tool_calls when XML tag spans multiple chunks', () => {
   assert.equal(finalCalls[0].name, 'read_file');
 });
 
+// The opening wrapper tag is cut in the middle of its name ("tool" | "_calls");
+// nothing may leak as text while the sieve waits for the rest.
+test('sieve emits tool_calls when DSML tag spans multiple chunks', () => {
+  const chunks = [
+    '<|DSML|tool',
+    '_calls><|DSML|invoke name="read_file">',
+    '<|DSML|parameter name="path">README.MD</|DSML|parameter></|DSML|invoke></|DSML|tool_calls>',
+  ];
+  const emitted = runSieve(chunks, ['read_file']);
+  const visibleText = collectText(emitted);
+  const toolCalls = emitted
+    .filter((evt) => evt.type === 'tool_calls')
+    .flatMap((evt) => evt.calls || []);
+  assert.equal(visibleText, '');
+  assert.equal(toolCalls.length, 1);
+  assert.equal(toolCalls[0].name, 'read_file');
+});
+
 test('sieve keeps long XML tool calls buffered until the closing tag arrives', () => {
   const longContent = 'x'.repeat(4096);
   const splitAt = longContent.length / 2;
@@ -118,6 +354,23 @@ test('sieve keeps long XML tool calls buffered until the closing tag arrives', (
   assert.equal(finalCalls[0].input.content, longContent);
 });
 
+// Streaming variant of the unclosed-CDATA case: the call is still expected,
+// with the raw parameter text, and no markup may leak as text.
+test('sieve recovers when CDATA never closes inside a valid wrapper', () => {
+  const chunks = [
+    '<tool_calls>\n  <invoke name="Write">\n    <parameter name="content"><![CDATA[',
+    'hello world',
+    '</parameter>\n  </invoke>\n</tool_calls>',
+  ];
+  const emitted = runSieve(chunks, ['Write']);
+  const visibleText = collectText(emitted);
+  const toolCalls = emitted
+    .filter((evt) => evt.type === 'tool_calls')
+    .flatMap((evt) => evt.calls || []);
+  assert.equal(toolCalls.length, 1);
+  assert.equal(toolCalls[0].name, 'Write');
+  assert.equal(toolCalls[0].input.content, 'hello world');
+  assert.equal(visibleText, '');
+});
+
 test('sieve keeps CDATA tool examples buffered until the outer closing tag arrives', () => {
   const content = [
     '# DS2API 4.0 更新内容',
diff --git a/tests/scripts/check-cross-build.sh b/tests/scripts/check-cross-build.sh
new file mode 100755
index 0000000..22741ec
--- /dev/null
+++ b/tests/scripts/check-cross-build.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# Cross-compiles ./cmd/ds2api for every entry in DS2API_RELEASE_TARGETS.
+# Binaries are written under .tmp/cross-build/<label>/.
+set -euo pipefail
+
+# Work from the directory two levels above this script.
+ROOT_DIR="$(cd "$(dirname "$0")/../.." && pwd)"
+cd "$ROOT_DIR"
+
+# Provides the DS2API_RELEASE_TARGETS array.
+source "${ROOT_DIR}/scripts/release-targets.sh"
+
+# Output root for the cross-built binaries.
+OUT_DIR="${ROOT_DIR}/.tmp/cross-build"
+
+# Cross-compiles ./cmd/ds2api for one target with CGO disabled.
+#   $1 goos   $2 goarch   $3 goarm ("-" means: do not set GOARM)   $4 label
+# The binary goes to ${OUT_DIR}/<label>/ds2api, with ".exe" appended for windows.
+build_one() {
+  local goos="$1" goarch="$2" goarm="$3" label="$4"
+  local binary="${OUT_DIR}/${label}/ds2api"
+  case "$goos" in
+    windows) binary="${binary}.exe" ;;
+  esac
+
+  echo "[cross-build] ${label}"
+  mkdir -p "$(dirname "$binary")"
+
+  # Collect the per-target environment once so there is a single go build call.
+  local -a target_env=(CGO_ENABLED=0 GOOS="$goos" GOARCH="$goarch")
+  if [[ "$goarm" != "-" ]]; then
+    target_env+=(GOARM="$goarm")
+  fi
+  env "${target_env[@]}" go build -buildvcs=false -trimpath -o "$binary" ./cmd/ds2api
+}
+
+# Worker mode: the parallel branch below re-invokes this script through xargs
+# with "--build-one" followed by the fields of one release target line.
+if [[ "${1:-}" == "--build-one" ]]; then
+  shift
+  build_one "$@"
+  exit 0
+fi
+
+# Pick the parallelism: CROSS_BUILD_JOBS wins, otherwise the CPU count from
+# nproc or sysctl. Under `set -e` a failing probe (for example a sysctl without
+# an hw.ncpu key) would abort the script, so it falls back to 2 instead.
+jobs="${CROSS_BUILD_JOBS:-}"
+if [[ -z "$jobs" ]]; then
+  if command -v nproc >/dev/null 2>&1; then
+    jobs="$(nproc 2>/dev/null || echo 2)"
+  elif command -v sysctl >/dev/null 2>&1; then
+    jobs="$(sysctl -n hw.ncpu 2>/dev/null || echo 2)"
+  else
+    jobs="2"
+  fi
+fi
+# The value is compared numerically and passed to `xargs -P`, so accept only a
+# plain non-negative integer. "10#" forces base 10 so that a zero-padded value
+# such as "08" is not rejected as invalid octal.
+if [[ "$jobs" =~ ^[0-9]+$ ]]; then
+  jobs="$((10#$jobs))"
+else
+  echo "invalid job count '${jobs}'; building sequentially." >&2
+  jobs="1"
+fi
+
+# Start every full run from an empty output directory.
+rm -rf "$OUT_DIR"
+mkdir -p "$OUT_DIR"
+
+# One job (or zero): build in-process, target by target. Otherwise hand each
+# target line to its own "--build-one" worker, at most $jobs at a time.
+if [[ "$jobs" -le 1 ]]; then
+  for target in "${DS2API_RELEASE_TARGETS[@]}"; do
+    read -r goos goarch goarm label <<< "$target"
+    build_one "$goos" "$goarch" "$goarm" "$label"
+  done
+else
+  printf '%s\n' "${DS2API_RELEASE_TARGETS[@]}" \
+    | xargs -L 1 -P "$jobs" bash "${ROOT_DIR}/tests/scripts/check-cross-build.sh" --build-one
+fi
diff --git a/webui/src/features/apiTester/ApiTesterContainer.jsx b/webui/src/features/apiTester/ApiTesterContainer.jsx
index bf70d22..fe79a35 100644
--- a/webui/src/features/apiTester/ApiTesterContainer.jsx
+++ b/webui/src/features/apiTester/ApiTesterContainer.jsx
@@ -1,3 +1,4 @@
+import { useEffect, useMemo, useState } from 'react'
 import clsx from 'clsx'
 
 import { useI18n } from '../../i18n'
@@ -6,8 +7,75 @@ import { useChatStreamClient } from './useChatStreamClient'
 import ConfigPanel from './ConfigPanel'
 import ChatPanel from './ChatPanel'
 
+/**
+ * Builds the localized description for a model id.
+ *
+ * The id is matched by substring against a fixed list of variants; ids that
+ * match none get the generic description. Ids ending in "-nothinking" get the
+ * "no thinking" label appended after a " · " separator.
+ *
+ * @param {(key: string) => string} t translation lookup
+ * @param {string} modelID model id
+ * @returns {string} description text
+ */
+function describeModel(t, modelID) {
+    // Ordered most-specific first: the first substring hit wins, so
+    // "vision-search" has to be tried before "vision", and so on.
+    const variants = [
+        ['vision-search', 'apiTester.models.visionSearch'],
+        ['vision', 'apiTester.models.vision'],
+        ['pro-search', 'apiTester.models.proSearch'],
+        ['pro', 'apiTester.models.pro'],
+        ['flash-search', 'apiTester.models.flashSearch'],
+        ['flash', 'apiTester.models.flash'],
+    ]
+    const hit = variants.find(([needle]) => modelID.includes(needle))
+    const base = t(hit ? hit[1] : 'apiTester.models.generic')
+
+    return modelID.endsWith('-nothinking')
+        ? `${base} · ${t('apiTester.models.noThinking')}`
+        : base
+}
+
+/**
+ * Turns a model id into the display entry used by the model list:
+ * `{ id, name, icon, desc, color }`.
+ *
+ * Icon and color are chosen from substring checks on the id. "vision" takes
+ * precedence over "search", and "pro" only changes the shade or icon within
+ * the search and plain groups.
+ *
+ * @param {(key: string) => string} t translation lookup
+ * @param {string} modelID model id
+ */
+function decorateModel(t, modelID) {
+    const hasVision = modelID.includes('vision')
+    const hasSearch = modelID.includes('search')
+    const hasPro = modelID.includes('pro')
+
+    let icon
+    let color
+    if (hasVision) {
+        icon = 'ImageIcon'
+        color = hasSearch ? 'text-fuchsia-600' : 'text-violet-500'
+    } else if (hasSearch) {
+        icon = 'SearchIcon'
+        color = hasPro ? 'text-cyan-600' : 'text-cyan-500'
+    } else {
+        icon = hasPro ? 'Cpu' : 'MessageSquare'
+        color = hasPro ? 'text-amber-600' : 'text-amber-500'
+    }
+
+    return {
+        id: modelID,
+        name: modelID,
+        icon,
+        desc: describeModel(t, modelID),
+        color,
+    }
+}
+
 export default function ApiTesterContainer({ config, onMessage, authFetch }) {
     const { t } = useI18n()
+    const [availableModelIDs, setAvailableModelIDs] = useState([])
+    const [modelsLoaded, setModelsLoaded] = useState(false)
 
     const {
         model,
@@ -49,14 +117,58 @@ export default function ApiTesterContainer({ config, onMessage, authFetch }) {
     const customKeyActive = trimmedApiKey !== ''
     const customKeyManaged = customKeyActive && configuredKeys.includes(trimmedApiKey)
 
-    const models = [
-        { id: 'deepseek-v4-flash', name: 'deepseek-v4-flash', icon: 'MessageSquare', desc: t('apiTester.models.flash'), color: 'text-amber-500' },
-        { id: 'deepseek-v4-pro', name: 'deepseek-v4-pro', icon: 'Cpu', desc: t('apiTester.models.pro'), color: 'text-amber-600' },
-        { id: 'deepseek-v4-flash-search', name: 'deepseek-v4-flash-search', icon: 'SearchIcon', desc: t('apiTester.models.flashSearch'), color: 'text-cyan-500' },
-        { id: 'deepseek-v4-pro-search', name: 'deepseek-v4-pro-search', icon: 'SearchIcon', desc: t('apiTester.models.proSearch'), color: 'text-cyan-600' },
-        { id: 'deepseek-v4-vision', name: 'deepseek-v4-vision', icon: 'ImageIcon', desc: t('apiTester.models.vision'), color: 'text-violet-500' },
-        { id: 'deepseek-v4-vision-search', name: 'deepseek-v4-vision-search', icon: 'SearchIcon', desc: t('apiTester.models.visionSearch'), color: 'text-fuchsia-600' },
-    ]
+    // Fetch the model list whenever authFetch changes. Any failure (network
+    // error, non-OK status, bad JSON) results in an empty list. State is only
+    // written while this effect instance is still current.
+    useEffect(() => {
+        let cancelled = false
+
+        // Resolves to the trimmed, non-empty ids from the response's `data` array.
+        const fetchModelIDs = async () => {
+            const res = await authFetch('/v1/models')
+            if (!res.ok) {
+                throw new Error(`failed to fetch models: ${res.status}`)
+            }
+            const payload = await res.json()
+            if (!Array.isArray(payload?.data)) {
+                return []
+            }
+            return payload.data
+                .map((entry) => String(entry?.id || '').trim())
+                .filter(Boolean)
+        }
+
+        const refresh = async () => {
+            let ids
+            try {
+                ids = await fetchModelIDs()
+            } catch (_err) {
+                ids = []
+            }
+            if (cancelled) {
+                return
+            }
+            setAvailableModelIDs(ids)
+            setModelsLoaded(true)
+        }
+
+        setModelsLoaded(false)
+        refresh()
+        return () => {
+            cancelled = true
+        }
+    }, [authFetch])
+
+    // Display entries for the fetched ids; rebuilt only when the id list or the translator changes.
+    const models = useMemo(
+        () => availableModelIDs.map((modelID) => decorateModel(t, modelID)),
+        [availableModelIDs, t]
+    )
+
+    // Keep the selected model consistent with the fetched list.
+    // Nothing is changed until the fetch has settled: while loading, the list
+    // is still empty, and acting on it would clear the current selection and
+    // then always replace it with the first entry, even when the original
+    // selection is in the list.
+    useEffect(() => {
+        if (!modelsLoaded) {
+            return
+        }
+        if (!models.length) {
+            // Loaded, but nothing is available: drop any stale selection.
+            if (model) {
+                setModel('')
+            }
+            return
+        }
+        // Fall back to the first entry only when the selection is empty or unknown.
+        if (!model || !models.some((item) => item.id === model)) {
+            setModel(models[0].id)
+        }
+    }, [model, models, modelsLoaded, setModel])
 
     const { runTest, stopGeneration } = useChatStreamClient({
         t,
@@ -84,6 +196,7 @@ export default function ApiTesterContainer({ config, onMessage, authFetch }) {
                 models={models}
                 model={model}
                 setModel={setModel}
+                modelsLoaded={modelsLoaded}
                 streamingMode={streamingMode}
                 setStreamingMode={setStreamingMode}
                 selectedAccount={selectedAccount}
@@ -114,6 +227,7 @@ export default function ApiTesterContainer({ config, onMessage, authFetch }) {
                 streamingContent={streamingContent}
                 onRunTest={runTest}
                 onStopGeneration={stopGeneration}
+                hasAvailableModel={models.length > 0}
             />
         </div>
     )
diff --git a/webui/src/features/apiTester/ChatPanel.jsx b/webui/src/features/apiTester/ChatPanel.jsx
index 5da6684..32b160e 100644
--- a/webui/src/features/apiTester/ChatPanel.jsx
+++ b/webui/src/features/apiTester/ChatPanel.jsx
@@ -21,6 +21,7 @@ export default function ChatPanel({
     streamingContent,
     onRunTest,
     onStopGeneration,
+    hasAvailableModel,
 }) {
     const fileInputRef = useRef(null)
     const [uploadingFiles, setUploadingFiles] = useState(false)
@@ -181,7 +182,7 @@ export default function ChatPanel({
                     <div className="absolute left-2 bottom-2 z-10">
                         <button
                             onClick={() => fileInputRef.current?.click()}
-                            disabled={uploadingFiles || isStreaming}
+                            disabled={uploadingFiles || isStreaming || !hasAvailableModel}
                             className="p-2 text-muted-foreground hover:text-primary transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
                             title="Attach files"
                         >
@@ -189,11 +190,12 @@ export default function ChatPanel({
                         </button>
                     </div>
                     <textarea
-                        className="w-full bg-[#09090b] border border-border rounded-xl pl-12 pr-12 py-3 text-sm focus:ring-2 focus:ring-primary/20 focus:border-primary transition-all resize-none custom-scrollbar placeholder:text-muted-foreground/50 text-foreground shadow-inner"
-                        placeholder={t('apiTester.enterMessage')}
+                        className="w-full bg-[#09090b] border border-border rounded-xl pl-12 pr-12 py-3 text-sm focus:ring-2 focus:ring-primary/20 focus:border-primary transition-all resize-none custom-scrollbar placeholder:text-muted-foreground/50 text-foreground shadow-inner disabled:opacity-60 disabled:cursor-not-allowed"
+                        placeholder={hasAvailableModel ? t('apiTester.enterMessage') : t('apiTester.noModelsMessagePlaceholder')}
                         rows={1}
                         style={{ minHeight: '52px' }}
                         value={message}
+                        disabled={!hasAvailableModel}
                         onChange={e => setMessage(e.target.value)}
                         onKeyDown={e => {
                             if (e.key === 'Enter' && !e.shiftKey) {
@@ -212,7 +214,7 @@ export default function ChatPanel({
                         ) : (
                             <button
                                 onClick={onRunTest}
-                                disabled={loading || uploadingFiles || (!message.trim() && attachedFiles.length === 0)}
+                                disabled={loading || uploadingFiles || !hasAvailableModel || (!message.trim() && attachedFiles.length === 0)}
                                 className="p-2 text-primary hover:text-primary/80 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
                             >
                                 {loading ? <Loader2 className="w-4 h-4 animate-spin" /> : <Send className="w-4 h-4" />}
diff --git a/webui/src/features/apiTester/ConfigPanel.jsx b/webui/src/features/apiTester/ConfigPanel.jsx
index 2dbbfdd..73df472 100644
--- a/webui/src/features/apiTester/ConfigPanel.jsx
+++ b/webui/src/features/apiTester/ConfigPanel.jsx
@@ -19,6 +19,7 @@ export default function ConfigPanel({
     models,
     model,
     setModel,
+    modelsLoaded,
     streamingMode,
     setStreamingMode,
     selectedAccount,
@@ -43,6 +44,7 @@ export default function ConfigPanel({
     const selectedModel = models.find(m => m.id === model) || models[0]
     const SelectedModelIcon = selectedModel ? (iconMap[selectedModel.icon] || MessageSquare) : MessageSquare
     const defaultKeyPreview = maskSecret(config.keys?.[0])
+    const hasModels = models.length > 0
 
     return (
         <div className={clsx(
@@ -73,19 +75,24 @@ export default function ConfigPanel({
                         <label className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider ml-0.5">{t('apiTester.modelLabel')}</label>
                         <div className="relative">
                             <select
-                                className="w-full h-11 pl-3 pr-9 bg-secondary border border-border rounded-lg text-sm appearance-none focus:outline-none focus:ring-1 focus:ring-ring focus:border-ring transition-all cursor-pointer hover:bg-muted/70 text-foreground"
+                                className="w-full h-11 pl-3 pr-9 bg-secondary border border-border rounded-lg text-sm appearance-none focus:outline-none focus:ring-1 focus:ring-ring focus:border-ring transition-all cursor-pointer hover:bg-muted/70 text-foreground disabled:opacity-60 disabled:cursor-not-allowed"
                                 value={model}
                                 onChange={e => setModel(e.target.value)}
+                                disabled={!hasModels}
                             >
-                                {models.map(m => (
+                                {hasModels ? models.map(m => (
                                     <option key={m.id} value={m.id} className="bg-popover text-popover-foreground">
                                         {m.name}
                                     </option>
-                                ))}
+                                )) : (
+                                    <option value="" className="bg-popover text-popover-foreground">
+                                        {modelsLoaded ? t('apiTester.noModels') : t('apiTester.loadingModels')}
+                                    </option>
+                                )}
                             </select>
                             <ChevronDown className="absolute right-2.5 top-3.5 w-4 h-4 text-muted-foreground pointer-events-none" />
                         </div>
-                        {selectedModel && (
+                        {selectedModel ? (
                             <div className="mt-3 rounded-lg border border-border bg-muted/20 p-3">
                                 <div className="flex items-start gap-3">
                                     <div className={clsx(
@@ -107,6 +114,10 @@ export default function ConfigPanel({
                                     {t('apiTester.modelPickerHint')}
                                 </p>
                             </div>
+                        ) : (
+                            <div className="mt-3 rounded-lg border border-dashed border-border bg-muted/10 p-3 text-[11px] text-muted-foreground leading-relaxed">
+                                {modelsLoaded ? t('apiTester.noModelsHint') : t('apiTester.loadingModelsHint')}
+                            </div>
                         )}
                     </div>
 
diff --git a/webui/src/features/chatHistory/ChatHistoryContainer.jsx b/webui/src/features/chatHistory/ChatHistoryContainer.jsx
index 17d9692..fe28a2a 100644
--- a/webui/src/features/chatHistory/ChatHistoryContainer.jsx
+++ b/webui/src/features/chatHistory/ChatHistoryContainer.jsx
@@ -8,6 +8,10 @@ const LIMIT_OPTIONS = [0, 10, 20, 50]
 const DISABLED_LIMIT = 0
 const MESSAGE_COLLAPSE_AT = 700
 const VIEW_MODE_KEY = 'ds2api_chat_history_view_mode'
+const BEGIN_SENTENCE_MARKER = '<｜begin▁of▁sentence｜>'
+const USER_MARKER = '<｜User｜>'
+const ASSISTANT_MARKER = '<｜Assistant｜>'
+const END_SENTENCE_MARKER = '<｜end▁of▁sentence｜>'
 
 function formatDateTime(value, lang) {
     if (!value) return '-'
@@ -105,14 +109,129 @@ function MergeModeIcon() {
     )
 }
 
-function RequestMessages({ item, t }) {
-    const messages = Array.isArray(item?.messages) && item.messages.length > 0
+function skipWhitespace(text, start) {
+    // Walk forward from `start` while the current character matches \s; the result is
+    // the index of the first non-whitespace character, or where the length bound stops the scan.
+    let index = start
+    while (index < text.length && /\s/.test(text[index])) index += 1
+    return index
+}
+
+function parseStrictHistoryMessages(historyText) { // Parses a marker-delimited transcript into [{ role, content }] turns; returns null when the text does not fit the expected shape.
+    const rawText = String(historyText || '')
+    const beginIndex = rawText.indexOf(BEGIN_SENTENCE_MARKER)
+    if (beginIndex < 0) return null
+    // Text before the first begin-of-sentence marker is dropped.
+    const transcript = rawText.slice(beginIndex)
+
+    let cursor = BEGIN_SENTENCE_MARKER.length // index into `transcript`, just past the begin marker
+    const parsed = []
+    let expectedRole = null // role the next marker must carry; null until the first marker is seen
+    let trailingAssistantPromptOnly = false // set when the text ends on an assistant marker with no end-of-sentence marker after it
+
+    while (cursor < transcript.length) {
+        if (expectedRole === null) { // first turn: whichever marker sits right after the begin marker sets the starting role
+            if (transcript.startsWith(USER_MARKER, cursor)) {
+                expectedRole = 'user'
+            } else if (transcript.startsWith(ASSISTANT_MARKER, cursor)) {
+                expectedRole = 'assistant'
+            } else if (transcript.slice(cursor).trim() === '') {
+                break // nothing but whitespace after the begin marker; `parsed` is empty, so null is returned below
+            } else {
+                return null // any other text directly after the begin marker is rejected
+            }
+        }
+        // User turn: content ends at the next end-of-sentence marker or assistant marker, whichever comes first.
+        if (transcript.startsWith(USER_MARKER, cursor)) {
+            if (expectedRole !== 'user') return null
+            cursor += USER_MARKER.length
+            const nextAssistant = transcript.indexOf(ASSISTANT_MARKER, cursor)
+            const nextSentenceEnd = transcript.indexOf(END_SENTENCE_MARKER, cursor)
+            if (nextAssistant < 0) return null // a user turn must be followed by an assistant marker somewhere
+            if (nextSentenceEnd >= 0 && nextSentenceEnd < nextAssistant) {
+                const assistantStart = skipWhitespace(transcript, nextSentenceEnd + END_SENTENCE_MARKER.length) // only whitespace may separate the end marker from the assistant marker
+                if (!transcript.startsWith(ASSISTANT_MARKER, assistantStart)) return null
+                parsed.push({
+                    role: 'user',
+                    content: transcript.slice(cursor, nextSentenceEnd),
+                })
+                cursor = assistantStart
+                expectedRole = 'assistant'
+                continue
+            }
+            parsed.push({
+                role: 'user',
+                content: transcript.slice(cursor, nextAssistant),
+            })
+            const assistantStart = nextAssistant + ASSISTANT_MARKER.length
+            if (transcript.indexOf(END_SENTENCE_MARKER, assistantStart) < 0) { // no end marker remains: stop here without adding an assistant message
+                trailingAssistantPromptOnly = true
+                cursor = assistantStart
+                break
+            }
+            cursor = nextAssistant // leave the cursor on the assistant marker so the next iteration parses that turn
+            expectedRole = 'assistant'
+            continue
+        }
+        // Assistant turn: content runs up to the next end-of-sentence marker, which is required.
+        if (transcript.startsWith(ASSISTANT_MARKER, cursor)) {
+            if (expectedRole !== 'assistant') return null
+            cursor += ASSISTANT_MARKER.length
+            const nextSentenceEnd = transcript.indexOf(END_SENTENCE_MARKER, cursor)
+            if (nextSentenceEnd < 0) return null
+            parsed.push({
+                role: 'assistant',
+                content: transcript.slice(cursor, nextSentenceEnd),
+            })
+            cursor = nextSentenceEnd + END_SENTENCE_MARKER.length
+            expectedRole = 'user'
+            continue
+        }
+        // No marker at the cursor: once a message is parsed and a user turn is expected, stop and ignore the remaining text.
+        if (parsed.length && expectedRole === 'user') break
+        if (transcript.slice(cursor).trim() === '') break // NOTE(review): this check and the return below look unreachable, since every path arriving here seems to satisfy the check above — confirm
+        return null
+    }
+
+    if (!parsed.length) {
+        return null
+    }
+
+    if (!trailingAssistantPromptOnly && parsed[parsed.length - 1]?.role !== 'assistant') { // must end on an assistant message unless it stopped at an open assistant marker
+        return null
+    }
+
+    return parsed
+}
+
+function buildListModeMessages(item, t) {
+    // Builds the list-mode message array: the item's own messages (or one synthetic user
+    // message when it has none), with any parsed history_text turns merged in.
+    const hasRecorded = Array.isArray(item?.messages) && item.messages.length > 0
+    const current = hasRecorded
+        ? item.messages
+        : [{ role: 'user', content: item?.user_input || t('chatHistory.emptyUserInput') }]
+    const parsedHistory = parseStrictHistoryMessages(item?.history_text)
+    if (!parsedHistory?.length) return { messages: current, historyMerged: false }
+
+    // History goes before the first message whose role is neither system nor developer (at the end if there is none).
+    const isPreamble = (entry) => ['system', 'developer'].includes(String(entry?.role || '').trim().toLowerCase())
+    const firstTurn = current.findIndex(entry => !isPreamble(entry))
+    const splitAt = firstTurn < 0 ? current.length : firstTurn
+    return {
+        messages: [...current.slice(0, splitAt), ...parsedHistory, ...current.slice(splitAt)],
+        historyMerged: true,
+    }
+}
+
+function RequestMessages({ item, t, messages }) {
+    const requestMessages = Array.isArray(messages) && messages.length > 0
+        ? messages
+        : [{ role: 'user', content: item?.user_input || t('chatHistory.emptyUserInput') }]
 
     return (
         <div className="space-y-5 max-w-4xl mx-auto">
-            {messages.map((message, index) => {
+            {requestMessages.map((message, index) => {
                 const role = message.role || 'user'
                 const isUser = role === 'user'
                 const isAssistant = role === 'assistant'
@@ -121,7 +240,7 @@ function RequestMessages({ item, t }) {
                     ? t('chatHistory.role.user')
                     : (isAssistant ? t('chatHistory.role.assistant') : (isTool ? t('chatHistory.role.tool') : t('chatHistory.role.system')))
                 return (
-                    <div key={`${role}-${index}`} className="flex gap-4">
+                    <div key={`${role}-${index}`} className={clsx('flex gap-4', isUser && 'flex-row-reverse justify-start')}>
                         <div className={clsx(
                             'w-8 h-8 rounded-lg flex items-center justify-center shrink-0 border border-border',
                             isUser
@@ -133,7 +252,7 @@ function RequestMessages({ item, t }) {
                                 : <Bot className="w-4 h-4 text-foreground" />}
                         </div>
                         <div className="max-w-[88%] lg:max-w-[78%] text-left">
-                            <div className="text-[11px] uppercase tracking-[0.12em] text-muted-foreground mb-2 px-1">
+                            <div className={clsx('text-[11px] uppercase tracking-[0.12em] text-muted-foreground mb-2 px-1', isUser && 'text-right')}>
                                 {label}
                             </div>
                             <div className={clsx(
@@ -205,13 +324,15 @@ function HistoryTextView({ item, t }) {
 
 function DetailConversation({ selectedItem, t, viewMode, detailScrollRef, assistantStartRef, bottomButtonClassName }) {
     if (!selectedItem) return null
+    const listModeState = viewMode === 'list' ? buildListModeMessages(selectedItem, t) : null
+    const showHistoryAtTop = viewMode !== 'list' || !listModeState?.historyMerged
 
     return (
         <>
-            <HistoryTextView item={selectedItem} t={t} />
+            {showHistoryAtTop && <HistoryTextView item={selectedItem} t={t} />}
 
             {viewMode === 'list'
-                ? <RequestMessages item={selectedItem} t={t} />
+                ? <RequestMessages item={selectedItem} t={t} messages={listModeState?.messages} />
                 : <MergedPromptView item={selectedItem} t={t} />}
 
             <div ref={assistantStartRef} className="flex gap-4 max-w-4xl mx-auto">
@@ -410,12 +531,12 @@ export default function ChatHistoryContainer({ authFetch, onMessage }) {
     }, [])
 
     useEffect(() => {
-        if (!autoRefreshReady) return undefined
+        if (!autoRefreshReady || limit === DISABLED_LIMIT) return undefined
         const timer = window.setInterval(() => {
             loadList({ mode: 'silent', announceError: false })
         }, 5000)
         return () => window.clearInterval(timer)
-    }, [autoRefreshReady])
+    }, [autoRefreshReady, limit])
 
     useEffect(() => {
         if (!autoRefreshReady || !selectedId || selectedSummary?.status !== 'streaming') return undefined
@@ -494,7 +615,12 @@ export default function ChatHistoryContainer({ authFetch, onMessage }) {
             setLimit(resolvedLimit)
             listETagRef.current = ''
             syncItems(Array.isArray(data.items) ? data.items : [])
-            onMessage?.('success', t('chatHistory.limitUpdated', { limit: resolvedLimit === DISABLED_LIMIT ? t('chatHistory.off') : resolvedLimit }))
+            onMessage?.(
+                'success',
+                resolvedLimit === DISABLED_LIMIT
+                    ? t('chatHistory.disabledSuccess')
+                    : t('chatHistory.limitUpdated', { limit: resolvedLimit })
+            )
         } catch (error) {
             onMessage?.('error', error.message || t('chatHistory.updateLimitFailed'))
         } finally {
@@ -573,6 +699,12 @@ export default function ChatHistoryContainer({ authFetch, onMessage }) {
             openMobileDetail(itemId, event)
             return
         }
+        if (itemId === selectedId) {
+            detailETagRef.current = ''
+            setSelectedDetail(null)
+            loadDetail(itemId, { announceError: false })
+            return
+        }
         setPendingJumpToAssistant(true)
         setSelectedId(itemId)
     }
diff --git a/webui/src/features/settings/BehaviorSection.jsx b/webui/src/features/settings/BehaviorSection.jsx
index e96852a..6b907af 100644
--- a/webui/src/features/settings/BehaviorSection.jsx
+++ b/webui/src/features/settings/BehaviorSection.jsx
@@ -28,6 +28,41 @@ export default function BehaviorSection({ t, form, setForm }) {
                         className="w-full bg-background border border-border rounded-lg px-3 py-2"
                     />
                 </label>
+                <label className="flex items-start gap-3 rounded-lg border border-border bg-background/60 p-4">
+                    <input
+                        type="checkbox"
+                        checked={Boolean(form.thinking_injection?.enabled ?? true)}
+                        onChange={(e) => setForm((prev) => ({
+                            ...prev,
+                            thinking_injection: {
+                                ...prev.thinking_injection,
+                                enabled: e.target.checked,
+                            },
+                        }))}
+                        className="mt-1 h-4 w-4 rounded border-border"
+                    />
+                    <div className="space-y-1">
+                        <span className="text-sm font-medium block">{t('settings.thinkingInjectionEnabled')}</span>
+                        <span className="text-xs text-muted-foreground block">{t('settings.thinkingInjectionDesc')}</span>
+                    </div>
+                </label>
+                <label className="text-sm space-y-2 md:col-span-2">
+                    <span className="text-muted-foreground">{t('settings.thinkingInjectionPrompt')}</span>
+                    <textarea
+                        rows={5}
+                        value={form.thinking_injection?.prompt || ''}
+                        placeholder={form.thinking_injection?.default_prompt || ''}
+                        onChange={(e) => setForm((prev) => ({
+                            ...prev,
+                            thinking_injection: {
+                                ...prev.thinking_injection,
+                                prompt: e.target.value,
+                            },
+                        }))}
+                        className="w-full bg-background border border-border rounded-lg px-3 py-2 resize-y min-h-32"
+                    />
+                    <p className="text-xs text-muted-foreground">{t('settings.thinkingInjectionPromptHelp')}</p>
+                </label>
             </div>
         </div>
     )
diff --git a/webui/src/features/settings/HistorySplitSection.jsx b/webui/src/features/settings/HistorySplitSection.jsx
index 242d687..30a0bc1 100644
--- a/webui/src/features/settings/HistorySplitSection.jsx
+++ b/webui/src/features/settings/HistorySplitSection.jsx
@@ -9,10 +9,19 @@ export default function HistorySplitSection({ t, form, setForm }) {
                 <label className="flex items-start gap-3 rounded-lg border border-border bg-background/60 p-4">
                     <input
                         type="checkbox"
-                        checked
-                        disabled
-                        readOnly
-                        className="mt-1 h-4 w-4 rounded border-border disabled:opacity-70"
+                        checked={Boolean(form.history_split?.enabled)}
+                        onChange={(e) => setForm((prev) => ({
+                            ...prev,
+                            history_split: {
+                                ...prev.history_split,
+                                enabled: e.target.checked,
+                            },
+                            current_input_file: {
+                                ...prev.current_input_file,
+                                enabled: e.target.checked ? false : Boolean(prev.current_input_file?.enabled),
+                            },
+                        }))}
+                        className="mt-1 h-4 w-4 rounded border-border"
                     />
                     <div className="space-y-1">
                         <span className="text-sm font-medium block">{t('settings.historySplitEnabled')}</span>
@@ -25,7 +34,7 @@ export default function HistorySplitSection({ t, form, setForm }) {
                         type="number"
                         min={1}
                         max={1000}
-                        value={form.history_split.trigger_after_turns}
+                        value={form.history_split?.trigger_after_turns || 1}
                         onChange={(e) => setForm((prev) => ({
                             ...prev,
                             history_split: {
@@ -38,6 +47,49 @@ export default function HistorySplitSection({ t, form, setForm }) {
                     <p className="text-xs text-muted-foreground">{t('settings.historySplitTriggerHelp')}</p>
                 </label>
             </div>
+            <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
+                <label className="flex items-start gap-3 rounded-lg border border-border bg-background/60 p-4">
+                    <input
+                        type="checkbox"
+                        checked={Boolean(form.current_input_file?.enabled)}
+                        onChange={(e) => setForm((prev) => ({
+                            ...prev,
+                            history_split: {
+                                ...prev.history_split,
+                                enabled: e.target.checked ? false : Boolean(prev.history_split?.enabled),
+                            },
+                            current_input_file: {
+                                ...prev.current_input_file,
+                                enabled: e.target.checked,
+                            },
+                        }))}
+                        className="mt-1 h-4 w-4 rounded border-border"
+                    />
+                    <div className="space-y-1">
+                        <span className="text-sm font-medium block">{t('settings.currentInputFileEnabled')}</span>
+                        <span className="text-xs text-muted-foreground block">{t('settings.currentInputFileDesc')}</span>
+                    </div>
+                </label>
+                <label className="text-sm space-y-2">
+                    <span className="text-muted-foreground">{t('settings.currentInputFileMinChars')}</span>
+                    <input
+                        type="number"
+                        min={0}
+                        max={100000000}
+                        value={form.current_input_file?.min_chars ?? 0}
+                        onChange={(e) => setForm((prev) => ({
+                            ...prev,
+                            current_input_file: {
+                                ...prev.current_input_file,
+                                min_chars: Number(e.target.value || 0),
+                            },
+                        }))}
+                        className="w-full bg-background border border-border rounded-lg px-3 py-2"
+                    />
+                    <p className="text-xs text-muted-foreground">{t('settings.currentInputFileHelp')}</p>
+                </label>
+            </div>
+            <p className="text-xs text-muted-foreground">{t('settings.splitPassThroughHelp')}</p>
         </div>
     )
 }
diff --git a/webui/src/features/settings/useSettingsForm.js b/webui/src/features/settings/useSettingsForm.js
index 917ee39..b900af3 100644
--- a/webui/src/features/settings/useSettingsForm.js
+++ b/webui/src/features/settings/useSettingsForm.js
@@ -17,7 +17,9 @@ const DEFAULT_FORM = {
     responses: { store_ttl_seconds: 900 },
     embeddings: { provider: '' },
     auto_delete: { mode: 'none' },
-    history_split: { enabled: true, trigger_after_turns: 1 },
+    history_split: { enabled: false, trigger_after_turns: 1 },
+    current_input_file: { enabled: true, min_chars: 0 },
+    thinking_injection: { enabled: true, prompt: '', default_prompt: '' },
     model_aliases_text: '{}',
 }
 
@@ -50,6 +52,8 @@ function normalizeAutoDeleteMode(raw) {
 }
 
 function fromServerForm(data) {
+    const historySplitEnabled = Boolean(data.history_split?.enabled)
+    const currentInputFileEnabled = historySplitEnabled ? false : (data.current_input_file?.enabled ?? true)
     return {
         admin: { jwt_expire_hours: Number(data.admin?.jwt_expire_hours || 24) },
         runtime: {
@@ -71,14 +75,25 @@ function fromServerForm(data) {
             mode: normalizeAutoDeleteMode(data.auto_delete),
         },
         history_split: {
-            enabled: true,
+            enabled: historySplitEnabled,
             trigger_after_turns: Number(data.history_split?.trigger_after_turns || 1),
         },
+        current_input_file: {
+            enabled: currentInputFileEnabled,
+            min_chars: Number(data.current_input_file?.min_chars ?? 0),
+        },
+        thinking_injection: {
+            enabled: data.thinking_injection?.enabled ?? true,
+            prompt: data.thinking_injection?.prompt || '',
+            default_prompt: data.thinking_injection?.default_prompt || '',
+        },
         model_aliases_text: JSON.stringify(data.model_aliases || {}, null, 2),
     }
 }
 
 function toServerPayload(form) {
+    const historySplitEnabled = Boolean(form.history_split?.enabled)
+    const currentInputFileEnabled = historySplitEnabled ? false : Boolean(form.current_input_file?.enabled)
     return {
         admin: { jwt_expire_hours: Number(form.admin.jwt_expire_hours) },
         runtime: {
@@ -94,9 +109,17 @@ function toServerPayload(form) {
         embeddings: { provider: String(form.embeddings.provider || '').trim() },
         auto_delete: { mode: normalizeAutoDeleteMode(form.auto_delete) },
         history_split: {
-            enabled: true,
+            enabled: historySplitEnabled,
             trigger_after_turns: Number(form.history_split?.trigger_after_turns || 1),
         },
+        current_input_file: {
+            enabled: currentInputFileEnabled,
+            min_chars: Number(form.current_input_file?.min_chars ?? 0),
+        },
+        thinking_injection: {
+            enabled: Boolean(form.thinking_injection?.enabled ?? true),
+            prompt: String(form.thinking_injection?.prompt || '').trim(),
+        },
     }
 }
 
diff --git a/webui/src/layout/DashboardShell.jsx b/webui/src/layout/DashboardShell.jsx
index b0542a6..a5c5f14 100644
--- a/webui/src/layout/DashboardShell.jsx
+++ b/webui/src/layout/DashboardShell.jsx
@@ -1,4 +1,4 @@
-import { useCallback, useEffect, useState } from 'react'
+import { Suspense, lazy, useCallback, useEffect, useState } from 'react'
 import { useLocation, useNavigate } from 'react-router-dom'
 import {
     LayoutDashboard,
@@ -11,20 +11,33 @@ import {
     Server,
     Users,
     Globe,
-    History
+    History,
+    Loader2
 } from 'lucide-react'
 import clsx from 'clsx'
 
-import AccountManagerContainer from '../features/account/AccountManagerContainer'
-import ApiTesterContainer from '../features/apiTester/ApiTesterContainer'
-import ChatHistoryContainer from '../features/chatHistory/ChatHistoryContainer'
-import BatchImport from '../components/BatchImport'
-import VercelSyncContainer from '../features/vercel/VercelSyncContainer'
-import SettingsContainer from '../features/settings/SettingsContainer'
-import ProxyManagerContainer from '../features/proxy/ProxyManagerContainer'
 import LanguageToggle from '../components/LanguageToggle'
 import { useI18n } from '../i18n'
 
+const AccountManagerContainer = lazy(() => import('../features/account/AccountManagerContainer'))
+const ApiTesterContainer = lazy(() => import('../features/apiTester/ApiTesterContainer'))
+const ChatHistoryContainer = lazy(() => import('../features/chatHistory/ChatHistoryContainer'))
+const BatchImport = lazy(() => import('../components/BatchImport'))
+const VercelSyncContainer = lazy(() => import('../features/vercel/VercelSyncContainer'))
+const SettingsContainer = lazy(() => import('../features/settings/SettingsContainer'))
+const ProxyManagerContainer = lazy(() => import('../features/proxy/ProxyManagerContainer'))
+
+function TabLoadingFallback({ label }) {
+    // Placeholder card used as the <Suspense> fallback while a lazily imported tab module loads.
+    const spinner = <Loader2 className="w-4 h-4 animate-spin" />
+    const caption = <span>{label}</span>
+    return (
+        <div className="min-h-[320px] rounded-lg border border-border bg-card flex items-center justify-center">
+            <div className="flex items-center gap-3 text-sm text-muted-foreground">{spinner}{caption}</div>
+        </div>
+    )
+}
+
 export default function DashboardShell({ token, onLogout, config, fetchConfig, showMessage, message, onForceLogout, isVercel }) {
     const { t } = useI18n()
     const location = useLocation()
@@ -47,6 +60,7 @@ export default function DashboardShell({ token, onLogout, config, fetchConfig, s
     const pathTab = routeSegments[0] || ''
     const activeTab = tabIds.has(pathTab) ? pathTab : 'accounts'
     const adminBasePath = pathSegments[0] === 'admin' ? '/admin' : ''
+    const activeNavItem = navItems.find(n => n.id === activeTab)
 
     const navigateToTab = useCallback((tabID) => {
         const nextPath = tabID === 'accounts'
@@ -232,10 +246,10 @@ export default function DashboardShell({ token, onLogout, config, fetchConfig, s
                     <div className="max-w-6xl mx-auto space-y-4 lg:space-y-6">
                         <div className="hidden lg:block mb-8">
                             <h1 className="text-3xl font-bold tracking-tight mb-2">
-                                {navItems.find(n => n.id === activeTab)?.label}
+                                {activeNavItem?.label}
                             </h1>
                             <p className="text-muted-foreground">
-                                {navItems.find(n => n.id === activeTab)?.description}
+                                {activeNavItem?.description}
                             </p>
                         </div>
 
@@ -251,7 +265,9 @@ export default function DashboardShell({ token, onLogout, config, fetchConfig, s
                         )}
 
                         <div className="animate-in fade-in duration-500">
-                            {renderTab()}
+                            <Suspense fallback={<TabLoadingFallback label={activeNavItem?.label || 'DS2API'} />}>
+                                {renderTab()}
+                            </Suspense>
                         </div>
                     </div>
                 </div>
diff --git a/webui/src/locales/en.json b/webui/src/locales/en.json
index 1be3f59..f82f4ac 100644
--- a/webui/src/locales/en.json
+++ b/webui/src/locales/en.json
@@ -224,7 +224,9 @@
             "flashSearch": "v4 Flash (with search)",
             "proSearch": "v4 Pro (with search)",
             "vision": "v4 Vision (thinking on by default)",
-            "visionSearch": "v4 Vision (with search)"
+            "visionSearch": "v4 Vision (with search)",
+            "generic": "Compatible model",
+            "noThinking": "thinking forced off"
         },
         "missingApiKey": "Please provide an API key.",
         "requestFailed": "Request failed.",
@@ -234,6 +236,11 @@
         "config": "Configuration",
         "modelLabel": "Model",
         "modelPickerHint": "Use the dropdown to pick a model. The list scrolls automatically.",
+        "loadingModels": "Loading models...",
+        "loadingModelsHint": "Fetching the available model list from /v1/models.",
+        "noModels": "No models available",
+        "noModelsHint": "The /v1/models endpoint did not return any usable models. Check the backend configuration or API status.",
+        "noModelsMessagePlaceholder": "No models are available right now, so the tester cannot send a request.",
         "streamMode": "Streaming",
         "accountSelector": "Account",
         "autoRandom": "🤖 Auto / Random",
@@ -264,6 +271,7 @@
         "deleteSuccess": "Conversation deleted.",
         "deleteFailed": "Failed to delete conversation.",
         "updateLimitFailed": "Failed to update retention limit.",
+        "disabledSuccess": "Conversation history saving disabled.",
         "limitUpdated": "Retention limit updated to {limit}",
         "listTitle": "History",
         "detailTitle": "Details",
@@ -373,12 +381,21 @@
         "behaviorTitle": "Behavior",
         "responsesTTL": "Responses store TTL (seconds)",
         "embeddingsProvider": "Embeddings provider",
-        "historySplitTitle": "History Split",
-        "historySplitDesc": "Pack earlier turns into an attached HISTORY.txt so the model reads the file first and then continues from the latest user request.",
-        "historySplitEnabled": "History split is forced on",
-        "historySplitEnabledDesc": "This capability is now enabled globally; legacy disabled values are ignored.",
+        "thinkingInjectionEnabled": "Thinking format injection",
+        "thinkingInjectionDesc": "Append a structured <think> checklist to the latest user message before prompt assembly.",
+        "thinkingInjectionPrompt": "Thinking format prompt",
+        "thinkingInjectionPromptHelp": "Leave empty to use the built-in default prompt shown as the input placeholder.",
+        "historySplitTitle": "Context Split",
+        "historySplitDesc": "Choose one context-splitting mode to avoid inlining very long prompts.",
+        "historySplitEnabled": "Turn split (second turn by default)",
+        "historySplitEnabledDesc": "After the configured user-turn threshold, pack earlier conversation into HISTORY.txt.",
         "historySplitTriggerAfterTurns": "Trigger threshold (user turns)",
         "historySplitTriggerHelp": "Default is 1, which means history split starts from the second turn.",
+        "currentInputFileEnabled": "Independent split (by size)",
+        "currentInputFileDesc": "After the character threshold is reached, upload the full context as a hidden context file and skip HISTORY.txt.",
+        "currentInputFileMinChars": "Current input threshold (characters)",
+        "currentInputFileHelp": "Default is 0, which uses independent split whenever there is input.",
+        "splitPassThroughHelp": "Turn split and independent split are mutually exclusive; choose at most one. If both are unchecked, requests pass through directly without uploading split context files.",
         "compatibilityTitle": "Compatibility",
         "compatibilityDesc": "Compatibility controls that keep stream output closer to the wire format or safer for the web UI.",
         "stripReferenceMarkers": "Strip [reference:N] markers",
diff --git a/webui/src/locales/zh.json b/webui/src/locales/zh.json
index 03abbc2..69f5a46 100644
--- a/webui/src/locales/zh.json
+++ b/webui/src/locales/zh.json
@@ -224,7 +224,9 @@
             "flashSearch": "v4 Flash（带搜索）",
             "proSearch": "v4 Pro（带搜索）",
             "vision": "v4 Vision（默认开启思考）",
-            "visionSearch": "v4 Vision（带搜索）"
+            "visionSearch": "v4 Vision（带搜索）",
+            "generic": "兼容模型",
+            "noThinking": "强制关闭思考"
         },
         "missingApiKey": "请提供 API 密钥",
         "requestFailed": "请求失败",
@@ -234,6 +236,11 @@
         "config": "配置",
         "modelLabel": "模型",
         "modelPickerHint": "使用下拉列表选择模型，长列表会自动滚动。",
+        "loadingModels": "正在加载模型...",
+        "loadingModelsHint": "正在从 /v1/models 拉取可用模型列表。",
+        "noModels": "没有可用模型",
+        "noModelsHint": "/v1/models 当前没有返回任何可用模型，请先检查后端配置或接口状态。",
+        "noModelsMessagePlaceholder": "当前没有可用模型，暂时无法发起测试。",
         "streamMode": "流式模式",
         "accountSelector": "选择账号",
         "autoRandom": "🤖 自动 / 随机",
@@ -264,6 +271,7 @@
         "deleteSuccess": "对话记录已删除",
         "deleteFailed": "删除对话记录失败",
         "updateLimitFailed": "更新保留条数失败",
+        "disabledSuccess": "已关闭对话历史记录",
         "limitUpdated": "保留条数已更新为 {limit}",
         "listTitle": "历史列表",
         "detailTitle": "对话详情",
@@ -373,12 +381,21 @@
         "behaviorTitle": "行为设置",
         "responsesTTL": "Responses 缓存 TTL（秒）",
         "embeddingsProvider": "Embeddings Provider",
-        "historySplitTitle": "历史拆分",
-        "historySplitDesc": "将更早的对话整理成 HISTORY.txt 上传，让模型优先读取历史文件，再结合最新一轮继续回答。",
-        "historySplitEnabled": "历史拆分已强制启用",
-        "historySplitEnabledDesc": "该能力现在全局开启；旧配置里的关闭值会被忽略。",
+        "thinkingInjectionEnabled": "思考格式注入",
+        "thinkingInjectionDesc": "在组装 prompt 前，将结构化 <think> 检查清单追加到最新用户消息末尾。",
+        "thinkingInjectionPrompt": "思考格式提示词",
+        "thinkingInjectionPromptHelp": "留空时使用内置默认提示词；默认内容会显示在输入框占位文本中。",
+        "historySplitTitle": "上下文拆分",
+        "historySplitDesc": "选择一种上下文拆分方式，减少超长 prompt 直接内联。",
+        "historySplitEnabled": "轮次拆分（默认第二轮）",
+        "historySplitEnabledDesc": "从配置的用户回合数之后，将更早的对话整理成 HISTORY.txt。",
         "historySplitTriggerAfterTurns": "触发阈值（用户回合数）",
         "historySplitTriggerHelp": "默认值为 1，表示从第二轮开始拆分历史。",
+        "currentInputFileEnabled": "独立拆分（按量）",
+        "currentInputFileDesc": "达到字符阈值后，将完整上下文上传为隐藏上下文文件，并跳过 HISTORY.txt。",
+        "currentInputFileMinChars": "当前输入阈值（字符数）",
+        "currentInputFileHelp": "默认 0，表示有输入时就使用独立拆分。",
+        "splitPassThroughHelp": "轮次拆分和独立拆分互斥，只能选择一种；如果都不勾选，请求会直接透传，不上传拆分上下文文件。",
         "compatibilityTitle": "兼容性设置",
         "compatibilityDesc": "用于控制输出格式兼容性，避免把模型原始流里的标记直接暴露到前端。",
         "stripReferenceMarkers": "移除 [reference:N] 标记",