Merge pull request #211 from CJackHwang/dev

Merge pull request #209 from CJackHwang/codex/investigate-command-output-loss-issue Preserve trailing newlines when stripping leaked CONTENT_FILTER and strengthen XML tool-call parsing
Merge pull request #212 from CJackHwang/codex/address-handling-of-upstream-vacuum-returns
2026-05-02 15:35:27 +08:00 · 2026-04-04 02:20:39 +08:00 · 2026-04-04 02:20:01 +08:00 · 2026-04-04 02:14:39 +08:00 · 2026-04-04 01:53:39 +08:00 · 2026-04-04 01:52:57 +08:00
223 changed files with 13275 additions and 3381 deletions
--- a/.env.example
+++ b/.env.example
@@ -1,93 +1,17 @@
-# DS2API environment template (Go runtime)
-# Copy this file to .env and adjust values.
-# Updated: 2026-02
-
-# ---------------------------------------------------------------
-# Runtime
-# ---------------------------------------------------------------
-# HTTP listen port (default: 5001)
+# DS2API runtime
 PORT=5001
-
-# Log level: DEBUG | INFO | WARN | ERROR
 LOG_LEVEL=INFO

-# Max concurrent inflight requests per account in managed-key mode.
-# Default: 2
-# Recommended client concurrency is calculated dynamically as:
-#   account_count * DS2API_ACCOUNT_MAX_INFLIGHT
-# So by default it is account_count * 2.
-# Requests beyond inflight slots enter a waiting queue first.
-# Default queue size equals recommended concurrency, so 429 starts after:
-#   account_count * DS2API_ACCOUNT_MAX_INFLIGHT * 2
-# Alias: DS2API_ACCOUNT_CONCURRENCY
-# DS2API_ACCOUNT_MAX_INFLIGHT=2
+# Admin authentication
+DS2API_ADMIN_KEY=change-me

-# Optional waiting queue size override for managed-key mode.
-# Default: recommended_concurrency (same as account_count * inflight_limit)
-# Alias: DS2API_ACCOUNT_QUEUE_SIZE
-# DS2API_ACCOUNT_MAX_QUEUE=10
+# Config loading (choose one)
+# 1) file-based config
+DS2API_CONFIG_PATH=/app/config.json
+# 2) inline JSON or Base64 JSON
+# DS2API_CONFIG_JSON=
+# 3) legacy compatibility alias
+# CONFIG_JSON=

-# ---------------------------------------------------------------
-# Admin auth
-# ---------------------------------------------------------------
-# Admin key for /admin login and protected admin APIs.
-# Default is "admin" when unset, but setting it explicitly is recommended.
-DS2API_ADMIN_KEY=admin
-
-# Optional JWT signing secret for admin token.
-# Defaults to DS2API_ADMIN_KEY when unset.
-# DS2API_JWT_SECRET=change-me
-
-# Optional admin JWT validity in hours (default: 24)
-# DS2API_JWT_EXPIRE_HOURS=24
-
-# ---------------------------------------------------------------
-# Config source (choose one)
-# ---------------------------------------------------------------
-# Option A: config file path (local/dev recommended)
-# DS2API_CONFIG_PATH=config.json
-
-# Option B: JSON string
-# DS2API_CONFIG_JSON={"keys":["your-api-key"],"accounts":[{"email":"user@example.com","password":"xxx","token":""}]}
-
-# Option C: Base64 encoded JSON (recommended for Vercel env var)
-# DS2API_CONFIG_JSON=eyJrZXlzIjpbInlvdXItYXBpLWtleSJdLCJhY2NvdW50cyI6W3siZW1haWwiOiJ1c2VyQGV4YW1wbGUuY29tIiwicGFzc3dvcmQiOiJ4eHgiLCJ0b2tlbiI6IiJ9XX0=
-#
-# Generate from local config.json:
-#   DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
-
-# ---------------------------------------------------------------
-# Paths (optional)
-# ---------------------------------------------------------------
-# WASM file used for PoW solving
-# DS2API_WASM_PATH=sha3_wasm_bg.7b9ca65ddd.wasm
-
-# Built admin static assets directory
-# DS2API_STATIC_ADMIN_DIR=static/admin
-
-# Auto-build WebUI on startup when static/admin is missing.
-# Default: enabled on local/Docker, disabled on Vercel.
-# DS2API_AUTO_BUILD_WEBUI=true
-
-# Internal auth secret used by the Vercel hybrid streaming path
-# (Go prepare endpoint <-> Node stream function).
-# Optional: falls back to DS2API_ADMIN_KEY when unset.
-# DS2API_VERCEL_INTERNAL_SECRET=change-me
-
-# Stream lease TTL seconds for Vercel hybrid streaming.
-# During this window, the managed account stays occupied until Node calls release.
-# Default: 900 (15 minutes)
-# DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS=900
-
-# ---------------------------------------------------------------
-# Vercel sync integration (optional)
-# ---------------------------------------------------------------
-# VERCEL_TOKEN=your-vercel-token
-# VERCEL_PROJECT_ID=prj_xxxxxxxxxxxx
-# VERCEL_TEAM_ID=team_xxxxxxxxxxxx
-
-# Optional: Vercel deployment protection bypass secret.
-# If deployment protection is enabled, DS2API will use this value as
-# x-vercel-protection-bypass for internal Node->Go calls on Vercel.
-# You can also use VERCEL_AUTOMATION_BYPASS_SECRET directly.
-# DS2API_VERCEL_PROTECTION_BYPASS=your-bypass-secret
+# Optional: static admin assets path
+# DS2API_STATIC_ADMIN_DIR=/app/static/admin
--- a/.github/workflows/quality-gates.yml
+++ b/.github/workflows/quality-gates.yml
@@ -24,7 +24,7 @@ jobs:
      - name: Setup Node
        uses: actions/setup-node@v4
        with:
-          node-version: "20"
+          node-version: "24"
          cache: "npm"
          cache-dependency-path: webui/package-lock.json

--- a/.github/workflows/release-artifacts.yml
+++ b/.github/workflows/release-artifacts.yml
@@ -32,7 +32,7 @@ jobs:
      - name: Setup Node
        uses: actions/setup-node@v4
        with:
-          node-version: "20"
+          node-version: "24"
          cache: "npm"
          cache-dependency-path: webui/package-lock.json

@@ -51,6 +51,10 @@ jobs:
        run: |
          set -euo pipefail
          TAG="${RELEASE_TAG}"
+          BUILD_VERSION="${TAG}"
+          if [ -z "${BUILD_VERSION}" ] && [ -f VERSION ]; then
+            BUILD_VERSION="$(cat VERSION | tr -d '[:space:]')"
+          fi
          mkdir -p dist

          targets=(
@@ -73,9 +77,9 @@ jobs:

            mkdir -p "${STAGE}/static"
            CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" \
-              go build -trimpath -ldflags="-s -w" -o "${STAGE}/${BIN}" ./cmd/ds2api
+              go build -trimpath -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION}" -o "${STAGE}/${BIN}" ./cmd/ds2api

-            cp config.example.json .env.example sha3_wasm_bg.7b9ca65ddd.wasm LICENSE README.MD README.en.md "${STAGE}/"
+            cp config.example.json .env.example internal/deepseek/assets/sha3_wasm_bg.7b9ca65ddd.wasm LICENSE README.MD README.en.md "${STAGE}/"
            cp -R static/admin "${STAGE}/static/admin"

            if [ "${GOOS}" = "windows" ]; then
--- a/.github/workflows/release-dockerhub.yml
+++ b/.github/workflows/release-dockerhub.yml
@@ -123,5 +123,7 @@ jobs:
          labels: |
            org.opencontainers.image.version=${{ steps.next_version.outputs.new_version }}
            org.opencontainers.image.revision=${{ github.sha }}
+          build-args: |
+            BUILD_VERSION=${{ steps.next_version.outputs.new_tag }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,128 +1,130 @@
-name: Release to Aliyun CR
-
-on:
-  workflow_dispatch:
-    inputs:
-      version_type:
-        description: '版本类型'
-        required: true
-        default: 'patch'
-        type: choice
-        options:
-          - patch
-          - minor
-          - major
-
-permissions:
-  contents: write
-
-jobs:
-  release:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v5
-        with:
-          fetch-depth: 0
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Get current version
-        id: get_version
-        run: |
-          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0")
-          TAG_VERSION=${LATEST_TAG#v}
-
-          if [ -f VERSION ]; then
-            FILE_VERSION=$(cat VERSION | tr -d '[:space:]')
-          else
-            FILE_VERSION="0.0.0"
-          fi
-
-          function version_gt() { test "$(printf '%s\n' "$@" | sort -V | head -n 1)" != "$1"; }
-
-          if version_gt "$FILE_VERSION" "$TAG_VERSION"; then
-            VERSION="$FILE_VERSION"
-          else
-            VERSION="$TAG_VERSION"
-          fi
-
-          echo "Current version: $VERSION"
-          echo "current_version=$VERSION" >> $GITHUB_OUTPUT
-
-      - name: Calculate next version
-        id: next_version
-        env:
-          VERSION_TYPE: ${{ github.event.inputs.version_type }}
-        run: |
-          VERSION="${{ steps.get_version.outputs.current_version }}"
-          BASE_VERSION=$(echo "$VERSION" | sed 's/-.*$//')
-
-          IFS='.' read -r -a version_parts <<< "$BASE_VERSION"
-          MAJOR="${version_parts[0]:-0}"
-          MINOR="${version_parts[1]:-0}"
-          PATCH="${version_parts[2]:-0}"
-
-          case "$VERSION_TYPE" in
-            major)
-              NEW_VERSION="$((MAJOR + 1)).0.0"
-              ;;
-            minor)
-              NEW_VERSION="${MAJOR}.$((MINOR + 1)).0"
-              ;;
-            *)
-              NEW_VERSION="${MAJOR}.${MINOR}.$((PATCH + 1))"
-              ;;
-          esac
-
-          echo "New version: $NEW_VERSION"
-          echo "new_version=$NEW_VERSION" >> $GITHUB_OUTPUT
-          echo "new_tag=v$NEW_VERSION" >> $GITHUB_OUTPUT
-
-      - name: Update VERSION file
-        run: |
-          echo "${{ steps.next_version.outputs.new_version }}" > VERSION
-
-      - name: Commit VERSION and create tag
-        run: |
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-
-          git add VERSION
-          if ! git diff --cached --quiet; then
-            git commit -m "chore: bump version to ${{ steps.next_version.outputs.new_tag }} [skip ci]"
-          fi
-
-          NEW_TAG="${{ steps.next_version.outputs.new_tag }}"
-          git tag -a "$NEW_TAG" -m "Release $NEW_TAG"
-          git push origin HEAD:main "$NEW_TAG"
-
-      # Docker 构建并推送到阿里云
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Log in to Aliyun Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ secrets.ALIYUN_REGISTRY }}
-          username: ${{ secrets.ALIYUN_REGISTRY_USER }}
-          password: ${{ secrets.ALIYUN_REGISTRY_PASSWORD }}
-
-      - name: Build and push Docker image
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          file: ./Dockerfile
-          platforms: linux/amd64,linux/arm64
-          push: true
-          tags: |
-            ${{ secrets.ALIYUN_REGISTRY }}/${{ secrets.ALIYUN_REGISTRY_NAMESPACE }}/ds2api:${{ steps.next_version.outputs.new_tag }}
-            ${{ secrets.ALIYUN_REGISTRY }}/${{ secrets.ALIYUN_REGISTRY_NAMESPACE }}/ds2api:${{ steps.next_version.outputs.new_version }}
-            ${{ secrets.ALIYUN_REGISTRY }}/${{ secrets.ALIYUN_REGISTRY_NAMESPACE }}/ds2api:latest
-          labels: |
-            org.opencontainers.image.version=${{ steps.next_version.outputs.new_version }}
-            org.opencontainers.image.revision=${{ github.sha }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
+name: Release to Aliyun CR
+
+on:
+  workflow_dispatch:
+    inputs:
+      version_type:
+        description: '版本类型'
+        required: true
+        default: 'patch'
+        type: choice
+        options:
+          - patch
+          - minor
+          - major
+
+permissions:
+  contents: write
+
+jobs:
+  release:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Get current version
+        id: get_version
+        run: |
+          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0")
+          TAG_VERSION=${LATEST_TAG#v}
+
+          if [ -f VERSION ]; then
+            FILE_VERSION=$(cat VERSION | tr -d '[:space:]')
+          else
+            FILE_VERSION="0.0.0"
+          fi
+
+          function version_gt() { test "$(printf '%s\n' "$@" | sort -V | head -n 1)" != "$1"; }
+
+          if version_gt "$FILE_VERSION" "$TAG_VERSION"; then
+            VERSION="$FILE_VERSION"
+          else
+            VERSION="$TAG_VERSION"
+          fi
+
+          echo "Current version: $VERSION"
+          echo "current_version=$VERSION" >> $GITHUB_OUTPUT
+
+      - name: Calculate next version
+        id: next_version
+        env:
+          VERSION_TYPE: ${{ github.event.inputs.version_type }}
+        run: |
+          VERSION="${{ steps.get_version.outputs.current_version }}"
+          BASE_VERSION=$(echo "$VERSION" | sed 's/-.*$//')
+
+          IFS='.' read -r -a version_parts <<< "$BASE_VERSION"
+          MAJOR="${version_parts[0]:-0}"
+          MINOR="${version_parts[1]:-0}"
+          PATCH="${version_parts[2]:-0}"
+
+          case "$VERSION_TYPE" in
+            major)
+              NEW_VERSION="$((MAJOR + 1)).0.0"
+              ;;
+            minor)
+              NEW_VERSION="${MAJOR}.$((MINOR + 1)).0"
+              ;;
+            *)
+              NEW_VERSION="${MAJOR}.${MINOR}.$((PATCH + 1))"
+              ;;
+          esac
+
+          echo "New version: $NEW_VERSION"
+          echo "new_version=$NEW_VERSION" >> $GITHUB_OUTPUT
+          echo "new_tag=v$NEW_VERSION" >> $GITHUB_OUTPUT
+
+      - name: Update VERSION file
+        run: |
+          echo "${{ steps.next_version.outputs.new_version }}" > VERSION
+
+      - name: Commit VERSION and create tag
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+          git add VERSION
+          if ! git diff --cached --quiet; then
+            git commit -m "chore: bump version to ${{ steps.next_version.outputs.new_tag }} [skip ci]"
+          fi
+
+          NEW_TAG="${{ steps.next_version.outputs.new_tag }}"
+          git tag -a "$NEW_TAG" -m "Release $NEW_TAG"
+          git push origin HEAD:main "$NEW_TAG"
+
+      # Docker 构建并推送到阿里云
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Aliyun Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ secrets.ALIYUN_REGISTRY }}
+          username: ${{ secrets.ALIYUN_REGISTRY_USER }}
+          password: ${{ secrets.ALIYUN_REGISTRY_PASSWORD }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: |
+            ${{ secrets.ALIYUN_REGISTRY }}/${{ secrets.ALIYUN_REGISTRY_NAMESPACE }}/ds2api:${{ steps.next_version.outputs.new_tag }}
+            ${{ secrets.ALIYUN_REGISTRY }}/${{ secrets.ALIYUN_REGISTRY_NAMESPACE }}/ds2api:${{ steps.next_version.outputs.new_version }}
+            ${{ secrets.ALIYUN_REGISTRY }}/${{ secrets.ALIYUN_REGISTRY_NAMESPACE }}/ds2api:latest
+          labels: |
+            org.opencontainers.image.version=${{ steps.next_version.outputs.new_version }}
+            org.opencontainers.image.revision=${{ github.sha }}
+          build-args: |
+            BUILD_VERSION=${{ steps.next_version.outputs.new_tag }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
--- a/API.en.md
+++ b/API.en.md
@@ -31,6 +31,13 @@ This document describes the actual behavior of the current Go codebase.
 | Health probes | `GET /healthz`, `GET /readyz` |
 | CORS | Enabled (`Access-Control-Allow-Origin: *`, allows `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Vercel-Protection-Bypass`) |

+### 3.0 Adapter-Layer Notes
+
+- OpenAI / Claude / Gemini protocols are now mounted on one shared `chi` router tree assembled in `internal/server/router.go`.
+- Adapter responsibilities are streamlined to: **request normalization → DeepSeek invocation → protocol-shaped rendering**, reducing legacy split-logic paths.
+- Tool-calling semantics are aligned between the Go and Node runtimes: structured parsing first (JSON/XML/invoke/markup), plus stream-time anti-leak filtering.
+- `Admin API` separates static config from runtime policy: `/admin/config*` for configuration state, `/admin/settings*` for runtime behavior.
+
 ---

 ## Configuration Best Practice
@@ -46,6 +53,7 @@ Use it per deployment mode:

 - Local run: read `config.json` directly
 - Docker / Vercel: generate Base64 from `config.json`, then set `DS2API_CONFIG_JSON`
+- Compatibility note: `DS2API_CONFIG_JSON` may also contain raw JSON directly; `CONFIG_JSON` is the legacy fallback variable

 ```bash
 DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
@@ -65,6 +73,7 @@ Two header formats accepted:
 | --- | --- |
 | Bearer Token | `Authorization: Bearer <token>` |
 | API Key Header | `x-api-key: <token>` (no `Bearer` prefix) |
+| Gemini-compatible | `x-goog-api-key: <token>` or `?key=<token>` / `?api_key=<token>` |

 **Auth behavior**:

@@ -72,6 +81,7 @@ Two header formats accepted:
 - Token is not in `config.keys` → **Direct token mode**: treated as a DeepSeek token directly

 **Optional header**: `X-Ds2-Target-Account: <email_or_mobile>` — Pin a specific managed account.
+Gemini-compatible clients can also send `x-goog-api-key`, `?key=`, or `?api_key=` as the caller credential source.

 ### Admin Endpoints (`/admin/*`)

@@ -88,7 +98,9 @@ Two header formats accepted:
 | Method | Path | Auth | Description |
 | --- | --- | --- | --- |
 | GET | `/healthz` | None | Liveness probe |
+| HEAD | `/healthz` | None | Liveness probe (no body) |
 | GET | `/readyz` | None | Readiness probe |
+| HEAD | `/readyz` | None | Readiness probe (no body) |
 | GET | `/v1/models` | None | OpenAI model list |
 | GET | `/v1/models/{id}` | None | OpenAI single-model query (alias accepted) |
 | POST | `/v1/chat/completions` | Business | OpenAI chat completions |
@@ -124,13 +136,16 @@ Two header formats accepted:
 | GET | `/admin/queue/status` | Admin | Account queue status |
 | POST | `/admin/accounts/test` | Admin | Test one account |
 | POST | `/admin/accounts/test-all` | Admin | Test all accounts |
+| POST | `/admin/accounts/sessions/delete-all` | Admin | Delete all sessions for one account |
 | POST | `/admin/import` | Admin | Batch import keys/accounts |
 | POST | `/admin/test` | Admin | Test API through service |
 | POST | `/admin/vercel/sync` | Admin | Sync config to Vercel |
 | GET | `/admin/vercel/status` | Admin | Vercel sync status |
+| POST | `/admin/vercel/status` | Admin | Vercel sync status, optionally comparing a draft config |
 | GET | `/admin/export` | Admin | Export config JSON/Base64 |
 | GET | `/admin/dev/captures` | Admin | Read local packet-capture entries |
 | DELETE | `/admin/dev/captures` | Admin | Clear local packet-capture entries |
+| GET | `/admin/version` | Admin | Check current version and latest Release |

 ---

@@ -580,6 +595,10 @@ Returns sanitized config.
 ```json
 {
  "keys": ["k1", "k2"],
+  "env_backed": false,
+  "env_source_present": true,
+  "env_writeback_enabled": true,
+  "config_path": "/data/config.json",
  "accounts": [
    {
      "identifier": "user@example.com",
@@ -599,7 +618,7 @@ Returns sanitized config.

 ### `POST /admin/config`

-Updatable fields: `keys`, `accounts`, `claude_mapping`.
+Only updates `keys`, `accounts`, and `claude_mapping`.

 **Request**:

@@ -620,23 +639,27 @@ Updatable fields: `keys`, `accounts`, `claude_mapping`.

 Reads runtime settings and status, including:

- `admin` (JWT expiry, default-password warning, etc.)
- `runtime` (`account_max_inflight`, `account_max_queue`, `global_max_inflight`)
- `toolcall` / `responses` / `embeddings`
+- `success`
+- `admin` (`has_password_hash`, `jwt_expire_hours`, `jwt_valid_after_unix`, `default_password_warning`)
+- `runtime` (`account_max_inflight`, `account_max_queue`, `global_max_inflight`, `token_refresh_interval_hours`)
+- `responses` / `embeddings`
+- `auto_delete` (`sessions`)
 - `claude_mapping` / `model_aliases`
 - `env_backed`, `needs_vercel_sync`
+- `toolcall` policy is fixed to `feature_match + high` and is no longer returned or editable via settings

 ### `PUT /admin/settings`

 Hot-updates runtime settings. Supported fields:

 - `admin.jwt_expire_hours`
- `runtime.account_max_inflight` / `runtime.account_max_queue` / `runtime.global_max_inflight`
- `toolcall.mode` / `toolcall.early_emit_confidence`
+- `runtime.account_max_inflight` / `runtime.account_max_queue` / `runtime.global_max_inflight` / `runtime.token_refresh_interval_hours`
 - `responses.store_ttl_seconds`
 - `embeddings.provider`
+- `auto_delete.sessions`
 - `claude_mapping`
 - `model_aliases`
+- `toolcall` policy is fixed and is no longer writable through settings

 ### `POST /admin/settings/password`

@@ -648,6 +671,8 @@ Request example:
 {"new_password":"your-new-password"}
 ```

+It also accepts `{"password":"your-new-password"}`.
+
 ### `POST /admin/config/import`

 Imports full config with:
@@ -656,6 +681,8 @@ Imports full config with:
 - `mode=replace`

 The request can send config directly, or wrapped as `{"config": {...}, "mode":"merge"}`.
+Query params `?mode=merge` / `?mode=replace` are also supported.
+Import accepts `keys`, `accounts`, `claude_mapping` / `claude_model_mapping`, `model_aliases`, `admin`, `runtime`, `responses`, `embeddings`, and `auto_delete`; legacy `toolcall` fields are ignored.

 ### `GET /admin/config/export`

@@ -681,6 +708,7 @@ Exports full config in three forms: `config`, `json`, and `base64`.
 | --- | --- | --- |
 | `page` | `1` | ≥ 1 |
 | `page_size` | `10` | 1–100 |
+| `q` | empty | Filter by identifier / email / mobile |

 **Response**:

@@ -693,7 +721,8 @@ Exports full config in three forms: `config`, `json`, and `base64`.
      "mobile": "",
      "has_password": true,
      "has_token": true,
-      "token_preview": "abc..."
+      "token_preview": "abc...",
+      "test_status": "ok"
    }
  ],
  "total": 25,
@@ -703,6 +732,8 @@ Exports full config in three forms: `config`, `json`, and `base64`.
 }
 ```

+Returned items also include `test_status`, usually `ok` or `failed`.
+
 ### `POST /admin/accounts`

 ```json
@@ -755,10 +786,14 @@ Exports full config in three forms: `config`, `json`, and `base64`.
  "success": true,
  "response_time": 1240,
  "message": "API test successful (session creation only)",
-  "model": "deepseek-chat"
+  "model": "deepseek-chat",
+  "session_count": 0,
+  "config_writable": true
 }
 ```

+If the request includes a `message`, the response may also include `thinking` when the upstream response carries reasoning text.
+
 ### `POST /admin/accounts/test-all`

 Optional request field: `model`.
@@ -772,6 +807,25 @@ Optional request field: `model`.
 }
 ```

+The internal concurrency limit is currently fixed at 5.
+
+### `POST /admin/accounts/sessions/delete-all`
+
+Deletes all DeepSeek sessions for a specific account. Request example:
+
+```json
+{"identifier":"user@example.com"}
+```
+
+Response:
+
+```json
+{"success": true, "message": "删除成功"}
+```
+
+If the account is missing or deletion fails, `success` becomes `false` and `message` contains the error.
+The current handler returns the Chinese literal `删除成功` on success.
+
 ### `POST /admin/import`

 Batch import keys and accounts.
@@ -849,16 +903,25 @@ Or manual deploy required:
 }
 ```

+Failed account checks are returned in `failed_accounts`, and any saved Vercel credentials are returned in `saved_credentials`.
+
 ### `GET /admin/vercel/status`

 ```json
 {
  "synced": true,
  "last_sync_time": 1738400000,
-  "has_synced_before": true
+  "has_synced_before": true,
+  "env_backed": false,
+  "config_hash": "....",
+  "last_synced_hash": "....",
+  "draft_hash": "....",
+  "draft_differs": false
 }
 ```

+`POST /admin/vercel/status` can also accept `config_override` to compare a draft config against the current synced config.
+
 ### `GET /admin/export`

 ```json
@@ -868,6 +931,29 @@ Or manual deploy required:
 }
 ```

+This is the same payload as `GET /admin/config/export`, just with a shorter path.
+
+### `GET /admin/version`
+
+Checks the current build version and the latest GitHub Release:
+
+```json
+{
+  "success": true,
+  "current_version": "3.0.0",
+  "current_tag": "v3.0.0",
+  "source": "file:VERSION",
+  "checked_at": "2026-03-29T00:00:00Z",
+  "latest_tag": "v3.0.0",
+  "latest_version": "3.0.0",
+  "release_url": "https://github.com/CJackHwang/ds2api/releases/tag/v3.0.0",
+  "published_at": "2026-03-28T12:00:00Z",
+  "has_update": false
+}
+```
+
+If GitHub API access fails, the response includes `check_error` while still returning HTTP 200.
+
 ### `GET /admin/dev/captures`

 Reads local packet-capture status and recent entries (Admin auth required):
--- a/API.md
+++ b/API.md
@@ -31,6 +31,13 @@
 | 健康检查 | `GET /healthz`、`GET /readyz` |
 | CORS | 已启用（`Access-Control-Allow-Origin: *`，允许 `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Vercel-Protection-Bypass`） |

+### 3.0 接口适配层说明
+
+- OpenAI / Claude / Gemini 三套协议已统一挂在同一 `chi` 路由树上，由 `internal/server/router.go` 负责装配。
+- 适配器层职责收敛为：**请求归一化 → DeepSeek 调用 → 协议形态渲染**，减少历史版本中“同能力多处实现”的分叉。
+- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致：优先结构化解析（JSON/XML/invoke/markup），并在流式场景执行防泄漏筛分。
+- `Admin API` 将配置与运行时策略分开：`/admin/config*` 管静态配置，`/admin/settings*` 管运行时行为。
+
 ---

 ## 配置最佳实践
@@ -46,6 +53,7 @@ cp config.example.json config.json

 - 本地运行：直接读取 `config.json`
 - Docker / Vercel：从 `config.json` 生成 Base64，填入 `DS2API_CONFIG_JSON`
+- 兼容写法：`DS2API_CONFIG_JSON` 也可直接填原始 JSON；`CONFIG_JSON` 是旧版兼容回退变量

 ```bash
 DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
@@ -65,6 +73,7 @@ Vercel 一键部署可先只填 `DS2API_ADMIN_KEY`，部署后在 `/admin` 导
 | --- | --- |
 | Bearer Token | `Authorization: Bearer <token>` |
 | API Key Header | `x-api-key: <token>`（无 `Bearer` 前缀） |
+| Gemini 兼容 | `x-goog-api-key: <token>` 或 `?key=<token>` / `?api_key=<token>` |

 **鉴权行为**：

@@ -72,6 +81,7 @@ Vercel 一键部署可先只填 `DS2API_ADMIN_KEY`，部署后在 `/admin` 导
 - token 不在 `config.keys` 中 → **直通 token 模式**，直接作为 DeepSeek token 使用

 **可选请求头**：`X-Ds2-Target-Account: <email_or_mobile>` — 指定使用某个托管账号。
+Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=` 作为凭据来源。

 ### Admin 接口（`/admin/*`）

@@ -88,7 +98,9 @@ Vercel 一键部署可先只填 `DS2API_ADMIN_KEY`，部署后在 `/admin` 导
 | 方法 | 路径 | 鉴权 | 说明 |
 | --- | --- | --- | --- |
 | GET | `/healthz` | 无 | 存活探针 |
+| HEAD | `/healthz` | 无 | 存活探针（无响应体） |
 | GET | `/readyz` | 无 | 就绪探针 |
+| HEAD | `/readyz` | 无 | 就绪探针（无响应体） |
 | GET | `/v1/models` | 无 | OpenAI 模型列表 |
 | GET | `/v1/models/{id}` | 无 | OpenAI 单模型查询（支持 alias 入参） |
 | POST | `/v1/chat/completions` | 业务 | OpenAI 对话补全 |
@@ -124,13 +136,16 @@ Vercel 一键部署可先只填 `DS2API_ADMIN_KEY`，部署后在 `/admin` 导
 | GET | `/admin/queue/status` | Admin | 账号队列状态 |
 | POST | `/admin/accounts/test` | Admin | 测试单个账号 |
 | POST | `/admin/accounts/test-all` | Admin | 测试全部账号 |
+| POST | `/admin/accounts/sessions/delete-all` | Admin | 删除某账号的全部会话 |
 | POST | `/admin/import` | Admin | 批量导入 keys/accounts |
 | POST | `/admin/test` | Admin | 测试当前 API 可用性 |
 | POST | `/admin/vercel/sync` | Admin | 同步配置到 Vercel |
 | GET | `/admin/vercel/status` | Admin | Vercel 同步状态 |
+| POST | `/admin/vercel/status` | Admin | Vercel 同步状态 / 草稿对比 |
 | GET | `/admin/export` | Admin | 导出配置 JSON/Base64 |
 | GET | `/admin/dev/captures` | Admin | 查看本地抓包记录 |
 | DELETE | `/admin/dev/captures` | Admin | 清空本地抓包记录 |
+| GET | `/admin/version` | Admin | 查询当前版本与最新 Release |

 ---

@@ -286,7 +301,8 @@ data: [DONE]

 补充说明：

- **非代码块上下文**下，工具 JSON 即使与普通文本混合，也会按特征识别并产出可执行 tool call（前后普通文本仍可透传）。
+- **非代码块上下文**下，工具负载即使与普通文本混合，也会按特征识别并产出可执行 tool call（前后普通文本仍可透传）。
+- 解析器以 XML/Markup 为最高优先级，并兼容 JSON、ANTML、text-kv 等格式输入；最终按客户端协议转译为对应 tool call 结构（OpenAI/Claude/Gemini）。
 - Markdown fenced code block（例如 ```json ... ```）中的 `tool_calls` 仅视为示例文本，不会被执行。

 ---
@@ -346,7 +362,8 @@ data: [DONE]
 ```

 流式场景下若 `tool_choice=required` 违规，会返回 `response.failed` 后结束（不再发送 `response.completed`）。
-未在 `tools` 声明中的工具名会被严格拒绝，不会作为有效 tool call 下发。
+
+> 当前版本说明：解析层默认“尽量提取结构化 tool call”，未启用基于 `tools` allow-list 的硬拒绝；是否执行仍应由你的工具执行器做白名单校验。

 ### `GET /v1/responses/{response_id}`

@@ -492,6 +509,8 @@ data: {"type":"message_stop"}
 }
 ```

+返回项还会包含 `test_status`，当前值通常为 `ok` 或 `failed`。
+
 ---

 ## Gemini 兼容接口
@@ -585,6 +604,10 @@ data: {"type":"message_stop"}
 ```json
 {
  "keys": ["k1", "k2"],
+  "env_backed": false,
+  "env_source_present": true,
+  "env_writeback_enabled": true,
+  "config_path": "/data/config.json",
  "accounts": [
    {
      "identifier": "user@example.com",
@@ -604,7 +627,7 @@ data: {"type":"message_stop"}

 ### `POST /admin/config`

-可更新 `keys`、`accounts`、`claude_mapping`。
+只更新 `keys`、`accounts`、`claude_mapping`。

 **请求**：

@@ -625,23 +648,27 @@ data: {"type":"message_stop"}

 读取运行时设置与状态，返回：

- `admin`（JWT 过期、默认密码告警等）
- `runtime`（`account_max_inflight`、`account_max_queue`、`global_max_inflight`）
- `toolcall` / `responses` / `embeddings`
+- `success`
+- `admin`（`has_password_hash`、`jwt_expire_hours`、`jwt_valid_after_unix`、`default_password_warning`）
+- `runtime`（`account_max_inflight`、`account_max_queue`、`global_max_inflight`、`token_refresh_interval_hours`）
+- `responses` / `embeddings`
+- `auto_delete`（`sessions`）
 - `claude_mapping` / `model_aliases`
 - `env_backed`、`needs_vercel_sync`
+- `toolcall` 策略已固定为 `feature_match + high`，不再通过 settings 返回或修改

 ### `PUT /admin/settings`

 热更新运行时设置。支持更新：

 - `admin.jwt_expire_hours`
- `runtime.account_max_inflight` / `runtime.account_max_queue` / `runtime.global_max_inflight`
- `toolcall.mode` / `toolcall.early_emit_confidence`
+- `runtime.account_max_inflight` / `runtime.account_max_queue` / `runtime.global_max_inflight` / `runtime.token_refresh_interval_hours`
 - `responses.store_ttl_seconds`
 - `embeddings.provider`
+- `auto_delete.sessions`
 - `claude_mapping`
 - `model_aliases`
+- `toolcall` 策略已固定，不再作为可写入字段

 ### `POST /admin/settings/password`

@@ -653,6 +680,8 @@ data: {"type":"message_stop"}
 {"new_password":"your-new-password"}
 ```

+也兼容 `{"password":"your-new-password"}`。
+
 ### `POST /admin/config/import`

 导入完整配置，支持：
@@ -661,6 +690,8 @@ data: {"type":"message_stop"}
 - `mode=replace`

 请求可直接传配置对象，或使用 `{"config": {...}, "mode":"merge"}` 包裹格式。
+也支持在查询参数里传 `?mode=merge` / `?mode=replace`。
+导入时会接受 `keys`、`accounts`、`claude_mapping` / `claude_model_mapping`、`model_aliases`、`admin`、`runtime`、`responses`、`embeddings`、`auto_delete` 等字段；`toolcall` 相关字段会被忽略。

 ### `GET /admin/config/export`

@@ -686,6 +717,7 @@ data: {"type":"message_stop"}
 | --- | --- | --- |
 | `page` | `1` | ≥ 1 |
 | `page_size` | `10` | 1–100 |
+| `q` | 空 | 按 identifier / email / mobile 过滤 |

 **响应**：

@@ -698,7 +730,8 @@ data: {"type":"message_stop"}
      "mobile": "",
      "has_password": true,
      "has_token": true,
-      "token_preview": "abc..."
+      "token_preview": "abc...",
+      "test_status": "ok"
    }
  ],
  "total": 25,
@@ -760,10 +793,14 @@ data: {"type":"message_stop"}
  "success": true,
  "response_time": 1240,
  "message": "API 测试成功（仅会话创建）",
-  "model": "deepseek-chat"
+  "model": "deepseek-chat",
+  "session_count": 0,
+  "config_writable": true
 }
 ```

+如果传入 `message`，还会附带 `thinking`（当上游返回思考内容时）。
+
 ### `POST /admin/accounts/test-all`

 可选请求字段：`model`
@@ -777,6 +814,24 @@ data: {"type":"message_stop"}
 }
 ```

+内部并发上限当前固定为 5。
+
+### `POST /admin/accounts/sessions/delete-all`
+
+清空指定账号的所有 DeepSeek 会话。请求体示例：
+
+```json
+{"identifier":"user@example.com"}
+```
+
+响应：
+
+```json
+{"success": true, "message": "删除成功"}
+```
+
+如果账号不存在或删除失败，`success` 会是 `false`，`message` 会返回错误原因。
+
 ### `POST /admin/import`

 批量导入 keys 与 accounts。
@@ -854,16 +909,25 @@ data: {"type":"message_stop"}
 }
 ```

+失败校验的账号会通过 `failed_accounts` 返回；成功保存到 Vercel 的凭据会通过 `saved_credentials` 返回。
+
 ### `GET /admin/vercel/status`

 ```json
 {
  "synced": true,
  "last_sync_time": 1738400000,
-  "has_synced_before": true
+  "has_synced_before": true,
+  "env_backed": false,
+  "config_hash": "....",
+  "last_synced_hash": "....",
+  "draft_hash": "....",
+  "draft_differs": false
 }
 ```

+`POST /admin/vercel/status` 还可以携带 `config_override`，用于对比“草稿配置”和当前已同步配置。
+
 ### `GET /admin/export`

 ```json
@@ -873,6 +937,29 @@ data: {"type":"message_stop"}
 }
 ```

+该接口与 `GET /admin/config/export` 返回相同内容，只是路径更短。
+
+### `GET /admin/version`
+
+查询当前构建版本与 GitHub 最新 Release：
+
+```json
+{
+  "success": true,
+  "current_version": "3.0.0",
+  "current_tag": "v3.0.0",
+  "source": "file:VERSION",
+  "checked_at": "2026-03-29T00:00:00Z",
+  "latest_tag": "v3.0.0",
+  "latest_version": "3.0.0",
+  "release_url": "https://github.com/CJackHwang/ds2api/releases/tag/v3.0.0",
+  "published_at": "2026-03-28T12:00:00Z",
+  "has_update": false
+}
+```
+
+如果 GitHub API 不可用，响应里会额外包含 `check_error`，但 HTTP 状态仍为 200。
+
 ### `GET /admin/dev/captures`

 查看本地抓包状态与最近记录（需 Admin 鉴权）：
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:20 AS webui-builder
+FROM node:24 AS webui-builder

 WORKDIR /app/webui
 COPY webui/package.json webui/package-lock.json ./
@@ -6,30 +6,35 @@ RUN npm ci
 COPY webui ./
 RUN npm run build

-FROM golang:1.24 AS go-builder
+FROM golang:1.26 AS go-builder
 WORKDIR /app
 ARG TARGETOS
 ARG TARGETARCH
+ARG BUILD_VERSION
 COPY go.mod go.sum* ./
 RUN go mod download
 COPY . .
 RUN set -eux; \
    GOOS="${TARGETOS:-$(go env GOOS)}"; \
    GOARCH="${TARGETARCH:-$(go env GOARCH)}"; \
-    CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" go build -o /out/ds2api ./cmd/ds2api
+    BUILD_VERSION_RESOLVED="${BUILD_VERSION:-}"; \
+    if [ -z "${BUILD_VERSION_RESOLVED}" ] && [ -f VERSION ]; then BUILD_VERSION_RESOLVED="$(cat VERSION | tr -d "[:space:]")"; fi; \
+    CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" go build -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION_RESOLVED}" -o /out/ds2api ./cmd/ds2api

 FROM busybox:1.36.1-musl AS busybox-tools

 FROM debian:bookworm-slim AS runtime-base
 WORKDIR /app
-COPY --from=go-builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
 COPY --from=busybox-tools /bin/busybox /usr/local/bin/busybox
 EXPOSE 5001
 CMD ["/usr/local/bin/ds2api"]

 FROM runtime-base AS runtime-from-source
 COPY --from=go-builder /out/ds2api /usr/local/bin/ds2api
-COPY --from=go-builder /app/sha3_wasm_bg.7b9ca65ddd.wasm /app/sha3_wasm_bg.7b9ca65ddd.wasm
+COPY --from=go-builder /app/internal/deepseek/assets/sha3_wasm_bg.7b9ca65ddd.wasm /app/sha3_wasm_bg.7b9ca65ddd.wasm
 COPY --from=go-builder /app/config.example.json /app/config.example.json
 COPY --from=webui-builder /app/static/admin /app/static/admin

--- a/README.MD
+++ b/README.MD
@@ -8,7 +8,7 @@
 ![Stars](https://img.shields.io/github/stars/CJackHwang/ds2api.svg)
 ![Forks](https://img.shields.io/github/forks/CJackHwang/ds2api.svg)
 [![Release](https://img.shields.io/github/v/release/CJackHwang/ds2api?display_name=tag)](https://github.com/CJackHwang/ds2api/releases)
-[![Docker](https://img.shields.io/badge/docker-ready-blue.svg)](DEPLOY.md)
+[![Docker](https://img.shields.io/badge/docker-ready-blue.svg)](docs/DEPLOY.md)
 [![Deploy on Zeabur](https://zeabur.com/button.svg)](https://zeabur.com/templates/L4CFHP)
 [![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https://github.com/CJackHwang/ds2api)

@@ -16,47 +16,76 @@

 将 DeepSeek Web 对话能力转换为 OpenAI、Claude 与 Gemini 兼容 API。后端为 **Go 全量实现**，前端为 React WebUI 管理台（源码在 `webui/`，部署时自动构建到 `static/admin`）。

+> **重要免责声明**
+>
+> 本仓库仅供学习、研究、个人实验和内部验证使用，不提供任何形式的商业授权、适用性保证或结果保证。
+>
+> 作者及仓库维护者不对因使用、修改、分发、部署或依赖本项目而产生的任何直接或间接损失、账号封禁、数据丢失、法律风险或第三方索赔负责。
+>
+> 请勿将本项目用于违反服务条款、协议、法律法规或平台规则的场景。商业使用前请自行确认 `LICENSE`、相关协议以及你是否获得了作者的书面许可。
+
 ## 架构概览

 ```mermaid
 flowchart LR
-    Client["🖥️ 客户端\n(OpenAI / Claude / Gemini 兼容)"]
+    Client["🖥️ 客户端 / SDK\n(OpenAI / Claude / Gemini)"]
+    Upstream["☁️ DeepSeek API"]

-    subgraph DS2API["DS2API 服务"]
-        direction TB
-        CORS["CORS 中间件"]
-        Auth["🔐 鉴权中间件"]
+    subgraph DS2API["DS2API 3.x（统一 OpenAI 内核）"]
+        Router["chi Router + 中间件\n(RequestID / RealIP / Logger / Recoverer / CORS)"]

-        subgraph Adapters["适配器层"]
-            OA["OpenAI 适配器\n/v1/*"]
-            CA["Claude 适配器\n/anthropic/*"]
-            GA["Gemini 适配器\n/v1beta/models/*"]
+        subgraph Adapters["协议适配层"]
+            OA["OpenAI\n/v1/*"]
+            CA["Claude\n/anthropic/* + /v1/messages"]
+            GA["Gemini\n/v1beta/models/* + /v1/models/*"]
+            Admin["Admin API\n/admin/*"]
+            WebUI["WebUI\n/admin（静态托管）"]
        end

-        subgraph Support["支撑模块"]
-            Pool["📦 账号池 / 并发队列"]
-            PoW["⚙️ PoW WASM\n(wazero)"]
+        subgraph Runtime["运行时核心能力"]
+            Bridge["CLIProxy 转换桥\n(多协议 <-> OpenAI)"]
+            OAEngine["OpenAI ChatCompletions\n(统一工具调用与流式语义)"]
+            Auth["Auth Resolver\n(API key / bearer / x-goog-api-key)"]
+            Pool["Account Pool + Queue\n(并发槽位 + 等待队列)"]
+            DSClient["DeepSeek Client\n(Session / Auth / HTTP)"]
+            Pow["PoW WASM\n(wazero 预加载)"]
+            Tool["Tool Sieve\n(Go/Node 语义对齐)"]
        end
-
-        Admin["🛠️ Admin API\n/admin/*"]
-        WebUI["🌐 WebUI\n(/admin)"]
    end

-    DS["☁️ DeepSeek API"]
+    Client --> Router
+    Router --> OA & CA & GA
+    Router --> Admin
+    Router --> WebUI

-    Client -- "请求" --> CORS --> Auth
-    Auth --> OA & CA & GA
-    OA & CA & GA -- "调用" --> DS
-    Auth --> Admin
-    OA & CA & GA -. "轮询选账号" .-> Pool
-    OA & CA & GA -. "计算 PoW" .-> PoW
-    DS -- "响应" --> Client
+    OA --> OAEngine
+    CA & GA --> Bridge
+    Bridge --> OAEngine
+    OAEngine --> Auth
+    OAEngine -.账号轮询.-> Pool
+    OAEngine -.工具调用解析.-> Tool
+    OAEngine -.PoW 计算.-> Pow
+    Auth --> DSClient
+    DSClient --> Upstream
+    Upstream --> DSClient
+    OAEngine --> Bridge
+    Bridge --> Client
 ```

 - **后端**：Go（`cmd/ds2api/`、`api/`、`internal/`），不依赖 Python 运行时
 - **前端**：React 管理台（`webui/`），运行时托管静态构建产物
 - **部署**：本地运行、Docker、Vercel Serverless、Linux systemd

+### 3.0 底层架构调整（相较旧版本）
+
+- **统一路由内核**：所有协议入口统一汇聚到 `internal/server/router.go`，并在同一路由树中注册 OpenAI / Claude / Gemini / Admin / WebUI 路由，避免多入口行为漂移。
+- **统一执行链路**：Claude / Gemini 入口先经 `internal/translatorcliproxy` 做协议转换，再进入 `openai.ChatCompletions` 统一处理工具调用与流式语义，最后再转换回原协议响应。
+- **适配器分层更清晰**：`internal/adapter/{claude,gemini}` 负责入口/出口协议封装，`internal/adapter/openai` 负责核心执行，DeepSeek 侧调用只保留在 OpenAI 内核中。
+- **Tool Calling 双运行时对齐**：Go 侧（`internal/util`）与 Vercel Node 侧（`internal/js/helpers/stream-tool-sieve`）保持一致的解析/防泄漏语义，覆盖 JSON / XML / invoke / text-kv 多风格输入。
+- **配置与运行时设置解耦**：静态配置（`config`）与运行时策略（`settings`）通过 Admin API 分离管理，支持热更新和密码轮换失效旧 JWT。
+- **流式能力升级**：`/v1/responses` 与 `/v1/chat/completions` 共享更一致的工具调用增量输出策略，降低不同 SDK 下的行为差异。
+- **可观测与可运维增强**：`/healthz`、`/readyz`、`/admin/version`、`/admin/dev/captures` 形成排障闭环，便于发布后验证。
+
 ## 核心能力

 | 能力 | 说明 |
@@ -68,7 +97,7 @@ flowchart LR
 | 并发队列控制 | 每账号 in-flight 上限 + 等待队列，动态计算建议并发值 |
 | DeepSeek PoW | WASM 计算（`wazero`），无需外部 Node.js 依赖 |
 | Tool Calling | 防泄漏处理：非代码块高置信特征识别、`delta.tool_calls` 早发、结构化增量输出 |
-| Admin API | 配置管理、运行时设置热更新、账号测试 / 批量测试、导入导出、Vercel 同步 |
+| Admin API | 配置管理、运行时设置热更新、账号测试 / 批量测试、会话清理、导入导出、Vercel 同步、版本检查 |
 | WebUI 管理台 | `/admin` 单页应用（中英文双语、深色模式） |
 | 运维探针 | `GET /healthz`（存活）、`GET /readyz`（就绪） |

@@ -106,6 +135,14 @@ flowchart LR
 可通过配置中的 `claude_mapping` 或 `claude_model_mapping` 覆盖映射关系。
 另外，`/anthropic/v1/models` 现已包含 Claude 1.x/2.x/3.x/4.x 历史模型 ID 与常见别名，便于旧客户端直接兼容。

+
+#### Claude Code 接入避坑（实测）
+
+- `ANTHROPIC_BASE_URL` 推荐直接指向 DS2API 根地址（例如 `http://127.0.0.1:5001`），Claude Code 会请求 `/v1/messages?beta=true`。
+- `ANTHROPIC_API_KEY` 需要与 `config.json` 中 `keys` 一致；建议同时保留常规 key 与 `sk-ant-*` 形态 key，兼容不同客户端校验习惯。
+- 若系统设置了代理，建议对 DS2API 地址配置 `NO_PROXY=127.0.0.1,localhost,<你的主机IP>`，避免本地回环请求被代理拦截。
+- 如遇“工具调用输出成文本、未执行”问题，请升级到包含 Claude 工具调用多格式解析（JSON/XML/ANTML/invoke）的版本。
+
 ### Gemini 接口

 Gemini 适配器将模型名通过 `model_aliases` 或内置规则映射到 DeepSeek 原生模型，支持 `generateContent` 和 `streamGenerateContent` 两种调用方式，并完整支持 Tool Calling（`functionDeclarations` → `functionCall` 输出）。
@@ -124,10 +161,11 @@ cp config.example.json config.json
 后续部署建议：
 - 本地运行：直接读取 `config.json`
 - Docker / Vercel：由 `config.json` 生成 `DS2API_CONFIG_JSON`（Base64）注入环境变量
+- 兼容写法：`DS2API_CONFIG_JSON` 也可以直接写原始 JSON；`CONFIG_JSON` 是旧版回退变量

 ### 方式一：本地运行

-**前置要求**：Go 1.24+，Node.js 20+（仅在需要构建 WebUI 时）
+**前置要求**：Go 1.26+，Node.js 20+（仅在需要构建 WebUI 时）

 ```bash
 # 1. 克隆仓库
@@ -144,28 +182,27 @@ go run ./cmd/ds2api

 默认监听地址：`http://localhost:5001`

-> **WebUI 自动构建**：本地首次启动时，若 `static/admin` 不存在，会自动尝试执行 `npm install && npm run build`（需要本机有 Node.js）。你也可以手动构建：`./scripts/build-webui.sh`
+> **WebUI 自动构建**：本地首次启动时，若 `static/admin` 不存在，会自动尝试执行 `npm ci`（仅在缺少依赖时）和 `npm run build -- --outDir static/admin --emptyOutDir`（需要本机有 Node.js）。你也可以手动构建：`./scripts/build-webui.sh`

 ### 方式二：Docker 运行

 ```bash
-# 1. 准备环境变量文件
+# 1. 准备环境变量和配置文件
 cp .env.example .env
+cp config.example.json config.json

-# 2. 从 config.json 生成 DS2API_CONFIG_JSON（单行 Base64）
-DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
-
-# 3. 编辑 .env，设置：
+# 2. 编辑 .env（至少设置 DS2API_ADMIN_KEY）
 #    DS2API_ADMIN_KEY=请替换为强密码
-#    DS2API_CONFIG_JSON=${DS2API_CONFIG_JSON}

-# 4. 启动
+# 3. 启动
 docker-compose up -d

-# 5. 查看日志
+# 4. 查看日志
 docker-compose logs -f
 ```

+默认 `docker-compose.yml` 会把宿主机 `6011` 映射到容器内的 `5001`。如果你希望直接对外暴露 `5001`，请调整 `ports` 配置。
+
 更新镜像：`docker-compose up -d --build`

 #### Zeabur 一键部署（Dockerfile）
@@ -174,6 +211,8 @@ docker-compose logs -f
 2. 部署完成后访问 `/admin`，使用 Zeabur 环境变量/模板指引中的 `DS2API_ADMIN_KEY` 登录。
 3. 在管理台导入/编辑配置（会写入并持久化到 `/data/config.json`）。

+说明：Zeabur 使用仓库内 `Dockerfile` 直接构建时，不需要额外传入 `BUILD_VERSION`；镜像会优先读取该构建参数，未提供时自动回退到仓库根目录的 `VERSION` 文件。
+
 ### 方式三：Vercel 部署

 1. Fork 仓库到自己的 GitHub
@@ -196,7 +235,7 @@ base64 < config.json | tr -d '\n'

 > **流式说明**：`/v1/chat/completions` 在 Vercel 上默认走 `api/chat-stream.js`（Node Runtime）以保证实时 SSE。鉴权、账号选择、会话/PoW 准备仍由 Go 内部 prepare 接口完成；流式响应（含 `tools`）在 Node 侧执行与 Go 对齐的输出组装与防泄漏处理。

-详细部署说明请参阅 [部署指南](DEPLOY.md)。
+详细部署说明请参阅 [部署指南](docs/DEPLOY.md)。

 ### 方式四：下载 Release 构建包

@@ -238,13 +277,11 @@ cp opencode.json.example opencode.json
  "accounts": [
    {
      "email": "user@example.com",
-      "password": "your-password",
-      "token": ""
+      "password": "your-password"
    },
    {
      "mobile": "12345678901",
-      "password": "your-password",
-      "token": ""
+      "password": "your-password"
    }
  ],
  "model_aliases": {
@@ -255,17 +292,13 @@ cp opencode.json.example opencode.json
  "compat": {
    "wide_input_strict_output": true
  },
-  "toolcall": {
-    "mode": "feature_match",
-    "early_emit_confidence": "high"
-  },
  "responses": {
    "store_ttl_seconds": 900
  },
  "embeddings": {
    "provider": "deterministic"
  },
-  "claude_model_mapping": {
+  "claude_mapping": {
    "fast": "deepseek-chat",
    "slow": "deepseek-reasoner"
  },
@@ -275,22 +308,27 @@ cp opencode.json.example opencode.json
  "runtime": {
    "account_max_inflight": 2,
    "account_max_queue": 0,
-    "global_max_inflight": 0
+    "global_max_inflight": 0,
+    "token_refresh_interval_hours": 6
+  },
+  "auto_delete": {
+    "sessions": false
  }
 }
 ```

 - `keys`：API 访问密钥列表，客户端通过 `Authorization: Bearer <key>` 鉴权
 - `accounts`：DeepSeek 账号列表，支持 `email` 或 `mobile` 登录
-- `token`：留空则首次请求时自动登录获取；也可预填已有 token
+- `token`：配置文件中即使填写也会在加载时被清空（不会从 `config.json` 读取 token）；实际 token 仅在运行时内存中维护并自动刷新
 - `model_aliases`：常见模型名（如 GPT/Codex/Claude）到 DeepSeek 模型的映射
 - `compat.wide_input_strict_output`：建议保持 `true`（当前实现默认宽进严出）
-- `toolcall`：固定采用特征匹配 + 高置信早发策略
+- `toolcall`：策略已固定为特征匹配 + 高置信早发，不再作为可配置项
 - `responses.store_ttl_seconds`：`/v1/responses/{id}` 的内存缓存 TTL
 - `embeddings.provider`：embedding 提供方（当前内置 `deterministic/mock/builtin`）
-- `claude_model_mapping`：字典中 `fast`/`slow` 后缀映射到对应 DeepSeek 模型
+- `claude_mapping`：字典中 `fast`/`slow` 后缀映射到对应 DeepSeek 模型（兼容读取 `claude_model_mapping`）
 - `admin`：管理后台设置（JWT 过期时间、密码哈希等），可通过 Admin Settings API 热更新
-- `runtime`：运行时参数（并发限制、队列大小），可通过 Admin Settings API 热更新
+- `runtime`：运行时参数（并发限制、队列大小、托管账号 token 刷新间隔），可通过 Admin Settings API 热更新；`account_max_queue=0`/`global_max_inflight=0` 表示按推荐值自动计算，`token_refresh_interval_hours=6` 为默认强制重登间隔
+- `auto_delete.sessions`：是否在请求结束后自动清理 DeepSeek 会话（默认 `false`，可在 Settings 热更新）

 ### 环境变量

@@ -303,9 +341,14 @@ cp opencode.json.example opencode.json
 | `DS2API_JWT_EXPIRE_HOURS` | Admin JWT 过期小时数 | `24` |
 | `DS2API_CONFIG_PATH` | 配置文件路径 | `config.json` |
 | `DS2API_CONFIG_JSON` | 直接注入配置（JSON 或 Base64） | — |
+| `CONFIG_JSON` | 旧版兼容配置注入 | — |
+| `DS2API_ENV_WRITEBACK` | 环境变量模式下自动写回配置文件并切换文件模式（`1/true/yes/on`） | 关闭 |
 | `DS2API_WASM_PATH` | PoW WASM 文件路径 | 自动查找 |
 | `DS2API_STATIC_ADMIN_DIR` | 管理台静态文件目录 | `static/admin` |
 | `DS2API_AUTO_BUILD_WEBUI` | 启动时自动构建 WebUI | 本地开启，Vercel 关闭 |
+| `DS2API_DEV_PACKET_CAPTURE` | 本地开发抓包开关（记录最近会话请求/响应体） | 本地非 Vercel 默认开启 |
+| `DS2API_DEV_PACKET_CAPTURE_LIMIT` | 本地抓包保留条数（超出自动淘汰） | `5` |
+| `DS2API_DEV_PACKET_CAPTURE_MAX_BODY_BYTES` | 单条响应体最大记录字节数 | `2097152` |
 | `DS2API_ACCOUNT_MAX_INFLIGHT` | 每账号最大并发 in-flight 请求数 | `2` |
 | `DS2API_ACCOUNT_CONCURRENCY` | 同上（兼容旧名） | — |
 | `DS2API_ACCOUNT_MAX_QUEUE` | 等待队列上限 | `recommended_concurrency` |
@@ -322,6 +365,8 @@ cp opencode.json.example opencode.json
 | `VERCEL_TEAM_ID` | Vercel 团队 ID | — |
 | `DS2API_VERCEL_PROTECTION_BYPASS` | Vercel 部署保护绕过密钥（内部 Node→Go 调用） | — |

+> 提示：当检测到 `DS2API_CONFIG_JSON/CONFIG_JSON` 时，管理台会显示当前模式风险与自动持久化状态（含 `DS2API_CONFIG_PATH` 路径与模式切换说明）。
+
 ## 鉴权模式

 调用业务接口（`/v1/*`、`/anthropic/*`、Gemini 路由）时支持两种模式：
@@ -332,6 +377,7 @@ cp opencode.json.example opencode.json
 | **直通 token 模式** | 传入 token 不在 `config.keys` 中时，直接作为 DeepSeek token 使用 |

 可选请求头 `X-Ds2-Target-Account`：指定使用某个托管账号（值为 email 或 mobile）。
+Gemini 路由还可以使用 `x-goog-api-key`，或在没有认证头时使用 `?key=` / `?api_key=` 作为调用方凭据。

 ## 并发模型

@@ -348,13 +394,17 @@ cp opencode.json.example opencode.json

 ## Tool Call 适配

-当请求中带 `tools` 时，DS2API 会做防泄漏处理：
+当请求中带 `tools` 时，DS2API 会做防泄漏处理与结构化转译：

-1. 只在**非代码块上下文**启用 toolcall 特征识别（代码块示例不会触发）
-2. `responses` 流式严格使用官方 item 生命周期事件（`response.output_item.*`、`response.content_part.*`、`response.function_call_arguments.*`）
-3. 未在 `tools` 声明中的工具名会被严格拒绝，不会下发为有效 tool call
+1. 只在**非代码块上下文**启用执行型 toolcall 识别（代码块示例默认不触发）
+2. 解析层以 XML/Markup 为最高优先级，同时兼容 JSON / ANTML / invoke / text-kv，并统一归一到内部工具调用结构
+3. `responses` 流式严格使用官方 item 生命周期事件（`response.output_item.*`、`response.content_part.*`、`response.function_call_arguments.*`）
 4. `responses` 支持并执行 `tool_choice`（`auto`/`none`/`required`/强制函数）；`required` 违规时非流式返回 `422`，流式返回 `response.failed`
-5. 仅在通过策略校验后才会发出有效工具调用事件，避免错误工具名进入客户端执行链
+5. 客户端请求哪种协议，就按该协议返回工具调用（OpenAI/Claude/Gemini 各自原生结构）；模型侧优先约束输出规范 XML，再由兼容层转译
+
+> 说明：当前版本在 parser 层仍以“尽量解析成功”为优先，未启用基于 allow-list 的工具名硬拒绝。
+>
+> 想评估“把工具调用封装成 XML 再输入模型”的方案，可参考：`docs/toolcall-semantics.md`。

 ## 本地开发抓包工具

@@ -383,13 +433,14 @@ go run ./cmd/ds2api

 ```text
 ds2api/
+├── app/                     # 统一 HTTP Handler 组装层（供本地与 Serverless 复用）
 ├── cmd/
 │   ├── ds2api/              # 本地 / 容器启动入口
 │   └── ds2api-tests/        # 端到端测试集入口
 ├── api/
 │   ├── index.go             # Vercel Serverless Go 入口
 │   ├── chat-stream.js       # Vercel Node.js 流式转发
-│   └── helpers/             # Node.js 辅助模块
+│   └── (rewrite targets in vercel.json)
 ├── internal/
 │   ├── account/             # 账号池与并发队列
 │   ├── adapter/
@@ -399,26 +450,34 @@ ds2api/
 │   ├── admin/               # Admin API handlers（含 Settings 热更新）
 │   ├── auth/                # 鉴权与 JWT
 │   ├── claudeconv/          # Claude 消息格式转换
-│   ├── compat/              # 兼容性辅助
-│   ├── config/              # 配置加载与热更新
+│   ├── compat/              # Go 版本兼容与回归测试辅助
+│   ├── config/              # 配置加载、校验与热更新
 │   ├── deepseek/            # DeepSeek API 客户端、PoW WASM
+│   ├── js/                  # Node 运行时流式处理与兼容逻辑
 │   ├── devcapture/          # 开发抓包模块
 │   ├── format/              # 输出格式化
 │   ├── prompt/              # Prompt 构建
 │   ├── server/              # HTTP 路由与中间件（chi router）
 │   ├── sse/                 # SSE 解析工具
 │   ├── stream/              # 统一流式消费引擎
+│   ├── testsuite/           # 端到端测试框架与用例编排
+│   ├── translatorcliproxy/  # CLIProxy 桥接与流写入组件
 │   ├── util/                # 通用工具函数
+│   ├── version/             # 版本解析 / 比较与 tag 规范化
 │   └── webui/               # WebUI 静态文件托管与自动构建
 ├── webui/                   # React WebUI 源码（Vite + Tailwind）
 │   └── src/
-│       ├── components/      # AccountManager / ApiTester / BatchImport / VercelSync / Login / LandingPage
+│       ├── app/             # 路由、鉴权、配置状态管理
+│       ├── features/        # 业务功能模块（account/settings/vercel/apiTester）
+│       ├── components/      # 登录/落地页等通用组件
 │       └── locales/         # 中英文语言包（zh.json / en.json）
 ├── scripts/
 │   └── build-webui.sh       # WebUI 手动构建脚本
 ├── tests/
 │   ├── compat/              # 兼容性测试夹具与期望输出
+│   ├── node/                # Node 侧单元测试（chat-stream / tool-sieve）
 │   └── scripts/             # 统一测试脚本入口（unit/e2e）
+├── docs/                    # 部署 / 贡献 / 测试等辅助文档
 ├── static/admin/            # WebUI 构建产物（不提交到 Git）
 ├── .github/
 │   ├── workflows/           # GitHub Actions（质量门禁 + Release 自动构建）
@@ -438,9 +497,9 @@ ds2api/
 | 文档 | 说明 |
 | --- | --- |
 | [API.md](API.md) / [API.en.md](API.en.md) | API 接口文档（含请求/响应示例） |
-| [DEPLOY.md](DEPLOY.md) / [DEPLOY.en.md](DEPLOY.en.md) | 部署指南（本地/Docker/Vercel/systemd） |
-| [CONTRIBUTING.md](CONTRIBUTING.md) / [CONTRIBUTING.en.md](CONTRIBUTING.en.md) | 贡献指南 |
-| [TESTING.md](TESTING.md) | 测试集使用指南 |
+| [DEPLOY.md](docs/DEPLOY.md) / [DEPLOY.en.md](docs/DEPLOY.en.md) | 部署指南（本地/Docker/Vercel/systemd） |
+| [CONTRIBUTING.md](docs/CONTRIBUTING.md) / [CONTRIBUTING.en.md](docs/CONTRIBUTING.en.md) | 贡献指南 |
+| [TESTING.md](docs/TESTING.md) | 测试集使用指南 |

 ## 测试

@@ -468,6 +527,23 @@ go run ./cmd/ds2api-tests \
 npm ci --prefix webui && npm run build --prefix webui
 ```

+## 测试速查
+
+详细测试指南请参阅 [docs/TESTING.md](docs/TESTING.md)。
+
+### 快速测试命令
+
+```bash
+# 运行所有单元测试
+go test ./...
+
+# 运行 tool calls 相关测试（调试工具调用问题）
+go test -v -run 'TestParseToolCalls|TestRepair' ./internal/util/
+
+# 运行端到端测试
+./tests/scripts/run-live.sh
+```
+
 ## Release 自动构建（GitHub Actions）

 工作流文件：`.github/workflows/release-artifacts.yml`
@@ -475,8 +551,11 @@ npm ci --prefix webui && npm run build --prefix webui
 - **触发条件**：仅在 GitHub Release `published` 时触发（普通 push 不会触发）
 - **构建产物**：多平台二进制包（`linux/amd64`、`linux/arm64`、`darwin/amd64`、`darwin/arm64`、`windows/amd64`）+ `sha256sums.txt`
 - **容器镜像发布**：仅推送到 GHCR（`ghcr.io/cjackhwang/ds2api`）
-- **每个压缩包包含**：`ds2api` 可执行文件、`static/admin`、WASM 文件、配置示例、README、LICENSE
+- **每个压缩包包含**：`ds2api` 可执行文件、`static/admin`、WASM 文件（同时支持内置 fallback）、配置示例、README、LICENSE

 ## 免责声明

-本项目基于逆向方式实现，仅供学习与研究使用。稳定性和可用性不作保证，请勿用于违反服务条款或法律法规的场景。
+本项目基于逆向方式实现，仅供学习、研究、个人实验和内部验证使用，不提供任何商业授权、稳定性保证或可用性保证。
+作者及仓库维护者不对因使用、修改、分发、部署或依赖本项目而产生的任何直接或间接损失、账号封禁、数据丢失、法律风险或第三方索赔负责。
+
+请勿将本项目用于违反服务条款、协议、法律法规或平台规则的场景。商业使用前请自行确认 `LICENSE`、相关协议以及你是否获得了作者的书面许可。
--- a/README.en.md
+++ b/README.en.md
@@ -8,7 +8,7 @@
 ![Stars](https://img.shields.io/github/stars/CJackHwang/ds2api.svg)
 ![Forks](https://img.shields.io/github/forks/CJackHwang/ds2api.svg)
 [![Release](https://img.shields.io/github/v/release/CJackHwang/ds2api?display_name=tag)](https://github.com/CJackHwang/ds2api/releases)
-[![Docker](https://img.shields.io/badge/docker-ready-blue.svg)](DEPLOY.en.md)
+[![Docker](https://img.shields.io/badge/docker-ready-blue.svg)](docs/DEPLOY.en.md)
 [![Deploy on Zeabur](https://zeabur.com/button.svg)](https://zeabur.com/templates/L4CFHP)
 [![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https://github.com/CJackHwang/ds2api)

@@ -16,47 +16,76 @@ Language: [中文](README.MD) | [English](README.en.md)

 DS2API converts DeepSeek Web chat capability into OpenAI-compatible, Claude-compatible, and Gemini-compatible APIs. The backend is a **pure Go implementation**, with a React WebUI admin panel (source in `webui/`, build output auto-generated to `static/admin` during deployment).

+> **Important Disclaimer**
+>
+> This repository is provided for learning, research, personal experimentation, and internal validation only. It does not grant any commercial authorization and comes with no warranty of fitness, stability, or results.
+>
+> The author and repository maintainers are not responsible for any direct or indirect loss, account suspension, data loss, legal risk, or third-party claims arising from use, modification, distribution, deployment, or reliance on this project.
+>
+> Do not use this project in ways that violate service terms, agreements, laws, or platform rules. Before any commercial use, review the `LICENSE`, the relevant terms, and confirm that you have the author's written permission.
+
 ## Architecture Overview

 ```mermaid
 flowchart LR
-    Client["🖥️ Clients\n(OpenAI / Claude / Gemini compat)"]
+    Client["🖥️ Clients / SDKs\n(OpenAI / Claude / Gemini)"]
+    Upstream["☁️ DeepSeek API"]

-    subgraph DS2API["DS2API Service"]
-        direction TB
-        CORS["CORS Middleware"]
-        Auth["🔐 Auth Middleware"]
+    subgraph DS2API["DS2API 3.x (Unified OpenAI Core)"]
+        Router["chi Router + Middleware\n(RequestID / RealIP / Logger / Recoverer / CORS)"]

-        subgraph Adapters["Adapter Layer"]
-            OA["OpenAI Adapter\n/v1/*"]
-            CA["Claude Adapter\n/anthropic/*"]
-            GA["Gemini Adapter\n/v1beta/models/*"]
+        subgraph Adapters["Protocol Adapters"]
+            OA["OpenAI\n/v1/*"]
+            CA["Claude\n/anthropic/* + /v1/messages"]
+            GA["Gemini\n/v1beta/models/* + /v1/models/*"]
+            Admin["Admin API\n/admin/*"]
+            WebUI["WebUI\n/admin (static hosting)"]
        end

-        subgraph Support["Support Modules"]
-            Pool["📦 Account Pool / Queue"]
-            PoW["⚙️ PoW WASM\n(wazero)"]
+        subgraph Runtime["Runtime + Core Capabilities"]
+            Bridge["CLIProxy Bridge\n(multi-protocol <-> OpenAI)"]
+            OAEngine["OpenAI ChatCompletions\n(unified tools + stream semantics)"]
+            Auth["Auth Resolver\n(API key / bearer / x-goog-api-key)"]
+            Pool["Account Pool + Queue\n(in-flight slots + wait queue)"]
+            DSClient["DeepSeek Client\n(session / auth / HTTP)"]
+            Pow["PoW WASM\n(wazero preload)"]
+            Tool["Tool Sieve\n(Go/Node semantic parity)"]
        end
-
-        Admin["🛠️ Admin API\n/admin/*"]
-        WebUI["🌐 WebUI\n(/admin)"]
    end

-    DS["☁️ DeepSeek API"]
+    Client --> Router
+    Router --> OA & CA & GA
+    Router --> Admin
+    Router --> WebUI

-    Client -- "Request" --> CORS --> Auth
-    Auth --> OA & CA & GA
-    OA & CA & GA -- "Call" --> DS
-    Auth --> Admin
-    OA & CA & GA -. "Rotate accounts" .-> Pool
-    OA & CA & GA -. "Compute PoW" .-> PoW
-    DS -- "Response" --> Client
+    OA --> OAEngine
+    CA & GA --> Bridge
+    Bridge --> OAEngine
+    OAEngine --> Auth
+    OAEngine -.account rotation.-> Pool
+    OAEngine -.tool-call parsing.-> Tool
+    OAEngine -.PoW solving.-> Pow
+    Auth --> DSClient
+    DSClient --> Upstream
+    Upstream --> DSClient
+    OAEngine --> Bridge
+    Bridge --> Client
 ```

 - **Backend**: Go (`cmd/ds2api/`, `api/`, `internal/`), no Python runtime
 - **Frontend**: React admin panel (`webui/`), served as static build at runtime
 - **Deployment**: local run, Docker, Vercel serverless, Linux systemd

+### 3.0 Architecture Changes (vs older releases)
+
+- **Unified routing core**: all protocol entries are now centralized through `internal/server/router.go`, with OpenAI / Claude / Gemini / Admin / WebUI routes registered in one tree to avoid multi-entry drift.
+- **Unified execution chain**: Claude/Gemini entries are translated by `internal/translatorcliproxy`, then executed through `openai.ChatCompletions` for shared tool-calling and stream semantics, then translated back to the client protocol.
+- **Cleaner adapter boundaries**: `internal/adapter/{claude,gemini}` handles protocol wrappers, while `internal/adapter/openai` remains the execution core; upstream DeepSeek calls are retained only in the OpenAI core.
+- **Tool-calling parity across runtimes**: Go (`internal/util`) and Vercel Node (`internal/js/helpers/stream-tool-sieve`) follow aligned parsing/anti-leak semantics across JSON / XML / invoke / text-kv inputs.
+- **Config/runtime separation**: static config (`config`) and runtime policy (`settings`) are managed independently via Admin APIs, enabling hot updates and password rotation with JWT invalidation.
+- **Streaming behavior upgrade**: `/v1/responses` and `/v1/chat/completions` now share a more consistent incremental tool-call emission strategy across SDK ecosystems.
+- **Improved operability**: `/healthz`, `/readyz`, `/admin/version`, and `/admin/dev/captures` form a tighter post-deploy diagnostics loop.
+
 ## Key Capabilities

 | Capability | Details |
@@ -68,7 +97,7 @@ flowchart LR
 | Concurrency control | Per-account in-flight limit + waiting queue, dynamic recommended concurrency |
 | DeepSeek PoW | WASM solving via `wazero`, no external Node.js dependency |
 | Tool Calling | Anti-leak handling: non-code-block feature match, early `delta.tool_calls`, structured incremental output |
-| Admin API | Config management, runtime settings hot-reload, account testing/batch test, import/export, Vercel sync |
+| Admin API | Config management, runtime settings hot-reload, account testing/batch test, session cleanup, import/export, Vercel sync, version check |
 | WebUI Admin Panel | SPA at `/admin` (bilingual Chinese/English, dark mode) |
 | Health Probes | `GET /healthz` (liveness), `GET /readyz` (readiness) |

@@ -106,6 +135,14 @@ flowchart LR
 Override mapping via `claude_mapping` or `claude_model_mapping` in config.
 In addition, `/anthropic/v1/models` now includes historical Claude 1.x/2.x/3.x/4.x IDs and common aliases for legacy client compatibility.

+
+#### Claude Code integration pitfalls (validated)
+
+- Set `ANTHROPIC_BASE_URL` to the DS2API root URL (for example `http://127.0.0.1:5001`). Claude Code sends requests to `/v1/messages?beta=true`.
+- `ANTHROPIC_API_KEY` must match an entry in `keys` from `config.json`. Keeping both a regular key and an `sk-ant-*` style key improves client compatibility.
+- If your environment has proxy variables, set `NO_PROXY=127.0.0.1,localhost,<your_host_ip>` for DS2API to avoid proxy interception of local traffic.
+- If tool calls are rendered as plain text and not executed, upgrade to a build that includes multi-format Claude tool-call parsing (JSON/XML/ANTML/invoke).
+
 ### Gemini Endpoint

 The Gemini adapter maps model names to DeepSeek native models via `model_aliases` or built-in heuristics, supporting both `generateContent` and `streamGenerateContent` call patterns with full Tool Calling support (`functionDeclarations` → `functionCall` output).
@@ -124,10 +161,11 @@ cp config.example.json config.json
 Recommended per deployment mode:
 - Local run: read `config.json` directly
 - Docker / Vercel: generate Base64 from `config.json` and inject as `DS2API_CONFIG_JSON`
+- Compatibility note: `DS2API_CONFIG_JSON` may also contain raw JSON directly; `CONFIG_JSON` is the legacy fallback variable

 ### Option 1: Local Run

-**Prerequisites**: Go 1.24+, Node.js 20+ (only if building WebUI locally)
+**Prerequisites**: Go 1.26+, Node.js 20+ (only if building WebUI locally)

 ```bash
 # 1. Clone
@@ -144,28 +182,27 @@ go run ./cmd/ds2api

 Default URL: `http://localhost:5001`

-> **WebUI auto-build**: On first local startup, if `static/admin` is missing, DS2API will auto-run `npm install && npm run build` (requires Node.js). You can also build manually: `./scripts/build-webui.sh`
+> **WebUI auto-build**: On first local startup, if `static/admin` is missing, DS2API will auto-run `npm ci` (only when dependencies are missing) and `npm run build -- --outDir static/admin --emptyOutDir` (requires Node.js). You can also build manually: `./scripts/build-webui.sh`

 ### Option 2: Docker

 ```bash
-# 1. Prepare env file
+# 1. Prepare env file and config file
 cp .env.example .env
+cp config.example.json config.json

-# 2. Generate DS2API_CONFIG_JSON from config.json (single-line Base64)
-DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
-
-# 3. Edit .env and set:
+# 2. Edit .env (at least set DS2API_ADMIN_KEY)
 #    DS2API_ADMIN_KEY=replace-with-a-strong-secret
-#    DS2API_CONFIG_JSON=${DS2API_CONFIG_JSON}

-# 4. Start
+# 3. Start
 docker-compose up -d

-# 5. View logs
+# 4. View logs
 docker-compose logs -f
 ```

+The default `docker-compose.yml` maps host port `6011` to container port `5001`. If you want `5001` exposed directly, adjust the `ports` mapping.
+
 Rebuild after updates: `docker-compose up -d --build`

 #### Zeabur One-Click (Dockerfile)
@@ -174,6 +211,8 @@ Rebuild after updates: `docker-compose up -d --build`
 2. After deployment, open `/admin` and login with `DS2API_ADMIN_KEY` shown in Zeabur env/template instructions.
 3. Import / edit config in Admin UI (it will be written and persisted to `/data/config.json`).

+Note: when Zeabur builds directly from the repo `Dockerfile`, you do not need to pass `BUILD_VERSION`. The image prefers that build arg when provided, and automatically falls back to the repo-root `VERSION` file when it is absent.
+
 ### Option 3: Vercel

 1. Fork this repo to your GitHub account
@@ -196,7 +235,7 @@ base64 < config.json | tr -d '\n'

 > **Streaming note**: `/v1/chat/completions` on Vercel is routed to `api/chat-stream.js` (Node Runtime) for real-time SSE. Auth, account selection, and session/PoW preparation are still handled by the Go internal prepare endpoint; streaming output (including `tools`) is assembled on Node with Go-aligned anti-leak handling.

-For detailed deployment instructions, see the [Deployment Guide](DEPLOY.en.md).
+For detailed deployment instructions, see the [Deployment Guide](docs/DEPLOY.en.md).

 ### Option 4: Download Release Binaries

@@ -238,13 +277,11 @@ cp opencode.json.example opencode.json
  "accounts": [
    {
      "email": "user@example.com",
-      "password": "your-password",
-      "token": ""
+      "password": "your-password"
    },
    {
      "mobile": "12345678901",
-      "password": "your-password",
-      "token": ""
+      "password": "your-password"
    }
  ],
  "model_aliases": {
@@ -255,17 +292,13 @@ cp opencode.json.example opencode.json
  "compat": {
    "wide_input_strict_output": true
  },
-  "toolcall": {
-    "mode": "feature_match",
-    "early_emit_confidence": "high"
-  },
  "responses": {
    "store_ttl_seconds": 900
  },
  "embeddings": {
    "provider": "deterministic"
  },
-  "claude_model_mapping": {
+  "claude_mapping": {
    "fast": "deepseek-chat",
    "slow": "deepseek-reasoner"
  },
@@ -275,22 +308,27 @@ cp opencode.json.example opencode.json
  "runtime": {
    "account_max_inflight": 2,
    "account_max_queue": 0,
-    "global_max_inflight": 0
+    "global_max_inflight": 0,
+    "token_refresh_interval_hours": 6
+  },
+  "auto_delete": {
+    "sessions": false
  }
 }
 ```

 - `keys`: API access keys; clients authenticate via `Authorization: Bearer <key>`
 - `accounts`: DeepSeek account list, supports `email` or `mobile` login
-- `token`: Leave empty for auto-login on first request; or pre-fill an existing token
+- `token`: Even if set in `config.json`, it is cleared during load (DS2API does not read persisted tokens from config); runtime tokens are maintained/refreshed in memory only
 - `model_aliases`: Map common model names (GPT/Codex/Claude) to DeepSeek models
 - `compat.wide_input_strict_output`: Keep `true` (current default policy)
-- `toolcall`: Fixed to feature matching + high-confidence early emit
+- `toolcall`: Fixed to feature matching + high-confidence early emit; no longer configurable
 - `responses.store_ttl_seconds`: In-memory TTL for `/v1/responses/{id}`
 - `embeddings.provider`: Embeddings provider (`deterministic/mock/builtin` built-in)
-- `claude_model_mapping`: Maps `fast`/`slow` suffixes to corresponding DeepSeek models
+- `claude_mapping`: Maps `fast`/`slow` suffixes to corresponding DeepSeek models (still compatible with `claude_model_mapping`)
 - `admin`: Admin panel settings (JWT expiry, password hash, etc.), hot-reloadable via Admin Settings API
-- `runtime`: Runtime parameters (concurrency limits, queue sizes), hot-reloadable via Admin Settings API
+- `runtime`: Runtime parameters (concurrency limits, queue sizes, managed token refresh interval), hot-reloadable via Admin Settings API; `account_max_queue=0` / `global_max_inflight=0` mean the value is auto-calculated from recommended values, and `token_refresh_interval_hours=6` is the default forced re-login interval
+- `auto_delete.sessions`: Whether to auto-delete DeepSeek sessions after request completion (default `false`, hot-reloadable via Settings)

 ### Environment Variables

@@ -303,6 +341,8 @@ cp opencode.json.example opencode.json
 | `DS2API_JWT_EXPIRE_HOURS` | Admin JWT TTL in hours | `24` |
 | `DS2API_CONFIG_PATH` | Config file path | `config.json` |
 | `DS2API_CONFIG_JSON` | Inline config (JSON or Base64) | — |
+| `CONFIG_JSON` | Legacy compatibility config input | — |
+| `DS2API_ENV_WRITEBACK` | Auto-write env-backed config to file and transition to file mode (`1/true/yes/on`) | Disabled |
 | `DS2API_WASM_PATH` | PoW WASM file path | Auto-detect |
 | `DS2API_STATIC_ADMIN_DIR` | Admin static assets dir | `static/admin` |
 | `DS2API_AUTO_BUILD_WEBUI` | Auto-build WebUI on startup | Enabled locally, disabled on Vercel |
@@ -322,6 +362,8 @@ cp opencode.json.example opencode.json
 | `VERCEL_TEAM_ID` | Vercel team ID | — |
 | `DS2API_VERCEL_PROTECTION_BYPASS` | Vercel deployment protection bypass for internal Node→Go calls | — |

+> Note: when `DS2API_CONFIG_JSON` or `CONFIG_JSON` is detected, the Admin UI shows the mode risk and auto-persistence status (including `DS2API_CONFIG_PATH` and mode-transition hints).
+
 ## Authentication Modes

 For business endpoints (`/v1/*`, `/anthropic/*`, Gemini routes), DS2API supports two modes:
@@ -332,6 +374,7 @@ For business endpoints (`/v1/*`, `/anthropic/*`, Gemini routes), DS2API supports
 | **Direct token** | If the token is not in `config.keys`, DS2API treats it as a DeepSeek token directly |

 Optional header `X-Ds2-Target-Account`: Pin a specific managed account (value is email or mobile).
+Gemini routes also accept `x-goog-api-key`, or `?key=` / `?api_key=` when no auth header is present.

 ## Concurrency Model

@@ -384,13 +427,14 @@ Response fields include:

 ```text
 ds2api/
+├── app/                     # Unified HTTP handler assembly (shared by local + serverless)
 ├── cmd/
 │   ├── ds2api/              # Local / container entrypoint
 │   └── ds2api-tests/        # End-to-end testsuite entrypoint
 ├── api/
 │   ├── index.go             # Vercel Serverless Go entry
 │   ├── chat-stream.js       # Vercel Node.js stream relay
-│   └── helpers/             # Node.js helper modules
+│   └── (rewrite targets in vercel.json)
 ├── internal/
 │   ├── account/             # Account pool and concurrency queue
 │   ├── adapter/
@@ -400,26 +444,34 @@ ds2api/
 │   ├── admin/               # Admin API handlers (incl. Settings hot-reload)
 │   ├── auth/                # Auth and JWT
 │   ├── claudeconv/          # Claude message format conversion
-│   ├── compat/              # Compatibility helpers
-│   ├── config/              # Config loading and hot-reload
+│   ├── compat/              # Go-version compatibility and regression helpers
+│   ├── config/              # Config loading, validation, and hot-reload
 │   ├── deepseek/            # DeepSeek API client, PoW WASM
+│   ├── js/                  # Node runtime stream/compat logic
 │   ├── devcapture/          # Dev packet capture module
 │   ├── format/              # Output formatting
 │   ├── prompt/              # Prompt construction
 │   ├── server/              # HTTP routing and middleware (chi router)
 │   ├── sse/                 # SSE parsing utilities
 │   ├── stream/              # Unified stream consumption engine
+│   ├── testsuite/           # End-to-end testsuite framework and case orchestration
+│   ├── translatorcliproxy/  # CLIProxy bridge and stream writer components
 │   ├── util/                # Common utilities
+│   ├── version/             # Version parsing/comparison and tag normalization
 │   └── webui/               # WebUI static file serving and auto-build
 ├── webui/                   # React WebUI source (Vite + Tailwind)
 │   └── src/
-│       ├── components/      # AccountManager / ApiTester / BatchImport / VercelSync / Login / LandingPage
+│       ├── app/             # Routing, auth, config state
+│       ├── features/        # Feature modules (account/settings/vercel/apiTester)
+│       ├── components/      # Shared UI pieces (login/landing, etc.)
 │       └── locales/         # Language packs (zh.json / en.json)
 ├── scripts/
 │   └── build-webui.sh       # Manual WebUI build script
 ├── tests/
 │   ├── compat/              # Compatibility fixtures and expected outputs
+│   ├── node/                # Node-side unit tests (chat-stream / tool-sieve)
 │   └── scripts/             # Unified test script entrypoints (unit/e2e)
+├── docs/                    # Deployment / contributing / testing docs
 ├── static/admin/            # WebUI build output (not committed to Git)
 ├── .github/
 │   ├── workflows/           # GitHub Actions (quality gates + release automation)
@@ -439,9 +491,9 @@ ds2api/
 | Document | Description |
 | --- | --- |
 | [API.md](API.md) / [API.en.md](API.en.md) | API reference with request/response examples |
-| [DEPLOY.md](DEPLOY.md) / [DEPLOY.en.md](DEPLOY.en.md) | Deployment guide (local/Docker/Vercel/systemd) |
-| [CONTRIBUTING.md](CONTRIBUTING.md) / [CONTRIBUTING.en.md](CONTRIBUTING.en.md) | Contributing guide |
-| [TESTING.md](TESTING.md) | Testsuite guide |
+| [DEPLOY.md](docs/DEPLOY.md) / [DEPLOY.en.md](docs/DEPLOY.en.md) | Deployment guide (local/Docker/Vercel/systemd) |
+| [CONTRIBUTING.md](docs/CONTRIBUTING.md) / [CONTRIBUTING.en.md](docs/CONTRIBUTING.en.md) | Contributing guide |
+| [TESTING.md](docs/TESTING.md) | Testsuite guide |

 ## Testing

@@ -476,8 +528,11 @@ Workflow: `.github/workflows/release-artifacts.yml`
 - **Trigger**: only on GitHub Release `published` (normal pushes do not trigger builds)
 - **Outputs**: multi-platform archives (`linux/amd64`, `linux/arm64`, `darwin/amd64`, `darwin/arm64`, `windows/amd64`) + `sha256sums.txt`
 - **Container publishing**: GHCR only (`ghcr.io/cjackhwang/ds2api`)
- **Each archive includes**: `ds2api` executable, `static/admin`, WASM file, config template, README, LICENSE
+- **Each archive includes**: `ds2api` executable, `static/admin`, WASM file (with embedded fallback support), config template, README, LICENSE

 ## Disclaimer

-This project is built through reverse engineering and is provided for learning and research only. Stability is not guaranteed. Do not use it in scenarios that violate terms of service or laws.
+This project is built through reverse engineering and is provided for learning, research, personal experimentation, and internal validation only. No commercial authorization is granted, and no warranty of stability, fitness, or results is provided.
+The author and repository maintainers are not responsible for any direct or indirect loss, account suspension, data loss, legal risk, or third-party claims arising from use, modification, distribution, deployment, or reliance on this project.
+
+Do not use this project in ways that violate service terms, agreements, laws, or platform rules. Before any commercial use, review the `LICENSE` and the relevant terms, and confirm that you have the author's written permission.
--- a/2
+++ b/2
@@ -1 +1 @@
-0.1.0
+3.1.0
--- a/config.example.json
+++ b/config.example.json
@@ -9,20 +9,17 @@
    {
      "_comment": "邮箱登录方式",
      "email": "example1@example.com",
-      "password": "your-password-1",
-      "token": ""
+      "password": "your-password-1"
    },
    {
      "_comment": "邮箱登录方式 - 账号2",
      "email": "example2@example.com",
-      "password": "your-password-2",
-      "token": ""
+      "password": "your-password-2"
    },
    {
      "_comment": "手机号登录方式（中国大陆）",
      "mobile": "12345678901",
-      "password": "your-password-3",
-      "token": ""
+      "password": "your-password-3"
    }
  ],
  "model_aliases": {
@@ -43,8 +40,19 @@
  "embeddings": {
    "provider": "deterministic"
  },
-  "claude_model_mapping": {
+  "claude_mapping": {
    "fast": "deepseek-chat",
    "slow": "deepseek-reasoner"
+  },
+  "admin": {
+    "jwt_expire_hours": 24
+  },
+  "runtime": {
+    "account_max_inflight": 2,
+    "account_max_queue": 0,
+    "global_max_inflight": 0
+  },
+  "auto_delete": {
+    "sessions": false
  }
 }
--- a/docs/CONTRIBUTING.en.md
+++ b/docs/CONTRIBUTING.en.md
@@ -8,7 +8,7 @@ Thanks for your interest in contributing to DS2API!

 ### Prerequisites

- Go 1.24+
+- Go 1.26+
 - Node.js 20+ (for WebUI development)
 - npm (bundled with Node.js)

@@ -70,6 +70,7 @@ docker-compose -f docker-compose.dev.yml up
 5. Open a Pull Request

 > 💡 If you modify files under `webui/`, no manual build is needed — CI handles it automatically.
+> If you want to verify the generated `static/admin/` assets locally, you can still run `./scripts/build-webui.sh`.

 ## Build WebUI

@@ -93,13 +94,14 @@ Manually build WebUI to `static/admin/`:

 ```text
 ds2api/
+├── app/                     # Shared HTTP handler assembly (local + serverless)
 ├── cmd/
 │   ├── ds2api/              # Local/container entrypoint
 │   └── ds2api-tests/        # End-to-end testsuite entrypoint
 ├── api/
 │   ├── index.go             # Vercel Serverless Go entry
 │   ├── chat-stream.js       # Vercel Node.js stream relay
-│   └── helpers/             # Node.js helper modules
+│   └── (rewrite targets in vercel.json)
 ├── internal/
 │   ├── account/             # Account pool and concurrency queue
 │   ├── adapter/
@@ -109,23 +111,33 @@ ds2api/
 │   ├── admin/               # Admin API handlers
 │   ├── auth/                # Auth and JWT
 │   ├── claudeconv/          # Claude message conversion
-│   ├── compat/              # Compatibility helpers
-│   ├── config/              # Config loading and hot-reload
+│   ├── compat/              # Go-version compatibility and regression helpers
+│   ├── config/              # Config loading, validation, and hot-reload
 │   ├── deepseek/            # DeepSeek client, PoW WASM
+│   ├── js/                  # Node runtime stream/compat logic
 │   ├── devcapture/          # Dev packet capture
 │   ├── format/              # Output formatting
 │   ├── prompt/              # Prompt building
 │   ├── server/              # HTTP routing (chi router)
 │   ├── sse/                 # SSE parsing utilities
 │   ├── stream/              # Unified stream consumption engine
-│   ├── testsuite/           # Testsuite core logic
+│   ├── testsuite/           # Testsuite framework and scenario orchestration
+│   ├── translatorcliproxy/  # CLIProxy bridge and stream writer
 │   ├── util/                # Common utilities
+│   ├── version/             # Version parsing and comparison
 │   └── webui/               # WebUI static hosting
 ├── webui/                   # React WebUI source
 │   └── src/
-│       ├── components/      # Components
+│       ├── app/             # Routing, auth, config state
+│       ├── features/        # Feature modules
+│       ├── components/      # Shared components
 │       └── locales/         # Language packs
 ├── scripts/                 # Build and test scripts
+├── tests/
+│   ├── compat/              # Compatibility fixtures and expected outputs
+│   ├── node/                # Node-side unit tests
+│   └── scripts/             # Test script entrypoints (unit/e2e)
+├── plans/                   # Plans, gates, and manual smoke-test records
 ├── static/admin/            # WebUI build output (not committed)
 ├── Dockerfile               # Multi-stage build
 ├── docker-compose.yml       # Production
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -8,7 +8,7 @@

 ### 前置要求

- Go 1.24+
+- Go 1.26+
 - Node.js 20+（WebUI 开发时）
 - npm（随 Node.js 提供）

@@ -70,6 +70,7 @@ docker-compose -f docker-compose.dev.yml up
 5. 发起 Pull Request

 > 💡 如果修改了 `webui/` 目录下的文件，无需手动构建——CI 会自动处理。
+> 但如果你本地想验证 `static/admin/` 产物，还是可以手动运行 `./scripts/build-webui.sh`。

 ## WebUI 构建

@@ -93,13 +94,14 @@ docker-compose -f docker-compose.dev.yml up

 ```text
 ds2api/
+├── app/                     # 统一 HTTP Handler 装配（本地 + Serverless）
 ├── cmd/
 │   ├── ds2api/              # 本地/容器启动入口
 │   └── ds2api-tests/        # 端到端测试集入口
 ├── api/
 │   ├── index.go             # Vercel Serverless Go 入口
 │   ├── chat-stream.js       # Vercel Node.js 流式转发
-│   └── helpers/             # Node.js 辅助模块
+│   └── (rewrite targets in vercel.json)
 ├── internal/
 │   ├── account/             # 账号池与并发队列
 │   ├── adapter/
@@ -109,23 +111,33 @@ ds2api/
 │   ├── admin/               # Admin API handlers
 │   ├── auth/                # 鉴权与 JWT
 │   ├── claudeconv/          # Claude 消息格式转换
-│   ├── compat/              # 兼容性辅助
-│   ├── config/              # 配置加载与热更新
+│   ├── compat/              # Go 版本兼容与回归测试辅助
+│   ├── config/              # 配置加载、校验与热更新
 │   ├── deepseek/            # DeepSeek 客户端、PoW WASM
+│   ├── js/                  # Node 运行时流式/兼容逻辑
 │   ├── devcapture/          # 开发抓包
 │   ├── format/              # 输出格式化
 │   ├── prompt/              # Prompt 构建
 │   ├── server/              # HTTP 路由（chi router）
 │   ├── sse/                 # SSE 解析工具
 │   ├── stream/              # 统一流式消费引擎
-│   ├── testsuite/           # 测试集核心逻辑
+│   ├── testsuite/           # 测试集框架与场景编排
+│   ├── translatorcliproxy/  # CLIProxy 桥接与流式写入
 │   ├── util/                # 通用工具
+│   ├── version/             # 版本解析与比较
 │   └── webui/               # WebUI 静态托管
 ├── webui/                   # React WebUI 源码
 │   └── src/
-│       ├── components/      # 组件
+│       ├── app/             # 路由、鉴权、配置状态
+│       ├── features/        # 业务功能模块
+│       ├── components/      # 通用组件
 │       └── locales/         # 语言包
 ├── scripts/                 # 构建与测试脚本
+├── tests/
+│   ├── compat/              # 兼容夹具与期望输出
+│   ├── node/                # Node 侧单元测试
+│   └── scripts/             # 测试脚本入口（unit/e2e）
+├── plans/                   # 计划、门禁和手工烟测记录
 ├── static/admin/            # WebUI 构建产物（不提交）
 ├── Dockerfile               # 多阶段构建
 ├── docker-compose.yml       # 生产环境
--- a/docs/DEPLOY.en.md
+++ b/docs/DEPLOY.en.md
@@ -24,7 +24,7 @@ This guide covers all deployment methods for the current Go-based codebase.

 | Dependency | Minimum Version | Notes |
 | --- | --- | --- |
-| Go | 1.24+ | Build backend |
+| Go | 1.26+ | Build backend |
 | Node.js | 20+ | Only needed to build WebUI locally |
 | npm | Bundled with Node.js | Install WebUI dependencies |

@@ -32,6 +32,7 @@ Config source (choose one):

 - **File**: `config.json` (recommended for local/Docker)
 - **Environment variable**: `DS2API_CONFIG_JSON` (recommended for Vercel; supports raw JSON or Base64)
+- Compatibility note: `CONFIG_JSON` is the legacy fallback variable; `DS2API_CONFIG_JSON` may also contain raw JSON directly

 Unified recommendation (best practice):

@@ -69,7 +70,7 @@ Default address: `http://0.0.0.0:5001` (override with `PORT`).

 ### 1.2 WebUI Build

-On first local startup, if `static/admin/` is missing, DS2API will automatically attempt to build the WebUI (requires Node.js/npm).
+On first local startup, if `static/admin/` is missing, DS2API will automatically attempt to build the WebUI (requires Node.js/npm; when dependencies are missing it runs `npm ci` first, then `npm run build -- --outDir static/admin --emptyOutDir`).

 Manual build:

@@ -110,15 +111,12 @@ go build -o ds2api ./cmd/ds2api
 ### 2.1 Basic Steps

 ```bash
-# Copy env template
+# Copy env template and config file
 cp .env.example .env
+cp config.example.json config.json

-# Generate single-line Base64 from config.json
-DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
-
-# Edit .env and set:
+# Edit .env and set at least:
 #   DS2API_ADMIN_KEY=your-admin-key
-#   DS2API_CONFIG_JSON=${DS2API_CONFIG_JSON}

 # Start
 docker-compose up -d
@@ -127,6 +125,8 @@ docker-compose up -d
 docker-compose logs -f
 ```

+The default `docker-compose.yml` maps host port `6011` to container port `5001`. If you want `5001` exposed directly, adjust the `ports` mapping.
+
 ### 2.2 Update

 ```bash
@@ -185,6 +185,7 @@ Notes:

 - **Port**: DS2API listens on `5001` by default; the template sets `PORT=5001`.
 - **Persistent config**: the template mounts `/data` and sets `DS2API_CONFIG_PATH=/data/config.json`. After importing config in Admin UI, it will be written and persisted to this path.
+- **Build version**: Zeabur and regular `docker build` deployments do not require `BUILD_VERSION` by default. The image prefers that build arg when provided, and automatically falls back to the repo-root `VERSION` file when it is absent.
 - **First login**: after deployment, open `/admin` and login with `DS2API_ADMIN_KEY` shown in Zeabur env/template instructions (recommended: rotate to a strong secret after first login).

 ---
@@ -248,6 +249,7 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx   # optional for personal accounts
 | `DS2API_ACCOUNT_QUEUE_SIZE` | Alias (legacy compat) | — |
 | `DS2API_GLOBAL_MAX_INFLIGHT` | Global inflight limit | `recommended_concurrency` |
 | `DS2API_MAX_INFLIGHT` | Alias (legacy compat) | — |
+| `DS2API_ENV_WRITEBACK` | When `DS2API_CONFIG_JSON` is present, auto-write to `DS2API_CONFIG_PATH` and switch to file-backed mode after success (`1/true/yes/on`) | Disabled |
 | `DS2API_VERCEL_INTERNAL_SECRET` | Hybrid streaming internal auth | Falls back to `DS2API_ADMIN_KEY` |
 | `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | Stream lease TTL | `900` |
 | `VERCEL_TOKEN` | Vercel sync token | — |
@@ -366,7 +368,7 @@ Each archive includes:

 - `ds2api` executable (`ds2api.exe` on Windows)
 - `static/admin/` (built WebUI assets)
- `sha3_wasm_bg.7b9ca65ddd.wasm`
+- `sha3_wasm_bg.7b9ca65ddd.wasm` (optional; binary has embedded fallback)
 - `config.example.json`, `.env.example`
 - `README.MD`, `README.en.md`, `LICENSE`

@@ -399,7 +401,7 @@ cp config.example.json config.json
 docker pull ghcr.io/cjackhwang/ds2api:latest

 # specific version (example)
-docker pull ghcr.io/cjackhwang/ds2api:v2.1.2
+docker pull ghcr.io/cjackhwang/ds2api:v3.0.0
 ```

 ---
@@ -455,7 +457,9 @@ server {
 ```bash
 # Copy compiled binary and related files to target directory
 sudo mkdir -p /opt/ds2api
-sudo cp ds2api config.json sha3_wasm_bg.7b9ca65ddd.wasm /opt/ds2api/
+sudo cp ds2api config.json /opt/ds2api/
+# Optional: if you want to use an external WASM file (override the embedded one, from a release package or build output)
+# sudo cp /path/to/sha3_wasm_bg.7b9ca65ddd.wasm /opt/ds2api/
 sudo cp -r static/admin /opt/ds2api/static/admin
 ```

--- a/docs/DEPLOY.md
+++ b/docs/DEPLOY.md
@@ -24,7 +24,7 @@

 | 依赖 | 最低版本 | 说明 |
 | --- | --- | --- |
-| Go | 1.24+ | 编译后端 |
+| Go | 1.26+ | 编译后端 |
 | Node.js | 20+ | 仅在需要本地构建 WebUI 时 |
 | npm | 随 Node.js 提供 | 安装 WebUI 依赖 |

@@ -32,6 +32,7 @@

 - **文件方式**：`config.json`（推荐本地/Docker 使用）
 - **环境变量方式**：`DS2API_CONFIG_JSON`（推荐 Vercel 使用，支持 JSON 字符串或 Base64 编码）
+- 兼容写法：`CONFIG_JSON` 是旧版回退变量；`DS2API_CONFIG_JSON` 也可以直接写原始 JSON

 统一建议（最优实践）：

@@ -69,7 +70,7 @@ go run ./cmd/ds2api

 ### 1.2 WebUI 构建

-本地首次启动时，若 `static/admin/` 不存在，服务会自动尝试构建 WebUI（需要 Node.js/npm）。
+本地首次启动时，若 `static/admin/` 不存在，服务会自动尝试构建 WebUI（需要 Node.js/npm；缺依赖时会先执行 `npm ci`，再执行 `npm run build -- --outDir static/admin --emptyOutDir`）。

 你也可以手动构建：

@@ -110,15 +111,12 @@ go build -o ds2api ./cmd/ds2api
 ### 2.1 基本步骤

 ```bash
-# 复制环境变量模板
+# 复制环境变量模板和配置文件
 cp .env.example .env
+cp config.example.json config.json

-# 从 config.json 生成单行 Base64
-DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
-
-# 编辑 .env（请改成你的强密码），设置：
+# 编辑 .env（请改成你的强密码），至少设置：
 #   DS2API_ADMIN_KEY=your-admin-key
-#   DS2API_CONFIG_JSON=${DS2API_CONFIG_JSON}

 # 启动
 docker-compose up -d
@@ -127,6 +125,8 @@ docker-compose up -d
 docker-compose logs -f
 ```

+默认 `docker-compose.yml` 会把宿主机 `6011` 映射到容器内的 `5001`。如果你希望直接对外暴露 `5001`，请调整 `ports` 配置。
+
 ### 2.2 更新

 ```bash
@@ -185,6 +185,7 @@ healthcheck:

 - **端口**：服务默认监听 `5001`，模板会固定设置 `PORT=5001`。
 - **配置持久化**：模板挂载卷 `/data`，并设置 `DS2API_CONFIG_PATH=/data/config.json`；在管理台导入配置后，会写入并持久化到该路径。
+- **构建版本号**：Zeabur / 普通 `docker build` 默认不需要传 `BUILD_VERSION`；镜像会优先使用该构建参数，未提供时自动回退到仓库根目录的 `VERSION` 文件。
 - **首次登录**：部署完成后访问 `/admin`，使用 Zeabur 环境变量/模板指引中的 `DS2API_ADMIN_KEY` 登录（建议首次登录后自行更换为强密码）。

 ---
@@ -248,6 +249,7 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx   # 个人账号可留空
 | `DS2API_ACCOUNT_QUEUE_SIZE` | 同上（兼容别名） | — |
 | `DS2API_GLOBAL_MAX_INFLIGHT` | 全局并发上限 | `recommended_concurrency` |
 | `DS2API_MAX_INFLIGHT` | 同上（兼容别名） | — |
+| `DS2API_ENV_WRITEBACK` | 检测到 `DS2API_CONFIG_JSON` 时自动写入 `DS2API_CONFIG_PATH`，并在成功后转为文件模式（`1/true/yes/on`） | 关闭 |
 | `DS2API_VERCEL_INTERNAL_SECRET` | 混合流式内部鉴权 | 回退用 `DS2API_ADMIN_KEY` |
 | `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | 流式 lease TTL | `900` |
 | `VERCEL_TOKEN` | Vercel 同步 token | — |
@@ -366,7 +368,7 @@ No Output Directory named "public" found after the Build completed.

 - `ds2api` 可执行文件（Windows 为 `ds2api.exe`）
 - `static/admin/`（WebUI 构建产物）
- `sha3_wasm_bg.7b9ca65ddd.wasm`
+- `sha3_wasm_bg.7b9ca65ddd.wasm`（可选；程序内置 embed fallback）
 - `config.example.json`、`.env.example`
 - `README.MD`、`README.en.md`、`LICENSE`

@@ -399,7 +401,7 @@ cp config.example.json config.json
 docker pull ghcr.io/cjackhwang/ds2api:latest

 # 指定版本（示例）
-docker pull ghcr.io/cjackhwang/ds2api:v2.1.2
+docker pull ghcr.io/cjackhwang/ds2api:v3.0.0
 ```

 ---
@@ -455,7 +457,9 @@ server {
 ```bash
 # 将编译好的二进制文件和相关文件复制到目标目录
 sudo mkdir -p /opt/ds2api
-sudo cp ds2api config.json sha3_wasm_bg.7b9ca65ddd.wasm /opt/ds2api/
+sudo cp ds2api config.json /opt/ds2api/
+# 可选：若你希望使用外置 WASM 文件（覆盖内置版本，来自 release 包或构建产物）
+# sudo cp /path/to/sha3_wasm_bg.7b9ca65ddd.wasm /opt/ds2api/
 sudo cp -r static/admin /opt/ds2api/static/admin
 ```

--- a/docs/DeepSeekSSE流格式字段分析-2026-04-03.md
+++ b/docs/DeepSeekSSE流格式字段分析-2026-04-03.md
@@ -0,0 +1,82 @@
+# DeepSeek SSE 流格式字段分析（2026-04-03）
+
+> 日期：2026-04-03（UTC）
+> 
+> 样本：`tests/raw_stream_samples/guangzhou-weather-reasoner-search-20260403/upstream.stream.sse`
+> 
+> 模型：`deepseek-reasoner-search`（搜索 + 思考）
+
+## 1. SSE 事件层结构
+
+原始流由标准 SSE 帧组成，常见形态：
+
+```text
+event: <type>
+data: <json or text>
+
+```
+
+样本中主要 `event` 类型：
+
+- `ready`：流建立后返回请求/响应消息 ID。
+- `update_session`：会话时间戳更新。
+- `finish`：流式阶段结束。
+- （无 `event` 时）默认为 message 事件，`data:` 中承载主要增量数据。
+
+## 2. `data` JSON 常见字段
+
+上游增量主体多为 JSON Patch 风格对象：
+
+- `p`（path）：字段路径，如 `response/fragments/-1/content`。
+- `o`（op，可选）：操作类型，常见 `SET` / `APPEND` / `BATCH`。
+- `v`（value）：值（字符串、布尔、对象、数组都可能）。
+
+示例（语义）：
+
+- `{"p":"response/fragments/-1/content","o":"APPEND","v":"..."}`
+- `{"p":"response/fragments/-16/status","v":"FINISHED"}`
+- `{"p":"response/status","o":"SET","v":"FINISHED"}`
+
+## 3. 搜索+思考场景关键路径
+
+### 3.1 文本内容
+
+- `response/fragments/<idx>/content`
+- `response/content`
+- `response/thinking_content`
+- `response/fragments`（`APPEND` + fragment 数组）
+
+### 3.2 搜索相关
+
+- `response/fragments/<idx>/results`（检索结果数组）
+- `response/search_status`（检索状态，建议跳过展示）
+
+### 3.3 状态相关（重点）
+
+- `response/status = FINISHED`：**最终结束信号**（需要保留用于结束判定）
+- `response/fragments/<idx>/status = FINISHED`：**分片级状态**（高频，建议跳过输出）
+- `response/quasi_status`：过程状态（建议跳过输出）
+
+## 4. 泄露问题根因（FINISHED 重复）
+
+在搜索 + 思考模型中，`response/fragments/<idx>/status` 会出现大量不同 `<idx>`（例如 `-1/-2/-3/-16...`）的 `FINISHED`。
+
+若只过滤固定的少量索引（例如仅 `-1/-2/-3`），其他索引的状态值会被当作普通文本透传，导致前端出现：
+
+- `FINISHEDFINISHEDFINISHED...`
+
+## 5. 适配建议（已落地）
+
+1. 跳过所有 `response/fragments/-?\d+/status`。
+2. 继续保留 `response/status=FINISHED` 作为真正结束判定。
+3. 通过独立仿真工具持续回放全部样本，作为回归门禁：
+
+```bash
+./tests/scripts/run-raw-stream-sim.sh
+```
+
+## 6. 后续扩展建议
+
+- 增加不同模型（`deepseek-chat-search` / 非 search / 非 thinking）样本。
+- 增加异常样本（限流、中断、content_filter、空结果）。
+- 为仿真报告加入字段覆盖率统计（路径频次、事件频次、终止路径命中率）。
--- a/docs/TESTING.md
+++ b/docs/TESTING.md
@@ -1,6 +1,6 @@
 # DS2API 测试指南

-语言 / Language: [中文 + English](TESTING.md)
+语言 / Language: 中文 + English（同页）

 ## 概述 | Overview

@@ -14,6 +14,7 @@ DS2API 提供两个层级的测试：
 | 端到端测试 | `./tests/scripts/run-live.sh` | 使用真实账号执行全链路测试 |

 端到端测试集会录制完整的请求/响应日志，用于故障排查。
+Node 单元测试脚本会先做 `node --check` 语法门禁，再以 `--test-concurrency=1` 串行执行测试文件，减少模块级共享状态带来的干扰。

 ---

@@ -51,7 +52,7 @@ DS2API 提供两个层级的测试：
 1. **Preflight 检查**：
   - `go test ./... -count=1`（单元测试）
   - `./tests/scripts/check-node-split-syntax.sh`（Node 拆分模块语法门禁）
-   - `node --test`（如仓库存在 Node 单测文件时执行；当前默认以 Go 测试 + Node 语法门禁为主）
+   - `node --test tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js tests/node/js_compat_test.js`
   - `npm run build --prefix webui`（WebUI 构建检查）

 2. **隔离启动**：复制 `config.json` 到临时目录，启动独立服务进程
@@ -66,6 +67,8 @@ DS2API 提供两个层级的测试：

 4. **结果收集**：继续执行所有用例（不中断），写入最终汇总

+如果你想跳过这些 preflight 检查，可以直接运行 `go run ./cmd/ds2api-tests --no-preflight`。
+
 ---

 ## CLI 参数 | CLI Flags
@@ -173,12 +176,67 @@ rg "<trace_id>" artifacts/testsuite/<run_id>/server.log
 go test ./...
 ```

+### 运行特定模块的单元测试
+
+```bash
+# 运行 tool calls 相关测试（推荐用于调试 tool call 解析问题）
+go test -v -run 'TestParseToolCalls|TestRepair' ./internal/util/
+
+# 运行单个测试用例
+go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/util/
+
+# 运行 format 相关测试
+go test -v ./internal/format/...
+
+# 运行 adapter 相关测试
+go test -v ./internal/adapter/openai/...
+```
+
+### 调试 Tool Call 问题 | Debugging Tool Call Issues
+
+当遇到 DeepSeek 工具调用解析问题时，可以使用以下方法：
+
+```bash
+# 1. 运行 tool calls 相关的所有测试
+go test -v -run 'TestParseToolCalls|TestRepair' ./internal/util/
+
+# 2. 查看测试输出中的详细调试信息
+go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/util/ 2>&1
+
+# 3. 检查具体测试用例的修复效果
+# 测试用例位于 internal/util/toolcalls_test.go，包含：
+# - TestParseToolCallsWithDeepSeekHallucination: DeepSeek 典型幻觉输出
+# - TestRepairLooseJSONWithNestedObjects: 嵌套对象的方括号修复
+# - TestParseToolCallsWithMixedWindowsPaths: Windows 路径处理
+```
+
+### 运行 Node.js 测试
+
+```bash
+# 运行 Node 测试
+node --test tests/node/stream-tool-sieve.test.js
+
+# 或使用脚本
+./tests/scripts/run-unit-node.sh
+```
+
 ### 跑端到端测试（跳过 preflight）

 ```bash
 go run ./cmd/ds2api-tests --no-preflight
 ```

+### 运行原始流仿真（独立工具）
+
+```bash
+./tests/scripts/run-raw-stream-sim.sh
+```
+
+说明：
+- 该工具会重放 `tests/raw_stream_samples` 下全部样本，按上游 SSE 顺序做 1:1 仿真解析。
+- 默认校验不出现 `FINISHED` 文本泄露，并要求存在结束信号。
+- 结果会写入 `artifacts/raw-stream-sim/*.json`，可供其他测试脚本或排障流程复用。
+
 ### 指定输出目录和超时

 ```bash
--- a/docs/toolcall-semantics.md
+++ b/docs/toolcall-semantics.md
@@ -0,0 +1,72 @@
+# Tool call parsing semantics（Go/Node 统一语义）
+
+本文档描述当前代码中 `ParseToolCallsDetailed` / `parseToolCallsDetailed` 的**实际行为**，用于对齐 Go 与 Node Runtime。
+
+## 1) 输出结构（当前实现）
+
+- `calls`：解析得到的工具调用列表（`name` + `input`）。
+- `sawToolCallSyntax`：检测到工具调用语法特征时为 `true`（例如 `tool_calls`、`<tool_call>`、`<function_call>`、`<invoke>`、`function.name:`）。
+- `rejectedByPolicy`：当前实现固定为 `false`（预留字段，尚未启用 allow-list 拒绝）。
+- `rejectedToolNames`：当前实现固定为空数组（预留字段）。
+
+> 说明：`filterToolCallsDetailed` 当前仅做结构清洗，不做工具名策略拒绝。
+
+## 2) 解析管线
+
+1. **示例保护**：若判定为 fenced code block 示例上下文，则跳过执行型解析。
+2. **候选片段构建**：从完整文本中构建候选（原文、围绕 `tool_calls` 的 JSON 片段、首尾大括号切片等）。
+3. **按序尝试解析（命中即停）**：
+   - 对“明显 JSON 工具载荷候选”（以 `{`/`[` 开头且包含 `tool_calls`/`"function"`）先走 JSON 解析，避免 JSON 字符串内偶发 XML 片段误命中；
+   - 其余候选优先 XML 解析（`<tool_call>` / `<function_call>` / `<invoke>` / `tool_use` / `antml:function_call` 等）；
+   - JSON 解析（`{"tool_calls": [...]}`、列表、单对象）；
+   - Markup 解析；
+   - Text-KV 回退（如 `function.name:` + `function.arguments:`）。
+4. **兜底**：候选全部失败后，再对全文做 XML / Text-KV 回退。
+
+## 3) XML 能力边界（当前）
+
+当前已支持输入端的“多 XML/标记风格”解析，包括但不限于：
+
+- `<tool_call><tool_name>...</tool_name><parameters>...</parameters></tool_call>`
+- `<function_call>tool</function_call><function parameter name="x">...</function parameter>`
+- `<invoke name="tool"><parameter name="x">...</parameter></invoke>`
+- `antml:function_call` / `antml:argument` / `antml:parameters`
+- `tool_use` 家族标签
+
+但**输出端仍统一转换为 OpenAI 兼容 JSON 事件/对象**（`message.tool_calls`、`delta.tool_calls`、`response.function_call_arguments.*`）。
+
+## 4) 关于“是否可以封装成 XML 再喂给模型”
+
+结论：**可以做，而且当前解析器已经能兼容 XML 作为输入格式之一**，但代码里并没有 `toolcall.prefer_xml_output` 这个开关。现有可调配置只有：
+
+- `toolcall.mode`：`feature_match` / `off`
+- `toolcall.early_emit_confidence`：`high` / `low` / `off`
+
+推荐思路仍然是“输入兼容层 + 输出按客户端协议渲染”：
+
+1. **Prompt 约束层**：如果你要尝试 XML-first，可以在系统提示词里约束模型输出规范 XML tool block（例如 `<tool_calls><tool_call>...</tool_call></tool_calls>`）。
+2. **解析兼容层**：继续在 parser 中同时接受 JSON / XML / ANTML / invoke / text-kv。
+3. **协议归一层**：无论模型输出什么格式，统一落到内部 `ParsedToolCall`。
+4. **对外渲染层**：根据客户端请求协议渲染（OpenAI / Claude / Gemini 各自格式）。
+
+这样可以同时获得：
+
+- 减少模型端 JSON 转义/引号错误；
+- 不破坏现有 SDK / 客户端生态；
+- 逐步灰度（按模型、按租户、按请求开关）。
+
+## 5) 落地建议（低风险迭代）
+
+- 继续使用现有的 `toolcall.mode=feature_match` 和 `toolcall.early_emit_confidence=high` 作为默认策略。
+- 如果要试 XML-first，把它放在 prompt 层或上游模板层，不要假设代码里已有专门的 XML 输出开关。
+- 增加观测指标：
+  - `toolcall_parse_source`（json/xml/markup/textkv）；
+  - `toolcall_parse_success_rate`；
+  - `toolcall_malformed_rate`；
+  - `toolcall_repair_rate`。
+- 先在 `responses` 链路灰度，再扩展 `chat.completions`。
+
+## 6) 兼容性提醒
+
+- 上游模型若输出混合文本 + XML，仍可能出现“半结构化”噪声，需要依赖现有 sieve 增量消费策略。
+- XML 不等于安全：仍需做 tool 名、参数 schema、执行权限的服务端校验。
--- a/go.mod
+++ b/go.mod
@@ -1,17 +1,25 @@
 module ds2api

-go 1.24
+go 1.26.0

 require (
 	github.com/andybalholm/brotli v1.0.6
 	github.com/go-chi/chi/v5 v5.2.3
 	github.com/google/uuid v1.6.0
-	github.com/refraction-networking/utls v1.8.1
+	github.com/refraction-networking/utls v1.8.2
 	github.com/tetratelabs/wazero v1.9.0
 )

 require (
 	github.com/klauspost/compress v1.17.4 // indirect
-	golang.org/x/crypto v0.36.0 // indirect
-	golang.org/x/sys v0.31.0 // indirect
+	github.com/router-for-me/CLIProxyAPI/v6 v6.9.8 // indirect
+	github.com/sirupsen/logrus v1.9.3 // indirect
+	github.com/tidwall/gjson v1.18.0 // indirect
+	github.com/tidwall/match v1.1.1 // indirect
+	github.com/tidwall/pretty v1.2.0 // indirect
+	github.com/tidwall/sjson v1.2.5 // indirect
+	golang.org/x/crypto v0.45.0 // indirect
+	golang.org/x/net v0.47.0 // indirect
+	golang.org/x/sys v0.38.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
--- a/go.sum
+++ b/go.sum
@@ -1,16 +1,47 @@
 github.com/andybalholm/brotli v1.0.6 h1:Yf9fFpf49Zrxb9NlQaluyE92/+X7UVHlhMNJN2sxfOI=
 github.com/andybalholm/brotli v1.0.6/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/go-chi/chi/v5 v5.2.3 h1:WQIt9uxdsAbgIYgid+BpYc+liqQZGMHRaUwp0JUcvdE=
 github.com/go-chi/chi/v5 v5.2.3/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
 github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/refraction-networking/utls v1.8.1 h1:yNY1kapmQU8JeM1sSw2H2asfTIwWxIkrMJI0pRUOCAo=
 github.com/refraction-networking/utls v1.8.1/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
+github.com/refraction-networking/utls v1.8.2 h1:j4Q1gJj0xngdeH+Ox/qND11aEfhpgoEvV+S9iJ2IdQo=
+github.com/refraction-networking/utls v1.8.2/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
+github.com/router-for-me/CLIProxyAPI/v6 v6.9.8 h1:O65R38THenp8E1IK0paQlOfop3Y6UYlfqSdLlepidSY=
+github.com/router-for-me/CLIProxyAPI/v6 v6.9.8/go.mod h1:P1jsIPFXorYGuS2N/3BlZYkpRKi/z7+oR3+1tdG0u4k=
+github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
+github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I=
 github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
+github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
+github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
+github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
+github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
 golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
 golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
+golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
+golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
+golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
+golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
+golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
 golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/internal/account/pool_acquire.go
+++ b/internal/account/pool_acquire.go
@@ -60,16 +60,10 @@ func (p *Pool) acquireLocked(target string, exclude map[string]bool) (config.Acc
 		return acc, true
 	}

-	if acc, ok := p.tryAcquire(exclude, true); ok {
-		return acc, true
-	}
-	if acc, ok := p.tryAcquire(exclude, false); ok {
-		return acc, true
-	}
-	return config.Account{}, false
+	return p.tryAcquire(exclude)
 }

-func (p *Pool) tryAcquire(exclude map[string]bool, requireToken bool) (config.Account, bool) {
+func (p *Pool) tryAcquire(exclude map[string]bool) (config.Account, bool) {
 	for i := 0; i < len(p.queue); i++ {
 		id := p.queue[i]
 		if exclude[id] || !p.canAcquireIDLocked(id) {
@@ -79,9 +73,6 @@ func (p *Pool) tryAcquire(exclude map[string]bool, requireToken bool) (config.Ac
 		if !ok {
 			continue
 		}
-		if requireToken && acc.Token == "" {
-			continue
-		}
 		p.inUse[id]++
 		p.bumpQueue(id)
 		return acc, true
--- a/internal/account/pool_test.go
+++ b/internal/account/pool_test.go
@@ -194,7 +194,7 @@ func TestPoolAccountConcurrencyAliasEnv(t *testing.T) {
 	}
 }

-func TestPoolSupportsTokenOnlyAccount(t *testing.T) {
+func TestPoolDropsLegacyTokenOnlyAccountOnLoad(t *testing.T) {
 	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
 	t.Setenv("DS2API_CONFIG_JSON", `{
 		"keys":["k1"],
@@ -203,19 +203,42 @@ func TestPoolSupportsTokenOnlyAccount(t *testing.T) {

 	pool := NewPool(config.LoadStore())
 	status := pool.Status()
-	if got, ok := status["total"].(int); !ok || got != 1 {
+	if got, ok := status["total"].(int); !ok || got != 0 {
 		t.Fatalf("unexpected total in pool status: %#v", status["total"])
 	}
-	if got, ok := status["available"].(int); !ok || got != 1 {
+	if got, ok := status["available"].(int); !ok || got != 0 {
 		t.Fatalf("unexpected available in pool status: %#v", status["available"])
 	}

-	acc, ok := pool.Acquire("", nil)
-	if !ok {
-		t.Fatalf("expected acquire success for token-only account")
+	if _, ok := pool.Acquire("", nil); ok {
+		t.Fatalf("expected acquire to fail for token-only account")
 	}
-	if acc.Token != "token-only-account" {
-		t.Fatalf("unexpected token on acquired account: %q", acc.Token)
+}
+
+func TestPoolAcquireRotatesIntoTokenlessAccounts(t *testing.T) {
+	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
+	t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "")
+	t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
+	t.Setenv("DS2API_ACCOUNT_QUEUE_SIZE", "")
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["k1"],
+		"accounts":[
+			{"email":"acc1@example.com","token":"token1"},
+			{"email":"acc2@example.com","token":""},
+			{"email":"acc3@example.com","token":""}
+		]
+	}`)
+
+	pool := NewPool(config.LoadStore())
+	for i, want := range []string{"acc1@example.com", "acc2@example.com", "acc3@example.com"} {
+		acc, ok := pool.Acquire("", nil)
+		if !ok {
+			t.Fatalf("expected acquire success at step %d", i+1)
+		}
+		if got := acc.Identifier(); got != want {
+			t.Fatalf("unexpected account at step %d: got %q want %q", i+1, got, want)
+		}
+		pool.Release(acc.Identifier())
 	}
 }

--- a/internal/adapter/claude/deps.go
+++ b/internal/adapter/claude/deps.go
@@ -24,6 +24,10 @@ type ConfigReader interface {
 	ClaudeMapping() map[string]string
 }

+type OpenAIChatRunner interface {
+	ChatCompletions(w http.ResponseWriter, r *http.Request)
+}
+
 var _ AuthResolver = (*auth.Resolver)(nil)
 var _ DeepSeekCaller = (*deepseek.Client)(nil)
 var _ ConfigReader = (*config.Store)(nil)
--- a/internal/adapter/claude/handler_helpers_misc.go
+++ b/internal/adapter/claude/handler_helpers_misc.go
@@ -0,0 +1,97 @@
+package claude
+
+import (
+	"fmt"
+	"strings"
+)
+
+func hasSystemMessage(messages []any) bool {
+	for _, m := range messages {
+		msg, ok := m.(map[string]any)
+		if ok && msg["role"] == "system" {
+			return true
+		}
+	}
+	return false
+}
+
+func extractClaudeToolNames(tools []any) []string {
+	out := make([]string, 0, len(tools))
+	for _, t := range tools {
+		m, ok := t.(map[string]any)
+		if !ok {
+			continue
+		}
+		name, _, _ := extractClaudeToolMeta(m)
+		if name != "" {
+			out = append(out, name)
+		}
+	}
+	return out
+}
+
+func extractClaudeToolMeta(m map[string]any) (string, string, any) {
+	name, _ := m["name"].(string)
+	desc, _ := m["description"].(string)
+	schemaObj := m["input_schema"]
+	if schemaObj == nil {
+		schemaObj = m["parameters"]
+	}
+
+	if fn, ok := m["function"].(map[string]any); ok {
+		if strings.TrimSpace(name) == "" {
+			name, _ = fn["name"].(string)
+		}
+		if strings.TrimSpace(desc) == "" {
+			desc, _ = fn["description"].(string)
+		}
+		if schemaObj == nil {
+			if v, ok := fn["input_schema"]; ok {
+				schemaObj = v
+			}
+		}
+		if schemaObj == nil {
+			if v, ok := fn["parameters"]; ok {
+				schemaObj = v
+			}
+		}
+	}
+	return strings.TrimSpace(name), strings.TrimSpace(desc), schemaObj
+}
+
+func toMessageMaps(v any) []map[string]any {
+	arr, ok := v.([]any)
+	if !ok {
+		return nil
+	}
+	out := make([]map[string]any, 0, len(arr))
+	for _, item := range arr {
+		if m, ok := item.(map[string]any); ok {
+			out = append(out, m)
+		}
+	}
+	return out
+}
+
+func extractMessageContent(v any) string {
+	switch x := v.(type) {
+	case string:
+		return x
+	case []any:
+		parts := make([]string, 0, len(x))
+		for _, it := range x {
+			parts = append(parts, fmt.Sprintf("%v", it))
+		}
+		return strings.Join(parts, "\n")
+	default:
+		return fmt.Sprintf("%v", x)
+	}
+}
+
+func cloneMap(in map[string]any) map[string]any {
+	out := make(map[string]any, len(in))
+	for k, v := range in {
+		out[k] = v
+	}
+	return out
+}
--- a/internal/adapter/claude/handler_messages.go
+++ b/internal/adapter/claude/handler_messages.go
@@ -1,85 +1,126 @@
 package claude

 import (
+	"bytes"
 	"encoding/json"
-	"fmt"
 	"io"
 	"net/http"
+	"net/http/httptest"
 	"strings"
-	"time"

-	"ds2api/internal/auth"
 	"ds2api/internal/config"
-	claudefmt "ds2api/internal/format/claude"
-	"ds2api/internal/sse"
 	streamengine "ds2api/internal/stream"
+	"ds2api/internal/translatorcliproxy"
+	"ds2api/internal/util"
+
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 )

 func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
 	if strings.TrimSpace(r.Header.Get("anthropic-version")) == "" {
 		r.Header.Set("anthropic-version", "2023-06-01")
 	}
-	a, err := h.Auth.Determine(r)
-	if err != nil {
-		status := http.StatusUnauthorized
-		detail := err.Error()
-		if err == auth.ErrNoAccount {
-			status = http.StatusTooManyRequests
-		}
-		writeClaudeError(w, status, detail)
+	if h.OpenAI == nil {
+		writeClaudeError(w, http.StatusInternalServerError, "OpenAI proxy backend unavailable.")
 		return
 	}
-	defer h.Auth.Release(a)
+	if h.proxyViaOpenAI(w, r, h.Store) {
+		return
+	}
+	writeClaudeError(w, http.StatusBadGateway, "Failed to proxy Claude request.")
+}

+func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store ConfigReader) bool {
+	raw, err := io.ReadAll(r.Body)
+	if err != nil {
+		writeClaudeError(w, http.StatusBadRequest, "invalid body")
+		return true
+	}
 	var req map[string]any
-	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+	if err := json.Unmarshal(raw, &req); err != nil {
 		writeClaudeError(w, http.StatusBadRequest, "invalid json")
-		return
+		return true
 	}
-	norm, err := normalizeClaudeRequest(h.Store, req)
-	if err != nil {
-		writeClaudeError(w, http.StatusBadRequest, err.Error())
-		return
-	}
-	stdReq := norm.Standard
+	model, _ := req["model"].(string)
+	stream := util.ToBool(req["stream"])

-	sessionID, err := h.DS.CreateSession(r.Context(), a, 3)
-	if err != nil {
-		writeClaudeError(w, http.StatusUnauthorized, "invalid token.")
-		return
+	// Preserve claude_mapping (fast/slow/opus routing) while proxying via OpenAI.
+	translateModel := model
+	if store != nil {
+		if norm, normErr := normalizeClaudeRequest(store, cloneMap(req)); normErr == nil && strings.TrimSpace(norm.Standard.ResolvedModel) != "" {
+			translateModel = strings.TrimSpace(norm.Standard.ResolvedModel)
+		}
 	}
-	pow, err := h.DS.GetPow(r.Context(), a, 3)
-	if err != nil {
-		writeClaudeError(w, http.StatusUnauthorized, "Failed to get PoW")
-		return
-	}
-	requestPayload := stdReq.CompletionPayload(sessionID)
-	resp, err := h.DS.CallCompletion(r.Context(), a, requestPayload, pow, 3)
-	if err != nil {
-		writeClaudeError(w, http.StatusInternalServerError, "Failed to get Claude response.")
-		return
-	}
-	if resp.StatusCode != http.StatusOK {
-		defer resp.Body.Close()
-		body, _ := io.ReadAll(resp.Body)
-		writeClaudeError(w, http.StatusInternalServerError, string(body))
-		return
+	translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatClaude, translateModel, raw, stream)
+
+	isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1"
+	isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
+
+	if isVercelRelease {
+		proxyReq := r.Clone(r.Context())
+		proxyReq.URL.Path = "/v1/chat/completions"
+		proxyReq.Body = io.NopCloser(bytes.NewReader(raw))
+		proxyReq.ContentLength = int64(len(raw))
+		rec := httptest.NewRecorder()
+		h.OpenAI.ChatCompletions(rec, proxyReq)
+		res := rec.Result()
+		defer res.Body.Close()
+		body, _ := io.ReadAll(res.Body)
+		for k, vv := range res.Header {
+			for _, v := range vv {
+				w.Header().Add(k, v)
+			}
+		}
+		w.WriteHeader(res.StatusCode)
+		_, _ = w.Write(body)
+		return true
 	}

-	if stdReq.Stream {
-		h.handleClaudeStreamRealtime(w, r, resp, stdReq.ResponseModel, norm.NormalizedMessages, stdReq.Thinking, stdReq.Search, stdReq.ToolNames)
-		return
+	proxyReq := r.Clone(r.Context())
+	proxyReq.URL.Path = "/v1/chat/completions"
+	proxyReq.Body = io.NopCloser(bytes.NewReader(translatedReq))
+	proxyReq.ContentLength = int64(len(translatedReq))
+
+	if stream && !isVercelPrepare {
+		w.Header().Set("Content-Type", "text/event-stream")
+		w.Header().Set("Cache-Control", "no-cache, no-transform")
+		w.Header().Set("Connection", "keep-alive")
+		w.Header().Set("X-Accel-Buffering", "no")
+		streamWriter := translatorcliproxy.NewOpenAIStreamTranslatorWriter(w, sdktranslator.FormatClaude, model, raw, translatedReq)
+		h.OpenAI.ChatCompletions(streamWriter, proxyReq)
+		return true
 	}
-	result := sse.CollectStream(resp, stdReq.Thinking, true)
-	respBody := claudefmt.BuildMessageResponse(
-		fmt.Sprintf("msg_%d", time.Now().UnixNano()),
-		stdReq.ResponseModel,
-		norm.NormalizedMessages,
-		result.Thinking,
-		result.Text,
-		stdReq.ToolNames,
-	)
-	writeJSON(w, http.StatusOK, respBody)
+
+	rec := httptest.NewRecorder()
+	h.OpenAI.ChatCompletions(rec, proxyReq)
+	res := rec.Result()
+	defer res.Body.Close()
+	body, _ := io.ReadAll(res.Body)
+	if res.StatusCode < 200 || res.StatusCode >= 300 {
+		for k, vv := range res.Header {
+			for _, v := range vv {
+				w.Header().Add(k, v)
+			}
+		}
+		w.WriteHeader(res.StatusCode)
+		_, _ = w.Write(body)
+		return true
+	}
+	if isVercelPrepare {
+		for k, vv := range res.Header {
+			for _, v := range vv {
+				w.Header().Add(k, v)
+			}
+		}
+		w.WriteHeader(res.StatusCode)
+		_, _ = w.Write(body)
+		return true
+	}
+	converted := translatorcliproxy.FromOpenAINonStream(sdktranslator.FormatClaude, model, raw, translatedReq, body)
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_, _ = w.Write(converted)
+	return true
 }

 func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Request, resp *http.Response, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string) {
--- a/internal/adapter/claude/handler_routes.go
+++ b/internal/adapter/claude/handler_routes.go
@@ -15,9 +15,10 @@ import (
 var writeJSON = util.WriteJSON

 type Handler struct {
-	Store ConfigReader
-	Auth  AuthResolver
-	DS    DeepSeekCaller
+	Store  ConfigReader
+	Auth   AuthResolver
+	DS     DeepSeekCaller
+	OpenAI OpenAIChatRunner
 }

 var (
--- a/internal/adapter/claude/handler_stream_test.go
+++ b/internal/adapter/claude/handler_stream_test.go
@@ -315,3 +315,122 @@ func asString(v any) string {
 	s, _ := v.(string)
 	return s
 }
+
+func TestHandleClaudeStreamRealtimeToolSafetyAcrossStructuredFormats(t *testing.T) {
+	tests := []struct {
+		name    string
+		payload string
+	}{
+		{name: "xml_tool_call", payload: `<tool_call><tool_name>Bash</tool_name><parameters><command>pwd</command></parameters></tool_call>`},
+		{name: "xml_json_tool_call", payload: `<tool_call>{"tool":"Bash","params":{"command":"pwd"}}</tool_call>`},
+		{name: "nested_tool_tag_style", payload: `<tool_call><tool name="Bash"><command>pwd</command></tool></tool_call>`},
+		{name: "function_tag_style", payload: `<function_call>Bash</function_call><function parameter name="command">pwd</function parameter>`},
+		{name: "antml_argument_style", payload: `<antml:function_calls><antml:function_call id="1" name="Bash"><antml:argument name="command">pwd</antml:argument></antml:function_call></antml:function_calls>`},
+		{name: "antml_function_attr_parameters", payload: `<antml:function_calls><antml:function_call id="1" function="Bash"><antml:parameters>{"command":"pwd"}</antml:parameters></antml:function_call></antml:function_calls>`},
+		{name: "invoke_parameter_style", payload: `<function_calls><invoke name="Bash"><parameter name="command">pwd</parameter></invoke></function_calls>`},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			h := &Handler{}
+			resp := makeClaudeSSEHTTPResponse(
+				`data: {"p":"response/content","v":"`+strings.ReplaceAll(tc.payload, `"`, `\"`)+`"}`,
+				`data: [DONE]`,
+			)
+			rec := httptest.NewRecorder()
+			req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+
+			h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "use tool"}}, false, false, []string{"Bash"})
+
+			frames := parseClaudeFrames(t, rec.Body.String())
+			foundToolUse := false
+			for _, f := range findClaudeFrames(frames, "content_block_start") {
+				contentBlock, _ := f.Payload["content_block"].(map[string]any)
+				if contentBlock["type"] == "tool_use" {
+					foundToolUse = true
+					break
+				}
+			}
+			if !foundToolUse {
+				t.Fatalf("expected tool_use block for format %s, body=%s", tc.name, rec.Body.String())
+			}
+		})
+	}
+}
+
+func TestHandleClaudeStreamRealtimeDetectsToolUseWithLeadingProse(t *testing.T) {
+	h := &Handler{}
+	payload := "I'll call a tool now.\\n<tool_use><tool_name>write_file</tool_name><parameters>{\\\"path\\\":\\\"/tmp/a.txt\\\",\\\"content\\\":\\\"abc\\\"}</parameters></tool_use>"
+	resp := makeClaudeSSEHTTPResponse(
+		`data: {"p":"response/content","v":"`+payload+`"}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+
+	h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "use tool"}}, false, false, []string{"write_file"})
+
+	frames := parseClaudeFrames(t, rec.Body.String())
+	foundToolUse := false
+	for _, f := range findClaudeFrames(frames, "content_block_start") {
+		contentBlock, _ := f.Payload["content_block"].(map[string]any)
+		if contentBlock["type"] == "tool_use" && contentBlock["name"] == "write_file" {
+			foundToolUse = true
+			break
+		}
+	}
+	if !foundToolUse {
+		t.Fatalf("expected tool_use block with leading prose payload, body=%s", rec.Body.String())
+	}
+
+	for _, f := range findClaudeFrames(frames, "message_delta") {
+		delta, _ := f.Payload["delta"].(map[string]any)
+		if delta["stop_reason"] == "tool_use" {
+			return
+		}
+	}
+	t.Fatalf("expected stop_reason=tool_use, body=%s", rec.Body.String())
+}
+
+func TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t *testing.T) {
+	h := &Handler{}
+	resp := makeClaudeSSEHTTPResponse(
+		"data: {\"p\":\"response/content\",\"v\":\"Here is an example:\\n```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"Bash\\\",\\\"input\\\":{\\\"command\\\":\\\"pwd\\\"}}]}\"}",
+		"data: {\"p\":\"response/content\",\"v\":\"\\n```\\nDo not execute it.\"}",
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+
+	h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "show example only"}}, false, false, []string{"Bash"})
+
+	frames := parseClaudeFrames(t, rec.Body.String())
+	foundToolUse := false
+	for _, f := range findClaudeFrames(frames, "content_block_start") {
+		contentBlock, _ := f.Payload["content_block"].(map[string]any)
+		if contentBlock["type"] == "tool_use" {
+			foundToolUse = true
+			break
+		}
+	}
+	if foundToolUse {
+		t.Fatalf("expected no tool_use for fenced example, body=%s", rec.Body.String())
+	}
+
+	foundToolStop := false
+	for _, f := range findClaudeFrames(frames, "message_delta") {
+		delta, _ := f.Payload["delta"].(map[string]any)
+		if delta["stop_reason"] == "tool_use" {
+			foundToolStop = true
+			break
+		}
+	}
+	if foundToolStop {
+		t.Fatalf("expected stop_reason to remain content-only, body=%s", rec.Body.String())
+	}
+}
+
+// Backward-compatible alias for historical test name used in CI logs.
+func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) {
+	TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t)
+}
--- a/internal/adapter/claude/handler_util_test.go
+++ b/internal/adapter/claude/handler_util_test.go
@@ -48,10 +48,88 @@ func TestNormalizeClaudeMessagesToolResult(t *testing.T) {
 		},
 	}
 	got := normalizeClaudeMessages(msgs)
+	if len(got) != 1 {
+		t.Fatalf("expected one normalized message, got %d", len(got))
+	}
 	m := got[0].(map[string]any)
+	if m["role"] != "tool" {
+		t.Fatalf("expected tool role preserved, got %#v", m["role"])
+	}
 	content, _ := m["content"].(string)
-	if !strings.Contains(content, "[TOOL_RESULT_HISTORY]") || !strings.Contains(content, "content: tool output") {
-		t.Fatalf("expected serialized tool result marker, got %q", content)
+	if content != "tool output" {
+		t.Fatalf("expected raw tool output content preserved, got %q", content)
+	}
+}
+
+func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) {
+	msgs := []any{
+		map[string]any{
+			"role": "assistant",
+			"content": []any{
+				map[string]any{
+					"type":  "tool_use",
+					"id":    "call_1",
+					"name":  "search_web",
+					"input": map[string]any{"query": "latest"},
+				},
+			},
+		},
+	}
+
+	got := normalizeClaudeMessages(msgs)
+	if len(got) != 1 {
+		t.Fatalf("expected one normalized tool-call message, got %d", len(got))
+	}
+	m := got[0].(map[string]any)
+	if m["role"] != "assistant" {
+		t.Fatalf("expected assistant role, got %#v", m["role"])
+	}
+	tc, _ := m["tool_calls"].([]any)
+	if len(tc) != 1 {
+		t.Fatalf("expected one tool call, got %#v", m["tool_calls"])
+	}
+	call, _ := tc[0].(map[string]any)
+	if call["id"] != "call_1" {
+		t.Fatalf("expected call id preserved, got %#v", call)
+	}
+	content, _ := m["content"].(string)
+	if !containsStr(content, "<tool_calls>") || !containsStr(content, "<tool_name>search_web</tool_name>") {
+		t.Fatalf("expected assistant content to include XML tool call history, got %q", content)
+	}
+	if !containsStr(content, `<parameters>{"query":"latest"}</parameters>`) {
+		t.Fatalf("expected assistant content to include serialized parameters, got %q", content)
+	}
+}
+
+func TestNormalizeClaudeMessagesDoesNotPromoteUserToolUse(t *testing.T) {
+	msgs := []any{
+		map[string]any{
+			"role": "user",
+			"content": []any{
+				map[string]any{
+					"type":  "tool_use",
+					"id":    "call_unsafe",
+					"name":  "dangerous_tool",
+					"input": map[string]any{"value": "x"},
+				},
+			},
+		},
+	}
+
+	got := normalizeClaudeMessages(msgs)
+	if len(got) != 1 {
+		t.Fatalf("expected one normalized message, got %d", len(got))
+	}
+	m := got[0].(map[string]any)
+	if m["role"] != "user" {
+		t.Fatalf("expected user role preserved, got %#v", m["role"])
+	}
+	if _, ok := m["tool_calls"]; ok {
+		t.Fatalf("expected no tool_calls promotion for user message, got %#v", m["tool_calls"])
+	}
+	content, _ := m["content"].(string)
+	if !containsStr(content, `"type":"tool_use"`) || !containsStr(content, "dangerous_tool") {
+		t.Fatalf("expected raw tool_use block preserved in user content, got %q", content)
 	}
 }

@@ -87,15 +165,104 @@ func TestNormalizeClaudeMessagesMixedContentBlocks(t *testing.T) {
 			"role": "user",
 			"content": []any{
 				map[string]any{"type": "text", "text": "Hello"},
-				map[string]any{"type": "image", "source": "data:..."},
+				map[string]any{"type": "image", "source": map[string]any{"type": "base64", "data": strings.Repeat("A", 2048)}},
 				map[string]any{"type": "text", "text": "World"},
 			},
 		},
 	}
 	got := normalizeClaudeMessages(msgs)
 	m := got[0].(map[string]any)
-	if m["content"] != "Hello\nWorld" {
-		t.Fatalf("expected only text parts joined, got %q", m["content"])
+	content, _ := m["content"].(string)
+	if !containsStr(content, "Hello") || !containsStr(content, "World") || !containsStr(content, `"type":"image"`) {
+		t.Fatalf("expected text plus non-text block marker preserved, got %q", content)
+	}
+	if !containsStr(content, omittedBinaryMarker) {
+		t.Fatalf("expected binary payload omitted marker, got %q", content)
+	}
+	if containsStr(content, strings.Repeat("A", 100)) {
+		t.Fatalf("expected raw base64 payload not to be included, got %q", content)
+	}
+}
+
+func TestNormalizeClaudeMessagesToolResultNonTextPayloadStringified(t *testing.T) {
+	msgs := []any{
+		map[string]any{
+			"role": "user",
+			"content": []any{
+				map[string]any{
+					"type":        "tool_result",
+					"tool_use_id": "call_image_1",
+					"name":        "vision_tool",
+					"content": []any{
+						map[string]any{"type": "text", "text": "image analysis"},
+						map[string]any{
+							"type":   "image",
+							"source": map[string]any{"type": "base64", "media_type": "image/png", "data": strings.Repeat("B", 2048)},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	got := normalizeClaudeMessages(msgs)
+	if len(got) != 1 {
+		t.Fatalf("expected one normalized message, got %d", len(got))
+	}
+	m := got[0].(map[string]any)
+	if m["role"] != "tool" {
+		t.Fatalf("expected tool role, got %#v", m["role"])
+	}
+	content, _ := m["content"].(string)
+	if !containsStr(content, `"type":"tool_result"`) || !containsStr(content, `"type":"image"`) {
+		t.Fatalf("expected non-text tool_result payload to be JSON stringified, got %q", content)
+	}
+	if !containsStr(content, omittedBinaryMarker) {
+		t.Fatalf("expected binary data to be sanitized with omitted marker, got %q", content)
+	}
+	if containsStr(content, strings.Repeat("B", 100)) {
+		t.Fatalf("expected raw base64 payload not to be included, got %q", content)
+	}
+}
+
+func TestNormalizeClaudeMessagesBackfillsToolResultCallIDByName(t *testing.T) {
+	msgs := []any{
+		map[string]any{
+			"role": "assistant",
+			"content": []any{
+				map[string]any{
+					"type":  "tool_use",
+					"name":  "search_web",
+					"input": map[string]any{"query": "latest"},
+				},
+			},
+		},
+		map[string]any{
+			"role": "user",
+			"content": []any{
+				map[string]any{
+					"type":    "tool_result",
+					"name":    "search_web",
+					"content": "ok",
+				},
+			},
+		},
+	}
+
+	got := normalizeClaudeMessages(msgs)
+	if len(got) != 2 {
+		t.Fatalf("expected 2 messages, got %#v", got)
+	}
+	assistant, _ := got[0].(map[string]any)
+	tc, _ := assistant["tool_calls"].([]any)
+	call, _ := tc[0].(map[string]any)
+	callID, _ := call["id"].(string)
+	if !strings.HasPrefix(callID, "call_claude_") {
+		t.Fatalf("expected generated call id, got %#v", call)
+	}
+	toolMsg, _ := got[1].(map[string]any)
+	if toolMsg["tool_call_id"] != callID {
+		t.Fatalf("expected tool_result to reuse generated id, got %#v", toolMsg)
 	}
 }

@@ -125,8 +292,11 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) {
 	if !containsStr(prompt, "Search the web") {
 		t.Fatalf("expected description in prompt")
 	}
-	if !containsStr(prompt, "tool_calls") {
-		t.Fatalf("expected tool_calls instruction in prompt")
+	if !containsStr(prompt, "<tool_calls>") {
+		t.Fatalf("expected XML tool_calls format in prompt")
+	}
+	if !containsStr(prompt, "TOOL CALL FORMAT") {
+		t.Fatalf("expected tool call format header in prompt")
 	}
 }

@@ -172,12 +342,9 @@ func TestBuildClaudeToolPromptSupportsOpenAIStyleFunctionTool(t *testing.T) {
 func TestBuildClaudeToolPromptSkipsNonMap(t *testing.T) {
 	tools := []any{"not a map"}
 	prompt := buildClaudeToolPrompt(tools)
-	if prompt == "" {
-		t.Fatal("expected non-empty prompt even with invalid tools")
-	}
-	// Should still contain the intro and instruction
-	if !containsStr(prompt, "You are Claude") {
-		t.Fatalf("expected intro in prompt")
+	// No valid tools → empty prompt
+	if prompt != "" {
+		t.Fatalf("expected empty prompt for non-map tools, got: %q", prompt)
 	}
 }

--- a/internal/adapter/claude/handler_utils.go
+++ b/internal/adapter/claude/handler_utils.go
@@ -4,169 +4,222 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+
+	"ds2api/internal/prompt"
+	"ds2api/internal/util"
 )

 func normalizeClaudeMessages(messages []any) []any {
 	out := make([]any, 0, len(messages))
+	state := &claudeToolCallState{
+		nameByID:       map[string]string{},
+		lastIDByName:   map[string]string{},
+		callIDSequence: 0,
+	}
 	for _, m := range messages {
 		msg, ok := m.(map[string]any)
 		if !ok {
 			continue
 		}
-		copied := cloneMap(msg)
+		role := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", msg["role"])))
 		switch content := msg["content"].(type) {
 		case []any:
-			parts := make([]string, 0, len(content))
+			textParts := make([]string, 0, len(content))
+			flushText := func() {
+				if len(textParts) == 0 {
+					return
+				}
+				out = append(out, map[string]any{
+					"role":    role,
+					"content": strings.Join(textParts, "\n"),
+				})
+				textParts = textParts[:0]
+			}
 			for _, block := range content {
 				b, ok := block.(map[string]any)
 				if !ok {
 					continue
 				}
-				typeStr, _ := b["type"].(string)
-				if typeStr == "text" {
+				typeStr := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", b["type"])))
+				switch typeStr {
+				case "text":
 					if t, ok := b["text"].(string); ok {
-						parts = append(parts, t)
+						textParts = append(textParts, t)
+					}
+				case "tool_use":
+					if role == "assistant" {
+						flushText()
+						if toolMsg := normalizeClaudeToolUseToAssistant(b, state); toolMsg != nil {
+							out = append(out, toolMsg)
+						}
+						continue
+					}
+					if raw := strings.TrimSpace(formatClaudeUnknownBlockForPrompt(b)); raw != "" {
+						textParts = append(textParts, raw)
+					}
+				case "tool_result":
+					flushText()
+					if toolMsg := normalizeClaudeToolResultToToolMessage(b, state); toolMsg != nil {
+						out = append(out, toolMsg)
+					}
+				default:
+					if raw := strings.TrimSpace(formatClaudeUnknownBlockForPrompt(b)); raw != "" {
+						textParts = append(textParts, raw)
 					}
 				}
-				if typeStr == "tool_result" {
-					parts = append(parts, formatClaudeToolResultForPrompt(b))
-				}
 			}
-			copied["content"] = strings.Join(parts, "\n")
+			flushText()
+		default:
+			copied := cloneMap(msg)
+			out = append(out, copied)
 		}
-		out = append(out, copied)
 	}
 	return out
 }

 func buildClaudeToolPrompt(tools []any) string {
-	parts := []string{"You are Claude, a helpful AI assistant. You have access to these tools:"}
+	toolSchemas := make([]string, 0, len(tools))
+	names := make([]string, 0, len(tools))
 	for _, t := range tools {
 		m, ok := t.(map[string]any)
 		if !ok {
 			continue
 		}
 		name, desc, schemaObj := extractClaudeToolMeta(m)
+		if name == "" {
+			continue
+		}
+		names = append(names, name)
 		schema, _ := json.Marshal(schemaObj)
-		parts = append(parts, fmt.Sprintf("Tool: %s\nDescription: %s\nParameters: %s", name, desc, schema))
+		toolSchemas = append(toolSchemas, fmt.Sprintf("Tool: %s\nDescription: %s\nParameters: %s", name, desc, schema))
 	}
-	parts = append(parts,
-		"When you need to use tools, you can call multiple tools in one response. Output ONLY JSON like {\"tool_calls\":[{\"name\":\"tool\",\"input\":{}}]}",
-		"History markers in conversation: [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY] are your previous tool calls; [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] are runtime tool outputs, not user input.",
-		"After a valid [TOOL_RESULT_HISTORY], continue with final answer instead of repeating the same call unless required fields are still missing.",
-	)
-	return strings.Join(parts, "\n\n")
+	if len(toolSchemas) == 0 {
+		return ""
+	}
+	return "You have access to these tools:\n\n" +
+		strings.Join(toolSchemas, "\n\n") + "\n\n" +
+		util.BuildToolCallInstructions(names)
 }

 func formatClaudeToolResultForPrompt(block map[string]any) string {
 	if block == nil {
 		return ""
 	}
-	toolCallID := strings.TrimSpace(fmt.Sprintf("%v", block["tool_use_id"]))
-	if toolCallID == "" {
-		toolCallID = strings.TrimSpace(fmt.Sprintf("%v", block["tool_call_id"]))
+	payload := map[string]any{
+		"type":    "tool_result",
+		"content": block["content"],
 	}
-	if toolCallID == "" {
-		toolCallID = "unknown"
+	if toolCallID := strings.TrimSpace(fmt.Sprintf("%v", block["tool_use_id"])); toolCallID != "" {
+		payload["tool_call_id"] = toolCallID
+	} else if toolCallID := strings.TrimSpace(fmt.Sprintf("%v", block["tool_call_id"])); toolCallID != "" {
+		payload["tool_call_id"] = toolCallID
+	}
+	if name := strings.TrimSpace(fmt.Sprintf("%v", block["name"])); name != "" {
+		payload["name"] = name
+	}
+	b, err := json.Marshal(payload)
+	if err != nil {
+		return strings.TrimSpace(fmt.Sprintf("%v", payload))
+	}
+	return string(b)
+}
+
+func normalizeClaudeToolUseToAssistant(block map[string]any, state *claudeToolCallState) map[string]any {
+	if block == nil {
+		return nil
 	}
 	name := strings.TrimSpace(fmt.Sprintf("%v", block["name"]))
 	if name == "" {
-		name = "unknown"
-	}
-	content := strings.TrimSpace(fmt.Sprintf("%v", block["content"]))
-	if content == "" {
-		content = "null"
-	}
-	return fmt.Sprintf("[TOOL_RESULT_HISTORY]\nstatus: already_returned\norigin: tool_runtime\nnot_user_input: true\ntool_call_id: %s\nname: %s\ncontent: %s\n[/TOOL_RESULT_HISTORY]", toolCallID, name, content)
-}
-
-func hasSystemMessage(messages []any) bool {
-	for _, m := range messages {
-		msg, ok := m.(map[string]any)
-		if ok && msg["role"] == "system" {
-			return true
-		}
-	}
-	return false
-}
-
-func extractClaudeToolNames(tools []any) []string {
-	out := make([]string, 0, len(tools))
-	for _, t := range tools {
-		m, ok := t.(map[string]any)
-		if !ok {
-			continue
-		}
-		name, _, _ := extractClaudeToolMeta(m)
-		if name != "" {
-			out = append(out, name)
-		}
-	}
-	return out
-}
-
-func extractClaudeToolMeta(m map[string]any) (string, string, any) {
-	name, _ := m["name"].(string)
-	desc, _ := m["description"].(string)
-	schemaObj := m["input_schema"]
-	if schemaObj == nil {
-		schemaObj = m["parameters"]
-	}
-
-	if fn, ok := m["function"].(map[string]any); ok {
-		if strings.TrimSpace(name) == "" {
-			name, _ = fn["name"].(string)
-		}
-		if strings.TrimSpace(desc) == "" {
-			desc, _ = fn["description"].(string)
-		}
-		if schemaObj == nil {
-			if v, ok := fn["input_schema"]; ok {
-				schemaObj = v
-			}
-		}
-		if schemaObj == nil {
-			if v, ok := fn["parameters"]; ok {
-				schemaObj = v
-			}
-		}
-	}
-	return strings.TrimSpace(name), strings.TrimSpace(desc), schemaObj
-}
-
-func toMessageMaps(v any) []map[string]any {
-	arr, ok := v.([]any)
-	if !ok {
 		return nil
 	}
-	out := make([]map[string]any, 0, len(arr))
-	for _, item := range arr {
-		if m, ok := item.(map[string]any); ok {
-			out = append(out, m)
+	callID := safeStringValue(block["id"])
+	if callID == "" {
+		callID = safeStringValue(block["tool_use_id"])
+	}
+	if callID == "" {
+		callID = state.nextID()
+	}
+	state.nameByID[callID] = name
+	state.lastIDByName[strings.ToLower(name)] = callID
+	arguments := block["input"]
+	if arguments == nil {
+		arguments = map[string]any{}
+	}
+	argsJSON, err := json.Marshal(arguments)
+	if err != nil || len(argsJSON) == 0 {
+		argsJSON = []byte("{}")
+	}
+	toolCalls := []any{
+		map[string]any{
+			"id":   callID,
+			"type": "function",
+			"function": map[string]any{
+				"name":      name,
+				"arguments": string(argsJSON),
+			},
+		},
+	}
+	return map[string]any{
+		"role":       "assistant",
+		"content":    prompt.FormatToolCallsForPrompt(toolCalls),
+		"tool_calls": toolCalls,
+	}
+}
+
+func normalizeClaudeToolResultToToolMessage(block map[string]any, state *claudeToolCallState) map[string]any {
+	if block == nil {
+		return nil
+	}
+	name := safeStringValue(block["name"])
+	toolCallID := safeStringValue(block["tool_use_id"])
+	if toolCallID == "" {
+		toolCallID = safeStringValue(block["tool_call_id"])
+	}
+	if toolCallID == "" {
+		if name != "" {
+			toolCallID = strings.TrimSpace(state.lastIDByName[strings.ToLower(name)])
 		}
 	}
+	if toolCallID == "" {
+		toolCallID = state.nextID()
+	}
+	out := map[string]any{
+		"role":         "tool",
+		"tool_call_id": toolCallID,
+		"content":      normalizeClaudeToolResultContent(block["content"]),
+	}
+	if name != "" {
+		out["name"] = name
+		state.nameByID[toolCallID] = name
+		state.lastIDByName[strings.ToLower(name)] = toolCallID
+	} else if inferred := strings.TrimSpace(state.nameByID[toolCallID]); inferred != "" {
+		out["name"] = inferred
+	}
 	return out
 }

-func extractMessageContent(v any) string {
-	switch x := v.(type) {
-	case string:
-		return x
-	case []any:
-		parts := make([]string, 0, len(x))
-		for _, it := range x {
-			parts = append(parts, fmt.Sprintf("%v", it))
-		}
-		return strings.Join(parts, "\n")
-	default:
-		return fmt.Sprintf("%v", x)
+func normalizeClaudeToolResultContent(content any) any {
+	if text, ok := content.(string); ok {
+		return text
 	}
+	payload := map[string]any{
+		"type":    "tool_result",
+		"content": content,
+	}
+	b, err := json.Marshal(sanitizeClaudeBlockForPrompt(payload))
+	if err != nil {
+		return strings.TrimSpace(fmt.Sprintf("%v", content))
+	}
+	return string(b)
 }

-func cloneMap(in map[string]any) map[string]any {
-	out := make(map[string]any, len(in))
-	for k, v := range in {
-		out[k] = v
+func formatClaudeBlockRaw(block map[string]any) string {
+	if block == nil {
+		return ""
 	}
-	return out
+	b, err := json.Marshal(block)
+	if err != nil {
+		return strings.TrimSpace(fmt.Sprintf("%v", block))
+	}
+	return string(b)
 }
--- a/internal/adapter/claude/handler_utils_sanitize.go
+++ b/internal/adapter/claude/handler_utils_sanitize.go
@@ -0,0 +1,105 @@
+package claude
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+)
+
+const (
+	maxClaudeRawPromptChars = 1024
+	omittedBinaryMarker     = "[omitted_binary_payload]"
+)
+
+// formatClaudeUnknownBlockForPrompt renders block as sanitized JSON text, capped at maxClaudeRawPromptChars bytes ("" for nil/empty).
+func formatClaudeUnknownBlockForPrompt(block map[string]any) string {
+	if block == nil {
+		return ""
+	}
+	raw := strings.TrimSpace(formatClaudeBlockRaw(sanitizeClaudeBlockForPrompt(block)))
+	if raw == "" {
+		return ""
+	}
+	if len(raw) > maxClaudeRawPromptChars { // byte cut may split a multi-byte rune; ToValidUTF8 drops the fragment
+		return strings.ToValidUTF8(raw[:maxClaudeRawPromptChars], "") + "...(truncated)"
+	}
+	return raw
+}
+
+func sanitizeClaudeBlockForPrompt(block map[string]any) map[string]any {
+	out := cloneMap(block)
+	for k, v := range out {
+		if looksLikeBinaryFieldName(k) {
+			out[k] = omittedBinaryMarker
+			continue
+		}
+		switch inner := v.(type) {
+		case map[string]any:
+			out[k] = sanitizeClaudeBlockForPrompt(inner)
+		case []any:
+			out[k] = sanitizeClaudeArrayForPrompt(inner)
+		case string:
+			out[k] = sanitizeClaudeStringForPrompt(k, inner)
+		}
+	}
+	return out
+}
+
+func sanitizeClaudeArrayForPrompt(items []any) []any {
+	out := make([]any, 0, len(items))
+	for _, item := range items {
+		switch v := item.(type) {
+		case map[string]any:
+			out = append(out, sanitizeClaudeBlockForPrompt(v))
+		case []any:
+			out = append(out, sanitizeClaudeArrayForPrompt(v))
+		default:
+			out = append(out, v)
+		}
+	}
+	return out
+}
+
+// sanitizeClaudeStringForPrompt trims value, redacts binary-looking payloads, and caps the rest at maxClaudeRawPromptChars bytes.
+func sanitizeClaudeStringForPrompt(key, value string) string {
+	trimmed := strings.TrimSpace(value)
+	switch {
+	case trimmed == "":
+		return ""
+	case looksLikeBinaryFieldName(key) || looksLikeBase64Payload(trimmed):
+		return omittedBinaryMarker
+	case len(trimmed) > maxClaudeRawPromptChars: // byte cut may split a multi-byte rune; ToValidUTF8 drops the fragment
+		return strings.ToValidUTF8(trimmed[:maxClaudeRawPromptChars], "") + "...(truncated)"
+	}
+	return trimmed
+}
+
+func looksLikeBinaryFieldName(name string) bool {
+	n := strings.ToLower(strings.TrimSpace(name))
+	return n == "data" || n == "bytes" || n == "base64" || n == "inline_data" || n == "inlinedata"
+}
+
+// looksLikeBase64Payload reports whether v is at least 512 bytes long and, ignoring trailing '=', uses only base64/base64url characters.
+func looksLikeBase64Payload(v string) bool {
+	if len(v) < 512 {
+		return false
+	}
+	body := strings.TrimRight(v, "=")
+	for _, r := range body {
+		switch {
+		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
+		case r == '+', r == '/', r == '-', r == '_':
+		default:
+			return false
+		}
+	}
+	return body != ""
+}
+
+// marshalCompactJSON encodes v as JSON; if encoding fails it falls back to the trimmed %v rendering of v.
+func marshalCompactJSON(v any) string {
+	if encoded, err := json.Marshal(v); err == nil {
+		return string(encoded)
+	}
+	return strings.TrimSpace(fmt.Sprintf("%v", v))
+}
--- a/internal/adapter/claude/proxy_vercel_test.go
+++ b/internal/adapter/claude/proxy_vercel_test.go
@@ -0,0 +1,84 @@
+package claude
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+)
+
+type claudeProxyStoreStub struct {
+	mapping map[string]string
+}
+
+func (s claudeProxyStoreStub) ClaudeMapping() map[string]string {
+	return s.mapping
+}
+
+type openAIProxyStub struct {
+	status int
+	body   string
+}
+
+func (s openAIProxyStub) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
+	if s.status == 0 {
+		s.status = http.StatusOK
+	}
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(s.status)
+	_, _ = w.Write([]byte(s.body))
+}
+
+type openAIProxyCaptureStub struct {
+	seenModel string
+}
+
+func (s *openAIProxyCaptureStub) ChatCompletions(w http.ResponseWriter, r *http.Request) {
+	var req map[string]any
+	_ = json.NewDecoder(r.Body).Decode(&req)
+	if m, ok := req["model"].(string); ok {
+		s.seenModel = m
+	}
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_, _ = w.Write([]byte(`{"id":"ok","choices":[{"message":{"role":"assistant","content":"ok"}}]}`))
+}
+
+func TestClaudeProxyViaOpenAIVercelPreparePassthrough(t *testing.T) {
+	h := &Handler{OpenAI: openAIProxyStub{status: 200, body: `{"lease_id":"lease_123","payload":{"a":1}}`}}
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages?__stream_prepare=1", strings.NewReader(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":true}`))
+	rec := httptest.NewRecorder()
+
+	h.Messages(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
+	}
+	var out map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
+		t.Fatalf("expected json response, got err=%v body=%s", err, rec.Body.String())
+	}
+	if _, ok := out["lease_id"]; !ok {
+		t.Fatalf("expected lease_id in prepare passthrough, got=%v", out)
+	}
+}
+
+func TestClaudeProxyViaOpenAIPreservesClaudeMapping(t *testing.T) {
+	openAI := &openAIProxyCaptureStub{}
+	h := &Handler{
+		Store:  claudeProxyStoreStub{mapping: map[string]string{"fast": "deepseek-chat", "slow": "deepseek-reasoner"}},
+		OpenAI: openAI,
+	}
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(`{"model":"claude-3-opus","messages":[{"role":"user","content":"hi"}],"stream":false}`))
+	rec := httptest.NewRecorder()
+
+	h.Messages(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
+	}
+	if got := strings.TrimSpace(openAI.seenModel); got != "deepseek-reasoner" {
+		t.Fatalf("expected mapped proxy model deepseek-reasoner, got %q", got)
+	}
+}
--- a/internal/adapter/claude/standard_request.go
+++ b/internal/adapter/claude/standard_request.go
@@ -38,6 +38,9 @@ func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNorma
 	}
 	finalPrompt := deepseek.MessagesPrepare(toMessageMaps(dsPayload["messages"]))
 	toolNames := extractClaudeToolNames(toolsRequested)
+	if len(toolNames) == 0 && len(toolsRequested) > 0 {
+		toolNames = []string{"__any_tool__"}
+	}

 	return claudeNormalizedRequest{
 		Standard: util.StandardRequest{
--- a/internal/adapter/claude/stream_runtime_core.go
+++ b/internal/adapter/claude/stream_runtime_core.go
@@ -26,6 +26,7 @@ type claudeStreamRuntime struct {
 	messageID string
 	thinking  strings.Builder
 	text      strings.Builder
+	outputTokens int

 	nextBlockIndex     int
 	thinkingBlockOpen  bool
@@ -66,6 +67,9 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
 	if !parsed.Parsed {
 		return streamengine.ParsedDecision{}
 	}
+	if parsed.OutputTokens > 0 {
+		s.outputTokens = parsed.OutputTokens
+	}
 	if parsed.ErrorMessage != "" {
 		s.upstreamErr = parsed.ErrorMessage
 		return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("upstream_error")}
@@ -116,6 +120,9 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse

 		s.text.WriteString(p.Text)
 		if s.bufferToolContent {
+			if hasUnclosedCodeFence(s.text.String()) {
+				continue
+			}
 			continue
 		}
 		s.closeThinkingBlock()
@@ -144,3 +151,7 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse

 	return streamengine.ParsedDecision{ContentSeen: contentSeen}
 }
+
+func hasUnclosedCodeFence(text string) bool {
+	return strings.Count(text, "```")%2 == 1
+}
--- a/internal/adapter/claude/stream_runtime_finalize.go
+++ b/internal/adapter/claude/stream_runtime_finalize.go
@@ -1,6 +1,7 @@
 package claude

 import (
+	"encoding/json"
 	"fmt"
 	"time"

@@ -45,9 +46,9 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
 	finalText := s.text.String()

 	if s.bufferToolContent {
-		detected := util.ParseToolCalls(finalText, s.toolNames)
+		detected := util.ParseStandaloneToolCalls(finalText, s.toolNames)
 		if len(detected) == 0 && finalText == "" && finalThinking != "" {
-			detected = util.ParseToolCalls(finalThinking, s.toolNames)
+			detected = util.ParseStandaloneToolCalls(finalThinking, s.toolNames)
 		}
 		if len(detected) > 0 {
 			stopReason = "tool_use"
@@ -60,9 +61,20 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
 						"type":  "tool_use",
 						"id":    fmt.Sprintf("toolu_%d_%d", time.Now().Unix(), idx),
 						"name":  tc.Name,
-						"input": tc.Input,
+						"input": map[string]any{},
 					},
 				})
+				
+				inputBytes, _ := json.Marshal(tc.Input)
+				s.send("content_block_delta", map[string]any{
+					"type":  "content_block_delta",
+					"index": idx,
+					"delta": map[string]any{
+						"type":         "input_json_delta",
+						"partial_json": string(inputBytes),
+					},
+				})
+
 				s.send("content_block_stop", map[string]any{
 					"type":  "content_block_stop",
 					"index": idx,
@@ -96,6 +108,9 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
 	}

 	outputTokens := util.EstimateTokens(finalThinking) + util.EstimateTokens(finalText)
+	if s.outputTokens > 0 {
+		outputTokens = s.outputTokens
+	}
 	s.send("message_delta", map[string]any{
 		"type": "message_delta",
 		"delta": map[string]any{
--- a/internal/adapter/claude/stream_status_test.go
+++ b/internal/adapter/claude/stream_status_test.go
@@ -1,7 +1,6 @@
 package claude

 import (
-	"context"
 	"net/http"
 	"net/http/httptest"
 	"strings"
@@ -9,48 +8,17 @@ import (

 	"github.com/go-chi/chi/v5"
 	chimw "github.com/go-chi/chi/v5/middleware"
-
-	"ds2api/internal/auth"
 )

-type streamStatusClaudeAuthStub struct{}
+type streamStatusClaudeOpenAIStub struct{}

-func (streamStatusClaudeAuthStub) Determine(_ *http.Request) (*auth.RequestAuth, error) {
-	return &auth.RequestAuth{
-		UseConfigToken: false,
-		DeepSeekToken:  "direct-token",
-		CallerID:       "caller:test",
-		TriedAccounts:  map[string]bool{},
-	}, nil
+func (streamStatusClaudeOpenAIStub) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.WriteHeader(http.StatusOK)
+	_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-1\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"hello\"},\"finish_reason\":null}]}\n\n"))
+	_, _ = w.Write([]byte("data: [DONE]\n\n"))
 }

-func (streamStatusClaudeAuthStub) Release(_ *auth.RequestAuth) {}
-
-type streamStatusClaudeDSStub struct{}
-
-func (streamStatusClaudeDSStub) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
-	return "session-id", nil
-}
-
-func (streamStatusClaudeDSStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
-	return "pow", nil
-}
-
-func (streamStatusClaudeDSStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) {
-	body := "data: {\"p\":\"response/content\",\"v\":\"hello\"}\n" + "data: [DONE]\n"
-	return &http.Response{
-		StatusCode: http.StatusOK,
-		Header:     make(http.Header),
-		Body:       ioNopCloser{strings.NewReader(body)},
-	}, nil
-}
-
-type ioNopCloser struct {
-	*strings.Reader
-}
-
-func (ioNopCloser) Close() error { return nil }
-
 type streamStatusClaudeStoreStub struct{}

 func (streamStatusClaudeStoreStub) ClaudeMapping() map[string]string {
@@ -73,9 +41,8 @@ func captureClaudeStatusMiddleware(statuses *[]int) func(http.Handler) http.Hand
 func TestClaudeMessagesStreamStatusCapturedAs200(t *testing.T) {
 	statuses := make([]int, 0, 1)
 	h := &Handler{
-		Store: streamStatusClaudeStoreStub{},
-		Auth:  streamStatusClaudeAuthStub{},
-		DS:    streamStatusClaudeDSStub{},
+		Store:  streamStatusClaudeStoreStub{},
+		OpenAI: streamStatusClaudeOpenAIStub{},
 	}
 	r := chi.NewRouter()
 	r.Use(captureClaudeStatusMiddleware(&statuses))
@@ -83,7 +50,6 @@ func TestClaudeMessagesStreamStatusCapturedAs200(t *testing.T) {

 	reqBody := `{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":true}`
 	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(reqBody))
-	req.Header.Set("Authorization", "Bearer direct-token")
 	req.Header.Set("Content-Type", "application/json")
 	rec := httptest.NewRecorder()
 	r.ServeHTTP(rec, req)
--- a/internal/adapter/claude/tool_call_state.go
+++ b/internal/adapter/claude/tool_call_state.go
@@ -0,0 +1,25 @@
+package claude
+
+import (
+	"fmt"
+	"strings"
+)
+
+type claudeToolCallState struct {
+	nameByID       map[string]string
+	lastIDByName   map[string]string
+	callIDSequence int
+}
+
+func (s *claudeToolCallState) nextID() string {
+	s.callIDSequence++
+	return fmt.Sprintf("call_claude_%d", s.callIDSequence)
+}
+
+// safeStringValue returns v with surrounding whitespace trimmed when v is a string, and "" for any other type.
+func safeStringValue(v any) string {
+	if text, isString := v.(string); isString {
+		return strings.TrimSpace(text)
+	}
+	return ""
+}
--- a/internal/adapter/gemini/convert_messages.go
+++ b/internal/adapter/gemini/convert_messages.go
@@ -1,9 +1,20 @@
 package gemini

-import "strings"
+import (
+	"fmt"
+	"strings"
+)
+
+const maxGeminiRawPromptChars = 1024

 func geminiMessagesFromRequest(req map[string]any) []any {
 	out := make([]any, 0, 8)
+	toolCallCounter := 0
+	nextToolCallID := func() string {
+		toolCallCounter++
+		return fmt.Sprintf("call_gemini_%d", toolCallCounter)
+	}
+	lastToolCallIDByName := map[string]string{}
 	if sys := normalizeGeminiSystemInstruction(req["systemInstruction"]); strings.TrimSpace(sys) != "" {
 		out = append(out, map[string]any{
 			"role":    "system",
@@ -59,8 +70,11 @@ func geminiMessagesFromRequest(req map[string]any) []any {
 				if name := strings.TrimSpace(asString(fnCall["name"])); name != "" {
 					callID := strings.TrimSpace(asString(fnCall["id"]))
 					if callID == "" {
-						callID = "call_gemini"
+						if callID = strings.TrimSpace(asString(fnCall["call_id"])); callID == "" {
+							callID = nextToolCallID()
+						}
 					}
+					lastToolCallIDByName[strings.ToLower(name)] = callID
 					out = append(out, map[string]any{
 						"role": "assistant",
 						"tool_calls": []any{
@@ -89,7 +103,10 @@ func geminiMessagesFromRequest(req map[string]any) []any {
 					callID = strings.TrimSpace(asString(fnResp["tool_call_id"]))
 				}
 				if callID == "" {
-					callID = "call_gemini"
+					callID = strings.TrimSpace(lastToolCallIDByName[strings.ToLower(name)])
+				}
+				if callID == "" {
+					callID = nextToolCallID()
 				}
 				content := fnResp["response"]
 				if content == nil {
@@ -107,6 +124,11 @@ func geminiMessagesFromRequest(req map[string]any) []any {
 					msg["name"] = name
 				}
 				out = append(out, msg)
+				continue
+			}
+
+			if raw := strings.TrimSpace(formatGeminiUnknownPartForPrompt(part)); raw != "" && raw != "null" {
+				textParts = append(textParts, raw)
 			}
 		}
 		flushText()
@@ -151,3 +173,87 @@ func mapGeminiRole(v any) string {
 		return ""
 	}
 }
+
+// formatGeminiUnknownPartForPrompt renders part as sanitized JSON text, capped at maxGeminiRawPromptChars bytes ("" when empty).
+func formatGeminiUnknownPartForPrompt(part map[string]any) string {
+	raw := strings.TrimSpace(stringifyJSON(sanitizeGeminiPartForPrompt(part)))
+	if raw == "" {
+		return ""
+	}
+	if len(raw) > maxGeminiRawPromptChars { // byte cut may split a multi-byte rune; ToValidUTF8 drops the fragment
+		return strings.ToValidUTF8(raw[:maxGeminiRawPromptChars], "") + "...(truncated)"
+	}
+	return raw
+}
+
+func sanitizeGeminiPartForPrompt(part map[string]any) map[string]any {
+	out := make(map[string]any, len(part))
+	for k, v := range part {
+		if looksLikeGeminiBinaryField(k) {
+			out[k] = "[omitted_binary_payload]"
+			continue
+		}
+		switch x := v.(type) {
+		case map[string]any:
+			out[k] = sanitizeGeminiPartForPrompt(x)
+		case []any:
+			out[k] = sanitizeGeminiArrayForPrompt(x)
+		case string:
+			out[k] = sanitizeGeminiStringForPrompt(k, x)
+		default:
+			out[k] = v
+		}
+	}
+	return out
+}
+
+func sanitizeGeminiArrayForPrompt(items []any) []any {
+	out := make([]any, 0, len(items))
+	for _, item := range items {
+		switch x := item.(type) {
+		case map[string]any:
+			out = append(out, sanitizeGeminiPartForPrompt(x))
+		case []any:
+			out = append(out, sanitizeGeminiArrayForPrompt(x))
+		default:
+			out = append(out, x)
+		}
+	}
+	return out
+}
+
+// sanitizeGeminiStringForPrompt trims value, redacts binary-looking payloads, and caps the rest at maxGeminiRawPromptChars bytes.
+func sanitizeGeminiStringForPrompt(key, value string) string {
+	trimmed := strings.TrimSpace(value)
+	switch {
+	case trimmed == "":
+		return ""
+	case looksLikeGeminiBinaryField(key) || looksLikeGeminiBase64(trimmed):
+		return "[omitted_binary_payload]"
+	case len(trimmed) > maxGeminiRawPromptChars: // byte cut may split a multi-byte rune; ToValidUTF8 drops the fragment
+		return strings.ToValidUTF8(trimmed[:maxGeminiRawPromptChars], "") + "...(truncated)"
+	}
+	return trimmed
+}
+
+func looksLikeGeminiBinaryField(name string) bool {
+	n := strings.ToLower(strings.TrimSpace(name))
+	return n == "data" || n == "bytes" || n == "inlinedata" || n == "inline_data" || n == "base64"
+}
+
+// looksLikeGeminiBase64 reports whether v is at least 512 bytes long and, ignoring trailing '=', uses only base64/base64url characters.
+func looksLikeGeminiBase64(v string) bool {
+	if len(v) < 512 {
+		return false
+	}
+	body := strings.TrimRight(v, "=")
+	for _, r := range body {
+		switch {
+		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
+		case r == '+', r == '/', r == '-', r == '_':
+		default:
+			return false
+		}
+	}
+	return body != ""
+}
--- a/internal/adapter/gemini/convert_messages_test.go
+++ b/internal/adapter/gemini/convert_messages_test.go
@@ -0,0 +1,129 @@
+package gemini
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestGeminiMessagesFromRequestPreservesFunctionRoundtrip(t *testing.T) {
+	req := map[string]any{
+		"contents": []any{
+			map[string]any{
+				"role": "model",
+				"parts": []any{
+					map[string]any{
+						"functionCall": map[string]any{
+							"id":   "call_g1",
+							"name": "search_web",
+							"args": map[string]any{"query": "ai"},
+						},
+					},
+				},
+			},
+			map[string]any{
+				"role": "user",
+				"parts": []any{
+					map[string]any{
+						"functionResponse": map[string]any{
+							"id":       "call_g1",
+							"name":     "search_web",
+							"response": "ok",
+						},
+					},
+				},
+			},
+		},
+	}
+
+	got := geminiMessagesFromRequest(req)
+	if len(got) != 2 {
+		t.Fatalf("expected two normalized messages, got %#v", got)
+	}
+	assistant, _ := got[0].(map[string]any)
+	if assistant["role"] != "assistant" {
+		t.Fatalf("expected assistant first, got %#v", assistant)
+	}
+	tc, _ := assistant["tool_calls"].([]any)
+	if len(tc) != 1 {
+		t.Fatalf("expected one tool call, got %#v", assistant["tool_calls"])
+	}
+	toolMsg, _ := got[1].(map[string]any)
+	if toolMsg["role"] != "tool" || toolMsg["tool_call_id"] != "call_g1" {
+		t.Fatalf("expected tool message with call id, got %#v", toolMsg)
+	}
+}
+
+func TestGeminiMessagesFromRequestPreservesUnknownPartAsRawJSONText(t *testing.T) {
+	req := map[string]any{
+		"contents": []any{
+			map[string]any{
+				"role": "user",
+				"parts": []any{
+					map[string]any{"text": "hello"},
+					map[string]any{"inlineData": map[string]any{"mimeType": "image/png", "data": strings.Repeat("A", 2048)}},
+				},
+			},
+		},
+	}
+
+	got := geminiMessagesFromRequest(req)
+	if len(got) != 1 {
+		t.Fatalf("expected one normalized message, got %#v", got)
+	}
+	msg, _ := got[0].(map[string]any)
+	content, _ := msg["content"].(string)
+	if !strings.Contains(content, "hello") || !strings.Contains(content, "inlineData") {
+		t.Fatalf("expected unknown part preserved as raw json text, got %q", content)
+	}
+	if !strings.Contains(content, "[omitted_binary_payload]") {
+		t.Fatalf("expected inlineData payload to be redacted, got %q", content)
+	}
+	if strings.Contains(content, strings.Repeat("A", 100)) {
+		t.Fatalf("expected raw base64 payload not to be embedded, got %q", content)
+	}
+}
+
+func TestGeminiMessagesFromRequestBackfillsFunctionResponseCallIDByName(t *testing.T) {
+	req := map[string]any{
+		"contents": []any{
+			map[string]any{
+				"role": "model",
+				"parts": []any{
+					map[string]any{
+						"functionCall": map[string]any{
+							"name": "search_web",
+							"args": map[string]any{"query": "docs"},
+						},
+					},
+				},
+			},
+			map[string]any{
+				"role": "user",
+				"parts": []any{
+					map[string]any{
+						"functionResponse": map[string]any{
+							"name":     "search_web",
+							"response": map[string]any{"ok": true},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	got := geminiMessagesFromRequest(req)
+	if len(got) != 2 {
+		t.Fatalf("expected two normalized messages, got %#v", got)
+	}
+	assistant, _ := got[0].(map[string]any)
+	tc, _ := assistant["tool_calls"].([]any)
+	call, _ := tc[0].(map[string]any)
+	callID, _ := call["id"].(string)
+	if !strings.HasPrefix(callID, "call_gemini_") {
+		t.Fatalf("expected generated call id prefix, got %#v", call)
+	}
+	toolMsg, _ := got[1].(map[string]any)
+	if toolMsg["tool_call_id"] != callID {
+		t.Fatalf("expected tool response to inherit generated call id, tool=%#v call=%#v", toolMsg, call)
+	}
+}
--- a/internal/adapter/gemini/deps.go
+++ b/internal/adapter/gemini/deps.go
@@ -24,6 +24,10 @@ type ConfigReader interface {
 	ModelAliases() map[string]string
 }

+type OpenAIChatRunner interface {
+	ChatCompletions(w http.ResponseWriter, r *http.Request)
+}
+
 var _ AuthResolver = (*auth.Resolver)(nil)
 var _ DeepSeekCaller = (*deepseek.Client)(nil)
 var _ ConfigReader = (*config.Store)(nil)
--- a/internal/adapter/gemini/handler_generate.go
+++ b/internal/adapter/gemini/handler_generate.go
@@ -1,70 +1,134 @@
 package gemini

 import (
+	"bytes"
 	"encoding/json"
 	"io"
 	"net/http"
+	"net/http/httptest"
 	"strings"

 	"github.com/go-chi/chi/v5"

-	"ds2api/internal/auth"
 	"ds2api/internal/sse"
+	"ds2api/internal/translatorcliproxy"
 	"ds2api/internal/util"
+
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 )

 func (h *Handler) handleGenerateContent(w http.ResponseWriter, r *http.Request, stream bool) {
-	a, err := h.Auth.Determine(r)
+	if h.OpenAI == nil {
+		writeGeminiError(w, http.StatusInternalServerError, "OpenAI proxy backend unavailable.")
+		return
+	}
+	if h.proxyViaOpenAI(w, r, stream) {
+		return
+	}
+	writeGeminiError(w, http.StatusBadGateway, "Failed to proxy Gemini request.")
+}
+
+func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream bool) bool {
+	raw, err := io.ReadAll(r.Body)
 	if err != nil {
-		status := http.StatusUnauthorized
-		detail := err.Error()
-		if err == auth.ErrNoAccount {
-			status = http.StatusTooManyRequests
-		}
-		writeGeminiError(w, status, detail)
-		return
+		writeGeminiError(w, http.StatusBadRequest, "invalid body")
+		return true
 	}
-	defer h.Auth.Release(a)
-
-	var req map[string]any
-	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-		writeGeminiError(w, http.StatusBadRequest, "invalid json")
-		return
-	}
-
 	routeModel := strings.TrimSpace(chi.URLParam(r, "model"))
-	stdReq, err := normalizeGeminiRequest(h.Store, routeModel, req, stream)
-	if err != nil {
-		writeGeminiError(w, http.StatusBadRequest, err.Error())
-		return
-	}
-
-	sessionID, err := h.DS.CreateSession(r.Context(), a, 3)
-	if err != nil {
-		if a.UseConfigToken {
-			writeGeminiError(w, http.StatusUnauthorized, "Account token is invalid. Please re-login the account in admin.")
-		} else {
-			writeGeminiError(w, http.StatusUnauthorized, "Invalid token.")
+	translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatGemini, routeModel, raw, stream)
+	if !strings.Contains(string(translatedReq), `"stream"`) {
+		var reqMap map[string]any
+		if json.Unmarshal(translatedReq, &reqMap) == nil {
+			reqMap["stream"] = stream
+			if b, e := json.Marshal(reqMap); e == nil {
+				translatedReq = b
+			}
 		}
-		return
-	}
-	pow, err := h.DS.GetPow(r.Context(), a, 3)
-	if err != nil {
-		writeGeminiError(w, http.StatusUnauthorized, "Failed to get PoW (invalid token or unknown error).")
-		return
-	}
-	payload := stdReq.CompletionPayload(sessionID)
-	resp, err := h.DS.CallCompletion(r.Context(), a, payload, pow, 3)
-	if err != nil {
-		writeGeminiError(w, http.StatusInternalServerError, "Failed to get completion.")
-		return
 	}

-	if stream {
-		h.handleStreamGenerateContent(w, r, resp, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames)
-		return
+	isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1"
+	isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
+
+	if isVercelRelease {
+		proxyReq := r.Clone(r.Context())
+		proxyReq.URL.Path = "/v1/chat/completions"
+		proxyReq.Body = io.NopCloser(bytes.NewReader(raw))
+		proxyReq.ContentLength = int64(len(raw))
+		rec := httptest.NewRecorder()
+		h.OpenAI.ChatCompletions(rec, proxyReq)
+		res := rec.Result()
+		defer res.Body.Close()
+		body, _ := io.ReadAll(res.Body)
+		for k, vv := range res.Header {
+			for _, v := range vv {
+				w.Header().Add(k, v)
+			}
+		}
+		w.WriteHeader(res.StatusCode)
+		_, _ = w.Write(body)
+		return true
 	}
-	h.handleNonStreamGenerateContent(w, resp, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.ToolNames)
+
+	proxyReq := r.Clone(r.Context())
+	proxyReq.URL.Path = "/v1/chat/completions"
+	proxyReq.Body = io.NopCloser(bytes.NewReader(translatedReq))
+	proxyReq.ContentLength = int64(len(translatedReq))
+
+	if stream && !isVercelPrepare {
+		w.Header().Set("Content-Type", "text/event-stream")
+		w.Header().Set("Cache-Control", "no-cache, no-transform")
+		w.Header().Set("Connection", "keep-alive")
+		w.Header().Set("X-Accel-Buffering", "no")
+		streamWriter := translatorcliproxy.NewOpenAIStreamTranslatorWriter(w, sdktranslator.FormatGemini, routeModel, raw, translatedReq)
+		h.OpenAI.ChatCompletions(streamWriter, proxyReq)
+		return true
+	}
+
+	rec := httptest.NewRecorder()
+	h.OpenAI.ChatCompletions(rec, proxyReq)
+	res := rec.Result()
+	defer res.Body.Close()
+	body, _ := io.ReadAll(res.Body)
+	if res.StatusCode < 200 || res.StatusCode >= 300 {
+		for k, vv := range res.Header {
+			for _, v := range vv {
+				w.Header().Add(k, v)
+			}
+		}
+		writeGeminiErrorFromOpenAI(w, res.StatusCode, body)
+		return true
+	}
+	if isVercelPrepare {
+		for k, vv := range res.Header {
+			for _, v := range vv {
+				w.Header().Add(k, v)
+			}
+		}
+		w.WriteHeader(res.StatusCode)
+		_, _ = w.Write(body)
+		return true
+	}
+	converted := translatorcliproxy.FromOpenAINonStream(sdktranslator.FormatGemini, routeModel, raw, translatedReq, body)
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_, _ = w.Write(converted)
+	return true
+}
+
+func writeGeminiErrorFromOpenAI(w http.ResponseWriter, status int, raw []byte) {
+	message := strings.TrimSpace(string(raw))
+	var parsed map[string]any
+	if err := json.Unmarshal(raw, &parsed); err == nil {
+		if errObj, ok := parsed["error"].(map[string]any); ok {
+			if msg, ok := errObj["message"].(string); ok && strings.TrimSpace(msg) != "" {
+				message = strings.TrimSpace(msg)
+			}
+		}
+	}
+	if message == "" {
+		message = http.StatusText(status)
+	}
+	writeGeminiError(w, status, message)
 }

 func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *http.Response, model, finalPrompt string, thinkingEnabled bool, toolNames []string) {
@@ -76,12 +140,12 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
 	}

 	result := sse.CollectStream(resp, thinkingEnabled, true)
-	writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(model, finalPrompt, result.Thinking, result.Text, toolNames))
+	writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(model, finalPrompt, result.Thinking, result.Text, toolNames, result.OutputTokens))
 }

-func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
+func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, outputTokens int) map[string]any {
 	parts := buildGeminiPartsFromFinal(finalText, finalThinking, toolNames)
-	usage := buildGeminiUsage(finalPrompt, finalThinking, finalText)
+	usage := buildGeminiUsage(finalPrompt, finalThinking, finalText, outputTokens)
 	return map[string]any{
 		"candidates": []map[string]any{
 			{
@@ -98,10 +162,14 @@ func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, final
 	}
 }

-func buildGeminiUsage(finalPrompt, finalThinking, finalText string) map[string]any {
+func buildGeminiUsage(finalPrompt, finalThinking, finalText string, outputTokens int) map[string]any {
 	promptTokens := util.EstimateTokens(finalPrompt)
 	reasoningTokens := util.EstimateTokens(finalThinking)
 	completionTokens := util.EstimateTokens(finalText)
+	if outputTokens > 0 {
+		completionTokens = outputTokens
+		reasoningTokens = 0
+	}
 	return map[string]any{
 		"promptTokenCount":     promptTokens,
 		"candidatesTokenCount": reasoningTokens + completionTokens,
--- a/internal/adapter/gemini/handler_routes.go
+++ b/internal/adapter/gemini/handler_routes.go
@@ -11,9 +11,10 @@ import (
 var writeJSON = util.WriteJSON

 type Handler struct {
-	Store ConfigReader
-	Auth  AuthResolver
-	DS    DeepSeekCaller
+	Store  ConfigReader
+	Auth   AuthResolver
+	DS     DeepSeekCaller
+	OpenAI OpenAIChatRunner
 }

 func RegisterRoutes(r chi.Router, h *Handler) {
--- a/internal/adapter/gemini/handler_stream_runtime.go
+++ b/internal/adapter/gemini/handler_stream_runtime.go
@@ -64,6 +64,7 @@ type geminiStreamRuntime struct {

 	thinking strings.Builder
 	text     strings.Builder
+	outputTokens int
 }

 func newGeminiStreamRuntime(
@@ -103,6 +104,9 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
 	if !parsed.Parsed {
 		return streamengine.ParsedDecision{}
 	}
+	if parsed.OutputTokens > 0 {
+		s.outputTokens = parsed.OutputTokens
+	}
 	if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
 		return streamengine.ParsedDecision{Stop: true}
 	}
@@ -176,6 +180,6 @@ func (s *geminiStreamRuntime) finalize() {
 			},
 		},
 		"modelVersion":  s.model,
-		"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText),
+		"usageMetadata": buildGeminiUsage(s.finalPrompt, finalThinking, finalText, s.outputTokens),
 	})
 }
--- a/internal/adapter/gemini/handler_test.go
+++ b/internal/adapter/gemini/handler_test.go
@@ -61,6 +61,44 @@ func (m testGeminiDS) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ m
 	return m.resp, nil
 }

+type geminiOpenAIErrorStub struct {
+	status  int               // HTTP status passed to WriteHeader
+	body    string            // response body written after the status
+	headers map[string]string // extra response headers set before Content-Type
+}
+
+func (s geminiOpenAIErrorStub) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
+	for k, v := range s.headers {
+		w.Header().Set(k, v)
+	}
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(s.status)
+	_, _ = w.Write([]byte(s.body))
+}
+
+type geminiOpenAISuccessStub struct {
+	stream bool
+	body   string
+}
+
+func (s geminiOpenAISuccessStub) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
+	if s.stream {
+		w.Header().Set("Content-Type", "text/event-stream")
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-1\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"hello \"},\"finish_reason\":null}]}\n\n"))
+		_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-1\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"world\"},\"finish_reason\":\"stop\"}]}\n\n"))
+		_, _ = w.Write([]byte("data: [DONE]\n\n"))
+		return
+	}
+	out := s.body
+	if strings.TrimSpace(out) == "" {
+		out = `{"id":"chatcmpl-1","object":"chat.completion","choices":[{"index":0,"message":{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"eval_javascript","arguments":"{\"code\":\"1+1\"}"}}]},"finish_reason":"tool_calls"}]}`
+	}
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_, _ = w.Write([]byte(out))
+}
+
 func makeGeminiUpstreamResponse(lines ...string) *http.Response {
 	body := strings.Join(lines, "\n")
 	if !strings.HasSuffix(body, "\n") {
@@ -98,14 +136,11 @@ func TestGeminiRoutesRegistered(t *testing.T) {
 }

 func TestGenerateContentReturnsFunctionCallParts(t *testing.T) {
-	upstream := makeGeminiUpstreamResponse(
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"eval_javascript\",\"input\":{\"code\":\"1+1\"}}]}"}`,
-		`data: [DONE]`,
-	)
 	h := &Handler{
 		Store: testGeminiConfig{},
-		Auth:  testGeminiAuth{},
-		DS:    testGeminiDS{resp: upstream},
+		OpenAI: geminiOpenAISuccessStub{
+			body: `{"id":"chatcmpl-1","object":"chat.completion","choices":[{"index":0,"message":{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"eval_javascript","arguments":"{\"code\":\"1+1\"}"}}]},"finish_reason":"tool_calls"}]}`,
+		},
 	}
 	r := chi.NewRouter()
 	RegisterRoutes(r, h)
@@ -115,7 +150,6 @@ func TestGenerateContentReturnsFunctionCallParts(t *testing.T) {
 		"tools":[{"functionDeclarations":[{"name":"eval_javascript","description":"eval","parameters":{"type":"object","properties":{"code":{"type":"string"}}}}]}]
 	}`
 	req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-pro:generateContent", strings.NewReader(body))
-	req.Header.Set("Authorization", "Bearer direct-token")
 	rec := httptest.NewRecorder()
 	r.ServeHTTP(rec, req)
 	if rec.Code != http.StatusOK {
@@ -144,11 +178,7 @@ func TestGenerateContentReturnsFunctionCallParts(t *testing.T) {
 }

 func TestGenerateContentMixedToolSnippetAlsoTriggersFunctionCall(t *testing.T) {
-	upstream := makeGeminiUpstreamResponse(
-		`data: {"p":"response/content","v":"我来调用工具\n{\"tool_calls\":[{\"name\":\"eval_javascript\",\"input\":{\"code\":\"1+1\"}}]}"}`,
-		`data: [DONE]`,
-	)
-	h := &Handler{Store: testGeminiConfig{}, Auth: testGeminiAuth{}, DS: testGeminiDS{resp: upstream}}
+	h := &Handler{Store: testGeminiConfig{}, OpenAI: geminiOpenAISuccessStub{}}
 	r := chi.NewRouter()
 	RegisterRoutes(r, h)

@@ -157,7 +187,6 @@ func TestGenerateContentMixedToolSnippetAlsoTriggersFunctionCall(t *testing.T) {
 		"tools":[{"functionDeclarations":[{"name":"eval_javascript","description":"eval","parameters":{"type":"object","properties":{"code":{"type":"string"}}}}]}]
 	}`
 	req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-pro:generateContent", strings.NewReader(body))
-	req.Header.Set("Authorization", "Bearer direct-token")
 	rec := httptest.NewRecorder()
 	r.ServeHTTP(rec, req)

@@ -180,38 +209,25 @@ func TestGenerateContentMixedToolSnippetAlsoTriggersFunctionCall(t *testing.T) {
 }

 func TestStreamGenerateContentEmitsSSE(t *testing.T) {
-	upstream := makeGeminiUpstreamResponse(
-		`data: {"p":"response/content","v":"hello "}`,
-		`data: {"p":"response/content","v":"world"}`,
-		`data: [DONE]`,
-	)
 	h := &Handler{
-		Store: testGeminiConfig{},
-		Auth:  testGeminiAuth{},
-		DS:    testGeminiDS{resp: upstream},
+		Store:  testGeminiConfig{},
+		OpenAI: geminiOpenAISuccessStub{stream: true},
 	}
 	r := chi.NewRouter()
 	RegisterRoutes(r, h)

 	body := `{"contents":[{"role":"user","parts":[{"text":"hello"}]}]}`
 	req := httptest.NewRequest(http.MethodPost, "/v1/models/gemini-2.5-pro:streamGenerateContent?alt=sse", strings.NewReader(body))
-	req.Header.Set("Authorization", "Bearer direct-token")
 	rec := httptest.NewRecorder()
 	r.ServeHTTP(rec, req)

 	if rec.Code != http.StatusOK {
 		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
 	}
-	if !strings.Contains(rec.Body.String(), "data: ") {
-		t.Fatalf("expected SSE data frames, got body=%s", rec.Body.String())
-	}
-	if !strings.Contains(rec.Body.String(), `"finishReason":"STOP"`) {
-		t.Fatalf("expected stream finish frame, got body=%s", rec.Body.String())
-	}

 	frames := extractGeminiSSEFrames(t, rec.Body.String())
 	if len(frames) == 0 {
-		t.Fatalf("expected non-empty sse frames, body=%s", rec.Body.String())
+		t.Fatalf("expected non-empty stream frames, body=%s", rec.Body.String())
 	}
 	last := frames[len(frames)-1]
 	candidates, _ := last["candidates"].([]any)
@@ -229,16 +245,61 @@ func TestStreamGenerateContentEmitsSSE(t *testing.T) {
 	}
 }

+func TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope(t *testing.T) {
+	h := &Handler{
+		Store:  testGeminiConfig{},
+		OpenAI: geminiOpenAIErrorStub{
+			status: http.StatusUnauthorized,
+			body:   `{"error":{"message":"invalid api key"}}`,
+			headers: map[string]string{
+				"WWW-Authenticate":      `Bearer realm="example"`,
+				"Retry-After":           "30",
+				"X-RateLimit-Remaining": "0",
+			},
+		},
+	}
+	r := chi.NewRouter()
+	RegisterRoutes(r, h)
+
+	req := httptest.NewRequest(http.MethodPost, "/v1/models/gemini-2.5-pro:generateContent", strings.NewReader(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`))
+	rec := httptest.NewRecorder()
+	r.ServeHTTP(rec, req)
+
+	if rec.Code != http.StatusUnauthorized {
+		t.Fatalf("expected 401, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	var out map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
+		t.Fatalf("expected json body: %v", err)
+	}
+	errObj, _ := out["error"].(map[string]any)
+	if errObj["status"] != "UNAUTHENTICATED" {
+		t.Fatalf("expected Gemini status UNAUTHENTICATED, got=%v", errObj["status"])
+	}
+	if errObj["message"] != "invalid api key" {
+		t.Fatalf("expected parsed error message, got=%v", errObj["message"])
+	}
+	if got := rec.Header().Get("WWW-Authenticate"); got == "" {
+		t.Fatalf("expected WWW-Authenticate header to be preserved")
+	}
+	if got := rec.Header().Get("Retry-After"); got != "30" {
+		t.Fatalf("expected Retry-After header 30, got=%q", got)
+	}
+	if got := rec.Header().Get("X-RateLimit-Remaining"); got != "0" {
+		t.Fatalf("expected X-RateLimit-Remaining header 0, got=%q", got)
+	}
+}
+
 func extractGeminiSSEFrames(t *testing.T, body string) []map[string]any {
 	t.Helper()
 	scanner := bufio.NewScanner(strings.NewReader(body))
 	out := make([]map[string]any, 0, 4)
 	for scanner.Scan() {
 		line := strings.TrimSpace(scanner.Text())
-		if !strings.HasPrefix(line, "data: ") {
-			continue
+		raw := line
+		if strings.HasPrefix(line, "data: ") {
+			raw = strings.TrimSpace(strings.TrimPrefix(line, "data: "))
 		}
-		raw := strings.TrimSpace(strings.TrimPrefix(line, "data: "))
 		if raw == "" {
 			continue
 		}
--- a/internal/adapter/gemini/proxy_vercel_test.go
+++ b/internal/adapter/gemini/proxy_vercel_test.go
@@ -0,0 +1,42 @@
+package gemini
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+)
+
+type openAIProxyStub struct {
+	status int
+	body   string
+}
+
+func (s openAIProxyStub) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
+	if s.status == 0 {
+		s.status = http.StatusOK
+	}
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(s.status)
+	_, _ = w.Write([]byte(s.body))
+}
+
+func TestGeminiProxyViaOpenAIVercelReleasePassthrough(t *testing.T) {
+	h := &Handler{OpenAI: openAIProxyStub{status: 200, body: `{"success":true}`}}
+	req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-pro:streamGenerateContent?__stream_release=1", strings.NewReader(`{"lease_id":"lease_123"}`))
+	rec := httptest.NewRecorder()
+
+	h.StreamGenerateContent(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
+	}
+	var out map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
+		t.Fatalf("expected json response, got err=%v body=%s", err, rec.Body.String())
+	}
+	if v, ok := out["success"].(bool); !ok || !v {
+		t.Fatalf("expected success=true passthrough, got=%v", out)
+	}
+}
--- a/internal/adapter/openai/chat_stream_runtime.go
+++ b/internal/adapter/openai/chat_stream_runtime.go
@@ -36,6 +36,7 @@ type chatStreamRuntime struct {
 	streamToolNames   map[int]string
 	thinking          strings.Builder
 	text              strings.Builder
+	outputTokens      int
 }

 func newChatStreamRuntime(
@@ -97,12 +98,12 @@ func (s *chatStreamRuntime) sendDone() {

 func (s *chatStreamRuntime) finalize(finishReason string) {
 	finalThinking := s.thinking.String()
-	finalText := s.text.String()
-	detected := util.ParseStandaloneToolCalls(finalText, s.toolNames)
-	if len(detected) > 0 && !s.toolCallsDoneEmitted {
+	finalText := sanitizeLeakedOutput(s.text.String())
+	detected := util.ParseStandaloneToolCallsDetailed(finalText, s.toolNames)
+	if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted {
 		finishReason = "tool_calls"
 		delta := map[string]any{
-			"tool_calls": formatFinalStreamToolCallsWithStableIDs(detected, s.streamToolCallIDs),
+			"tool_calls": formatFinalStreamToolCallsWithStableIDs(detected.Calls, s.streamToolCallIDs),
 		}
 		if !s.firstChunkSent {
 			delta["role"] = "assistant"
@@ -141,8 +142,12 @@ func (s *chatStreamRuntime) finalize(finishReason string) {
 			if evt.Content == "" {
 				continue
 			}
+			cleaned := sanitizeLeakedOutput(evt.Content)
+			if cleaned == "" {
+				continue
+			}
 			delta := map[string]any{
-				"content": evt.Content,
+				"content": cleaned,
 			}
 			if !s.firstChunkSent {
 				delta["role"] = "assistant"
@@ -158,15 +163,22 @@ func (s *chatStreamRuntime) finalize(finishReason string) {
 		}
 	}

-	if len(detected) > 0 || s.toolCallsEmitted {
+	if len(detected.Calls) > 0 || s.toolCallsEmitted {
 		finishReason = "tool_calls"
 	}
+	usage := openaifmt.BuildChatUsage(s.finalPrompt, finalThinking, finalText)
+	if s.outputTokens > 0 {
+		usage["completion_tokens"] = s.outputTokens
+		if prompt, ok := usage["prompt_tokens"].(int); ok {
+			usage["total_tokens"] = prompt + s.outputTokens
+		}
+	}
 	s.sendChunk(openaifmt.BuildChatStreamChunk(
 		s.completionID,
 		s.created,
 		s.model,
 		[]map[string]any{openaifmt.BuildChatStreamFinishChoice(0, finishReason)},
-		openaifmt.BuildChatUsage(s.finalPrompt, finalThinking, finalText),
+		usage,
 	))
 	s.sendDone()
 }
@@ -175,7 +187,13 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
 	if !parsed.Parsed {
 		return streamengine.ParsedDecision{}
 	}
-	if parsed.ContentFilter || parsed.ErrorMessage != "" {
+	if parsed.OutputTokens > 0 {
+		s.outputTokens = parsed.OutputTokens
+	}
+	if parsed.ContentFilter {
+		return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReasonHandlerRequested}
+	}
+	if parsed.ErrorMessage != "" {
 		return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("content_filter")}
 	}
 	if parsed.Stop {
@@ -246,8 +264,12 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
 						continue
 					}
 					if evt.Content != "" {
+						cleaned := sanitizeLeakedOutput(evt.Content)
+						if cleaned == "" {
+							continue
+						}
 						contentDelta := map[string]any{
-							"content": evt.Content,
+							"content": cleaned,
 						}
 						if !s.firstChunkSent {
 							contentDelta["role"] = "assistant"
--- a/internal/adapter/openai/deps.go
+++ b/internal/adapter/openai/deps.go
@@ -19,6 +19,7 @@ type DeepSeekCaller interface {
 	CreateSession(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
 	GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
 	CallCompletion(ctx context.Context, a *auth.RequestAuth, payload map[string]any, powResp string, maxAttempts int) (*http.Response, error)
+	DeleteAllSessionsForToken(ctx context.Context, token string) error
 }

 type ConfigReader interface {
@@ -28,6 +29,7 @@ type ConfigReader interface {
 	ToolcallEarlyEmitConfidence() string
 	ResponsesStoreTTLSeconds() int
 	EmbeddingsProvider() string
+	AutoDeleteSessions() bool
 }

 var _ AuthResolver = (*auth.Resolver)(nil)
--- a/internal/adapter/openai/deps_injection_test.go
+++ b/internal/adapter/openai/deps_injection_test.go
@@ -19,6 +19,7 @@ func (m mockOpenAIConfig) ToolcallMode() string                { return m.toolMo
 func (m mockOpenAIConfig) ToolcallEarlyEmitConfidence() string { return m.earlyEmit }
 func (m mockOpenAIConfig) ResponsesStoreTTLSeconds() int       { return m.responsesTTL }
 func (m mockOpenAIConfig) EmbeddingsProvider() string          { return m.embedProv }
+func (m mockOpenAIConfig) AutoDeleteSessions() bool            { return false }

 func TestNormalizeOpenAIChatRequestWithConfigInterface(t *testing.T) {
 	cfg := mockOpenAIConfig{
--- a/internal/adapter/openai/handler_chat.go
+++ b/internal/adapter/openai/handler_chat.go
@@ -35,7 +35,25 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
 		writeOpenAIError(w, status, detail)
 		return
 	}
-	defer h.Auth.Release(a)
+	defer func() {
+		// Delete sessions synchronously, before Release. An async delete
+		// could still be running after another request acquires this
+		// account, and would then wipe the session that request is using.
+		// Use a detached context: r.Context() is already cancelled when the
+		// client disconnected, which would make the cleanup fail immediately.
+		if h.Store.AutoDeleteSessions() && a.DeepSeekToken != "" {
+			deleteCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+			defer cancel()
+			err := h.DS.DeleteAllSessionsForToken(deleteCtx, a.DeepSeekToken)
+			if err != nil {
+				config.Logger.Warn("[auto_delete_sessions] failed", "account", a.AccountID, "error", err)
+			} else {
+				config.Logger.Debug("[auto_delete_sessions] success", "account", a.AccountID)
+			}
+		}
+		h.Auth.Release(a)
+	}()
+
 	r = r.WithContext(auth.WithAuth(r.Context(), a))

 	var req map[string]any
@@ -87,8 +105,19 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, re
 	result := sse.CollectStream(resp, thinkingEnabled, true)

 	finalThinking := result.Thinking
-	finalText := result.Text
+	finalText := sanitizeLeakedOutput(result.Text)
+	if writeUpstreamEmptyOutputError(w, result) {
+		return
+	}
 	respBody := openaifmt.BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText, toolNames)
+	if result.OutputTokens > 0 {
+		if usage, ok := respBody["usage"].(map[string]any); ok {
+			usage["completion_tokens"] = result.OutputTokens
+			if prompt, ok := usage["prompt_tokens"].(int); ok {
+				usage["total_tokens"] = prompt + result.OutputTokens
+			}
+		}
+	}
 	writeJSON(w, http.StatusOK, respBody)
 }

@@ -110,8 +139,8 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
 	}

 	created := time.Now().Unix()
-	bufferToolContent := len(toolNames) > 0 && h.toolcallFeatureMatchEnabled()
-	emitEarlyToolDeltas := h.toolcallEarlyEmitHighConfidence()
+	bufferToolContent := len(toolNames) > 0
+	emitEarlyToolDeltas := h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence()
 	initialType := "text"
 	if thinkingEnabled {
 		initialType = "thinking"
--- a/internal/adapter/openai/handler_toolcall_format.go
+++ b/internal/adapter/openai/handler_toolcall_format.go
@@ -53,13 +53,13 @@ func injectToolPrompt(messages []map[string]any, tools []any, policy util.ToolCh
 	if len(toolSchemas) == 0 {
 		return messages, names
 	}
-	toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\nWhen you need to use tools, output ONLY this JSON format (no other text):\n{\"tool_calls\": [{\"name\": \"tool_name\", \"input\": {\"param\": \"value\"}}]}\n\nHistory markers in conversation:\n- [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY] means a tool call you already made earlier.\n- [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] means the runtime returned a tool result (not user input).\n\nIMPORTANT:\n1) If calling tools, output ONLY the JSON. The response must start with { and end with }.\n2) After receiving a tool result, you MUST use it to produce the final answer.\n3) Only call another tool when the previous result is missing required data or returned an error.\n4) Do not repeat a tool call that is already satisfied by an existing [TOOL_RESULT_HISTORY] block."
+	toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\n" + buildToolCallInstructions(names)
 	if policy.Mode == util.ToolChoiceRequired {
-		toolPrompt += "\n5) For this response, you MUST call at least one tool from the allowed list."
+		toolPrompt += "\n7) For this response, you MUST call at least one tool from the allowed list."
 	}
 	if policy.Mode == util.ToolChoiceForced && strings.TrimSpace(policy.ForcedName) != "" {
-		toolPrompt += "\n5) For this response, you MUST call exactly this tool name: " + strings.TrimSpace(policy.ForcedName)
-		toolPrompt += "\n6) Do not call any other tool."
+		toolPrompt += "\n7) For this response, you MUST call exactly this tool name: " + strings.TrimSpace(policy.ForcedName)
+		toolPrompt += "\n8) Do not call any other tool."
 	}

 	for i := range messages {
@@ -73,6 +73,11 @@ func injectToolPrompt(messages []map[string]any, tools []any, policy util.ToolCh
 	return messages, names
 }

+// buildToolCallInstructions delegates to the shared util implementation.
+func buildToolCallInstructions(toolNames []string) string {
+	return util.BuildToolCallInstructions(toolNames)
+}
+
 func formatIncrementalStreamToolCallDeltas(deltas []toolCallDelta, ids map[int]string) []map[string]any {
 	if len(deltas) == 0 {
 		return nil
@@ -111,28 +116,21 @@ func filterIncrementalToolCallDeltasByAllowed(deltas []toolCallDelta, allowedNam
 	if len(deltas) == 0 {
 		return nil
 	}
-	allowed := namesToSet(allowedNames)
-	if len(allowed) == 0 {
-		for _, d := range deltas {
-			if d.Name != "" {
-				seenNames[d.Index] = "__blocked__"
-			}
-		}
-		return nil
-	}
 	out := make([]toolCallDelta, 0, len(deltas))
 	for _, d := range deltas {
 		if d.Name != "" {
-			if _, ok := allowed[d.Name]; !ok {
-				seenNames[d.Index] = "__blocked__"
-				continue
+			if seenNames != nil {
+				seenNames[d.Index] = d.Name
 			}
-			seenNames[d.Index] = d.Name
+			out = append(out, d)
+			continue
+		}
+		if seenNames == nil {
 			out = append(out, d)
 			continue
 		}
 		name := strings.TrimSpace(seenNames[d.Index])
-		if name == "" || name == "__blocked__" {
+		if name == "" {
 			continue
 		}
 		out = append(out, d)
--- a/internal/adapter/openai/handler_toolcall_policy.go
+++ b/internal/adapter/openai/handler_toolcall_policy.go
@@ -1,25 +1,9 @@
 package openai

-import "strings"
-
-func applyOpenAIChatPassThrough(req map[string]any, payload map[string]any) {
-	for k, v := range collectOpenAIChatPassThrough(req) {
-		payload[k] = v
-	}
-}
-
 func (h *Handler) toolcallFeatureMatchEnabled() bool {
-	if h == nil || h.Store == nil {
-		return true
-	}
-	mode := strings.TrimSpace(strings.ToLower(h.Store.ToolcallMode()))
-	return mode == "" || mode == "feature_match"
+	return true
 }

 func (h *Handler) toolcallEarlyEmitHighConfidence() bool {
-	if h == nil || h.Store == nil {
-		return true
-	}
-	level := strings.TrimSpace(strings.ToLower(h.Store.ToolcallEarlyEmitConfidence()))
-	return level == "" || level == "high"
+	return true
 }
--- a/internal/adapter/openai/handler_toolcall_test.go
+++ b/internal/adapter/openai/handler_toolcall_test.go
@@ -182,7 +182,7 @@ func TestHandleNonStreamToolCallInterceptsReasonerModel(t *testing.T) {
 	}
 }

-func TestHandleNonStreamUnknownToolNotIntercepted(t *testing.T) {
+func TestHandleNonStreamUnknownToolIntercepted(t *testing.T) {
 	h := &Handler{}
 	resp := makeSSEHTTPResponse(
 		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"not_in_schema\",\"input\":{\"q\":\"go\"}}]}"}`,
@@ -198,20 +198,17 @@ func TestHandleNonStreamUnknownToolNotIntercepted(t *testing.T) {
 	out := decodeJSONBody(t, rec.Body.String())
 	choices, _ := out["choices"].([]any)
 	choice, _ := choices[0].(map[string]any)
-	if choice["finish_reason"] != "stop" {
-		t.Fatalf("expected finish_reason=stop, got %#v", choice["finish_reason"])
+	if choice["finish_reason"] != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
 	}
 	msg, _ := choice["message"].(map[string]any)
-	if _, ok := msg["tool_calls"]; ok {
-		t.Fatalf("did not expect tool_calls for unknown schema name, got %#v", msg["tool_calls"])
-	}
-	content, _ := msg["content"].(string)
-	if !strings.Contains(content, `"tool_calls"`) {
-		t.Fatalf("expected unknown tool json to pass through as text, got %#v", content)
+	toolCalls, _ := msg["tool_calls"].([]any)
+	if len(toolCalls) != 1 {
+		t.Fatalf("expected tool_calls for unknown schema name, got %#v", msg["tool_calls"])
 	}
 }

-func TestHandleNonStreamEmbeddedToolCallExampleRemainsText(t *testing.T) {
+func TestHandleNonStreamEmbeddedToolCallExamplePromotesToolCall(t *testing.T) {
 	h := &Handler{}
 	resp := makeSSEHTTPResponse(
 		`data: {"p":"response/content","v":"下面是示例："}`,
@@ -229,20 +226,21 @@ func TestHandleNonStreamEmbeddedToolCallExampleRemainsText(t *testing.T) {
 	out := decodeJSONBody(t, rec.Body.String())
 	choices, _ := out["choices"].([]any)
 	choice, _ := choices[0].(map[string]any)
-	if choice["finish_reason"] != "stop" {
-		t.Fatalf("expected finish_reason=stop, got %#v", choice["finish_reason"])
+	if choice["finish_reason"] != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
 	}
 	msg, _ := choice["message"].(map[string]any)
-	if _, ok := msg["tool_calls"]; ok {
-		t.Fatalf("did not expect tool_calls field for embedded example: %#v", msg["tool_calls"])
+	toolCalls, _ := msg["tool_calls"].([]any)
+	if len(toolCalls) != 1 {
+		t.Fatalf("expected one tool_call field for embedded example: %#v", msg["tool_calls"])
 	}
 	content, _ := msg["content"].(string)
-	if !strings.Contains(content, "下面是示例：") || !strings.Contains(content, "请勿执行。") || !strings.Contains(content, `"tool_calls"`) {
-		t.Fatalf("expected embedded example to remain plain text, got %#v", content)
+	if strings.Contains(content, `"tool_calls"`) {
+		t.Fatalf("expected raw tool_calls json stripped from content, got %#v", content)
 	}
 }

-func TestHandleNonStreamFencedToolCallExampleNotIntercepted(t *testing.T) {
+func TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t *testing.T) {
 	h := &Handler{}
 	resp := makeSSEHTTPResponse(
 		"data: {\"p\":\"response/content\",\"v\":\"```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"search\\\",\\\"input\\\":{\\\"q\\\":\\\"go\\\"}}]}\\n```\"}",
@@ -258,16 +256,60 @@ func TestHandleNonStreamFencedToolCallExampleNotIntercepted(t *testing.T) {
 	out := decodeJSONBody(t, rec.Body.String())
 	choices, _ := out["choices"].([]any)
 	choice, _ := choices[0].(map[string]any)
-	if choice["finish_reason"] != "stop" {
-		t.Fatalf("expected finish_reason=stop, got %#v", choice["finish_reason"])
+	if choice["finish_reason"] == "tool_calls" {
+		t.Fatalf("expected fenced example to remain content-only, got finish_reason=%#v", choice["finish_reason"])
 	}
 	msg, _ := choice["message"].(map[string]any)
-	if _, ok := msg["tool_calls"]; ok {
-		t.Fatalf("did not expect tool_calls field for fenced example: %#v", msg["tool_calls"])
+	toolCalls, _ := msg["tool_calls"].([]any)
+	if len(toolCalls) != 0 {
+		t.Fatalf("expected no tool_call field for fenced example: %#v", msg["tool_calls"])
 	}
 	content, _ := msg["content"].(string)
-	if !strings.Contains(content, "```json") || !strings.Contains(content, `"tool_calls"`) {
-		t.Fatalf("expected fenced tool example to pass through as text, got %q", content)
+	if !strings.Contains(content, `"tool_calls"`) {
+		t.Fatalf("expected fenced example content preserved, got %q", content)
+	}
+}
+
+// Backward-compatible alias for a historical test name used in CI logs; despite "Promotes" in the name, it runs the DoesNotPromote assertions.
+func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
+	TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t)
+}
+
+func TestHandleNonStreamReturns502WhenUpstreamOutputEmpty(t *testing.T) {
+	h := &Handler{}
+	resp := makeSSEHTTPResponse(
+		`data: {"p":"response/content","v":""}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+
+	h.handleNonStream(rec, context.Background(), resp, "cid-empty", "deepseek-chat", "prompt", false, nil)
+	if rec.Code != http.StatusBadGateway {
+		t.Fatalf("expected status 502 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	out := decodeJSONBody(t, rec.Body.String())
+	errObj, _ := out["error"].(map[string]any)
+	if asString(errObj["code"]) != "upstream_empty_output" {
+		t.Fatalf("expected code=upstream_empty_output, got %#v", out)
+	}
+}
+
+func TestHandleNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
+	h := &Handler{}
+	resp := makeSSEHTTPResponse(
+		`data: {"code":"content_filter"}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+
+	h.handleNonStream(rec, context.Background(), resp, "cid-empty-filtered", "deepseek-chat", "prompt", false, nil)
+	if rec.Code != http.StatusBadRequest {
+		t.Fatalf("expected status 400 for filtered upstream output, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	out := decodeJSONBody(t, rec.Body.String())
+	errObj, _ := out["error"].(map[string]any)
+	if asString(errObj["code"]) != "content_filter" {
+		t.Fatalf("expected code=content_filter, got %#v", out)
 	}
 }

@@ -406,7 +448,7 @@ func TestHandleStreamReasonerToolCallInterceptsWithoutRawContentLeak(t *testing.
 	}
 }

-func TestHandleStreamUnknownToolDoesNotLeakRawPayload(t *testing.T) {
+func TestHandleStreamUnknownToolEmitsToolCall(t *testing.T) {
 	h := &Handler{}
 	resp := makeSSEHTTPResponse(
 		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"not_in_schema\",\"input\":{\"q\":\"go\"}}]}"}`,
@@ -421,18 +463,18 @@ func TestHandleStreamUnknownToolDoesNotLeakRawPayload(t *testing.T) {
 	if !done {
 		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
 	}
-	if streamHasToolCallsDelta(frames) {
-		t.Fatalf("did not expect tool_calls delta for unknown schema name, body=%s", rec.Body.String())
+	if !streamHasToolCallsDelta(frames) {
+		t.Fatalf("expected tool_calls delta for unknown schema name, body=%s", rec.Body.String())
 	}
 	if streamHasRawToolJSONContent(frames) {
 		t.Fatalf("did not expect raw tool_calls json leak for unknown schema name: %s", rec.Body.String())
 	}
-	if streamFinishReason(frames) != "stop" {
-		t.Fatalf("expected finish_reason=stop, body=%s", rec.Body.String())
+	if streamFinishReason(frames) != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
 	}
 }

-func TestHandleStreamUnknownToolNoArgsDoesNotLeakRawPayload(t *testing.T) {
+func TestHandleStreamUnknownToolNoArgsEmitsToolCall(t *testing.T) {
 	h := &Handler{}
 	resp := makeSSEHTTPResponse(
 		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"not_in_schema\"}]}"}`,
@@ -447,14 +489,14 @@ func TestHandleStreamUnknownToolNoArgsDoesNotLeakRawPayload(t *testing.T) {
 	if !done {
 		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
 	}
-	if streamHasToolCallsDelta(frames) {
-		t.Fatalf("did not expect tool_calls delta for unknown schema name (no args), body=%s", rec.Body.String())
+	if !streamHasToolCallsDelta(frames) {
+		t.Fatalf("expected tool_calls delta for unknown schema name (no args), body=%s", rec.Body.String())
 	}
 	if streamHasRawToolJSONContent(frames) {
 		t.Fatalf("did not expect raw tool_calls json leak for unknown schema name (no args): %s", rec.Body.String())
 	}
-	if streamFinishReason(frames) != "stop" {
-		t.Fatalf("expected finish_reason=stop, body=%s", rec.Body.String())
+	if streamFinishReason(frames) != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
 	}
 }

@@ -615,7 +657,7 @@ func TestHandleStreamToolCallWithSameChunkTrailingTextRemainsText(t *testing.T)
 	}
 }

-func TestHandleStreamFencedToolCallSnippetRemainsText(t *testing.T) {
+func TestHandleStreamFencedToolCallSnippetPromotesToolCall(t *testing.T) {
 	h := &Handler{}
 	resp := makeSSEHTTPResponse(
 		fmt.Sprintf(`data: {"p":"response/content","v":%q}`, "下面是调用示例：\n```json\n"),
@@ -631,8 +673,8 @@ func TestHandleStreamFencedToolCallSnippetRemainsText(t *testing.T) {
 	if !done {
 		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
 	}
-	if streamHasToolCallsDelta(frames) {
-		t.Fatalf("did not expect tool_calls delta for fenced snippet, body=%s", rec.Body.String())
+	if !streamHasToolCallsDelta(frames) {
+		t.Fatalf("expected tool_calls delta for fenced snippet, body=%s", rec.Body.String())
 	}
 	content := strings.Builder{}
 	for _, frame := range frames {
@@ -646,11 +688,53 @@ func TestHandleStreamFencedToolCallSnippetRemainsText(t *testing.T) {
 		}
 	}
 	got := content.String()
-	if !strings.Contains(got, "```json") || !strings.Contains(strings.ToLower(got), "tool_calls") {
-		t.Fatalf("expected fenced tool snippet in content, got=%q", got)
+	if strings.Contains(strings.ToLower(got), "tool_calls") {
+		t.Fatalf("expected raw fenced tool_calls snippet stripped from content, got=%q", got)
 	}
-	if streamFinishReason(frames) != "stop" {
-		t.Fatalf("expected finish_reason=stop, body=%s", rec.Body.String())
+	if strings.Contains(strings.ToLower(got), "```json") || strings.Contains(got, "\n```\n") {
+		t.Fatalf("expected consumed fenced tool payload to not leave empty code fence, got=%q", got)
+	}
+	if streamFinishReason(frames) != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
+	}
+}
+
+func TestHandleStreamStandaloneToolCallAfterClosedFenceKeepsFence(t *testing.T) {
+	h := &Handler{}
+	resp := makeSSEHTTPResponse(
+		fmt.Sprintf(`data: {"p":"response/content","v":%q}`, "先给一个代码示例：\n```text\nhello\n```\n"),
+		fmt.Sprintf(`data: {"p":"response/content","v":%q}`, "{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"),
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
+
+	h.handleStream(rec, req, resp, "cid7g", "deepseek-chat", "prompt", false, false, []string{"search"})
+
+	frames, done := parseSSEDataFrames(t, rec.Body.String())
+	if !done {
+		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
+	}
+	if !streamHasToolCallsDelta(frames) {
+		t.Fatalf("expected tool_calls delta for standalone payload, body=%s", rec.Body.String())
+	}
+	content := strings.Builder{}
+	for _, frame := range frames {
+		choices, _ := frame["choices"].([]any)
+		for _, item := range choices {
+			choice, _ := item.(map[string]any)
+			delta, _ := choice["delta"].(map[string]any)
+			if c, ok := delta["content"].(string); ok {
+				content.WriteString(c)
+			}
+		}
+	}
+	got := content.String()
+	if !strings.Contains(got, "```") {
+		t.Fatalf("expected closed fence before standalone tool json to be preserved, got=%q", got)
+	}
+	if streamFinishReason(frames) != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
 	}
 }

--- a/internal/adapter/openai/leaked_output_sanitize.go
+++ b/internal/adapter/openai/leaked_output_sanitize.go
@@ -0,0 +1,70 @@
+package openai
+
+import (
+	"regexp"
+)
+
+var emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```")
+var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\s*\{[\s\S]*?\}\s*,\s*"id"\s*:\s*"call[^"]*"\s*,\s*"type"\s*:\s*"function"\s*}\]`)
+var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`)
+
+// leakedMetaMarkerPattern matches leaked DeepSeek special tokens (assistant, tool,
+// end_of_sentence, end_of_thinking), case-insensitively, delimited by "|" or "｜";
+// words may be joined by "_" or U+2581, e.g. <｜end_of_sentence｜>, <｜end▁of▁sentence｜>.
+var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[｜\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking)\s*[｜\|]>`)
+
+// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
+// when the sieve fails to capture them. These are applied only to complete
+// wrapper blocks so standalone "<result>" examples in normal output remain
+// untouched.
+var leakedAgentXMLBlockPatterns = []*regexp.Regexp{
+	regexp.MustCompile(`(?is)<attempt_completion\b[^>]*>(.*?)</attempt_completion>`),
+	regexp.MustCompile(`(?is)<ask_followup_question\b[^>]*>(.*?)</ask_followup_question>`),
+	regexp.MustCompile(`(?is)<new_task\b[^>]*>(.*?)</new_task>`),
+}
+
+var leakedAgentWrapperTagPattern = regexp.MustCompile(`(?is)</?(?:attempt_completion|ask_followup_question|new_task)\b[^>]*>`)
+var leakedAgentWrapperPlusResultOpenPattern = regexp.MustCompile(`(?is)<(?:attempt_completion|ask_followup_question|new_task)\b[^>]*>\s*<result>`)
+var leakedAgentResultPlusWrapperClosePattern = regexp.MustCompile(`(?is)</result>\s*</(?:attempt_completion|ask_followup_question|new_task)\b[^>]*>`)
+var leakedAgentResultTagPattern = regexp.MustCompile(`(?is)</?result>`)
+
+// sanitizeLeakedOutput removes empty json fences, leaked tool-call/tool-result wire blobs, meta markers and agent XML wrapper tags from text.
+func sanitizeLeakedOutput(text string) string {
+	if text == "" {
+		return text
+	}
+	cleaned := text
+	for _, re := range []*regexp.Regexp{emptyJSONFencePattern, leakedToolCallArrayPattern, leakedToolResultBlobPattern, leakedMetaMarkerPattern} {
+		cleaned = re.ReplaceAllString(cleaned, "")
+	}
+	return sanitizeLeakedAgentXMLBlocks(cleaned)
+}
+
+func sanitizeLeakedAgentXMLBlocks(text string) string {
+	out := text
+	for _, pattern := range leakedAgentXMLBlockPatterns {
+		out = pattern.ReplaceAllStringFunc(out, func(match string) string {
+			submatches := pattern.FindStringSubmatch(match)
+			if len(submatches) < 2 {
+				return match
+			}
+			// Preserve the inner text so leaked agent instructions do not erase
+			// the actual answer, but strip the wrapper/result markup itself.
+			return leakedAgentResultTagPattern.ReplaceAllString(submatches[1], "")
+		})
+	}
+	// Fallback for truncated output streams: strip any dangling wrapper tags
+	// that were not part of a complete block replacement. If we detect leaked
+	// wrapper tags, strip only adjacent <result> tags to avoid exposing agent
+	// markup without altering unrelated user-visible <result> examples.
+	if leakedAgentWrapperTagPattern.MatchString(out) {
+		out = leakedAgentWrapperPlusResultOpenPattern.ReplaceAllStringFunc(out, func(match string) string {
+			return leakedAgentResultTagPattern.ReplaceAllString(match, "")
+		})
+		out = leakedAgentResultPlusWrapperClosePattern.ReplaceAllStringFunc(out, func(match string) string {
+			return leakedAgentResultTagPattern.ReplaceAllString(match, "")
+		})
+		out = leakedAgentWrapperTagPattern.ReplaceAllString(out, "")
+	}
+	return out
+}
--- a/internal/adapter/openai/leaked_output_sanitize_test.go
+++ b/internal/adapter/openai/leaked_output_sanitize_test.go
@@ -0,0 +1,68 @@
+package openai
+
+import "testing"
+
+// TestSanitizeLeakedOutputRemovesEmptyJSONFence checks that an empty json fence is deleted from the text.
+func TestSanitizeLeakedOutputRemovesEmptyJSONFence(t *testing.T) {
+	got := sanitizeLeakedOutput("before\n```json\n```\nafter")
+	if want := "before\n\nafter"; got != want {
+		t.Fatalf("unexpected sanitized empty json fence: %q", got)
+	}
+}
+
+// TestSanitizeLeakedOutputRemovesLeakedWireToolCallAndResult checks that a leaked tool-call array and tool result are removed.
+func TestSanitizeLeakedOutputRemovesLeakedWireToolCallAndResult(t *testing.T) {
+	got := sanitizeLeakedOutput("开始\n[{\"function\":{\"arguments\":\"{\\\"command\\\":\\\"java -version\\\"}\",\"name\":\"exec\"},\"id\":\"callb9a321\",\"type\":\"function\"}]< | Tool | >{\"content\":\"openjdk version 21\",\"tool_call_id\":\"callb9a321\"}\n结束")
+	if want := "开始\n\n结束"; got != want {
+		t.Fatalf("unexpected sanitize result for leaked wire format: %q", got)
+	}
+}
+
+// TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers checks that ASCII and full-width meta markers are removed.
+func TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers(t *testing.T) {
+	got := sanitizeLeakedOutput("A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<｜end▁of▁thinking｜>D<｜end▁of▁sentence｜>E")
+	if want := "ABCDE"; got != want {
+		t.Fatalf("unexpected sanitize result for meta markers: %q", got)
+	}
+}
+
+// TestSanitizeLeakedOutputRemovesAgentXMLLeaks checks that a complete wrapper block is reduced to its inner text.
+func TestSanitizeLeakedOutputRemovesAgentXMLLeaks(t *testing.T) {
+	got := sanitizeLeakedOutput("Done.<attempt_completion><result>Some final answer</result></attempt_completion>")
+	if want := "Done.Some final answer"; got != want {
+		t.Fatalf("unexpected sanitize result for agent XML leak: %q", got)
+	}
+}
+
+// TestSanitizeLeakedOutputPreservesStandaloneResultTags checks that <result> tags without a wrapper pass through unchanged.
+func TestSanitizeLeakedOutputPreservesStandaloneResultTags(t *testing.T) {
+	const raw = "Example XML: <result>value</result>"
+	if got := sanitizeLeakedOutput(raw); got != raw {
+		t.Fatalf("unexpected sanitize result for standalone result tag: %q", got)
+	}
+}
+
+// TestSanitizeLeakedOutputRemovesDanglingAgentXMLOpeningTags checks that unclosed wrapper and result openers are stripped.
+func TestSanitizeLeakedOutputRemovesDanglingAgentXMLOpeningTags(t *testing.T) {
+	got := sanitizeLeakedOutput("Done.<attempt_completion><result>Some final answer")
+	if want := "Done.Some final answer"; got != want {
+		t.Fatalf("unexpected sanitize result for dangling opening tags: %q", got)
+	}
+}
+
+// TestSanitizeLeakedOutputRemovesDanglingAgentXMLClosingTags checks that unopened result and wrapper closers are stripped.
+func TestSanitizeLeakedOutputRemovesDanglingAgentXMLClosingTags(t *testing.T) {
+	got := sanitizeLeakedOutput("Done.Some final answer</result></attempt_completion>")
+	if want := "Done.Some final answer"; got != want {
+		t.Fatalf("unexpected sanitize result for dangling closing tags: %q", got)
+	}
+}
+
+// TestSanitizeLeakedOutputPreservesUnrelatedResultTagsWhenWrapperLeaks checks that only wrapper-adjacent <result> tags go.
+func TestSanitizeLeakedOutputPreservesUnrelatedResultTagsWhenWrapperLeaks(t *testing.T) {
+	const want = "Done.Some final answer\nExample XML: <result>value</result>"
+	got := sanitizeLeakedOutput("Done.<attempt_completion><result>Some final answer\nExample XML: <result>value</result>")
+	if got != want {
+		t.Fatalf("unexpected sanitize result for mixed leaked wrapper + xml example: %q", got)
+	}
+}
--- a/internal/adapter/openai/message_normalize.go
+++ b/internal/adapter/openai/message_normalize.go
@@ -1,15 +1,13 @@
 package openai

 import (
-	"encoding/json"
-	"fmt"
 	"strings"

-	"ds2api/internal/config"
 	"ds2api/internal/prompt"
 )

 func normalizeOpenAIMessagesForPrompt(raw []any, traceID string) []map[string]any {
+	_ = traceID
 	out := make([]map[string]any, 0, len(raw))
 	for _, item := range raw {
 		msg, ok := item.(map[string]any)
@@ -19,20 +17,19 @@ func normalizeOpenAIMessagesForPrompt(raw []any, traceID string) []map[string]an
 		role := strings.ToLower(strings.TrimSpace(asString(msg["role"])))
 		switch role {
 		case "assistant":
-			content := normalizeOpenAIContentForPrompt(msg["content"])
-			toolCalls := formatAssistantToolCallsForPrompt(msg, traceID)
-			combined := joinNonEmpty(content, toolCalls)
-			if combined == "" {
+			content := buildAssistantContentForPrompt(msg)
+			if content == "" {
 				continue
 			}
 			out = append(out, map[string]any{
 				"role":    "assistant",
-				"content": combined,
+				"content": content,
 			})
 		case "tool", "function":
+			content := buildToolContentForPrompt(msg)
 			out = append(out, map[string]any{
-				"role":    "user",
-				"content": formatToolResultForPrompt(msg),
+				"role":    "tool",
+				"content": content,
 			})
 		case "user", "system", "developer":
 			out = append(out, map[string]any{
@@ -56,115 +53,33 @@ func normalizeOpenAIMessagesForPrompt(raw []any, traceID string) []map[string]an
 	return out
 }

-func formatAssistantToolCallsForPrompt(msg map[string]any, traceID string) string {
-	entries := make([]string, 0)
-	if calls, ok := msg["tool_calls"].([]any); ok {
-		for i, item := range calls {
-			call, ok := item.(map[string]any)
-			if !ok {
-				continue
-			}
-			id := strings.TrimSpace(asString(call["id"]))
-			if id == "" {
-				id = fmt.Sprintf("call_%d", i+1)
-			}
-			name := strings.TrimSpace(asString(call["name"]))
-			args := ""
-
-			if fn, ok := call["function"].(map[string]any); ok {
-				if name == "" {
-					name = strings.TrimSpace(asString(fn["name"]))
-				}
-				args = normalizeOpenAIArgumentsForPrompt(fn["arguments"])
-			}
-			if name == "" {
-				name = "unknown"
-			}
-			if args == "" {
-				args = normalizeOpenAIArgumentsForPrompt(call["arguments"])
-			}
-			if args == "" {
-				args = normalizeOpenAIArgumentsForPrompt(call["input"])
-			}
-			if args == "" {
-				args = "{}"
-			}
-			maybeWarnSuspiciousToolHistory(traceID, id, name, args)
-			entries = append(entries, fmt.Sprintf("[TOOL_CALL_HISTORY]\nstatus: already_called\norigin: assistant\nnot_user_input: true\ntool_call_id: %s\nfunction.name: %s\nfunction.arguments: %s\n[/TOOL_CALL_HISTORY]", id, name, args))
-		}
+func buildAssistantContentForPrompt(msg map[string]any) string {
+	content := strings.TrimSpace(normalizeOpenAIContentForPrompt(msg["content"]))
+	toolHistory := prompt.FormatToolCallsForPrompt(msg["tool_calls"])
+	switch {
+	case content == "" && toolHistory == "":
+		return ""
+	case content == "":
+		return toolHistory
+	case toolHistory == "":
+		return content
+	default:
+		return content + "\n\n" + toolHistory
 	}
-
-	if legacy, ok := msg["function_call"].(map[string]any); ok {
-		name := strings.TrimSpace(asString(legacy["name"]))
-		if name == "" {
-			name = "unknown"
-		}
-		args := normalizeOpenAIArgumentsForPrompt(legacy["arguments"])
-		if args == "" {
-			args = "{}"
-		}
-		maybeWarnSuspiciousToolHistory(traceID, "call_legacy", name, args)
-		entries = append(entries, fmt.Sprintf("[TOOL_CALL_HISTORY]\nstatus: already_called\norigin: assistant\nnot_user_input: true\ntool_call_id: call_legacy\nfunction.name: %s\nfunction.arguments: %s\n[/TOOL_CALL_HISTORY]", name, args))
-	}
-
-	return strings.Join(entries, "\n\n")
 }

-func formatToolResultForPrompt(msg map[string]any) string {
-	toolCallID := strings.TrimSpace(asString(msg["tool_call_id"]))
-	if toolCallID == "" {
-		toolCallID = strings.TrimSpace(asString(msg["id"]))
-	}
-	if toolCallID == "" {
-		toolCallID = "unknown"
-	}
-
-	name := strings.TrimSpace(asString(msg["name"]))
-	if name == "" {
-		name = "unknown"
-	}
-
+func buildToolContentForPrompt(msg map[string]any) string {
 	content := normalizeOpenAIContentForPrompt(msg["content"])
-	if content == "" {
-		content = "null"
+	if strings.TrimSpace(content) == "" {
+		return "null"
 	}
-
-	return fmt.Sprintf("[TOOL_RESULT_HISTORY]\nstatus: already_returned\norigin: tool_runtime\nnot_user_input: true\ntool_call_id: %s\nname: %s\ncontent: %s\n[/TOOL_RESULT_HISTORY]", toolCallID, name, content)
+	return content
 }

 func normalizeOpenAIContentForPrompt(v any) string {
 	return prompt.NormalizeContent(v)
 }

-func normalizeOpenAIArgumentsForPrompt(v any) string {
-	switch x := v.(type) {
-	case string:
-		return normalizeToolArgumentString(x)
-	default:
-		return marshalToPromptString(v)
-	}
-}
-
-func normalizeToolArgumentString(raw string) string {
-	trimmed := strings.TrimSpace(raw)
-	if trimmed == "" {
-		return ""
-	}
-	if looksLikeConcatenatedJSON(trimmed) {
-		// Keep original payload to avoid silent argument rewrites.
-		return raw
-	}
-	return trimmed
-}
-
-func marshalToPromptString(v any) string {
-	b, err := json.Marshal(v)
-	if err != nil {
-		return strings.TrimSpace(fmt.Sprintf("%v", v))
-	}
-	return string(b)
-}
-
 func normalizeOpenAIRoleForPrompt(role string) string {
 	role = strings.ToLower(strings.TrimSpace(role))
 	if role == "developer" {
@@ -179,56 +94,3 @@ func asString(v any) string {
 	}
 	return ""
 }
-
-func joinNonEmpty(parts ...string) string {
-	nonEmpty := make([]string, 0, len(parts))
-	for _, p := range parts {
-		if strings.TrimSpace(p) == "" {
-			continue
-		}
-		nonEmpty = append(nonEmpty, p)
-	}
-	return strings.Join(nonEmpty, "\n\n")
-}
-
-func maybeWarnSuspiciousToolHistory(traceID, callID, name, args string) {
-	if !looksLikeConcatenatedJSON(args) {
-		return
-	}
-	traceID = strings.TrimSpace(traceID)
-	if traceID == "" {
-		traceID = "unknown"
-	}
-	config.Logger.Warn(
-		"[openai] suspicious tool call history payload detected",
-		"trace_id", traceID,
-		"tool_call_id", strings.TrimSpace(callID),
-		"name", strings.TrimSpace(name),
-		"arguments_preview", previewToolArgs(args, 160),
-	)
-}
-
-func looksLikeConcatenatedJSON(raw string) bool {
-	trimmed := strings.TrimSpace(raw)
-	if trimmed == "" {
-		return false
-	}
-	if strings.Contains(trimmed, "}{") || strings.Contains(trimmed, "][") {
-		return true
-	}
-	dec := json.NewDecoder(strings.NewReader(trimmed))
-	var first any
-	if err := dec.Decode(&first); err != nil {
-		return false
-	}
-	var second any
-	return dec.Decode(&second) == nil
-}
-
-func previewToolArgs(raw string, max int) string {
-	trimmed := strings.TrimSpace(raw)
-	if max <= 0 || len(trimmed) <= max {
-		return trimmed
-	}
-	return trimmed[:max]
-}
--- a/internal/adapter/openai/message_normalize_test.go
+++ b/internal/adapter/openai/message_normalize_test.go
@@ -35,23 +35,22 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes

 	normalized := normalizeOpenAIMessagesForPrompt(raw, "")
 	if len(normalized) != 4 {
-		t.Fatalf("expected 4 normalized messages, got %d", len(normalized))
+		t.Fatalf("expected 4 normalized messages with assistant tool history preserved, got %d", len(normalized))
 	}
 	assistantContent, _ := normalized[2]["content"].(string)
-	if !strings.Contains(assistantContent, "[TOOL_CALL_HISTORY]") ||
-		!strings.Contains(assistantContent, "tool_call_id: call_1") ||
-		!strings.Contains(assistantContent, "function.name: get_weather") ||
-		!strings.Contains(assistantContent, "function.arguments: {\"city\":\"beijing\"}") {
-		t.Fatalf("assistant tool call not serialized correctly: %q", assistantContent)
+	if !strings.Contains(assistantContent, "<tool_calls>") {
+		t.Fatalf("assistant tool history should be preserved in XML form, got %q", assistantContent)
 	}
-	toolContent, _ := normalized[3]["content"].(string)
-	if !strings.Contains(toolContent, "[TOOL_RESULT_HISTORY]") || !strings.Contains(toolContent, "name: get_weather") {
-		t.Fatalf("tool result not serialized correctly: %q", toolContent)
+	if !strings.Contains(assistantContent, "<tool_name>get_weather</tool_name>") {
+		t.Fatalf("expected tool name in preserved history, got %q", assistantContent)
+	}
+	if !strings.Contains(normalized[3]["content"].(string), `"temp":18`) {
+		t.Fatalf("tool result should be transparently forwarded, got %#v", normalized[3]["content"])
 	}

 	prompt := util.MessagesPrepare(normalized)
-	if !strings.Contains(prompt, "tool_call_id: call_1") || !strings.Contains(prompt, "[TOOL_RESULT_HISTORY]") {
-		t.Fatalf("expected prompt to include tool call + result semantics: %q", prompt)
+	if !strings.Contains(prompt, "<tool_calls>") {
+		t.Fatalf("expected preserved assistant tool history in prompt: %q", prompt)
 	}
 }

@@ -91,8 +90,8 @@ func TestNormalizeOpenAIMessagesForPrompt_ToolArrayBlocksJoined(t *testing.T) {

 	normalized := normalizeOpenAIMessagesForPrompt(raw, "")
 	got, _ := normalized[0]["content"].(string)
-	if !strings.Contains(got, "line-1\nline-2") {
-		t.Fatalf("expected joined text blocks, got %q", got)
+	if !strings.Contains(got, `line-1`) || !strings.Contains(got, `line-2`) {
+		t.Fatalf("expected tool content blocks preserved, got %q", got)
 	}
 }

@@ -112,15 +111,42 @@ func TestNormalizeOpenAIMessagesForPrompt_FunctionRoleCompatible(t *testing.T) {
 	if len(normalized) != 1 {
 		t.Fatalf("expected one normalized message, got %d", len(normalized))
 	}
-	if normalized[0]["role"] != "user" {
-		t.Fatalf("expected function role mapped to user, got %#v", normalized[0]["role"])
+	if normalized[0]["role"] != "tool" {
+		t.Fatalf("expected function role normalized as tool, got %#v", normalized[0]["role"])
 	}
 	got, _ := normalized[0]["content"].(string)
-	if !strings.Contains(got, "name: legacy_tool") || !strings.Contains(got, `"ok":true`) {
+	if !strings.Contains(got, `"ok":true`) || strings.Contains(got, `"name":"legacy_tool"`) {
 		t.Fatalf("unexpected normalized function-role content: %q", got)
 	}
 }

+// TestNormalizeOpenAIMessagesForPrompt_EmptyToolContentPreservedAsNull checks that a tool turn
+// with empty content is kept in the output and that its content becomes the string "null".
+func TestNormalizeOpenAIMessagesForPrompt_EmptyToolContentPreservedAsNull(t *testing.T) {
+	toolTurn := map[string]any{
+		"role":         "tool",
+		"tool_call_id": "call_5",
+		"name":         "noop_tool",
+		"content":      "",
+	}
+	assistantTurn := map[string]any{
+		"role":    "assistant",
+		"content": "done",
+	}
+
+	normalized := normalizeOpenAIMessagesForPrompt([]any{toolTurn, assistantTurn}, "")
+	if len(normalized) != 2 {
+		t.Fatalf("expected tool completion turn to be preserved, got %#v", normalized)
+	}
+	first := normalized[0]
+	if first["role"] != "tool" {
+		t.Fatalf("expected tool role preserved, got %#v", first["role"])
+	}
+	if got, _ := first["content"].(string); got != "null" {
+		t.Fatalf("expected empty tool content normalized as null string, got %q", got)
+	}
+}
+
 func TestNormalizeOpenAIMessagesForPrompt_AssistantMultipleToolCallsRemainSeparated(t *testing.T) {
 	raw := []any{
 		map[string]any{
@@ -148,23 +174,14 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantMultipleToolCallsRemainSepara

 	normalized := normalizeOpenAIMessagesForPrompt(raw, "")
 	if len(normalized) != 1 {
-		t.Fatalf("expected one normalized assistant message, got %d", len(normalized))
+		t.Fatalf("expected assistant tool_call-only message preserved, got %#v", normalized)
 	}
 	content, _ := normalized[0]["content"].(string)
-	if strings.Count(content, "[TOOL_CALL_HISTORY]") != 2 {
-		t.Fatalf("expected two TOOL_CALL_HISTORY blocks, got %q", content)
+	if strings.Count(content, "<tool_call>") != 2 {
+		t.Fatalf("expected two preserved tool call blocks, got %q", content)
 	}
-	if !strings.Contains(content, "tool_call_id: call_search") || !strings.Contains(content, "function.name: search_web") {
-		t.Fatalf("missing first tool call block, got %q", content)
-	}
-	if !strings.Contains(content, "tool_call_id: call_eval") || !strings.Contains(content, "function.name: eval_javascript") {
-		t.Fatalf("missing second tool call block, got %q", content)
-	}
-	if strings.Contains(content, "search_webeval_javascript") {
-		t.Fatalf("unexpected merged function name detected: %q", content)
-	}
-	if strings.Contains(content, `}{"`) {
-		t.Fatalf("unexpected concatenated function arguments detected: %q", content)
+	if !strings.Contains(content, "<tool_name>search_web</tool_name>") || !strings.Contains(content, "<tool_name>eval_javascript</tool_name>") {
+		t.Fatalf("expected both tool names in preserved history, got %q", content)
 	}
 }

@@ -186,11 +203,33 @@ func TestNormalizeOpenAIMessagesForPrompt_PreservesConcatenatedToolArguments(t *

 	normalized := normalizeOpenAIMessagesForPrompt(raw, "")
 	if len(normalized) != 1 {
-		t.Fatalf("expected one normalized message, got %d", len(normalized))
+		t.Fatalf("expected assistant tool_call-only content preserved, got %#v", normalized)
 	}
 	content, _ := normalized[0]["content"].(string)
-	if !strings.Contains(content, `function.arguments: {}{"query":"测试工具调用"}`) {
-		t.Fatalf("expected original concatenated arguments in tool history, got %q", content)
+	if !strings.Contains(content, `{}{"query":"测试工具调用"}`) {
+		t.Fatalf("expected concatenated tool arguments preserved, got %q", content)
+	}
+}
+
+func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsMissingNameAreDropped(t *testing.T) {
+	raw := []any{
+		map[string]any{
+			"role": "assistant",
+			"tool_calls": []any{
+				map[string]any{
+					"id":   "call_missing_name",
+					"type": "function",
+					"function": map[string]any{
+						"arguments": `{"path":"README.MD"}`,
+					},
+				},
+			},
+		},
+	}
+
+	normalized := normalizeOpenAIMessagesForPrompt(raw, "")
+	if len(normalized) != 0 {
+		t.Fatalf("expected assistant tool_calls without text to be dropped when name is missing, got %#v", normalized)
 	}
 }

@@ -213,14 +252,14 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantNilContentDoesNotInjectNullLi

 	normalized := normalizeOpenAIMessagesForPrompt(raw, "")
 	if len(normalized) != 1 {
-		t.Fatalf("expected one normalized message, got %d", len(normalized))
+		t.Fatalf("expected nil-content assistant tool_call-only message preserved, got %#v", normalized)
 	}
 	content, _ := normalized[0]["content"].(string)
-	if strings.Contains(content, "<｜Assistant｜>null") || strings.HasPrefix(strings.TrimSpace(content), "null") {
-		t.Fatalf("unexpected null literal injected into assistant tool history: %q", content)
+	if strings.Contains(content, "null") {
+		t.Fatalf("expected no null literal injection, got %q", content)
 	}
-	if !strings.Contains(content, "function.name: send_file_to_user") {
-		t.Fatalf("expected tool history block preserved, got %q", content)
+	if !strings.Contains(content, "<tool_calls>") {
+		t.Fatalf("expected assistant tool history in normalized content, got %q", content)
 	}
 }

--- a/internal/adapter/openai/prompt_build_test.go
+++ b/internal/adapter/openai/prompt_build_test.go
@@ -44,11 +44,14 @@ func TestBuildOpenAIFinalPrompt_HandlerPathIncludesToolRoundtripSemantics(t *tes
 	if len(toolNames) != 1 || toolNames[0] != "get_weather" {
 		t.Fatalf("unexpected tool names: %#v", toolNames)
 	}
-	if !strings.Contains(finalPrompt, "tool_call_id: call_1") ||
-		!strings.Contains(finalPrompt, "function.name: get_weather") ||
-		!strings.Contains(finalPrompt, "[TOOL_RESULT_HISTORY]") ||
-		!strings.Contains(finalPrompt, `"condition":"sunny"`) {
-		t.Fatalf("handler finalPrompt missing tool roundtrip semantics: %q", finalPrompt)
+	if !strings.Contains(finalPrompt, `"condition":"sunny"`) {
+		t.Fatalf("handler finalPrompt should preserve tool output content: %q", finalPrompt)
+	}
+	if !strings.Contains(finalPrompt, "<tool_calls>") {
+		t.Fatalf("handler finalPrompt should preserve assistant tool history: %q", finalPrompt)
+	}
+	if !strings.Contains(finalPrompt, "<tool_name>get_weather</tool_name>") {
+		t.Fatalf("handler finalPrompt should include tool name history: %q", finalPrompt)
 	}
 }

@@ -71,13 +74,19 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
 	}

 	finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "")
-	if !strings.Contains(finalPrompt, "After receiving a tool result, you MUST use it to produce the final answer.") {
+	if !strings.Contains(finalPrompt, "After receiving a tool result, use it directly.") {
 		t.Fatalf("vercel prepare finalPrompt missing final-answer instruction: %q", finalPrompt)
 	}
-	if !strings.Contains(finalPrompt, "Only call another tool when the previous result is missing required data or returned an error.") {
+	if !strings.Contains(finalPrompt, "Only call another tool if the result is insufficient.") {
 		t.Fatalf("vercel prepare finalPrompt missing retry guard instruction: %q", finalPrompt)
 	}
-	if !strings.Contains(finalPrompt, "[TOOL_RESULT_HISTORY]") {
-		t.Fatalf("vercel prepare finalPrompt missing history marker instruction: %q", finalPrompt)
+	if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") {
+		t.Fatalf("vercel prepare finalPrompt missing xml format instruction: %q", finalPrompt)
+	}
+	if !strings.Contains(finalPrompt, "Do NOT wrap the XML in markdown code fences") {
+		t.Fatalf("vercel prepare finalPrompt missing no-fence xml instruction: %q", finalPrompt)
+	}
+	if strings.Contains(finalPrompt, "```json") {
+		t.Fatalf("vercel prepare finalPrompt should not require fenced tool calls: %q", finalPrompt)
 	}
 }
--- a/internal/adapter/openai/responses_handler.go
+++ b/internal/adapter/openai/responses_handler.go
@@ -113,7 +113,11 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
 		return
 	}
 	result := sse.CollectStream(resp, thinkingEnabled, true)
-	textParsed := util.ParseStandaloneToolCallsDetailed(result.Text, toolNames)
+	sanitizedText := sanitizeLeakedOutput(result.Text)
+	if writeUpstreamEmptyOutputError(w, result) {
+		return
+	}
+	textParsed := util.ParseStandaloneToolCallsDetailed(sanitizedText, toolNames)
 	logResponsesToolPolicyRejection(traceID, toolChoice, textParsed, "text")

 	callCount := len(textParsed.Calls)
@@ -122,7 +126,15 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
 		return
 	}

-	responseObj := openaifmt.BuildResponseObject(responseID, model, finalPrompt, result.Thinking, result.Text, toolNames)
+	responseObj := openaifmt.BuildResponseObject(responseID, model, finalPrompt, result.Thinking, sanitizedText, toolNames)
+	if result.OutputTokens > 0 {
+		if usage, ok := responseObj["usage"].(map[string]any); ok {
+			usage["output_tokens"] = result.OutputTokens
+			if input, ok := usage["input_tokens"].(int); ok {
+				usage["total_tokens"] = input + result.OutputTokens
+			}
+		}
+	}
 	h.getResponseStore().put(owner, responseID, responseObj)
 	writeJSON(w, http.StatusOK, responseObj)
 }
@@ -145,8 +157,8 @@ func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request,
 	if thinkingEnabled {
 		initialType = "thinking"
 	}
-	bufferToolContent := len(toolNames) > 0 && h.toolcallFeatureMatchEnabled()
-	emitEarlyToolDeltas := h.toolcallEarlyEmitHighConfidence()
+	bufferToolContent := len(toolNames) > 0
+	emitEarlyToolDeltas := h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence()

 	streamRuntime := newResponsesStreamRuntime(
 		w,
--- a/internal/adapter/openai/responses_input_items.go
+++ b/internal/adapter/openai/responses_input_items.go
@@ -1,11 +1,11 @@
 package openai

 import (
-	"encoding/json"
 	"fmt"
 	"strings"

 	"ds2api/internal/config"
+	"ds2api/internal/prompt"
 )

 func normalizeResponsesInputItem(m map[string]any) map[string]any {
@@ -19,6 +19,27 @@ func normalizeResponsesInputItemWithState(m map[string]any, callNameByID map[str

 	role := strings.ToLower(strings.TrimSpace(asString(m["role"])))
 	if role != "" {
+		if role == "assistant" {
+			out := map[string]any{
+				"role": "assistant",
+			}
+			if toolCalls, ok := m["tool_calls"].([]any); ok && len(toolCalls) > 0 {
+				out["tool_calls"] = toolCalls
+			}
+			content := m["content"]
+			if content == nil {
+				if txt, _ := m["text"].(string); strings.TrimSpace(txt) != "" {
+					content = txt
+				}
+			}
+			if content != nil {
+				out["content"] = content
+			}
+			if _, hasToolCalls := out["tool_calls"]; hasToolCalls || out["content"] != nil {
+				return out
+			}
+			return nil
+		}
 		content := m["content"]
 		if content == nil {
 			if txt, _ := m["text"].(string); strings.TrimSpace(txt) != "" {
@@ -28,10 +49,22 @@ func normalizeResponsesInputItemWithState(m map[string]any, callNameByID map[str
 		if content == nil {
 			return nil
 		}
-		return map[string]any{
+		out := map[string]any{
 			"role":    normalizeOpenAIRoleForPrompt(role),
 			"content": content,
 		}
+		if role == "tool" || role == "function" {
+			if callID := strings.TrimSpace(asString(m["tool_call_id"])); callID != "" {
+				out["tool_call_id"] = callID
+			}
+			if callID := strings.TrimSpace(asString(m["call_id"])); callID != "" {
+				out["tool_call_id"] = callID
+			}
+			if name := strings.TrimSpace(asString(m["name"])); name != "" {
+				out["name"] = name
+			}
+		}
+		return out
 	}

 	itemType := strings.ToLower(strings.TrimSpace(asString(m["type"])))
@@ -115,7 +148,7 @@ func normalizeResponsesInputItemWithState(m map[string]any, callNameByID map[str

 		functionPayload := map[string]any{
 			"name":      name,
-			"arguments": stringifyToolCallArguments(argsRaw),
+			"arguments": prompt.StringifyToolCallArguments(argsRaw),
 		}
 		call := map[string]any{
 			"type":     "function",
@@ -178,26 +211,3 @@ func normalizeResponsesFallbackPart(m map[string]any) string {
 	}
 	return strings.TrimSpace(fmt.Sprintf("%v", m))
 }
-
-func stringifyToolCallArguments(v any) string {
-	switch x := v.(type) {
-	case nil:
-		return "{}"
-	case string:
-		s := strings.TrimSpace(x)
-		if s == "" {
-			return "{}"
-		}
-		s = normalizeToolArgumentString(s)
-		if s == "" {
-			return "{}"
-		}
-		return s
-	default:
-		b, err := json.Marshal(x)
-		if err != nil || len(b) == 0 {
-			return "{}"
-		}
-		return string(b)
-	}
-}
--- a/internal/adapter/openai/responses_stream_runtime_core.go
+++ b/internal/adapter/openai/responses_stream_runtime_core.go
@@ -32,7 +32,6 @@ type responsesStreamRuntime struct {
 	toolCallsDoneEmitted bool

 	sieve             toolStreamSieveState
-	thinkingSieve     toolStreamSieveState
 	thinking          strings.Builder
 	text              strings.Builder
 	visibleText       strings.Builder
@@ -50,6 +49,7 @@ type responsesStreamRuntime struct {
 	messagePartAdded  bool
 	sequence          int
 	failed            bool
+	outputTokens      int

 	persistResponse func(obj map[string]any)
 }
@@ -98,7 +98,7 @@ func newResponsesStreamRuntime(

 func (s *responsesStreamRuntime) finalize() {
 	finalThinking := s.thinking.String()
-	finalText := s.text.String()
+	finalText := sanitizeLeakedOutput(s.text.String())

 	if s.bufferToolContent {
 		s.processToolStreamEvents(flushToolSieve(&s.sieve, s.toolNames), true)
@@ -145,6 +145,14 @@ func (s *responsesStreamRuntime) finalize() {
 	s.closeIncompleteFunctionItems()

 	obj := s.buildCompletedResponseObject(finalThinking, finalText, detected)
+	if s.outputTokens > 0 {
+		if usage, ok := obj["usage"].(map[string]any); ok {
+			usage["output_tokens"] = s.outputTokens
+			if input, ok := usage["input_tokens"].(int); ok {
+				usage["total_tokens"] = input + s.outputTokens
+			}
+		}
+	}
 	if s.persistResponse != nil {
 		s.persistResponse(obj)
 	}
@@ -169,19 +177,13 @@ func (s *responsesStreamRuntime) logToolPolicyRejections(textParsed util.ToolCal
 	logRejected(textParsed, "text")
 }

-func (s *responsesStreamRuntime) hasFunctionCallDone() bool {
-	for _, done := range s.functionDone {
-		if done {
-			return true
-		}
-	}
-	return false
-}
-
 func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision {
 	if !parsed.Parsed {
 		return streamengine.ParsedDecision{}
 	}
+	if parsed.OutputTokens > 0 {
+		s.outputTokens = parsed.OutputTokens
+	}
 	if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
 		return streamengine.ParsedDecision{Stop: true}
 	}
@@ -204,12 +206,16 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
 			continue
 		}

-		s.text.WriteString(p.Text)
-		if !s.bufferToolContent {
-			s.emitTextDelta(p.Text)
+		cleanedText := sanitizeLeakedOutput(p.Text)
+		if cleanedText == "" {
 			continue
 		}
-		s.processToolStreamEvents(processToolSieveChunk(&s.sieve, p.Text, s.toolNames), true)
+		s.text.WriteString(cleanedText)
+		if !s.bufferToolContent {
+			s.emitTextDelta(cleanedText)
+			continue
+		}
+		s.processToolStreamEvents(processToolSieveChunk(&s.sieve, cleanedText, s.toolNames), true)
 	}

 	return streamengine.ParsedDecision{ContentSeen: contentSeen}
--- a/internal/adapter/openai/responses_stream_runtime_toolcalls.go
+++ b/internal/adapter/openai/responses_stream_runtime_toolcalls.go
@@ -94,6 +94,16 @@ func (s *responsesStreamRuntime) closeMessageItem() {
 	outputIndex := s.ensureMessageOutputIndex()
 	text := s.visibleText.String()
 	if s.messagePartAdded {
+		s.sendEvent(
+			"response.output_text.done",
+			openaifmt.BuildResponsesTextDonePayload(
+				s.responseID,
+				itemID,
+				outputIndex,
+				0,
+				text,
+			),
+		)
 		s.sendEvent(
 			"response.content_part.done",
 			openaifmt.BuildResponsesContentPartDonePayload(
--- a/internal/adapter/openai/responses_stream_test.go
+++ b/internal/adapter/openai/responses_stream_test.go
@@ -226,6 +226,40 @@ func TestHandleResponsesStreamMultiToolCallKeepsNameAndCallIDAligned(t *testing.
 	}
 }

+// TestHandleResponsesStreamEmitsOutputTextDoneBeforeContentPartDone streams a single text
+// chunk followed by [DONE] and checks that the response.output_text.done event appears in
+// the body no later than the response.content_part.done event.
+func TestHandleResponsesStreamEmitsOutputTextDoneBeforeContentPartDone(t *testing.T) {
+	h := &Handler{}
+	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
+	rec := httptest.NewRecorder()
+
+	payload, _ := json.Marshal(map[string]any{
+		"p": "response/content",
+		"v": "hello",
+	})
+	streamBody := "data: " + string(payload) + "\n" + "data: [DONE]\n"
+	resp := &http.Response{
+		StatusCode: http.StatusOK,
+		Body:       io.NopCloser(strings.NewReader(streamBody)),
+	}
+
+	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, nil, util.DefaultToolChoicePolicy(), "")
+	body := rec.Body.String()
+
+	textDoneIdx := strings.Index(body, "event: response.output_text.done")
+	if textDoneIdx < 0 {
+		t.Fatalf("expected response.output_text.done payload, body=%s", body)
+	}
+	partDoneIdx := strings.Index(body, "event: response.content_part.done")
+	if partDoneIdx < 0 {
+		t.Fatalf("expected output_text.done + content_part.done, body=%s", body)
+	}
+	if textDoneIdx > partDoneIdx {
+		t.Fatalf("expected output_text.done before content_part.done, body=%s", body)
+	}
+}
+
 func TestHandleResponsesStreamOutputTextDeltaCarriesItemIndexes(t *testing.T) {
 	h := &Handler{}
 	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
@@ -263,7 +297,7 @@ func TestHandleResponsesStreamOutputTextDeltaCarriesItemIndexes(t *testing.T) {
 	}
 }

-func TestHandleResponsesStreamThinkingAndMixedToolExampleRemainMessageOnly(t *testing.T) {
+func TestHandleResponsesStreamThinkingAndMixedToolExampleEmitsFunctionCall(t *testing.T) {
 	h := &Handler{}
 	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
 	rec := httptest.NewRecorder()
@@ -299,6 +333,7 @@ func TestHandleResponsesStreamThinkingAndMixedToolExampleRemainMessageOnly(t *te
 	responseObj, _ := completedPayload["response"].(map[string]any)
 	output, _ := responseObj["output"].([]any)
 	hasMessage := false
+	hasFunctionCall := false
 	for _, item := range output {
 		m, _ := item.(map[string]any)
 		if m == nil {
@@ -308,15 +343,18 @@ func TestHandleResponsesStreamThinkingAndMixedToolExampleRemainMessageOnly(t *te
 			hasMessage = true
 		}
 		if asString(m["type"]) == "function_call" {
-			t.Fatalf("did not expect function_call output for mixed prose tool example, output=%#v", output)
+			hasFunctionCall = true
 		}
 	}
 	if !hasMessage {
 		t.Fatalf("expected message output for mixed prose tool example, output=%#v", output)
 	}
+	if !hasFunctionCall {
+		t.Fatalf("expected function_call output for mixed prose tool example, output=%#v", output)
+	}
 }

-func TestHandleResponsesStreamToolChoiceNoneRejectsFunctionCall(t *testing.T) {
+func TestHandleResponsesStreamToolChoiceNoneStillAllowsFunctionCall(t *testing.T) {
 	h := &Handler{}
 	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
 	rec := httptest.NewRecorder()
@@ -338,8 +376,8 @@ func TestHandleResponsesStreamToolChoiceNoneRejectsFunctionCall(t *testing.T) {

 	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, nil, policy, "")
 	body := rec.Body.String()
-	if strings.Contains(body, "event: response.function_call_arguments.done") {
-		t.Fatalf("did not expect function_call events for tool_choice=none, body=%s", body)
+	if !strings.Contains(body, "event: response.function_call_arguments.done") {
+		t.Fatalf("expected function_call events for tool_choice=none, body=%s", body)
 	}
 }

@@ -480,7 +518,7 @@ func TestHandleResponsesStreamRequiredMalformedToolPayloadFails(t *testing.T) {
 	}
 }

-func TestHandleResponsesStreamRejectsUnknownToolName(t *testing.T) {
+func TestHandleResponsesStreamAllowsUnknownToolName(t *testing.T) {
 	h := &Handler{}
 	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
 	rec := httptest.NewRecorder()
@@ -501,8 +539,8 @@ func TestHandleResponsesStreamRejectsUnknownToolName(t *testing.T) {

 	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"read_file"}, util.DefaultToolChoicePolicy(), "")
 	body := rec.Body.String()
-	if strings.Contains(body, "event: response.function_call_arguments.done") {
-		t.Fatalf("did not expect function_call events for unknown tool, body=%s", body)
+	if !strings.Contains(body, "event: response.function_call_arguments.done") {
+		t.Fatalf("expected function_call events for unknown tool, body=%s", body)
 	}
 }

@@ -559,7 +597,7 @@ func TestHandleResponsesNonStreamRequiredToolChoiceIgnoresThinkingToolPayload(t
 	}
 }

-func TestHandleResponsesNonStreamToolChoiceNoneRejectsFunctionCall(t *testing.T) {
+func TestHandleResponsesNonStreamToolChoiceNoneStillAllowsFunctionCall(t *testing.T) {
 	h := &Handler{}
 	rec := httptest.NewRecorder()
 	resp := &http.Response{
@@ -573,16 +611,64 @@ func TestHandleResponsesNonStreamToolChoiceNoneRejectsFunctionCall(t *testing.T)

 	h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, nil, policy, "")
 	if rec.Code != http.StatusOK {
-		t.Fatalf("expected 200 for tool_choice=none passthrough text, got %d body=%s", rec.Code, rec.Body.String())
+		t.Fatalf("expected 200 for tool_choice=none handling, got %d body=%s", rec.Code, rec.Body.String())
 	}
 	out := decodeJSONBody(t, rec.Body.String())
 	output, _ := out["output"].([]any)
+	foundFunctionCall := false
 	for _, item := range output {
 		m, _ := item.(map[string]any)
 		if m != nil && m["type"] == "function_call" {
-			t.Fatalf("did not expect function_call output item for tool_choice=none, got %#v", output)
+			foundFunctionCall = true
 		}
 	}
+	if !foundFunctionCall {
+		t.Fatalf("expected function_call output item for tool_choice=none, got %#v", output)
+	}
+}
+
+// An upstream stream that carries only an empty content value is expected to
+// surface to the client as 502 with error code upstream_empty_output.
+func TestHandleResponsesNonStreamReturns502WhenUpstreamOutputEmpty(t *testing.T) {
+	const upstreamSSE = `data: {"p":"response/content","v":""}` + "\n" + `data: [DONE]` + "\n"
+	upstream := &http.Response{
+		StatusCode: http.StatusOK,
+		Body:       io.NopCloser(strings.NewReader(upstreamSSE)),
+	}
+	recorder := httptest.NewRecorder()
+	handler := &Handler{}
+
+	handler.handleResponsesNonStream(recorder, upstream, "owner-a", "resp_test", "deepseek-chat", "prompt", false, nil, util.DefaultToolChoicePolicy(), "")
+	if got := recorder.Code; got != http.StatusBadGateway {
+		t.Fatalf("expected 502 for empty upstream output, got %d body=%s", got, recorder.Body.String())
+	}
+	payload := decodeJSONBody(t, recorder.Body.String())
+	errObj, _ := payload["error"].(map[string]any)
+	if code := asString(errObj["code"]); code != "upstream_empty_output" {
+		t.Fatalf("expected code=upstream_empty_output, got %#v", payload)
+	}
+}
+
+// An upstream stream that reports code=content_filter without any output is
+// expected to surface to the client as 400 with error code content_filter.
+func TestHandleResponsesNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
+	const upstreamSSE = `data: {"code":"content_filter"}` + "\n" + `data: [DONE]` + "\n"
+	upstream := &http.Response{
+		StatusCode: http.StatusOK,
+		Body:       io.NopCloser(strings.NewReader(upstreamSSE)),
+	}
+	recorder := httptest.NewRecorder()
+	handler := &Handler{}
+
+	handler.handleResponsesNonStream(recorder, upstream, "owner-a", "resp_test", "deepseek-chat", "prompt", false, nil, util.DefaultToolChoicePolicy(), "")
+	if got := recorder.Code; got != http.StatusBadRequest {
+		t.Fatalf("expected 400 for filtered empty upstream output, got %d body=%s", got, recorder.Body.String())
+	}
+	payload := decodeJSONBody(t, recorder.Body.String())
+	errObj, _ := payload["error"].(map[string]any)
+	if code := asString(errObj["code"]); code != "content_filter" {
+		t.Fatalf("expected code=content_filter, got %#v", payload)
+	}
 }

 func extractSSEEventPayload(body, targetEvent string) (map[string]any, bool) {
@@ -637,18 +723,3 @@ func extractAllSSEEventPayloads(body, targetEvent string) []map[string]any {
 	}
 	return out
 }
-
-func asFloat(v any) float64 {
-	switch x := v.(type) {
-	case float64:
-		return x
-	case float32:
-		return float64(x)
-	case int:
-		return float64(x)
-	case int64:
-		return float64(x)
-	default:
-		return 0
-	}
-}
--- a/internal/adapter/openai/standard_request.go
+++ b/internal/adapter/openai/standard_request.go
@@ -25,6 +25,7 @@ func normalizeOpenAIChatRequest(store ConfigReader, req map[string]any, traceID
 	}
 	toolPolicy := util.DefaultToolChoicePolicy()
 	finalPrompt, toolNames := buildOpenAIFinalPromptWithPolicy(messagesRaw, req["tools"], traceID, toolPolicy)
+	toolNames = ensureToolDetectionEnabled(toolNames, req["tools"])
 	passThrough := collectOpenAIChatPassThrough(req)

 	return util.StandardRequest{
@@ -74,10 +75,8 @@ func normalizeOpenAIResponsesRequest(store ConfigReader, req map[string]any, tra
 		return util.StandardRequest{}, err
 	}
 	finalPrompt, toolNames := buildOpenAIFinalPromptWithPolicy(messagesRaw, req["tools"], traceID, toolPolicy)
-	if toolPolicy.IsNone() {
-		toolNames = nil
-		toolPolicy.Allowed = nil
-	} else {
+	toolNames = ensureToolDetectionEnabled(toolNames, req["tools"])
+	if !toolPolicy.IsNone() {
 		toolPolicy.Allowed = namesToSet(toolNames)
 	}
 	passThrough := collectOpenAIChatPassThrough(req)
@@ -98,6 +97,20 @@ func normalizeOpenAIResponsesRequest(store ConfigReader, req map[string]any, tra
 	}, nil
 }

+// ensureToolDetectionEnabled returns toolNames untouched when it already has
+// entries or when toolsRaw is not a non-empty []any. Otherwise it returns a
+// one-element sentinel list. Parsed tool payload names are no longer filtered
+// by this list.
+func ensureToolDetectionEnabled(toolNames []string, toolsRaw any) []string {
+	declared, _ := toolsRaw.([]any) // zero length for nil or any other type
+	if len(toolNames) > 0 || len(declared) == 0 {
+		return toolNames
+	}
+	// Keep stream sieve/tool buffering enabled even when client tool schemas
+	// are malformed or lack explicit names.
+	return []string{"__any_tool__"}
+}
+
 func collectOpenAIChatPassThrough(req map[string]any) map[string]any {
 	out := map[string]any{}
 	for _, k := range []string{
--- a/internal/adapter/openai/standard_request_test.go
+++ b/internal/adapter/openai/standard_request_test.go
@@ -152,7 +152,7 @@ func TestNormalizeOpenAIResponsesRequestToolChoiceForcedUndeclaredFails(t *testi
 	}
 }

-func TestNormalizeOpenAIResponsesRequestToolChoiceNoneDisablesTools(t *testing.T) {
+func TestNormalizeOpenAIResponsesRequestToolChoiceNoneKeepsToolDetectionEnabled(t *testing.T) {
 	store := newEmptyStoreForNormalizeTest(t)
 	req := map[string]any{
 		"model": "gpt-4o",
@@ -174,7 +174,7 @@ func TestNormalizeOpenAIResponsesRequestToolChoiceNoneDisablesTools(t *testing.T
 	if n.ToolChoice.Mode != util.ToolChoiceNone {
 		t.Fatalf("expected tool choice mode none, got %q", n.ToolChoice.Mode)
 	}
-	if len(n.ToolNames) != 0 {
-		t.Fatalf("expected no tool names when tool_choice=none, got %#v", n.ToolNames)
+	if len(n.ToolNames) == 0 {
+		t.Fatalf("expected tool detection sentinel when tool_choice=none, got %#v", n.ToolNames)
 	}
 }
--- a/internal/adapter/openai/stream_status_test.go
+++ b/internal/adapter/openai/stream_status_test.go
@@ -53,6 +53,10 @@ func (m streamStatusDSStub) CallCompletion(_ context.Context, _ *auth.RequestAut
 	return m.resp, nil
 }

+// DeleteAllSessionsForToken is a no-op for this stub: it ignores both
+// arguments and always reports success.
+func (m streamStatusDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error { return nil }
+
 func makeOpenAISSEHTTPResponse(lines ...string) *http.Response {
 	body := strings.Join(lines, "\n")
 	if !strings.HasSuffix(body, "\n") {
@@ -167,15 +171,65 @@ func TestResponsesNonStreamMixedProseToolPayloadHandlerPath(t *testing.T) {
 		t.Fatalf("decode response failed: %v body=%s", err, rec.Body.String())
 	}
 	outputText, _ := out["output_text"].(string)
-	if outputText == "" {
-		t.Fatalf("expected output_text preserved for mixed prose payload")
+	if outputText != "" {
+		t.Fatalf("expected output_text hidden for mixed prose tool payload, got %q", outputText)
 	}
 	output, _ := out["output"].([]any)
 	if len(output) != 1 {
 		t.Fatalf("expected one output item, got %#v", output)
 	}
 	first, _ := output[0].(map[string]any)
-	if first["type"] != "message" {
-		t.Fatalf("expected message output item, got %#v", output)
+	if first["type"] != "function_call" {
+		t.Fatalf("expected function_call output item, got %#v", output)
+	}
+}
+
+// The upstream stream reports CONTENT_FILTER mid-response; this test expects a
+// 200 stream ending in [DONE] with finish_reason=stop and no leaked refusal text.
+func TestChatCompletionsStreamContentFilterStopsNormallyWithoutLeak(t *testing.T) {
+	upstream := makeOpenAISSEHTTPResponse(
+		`data: {"p":"response/content","v":"合法前缀"}`,
+		`data: {"p":"response/status","v":"CONTENT_FILTER","accumulated_token_usage":77}`,
+		`data: {"p":"response/content","v":"CONTENT_FILTER你好，这个问题我暂时无法回答，让我们换个话题再聊聊吧。"}`,
+	)
+	statuses := make([]int, 0, 1)
+	handler := &Handler{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: streamStatusDSStub{resp: upstream}}
+	router := chi.NewRouter()
+	router.Use(captureStatusMiddleware(&statuses))
+	RegisterRoutes(router, handler)
+
+	const payload = `{"model":"deepseek-chat","messages":[{"role":"user","content":"hi"}],"stream":true}`
+	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(payload))
+	req.Header.Set("Authorization", "Bearer direct-token")
+	req.Header.Set("Content-Type", "application/json")
+	rec := httptest.NewRecorder()
+	router.ServeHTTP(rec, req)
+	body := rec.Body.String()
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, body)
+	}
+	if len(statuses) != 1 || statuses[0] != http.StatusOK {
+		t.Fatalf("expected captured status 200, got %#v", statuses)
+	}
+	if strings.Contains(body, "这个问题我暂时无法回答") {
+		t.Fatalf("expected leaked content-filter suffix to be hidden, body=%s", body)
+	}
+
+	frames, done := parseSSEDataFrames(t, body)
+	if !done {
+		t.Fatalf("expected [DONE], body=%s", body)
+	}
+	if len(frames) == 0 {
+		t.Fatalf("expected at least one json frame, body=%s", body)
+	}
+	finalFrame := frames[len(frames)-1]
+	choices, _ := finalFrame["choices"].([]any)
+	if len(choices) != 1 {
+		t.Fatalf("expected one choice in final frame, got %#v", finalFrame)
+	}
+	first, _ := choices[0].(map[string]any)
+	if first["finish_reason"] != "stop" {
+		t.Fatalf("expected finish_reason=stop for content-filter upstream stop, got %#v", first["finish_reason"])
 	}
 }
--- a/internal/adapter/openai/tool_sieve_core.go
+++ b/internal/adapter/openai/tool_sieve_core.go
@@ -114,8 +114,14 @@ func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStrea
 		} else {
 			content := state.capture.String()
 			if content != "" {
-				state.noteText(content)
-				events = append(events, toolStreamEvent{Content: content})
+				// If the captured text looks like an incomplete XML tool call block,
+				// swallow it to prevent leaking raw XML tags to the client.
+				if hasOpenXMLToolTag(content) {
+					// Drop it silently — incomplete tool call.
+				} else {
+					state.noteText(content)
+					events = append(events, toolStreamEvent{Content: content})
+				}
 			}
 		}
 		state.capture.Reset()
@@ -124,8 +130,14 @@ func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStrea
 	}
 	if state.pending.Len() > 0 {
 		content := state.pending.String()
-		state.noteText(content)
-		events = append(events, toolStreamEvent{Content: content})
+		// Safety: if pending contains XML tool tag fragments (e.g. "tool_calls>"
+		// from a split closing tag), swallow them instead of leaking.
+		if hasOpenXMLToolTag(content) || looksLikeXMLToolTagFragment(content) {
+			// Drop it — likely an incomplete tool call fragment.
+		} else {
+			state.noteText(content)
+			events = append(events, toolStreamEvent{Content: content})
+		}
 		state.pending.Reset()
 	}
 	return events
@@ -159,6 +171,10 @@ func findSuspiciousPrefixStart(s string) int {
 			start = idx
 		}
 	}
+	// Also check for partial XML tool tag at end of string.
+	if xmlIdx := findPartialXMLToolTagStart(s); xmlIdx >= 0 && xmlIdx > start {
+		start = xmlIdx
+	}
 	return start
 }

@@ -167,22 +183,47 @@ func findToolSegmentStart(s string) int {
 		return -1
 	}
 	lower := strings.ToLower(s)
-	offset := 0
-	for {
-		keyRel := strings.Index(lower[offset:], "tool_calls")
-		if keyRel < 0 {
-			return -1
+	keywords := []string{"tool_calls", "\"function\"", "function.name:", "\"tool_use\""}
+	bestKeyIdx := -1
+	for _, kw := range keywords {
+		idx := strings.Index(lower, kw)
+		if idx >= 0 && (bestKeyIdx < 0 || idx < bestKeyIdx) {
+			bestKeyIdx = idx
 		}
-		keyIdx := offset + keyRel
-		start := strings.LastIndex(s[:keyIdx], "{")
-		if start < 0 {
-			start = keyIdx
-		}
-		if !insideCodeFence(s[:start]) {
-			return start
-		}
-		offset = keyIdx + len("tool_calls")
 	}
+	if fnKeyIdx := findQuotedFunctionCallKeyStart(s); fnKeyIdx >= 0 && (bestKeyIdx < 0 || fnKeyIdx < bestKeyIdx) {
+		bestKeyIdx = fnKeyIdx
+	}
+	// Also detect XML tool call tags.
+	for _, tag := range xmlToolTagsToDetect {
+		idx := strings.Index(lower, tag)
+		if idx >= 0 && (bestKeyIdx < 0 || idx < bestKeyIdx) {
+			bestKeyIdx = idx
+		}
+	}
+	if bestKeyIdx < 0 {
+		return -1
+	}
+	// For XML tags, the '<' is itself the segment start.
+	if bestKeyIdx < len(s) && s[bestKeyIdx] == '<' {
+		if fenceStart, ok := openFenceStartBefore(s, bestKeyIdx); ok {
+			return fenceStart
+		}
+		return bestKeyIdx
+	}
+	start := strings.LastIndex(s[:bestKeyIdx], "{")
+	if start < 0 {
+		start = bestKeyIdx
+	}
+	// If the keyword matched inside an XML tag (e.g. "tool_calls" in "<tool_calls>"),
+	// back up past the '<' to capture the full tag.
+	if start > 0 && s[start-1] == '<' {
+		start--
+	}
+	if fenceStart, ok := openFenceStartBefore(s, start); ok {
+		return fenceStart
+	}
+	return start
 }

 func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) {
@@ -190,14 +231,35 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
 	if captured == "" {
 		return "", nil, "", false
 	}
+
+	// Try XML tool call extraction first.
+	if xmlPrefix, xmlCalls, xmlSuffix, xmlReady := consumeXMLToolCapture(captured, toolNames); xmlReady {
+		return xmlPrefix, xmlCalls, xmlSuffix, true
+	}
+	// If XML tags are present but block is incomplete, keep buffering.
+	if hasOpenXMLToolTag(captured) {
+		return "", nil, "", false
+	}
+
 	lower := strings.ToLower(captured)
-	keyIdx := strings.Index(lower, "tool_calls")
+	keyIdx := -1
+	keywords := []string{"tool_calls", "\"function\"", "function.name:", "\"tool_use\""}
+	for _, kw := range keywords {
+		idx := strings.Index(lower, kw)
+		if idx >= 0 && (keyIdx < 0 || idx < keyIdx) {
+			keyIdx = idx
+		}
+	}
+	if fnKeyIdx := findQuotedFunctionCallKeyStart(captured); fnKeyIdx >= 0 && (keyIdx < 0 || fnKeyIdx < keyIdx) {
+		keyIdx = fnKeyIdx
+	}
+
 	if keyIdx < 0 {
 		return "", nil, "", false
 	}
 	start := strings.LastIndex(captured[:keyIdx], "{")
 	if start < 0 {
-		return "", nil, "", false
+		start = keyIdx
 	}
 	obj, end, ok := extractJSONObjectFrom(captured, start)
 	if !ok {
@@ -205,9 +267,6 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
 	}
 	prefixPart := captured[:start]
 	suffixPart := captured[end:]
-	if insideCodeFence(state.recentTextTail + prefixPart) {
-		return captured, nil, "", true
-	}
 	parsed := util.ParseStandaloneToolCallsDetailed(obj, toolNames)
 	if len(parsed.Calls) == 0 {
 		if parsed.SawToolCallSyntax && parsed.RejectedByPolicy {
@@ -215,7 +274,11 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
 			// consume it to avoid leaking raw tool_calls JSON to user content.
 			return prefixPart, nil, suffixPart, true
 		}
+		// If it has obvious keywords but still fails to parse after loose repair,
+		// we may eventually want to intercept it as an attempted tool call.
+		// For now, keep the original behavior and rely on loose JSON repair.
 		return captured, nil, "", true
 	}
+	prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
 	return prefixPart, parsed.Calls, suffixPart, true
 }
--- a/internal/adapter/openai/tool_sieve_functioncall.go
+++ b/internal/adapter/openai/tool_sieve_functioncall.go
@@ -0,0 +1,126 @@
+package openai
+
+import "strings"
+
+// findQuotedFunctionCallKeyStart returns the byte offset in s of the first
+// structural functionCall object key, matched ASCII case-insensitively, or -1
+// when no such key is found.
+//
+// The quoted JSON key wins whenever it matches, even if a bare key appears
+// earlier in s. Bare-key detection is only a fallback for loose payloads
+// where the quoted form is absent, so the bare scan runs only in that case.
+func findQuotedFunctionCallKeyStart(s string) int {
+	// Fold ASCII letters byte-for-byte instead of calling strings.ToLower:
+	// ToLower can change the encoded length of non-ASCII runes (U+212A KELVIN
+	// SIGN, three bytes, becomes "k"), which would shift every later offset
+	// and make the result unusable for indexing into s.
+	folded := []byte(s)
+	for i, c := range folded {
+		if c >= 'A' && c <= 'Z' {
+			folded[i] = c + ('a' - 'A')
+		}
+	}
+	lower := string(folded)
+
+	if quotedIdx := findFunctionCallKeyStart(lower, `"functioncall"`); quotedIdx >= 0 {
+		return quotedIdx
+	}
+	return findFunctionCallKeyStart(lower, "functioncall")
+}
+
+// findFunctionCallKeyStart returns the offset of the first occurrence of key
+// in lower that looks like an object key, or -1 if there is none. Both
+// arguments are expected to be lowercase already.
+//
+// An occurrence qualifies only when it is not inside a double-quoted string,
+// some '{' precedes it, it is not glued to neighbouring identifier bytes, and
+// it is followed (after optional whitespace) by ':'. If a value has already
+// arrived after the colon it must start with '{'; a colon at the very end of
+// the input is accepted.
+func findFunctionCallKeyStart(lower, key string) int {
+	for from := 0; from < len(lower); {
+		rel := strings.Index(lower[from:], key)
+		if rel < 0 {
+			return -1
+		}
+		idx := from + rel
+		if isInsideJSONString(lower, idx) {
+			from = idx + 1
+			continue
+		}
+		if !hasJSONObjectContextPrefix(lower[:idx]) {
+			from = idx + 1
+			continue
+		}
+		if !hasJSONKeyBoundary(lower, idx, len(key)) {
+			from = idx + 1
+			continue
+		}
+		j := idx + len(key)
+		for j < len(lower) && (lower[j] == ' ' || lower[j] == '\t' || lower[j] == '\r' || lower[j] == '\n') {
+			j++
+		}
+		if j < len(lower) && lower[j] == ':' {
+			k := j + 1
+			for k < len(lower) && (lower[k] == ' ' || lower[k] == '\t' || lower[k] == '\r' || lower[k] == '\n') {
+				k++
+			}
+			if k < len(lower) && lower[k] != '{' {
+				from = idx + 1
+				continue
+			}
+			return idx
+		}
+		from = idx + 1
+	}
+	return -1
+}
+
+// isInsideJSONString reports whether offset idx falls inside a double-quoted
+// string, judged by scanning s[:idx] and toggling on every unescaped '"'.
+// A backslash starts an escape only while inside a string.
+func isInsideJSONString(s string, idx int) bool {
+	inString := false
+	escaped := false
+	for i := 0; i < idx; i++ {
+		c := s[i]
+		if escaped {
+			escaped = false
+			continue
+		}
+		if c == '\\' && inString {
+			escaped = true
+			continue
+		}
+		if c == '"' {
+			inString = !inString
+		}
+	}
+	return inString
+}
+
+// hasJSONObjectContextPrefix reports whether prefix contains at least one '{',
+// i.e. whether an object could have been opened before the candidate key.
+func hasJSONObjectContextPrefix(prefix string) bool {
+	return strings.Contains(prefix, "{")
+}
+
+// hasJSONKeyBoundary reports whether the keyLen bytes starting at idx are not
+// directly adjacent to an identifier byte on either side, so that the key is
+// not merely a fragment of a longer name. The start and end of s count as
+// boundaries.
+func hasJSONKeyBoundary(s string, idx, keyLen int) bool {
+	if idx > 0 && isLowerAlphaNumeric(s[idx-1]) {
+		return false
+	}
+	if end := idx + keyLen; end < len(s) && isLowerAlphaNumeric(s[end]) {
+		return false
+	}
+	return true
+}
+
+// isLowerAlphaNumeric reports whether b is an ASCII lowercase letter, an
+// ASCII digit, or an underscore.
+func isLowerAlphaNumeric(b byte) bool {
+	return (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9') || b == '_'
+}
--- a/internal/adapter/openai/tool_sieve_functioncall_test.go
+++ b/internal/adapter/openai/tool_sieve_functioncall_test.go
@@ -0,0 +1,23 @@
+package openai
+
+import "testing"
+
+// The only quoted occurrence sits inside a string value, so the bare key at
+// offset 1 is the one that must be reported.
+func TestFindQuotedFunctionCallKeyStart_PrefersEarlierBareKey(t *testing.T) {
+	const input = `{functionCall:{"name":"a","arguments":"{}"},"message":"literal text: \"functionCall\": not a key"}`
+	const want = 1
+	if got := findQuotedFunctionCallKeyStart(input); got != want {
+		t.Fatalf("findQuotedFunctionCallKeyStart() = %d, want %d", got, want)
+	}
+}
+
+// The quoted key at offset 1 must be reported; the later bare mention sits
+// inside a string value.
+func TestFindQuotedFunctionCallKeyStart_PrefersEarlierQuotedKey(t *testing.T) {
+	const input = `{"functionCall":{"name":"a","arguments":"{}"},"note":"functionCall appears in prose"}`
+	const want = 1
+	if got := findQuotedFunctionCallKeyStart(input); got != want {
+		t.Fatalf("findQuotedFunctionCallKeyStart() = %d, want %d", got, want)
+	}
+}
--- a/internal/adapter/openai/tool_sieve_incremental.go
+++ b/internal/adapter/openai/tool_sieve_incremental.go
@@ -1,291 +0,0 @@
-package openai
-
-import "strings"
-
-func buildIncrementalToolDeltas(state *toolStreamSieveState) []toolCallDelta {
-	if state.disableDeltas {
-		return nil
-	}
-	captured := state.capture.String()
-	if captured == "" {
-		return nil
-	}
-	lower := strings.ToLower(captured)
-	keyIdx := strings.Index(lower, "tool_calls")
-	if keyIdx < 0 {
-		return nil
-	}
-	start := strings.LastIndex(captured[:keyIdx], "{")
-	if start < 0 {
-		return nil
-	}
-	if insideCodeFence(state.recentTextTail + captured[:start]) {
-		return nil
-	}
-	certainSingle, hasMultiple := classifyToolCallsIncrementalSafety(captured, keyIdx)
-	if hasMultiple {
-		state.disableDeltas = true
-		return nil
-	}
-	if !certainSingle {
-		// In uncertain phases (e.g. first call arrived but array not closed yet),
-		// avoid speculative deltas and wait for final parsed tool_calls payload.
-		return nil
-	}
-	callStart, ok := findFirstToolCallObjectStart(captured, keyIdx)
-	if !ok {
-		return nil
-	}
-	deltas := make([]toolCallDelta, 0, 2)
-	if state.toolName == "" {
-		name, ok := extractToolCallName(captured, callStart)
-		if !ok || name == "" {
-			return nil
-		}
-		state.toolName = name
-	}
-	if state.toolArgsStart < 0 {
-		argsStart, stringMode, ok := findToolCallArgsStart(captured, callStart)
-		if ok {
-			state.toolArgsString = stringMode
-			if stringMode {
-				state.toolArgsStart = argsStart + 1
-			} else {
-				state.toolArgsStart = argsStart
-			}
-			state.toolArgsSent = state.toolArgsStart
-		}
-	}
-	if !state.toolNameSent {
-		if state.toolArgsStart < 0 {
-			return nil
-		}
-		state.toolNameSent = true
-		deltas = append(deltas, toolCallDelta{Index: 0, Name: state.toolName})
-	}
-	if state.toolArgsStart < 0 || state.toolArgsDone {
-		return deltas
-	}
-	end, complete, ok := scanToolCallArgsProgress(captured, state.toolArgsStart, state.toolArgsString)
-	if !ok {
-		return deltas
-	}
-	if end > state.toolArgsSent {
-		deltas = append(deltas, toolCallDelta{
-			Index:     0,
-			Arguments: captured[state.toolArgsSent:end],
-		})
-		state.toolArgsSent = end
-	}
-	if complete {
-		state.toolArgsDone = true
-	}
-	return deltas
-}
-
-func classifyToolCallsIncrementalSafety(text string, keyIdx int) (certainSingle bool, hasMultiple bool) {
-	arrStart, ok := findToolCallsArrayStart(text, keyIdx)
-	if !ok {
-		return false, false
-	}
-	i := skipSpaces(text, arrStart+1)
-	if i >= len(text) || text[i] != '{' {
-		return false, false
-	}
-	count := 0
-	depth := 0
-	quote := byte(0)
-	escaped := false
-	for ; i < len(text); i++ {
-		ch := text[i]
-		if quote != 0 {
-			if escaped {
-				escaped = false
-				continue
-			}
-			if ch == '\\' {
-				escaped = true
-				continue
-			}
-			if ch == quote {
-				quote = 0
-			}
-			continue
-		}
-		if ch == '"' || ch == '\'' {
-			quote = ch
-			continue
-		}
-		if ch == '{' {
-			if depth == 0 {
-				count++
-				if count > 1 {
-					return false, true
-				}
-			}
-			depth++
-			continue
-		}
-		if ch == '}' {
-			if depth > 0 {
-				depth--
-			}
-			continue
-		}
-		if ch == ',' && depth == 0 {
-			// top-level separator means at least one more tool call exists
-			// (or is expected). Treat as multi-call and stop incremental deltas.
-			return false, true
-		}
-		if ch == ']' && depth == 0 {
-			return count == 1, false
-		}
-	}
-	// array not closed yet: still uncertain whether more calls will appear
-	return false, false
-}
-
-func findFirstToolCallObjectStart(text string, keyIdx int) (int, bool) {
-	arrStart, ok := findToolCallsArrayStart(text, keyIdx)
-	if !ok {
-		return -1, false
-	}
-	i := skipSpaces(text, arrStart+1)
-	if i >= len(text) || text[i] != '{' {
-		return -1, false
-	}
-	return i, true
-}
-
-func findToolCallsArrayStart(text string, keyIdx int) (int, bool) {
-	i := keyIdx + len("tool_calls")
-	for i < len(text) && text[i] != ':' {
-		i++
-	}
-	if i >= len(text) {
-		return -1, false
-	}
-	i = skipSpaces(text, i+1)
-	if i >= len(text) || text[i] != '[' {
-		return -1, false
-	}
-	return i, true
-}
-
-func extractToolCallName(text string, callStart int) (string, bool) {
-	valueStart, ok := findObjectFieldValueStart(text, callStart, []string{"name"})
-	if !ok || valueStart >= len(text) || text[valueStart] != '"' {
-		fnStart, fnOK := findFunctionObjectStart(text, callStart)
-		if !fnOK {
-			return "", false
-		}
-		valueStart, ok = findObjectFieldValueStart(text, fnStart, []string{"name"})
-		if !ok || valueStart >= len(text) || text[valueStart] != '"' {
-			return "", false
-		}
-	}
-	name, _, ok := parseJSONStringLiteral(text, valueStart)
-	if !ok {
-		return "", false
-	}
-	return name, true
-}
-
-func findToolCallArgsStart(text string, callStart int) (int, bool, bool) {
-	keys := []string{"input", "arguments", "args", "parameters", "params"}
-	valueStart, ok := findObjectFieldValueStart(text, callStart, keys)
-	if !ok {
-		fnStart, fnOK := findFunctionObjectStart(text, callStart)
-		if !fnOK {
-			return -1, false, false
-		}
-		valueStart, ok = findObjectFieldValueStart(text, fnStart, keys)
-		if !ok {
-			return -1, false, false
-		}
-	}
-	if valueStart >= len(text) {
-		return -1, false, false
-	}
-	ch := text[valueStart]
-	if ch == '{' || ch == '[' {
-		return valueStart, false, true
-	}
-	if ch == '"' {
-		return valueStart, true, true
-	}
-	return -1, false, false
-}
-
-func scanToolCallArgsProgress(text string, start int, stringMode bool) (int, bool, bool) {
-	if start < 0 || start > len(text) {
-		return 0, false, false
-	}
-	if stringMode {
-		escaped := false
-		for i := start; i < len(text); i++ {
-			ch := text[i]
-			if escaped {
-				escaped = false
-				continue
-			}
-			if ch == '\\' {
-				escaped = true
-				continue
-			}
-			if ch == '"' {
-				return i, true, true
-			}
-		}
-		return len(text), false, true
-	}
-	if start >= len(text) {
-		return start, false, false
-	}
-	if text[start] != '{' && text[start] != '[' {
-		return 0, false, false
-	}
-	depth := 0
-	quote := byte(0)
-	escaped := false
-	for i := start; i < len(text); i++ {
-		ch := text[i]
-		if quote != 0 {
-			if escaped {
-				escaped = false
-				continue
-			}
-			if ch == '\\' {
-				escaped = true
-				continue
-			}
-			if ch == quote {
-				quote = 0
-			}
-			continue
-		}
-		if ch == '"' || ch == '\'' {
-			quote = ch
-			continue
-		}
-		if ch == '{' || ch == '[' {
-			depth++
-			continue
-		}
-		if ch == '}' || ch == ']' {
-			depth--
-			if depth == 0 {
-				return i + 1, true, true
-			}
-		}
-	}
-	return len(text), false, true
-}
-
-func findFunctionObjectStart(text string, callStart int) (int, bool) {
-	valueStart, ok := findObjectFieldValueStart(text, callStart, []string{"function"})
-	if !ok || valueStart >= len(text) || text[valueStart] != '{' {
-		return -1, false
-	}
-	return valueStart, true
-}
--- a/internal/adapter/openai/tool_sieve_jsonscan.go
+++ b/internal/adapter/openai/tool_sieve_jsonscan.go
@@ -44,109 +44,41 @@ func extractJSONObjectFrom(text string, start int) (string, int, bool) {
 	return "", 0, false
 }

-func findObjectFieldValueStart(text string, objStart int, keys []string) (int, bool) {
-	if objStart < 0 || objStart >= len(text) || text[objStart] != '{' {
-		return 0, false
+func trimWrappingJSONFence(prefix, suffix string) (string, string) {
+	trimmedPrefix := strings.TrimRight(prefix, " \t\r\n")
+	fenceIdx := strings.LastIndex(trimmedPrefix, "```")
+	if fenceIdx < 0 {
+		return prefix, suffix
 	}
-	depth := 0
-	quote := byte(0)
-	escaped := false
-	for i := objStart; i < len(text); i++ {
-		ch := text[i]
-		if quote != 0 {
-			if escaped {
-				escaped = false
-				continue
-			}
-			if ch == '\\' {
-				escaped = true
-				continue
-			}
-			if ch == quote {
-				quote = 0
-			}
-			continue
-		}
-		if ch == '"' || ch == '\'' {
-			if depth == 1 {
-				key, end, ok := parseJSONStringLiteral(text, i)
-				if !ok {
-					return 0, false
-				}
-				j := skipSpaces(text, end)
-				if j >= len(text) || text[j] != ':' {
-					i = end - 1
-					continue
-				}
-				j = skipSpaces(text, j+1)
-				if j >= len(text) {
-					return 0, false
-				}
-				if containsKey(keys, key) {
-					return j, true
-				}
-				i = j - 1
-				continue
-			}
-			quote = ch
-			continue
-		}
-		if ch == '{' {
-			depth++
-			continue
-		}
-		if ch == '}' {
-			depth--
-			if depth == 0 {
-				break
-			}
-		}
+	// Only strip when the trailing fence in prefix behaves like an opening fence.
+	// A legitimate closing fence before a standalone tool JSON must be preserved.
+	if strings.Count(trimmedPrefix[:fenceIdx+3], "```")%2 == 0 {
+		return prefix, suffix
 	}
-	return 0, false
+	fenceHeader := strings.TrimSpace(trimmedPrefix[fenceIdx+3:])
+	if fenceHeader != "" && !strings.EqualFold(fenceHeader, "json") {
+		return prefix, suffix
+	}
+
+	trimmedSuffix := strings.TrimLeft(suffix, " \t\r\n")
+	if !strings.HasPrefix(trimmedSuffix, "```") {
+		return prefix, suffix
+	}
+	consumedLeading := len(suffix) - len(trimmedSuffix)
+	return trimmedPrefix[:fenceIdx], suffix[consumedLeading+3:]
 }

-func parseJSONStringLiteral(text string, start int) (string, int, bool) {
-	if start < 0 || start >= len(text) || text[start] != '"' {
-		return "", 0, false
+func openFenceStartBefore(s string, pos int) (int, bool) {
+	if pos <= 0 || pos > len(s) {
+		return -1, false
 	}
-	var b strings.Builder
-	escaped := false
-	for i := start + 1; i < len(text); i++ {
-		ch := text[i]
-		if escaped {
-			b.WriteByte(ch)
-			escaped = false
-			continue
-		}
-		if ch == '\\' {
-			escaped = true
-			continue
-		}
-		if ch == '"' {
-			return b.String(), i + 1, true
-		}
-		b.WriteByte(ch)
+	segment := s[:pos]
+	lastFence := strings.LastIndex(segment, "```")
+	if lastFence < 0 {
+		return -1, false
 	}
-	return "", 0, false
-}
-
-func containsKey(keys []string, value string) bool {
-	for _, k := range keys {
-		if k == value {
-			return true
-		}
-	}
-	return false
-}
-
-func skipSpaces(text string, i int) int {
-	for i < len(text) {
-		switch text[i] {
-		case ' ', '\t', '\n', '\r':
-			i++
-		default:
-			return i
-		}
-	}
-	return i
+	if strings.Count(segment, "```")%2 == 1 {
+		return lastFence, true
+	}
+	return -1, false
 }
--- a/internal/adapter/openai/tool_sieve_state.go
+++ b/internal/adapter/openai/tool_sieve_state.go
@@ -34,7 +34,8 @@ type toolCallDelta struct {
 	Arguments string
 }

-const toolSieveContextTailLimit = 256
+// Keep in sync with JS TOOL_SIEVE_CONTEXT_TAIL_LIMIT.
+const toolSieveContextTailLimit = 2048

 func (s *toolStreamSieveState) resetIncrementalToolState() {
 	s.disableDeltas = false
@@ -63,14 +64,3 @@ func appendTail(prev, next string, max int) string {
 	}
 	return combined[len(combined)-max:]
 }
-
-func looksLikeToolExampleContext(text string) bool {
-	return insideCodeFence(text)
-}
-
-func insideCodeFence(text string) bool {
-	if text == "" {
-		return false
-	}
-	return strings.Count(text, "```")%2 == 1
-}
--- a/internal/adapter/openai/tool_sieve_xml.go
+++ b/internal/adapter/openai/tool_sieve_xml.go
@@ -0,0 +1,166 @@
+package openai
+
+import (
+	"regexp"
+	"strings"
+
+	"ds2api/internal/util"
+)
+
+// --- XML tool call support for the streaming sieve ---
+
+// xmlToolCallClosingTags lists the closing tags of the XML elements treated as
+// tool-call markup. It is not referenced by any function in this file.
+var xmlToolCallClosingTags = []string{"</tool_calls>", "</tool_call>", "</invoke>", "</function_call>", "</function_calls>", "</tool_use>",
+	// Agent-style XML tags (Roo Code, Cline, etc.)
+	"</attempt_completion>", "</ask_followup_question>", "</new_task>", "</result>"}
+
+// xmlToolCallOpeningTags lists opening-tag prefixes (without the trailing '>').
+// findPartialXMLToolTagStart uses it to recognise a tag that is still being
+// streamed. Note that "<result" appears here but has no entry in
+// xmlToolCallTagPairs.
+var xmlToolCallOpeningTags = []string{"<tool_calls", "<tool_call", "<invoke", "<function_call", "<function_calls", "<tool_use",
+	// Agent-style XML tags
+	"<attempt_completion", "<ask_followup_question", "<new_task", "<result"}
+
+// xmlToolCallTagPairs maps each opening tag to its expected closing tag.
+// Order matters: longer/wrapper tags must be checked first, because the
+// lookups are plain substring searches and "<tool_call" is also a prefix of
+// "<tool_calls" (likewise "<function_call" / "<function_calls").
+var xmlToolCallTagPairs = []struct{ open, close string }{
+	{"<tool_calls", "</tool_calls>"},
+	{"<tool_call", "</tool_call>"},
+	{"<function_calls", "</function_calls>"},
+	{"<function_call", "</function_call>"},
+	{"<invoke", "</invoke>"},
+	{"<tool_use", "</tool_use>"},
+	// Agent-style: these are XML "tool call" patterns from coding agents.
+	// They get captured → parsed. If parsing fails, the block is consumed
+	// (swallowed) to prevent raw XML from leaking to the client.
+	{"<attempt_completion", "</attempt_completion>"},
+	{"<ask_followup_question", "</ask_followup_question>"},
+	{"<new_task", "</new_task>"},
+}
+
+// xmlToolCallBlockPattern matches a complete XML tool call block (wrapper or standalone).
+// The (?is) flags make it case-insensitive and let '.' match newlines. Only the
+// <invoke> and <function_call(s)> alternatives accept attributes on the opening
+// tag; the other alternatives require the bare tag. It is not referenced by any
+// function in this file.
+var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(<tool_calls>\s*(?:.*?)\s*</tool_calls>|<tool_call>\s*(?:.*?)\s*</tool_call>|<invoke\b[^>]*>(?:.*?)</invoke>|<function_calls?\b[^>]*>(?:.*?)</function_calls?>|<tool_use>(?:.*?)</tool_use>|<attempt_completion>(?:.*?)</attempt_completion>|<ask_followup_question>(?:.*?)</ask_followup_question>|<new_task>(?:.*?)</new_task>)`)
+
+// xmlToolTagsToDetect is the set of XML tag prefixes used by findToolSegmentStart.
+// Unlike xmlToolCallOpeningTags, most entries include the terminating '>' or a
+// following newline/space, so they only match a fully written opening tag.
+var xmlToolTagsToDetect = []string{"<tool_calls>", "<tool_calls\n", "<tool_call>", "<tool_call\n",
+	"<invoke ", "<invoke>", "<function_call", "<function_calls", "<tool_use>",
+	// Agent-style tags
+	"<attempt_completion>", "<ask_followup_question>", "<new_task>"}
+
+// consumeXMLToolCapture tries to extract one complete XML tool call block from
+// captured text.
+//
+// The pairs in xmlToolCallTagPairs are tried in order. For the first pair
+// whose opening tag occurs in captured, the block runs from the first opening
+// tag to the end of the LAST matching closing tag. Results:
+//   - ready=false: no opening tag was found, or the closing tag of the first
+//     matching pair has not arrived after it yet (keep buffering).
+//   - ready=true with calls: the block parsed into tool calls; prefix/suffix
+//     are the surrounding text after trimWrappingJSONFence.
+//   - ready=true without calls: either the block is returned as part of
+//     prefix (it does not look like tool syntax) or it is dropped (it looks
+//     like tool syntax but did not parse).
+//
+// Tag matching is case-insensitive for ASCII letters only. The folded copy
+// must have exactly the same byte length as captured, because indices found
+// in it are used to slice captured; strings.ToLower does not guarantee that
+// for non-ASCII or invalid UTF-8 input.
+func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) {
+	lower := lowerASCIIKeepLen(captured)
+	// Find the FIRST matching open/close pair, preferring wrapper tags.
+	// Tag pairs are ordered longest-first (e.g. <tool_calls before <tool_call)
+	// so wrapper tags are checked before inner tags.
+	for _, pair := range xmlToolCallTagPairs {
+		openIdx := strings.Index(lower, pair.open)
+		if openIdx < 0 {
+			continue
+		}
+		// Find the LAST occurrence of the specific closing tag to get the outermost block.
+		closeIdx := strings.LastIndex(lower, pair.close)
+		if closeIdx < openIdx {
+			// Opening tag is present but its specific closing tag hasn't arrived.
+			// Return not-ready so we keep buffering — do NOT fall through to
+			// try inner pairs (e.g. <tool_call inside <tool_calls).
+			return "", nil, "", false
+		}
+		closeEnd := closeIdx + len(pair.close)
+
+		xmlBlock := captured[openIdx:closeEnd]
+		prefixPart := captured[:openIdx]
+		suffixPart := captured[closeEnd:]
+		parsed := util.ParseToolCalls(xmlBlock, toolNames)
+		if len(parsed) > 0 {
+			prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
+			return prefixPart, parsed, suffixPart, true
+		}
+		// If this block does not look like an executable tool-call payload,
+		// pass it through as normal content (e.g. user-requested XML snippets).
+		if !looksLikeExecutableXMLToolCallBlock(xmlBlock, pair.open) {
+			return prefixPart + xmlBlock, nil, suffixPart, true
+		}
+		// Looks like XML tool syntax but failed to parse — consume it to avoid leak.
+		return prefixPart, nil, suffixPart, true
+	}
+	return "", nil, "", false
+}
+
+// lowerASCIIKeepLen lower-cases the ASCII letters A-Z in s and leaves every
+// other byte untouched, so the result always has the same byte length as s
+// and byte offsets are interchangeable between the two strings. s is returned
+// unchanged (no allocation) when it holds no upper-case ASCII letter.
+func lowerASCIIKeepLen(s string) string {
+	for i := 0; i < len(s); i++ {
+		if c := s[i]; c < 'A' || c > 'Z' {
+			continue
+		}
+		// First upper-case letter found: copy once and fold the remainder.
+		b := []byte(s)
+		for j := i; j < len(b); j++ {
+			if b[j] >= 'A' && b[j] <= 'Z' {
+				b[j] += 'a' - 'A'
+			}
+		}
+		return string(b)
+	}
+	return s
+}
+
+// looksLikeExecutableXMLToolCallBlock reports whether xmlBlock should be
+// treated as tool-call markup rather than ordinary XML content. openTag is
+// the opening-tag prefix (for example "<tool_call") that started the block.
+func looksLikeExecutableXMLToolCallBlock(xmlBlock, openTag string) bool {
+	// Agent wrapper tags are always treated as internal tool-call wrappers.
+	if openTag == "<attempt_completion" || openTag == "<ask_followup_question" || openTag == "<new_task" {
+		return true
+	}
+	// Otherwise the block must mention one of the tool-call field markers,
+	// compared case-insensitively.
+	folded := strings.ToLower(xmlBlock)
+	for _, marker := range []string{"<tool_name", "<parameters", `"tool"`, `"tool_name"`, `"name"`} {
+		if strings.Contains(folded, marker) {
+			return true
+		}
+	}
+	return false
+}
+
+// hasOpenXMLToolTag reports whether captured contains an XML tool opening tag
+// whose SPECIFIC closing tag has not appeared after it yet.
+//
+// For each pair in xmlToolCallTagPairs the first occurrence of the opening tag
+// is compared with the last occurrence of its closing tag — the same positions
+// consumeXMLToolCapture uses — so a stray closing tag that only shows up
+// before the opening tag does not make the block look complete. Matching is
+// case-insensitive.
+func hasOpenXMLToolTag(captured string) bool {
+	lower := strings.ToLower(captured)
+	for _, pair := range xmlToolCallTagPairs {
+		openIdx := strings.Index(lower, pair.open)
+		if openIdx < 0 {
+			continue
+		}
+		// LastIndex yields -1 when the closing tag is absent, which is also
+		// below openIdx, so "missing" and "only earlier" both count as open.
+		if strings.LastIndex(lower, pair.close) < openIdx {
+			return true
+		}
+	}
+	return false
+}
+
+// findPartialXMLToolTagStart returns the index of the last '<' in s when the
+// text from that '<' to the end of s is an unfinished prefix of a known XML
+// tool tag (e.g. "<tool_ca" or "<inv"); otherwise it returns -1.
+func findPartialXMLToolTagStart(s string) int {
+	ltPos := strings.LastIndex(s, "<")
+	// No '<' at all, or a '>' after it: there is no half-written tag at the end.
+	if ltPos < 0 || strings.Contains(s[ltPos:], ">") {
+		return -1
+	}
+	candidate := strings.ToLower(s[ltPos:])
+	// The candidate is partial when some known opening tag starts with it.
+	for _, known := range xmlToolCallOpeningTags {
+		if !strings.HasPrefix(known, "<") {
+			known = "<" + known
+		}
+		if strings.HasPrefix(known, candidate) {
+			return ltPos
+		}
+	}
+	return -1
+}
+
+// looksLikeXMLToolTagFragment reports whether s contains the tail of a split
+// XML tool call tag — for example "tool_calls>" or "/tool_call>\n". Such
+// tails are left over when the leading '<' was consumed separately.
+// Surrounding whitespace is ignored and the comparison is case-insensitive.
+func looksLikeXMLToolTagFragment(s string) bool {
+	folded := strings.ToLower(strings.TrimSpace(s))
+	if folded == "" {
+		return false
+	}
+	// A closing tail such as "/tool_calls>" always contains the opening tail
+	// "tool_calls>", so one substring test per tag name covers both forms.
+	tagNames := []string{
+		"tool_calls", "tool_call",
+		"function_calls", "function_call",
+		"invoke", "tool_use",
+		"tool_name", "parameters",
+		// Agent-style tag names
+		"attempt_completion",
+		"ask_followup_question",
+		"new_task",
+		"result",
+	}
+	for _, name := range tagNames {
+		if strings.Contains(folded, name+">") {
+			return true
+		}
+	}
+	return false
+}
--- a/internal/adapter/openai/tool_sieve_xml_test.go
+++ b/internal/adapter/openai/tool_sieve_xml_test.go
@@ -0,0 +1,438 @@
+package openai
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestProcessToolSieveInterceptsXMLToolCallWithoutLeak streams a complete XML
+// tool call line by line and expects tool calls to be produced while neither
+// the markup nor the tool name reaches the text output.
+func TestProcessToolSieveInterceptsXMLToolCallWithoutLeak(t *testing.T) {
+	toolNames := []string{"read_file"}
+	var sieve toolStreamSieveState
+	var events []toolStreamEvent
+	// Simulate a model producing XML tool call output chunk by chunk.
+	for _, piece := range []string{
+		"<tool_calls>\n",
+		"  <tool_call>\n",
+		"    <tool_name>read_file</tool_name>\n",
+		`    <parameters>{"path":"README.MD"}</parameters>` + "\n",
+		"  </tool_call>\n",
+		"</tool_calls>",
+	} {
+		events = append(events, processToolSieveChunk(&sieve, piece, toolNames)...)
+	}
+	events = append(events, flushToolSieve(&sieve, toolNames)...)
+
+	var text strings.Builder
+	callCount := 0
+	for _, ev := range events {
+		text.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+
+	got := text.String()
+	switch {
+	case strings.Contains(got, "<tool_call"):
+		t.Fatalf("XML tool call content leaked to text: %q", got)
+	case strings.Contains(got, "read_file"):
+		t.Fatalf("tool name leaked to text: %q", got)
+	case callCount == 0:
+		t.Fatal("expected tool calls to be extracted, got none")
+	}
+}
+
+// TestProcessToolSieveXMLWithLeadingText checks that prose preceding an XML
+// tool call is emitted as text while the XML itself is turned into tool calls.
+func TestProcessToolSieveXMLWithLeadingText(t *testing.T) {
+	toolNames := []string{"read_file"}
+	var sieve toolStreamSieveState
+	var events []toolStreamEvent
+	// Model outputs some prose then an XML tool call.
+	for _, piece := range []string{
+		"Let me check the file.\n",
+		"<tool_calls>\n  <tool_call>\n    <tool_name>read_file</tool_name>\n",
+		`    <parameters>{"path":"go.mod"}</parameters>` + "\n  </tool_call>\n</tool_calls>",
+	} {
+		events = append(events, processToolSieveChunk(&sieve, piece, toolNames)...)
+	}
+	events = append(events, flushToolSieve(&sieve, toolNames)...)
+
+	var text strings.Builder
+	callCount := 0
+	for _, ev := range events {
+		text.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+	got := text.String()
+
+	// Leading text should be emitted.
+	if !strings.Contains(got, "Let me check the file.") {
+		t.Fatalf("expected leading text to be emitted, got %q", got)
+	}
+	// The XML itself should NOT leak.
+	if strings.Contains(got, "<tool_call") {
+		t.Fatalf("XML tool call content leaked to text: %q", got)
+	}
+	if callCount == 0 {
+		t.Fatal("expected tool calls to be extracted, got none")
+	}
+}
+
+// TestProcessToolSievePassesThroughNonToolXMLBlock checks that a <tool_call>
+// element holding ordinary XML (no tool name or parameters) is emitted
+// unchanged as text and produces no tool calls.
+func TestProcessToolSievePassesThroughNonToolXMLBlock(t *testing.T) {
+	const payload = `<tool_call><title>示例 XML</title><body>plain text xml payload</body></tool_call>`
+	var sieve toolStreamSieveState
+	events := processToolSieveChunk(&sieve, payload, []string{"read_file"})
+	events = append(events, flushToolSieve(&sieve, []string{"read_file"})...)
+
+	callCount := 0
+	var text strings.Builder
+	for i := range events {
+		text.WriteString(events[i].Content)
+		callCount += len(events[i].ToolCalls)
+	}
+	if callCount != 0 {
+		t.Fatalf("expected no tool calls for plain XML payload, got %d events=%#v", callCount, events)
+	}
+	if got := text.String(); got != payload {
+		t.Fatalf("expected XML payload to pass through unchanged, got %q", got)
+	}
+}
+
+// TestProcessToolSieveNonToolXMLKeepsSuffixForToolParsing checks that when a
+// plain <tool_call> XML snippet is directly followed by a real <invoke> tool
+// call, the snippet is kept as text and the <invoke> block still yields
+// exactly one tool call.
+func TestProcessToolSieveNonToolXMLKeepsSuffixForToolParsing(t *testing.T) {
+	const payload = `<tool_call><title>plain xml</title></tool_call><invoke name="read_file"><parameters>{"path":"README.MD"}</parameters></invoke>`
+	var sieve toolStreamSieveState
+	events := processToolSieveChunk(&sieve, payload, []string{"read_file"})
+	events = append(events, flushToolSieve(&sieve, []string{"read_file"})...)
+
+	callCount := 0
+	var text strings.Builder
+	for i := range events {
+		text.WriteString(events[i].Content)
+		callCount += len(events[i].ToolCalls)
+	}
+	got := text.String()
+	if !strings.Contains(got, `<tool_call><title>plain xml</title></tool_call>`) {
+		t.Fatalf("expected leading non-tool XML to be preserved, got %q", got)
+	}
+	if strings.Contains(got, `<invoke name="read_file">`) {
+		t.Fatalf("expected invoke tool XML to be intercepted, got %q", got)
+	}
+	if callCount != 1 {
+		t.Fatalf("expected exactly one parsed tool call from suffix, got %d events=%#v", callCount, events)
+	}
+}
+
+// TestProcessToolSievePartialXMLTagHeldBack checks that a chunk ending in a
+// half-written tool tag emits the preceding text but withholds the tag part.
+func TestProcessToolSievePartialXMLTagHeldBack(t *testing.T) {
+	var sieve toolStreamSieveState
+	// Chunk ends with a partial XML tool tag.
+	var text strings.Builder
+	for _, ev := range processToolSieveChunk(&sieve, "Hello <tool_ca", []string{"read_file"}) {
+		text.WriteString(ev.Content)
+	}
+	got := text.String()
+
+	// "Hello " should be emitted, but "<tool_ca" should be held back.
+	if strings.Contains(got, "<tool_ca") {
+		t.Fatalf("partial XML tag should not be emitted, got %q", got)
+	}
+	if !strings.Contains(got, "Hello") {
+		t.Fatalf("expected 'Hello' text to be emitted, got %q", got)
+	}
+}
+
+// TestFindToolSegmentStartDetectsXMLToolCalls is a table test for the start
+// offset reported by findToolSegmentStart on XML tags, a functionCall JSON
+// payload and plain text.
+func TestFindToolSegmentStartDetectsXMLToolCalls(t *testing.T) {
+	type segmentCase struct {
+		name  string
+		input string
+		want  int
+	}
+	table := []segmentCase{
+		{name: "tool_calls_tag", input: "some text <tool_calls>\n", want: 10},
+		{name: "gemini_function_call_json", input: `some text {"functionCall":{"name":"search","args":{"q":"latest"}}}`, want: 10},
+		{name: "tool_call_tag", input: "prefix <tool_call>\n", want: 7},
+		{name: "invoke_tag", input: "text <invoke name=\"foo\">body</invoke>", want: 5},
+		{name: "function_call_tag", input: "<function_call name=\"foo\">body</function_call>", want: 0},
+		{name: "no_xml", input: "just plain text", want: -1},
+	}
+	for _, tt := range table {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := findToolSegmentStart(tt.input); got != tt.want {
+				t.Fatalf("findToolSegmentStart(%q) = %d, want %d", tt.input, got, tt.want)
+			}
+		})
+	}
+}
+
+// TestFindToolSegmentStartIgnoresFunctionCallProse checks that the bare word
+// "functionCall" inside a sentence is not reported as a tool segment.
+func TestFindToolSegmentStartIgnoresFunctionCallProse(t *testing.T) {
+	const prose = "Please explain the functionCall API field and how clients should parse it."
+	got := findToolSegmentStart(prose)
+	if got != -1 {
+		t.Fatalf("expected no tool segment start for prose, got %d", got)
+	}
+}
+
+// TestFindToolSegmentStartDetectsQuotedFunctionCallKey checks that a JSON
+// object with a quoted "functionCall" key is reported at its opening brace.
+func TestFindToolSegmentStartDetectsQuotedFunctionCallKey(t *testing.T) {
+	const payload = `prefix {"functionCall": {"name":"search_web","args":{"query":"x"}}}`
+	expected := strings.Index(payload, "{")
+	got := findToolSegmentStart(payload)
+	if got != expected {
+		t.Fatalf("expected JSON object start %d, got %d", expected, got)
+	}
+}
+
+// TestFindToolSegmentStartDetectsLooseFunctionCallKey checks that an object
+// with an unquoted functionCall key is also reported at its opening brace.
+func TestFindToolSegmentStartDetectsLooseFunctionCallKey(t *testing.T) {
+	const payload = `prefix {functionCall: {"name":"search_web","args":{"query":"x"}}}`
+	expected := strings.Index(payload, "{")
+	got := findToolSegmentStart(payload)
+	if got != expected {
+		t.Fatalf("expected JSON object start %d, got %d", expected, got)
+	}
+}
+
+// TestFindToolSegmentStartPrefersQuotedFunctionCallOverEarlierBareProse checks
+// that an earlier "{note}" and a bare "functionCall:" mention are skipped in
+// favour of the later quoted functionCall JSON object.
+func TestFindToolSegmentStartPrefersQuotedFunctionCallOverEarlierBareProse(t *testing.T) {
+	const payload = `prefix {note} functionCall: docs hint {"functionCall":{"name":"search_web","args":{"query":"x"}}}`
+	expected := strings.Index(payload, `{"functionCall"`)
+	got := findToolSegmentStart(payload)
+	if got != expected {
+		t.Fatalf("expected quoted functionCall JSON start %d, got %d", expected, got)
+	}
+}
+
+// TestFindToolSegmentStartIgnoresLooseFunctionCallProse checks that
+// "functionCall:" written inside a sentence is not reported as a tool segment.
+func TestFindToolSegmentStartIgnoresLooseFunctionCallProse(t *testing.T) {
+	const prose = "Please explain why functionCall: is used in documentation examples."
+	got := findToolSegmentStart(prose)
+	if got != -1 {
+		t.Fatalf("expected no tool segment start for prose, got %d", got)
+	}
+}
+
+// TestProcessToolSieveDoesNotBufferFunctionCallProse checks that a sentence
+// mentioning "functionCall" is emitted in full by the first
+// processToolSieveChunk call (no flush) and yields no tool calls.
+func TestProcessToolSieveDoesNotBufferFunctionCallProse(t *testing.T) {
+	const prose = "Please explain the functionCall API field and keep streaming this sentence."
+	var sieve toolStreamSieveState
+	var text strings.Builder
+	for _, ev := range processToolSieveChunk(&sieve, prose, []string{"search_web"}) {
+		text.WriteString(ev.Content)
+		if len(ev.ToolCalls) > 0 {
+			t.Fatalf("expected no tool calls for prose, got %#v", ev.ToolCalls)
+		}
+	}
+	if got := text.String(); got != prose {
+		t.Fatalf("expected prose to pass through immediately, got %q", got)
+	}
+}
+
+// TestProcessToolSieveDetectsGeminiFunctionCallPayload checks that a
+// {"functionCall": ...} JSON payload yields exactly one tool call and that no
+// "functionCall" text reaches the content output.
+func TestProcessToolSieveDetectsGeminiFunctionCallPayload(t *testing.T) {
+	toolNames := []string{"search_web"}
+	var sieve toolStreamSieveState
+	events := processToolSieveChunk(&sieve, `{"functionCall":{"name":"search_web","args":{"query":"latest"}}}`, toolNames)
+	events = append(events, flushToolSieve(&sieve, toolNames)...)
+
+	callCount := 0
+	var text strings.Builder
+	for i := range events {
+		text.WriteString(events[i].Content)
+		callCount += len(events[i].ToolCalls)
+	}
+	if callCount != 1 {
+		t.Fatalf("expected one tool call from functionCall payload, got events=%#v", events)
+	}
+	if got := text.String(); strings.Contains(strings.ToLower(got), "functioncall") {
+		t.Fatalf("functionCall json leaked into text content: %q", got)
+	}
+}
+
+// TestFindPartialXMLToolTagStart is a table test for findPartialXMLToolTagStart
+// covering trailing partial tags, a lone '<', completed tags and text without
+// any '<'.
+func TestFindPartialXMLToolTagStart(t *testing.T) {
+	type partialCase struct {
+		name  string
+		input string
+		want  int
+	}
+	table := []partialCase{
+		{name: "partial_tool_call", input: "Hello <tool_ca", want: 6},
+		{name: "partial_invoke", input: "Prefix <inv", want: 7},
+		{name: "partial_lt_only", input: "Text <", want: 5},
+		{name: "complete_tag", input: "Text <tool_call>done", want: -1},
+		{name: "no_lt", input: "plain text", want: -1},
+		{name: "closed_lt", input: "a < b > c", want: -1},
+	}
+	for _, tt := range table {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := findPartialXMLToolTagStart(tt.input); got != tt.want {
+				t.Fatalf("findPartialXMLToolTagStart(%q) = %d, want %d", tt.input, got, tt.want)
+			}
+		})
+	}
+}
+
+// TestHasOpenXMLToolTag covers the three basic outcomes of hasOpenXMLToolTag:
+// an unclosed tool tag, a closed tool tag, and text without any tool tag.
+func TestHasOpenXMLToolTag(t *testing.T) {
+	unclosed := hasOpenXMLToolTag("<tool_call>\n<tool_name>foo</tool_name>")
+	if !unclosed {
+		t.Fatal("should detect open XML tool tag without closing tag")
+	}
+	closed := hasOpenXMLToolTag("<tool_call>\n<tool_name>foo</tool_name></tool_call>")
+	if closed {
+		t.Fatal("should return false when closing tag is present")
+	}
+	plain := hasOpenXMLToolTag("plain text without any XML")
+	if plain {
+		t.Fatal("should return false for plain text")
+	}
+}
+
+// TestProcessToolSieveTokenByTokenXMLNoLeak reproduces the user-reported
+// scenario: the XML tool call arrives token by token, so every tag is split
+// across several chunks. No tag, tag fragment or tool name may reach the text
+// output, and at least one tool call must be extracted.
+func TestProcessToolSieveTokenByTokenXMLNoLeak(t *testing.T) {
+	toolNames := []string{"read_file"}
+	// Simulate DeepSeek model generating tokens one at a time; each row below
+	// is one line of the XML document.
+	tokens := []string{
+		"<", "tool", "_calls", ">\n",
+		"  <", "tool", "_call", ">\n",
+		"    <", "tool", "_name", ">", "read", "_file", "</", "tool", "_name", ">\n",
+		"    <", "parameters", ">", `{"path"`, `: "README.MD"`, `}`, "</", "parameters", ">\n",
+		"  </", "tool", "_call", ">\n",
+		"</", "tool", "_calls", ">",
+	}
+	var sieve toolStreamSieveState
+	var events []toolStreamEvent
+	for i := range tokens {
+		events = append(events, processToolSieveChunk(&sieve, tokens[i], toolNames)...)
+	}
+	events = append(events, flushToolSieve(&sieve, toolNames)...)
+
+	var text strings.Builder
+	callCount := 0
+	for _, ev := range events {
+		text.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+
+	got := text.String()
+	switch {
+	case strings.Contains(got, "<tool_call"):
+		t.Fatalf("XML tool call content leaked to text in token-by-token mode: %q", got)
+	case strings.Contains(got, "tool_calls>"):
+		t.Fatalf("closing tag fragment leaked to text: %q", got)
+	case strings.Contains(got, "read_file"):
+		t.Fatalf("tool name leaked to text: %q", got)
+	case callCount == 0:
+		t.Fatal("expected tool calls to be extracted, got none")
+	}
+}
+
+// TestFlushToolSieveIncompleteXMLDoesNotLeak checks that flushToolSieve does
+// not emit raw XML when the stream ends in the middle of a tool call block.
+func TestFlushToolSieveIncompleteXMLDoesNotLeak(t *testing.T) {
+	toolNames := []string{"read_file"}
+	var sieve toolStreamSieveState
+	var events []toolStreamEvent
+	// XML block starts but stream ends before completion.
+	for _, piece := range []string{
+		"<tool_calls>\n",
+		"  <tool_call>\n",
+		"    <tool_name>read_file</tool_name>\n",
+	} {
+		events = append(events, processToolSieveChunk(&sieve, piece, toolNames)...)
+	}
+	// Stream ends abruptly - flush should NOT dump raw XML.
+	events = append(events, flushToolSieve(&sieve, toolNames)...)
+
+	var text strings.Builder
+	for _, ev := range events {
+		text.WriteString(ev.Content)
+	}
+
+	if got := text.String(); strings.Contains(got, "<tool_call") {
+		t.Fatalf("incomplete XML leaked on flush: %q", got)
+	}
+}
+
+// TestOpeningXMLTagNotLeakedAsContent checks that a first chunk consisting of
+// the opening tag "<tool_calls>\n  " is not emitted as text, and that the
+// remainder of the block still produces tool calls without leaking XML.
+func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) {
+	toolNames := []string{"read_file"}
+	var sieve toolStreamSieveState
+	// First chunk is the opening tag - should be held, not emitted.
+	first := processToolSieveChunk(&sieve, "<tool_calls>\n  ", toolNames)
+	for i := range first {
+		if strings.Contains(first[i].Content, "<tool_calls>") {
+			t.Fatalf("opening tag leaked on first chunk: %q", first[i].Content)
+		}
+	}
+
+	// Remaining content arrives.
+	rest := processToolSieveChunk(&sieve, "<tool_call>\n    <tool_name>read_file</tool_name>\n    <parameters>{\"path\":\"README.MD\"}</parameters>\n  </tool_call>\n</tool_calls>", toolNames)
+	rest = append(rest, flushToolSieve(&sieve, toolNames)...)
+
+	combined := make([]toolStreamEvent, 0, len(first)+len(rest))
+	combined = append(combined, first...)
+	combined = append(combined, rest...)
+
+	var text strings.Builder
+	callCount := 0
+	for _, ev := range combined {
+		text.WriteString(ev.Content)
+		callCount += len(ev.ToolCalls)
+	}
+
+	if got := text.String(); strings.Contains(got, "<tool_call") {
+		t.Fatalf("XML content leaked: %q", got)
+	}
+	if callCount == 0 {
+		t.Fatal("expected tool calls to be extracted")
+	}
+}
+
+// TestProcessToolSieveInterceptsAttemptCompletionLeak checks that an
+// agent-style <attempt_completion> block never reaches the text output, while
+// the prose before it is still emitted.
+func TestProcessToolSieveInterceptsAttemptCompletionLeak(t *testing.T) {
+	toolNames := []string{"attempt_completion"}
+	var sieve toolStreamSieveState
+	var events []toolStreamEvent
+	// Simulate an agent outputting attempt_completion XML tag
+	// which shouldn't leak to text output, even if it fails to parse as a valid tool.
+	for _, piece := range []string{
+		"Done with task.\n",
+		"<attempt_completion>\n",
+		"  <result>Here is the answer</result>\n",
+		"</attempt_completion>",
+	} {
+		events = append(events, processToolSieveChunk(&sieve, piece, toolNames)...)
+	}
+	events = append(events, flushToolSieve(&sieve, toolNames)...)
+
+	var text strings.Builder
+	for _, ev := range events {
+		text.WriteString(ev.Content)
+	}
+	got := text.String()
+
+	if !strings.Contains(got, "Done with task.\n") {
+		t.Fatalf("expected leading text to be emitted, got %q", got)
+	}
+
+	leakedOpenTag := strings.Contains(got, "<attempt_completion>")
+	if leakedOpenTag || strings.Contains(got, "result>") {
+		t.Fatalf("agent XML tag content leaked to text: %q", got)
+	}
+}
--- a/internal/adapter/openai/upstream_empty.go
+++ b/internal/adapter/openai/upstream_empty.go
@@ -0,0 +1,20 @@
+package openai
+
+import (
+	"net/http"
+	"strings"
+
+	"ds2api/internal/sse"
+)
+
+// writeUpstreamEmptyOutputError writes an OpenAI-style error when the upstream
+// result carries no usable output, and reports whether it wrote one.
+//
+// The result counts as non-empty when either its thinking text or its
+// sanitized answer text contains non-whitespace characters; nothing is written
+// then and false is returned. Otherwise a 400 "content_filter" error is
+// written if result.ContentFilter is set, else a 502 "upstream_empty_output"
+// error, and true is returned.
+func writeUpstreamEmptyOutputError(w http.ResponseWriter, result sse.CollectResult) bool {
+	hasThinking := strings.TrimSpace(result.Thinking) != ""
+	if hasThinking || strings.TrimSpace(sanitizeLeakedOutput(result.Text)) != "" {
+		return false
+	}
+	if !result.ContentFilter {
+		writeOpenAIErrorWithCode(w, http.StatusBadGateway, "Upstream model returned empty output.", "upstream_empty_output")
+	} else {
+		writeOpenAIErrorWithCode(w, http.StatusBadRequest, "Upstream content filtered the response and returned no output.", "content_filter")
+	}
+	return true
+}
--- a/internal/adapter/openai/vercel_stream.go
+++ b/internal/adapter/openai/vercel_stream.go
@@ -93,18 +93,16 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque
 	}
 	leased = true
 	writeJSON(w, http.StatusOK, map[string]any{
-		"session_id":               sessionID,
-		"lease_id":                 leaseID,
-		"model":                    stdReq.ResponseModel,
-		"final_prompt":             stdReq.FinalPrompt,
-		"thinking_enabled":         stdReq.Thinking,
-		"search_enabled":           stdReq.Search,
-		"tool_names":               stdReq.ToolNames,
-		"toolcall_feature_match":   h.toolcallFeatureMatchEnabled(),
-		"toolcall_early_emit_high": h.toolcallEarlyEmitHighConfidence(),
-		"deepseek_token":           a.DeepSeekToken,
-		"pow_header":               powHeader,
-		"payload":                  payload,
+		"session_id":       sessionID,
+		"lease_id":         leaseID,
+		"model":            stdReq.ResponseModel,
+		"final_prompt":     stdReq.FinalPrompt,
+		"thinking_enabled": stdReq.Thinking,
+		"search_enabled":   stdReq.Search,
+		"tool_names":       stdReq.ToolNames,
+		"deepseek_token":   a.DeepSeekToken,
+		"pow_header":       powHeader,
+		"payload":          payload,
 	})
 }

--- a/internal/admin/deps.go
+++ b/internal/admin/deps.go
@@ -17,9 +17,13 @@ type ConfigStore interface {
 	FindAccount(identifier string) (config.Account, bool)
 	UpdateAccountToken(identifier, token string) error
 	UpdateAccountTestStatus(identifier, status string) error
+	AccountTestStatus(identifier string) (string, bool)
 	Update(mutator func(*config.Config) error) error
 	ExportJSONAndBase64() (string, string, error)
 	IsEnvBacked() bool
+	IsEnvWritebackEnabled() bool
+	HasEnvConfigSource() bool
+	ConfigPath() string
 	SetVercelSync(hash string, ts int64) error
 	AdminPasswordHash() string
 	AdminJWTExpireHours() int
@@ -27,6 +31,8 @@ type ConfigStore interface {
 	RuntimeAccountMaxInflight() int
 	RuntimeAccountMaxQueue(defaultSize int) int
 	RuntimeGlobalMaxInflight(defaultSize int) int
+	RuntimeTokenRefreshIntervalHours() int
+	AutoDeleteSessions() bool
 }

 type PoolController interface {
@@ -40,6 +46,8 @@ type DeepSeekCaller interface {
 	CreateSession(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
 	GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
 	CallCompletion(ctx context.Context, a *auth.RequestAuth, payload map[string]any, powResp string, maxAttempts int) (*http.Response, error)
+	GetSessionCountForToken(ctx context.Context, token string) (*deepseek.SessionStats, error)
+	DeleteAllSessionsForToken(ctx context.Context, token string) error
 }

 var _ ConfigStore = (*config.Store)(nil)
--- a/internal/admin/handler.go
+++ b/internal/admin/handler.go
@@ -31,12 +31,15 @@ func RegisterRoutes(r chi.Router, h *Handler) {
 		pr.Get("/queue/status", h.queueStatus)
 		pr.Post("/accounts/test", h.testSingleAccount)
 		pr.Post("/accounts/test-all", h.testAllAccounts)
+		pr.Post("/accounts/sessions/delete-all", h.deleteAllSessions)
 		pr.Post("/import", h.batchImport)
 		pr.Post("/test", h.testAPI)
 		pr.Post("/vercel/sync", h.syncVercel)
 		pr.Get("/vercel/status", h.vercelStatus)
+		pr.Post("/vercel/status", h.vercelStatus)
 		pr.Get("/export", h.exportConfig)
 		pr.Get("/dev/captures", h.getDevCaptures)
 		pr.Delete("/dev/captures", h.clearDevCaptures)
+		pr.Get("/version", h.getVersion)
 	})
 }
--- a/internal/admin/handler_accounts_crud.go
+++ b/internal/admin/handler_accounts_crud.go
@@ -54,6 +54,7 @@ func (h *Handler) listAccounts(w http.ResponseWriter, r *http.Request) {
 	}
 	items := make([]map[string]any, 0, end-start)
 	for _, acc := range accounts[start:end] {
+		testStatus, _ := h.Store.AccountTestStatus(acc.Identifier())
 		token := strings.TrimSpace(acc.Token)
 		preview := ""
 		if token != "" {
@@ -70,7 +71,7 @@ func (h *Handler) listAccounts(w http.ResponseWriter, r *http.Request) {
 			"has_password":  acc.Password != "",
 			"has_token":     token != "",
 			"token_preview": preview,
-			"test_status":   acc.TestStatus,
+			"test_status":   testStatus,
 		})
 	}
 	writeJSON(w, http.StatusOK, map[string]any{"items": items, "total": total, "page": page, "page_size": pageSize, "total_pages": totalPages})
--- a/internal/admin/handler_accounts_identifier_test.go
+++ b/internal/admin/handler_accounts_identifier_test.go
@@ -6,7 +6,6 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"net/url"
-	"strings"
 	"testing"

 	"github.com/go-chi/chi/v5"
@@ -26,9 +25,9 @@ func newAdminTestHandler(t *testing.T, raw string) *Handler {
 	}
 }

-func TestListAccountsIncludesTokenOnlyIdentifier(t *testing.T) {
+func TestListAccountsUsesEmailIdentifier(t *testing.T) {
 	h := newAdminTestHandler(t, `{
-		"accounts":[{"token":"token-only-account"}]
+		"accounts":[{"email":"u@example.com","password":"pwd"}]
 	}`)

 	req := httptest.NewRequest(http.MethodGet, "/admin/accounts?page=1&page_size=10", nil)
@@ -49,38 +48,8 @@ func TestListAccountsIncludesTokenOnlyIdentifier(t *testing.T) {
 	}
 	first, _ := items[0].(map[string]any)
 	identifier, _ := first["identifier"].(string)
-	if identifier == "" {
-		t.Fatalf("expected non-empty identifier: %#v", first)
-	}
-	if !strings.HasPrefix(identifier, "token:") {
-		t.Fatalf("expected token synthetic identifier, got %q", identifier)
-	}
-}
-
-func TestDeleteAccountSupportsTokenOnlyIdentifier(t *testing.T) {
-	h := newAdminTestHandler(t, `{
-		"accounts":[{"token":"token-only-account"}]
-	}`)
-	accounts := h.Store.Accounts()
-	if len(accounts) != 1 {
-		t.Fatalf("expected 1 account, got %d", len(accounts))
-	}
-	id := accounts[0].Identifier()
-	if id == "" {
-		t.Fatal("expected token-only synthetic identifier")
-	}
-
-	r := chi.NewRouter()
-	r.Delete("/admin/accounts/{identifier}", h.deleteAccount)
-	req := httptest.NewRequest(http.MethodDelete, "/admin/accounts/"+url.PathEscape(id), nil)
-	rec := httptest.NewRecorder()
-	r.ServeHTTP(rec, req)
-
-	if rec.Code != http.StatusOK {
-		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
-	}
-	if got := len(h.Store.Accounts()); got != 0 {
-		t.Fatalf("expected account removed, remaining=%d", got)
+	if identifier != "u@example.com" {
+		t.Fatalf("expected email identifier, got %q", identifier)
 	}
 }

@@ -142,11 +111,10 @@ func TestAddAccountRejectsCanonicalMobileDuplicate(t *testing.T) {
 	}
 }

-func TestFindAccountByIdentifierSupportsMobileAndTokenOnly(t *testing.T) {
+func TestFindAccountByIdentifierSupportsMobile(t *testing.T) {
 	h := newAdminTestHandler(t, `{
 		"accounts":[
-			{"email":"u@example.com","mobile":"13800138000","password":"pwd"},
-			{"token":"token-only-account"}
+			{"email":"u@example.com","mobile":"13800138000","password":"pwd"}
 		]
 	}`)

@@ -165,21 +133,4 @@ func TestFindAccountByIdentifierSupportsMobileAndTokenOnly(t *testing.T) {
 		t.Fatalf("unexpected account by +86 mobile: %#v", accByMobileWithCountryCode)
 	}

-	tokenOnlyID := ""
-	for _, acc := range h.Store.Accounts() {
-		if strings.TrimSpace(acc.Email) == "" && strings.TrimSpace(acc.Mobile) == "" {
-			tokenOnlyID = acc.Identifier()
-			break
-		}
-	}
-	if tokenOnlyID == "" {
-		t.Fatal("expected token-only account identifier")
-	}
-	accByTokenOnly, ok := findAccountByIdentifier(h.Store, tokenOnlyID)
-	if !ok {
-		t.Fatalf("expected find by token-only id=%q", tokenOnlyID)
-	}
-	if accByTokenOnly.Token != "token-only-account" {
-		t.Fatalf("unexpected token-only account: %#v", accByTokenOnly)
-	}
 }
--- a/internal/admin/handler_accounts_testing.go
+++ b/internal/admin/handler_accounts_testing.go
@@ -89,7 +89,15 @@ func runAccountTestsConcurrently(accounts []config.Account, maxConcurrency int,
 func (h *Handler) testAccount(ctx context.Context, acc config.Account, model, message string) map[string]any {
 	start := time.Now()
 	identifier := acc.Identifier()
-	result := map[string]any{"account": identifier, "success": false, "response_time": 0, "message": "", "model": model}
+	result := map[string]any{
+		"account":         identifier,
+		"success":         false,
+		"response_time":   0,
+		"message":         "",
+		"model":           model,
+		"session_count":   0,
+		"config_writable": !h.Store.IsEnvBacked(),
+	}
 	defer func() {
 		status := "failed"
 		if ok, _ := result["success"].(bool); ok {
@@ -97,15 +105,14 @@ func (h *Handler) testAccount(ctx context.Context, acc config.Account, model, me
 		}
 		_ = h.Store.UpdateAccountTestStatus(identifier, status)
 	}()
-	token := strings.TrimSpace(acc.Token)
-	if token == "" {
-		newToken, err := h.DS.Login(ctx, acc)
-		if err != nil {
-			result["message"] = "登录失败: " + err.Error()
-			return result
-		}
-		token = newToken
-		_ = h.Store.UpdateAccountToken(acc.Identifier(), token)
+	token, err := h.DS.Login(ctx, acc)
+	if err != nil {
+		result["message"] = "登录失败: " + err.Error()
+		return result
+	}
+	if err := h.Store.UpdateAccountToken(acc.Identifier(), token); err != nil {
+		result["message"] = "登录成功但写入运行时 token 失败: " + err.Error()
+		return result
 	}
 	authCtx := &authn.RequestAuth{UseConfigToken: false, DeepSeekToken: token}
 	sessionID, err := h.DS.CreateSession(ctx, authCtx, 1)
@@ -117,16 +124,26 @@ func (h *Handler) testAccount(ctx context.Context, acc config.Account, model, me
 		}
 		token = newToken
 		authCtx.DeepSeekToken = token
-		_ = h.Store.UpdateAccountToken(acc.Identifier(), token)
+		if err := h.Store.UpdateAccountToken(acc.Identifier(), token); err != nil {
+			result["message"] = "刷新 token 成功但写入运行时 token 失败: " + err.Error()
+			return result
+		}
 		sessionID, err = h.DS.CreateSession(ctx, authCtx, 1)
 		if err != nil {
 			result["message"] = "创建会话失败: " + err.Error()
 			return result
 		}
 	}
+
+	// 获取会话数量
+	sessionStats, sessionErr := h.DS.GetSessionCountForToken(ctx, token)
+	if sessionErr == nil && sessionStats != nil {
+		result["session_count"] = sessionStats.FirstPageCount
+	}
+
 	if strings.TrimSpace(message) == "" {
 		result["success"] = true
-		result["message"] = "API 测试成功（仅会话创建）"
+		result["message"] = "Token 刷新成功（登录与会话创建成功）"
 		result["response_time"] = int(time.Since(start).Milliseconds())
 		return result
 	}
@@ -210,3 +227,45 @@ func (h *Handler) testAPI(w http.ResponseWriter, r *http.Request) {
 	}
 	writeJSON(w, http.StatusOK, map[string]any{"success": false, "status_code": resp.StatusCode, "response": string(body)})
 }
+
+// deleteAllSessions logs the requested account in and removes all of its
+// sessions, retrying once with a second login when the first delete fails.
+func (h *Handler) deleteAllSessions(w http.ResponseWriter, r *http.Request) {
+	var payload map[string]any
+	_ = json.NewDecoder(r.Body).Decode(&payload)
+	identifier, _ := payload["identifier"].(string)
+	if strings.TrimSpace(identifier) == "" {
+		writeJSON(w, http.StatusBadRequest, map[string]any{"detail": "需要账号标识（identifier / email / mobile）"})
+		return
+	}
+	acc, found := findAccountByIdentifier(h.Store, identifier)
+	if !found {
+		writeJSON(w, http.StatusNotFound, map[string]any{"detail": "账号不存在"})
+		return
+	}
+	ctx := r.Context()
+
+	// Always log in first so the delete never runs with a stale token.
+	token, err := h.DS.Login(ctx, acc)
+	if err != nil {
+		writeJSON(w, http.StatusOK, map[string]any{"success": false, "message": "登录失败: " + err.Error()})
+		return
+	}
+	_ = h.Store.UpdateAccountToken(acc.Identifier(), token)
+
+	if err = h.DS.DeleteAllSessionsForToken(ctx, token); err != nil {
+		// The token may already be invalid: log in again and retry once.
+		retryToken, loginErr := h.DS.Login(ctx, acc)
+		if loginErr != nil {
+			writeJSON(w, http.StatusOK, map[string]any{"success": false, "message": "删除失败: " + err.Error()})
+			return
+		}
+		_ = h.Store.UpdateAccountToken(acc.Identifier(), retryToken)
+		if err = h.DS.DeleteAllSessionsForToken(ctx, retryToken); err != nil {
+			writeJSON(w, http.StatusOK, map[string]any{"success": false, "message": "删除失败: " + err.Error()})
+			return
+		}
+	}
+
+	writeJSON(w, http.StatusOK, map[string]any{"success": true, "message": "删除成功"})
+}
--- a/internal/admin/handler_accounts_testing_test.go
+++ b/internal/admin/handler_accounts_testing_test.go
@@ -1,21 +1,28 @@
 package admin

 import (
+	"bytes"
 	"context"
+	"encoding/json"
 	"errors"
 	"net/http"
+	"net/http/httptest"
 	"strings"
 	"testing"

 	"ds2api/internal/auth"
 	"ds2api/internal/config"
+	"ds2api/internal/deepseek"
 )

 type testingDSMock struct {
-	loginCalls          int
-	createSessionCalls  int
-	getPowCalls         int
-	callCompletionCalls int
+	loginCalls                 int
+	createSessionCalls         int
+	getPowCalls                int
+	callCompletionCalls        int
+	deleteAllSessionsCalls     int
+	deleteAllSessionsError     error
+	deleteAllSessionsErrorOnce bool
 }

 func (m *testingDSMock) Login(_ context.Context, _ config.Account) (string, error) {
@@ -38,6 +45,22 @@ func (m *testingDSMock) CallCompletion(_ context.Context, _ *auth.RequestAuth, _
 	return nil, errors.New("should not call CallCompletion in this test")
 }

+// DeleteAllSessionsForToken counts the call and returns the configured error;
+// when deleteAllSessionsErrorOnce is set the error is cleared after one use.
+func (m *testingDSMock) DeleteAllSessionsForToken(_ context.Context, _ string) error {
+	m.deleteAllSessionsCalls++
+	failure := m.deleteAllSessionsError
+	if failure != nil && m.deleteAllSessionsErrorOnce {
+		m.deleteAllSessionsError = nil
+	}
+	return failure
+}
+
+// GetSessionCountForToken always returns stats with only Success set to true.
+func (m *testingDSMock) GetSessionCountForToken(_ context.Context, _ string) (*deepseek.SessionStats, error) {
+	return &deepseek.SessionStats{Success: true}, nil
+}
+
 func TestTestAccount_BatchModeOnlyCreatesSession(t *testing.T) {
 	t.Setenv("DS2API_CONFIG_JSON", `{"accounts":[{"email":"batch@example.com","password":"pwd","token":""}]}`)
 	store := config.LoadStore()
@@ -54,7 +77,7 @@ func TestTestAccount_BatchModeOnlyCreatesSession(t *testing.T) {
 		t.Fatalf("expected success=true, got %#v", result)
 	}
 	msg, _ := result["message"].(string)
-	if !strings.Contains(msg, "仅会话创建") {
+	if !strings.Contains(msg, "Token 刷新成功") {
 		t.Fatalf("expected session-only success message, got %q", msg)
 	}
 	if ds.loginCalls != 1 || ds.createSessionCalls != 1 {
@@ -70,7 +93,43 @@ func TestTestAccount_BatchModeOnlyCreatesSession(t *testing.T) {
 	if updated.Token != "new-token" {
 		t.Fatalf("expected refreshed token to be persisted, got %q", updated.Token)
 	}
-	if updated.TestStatus != "ok" {
-		t.Fatalf("expected test status ok, got %q", updated.TestStatus)
+	testStatus, ok := store.AccountTestStatus("batch@example.com")
+	if !ok || testStatus != "ok" {
+		t.Fatalf("expected runtime test status ok, got %q (ok=%v)", testStatus, ok)
+	}
+}
+
+func TestDeleteAllSessions_RetryWithReloginOnDeleteFailure(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{"accounts":[{"email":"batch@example.com","password":"pwd","token":"expired-token"}]}`)
+	store := config.LoadStore()
+	ds := &testingDSMock{deleteAllSessionsError: errors.New("token expired"), deleteAllSessionsErrorOnce: true}
+	h := &Handler{Store: store, DS: ds}
+
+	req := httptest.NewRequest(http.MethodPost, "/delete-all", bytes.NewBufferString(`{"identifier":"batch@example.com"}`))
+	rec := httptest.NewRecorder()
+	h.deleteAllSessions(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected status 200, got %d", rec.Code)
+	}
+	var resp map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("unmarshal response: %v", err)
+	}
+	if ok, _ := resp["success"].(bool); !ok {
+		t.Fatalf("expected success response, got %#v", resp)
+	}
+	if ds.loginCalls != 2 {
+		t.Fatalf("expected initial login plus relogin, got %d", ds.loginCalls)
+	}
+	if ds.deleteAllSessionsCalls != 2 {
+		t.Fatalf("expected delete called twice, got %d", ds.deleteAllSessionsCalls)
+	}
+	updated, ok := store.FindAccount("batch@example.com")
+	if !ok {
+		t.Fatal("expected account")
+	}
+	if updated.Token != "new-token" {
+		t.Fatalf("expected refreshed token persisted, got %q", updated.Token)
 	}
 }
--- a/internal/admin/handler_config_import.go
+++ b/internal/admin/handler_config_import.go
@@ -43,6 +43,7 @@ func (h *Handler) configImport(w http.ResponseWriter, r *http.Request) {
 		writeJSON(w, http.StatusBadRequest, map[string]any{"detail": err.Error()})
 		return
 	}
+	incoming.ClearAccountTokens()

 	importedKeys, importedAccounts := 0, 0
 	err = h.Store.Update(func(c *config.Config) error {
@@ -119,12 +120,6 @@ func (h *Handler) configImport(w http.ResponseWriter, r *http.Request) {
 					next.ModelAliases[k] = v
 				}
 			}
-			if strings.TrimSpace(incoming.Toolcall.Mode) != "" {
-				next.Toolcall.Mode = incoming.Toolcall.Mode
-			}
-			if strings.TrimSpace(incoming.Toolcall.EarlyEmitConfidence) != "" {
-				next.Toolcall.EarlyEmitConfidence = incoming.Toolcall.EarlyEmitConfidence
-			}
 			if incoming.Responses.StoreTTLSeconds > 0 {
 				next.Responses.StoreTTLSeconds = incoming.Responses.StoreTTLSeconds
 			}
@@ -149,6 +144,9 @@ func (h *Handler) configImport(w http.ResponseWriter, r *http.Request) {
 			if incoming.Runtime.GlobalMaxInflight > 0 {
 				next.Runtime.GlobalMaxInflight = incoming.Runtime.GlobalMaxInflight
 			}
+			if incoming.Runtime.TokenRefreshIntervalHours > 0 {
+				next.Runtime.TokenRefreshIntervalHours = incoming.Runtime.TokenRefreshIntervalHours
+			}
 		}

 		normalizeSettingsConfig(&next)
@@ -180,6 +178,7 @@ func (h *Handler) configImport(w http.ResponseWriter, r *http.Request) {

 func (h *Handler) computeSyncHash() string {
 	snap := h.Store.Snapshot().Clone()
+	snap.ClearAccountTokens()
 	snap.VercelSyncHash = ""
 	snap.VercelSyncTime = 0
 	b, _ := json.Marshal(snap)
--- a/internal/admin/handler_config_read.go
+++ b/internal/admin/handler_config_read.go
@@ -8,8 +8,12 @@ import (
 func (h *Handler) getConfig(w http.ResponseWriter, _ *http.Request) {
 	snap := h.Store.Snapshot()
 	safe := map[string]any{
-		"keys":     snap.Keys,
-		"accounts": []map[string]any{},
+		"keys":                  snap.Keys,
+		"accounts":              []map[string]any{},
+		"env_backed":            h.Store.IsEnvBacked(),
+		"env_source_present":    h.Store.HasEnvConfigSource(),
+		"env_writeback_enabled": h.Store.IsEnvWritebackEnabled(),
+		"config_path":           h.Store.ConfigPath(),
 		"claude_mapping": func() map[string]string {
 			if len(snap.ClaudeMapping) > 0 {
 				return snap.ClaudeMapping
--- a/internal/admin/handler_config_write.go
+++ b/internal/admin/handler_config_write.go
@@ -50,9 +50,6 @@ func (h *Handler) updateConfig(w http.ResponseWriter, r *http.Request) {
 					if strings.TrimSpace(acc.Password) == "" {
 						acc.Password = prev.Password
 					}
-					if strings.TrimSpace(acc.Token) == "" {
-						acc.Token = prev.Token
-					}
 				}
 				seen[key] = struct{}{}
 				accounts = append(accounts, acc)
--- a/internal/admin/handler_settings_parse.go
+++ b/internal/admin/handler_settings_parse.go
@@ -7,15 +7,29 @@ import (
 	"ds2api/internal/config"
 )

-func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, *config.ToolcallConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, map[string]string, map[string]string, error) {
+// boolFrom coerces a loosely typed value to a bool. A bool is returned
+// unchanged; a string is true only when, trimmed and lowercased, it equals
+// "true"; every other value, nil included, is false.
+func boolFrom(v any) bool {
+	switch val := v.(type) {
+	case bool:
+		return val
+	case string:
+		normalized := strings.ToLower(strings.TrimSpace(val))
+		return normalized == "true"
+	}
+	return false
+}
+
+func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, *config.AutoDeleteConfig, map[string]string, map[string]string, error) {
 	var (
-		adminCfg    *config.AdminConfig
-		runtimeCfg  *config.RuntimeConfig
-		toolcallCfg *config.ToolcallConfig
-		respCfg     *config.ResponsesConfig
-		embCfg      *config.EmbeddingsConfig
-		claudeMap   map[string]string
-		aliasMap    map[string]string
+		adminCfg      *config.AdminConfig
+		runtimeCfg    *config.RuntimeConfig
+		respCfg       *config.ResponsesConfig
+		embCfg        *config.EmbeddingsConfig
+		autoDeleteCfg *config.AutoDeleteConfig
+		claudeMap     map[string]string
+		aliasMap      map[string]string
 	)

 	if raw, ok := req["admin"].(map[string]any); ok {
@@ -53,35 +67,19 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 			}
 			cfg.GlobalMaxInflight = n
 		}
+		if v, exists := raw["token_refresh_interval_hours"]; exists {
+			n := intFrom(v)
+			if n < 1 || n > 720 {
+				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.token_refresh_interval_hours must be between 1 and 720")
+			}
+			cfg.TokenRefreshIntervalHours = n
+		}
 		if cfg.AccountMaxInflight > 0 && cfg.GlobalMaxInflight > 0 && cfg.GlobalMaxInflight < cfg.AccountMaxInflight {
 			return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
 		}
 		runtimeCfg = cfg
 	}

-	if raw, ok := req["toolcall"].(map[string]any); ok {
-		cfg := &config.ToolcallConfig{}
-		if v, exists := raw["mode"]; exists {
-			mode := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", v)))
-			switch mode {
-			case "feature_match", "off":
-				cfg.Mode = mode
-			default:
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("toolcall.mode must be feature_match or off")
-			}
-		}
-		if v, exists := raw["early_emit_confidence"]; exists {
-			level := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", v)))
-			switch level {
-			case "high", "low", "off":
-				cfg.EarlyEmitConfidence = level
-			default:
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("toolcall.early_emit_confidence must be high, low or off")
-			}
-		}
-		toolcallCfg = cfg
-	}
-
 	if raw, ok := req["responses"].(map[string]any); ok {
 		cfg := &config.ResponsesConfig{}
 		if v, exists := raw["store_ttl_seconds"]; exists {
@@ -98,9 +96,6 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		cfg := &config.EmbeddingsConfig{}
 		if v, exists := raw["provider"]; exists {
 			p := strings.TrimSpace(fmt.Sprintf("%v", v))
-			if p == "" {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("embeddings.provider cannot be empty")
-			}
 			cfg.Provider = p
 		}
 		embCfg = cfg
@@ -130,5 +125,13 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
 		}
 	}

-	return adminCfg, runtimeCfg, toolcallCfg, respCfg, embCfg, claudeMap, aliasMap, nil
+	if raw, ok := req["auto_delete"].(map[string]any); ok {
+		cfg := &config.AutoDeleteConfig{}
+		if v, exists := raw["sessions"]; exists {
+			cfg.Sessions = boolFrom(v)
+		}
+		autoDeleteCfg = cfg
+	}
+
+	return adminCfg, runtimeCfg, respCfg, embCfg, autoDeleteCfg, claudeMap, aliasMap, nil
 }
--- a/internal/admin/handler_settings_read.go
+++ b/internal/admin/handler_settings_read.go
@@ -21,13 +21,14 @@ func (h *Handler) getSettings(w http.ResponseWriter, _ *http.Request) {
 			"default_password_warning": authn.UsingDefaultAdminKey(h.Store),
 		},
 		"runtime": map[string]any{
-			"account_max_inflight": h.Store.RuntimeAccountMaxInflight(),
-			"account_max_queue":    h.Store.RuntimeAccountMaxQueue(recommended),
-			"global_max_inflight":  h.Store.RuntimeGlobalMaxInflight(recommended),
+			"account_max_inflight":         h.Store.RuntimeAccountMaxInflight(),
+			"account_max_queue":            h.Store.RuntimeAccountMaxQueue(recommended),
+			"global_max_inflight":          h.Store.RuntimeGlobalMaxInflight(recommended),
+			"token_refresh_interval_hours": h.Store.RuntimeTokenRefreshIntervalHours(),
 		},
-		"toolcall":          snap.Toolcall,
 		"responses":         snap.Responses,
 		"embeddings":        snap.Embeddings,
+		"auto_delete":       snap.AutoDelete,
 		"claude_mapping":    settingsClaudeMapping(snap),
 		"model_aliases":     snap.ModelAliases,
 		"env_backed":        h.Store.IsEnvBacked(),
--- a/internal/admin/handler_settings_runtime.go
+++ b/internal/admin/handler_settings_runtime.go
@@ -14,6 +14,9 @@ func validateMergedRuntimeSettings(current config.RuntimeConfig, incoming *confi
 		if incoming.GlobalMaxInflight > 0 {
 			merged.GlobalMaxInflight = incoming.GlobalMaxInflight
 		}
+		if incoming.TokenRefreshIntervalHours > 0 {
+			merged.TokenRefreshIntervalHours = incoming.TokenRefreshIntervalHours
+		}
 	}
 	return validateRuntimeSettings(merged)
 }
--- a/internal/admin/handler_settings_test.go
+++ b/internal/admin/handler_settings_test.go
@@ -28,6 +28,25 @@ func TestGetSettingsDefaultPasswordWarning(t *testing.T) {
 	}
 }

+// TestGetSettingsIncludesTokenRefreshInterval checks that getSettings reports the configured refresh interval.
+func TestGetSettingsIncludesTokenRefreshInterval(t *testing.T) {
+	h := newAdminTestHandler(t, `{
+		"keys":["k1"],
+		"runtime":{"token_refresh_interval_hours":9}
+	}`)
+	rec := httptest.NewRecorder()
+	h.getSettings(rec, httptest.NewRequest(http.MethodGet, "/admin/settings", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+	var decoded map[string]any
+	_ = json.Unmarshal(rec.Body.Bytes(), &decoded)
+	runtimeSection, _ := decoded["runtime"].(map[string]any)
+	if got := intFrom(runtimeSection["token_refresh_interval_hours"]); got != 9 {
+		t.Fatalf("expected token_refresh_interval_hours=9, got %d body=%v", got, decoded)
+	}
+}
+
 func TestUpdateSettingsValidation(t *testing.T) {
 	h := newAdminTestHandler(t, `{"keys":["k1"]}`)
 	payload := map[string]any{
@@ -44,6 +63,25 @@ func TestUpdateSettingsValidation(t *testing.T) {
 	}
 }

+func TestUpdateSettingsValidationRejectsTokenRefreshInterval(t *testing.T) {
+	h := newAdminTestHandler(t, `{"keys":["k1"]}`)
+	payload := map[string]any{
+		"runtime": map[string]any{
+			"token_refresh_interval_hours": 0,
+		},
+	}
+	b, _ := json.Marshal(payload)
+	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
+	rec := httptest.NewRecorder()
+	h.updateSettings(rec, req)
+	if rec.Code != http.StatusBadRequest {
+		t.Fatalf("expected 400, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	if !bytes.Contains(rec.Body.Bytes(), []byte("runtime.token_refresh_interval_hours")) {
+		t.Fatalf("expected token refresh validation detail, got %s", rec.Body.String())
+	}
+}
+
 func TestUpdateSettingsValidationWithMergedRuntimeSnapshot(t *testing.T) {
 	h := newAdminTestHandler(t, `{
 		"keys":["k1"],
@@ -126,6 +164,29 @@ func TestUpdateSettingsHotReloadRuntime(t *testing.T) {
 	}
 }

+// TestUpdateSettingsHotReloadTokenRefreshInterval checks that changing
+// runtime.token_refresh_interval_hours from 6 to 12 through updateSettings
+// is visible on the store as soon as the handler returns.
+func TestUpdateSettingsHotReloadTokenRefreshInterval(t *testing.T) {
+	h := newAdminTestHandler(t, `{
+		"keys":["k1"],
+		"runtime":{"token_refresh_interval_hours":6}
+	}`)
+
+	encoded, _ := json.Marshal(map[string]any{
+		"runtime": map[string]any{"token_refresh_interval_hours": 12},
+	})
+	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(encoded))
+	rec := httptest.NewRecorder()
+	h.updateSettings(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+	if got := h.Store.RuntimeTokenRefreshIntervalHours(); got != 12 {
+		t.Fatalf("token_refresh_interval_hours=%d want=12", got)
+	}
+}
+
 func TestUpdateSettingsPasswordInvalidatesOldJWT(t *testing.T) {
 	hash := authn.HashAdminPassword("old-password")
 	h := newAdminTestHandler(t, `{"admin":{"password_hash":"`+hash+`"}}`)
@@ -207,6 +268,30 @@ func TestConfigImportMergeAndReplace(t *testing.T) {
 	}
 }

+// TestConfigImportAppliesTokenRefreshInterval checks that a replace-mode
+// config import carrying runtime.token_refresh_interval_hours=11 is
+// reflected by the store afterwards.
+func TestConfigImportAppliesTokenRefreshInterval(t *testing.T) {
+	h := newAdminTestHandler(t, `{"keys":["k1"]}`)
+
+	imported := map[string]any{
+		"keys":    []any{"k9"},
+		"runtime": map[string]any{"token_refresh_interval_hours": 11},
+	}
+	body, _ := json.Marshal(map[string]any{"mode": "replace", "config": imported})
+	req := httptest.NewRequest(http.MethodPost, "/admin/config/import?mode=replace", bytes.NewReader(body))
+	rec := httptest.NewRecorder()
+	h.configImport(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("replace status=%d body=%s", rec.Code, rec.Body.String())
+	}
+
+	got := h.Store.RuntimeTokenRefreshIntervalHours()
+	if got != 11 {
+		t.Fatalf("token_refresh_interval_hours=%d want=11", got)
+	}
+}
+
 func TestConfigImportRejectsInvalidRuntimeBounds(t *testing.T) {
 	h := newAdminTestHandler(t, `{"keys":["k1"]}`)
 	payload := map[string]any{
--- a/internal/admin/handler_settings_write.go
+++ b/internal/admin/handler_settings_write.go
@@ -17,7 +17,7 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
 		return
 	}

-	adminCfg, runtimeCfg, toolcallCfg, responsesCfg, embeddingsCfg, claudeMap, aliasMap, err := parseSettingsUpdateRequest(req)
+	adminCfg, runtimeCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, claudeMap, aliasMap, err := parseSettingsUpdateRequest(req)
 	if err != nil {
 		writeJSON(w, http.StatusBadRequest, map[string]any{"detail": err.Error()})
 		return
@@ -45,13 +45,8 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
 			if runtimeCfg.GlobalMaxInflight > 0 {
 				c.Runtime.GlobalMaxInflight = runtimeCfg.GlobalMaxInflight
 			}
-		}
-		if toolcallCfg != nil {
-			if strings.TrimSpace(toolcallCfg.Mode) != "" {
-				c.Toolcall.Mode = strings.TrimSpace(toolcallCfg.Mode)
-			}
-			if strings.TrimSpace(toolcallCfg.EarlyEmitConfidence) != "" {
-				c.Toolcall.EarlyEmitConfidence = strings.TrimSpace(toolcallCfg.EarlyEmitConfidence)
+			if runtimeCfg.TokenRefreshIntervalHours > 0 {
+				c.Runtime.TokenRefreshIntervalHours = runtimeCfg.TokenRefreshIntervalHours
 			}
 		}
 		if responsesCfg != nil && responsesCfg.StoreTTLSeconds > 0 {
@@ -60,6 +55,9 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
 		if embeddingsCfg != nil && strings.TrimSpace(embeddingsCfg.Provider) != "" {
 			c.Embeddings.Provider = strings.TrimSpace(embeddingsCfg.Provider)
 		}
+		if autoDeleteCfg != nil {
+			c.AutoDelete.Sessions = autoDeleteCfg.Sessions
+		}
 		if claudeMap != nil {
 			c.ClaudeMapping = claudeMap
 			c.ClaudeModelMap = nil
--- a/internal/admin/handler_vercel.go
+++ b/internal/admin/handler_vercel.go
@@ -3,6 +3,8 @@ package admin
 import (
 	"bytes"
 	"context"
+	"crypto/md5"
+	"encoding/base64"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -11,6 +13,8 @@ import (
 	"os"
 	"strings"
 	"time"
+
+	"ds2api/internal/config"
 )

 func (h *Handler) syncVercel(w http.ResponseWriter, r *http.Request) {
@@ -25,7 +29,7 @@ func (h *Handler) syncVercel(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 	validated, failed := h.validateAccountsForVercelSync(r.Context(), opts.AutoValidate)
-	_, cfgB64, err := h.Store.ExportJSONAndBase64()
+	cfgJSON, cfgB64, err := h.exportSyncConfig(req)
 	if err != nil {
 		writeJSON(w, http.StatusInternalServerError, map[string]any{"detail": err.Error()})
 		return
@@ -47,7 +51,7 @@ func (h *Handler) syncVercel(w http.ResponseWriter, r *http.Request) {
 	}
 	savedCreds := h.saveVercelProjectCredentials(r.Context(), client, opts, params, headers, envs)
 	manual, deployURL := triggerVercelDeployment(r.Context(), client, opts.ProjectID, params, headers)
-	_ = h.Store.SetVercelSync(h.computeSyncHash(), time.Now().Unix())
+	_ = h.Store.SetVercelSync(syncHashForJSON(cfgJSON), time.Now().Unix())
 	result := map[string]any{"success": true, "validated_accounts": validated}
 	if manual {
 		result["message"] = "配置已同步到 Vercel，请手动触发重新部署"
@@ -209,11 +213,71 @@ func triggerVercelDeployment(ctx context.Context, client *http.Client, projectID
 	return false, deployURL
 }

-func (h *Handler) vercelStatus(w http.ResponseWriter, _ *http.Request) {
+func (h *Handler) vercelStatus(w http.ResponseWriter, r *http.Request) {
 	snap := h.Store.Snapshot()
 	current := h.computeSyncHash()
 	synced := snap.VercelSyncHash != "" && snap.VercelSyncHash == current
-	writeJSON(w, http.StatusOK, map[string]any{"synced": synced, "last_sync_time": nilIfZero(snap.VercelSyncTime), "has_synced_before": snap.VercelSyncHash != ""})
+	draftHash := ""
+	draftDiffers := false
+	if r != nil && r.Method == http.MethodPost && r.Body != nil {
+		var req map[string]any
+		if err := json.NewDecoder(r.Body).Decode(&req); err == nil {
+			if cfgJSON, _, err := h.exportSyncConfig(req); err == nil {
+				draftHash = syncHashForJSON(cfgJSON)
+				draftDiffers = draftHash != "" && draftHash != current
+			}
+		}
+	}
+	writeJSON(w, http.StatusOK, map[string]any{
+		"synced":            synced,
+		"last_sync_time":    nilIfZero(snap.VercelSyncTime),
+		"has_synced_before": snap.VercelSyncHash != "",
+		"env_backed":        h.Store.IsEnvBacked(),
+		"config_hash":       current,
+		"last_synced_hash":  snap.VercelSyncHash,
+		"draft_hash":        draftHash,
+		"draft_differs":     draftDiffers,
+	})
+}
+
+// exportSyncConfig returns the sync config as JSON and base64: req["config_override"] when set, else the store export.
+func (h *Handler) exportSyncConfig(req map[string]any) (string, string, error) {
+	override := req["config_override"]
+	if override == nil {
+		return h.Store.ExportJSONAndBase64()
+	}
+	encoded, err := json.Marshal(override)
+	if err != nil {
+		return "", "", err
+	}
+	var draft config.Config
+	if err = json.Unmarshal(encoded, &draft); err != nil {
+		return "", "", err
+	}
+	draft.DropInvalidAccounts()
+	draft.ClearAccountTokens()
+	draft.VercelSyncHash, draft.VercelSyncTime = "", 0
+	out, err := json.Marshal(draft)
+	if err != nil {
+		return "", "", err
+	}
+	return string(out), base64.StdEncoding.EncodeToString(out), nil
+}
+
+// syncHashForJSON returns the hex MD5 of the config JSON in s, re-encoded after
+// resetting the Vercel sync fields and calling ClearAccountTokens, or "" on error.
+func syncHashForJSON(s string) string {
+	var parsed config.Config
+	if json.Unmarshal([]byte(s), &parsed) != nil {
+		return ""
+	}
+	parsed.VercelSyncHash, parsed.VercelSyncTime = "", 0
+	parsed.ClearAccountTokens()
+	canonical, err := json.Marshal(parsed)
+	if err != nil {
+		return ""
+	}
+	return fmt.Sprintf("%x", md5.Sum(canonical))
+}

 func vercelRequest(ctx context.Context, client *http.Client, method, endpoint string, params url.Values, headers map[string]string, body any) (map[string]any, int, error) {
--- a/internal/admin/handler_version.go
+++ b/internal/admin/handler_version.go
@@ -0,0 +1,75 @@
+package admin
+
+import (
+	"encoding/json"
+	"net/http"
+	"strings"
+	"time"
+
+	"ds2api/internal/version"
+)
+
+const latestReleaseAPI = "https://api.github.com/repos/CJackHwang/ds2api/releases/latest"
+// latestReleasePayload holds the fields decoded from the latestReleaseAPI response.
+type latestReleasePayload struct {
+	TagName     string `json:"tag_name"`
+	HTMLURL     string `json:"html_url"`
+	PublishedAt string `json:"published_at"`
+}
+
+// getVersion reports the running version and, if the GitHub lookup works, the
+// latest release. It always answers 200; lookup failures go in "check_error".
+func (h *Handler) getVersion(w http.ResponseWriter, r *http.Request) {
+	current, source := version.Current()
+	resp := map[string]any{
+		"success":         true,
+		"current_version": current,
+		"current_tag":     version.Tag(current),
+		"source":          source,
+		"checked_at":      time.Now().UTC().Format(time.RFC3339),
+	}
+	checkFailed := func(reason string) {
+		resp["check_error"] = reason
+		writeJSON(w, http.StatusOK, resp)
+	}
+
+	req, err := http.NewRequest(http.MethodGet, latestReleaseAPI, nil)
+	if err != nil {
+		checkFailed(err.Error())
+		return
+	}
+	if r != nil {
+		// Bind the lookup to the inbound request so it is cancelled with it.
+		req = req.WithContext(r.Context())
+	}
+	req.Header.Set("Accept", "application/vnd.github+json")
+	req.Header.Set("User-Agent", "ds2api-version-check")
+
+	res, err := (&http.Client{Timeout: 4 * time.Second}).Do(req)
+	if err != nil {
+		checkFailed(err.Error())
+		return
+	}
+	defer res.Body.Close()
+	if res.StatusCode < 200 || res.StatusCode >= 300 {
+		checkFailed("github api status: " + res.Status)
+		return
+	}
+	var data latestReleasePayload
+	if err := json.NewDecoder(res.Body).Decode(&data); err != nil {
+		checkFailed(err.Error())
+		return
+	}
+	latest := strings.TrimSpace(data.TagName)
+	if latest == "" {
+		checkFailed("missing latest tag")
+		return
+	}
+	latestVersion := strings.TrimPrefix(latest, "v")
+	resp["latest_tag"] = latest
+	resp["latest_version"] = latestVersion
+	resp["release_url"] = data.HTMLURL
+	resp["published_at"] = data.PublishedAt
+	resp["has_update"] = version.Compare(current, latestVersion) < 0
+	writeJSON(w, http.StatusOK, resp)
+}
--- a/internal/admin/helpers.go
+++ b/internal/admin/helpers.go
@@ -65,7 +65,6 @@ func toAccount(m map[string]any) config.Account {
 		Email:    email,
 		Mobile:   mobile,
 		Password: fieldString(m, "password"),
-		Token:    fieldString(m, "token"),
 	}
 }

--- a/internal/admin/helpers_edge_test.go
+++ b/internal/admin/helpers_edge_test.go
@@ -188,8 +188,8 @@ func TestToAccountAllFields(t *testing.T) {
 	if acc.Password != "secret" {
 		t.Fatalf("unexpected password: %q", acc.Password)
 	}
-	if acc.Token != "tok123" {
-		t.Fatalf("unexpected token: %q", acc.Token)
+	if acc.Token != "" {
+		t.Fatalf("expected token to be ignored, got %q", acc.Token)
 	}
 }

--- a/internal/admin/settings_validation.go
+++ b/internal/admin/settings_validation.go
@@ -12,8 +12,6 @@ func normalizeSettingsConfig(c *config.Config) {
 		return
 	}
 	c.Admin.PasswordHash = strings.TrimSpace(c.Admin.PasswordHash)
-	c.Toolcall.Mode = strings.ToLower(strings.TrimSpace(c.Toolcall.Mode))
-	c.Toolcall.EarlyEmitConfidence = strings.ToLower(strings.TrimSpace(c.Toolcall.EarlyEmitConfidence))
 	c.Embeddings.Provider = strings.TrimSpace(c.Embeddings.Provider)
 }

@@ -27,20 +25,6 @@ func validateSettingsConfig(c config.Config) error {
 	if c.Responses.StoreTTLSeconds != 0 && (c.Responses.StoreTTLSeconds < 30 || c.Responses.StoreTTLSeconds > 86400) {
 		return fmt.Errorf("responses.store_ttl_seconds must be between 30 and 86400")
 	}
-	if mode := strings.TrimSpace(c.Toolcall.Mode); mode != "" {
-		switch mode {
-		case "feature_match", "off":
-		default:
-			return fmt.Errorf("toolcall.mode must be feature_match or off")
-		}
-	}
-	if level := strings.TrimSpace(c.Toolcall.EarlyEmitConfidence); level != "" {
-		switch level {
-		case "high", "low", "off":
-		default:
-			return fmt.Errorf("toolcall.early_emit_confidence must be high, low or off")
-		}
-	}
 	if c.Embeddings.Provider != "" && strings.TrimSpace(c.Embeddings.Provider) == "" {
 		return fmt.Errorf("embeddings.provider cannot be empty")
 	}
@@ -57,6 +41,9 @@ func validateRuntimeSettings(runtime config.RuntimeConfig) error {
 	if runtime.GlobalMaxInflight != 0 && (runtime.GlobalMaxInflight < 1 || runtime.GlobalMaxInflight > 200000) {
 		return fmt.Errorf("runtime.global_max_inflight must be between 1 and 200000")
 	}
+	if runtime.TokenRefreshIntervalHours != 0 && (runtime.TokenRefreshIntervalHours < 1 || runtime.TokenRefreshIntervalHours > 720) {
+		return fmt.Errorf("runtime.token_refresh_interval_hours must be between 1 and 720")
+	}
 	if runtime.AccountMaxInflight > 0 && runtime.GlobalMaxInflight > 0 && runtime.GlobalMaxInflight < runtime.AccountMaxInflight {
 		return fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
 	}
--- a/internal/admin/token_runtime_http_test.go
+++ b/internal/admin/token_runtime_http_test.go
@@ -0,0 +1,109 @@
+package admin
+
+import (
+	"bytes"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/go-chi/chi/v5"
+
+	"ds2api/internal/account"
+	"ds2api/internal/config"
+)
+
+// newHTTPAdminHarness loads a store from rawConfig via DS2API_CONFIG_JSON
+// (CONFIG_JSON is blanked) and returns a chi router with the admin routes
+// registered on a Handler that uses ds.
+func newHTTPAdminHarness(t *testing.T, rawConfig string, ds DeepSeekCaller) http.Handler {
+	t.Helper()
+	t.Setenv("DS2API_CONFIG_JSON", rawConfig)
+	t.Setenv("CONFIG_JSON", "")
+
+	store := config.LoadStore()
+	handler := &Handler{Store: store, Pool: account.NewPool(store), DS: ds}
+	router := chi.NewRouter()
+	RegisterRoutes(router, handler)
+	return router
+}
+
+// adminReq builds a test request with the "Bearer admin" Authorization and JSON Content-Type headers.
+func adminReq(method, path string, body []byte) *http.Request {
+	req := httptest.NewRequest(method, path, bytes.NewReader(body))
+	req.Header = http.Header{"Authorization": {"Bearer admin"}, "Content-Type": {"application/json"}}
+	return req
+}
+
+// TestConfigImportIgnoresTokenFieldInPayload imports an account that carries
+// a token and expects GET /config to report has_token=false for it.
+func TestConfigImportIgnoresTokenFieldInPayload(t *testing.T) {
+	router := newHTTPAdminHarness(t, `{"accounts":[]}`, &testingDSMock{})
+	importBody := []byte(`{
+		"mode":"replace",
+		"config":{
+			"accounts":[{"email":"u@example.com","password":"pwd","token":"expired-token"}]
+		}
+	}`)
+	importRec := httptest.NewRecorder()
+	router.ServeHTTP(importRec, adminReq(http.MethodPost, "/config/import", importBody))
+	if importRec.Code != http.StatusOK {
+		t.Fatalf("import status=%d body=%s", importRec.Code, importRec.Body.String())
+	}
+
+	configRec := httptest.NewRecorder()
+	router.ServeHTTP(configRec, adminReq(http.MethodGet, "/config", nil))
+	if configRec.Code != http.StatusOK {
+		t.Fatalf("get config status=%d body=%s", configRec.Code, configRec.Body.String())
+	}
+	var decoded map[string]any
+	if err := json.Unmarshal(configRec.Body.Bytes(), &decoded); err != nil {
+		t.Fatalf("decode config response: %v", err)
+	}
+	accounts, _ := decoded["accounts"].([]any)
+	if len(accounts) != 1 {
+		t.Fatalf("expected one account, got %d", len(accounts))
+	}
+	entry, _ := accounts[0].(map[string]any)
+	if hasToken, _ := entry["has_token"].(bool); hasToken {
+		t.Fatalf("expected imported token to be ignored, account=%#v", entry)
+	}
+}
+
+// TestAccountTestRefreshesRuntimeTokenButExportOmitsToken runs POST
+// /accounts/test and then expects the /config/export JSON to hold no "token" key.
+func TestAccountTestRefreshesRuntimeTokenButExportOmitsToken(t *testing.T) {
+	mock := &testingDSMock{}
+	router := newHTTPAdminHarness(t, `{
+		"accounts":[{"email":"batch@example.com","password":"pwd","token":"stale-token"}]
+	}`, mock)
+	testRec := httptest.NewRecorder()
+	router.ServeHTTP(testRec, adminReq(http.MethodPost, "/accounts/test", []byte(`{"identifier":"batch@example.com"}`)))
+	if testRec.Code != http.StatusOK {
+		t.Fatalf("test account status=%d body=%s", testRec.Code, testRec.Body.String())
+	}
+	var testResult map[string]any
+	if err := json.Unmarshal(testRec.Body.Bytes(), &testResult); err != nil {
+		t.Fatalf("decode test response: %v", err)
+	}
+	if succeeded, _ := testResult["success"].(bool); !succeeded {
+		t.Fatalf("expected test success, got %#v", testResult)
+	}
+	if mock.loginCalls < 1 {
+		t.Fatalf("expected login to be called at least once, got %d", mock.loginCalls)
+	}
+
+	exportRec := httptest.NewRecorder()
+	router.ServeHTTP(exportRec, adminReq(http.MethodGet, "/config/export", nil))
+	if exportRec.Code != http.StatusOK {
+		t.Fatalf("export status=%d body=%s", exportRec.Code, exportRec.Body.String())
+	}
+	var exported map[string]any
+	if err := json.Unmarshal(exportRec.Body.Bytes(), &exported); err != nil {
+		t.Fatalf("decode export response: %v", err)
+	}
+	if exportJSON, _ := exported["json"].(string); strings.Contains(exportJSON, `"token"`) {
+		t.Fatalf("expected export json to omit tokens, got %s", exportJSON)
+	}
+}
--- a/internal/auth/auth_edge_test.go
+++ b/internal/auth/auth_edge_test.go
@@ -204,6 +204,45 @@ func TestSwitchAccountNilTriedAccounts(t *testing.T) {
 	r.Release(a)
 }

+// TestSwitchAccountSkipsLoginFailureAndContinues verifies that SwitchAccount moves
+// past an account whose login fails, marks it as tried, and lands on a later one.
+func TestSwitchAccountSkipsLoginFailureAndContinues(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["managed-key"],
+		"accounts":[
+			{"email":"acc1@test.com","password":"pwd","token":"t1"},
+			{"email":"acc2@test.com","password":"pwd"},
+			{"email":"acc3@test.com","password":"pwd","token":"t3"}
+		]
+	}`)
+	store := config.LoadStore()
+	resolver := NewResolver(store, account.NewPool(store), func(_ context.Context, candidate config.Account) (string, error) {
+		if candidate.Email == "acc2@test.com" {
+			return "", errors.New("login failed")
+		}
+		return "new-token", nil
+	})
+	req, _ := http.NewRequest("POST", "/", nil)
+	req.Header.Set("Authorization", "Bearer managed-key")
+	lease, err := resolver.Determine(req)
+	if err != nil {
+		t.Fatalf("determine failed: %v", err)
+	}
+	defer resolver.Release(lease)
+	if lease.AccountID != "acc1@test.com" {
+		t.Fatalf("expected first account, got %q", lease.AccountID)
+	}
+	if !resolver.SwitchAccount(context.Background(), lease) {
+		t.Fatal("expected switch to succeed after skipping failed account")
+	}
+	if lease.AccountID != "acc3@test.com" {
+		t.Fatalf("expected fallback to third account, got %q", lease.AccountID)
+	}
+	if !lease.TriedAccounts["acc2@test.com"] {
+		t.Fatalf("expected failed account to be marked as tried")
+	}
+}
+
 // ─── Release edge cases ─────────────────────────────────────────────

 func TestReleaseNilAuth(t *testing.T) {
--- a/internal/auth/request.go
+++ b/internal/auth/request.go
@@ -7,6 +7,8 @@ import (
 	"errors"
 	"net/http"
 	"strings"
+	"sync"
+	"time"

 	"ds2api/internal/account"
 	"ds2api/internal/config"
@@ -37,10 +39,18 @@ type Resolver struct {
 	Store *config.Store
 	Pool  *account.Pool
 	Login LoginFunc
+
+	mu               sync.Mutex
+	tokenRefreshedAt map[string]time.Time
 }

 func NewResolver(store *config.Store, pool *account.Pool, login LoginFunc) *Resolver {
-	return &Resolver{Store: store, Pool: pool, Login: login}
+	return &Resolver{
+		Store:            store,
+		Pool:             pool,
+		Login:            login,
+		tokenRefreshedAt: map[string]time.Time{}, // last recorded refresh time per account ID
+	}
 }

 func (r *Resolver) Determine(req *http.Request) (*RequestAuth, error) {
@@ -60,29 +70,53 @@ func (r *Resolver) Determine(req *http.Request) (*RequestAuth, error) {
 		}, nil
 	}
 	target := strings.TrimSpace(req.Header.Get("X-Ds2-Target-Account"))
-	acc, ok := r.Pool.AcquireWait(ctx, target, nil)
-	if !ok {
-		return nil, ErrNoAccount
-	}
-	a := &RequestAuth{
-		UseConfigToken: true,
-		CallerID:       callerID,
-		AccountID:      acc.Identifier(),
-		Account:        acc,
-		TriedAccounts:  map[string]bool{},
-		resolver:       r,
-	}
-	if acc.Token == "" {
-		if err := r.loginAndPersist(ctx, a); err != nil {
-			r.Pool.Release(a.AccountID)
-			return nil, err
-		}
-	} else {
-		a.DeepSeekToken = acc.Token
+	a, err := r.acquireManagedRequestAuth(ctx, callerID, target)
+	if err != nil {
+		return nil, err
 	}
 	return a, nil
 }

+// acquireManagedRequestAuth leases a pooled account for callerID and ensures it
+// carries a usable token. Without a target it keeps asking the pool for another
+// account, passing along the ones that already failed; with a target the first
+// token failure is returned. Otherwise: latest token error, else ErrNoAccount.
+func (r *Resolver) acquireManagedRequestAuth(ctx context.Context, callerID, target string) (*RequestAuth, error) {
+	attempted := map[string]bool{}
+	var failure error = ErrNoAccount
+	for {
+		// Untargeted requests give up once every configured account was attempted.
+		if target == "" && len(attempted) >= len(r.Store.Accounts()) {
+			return nil, failure
+		}
+		acc, ok := r.Pool.AcquireWait(ctx, target, attempted)
+		if !ok {
+			return nil, failure
+		}
+
+		leased := &RequestAuth{
+			UseConfigToken: true,
+			CallerID:       callerID,
+			AccountID:      acc.Identifier(),
+			Account:        acc,
+			TriedAccounts:  attempted,
+			resolver:       r,
+		}
+		err := r.ensureManagedToken(ctx, leased)
+		if err == nil {
+			return leased, nil
+		}
+
+		// Remember the failure and hand the account back before trying again.
+		failure = err
+		attempted[leased.AccountID] = true
+		r.Pool.Release(leased.AccountID)
+		if target != "" {
+			return nil, err
+		}
+	}
+}
+
 // DetermineCaller resolves caller identity without acquiring any pooled account.
 // Use this for local-cache lookup routes that only need tenant isolation.
 func (r *Resolver) DetermineCaller(req *http.Request) (*RequestAuth, error) {
@@ -120,6 +154,7 @@ func (r *Resolver) loginAndPersist(ctx context.Context, a *RequestAuth) error {
 	}
 	a.Account.Token = token
 	a.DeepSeekToken = token
+	r.markTokenRefreshedNow(a.AccountID)
 	return r.Store.UpdateAccountToken(a.AccountID, token)
 }

@@ -142,6 +177,7 @@ func (r *Resolver) MarkTokenInvalid(a *RequestAuth) {
 	}
 	a.Account.Token = ""
 	a.DeepSeekToken = ""
+	r.clearTokenRefreshMark(a.AccountID)
 	_ = r.Store.UpdateAccountToken(a.AccountID, "")
 }

@@ -156,20 +192,20 @@ func (r *Resolver) SwitchAccount(ctx context.Context, a *RequestAuth) bool {
 		a.TriedAccounts[a.AccountID] = true
 		r.Pool.Release(a.AccountID)
 	}
-	acc, ok := r.Pool.Acquire("", a.TriedAccounts)
-	if !ok {
-		return false
-	}
-	a.Account = acc
-	a.AccountID = acc.Identifier()
-	if acc.Token == "" {
-		if err := r.loginAndPersist(ctx, a); err != nil {
+	for {
+		acc, ok := r.Pool.Acquire("", a.TriedAccounts)
+		if !ok {
 			return false
 		}
-	} else {
-		a.DeepSeekToken = acc.Token
+		a.Account = acc
+		a.AccountID = acc.Identifier()
+		if err := r.ensureManagedToken(ctx, a); err != nil {
+			a.TriedAccounts[a.AccountID] = true
+			r.Pool.Release(a.AccountID)
+			continue
+		}
+		return true
 	}
-	return true
 }

 func (r *Resolver) Release(a *RequestAuth) {
@@ -210,3 +246,57 @@ func callerTokenID(token string) string {
 	sum := sha256.Sum256([]byte(token))
 	return "caller:" + hex.EncodeToString(sum[:8])
 }
+
+// ensureManagedToken logs a in (persisting the new token) when its account has
+// no token or a forced refresh is due; otherwise it reuses the stored token.
+func (r *Resolver) ensureManagedToken(ctx context.Context, a *RequestAuth) error {
+	if strings.TrimSpace(a.Account.Token) == "" || r.shouldForceRefresh(a.AccountID) {
+		return r.loginAndPersist(ctx, a)
+	}
+	a.DeepSeekToken = a.Account.Token
+	return nil
+}
+
+// shouldForceRefresh reports whether accountID's token has outlived the
+// configured refresh interval; the first sighting only seeds the timestamp.
+func (r *Resolver) shouldForceRefresh(accountID string) bool {
+	if r == nil || r.Store == nil || strings.TrimSpace(accountID) == "" {
+		return false
+	}
+	intervalHours := r.Store.RuntimeTokenRefreshIntervalHours()
+	if intervalHours <= 0 {
+		return false // a non-positive interval disables forced refresh
+	}
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	if last := r.tokenRefreshedAt[accountID]; !last.IsZero() {
+		return time.Since(last) >= time.Duration(intervalHours)*time.Hour
+	}
+	if r.tokenRefreshedAt == nil { // Resolver built without NewResolver
+		r.tokenRefreshedAt = map[string]time.Time{}
+	}
+	r.tokenRefreshedAt[accountID] = time.Now()
+	return false
+}
+
+// markTokenRefreshedNow stamps accountID as refreshed now, creating the map if needed.
+func (r *Resolver) markTokenRefreshedNow(accountID string) {
+	if strings.TrimSpace(accountID) == "" {
+		return
+	}
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	if r.tokenRefreshedAt == nil {
+		r.tokenRefreshedAt = map[string]time.Time{}
+	}
+	r.tokenRefreshedAt[accountID] = time.Now()
+}
+
+// clearTokenRefreshMark forgets accountID's refresh timestamp; blank IDs are ignored.
+func (r *Resolver) clearTokenRefreshMark(accountID string) {
+	if strings.TrimSpace(accountID) != "" {
+		r.mu.Lock()
+		defer r.mu.Unlock()
+		delete(r.tokenRefreshedAt, accountID)
+	}
+}
--- a/internal/auth/request_test.go
+++ b/internal/auth/request_test.go
@@ -2,8 +2,11 @@ package auth

 import (
 	"context"
+	"errors"
 	"net/http"
+	"sync/atomic"
 	"testing"
+	"time"

 	"ds2api/internal/account"
 	"ds2api/internal/config"
@@ -58,7 +61,7 @@ func TestDetermineWithXAPIKeyManagedKeyAcquiresAccount(t *testing.T) {
 	if auth.AccountID != "acc@example.com" {
 		t.Fatalf("unexpected account id: %q", auth.AccountID)
 	}
-	if auth.DeepSeekToken != "account-token" {
+	if auth.DeepSeekToken != "fresh-token" {
 		t.Fatalf("unexpected account token: %q", auth.DeepSeekToken)
 	}
 	if auth.CallerID == "" {
@@ -193,3 +196,202 @@ func TestDetermineCallerMissingToken(t *testing.T) {
 		t.Fatalf("unexpected error: %v", err)
 	}
 }
+
+// TestDetermineManagedAccountForcesRefreshEverySixHours verifies that the stored
+// token is served at first and replaced by a login once its mark is seven hours old.
+func TestDetermineManagedAccountForcesRefreshEverySixHours(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["managed-key"],
+		"accounts":[{"email":"acc@example.com","password":"pwd","token":"seed-token"}]
+	}`)
+	store := config.LoadStore()
+	if err := store.UpdateAccountToken("acc@example.com", "seed-token"); err != nil {
+		t.Fatalf("update token failed: %v", err)
+	}
+	var logins int32
+	resolver := NewResolver(store, account.NewPool(store), func(_ context.Context, _ config.Account) (string, error) {
+		seq := atomic.AddInt32(&logins, 1)
+		return "fresh-token-" + string(rune('0'+seq)), nil
+	})
+
+	req, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
+	req.Header.Set("x-api-key", "managed-key")
+	first, err := resolver.Determine(req)
+	if err != nil {
+		t.Fatalf("determine failed: %v", err)
+	}
+	if first.DeepSeekToken != "seed-token" {
+		t.Fatalf("expected initial token without forced refresh, got %q", first.DeepSeekToken)
+	}
+	resolver.Release(first)
+	if got := atomic.LoadInt32(&logins); got != 0 {
+		t.Fatalf("expected no login before refresh interval, got %d", got)
+	}
+
+	// Backdate the refresh mark by seven hours so the next Determine must log in.
+	resolver.mu.Lock()
+	resolver.tokenRefreshedAt["acc@example.com"] = time.Now().Add(-7 * time.Hour)
+	resolver.mu.Unlock()
+
+	second, err := resolver.Determine(req)
+	if err != nil {
+		t.Fatalf("determine after interval failed: %v", err)
+	}
+	defer resolver.Release(second)
+	if second.DeepSeekToken != "fresh-token-1" {
+		t.Fatalf("expected refreshed token after interval, got %q", second.DeepSeekToken)
+	}
+	if got := atomic.LoadInt32(&logins); got != 1 {
+		t.Fatalf("expected exactly one forced refresh login, got %d", got)
+	}
+}
+
+// TestDetermineManagedAccountUsesUpdatedRefreshInterval verifies that an interval
+// changed through store.Update is the one applied by the next Determine call.
+func TestDetermineManagedAccountUsesUpdatedRefreshInterval(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["managed-key"],
+		"accounts":[{"email":"acc@example.com","password":"pwd","token":"seed-token"}],
+		"runtime":{"token_refresh_interval_hours":6}
+	}`)
+	store := config.LoadStore()
+	if err := store.UpdateAccountToken("acc@example.com", "seed-token"); err != nil {
+		t.Fatalf("update token failed: %v", err)
+	}
+	var logins int32
+	resolver := NewResolver(store, account.NewPool(store), func(_ context.Context, _ config.Account) (string, error) {
+		seq := atomic.AddInt32(&logins, 1)
+		return "fresh-token-" + string(rune('0'+seq)), nil
+	})
+
+	req, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
+	req.Header.Set("x-api-key", "managed-key")
+	first, err := resolver.Determine(req)
+	if err != nil {
+		t.Fatalf("determine failed: %v", err)
+	}
+	if first.DeepSeekToken != "seed-token" {
+		t.Fatalf("expected initial token without forced refresh, got %q", first.DeepSeekToken)
+	}
+	resolver.Release(first)
+	if got := atomic.LoadInt32(&logins); got != 0 {
+		t.Fatalf("expected no login before runtime update, got %d", got)
+	}
+
+	// Shrink the interval to one hour, then backdate the refresh mark by two.
+	if err := store.Update(func(cfg *config.Config) error {
+		cfg.Runtime.TokenRefreshIntervalHours = 1
+		return nil
+	}); err != nil {
+		t.Fatalf("update runtime failed: %v", err)
+	}
+
+	resolver.mu.Lock()
+	resolver.tokenRefreshedAt["acc@example.com"] = time.Now().Add(-2 * time.Hour)
+	resolver.mu.Unlock()
+
+	second, err := resolver.Determine(req)
+	if err != nil {
+		t.Fatalf("determine after runtime update failed: %v", err)
+	}
+	defer resolver.Release(second)
+	if second.DeepSeekToken != "fresh-token-1" {
+		t.Fatalf("expected refreshed token after runtime update, got %q", second.DeepSeekToken)
+	}
+	if got := atomic.LoadInt32(&logins); got != 1 {
+		t.Fatalf("expected exactly one login after runtime update, got %d", got)
+	}
+}
+
+// TestDetermineManagedAccountRetriesOtherAccountOnLoginFailure verifies that an
+// untargeted Determine skips an account whose login fails and returns a working one.
+func TestDetermineManagedAccountRetriesOtherAccountOnLoginFailure(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["managed-key"],
+		"accounts":[
+			{"email":"bad@example.com","password":"pwd"},
+			{"email":"good@example.com","password":"pwd","token":"good-token"}
+		]
+	}`)
+	store := config.LoadStore()
+	resolver := NewResolver(store, account.NewPool(store), func(_ context.Context, candidate config.Account) (string, error) {
+		if candidate.Email != "bad@example.com" {
+			return "fresh-good-token", nil
+		}
+		return "", errors.New("stale account")
+	})
+
+	req, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
+	req.Header.Set("x-api-key", "managed-key")
+	got, err := resolver.Determine(req)
+	if err != nil {
+		t.Fatalf("determine failed: %v", err)
+	}
+	defer resolver.Release(got)
+	if got.AccountID != "good@example.com" {
+		t.Fatalf("expected fallback to good account, got %q", got.AccountID)
+	}
+	if got.DeepSeekToken == "" {
+		t.Fatal("expected non-empty token from fallback account")
+	}
+	if !got.TriedAccounts["bad@example.com"] {
+		t.Fatalf("expected bad account to be tracked as tried")
+	}
+}
+
+// TestDetermineTargetAccountDoesNotFallbackOnLoginFailure verifies that Determine
+// returns an error when the account named by X-Ds2-Target-Account cannot log in.
+func TestDetermineTargetAccountDoesNotFallbackOnLoginFailure(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["managed-key"],
+		"accounts":[
+			{"email":"bad@example.com","password":"pwd"},
+			{"email":"good@example.com","password":"pwd","token":"good-token"}
+		]
+	}`)
+	store := config.LoadStore()
+	resolver := NewResolver(store, account.NewPool(store), func(_ context.Context, candidate config.Account) (string, error) {
+		if candidate.Email != "bad@example.com" {
+			return "fresh-good-token", nil
+		}
+		return "", errors.New("stale account")
+	})
+
+	req, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
+	req.Header.Set("x-api-key", "managed-key")
+	req.Header.Set("X-Ds2-Target-Account", "bad@example.com")
+
+	if _, err := resolver.Determine(req); err == nil {
+		t.Fatal("expected determine to fail for broken target account")
+	}
+}
+
+// TestDetermineManagedAccountReturnsLastEnsureErrorWhenAllFail verifies that when
+// every login fails, Determine surfaces the login error rather than ErrNoAccount.
+func TestDetermineManagedAccountReturnsLastEnsureErrorWhenAllFail(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["managed-key"],
+		"accounts":[
+			{"email":"bad1@example.com","password":"pwd"},
+			{"email":"bad2@example.com","password":"pwd"}
+		]
+	}`)
+	store := config.LoadStore()
+	ensureErr := errors.New("all credentials stale")
+	resolver := NewResolver(store, account.NewPool(store), func(_ context.Context, _ config.Account) (string, error) {
+		return "", ensureErr
+	})
+
+	req, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
+	req.Header.Set("x-api-key", "managed-key")
+
+	_, err := resolver.Determine(req)
+	switch {
+	case err == nil:
+		t.Fatal("expected determine to fail")
+	case !errors.Is(err, ensureErr):
+		t.Fatalf("expected ensure error, got %v", err)
+	case errors.Is(err, ErrNoAccount):
+		t.Fatalf("expected auth-style ensure error, got ErrNoAccount")
+	}
+}
--- a/internal/compat/go_compat_test.go
+++ b/internal/compat/go_compat_test.go
@@ -73,22 +73,31 @@ func TestGoCompatToolcallFixtures(t *testing.T) {
 		mustLoadJSON(t, fixturePath, &fixture)

 		var expected struct {
-			Calls []util.ParsedToolCall `json:"calls"`
+			Calls             []util.ParsedToolCall `json:"calls"`
+			SawToolCallSyntax bool                  `json:"sawToolCallSyntax"`
+			RejectedByPolicy  bool                  `json:"rejectedByPolicy"`
+			RejectedToolNames []string              `json:"rejectedToolNames"`
 		}
 		mustLoadJSON(t, expectedPath, &expected)

-		var got []util.ParsedToolCall
+		var got util.ToolCallParseResult
 		switch strings.ToLower(strings.TrimSpace(fixture.Mode)) {
 		case "standalone":
-			got = util.ParseStandaloneToolCalls(fixture.Text, fixture.ToolNames)
+			got = util.ParseStandaloneToolCallsDetailed(fixture.Text, fixture.ToolNames)
 		default:
-			got = util.ParseToolCalls(fixture.Text, fixture.ToolNames)
+			got = util.ParseToolCallsDetailed(fixture.Text, fixture.ToolNames)
 		}
-		if len(got) == 0 && len(expected.Calls) == 0 {
-			continue
+		if got.Calls == nil {
+			got.Calls = []util.ParsedToolCall{}
 		}
-		if !reflect.DeepEqual(got, expected.Calls) {
-			t.Fatalf("toolcall fixture %s mismatch:\n got=%#v\nwant=%#v", name, got, expected.Calls)
+		if got.RejectedToolNames == nil {
+			got.RejectedToolNames = []string{}
+		}
+		if !reflect.DeepEqual(got.Calls, expected.Calls) ||
+			got.SawToolCallSyntax != expected.SawToolCallSyntax ||
+			got.RejectedByPolicy != expected.RejectedByPolicy ||
+			!reflect.DeepEqual(got.RejectedToolNames, expected.RejectedToolNames) {
+			t.Fatalf("toolcall fixture %s mismatch:\n got=%#v\nwant=%#v", name, got, expected)
 		}
 	}
 }
--- a/internal/config/account.go
+++ b/internal/config/account.go
@@ -1,10 +1,6 @@
 package config

-import (
-	"crypto/sha256"
-	"encoding/hex"
-	"strings"
-)
+import "strings"

 func (a Account) Identifier() string {
 	if strings.TrimSpace(a.Email) != "" {
@@ -13,12 +9,5 @@ func (a Account) Identifier() string {
 	if mobile := NormalizeMobileForStorage(a.Mobile); mobile != "" {
 		return mobile
 	}
-	// Backward compatibility: old configs may contain token-only accounts.
-	// Use a stable non-sensitive synthetic id so they can still join the pool.
-	token := strings.TrimSpace(a.Token)
-	if token == "" {
-		return ""
-	}
-	sum := sha256.Sum256([]byte(token))
-	return "token:" + hex.EncodeToString(sum[:8])
+	return ""
 }
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .1.0
 .1.0