merge: sync release smoke fix from dev

fix: drop obsolete release smoke check
Merge pull request #398 from CJackHwang/dev
2026-05-02 15:35:27 +08:00 · 2026-05-02 04:23:02 +08:00 · 2026-05-02 04:19:23 +08:00 · 2026-05-02 04:04:44 +08:00 · 2026-05-02 03:55:36 +08:00 · 2026-05-02 03:26:43 +08:00
535 changed files with 125184 additions and 14500 deletions
--- a/.env.example
+++ b/.env.example
@@ -1,5 +1,8 @@
 # DS2API runtime
+# Runtime listen port inside the app/container
 PORT=5001
+# Docker Compose host port (compose only; container still listens on PORT)
+DS2API_HOST_PORT=6011
 LOG_LEVEL=INFO

 # Admin authentication
--- a/.github/workflows/quality-gates.yml
+++ b/.github/workflows/quality-gates.yml
@@ -5,12 +5,23 @@ on:
  push:
    branches:
      - dev
+      - main

 permissions:
  contents: read

+concurrency:
+  group: quality-gates-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  GO_VERSION: "1.26.x"
+  NODE_VERSION: "24"
+  GOLANGCI_LINT_VERSION: "v2.11.4"
+
 jobs:
-  quality-gates:
+  lint-and-refactor:
+    name: Lint and Refactor Gate
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
@@ -19,22 +30,104 @@ jobs:
      - name: Setup Go
        uses: actions/setup-go@v5
        with:
-          go-version: "1.24.x"
+          go-version: ${{ env.GO_VERSION }}
+          cache-dependency-path: go.sum

-      - name: Setup Node
-        uses: actions/setup-node@v4
+      - name: Setup golangci-lint
+        uses: golangci/golangci-lint-action@v8
        with:
-          node-version: "24"
-          cache: "npm"
-          cache-dependency-path: webui/package-lock.json
+          version: ${{ env.GOLANGCI_LINT_VERSION }}
+          install-mode: binary
+          verify: true
+
+      - name: Go Format & Lint Gates
+        run: ./scripts/lint.sh

      - name: Refactor Line Gate
        run: ./tests/scripts/check-refactor-line-gate.sh

+  go-unit:
+    name: Go Unit (${{ matrix.os }})
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - macos-latest
+          - windows-latest
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache-dependency-path: go.sum
+
+      - name: Go Unit Gate
+        run: ./tests/scripts/run-unit-go.sh
+
+  unit-all:
+    name: Unit Gates (Go + Node)
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache-dependency-path: go.sum
+
+      - name: Setup Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: npm
+          cache-dependency-path: webui/package-lock.json
+
      - name: Unit Gates (Go + Node)
        run: ./tests/scripts/run-unit-all.sh

+  webui-build:
+    name: WebUI Build
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: npm
+          cache-dependency-path: webui/package-lock.json
+
      - name: WebUI Build Gate
        run: |
-          npm ci --prefix webui
+          npm ci --prefix webui --prefer-offline --no-audit
          npm run build --prefix webui
+
+  cross-build:
+    name: Release Target Cross-Build
+    if: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/main') }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache-dependency-path: go.sum
+
+      - name: Cross-Build Release Targets
+        env:
+          CROSS_BUILD_JOBS: "3"
+        run: ./tests/scripts/check-cross-build.sh
--- a/.github/workflows/release-artifacts.yml
+++ b/.github/workflows/release-artifacts.yml
@@ -15,6 +15,14 @@ permissions:
  contents: write
  packages: write

+concurrency:
+  group: release-artifacts-${{ github.event.release.tag_name || github.event.inputs.release_tag }}
+  cancel-in-progress: false
+
+env:
+  GO_VERSION: "1.26.x"
+  NODE_VERSION: "24"
+
 jobs:
  build-and-upload:
    runs-on: ubuntu-latest
@@ -27,69 +35,30 @@ jobs:
      - name: Setup Go
        uses: actions/setup-go@v5
        with:
-          go-version: "1.24.x"
+          go-version: ${{ env.GO_VERSION }}
+          cache-dependency-path: go.sum

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
-          node-version: "24"
+          node-version: ${{ env.NODE_VERSION }}
          cache: "npm"
          cache-dependency-path: webui/package-lock.json

      - name: Release Blocking Gates
        run: |
-          ./tests/scripts/check-stage6-manual-smoke.sh
          ./tests/scripts/check-refactor-line-gate.sh
          ./tests/scripts/run-unit-all.sh

      - name: Build WebUI
        run: |
-          npm ci --prefix webui
+          npm ci --prefix webui --prefer-offline --no-audit
          npm run build --prefix webui

      - name: Build Multi-Platform Archives
-        run: |
-          set -euo pipefail
-          TAG="${RELEASE_TAG}"
-          BUILD_VERSION="${TAG}"
-          if [ -z "${BUILD_VERSION}" ] && [ -f VERSION ]; then
-            BUILD_VERSION="$(cat VERSION | tr -d '[:space:]')"
-          fi
-          mkdir -p dist
-
-          targets=(
-            "linux/amd64"
-            "linux/arm64"
-            "darwin/amd64"
-            "darwin/arm64"
-            "windows/amd64"
-          )
-
-          for target in "${targets[@]}"; do
-            GOOS="${target%/*}"
-            GOARCH="${target#*/}"
-            PKG="ds2api_${TAG}_${GOOS}_${GOARCH}"
-            STAGE="dist/${PKG}"
-            BIN="ds2api"
-            if [ "${GOOS}" = "windows" ]; then
-              BIN="ds2api.exe"
-            fi
-
-            mkdir -p "${STAGE}/static"
-            CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" \
-              go build -trimpath -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION}" -o "${STAGE}/${BIN}" ./cmd/ds2api
-
-            cp config.example.json .env.example internal/deepseek/assets/sha3_wasm_bg.7b9ca65ddd.wasm LICENSE README.MD README.en.md "${STAGE}/"
-            cp -R static/admin "${STAGE}/static/admin"
-
-            if [ "${GOOS}" = "windows" ]; then
-              (cd dist && zip -rq "${PKG}.zip" "${PKG}")
-            else
-              tar -C dist -czf "dist/${PKG}.tar.gz" "${PKG}"
-            fi
-
-            rm -rf "${STAGE}"
-          done
+        env:
+          RELEASE_BUILD_JOBS: "3"
+        run: ./scripts/build-release-archives.sh

      - name: Prepare Docker release inputs
        run: |
@@ -153,6 +122,8 @@ jobs:
          platforms: linux/amd64,linux/arm64
          tags: ${{ steps.meta_release.outputs.tags }}
          labels: ${{ steps.meta_release.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max

      - name: Export Docker image archives for release assets
        run: |
@@ -162,12 +133,14 @@ jobs:
          docker buildx build \
            --platform linux/amd64 \
            --target runtime-from-dist \
+            --cache-from type=gha \
            --output type=docker,dest="dist/ds2api_${TAG}_docker_linux_amd64.tar" \
            .

          docker buildx build \
            --platform linux/arm64 \
            --target runtime-from-dist \
+            --cache-from type=gha \
            --output type=docker,dest="dist/ds2api_${TAG}_docker_linux_arm64.tar" \
            .

--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@ config.json
 *.swo
 *~
 .DS_Store
+opencode.json

 # Logs
 *.log
@@ -28,6 +29,7 @@ yarn.lock
 pnpm-lock.yaml

 # Build artifacts
+dist/
 *.tsbuildinfo
 .cache/
 .parcel-cache/
@@ -58,3 +60,11 @@ Thumbs.db
 # Claude Code
 .claude/
 CLAUDE.local.md
+
+# Local tool bootstrap cache
+.tmp/
+
+# Chat history
+data/
+.codex
+.roomodes
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -0,0 +1,73 @@
+version: "2"
+
+run:
+  tests: true
+
+linters:
+  default: standard
+  enable:
+    - errcheck
+    - govet
+    - ineffassign
+    - staticcheck
+    - unused
+  settings:
+    dupl:
+      threshold: 100
+    goconst:
+      min-len: 2
+      min-occurrences: 2
+    gocritic:
+      enabled-tags:
+        - diagnostic
+        - experimental
+        - opinionated
+        - performance
+        - style
+      disabled-checks:
+        - wrapperFunc
+        - rangeValCopy
+        - hugeParam
+    gocyclo:
+      min-complexity: 15
+    lll:
+      line-length: 140
+    misspell:
+      locale: US
+    nakedret:
+      max-func-lines: 30
+    prealloc:
+      simple: true
+      range-loops: true
+      for-loops: false
+  exclusions:
+    generated: lax
+    rules:
+      - path: (.+)\.go$
+        text: "ST1000: at least one file in a package should have a package comment"
+    paths:
+      - third_party$
+      - builtin$
+      - examples$
+      - vendor$
+      - webui/node_modules$
+
+issues:
+  max-issues-per-linter: 0
+  max-same-issues: 0
+
+formatters:
+  enable:
+    - gofmt
+  settings:
+    goimports:
+      local-prefixes:
+        - ds2api
+  exclusions:
+    generated: lax
+    paths:
+      - third_party$
+      - builtin$
+      - examples$
+      - vendor$
+      - webui/node_modules$
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -0,0 +1,29 @@
+# AGENTS.md
+
+These rules apply to all agent-made changes in this repository.
+
+## PR Gate
+
+- Before opening or updating a PR, run the same local gates as `.github/workflows/quality-gates.yml`.
+- Required commands:
+  - `./scripts/lint.sh`
+  - `./tests/scripts/check-refactor-line-gate.sh`
+  - `./tests/scripts/run-unit-all.sh`
+  - `npm run build --prefix webui`
+
+## Go Lint Rules
+
+- Run `gofmt -w` on every changed Go file before commit or push.
+- Do not ignore error returns from I/O-style cleanup calls such as `Close`, `Flush`, `Sync`, or similar methods.
+- If a cleanup error cannot be returned, log it explicitly.
+
+## Change Scope
+
+- Keep changes additive and tightly scoped to the requested feature or bugfix.
+- Do not mix unrelated refactors into feature PRs unless they are required to make the change pass gates.
+
+## Documentation Sync
+
+- When business logic or user-visible behavior changes, update the corresponding documentation in the same change.
+- `docs/prompt-compatibility.md` is the source-of-truth document for the “API -> pure-text web-chat context” compatibility flow.
+- If a change affects message normalization, tool prompt injection, prompt-visible tool history, file/reference handling, history split, or completion payload assembly, update `docs/prompt-compatibility.md` in the same change.
--- a/API.en.md
+++ b/API.en.md
@@ -4,6 +4,8 @@ Language: [中文](API.md) | [English](API.en.md)

 This document describes the actual behavior of the current Go codebase.

+Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Deployment](docs/DEPLOY.en.md) / [Testing](docs/TESTING.md)
+
 ---

 ## Table of Contents
@@ -29,7 +31,16 @@ This document describes the actual behavior of the current Go codebase.
 | Base URL | `http://localhost:5001` or your deployment domain |
 | Default Content-Type | `application/json` |
 | Health probes | `GET /healthz`, `GET /readyz` |
-| CORS | Enabled (`Access-Control-Allow-Origin: *`, allows `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Vercel-Protection-Bypass`) |
+| CORS | Enabled (uniformly covers `/v1/*`, `/anthropic/*`, `/v1beta/models/*`, and `/admin/*`; echoes the browser `Origin` when present, otherwise `*`; default allow-list includes `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`, `X-Goog-Api-Key`, `Anthropic-Version`, `Anthropic-Beta`, and also accepts third-party preflight-requested headers such as `x-stainless-*`; `/v1/chat/completions` on Vercel Node Runtime matches the same behavior; internal-only `X-Ds2-Internal-Token` remains blocked) |
+
+- All JSON request bodies must be valid UTF-8; malformed byte sequences are rejected on ingress with `400 invalid json`.
+
+### 3.0 Adapter-Layer Notes
+
+- OpenAI / Claude / Gemini protocols are now mounted on one shared `chi` router tree assembled in `internal/server/router.go`.
+- Adapter responsibilities are streamlined to: **request normalization → DeepSeek invocation → protocol-shaped rendering**, reducing legacy split-logic paths.
+- Tool-calling semantics are aligned between the Go and Node runtimes: models should output the DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts legacy canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`. DSML is normalized back to XML at the parser entry, so internal parsing remains XML-based, with stream-time anti-leak filtering.
+- `Admin API` separates static config from runtime policy: `/admin/config*` for configuration state, `/admin/settings*` for runtime behavior.

 ---

@@ -45,8 +56,7 @@ cp config.example.json config.json
 Use it per deployment mode:

 - Local run: read `config.json` directly
-- Docker / Vercel: generate Base64 from `config.json`, then set `DS2API_CONFIG_JSON`
-- Compatibility note: `DS2API_CONFIG_JSON` may also contain raw JSON directly; `CONFIG_JSON` is the legacy fallback variable
+- Docker / Vercel: set `DS2API_CONFIG_JSON` to the Base64-encoded contents of `config.json`, or paste the raw JSON into it directly

 ```bash
 DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
@@ -73,7 +83,7 @@ Two header formats accepted:
 - Token is in `config.keys` → **Managed account mode**: DS2API auto-selects an account via rotation
 - Token is not in `config.keys` → **Direct token mode**: treated as a DeepSeek token directly

-**Optional header**: `X-Ds2-Target-Account: <email_or_mobile>` — Pin a specific managed account.
+**Optional header**: `X-Ds2-Target-Account: <email_or_mobile>` — pins a specific managed account. If the target account does not exist or the managed-account queue is exhausted, the request returns `429` (current responses do not include a `Retry-After` header). If the account exists but login/refresh fails, the request returns the underlying `401` or upstream error.
 Gemini-compatible clients can also send `x-goog-api-key`, `?key=`, or `?api_key=` as the caller credential source.

 ### Admin Endpoints (`/admin/*`)
@@ -91,13 +101,16 @@ Gemini-compatible clients can also send `x-goog-api-key`, `?key=`, or `?api_key=
 | Method | Path | Auth | Description |
 | --- | --- | --- | --- |
 | GET | `/healthz` | None | Liveness probe |
+| HEAD | `/healthz` | None | Liveness probe (no body) |
 | GET | `/readyz` | None | Readiness probe |
+| HEAD | `/readyz` | None | Readiness probe (no body) |
 | GET | `/v1/models` | None | OpenAI model list |
 | GET | `/v1/models/{id}` | None | OpenAI single-model query (alias accepted) |
 | POST | `/v1/chat/completions` | Business | OpenAI chat completions |
 | POST | `/v1/responses` | Business | OpenAI Responses API (stream/non-stream) |
 | GET | `/v1/responses/{response_id}` | Business | Query stored response (in-memory TTL) |
 | POST | `/v1/embeddings` | Business | OpenAI Embeddings API |
+| POST | `/v1/files` | Business | OpenAI Files upload (multipart/form-data) |
 | GET | `/anthropic/v1/models` | None | Claude model list |
 | POST | `/anthropic/v1/messages` | Business | Claude messages |
 | POST | `/anthropic/v1/messages/count_tokens` | Business | Claude token counting |
@@ -119,25 +132,43 @@ Gemini-compatible clients can also send `x-goog-api-key`, `?key=`, or `?api_key=
 | POST | `/admin/settings/password` | Admin | Update admin password and invalidate old JWTs |
 | POST | `/admin/config/import` | Admin | Import config (merge/replace) |
 | GET | `/admin/config/export` | Admin | Export full config (`config`/`json`/`base64`) |
-| POST | `/admin/keys` | Admin | Add API key |
+| POST | `/admin/keys` | Admin | Add API key (optional `name`/`remark`) |
+| PUT | `/admin/keys/{key}` | Admin | Update API key metadata |
 | DELETE | `/admin/keys/{key}` | Admin | Delete API key |
+| GET | `/admin/proxies` | Admin | List proxies |
+| POST | `/admin/proxies` | Admin | Add proxy |
+| PUT | `/admin/proxies/{proxyID}` | Admin | Update proxy (empty password keeps old secret) |
+| DELETE | `/admin/proxies/{proxyID}` | Admin | Delete proxy (auto-unbind referenced accounts) |
+| POST | `/admin/proxies/test` | Admin | Test proxy connectivity |
 | GET | `/admin/accounts` | Admin | Paginated account list |
 | POST | `/admin/accounts` | Admin | Add account |
+| PUT | `/admin/accounts/{identifier}` | Admin | Update account name/remark |
 | DELETE | `/admin/accounts/{identifier}` | Admin | Delete account |
+| PUT | `/admin/accounts/{identifier}/proxy` | Admin | Bind/unbind proxy for an account |
 | GET | `/admin/queue/status` | Admin | Account queue status |
 | POST | `/admin/accounts/test` | Admin | Test one account |
 | POST | `/admin/accounts/test-all` | Admin | Test all accounts |
 | POST | `/admin/accounts/sessions/delete-all` | Admin | Delete all sessions for one account |
 | POST | `/admin/import` | Admin | Batch import keys/accounts |
 | POST | `/admin/test` | Admin | Test API through service |
+| POST | `/admin/dev/raw-samples/capture` | Admin | Fire one request and persist it as a raw sample |
+| GET | `/admin/dev/raw-samples/query` | Admin | Search current in-memory capture chains by prompt keyword |
+| POST | `/admin/dev/raw-samples/save` | Admin | Persist a selected in-memory capture chain as a raw sample |
 | POST | `/admin/vercel/sync` | Admin | Sync config to Vercel |
 | GET | `/admin/vercel/status` | Admin | Vercel sync status |
 | POST | `/admin/vercel/status` | Admin | Vercel sync status / draft compare |
 | GET | `/admin/export` | Admin | Export config JSON/Base64 |
 | GET | `/admin/dev/captures` | Admin | Read local packet-capture entries |
 | DELETE | `/admin/dev/captures` | Admin | Clear local packet-capture entries |
+| GET | `/admin/chat-history` | Admin | Read server-side conversation history |
+| DELETE | `/admin/chat-history` | Admin | Clear server-side conversation history |
+| GET | `/admin/chat-history/{id}` | Admin | Read one server-side conversation entry |
+| DELETE | `/admin/chat-history/{id}` | Admin | Delete one server-side conversation entry |
+| PUT | `/admin/chat-history/settings` | Admin | Update conversation history retention limit |
 | GET | `/admin/version` | Admin | Check current version and latest Release |

+OpenAI `/v1/*` paths are canonical. For clients configured with the bare DS2API service URL, the same OpenAI handlers are also exposed through root shortcuts: `/models`, `/models/{id}`, `/chat/completions`, `/responses`, `/responses/{response_id}`, `/embeddings`, and `/files`.
+
 ---

 ## Health Endpoints
@@ -160,7 +191,7 @@ Gemini-compatible clients can also send `x-goog-api-key`, `?key=`, or `?api_key=

 ### `GET /v1/models`

-No auth required. Returns supported models.
+No auth required. Returns the currently supported DeepSeek native model list.

 **Response**:

@@ -168,14 +199,22 @@ No auth required. Returns supported models.
 {
  "object": "list",
  "data": [
-    {"id": "deepseek-chat", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
-    {"id": "deepseek-reasoner", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
-    {"id": "deepseek-chat-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
-    {"id": "deepseek-reasoner-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}
+    {"id": "deepseek-v4-flash", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-flash-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-pro", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-pro-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-flash-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-flash-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-pro-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-pro-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-vision", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-vision-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}
  ]
 }
 ```

+> Note: `/v1/models` returns normalized DeepSeek native model IDs. Common aliases are accepted only as request input and are not expanded as separate items in this endpoint.
+
 ### Model Alias Resolution

 For `chat` / `responses` / `embeddings`, DS2API follows a wide-input/strict-output policy:
@@ -185,6 +224,18 @@ For `chat` / `responses` / `embeddings`, DS2API follows a wide-input/strict-outp
 3. If still unmatched, fall back by known family heuristics (`o*`, `gpt-*`, `claude-*`, etc.).
 4. If still unmatched, return `invalid_request_error`.

+Built-in aliases come from `internal/config/models.go`; `config.model_aliases` can override or add mappings at runtime. Excerpt:
+
+- OpenAI / Codex: `gpt-4o`, `gpt-4.1`, `gpt-5`, `gpt-5.5`, `gpt-5-codex`, `gpt-5.3-codex`, `codex-mini-latest`
+- OpenAI reasoning: `o1`, `o3`, `o3-deep-research`, `o4-mini`
+- Claude: `claude-opus-4-6`, `claude-sonnet-4-6`, `claude-haiku-4-5`, `claude-3-5-sonnet-latest`
+- Gemini: `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-pro-vision`
+- Other compatibility families: `llama-*`, `qwen-*`, `mistral-*`, and `command-*` fall back through family heuristics
+
+Current vision support resolves only to `deepseek-v4-vision` and does not expose a separate `vision-search` variant.
+
+Retired historical families such as `claude-1.*`, `claude-2.*`, `claude-instant-*`, and `gpt-3.5*` are explicitly rejected.
+
 ### `POST /v1/chat/completions`

 **Headers**:
@@ -198,7 +249,7 @@ Content-Type: application/json

 | Field | Type | Required | Notes |
 | --- | --- | --- | --- |
-| `model` | string | ✅ | DeepSeek native models + common aliases (`gpt-4o`, `gpt-5-codex`, `o3`, `claude-sonnet-4-5`, etc.) |
+| `model` | string | ✅ | DeepSeek native models + common aliases (`gpt-5.5`, `gpt-5.4-mini`, `gpt-5.3-codex`, `o3`, `claude-opus-4-6`, `gemini-2.5-pro`, `gemini-2.5-flash`, etc.) |
 | `messages` | array | ✅ | OpenAI-style messages |
 | `stream` | boolean | ❌ | Default `false` |
 | `tools` | array | ❌ | Function calling schema |
@@ -211,14 +262,14 @@ Content-Type: application/json
  "id": "<chat_session_id>",
  "object": "chat.completion",
  "created": 1738400000,
-  "model": "deepseek-reasoner",
+  "model": "deepseek-v4-pro",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "final response",
-        "reasoning_content": "reasoning trace (reasoner models)"
+        "reasoning_content": "reasoning trace (when thinking is enabled)"
      },
      "finish_reason": "stop"
    }
@@ -253,9 +304,10 @@ data: [DONE]
 **Field notes**:

 - First delta includes `role: assistant`
-- `deepseek-reasoner` / `deepseek-reasoner-search` models emit `delta.reasoning_content`
+- When thinking is enabled, the stream may emit `delta.reasoning_content`
 - Text emits `delta.content`
 - Last chunk includes `finish_reason` and `usage`
+- Token counting prefers pass-through from upstream DeepSeek SSE (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent. Failed/interrupted endings (for example `response.failed`) may not include `usage`.

 #### Tool Calls

@@ -288,7 +340,13 @@ When `tools` is present, DS2API performs anti-leak handling:
 }
 ```

-**Stream**: Once high-confidence toolcall features are matched, DS2API emits `delta.tool_calls` immediately (without waiting for full JSON closure), then keeps sending argument deltas; confirmed raw tool JSON is never forwarded as `delta.content`.
+**Stream**: Once high-confidence toolcall features are matched, DS2API emits `delta.tool_calls` immediately (without waiting for full argument closure), then keeps sending argument deltas; confirmed tool-call fragments are not forwarded as `delta.content`.
+
+Additional notes:
+
+- The parser treats DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`) and legacy canonical XML tool blocks (`<tool_calls>` / `<invoke name="...">` / `<parameter name="...">`) as executable tool calls. DSML is normalized back to XML at the parser entry; internal parsing remains XML-based. Legacy `<tools>`, `<tool_call>`, `<tool_name>`, `<param>`, `<function_call>`, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text.
+- If the final visible response text is empty but the reasoning stream contains an executable tool call, Chat / Responses emits a standard OpenAI `tool_calls` / `function_call` output during finalization. If thinking/reasoning was not enabled by the client, that reasoning text is used only for detection and is not exposed as visible text or `reasoning_content`.
+- `tool_calls` shown inside fenced markdown code blocks (for example, ```` ```json ... ``` ````) are treated as examples, not executable calls.

 ---

@@ -347,7 +405,8 @@ data: [DONE]
 ```

 If `tool_choice=required` is violated in stream mode, DS2API emits `response.failed` then `[DONE]` (no `response.completed`).
-Unknown tool names (outside declared `tools`) are rejected and will not be emitted as valid tool calls.
+
+> Current behavior: the parser tries to extract structured tool calls and does not reject tool names that fall outside the declared `tools`; your tool executor should still validate each call against an allow-list before executing it.

 ### `GET /v1/responses/{response_id}`

@@ -364,13 +423,29 @@ Business auth required. Returns OpenAI-compatible embeddings shape.
 | `model` | string | ✅ | Supports native models + alias mapping |
 | `input` | string/array | ✅ | Supports string, string array, token array |

-> Requires `embeddings.provider`. Current supported values: `mock` / `deterministic` / `builtin`. If missing/unsupported, returns standard error shape with HTTP 501.
+> Requires `embeddings.provider`. Current supported values: `mock` / `deterministic` / `builtin` (all three use the same local deterministic implementation). If missing/unsupported, returns standard error shape with HTTP 501.
+
+### `POST /v1/files`
+
+Business auth required. OpenAI Files-compatible upload endpoint; currently only `multipart/form-data` is supported.
+
+| Field | Type | Required | Notes |
+| --- | --- | --- | --- |
+| `file` | file | ✅ | Binary payload |
+| `purpose` | string | ❌ | Forwarded purpose field |
+
+Constraints and behavior:
+
+- `Content-Type` must be `multipart/form-data` (otherwise `400`).
+- Total request size limit is **100 MiB** (over-limit returns `413`).
+- Success returns an OpenAI `file` object (`id/object/bytes/filename/purpose/status`, etc.) and includes `account_id` for source-account tracing.

 ---

 ## Claude-Compatible API

 Besides `/anthropic/v1/*`, DS2API also supports shortcut paths: `/v1/messages`, `/messages`, `/v1/messages/count_tokens`, `/messages/count_tokens`.
+Implementation-wise, these paths reuse the OpenAI Chat Completions parse-and-translate pipeline, which avoids maintaining divergent parsing chains.

 ### `GET /anthropic/v1/models`

@@ -382,17 +457,17 @@ No auth required.
 {
  "object": "list",
  "data": [
-    {"id": "claude-sonnet-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
+    {"id": "claude-sonnet-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
    {"id": "claude-haiku-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
    {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"}
  ],
  "first_id": "claude-opus-4-6",
-  "last_id": "claude-instant-1.0",
+  "last_id": "claude-3-haiku-20240307",
  "has_more": false
 }
 ```

-> Note: the example is partial; the real response includes historical Claude 1.x/2.x/3.x/4.x IDs and common aliases.
+> Note: the example is partial; besides the current primary aliases, the real response also includes Claude 4.x snapshots plus historical 3.x IDs and common aliases.

 ### `POST /anthropic/v1/messages`

@@ -410,12 +485,19 @@ anthropic-version: 2023-06-01

 | Field | Type | Required | Notes |
 | --- | --- | --- | --- |
-| `model` | string | ✅ | For example `claude-sonnet-4-5` / `claude-opus-4-6` / `claude-haiku-4-5` (compatible with `claude-3-5-haiku-latest`), plus historical Claude model IDs |
+| `model` | string | ✅ | For example `claude-sonnet-4-6` / `claude-opus-4-6` / `claude-haiku-4-5` (compatible with `claude-3-5-haiku-latest`), plus historical Claude model IDs |
 | `messages` | array | ✅ | Claude-style messages |
 | `max_tokens` | number | ❌ | Auto-filled to `8192` when omitted; not strictly enforced by upstream bridge |
 | `stream` | boolean | ❌ | Default `false` |
 | `system` | string | ❌ | Optional system prompt |
 | `tools` | array | ❌ | Claude tool schema |
+| `thinking` | object | ❌ | Anthropic thinking config; translated into downstream reasoning control, and ignored by `-nothinking` models |
+| `temperature` | number | ❌ | Passed through to the downstream bridge; if `temperature` and `top_p` are both present, `temperature` wins |
+| `top_p` | number | ❌ | Passed through when `temperature` is absent |
+| `stop_sequences` | array | ❌ | Passed through as downstream stop sequences |
+| `tool_choice` | string/object | ❌ | Supports `auto` / `none` / `required` / `{"type":"function","name":"..."}` and is translated to downstream tool choice |
+
+> Note: `thinking`, `temperature`, `top_p`, `stop_sequences`, and `tool_choice` are translated through the compatibility bridge. Final behavior still depends on the selected model and upstream support. When both `temperature` and `top_p` are present, `temperature` takes precedence.

 #### Non-Stream Response

@@ -424,7 +506,7 @@ anthropic-version: 2023-06-01
  "id": "msg_1738400000000000000",
  "type": "message",
  "role": "assistant",
-  "model": "claude-sonnet-4-5",
+  "model": "claude-sonnet-4-6",
  "content": [
    {"type": "text", "text": "response"}
  ],
@@ -478,7 +560,7 @@ data: {"type":"message_stop"}

 ```json
 {
-  "model": "claude-sonnet-4-5",
+  "model": "claude-sonnet-4-6",
  "messages": [
    {"role": "user", "content": "Hello"}
  ]
@@ -505,6 +587,7 @@ Supported paths:
 - `/v1/models/{model}:streamGenerateContent` (compat path)

 Authentication is the same as other business routes (`Authorization: Bearer <token>` or `x-api-key`).
+Implementation-wise, these paths reuse the OpenAI Chat Completions parse-and-translate pipeline, which avoids maintaining divergent parsing chains.

 ### `POST /v1beta/models/{model}:generateContent`

@@ -523,6 +606,7 @@ Returns SSE (`text/event-stream`), each chunk as `data: <json>`:
 - regular text: incremental text chunks
 - `tools` mode: buffered and emitted as `functionCall` at finalize phase
 - final chunk: includes `finishReason: "STOP"` and `usageMetadata`
+- Token counting prefers pass-through from upstream DeepSeek SSE (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent

 ---

@@ -581,12 +665,19 @@ Returns Vercel preconfiguration status.

 ### `GET /admin/config`

-Returns sanitized config.
+Returns sanitized config, including both `keys` and `api_keys`.

 ```json
 {
  "keys": ["k1", "k2"],
+  "api_keys": [
+    {"key": "k1", "name": "Primary", "remark": "Production"},
+    {"key": "k2", "name": "Backup", "remark": "Load test"}
+  ],
  "env_backed": false,
+  "env_source_present": true,
+  "env_writeback_enabled": true,
+  "config_path": "/data/config.json",
  "accounts": [
    {
      "identifier": "user@example.com",
@@ -597,28 +688,33 @@ Returns sanitized config.
      "token_preview": "abcde..."
    }
  ],
-  "claude_mapping": {
-    "fast": "deepseek-chat",
-    "slow": "deepseek-reasoner"
+  "model_aliases": {
+    "claude-sonnet-4-6": "deepseek-v4-flash",
+    "claude-opus-4-6": "deepseek-v4-pro"
  }
 }
 ```

 ### `POST /admin/config`

-Only updates `keys`, `accounts`, and `claude_mapping`.
+Only updates `keys`, `api_keys`, `accounts`, and `model_aliases`.
+If both `api_keys` and `keys` are sent, the structured `api_keys` entries win so `name` / `remark` metadata is preserved; `keys` remains a legacy fallback.

 **Request**:

 ```json
 {
  "keys": ["k1", "k2"],
+  "api_keys": [
+    {"key": "k1", "name": "Primary", "remark": "Production"},
+    {"key": "k2", "name": "Backup", "remark": "Load test"}
+  ],
  "accounts": [
    {"email": "user@example.com", "password": "pwd", "token": ""}
  ],
-  "claude_mapping": {
-    "fast": "deepseek-chat",
-    "slow": "deepseek-reasoner"
+  "model_aliases": {
+    "claude-sonnet-4-6": "deepseek-v4-flash",
+    "claude-opus-4-6": "deepseek-v4-pro"
  }
 }
 ```
@@ -630,9 +726,11 @@ Reads runtime settings and status, including:
 - `success`
 - `admin` (`has_password_hash`, `jwt_expire_hours`, `jwt_valid_after_unix`, `default_password_warning`)
 - `runtime` (`account_max_inflight`, `account_max_queue`, `global_max_inflight`, `token_refresh_interval_hours`)
+- `compat` (`wide_input_strict_output`, `strip_reference_markers`)
 - `responses` / `embeddings`
- `auto_delete` (`sessions`)
- `claude_mapping` / `model_aliases`
+- `auto_delete` (`mode`: `none` / `single` / `all`; legacy `sessions=true` is still treated as `all`)
+- `current_input_file` (`enabled` defaults to `true`, plus `min_chars`)
+- `model_aliases`
 - `env_backed`, `needs_vercel_sync`
 - `toolcall` policy is fixed to `feature_match + high` and is no longer returned or editable via settings

@@ -642,11 +740,13 @@ Hot-updates runtime settings. Supported fields:

 - `admin.jwt_expire_hours`
 - `runtime.account_max_inflight` / `runtime.account_max_queue` / `runtime.global_max_inflight` / `runtime.token_refresh_interval_hours`
+- `compat.wide_input_strict_output` / `compat.strip_reference_markers`
 - `responses.store_ttl_seconds`
 - `embeddings.provider`
- `auto_delete.sessions`
- `claude_mapping`
+- `auto_delete.mode`
+- `current_input_file.enabled` / `current_input_file.min_chars`
 - `model_aliases`
+- `history_split` is retained only for legacy config compatibility and no longer affects requests
 - `toolcall` policy is fixed and is no longer writable through settings

 ### `POST /admin/settings/password`
@@ -670,7 +770,9 @@ Imports full config with:

 The request can send config directly, or wrapped as `{"config": {...}, "mode":"merge"}`.
 Query params `?mode=merge` / `?mode=replace` are also supported.
-Import accepts `keys`, `accounts`, `claude_mapping` / `claude_model_mapping`, `model_aliases`, `admin`, `runtime`, `responses`, `embeddings`, and `auto_delete`; legacy `toolcall` fields are ignored.
+`replace` mode replaces the full config shape while preserving Vercel sync metadata. `merge` mode merges `keys`, `api_keys`, `accounts`, and `model_aliases`, and overwrites non-empty fields under `admin`, `runtime`, `responses`, and `embeddings`. Manage `compat`, `auto_delete`, and `current_input_file` via `/admin/settings` or the config file; `history_split` remains only for legacy compatibility; legacy `toolcall` fields are ignored.
+
+> Note: `merge` mode does not update `compat`, `auto_delete`, or `current_input_file`.

 ### `GET /admin/config/export`

@@ -679,7 +781,17 @@ Exports full config in three forms: `config`, `json`, and `base64`.
 ### `POST /admin/keys`

 ```json
-{"key": "new-api-key"}
+{"key": "new-api-key", "name": "Primary", "remark": "Production"}
+```
+
+**Response**: `{"success": true, "total_keys": 3}`
+
+### `PUT /admin/keys/{key}`
+
+Updates the `name` / `remark` of the specified API key. The path `key` is read-only and cannot be changed.
+
+```json
+{"name": "Backup", "remark": "Load test"}
 ```

 **Response**: `{"success": true, "total_keys": 3}`
@@ -688,6 +800,26 @@ Exports full config in three forms: `config`, `json`, and `base64`.

 **Response**: `{"success": true, "total_keys": 2}`

+### `GET /admin/proxies`
+
+Lists proxy configs. Passwords are never returned; the `has_password` field indicates whether a password is set.
+
+### `POST /admin/proxies`
+
+Adds a proxy. Request accepts `id` (optional; auto-generated when omitted), `name`, `type` (`http` / `socks5`), `host`, `port`, `username`, `password`.
+
+### `PUT /admin/proxies/{proxyID}`
+
+Updates a proxy. If `password` is an empty string, the existing secret is preserved.
+
+### `DELETE /admin/proxies/{proxyID}`
+
+Deletes a proxy and automatically clears `proxy_id` on all accounts that reference it.
+
+### `POST /admin/proxies/test`
+
+Tests proxy connectivity: provide `proxy_id` to test a saved proxy; omit it to run a one-off test using proxy fields in the request body.
+
 ### `GET /admin/accounts`

 **Query params**:
@@ -695,7 +827,7 @@ Exports full config in three forms: `config`, `json`, and `base64`.
 | Param | Default | Range |
 | --- | --- | --- |
 | `page` | `1` | ≥ 1 |
-| `page_size` | `10` | 1–100 |
+| `page_size` | `10` | 1–5000 |
 | `q` | empty | Filter by identifier / email / mobile |

 **Response**:
@@ -730,12 +862,30 @@ Returned items also include `test_status`, usually `ok` or `failed`.

 **Response**: `{"success": true, "total_accounts": 6}`

+### `PUT /admin/accounts/{identifier}`
+
+Updates the `name` / `remark` of the specified account. The path `identifier` can be email or mobile and cannot be changed.
+
+```json
+{"name": "Primary account", "remark": "Shared with the team"}
+```
+
+**Response**: `{"success": true, "total_accounts": 6}`
+
 ### `DELETE /admin/accounts/{identifier}`

 `identifier` can be email, mobile, or the synthetic id for token-only accounts (`token:<hash>`).

 **Response**: `{"success": true, "total_accounts": 5}`

+### `PUT /admin/accounts/{identifier}/proxy`
+
+Updates proxy binding for a specific account.
+
+- Request body: `{"proxy_id":"..."}`.
+- Use empty `proxy_id` to unbind proxy.
+- `identifier` supports email / mobile / token-only synthetic id.
+
 ### `GET /admin/queue/status`

 ```json
@@ -746,24 +896,32 @@ Returned items also include `test_status`, usually `ok` or `failed`.
  "available_accounts": ["a@example.com"],
  "in_use_accounts": ["b@example.com"],
  "max_inflight_per_account": 2,
-  "recommended_concurrency": 8
+  "global_max_inflight": 8,
+  "recommended_concurrency": 8,
+  "waiting": 0,
+  "max_queue_size": 8
 }
 ```

 | Field | Description |
 | --- | --- |
-| `available` | Currently available accounts |
-| `in_use` | Currently in-use accounts |
+| `available` | Number of accounts that still have spare inflight capacity |
+| `in_use` | Number of occupied inflight slots |
 | `total` | Total accounts |
+| `available_accounts` | List of account IDs with remaining inflight capacity |
+| `in_use_accounts` | List of account IDs currently in use |
 | `max_inflight_per_account` | Per-account inflight limit |
+| `global_max_inflight` | Global inflight limit |
 | `recommended_concurrency` | Suggested concurrency (`total × max_inflight_per_account`) |
+| `waiting` | Number of queued requests currently waiting |
+| `max_queue_size` | Waiting queue limit |

 ### `POST /admin/accounts/test`

 | Field | Required | Notes |
 | --- | --- | --- |
 | `identifier` | ✅ | email / mobile / token-only synthetic id |
-| `model` | ❌ | default `deepseek-chat` |
+| `model` | ❌ | default `deepseek-v4-flash` |
 | `message` | ❌ | if empty, only session creation is tested |

 **Response**:
@@ -774,14 +932,17 @@ Returned items also include `test_status`, usually `ok` or `failed`.
  "success": true,
  "response_time": 1240,
  "message": "API test successful (session creation only)",
-  "model": "deepseek-chat",
+  "model": "deepseek-v4-flash",
  "session_count": 0,
-  "config_writable": true
+  "config_writable": true,
+  "config_warning": ""
 }
 ```

 If a `message` is provided, `thinking` may also be included when the upstream response carries reasoning text.

+When the configured file path is not writable (for example, read-only `/app/config.json` inside some containers), login/session testing still proceeds; `config_warning` is returned to indicate token persistence failed and the token is memory-only until restart.
+
 ### `POST /admin/accounts/test-all`

 Optional request field: `model`.
@@ -845,7 +1006,7 @@ Test API availability through the service itself.

 | Field | Required | Default |
 | --- | --- | --- |
-| `model` | ❌ | `deepseek-chat` |
+| `model` | ❌ | `deepseek-v4-flash` |
 | `message` | ❌ | `你好` |
 | `api_key` | ❌ | First key in config |

@@ -859,6 +1020,74 @@ Test API availability through the service itself.
 }
 ```

+### `POST /admin/dev/raw-samples/capture`
+
+Internally issues one `/v1/chat/completions` request through the service, then persists the request metadata and raw upstream SSE into `tests/raw_stream_samples/<sample-id>/`.
+
+Common request fields:
+
+| Field | Required | Default | Notes |
+| --- | --- | --- | --- |
+| `message` | No | `你好` | Convenience single-turn user message |
+| `messages` | No | Auto-derived from `message` | OpenAI-style message array |
+| `model` | No | `deepseek-v4-flash` | Target model |
+| `stream` | No | `true` | Recommended to keep streaming enabled so raw SSE is recorded |
+| `api_key` | No | First configured key | Business API key to use |
+| `sample_id` | No | Auto-generated | Sample directory name |
+
+On success, the response headers include:
+
+- `X-Ds2-Sample-Id`
+- `X-Ds2-Sample-Dir`
+- `X-Ds2-Sample-Meta`
+- `X-Ds2-Sample-Upstream`
+
+If the request itself succeeds but the process did not record a new upstream capture, the endpoint returns:
+
+```json
+{"detail":"no upstream capture was recorded"}
+```
+
+### `GET /admin/dev/raw-samples/query`
+
+Searches the current process's in-memory capture entries and groups `completion + continue` rounds by `chat_session_id`.
+
+**Query parameters**:
+
+| Param | Default | Notes |
+| --- | --- | --- |
+| `q` | empty | Fuzzy match against request/response text |
+| `limit` | `20` | Max number of chains returned |
+
+**Response fields** include:
+
+- `items[].chain_key`
+- `items[].capture_ids`
+- `items[].round_count`
+- `items[].initial_label`
+- `items[].request_preview`
+- `items[].response_preview`
+
+### `POST /admin/dev/raw-samples/save`
+
+Persists one selected in-memory capture chain into `tests/raw_stream_samples/<sample-id>/`.
+
+Any one of these selectors is accepted:
+
+```json
+{"chain_key":"session:xxxx","sample_id":"tmp-from-memory"}
+```
+
+```json
+{"capture_id":"cap_xxx","sample_id":"tmp-from-memory"}
+```
+
+```json
+{"query":"Guangzhou weather","sample_id":"tmp-from-memory"}
+```
+
+The success payload includes `sample_id`, `dir`, `meta_path`, and `upstream_path`.
+
 ### `POST /admin/vercel/sync`

 | Field | Required | Notes |
@@ -928,15 +1157,15 @@ Checks the current build version and the latest GitHub Release:
 ```json
 {
  "success": true,
-  "current_version": "2.3.5",
-  "current_tag": "v2.3.5",
+  "current_version": "3.0.0",
+  "current_tag": "v3.0.0",
  "source": "file:VERSION",
  "checked_at": "2026-03-29T00:00:00Z",
-  "latest_tag": "v2.3.6",
-  "latest_version": "2.3.6",
-  "release_url": "https://github.com/CJackHwang/ds2api/releases/tag/v2.3.6",
+  "latest_tag": "v3.0.0",
+  "latest_version": "3.0.0",
+  "release_url": "https://github.com/CJackHwang/ds2api/releases/tag/v3.0.0",
  "published_at": "2026-03-28T12:00:00Z",
-  "has_update": true
+  "has_update": false
 }
 ```

@@ -997,7 +1226,7 @@ Clients should handle HTTP status code plus `error` / `detail` fields.
 | Code | Meaning |
 | --- | --- |
 | `401` | Authentication failed (invalid key/token, or expired admin JWT) |
-| `429` | Too many requests (exceeded inflight + queue capacity) |
+| `429` | Too many requests (exceeded inflight + queue capacity; current responses do not include `Retry-After`) |
 | `503` | Model unavailable or upstream error |

 ---
@@ -1011,7 +1240,7 @@ curl http://localhost:5001/v1/chat/completions \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "deepseek-chat",
+    "model": "deepseek-v4-flash",
    "messages": [{"role": "user", "content": "Hello"}],
    "stream": false
  }'
@@ -1024,7 +1253,7 @@ curl http://localhost:5001/v1/chat/completions \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "deepseek-reasoner",
+    "model": "deepseek-v4-pro",
    "messages": [{"role": "user", "content": "Explain quantum entanglement"}],
    "stream": true
  }'
@@ -1062,7 +1291,7 @@ curl http://localhost:5001/v1/chat/completions \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "deepseek-chat-search",
+    "model": "deepseek-v4-flash-search",
    "messages": [{"role": "user", "content": "Latest news today"}],
    "stream": true
  }'
@@ -1075,7 +1304,7 @@ curl http://localhost:5001/v1/chat/completions \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "deepseek-chat",
+    "model": "deepseek-v4-flash",
    "messages": [{"role": "user", "content": "What is the weather in Beijing?"}],
    "tools": [
      {
@@ -1136,7 +1365,7 @@ curl http://localhost:5001/anthropic/v1/messages \
  -H "Content-Type: application/json" \
  -H "anthropic-version: 2023-06-01" \
  -d '{
-    "model": "claude-sonnet-4-5",
+    "model": "claude-sonnet-4-6",
    "max_tokens": 1024,
    "messages": [{"role": "user", "content": "Hello"}]
  }'
@@ -1173,7 +1402,7 @@ curl http://localhost:5001/v1/chat/completions \
  -H "X-Ds2-Target-Account: user@example.com" \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "deepseek-chat",
+    "model": "deepseek-v4-flash",
    "messages": [{"role": "user", "content": "Hello"}]
  }'
 ```
--- a/API.md
+++ b/API.md
@@ -4,6 +4,8 @@

 本文档描述当前 Go 代码库的实际 API 行为。

+文档导航：[总览](README.MD) / [架构说明](docs/ARCHITECTURE.md) / [部署指南](docs/DEPLOY.md) / [测试指南](docs/TESTING.md)
+
 ---

 ## 目录
@@ -29,7 +31,16 @@
 | Base URL | `http://localhost:5001` 或你的部署域名 |
 | 默认 Content-Type | `application/json` |
 | 健康检查 | `GET /healthz`、`GET /readyz` |
-| CORS | 已启用（`Access-Control-Allow-Origin: *`，允许 `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Vercel-Protection-Bypass`） |
+| CORS | 已启用（统一覆盖 `/v1/*`、`/anthropic/*`、`/v1beta/models/*`、`/admin/*`；浏览器有 `Origin` 时回显该 Origin，否则为 `*`；默认允许 `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`, `X-Goog-Api-Key`, `Anthropic-Version`, `Anthropic-Beta`，并会放行预检里声明的第三方请求头，如 `x-stainless-*`；Vercel 上 `/v1/chat/completions` 的 Node Runtime 也对齐相同行为；内部专用头 `X-Ds2-Internal-Token` 仍被拦截） |
+
+- 所有 JSON 请求体都必须是合法 UTF-8；非法字节序列会在入站阶段被拒绝为 `400 invalid json`。
+
+### 3.0 接口适配层说明
+
+- OpenAI / Claude / Gemini 三套协议已统一挂在同一 `chi` 路由树上，由 `internal/server/router.go` 负责装配。
+- 适配器层职责收敛为：**请求归一化 → DeepSeek 调用 → 协议形态渲染**，减少历史版本中“同能力多处实现”的分叉。
+- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致：推荐模型输出 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`；兼容层也接受 DSML wrapper 别名 `<dsml|tool_calls>`、`<|tool_calls>`、`<｜tool_calls>`、常见 DSML 分隔符漏写形态（如 `<|DSML tool_calls>`）、`DSML` 与工具标签名黏连的常见 typo（如 `<DSMLtool_calls>`），以及旧式 canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`。实现上采用窄容错结构扫描：只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径，裸 `<invoke>` 不计为已支持语法；流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量（如 `123`、`true`、`null`、数组或对象），会按结构化值输出，不再一律当作字符串；若 CDATA 偶发漏闭合，则会在最终 parse / flush 恢复阶段做窄修复，尽量保住已完整包裹的外层工具调用。
+- `Admin API` 将配置与运行时策略分开：`/admin/config*` 管静态配置，`/admin/settings*` 管运行时行为。

 ---

@@ -45,8 +56,7 @@ cp config.example.json config.json
 按部署方式使用：

 - 本地运行：直接读取 `config.json`
- Docker / Vercel：从 `config.json` 生成 Base64，填入 `DS2API_CONFIG_JSON`
- 兼容写法：`DS2API_CONFIG_JSON` 也可直接填原始 JSON；`CONFIG_JSON` 是旧版兼容回退变量
+- Docker / Vercel：从 `config.json` 生成 Base64，填入 `DS2API_CONFIG_JSON`，也可以直接填原始 JSON

 ```bash
 DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
@@ -73,7 +83,7 @@ Vercel 一键部署可先只填 `DS2API_ADMIN_KEY`，部署后在 `/admin` 导
 - token 在 `config.keys` 中 → **托管账号模式**，自动轮询选择账号
 - token 不在 `config.keys` 中 → **直通 token 模式**，直接作为 DeepSeek token 使用

-**可选请求头**：`X-Ds2-Target-Account: <email_or_mobile>` — 指定使用某个托管账号。
+**可选请求头**：`X-Ds2-Target-Account: <email_or_mobile>` — 指定使用某个托管账号；如果目标账号不存在，或托管账号队列已耗尽，相关业务请求会返回 `429`，当前不会附带 `Retry-After` 头。若账号存在但登录/刷新失败，则返回对应的 `401` 或上游错误。
 Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=` 作为凭据来源。

 ### Admin 接口（`/admin/*`）
@@ -91,13 +101,16 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=`
 | 方法 | 路径 | 鉴权 | 说明 |
 | --- | --- | --- | --- |
 | GET | `/healthz` | 无 | 存活探针 |
+| HEAD | `/healthz` | 无 | 存活探针（无响应体） |
 | GET | `/readyz` | 无 | 就绪探针 |
+| HEAD | `/readyz` | 无 | 就绪探针（无响应体） |
 | GET | `/v1/models` | 无 | OpenAI 模型列表 |
 | GET | `/v1/models/{id}` | 无 | OpenAI 单模型查询（支持 alias 入参） |
 | POST | `/v1/chat/completions` | 业务 | OpenAI 对话补全 |
 | POST | `/v1/responses` | 业务 | OpenAI Responses 接口（流式/非流式） |
 | GET | `/v1/responses/{response_id}` | 业务 | 查询已生成 response（内存 TTL） |
 | POST | `/v1/embeddings` | 业务 | OpenAI Embeddings 接口 |
+| POST | `/v1/files` | 业务 | OpenAI Files 上传（multipart/form-data） |
 | GET | `/anthropic/v1/models` | 无 | Claude 模型列表 |
 | POST | `/anthropic/v1/messages` | 业务 | Claude 消息接口 |
 | POST | `/anthropic/v1/messages/count_tokens` | 业务 | Claude token 计数 |
@@ -119,25 +132,43 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=`
 | POST | `/admin/settings/password` | Admin | 更新 Admin 密码并使旧 JWT 失效 |
 | POST | `/admin/config/import` | Admin | 导入配置（merge/replace） |
 | GET | `/admin/config/export` | Admin | 导出完整配置（含 `config`/`json`/`base64`） |
-| POST | `/admin/keys` | Admin | 添加 API key |
+| POST | `/admin/keys` | Admin | 添加 API key（可附 name/remark） |
+| PUT | `/admin/keys/{key}` | Admin | 更新 API key 备注信息 |
 | DELETE | `/admin/keys/{key}` | Admin | 删除 API key |
+| GET | `/admin/proxies` | Admin | 代理列表 |
+| POST | `/admin/proxies` | Admin | 添加代理 |
+| PUT | `/admin/proxies/{proxyID}` | Admin | 更新代理（留空 password 表示保留原密码） |
+| DELETE | `/admin/proxies/{proxyID}` | Admin | 删除代理（自动解绑引用该代理的账号） |
+| POST | `/admin/proxies/test` | Admin | 测试代理连通性 |
 | GET | `/admin/accounts` | Admin | 分页账号列表 |
 | POST | `/admin/accounts` | Admin | 添加账号 |
+| PUT | `/admin/accounts/{identifier}` | Admin | 更新账号 name/remark |
 | DELETE | `/admin/accounts/{identifier}` | Admin | 删除账号 |
+| PUT | `/admin/accounts/{identifier}/proxy` | Admin | 为账号绑定/解绑代理 |
 | GET | `/admin/queue/status` | Admin | 账号队列状态 |
 | POST | `/admin/accounts/test` | Admin | 测试单个账号 |
 | POST | `/admin/accounts/test-all` | Admin | 测试全部账号 |
 | POST | `/admin/accounts/sessions/delete-all` | Admin | 删除某账号的全部会话 |
 | POST | `/admin/import` | Admin | 批量导入 keys/accounts |
 | POST | `/admin/test` | Admin | 测试当前 API 可用性 |
+| POST | `/admin/dev/raw-samples/capture` | Admin | 直接发起一次请求并保存为 raw sample |
+| GET | `/admin/dev/raw-samples/query` | Admin | 按问题关键词查询当前内存抓包链 |
+| POST | `/admin/dev/raw-samples/save` | Admin | 把命中的内存抓包链保存为 raw sample |
 | POST | `/admin/vercel/sync` | Admin | 同步配置到 Vercel |
 | GET | `/admin/vercel/status` | Admin | Vercel 同步状态 |
 | POST | `/admin/vercel/status` | Admin | Vercel 同步状态 / 草稿对比 |
 | GET | `/admin/export` | Admin | 导出配置 JSON/Base64 |
 | GET | `/admin/dev/captures` | Admin | 查看本地抓包记录 |
 | DELETE | `/admin/dev/captures` | Admin | 清空本地抓包记录 |
+| GET | `/admin/chat-history` | Admin | 查看服务器端对话记录 |
+| DELETE | `/admin/chat-history` | Admin | 清空服务器端对话记录 |
+| GET | `/admin/chat-history/{id}` | Admin | 查看单条服务器端对话记录 |
+| DELETE | `/admin/chat-history/{id}` | Admin | 删除单条服务器端对话记录 |
+| PUT | `/admin/chat-history/settings` | Admin | 更新对话记录保留条数 |
 | GET | `/admin/version` | Admin | 查询当前版本与最新 Release |

+OpenAI `/v1/*` 仍是规范路径。对于只配置 DS2API 根地址的客户端，同一套 OpenAI handler 也通过根路径快捷路由暴露：`/models`、`/models/{id}`、`/chat/completions`、`/responses`、`/responses/{response_id}`、`/embeddings`、`/files`。
+
 ---

 ## 健康检查
@@ -160,7 +191,7 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=`

 ### `GET /v1/models`

-无需鉴权。返回当前支持的模型列表。
+无需鉴权。返回当前支持的 DeepSeek 原生模型列表。

 **响应示例**：

@@ -168,22 +199,44 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=`
 {
  "object": "list",
  "data": [
-    {"id": "deepseek-chat", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
-    {"id": "deepseek-reasoner", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
-    {"id": "deepseek-chat-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
-    {"id": "deepseek-reasoner-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}
+    {"id": "deepseek-v4-flash", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-flash-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-pro", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-pro-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-flash-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-flash-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-pro-search", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-pro-search-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-vision", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []},
+    {"id": "deepseek-v4-vision-nothinking", "object": "model", "created": 1677610602, "owned_by": "deepseek", "permission": []}
  ]
 }
 ```

+> 说明：`/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID；常见 alias 仅用于请求入参解析，不会在该接口中单独展开返回。带 `-nothinking` 后缀的模型表示无论请求里是否显式开启 thinking / reasoning，都会强制关闭思考输出。
+
 ### 模型 alias 解析策略

 对 `chat` / `responses` / `embeddings` 的 `model` 字段采用“宽进严出”：

 1. 先匹配 DeepSeek 原生模型。
 2. 再匹配 `model_aliases` 精确映射。
-3. 未命中时按模型家族规则回退（如 `o*`、`gpt-*`、`claude-*`）。
-4. 仍未命中则返回 `invalid_request_error`。
+3. 如果请求名以 `-nothinking` 结尾，则在最终解析出的规范模型上追加对应的无思考变体。
+4. 未命中时按模型家族规则回退（如 `o*`、`gpt-*`、`claude-*`）。
+5. 仍未命中则返回 `invalid_request_error`。
+
+当前内置默认 alias 来自 `internal/config/models.go`，`config.model_aliases` 会在运行时覆盖或补充同名映射。节选：
+
+- OpenAI / Codex：`gpt-4o`、`gpt-4.1`、`gpt-5`、`gpt-5.5`、`gpt-5-codex`、`gpt-5.3-codex`、`codex-mini-latest`
+- OpenAI reasoning：`o1`、`o3`、`o3-deep-research`、`o4-mini`
+- Claude：`claude-opus-4-6`、`claude-sonnet-4-6`、`claude-haiku-4-5`、`claude-3-5-sonnet-latest`
+- Gemini：`gemini-2.5-pro`、`gemini-2.5-flash`、`gemini-pro-vision`
+- 其他兼容族：`llama-*`、`qwen-*`、`mistral-*`、`command-*` 会按家族启发式回退
+
+上述 alias 若在请求名后追加 `-nothinking` 后缀，也会映射到对应的强制关闭 thinking 版本。
+当前视觉能力仅对应 `deepseek-v4-vision` / `deepseek-v4-vision-nothinking`，不会解析出独立的 `vision-search` 变体。
+
+退役历史模型（如 `claude-1.*`、`claude-2.*`、`claude-instant-*`、`gpt-3.5*`）会被显式拒绝。

 ### `POST /v1/chat/completions`

@@ -198,7 +251,7 @@ Content-Type: application/json

 | 字段 | 类型 | 必填 | 说明 |
 | --- | --- | --- | --- |
-| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias（如 `gpt-4o`、`gpt-5-codex`、`o3`、`claude-sonnet-4-5`） |
+| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias（如 `gpt-5.5`、`gpt-5.4-mini`、`gpt-5.3-codex`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-6`、`gemini-2.5-pro`、`gemini-2.5-flash` 等）；若模型名带 `-nothinking` 后缀，则强制关闭 thinking / reasoning |
 | `messages` | array | ✅ | OpenAI 风格消息数组 |
 | `stream` | boolean | ❌ | 默认 `false` |
 | `tools` | array | ❌ | Function Calling 定义 |
@@ -211,14 +264,14 @@ Content-Type: application/json
  "id": "<chat_session_id>",
  "object": "chat.completion",
  "created": 1738400000,
-  "model": "deepseek-reasoner",
+  "model": "deepseek-v4-pro",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "最终回复",
-        "reasoning_content": "思考内容（reasoner 模型）"
+        "reasoning_content": "思考内容（开启 thinking 时）"
      },
      "finish_reason": "stop"
    }
@@ -253,9 +306,10 @@ data: [DONE]
 **字段说明**：

 - 首个 delta 包含 `role: assistant`
- `deepseek-reasoner` / `deepseek-reasoner-search` 模型输出 `delta.reasoning_content`
+- 开启 thinking 时会输出 `delta.reasoning_content`
 - 普通文本输出 `delta.content`
 - 最后一段包含 `finish_reason` 和 `usage`
+- token 计数优先透传上游 DeepSeek SSE（如 `accumulated_token_usage` / `token_usage`）；仅在上游缺失时回退本地估算。失败/中断型结束（例如 `response.failed`）可能不会携带 `usage`

 #### Tool Calls

@@ -288,12 +342,13 @@ data: [DONE]
 }
 ```

-**流式**：命中高置信特征后立即输出 `delta.tool_calls`（不等待完整 JSON 闭合），并持续发送 arguments 增量；已确认的 toolcall 原始 JSON 不会回流到 `delta.content`。
+**流式**：命中高置信特征后立即输出 `delta.tool_calls`（不等待完整工具参数闭合），并持续发送 arguments 增量；已确认的工具调用片段不会回流到 `delta.content`。

 补充说明：

 - **非代码块上下文**下，工具负载即使与普通文本混合，也会按特征识别并产出可执行 tool call（前后普通文本仍可透传）。
- 解析器以 XML/Markup 为最高优先级，并兼容 JSON、ANTML、text-kv 等格式输入；最终按客户端协议转译为对应 tool call 结构（OpenAI/Claude/Gemini）。
+- 解析器当前把 DSML 外壳（`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`）、DSML wrapper 别名（`<dsml|tool_calls>`、`<|tool_calls>`、`<｜tool_calls>`）、常见 DSML 分隔符漏写形态（如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`）、`DSML` 与工具标签名黏连的常见 typo（如 `<DSMLtool_calls>` / `<DSMLinvoke>` / `<DSMLparameter>`）和旧式 canonical XML 工具块（`<tool_calls>` / `<invoke name="...">` / `<parameter name="...">`）作为可执行调用解析；DSML 会先归一化回 XML，内部仍以 XML 解析语义为准。旧式 `<tools>`、`<tool_call>`、`<tool_name>`、`<param>`、`<function_call>`、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理。
+- 当最终可见正文为空但思维链里包含可执行工具调用时，Chat / Responses 会在收尾阶段补发标准 OpenAI `tool_calls` / `function_call` 输出；如果客户端未开启 thinking / reasoning，该思维链只用于检测，不会作为可见正文或 `reasoning_content` 暴露。
 - Markdown fenced code block（例如 ```json ... ```）中的 `tool_calls` 仅视为示例文本，不会被执行。

 ---
@@ -371,13 +426,29 @@ data: [DONE]
 | `model` | string | ✅ | 支持原生模型 + alias 自动映射 |
 | `input` | string/array | ✅ | 支持字符串、字符串数组、token 数组 |

-> 需配置 `embeddings.provider`。当前支持：`mock` / `deterministic` / `builtin`。未配置或不支持时返回标准错误结构（HTTP 501）。
+> 需配置 `embeddings.provider`。当前支持：`mock` / `deterministic` / `builtin`（三者都走同一套本地确定性实现）。未配置或不支持时返回标准错误结构（HTTP 501）。
+
+### `POST /v1/files`
+
+需要业务鉴权。兼容 OpenAI Files 上传接口，当前仅支持 `multipart/form-data`。
+
+| 字段 | 类型 | 必填 | 说明 |
+| --- | --- | --- | --- |
+| `file` | file | ✅ | 上传文件二进制 |
+| `purpose` | string | ❌ | 透传到上游用途字段 |
+
+约束与行为：
+
+- 请求必须为 `multipart/form-data`，否则返回 `400`。
+- 请求体总大小上限 **100 MiB**（超限返回 `413`）。
+- 成功返回 OpenAI `file` 对象（`id/object/bytes/filename/purpose/status` 等字段），并附带 `account_id` 便于定位来源账号。

 ---

 ## Claude 兼容接口

 除标准路径 `/anthropic/v1/*` 外，还支持快捷路径 `/v1/messages`、`/messages`、`/v1/messages/count_tokens`、`/messages/count_tokens`。
+实现上统一走 OpenAI Chat Completions 解析与回译链路，避免多套解析逻辑分叉维护。

 ### `GET /anthropic/v1/models`

@@ -389,17 +460,20 @@ data: [DONE]
 {
  "object": "list",
  "data": [
-    {"id": "claude-sonnet-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
+    {"id": "claude-sonnet-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
+    {"id": "claude-sonnet-4-6-nothinking", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
    {"id": "claude-haiku-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
-    {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"}
+    {"id": "claude-haiku-4-5-nothinking", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
+    {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"},
+    {"id": "claude-opus-4-6-nothinking", "object": "model", "created": 1715635200, "owned_by": "anthropic"}
  ],
  "first_id": "claude-opus-4-6",
-  "last_id": "claude-instant-1.0",
+  "last_id": "claude-3-haiku-20240307-nothinking",
  "has_more": false
 }
 ```

-> 说明：示例仅展示部分模型；实际返回包含 Claude 1.x/2.x/3.x/4.x 历史模型 ID 与常见别名。
+> 说明：示例仅展示部分模型；实际返回除当前主别名外，还包含 Claude 4.x snapshots、3.x 历史模型 ID 与常见别名，并为这些可映射模型额外提供 `-nothinking` 变体。

 ### `POST /anthropic/v1/messages`

@@ -417,12 +491,19 @@ anthropic-version: 2023-06-01

 | 字段 | 类型 | 必填 | 说明 |
 | --- | --- | --- | --- |
-| `model` | string | ✅ | 例如 `claude-sonnet-4-5` / `claude-opus-4-6` / `claude-haiku-4-5`（兼容 `claude-3-5-haiku-latest`），并支持历史 Claude 模型 ID |
+| `model` | string | ✅ | 例如 `claude-sonnet-4-6` / `claude-opus-4-6` / `claude-haiku-4-5`（兼容 `claude-sonnet-4-5`、`claude-3-5-haiku-latest`），并支持历史 Claude 模型 ID；若模型名带 `-nothinking` 后缀，则强制关闭 thinking / reasoning |
 | `messages` | array | ✅ | Claude 风格消息数组 |
 | `max_tokens` | number | ❌ | 缺省自动补 `8192`；当前实现不会硬性截断上游输出 |
 | `stream` | boolean | ❌ | 默认 `false` |
 | `system` | string | ❌ | 可选系统提示 |
 | `tools` | array | ❌ | Claude tool 定义 |
+| `thinking` | object | ❌ | Anthropic thinking 配置；会转译为下游 reasoning 控制，`-nothinking` 模型会忽略 |
+| `temperature` | number | ❌ | 透传到下游；若同时提供 `top_p`，以 `temperature` 为准 |
+| `top_p` | number | ❌ | 当未提供 `temperature` 时透传到下游 |
+| `stop_sequences` | array | ❌ | 作为下游停止序列（stop sequences）透传 |
+| `tool_choice` | string/object | ❌ | 支持 `auto` / `none` / `required` / `{"type":"function","name":"..."}`，并会转译为下游工具选择 |
+
+> 说明：上述 `thinking`、`temperature`、`top_p`、`stop_sequences`、`tool_choice` 都会走兼容层转译；最终是否生效仍取决于当前模型和上游能力。`temperature` 与 `top_p` 同时存在时，`temperature` 优先。

 #### 非流式响应

@@ -431,7 +512,7 @@ anthropic-version: 2023-06-01
  "id": "msg_1738400000000000000",
  "type": "message",
  "role": "assistant",
-  "model": "claude-sonnet-4-5",
+  "model": "claude-sonnet-4-6",
  "content": [
    {"type": "text", "text": "回复内容"}
  ],
@@ -475,7 +556,8 @@ data: {"type":"message_stop"}

 **说明**：

- 名称中包含 `opus` / `reasoner` / `slow` 的模型会输出 `thinking_delta`
+- 默认模型会按各协议入口（surface）的既有规则输出 thinking / reasoning 相关增量
+- 带 `-nothinking` 后缀的模型会强制关闭 thinking，即使请求显式传了 `thinking` / `reasoning` / `reasoning_effort` 也不会输出 `thinking_delta`
 - 不会输出 `signature_delta`（上游 DeepSeek 未提供可验证签名）
 - `tools` 场景优先避免泄露原始工具 JSON，不强制发送 `input_json_delta`

@@ -485,7 +567,7 @@ data: {"type":"message_stop"}

 ```json
 {
-  "model": "claude-sonnet-4-5",
+  "model": "claude-sonnet-4-6",
  "messages": [
    {"role": "user", "content": "你好"}
  ]
@@ -500,8 +582,6 @@ data: {"type":"message_stop"}
 }
 ```

-返回项还会包含 `test_status`，当前值通常为 `ok` 或 `failed`。
-
 ---

 ## Gemini 兼容接口
@@ -514,10 +594,11 @@ data: {"type":"message_stop"}
 - `/v1/models/{model}:streamGenerateContent`（兼容路径）

 鉴权方式同业务接口（`Authorization: Bearer <token>` 或 `x-api-key`）。
+实现上统一走 OpenAI Chat Completions 解析与回译链路，避免多套解析逻辑分叉维护。

 ### `POST /v1beta/models/{model}:generateContent`

-请求体兼容 Gemini `contents` / `tools` 字段，模型名可用 alias 自动映射到 DeepSeek 模型。
+请求体兼容 Gemini `contents` / `tools` 字段，模型名可用 alias 自动映射到 DeepSeek 模型；若路径中的模型名带 `-nothinking` 后缀，则最终会映射到对应的无思考模型。

 响应为 Gemini 兼容结构，核心字段包括：

@@ -532,6 +613,7 @@ data: {"type":"message_stop"}
 - 常规文本：持续返回增量文本 chunk
 - `tools` 场景：会缓冲并在结束时输出 `functionCall` 结构
 - 结束 chunk：包含 `finishReason: "STOP"` 与 `usageMetadata`
+- token 计数优先透传上游 DeepSeek SSE 中的用量字段（如 `accumulated_token_usage` / `token_usage`）；仅在上游缺失时回退到本地估算

 ---

@@ -590,12 +672,19 @@ data: {"type":"message_stop"}

 ### `GET /admin/config`

-返回脱敏后的配置。
+返回脱敏后的配置，包含 `keys` 与 `api_keys`。

 ```json
 {
  "keys": ["k1", "k2"],
+  "api_keys": [
+    {"key": "k1", "name": "主 Key", "remark": "生产流量"},
+    {"key": "k2", "name": "备用 Key", "remark": "压测"}
+  ],
  "env_backed": false,
+  "env_source_present": true,
+  "env_writeback_enabled": true,
+  "config_path": "/data/config.json",
  "accounts": [
    {
      "identifier": "user@example.com",
@@ -606,28 +695,33 @@ data: {"type":"message_stop"}
      "token_preview": "abcde..."
    }
  ],
-  "claude_mapping": {
-    "fast": "deepseek-chat",
-    "slow": "deepseek-reasoner"
+  "model_aliases": {
+    "claude-sonnet-4-6": "deepseek-v4-flash",
+    "claude-opus-4-6": "deepseek-v4-pro"
  }
 }
 ```

 ### `POST /admin/config`

-只更新 `keys`、`accounts`、`claude_mapping`。
+只更新 `keys`、`api_keys`、`accounts`、`model_aliases`。
+如果同时发送 `api_keys` 与 `keys`，优先保留 `api_keys` 中的结构化 `name` / `remark`；`keys` 仅作为旧格式兼容回退。

 **请求**：

 ```json
 {
  "keys": ["k1", "k2"],
+  "api_keys": [
+    {"key": "k1", "name": "主 Key", "remark": "生产流量"},
+    {"key": "k2", "name": "备用 Key", "remark": "压测"}
+  ],
  "accounts": [
    {"email": "user@example.com", "password": "pwd", "token": ""}
  ],
-  "claude_mapping": {
-    "fast": "deepseek-chat",
-    "slow": "deepseek-reasoner"
+  "model_aliases": {
+    "claude-sonnet-4-6": "deepseek-v4-flash",
+    "claude-opus-4-6": "deepseek-v4-pro"
  }
 }
 ```
@@ -639,9 +733,11 @@ data: {"type":"message_stop"}
 - `success`
 - `admin`（`has_password_hash`、`jwt_expire_hours`、`jwt_valid_after_unix`、`default_password_warning`）
 - `runtime`（`account_max_inflight`、`account_max_queue`、`global_max_inflight`、`token_refresh_interval_hours`）
+- `compat`（`wide_input_strict_output`、`strip_reference_markers`）
 - `responses` / `embeddings`
- `auto_delete`（`sessions`）
- `claude_mapping` / `model_aliases`
+- `auto_delete`（`mode`：`none` / `single` / `all`；旧配置 `sessions=true` 仍按 `all` 处理）
+- `current_input_file`（`enabled`（默认返回 `true`）、`min_chars`）
+- `model_aliases`
 - `env_backed`、`needs_vercel_sync`
 - `toolcall` 策略已固定为 `feature_match + high`，不再通过 settings 返回或修改

@@ -651,11 +747,13 @@ data: {"type":"message_stop"}

 - `admin.jwt_expire_hours`
 - `runtime.account_max_inflight` / `runtime.account_max_queue` / `runtime.global_max_inflight` / `runtime.token_refresh_interval_hours`
+- `compat.wide_input_strict_output` / `compat.strip_reference_markers`
 - `responses.store_ttl_seconds`
 - `embeddings.provider`
- `auto_delete.sessions`
- `claude_mapping`
+- `auto_delete.mode`
+- `current_input_file.enabled` / `current_input_file.min_chars`
 - `model_aliases`
+- `history_split` 仅作为旧配置兼容字段保留，不再影响请求处理
 - `toolcall` 策略已固定，不再作为可写入字段

 ### `POST /admin/settings/password`
@@ -679,16 +777,33 @@ data: {"type":"message_stop"}

 请求可直接传配置对象，或使用 `{"config": {...}, "mode":"merge"}` 包裹格式。
 也支持在查询参数里传 `?mode=merge` / `?mode=replace`。
-导入时会接受 `keys`、`accounts`、`claude_mapping` / `claude_model_mapping`、`model_aliases`、`admin`、`runtime`、`responses`、`embeddings`、`auto_delete` 等字段；`toolcall` 相关字段会被忽略。
+`replace` 模式会按完整配置结构替换（保留 Vercel 同步元信息）；`merge` 模式会合并 `keys`、`api_keys`、`accounts`、`model_aliases`，并覆盖 `admin`、`runtime`、`responses`、`embeddings` 中的非空字段。`compat`、`auto_delete`、`current_input_file` 建议通过 `/admin/settings` 或配置文件管理；`history_split` 仅保留为旧配置兼容字段；`toolcall` 相关字段会被忽略。
+
+> 注意：`merge` 模式不会更新 `compat`、`auto_delete`、`current_input_file`。

 ### `GET /admin/config/export`

 导出完整配置，返回 `config`、`json`、`base64` 三种格式。

+响应结构示意：`{"config": {...}, "json": "...", "base64": "..."}`
+
+
+> 注：`_vercel_sync_hash` 和 `_vercel_sync_time` 为内部同步元数据字段，用于 Vercel 配置漂移检测。
+
 ### `POST /admin/keys`

 ```json
-{"key": "new-api-key"}
+{"key": "new-api-key", "name": "主 Key", "remark": "生产流量"}
+```
+
+**响应**：`{"success": true, "total_keys": 3}`
+
+### `PUT /admin/keys/{key}`
+
+更新指定 API key 的 `name` / `remark`，路径参数中的 `key` 为只读标识，不可修改。
+
+```json
+{"name": "备用 Key", "remark": "压测"}
 ```

 **响应**：`{"success": true, "total_keys": 3}`
@@ -697,6 +812,26 @@ data: {"type":"message_stop"}

 **响应**：`{"success": true, "total_keys": 2}`

+### `GET /admin/proxies`
+
+列出代理配置（密码不回传，仅返回 `has_password` 标记）。
+
+### `POST /admin/proxies`
+
+新增代理。请求体支持 `id`（可选，未传则自动生成）、`name`、`type`（`http` / `socks5`）、`host`、`port`、`username`、`password`。
+
+### `PUT /admin/proxies/{proxyID}`
+
+更新指定代理。若请求中 `password` 为空字符串，则保留原密码。
+
+### `DELETE /admin/proxies/{proxyID}`
+
+删除代理，并自动清空所有引用该代理的账号的 `proxy_id`。
+
+### `POST /admin/proxies/test`
+
+测试代理连通性：传 `proxy_id` 时测试已保存代理；不传时按请求体代理字段做临时连通性测试。
+
 ### `GET /admin/accounts`

 **查询参数**：
@@ -704,7 +839,7 @@ data: {"type":"message_stop"}
 | 参数 | 默认 | 范围 |
 | --- | --- | --- |
 | `page` | `1` | ≥ 1 |
-| `page_size` | `10` | 1–100 |
+| `page_size` | `10` | 1–5000 |
 | `q` | 空 | 按 identifier / email / mobile 过滤 |

 **响应**：
@@ -737,12 +872,30 @@ data: {"type":"message_stop"}

 **响应**：`{"success": true, "total_accounts": 6}`

+### `PUT /admin/accounts/{identifier}`
+
+更新指定账号的 `name` / `remark`。路径参数中的 `identifier` 可以是 email 或 mobile，且不可修改。
+
+```json
+{"name": "主账号", "remark": "团队共享"}
+```
+
+**响应**：`{"success": true, "total_accounts": 6}`
+
 ### `DELETE /admin/accounts/{identifier}`

 `identifier` 可为 email、mobile，或 token-only 账号的合成标识（`token:<hash>`）。

 **响应**：`{"success": true, "total_accounts": 5}`

+### `PUT /admin/accounts/{identifier}/proxy`
+
+更新指定账号绑定代理。
+
+- 请求体：`{"proxy_id":"..."}`；
+- `proxy_id` 传空字符串时表示解绑代理；
+- `identifier` 支持 email / mobile / token-only 合成标识。
+
 ### `GET /admin/queue/status`

 ```json
@@ -753,24 +906,32 @@ data: {"type":"message_stop"}
  "available_accounts": ["a@example.com"],
  "in_use_accounts": ["b@example.com"],
  "max_inflight_per_account": 2,
-  "recommended_concurrency": 8
+  "global_max_inflight": 8,
+  "recommended_concurrency": 8,
+  "waiting": 0,
+  "max_queue_size": 8
 }
 ```

 | 字段 | 说明 |
 | --- | --- |
-| `available` | 当前可用账号数 |
-| `in_use` | 当前使用中的账号数 |
+| `available` | 仍有剩余并发槽位的账号数 |
+| `in_use` | 当前已占用的 in-flight 槽位数 |
 | `total` | 总账号数 |
+| `available_accounts` | 仍有剩余并发槽位的账号 ID 列表 |
+| `in_use_accounts` | 当前处于使用中的账号 ID 列表 |
 | `max_inflight_per_account` | 每账号并发上限 |
+| `global_max_inflight` | 全局并发上限 |
 | `recommended_concurrency` | 建议并发值（`total × max_inflight_per_account`） |
+| `waiting` | 当前等待中的请求数 |
+| `max_queue_size` | 等待队列上限 |

 ### `POST /admin/accounts/test`

 | 字段 | 必填 | 说明 |
 | --- | --- | --- |
 | `identifier` | ✅ | email / mobile / token-only 合成标识 |
-| `model` | ❌ | 默认 `deepseek-chat` |
+| `model` | ❌ | 默认 `deepseek-v4-flash` |
 | `message` | ❌ | 空字符串时仅测试会话创建 |

 **响应**：
@@ -781,14 +942,17 @@ data: {"type":"message_stop"}
  "success": true,
  "response_time": 1240,
  "message": "API 测试成功（仅会话创建）",
-  "model": "deepseek-chat",
+  "model": "deepseek-v4-flash",
  "session_count": 0,
-  "config_writable": true
+  "config_writable": true,
+  "config_warning": ""
 }
 ```

 如果传入 `message`，还会附带 `thinking`（当上游返回思考内容时）。

+当部署环境的配置文件路径不可写（例如容器内默认 `/app/config.json` 只读）时，登录与会话测试仍可继续；此时会返回 `config_warning`，提示 token 仅保存在内存中、重启后会丢失。
+
 ### `POST /admin/accounts/test-all`

 可选请求字段：`model`
@@ -851,7 +1015,7 @@ data: {"type":"message_stop"}

 | 字段 | 必填 | 默认值 |
 | --- | --- | --- |
-| `model` | ❌ | `deepseek-chat` |
+| `model` | ❌ | `deepseek-v4-flash` |
 | `message` | ❌ | `你好` |
 | `api_key` | ❌ | 配置中第一个 key |

@@ -865,6 +1029,74 @@ data: {"type":"message_stop"}
 }
 ```

+### `POST /admin/dev/raw-samples/capture`
+
+直接通过服务自身发起一次 `/v1/chat/completions` 请求，并把请求元信息和上游原始 SSE 保存到 `tests/raw_stream_samples/<sample-id>/`。
+
+常用请求字段：
+
+| 字段 | 必填 | 默认值 | 说明 |
+| --- | --- | --- | --- |
+| `message` | 否 | `你好` | 便捷单轮用户消息 |
+| `messages` | 否 | 自动由 `message` 生成 | OpenAI 风格消息数组 |
+| `model` | 否 | `deepseek-v4-flash` | 目标模型 |
+| `stream` | 否 | `true` | 建议保留流式，以记录原始 SSE |
+| `api_key` | 否 | 配置中第一个 key | 调用业务接口使用的 key |
+| `sample_id` | 否 | 自动生成 | 样本目录名 |
+
+成功时会在响应头里附带：
+
+- `X-Ds2-Sample-Id`
+- `X-Ds2-Sample-Dir`
+- `X-Ds2-Sample-Meta`
+- `X-Ds2-Sample-Upstream`
+
+如果请求本身成功，但当前进程没有记录到新的上游抓包，会返回：
+
+```json
+{"detail":"no upstream capture was recorded"}
+```
+
+### `GET /admin/dev/raw-samples/query`
+
+按关键词查询当前进程内存里的抓包记录，并按 `chat_session_id` 归并 `completion + continue` 链。
+
+**查询参数**：
+
+| 参数 | 默认值 | 说明 |
+| --- | --- | --- |
+| `q` | 空 | 按请求体/响应体关键词模糊匹配 |
+| `limit` | `20` | 返回链条数上限 |
+
+**响应字段**包含：
+
+- `items[].chain_key`
+- `items[].capture_ids`
+- `items[].round_count`
+- `items[].initial_label`
+- `items[].request_preview`
+- `items[].response_preview`
+
+### `POST /admin/dev/raw-samples/save`
+
+把当前内存中的某条抓包链落盘为 `tests/raw_stream_samples/<sample-id>/`。
+
+支持以下任一种选中方式：
+
+```json
+{"chain_key":"session:xxxx","sample_id":"tmp-from-memory"}
+```
+
+```json
+{"capture_id":"cap_xxx","sample_id":"tmp-from-memory"}
+```
+
+```json
+{"query":"广州天气","sample_id":"tmp-from-memory"}
+```
+
+成功响应会返回 `sample_id`、`dir`、`meta_path`、`upstream_path`。
+
 ### `POST /admin/vercel/sync`

 | 字段 | 必填 | 说明 |
@@ -934,15 +1166,15 @@ data: {"type":"message_stop"}
 ```json
 {
  "success": true,
-  "current_version": "2.3.5",
-  "current_tag": "v2.3.5",
+  "current_version": "3.0.0",
+  "current_tag": "v3.0.0",
  "source": "file:VERSION",
  "checked_at": "2026-03-29T00:00:00Z",
-  "latest_tag": "v2.3.6",
-  "latest_version": "2.3.6",
-  "release_url": "https://github.com/CJackHwang/ds2api/releases/tag/v2.3.6",
+  "latest_tag": "v3.0.0",
+  "latest_version": "3.0.0",
+  "release_url": "https://github.com/CJackHwang/ds2api/releases/tag/v3.0.0",
  "published_at": "2026-03-28T12:00:00Z",
-  "has_update": true
+  "has_update": false
 }
 ```

@@ -1003,7 +1235,7 @@ Gemini 路由使用 Google 风格错误结构：
 | 状态码 | 说明 |
 | --- | --- |
 | `401` | 鉴权失败（key/token 无效，或 Admin JWT 过期） |
-| `429` | 请求过多（超出并发上限 + 等待队列） |
+| `429` | 请求过多（超出并发上限 + 等待队列；当前不附带 `Retry-After` 头） |
 | `503` | 模型不可用或上游服务异常 |

 ---
@@ -1017,7 +1249,7 @@ curl http://localhost:5001/v1/chat/completions \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "deepseek-chat",
+    "model": "deepseek-v4-flash",
    "messages": [{"role": "user", "content": "你好"}],
    "stream": false
  }'
@@ -1030,7 +1262,7 @@ curl http://localhost:5001/v1/chat/completions \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "deepseek-reasoner",
+    "model": "deepseek-v4-pro",
    "messages": [{"role": "user", "content": "解释一下量子纠缠"}],
    "stream": true
  }'
@@ -1043,7 +1275,7 @@ curl http://localhost:5001/v1/responses \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "gpt-5-codex",
+    "model": "gpt-5.3-codex",
    "input": "写一个 golang 的 hello world",
    "stream": true
  }'
@@ -1068,7 +1300,7 @@ curl http://localhost:5001/v1/chat/completions \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "deepseek-chat-search",
+    "model": "deepseek-v4-flash-search",
    "messages": [{"role": "user", "content": "今天的新闻"}],
    "stream": true
  }'
@@ -1081,7 +1313,7 @@ curl http://localhost:5001/v1/chat/completions \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "deepseek-chat",
+    "model": "deepseek-v4-flash",
    "messages": [{"role": "user", "content": "北京今天天气怎么样？"}],
    "tools": [
      {
@@ -1142,7 +1374,7 @@ curl http://localhost:5001/anthropic/v1/messages \
  -H "Content-Type: application/json" \
  -H "anthropic-version: 2023-06-01" \
  -d '{
-    "model": "claude-sonnet-4-5",
+    "model": "claude-sonnet-4-6",
    "max_tokens": 1024,
    "messages": [{"role": "user", "content": "你好"}]
  }'
@@ -1179,7 +1411,7 @@ curl http://localhost:5001/v1/chat/completions \
  -H "X-Ds2-Target-Account: user@example.com" \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "deepseek-chat",
+    "model": "deepseek-v4-flash",
    "messages": [{"role": "user", "content": "你好"}]
  }'
 ```
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,128 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, religion, or sexual identity
+and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+* Demonstrating empathy and kindness toward other people
+* Being respectful of differing opinions, viewpoints, and experiences
+* Giving and gracefully accepting constructive feedback
+* Accepting responsibility and apologizing to those affected by our mistakes,
+  and learning from the experience
+* Focusing on what is best not just for us as individuals, but for the
+  overall community
+
+Examples of unacceptable behavior include:
+
+* The use of sexualized language or imagery, and sexual attention or
+  advances of any kind
+* Trolling, insulting or derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or email
+  address, without their explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+cjackhwang@qq.com.
+All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series
+of actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or
+permanent ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within
+the community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.0, available at
+https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
+
+Community Impact Guidelines were inspired by [Mozilla's code of conduct
+enforcement ladder](https://github.com/mozilla/diversity).
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see the FAQ at
+https://www.contributor-covenant.org/faq. Translations are available at
+https://www.contributor-covenant.org/translations.
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,12 +1,13 @@
-FROM node:20 AS webui-builder
+FROM node:24 AS webui-builder

 WORKDIR /app/webui
 COPY webui/package.json webui/package-lock.json ./
 RUN npm ci
+COPY config.example.json /app/config.example.json
 COPY webui ./
 RUN npm run build

-FROM golang:1.24 AS go-builder
+FROM golang:1.26 AS go-builder
 WORKDIR /app
 ARG TARGETOS
 ARG TARGETARCH
@@ -19,7 +20,7 @@ RUN set -eux; \
    GOARCH="${TARGETARCH:-$(go env GOARCH)}"; \
    BUILD_VERSION_RESOLVED="${BUILD_VERSION:-}"; \
    if [ -z "${BUILD_VERSION_RESOLVED}" ] && [ -f VERSION ]; then BUILD_VERSION_RESOLVED="$(cat VERSION | tr -d "[:space:]")"; fi; \
-    CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" go build -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION_RESOLVED}" -o /out/ds2api ./cmd/ds2api
+    CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" go build -buildvcs=false -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION_RESOLVED}" -o /out/ds2api ./cmd/ds2api

 FROM busybox:1.36.1-musl AS busybox-tools

@@ -27,6 +28,8 @@ FROM debian:bookworm-slim AS runtime-base
 WORKDIR /app
 RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates \
+    && groupadd -r ds2api && useradd -r -g ds2api -d /app -s /sbin/nologin ds2api \
+    && mkdir -p /app/data /data && chown -R ds2api:ds2api /app /data \
    && rm -rf /var/lib/apt/lists/*
 COPY --from=busybox-tools /bin/busybox /usr/local/bin/busybox
 EXPOSE 5001
@@ -34,9 +37,10 @@ CMD ["/usr/local/bin/ds2api"]

 FROM runtime-base AS runtime-from-source
 COPY --from=go-builder /out/ds2api /usr/local/bin/ds2api
-COPY --from=go-builder /app/internal/deepseek/assets/sha3_wasm_bg.7b9ca65ddd.wasm /app/sha3_wasm_bg.7b9ca65ddd.wasm
-COPY --from=go-builder /app/config.example.json /app/config.example.json
-COPY --from=webui-builder /app/static/admin /app/static/admin
+
+COPY --from=go-builder --chown=ds2api:ds2api /app/config.example.json /app/config.example.json
+COPY --from=webui-builder --chown=ds2api:ds2api /app/static/admin /app/static/admin
+USER ds2api

 FROM busybox-tools AS dist-extract
 ARG TARGETARCH
@@ -53,14 +57,14 @@ RUN set -eux; \
    test -n "${PKG_DIR}"; \
    mkdir -p /out/static; \
    cp "${PKG_DIR}/ds2api" /out/ds2api; \
-    cp "${PKG_DIR}/sha3_wasm_bg.7b9ca65ddd.wasm" /out/sha3_wasm_bg.7b9ca65ddd.wasm; \
    cp "${PKG_DIR}/config.example.json" /out/config.example.json; \
    cp -R "${PKG_DIR}/static/admin" /out/static/admin

 FROM runtime-base AS runtime-from-dist
 COPY --from=dist-extract /out/ds2api /usr/local/bin/ds2api
-COPY --from=dist-extract /out/sha3_wasm_bg.7b9ca65ddd.wasm /app/sha3_wasm_bg.7b9ca65ddd.wasm
-COPY --from=dist-extract /out/config.example.json /app/config.example.json
-COPY --from=dist-extract /out/static/admin /app/static/admin
+
+COPY --from=dist-extract --chown=ds2api:ds2api /out/config.example.json /app/config.example.json
+COPY --from=dist-extract --chown=ds2api:ds2api /out/static/admin /app/static/admin
+USER ds2api

 FROM runtime-from-source AS final
--- a/LICENSE
+++ b/LICENSE
@@ -1,5 +1,5 @@
-                    GNU GENERAL PUBLIC LICENSE
-                       Version 3, 29 June 2007
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
@@ -7,17 +7,15 @@

                            Preamble

-  The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.

  The licenses for most software and other practical works are designed
 to take away your freedom to share and change the works.  By contrast,
-the GNU General Public License is intended to guarantee your freedom to
+our General Public Licenses are intended to guarantee your freedom to
 share and change all versions of a program--to make sure it remains free
-software for all its users.  We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors.  You can apply it to
-your programs, too.
+software for all its users.

  When we speak of free software, we are referring to freedom, not
 price.  Our General Public Licenses are designed to make sure that you
@@ -26,44 +24,34 @@ them if you wish), that you receive source code or can get it if you
 want it, that you can change the software or use pieces of it in new
 free programs, and that you know you can do these things.

-  To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights.  Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.

-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received.  You must make sure that they, too, receive
-or can get the source code.  And you must show them these terms so they
-know their rights.
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.

-  Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.

-  For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software.  For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
-  Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so.  This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software.  The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable.  Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products.  If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
-  Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary.  To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.

  The precise terms and conditions for copying, distribution and
 modification follow.
@@ -72,7 +60,7 @@ modification follow.

  0. Definitions.

-  "This License" refers to version 3 of the GNU General Public License.
+  "This License" refers to version 3 of the GNU Affero General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
 works, such as semiconductor masks.
@@ -549,35 +537,45 @@ to collect a royalty for further conveying from those to whom you convey
 the Program, the only way you could satisfy both those terms and this
 License would be to refrain entirely from conveying the Program.

-  13. Use with the GNU Affero General Public License.
+  13. Remote Network Interaction; Use with the GNU General Public License.
+
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.

  Notwithstanding any other provision of this License, you have
 permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
+under version 3 of the GNU General Public License into a single
 combined work, and to convey the resulting work.  The terms of this
 License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
 address new problems or concerns.

  Each version is given a distinguishing version number.  If the
-Program specifies that a certain numbered version of the GNU General
+Program specifies that a certain numbered version of the GNU Affero General
 Public License "or any later version" applies to it, you have the
 option of following the terms and conditions either of that numbered
 version or of any later version published by the Free Software
 Foundation.  If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
+GNU Affero General Public License, you may choose any version ever published
 by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
+versions of the GNU Affero General Public License can be used, that proxy's
 public statement of acceptance of a version permanently authorizes you
 to choose that version for the Program.

@@ -635,40 +633,29 @@ the "copyright" line and a pointer to where the full notice is found.
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
+    it under the terms of the GNU Affero General Public License as published
+    by the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+    GNU Affero General Public License for more details.

-    You should have received a copy of the GNU General Public License
+    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

 Also add information on how to contact you by electronic and paper mail.

-  If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
-    <program>  Copyright (C) <year>  <name of author>
-    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.

  You should also get your employer (if you work as a programmer) or school,
 if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
+For more information on this, and how to apply and follow the GNU AGPL, see
 <https://www.gnu.org/licenses/>.
-
-  The GNU General Public License does not permit incorporating your program
-into proprietary programs.  If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library.  If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.  But first, please read
-<https://www.gnu.org/licenses/why-not-lgpl.html>.
--- a/README.MD
+++ b/README.MD
@@ -4,17 +4,24 @@

 # DS2API

+<a href="https://trendshift.io/repositories/24508" target="_blank"><img src="https://trendshift.io/api/badge/repositories/24508" alt="CJackHwang%2Fds2api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+
 [![License](https://img.shields.io/github/license/CJackHwang/ds2api.svg)](LICENSE)
 ![Stars](https://img.shields.io/github/stars/CJackHwang/ds2api.svg)
 ![Forks](https://img.shields.io/github/forks/CJackHwang/ds2api.svg)
 [![Release](https://img.shields.io/github/v/release/CJackHwang/ds2api?display_name=tag)](https://github.com/CJackHwang/ds2api/releases)
 [![Docker](https://img.shields.io/badge/docker-ready-blue.svg)](docs/DEPLOY.md)
+
 [![Deploy on Zeabur](https://zeabur.com/button.svg)](https://zeabur.com/templates/L4CFHP)
 [![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https://github.com/CJackHwang/ds2api)

 语言 / Language: [中文](README.MD) | [English](README.en.md)

-将 DeepSeek Web 对话能力转换为 OpenAI、Claude 与 Gemini 兼容 API。后端为 **Go 全量实现**，前端为 React WebUI 管理台（源码在 `webui/`，部署时自动构建到 `static/admin`）。
+将 DeepSeek Web 对话能力转换为 OpenAI、Claude 与 Gemini 兼容 API。核心后端以 **Go** 实现，Vercel 流式桥接额外使用少量 Node Runtime，前端为 React WebUI 管理台（源码在 `webui/`，部署时自动构建到 `static/admin`）。
+
+文档入口：[文档导航](docs/README.md) / [架构说明](docs/ARCHITECTURE.md) / [接口文档](API.md)
+
+【感谢 Linux.do 社区及 GitHub 社区各位开发者对项目的支持与贡献】

 > **重要免责声明**
 >
@@ -24,43 +31,86 @@
 >
 > 请勿将本项目用于违反服务条款、协议、法律法规或平台规则的场景。商业使用前请自行确认 `LICENSE`、相关协议以及你是否获得了作者的书面许可。

-## 架构概览
+## 目录
+
+- [架构概览（摘要）](#架构概览摘要)
+- [核心能力](#核心能力)
+- [平台兼容矩阵](#平台兼容矩阵)
+- [模型支持](#模型支持)
+  - [OpenAI 接口](#openai-接口get-v1models)
+  - [Claude 接口](#claude-接口get-anthropicv1models)
+  - [Gemini 接口](#gemini-接口)
+- [快速开始](#快速开始)
+  - [方式一：下载 Release 构建包](#方式一下载-release-构建包)
+  - [方式二：Docker 运行](#方式二docker-运行)
+  - [方式三：Vercel 部署](#方式三vercel-部署)
+  - [方式四：本地源码运行](#方式四本地源码运行)
+- [配置说明](#配置说明)
+- [鉴权模式](#鉴权模式)
+- [并发模型](#并发模型)
+- [Tool Call 适配](#tool-call-适配)
+- [本地开发抓包工具](#本地开发抓包工具)
+- [文档索引](#文档索引)
+- [测试](#测试)
+- [Release 自动构建（GitHub Actions）](#release-自动构建github-actions)
+- [免责声明](#免责声明)
+
+## 架构概览（摘要）

 ```mermaid
 flowchart LR
-    Client["🖥️ 客户端\n(OpenAI / Claude / Gemini 兼容)"]
+    Client["🖥️ 客户端 / SDK\n(OpenAI / Claude / Gemini)"]
+    Upstream["☁️ DeepSeek API"]

-    subgraph DS2API["DS2API 服务"]
-        direction TB
-        CORS["CORS 中间件"]
-        Auth["🔐 鉴权中间件"]
+    subgraph DS2API["DS2API 4.x（模块化 HTTP surface + PromptCompat 内核）"]
+        Router["chi Router + 中间件\n(RequestID / RealIP / Logger / Recoverer / CORS)"]

-        subgraph Adapters["适配器层"]
-            OA["OpenAI 适配器\n/v1/*"]
-            CA["Claude 适配器\n/anthropic/*"]
-            GA["Gemini 适配器\n/v1beta/models/*"]
+        subgraph HTTP["HTTP API surface"]
+            OA["OpenAI\nchat / responses / files / embeddings"]
+            CA["Claude\n/anthropic/* + /v1/messages"]
+            GA["Gemini\n/v1beta/models/* + /v1/models/*"]
+            Admin["Admin API\n资源子包"]
+            WebUI["WebUI\n/admin（静态托管）"]
+            Vercel["Vercel Node Stream\n/v1/chat/completions"]
        end

-        subgraph Support["支撑模块"]
-            Pool["📦 账号池 / 并发队列"]
-            PoW["⚙️ PoW WASM\n(wazero)"]
+        subgraph Runtime["运行时核心能力"]
+            Compat["PromptCompat\n(API -> 网页纯文本上下文)"]
+            Chat["Chat / Responses Runtime\n(统一工具调用与流式语义)"]
+            Auth["Auth Resolver\n(API key / bearer / x-goog-api-key)"]
+            Pool["Account Pool + Queue\n(并发槽位 + 等待队列)"]
+            DSClient["DeepSeek Client\n(Session / Auth / Completion / Files)"]
+            Pow["PoW 实现\n(纯 Go)"]
+            Tool["Tool Sieve\n(Go/Node 语义对齐)"]
+            History["History Split\n(长历史文件化)"]
        end
-
-        Admin["🛠️ Admin API\n/admin/*"]
-        WebUI["🌐 WebUI\n(/admin)"]
    end

-    DS["☁️ DeepSeek API"]
+    Client --> Router
+    Router --> OA & CA & GA
+    Router --> Admin
+    Router --> WebUI
+    Router --> Vercel

-    Client -- "请求" --> CORS --> Auth
-    Auth --> OA & CA & GA
-    OA & CA & GA -- "调用" --> DS
-    Auth --> Admin
-    OA & CA & GA -. "轮询选账号" .-> Pool
-    OA & CA & GA -. "计算 PoW" .-> PoW
-    DS -- "响应" --> Client
+    OA --> Compat
+    CA & GA --> Compat
+    Compat --> Chat
+    Compat -.长历史.-> History
+    Vercel -.Go prepare.-> Chat
+    Vercel -.Node SSE.-> Tool
+    Chat --> Auth
+    Chat -.账号轮询.-> Pool
+    Chat -.工具调用解析.-> Tool
+    Chat -.PoW 计算.-> Pow
+    Auth --> DSClient
+    DSClient --> Upstream
+    Upstream --> DSClient
+    Chat --> Client
+    Vercel --> Client
 ```

+详细架构拆分与目录职责见 [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md)。
+
 - **后端**：Go（`cmd/ds2api/`、`api/`、`internal/`），不依赖 Python 运行时
 - **前端**：React 管理台（`webui/`），运行时托管静态构建产物
 - **部署**：本地运行、Docker、Vercel Serverless、Linux systemd
@@ -69,17 +119,20 @@ flowchart LR

 | 能力 | 说明 |
 | --- | --- |
-| OpenAI 兼容 | `GET /v1/models`、`GET /v1/models/{id}`、`POST /v1/chat/completions`、`POST /v1/responses`、`GET /v1/responses/{response_id}`、`POST /v1/embeddings` |
+| OpenAI 兼容 | `GET /v1/models`、`GET /v1/models/{id}`、`POST /v1/chat/completions`、`POST /v1/responses`、`GET /v1/responses/{response_id}`、`POST /v1/embeddings`、`POST /v1/files` |
 | Claude 兼容 | `GET /anthropic/v1/models`、`POST /anthropic/v1/messages`、`POST /anthropic/v1/messages/count_tokens`（及快捷路径 `/v1/messages`、`/messages`） |
 | Gemini 兼容 | `POST /v1beta/models/{model}:generateContent`、`POST /v1beta/models/{model}:streamGenerateContent`（及 `/v1/models/{model}:*` 路径） |
+| 统一 CORS 兼容 | `/v1/*`、`/anthropic/*`、`/v1beta/models/*`、`/admin/*` 统一走同一套 CORS 策略；Vercel 上 `/v1/chat/completions` 的 Node Runtime 也对齐相同放行规则，尽量减少第三方预检请求头限制 |
 | 多账号轮询 | 自动 token 刷新、邮箱/手机号双登录方式 |
 | 并发队列控制 | 每账号 in-flight 上限 + 等待队列，动态计算建议并发值 |
-| DeepSeek PoW | WASM 计算（`wazero`），无需外部 Node.js 依赖 |
+| DeepSeek PoW | 纯 Go 高性能实现（DeepSeekHashV1），毫秒级响应 |
 | Tool Calling | 防泄漏处理：非代码块高置信特征识别、`delta.tool_calls` 早发、结构化增量输出 |
-| Admin API | 配置管理、运行时设置热更新、账号测试 / 批量测试、会话清理、导入导出、Vercel 同步、版本检查 |
-| WebUI 管理台 | `/admin` 单页应用（中英文双语、深色模式） |
+| Admin API | 配置管理、运行时设置热更新、代理管理、账号测试 / 批量测试、会话清理、导入导出、Vercel 同步、版本检查 |
+| WebUI 管理台 | `/admin` 单页应用（中英文双语、深色模式，支持查看服务器端对话记录） |
 | 运维探针 | `GET /healthz`（存活）、`GET /readyz`（就绪） |

+OpenAI `/v1/*` 仍是推荐的规范路径；同时支持 `/models`、`/chat/completions`、`/responses`、`/embeddings`、`/files` 等根路径快捷路由，方便只配置 DS2API 根地址的第三方客户端。
+
 ## 平台兼容矩阵

 | 级别 | 平台 | 当前状态 |
@@ -90,44 +143,63 @@ flowchart LR
 | P0 | Anthropic SDK（messages） | ✅ |
 | P0 | Google Gemini SDK（generateContent） | ✅ |
 | P1 | LangChain / LlamaIndex / OpenWebUI（OpenAI 兼容接入） | ✅ |
-| P2 | MCP 独立桥接层 | 规划中 |

 ## 模型支持

-### OpenAI 接口
+### OpenAI 接口（`GET /v1/models`）

-| 模型 | thinking | search |
-| --- | --- | --- |
-| `deepseek-chat` | ❌ | ❌ |
-| `deepseek-reasoner` | ✅ | ❌ |
-| `deepseek-chat-search` | ❌ | ✅ |
-| `deepseek-reasoner-search` | ✅ | ✅ |
+| 模型类型 | 模型 ID | thinking | search |
+| --- | --- | --- | --- |
+| default | `deepseek-v4-flash` | 默认开启，可由请求参数控制 | ❌ |
+| default | `deepseek-v4-flash-nothinking` | 永久关闭，不受请求参数影响 | ❌ |
+| expert | `deepseek-v4-pro` | 默认开启，可由请求参数控制 | ❌ |
+| expert | `deepseek-v4-pro-nothinking` | 永久关闭，不受请求参数影响 | ❌ |
+| default | `deepseek-v4-flash-search` | 默认开启，可由请求参数控制 | ✅ |
+| default | `deepseek-v4-flash-search-nothinking` | 永久关闭，不受请求参数影响 | ✅ |
+| expert | `deepseek-v4-pro-search` | 默认开启，可由请求参数控制 | ✅ |
+| expert | `deepseek-v4-pro-search-nothinking` | 永久关闭，不受请求参数影响 | ✅ |
+| vision | `deepseek-v4-vision` | 默认开启，可由请求参数控制 | ❌ |
+| vision | `deepseek-v4-vision-nothinking` | 永久关闭，不受请求参数影响 | ❌ |

-### Claude 接口
+除原生模型外，也支持常见 alias 输入（如 `gpt-4.1`、`gpt-5`、`gpt-5-codex`、`o3`、`claude-*`、`gemini-*` 等），但 `/v1/models` 返回的是规范化后的 DeepSeek 原生模型 ID。若 alias 名本身追加 `-nothinking` 后缀，也会映射到对应的强制关思考模型。完整 alias 行为以 [API.md](API.md#模型-alias-解析策略) 和 `config.example.json` 为准。
+当前上游视觉模型只暴露 `vision` 通道，不提供独立的联网搜索视觉变体。

-| 模型 | 默认映射 |
+### Claude 接口（`GET /anthropic/v1/models`）
+
+| 当前常用模型 | 默认映射 |
 | --- | --- |
-| `claude-sonnet-4-5` | `deepseek-chat` |
-| `claude-haiku-4-5`（兼容 `claude-3-5-haiku-latest`） | `deepseek-chat` |
-| `claude-opus-4-6` | `deepseek-reasoner` |
-
-可通过配置中的 `claude_mapping` 或 `claude_model_mapping` 覆盖映射关系。
-另外，`/anthropic/v1/models` 现已包含 Claude 1.x/2.x/3.x/4.x 历史模型 ID 与常见别名，便于旧客户端直接兼容。
+| `claude-sonnet-4-6` | `deepseek-v4-flash` |
+| `claude-sonnet-4-6-nothinking` | `deepseek-v4-flash-nothinking` |
+| `claude-haiku-4-5`（兼容 `claude-3-5-haiku-latest`） | `deepseek-v4-flash` |
+| `claude-haiku-4-5-nothinking` | `deepseek-v4-flash-nothinking` |
+| `claude-opus-4-6` | `deepseek-v4-pro` |
+| `claude-opus-4-6-nothinking` | `deepseek-v4-pro-nothinking` |

+可通过配置中的 `model_aliases` 覆盖映射关系；若请求模型名带 `-nothinking`，会在最终映射结果上强制追加无思考语义。
+`/anthropic/v1/models` 除上述主别名外，还会返回 Claude 4.x snapshots、3.x 历史模型 ID 与常见 alias，便于旧客户端直接兼容。

 #### Claude Code 接入避坑（实测）

 - `ANTHROPIC_BASE_URL` 推荐直接指向 DS2API 根地址（例如 `http://127.0.0.1:5001`），Claude Code 会请求 `/v1/messages?beta=true`。
 - `ANTHROPIC_API_KEY` 需要与 `config.json` 中 `keys` 一致；建议同时保留常规 key 与 `sk-ant-*` 形态 key，兼容不同客户端校验习惯。
 - 若系统设置了代理，建议对 DS2API 地址配置 `NO_PROXY=127.0.0.1,localhost,<你的主机IP>`，避免本地回环请求被代理拦截。
- 如遇“工具调用输出成文本、未执行”问题，请升级到包含 Claude 工具调用多格式解析（JSON/XML/ANTML/invoke）的版本。
+- 如遇“工具调用输出成文本、未执行”问题，请优先检查模型输出是否为推荐的 DSML 工具块：`<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`。兼容层也接受旧式 canonical XML：`<tool_calls><invoke name="..."><parameter name="...">...`；旧式 `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`、`<function_call>`、`tool_use` 或纯 JSON `tool_calls` 片段不会执行。

 ### Gemini 接口

-Gemini 适配器将模型名通过 `model_aliases` 或内置规则映射到 DeepSeek 原生模型，支持 `generateContent` 和 `streamGenerateContent` 两种调用方式，并完整支持 Tool Calling（`functionDeclarations` → `functionCall` 输出）。
+Gemini 适配器将模型名通过 `model_aliases` 或内置规则映射到 DeepSeek 原生模型，支持 `generateContent` 和 `streamGenerateContent` 两种调用方式，并完整支持 Tool Calling（`functionDeclarations` → `functionCall` 输出）。若 Gemini 模型名带 `-nothinking` 后缀，例如 `gemini-2.5-pro-nothinking`，会映射到对应的强制关闭思考模型。

 ## 快速开始

+### 部署方式优先级建议
+
+推荐按以下顺序选择部署方式：
+
+1. **下载 Release 构建包运行**：最省事，产物已编译完成，最适合大多数用户。
+2. **Docker / GHCR 镜像部署**：适合需要容器化、编排或云环境部署。
+3. **Vercel 部署**：适合已有 Vercel 环境且接受其平台约束的场景。
+4. **本地源码运行 / 自行编译**：适合开发、调试或需要自行修改代码的场景。
+
 ### 通用第一步（所有部署方式）

 把 `config.json` 作为唯一配置源（推荐做法）：
@@ -139,37 +211,31 @@ cp config.example.json config.json

 后续部署建议：
 - 本地运行：直接读取 `config.json`
- Docker / Vercel：由 `config.json` 生成 `DS2API_CONFIG_JSON`（Base64）注入环境变量
- 兼容写法：`DS2API_CONFIG_JSON` 也可以直接写原始 JSON；`CONFIG_JSON` 是旧版回退变量
+- Docker / Vercel：由 `config.json` 生成 `DS2API_CONFIG_JSON`（Base64）注入环境变量，也可以直接写原始 JSON

-### 方式一：本地运行
+WebUI 管理台里的“全量配置模板”也直接复用同一份 `config.example.json`，所以更新示例文件后，前端模板会自动保持一致。

-**前置要求**：Go 1.24+，Node.js 20+（仅在需要构建 WebUI 时）
+### 方式一：下载 Release 构建包
+
+每次发布 Release 时，GitHub Actions 会自动构建多平台二进制包：

 ```bash
-# 1. 克隆仓库
-git clone https://github.com/CJackHwang/ds2api.git
-cd ds2api
-
-# 2. 配置
+# 下载对应平台的压缩包后
+tar -xzf ds2api_<tag>_linux_amd64.tar.gz
+cd ds2api_<tag>_linux_amd64
 cp config.example.json config.json
-# 编辑 config.json，填入你的 DeepSeek 账号信息和 API key
-
-# 3. 启动
-go run ./cmd/ds2api
+# 编辑 config.json
+./ds2api
 ```

-默认监听地址：`http://localhost:5001`
-
-> **WebUI 自动构建**：本地首次启动时，若 `static/admin` 不存在，会自动尝试执行 `npm ci`（仅在缺少依赖时）和 `npm run build -- --outDir static/admin --emptyOutDir`（需要本机有 Node.js）。你也可以手动构建：`./scripts/build-webui.sh`
-
 ### 方式二：Docker 运行

 ```bash
-# 1. 准备环境变量文件
+# 1. 准备环境变量和配置文件
 cp .env.example .env
+cp config.example.json config.json

-# 2. 编辑 .env（至少设置 DS2API_ADMIN_KEY）
+# 2. 编辑 .env（至少设置 DS2API_ADMIN_KEY；如需修改宿主机端口，可额外设置 DS2API_HOST_PORT）
 #    DS2API_ADMIN_KEY=请替换为强密码

 # 3. 启动
@@ -179,7 +245,9 @@ docker-compose up -d
 docker-compose logs -f
 ```

-默认 `docker-compose.yml` 会把宿主机 `6011` 映射到容器内的 `5001`。如果你希望直接对外暴露 `5001`，请调整 `ports` 配置。
+默认 `docker-compose.yml` 会把宿主机 `6011` 映射到容器内的 `5001`。如果你希望直接对外暴露 `5001`，请设置 `DS2API_HOST_PORT=5001`（或者手动调整 `ports` 配置）。
+同时默认把 `./config.json` 挂载到容器 `/data/config.json`，并设置 `DS2API_CONFIG_PATH=/data/config.json`，用于避免 `/app` 只读导致运行时 token 持久化失败。
+镜像会预创建 `/data` 并授权给非 root 的 `ds2api` 用户；如果使用单文件 bind mount，请确保宿主机 `config.json` 对容器用户可读写，例如 `chmod 644 config.json`。

 更新镜像：`docker-compose up -d --build`

@@ -211,136 +279,50 @@ cp config.example.json config.json
 base64 < config.json | tr -d '\n'
 ```

-> **流式说明**：`/v1/chat/completions` 在 Vercel 上默认走 `api/chat-stream.js`（Node Runtime）以保证实时 SSE。鉴权、账号选择、会话/PoW 准备仍由 Go 内部 prepare 接口完成；流式响应（含 `tools`）在 Node 侧执行与 Go 对齐的输出组装与防泄漏处理。
+> **流式说明**：`/v1/chat/completions` 在 Vercel 上默认走 `api/chat-stream.js`（Node Runtime）以保证实时 SSE。鉴权、账号选择、会话/PoW 准备仍由 Go 内部 prepare 接口完成；流式响应（含 `tools`）在 Node 侧执行与 Go 对齐的输出组装与防泄漏处理。虽然这里只有 OpenAI chat 流式走 Node，但 CORS 放行策略仍与 Go 主路由保持一致，统一覆盖第三方客户端预检场景。

 详细部署说明请参阅 [部署指南](docs/DEPLOY.md)。

-### 方式四：下载 Release 构建包
+### 方式四：本地源码运行

-每次发布 Release 时，GitHub Actions 会自动构建多平台二进制包：
+**前置要求**：Go 1.26+，Node.js `20.19+` 或 `22.12+`（仅在需要构建 WebUI 时）；同时确保 `npm` 可用，建议 `npm 10+`

 ```bash
-# 下载对应平台的压缩包后
-tar -xzf ds2api_<tag>_linux_amd64.tar.gz
-cd ds2api_<tag>_linux_amd64
+# 1. 克隆仓库
+git clone https://github.com/CJackHwang/ds2api.git
+cd ds2api
+
+# 2. 配置
 cp config.example.json config.json
-# 编辑 config.json
-./ds2api
+# 编辑 config.json，填入你的 DeepSeek 账号信息和 API key
+
+# 3. 启动
+go run ./cmd/ds2api
 ```

-### 方式五：OpenCode CLI 接入
+默认本地访问地址：`http://127.0.0.1:5001`

-1. 复制示例配置：
+服务实际绑定：`0.0.0.0:5001`，因此同一局域网设备通常也可以通过你的内网 IP 访问。

-```bash
-cp opencode.json.example opencode.json
-```
-
-2. 编辑 `opencode.json`：
- 将 `baseURL` 改为你的 DS2API 地址（例如 `https://your-domain.com/v1`）
- 将 `apiKey` 改为你的 DS2API key（对应 `config.keys`）
-
-3. 在项目目录启动 OpenCode CLI（按你的安装方式运行 `opencode`）。
-
-> 建议优先使用 OpenAI 兼容路径（`/v1/*`），即示例里的 `@ai-sdk/openai-compatible` provider。
-> 若客户端支持 `wire_api`，可分别测试 `responses` 与 `chat`，DS2API 两条链路都兼容。
+> **WebUI 自动构建**：本地首次启动时，若 `static/admin` 不存在，会自动尝试执行 `npm ci`（仅在缺少依赖时）和 `npm run build -- --outDir static/admin --emptyOutDir`（需要本机有 Node.js 和 npm）。你也可以手动构建：`./scripts/build-webui.sh`

 ## 配置说明

-### `config.json` 示例
+`README` 只保留快速入口，完整字段请以 [config.example.json](config.example.json) 为模板，并参考 [部署指南](docs/DEPLOY.md#0-前置要求) 与 [API 配置最佳实践](API.md#配置最佳实践)。

-```json
-{
-  "keys": ["your-api-key-1", "your-api-key-2"],
-  "accounts": [
-    {
-      "email": "user@example.com",
-      "password": "your-password"
-    },
-    {
-      "mobile": "12345678901",
-      "password": "your-password"
-    }
-  ],
-  "model_aliases": {
-    "gpt-4o": "deepseek-chat",
-    "gpt-5-codex": "deepseek-reasoner",
-    "o3": "deepseek-reasoner"
-  },
-  "compat": {
-    "wide_input_strict_output": true
-  },
-  "responses": {
-    "store_ttl_seconds": 900
-  },
-  "embeddings": {
-    "provider": "deterministic"
-  },
-  "claude_mapping": {
-    "fast": "deepseek-chat",
-    "slow": "deepseek-reasoner"
-  },
-  "admin": {
-    "jwt_expire_hours": 24
-  },
-  "runtime": {
-    "account_max_inflight": 2,
-    "account_max_queue": 0,
-    "global_max_inflight": 0,
-    "token_refresh_interval_hours": 6
-  },
-  "auto_delete": {
-    "sessions": false
-  }
-}
-```
+常用字段：

- `keys`：API 访问密钥列表，客户端通过 `Authorization: Bearer <key>` 鉴权
- `accounts`：DeepSeek 账号列表，支持 `email` 或 `mobile` 登录
- `token`：配置文件中即使填写也会在加载时被清空（不会从 `config.json` 读取 token）；实际 token 仅在运行时内存中维护并自动刷新
- `model_aliases`：常见模型名（如 GPT/Codex/Claude）到 DeepSeek 模型的映射
- `compat.wide_input_strict_output`：建议保持 `true`（当前实现默认宽进严出）
- `toolcall`：策略已固定为特征匹配 + 高置信早发，不再作为可配置项
- `responses.store_ttl_seconds`：`/v1/responses/{id}` 的内存缓存 TTL
- `embeddings.provider`：embedding 提供方（当前内置 `deterministic/mock/builtin`）
- `claude_mapping`：字典中 `fast`/`slow` 后缀映射到对应 DeepSeek 模型（兼容读取 `claude_model_mapping`）
- `admin`：管理后台设置（JWT 过期时间、密码哈希等），可通过 Admin Settings API 热更新
- `runtime`：运行时参数（并发限制、队列大小、托管账号 token 刷新间隔），可通过 Admin Settings API 热更新；`account_max_queue=0`/`global_max_inflight=0` 表示按推荐值自动计算，`token_refresh_interval_hours=6` 为默认强制重登间隔
- `auto_delete.sessions`：是否在请求结束后自动清理 DeepSeek 会话（默认 `false`，可在 Settings 热更新）
+- `keys` / `api_keys`：客户端访问密钥，`api_keys` 支持 `name` 与 `remark` 元信息，`keys` 继续兼容。
+- `accounts`：DeepSeek 托管账号，支持 `email` 或 `mobile` 登录，可配置代理、名称和备注。
+- `model_aliases`：OpenAI / Claude / Gemini 共用的模型 alias 映射。
+- `runtime`：账号并发、队列与 token 刷新策略，可通过 Admin Settings 热更新。
+- `auto_delete.mode`：请求结束后的远端会话清理策略，支持 `none` / `single` / `all`。
+- `history_split`：旧轮次拆分字段，已废弃并忽略，仅保留兼容旧配置。
+- `current_input_file`：唯一生效的独立拆分策略；默认开启且阈值为 `0`，触发时将完整上下文合并上传为 `DS2API_HISTORY.txt` 上下文文件。
+- 如果关闭 `current_input_file`，请求会直接透传，不上传拆分上下文文件。
+- `thinking_injection`：默认开启；在最新 user 消息末尾追加思考增强提示词，提高高强度推理与工具调用前的思考稳定性；`prompt` 留空时使用内置默认提示词。

-### 环境变量
-
-| 变量 | 用途 | 默认值 |
-| --- | --- | --- |
-| `PORT` | 服务端口 | `5001` |
-| `LOG_LEVEL` | 日志级别 | `INFO`（可选：`DEBUG`/`WARN`/`ERROR`） |
-| `DS2API_ADMIN_KEY` | Admin 登录密钥 | `admin` |
-| `DS2API_JWT_SECRET` | Admin JWT 签名密钥 | 等同 `DS2API_ADMIN_KEY` |
-| `DS2API_JWT_EXPIRE_HOURS` | Admin JWT 过期小时数 | `24` |
-| `DS2API_CONFIG_PATH` | 配置文件路径 | `config.json` |
-| `DS2API_CONFIG_JSON` | 直接注入配置（JSON 或 Base64） | — |
-| `CONFIG_JSON` | 旧版兼容配置注入 | — |
-| `DS2API_WASM_PATH` | PoW WASM 文件路径 | 自动查找 |
-| `DS2API_STATIC_ADMIN_DIR` | 管理台静态文件目录 | `static/admin` |
-| `DS2API_AUTO_BUILD_WEBUI` | 启动时自动构建 WebUI | 本地开启，Vercel 关闭 |
-| `DS2API_DEV_PACKET_CAPTURE` | 本地开发抓包开关（记录最近会话请求/响应体） | 本地非 Vercel 默认开启 |
-| `DS2API_DEV_PACKET_CAPTURE_LIMIT` | 本地抓包保留条数（超出自动淘汰） | `5` |
-| `DS2API_DEV_PACKET_CAPTURE_MAX_BODY_BYTES` | 单条响应体最大记录字节数 | `2097152` |
-| `DS2API_ACCOUNT_MAX_INFLIGHT` | 每账号最大并发 in-flight 请求数 | `2` |
-| `DS2API_ACCOUNT_CONCURRENCY` | 同上（兼容旧名） | — |
-| `DS2API_ACCOUNT_MAX_QUEUE` | 等待队列上限 | `recommended_concurrency` |
-| `DS2API_ACCOUNT_QUEUE_SIZE` | 同上（兼容旧名） | — |
-| `DS2API_GLOBAL_MAX_INFLIGHT` | 全局最大 in-flight 请求数 | `recommended_concurrency` |
-| `DS2API_MAX_INFLIGHT` | 同上（兼容旧名） | — |
-| `DS2API_VERCEL_INTERNAL_SECRET` | Vercel 混合流式内部鉴权密钥 | 回退用 `DS2API_ADMIN_KEY` |
-| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | 流式 lease 过期秒数 | `900` |
-| `DS2API_DEV_PACKET_CAPTURE` | 本地开发抓包开关（记录最近会话请求/响应体） | 本地非 Vercel 默认开启 |
-| `DS2API_DEV_PACKET_CAPTURE_LIMIT` | 本地抓包保留条数（超出自动淘汰） | `5` |
-| `DS2API_DEV_PACKET_CAPTURE_MAX_BODY_BYTES` | 单条响应体最大记录字节数 | `2097152` |
-| `VERCEL_TOKEN` | Vercel 同步 token | — |
-| `VERCEL_PROJECT_ID` | Vercel 项目 ID | — |
-| `VERCEL_TEAM_ID` | Vercel 团队 ID | — |
-| `DS2API_VERCEL_PROTECTION_BYPASS` | Vercel 部署保护绕过密钥（内部 Node→Go 调用） | — |
+环境变量完整列表见 [部署指南](docs/DEPLOY.md)，接口鉴权规则见 [API.md](API.md#鉴权规则)。

 ## 鉴权模式

@@ -352,6 +334,7 @@ cp opencode.json.example opencode.json
 | **直通 token 模式** | 传入 token 不在 `config.keys` 中时，直接作为 DeepSeek token 使用 |

 可选请求头 `X-Ds2-Target-Account`：指定使用某个托管账号（值为 email 或 mobile）。
+如果指定账号不存在，或者当前托管账号队列已满，请求会返回 `429`；当前 `429` 不附带 `Retry-After` 头。若账号存在但登录/刷新失败，则返回对应的鉴权错误。
 Gemini 路由还可以使用 `x-goog-api-key`，或在没有认证头时使用 `?key=` / `?api_key=` 作为调用方凭据。

 ## 并发模型
@@ -364,7 +347,7 @@ Gemini 路由还可以使用 `x-goog-api-key`，或在没有认证头时使用 `
 ```

 - 当 in-flight 槽位满时，请求进入等待队列，**不会立即 429**
- 超出总承载上限后才返回 `429 Too Many Requests`
+- 超出总承载上限后才返回 `429 Too Many Requests`，当前响应不附带 `Retry-After`
 - `GET /admin/queue/status` 返回实时并发状态

 ## Tool Call 适配
@@ -372,24 +355,24 @@ Gemini 路由还可以使用 `x-goog-api-key`，或在没有认证头时使用 `
 当请求中带 `tools` 时，DS2API 会做防泄漏处理与结构化转译：

 1. 只在**非代码块上下文**启用执行型 toolcall 识别（代码块示例默认不触发）
-2. 解析层以 XML/Markup 为最高优先级，同时兼容 JSON / ANTML / invoke / text-kv，并统一归一到内部工具调用结构
+2. 解析层当前把 DSML 外壳视为推荐可执行调用：`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`；兼容旧式 canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`。DSML 只是外壳别名，内部仍以 XML 解析语义为准；旧式 `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`、`<function_call>`、`tool_use` / antml 变体与纯 JSON `tool_calls` 片段都会按普通文本处理
 3. `responses` 流式严格使用官方 item 生命周期事件（`response.output_item.*`、`response.content_part.*`、`response.function_call_arguments.*`）
 4. `responses` 支持并执行 `tool_choice`（`auto`/`none`/`required`/强制函数）；`required` 违规时非流式返回 `422`，流式返回 `response.failed`
 5. 客户端请求哪种协议，就按该协议返回工具调用（OpenAI/Claude/Gemini 各自原生结构）；模型侧优先约束输出规范 XML，再由兼容层转译

-> 说明：当前版本在 parser 层仍以“尽量解析成功”为优先，未启用基于 allow-list 的工具名硬拒绝。
+> 说明：当前版本 parser 层以“尽量解析成功”为优先，所有格式合法的 XML 工具调用都会通过，不做工具名 allow-list 过滤。
 >
-> 想评估“把工具调用封装成 XML 再输入模型”的方案，可参考：`docs/toolcall-semantics.md`。
+> 想评估“把工具调用封装成 XML 再输入模型”的方案，可参考：`docs/toolcall-semantics.md`。

 ## 本地开发抓包工具

-用于定位「responses 思考流/工具调用」等问题。开启后会自动记录最近 N 条 DeepSeek 对话上游请求体与响应体（默认 5 条，超出自动淘汰）。
+用于定位「responses 思考流/工具调用」等问题。开启后会自动记录最近 N 条 DeepSeek 对话上游请求体与响应体（默认 20 条，超出自动淘汰；单条响应体默认最多记录 5 MB）。

 启用示例：

 ```bash
 DS2API_DEV_PACKET_CAPTURE=true \
-DS2API_DEV_PACKET_CAPTURE_LIMIT=5 \
+DS2API_DEV_PACKET_CAPTURE_LIMIT=20 \
 go run ./cmd/ds2api
 ```

@@ -397,6 +380,8 @@ go run ./cmd/ds2api

 - `GET /admin/dev/captures`：查看抓包列表（最新在前）
 - `DELETE /admin/dev/captures`：清空抓包
+- `GET /admin/dev/raw-samples/query?q=关键词&limit=20`：按问题关键词查询当前内存抓包，并按 `chat_session_id` 归并 `completion + continue` 链
+- `POST /admin/dev/raw-samples/save`：把命中的某条抓包链保存为 `tests/raw_stream_samples/<sample-id>/` 回放样本

 返回字段包含：

@@ -404,62 +389,10 @@ go run ./cmd/ds2api
 - `response_body`：上游返回的原始流式内容拼接文本
 - `response_truncated`：是否触发单条大小截断

-## 项目结构
+保存接口支持用 `query`、`chain_key` 或 `capture_id` 选中目标。例如：

-```text
-ds2api/
-├── cmd/
-│   ├── ds2api/              # 本地 / 容器启动入口
-│   └── ds2api-tests/        # 端到端测试集入口
-├── api/
-│   ├── index.go             # Vercel Serverless Go 入口
-│   ├── chat-stream.js       # Vercel Node.js 流式转发
-│   └── (rewrite targets in vercel.json)
-├── internal/
-│   ├── account/             # 账号池与并发队列
-│   ├── adapter/
-│   │   ├── openai/          # OpenAI 兼容适配器（含 Tool Call 解析、Vercel 流式 prepare/release）
-│   │   ├── claude/          # Claude 兼容适配器
-│   │   └── gemini/          # Gemini 兼容适配器（generateContent / streamGenerateContent）
-│   ├── admin/               # Admin API handlers（含 Settings 热更新）
-│   ├── auth/                # 鉴权与 JWT
-│   ├── claudeconv/          # Claude 消息格式转换
-│   ├── compat/              # 兼容性辅助
-│   ├── config/              # 配置加载与热更新
-│   ├── deepseek/            # DeepSeek API 客户端、PoW WASM
-│   ├── js/                  # Node 运行时流式处理与兼容逻辑
-│   ├── devcapture/          # 开发抓包模块
-│   ├── format/              # 输出格式化
-│   ├── prompt/              # Prompt 构建
-│   ├── server/              # HTTP 路由与中间件（chi router）
-│   ├── sse/                 # SSE 解析工具
-│   ├── stream/              # 统一流式消费引擎
-│   ├── util/                # 通用工具函数
-│   └── webui/               # WebUI 静态文件托管与自动构建
-├── webui/                   # React WebUI 源码（Vite + Tailwind）
-│   └── src/
-│       ├── app/             # 路由、鉴权、配置状态管理
-│       ├── features/        # 业务功能模块（account/settings/vercel/apiTester）
-│       ├── components/      # 登录/落地页等通用组件
-│       └── locales/         # 中英文语言包（zh.json / en.json）
-├── scripts/
-│   └── build-webui.sh       # WebUI 手动构建脚本
-├── tests/
-│   ├── compat/              # 兼容性测试夹具与期望输出
-│   └── scripts/             # 统一测试脚本入口（unit/e2e）
-├── docs/                    # 部署 / 贡献 / 测试等辅助文档
-├── static/admin/            # WebUI 构建产物（不提交到 Git）
-├── .github/
-│   ├── workflows/           # GitHub Actions（质量门禁 + Release 自动构建）
-│   ├── ISSUE_TEMPLATE/      # Issue 模板
-│   └── PULL_REQUEST_TEMPLATE.md
-├── config.example.json      # 配置文件示例
-├── .env.example             # 环境变量示例
-├── Dockerfile               # 多阶段构建（WebUI + Go）
-├── docker-compose.yml       # 生产环境 Docker Compose
-├── docker-compose.dev.yml   # 开发环境 Docker Compose
-├── vercel.json              # Vercel 路由与构建配置
-└── go.mod / go.sum          # Go 模块依赖
+```json
+{"query":"广州天气","sample_id":"gz-weather-from-memory"}
 ```

 ## 文档索引
@@ -473,44 +406,18 @@ ds2api/

 ## 测试

-```bash
-# 单元测试（Go + Node）
-./tests/scripts/run-unit-all.sh
-
-# 一键端到端全链路测试（真实账号，生成完整请求/响应日志）
-./tests/scripts/run-live.sh
-
-# 或自定义参数
-go run ./cmd/ds2api-tests \
-  --config config.json \
-  --admin-key admin \
-  --out artifacts/testsuite \
-  --timeout 120 \
-  --retries 2
-```
-
-```bash
-# 发布前阻断门禁
-./tests/scripts/check-stage6-manual-smoke.sh
-./tests/scripts/check-refactor-line-gate.sh
-./tests/scripts/run-unit-all.sh
-npm ci --prefix webui && npm run build --prefix webui
-```
-
-## 测试
-
 详细测试指南请参阅 [docs/TESTING.md](docs/TESTING.md)。

 ### 快速测试命令

 ```bash
-# 运行所有单元测试
-go test ./...
+# 本地 PR 门禁
+./scripts/lint.sh
+./tests/scripts/check-refactor-line-gate.sh
+./tests/scripts/run-unit-all.sh
+npm run build --prefix webui

-# 运行 tool calls 相关测试（调试工具调用问题）
-go test -v -run 'TestParseToolCalls|TestRepair' ./internal/util/
-
-# 运行端到端测试
+# 端到端全链路测试（真实账号，生成完整请求/响应日志）
 ./tests/scripts/run-live.sh
 ```

@@ -518,10 +425,10 @@ go test -v -run 'TestParseToolCalls|TestRepair' ./internal/util/

 工作流文件：`.github/workflows/release-artifacts.yml`

- **触发条件**：仅在 GitHub Release `published` 时触发（普通 push 不会触发）
- **构建产物**：多平台二进制包（`linux/amd64`、`linux/arm64`、`darwin/amd64`、`darwin/arm64`、`windows/amd64`）+ `sha256sums.txt`
+- **触发条件**：默认仅在 GitHub Release `published` 时自动触发；也支持在 Actions 页面手动 `workflow_dispatch`，并填写 `release_tag` 复跑/补发
+- **构建产物**：多平台二进制包（`linux/amd64`、`linux/arm64`、`linux/armv7`、`darwin/amd64`、`darwin/arm64`、`windows/amd64`、`windows/arm64`）、Linux Docker 镜像导出包 + `sha256sums.txt`
 - **容器镜像发布**：仅推送到 GHCR（`ghcr.io/cjackhwang/ds2api`）
- **每个压缩包包含**：`ds2api` 可执行文件、`static/admin`、WASM 文件（同时支持内置 fallback）、配置示例、README、LICENSE
+- **每个二进制压缩包包含**：`ds2api` 可执行文件、`static/admin`、`config.example.json`、`.env.example`、`README.MD`、`README.en.md`、`LICENSE`

 ## 免责声明

--- a/README.en.md
+++ b/README.en.md
@@ -4,6 +4,8 @@

 # DS2API

+<a href="https://trendshift.io/repositories/24508" target="_blank"><img src="https://trendshift.io/api/badge/repositories/24508" alt="CJackHwang%2Fds2api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+
 [![License](https://img.shields.io/github/license/CJackHwang/ds2api.svg)](LICENSE)
 ![Stars](https://img.shields.io/github/stars/CJackHwang/ds2api.svg)
 ![Forks](https://img.shields.io/github/forks/CJackHwang/ds2api.svg)
@@ -14,7 +16,9 @@

 Language: [中文](README.MD) | [English](README.en.md)

-DS2API converts DeepSeek Web chat capability into OpenAI-compatible, Claude-compatible, and Gemini-compatible APIs. The backend is a **pure Go implementation**, with a React WebUI admin panel (source in `webui/`, build output auto-generated to `static/admin` during deployment).
+DS2API converts DeepSeek Web chat capability into OpenAI-compatible, Claude-compatible, and Gemini-compatible APIs. The core backend is Go-based, with a small Node Runtime bridge used for Vercel streaming, and the React WebUI admin panel lives in `webui/` (build output auto-generated to `static/admin` during deployment).
+
+Documentation entry: [Docs Index](docs/README.md) / [Architecture](docs/ARCHITECTURE.en.md) / [API Reference](API.en.md)

 > **Important Disclaimer**
 >
@@ -24,43 +28,86 @@ DS2API converts DeepSeek Web chat capability into OpenAI-compatible, Claude-comp
 >
 > Do not use this project in ways that violate service terms, agreements, laws, or platform rules. Before any commercial use, review the `LICENSE`, the relevant terms, and confirm that you have the author's written permission.

-## Architecture Overview
+## Table of Contents
+
+- [Architecture Overview (Summary)](#architecture-overview-summary)
+- [Key Capabilities](#key-capabilities)
+- [Platform Compatibility Matrix](#platform-compatibility-matrix)
+- [Model Support](#model-support)
+  - [OpenAI Endpoint](#openai-endpoint-get-v1models)
+  - [Claude Endpoint](#claude-endpoint-get-anthropicv1models)
+  - [Gemini Endpoint](#gemini-endpoint)
+- [Quick Start](#quick-start)
+  - [Option 1: Download Release Binaries](#option-1-download-release-binaries)
+  - [Option 2: Docker / GHCR](#option-2-docker--ghcr)
+  - [Option 3: Vercel](#option-3-vercel)
+  - [Option 4: Local Run](#option-4-local-run)
+- [Configuration](#configuration)
+- [Authentication Modes](#authentication-modes)
+- [Concurrency Model](#concurrency-model)
+- [Tool Call Adaptation](#tool-call-adaptation)
+- [Local Dev Packet Capture](#local-dev-packet-capture)
+- [Documentation Index](#documentation-index)
+- [Testing](#testing)
+- [Release Artifact Automation (GitHub Actions)](#release-artifact-automation-github-actions)
+- [Disclaimer](#disclaimer)
+
+## Architecture Overview (Summary)

 ```mermaid
 flowchart LR
-    Client["🖥️ Clients\n(OpenAI / Claude / Gemini compat)"]
+    Client["🖥️ Clients / SDKs\n(OpenAI / Claude / Gemini)"]
+    Upstream["☁️ DeepSeek API"]

-    subgraph DS2API["DS2API Service"]
-        direction TB
-        CORS["CORS Middleware"]
-        Auth["🔐 Auth Middleware"]
+    subgraph DS2API["DS2API 4.x (Modular HTTP Surface + PromptCompat Core)"]
+        Router["chi Router + Middleware\n(RequestID / RealIP / Logger / Recoverer / CORS)"]

-        subgraph Adapters["Adapter Layer"]
-            OA["OpenAI Adapter\n/v1/*"]
-            CA["Claude Adapter\n/anthropic/*"]
-            GA["Gemini Adapter\n/v1beta/models/*"]
+        subgraph HTTP["HTTP API Surface"]
+            OA["OpenAI\nchat / responses / files / embeddings"]
+            CA["Claude\n/anthropic/* + /v1/messages"]
+            GA["Gemini\n/v1beta/models/* + /v1/models/*"]
+            Admin["Admin API\nresource packages"]
+            WebUI["WebUI\n/admin (static hosting)"]
+            Vercel["Vercel Node Stream\n/v1/chat/completions"]
        end

-        subgraph Support["Support Modules"]
-            Pool["📦 Account Pool / Queue"]
-            PoW["⚙️ PoW WASM\n(wazero)"]
+        subgraph Runtime["Runtime + Core Capabilities"]
+            Compat["PromptCompat\n(API -> web-chat plain text context)"]
+            Chat["Chat / Responses Runtime\n(unified tools + stream semantics)"]
+            Auth["Auth Resolver\n(API key / bearer / x-goog-api-key)"]
+            Pool["Account Pool + Queue\n(in-flight slots + wait queue)"]
+            DSClient["DeepSeek Client\n(session / auth / completion / files)"]
+            Pow["PoW Solver\n(Pure Go)"]
+            Tool["Tool Sieve\n(Go/Node semantic parity)"]
+            History["History Split\n(long history as files)"]
        end
-
-        Admin["🛠️ Admin API\n/admin/*"]
-        WebUI["🌐 WebUI\n(/admin)"]
    end

-    DS["☁️ DeepSeek API"]
+    Client --> Router
+    Router --> OA & CA & GA
+    Router --> Admin
+    Router --> WebUI
+    Router --> Vercel

-    Client -- "Request" --> CORS --> Auth
-    Auth --> OA & CA & GA
-    OA & CA & GA -- "Call" --> DS
-    Auth --> Admin
-    OA & CA & GA -. "Rotate accounts" .-> Pool
-    OA & CA & GA -. "Compute PoW" .-> PoW
-    DS -- "Response" --> Client
+    OA --> Compat
+    CA & GA --> Compat
+    Compat --> Chat
+    Compat -.long history.-> History
+    Vercel -.Go prepare.-> Chat
+    Vercel -.Node SSE.-> Tool
+    Chat --> Auth
+    Chat -.account rotation.-> Pool
+    Chat -.tool-call parsing.-> Tool
+    Chat -.PoW solving.-> Pow
+    Auth --> DSClient
+    DSClient --> Upstream
+    Upstream --> DSClient
+    Chat --> Client
+    Vercel --> Client
 ```

+For the full module-by-module architecture and directory responsibilities, see [docs/ARCHITECTURE.en.md](docs/ARCHITECTURE.en.md).
+
 - **Backend**: Go (`cmd/ds2api/`, `api/`, `internal/`), no Python runtime
 - **Frontend**: React admin panel (`webui/`), served as static build at runtime
 - **Deployment**: local run, Docker, Vercel serverless, Linux systemd
@@ -69,17 +116,20 @@ flowchart LR

 | Capability | Details |
 | --- | --- |
-| OpenAI compatible | `GET /v1/models`, `GET /v1/models/{id}`, `POST /v1/chat/completions`, `POST /v1/responses`, `GET /v1/responses/{response_id}`, `POST /v1/embeddings` |
+| OpenAI compatible | `GET /v1/models`, `GET /v1/models/{id}`, `POST /v1/chat/completions`, `POST /v1/responses`, `GET /v1/responses/{response_id}`, `POST /v1/embeddings`, `POST /v1/files` |
 | Claude compatible | `GET /anthropic/v1/models`, `POST /anthropic/v1/messages`, `POST /anthropic/v1/messages/count_tokens` (plus shortcut paths `/v1/messages`, `/messages`) |
 | Gemini compatible | `POST /v1beta/models/{model}:generateContent`, `POST /v1beta/models/{model}:streamGenerateContent` (plus `/v1/models/{model}:*` paths) |
+| Unified CORS compatibility | `/v1/*`, `/anthropic/*`, `/v1beta/models/*`, and `/admin/*` share one CORS policy; on Vercel, the Node Runtime for `/v1/chat/completions` mirrors the same relaxed preflight behavior for third-party clients |
 | Multi-account rotation | Auto token refresh, email/mobile dual login |
 | Concurrency control | Per-account in-flight limit + waiting queue, dynamic recommended concurrency |
-| DeepSeek PoW | WASM solving via `wazero`, no external Node.js dependency |
+| DeepSeek PoW | Pure Go high-performance solver (DeepSeekHashV1) with millisecond-level response time |
 | Tool Calling | Anti-leak handling: non-code-block feature match, early `delta.tool_calls`, structured incremental output |
-| Admin API | Config management, runtime settings hot-reload, account testing/batch test, session cleanup, import/export, Vercel sync, version check |
-| WebUI Admin Panel | SPA at `/admin` (bilingual Chinese/English, dark mode) |
+| Admin API | Config management, runtime settings hot-reload, proxy management, account testing/batch test, session cleanup, import/export, Vercel sync, version check |
+| WebUI Admin Panel | SPA at `/admin` (bilingual Chinese/English, dark mode, with server-side conversation history) |
 | Health Probes | `GET /healthz` (liveness), `GET /readyz` (readiness) |

+OpenAI `/v1/*` routes remain canonical, and DS2API also accepts root shortcuts such as `/models`, `/chat/completions`, `/responses`, `/embeddings`, and `/files` for clients configured with the bare service URL.
+
 ## Platform Compatibility Matrix

 | Tier | Platform | Status |
@@ -90,37 +140,39 @@ flowchart LR
 | P0 | Anthropic SDK (messages) | ✅ |
 | P0 | Google Gemini SDK (generateContent) | ✅ |
 | P1 | LangChain / LlamaIndex / OpenWebUI (OpenAI-compatible integration) | ✅ |
-| P2 | MCP standalone bridge | Planned |

 ## Model Support

-### OpenAI Endpoint
+### OpenAI Endpoint (`GET /v1/models`)

-| Model | thinking | search |
-| --- | --- | --- |
-| `deepseek-chat` | ❌ | ❌ |
-| `deepseek-reasoner` | ✅ | ❌ |
-| `deepseek-chat-search` | ❌ | ✅ |
-| `deepseek-reasoner-search` | ✅ | ✅ |
+| Family | Model ID | thinking | search |
+| --- | --- | --- | --- |
+| default | `deepseek-v4-flash` | enabled by default, request-controlled | ❌ |
+| expert | `deepseek-v4-pro` | enabled by default, request-controlled | ❌ |
+| default | `deepseek-v4-flash-search` | enabled by default, request-controlled | ✅ |
+| expert | `deepseek-v4-pro-search` | enabled by default, request-controlled | ✅ |
+| vision | `deepseek-v4-vision` | enabled by default, request-controlled | ❌ |

-### Claude Endpoint
+Besides native IDs, DS2API also accepts common aliases as input (for example `gpt-4.1`, `gpt-5`, `gpt-5-codex`, `o3`, `claude-*`, `gemini-*`), but `/v1/models` returns normalized DeepSeek native model IDs. The complete alias behavior is documented in [API.en.md](API.en.md#model-alias-resolution) and `config.example.json`.
+Current upstream vision support exposes only the `vision` lane and does not provide a separate search-enabled vision variant.

-| Model | Default Mapping |
+### Claude Endpoint (`GET /anthropic/v1/models`)
+
+| Current common model | Default Mapping |
 | --- | --- |
-| `claude-sonnet-4-5` | `deepseek-chat` |
-| `claude-haiku-4-5` (compatible with `claude-3-5-haiku-latest`) | `deepseek-chat` |
-| `claude-opus-4-6` | `deepseek-reasoner` |
-
-Override mapping via `claude_mapping` or `claude_model_mapping` in config.
-In addition, `/anthropic/v1/models` now includes historical Claude 1.x/2.x/3.x/4.x IDs and common aliases for legacy client compatibility.
+| `claude-sonnet-4-6` | `deepseek-v4-flash` |
+| `claude-haiku-4-5` (compatible with `claude-3-5-haiku-latest`) | `deepseek-v4-flash` |
+| `claude-opus-4-6` | `deepseek-v4-pro` |

+Override mapping via the global `model_aliases` config.
+Besides the primary aliases above, `/anthropic/v1/models` also returns Claude 4.x snapshots plus historical 3.x IDs and common aliases for legacy client compatibility.

 #### Claude Code integration pitfalls (validated)

 - Set `ANTHROPIC_BASE_URL` to the DS2API root URL (for example `http://127.0.0.1:5001`). Claude Code sends requests to `/v1/messages?beta=true`.
 - `ANTHROPIC_API_KEY` must match an entry in `keys` from `config.json`. Keeping both a regular key and an `sk-ant-*` style key improves client compatibility.
 - If your environment has proxy variables, set `NO_PROXY=127.0.0.1,localhost,<your_host_ip>` for DS2API to avoid proxy interception of local traffic.
- If tool calls are rendered as plain text and not executed, upgrade to a build that includes multi-format Claude tool-call parsing (JSON/XML/ANTML/invoke).
+- If tool calls are rendered as plain text and not executed, first verify the model output uses the recommended DSML block: `<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`. DS2API also accepts legacy canonical XML: `<tool_calls><invoke name="..."><parameter name="...">...`; legacy `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`, `<function_call>`, `tool_use`, or standalone JSON `tool_calls` are not executed.

 ### Gemini Endpoint

@@ -128,6 +180,15 @@ The Gemini adapter maps model names to DeepSeek native models via `model_aliases

 ## Quick Start

+### Recommended deployment priority
+
+Recommended order when choosing a deployment method:
+
+1. **Download and run release binaries**: the easiest path for most users because the artifacts are already built.
+2. **Docker / GHCR image deployment**: suitable for containerized, orchestrated, or cloud environments.
+3. **Vercel deployment**: suitable if you already use Vercel and accept its platform constraints.
+4. **Run from source / build locally**: suitable for development, debugging, or when you need to modify the code yourself.
+
 ### Universal First Step (all deployment modes)

 Use `config.json` as the single source of truth (recommended):
@@ -139,47 +200,42 @@ cp config.example.json config.json

 Recommended per deployment mode:
 - Local run: read `config.json` directly
- Docker / Vercel: generate Base64 from `config.json` and inject as `DS2API_CONFIG_JSON`
- Compatibility note: `DS2API_CONFIG_JSON` may also contain raw JSON directly; `CONFIG_JSON` is the legacy fallback variable
+- Docker / Vercel: generate Base64 from `config.json` and inject as `DS2API_CONFIG_JSON`, or paste raw JSON directly

-### Option 1: Local Run
+The WebUI admin panel’s “Full configuration template” is loaded from the same `config.example.json`, so updating that file keeps the frontend template in sync.

-**Prerequisites**: Go 1.24+, Node.js 20+ (only if building WebUI locally)
+### Option 1: Download Release Binaries
+
+GitHub Actions automatically builds multi-platform archives on each Release:

 ```bash
-# 1. Clone
-git clone https://github.com/CJackHwang/ds2api.git
-cd ds2api
-
-# 2. Configure
+# After downloading the archive for your platform
+tar -xzf ds2api_<tag>_linux_amd64.tar.gz
+cd ds2api_<tag>_linux_amd64
 cp config.example.json config.json
-# Edit config.json with your DeepSeek account info and API keys
-
-# 3. Start
-go run ./cmd/ds2api
+# Edit config.json
+./ds2api
 ```

-Default URL: `http://localhost:5001`
-
-> **WebUI auto-build**: On first local startup, if `static/admin` is missing, DS2API will auto-run `npm ci` (only when dependencies are missing) and `npm run build -- --outDir static/admin --emptyOutDir` (requires Node.js). You can also build manually: `./scripts/build-webui.sh`
-
-### Option 2: Docker
+### Option 2: Docker / GHCR

 ```bash
-# 1. Prepare env file
+# Pull prebuilt image
+docker pull ghcr.io/cjackhwang/ds2api:latest
+
+# Or run a pinned version
+# docker pull ghcr.io/cjackhwang/ds2api:v3.0.0
+
+# Prepare env file and config file
 cp .env.example .env
+cp config.example.json config.json

-# 2. Edit .env (at least set DS2API_ADMIN_KEY)
-#    DS2API_ADMIN_KEY=replace-with-a-strong-secret
-
-# 3. Start
+# Start with compose
 docker-compose up -d
-
-# 4. View logs
-docker-compose logs -f
 ```

-The default `docker-compose.yml` maps host port `6011` to container port `5001`. If you want `5001` exposed directly, adjust the `ports` mapping.
+The default `docker-compose.yml` uses `ghcr.io/cjackhwang/ds2api:latest` and maps host port `6011` to container port `5001`. If you want `5001` exposed directly, set `DS2API_HOST_PORT=5001` (or adjust the `ports` mapping).
+It also mounts `./config.json` at `/data/config.json` and sets `DS2API_CONFIG_PATH=/data/config.json` by default, which avoids runtime token persistence failures caused by a read-only `/app` directory.

 Rebuild after updates: `docker-compose up -d --build`

@@ -211,133 +267,49 @@ Recommended: convert `config.json` to Base64 locally, then paste into `DS2API_CO
 base64 < config.json | tr -d '\n'
 ```

-> **Streaming note**: `/v1/chat/completions` on Vercel is routed to `api/chat-stream.js` (Node Runtime) for real-time SSE. Auth, account selection, and session/PoW preparation are still handled by the Go internal prepare endpoint; streaming output (including `tools`) is assembled on Node with Go-aligned anti-leak handling.
+> **Streaming note**: `/v1/chat/completions` on Vercel is routed to `api/chat-stream.js` (Node Runtime) for real-time SSE. Auth, account selection, and session/PoW preparation are still handled by the Go internal prepare endpoint; streaming output (including `tools`) is assembled on Node with Go-aligned anti-leak handling. This is the only interface family currently routed through Node, and its CORS allow behavior is kept aligned with the Go router so third-party preflight handling stays unified.

 For detailed deployment instructions, see the [Deployment Guide](docs/DEPLOY.en.md).

-### Option 4: Download Release Binaries
+### Option 4: Local Run

-GitHub Actions automatically builds multi-platform archives on each Release:
+**Prerequisites**: Go 1.26+, Node.js `20.19+` or `22.12+` (only if building WebUI locally)

 ```bash
-# After downloading the archive for your platform
-tar -xzf ds2api_<tag>_linux_amd64.tar.gz
-cd ds2api_<tag>_linux_amd64
+# 1. Clone
+git clone https://github.com/CJackHwang/ds2api.git
+cd ds2api
+
+# 2. Configure
 cp config.example.json config.json
-# Edit config.json
-./ds2api
+# Edit config.json with your DeepSeek account info and API keys
+
+# 3. Start
+go run ./cmd/ds2api
 ```

-### Option 5: OpenCode CLI
+Default local URL: `http://127.0.0.1:5001`

-1. Copy the example config:
+The server actually binds to `0.0.0.0:5001`, so devices on the same LAN can usually reach it through your private IP as well.

-```bash
-cp opencode.json.example opencode.json
-```
-
-2. Edit `opencode.json`:
- Set `baseURL` to your DS2API endpoint (for example, `https://your-domain.com/v1`)
- Set `apiKey` to your DS2API key (from `config.keys`)
-
-3. Start OpenCode CLI in the project directory (run `opencode` using your installed method).
-
-> Recommended: use the OpenAI-compatible path (`/v1/*`) via `@ai-sdk/openai-compatible` as shown in the example.
-> If your client supports `wire_api`, test both `responses` and `chat`; DS2API supports both paths.
+> **WebUI auto-build**: On first local startup, if `static/admin` is missing, DS2API will auto-run `npm ci` (only when dependencies are missing) and `npm run build -- --outDir static/admin --emptyOutDir` (requires Node.js). You can also build manually: `./scripts/build-webui.sh`

 ## Configuration

-### `config.json` Example
+This README covers only the onboarding path. Use [config.example.json](config.example.json) as the field template, and see the [deployment guide](docs/DEPLOY.en.md#0-prerequisites) plus [API configuration notes](API.en.md#configuration-best-practice) for full details.

-```json
-{
-  "keys": ["your-api-key-1", "your-api-key-2"],
-  "accounts": [
-    {
-      "email": "user@example.com",
-      "password": "your-password"
-    },
-    {
-      "mobile": "12345678901",
-      "password": "your-password"
-    }
-  ],
-  "model_aliases": {
-    "gpt-4o": "deepseek-chat",
-    "gpt-5-codex": "deepseek-reasoner",
-    "o3": "deepseek-reasoner"
-  },
-  "compat": {
-    "wide_input_strict_output": true
-  },
-  "responses": {
-    "store_ttl_seconds": 900
-  },
-  "embeddings": {
-    "provider": "deterministic"
-  },
-  "claude_mapping": {
-    "fast": "deepseek-chat",
-    "slow": "deepseek-reasoner"
-  },
-  "admin": {
-    "jwt_expire_hours": 24
-  },
-  "runtime": {
-    "account_max_inflight": 2,
-    "account_max_queue": 0,
-    "global_max_inflight": 0,
-    "token_refresh_interval_hours": 6
-  },
-  "auto_delete": {
-    "sessions": false
-  }
-}
-```
+Common fields:

- `keys`: API access keys; clients authenticate via `Authorization: Bearer <key>`
- `accounts`: DeepSeek account list, supports `email` or `mobile` login
- `token`: Even if set in `config.json`, it is cleared during load (DS2API does not read persisted tokens from config); runtime tokens are maintained/refreshed in memory only
- `model_aliases`: Map common model names (GPT/Codex/Claude) to DeepSeek models
- `compat.wide_input_strict_output`: Keep `true` (current default policy)
- `toolcall`: Fixed to feature matching + high-confidence early emit, no longer configurable
- `responses.store_ttl_seconds`: In-memory TTL for `/v1/responses/{id}`
- `embeddings.provider`: Embeddings provider (`deterministic/mock/builtin` built-in)
- `claude_mapping`: Maps `fast`/`slow` suffixes to corresponding DeepSeek models (still compatible with `claude_model_mapping`)
- `admin`: Admin panel settings (JWT expiry, password hash, etc.), hot-reloadable via Admin Settings API
- `runtime`: Runtime parameters (concurrency limits, queue sizes, managed token refresh interval), hot-reloadable via Admin Settings API; `account_max_queue=0`/`global_max_inflight=0` means auto-calculate from recommended values, `token_refresh_interval_hours=6` is the default forced re-login interval
- `auto_delete.sessions`: Whether to auto-delete DeepSeek sessions after request completion (default `false`, hot-reloadable via Settings)
+- `keys` / `api_keys`: client API keys; `api_keys` adds `name` and `remark` metadata while `keys` remains compatible.
+- `accounts`: managed DeepSeek accounts, supporting `email` or `mobile` login plus proxy/name/remark metadata.
+- `model_aliases`: one shared alias map for OpenAI / Claude / Gemini model names.
+- `runtime`: account concurrency, queueing, and token refresh behavior, hot-reloadable via Admin Settings.
+- `auto_delete.mode`: remote session cleanup after each request, supporting `none` / `single` / `all`.
+- `history_split`: legacy multi-turn history split field, now ignored and kept only for backward-compatible config loading.
+- `current_input_file`: the only active split mode; it is enabled by default and uploads the full context as a `DS2API_HISTORY.txt` context file once the character threshold is reached.
+- If you turn off `current_input_file`, requests pass through directly without uploading any split context file.

-### Environment Variables
-
-| Variable | Purpose | Default |
-| --- | --- | --- |
-| `PORT` | Service port | `5001` |
-| `LOG_LEVEL` | Log level | `INFO` (`DEBUG`/`WARN`/`ERROR`) |
-| `DS2API_ADMIN_KEY` | Admin login key | `admin` |
-| `DS2API_JWT_SECRET` | Admin JWT signing secret | Same as `DS2API_ADMIN_KEY` |
-| `DS2API_JWT_EXPIRE_HOURS` | Admin JWT TTL in hours | `24` |
-| `DS2API_CONFIG_PATH` | Config file path | `config.json` |
-| `DS2API_CONFIG_JSON` | Inline config (JSON or Base64) | — |
-| `CONFIG_JSON` | Legacy compatibility config input | — |
-| `DS2API_WASM_PATH` | PoW WASM file path | Auto-detect |
-| `DS2API_STATIC_ADMIN_DIR` | Admin static assets dir | `static/admin` |
-| `DS2API_AUTO_BUILD_WEBUI` | Auto-build WebUI on startup | Enabled locally, disabled on Vercel |
-| `DS2API_ACCOUNT_MAX_INFLIGHT` | Max in-flight requests per account | `2` |
-| `DS2API_ACCOUNT_CONCURRENCY` | Alias (legacy compat) | — |
-| `DS2API_ACCOUNT_MAX_QUEUE` | Waiting queue limit | `recommended_concurrency` |
-| `DS2API_ACCOUNT_QUEUE_SIZE` | Alias (legacy compat) | — |
-| `DS2API_GLOBAL_MAX_INFLIGHT` | Global max in-flight requests | `recommended_concurrency` |
-| `DS2API_MAX_INFLIGHT` | Alias (legacy compat) | — |
-| `DS2API_VERCEL_INTERNAL_SECRET` | Vercel hybrid streaming internal auth | Falls back to `DS2API_ADMIN_KEY` |
-| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | Stream lease TTL seconds | `900` |
-| `DS2API_DEV_PACKET_CAPTURE` | Local dev packet capture switch (record recent request/response bodies) | Enabled by default on non-Vercel local runtime |
-| `DS2API_DEV_PACKET_CAPTURE_LIMIT` | Number of captured sessions to retain (auto-evict overflow) | `5` |
-| `DS2API_DEV_PACKET_CAPTURE_MAX_BODY_BYTES` | Max recorded bytes per captured response body | `2097152` |
-| `VERCEL_TOKEN` | Vercel sync token | — |
-| `VERCEL_PROJECT_ID` | Vercel project ID | — |
-| `VERCEL_TEAM_ID` | Vercel team ID | — |
-| `DS2API_VERCEL_PROTECTION_BYPASS` | Vercel deployment protection bypass for internal Node→Go calls | — |
+For the full environment variable list, see [docs/DEPLOY.en.md](docs/DEPLOY.en.md). For auth behavior, see [API.en.md](API.en.md#authentication).

 ## Authentication Modes

@@ -369,21 +341,22 @@ Queue limit = DS2API_ACCOUNT_MAX_QUEUE (default = recommended concurrency)
 When `tools` is present in the request, DS2API performs anti-leak handling:

 1. Toolcall feature matching is enabled only in **non-code-block context** (fenced examples are ignored)
-   - In non-code-block context, tool JSON may still be recognized even when mixed with normal prose; surrounding prose can remain as text output.
-2. `responses` streaming strictly uses official item lifecycle events (`response.output_item.*`, `response.content_part.*`, `response.function_call_arguments.*`)
-3. Tool names not declared in the `tools` schema are strictly rejected and will not be emitted as valid tool calls
+2. The parser now treats the DSML shell as the recommended executable tool-calling syntax: `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; it also accepts legacy canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`. DSML is a shell alias and internal parsing remains XML-based; legacy `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`, `<function_call>`, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text
+3. `responses` streaming strictly uses official item lifecycle events (`response.output_item.*`, `response.content_part.*`, `response.function_call_arguments.*`)
 4. `responses` supports and enforces `tool_choice` (`auto`/`none`/`required`/forced function); `required` violations return `422` for non-stream and `response.failed` for stream
-5. Valid tool call events are only emitted after passing policy validation, preventing invalid tool names from entering the client execution chain
+5. The output protocol follows the client request (OpenAI / Claude / Gemini native shapes); model-side prompting can prefer XML, and the compatibility layer handles the protocol-specific translation
+
+> Note: the current parser still prioritizes “parse successfully whenever possible”; hard allow-list rejection for undeclared tool names is not enabled yet.

 ## Local Dev Packet Capture

-This is for debugging issues such as Responses reasoning streaming and tool-call handoff. When enabled, DS2API stores the latest N DeepSeek conversation payload pairs (request body + upstream response body), defaulting to 5 entries with auto-eviction.
+This is for debugging issues such as Responses reasoning streaming and tool-call handoff. When enabled, DS2API stores the latest N DeepSeek conversation payload pairs (request body + upstream response body), defaulting to 20 entries with auto-eviction; each response body is capped at 5 MB by default.

 Enable example:

 ```bash
 DS2API_DEV_PACKET_CAPTURE=true \
-DS2API_DEV_PACKET_CAPTURE_LIMIT=5 \
+DS2API_DEV_PACKET_CAPTURE_LIMIT=20 \
 go run ./cmd/ds2api
 ```

@@ -391,6 +364,8 @@ Inspect/clear (Admin JWT required):

 - `GET /admin/dev/captures`: list captured items (newest first)
 - `DELETE /admin/dev/captures`: clear captured items
+- `GET /admin/dev/raw-samples/query?q=keyword&limit=20`: search current in-memory captures by prompt keyword and group `completion + continue` by `chat_session_id`
+- `POST /admin/dev/raw-samples/save`: persist a selected capture chain to `tests/raw_stream_samples/<sample-id>/`

 Response fields include:

@@ -398,62 +373,10 @@ Response fields include:
 - `response_body`: concatenated raw upstream stream body text
 - `response_truncated`: whether body-size truncation happened

-## Project Structure
+The save endpoint can target a chain by `query`, `chain_key`, or `capture_id`. Example:

-```text
-ds2api/
-├── cmd/
-│   ├── ds2api/              # Local / container entrypoint
-│   └── ds2api-tests/        # End-to-end testsuite entrypoint
-├── api/
-│   ├── index.go             # Vercel Serverless Go entry
-│   ├── chat-stream.js       # Vercel Node.js stream relay
-│   └── (rewrite targets in vercel.json)
-├── internal/
-│   ├── account/             # Account pool and concurrency queue
-│   ├── adapter/
-│   │   ├── openai/          # OpenAI adapter (incl. tool call parsing, Vercel stream prepare/release)
-│   │   ├── claude/          # Claude adapter
-│   │   └── gemini/          # Gemini adapter (generateContent / streamGenerateContent)
-│   ├── admin/               # Admin API handlers (incl. Settings hot-reload)
-│   ├── auth/                # Auth and JWT
-│   ├── claudeconv/          # Claude message format conversion
-│   ├── compat/              # Compatibility helpers
-│   ├── config/              # Config loading and hot-reload
-│   ├── deepseek/            # DeepSeek API client, PoW WASM
-│   ├── js/                  # Node runtime stream/compat logic
-│   ├── devcapture/          # Dev packet capture module
-│   ├── format/              # Output formatting
-│   ├── prompt/              # Prompt construction
-│   ├── server/              # HTTP routing and middleware (chi router)
-│   ├── sse/                 # SSE parsing utilities
-│   ├── stream/              # Unified stream consumption engine
-│   ├── util/                # Common utilities
-│   └── webui/               # WebUI static file serving and auto-build
-├── webui/                   # React WebUI source (Vite + Tailwind)
-│   └── src/
-│       ├── app/             # Routing, auth, config state
-│       ├── features/        # Feature modules (account/settings/vercel/apiTester)
-│       ├── components/      # Shared UI pieces (login/landing, etc.)
-│       └── locales/         # Language packs (zh.json / en.json)
-├── scripts/
-│   └── build-webui.sh       # Manual WebUI build script
-├── tests/
-│   ├── compat/              # Compatibility fixtures and expected outputs
-│   └── scripts/             # Unified test script entrypoints (unit/e2e)
-├── docs/                    # Deployment / contributing / testing docs
-├── static/admin/            # WebUI build output (not committed to Git)
-├── .github/
-│   ├── workflows/           # GitHub Actions (quality gates + release automation)
-│   ├── ISSUE_TEMPLATE/      # Issue templates
-│   └── PULL_REQUEST_TEMPLATE.md
-├── config.example.json      # Config file template
-├── .env.example             # Environment variable template
-├── Dockerfile               # Multi-stage build (WebUI + Go)
-├── docker-compose.yml       # Production Docker Compose
-├── docker-compose.dev.yml   # Development Docker Compose
-├── vercel.json              # Vercel routing and build config
-└── go.mod / go.sum          # Go module dependencies
+```json
+{"query":"Guangzhou weather","sample_id":"gz-weather-from-memory"}
 ```

 ## Documentation Index
@@ -467,38 +390,29 @@ ds2api/

 ## Testing

-```bash
-# Unit tests (Go + Node)
-./tests/scripts/run-unit-all.sh
+For the full testing guide, see [docs/TESTING.md](docs/TESTING.md).

-# One-command live end-to-end tests (real accounts, full request/response logs)
-./tests/scripts/run-live.sh
-
-# Or with custom flags
-go run ./cmd/ds2api-tests \
-  --config config.json \
-  --admin-key admin \
-  --out artifacts/testsuite \
-  --timeout 120 \
-  --retries 2
-```
+Quick commands:

 ```bash
-# Release-blocking gates
-./tests/scripts/check-stage6-manual-smoke.sh
+# Local PR gates
+./scripts/lint.sh
 ./tests/scripts/check-refactor-line-gate.sh
 ./tests/scripts/run-unit-all.sh
-npm ci --prefix webui && npm run build --prefix webui
+npm run build --prefix webui
+
+# Live end-to-end tests (real accounts, full request/response logs)
+./tests/scripts/run-live.sh
 ```

 ## Release Artifact Automation (GitHub Actions)

 Workflow: `.github/workflows/release-artifacts.yml`

- **Trigger**: only on GitHub Release `published` (normal pushes do not trigger builds)
- **Outputs**: multi-platform archives (`linux/amd64`, `linux/arm64`, `darwin/amd64`, `darwin/arm64`, `windows/amd64`) + `sha256sums.txt`
+- **Trigger**: by default only on GitHub Release `published`; you can also run it manually via `workflow_dispatch` and pass `release_tag` to rerun or backfill a build for that tag
+- **Outputs**: multi-platform binary archives (`linux/amd64`, `linux/arm64`, `linux/armv7`, `darwin/amd64`, `darwin/arm64`, `windows/amd64`, `windows/arm64`), Linux Docker image export tarballs, and `sha256sums.txt`
 - **Container publishing**: GHCR only (`ghcr.io/cjackhwang/ds2api`)
- **Each archive includes**: `ds2api` executable, `static/admin`, WASM file (with embedded fallback support), config template, README, LICENSE
+- **Each binary archive includes**: the `ds2api` executable, `static/admin`, `config.example.json`, `.env.example`, `README.MD`, `README.en.md`, and `LICENSE`

 ## Disclaimer

--- a/SECURITY.md
+++ b/SECURITY.md
@@ -0,0 +1,65 @@
+# Security Policy
+
+## Supported Versions
+
+**Only the latest version** receives security updates.  
+If you are using an older version, please upgrade to the latest release.
+
+| Version        | Supported          |
+| -------------- | ------------------ |
+| latest         | :white_check_mark: |
+| < latest       | :x:                |
+
+> **Why?** This project is maintained by a single developer. Keeping only one active version ensures fast response times and avoids legacy maintenance overhead.
+
+## What is a Security Vulnerability?
+
+A **security vulnerability** is a bug that can be exploited to compromise:
+- Data confidentiality (e.g., leaking secrets, user data)
+- Data integrity (e.g., unauthorized modification)
+- System availability (e.g., remote crash, denial of service)
+- Privilege escalation (e.g., normal user gains admin rights)
+
+**Examples**: SQL injection, command injection, path traversal, authentication bypass, insecure deserialization, sensitive data exposure.
+
+**What is NOT a security vulnerability?**  
+Regular bugs like crashes (without exploit potential), incorrect return values, performance issues, missing features, or documentation typos. Please report those via **GitHub Issues** publicly.
+
+## Reporting a Vulnerability
+
+If you believe you have found a security vulnerability, **please do NOT open a public issue**.
+
+Instead, send an email to: **cjackhwang@qq.com**
+
+Please include as much as possible:
+- A clear description of the issue
+- Steps to reproduce (code / input / environment)
+- Potential impact (what could an attacker do?)
+- Suggested fix (if any)
+
+You can expect:
+- **Initial response** within 3 business days (acknowledgment)
+- **Confirmation or clarification** within 7 days
+- **Fix or decision** within 14 days (depending on complexity)
+
+## What to Expect After Reporting
+
+| Outcome            | What happens |
+| ------------------ | ------------- |
+| **Accepted**       | I will develop a fix, release a patch version, and may credit you in the release notes (unless you prefer anonymity). |
+| **Declined**       | I will explain why (e.g., not a security issue, already fixed, out of scope, or requires a larger redesign). |
+| **Need more info** | I will ask follow-up questions. If there is no response within 14 days, the report may be considered stale. |
+
+## Disclosure Policy
+
+- Vulnerabilities will be **fixed privately** and then released as a new version.
+- After the fix is released, I will typically publish a short security advisory (via GitHub Security Advisories) without revealing exploit details.
+- Public disclosure can be coordinated if you request it.
+
+## Recognition
+
+I appreciate security researchers who follow responsible disclosure. Contributors who report valid, previously unknown vulnerabilities may be acknowledged in the project's README or release notes (unless they prefer to stay anonymous).
+
+---
+
+*Thank you for helping keep this project safe!*
--- a/2
+++ b/2
@@ -1 +1 @@
-2.5.1
+4.3.0
--- a/app/handler.go
+++ b/app/handler.go
@@ -3,9 +3,17 @@ package app
 import (
 	"net/http"

+	"ds2api/internal/config"
 	"ds2api/internal/server"
 )

 func NewHandler() http.Handler {
-	return server.NewApp().Router
+	app, err := server.NewApp()
+	if err != nil {
+		config.Logger.Error("[app] init failed", "error", err)
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			server.WriteUnhandledError(w, err)
+		})
+	}
+	return app.Router
 }
--- a/cmd/ds2api-tests/main.go
+++ b/cmd/ds2api-tests/main.go
@@ -30,8 +30,8 @@ func main() {
 	opts.Timeout = time.Duration(timeoutSeconds) * time.Second

 	if err := testsuite.Run(context.Background(), opts); err != nil {
-		fmt.Fprintln(os.Stderr, err.Error())
+		_, _ = fmt.Fprintln(os.Stderr, err.Error())
 		os.Exit(1)
 	}
-	fmt.Fprintln(os.Stdout, "testsuite completed successfully")
+	_, _ = fmt.Fprintln(os.Stdout, "testsuite completed successfully")
 }
--- a/cmd/ds2api/main.go
+++ b/cmd/ds2api/main.go
@@ -18,17 +18,26 @@ import (
 )

 func main() {
+	if err := config.LoadDotEnv(); err != nil {
+		config.Logger.Warn("[dotenv] load failed", "error", err)
+	}
+	config.RefreshLogger()
 	webui.EnsureBuiltOnStartup()
 	_ = auth.AdminKey()
-	app := server.NewApp()
+	app, err := server.NewApp()
+	if err != nil {
+		config.Logger.Error("server initialization failed", "error", err)
+		os.Exit(1)
+	}
 	port := strings.TrimSpace(os.Getenv("PORT"))
 	if port == "" {
 		port = "5001"
 	}

 	srv := &http.Server{
-		Addr:    "0.0.0.0:" + port,
-		Handler: app.Router,
+		Addr:              "0.0.0.0:" + port,
+		Handler:           app.Router,
+		ReadHeaderTimeout: 5 * time.Second,
 	}
 	localURL := fmt.Sprintf("http://127.0.0.1:%s", port)
 	lanIP := detectLANIPv4()
--- a/config.example.json
+++ b/config.example.json
@@ -5,14 +5,29 @@
    "your-api-key-1",
    "your-api-key-2"
  ],
+  "api_keys": [
+    {
+      "key": "your-api-key-1",
+      "name": "主 API Key",
+      "remark": "给 OpenAI 客户端使用"
+    },
+    {
+      "key": "your-api-key-2",
+      "name": "备用 API Key",
+      "remark": "压测或临时调试"
+    }
+  ],
  "accounts": [
    {
      "_comment": "邮箱登录方式",
+      "name": "主账号",
+      "remark": "优先用于生产流量",
      "email": "example1@example.com",
      "password": "your-password-1"
    },
    {
      "_comment": "邮箱登录方式 - 账号2",
+      "name": "备用账号",
      "email": "example2@example.com",
      "password": "your-password-2"
    },
@@ -23,36 +38,39 @@
    }
  ],
  "model_aliases": {
-    "gpt-4o": "deepseek-chat",
-    "gpt-5-codex": "deepseek-reasoner",
-    "o3": "deepseek-reasoner"
+    "gpt-4o": "deepseek-v4-flash",
+    "gpt-5.5": "deepseek-v4-flash",
+    "gpt-5.3-codex": "deepseek-v4-pro",
+    "o3": "deepseek-v4-pro"
  },
  "compat": {
-    "wide_input_strict_output": true
-  },
-  "toolcall": {
-    "mode": "feature_match",
-    "early_emit_confidence": "high"
+    "wide_input_strict_output": true,
+    "strip_reference_markers": true
  },
  "responses": {
    "store_ttl_seconds": 900
  },
+  "current_input_file": {
+    "enabled": true,
+    "min_chars": 0
+  },
+  "thinking_injection": {
+    "enabled": true,
+    "prompt": ""
+  },
  "embeddings": {
    "provider": "deterministic"
  },
-  "claude_mapping": {
-    "fast": "deepseek-chat",
-    "slow": "deepseek-reasoner"
-  },
  "admin": {
    "jwt_expire_hours": 24
  },
  "runtime": {
    "account_max_inflight": 2,
    "account_max_queue": 0,
-    "global_max_inflight": 0
+    "global_max_inflight": 0,
+    "token_refresh_interval_hours": 6
  },
  "auto_delete": {
-    "sessions": false
+    "mode": "none"
  }
 }
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -16,7 +16,8 @@ services:
    container_name: ds2api-dev
    command: ["go", "run", "./cmd/ds2api"]
    ports:
-      - "${PORT:-5001}:${PORT:-5001}"
+      # Host port is configurable via DS2API_HOST_PORT; container port stays fixed at 5001.
+      - "${DS2API_HOST_PORT:-6011}:5001"
    env_file:
      - .env
    environment:
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,14 +1,17 @@
 services:
  ds2api:
    image: ghcr.io/cjackhwang/ds2api:latest
-    container_name: ds2api
-    restart: always
-    ports:
-      - "6011:5001"
-    volumes:
-      - ./config.json:/app/config.json    # 配置文件
-      - ./.env:/app/.env                  # 环境变量
-    environment:
-      - TZ=Asia/Shanghai
-      - LOG_LEVEL=INFO
-      - DS2API_ADMIN_KEY=${DS2API_ADMIN_KEY:-ds2api}
+    container_name: ds2api
+    restart: always
+    env_file:
+      - .env
+    ports:
+      # Host port is configurable via DS2API_HOST_PORT; the container side is fixed at 5001, so keep PORT=5001 in .env.
+      - "${DS2API_HOST_PORT:-6011}:5001"
+    volumes:
+      - ./config.json:/data/config.json   # 配置文件（持久化推荐路径）
+    environment:
+      - TZ=Asia/Shanghai
+      - LOG_LEVEL=INFO
+      - DS2API_ADMIN_KEY=${DS2API_ADMIN_KEY:-ds2api}
+      - DS2API_CONFIG_PATH=/data/config.json
--- a/docs/ARCHITECTURE.en.md
+++ b/docs/ARCHITECTURE.en.md
@@ -0,0 +1,196 @@
+# DS2API Architecture & Project Layout
+
+Language: [中文](ARCHITECTURE.md) | [English](ARCHITECTURE.en.md)
+
+> This file is the single source of truth for the project's directory layout, module boundaries, and main request flow.
+
+## 1. Top-level Layout (core directories)
+
+> Note: this lists the main business directories (excluding metadata/dependency directories such as `.git/` and `webui/node_modules/`) and annotates each folder with its purpose. For newly added directories, treat the code tree as authoritative; this document is not meant to be a per-file inventory.
+
+```text
+ds2api/
+├── .github/                              # GitHub collaboration and CI config
+│   ├── ISSUE_TEMPLATE/                   # Issue templates
+│   └── workflows/                        # GitHub Actions workflows
+├── api/                                  # Serverless entrypoints (Vercel Go/Node)
+├── app/                                  # Application-level handler assembly
+├── cmd/                                  # Executable entrypoints
+│   ├── ds2api/                           # Main service bootstrap
+│   └── ds2api-tests/                     # E2E testsuite CLI bootstrap
+├── docs/                                 # Project documentation
+├── internal/                             # Core implementation (non-public packages)
+│   ├── account/                          # Account pool, inflight slots, waiting queue
+│   ├── auth/                             # Auth/JWT/credential resolution
+│   ├── chathistory/                      # Server-side conversation history storage/query
+│   ├── claudeconv/                       # Claude message conversion helpers
+│   ├── compat/                           # Compatibility and regression helpers
+│   ├── config/                           # Config loading/validation/hot reload
+│   ├── deepseek/                         # DeepSeek upstream client/protocol/transport
+│   │   ├── client/                       # Login/session/completion/upload/delete calls
+│   │   ├── protocol/                     # DeepSeek URLs, constants, skip path/pattern
+│   │   └── transport/                    # DeepSeek transport details
+│   ├── devcapture/                       # Dev capture and troubleshooting
+│   ├── format/                           # Response formatting layer
+│   │   ├── claude/                       # Claude output formatting
+│   │   └── openai/                       # OpenAI output formatting
+│   ├── httpapi/                          # HTTP surfaces: OpenAI/Claude/Gemini/Admin
+│   │   ├── admin/                        # Admin API root assembly and resource packages
+│   │   ├── claude/                       # Claude HTTP protocol adapter
+│   │   ├── gemini/                       # Gemini HTTP protocol adapter
+│   │   └── openai/                       # OpenAI HTTP surface
+│   │       ├── chat/                     # Chat Completions execution entrypoint
+│   │       ├── responses/                # Responses API and response store
+│   │       ├── files/                    # Files API and inline-file preprocessing
+│   │       ├── embeddings/               # Embeddings API
+│   │       ├── history/                  # OpenAI context file handling
+│   │       └── shared/                   # OpenAI HTTP errors/models/tool formatting
+│   ├── js/                               # Node runtime related logic
+│   │   ├── chat-stream/                  # Node streaming bridge
+│   │   ├── helpers/                      # JS helper modules
+│   │   │   └── stream-tool-sieve/        # JS implementation of tool sieve
+│   │   └── shared/                       # Shared semantics between Go/Node
+│   ├── prompt/                           # Prompt composition
+│   ├── promptcompat/                     # API request -> DeepSeek web-chat plain-text compatibility
+│   ├── rawsample/                        # Raw sample read/write and management
+│   ├── server/                           # Router and middleware assembly
+│   │   └── data/                         # Router/runtime helper data
+│   ├── sse/                              # SSE parsing utilities
+│   ├── stream/                           # Unified stream consumption engine
+│   ├── testsuite/                        # Testsuite execution framework
+│   ├── textclean/                        # Text cleanup
+│   ├── toolcall/                         # Tool-call parsing and repair
+│   ├── toolstream/                       # Go streaming tool-call anti-leak and delta detection
+│   ├── translatorcliproxy/               # Cross-protocol translation bridge
+│   ├── util/                             # Shared utility helpers
+│   ├── version/                          # Version query/compare
+│   └── webui/                            # WebUI static hosting logic
+├── plans/                                # Stage plans and manual QA records
+├── pow/                                  # PoW standalone implementation + benchmarks
+├── scripts/                              # Build/release helper scripts
+├── tests/                                # Test assets and scripts
+│   ├── compat/                           # Compatibility fixtures + expected outputs
+│   │   ├── expected/                     # Expected output samples
+│   │   └── fixtures/                     # Fixture inputs
+│   │       ├── sse_chunks/               # SSE chunk fixtures
+│   │       └── toolcalls/                # Tool-call fixtures
+│   ├── node/                             # Node unit tests
+│   ├── raw_stream_samples/               # Upstream raw SSE samples
+│   │   ├── content-filter-trigger-20260405-jwt3/          # Content-filter terminal sample
+│   │   ├── continue-thinking-snapshot-replay-20260405/    # Continue-thinking sample
+│   │   ├── guangzhou-weather-reasoner-search-20260404/    # Search/reference sample
+│   │   ├── markdown-format-example-20260405/              # Markdown sample
+│   │   └── markdown-format-example-20260405-spacefix/     # Space-fix sample
+│   ├── scripts/                          # Test entry scripts
+│   └── tools/                            # Testing helper tools
+└── webui/                                # React admin console source
+    ├── public/                           # Static assets
+    └── src/                              # Frontend source code
+        ├── app/                          # Routing/state scaffolding
+        ├── components/                   # Shared UI components
+        ├── features/                     # Feature modules
+        │   ├── account/                  # Account management page
+        │   ├── apiTester/                # API tester page
+        │   ├── settings/                 # Settings page
+        │   └── vercel/                   # Vercel sync page
+        ├── layout/                       # Layout components
+        ├── locales/                      # i18n strings
+        └── utils/                        # Frontend utilities
+```
+
+## 2. Primary Request Flow
+
+```mermaid
+flowchart LR
+    C[Client / SDK] --> R[internal/server/router.go]
+
+    subgraph HTTP[HTTP API surface]
+        OA[internal/httpapi/openai]
+        CHAT[openai/chat]
+        RESP[openai/responses]
+        FILES[openai/files + embeddings]
+        CA[internal/httpapi/claude]
+        GA[internal/httpapi/gemini]
+        AD[internal/httpapi/admin/*]
+        WEB[internal/webui static admin]
+    end
+
+    subgraph COMPAT[Prompt compatibility core]
+        PC[internal/promptcompat]
+        PROMPT[internal/prompt]
+        HIST[internal/httpapi/openai/history]
+    end
+
+    subgraph RUNTIME[Shared runtime]
+        AUTH[internal/auth]
+        POOL[internal/account queue + concurrency]
+        STREAM[internal/stream + internal/sse]
+        TOOL[internal/toolcall + internal/toolstream]
+        DS[internal/deepseek/client]
+        POW[pow + internal/deepseek/protocol]
+    end
+
+    subgraph NODE[Vercel Node Runtime]
+        NCS[api/chat-stream.js]
+        JS[internal/js/chat-stream + stream-tool-sieve]
+    end
+
+    R --> OA --> CHAT
+    OA --> RESP
+    OA --> FILES
+    R --> CA
+    R --> GA
+    R --> AD
+    R --> WEB
+    R -.Vercel stream.-> NCS
+
+    CA --> PC
+    GA --> PC
+    CHAT --> PC
+    RESP --> PC
+    PC --> PROMPT
+    PC -.long history.-> HIST
+    PC --> AUTH
+
+    NCS -.Go prepare/release.-> CHAT
+    NCS --> JS
+    JS --> TOOL
+
+    AUTH --> POOL
+    CHAT --> STREAM
+    RESP --> STREAM
+    STREAM --> TOOL
+    POOL --> DS
+    DS --> POW
+    DS --> U[DeepSeek upstream]
+```
+
+## 3. Responsibilities in `internal/`
+
+- `internal/server`: router tree + middlewares (health, protocol routes, Admin/WebUI).
+- `internal/httpapi/openai/*`: OpenAI HTTP surface split into chat, responses, files, embeddings, history, and shared packages; chat/responses share the promptcompat, stream, and toolcall semantics.
+- `internal/httpapi/{claude,gemini}`: protocol wrappers that normalize into the same prompt compatibility semantics without duplicating upstream execution.
+- `internal/promptcompat`: compatibility core for turning OpenAI/Claude/Gemini requests into DeepSeek web-chat plain-text context.
+- `internal/translatorcliproxy`: structure translation between Claude/Gemini and OpenAI.
+- `internal/deepseek/{client,protocol,transport}`: upstream requests, sessions, PoW adaptation, protocol constants, and transport details.
+- `internal/js/chat-stream` + `api/chat-stream.js`: Vercel Node streaming bridge; Go prepare/release owns auth, account lease, and completion payload assembly, while Node relays real-time SSE with Go-aligned finalization and tool sieve semantics.
+- `internal/stream` + `internal/sse`: Go stream parsing and incremental assembly.
+- `internal/toolcall` + `internal/toolstream`: DSML shell compatibility plus canonical XML tool-call parsing and anti-leak sieve; DSML is normalized back to XML at the entrypoint, and internal parsing remains XML-based.
+- `internal/httpapi/admin/*`: Admin API root assembly plus auth/accounts/config/settings/proxies/rawsamples/vercel/history/devcapture/version resource packages.
+- `internal/chathistory`: server-side conversation history persistence, pagination, detail lookup, and retention policy.
+- `internal/config`: config loading/validation + runtime settings hot-reload.
+- `internal/account`: managed account pool, inflight slots, waiting queue.
+
+## 4. WebUI Runtime Relation
+
+- `webui/` stores frontend source (Vite + React).
+- Runtime serves static output from `static/admin`.
+- On first local startup, if `static/admin` is missing, DS2API may auto-build it (Node.js required).
+
+## 5. Documentation Split Strategy
+
+- Onboarding & quick start: `README.MD` / `README.en.md`
+- Architecture & layout: `docs/ARCHITECTURE*.md` (this file)
+- API contracts: `API.md` / `API.en.md`
+- Deployment/testing/contributing: `docs/DEPLOY*`, `docs/TESTING.md`, `docs/CONTRIBUTING*`
+- Deep topics: `docs/toolcall-semantics.md`, `docs/DeepSeekSSE行为结构说明-2026-04-05.md`
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -0,0 +1,196 @@
+# DS2API 架构与项目结构说明
+
+语言 / Language: [中文](ARCHITECTURE.md) | [English](ARCHITECTURE.en.md)
+
+> 本文档用于集中维护“代码目录结构 + 模块边界 + 主链路调用关系”。
+
+## 1. 顶层目录结构（核心目录）
+
+> 说明：以下为仓库内主要业务目录（排除 `.git/` 与 `webui/node_modules/` 这类依赖/元数据目录），并标注每个文件夹作用。新增目录以代码为准，不要求在本文做逐文件展开。
+
+```text
+ds2api/
+├── .github/                              # GitHub 协作与 CI 配置
+│   ├── ISSUE_TEMPLATE/                   # Issue 模板
+│   └── workflows/                        # GitHub Actions 工作流
+├── api/                                  # Serverless 入口（Vercel Go/Node）
+├── app/                                  # 应用级 handler 装配层
+├── cmd/                                  # 可执行程序入口
+│   ├── ds2api/                           # 主服务启动入口
+│   └── ds2api-tests/                     # E2E 测试集 CLI 入口
+├── docs/                                 # 项目文档目录
+├── internal/                             # 核心业务实现（不对外暴露）
+│   ├── account/                          # 账号池、并发槽位、等待队列
+│   ├── auth/                             # 鉴权/JWT/凭证解析
+│   ├── chathistory/                      # 服务器端对话记录存储与查询
+│   ├── claudeconv/                       # Claude 消息格式转换工具
+│   ├── compat/                           # 兼容性辅助与回归支持
+│   ├── config/                           # 配置加载、校验、热更新
+│   ├── deepseek/                         # DeepSeek 上游 client/protocol/transport
+│   │   ├── client/                       # 登录、会话、completion、上传/删除等上游调用
+│   │   ├── protocol/                     # DeepSeek URL、常量、skip path/pattern
+│   │   └── transport/                    # DeepSeek 传输层细节
+│   ├── devcapture/                       # 开发抓包与调试采集
+│   ├── format/                           # 响应格式化层
+│   │   ├── claude/                       # Claude 输出格式化
+│   │   └── openai/                       # OpenAI 输出格式化
+│   ├── httpapi/                          # HTTP surface：OpenAI/Claude/Gemini/Admin
+│   │   ├── admin/                        # Admin API 根装配与资源子包
+│   │   ├── claude/                       # Claude HTTP 协议适配
+│   │   ├── gemini/                       # Gemini HTTP 协议适配
+│   │   └── openai/                       # OpenAI HTTP surface
+│   │       ├── chat/                     # Chat Completions 执行入口
+│   │       ├── responses/                # Responses API 与 response store
+│   │       ├── files/                    # Files API 与 inline file 预处理
+│   │       ├── embeddings/               # Embeddings API
+│   │       ├── history/                  # OpenAI 上下文文件处理
+│   │       └── shared/                   # OpenAI HTTP 公共错误/模型/工具格式
+│   ├── js/                               # Node Runtime 相关逻辑
+│   │   ├── chat-stream/                  # Node 流式输出桥接
+│   │   ├── helpers/                      # JS 辅助函数
+│   │   │   └── stream-tool-sieve/        # Tool sieve JS 实现
+│   │   └── shared/                       # Go/Node 共用语义片段
+│   ├── prompt/                           # Prompt 组装
+│   ├── promptcompat/                     # API 请求到 DeepSeek 网页纯文本上下文兼容层
+│   ├── rawsample/                        # raw sample 读写与管理
+│   ├── server/                           # 路由与中间件装配
+│   │   └── data/                         # 路由/运行时辅助数据
+│   ├── sse/                              # SSE 解析工具
+│   ├── stream/                           # 统一流式消费引擎
+│   ├── testsuite/                        # 测试集执行框架
+│   ├── textclean/                        # 文本清洗
+│   ├── toolcall/                         # 工具调用解析与修复
+│   ├── toolstream/                       # Go 流式 tool call 防泄漏与增量检测
+│   ├── translatorcliproxy/               # 多协议互转桥
+│   ├── util/                             # 通用工具函数
+│   ├── version/                          # 版本查询/比较
+│   └── webui/                            # WebUI 静态托管相关逻辑
+├── plans/                                # 阶段计划与人工验收记录
+├── pow/                                  # PoW 独立实现与基准
+├── scripts/                              # 构建/发布/辅助脚本
+├── tests/                                # 测试资源与脚本
+│   ├── compat/                           # 兼容性夹具与期望输出
+│   │   ├── expected/                     # 预期结果样本
+│   │   └── fixtures/                     # 测试输入夹具
+│   │       ├── sse_chunks/               # SSE chunk 夹具
+│   │       └── toolcalls/                # toolcall 夹具
+│   ├── node/                             # Node 单元测试
+│   ├── raw_stream_samples/               # 上游原始 SSE 样本
+│   │   ├── content-filter-trigger-20260405-jwt3/          # 风控终态样本
+│   │   ├── continue-thinking-snapshot-replay-20260405/    # continue 样本
+│   │   ├── guangzhou-weather-reasoner-search-20260404/    # 搜索+引用样本
+│   │   ├── markdown-format-example-20260405/              # Markdown 样本
+│   │   └── markdown-format-example-20260405-spacefix/     # 空格修复样本
+│   ├── scripts/                          # 测试脚本入口
+│   └── tools/                            # 测试辅助工具
+└── webui/                                # React 管理台源码
+    ├── public/                           # 静态资源
+    └── src/                              # 前端源码
+        ├── app/                          # 路由/状态框架
+        ├── components/                   # 共享组件
+        ├── features/                     # 功能模块
+        │   ├── account/                  # 账号管理页面
+        │   ├── apiTester/                # API 测试页面
+        │   ├── settings/                 # 设置页面
+        │   └── vercel/                   # Vercel 同步页面
+        ├── layout/                       # 布局组件
+        ├── locales/                      # 国际化文案
+        └── utils/                        # 前端工具函数
+```
+
+## 2. 请求主链路
+
+```mermaid
+flowchart LR
+    C[Client / SDK] --> R[internal/server/router.go]
+
+    subgraph HTTP[HTTP API surface]
+        OA[internal/httpapi/openai]
+        CHAT[openai/chat]
+        RESP[openai/responses]
+        FILES[openai/files + embeddings]
+        CA[internal/httpapi/claude]
+        GA[internal/httpapi/gemini]
+        AD[internal/httpapi/admin/*]
+        WEB[internal/webui static admin]
+    end
+
+    subgraph COMPAT[Prompt compatibility core]
+        PC[internal/promptcompat]
+        PROMPT[internal/prompt]
+        HIST[internal/httpapi/openai/history]
+    end
+
+    subgraph RUNTIME[Shared runtime]
+        AUTH[internal/auth]
+        POOL[internal/account queue + concurrency]
+        STREAM[internal/stream + internal/sse]
+        TOOL[internal/toolcall + internal/toolstream]
+        DS[internal/deepseek/client]
+        POW[pow + internal/deepseek/protocol]
+    end
+
+    subgraph NODE[Vercel Node Runtime]
+        NCS[api/chat-stream.js]
+        JS[internal/js/chat-stream + stream-tool-sieve]
+    end
+
+    R --> OA --> CHAT
+    OA --> RESP
+    OA --> FILES
+    R --> CA
+    R --> GA
+    R --> AD
+    R --> WEB
+    R -.Vercel stream.-> NCS
+
+    CA --> PC
+    GA --> PC
+    CHAT --> PC
+    RESP --> PC
+    PC --> PROMPT
+    PC -.长历史.-> HIST
+    PC --> AUTH
+
+    NCS -.Go prepare/release.-> CHAT
+    NCS --> JS
+    JS --> TOOL
+
+    AUTH --> POOL
+    CHAT --> STREAM
+    RESP --> STREAM
+    STREAM --> TOOL
+    POOL --> DS
+    DS --> POW
+    DS --> U[DeepSeek upstream]
+```
+
+## 3. internal/ 子模块职责
+
+- `internal/server`：路由树和中间件挂载（健康检查、协议入口、Admin/WebUI）。
+- `internal/httpapi/openai/*`：OpenAI HTTP surface，按 chat、responses、files、embeddings、history、shared 拆分；chat/responses 共享 promptcompat、stream、toolcall 等核心语义。
+- `internal/httpapi/{claude,gemini}`：协议输入输出适配，归一到同一套 prompt compatibility 语义，不重复实现上游调用逻辑。
+- `internal/promptcompat`：OpenAI/Claude/Gemini 请求到 DeepSeek 网页纯文本上下文的兼容内核。
+- `internal/translatorcliproxy`：Claude/Gemini 与 OpenAI 结构互转。
+- `internal/deepseek/{client,protocol,transport}`：上游请求、会话、PoW 适配、协议常量与传输层。
+- `internal/js/chat-stream` + `api/chat-stream.js`：Vercel Node 流式桥；Go prepare/release 管理鉴权、账号租约和 completion payload，Node 侧负责实时 SSE 转发并保持 Go 对齐的终结态和 tool sieve 语义。
+- `internal/stream` + `internal/sse`：Go 流式解析与增量处理。
+- `internal/toolcall` + `internal/toolstream`：DSML 外壳兼容与 canonical XML 工具调用解析、防泄漏筛分；DSML 会在入口归一化回 XML，内部仍按 XML 语义解析。
+- `internal/httpapi/admin/*`：Admin API 根装配与 auth/accounts/config/settings/proxies/rawsamples/vercel/history/devcapture/version 等资源子包。
+- `internal/chathistory`：服务器端对话记录持久化、分页、单条详情和保留策略。
+- `internal/config`：配置加载、校验、运行时 settings 热更新。
+- `internal/account`：托管账号池、并发槽位、等待队列。
+
+## 4. WebUI 与运行时关系
+
+- `webui/` 是前端源码（Vite + React）。
+- 运行时托管目录是 `static/admin`（构建产物）。
+- 本地首次启动若 `static/admin` 缺失，会尝试自动构建（依赖 Node.js）。
+
+## 5. 文档拆分策略
+
+- 总览与快速开始：`README.MD` / `README.en.md`
+- 架构与目录：`docs/ARCHITECTURE*.md`（本文件）
+- 接口协议：`API.md` / `API.en.md`
+- 部署、测试、贡献：`docs/DEPLOY*`、`docs/TESTING.md`、`docs/CONTRIBUTING*`
+- 专题：`docs/toolcall-semantics.md`、`docs/DeepSeekSSE行为结构说明-2026-04-05.md`
--- a/docs/CONTRIBUTING.en.md
+++ b/docs/CONTRIBUTING.en.md
@@ -8,8 +8,8 @@ Thanks for your interest in contributing to DS2API!

 ### Prerequisites

- Go 1.24+
- Node.js 20+ (for WebUI development)
+- Go 1.26+
+- Node.js `20.19+` or `22.12+` (for WebUI development)
 - npm (bundled with Node.js)

 ### Backend Development
@@ -25,7 +25,8 @@ cp config.example.json config.json

 # 3. Run backend
 go run ./cmd/ds2api
-# Default: http://localhost:5001
+# Local access: http://127.0.0.1:5001
+# Actual bind: 0.0.0.0:5001, so LAN access is available via your private IP
 ```

 ### Frontend Development (WebUI)
@@ -35,11 +36,12 @@ go run ./cmd/ds2api
 cd webui

 # 2. Install dependencies
-npm install
+npm ci

 # 3. Start dev server (hot reload)
 npm run dev
 # Default: http://localhost:5173, auto-proxies API to backend
+# Vite's server.host is not set to 0.0.0.0, so the dev server is not exposed on the LAN by default
 ```

 WebUI tech stack:
@@ -57,10 +59,12 @@ docker-compose -f docker-compose.dev.yml up

 | Language | Standards |
 | --- | --- |
-| **Go** | Run `gofmt` and ensure `go test ./...` passes before committing |
+| **Go** | Run `gofmt -w` after editing Go files; before committing, run `./scripts/lint.sh` (format check + golangci-lint) |
 | **JavaScript/React** | Follow existing project style (functional components) |
 | **Commit messages** | Use semantic prefixes: `feat:`, `fix:`, `docs:`, `refactor:`, `style:`, `perf:`, `chore:` |

+Do not silently ignore cleanup errors from I/O-style calls such as `Close`, `Flush`, or `Sync`; return them when possible, otherwise log them explicitly.
+
 ## Submitting a PR

 1. Fork the repo
@@ -83,61 +87,24 @@ Manually build WebUI to `static/admin/`:
 ## Running Tests

 ```bash
-# Go + Node unit tests (recommended)
+# Local PR gates (kept aligned with the quality-gates workflow)
+./scripts/lint.sh
+./tests/scripts/check-refactor-line-gate.sh
 ./tests/scripts/run-unit-all.sh
+npm run build --prefix webui

-# End-to-end live tests (real accounts)
+# End-to-end live tests (real accounts; recommended for releases or high-risk changes)
 ./tests/scripts/run-live.sh
 ```

 ## Project Structure

-```text
-ds2api/
-├── cmd/
-│   ├── ds2api/              # Local/container entrypoint
-│   └── ds2api-tests/        # End-to-end testsuite entrypoint
-├── api/
-│   ├── index.go             # Vercel Serverless Go entry
-│   ├── chat-stream.js       # Vercel Node.js stream relay
-│   └── (rewrite targets in vercel.json)
-├── internal/
-│   ├── account/             # Account pool and concurrency queue
-│   ├── adapter/
-│   │   ├── openai/          # OpenAI adapter
-│   │   ├── claude/          # Claude adapter
-│   │   └── gemini/          # Gemini adapter
-│   ├── admin/               # Admin API handlers
-│   ├── auth/                # Auth and JWT
-│   ├── claudeconv/          # Claude message conversion
-│   ├── compat/              # Compatibility helpers
-│   ├── config/              # Config loading and hot-reload
-│   ├── deepseek/            # DeepSeek client, PoW WASM
-│   ├── js/                  # Node runtime stream/compat logic
-│   ├── devcapture/          # Dev packet capture
-│   ├── format/              # Output formatting
-│   ├── prompt/              # Prompt building
-│   ├── server/              # HTTP routing (chi router)
-│   ├── sse/                 # SSE parsing utilities
-│   ├── stream/              # Unified stream consumption engine
-│   ├── testsuite/           # Testsuite core logic
-│   ├── util/                # Common utilities
-│   └── webui/               # WebUI static hosting
-├── webui/                   # React WebUI source
-│   └── src/
-│       ├── app/             # Routing, auth, config state
-│       ├── features/        # Feature modules
-│       ├── components/      # Shared components
-│       └── locales/         # Language packs
-├── scripts/                 # Build and test scripts
-├── tests/                   # Unit tests, Node tests, and end-to-end tests
-├── plans/                   # Plans, gates, and manual smoke-test records
-├── static/admin/            # WebUI build output (not committed)
-├── Dockerfile               # Multi-stage build
-├── docker-compose.yml       # Production
-├── docker-compose.dev.yml   # Development
-└── vercel.json              # Vercel config
-```
+To avoid documentation drift, directory layout and module responsibilities were moved to:
+
+- [docs/ARCHITECTURE.en.md](./ARCHITECTURE.en.md)
+- [docs/README.md](./README.md)
+
+Before contributing, read the "Primary Request Flow" and "Responsibilities in `internal/`" sections of the architecture doc to work out which modules your change touches.

 ## Reporting Issues

--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -8,8 +8,8 @@

 ### 前置要求

- Go 1.24+
- Node.js 20+（WebUI 开发时）
+- Go 1.26+
+- Node.js `20.19+` 或 `22.12+`（WebUI 开发时）
 - npm（随 Node.js 提供）

 ### 后端开发
@@ -25,7 +25,8 @@ cp config.example.json config.json

 # 3. 启动后端
 go run ./cmd/ds2api
-# 默认监听 http://localhost:5001
+# 本地访问 http://127.0.0.1:5001
+# 实际绑定 0.0.0.0:5001，可通过局域网 IP 访问
 ```

 ### 前端开发（WebUI）
@@ -35,11 +36,12 @@ go run ./cmd/ds2api
 cd webui

 # 2. 安装依赖
-npm install
+npm ci

 # 3. 启动开发服务器（热更新）
 npm run dev
 # 默认监听 http://localhost:5173，自动代理 API 到后端
+# 当前未配置 host: 0.0.0.0，因此默认不对局域网开放
 ```

 WebUI 技术栈：
@@ -57,10 +59,12 @@ docker-compose -f docker-compose.dev.yml up

 | 语言 | 规范 |
 | --- | --- |
-| **Go** | 提交前运行 `gofmt`，确保 `go test ./...` 通过 |
+| **Go** | 修改 Go 文件后运行 `gofmt -w`；提交前运行 `./scripts/lint.sh`（包含格式化检查和 golangci-lint） |
 | **JavaScript/React** | 保持现有代码风格（函数组件） |
 | **提交信息** | 使用语义化前缀：`feat:`、`fix:`、`docs:`、`refactor:`、`style:`、`perf:`、`chore:` |

+I/O 类清理调用（如 `Close`、`Flush`、`Sync`）的错误不要直接忽略；无法向上返回时请显式记录日志。
+
 ## 提交 PR

 1. Fork 仓库
@@ -83,61 +87,24 @@ docker-compose -f docker-compose.dev.yml up
 ## 运行测试

 ```bash
-# Go + Node 单元测试（推荐）
+# PR 本地门禁（与 quality-gates 工作流保持一致）
+./scripts/lint.sh
+./tests/scripts/check-refactor-line-gate.sh
 ./tests/scripts/run-unit-all.sh
+npm run build --prefix webui

-# 端到端全链路测试（真实账号）
+# 端到端全链路测试（真实账号，发布或高风险改动时建议执行）
 ./tests/scripts/run-live.sh
 ```

 ## 项目结构

-```text
-ds2api/
-├── cmd/
-│   ├── ds2api/              # 本地/容器启动入口
-│   └── ds2api-tests/        # 端到端测试集入口
-├── api/
-│   ├── index.go             # Vercel Serverless Go 入口
-│   ├── chat-stream.js       # Vercel Node.js 流式转发
-│   └── (rewrite targets in vercel.json)
-├── internal/
-│   ├── account/             # 账号池与并发队列
-│   ├── adapter/
-│   │   ├── openai/          # OpenAI 兼容适配器
-│   │   ├── claude/          # Claude 兼容适配器
-│   │   └── gemini/          # Gemini 兼容适配器
-│   ├── admin/               # Admin API handlers
-│   ├── auth/                # 鉴权与 JWT
-│   ├── claudeconv/          # Claude 消息格式转换
-│   ├── compat/              # 兼容性辅助
-│   ├── config/              # 配置加载与热更新
-│   ├── deepseek/            # DeepSeek 客户端、PoW WASM
-│   ├── js/                  # Node 运行时流式/兼容逻辑
-│   ├── devcapture/          # 开发抓包
-│   ├── format/              # 输出格式化
-│   ├── prompt/              # Prompt 构建
-│   ├── server/              # HTTP 路由（chi router）
-│   ├── sse/                 # SSE 解析工具
-│   ├── stream/              # 统一流式消费引擎
-│   ├── testsuite/           # 测试集核心逻辑
-│   ├── util/                # 通用工具
-│   └── webui/               # WebUI 静态托管
-├── webui/                   # React WebUI 源码
-│   └── src/
-│       ├── app/             # 路由、鉴权、配置状态
-│       ├── features/        # 业务功能模块
-│       ├── components/      # 通用组件
-│       └── locales/         # 语言包
-├── scripts/                 # 构建与测试脚本
-├── tests/                   # 单元测试、Node 测试与端到端测试
-├── plans/                   # 计划、门禁和手工烟测记录
-├── static/admin/            # WebUI 构建产物（不提交）
-├── Dockerfile               # 多阶段构建
-├── docker-compose.yml       # 生产环境
-├── docker-compose.dev.yml   # 开发环境
-└── vercel.json              # Vercel 配置
-```
+为避免与其他文档重复维护，目录结构与模块职责已迁移到：
+
+- [docs/ARCHITECTURE.md](./ARCHITECTURE.md)
+- [docs/README.md](./README.md)
+
+贡献前建议先阅读架构文档中的“请求主链路”和 `internal/` 模块职责，再定位改动范围。

 ## 问题反馈

--- a/docs/DEPLOY.en.md
+++ b/docs/DEPLOY.en.md
@@ -4,15 +4,18 @@ Language: [中文](DEPLOY.md) | [English](DEPLOY.en.md)

 This guide covers all deployment methods for the current Go-based codebase.

+Doc map: [Index](./README.md) | [Architecture](./ARCHITECTURE.en.md) | [API](../API.en.md) | [Testing](./TESTING.md)
+
 ---

 ## Table of Contents

+- [Recommended deployment priority](#recommended-deployment-priority)
 - [Prerequisites](#0-prerequisites)
- [1. Local Run](#1-local-run)
- [2. Docker Deployment](#2-docker-deployment)
+- [1. Download Release Binaries](#1-download-release-binaries)
+- [2. Docker / GHCR Deployment](#2-docker--ghcr-deployment)
 - [3. Vercel Deployment](#3-vercel-deployment)
- [4. Download Release Binaries](#4-download-release-binaries)
+- [4. Local Run from Source](#4-local-run-from-source)
 - [5. Reverse Proxy (Nginx)](#5-reverse-proxy-nginx)
 - [6. Linux systemd Service](#6-linux-systemd-service)
 - [7. Post-Deploy Checks](#7-post-deploy-checks)
@@ -20,19 +23,29 @@ This guide covers all deployment methods for the current Go-based codebase.

 ---

+## Recommended deployment priority
+
+Recommended order when choosing a deployment method:
+
+1. **Download and run release binaries**: the easiest path for most users because the artifacts are already built.
+2. **Docker / GHCR image deployment**: suitable for containerized, orchestrated, or cloud environments.
+3. **Vercel deployment**: suitable if you already use Vercel and accept its platform constraints.
+4. **Run from source / build locally**: suitable for development, debugging, or when you need to modify the code yourself.
+
+---
+
 ## 0. Prerequisites

 | Dependency | Minimum Version | Notes |
 | --- | --- | --- |
-| Go | 1.24+ | Build backend |
-| Node.js | 20+ | Only needed to build WebUI locally |
+| Go | 1.26+ | Build backend |
+| Node.js | `20.19+` or `22.12+` | Only needed to build WebUI locally |
 | npm | Bundled with Node.js | Install WebUI dependencies |

 Config source (choose one):

 - **File**: `config.json` (recommended for local/Docker)
 - **Environment variable**: `DS2API_CONFIG_JSON` (recommended for Vercel; supports raw JSON or Base64)
- Compatibility note: `CONFIG_JSON` is the legacy fallback variable; `DS2API_CONFIG_JSON` may also contain raw JSON directly

 Unified recommendation (best practice):

@@ -47,75 +60,67 @@ Use `config.json` as the single source of truth:

 ---

-## 1. Local Run
+## 1. Download Release Binaries

-### 1.1 Basic Steps
+Built-in GitHub Actions workflow: `.github/workflows/release-artifacts.yml`
+
+- **Trigger**: by default only on Release `published`; you can also run it manually via `workflow_dispatch` and pass `release_tag` to rerun / backfill
+- **Outputs**: multi-platform binary archives, Linux Docker image export tarballs, and `sha256sums.txt`
+- **Container publishing**: GHCR only (`ghcr.io/cjackhwang/ds2api`)
+
+| Platform | Architecture | Format |
+| --- | --- | --- |
+| Linux | amd64, arm64, armv7 | `.tar.gz` |
+| macOS | amd64, arm64 | `.tar.gz` |
+| Windows | amd64, arm64 | `.zip` |
+
+Each archive includes:
+
+- `ds2api` executable (`ds2api.exe` on Windows)
+- `static/admin/` (built WebUI assets)
+- `config.example.json`, `.env.example`
+- `README.MD`, `README.en.md`, `LICENSE`
+
+### Usage

 ```bash
-# Clone
-git clone https://github.com/CJackHwang/ds2api.git
-cd ds2api
+# 1. Download the archive for your platform
+# 2. Extract
+tar -xzf ds2api_<tag>_linux_amd64.tar.gz
+cd ds2api_<tag>_linux_amd64

-# Copy and edit config
+# 3. Configure
 cp config.example.json config.json
-# Open config.json and fill in:
-#   - keys: your API access keys
-#   - accounts: DeepSeek accounts (email or mobile + password)
+# Edit config.json

-# Start
-go run ./cmd/ds2api
-```
-
-Default address: `http://0.0.0.0:5001` (override with `PORT`).
-
-### 1.2 WebUI Build
-
-On first local startup, if `static/admin/` is missing, DS2API will automatically attempt to build the WebUI (requires Node.js/npm; when dependencies are missing it runs `npm ci` first, then `npm run build -- --outDir static/admin --emptyOutDir`).
-
-Manual build:
-
-```bash
-./scripts/build-webui.sh
-```
-
-Or step by step:
-
-```bash
-cd webui
-npm install
-npm run build
-# Output goes to static/admin/
-```
-
-Control auto-build via environment variable:
-
-```bash
-# Disable auto-build
-DS2API_AUTO_BUILD_WEBUI=false go run ./cmd/ds2api
-
-# Force enable auto-build
-DS2API_AUTO_BUILD_WEBUI=true go run ./cmd/ds2api
-```
-
-### 1.3 Compile to Binary
-
-```bash
-go build -o ds2api ./cmd/ds2api
+# 4. Start
 ./ds2api
 ```

+### Maintainer Release Flow
+
+1. Create and publish a GitHub Release (with tag, for example `vX.Y.Z`)
+2. Wait for the `Release Artifacts` workflow to complete
+3. Download the matching archive from Release Assets
+
 ---

-## 2. Docker Deployment
+## 2. Docker / GHCR Deployment

 ### 2.1 Basic Steps

 ```bash
-# Copy env template
+# Pull prebuilt image
+docker pull ghcr.io/cjackhwang/ds2api:latest
+
+# Copy env template and config file
 cp .env.example .env
+cp config.example.json config.json

 # Edit .env and set at least:
 #   DS2API_ADMIN_KEY=your-admin-key
+# Optionally set the host port:
+#   DS2API_HOST_PORT=6011

 # Start
 docker-compose up -d
@@ -124,7 +129,16 @@ docker-compose up -d
 docker-compose logs -f
 ```

-The default `docker-compose.yml` maps host port `6011` to container port `5001`. If you want `5001` exposed directly, adjust the `ports` mapping.
+The default `docker-compose.yml` directly uses `ghcr.io/cjackhwang/ds2api:latest` and maps host port `6011` to container port `5001`. If you want `5001` exposed directly, set `DS2API_HOST_PORT=5001` (or adjust the `ports` mapping).
+The compose template also defaults to `DS2API_CONFIG_PATH=/data/config.json` with `./config.json:/data/config.json` mounted, so deployments avoid read-only `/app` persistence issues by default.
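+The image pre-creates `/data` and grants it to the non-root `ds2api` user. If you bind-mount a single host file, make sure `config.json` is readable by the container user, and also writable if you rely on runtime persistence. For example, `chmod 644 config.json` grants read access to every UID, but write access still requires the file to be owned by (or otherwise writable for) the container UID; otherwise Linux UID/GID mismatches can still cause `open /data/config.json: permission denied`.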
+Compatibility note: when `DS2API_CONFIG_PATH` is unset and the runtime base dir is `/app`, newer versions prefer `/data/config.json`; if that file is missing but a legacy `/app/config.json` exists, DS2API automatically falls back to the legacy path to avoid post-upgrade config loss.
+
+If you want a pinned version instead of `latest`, you can also pull a specific tag directly:
+
+```bash
+docker pull ghcr.io/cjackhwang/ds2api:v3.0.0
+```

 ### 2.2 Update

@@ -137,7 +151,7 @@ docker-compose up -d --build
 The `Dockerfile` now provides two image paths:

 1. **Default local/dev path (`runtime-from-source`)**: a three-stage build (WebUI build + Go build + runtime).
-2. **Release path (`runtime-from-dist`)**: CI first creates `dist/ds2api_<tag>_linux_<arch>.tar.gz`, then Docker directly reuses the binary and `static/admin` assets from those release archives, without running `npm build`/`go build` again.
+2. **Release path (`runtime-from-dist`)**: the release workflow first creates tag-named release archives, then copies the Linux bundles to `dist/docker-input/linux_amd64.tar.gz` / `linux_arm64.tar.gz`; Docker consumes those prepared inputs directly, without rerunning `npm run build`/`go build`.

 The release path keeps Docker images aligned with release archives and reduces duplicate build work.

@@ -184,6 +198,11 @@ Notes:

 - **Port**: DS2API listens on `5001` by default; the template sets `PORT=5001`.
 - **Persistent config**: the template mounts `/data` and sets `DS2API_CONFIG_PATH=/data/config.json`. After importing config in Admin UI, it will be written and persisted to this path.
+- **`open /app/config.json: permission denied`**: this means the instance is trying to persist runtime tokens to a read-only path (commonly `/app` inside the image).  
+  Recommended handling:
+  1. Set a writable path explicitly: `DS2API_CONFIG_PATH=/data/config.json` (and mount a persistent volume at `/data`);
+  2. If you bootstrap with `DS2API_CONFIG_JSON` and do not need runtime writeback, keep env-backed mode (`DS2API_ENV_WRITEBACK` disabled);
+  3. In current versions, login/session tests continue even if persistence fails; the Admin API returns a warning that token persistence failed and that the token is held in memory only, so it is lost on restart.
 - **Build version**: Zeabur / regular `docker build` does not require `BUILD_VERSION` by default. The image prefers that build arg when provided, and automatically falls back to the repo-root `VERSION` file when it is absent.
 - **First login**: after deployment, open `/admin` and login with `DS2API_ADMIN_KEY` shown in Zeabur env/template instructions (recommended: rotate to a strong secret after first login).

@@ -197,10 +216,10 @@ Notes:
 2. **Import** the project on Vercel
 3. **Set environment variables** (minimum required: one variable):

-   | Variable | Description |
-   | --- | --- |
-   | `DS2API_ADMIN_KEY` | Admin key (required) |
-   | `DS2API_CONFIG_JSON` | Config content, raw JSON or Base64 (optional, recommended) |
+| Variable | Description |
+| --- | --- |
+| `DS2API_ADMIN_KEY` | Admin key (required) |
+| `DS2API_CONFIG_JSON` | Config content, raw JSON or Base64 (optional, recommended) |

 4. **Deploy**

@@ -243,19 +262,19 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx   # optional for personal accounts
 | Variable | Description | Default |
 | --- | --- | --- |
 | `DS2API_ACCOUNT_MAX_INFLIGHT` | Per-account inflight limit | `2` |
-| `DS2API_ACCOUNT_CONCURRENCY` | Alias (legacy compat) | — |
 | `DS2API_ACCOUNT_MAX_QUEUE` | Waiting queue limit | `recommended_concurrency` |
-| `DS2API_ACCOUNT_QUEUE_SIZE` | Alias (legacy compat) | — |
 | `DS2API_GLOBAL_MAX_INFLIGHT` | Global inflight limit | `recommended_concurrency` |
-| `DS2API_MAX_INFLIGHT` | Alias (legacy compat) | — |
+| `DS2API_ENV_WRITEBACK` | When `DS2API_CONFIG_JSON` is present, automatically write it to `DS2API_CONFIG_PATH` and switch to file-backed mode once the write succeeds (enable with `1`/`true`/`yes`/`on`) | Disabled |
 | `DS2API_VERCEL_INTERNAL_SECRET` | Hybrid streaming internal auth | Falls back to `DS2API_ADMIN_KEY` |
 | `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | Stream lease TTL | `900` |
+| `DS2API_RAW_STREAM_SAMPLE_ROOT` | Raw stream sample root for saving/reading samples | `tests/raw_stream_samples` |
 | `VERCEL_TOKEN` | Vercel sync token | — |
 | `VERCEL_PROJECT_ID` | Vercel project ID | — |
 | `VERCEL_TEAM_ID` | Vercel team ID | — |
+| `DS2API_CHAT_HISTORY_PATH` | Chat history storage path (must be set to `/tmp/chat_history.json` on Vercel, otherwise unavailable due to read-only filesystem) | `data/chat_history.json` |
 | `DS2API_VERCEL_PROTECTION_BYPASS` | Deployment protection bypass for internal Node→Go calls | — |

-### 3.3 Vercel Architecture
+### 3.4 Vercel Architecture

 ```text
 Request ──────┐
@@ -291,13 +310,14 @@ Vercel Go Runtime applies platform-level response buffering, so this project use

 - `api/chat-stream.js` falls back to Go entry (`?__go=1`) for non-stream requests only
 - Streaming requests (including requests with `tools`) stay on the Node path and use Go-aligned tool-call anti-leak handling
+- The Node stream path also mirrors Go finalization semantics: empty visible output returns the same-shaped error SSE, and empty `content_filter` returns a `content_filter` error
 - WebUI non-stream test calls `?__go=1` directly to avoid Node hop timeout on long requests

 #### Function Duration

 `vercel.json` sets `maxDuration: 300` for both `api/chat-stream.js` and `api/index.go` (subject to your Vercel plan limits).

-### 3.4 Vercel Troubleshooting
+### 3.5 Vercel Troubleshooting

 #### Go Build Failure

@@ -312,7 +332,7 @@ Error: Command failed: go build -ldflags -s -w -o .../bootstrap ...
 1. Open Vercel Project Settings → Build and Development Settings
 2. **Clear** custom Go Build Flags / Build Command (recommended)
 3. If ldflags must be used, set `-ldflags="-s -w"` (ensure it's one argument)
-4. Verify `go.mod` uses a supported version (currently `go 1.24`)
+4. Verify `go.mod` uses a supported version (currently `go 1.26.0`)
 5. Redeploy (recommended: clear cache)

 #### Internal Package Import Error
@@ -341,65 +361,84 @@ If API responses return Vercel HTML `Authentication Required`:
 - **Option B**: Add `x-vercel-protection-bypass` header to requests
 - **Option C**: Set `VERCEL_AUTOMATION_BYPASS_SECRET` (or `DS2API_VERCEL_PROTECTION_BYPASS`) for internal Node→Go calls

-### 3.5 Build Artifacts Not Committed
+#### Chat History Unavailable (read-only file system)
+
+```text
+create chat history dir: mkdir /var/task/data: read-only file system
+```
+
+**Cause**: Vercel Serverless functions have a read-only filesystem (`/var/task`). Chat history fails because it cannot create directories there.
+
+**Fix**: Add the following in Vercel Project Settings → Environment Variables:
+
+```text
+DS2API_CHAT_HISTORY_PATH=/tmp/chat_history.json
+```
+
+`/tmp` is the only writable directory in Vercel Serverless. Data is ephemeral (not persisted across cold starts), but the feature works within a single instance lifetime.
+
+### 3.6 Build Artifacts Not Committed

 - `static/admin` directory is not in Git
 - Vercel / Docker automatically generate WebUI assets during build

 ---

-## 4. Download Release Binaries
+## 4. Local Run from Source

-Built-in GitHub Actions workflow: `.github/workflows/release-artifacts.yml`
-
- **Trigger**: only on Release `published` (no build on normal push)
- **Outputs**: multi-platform binary archives + `sha256sums.txt`
- **Container publishing**: GHCR only (`ghcr.io/cjackhwang/ds2api`)
-
-| Platform | Architecture | Format |
-| --- | --- | --- |
-| Linux | amd64, arm64 | `.tar.gz` |
-| macOS | amd64, arm64 | `.tar.gz` |
-| Windows | amd64 | `.zip` |
-
-Each archive includes:
-
- `ds2api` executable (`ds2api.exe` on Windows)
- `static/admin/` (built WebUI assets)
- `sha3_wasm_bg.7b9ca65ddd.wasm` (optional; binary has embedded fallback)
- `config.example.json`, `.env.example`
- `README.MD`, `README.en.md`, `LICENSE`
-
-### Usage
+### 4.1 Basic Steps

 ```bash
-# 1. Download the archive for your platform
-# 2. Extract
-tar -xzf ds2api_<tag>_linux_amd64.tar.gz
-cd ds2api_<tag>_linux_amd64
+# Clone
+git clone https://github.com/CJackHwang/ds2api.git
+cd ds2api

-# 3. Configure
+# Copy and edit config
 cp config.example.json config.json
-# Edit config.json
+# Open config.json and fill in:
+#   - keys: your API access keys
+#   - accounts: DeepSeek accounts (email or mobile + password)

-# 4. Start
-./ds2api
+# Start
+go run ./cmd/ds2api
 ```

-### Maintainer Release Flow
+Default local access URL: `http://127.0.0.1:5001`; the server actually binds to `0.0.0.0:5001` (override with `PORT`).

-1. Create and publish a GitHub Release (with tag, for example `vX.Y.Z`)
-2. Wait for the `Release Artifacts` workflow to complete
-3. Download the matching archive from Release Assets
+### 4.2 WebUI Build

-### Pull from GHCR (Optional)
+On first local startup, if `static/admin/` is missing, DS2API will automatically attempt to build the WebUI (requires Node.js/npm; when dependencies are missing it runs `npm ci` first, then `npm run build -- --outDir static/admin --emptyOutDir`).
+
+Manual build:

 ```bash
-# latest
-docker pull ghcr.io/cjackhwang/ds2api:latest
+./scripts/build-webui.sh
+```

-# specific version (example)
-docker pull ghcr.io/cjackhwang/ds2api:v2.1.2
+Or step by step:
+
+```bash
+cd webui
+npm ci
+npm run build
+# Output goes to static/admin/
+```
+
+Control auto-build via environment variable:
+
+```bash
+# Disable auto-build
+DS2API_AUTO_BUILD_WEBUI=false go run ./cmd/ds2api
+
+# Force enable auto-build
+DS2API_AUTO_BUILD_WEBUI=true go run ./cmd/ds2api
+```
+
+### 4.3 Compile to Binary
+
+```bash
+go build -o ds2api ./cmd/ds2api
+./ds2api
 ```

 ---
@@ -456,8 +495,6 @@ server {
 # Copy compiled binary and related files to target directory
 sudo mkdir -p /opt/ds2api
 sudo cp ds2api config.json /opt/ds2api/
-# Optional: if you want to use an external WASM file (override the embedded one, from a release package or build output)
-# sudo cp /path/to/sha3_wasm_bg.7b9ca65ddd.wasm /opt/ds2api/
 sudo cp -r static/admin /opt/ds2api/static/admin
 ```

@@ -526,7 +563,7 @@ curl -s http://127.0.0.1:5001/readyz

 # 3. Model list
 curl -s http://127.0.0.1:5001/v1/models
-# Expected: {"object":"list","data":[...]}
+# Expected: {"object":"list","data":[...]} (including `*-nothinking` variants)

 # 4. Admin panel (if WebUI is built)
 curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin
@@ -536,7 +573,7 @@ curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin
 curl http://127.0.0.1:5001/v1/chat/completions \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
-  -d '{"model":"deepseek-chat","messages":[{"role":"user","content":"hello"}]}'
+  -d '{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hello"}]}'
 ```

 ---
@@ -567,4 +604,4 @@ The testsuite automatically performs:
 - ✅ Live scenario verification (OpenAI/Claude/Admin/concurrency/toolcall/streaming)
 - ✅ Full request/response artifact logging for debugging

-For detailed testsuite documentation, see [TESTING.md](TESTING.md).
+For detailed testsuite documentation, see [TESTING.md](TESTING.md). The fixed local PR gates are listed in [TESTING.md](TESTING.md#pr-门禁--pr-gates).
--- a/docs/DEPLOY.md
+++ b/docs/DEPLOY.md
@@ -4,15 +4,18 @@

 本指南基于当前 Go 代码库，详细说明各种部署方式。

+本页导航：[文档总索引](./README.md)｜[架构说明](./ARCHITECTURE.md)｜[接口文档](../API.md)｜[测试指南](./TESTING.md)
+
 ---

 ## 目录

+- [部署方式优先级建议](#部署方式优先级建议)
 - [前置要求](#0-前置要求)
- [一、本地运行](#一本地运行)
- [二、Docker 部署](#二docker-部署)
+- [一、下载 Release 构建包](#一下载-release-构建包)
+- [二、Docker / GHCR 部署](#二docker--ghcr-部署)
 - [三、Vercel 部署](#三vercel-部署)
- [四、下载 Release 构建包](#四下载-release-构建包)
+- [四、本地源码运行](#四本地源码运行)
 - [五、反向代理（Nginx）](#五反向代理nginx)
 - [六、Linux systemd 服务化](#六linux-systemd-服务化)
 - [七、部署后检查](#七部署后检查)
@@ -20,19 +23,29 @@

 ---

+## 部署方式优先级建议
+
+推荐按以下顺序选择部署方式：
+
+1. **下载 Release 构建包运行**：最省事，产物已编译完成，最适合大多数用户。
+2. **Docker / GHCR 镜像部署**：适合需要容器化、编排或云环境部署。
+3. **Vercel 部署**：适合已有 Vercel 环境且接受其平台约束的场景。
+4. **本地源码运行 / 自行编译**：适合开发、调试或需要自行修改代码的场景。
+
+---
+
 ## 0. 前置要求

 | 依赖 | 最低版本 | 说明 |
 | --- | --- | --- |
-| Go | 1.24+ | 编译后端 |
-| Node.js | 20+ | 仅在需要本地构建 WebUI 时 |
+| Go | 1.26+ | 编译后端 |
+| Node.js | `20.19+` 或 `22.12+` | 仅在需要本地构建 WebUI 时 |
 | npm | 随 Node.js 提供 | 安装 WebUI 依赖 |

 配置来源（任选其一）：

 - **文件方式**：`config.json`（推荐本地/Docker 使用）
- **环境变量方式**：`DS2API_CONFIG_JSON`（推荐 Vercel 使用，支持 JSON 字符串或 Base64 编码）
- 兼容写法：`CONFIG_JSON` 是旧版回退变量；`DS2API_CONFIG_JSON` 也可以直接写原始 JSON
+- **环境变量方式**：`DS2API_CONFIG_JSON`（推荐 Vercel 使用，支持直接写原始 JSON 字符串或 Base64 编码）

 统一建议（最优实践）：

@@ -47,75 +60,67 @@ cp config.example.json config.json

 ---

-## 一、本地运行
+## 一、下载 Release 构建包

-### 1.1 基本步骤
+仓库内置 GitHub Actions 工作流：`.github/workflows/release-artifacts.yml`
+
+- **触发条件**：默认仅在 Release `published` 时自动触发；也支持在 Actions 页面手动 `workflow_dispatch`，并填写 `release_tag` 复跑/补发
+- **构建产物**：多平台二进制压缩包、Linux Docker 镜像导出包 + `sha256sums.txt`
+- **容器镜像发布**：仅发布到 GHCR（`ghcr.io/cjackhwang/ds2api`）
+
+| 平台 | 架构 | 文件格式 |
+| --- | --- | --- |
+| Linux | amd64, arm64, armv7 | `.tar.gz` |
+| macOS | amd64, arm64 | `.tar.gz` |
+| Windows | amd64, arm64 | `.zip` |
+
+每个压缩包包含：
+
+- `ds2api` 可执行文件（Windows 为 `ds2api.exe`）
+- `static/admin/`（WebUI 构建产物）
+- `config.example.json`、`.env.example`
+- `README.MD`、`README.en.md`、`LICENSE`
+
+### 使用步骤

 ```bash
-# 克隆仓库
-git clone https://github.com/CJackHwang/ds2api.git
-cd ds2api
+# 1. 下载对应平台的压缩包
+# 2. 解压
+tar -xzf ds2api_<tag>_linux_amd64.tar.gz
+cd ds2api_<tag>_linux_amd64

-# 复制并编辑配置
+# 3. 配置
 cp config.example.json config.json
-# 使用你喜欢的编辑器打开 config.json，填入：
-#   - keys: 你的 API 访问密钥
-#   - accounts: DeepSeek 账号（email 或 mobile + password）
+# 编辑 config.json

-# 启动服务
-go run ./cmd/ds2api
-```
-
-默认监听 `http://0.0.0.0:5001`，可通过 `PORT` 环境变量覆盖。
-
-### 1.2 WebUI 构建
-
-本地首次启动时，若 `static/admin/` 不存在，服务会自动尝试构建 WebUI（需要 Node.js/npm；缺依赖时会先执行 `npm ci`，再执行 `npm run build -- --outDir static/admin --emptyOutDir`）。
-
-你也可以手动构建：
-
-```bash
-./scripts/build-webui.sh
-```
-
-或手动执行：
-
-```bash
-cd webui
-npm install
-npm run build
-# 产物输出到 static/admin/
-```
-
-通过环境变量控制自动构建行为：
-
-```bash
-# 强制关闭自动构建
-DS2API_AUTO_BUILD_WEBUI=false go run ./cmd/ds2api
-
-# 强制开启自动构建
-DS2API_AUTO_BUILD_WEBUI=true go run ./cmd/ds2api
-```
-
-### 1.3 编译为二进制文件
-
-```bash
-go build -o ds2api ./cmd/ds2api
+# 4. 启动
 ./ds2api
 ```

+### 维护者发布步骤
+
+1. 在 GitHub 创建并发布 Release（带 tag，如 `vX.Y.Z`）
+2. 等待 Actions 工作流 `Release Artifacts` 完成
+3. 在 Release 的 Assets 下载对应平台压缩包
+
 ---

-## 二、Docker 部署
+## 二、Docker / GHCR 部署

 ### 2.1 基本步骤

 ```bash
-# 复制环境变量模板
+# 拉取预编译镜像
+docker pull ghcr.io/cjackhwang/ds2api:latest
+
+# 复制环境变量模板和配置文件
 cp .env.example .env
+cp config.example.json config.json

 # 编辑 .env（请改成你的强密码），至少设置：
 #   DS2API_ADMIN_KEY=your-admin-key
+# 如需修改宿主机端口，可额外设置：
+#   DS2API_HOST_PORT=6011

 # 启动
 docker-compose up -d
@@ -124,7 +129,16 @@ docker-compose up -d
 docker-compose logs -f
 ```

-默认 `docker-compose.yml` 会把宿主机 `6011` 映射到容器内的 `5001`。如果你希望直接对外暴露 `5001`，请调整 `ports` 配置。
+默认 `docker-compose.yml` 直接使用 `ghcr.io/cjackhwang/ds2api:latest`，并把宿主机 `6011` 映射到容器内的 `5001`。如果你希望直接对外暴露 `5001`，请设置 `DS2API_HOST_PORT=5001`（或者手动调整 `ports` 配置）。
+Compose 模板还会默认设置 `DS2API_CONFIG_PATH=/data/config.json` 并挂载 `./config.json:/data/config.json`，优先避免 `/app` 只读带来的配置持久化问题。
+镜像内会预创建 `/data` 并授权给非 root 的 `ds2api` 用户；如果你使用 bind mount 单文件，请确保宿主机 `config.json` 可被容器用户读取，需要运行时落盘时还必须可写。例如 `chmod 644 config.json` 只能保证任意 UID 可读，写入仍要求文件属主为容器 UID（或对其可写），否则 Linux UID/GID 不一致时仍可能出现 `open /data/config.json: permission denied`。
+兼容说明：若未设置 `DS2API_CONFIG_PATH` 且运行目录是 `/app`，新版本会优先使用 `/data/config.json`；当该文件不存在但检测到历史 `/app/config.json` 时，会自动回退读取旧路径，避免升级后“配置丢失”。
+
+如需固定版本，也可以直接拉取指定 tag：
+
+```bash
+docker pull ghcr.io/cjackhwang/ds2api:v3.0.0
+```

 ### 2.2 更新

@@ -137,7 +151,7 @@ docker-compose up -d --build
 `Dockerfile` 提供两条构建路径：

 1. **本地/开发默认路径（`runtime-from-source`）**：三阶段构建（WebUI 构建 + Go 构建 + 运行阶段）。
-2. **Release 路径（`runtime-from-dist`）**：CI 先生成 `dist/ds2api_<tag>_linux_<arch>.tar.gz`，再由 Docker 直接复用该发布包内的二进制和 `static/admin` 产物组装运行镜像，不再重复执行 `npm build`/`go build`。
+2. **Release 路径（`runtime-from-dist`）**：发布工作流先生成 tag 命名的 Release 压缩包，再把 Linux 产物复制成 `dist/docker-input/linux_amd64.tar.gz` / `linux_arm64.tar.gz`；Docker 构建阶段直接消费这些输入，不再重复执行 `npm run build`/`go build`。

 Release 路径可确保 Docker 镜像与 release 压缩包使用同一套产物，减少重复构建带来的差异。

@@ -184,6 +198,11 @@ healthcheck:

 - **端口**：服务默认监听 `5001`，模板会固定设置 `PORT=5001`。
 - **配置持久化**：模板挂载卷 `/data`，并设置 `DS2API_CONFIG_PATH=/data/config.json`；在管理台导入配置后，会写入并持久化到该路径。
+- **`open /app/config.json: permission denied`**：说明当前实例在尝试把运行时 token 持久化到只读路径（常见于镜像内 `/app`）。  
+  处理建议：
+  1. 显式设置可写路径：`DS2API_CONFIG_PATH=/data/config.json`（并挂载持久卷到 `/data`）；  
+  2. 若你使用 `DS2API_CONFIG_JSON` 启动且不需要运行时落盘，可保持环境变量模式（`DS2API_ENV_WRITEBACK` 关闭）；  
+  3. 最新版本中，即使持久化失败，登录/会话测试仍会继续，仅提示“token 未持久化（重启后丢失）”。
 - **构建版本号**：Zeabur / 普通 `docker build` 默认不需要传 `BUILD_VERSION`；镜像会优先使用该构建参数，未提供时自动回退到仓库根目录的 `VERSION` 文件。
 - **首次登录**：部署完成后访问 `/admin`，使用 Zeabur 环境变量/模板指引中的 `DS2API_ADMIN_KEY` 登录（建议首次登录后自行更换为强密码）。

@@ -197,10 +216,10 @@ healthcheck:
 2. **在 Vercel 上导入项目**
 3. **配置环境变量**（最少只需设置以下一项）：

-   | 变量 | 说明 |
-   | --- | --- |
-   | `DS2API_ADMIN_KEY` | 管理密钥（必填） |
-   | `DS2API_CONFIG_JSON` | 配置内容，JSON 字符串或 Base64 编码（可选，建议） |
+| 变量 | 说明 |
+| --- | --- |
+| `DS2API_ADMIN_KEY` | 管理密钥（必填） |
+| `DS2API_CONFIG_JSON` | 配置内容，JSON 字符串或 Base64 编码（可选，建议） |

 4. **部署**

@@ -243,19 +262,29 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx   # 个人账号可留空
 | 变量 | 说明 | 默认值 |
 | --- | --- | --- |
 | `DS2API_ACCOUNT_MAX_INFLIGHT` | 每账号并发上限 | `2` |
-| `DS2API_ACCOUNT_CONCURRENCY` | 同上（兼容别名） | — |
 | `DS2API_ACCOUNT_MAX_QUEUE` | 等待队列上限 | `recommended_concurrency` |
-| `DS2API_ACCOUNT_QUEUE_SIZE` | 同上（兼容别名） | — |
 | `DS2API_GLOBAL_MAX_INFLIGHT` | 全局并发上限 | `recommended_concurrency` |
-| `DS2API_MAX_INFLIGHT` | 同上（兼容别名） | — |
+| `DS2API_ENV_WRITEBACK` | 检测到 `DS2API_CONFIG_JSON` 时自动写入 `DS2API_CONFIG_PATH`，并在成功后转为文件模式（`1/true/yes/on`） | 关闭 |
 | `DS2API_VERCEL_INTERNAL_SECRET` | 混合流式内部鉴权 | 回退用 `DS2API_ADMIN_KEY` |
 | `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | 流式 lease TTL | `900` |
+| `DS2API_RAW_STREAM_SAMPLE_ROOT` | raw stream 样本保存/读取根目录 | `tests/raw_stream_samples` |
 | `VERCEL_TOKEN` | Vercel 同步 token | — |
 | `VERCEL_PROJECT_ID` | Vercel 项目 ID | — |
 | `VERCEL_TEAM_ID` | Vercel 团队 ID | — |
+| `DS2API_CHAT_HISTORY_PATH` | Chat history 存储路径（Vercel 上必须设为 `/tmp/chat_history.json`，否则因文件系统只读而不可用） | `data/chat_history.json` |
 | `DS2API_VERCEL_PROTECTION_BYPASS` | 部署保护绕过密钥（内部 Node→Go 调用） | — |

-### 3.3 Vercel 架构说明
+### 3.3 运行时行为配置（通过 Admin API 设置）
+
+部分运行时行为无法通过环境变量直接配置，或者更适合在部署后通过 Admin API 热更新，例如：
+
+- **自动删除会话模式** (`auto_delete.mode`)：支持 `none` / `single` / `all`，默认为 `none`。可通过 `PUT /admin/settings` 更新。
+- **每账号并发上限** (`account_max_inflight`)：环境变量已支持，但也可通过 Admin API 热更新。
+- **全局并发上限** (`global_max_inflight`)：同上。
+
+详细说明参见 [API.md](../API.md#admin-接口) 中 `/admin/settings` 部分。
+
+### 3.4 Vercel 架构说明

 ```text
 请求 ─────┐
@@ -291,13 +320,14 @@ api/index.go  api/chat-stream.js

 - `api/chat-stream.js` 仅对非流式请求回退到 Go 入口（`?__go=1`）
 - 流式请求（包括带 `tools`）走 Node 路径，并执行与 Go 对齐的 tool-call 防泄漏处理
+- Node 流式路径同时对齐 Go 的终结态语义：空可见输出会返回同形状错误 SSE，空 `content_filter` 会返回 `content_filter` 错误
 - WebUI 的"非流式测试"直接请求 `?__go=1`，避免 Node 中转造成长请求超时

 #### 函数时长

 `vercel.json` 已将 `api/chat-stream.js` 与 `api/index.go` 的 `maxDuration` 设为 `300`（受 Vercel 套餐上限约束）。

-### 3.4 Vercel 常见报错排查
+### 3.5 Vercel 常见报错排查

 #### Go 构建失败

@@ -312,7 +342,7 @@ Error: Command failed: go build -ldflags -s -w -o .../bootstrap ...
 1. 进入 Vercel Project Settings → Build and Development Settings
 2. **清空**自定义 Go Build Flags / Build Command（推荐）
 3. 若必须设置 ldflags，使用 `-ldflags="-s -w"`（保证它是一个参数）
-4. 确认仓库 `go.mod` 为受支持版本（当前为 `go 1.24`）
+4. 确认仓库 `go.mod` 为受支持版本（当前为 `go 1.26.0`）
 5. 重新部署（建议清缓存后 Redeploy）

 #### Internal 包导入错误
@@ -341,65 +371,84 @@ No Output Directory named "public" found after the Build completed.
 - **方案 B**：请求中添加 `x-vercel-protection-bypass` 头
 - **方案 C**：设置 `VERCEL_AUTOMATION_BYPASS_SECRET`（或 `DS2API_VERCEL_PROTECTION_BYPASS`），仅影响内部 Node→Go 调用

-### 3.5 仓库不提交构建产物
+#### Chat History 不可用（read-only file system）
+
+```text
+create chat history dir: mkdir /var/task/data: read-only file system
+```
+
+**原因**：Vercel Serverless 函数的文件系统（`/var/task`）为只读，chat history 尝试在该路径下创建目录失败。
+
+**解决**：在 Vercel Project Settings → Environment Variables 中添加：
+
+```text
+DS2API_CHAT_HISTORY_PATH=/tmp/chat_history.json
+```
+
+`/tmp` 是 Vercel Serverless 环境中唯一可写的目录。数据在函数冷启动之间不会持久化（ephemeral），但在单个实例生命周期内功能正常。
+
+### 3.6 仓库不提交构建产物

 - `static/admin` 目录不在 Git 中
 - Vercel / Docker 构建阶段自动生成 WebUI 静态文件

 ---

-## 四、下载 Release 构建包
+## 四、本地源码运行

-仓库内置 GitHub Actions 工作流：`.github/workflows/release-artifacts.yml`
-
- **触发条件**：仅在 Release `published` 时触发（普通 push 不会构建）
- **构建产物**：多平台二进制压缩包 + `sha256sums.txt`
- **容器镜像发布**：仅发布到 GHCR（`ghcr.io/cjackhwang/ds2api`）
-
-| 平台 | 架构 | 文件格式 |
-| --- | --- | --- |
-| Linux | amd64, arm64 | `.tar.gz` |
-| macOS | amd64, arm64 | `.tar.gz` |
-| Windows | amd64 | `.zip` |
-
-每个压缩包包含：
-
- `ds2api` 可执行文件（Windows 为 `ds2api.exe`）
- `static/admin/`（WebUI 构建产物）
- `sha3_wasm_bg.7b9ca65ddd.wasm`（可选；程序内置 embed fallback）
- `config.example.json`、`.env.example`
- `README.MD`、`README.en.md`、`LICENSE`
-
-### 使用步骤
+### 4.1 基本步骤

 ```bash
-# 1. 下载对应平台的压缩包
-# 2. 解压
-tar -xzf ds2api_<tag>_linux_amd64.tar.gz
-cd ds2api_<tag>_linux_amd64
+# 克隆仓库
+git clone https://github.com/CJackHwang/ds2api.git
+cd ds2api

-# 3. 配置
+# 复制并编辑配置
 cp config.example.json config.json
-# 编辑 config.json
+# 使用你喜欢的编辑器打开 config.json，填入：
+#   - keys: 你的 API 访问密钥
+#   - accounts: DeepSeek 账号（email 或 mobile + password）

-# 4. 启动
-./ds2api
+# 启动服务
+go run ./cmd/ds2api
 ```

-### 维护者发布步骤
+默认本地访问地址是 `http://127.0.0.1:5001`；服务实际绑定 `0.0.0.0:5001`，可通过 `PORT` 环境变量覆盖。

-1. 在 GitHub 创建并发布 Release（带 tag，如 `vX.Y.Z`）
-2. 等待 Actions 工作流 `Release Artifacts` 完成
-3. 在 Release 的 Assets 下载对应平台压缩包
+### 4.2 WebUI 构建

-### 拉取 GHCR 镜像（可选）
+本地首次启动时，若 `static/admin/` 不存在，服务会自动尝试构建 WebUI（需要 Node.js/npm；缺依赖时会先执行 `npm ci`，再执行 `npm run build -- --outDir static/admin --emptyOutDir`）。
+
+你也可以手动构建：

 ```bash
-# latest
-docker pull ghcr.io/cjackhwang/ds2api:latest
+./scripts/build-webui.sh
+```

-# 指定版本（示例）
-docker pull ghcr.io/cjackhwang/ds2api:v2.1.2
+或手动执行：
+
+```bash
+cd webui
+npm ci
+npm run build
+# 产物输出到 static/admin/
+```
+
+通过环境变量控制自动构建行为：
+
+```bash
+# 强制关闭自动构建
+DS2API_AUTO_BUILD_WEBUI=false go run ./cmd/ds2api
+
+# 强制开启自动构建
+DS2API_AUTO_BUILD_WEBUI=true go run ./cmd/ds2api
+```
+
+### 4.3 编译为二进制文件
+
+```bash
+go build -o ds2api ./cmd/ds2api
+./ds2api
 ```

 ---
@@ -456,8 +505,6 @@ server {
 # 将编译好的二进制文件和相关文件复制到目标目录
 sudo mkdir -p /opt/ds2api
 sudo cp ds2api config.json /opt/ds2api/
-# 可选：若你希望使用外置 WASM 文件（覆盖内置版本，来自 release 包或构建产物）
-# sudo cp /path/to/sha3_wasm_bg.7b9ca65ddd.wasm /opt/ds2api/
 sudo cp -r static/admin /opt/ds2api/static/admin
 ```

@@ -526,7 +573,7 @@ curl -s http://127.0.0.1:5001/readyz

 # 3. 模型列表
 curl -s http://127.0.0.1:5001/v1/models
-# 预期: {"object":"list","data":[...]}
+# 预期: {"object":"list","data":[...]}（包含 `*-nothinking` 变体）

 # 4. 管理台页面（如果已构建 WebUI）
 curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin
@@ -536,7 +583,7 @@ curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin
 curl http://127.0.0.1:5001/v1/chat/completions \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
-  -d '{"model":"deepseek-chat","messages":[{"role":"user","content":"hello"}]}'
+  -d '{"model":"deepseek-v4-flash","messages":[{"role":"user","content":"hello"}]}'
 ```

 ---
@@ -567,4 +614,4 @@ go run ./cmd/ds2api-tests \
 - ✅ 真实调用场景验证（OpenAI/Claude/Admin/并发/toolcall/流式）
 - ✅ 全量请求与响应日志落盘（用于故障复盘）

-详细测试集说明参阅 [TESTING.md](TESTING.md)。
+详细测试集说明参阅 [TESTING.md](TESTING.md)。PR 前的固定本地门禁以 [TESTING.md](TESTING.md#pr-门禁--pr-gates) 为准。
--- a/docs/DEVELOPMENT.md
+++ b/docs/DEVELOPMENT.md
@@ -0,0 +1,112 @@
+# DS2API 开发者速查
+
+语言 / Language: 中文
+
+本文面向维护者和贡献者，用于快速判断“从哪里看、改哪里、跑什么”。架构细节仍以 [ARCHITECTURE.md](./ARCHITECTURE.md) 为准，接口行为以 [API.md](../API.md) 为准。
+
+## 1. 本地入口
+
+常用启动与检查：
+
+```bash
+# 后端
+go run ./cmd/ds2api
+
+# WebUI 开发服务器
+npm run dev --prefix webui
+
+# WebUI 生产构建
+npm run build --prefix webui
+```
+
+PR 前固定门禁：
+
+```bash
+./scripts/lint.sh
+./tests/scripts/check-refactor-line-gate.sh
+./tests/scripts/run-unit-all.sh
+npm run build --prefix webui
+```
+
+修改 Go 文件后先运行：
+
+```bash
+gofmt -w <changed-go-files>
+```
+
+## 2. 代码定位
+
+优先从这些入口顺着调用链看：
+
+| 目标 | 入口 |
+| --- | --- |
+| 总路由、CORS、健康检查 | `internal/server/router.go` |
+| OpenAI Chat / Responses | `internal/httpapi/openai/chat`、`internal/httpapi/openai/responses` |
+| Claude / Gemini 兼容入口 | `internal/httpapi/claude`、`internal/httpapi/gemini` |
+| API 请求归一到网页纯文本上下文 | `internal/promptcompat`、`docs/prompt-compatibility.md` |
+| 工具调用解析与流式防泄漏 | `internal/toolcall`、`internal/toolstream`、`docs/toolcall-semantics.md` |
+| DeepSeek 上游调用、登录、PoW、代理 | `internal/deepseek/client`、`internal/deepseek/transport` |
+| 账号池、并发槽位、等待队列 | `internal/account` |
+| Admin API | `internal/httpapi/admin` |
+| WebUI 页面 | `webui/src/layout/DashboardShell.jsx`、`webui/src/features/*` |
+| 服务器端对话记录 | `internal/chathistory`、`internal/httpapi/admin/history` |
+
+## 3. 常见改动建议
+
+- 改接口行为时，同时检查 `API.md` / `API.en.md` 是否需要同步。
+- 改 prompt 兼容链路时，必须同步 `docs/prompt-compatibility.md`。
+- 改 tool call 语义时，同时检查 Go、Node sieve 和 `docs/toolcall-semantics.md`。
+- 改 WebUI 配置项时，同时检查 `webui/src/features/settings`、语言包和 `config.example.json`。
+- 拆分大文件时，保持对外函数签名稳定，并跑 `./tests/scripts/check-refactor-line-gate.sh`。
+
+## 4. 故障定位
+
+接口请求先看路由入口，再看协议适配层，最后看共享 runtime：
+
+1. 路由是否命中：`internal/server/router.go` 和对应 `RegisterRoutes`。
+2. 鉴权与账号选择：`internal/auth`、`internal/account`。
+3. 请求归一化：`internal/promptcompat` 或协议转换包。
+4. 上游请求：`internal/deepseek/client`。
+5. 流式输出：`internal/stream`、`internal/sse`、`internal/toolstream`。
+6. 响应格式：`internal/format/*` 或 `internal/translatorcliproxy`。
+
+对话记录页面问题优先检查：
+
+- Admin API：`/admin/chat-history`、`/admin/chat-history/{id}`。
+- 后端存储：`internal/chathistory/store.go`。
+- 前端轮询和 ETag：`webui/src/features/chatHistory/ChatHistoryContainer.jsx`。
+
+Tool call 问题优先跑：
+
+```bash
+go test -v ./internal/toolcall ./internal/toolstream -count=1
+node --test tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js
+```
+
+## 5. 测试选择
+
+小范围 Go 改动：
+
+```bash
+go test ./internal/<package> -count=1
+```
+
+前端改动：
+
+```bash
+npm run build --prefix webui
+```
+
+高风险协议或流式改动：
+
+```bash
+./tests/scripts/run-unit-all.sh
+```
+
+发布或真实账号链路验证：
+
+```bash
+./tests/scripts/run-live.sh
+```
+
+端到端测试产物默认写入 `artifacts/testsuite/`。分享日志前需要清理 token、密码、cookie 和原始请求响应内容。
--- a/docs/DeepSeekSSE行为结构说明-2026-04-05.md
+++ b/docs/DeepSeekSSE行为结构说明-2026-04-05.md
@@ -0,0 +1,326 @@
+# DeepSeek SSE 行为结构说明（第三方逆向版）
+
+> 说明：本文基于当前仓库 `tests/raw_stream_samples/` 下全部 `upstream.stream.sse` 原始流样本整理而成，属于第三方逆向观察文档，不是官方协议。
+> 当前 corpus 由 4 份原始流组成，覆盖搜索+引用、风控终态、Markdown 输出和空格敏感输出等行为。
+> 补充：文中（见 8.3 节及第 11、12 节的补充条目）还会注明少量“当前实现已确认、但 corpus 尚未完整覆盖”的行为，例如长思考场景下的自动续写状态。
+
+文档导航：[文档总索引](./README.md) / [测试指南](./TESTING.md) / [样本目录说明](../tests/raw_stream_samples/README.md)
+
+## 1. 样本覆盖
+
+下列样本共同构成了本文的观察基础：
+
+| 样本 | 观察重点 |
+| --- | --- |
+| [guangzhou-weather-reasoner-search-20260404](../tests/raw_stream_samples/guangzhou-weather-reasoner-search-20260404/upstream.stream.sse) | 搜索+思考流程，包含 `reference:N` 引用标记与工具片段 |
+| [content-filter-trigger-20260405-jwt3](../tests/raw_stream_samples/content-filter-trigger-20260405-jwt3/upstream.stream.sse) | `CONTENT_FILTER` 终态分支，包含拒答模板与 `ban_regenerate` |
+| [markdown-format-example-20260405](../tests/raw_stream_samples/markdown-format-example-20260405/upstream.stream.sse) | Markdown 输出的早期样本，用于观察 token 级输出形态 |
+| [markdown-format-example-20260405-spacefix](../tests/raw_stream_samples/markdown-format-example-20260405-spacefix/upstream.stream.sse) | Markdown 输出修正样本，用于验证空格 chunk 必须保留 |
+
+当前 corpus 的整体特征是 `message` 帧占绝对多数，控制事件只占很小一部分，但它们决定了流的生命周期和最终状态。
+
+## 2. 总体结构
+
+DeepSeek 的这类输出可以分成两层看：
+
+1. SSE 事件层。
+2. JSON 载荷层。
+
+事件层负责传输边界，载荷层负责业务状态。实现时不要把 HTTP chunk、SSE block 和业务 JSON 混为一体。
+
+最常见的时序可以概括为：
+
+```text
+ready
+update_session
+message(初始化 envelope)
+message(正文 / 片段 / 状态增量)
+message(状态收口)
+finish
+update_session
+title
+close
+```
+
+`finish` 表示生成流结束，但不是唯一的终止信号；真正的语义终态通常还要结合 `response/status`、`quasi_status` 和 `close` 一起判断。
+
+## 3. SSE 事件层
+
+当前 corpus 中观察到的事件类型如下：
+
+| 事件 | 作用 | 处理建议 |
+| --- | --- | --- |
+| `ready` | 传输层就绪，通常携带 `request_message_id`、`response_message_id`、`model_type` | 记录元数据即可，不参与正文拼接 |
+| `update_session` | 会话时间戳或心跳更新 | 当作会话状态帧处理 |
+| `message` | 主体载荷，绝大多数业务信息都在这里 | 必须按顺序解析并保序累积 |
+| `finish` | 生成阶段结束 | 作为流结束标记之一 |
+| `title` | 会话标题生成结果 | 元数据帧，不参与正文拼接 |
+| `close` | 连接关闭信息 | 仅用于收尾与审计 |
+
+说明：
+
+- `message` 是默认事件名，SSE 中没有显式 `event:` 时也应按 `message` 处理。
+- 目前样本里大量 `message` 帧没有独立的业务前缀，不能靠事件名区分正文和控制帧。
+- 可能出现空 payload 的 `message` 帧；它们应被视为 no-op，但不能打乱事件顺序。
+
+## 4. 载荷层形态
+
+`message` 的 `data:` 部分不是单一 schema，而是多种结构混合。当前 corpus 里主要见到以下几种形态：
+
+| 形态 | 典型结构 | 作用 |
+| --- | --- | --- |
+| 初始化 envelope | `{"v":{"response":{...}}}` | 给出会话初始状态、模型状态和片段容器 |
+| 纯文本 token | `{"v":"..."}` | 直接输出可见文本 token |
+| 路径补丁 | `{"p":"...","o":"APPEND\|SET\|BATCH","v":...}` | 对某个状态路径做增量更新 |
+| 终态 batch | `{"v":[{"p":"status","v":"CONTENT_FILTER"}, ...]}` | 尾部状态收口，常见于风控终态 |
+
+一个简化后的典型样式如下：
+
+```json
+{"v":"输出"}
+{"p":"response/fragments/-1/content","o":"APPEND","v":"..."}
+{"p":"response/fragments","o":"APPEND","v":[...]}
+{"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":211},{"p":"quasi_status","v":"FINISHED"}]}
+{"p":"response/status","o":"SET","v":"FINISHED"}
+```
+
+注意：
+
+- `v` 可能是字符串、对象、数组、布尔值或数字。
+- `o` 当前样本里主要见到 `APPEND`、`SET`、`BATCH`。
+- `v` 为数组时，通常表示一个批量 patch 集合，而不是正文数组。
+
+## 5. 初始化 envelope
+
+每条流开头，常会先出现一个 `message` 帧，内容是完整的 `response` 初始状态。当前 corpus 中，这个 envelope 常见字段包括：
+
+- `message_id`
+- `parent_id`
+- `model`
+- `role`
+- `thinking_enabled`
+- `ban_edit`
+- `ban_regenerate`
+- `status`
+- `incomplete_message`
+- `accumulated_token_usage`
+- `files`
+- `feedback`
+- `inserted_at`
+- `search_enabled`
+- `fragments`
+- `conversation_mode`
+- `has_pending_fragment`
+- `auto_continue`
+- `search_triggered`
+
+这些字段更像会话状态和策略开关，不是正文内容。第三方实现应把它们保留在内部状态树里，而不是直接拼接到最终答案。
+
+## 6. 路径结构
+
+当前 corpus 里观察到的 `p` 路径可以归成几组：
+
+### 6.1 片段级路径
+
+- `response/fragments/-N/content`
+- `response/fragments/-N/status`
+- `response/fragments/-N/results`
+- `response/fragments/-N/elapsed_secs`
+
+这类路径表示某个片段对象的增量更新。`-N` 只是样本中的索引风格，不应被写死成固定数量。
+
+### 6.2 片段容器路径
+
+- `response/fragments`
+- `fragments`
+
+这两类路径通常承载 fragment 数组。前者更像响应树中的分支，后者更像终态批处理里的片段集合。
+
+### 6.3 语义状态路径
+
+- `response/status`
+- `response/has_pending_fragment`
+- `quasi_status`
+- `status`
+- `ban_regenerate`
+
+这类路径决定流是否结束、是否被风控、是否还有待处理片段。它们不应作为正文输出。
+
+尤其是 `response/status` / `status` 这类路径上的字符串值，应被视为控制信号而不是文本 token。当前已确认需要特殊对待的值包括：
+
+- `FINISHED`：正常完成终态，应触发收口。
+- `CONTENT_FILTER`：风控终态，应走拒答/模板分支。
+- `WIP` / `INCOMPLETE` / `AUTO_CONTINUE`：尚未完成的中间状态，不应直接输出给客户端；其中只有显式 `INCOMPLETE` 或 `auto_continue` 信号才会触发续写，单独的早期 `WIP` 不会。
+
+### 6.4 统计与进度路径
+
+- `accumulated_token_usage`
+
+这类路径用于使用量或进度统计，属于元数据。
+
+### 6.5 非命名空间字段
+
+在片段对象内部，还会看到 `content`、`references`、`result`、`queries`、`stage_id` 等字段。它们不一定带 `response/...` 前缀，但仍然是协议语义的一部分。
+
+## 7. fragment 类型
+
+当前 corpus 里已经观察到的 fragment 类型如下：
+
+| 类型 | 作用 | 是否应直接渲染 |
+| --- | --- | --- |
+| `RESPONSE` | 正常回答片段 | 是，属于正文 |
+| `THINK` | 推理或阶段提示 | 通常否，按产品策略决定是否展示 |
+| `TOOL_SEARCH` | 搜索工具调用元数据 | 否 |
+| `TOOL_OPEN` | 打开 / 抽取结果的工具元数据 | 否 |
+| `TIP` | 提示 / 警告类片段，常带 `style: WARNING` | 视产品策略决定，通常作为附注 |
+| `TEMPLATE_RESPONSE` | 风控拒答模板 | 是，但它属于终态 fallback，不是普通正文 |
+
+观察到的典型片段字段：
+
+- `id`
+- `type`
+- `content`
+- `references`
+- `stage_id`
+- `status`
+- `queries`
+- `results`
+- `result`
+- `elapsed_secs`
+- `style`
+- `hide_on_wip`
+
+第三方实现不要把 `fragment.type` 和 `p` 路径混为一谈。`type` 是语义分类，`p` 是状态树位置。
+
+## 8. 终态行为
+
+当前 corpus 里有两条很重要的终态分支。
+
+### 8.1 正常完成
+
+正常回答通常会出现如下收口顺序：
+
+1. `response` 的 `BATCH` 更新 `accumulated_token_usage`。
+2. `response` 的 `BATCH` 或单独 patch 更新 `quasi_status: FINISHED`。
+3. `response/status` 置为 `FINISHED`。
+4. `finish` 事件到来。
+5. 之后可能还有 `update_session`、`title`、`close`。
+
+### 8.2 风控终态
+
+`content-filter-trigger-20260405-jwt3` 展示了另一种终态路径：
+
+1. 先继续输出一段正常正文。
+2. 出现提示类 fragment，例如 `TIP`。
+3. 可能先把 `quasi_status` 提前收口到 `FINISHED`。
+4. 之后出现一个终态 batch，把 `ban_regenerate` 设为 `true`，把 `status` 置为 `CONTENT_FILTER`，并附带 `TEMPLATE_RESPONSE`。
+5. 最后再出现 `finish`，然后是收尾事件。
+
+这个分支说明：
+
+- `finish` 不等于正常结束。
+- `CONTENT_FILTER` 是一个独立终态，不是普通异常。
+- `TEMPLATE_RESPONSE` 不应被当作常规回答流的中间片段，它是终态 fallback。
+
+一个简化的风控尾部可以写成：
+
+```json
+{"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":1269},{"p":"quasi_status","v":"FINISHED"}]}
+{"v":[{"p":"ban_regenerate","v":true},{"p":"status","v":"CONTENT_FILTER"},{"p":"fragments","v":[{"id":38,"type":"TEMPLATE_RESPONSE","content":"..."}]},{"p":"quasi_status","v":"CONTENT_FILTER"}]}
+{"event":"finish"}
+```
+
+### 8.3 自动续写中间态（实现补充）
+
+这部分不是当前 corpus 的直接覆盖项，而是 2026-04-05 在长思考实测中观察到、且已在当前实现中兼容的行为：
+
+1. 上游可能先把 `response/status` 或 envelope 内的 `response.status` 置为 `WIP` / `INCOMPLETE`。
+2. 有时还会伴随 `auto_continue: true`。
+3. 这表示当前轮输出尚未真正结束，客户端或代理层可以继续调用 continue 接口续写同一条回答。
+4. 续写后的内容会承接之前的思考与正文，不应把前一轮状态值泄露成可见文本。
+
+对第三方实现，建议把这一类状态统一当作“可继续的控制信号”：
+
+- 可以据此决定是否继续拉取后续流。
+- 不能把 `INCOMPLETE`、`WIP`、`AUTO_CONTINUE` 直接拼接到最终文本。
+- `finish` 事件本身也不能单独说明回答已完全结束，仍要结合状态字段判断。
+
+## 9. 文本重建规则
+
+如果你的目标是把流重建成最终可见文本，必须遵守下面这些规则：
+
+- 按接收顺序逐个追加 token。
+- 不要对每个 `v` 做 `trim` 或 `TrimSpace`。
+- 不要丢弃只包含空格的 chunk。
+- 不要合并连续空格、换行或 Markdown 符号附近的空白。
+- 不要把 `[reference:N]` 视为协议元数据，它在当前 corpus 里就是正文的一部分。
+- 如果你要屏蔽引用标记，应当把它做成可配置的后处理，而不是在解析阶段硬删。
+- `response/status` / `status` 路径上的状态字符串不应进入正文，即使它们不是终态。
+
+这点对 Markdown、代码块、引用、表格都很关键。样本里已经证明，`#`、`-`、`>`、`|` 这类符号后面的空格必须原样保留，否则渲染结果会变形。
+
+## 10. 推荐实现方式
+
+对第三方开发者，建议把实现拆成三条线：
+
+1. 原始事件线：保留 SSE block 顺序、事件名和完整 JSON 载荷。
+2. 状态树线：维护 `response`、`fragments`、`status`、`quasi_status` 等结构。
+3. 可见文本线：只从明确应渲染的 token / fragment 中拼接最终文本。
+
+一个简单的处理顺序可以是：
+
+```text
+parse SSE block
+  -> 识别 event
+  -> 解析 JSON payload
+  -> 更新状态树
+  -> 识别 status / quasi_status / auto_continue 等控制信号
+  -> 判定是否有可见文本
+  -> 追加到输出缓冲
+  -> 遇到 WIP / INCOMPLETE / AUTO_CONTINUE 时决定是否续写
+  -> 遇到 FINISHED / CONTENT_FILTER / finish 时收口
+```
+
+实现时的兼容原则：
+
+- 未知路径保留，不要报错中断。
+- 未知 fragment.type 保留在日志里。
+- 不要假设所有模型都一定输出 `thinking_content`，当前 corpus 的推理更多是通过 fragment 类型表达。
+- 不要假设 `title` 一定存在，它只是后置元数据。
+
+## 11. 本 corpus 证明了什么
+
+当前样本足以证明以下行为：
+
+- 搜索类模型会把工具调用、结果、引用和正文混在同一条 SSE 流里。
+- 风控不会简单地“没有输出”，而是会在正常生成后切换到 `CONTENT_FILTER` 终态。
+- Markdown 和代码输出对空格非常敏感，空格 chunk 不能吞。
+- `message` 是主体承载层，`ready` / `update_session` / `finish` / `title` / `close` 是控制层。
+- `fragment.type` 是可视化和工具链分层的关键，不应只靠 `p` 路径判断。
+
+结合 2026-04-05 的长思考实测，还可以补充一条当前实现层面的结论：
+
+- 长思考场景下，上游可能先给出 `INCOMPLETE` / `WIP` / `AUTO_CONTINUE` 状态，再通过 continue 链路续写；这些状态值本身不应作为正文输出。
+
+## 12. 适用边界
+
+本文是基于当前 corpus 的逆向说明，不是恒定协议。
+
+- 新模型可能增加新的 `p` 路径。
+- 新版本可能增加新的 fragment.type。
+- `CONTENT_FILTER` 的终态模板内容可能变化。
+- 自动续写相关状态（如 `INCOMPLETE` / `AUTO_CONTINUE`）当前主要来自实测与实现兼容逻辑，后续字段形态仍可能变化。当前实现不会仅因早期 `WIP` 状态就自动继续；只有显式 `INCOMPLETE` 或 `auto_continue` 信号才会触发 continue。
+- 解析器应当对未知字段、未知路径、未知事件保持容忍。
+
+如果你要把这份说明用于实际开发，建议同时保留原始流样本、回放脚本和回归测试，不要只依赖本文。
+
+## 2026-04-29 最近线上样本增量观察
+
+基于 `longtext-deepseek-v4-flash-20260429` 与 `longtext-deepseek-v4-pro-20260429` 两个真实账号长文本样本，近期格式变化要点如下：
+
+1. `data:` 事件中仍大量出现 `{"v":"..."}` 的无路径增量（`p` 缺失），解析器必须把空路径视为可见正文候选，而不能只依赖 `response/content`。
+2. 对象形态 `v`（如 `{"text":"..."}` / `{"content":"..."}`）仍会出现，且可能与无路径 chunk 混用；仅按字符串处理会导致正文丢块。
+3. 多轮 continuation 场景下，后续 chunk 可能不再重复显式 `status`，状态机需要保留上一轮 `INCOMPLETE` 语义直到出现终态。
+4. 2026-04-29 起客户端头部版本基线上调到 `x-client-version: 2.0.3`；若仍使用更低版本，部分账号会出现上游行为不一致（包括空输出与补轮异常）。
+
+建议：新增样本默认回放应优先覆盖「长文本 + 多轮 + 无路径 chunk」组合，避免只用短样本导致回归漏检。
--- a/docs/README.md
+++ b/docs/README.md
@@ -0,0 +1,61 @@
+# DS2API 文档导航 | Documentation Index
+
+语言 / Language: [中文](README.md) | [English](README.md#english)
+
+## 中文
+
+为减少重复维护，本仓库文档按“入口文档 + 专题文档”拆分。建议从下列顺序阅读：
+
+1. [项目总览（README）](../README.MD)
+2. [架构与目录说明](./ARCHITECTURE.md)
+3. [接口文档（API）](../API.md)
+4. [部署指南](./DEPLOY.md)
+5. [测试指南](./TESTING.md)
+6. [开发者速查](./DEVELOPMENT.md)
+7. [贡献指南](./CONTRIBUTING.md)
+
+### 专题文档
+
+- [API -> 网页对话纯文本兼容主链路说明](./prompt-compatibility.md)
+- [Tool Calling 统一语义](./toolcall-semantics.md)
+- [DeepSeek SSE 行为结构说明（逆向观察）](./DeepSeekSSE行为结构说明-2026-04-05.md)
+
+### 文档维护约定
+
+- 文档更新必须以实际代码实现为依据：总路由装配看 `internal/server/router.go`，协议/resource 路由看 `internal/httpapi/**/handler*.go` 与 `internal/httpapi/admin/handler.go`，配置默认值看 `internal/config/*`，模型/alias 看 `internal/config/models.go`，prompt 兼容链路看 `docs/prompt-compatibility.md` 列出的代码入口。
+- `README.MD` / `README.en.md`：面向首次接触用户，保留“是什么 + 怎么快速跑起来”。
+- `docs/ARCHITECTURE*.md`：面向开发者，集中维护项目结构、模块职责与调用链。
+- `API*.md`：面向客户端接入者，聚焦接口行为、鉴权和示例。
+- `docs/prompt-compatibility.md`：面向维护者，集中维护“API -> 网页对话纯文本上下文”的统一兼容语义；相关行为修改时必须同步更新。
+- 其他 `docs/*.md`：主题化说明，避免在多个文档重复粘贴同一段内容。
+
+---
+
+## English
+
+To reduce maintenance drift, docs are split into an “entry doc + topical docs” layout.
+
+Recommended reading order:
+
+1. [Project overview (README)](../README.en.md)
+2. [Architecture and project layout](./ARCHITECTURE.en.md)
+3. [API reference](../API.en.md)
+4. [Deployment guide](./DEPLOY.en.md)
+5. [Testing guide](./TESTING.md)
+6. [Developer quick reference](./DEVELOPMENT.md)
+7. [Contributing guide](./CONTRIBUTING.en.md)
+
+### Topical docs
+
+- [API -> pure-text web-chat compatibility pipeline](./prompt-compatibility.md)
+- [Tool-calling unified semantics](./toolcall-semantics.md)
+- [DeepSeek SSE behavior notes (reverse-engineered)](./DeepSeekSSE行为结构说明-2026-04-05.md)
+
+### Maintenance conventions
+
+- Documentation updates must be grounded in the actual implementation: root routing lives in `internal/server/router.go`, protocol/resource routes live in `internal/httpapi/**/handler*.go` and `internal/httpapi/admin/handler.go`, config defaults in `internal/config/*`, models/aliases in `internal/config/models.go`, and the prompt compatibility pipeline in the code entrypoints listed by `docs/prompt-compatibility.md`.
+- `README.MD` / `README.en.md`: onboarding-oriented (“what + quick start”).
+- `docs/ARCHITECTURE*.md`: developer-oriented source of truth for module boundaries and execution flow.
+- `API*.md`: integration-oriented behavior/contracts.
+- `docs/prompt-compatibility.md`: maintainer-oriented source of truth for the “API -> pure-text web-chat context” compatibility flow; update it whenever related behavior changes.
+- Other `docs/*.md`: focused topics, avoid copy-pasting the same section into multiple files.
--- a/docs/TESTING.md
+++ b/docs/TESTING.md
@@ -2,6 +2,8 @@

 语言 / Language: 中文 + English（同页）

+文档导航：[总览](../README.MD) / [架构说明](./ARCHITECTURE.md) / [部署指南](./DEPLOY.md) / [接口文档](../API.md)
+
 ## 概述 | Overview

 DS2API 提供两个层级的测试：
@@ -11,6 +13,7 @@ DS2API 提供两个层级的测试：
 | 单元测试（Go） | `./tests/scripts/run-unit-go.sh` | 不需要真实账号 |
 | 单元测试（Node） | `./tests/scripts/run-unit-node.sh` | 不需要真实账号 |
 | 单元测试（全部） | `./tests/scripts/run-unit-all.sh` | 不需要真实账号 |
+| Release 目标交叉编译 | `./tests/scripts/check-cross-build.sh` | 覆盖发布包支持的 GOOS/GOARCH |
 | 端到端测试 | `./tests/scripts/run-live.sh` | 使用真实账号执行全链路测试 |

 端到端测试集会录制完整的请求/响应日志，用于故障排查。
@@ -18,6 +21,26 @@ Node 单元测试脚本会先做 `node --check` 语法门禁，再以 `--test-co

 ---

+## PR 门禁 | PR Gates
+
+打开或更新 PR 前，按 `.github/workflows/quality-gates.yml` 的同等本地门禁执行：
+
+```bash
+./scripts/lint.sh
+./tests/scripts/check-refactor-line-gate.sh
+./tests/scripts/run-unit-all.sh
+npm run build --prefix webui
+```
+
+说明：
+
+- `./scripts/lint.sh` 会运行 Go 格式化检查和 `golangci-lint`；修改 Go 文件后仍建议先执行 `gofmt -w <files>`。
+- `run-unit-all.sh` 串行调用 Go 与 Node 单元测试入口。
+- CI 还会额外在 macOS/Windows 跑 Go 单测，并执行 release 目标交叉编译检查。
+- `run-live.sh` 是真实账号端到端测试，适合作为发布或高风险改动后的补充验证，不属于每次 PR 的固定本地门禁。
+
+---
+
 ## 快速开始 | Quick Start

 ### 单元测试 | Unit Tests
@@ -36,11 +59,11 @@ Node 单元测试脚本会先做 `node --check` 语法门禁，再以 `--test-co
 # 结构与流程门禁
 ./tests/scripts/check-refactor-line-gate.sh
 ./tests/scripts/check-node-split-syntax.sh
-
-# 发布阻断：阶段 6 手工烟测签字检查（默认读取 plans/stage6-manual-smoke.md）
-./tests/scripts/check-stage6-manual-smoke.sh
+./tests/scripts/check-cross-build.sh
 ```

+说明：`plans/stage6-manual-smoke.md` 已移除，阶段 6 手工烟测不再作为当前 CI 或发布门禁。
+
 ### 端到端测试 | End-to-End Tests

 ```bash
@@ -180,16 +203,16 @@ go test ./...

 ```bash
 # 运行 tool calls 相关测试（推荐用于调试 tool call 解析问题）
-go test -v -run 'TestParseToolCalls|TestRepair' ./internal/util/
+go test -v -run 'TestParseToolCalls|TestRepair' ./internal/toolcall/

 # 运行单个测试用例
-go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/util/
+go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/toolcall/

 # 运行 format 相关测试
 go test -v ./internal/format/...

-# 运行 adapter 相关测试
-go test -v ./internal/adapter/openai/...
+# 运行 HTTP API 相关测试
+go test -v ./internal/httpapi/openai/...
 ```

 ### 调试 Tool Call 问题 | Debugging Tool Call Issues
@@ -198,13 +221,13 @@ go test -v ./internal/adapter/openai/...

 ```bash
 # 1. 运行 tool calls 相关的所有测试
-go test -v -run 'TestParseToolCalls|TestRepair' ./internal/util/
+go test -v -run 'TestParseToolCalls|TestRepair' ./internal/toolcall/

 # 2. 查看测试输出中的详细调试信息
-go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/util/ 2>&1
+go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/toolcall/ 2>&1

 # 3. 检查具体测试用例的修复效果
-# 测试用例位于 internal/util/toolcalls_test.go，包含：
+# 测试用例位于 internal/toolcall/toolcalls_test.go，包含：
 # - TestParseToolCallsWithDeepSeekHallucination: DeepSeek 典型幻觉输出
 # - TestRepairLooseJSONWithNestedObjects: 嵌套对象的方括号修复
 # - TestParseToolCallsWithMixedWindowsPaths: Windows 路径处理
@@ -226,6 +249,56 @@ node --test tests/node/stream-tool-sieve.test.js
 go run ./cmd/ds2api-tests --no-preflight
 ```

+### 运行原始流仿真（独立工具）
+
+```bash
+./tests/scripts/run-raw-stream-sim.sh
+```
+
+说明：
+- 该工具默认重放 `tests/raw_stream_samples/manifest.json` 声明的 canonical 样本，按上游 SSE 顺序做 1:1 仿真解析。
+- 默认校验不出现 `FINISHED` 文本泄露，并要求存在结束信号。
+- 默认**不**把原始流中的 `accumulated_token_usage` 与本地解析得到的 token 数做强一致校验（当前实现以内容估算为准）；如需强校验可显式加 `--fail-on-token-mismatch`。
+- 每次运行都会把本地派生结果写入 `artifacts/raw-stream-sim/<run-id>/<sample-id>/replay.output.txt`，并输出结构化报告。
+- 如果你有历史基线目录，可以通过 `--baseline-root` 让工具直接做文本对比。
+- 更完整的协议级行为结构说明见 [DeepSeekSSE行为结构说明-2026-04-05.md](./DeepSeekSSE行为结构说明-2026-04-05.md)。
+
+### 对单个样本做回放比对
+
+```bash
+./tests/scripts/compare-raw-stream-sample.sh markdown-format-example-20260405-spacefix
+```
+
+说明：
+- 该脚本会从 raw-only 样本目录读取 `upstream.stream.sse`。
+- 回放结果会写入 `artifacts/raw-stream-sim/<run-id>/<sample-id>/`，便于直接查阅。
+- 如果传入历史基线目录，脚本会自动对比当前回放输出和基线文本。
+
+### 采集永久样本
+
+本地启动服务后，可以直接调用以下 Admin 接口：
+
+```bash
+POST /admin/dev/raw-samples/capture
+```
+
+这个接口会把请求元信息和上游原始流写入 `tests/raw_stream_samples/<sample-id>/`，以后可以直接拿来做回放和字段分析。派生输出会在本地回放时再生成，不再落在样本目录里。
+
+### 从内存抓包查询并保存样本
+
+如果问题刚刚在本地复现过，也可以先查当前进程内存里的抓包，再选择性落盘：
+
+```bash
+GET /admin/dev/raw-samples/query?q=广州&limit=10
+POST /admin/dev/raw-samples/save
+{"chain_key":"session:xxxx","sample_id":"tmp-from-memory"}
+```
+
+说明：
+- `query` 会按 `chat_session_id` 把 `completion + continue` 归并成一条链，适合定位接续思考问题。
+- `save` 支持用 `query`、`chain_key` 或 `capture_id` 选中目标。
+- 生成的样本目录仍然是 `tests/raw_stream_samples/<sample-id>/`，可以直接喂给回放脚本。
+
 ### 指定输出目录和超时

 ```bash
--- a/docs/prompt-compatibility.md
+++ b/docs/prompt-compatibility.md
@@ -0,0 +1,428 @@
+# API -> 网页对话纯文本兼容主链路说明
+
+文档导航：[总览](../README.MD) / [架构说明](./ARCHITECTURE.md) / [接口文档](../API.md) / [测试指南](./TESTING.md)
+
+> 本文档是 DS2API“把 OpenAI / Claude / Gemini 风格 API 请求兼容成 DeepSeek 网页对话纯文本上下文”的专项说明。
+> 这是项目最重要的兼容产物之一。凡是修改消息标准化、tool prompt 注入、tool history 保留、文件引用、current input file / legacy history_split、下游 completion payload 组装等行为，都必须同步更新本文档。
+
+## 1. 核心结论
+
+DS2API 当前的核心思路，不是把客户端传来的 `messages`、`tools`、`attachments` 原样转发给下游。
+
+而是把这些高层 API 语义，统一压缩成 DeepSeek 网页对话更容易理解的三类输入：
+
+1. `prompt`
+   一个单字符串，里面带有角色标记、system 指令、历史消息、assistant reasoning 标签、历史 tool call XML 等。
+2. `ref_file_ids`
+   一个文件引用数组，承载附件、inline 上传文件，以及必要时被拆出去的历史文件。
+3. 控制位
+   例如 `thinking_enabled`、`search_enabled`、部分 passthrough 参数。
+
+也就是说，项目最重要的兼容动作，是把“结构化 API 会话”翻译成“网页对话纯文本上下文 + 文件引用”。
+
+## 2. 为什么这是核心产物
+
+因为对下游来说，真正稳定的输入面不是 OpenAI/Claude/Gemini 的原生 schema，而是：
+
+- 一段连续的对话 prompt
+- 一组可引用文件
+- 少量开关位
+
+这也是为什么很多表面上看像“协议兼容”的代码，最终都会收敛到同一类逻辑：
+
+- 先把不同协议的消息统一成内部消息序列
+- 再把工具声明改写成 system prompt 文本
+- 再把历史 tool call / tool result 改写成 prompt 可见内容
+- 最后输出成 DeepSeek completion payload
+
+## 3. 统一心智模型
+
+当前主链路可以这样理解：
+
+```text
+客户端请求
+  -> HTTP API surface（OpenAI / Claude / Gemini）
+  -> promptcompat 统一消息标准化
+  -> tool prompt 注入
+  -> DeepSeek 风格 prompt 拼装
+  -> 文件收集 / inline 上传 / current input file（OpenAI 链路）
+  -> completion payload
+  -> 下游网页对话接口
+```
+
+对应的关键代码入口：
+
+- OpenAI Chat / Responses：
+  [internal/promptcompat/request_normalize.go](../internal/promptcompat/request_normalize.go)
+- OpenAI prompt 组装：
+  [internal/promptcompat/prompt_build.go](../internal/promptcompat/prompt_build.go)
+- OpenAI 消息标准化：
+  [internal/promptcompat/message_normalize.go](../internal/promptcompat/message_normalize.go)
+- Claude 标准化：
+  [internal/httpapi/claude/standard_request.go](../internal/httpapi/claude/standard_request.go)
+- Claude 消息与 tool_use/tool_result 归一：
+  [internal/httpapi/claude/handler_utils.go](../internal/httpapi/claude/handler_utils.go)
+- Gemini 复用 OpenAI prompt builder：
+  [internal/httpapi/gemini/convert_request.go](../internal/httpapi/gemini/convert_request.go)
+- DeepSeek prompt 角色标记拼装：
+  [internal/prompt/messages.go](../internal/prompt/messages.go)
+- prompt 可见 tool history XML：
+  [internal/prompt/tool_calls.go](../internal/prompt/tool_calls.go)
+- 最新 user 思考格式注入：
+  [internal/promptcompat/thinking_injection.go](../internal/promptcompat/thinking_injection.go)
+- completion payload：
+  [internal/promptcompat/standard_request.go](../internal/promptcompat/standard_request.go)
+
+## 4. 下游真正收到的东西
+
+在“完成标准化后”，下游 completion payload 的核心形态是：
+
+```json
+{
+  "chat_session_id": "session-id",
+  "model_type": "default",
+  "parent_message_id": null,
+  "prompt": "<｜begin▁of▁sentence｜>...",
+  "ref_file_ids": [
+    "file-history",
+    "file-systemprompt",
+    "file-other-attachment"
+  ],
+  "thinking_enabled": true,
+  "search_enabled": false
+}
+```
+
+重点是：
+
+- `prompt` 才是对话上下文主载体。
+- `ref_file_ids` 只承载文件引用，不承载普通文本消息。
+- `tools` 不会作为“原生工具 schema”直接下发给下游，而是被改写进 `prompt`。
+- 对外返回给客户端的 `prompt_tokens` / `input_tokens` / `promptTokenCount` 不再按“最后一条消息”或字符粗估近似返回，而是基于**完整上下文 prompt**做 tokenizer 计数；为了避免上下文实际超限但客户端误以为还能塞下，请求侧上下文 token 会额外保守上浮一点，宁可略大也不低估。
+- 当前 `/v1/chat/completions` 业务路径仍是“每次请求新建一个远端 `chat_session_id`，并默认发送 `parent_message_id: null`”；因此 DS2API 对外默认表现为“新会话 + prompt 拼历史”，而不是复用 DeepSeek 原生会话树。
+- 但 DeepSeek 远端本身支持同一 `chat_session_id` 的跨轮次持续对话。2026-04-27 已用项目内现有 DeepSeek client 做过一次不改业务代码的双轮实测：同一 `chat_session_id` 下，第 1 轮返回 `request_message_id=1` / `response_message_id=2` / 文本 `SESSION_TEST_ONE`；第 2 轮重新获取一次 PoW，并发送 `parent_message_id=2` 后，成功返回 `request_message_id=3` / `response_message_id=4` / 文本 `SESSION_TEST_TWO`。这说明“同远端会话持续聊天”能力存在，且每轮需要携带正确的 parent/message 链接信息，同时重新获取对应轮次可用的 PoW。
+- OpenAI Chat / Responses 原生走统一 OpenAI 标准化与 DeepSeek payload 组装；Claude / Gemini 会尽量复用 OpenAI prompt/tool 语义，其中 Gemini 直接复用 `promptcompat.BuildOpenAIPromptForAdapter`，Claude 消息接口在可代理场景会转换为 OpenAI chat 形态再执行。
+- 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关；关闭时即使上游返回 `response/thinking_content`，兼容层也不会把它当作可见正文输出。若最终解析出的模型名带 `-nothinking` 后缀，则会无条件强制关闭 thinking，优先级高于请求体中的 `thinking` / `reasoning` / `reasoning_effort`。Claude surface 在流式请求且未显式声明 `thinking` 时，仍按 Anthropic 语义默认关闭；但在非流式代理场景，兼容层会内部开启一次下游 thinking，用于捕获“正文为空、工具调用落在 thinking 里”的情况，随后在回包前剥离用户不可见的 thinking block。
+- 对 OpenAI Chat / Responses 的非流式收尾，如果最终可见正文为空，兼容层会优先尝试把思维链中的独立 DSML / XML 工具块当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测，但不会因为思维链内容去中途拦截或改写流式输出；真正的工具识别始终基于原始上游文本，而不是基于“已经做过可见输出清洗”的版本，因此即使最终可见层会剥离完整 leaked DSML / XML `tool_calls` wrapper、并抑制全空参数或无效 wrapper 块，也不会影响真实工具调用转成结构化 `tool_calls` / `function_call`。补发结果会作为本轮 assistant 的结构化 `tool_calls` / `function_call` 输出返回，而不是塞进 `content` 文本；如果客户端没有开启 thinking / reasoning，思维链只用于检测，不会作为 `reasoning_content` 或可见正文暴露。只有正文为空且思维链里也没有可执行工具调用时，才继续按空回复错误处理。
+- OpenAI Chat / Responses 的空回复错误处理之前会默认做一次内部补偿重试：第一次上游完整结束后，如果最终可见正文为空、没有解析到工具调用、也没有已经向客户端流式发出工具调用，并且终止原因不是 `content_filter`，兼容层会复用同一个 `chat_session_id`、账号、token 与工具策略，把原始 completion `prompt` 追加固定后缀 `Previous reply had no visible output. Please regenerate the visible final answer or tool call now.` 后重新提交一次。重试遵循 DeepSeek 多轮对话协议：从第一次上游 SSE 流中提取 `response_message_id`，并在重试 payload 中设置 `parent_message_id` 为该值，使重试成为同一会话的后续轮次而非断裂的根消息；同时重新获取一次 PoW（若 PoW 获取失败则回退到原始 PoW）。该重试不会重新标准化消息、不会新建 session、不会切换账号，也不会向流式客户端插入重试标记；第二次 thinking / reasoning 会按正常增量直接接到第一次之后，并继续使用 overlap trim 去重。若第二次仍为空，终端错误码仍保持现有 `upstream_empty_output`；若任一尝试触发空 `content_filter`，不做补偿重试并保持 `content_filter` 错误。JS Vercel 运行时同样设置 `parent_message_id`，但因无法直接调用 PoW API 而复用原始 PoW。
+
+- OpenAI Chat / Responses 在最终可见正文渲染阶段，会把 DeepSeek 搜索返回中的 `[citation:N]` / `[reference:N]` 标记替换成对应 Markdown 链接。`citation` 标记按一基序号解析；`reference` 标记只有在同一段正文中出现 `[reference:0]`（允许冒号后有空格）时才按零基序号映射，并且不会影响同段正文里的 `citation` 标记。
+
+## 5. prompt 是怎么拼出来的
+
+OpenAI Chat / Responses 在标准化后、current input file 之前，会默认执行 `thinking_injection` 增强。它参考 DeepSeek V4 “把控制指令放在 user 消息末尾更稳定”的用法，在最新 user message 后追加思考增强提示词。当前内置默认提示词以 `Reasoning Effort: Absolute maximum with no shortcuts permitted.` 开头，并继续要求模型充分分解问题、覆盖潜在路径与边界条件、把完整推演过程显式写出。该开关默认启用，可通过 `thinking_injection.enabled=false` 关闭；也可以通过 `thinking_injection.prompt` 自定义提示词，留空时使用内置默认提示词。
+
+这段增强属于 prompt 可见上下文：
+
+- 普通请求会直接出现在最终 `prompt` 的最新 user block 末尾。
+- 如果触发 current input file，它会进入完整上下文文件中。
+
+另外，`MessagesPrepareWithThinking` 还会在最终 prompt 的最前面预置一段固定的 system 级“输出完整性约束（Output integrity guard）”：
+
+- 如果上游上下文、工具输出或解析后的文本出现乱码、损坏、部分解析、重复或其他畸形片段，不要模仿、不要回显，只输出给用户的正确内容。
+- 这段约束位于普通 system / tool prompt 之前，因此是当前最终 prompt 里的最高优先级前置指令。
+
+### 5.1 角色标记
+
+最终 prompt 使用 DeepSeek 风格角色标记：
+
+- `<｜begin▁of▁sentence｜>`
+- `<｜System｜>`
+- `<｜User｜>`
+- `<｜Assistant｜>`
+- `<｜Tool｜>`
+- `<｜end▁of▁instructions｜>`
+- `<｜end▁of▁sentence｜>`
+- `<｜end▁of▁toolresults｜>`
+
+实现位置：
+[internal/prompt/messages.go](../internal/prompt/messages.go)
+
+### 5.2 相邻同角色消息会合并
+
+在最终 `MessagesPrepareWithThinking` 中，相邻同 role 的消息会被合并成一个块，中间插入空行。
+
+这意味着：
+
+- prompt 中看到的是“合并后的 role block”
+- 不是客户端传来的逐条 message 原样排列
+
+## 6. tools 为什么是“文本注入”，不是原生下发
+
+当前项目把工具能力视为“prompt 约束的一部分”。
+
+具体做法：
+
+1. 把每个 tool 的名称、描述、参数 schema 序列化成文本。
+2. 拼成 `You have access to these tools:` 大段说明。
+3. 再附上统一的 DSML tool call 外壳格式约束。
+4. 把这整段内容并入 system prompt。
+
+工具调用正例现在优先示范官方 DSML 风格：`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。
+兼容层仍接受旧式纯 `<tool_calls>` wrapper，但提示词会优先要求模型输出官方 DSML 标签，并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意：这是“兼容 DSML 外壳，内部仍以 XML 解析语义为准”，不是原生 DSML 全链路实现；DSML 标签会在解析入口归一化回现有 XML 标签后继续走同一套 parser。
+数组参数使用 `<item>...</item>` 子节点表示；当某个参数体只包含 item 子节点时，Go / Node 解析器会把它还原成数组，避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... }` 对象。除此之外，解析器还会回收一些更松散的列表写法，例如 JSON array 字面量或逗号分隔的 JSON 项序列，只要它们足够明确；但 `<item>` 仍然是首选形态。若模型把完整结构化 XML fragment 误包进 CDATA，兼容层会在保护 `content` / `command` 等原文字段的前提下，尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过，如果 CDATA 只是单个平面的 XML/HTML 标签，例如 `<b>urgent</b>` 这种行内标记，兼容层会保留原始字符串，不会强行升成 object / array；只有明显表示结构的 CDATA 片段，例如多兄弟节点、嵌套子节点或 `item` 列表，才会触发结构化恢复。
+Go 侧读取 DeepSeek SSE 时不再依赖 `bufio.Scanner` 的固定 2MiB 单行上限；当写文件类工具把很长的 `content` 放在单个 `data:` 行里返回时，非流式收集、流式解析和 auto-continue 透传都会保留完整行，再进入同一套工具解析与序列化流程。
+在 assistant 最终回包阶段，如果某个 tool 参数在声明 schema 中明确是 `string`，兼容层会在把解析后的 `tool_calls` / `function_call` 重新序列化成 OpenAI / Responses / Claude 可见参数前，递归把该路径上的 number / bool / object / array 统一转成字符串；其中 object / array 会压成紧凑 JSON 字符串。这个保护只对 schema 明确声明为 string 的路径生效，不会改写本来就是 `number` / `boolean` / `object` / `array` 的参数。这样可以兼容 DeepSeek 输出了结构化片段、但上游客户端工具 schema 又严格要求字符串参数的场景（例如 `content`、`prompt`、`path`、`taskId` 等）。
+工具 schema 的权威来源始终是**当前请求实际携带的 schema**，而不是同名工具在其他 runtime（Claude Code / OpenCode / Codex 等）里的默认印象。兼容层现在会同时兼容 OpenAI 风格 `function.parameters`、直接工具对象上的 `parameters` / `input_schema`、以及 camelCase 的 `inputSchema` / `schema`，并在最终输出阶段按这份请求内 schema 决定是保留 array/object，还是仅对明确声明为 `string` 的路径做字符串化。该规则同样适用于 Claude 的流式收尾和 Vercel Node 流式 tool-call formatter，避免不同 runtime 因 schema shape 差异而出现同名工具参数类型漂移。
+正例中的工具名只会来自当前请求实际声明的工具；如果当前请求没有足够的已知工具形态，就省略对应的单工具、多工具或嵌套示例，避免把不可用工具名写进 prompt。
+对执行类工具，脚本内容必须进入执行参数本身：`Bash` / `execute_command` 使用 `command`，`exec_command` 使用 `cmd`；不要把脚本示范成 `path` / `content` 文件写入参数。
+如果当前请求声明了 `Read` / `read_file` 这类读取工具，兼容层会额外注入一条 read-tool cache guard：当读取结果只表示“文件未变更 / 已在历史中 / 请引用先前上下文 / 没有正文内容”时，模型必须把它视为内容不可用，不能反复调用同一个无正文读取；应改为请求完整正文读取能力，或向用户说明需要重新提供文件内容。这个约束只缓解客户端缓存返回空内容导致的死循环，DS2API 不会也无法凭空恢复客户端本地文件正文。
+
+OpenAI 路径实现：
+[internal/promptcompat/tool_prompt.go](../internal/promptcompat/tool_prompt.go)
+
+Claude 路径实现：
+[internal/httpapi/claude/handler_utils.go](../internal/httpapi/claude/handler_utils.go)
+
+统一工具调用格式模板：
+[internal/toolcall/tool_prompt.go](../internal/toolcall/tool_prompt.go)
+
+这也是项目“网页对话纯文本兼容”的关键设计：
+
+- tools 对下游来说，本质上是 prompt 内规则
+- 不是 native tool schema transport
+
+## 7. assistant 的 tool_calls / reasoning 如何保留
+
+### 7.1 reasoning 保留方式
+
+assistant 的 reasoning 会变成一个显式标签块：
+
+```text
+[reasoning_content]
+...
+[/reasoning_content]
+```
+
+然后再接可见回答正文。
+
+### 7.2 历史 tool_calls 保留方式
+
+assistant 历史 `tool_calls` 不会保留成 OpenAI 原生 JSON，而会转成 prompt 可见的 DSML 外壳：
+
+```xml
+<|DSML|tool_calls>
+  <|DSML|invoke name="read_file">
+    <|DSML|parameter name="path"><![CDATA[src/main.go]]></|DSML|parameter>
+  </|DSML|invoke>
+</|DSML|tool_calls>
+```
+
+解析层同时兼容旧式纯 XML 形态：`<tool_calls>` / `<invoke>` / `<parameter>`。两者都会先归一到现有 XML 解析语义；其他旧格式都会作为普通文本保留，不会作为可执行调用语法。
+例外是 parser 会对一个非常窄的模型失误做修复：如果 assistant 输出了 `<invoke ...>` ... `</tool_calls>`（或 DSML 对应标签），但漏掉最前面的 opening wrapper，解析阶段会补回 wrapper 后再尝试识别。
+
+这件事很重要，因为它决定了：
+
+- 历史工具调用在 prompt 中是“可见文本历史”
+- 不是“隐藏结构化元数据”
+
+实现位置：
+[internal/prompt/tool_calls.go](../internal/prompt/tool_calls.go)
+
+### 7.3 tool result 保留方式
+
+tool / function role 的结果会作为 `<｜Tool｜>...<｜end▁of▁toolresults｜>` 进入 prompt。
+
+如果 tool content 为空，当前会补成字符串 `"null"`，避免整个 tool turn 丢失。
+
+## 8. files、附件、systemprompt 文件的实际语义
+
+这里要明确区分两类东西：
+
+1. 文本型 system prompt
+   例如 OpenAI `developer` / `system` / Responses `instructions` / Claude top-level `system`
+   这类会进入 `prompt`。
+2. 文件型 systemprompt
+   例如通过附件、`input_file`、base64、data URL 上传的文件
+   这类不会直接内联进 `prompt`，而是进入 `ref_file_ids`。
+
+OpenAI 文件相关实现：
+
+- inline/base64/data URL 上传：
+  [internal/httpapi/openai/files/file_inline_upload.go](../internal/httpapi/openai/files/file_inline_upload.go)
+- 文件 ID 收集：
+  [internal/promptcompat/file_refs.go](../internal/promptcompat/file_refs.go)
+
+OpenAI 的文件上传现在不再是“只传文件本体”的通用路径，而是会先根据请求里的 `model` 解析出 DeepSeek 的上传类型，并把它透传到上传接口的 `x-model-type`。当前可见的上传类型就是 `default` / `expert` / `vision`，其中 vision 请求上传图片时必须带上 `vision`，否则下游容易退回到仅文本或 OCR 语义。这个模型类型会同时用于：
+
+- `/v1/files` 这类独立文件上传入口
+- Chat / Responses 的 inline 图片、附件上传
+- current input file 触发时生成的 `DS2API_HISTORY.txt` 上下文文件
+
+也就是说，文件上传和完成请求的 `model_type` 现在是一致的：完成 payload 里仍然是 `model_type`，上传文件则会在 DeepSeek 上传阶段携带同样的模型类型信息。
+
+结论：
+
+- “systemprompt 文字”在 prompt 里
+- “systemprompt 文件”通常只在 `ref_file_ids` 里
+
+除非调用方自己把文件内容展开后再塞进 system/developer 文本，否则文件内容不会自动出现在 prompt 正文。
+
+## 9. 多轮历史为什么不会一直完整内联在 prompt
+
+兼容层现在只保留 `current_input_file` 这一种拆分方式；旧的 `history_split` 已废弃，只保留为兼容旧配置的字段，不再参与请求处理。
+
+- `current_input_file` 默认开启；它用于把“完整上下文”合并进 `DS2API_HISTORY.txt` 上下文文件。当最新 user turn 的纯文本长度达到 `current_input_file.min_chars`（默认 `0`）时，兼容层会上传一个文件名为 `DS2API_HISTORY.txt` 的上下文文件。文件内容会先做 OpenAI 消息标准化，再序列化成按轮次编号的 `DS2API_HISTORY.txt` 风格 transcript，带有 `# DS2API_HISTORY.txt` 标题和 `=== N. ROLE ===` 分段；live prompt 中则会给出一个 continuation 语气的 user 消息，引导模型从 `DS2API_HISTORY.txt` 的最新状态继续推进，并直接回答最新请求，避免把任务拉回起点。
+- 如果 `current_input_file.enabled=false`，请求会直接透传，不上传任何拆分上下文文件。
+- 旧的 `history_split.enabled` / `history_split.trigger_after_turns` 会被读取进配置对象以保持兼容，但不会触发拆分上传，也不会影响 `current_input_file` 的默认开启。
+- 即使触发 `current_input_file` 后 live prompt 被缩短，对客户端回包里的上下文 token 统计，仍会沿用**拆分前的完整 prompt 语义**做计数，而不是按缩短后的占位 prompt 计算；否则会把真实上下文显著算小。
+
+相关实现：
+
+- 配置访问器：
+  [internal/config/store_accessors.go](../internal/config/store_accessors.go)
+- 当前输入转文件：
+  [internal/httpapi/openai/history/current_input_file.go](../internal/httpapi/openai/history/current_input_file.go)
+- 旧历史拆分兼容壳：
+  [internal/httpapi/openai/history/history_split.go](../internal/httpapi/openai/history/history_split.go)
+
+当前输入转文件启用并触发时，上传文件的真实文件名是 `DS2API_HISTORY.txt`，文件内容是完整 `messages` 上下文；它仍会先用 OpenAI 消息标准化和 DeepSeek 角色标记序列化，再按轮次编号成 `DS2API_HISTORY.txt` 风格的 transcript（不再注入文件边界标签）：
+
+```text
+[uploaded filename]: DS2API_HISTORY.txt
+# DS2API_HISTORY.txt
+Prior conversation history and tool progress.
+
+=== 1. SYSTEM ===
+...
+
+=== 2. USER ===
+...
+
+=== 3. ASSISTANT ===
+...
+
+=== 4. TOOL ===
+...
+```
+
+开启后，请求的 live prompt 不再直接内联完整上下文，而是保留一个 user role 的短提示，提示模型基于已提供上下文直接回答最新请求；上传后的 `file_id` 会进入 `ref_file_ids`。
+
+## 10. 各协议入口的差异
+
+### 10.1 OpenAI Chat / Responses
+
+特点：
+
+- `developer` 会映射到 `system`
+- Responses `instructions` 会 prepend 为 system message
+- `tools` 会注入 system prompt
+- `attachments` / `input_file` / inline 文件会进入 `ref_file_ids`
+- current input file 主要在这条链路里生效，旧 `history_split` 仅作兼容字段保留
+
+### 10.2 Claude Messages
+
+特点：
+
+- top-level `system` 优先作为系统提示
+- `tool_use` / `tool_result` 会被转换成统一的 assistant/tool 历史语义
+- `tools` 同样会被并进 system prompt
+- 常规执行通过 `internal/httpapi/claude/handler_messages.go` 转到 OpenAI chat 路径，模型 alias 会先解析成 DeepSeek 原生模型
+- 当前代码里没有像 OpenAI 那样完整的 `ref_file_ids` 附件链路
+
+### 10.3 Gemini
+
+特点：
+
+- `systemInstruction`、`contents.parts`、`functionCall`、`functionResponse` 会先归一
+- tools 会转成 OpenAI 风格 function schema
+- prompt 构建复用 OpenAI 的 `promptcompat.BuildOpenAIPromptForAdapter`
+- 未识别的非文本 part 会被安全序列化进 prompt，并对二进制/疑似 base64 内容做省略或截断处理
+
+也就是说，Gemini 在“最终 prompt 语义”上，尽量和 OpenAI 保持一致。
+
+## 11. 一份贴近真实的最终上下文示意
+
+假设用户发来一个多轮请求：
+
+- 有 system/developer 文本
+- 有 tools
+- 有一个文件型 systemprompt 附件
+- 有历史 assistant tool call / tool result
+- current input file 已触发
+
+那么最终上下文更接近：
+
+```json
+{
+  "prompt": "<｜begin▁of▁sentence｜><｜System｜>原 system / developer\n\nYou have access to these tools: ...<｜end▁of▁instructions｜><｜User｜>Continue from the latest state in the attached DS2API_HISTORY.txt context. Treat it as the current working state and answer the latest user request directly.<｜Assistant｜>",
+  "ref_file_ids": [
+    "file-current-input-ignore",
+    "file-systemprompt",
+    "file-other-attachment"
+  ],
+  "thinking_enabled": true,
+  "search_enabled": false
+}
+```
+
+这正是“API 转网页对话纯文本”的核心成果：
+
+- 大部分结构化语义被压进 `prompt`
+- 文件保持文件
+- 需要时把完整上下文拆进 `DS2API_HISTORY.txt` 上下文文件，并按轮次编号成 transcript
+
+## 12. 修改时必须同步本文档的场景
+
+只要触碰以下任一类行为，就必须在同一提交或同一 PR 中更新本文档：
+
+- 角色映射变更
+- system / developer / instructions 合并规则变更
+- assistant reasoning 保留格式变更
+- assistant 历史 `tool_calls` 的 DSML / XML 呈现方式变更
+- tool result 注入方式变更
+- tool prompt 模板或 tool_choice 约束变更
+- inline 文件上传 / 文件引用收集规则变更
+- current input file 触发条件、上传格式、`DS2API_HISTORY.txt` transcript 结构变更
+- 旧 `history_split` 兼容逻辑的读取、忽略或退化行为变更
+- completion payload 字段语义变更
+- Claude / Gemini 对这套统一语义的复用关系变更
+
+优先检查这些文件：
+
+- `internal/promptcompat/request_normalize.go`
+- `internal/promptcompat/prompt_build.go`
+- `internal/promptcompat/message_normalize.go`
+- `internal/promptcompat/tool_prompt.go`
+- `internal/httpapi/openai/files/file_inline_upload.go`
+- `internal/promptcompat/file_refs.go`
+- `internal/httpapi/openai/history/current_input_file.go`、`internal/httpapi/openai/history/history_split.go`
+- `internal/promptcompat/responses_input_normalize.go`
+- `internal/httpapi/claude/standard_request.go`
+- `internal/httpapi/claude/handler_utils.go`
+- `internal/httpapi/gemini/convert_request.go`
+- `internal/httpapi/gemini/convert_messages.go`
+- `internal/httpapi/gemini/convert_tools.go`
+- `internal/prompt/messages.go`
+- `internal/prompt/tool_calls.go`
+- `internal/promptcompat/standard_request.go`
+
+## 13. 建议的最小验证
+
+改动这条链路后，至少补齐或检查这些测试：
+
+- `go test ./internal/prompt/...`
+- `go test ./internal/httpapi/openai/...`
+- `go test ./internal/httpapi/claude/...`
+- `go test ./internal/httpapi/gemini/...`
+- `go test ./internal/promptcompat/... ./internal/util/...`
+
+如果改的是 tool call 相关兼容语义，还应同时检查：
+
+- `go test ./internal/toolcall/...`
+- `node --test tests/node/stream-tool-sieve.test.js`
+
+## 14. 文档同步约定
+
+本文档是这条兼容链路的专项说明。
+
+如果外部接口行为也变了，还应同步检查：
+
+- [API.md](../API.md)
+- [API.en.md](../API.en.md)
+- [docs/toolcall-semantics.md](./toolcall-semantics.md)
+
+原则是：
+
+- 内部主链路变化，至少更新本文档
+- 外部可见契约变化，再同步更新 API 文档
--- a/docs/toolcall-semantics.md
+++ b/docs/toolcall-semantics.md
@@ -1,72 +1,105 @@
 # Tool call parsing semantics（Go/Node 统一语义）

-本文档描述当前代码中 `ParseToolCallsDetailed` / `parseToolCallsDetailed` 的**实际行为**，用于对齐 Go 与 Node Runtime。
+本文档描述当前代码中的**实际行为**，以 `internal/toolcall`、`internal/toolstream` 与 `internal/js/helpers/stream-tool-sieve` 为准。

-## 1) 输出结构（当前实现）
+文档导航：[总览](../README.MD) / [架构说明](./ARCHITECTURE.md) / [测试指南](./TESTING.md)

- `calls`：解析得到的工具调用列表（`name` + `input`）。
- `sawToolCallSyntax`：检测到工具调用语法特征时为 `true`（例如 `tool_calls`、`<tool_call>`、`<function_call>`、`<invoke>`、`function.name:`）。
- `rejectedByPolicy`：当前实现固定为 `false`（预留字段，尚未启用 allow-list 拒绝）。
- `rejectedToolNames`：当前实现固定为空数组（预留字段）。
+## 1) 当前可执行格式

-> 说明：`filterToolCallsDetailed` 当前仅做结构清洗，不做工具名策略拒绝。
+当前版本推荐模型输出 DSML 外壳：

-## 2) 解析管线
+```xml
+<|DSML|tool_calls>
+  <|DSML|invoke name="read_file">
+    <|DSML|parameter name="path"><![CDATA[README.MD]]></|DSML|parameter>
+  </|DSML|invoke>
+</|DSML|tool_calls>
+```

-1. **示例保护**：若判定为 fenced code block 示例上下文，则跳过执行型解析。
-2. **候选片段构建**：从完整文本中构建候选（原文、围绕 `tool_calls` 的 JSON 片段、首尾大括号切片等）。
-3. **按序尝试解析（命中即停）**：
-   - 对“明显 JSON 工具载荷候选”（以 `{`/`[` 开头且包含 `tool_calls`/`\"function\"`）先走 JSON 解析，避免 JSON 字符串内偶发 XML 片段误命中；
-   - 其余候选优先 XML 解析（`<tool_call>` / `<function_call>` / `<invoke>` / `tool_use` / `antml:function_call` 等）；
-   - JSON 解析（`{"tool_calls": [...]}`、列表、单对象）；
-   - Markup 解析；
-   - Text-KV 回退（如 `function.name:` + `function.arguments:`）。
-4. **兜底**：候选全部失败后，再对全文做 XML / Text-KV 回退。
+兼容层仍接受旧式 canonical XML：

-## 3) XML 能力边界（当前）
+```xml
+<tool_calls>
+  <invoke name="read_file">
+    <parameter name="path"><![CDATA[README.MD]]></parameter>
+  </invoke>
+</tool_calls>
+```

-当前已支持输入端的“多 XML/标记风格”解析，包括但不限于：
+这不是原生 DSML 全链路实现。DSML 主要用于让模型有意识地输出协议标识，隔离普通 XML 语义；进入 parser 前会按固定本地标签名归一化成 `<tool_calls>` / `<invoke>` / `<parameter>`，内部仍以现有 XML 解析语义为准。

- `<tool_call><tool_name>...</tool_name><parameters>...</parameters></tool_call>`
- `<function_call>tool</function_call><function parameter name="x">...</function parameter>`
- `<invoke name="tool"><parameter name="x">...</parameter></invoke>`
- `antml:function_call` / `antml:argument` / `antml:parameters`
- `tool_use` 家族标签
+约束：

-但**输出端仍统一转换为 OpenAI 兼容 JSON 事件/对象**（`message.tool_calls`、`delta.tool_calls`、`response.function_call_arguments.*`）。
+- 必须有 `<|DSML|tool_calls>...</|DSML|tool_calls>` 或 `<tool_calls>...</tool_calls>` wrapper
+- 每个调用必须在 `<|DSML|invoke name="...">...</|DSML|invoke>` 或 `<invoke name="...">...</invoke>` 内
+- 工具名必须放在 `invoke` 的 `name` 属性
+- 参数必须使用 `<|DSML|parameter name="...">...</|DSML|parameter>` 或 `<parameter name="...">...</parameter>`
+- 同一个工具块内不要混用 DSML 标签和旧 XML 工具标签；混搭不属于推荐格式，兼容层只会按固定本地标签名逐个归一化后再交给 XML 解析（参见“回归验证”中的混搭标签用例）

-## 4) 关于“是否可以封装成 XML 再喂给模型”
+兼容修复：

-结论：**可以做，而且当前解析器已经能兼容 XML 作为输入格式之一**，但代码里并没有 `toolcall.prefer_xml_output` 这个开关。现有可调配置只有：
+- 如果模型漏掉 opening wrapper，但后面仍输出了一个或多个 invoke 并以 closing wrapper 收尾，Go 解析链路会在解析前补回缺失的 opening wrapper。
+- Go / Node 解析层不再枚举每一种 DSML typo。它会把工具标签名前的 `DSML`、管道符 `|` / `｜`、空白、重复 leading `<` 视为可容忍的协议噪声，然后只匹配固定本地标签名 `tool_calls` / `invoke` / `parameter`。例如 `<DSML|tool_calls>`、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、`<DSMLtool_calls>`、`<<DSML|DSML|tool_calls>` 都会归一化；相似但非固定标签名（如 `tool_calls_extra`）仍按普通文本处理。
+- 如果模型在固定工具标签名后多输出一个尾部管道符，例如 `<|DSML|tool_calls|` / `<|DSML|invoke|` / `<|DSML|parameter|`，兼容层会把这个尾部 `|` 当作异常标签终止符并补齐缺失的 `>`；如果后面已经有 `>`，也会消费这个多余 `|` 后再归一化。
+- 这是一个针对常见模型失误的窄修复，不改变推荐输出格式；prompt 仍要求模型直接输出完整 DSML 外壳。
+- 裸 `<invoke ...>` / `<parameter ...>` 不会被当成“已支持的工具语法”；只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 才会进入工具调用路径。

- `toolcall.mode`：`feature_match` / `off`
- `toolcall.early_emit_confidence`：`high` / `low` / `off`
+## 2) 非兼容内容

-推荐思路仍然是“输入兼容层 + 输出按客户端协议渲染”：
+任何不满足上述 DSML / canonical XML 形态的内容，都会保留为普通文本，不会执行。一个例外是上一节提到的“缺失 opening wrapper、但 closing wrapper 仍存在”的窄修复场景。

-1. **Prompt 约束层**：如果你要尝试 XML-first，可以在系统提示词里约束模型输出规范 XML tool block（例如 `<tool_calls><tool_call>...</tool_call></tool_calls>`）。
-2. **解析兼容层**：继续在 parser 中同时接受 JSON / XML / ANTML / invoke / text-kv。
-3. **协议归一层**：无论模型输出什么格式，统一落到内部 `ParsedToolCall`。
-4. **对外渲染层**：根据客户端请求协议渲染（OpenAI / Claude / Gemini 各自格式）。
+当前 parser 不把 allow-list 当作硬安全边界：即使传入了已声明工具名列表，XML 里出现未声明工具名时也会尽量解析并交给上层协议输出；真正的执行侧仍必须自行校验工具名和参数。

-这样可以同时获得：
+## 3) 流式与防泄漏行为

- 减少模型端 JSON 转义/引号错误；
- 不破坏现有 SDK / 客户端生态；
- 逐步灰度（按模型、按租户、按请求开关）。
+在流式链路中（Go / Node 一致）：

-## 5) 落地建议（低风险迭代）
+- DSML `<|DSML|tool_calls>` wrapper、基于固定本地标签名的 DSML 噪声容错形态、尾部管道符形态（如 `<|DSML|tool_calls|`）和 canonical `<tool_calls>` wrapper 都会进入结构化捕获
+- 如果流里直接从 invoke 开始，但后面补上了 closing wrapper，Go 流式筛分也会按缺失 opening wrapper 的修复路径尝试恢复
+- 已识别成功的工具调用不会再次回流到普通文本
+- 不符合上述 DSML / canonical XML 形态的块不会执行，并继续按原样文本透传
+- fenced code block（反引号 `` ``` `` 和波浪线 `~~~`）中的 XML 示例始终按普通文本处理
+- 支持嵌套围栏（如 4 反引号嵌套 3 反引号）和 CDATA 内围栏保护
+- 如果模型把 `<![CDATA[` 打开后却没有闭合，流式扫描阶段仍会保守地继续缓冲，不会误把 CDATA 里的示例 XML 当成真实工具调用；在最终 parse / flush 恢复阶段，会对这类 loose CDATA 做窄修复，尽量保住外层已完整包裹的真实工具调用
+- 当正文只是提及某种标签名（如 `<dsml|tool_calls>` 或 Markdown inline code 里的 `<|DSML|tool_calls>`）、而后面紧跟真正的工具调用时，sieve 会跳过不可解析的提及候选并继续匹配后续真实工具块，不会因这类提及导致工具调用丢失，也不会截断提及之后的正文
+- Go 侧 SSE 读取不再使用 `bufio.Scanner` 的固定 token 上限；单个 `data:` 行中包含很长的写文件参数时，非流式收集、流式解析与 auto-continue 透传都应保留完整行，再交给 tool parser 处理

- 继续使用现有的 `toolcall.mode=feature_match` 和 `toolcall.early_emit_confidence=high` 作为默认策略。
- 如果要试 XML-first，把它放在 prompt 层或上游模板层，不要假设代码里已有专门的 XML 输出开关。
- 增加观测指标：
-  - `toolcall_parse_source`（json/xml/markup/textkv）；
-  - `toolcall_parse_success_rate`；
-  - `toolcall_malformed_rate`；
-  - `toolcall_repair_rate`。
- 先在 `responses` 链路灰度，再扩展 `chat.completions`。
+另外，`<parameter>` 的值如果本身是合法 JSON 字面量，也会按结构化值解析，而不是一律保留为字符串。例如 `123`、`true`、`null`、`[1,2]`、`{"a":1}` 都会还原成对应的 number / boolean / null / array / object。
+结构化 XML 参数也会还原为 JSON 结构：如果参数体只包含一个或多个 `<item>...</item>` 子节点，会输出数组；嵌套对象里的 item-only 字段也同样按数组处理。例如 `<parameter name="questions"><item><question>...</question></item></parameter>` 会输出 `{"questions":[{"question":"..."}]}`，而不是 `{"questions":{"item":...}}`。
+如果模型误把完整结构化 XML fragment 放进 CDATA，Go / Node 会先保护明显的原文字段（如 `content` / `command` / `prompt` / `old_string` / `new_string`），其余参数会尝试把 CDATA 内的完整 XML fragment 还原成 object / array；常见的 `<br>` 分隔符会按换行归一化后再解析。但如果 CDATA 只是单个平面的 XML/HTML 标签，例如 `<b>urgent</b>` 这种行内标记，兼容层会把它保留为原始字符串，而不会强行升成 object / array；只有明显表示结构的 CDATA 片段，例如多兄弟节点、嵌套子节点或 `item` 列表，才会触发结构化恢复。

-## 6) 兼容性提醒
+## 4) 输出结构

- 上游模型若输出混合文本 + XML，仍可能出现“半结构化”噪声，需要依赖现有 sieve 增量消费策略。
- XML 不等于安全：仍需做 tool 名、参数 schema、执行权限的服务端校验。
+`ParseToolCallsDetailed` / `parseToolCallsDetailed` 返回：
+
+- `calls`：解析出的工具调用列表（`name` + `input`）
+- `sawToolCallSyntax`：检测到 DSML / canonical wrapper，或命中“缺失 opening wrapper 但可修复”的形态时会为 `true`；裸 `invoke` 不计入该标记
+- `rejectedByPolicy`：当前固定为 `false`
+- `rejectedToolNames`：当前固定为空数组
+
+## 5) 落地建议
+
+1. Prompt 里只示范 DSML 外壳语法。
+2. 上游客户端应直接输出完整 DSML 外壳；DS2API 兼容旧式 canonical XML，并只对“closing tag 在、opening tag 漏掉”的常见失误做窄修复，不会泛化接受其他旧格式。
+3. 不要依赖 parser 做安全控制；执行器侧仍应做工具名和参数校验。
+
+## 6) 回归验证
+
+可直接运行：
+
+```bash
+go test -v -run 'TestParseToolCalls|TestProcessToolSieve' ./internal/toolcall ./internal/toolstream ./internal/httpapi/openai/...
+node --test tests/node/stream-tool-sieve.test.js
+```
+
+重点覆盖：
+
+- DSML `<|DSML|tool_calls>` wrapper 正常解析
+- legacy canonical `<tool_calls>` wrapper 正常解析
+- 固定本地标签名的 DSML 噪声容错形态（如 `<DSML|tool_calls>`、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、`<DSMLtool_calls>`、`<<DSML|DSML|tool_calls>`）正常解析
+- 混搭标签（DSML wrapper + canonical inner）归一化后正常解析
+- 波浪线围栏 `~~~` 内的示例不执行
+- 嵌套围栏（4 反引号嵌套 3 反引号）内的示例不执行
+- 文本 mention 标签名后紧跟真正工具调用的场景（含同一 wrapper 变体）
+- 非兼容内容按普通文本透传
+- 代码块示例不执行
--- a/go.mod
+++ b/go.mod
@@ -1,17 +1,27 @@
 module ds2api

-go 1.24
+go 1.26.0

 require (
-	github.com/andybalholm/brotli v1.0.6
-	github.com/go-chi/chi/v5 v5.2.3
+	github.com/andybalholm/brotli v1.2.1
+	github.com/go-chi/chi/v5 v5.2.5
 	github.com/google/uuid v1.6.0
-	github.com/refraction-networking/utls v1.8.1
-	github.com/tetratelabs/wazero v1.9.0
+	github.com/hupe1980/go-tiktoken v0.0.10
+	github.com/refraction-networking/utls v1.8.2
+	github.com/router-for-me/CLIProxyAPI/v6 v6.9.14
 )

+require github.com/dlclark/regexp2 v1.11.5 // indirect
+
 require (
-	github.com/klauspost/compress v1.17.4 // indirect
-	golang.org/x/crypto v0.36.0 // indirect
-	golang.org/x/sys v0.31.0 // indirect
+	github.com/klauspost/compress v1.18.5 // indirect
+	github.com/sirupsen/logrus v1.9.4 // indirect
+	github.com/tidwall/gjson v1.18.0 // indirect
+	github.com/tidwall/match v1.2.0 // indirect
+	github.com/tidwall/pretty v1.2.1 // indirect
+	github.com/tidwall/sjson v1.2.5 // indirect
+	golang.org/x/crypto v0.49.0 // indirect
+	golang.org/x/net v0.52.0
+	golang.org/x/sys v0.42.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
--- a/go.sum
+++ b/go.sum
@@ -1,16 +1,49 @@
-github.com/andybalholm/brotli v1.0.6 h1:Yf9fFpf49Zrxb9NlQaluyE92/+X7UVHlhMNJN2sxfOI=
-github.com/andybalholm/brotli v1.0.6/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
-github.com/go-chi/chi/v5 v5.2.3 h1:WQIt9uxdsAbgIYgid+BpYc+liqQZGMHRaUwp0JUcvdE=
-github.com/go-chi/chi/v5 v5.2.3/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
+github.com/andybalholm/brotli v1.2.1 h1:R+f5xP285VArJDRgowrfb9DqL18yVK0gKAW/F+eTWro=
+github.com/andybalholm/brotli v1.2.1/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
+github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/go-chi/chi/v5 v5.2.5 h1:Eg4myHZBjyvJmAFjFvWgrqDTXFyOzjj7YIm3L3mu6Ug=
+github.com/go-chi/chi/v5 v5.2.5/go.mod h1:X7Gx4mteadT3eDOMTsXzmI4/rwUpOwBHLpAfupzFJP0=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
-github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
-github.com/refraction-networking/utls v1.8.1 h1:yNY1kapmQU8JeM1sSw2H2asfTIwWxIkrMJI0pRUOCAo=
-github.com/refraction-networking/utls v1.8.1/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
-github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I=
-github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM=
-golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
-golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
-golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
-golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+github.com/hupe1980/go-tiktoken v0.0.10 h1:m6phOJaGyctqWdGIgwn9X8AfJvaG74tnQoDL+ntOUEQ=
+github.com/hupe1980/go-tiktoken v0.0.10/go.mod h1:NME6d8hrE+Jo+kLUZHhXShYV8e40hYkm4BbSLQKtvAo=
+github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE=
+github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/refraction-networking/utls v1.8.2 h1:j4Q1gJj0xngdeH+Ox/qND11aEfhpgoEvV+S9iJ2IdQo=
+github.com/refraction-networking/utls v1.8.2/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
+github.com/router-for-me/CLIProxyAPI/v6 v6.9.14 h1:XItUHrPGE9E5xTeZIPjKGmKqfEs1AZbxl1RPfO5xtrc=
+github.com/router-for-me/CLIProxyAPI/v6 v6.9.14/go.mod h1:P1jsIPFXorYGuS2N/3BlZYkpRKi/z7+oR3+1tdG0u4k=
+github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w=
+github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g=
+github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
+github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/match v1.2.0 h1:0pt8FlkOwjN2fPt4bIl4BoNxb98gGHN2ObFEDkrfZnM=
+github.com/tidwall/match v1.2.0/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
+github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
+github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
+github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
+github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
+golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
+golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
+golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
+golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
+golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
+golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
+golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
+golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/internal/account/pool_acquire.go
+++ b/internal/account/pool_acquire.go
@@ -60,16 +60,10 @@ func (p *Pool) acquireLocked(target string, exclude map[string]bool) (config.Acc
 		return acc, true
 	}

-	if acc, ok := p.tryAcquire(exclude, true); ok {
-		return acc, true
-	}
-	if acc, ok := p.tryAcquire(exclude, false); ok {
-		return acc, true
-	}
-	return config.Account{}, false
+	return p.tryAcquire(exclude)
 }

-func (p *Pool) tryAcquire(exclude map[string]bool, requireToken bool) (config.Account, bool) {
+func (p *Pool) tryAcquire(exclude map[string]bool) (config.Account, bool) {
 	for i := 0; i < len(p.queue); i++ {
 		id := p.queue[i]
 		if exclude[id] || !p.canAcquireIDLocked(id) {
@@ -79,9 +73,6 @@ func (p *Pool) tryAcquire(exclude map[string]bool, requireToken bool) (config.Ac
 		if !ok {
 			continue
 		}
-		if requireToken && acc.Token == "" {
-			continue
-		}
 		p.inUse[id]++
 		p.bumpQueue(id)
 		return acc, true
--- a/internal/account/pool_edge_test.go
+++ b/internal/account/pool_edge_test.go
@@ -13,9 +13,7 @@ import (

 func TestPoolEmptyNoAccounts(t *testing.T) {
 	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "2")
-	t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "")
 	t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
-	t.Setenv("DS2API_ACCOUNT_QUEUE_SIZE", "")
 	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[]}`)
 	pool := NewPool(config.LoadStore())
 	if _, ok := pool.Acquire("", nil); ok {
@@ -165,9 +163,7 @@ func TestPoolAcquireWaitTargetAccount(t *testing.T) {

 func TestPoolMaxQueueSizeOverride(t *testing.T) {
 	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
-	t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "")
 	t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "5")
-	t.Setenv("DS2API_ACCOUNT_QUEUE_SIZE", "")
 	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[{"email":"acc1@example.com","token":"t1"}]}`)
 	pool := NewPool(config.LoadStore())
 	status := pool.Status()
@@ -176,19 +172,6 @@ func TestPoolMaxQueueSizeOverride(t *testing.T) {
 	}
 }

-func TestPoolQueueSizeAliasEnv(t *testing.T) {
-	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
-	t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "")
-	t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
-	t.Setenv("DS2API_ACCOUNT_QUEUE_SIZE", "7")
-	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[{"email":"acc1@example.com","token":"t1"}]}`)
-	pool := NewPool(config.LoadStore())
-	status := pool.Status()
-	if got, ok := status["max_queue_size"].(int); !ok || got != 7 {
-		t.Fatalf("expected max_queue_size=7, got %#v", status["max_queue_size"])
-	}
-}
-
 func TestPoolMultipleAcquireReleaseCycles(t *testing.T) {
 	pool := newSingleAccountPoolForTest(t, "1")
 	for i := 0; i < 10; i++ {
--- a/internal/account/pool_limits.go
+++ b/internal/account/pool_limits.go
@@ -29,13 +29,8 @@ func (p *Pool) ApplyRuntimeLimits(maxInflightPerAccount, maxQueueSize, globalMax
 }

 func maxInflightFromEnv() int {
-	for _, key := range []string{"DS2API_ACCOUNT_MAX_INFLIGHT", "DS2API_ACCOUNT_CONCURRENCY"} {
-		raw := strings.TrimSpace(os.Getenv(key))
-		if raw == "" {
-			continue
-		}
-		n, err := strconv.Atoi(raw)
-		if err == nil && n > 0 {
+	if raw := strings.TrimSpace(os.Getenv("DS2API_ACCOUNT_MAX_INFLIGHT")); raw != "" {
+		if n, err := strconv.Atoi(raw); err == nil && n > 0 {
 			return n
 		}
 	}
@@ -53,13 +48,8 @@ func defaultRecommendedConcurrency(accountCount, maxInflightPerAccount int) int
 }

 func maxQueueFromEnv(defaultSize int) int {
-	for _, key := range []string{"DS2API_ACCOUNT_MAX_QUEUE", "DS2API_ACCOUNT_QUEUE_SIZE"} {
-		raw := strings.TrimSpace(os.Getenv(key))
-		if raw == "" {
-			continue
-		}
-		n, err := strconv.Atoi(raw)
-		if err == nil && n >= 0 {
+	if raw := strings.TrimSpace(os.Getenv("DS2API_ACCOUNT_MAX_QUEUE")); raw != "" {
+		if n, err := strconv.Atoi(raw); err == nil && n >= 0 {
 			return n
 		}
 	}
--- a/internal/account/pool_test.go
+++ b/internal/account/pool_test.go
@@ -12,9 +12,7 @@ import (
 func newPoolForTest(t *testing.T, maxInflight string) *Pool {
 	t.Helper()
 	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", maxInflight)
-	t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "")
 	t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
-	t.Setenv("DS2API_ACCOUNT_QUEUE_SIZE", "")
 	t.Setenv("DS2API_CONFIG_JSON", `{
 		"keys":["k1"],
 		"accounts":[
@@ -29,9 +27,7 @@ func newPoolForTest(t *testing.T, maxInflight string) *Pool {
 func newSingleAccountPoolForTest(t *testing.T, maxInflight string) *Pool {
 	t.Helper()
 	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", maxInflight)
-	t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "")
 	t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
-	t.Setenv("DS2API_ACCOUNT_QUEUE_SIZE", "")
 	t.Setenv("DS2API_CONFIG_JSON", `{
 		"keys":["k1"],
 		"accounts":[{"email":"acc1@example.com","token":"token1"}]
@@ -170,9 +166,9 @@ func TestPoolStatusRecommendedConcurrencyRespectsOverride(t *testing.T) {
 	}
 }

-func TestPoolAccountConcurrencyAliasEnv(t *testing.T) {
-	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "")
-	t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "4")
+func TestPoolGlobalMaxInflightEnv(t *testing.T) {
+	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
+	t.Setenv("DS2API_GLOBAL_MAX_INFLIGHT", "4")
 	t.Setenv("DS2API_CONFIG_JSON", `{
 		"keys":["k1"],
 		"accounts":[
@@ -183,15 +179,15 @@ func TestPoolAccountConcurrencyAliasEnv(t *testing.T) {

 	pool := NewPool(config.LoadStore())
 	status := pool.Status()
-	if got, ok := status["max_inflight_per_account"].(int); !ok || got != 4 {
+	if got, ok := status["global_max_inflight"].(int); !ok || got != 4 {
+		t.Fatalf("unexpected global_max_inflight: %#v", status["global_max_inflight"])
+	}
+	if got, ok := status["max_inflight_per_account"].(int); !ok || got != 1 {
 		t.Fatalf("unexpected max_inflight_per_account: %#v", status["max_inflight_per_account"])
 	}
-	if got, ok := status["recommended_concurrency"].(int); !ok || got != 8 {
+	if got, ok := status["recommended_concurrency"].(int); !ok || got != 2 {
 		t.Fatalf("unexpected recommended_concurrency: %#v", status["recommended_concurrency"])
 	}
-	if got, ok := status["max_queue_size"].(int); !ok || got != 8 {
-		t.Fatalf("unexpected max_queue_size: %#v", status["max_queue_size"])
-	}
 }

 func TestPoolDropsLegacyTokenOnlyAccountOnLoad(t *testing.T) {
@@ -215,6 +211,31 @@ func TestPoolDropsLegacyTokenOnlyAccountOnLoad(t *testing.T) {
 	}
 }

+func TestPoolAcquireRotatesIntoTokenlessAccounts(t *testing.T) {
+	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
+	t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["k1"],
+		"accounts":[
+			{"email":"acc1@example.com","token":"token1"},
+			{"email":"acc2@example.com","token":""},
+			{"email":"acc3@example.com","token":""}
+		]
+	}`)
+
+	pool := NewPool(config.LoadStore())
+	for i, want := range []string{"acc1@example.com", "acc2@example.com", "acc3@example.com"} {
+		acc, ok := pool.Acquire("", nil)
+		if !ok {
+			t.Fatalf("expected acquire success at step %d", i+1)
+		}
+		if got := acc.Identifier(); got != want {
+			t.Fatalf("unexpected account at step %d: got %q want %q", i+1, got, want)
+		}
+		pool.Release(acc.Identifier())
+	}
+}
+
 func TestPoolAcquireWaitQueuesAndSucceedsAfterRelease(t *testing.T) {
 	pool := newSingleAccountPoolForTest(t, "1")
 	first, ok := pool.Acquire("", nil)
--- a/internal/adapter/claude/deps_injection_test.go
+++ b/internal/adapter/claude/deps_injection_test.go
@@ -1,33 +0,0 @@
-package claude
-
-import "testing"
-
-type mockClaudeConfig struct {
-	m map[string]string
-}
-
-func (m mockClaudeConfig) ClaudeMapping() map[string]string { return m.m }
-
-func TestNormalizeClaudeRequestUsesConfigInterfaceMapping(t *testing.T) {
-	req := map[string]any{
-		"model": "claude-opus-4-6",
-		"messages": []any{
-			map[string]any{"role": "user", "content": "hello"},
-		},
-	}
-	out, err := normalizeClaudeRequest(mockClaudeConfig{
-		m: map[string]string{
-			"fast": "deepseek-chat",
-			"slow": "deepseek-reasoner-search",
-		},
-	}, req)
-	if err != nil {
-		t.Fatalf("normalizeClaudeRequest error: %v", err)
-	}
-	if out.Standard.ResolvedModel != "deepseek-reasoner-search" {
-		t.Fatalf("resolved model mismatch: got=%q", out.Standard.ResolvedModel)
-	}
-	if !out.Standard.Thinking || !out.Standard.Search {
-		t.Fatalf("unexpected flags: thinking=%v search=%v", out.Standard.Thinking, out.Standard.Search)
-	}
-}
--- a/internal/adapter/claude/handler_messages.go
+++ b/internal/adapter/claude/handler_messages.go
@@ -1,134 +0,0 @@
-package claude
-
-import (
-	"encoding/json"
-	"fmt"
-	"io"
-	"net/http"
-	"strings"
-	"time"
-
-	"ds2api/internal/auth"
-	"ds2api/internal/config"
-	claudefmt "ds2api/internal/format/claude"
-	"ds2api/internal/sse"
-	streamengine "ds2api/internal/stream"
-)
-
-func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
-	if strings.TrimSpace(r.Header.Get("anthropic-version")) == "" {
-		r.Header.Set("anthropic-version", "2023-06-01")
-	}
-	a, err := h.Auth.Determine(r)
-	if err != nil {
-		status := http.StatusUnauthorized
-		detail := err.Error()
-		if err == auth.ErrNoAccount {
-			status = http.StatusTooManyRequests
-		}
-		writeClaudeError(w, status, detail)
-		return
-	}
-	defer h.Auth.Release(a)
-
-	var req map[string]any
-	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-		writeClaudeError(w, http.StatusBadRequest, "invalid json")
-		return
-	}
-	norm, err := normalizeClaudeRequest(h.Store, req)
-	if err != nil {
-		writeClaudeError(w, http.StatusBadRequest, err.Error())
-		return
-	}
-	stdReq := norm.Standard
-
-	sessionID, err := h.DS.CreateSession(r.Context(), a, 3)
-	if err != nil {
-		writeClaudeError(w, http.StatusUnauthorized, "invalid token.")
-		return
-	}
-	pow, err := h.DS.GetPow(r.Context(), a, 3)
-	if err != nil {
-		writeClaudeError(w, http.StatusUnauthorized, "Failed to get PoW")
-		return
-	}
-	requestPayload := stdReq.CompletionPayload(sessionID)
-	resp, err := h.DS.CallCompletion(r.Context(), a, requestPayload, pow, 3)
-	if err != nil {
-		writeClaudeError(w, http.StatusInternalServerError, "Failed to get Claude response.")
-		return
-	}
-	if resp.StatusCode != http.StatusOK {
-		defer resp.Body.Close()
-		body, _ := io.ReadAll(resp.Body)
-		writeClaudeError(w, http.StatusInternalServerError, string(body))
-		return
-	}
-
-	if stdReq.Stream {
-		h.handleClaudeStreamRealtime(w, r, resp, stdReq.ResponseModel, norm.NormalizedMessages, stdReq.Thinking, stdReq.Search, stdReq.ToolNames)
-		return
-	}
-	result := sse.CollectStream(resp, stdReq.Thinking, true)
-	respBody := claudefmt.BuildMessageResponse(
-		fmt.Sprintf("msg_%d", time.Now().UnixNano()),
-		stdReq.ResponseModel,
-		norm.NormalizedMessages,
-		result.Thinking,
-		result.Text,
-		stdReq.ToolNames,
-	)
-	writeJSON(w, http.StatusOK, respBody)
-}
-
-func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Request, resp *http.Response, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string) {
-	defer resp.Body.Close()
-	if resp.StatusCode != http.StatusOK {
-		body, _ := io.ReadAll(resp.Body)
-		writeClaudeError(w, http.StatusInternalServerError, string(body))
-		return
-	}
-
-	w.Header().Set("Content-Type", "text/event-stream")
-	w.Header().Set("Cache-Control", "no-cache, no-transform")
-	w.Header().Set("Connection", "keep-alive")
-	w.Header().Set("X-Accel-Buffering", "no")
-	rc := http.NewResponseController(w)
-	_, canFlush := w.(http.Flusher)
-	if !canFlush {
-		config.Logger.Warn("[claude_stream] response writer does not support flush; streaming may be buffered")
-	}
-
-	streamRuntime := newClaudeStreamRuntime(
-		w,
-		rc,
-		canFlush,
-		model,
-		messages,
-		thinkingEnabled,
-		searchEnabled,
-		toolNames,
-	)
-	streamRuntime.sendMessageStart()
-
-	initialType := "text"
-	if thinkingEnabled {
-		initialType = "thinking"
-	}
-	streamengine.ConsumeSSE(streamengine.ConsumeConfig{
-		Context:             r.Context(),
-		Body:                resp.Body,
-		ThinkingEnabled:     thinkingEnabled,
-		InitialType:         initialType,
-		KeepAliveInterval:   claudeStreamPingInterval,
-		IdleTimeout:         claudeStreamIdleTimeout,
-		MaxKeepAliveNoInput: claudeStreamMaxKeepaliveCnt,
-	}, streamengine.ConsumeHooks{
-		OnKeepAlive: func() {
-			streamRuntime.sendPing()
-		},
-		OnParsed:   streamRuntime.onParsed,
-		OnFinalize: streamRuntime.onFinalize,
-	})
-}
--- a/internal/adapter/claude/stream_status_test.go
+++ b/internal/adapter/claude/stream_status_test.go
@@ -1,100 +0,0 @@
-package claude
-
-import (
-	"context"
-	"net/http"
-	"net/http/httptest"
-	"strings"
-	"testing"
-
-	"github.com/go-chi/chi/v5"
-	chimw "github.com/go-chi/chi/v5/middleware"
-
-	"ds2api/internal/auth"
-)
-
-type streamStatusClaudeAuthStub struct{}
-
-func (streamStatusClaudeAuthStub) Determine(_ *http.Request) (*auth.RequestAuth, error) {
-	return &auth.RequestAuth{
-		UseConfigToken: false,
-		DeepSeekToken:  "direct-token",
-		CallerID:       "caller:test",
-		TriedAccounts:  map[string]bool{},
-	}, nil
-}
-
-func (streamStatusClaudeAuthStub) Release(_ *auth.RequestAuth) {}
-
-type streamStatusClaudeDSStub struct{}
-
-func (streamStatusClaudeDSStub) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
-	return "session-id", nil
-}
-
-func (streamStatusClaudeDSStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
-	return "pow", nil
-}
-
-func (streamStatusClaudeDSStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) {
-	body := "data: {\"p\":\"response/content\",\"v\":\"hello\"}\n" + "data: [DONE]\n"
-	return &http.Response{
-		StatusCode: http.StatusOK,
-		Header:     make(http.Header),
-		Body:       ioNopCloser{strings.NewReader(body)},
-	}, nil
-}
-
-type ioNopCloser struct {
-	*strings.Reader
-}
-
-func (ioNopCloser) Close() error { return nil }
-
-type streamStatusClaudeStoreStub struct{}
-
-func (streamStatusClaudeStoreStub) ClaudeMapping() map[string]string {
-	return map[string]string{
-		"fast": "deepseek-chat",
-		"slow": "deepseek-reasoner",
-	}
-}
-
-func captureClaudeStatusMiddleware(statuses *[]int) func(http.Handler) http.Handler {
-	return func(next http.Handler) http.Handler {
-		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-			ww := chimw.NewWrapResponseWriter(w, r.ProtoMajor)
-			next.ServeHTTP(ww, r)
-			*statuses = append(*statuses, ww.Status())
-		})
-	}
-}
-
-func TestClaudeMessagesStreamStatusCapturedAs200(t *testing.T) {
-	statuses := make([]int, 0, 1)
-	h := &Handler{
-		Store: streamStatusClaudeStoreStub{},
-		Auth:  streamStatusClaudeAuthStub{},
-		DS:    streamStatusClaudeDSStub{},
-	}
-	r := chi.NewRouter()
-	r.Use(captureClaudeStatusMiddleware(&statuses))
-	RegisterRoutes(r, h)
-
-	reqBody := `{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":true}`
-	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(reqBody))
-	req.Header.Set("Authorization", "Bearer direct-token")
-	req.Header.Set("Content-Type", "application/json")
-	rec := httptest.NewRecorder()
-	r.ServeHTTP(rec, req)
-
-	if rec.Code != http.StatusOK {
-		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	if len(statuses) != 1 {
-		t.Fatalf("expected one captured status, got %d", len(statuses))
-	}
-	if statuses[0] != http.StatusOK {
-		t.Fatalf("expected captured status 200 (not 000), got %d", statuses[0])
-	}
-}
--- a/internal/adapter/gemini/convert_request.go
+++ b/internal/adapter/gemini/convert_request.go
@@ -1,46 +0,0 @@
-package gemini
-
-import (
-	"fmt"
-	"strings"
-
-	"ds2api/internal/adapter/openai"
-	"ds2api/internal/config"
-	"ds2api/internal/util"
-)
-
-func normalizeGeminiRequest(store ConfigReader, routeModel string, req map[string]any, stream bool) (util.StandardRequest, error) {
-	requestedModel := strings.TrimSpace(routeModel)
-	if requestedModel == "" {
-		return util.StandardRequest{}, fmt.Errorf("model is required in request path")
-	}
-
-	resolvedModel, ok := config.ResolveModel(store, requestedModel)
-	if !ok {
-		return util.StandardRequest{}, fmt.Errorf("Model '%s' is not available.", requestedModel)
-	}
-	thinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)
-
-	messagesRaw := geminiMessagesFromRequest(req)
-	if len(messagesRaw) == 0 {
-		return util.StandardRequest{}, fmt.Errorf("Request must include non-empty contents.")
-	}
-
-	toolsRaw := convertGeminiTools(req["tools"])
-	finalPrompt, toolNames := openai.BuildPromptForAdapter(messagesRaw, toolsRaw, "")
-	passThrough := collectGeminiPassThrough(req)
-
-	return util.StandardRequest{
-		Surface:        "google_gemini",
-		RequestedModel: requestedModel,
-		ResolvedModel:  resolvedModel,
-		ResponseModel:  requestedModel,
-		Messages:       messagesRaw,
-		FinalPrompt:    finalPrompt,
-		ToolNames:      toolNames,
-		Stream:         stream,
-		Thinking:       thinkingEnabled,
-		Search:         searchEnabled,
-		PassThrough:    passThrough,
-	}, nil
-}
--- a/internal/adapter/gemini/handler_generate.go
+++ b/internal/adapter/gemini/handler_generate.go
@@ -1,135 +0,0 @@
-package gemini
-
-import (
-	"encoding/json"
-	"io"
-	"net/http"
-	"strings"
-
-	"github.com/go-chi/chi/v5"
-
-	"ds2api/internal/auth"
-	"ds2api/internal/sse"
-	"ds2api/internal/util"
-)
-
-func (h *Handler) handleGenerateContent(w http.ResponseWriter, r *http.Request, stream bool) {
-	a, err := h.Auth.Determine(r)
-	if err != nil {
-		status := http.StatusUnauthorized
-		detail := err.Error()
-		if err == auth.ErrNoAccount {
-			status = http.StatusTooManyRequests
-		}
-		writeGeminiError(w, status, detail)
-		return
-	}
-	defer h.Auth.Release(a)
-
-	var req map[string]any
-	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-		writeGeminiError(w, http.StatusBadRequest, "invalid json")
-		return
-	}
-
-	routeModel := strings.TrimSpace(chi.URLParam(r, "model"))
-	stdReq, err := normalizeGeminiRequest(h.Store, routeModel, req, stream)
-	if err != nil {
-		writeGeminiError(w, http.StatusBadRequest, err.Error())
-		return
-	}
-
-	sessionID, err := h.DS.CreateSession(r.Context(), a, 3)
-	if err != nil {
-		if a.UseConfigToken {
-			writeGeminiError(w, http.StatusUnauthorized, "Account token is invalid. Please re-login the account in admin.")
-		} else {
-			writeGeminiError(w, http.StatusUnauthorized, "Invalid token.")
-		}
-		return
-	}
-	pow, err := h.DS.GetPow(r.Context(), a, 3)
-	if err != nil {
-		writeGeminiError(w, http.StatusUnauthorized, "Failed to get PoW (invalid token or unknown error).")
-		return
-	}
-	payload := stdReq.CompletionPayload(sessionID)
-	resp, err := h.DS.CallCompletion(r.Context(), a, payload, pow, 3)
-	if err != nil {
-		writeGeminiError(w, http.StatusInternalServerError, "Failed to get completion.")
-		return
-	}
-
-	if stream {
-		h.handleStreamGenerateContent(w, r, resp, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames)
-		return
-	}
-	h.handleNonStreamGenerateContent(w, resp, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.ToolNames)
-}
-
-func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *http.Response, model, finalPrompt string, thinkingEnabled bool, toolNames []string) {
-	defer resp.Body.Close()
-	if resp.StatusCode != http.StatusOK {
-		body, _ := io.ReadAll(resp.Body)
-		writeGeminiError(w, resp.StatusCode, strings.TrimSpace(string(body)))
-		return
-	}
-
-	result := sse.CollectStream(resp, thinkingEnabled, true)
-	writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(model, finalPrompt, result.Thinking, result.Text, toolNames))
-}
-
-func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
-	parts := buildGeminiPartsFromFinal(finalText, finalThinking, toolNames)
-	usage := buildGeminiUsage(finalPrompt, finalThinking, finalText)
-	return map[string]any{
-		"candidates": []map[string]any{
-			{
-				"index": 0,
-				"content": map[string]any{
-					"role":  "model",
-					"parts": parts,
-				},
-				"finishReason": "STOP",
-			},
-		},
-		"modelVersion":  model,
-		"usageMetadata": usage,
-	}
-}
-
-func buildGeminiUsage(finalPrompt, finalThinking, finalText string) map[string]any {
-	promptTokens := util.EstimateTokens(finalPrompt)
-	reasoningTokens := util.EstimateTokens(finalThinking)
-	completionTokens := util.EstimateTokens(finalText)
-	return map[string]any{
-		"promptTokenCount":     promptTokens,
-		"candidatesTokenCount": reasoningTokens + completionTokens,
-		"totalTokenCount":      promptTokens + reasoningTokens + completionTokens,
-	}
-}
-
-func buildGeminiPartsFromFinal(finalText, finalThinking string, toolNames []string) []map[string]any {
-	detected := util.ParseToolCalls(finalText, toolNames)
-	if len(detected) == 0 && strings.TrimSpace(finalThinking) != "" {
-		detected = util.ParseToolCalls(finalThinking, toolNames)
-	}
-	if len(detected) > 0 {
-		parts := make([]map[string]any, 0, len(detected))
-		for _, tc := range detected {
-			parts = append(parts, map[string]any{
-				"functionCall": map[string]any{
-					"name": tc.Name,
-					"args": tc.Input,
-				},
-			})
-		}
-		return parts
-	}
-
-	text := finalText
-	if strings.TrimSpace(text) == "" {
-		text = finalThinking
-	}
-	return []map[string]any{{"text": text}}
-}
--- a/internal/adapter/gemini/handler_test.go
+++ b/internal/adapter/gemini/handler_test.go
@@ -1,252 +0,0 @@
-package gemini
-
-import (
-	"bufio"
-	"context"
-	"encoding/json"
-	"io"
-	"net/http"
-	"net/http/httptest"
-	"strings"
-	"testing"
-
-	"github.com/go-chi/chi/v5"
-
-	"ds2api/internal/auth"
-)
-
-type testGeminiConfig struct{}
-
-func (testGeminiConfig) ModelAliases() map[string]string { return nil }
-
-type testGeminiAuth struct {
-	a   *auth.RequestAuth
-	err error
-}
-
-func (m testGeminiAuth) Determine(_ *http.Request) (*auth.RequestAuth, error) {
-	if m.err != nil {
-		return nil, m.err
-	}
-	if m.a != nil {
-		return m.a, nil
-	}
-	return &auth.RequestAuth{
-		UseConfigToken: false,
-		DeepSeekToken:  "direct-token",
-		CallerID:       "caller:test",
-		TriedAccounts:  map[string]bool{},
-	}, nil
-}
-
-func (testGeminiAuth) Release(_ *auth.RequestAuth) {}
-
-type testGeminiDS struct {
-	resp *http.Response
-	err  error
-}
-
-func (m testGeminiDS) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
-	return "session-id", nil
-}
-
-func (m testGeminiDS) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
-	return "pow", nil
-}
-
-func (m testGeminiDS) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) {
-	if m.err != nil {
-		return nil, m.err
-	}
-	return m.resp, nil
-}
-
-func makeGeminiUpstreamResponse(lines ...string) *http.Response {
-	body := strings.Join(lines, "\n")
-	if !strings.HasSuffix(body, "\n") {
-		body += "\n"
-	}
-	return &http.Response{
-		StatusCode: http.StatusOK,
-		Header:     make(http.Header),
-		Body:       io.NopCloser(strings.NewReader(body)),
-	}
-}
-
-func TestGeminiRoutesRegistered(t *testing.T) {
-	h := &Handler{
-		Store: testGeminiConfig{},
-		Auth:  testGeminiAuth{err: auth.ErrUnauthorized},
-	}
-	r := chi.NewRouter()
-	RegisterRoutes(r, h)
-
-	paths := []string{
-		"/v1beta/models/gemini-2.5-pro:generateContent",
-		"/v1beta/models/gemini-2.5-pro:streamGenerateContent",
-		"/v1/models/gemini-2.5-pro:generateContent",
-		"/v1/models/gemini-2.5-pro:streamGenerateContent",
-	}
-	for _, path := range paths {
-		req := httptest.NewRequest(http.MethodPost, path, strings.NewReader(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`))
-		rec := httptest.NewRecorder()
-		r.ServeHTTP(rec, req)
-		if rec.Code == http.StatusNotFound {
-			t.Fatalf("expected route %s to be registered, got 404", path)
-		}
-	}
-}
-
-func TestGenerateContentReturnsFunctionCallParts(t *testing.T) {
-	upstream := makeGeminiUpstreamResponse(
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"eval_javascript\",\"input\":{\"code\":\"1+1\"}}]}"}`,
-		`data: [DONE]`,
-	)
-	h := &Handler{
-		Store: testGeminiConfig{},
-		Auth:  testGeminiAuth{},
-		DS:    testGeminiDS{resp: upstream},
-	}
-	r := chi.NewRouter()
-	RegisterRoutes(r, h)
-
-	body := `{
-		"contents":[{"role":"user","parts":[{"text":"call tool"}]}],
-		"tools":[{"functionDeclarations":[{"name":"eval_javascript","description":"eval","parameters":{"type":"object","properties":{"code":{"type":"string"}}}}]}]
-	}`
-	req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-pro:generateContent", strings.NewReader(body))
-	req.Header.Set("Authorization", "Bearer direct-token")
-	rec := httptest.NewRecorder()
-	r.ServeHTTP(rec, req)
-	if rec.Code != http.StatusOK {
-		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
-	}
-
-	var out map[string]any
-	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
-		t.Fatalf("decode response failed: %v", err)
-	}
-	candidates, _ := out["candidates"].([]any)
-	if len(candidates) == 0 {
-		t.Fatalf("expected non-empty candidates: %#v", out)
-	}
-	c0, _ := candidates[0].(map[string]any)
-	content, _ := c0["content"].(map[string]any)
-	parts, _ := content["parts"].([]any)
-	if len(parts) == 0 {
-		t.Fatalf("expected non-empty parts: %#v", content)
-	}
-	part0, _ := parts[0].(map[string]any)
-	functionCall, _ := part0["functionCall"].(map[string]any)
-	if functionCall["name"] != "eval_javascript" {
-		t.Fatalf("expected functionCall name eval_javascript, got %#v", functionCall)
-	}
-}
-
-func TestGenerateContentMixedToolSnippetAlsoTriggersFunctionCall(t *testing.T) {
-	upstream := makeGeminiUpstreamResponse(
-		`data: {"p":"response/content","v":"我来调用工具\n{\"tool_calls\":[{\"name\":\"eval_javascript\",\"input\":{\"code\":\"1+1\"}}]}"}`,
-		`data: [DONE]`,
-	)
-	h := &Handler{Store: testGeminiConfig{}, Auth: testGeminiAuth{}, DS: testGeminiDS{resp: upstream}}
-	r := chi.NewRouter()
-	RegisterRoutes(r, h)
-
-	body := `{
-		"contents":[{"role":"user","parts":[{"text":"call tool"}]}],
-		"tools":[{"functionDeclarations":[{"name":"eval_javascript","description":"eval","parameters":{"type":"object","properties":{"code":{"type":"string"}}}}]}]
-	}`
-	req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-pro:generateContent", strings.NewReader(body))
-	req.Header.Set("Authorization", "Bearer direct-token")
-	rec := httptest.NewRecorder()
-	r.ServeHTTP(rec, req)
-
-	if rec.Code != http.StatusOK {
-		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	var out map[string]any
-	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
-		t.Fatalf("decode response failed: %v", err)
-	}
-	candidates, _ := out["candidates"].([]any)
-	c0, _ := candidates[0].(map[string]any)
-	content, _ := c0["content"].(map[string]any)
-	parts, _ := content["parts"].([]any)
-	part0, _ := parts[0].(map[string]any)
-	functionCall, _ := part0["functionCall"].(map[string]any)
-	if functionCall["name"] != "eval_javascript" {
-		t.Fatalf("expected functionCall name eval_javascript for mixed snippet, got %#v", functionCall)
-	}
-}
-
-func TestStreamGenerateContentEmitsSSE(t *testing.T) {
-	upstream := makeGeminiUpstreamResponse(
-		`data: {"p":"response/content","v":"hello "}`,
-		`data: {"p":"response/content","v":"world"}`,
-		`data: [DONE]`,
-	)
-	h := &Handler{
-		Store: testGeminiConfig{},
-		Auth:  testGeminiAuth{},
-		DS:    testGeminiDS{resp: upstream},
-	}
-	r := chi.NewRouter()
-	RegisterRoutes(r, h)
-
-	body := `{"contents":[{"role":"user","parts":[{"text":"hello"}]}]}`
-	req := httptest.NewRequest(http.MethodPost, "/v1/models/gemini-2.5-pro:streamGenerateContent?alt=sse", strings.NewReader(body))
-	req.Header.Set("Authorization", "Bearer direct-token")
-	rec := httptest.NewRecorder()
-	r.ServeHTTP(rec, req)
-
-	if rec.Code != http.StatusOK {
-		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	if !strings.Contains(rec.Body.String(), "data: ") {
-		t.Fatalf("expected SSE data frames, got body=%s", rec.Body.String())
-	}
-	if !strings.Contains(rec.Body.String(), `"finishReason":"STOP"`) {
-		t.Fatalf("expected stream finish frame, got body=%s", rec.Body.String())
-	}
-
-	frames := extractGeminiSSEFrames(t, rec.Body.String())
-	if len(frames) == 0 {
-		t.Fatalf("expected non-empty sse frames, body=%s", rec.Body.String())
-	}
-	last := frames[len(frames)-1]
-	candidates, _ := last["candidates"].([]any)
-	if len(candidates) == 0 {
-		t.Fatalf("expected finish frame candidates, got %#v", last)
-	}
-	c0, _ := candidates[0].(map[string]any)
-	content, _ := c0["content"].(map[string]any)
-	if content == nil {
-		t.Fatalf("expected non-null content in finish frame, got %#v", c0)
-	}
-	parts, _ := content["parts"].([]any)
-	if len(parts) == 0 {
-		t.Fatalf("expected non-empty parts in finish frame content, got %#v", content)
-	}
-}
-
-func extractGeminiSSEFrames(t *testing.T, body string) []map[string]any {
-	t.Helper()
-	scanner := bufio.NewScanner(strings.NewReader(body))
-	out := make([]map[string]any, 0, 4)
-	for scanner.Scan() {
-		line := strings.TrimSpace(scanner.Text())
-		if !strings.HasPrefix(line, "data: ") {
-			continue
-		}
-		raw := strings.TrimSpace(strings.TrimPrefix(line, "data: "))
-		if raw == "" {
-			continue
-		}
-		var frame map[string]any
-		if err := json.Unmarshal([]byte(raw), &frame); err != nil {
-			continue
-		}
-		out = append(out, frame)
-	}
-	return out
-}
--- a/internal/adapter/openai/chat_stream_runtime.go
+++ b/internal/adapter/openai/chat_stream_runtime.go
@@ -1,278 +0,0 @@
-package openai
-
-import (
-	"encoding/json"
-	"net/http"
-	"strings"
-
-	openaifmt "ds2api/internal/format/openai"
-	"ds2api/internal/sse"
-	streamengine "ds2api/internal/stream"
-	"ds2api/internal/util"
-)
-
-type chatStreamRuntime struct {
-	w        http.ResponseWriter
-	rc       *http.ResponseController
-	canFlush bool
-
-	completionID string
-	created      int64
-	model        string
-	finalPrompt  string
-	toolNames    []string
-
-	thinkingEnabled bool
-	searchEnabled   bool
-
-	firstChunkSent       bool
-	bufferToolContent    bool
-	emitEarlyToolDeltas  bool
-	toolCallsEmitted     bool
-	toolCallsDoneEmitted bool
-
-	toolSieve         toolStreamSieveState
-	streamToolCallIDs map[int]string
-	streamToolNames   map[int]string
-	thinking          strings.Builder
-	text              strings.Builder
-}
-
-func newChatStreamRuntime(
-	w http.ResponseWriter,
-	rc *http.ResponseController,
-	canFlush bool,
-	completionID string,
-	created int64,
-	model string,
-	finalPrompt string,
-	thinkingEnabled bool,
-	searchEnabled bool,
-	toolNames []string,
-	bufferToolContent bool,
-	emitEarlyToolDeltas bool,
-) *chatStreamRuntime {
-	return &chatStreamRuntime{
-		w:                   w,
-		rc:                  rc,
-		canFlush:            canFlush,
-		completionID:        completionID,
-		created:             created,
-		model:               model,
-		finalPrompt:         finalPrompt,
-		toolNames:           toolNames,
-		thinkingEnabled:     thinkingEnabled,
-		searchEnabled:       searchEnabled,
-		bufferToolContent:   bufferToolContent,
-		emitEarlyToolDeltas: emitEarlyToolDeltas,
-		streamToolCallIDs:   map[int]string{},
-		streamToolNames:     map[int]string{},
-	}
-}
-
-func (s *chatStreamRuntime) sendKeepAlive() {
-	if !s.canFlush {
-		return
-	}
-	_, _ = s.w.Write([]byte(": keep-alive\n\n"))
-	_ = s.rc.Flush()
-}
-
-func (s *chatStreamRuntime) sendChunk(v any) {
-	b, _ := json.Marshal(v)
-	_, _ = s.w.Write([]byte("data: "))
-	_, _ = s.w.Write(b)
-	_, _ = s.w.Write([]byte("\n\n"))
-	if s.canFlush {
-		_ = s.rc.Flush()
-	}
-}
-
-func (s *chatStreamRuntime) sendDone() {
-	_, _ = s.w.Write([]byte("data: [DONE]\n\n"))
-	if s.canFlush {
-		_ = s.rc.Flush()
-	}
-}
-
-func (s *chatStreamRuntime) finalize(finishReason string) {
-	finalThinking := s.thinking.String()
-	finalText := sanitizeLeakedOutput(s.text.String())
-	detected := util.ParseStandaloneToolCallsDetailed(finalText, s.toolNames)
-	if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted {
-		finishReason = "tool_calls"
-		delta := map[string]any{
-			"tool_calls": formatFinalStreamToolCallsWithStableIDs(detected.Calls, s.streamToolCallIDs),
-		}
-		if !s.firstChunkSent {
-			delta["role"] = "assistant"
-			s.firstChunkSent = true
-		}
-		s.sendChunk(openaifmt.BuildChatStreamChunk(
-			s.completionID,
-			s.created,
-			s.model,
-			[]map[string]any{openaifmt.BuildChatStreamDeltaChoice(0, delta)},
-			nil,
-		))
-		s.toolCallsEmitted = true
-		s.toolCallsDoneEmitted = true
-	} else if s.bufferToolContent {
-		for _, evt := range flushToolSieve(&s.toolSieve, s.toolNames) {
-			if len(evt.ToolCalls) > 0 {
-				finishReason = "tool_calls"
-				s.toolCallsEmitted = true
-				s.toolCallsDoneEmitted = true
-				tcDelta := map[string]any{
-					"tool_calls": formatFinalStreamToolCallsWithStableIDs(evt.ToolCalls, s.streamToolCallIDs),
-				}
-				if !s.firstChunkSent {
-					tcDelta["role"] = "assistant"
-					s.firstChunkSent = true
-				}
-				s.sendChunk(openaifmt.BuildChatStreamChunk(
-					s.completionID,
-					s.created,
-					s.model,
-					[]map[string]any{openaifmt.BuildChatStreamDeltaChoice(0, tcDelta)},
-					nil,
-				))
-			}
-			if evt.Content == "" {
-				continue
-			}
-			cleaned := sanitizeLeakedOutput(evt.Content)
-			if cleaned == "" {
-				continue
-			}
-			delta := map[string]any{
-				"content": cleaned,
-			}
-			if !s.firstChunkSent {
-				delta["role"] = "assistant"
-				s.firstChunkSent = true
-			}
-			s.sendChunk(openaifmt.BuildChatStreamChunk(
-				s.completionID,
-				s.created,
-				s.model,
-				[]map[string]any{openaifmt.BuildChatStreamDeltaChoice(0, delta)},
-				nil,
-			))
-		}
-	}
-
-	if len(detected.Calls) > 0 || s.toolCallsEmitted {
-		finishReason = "tool_calls"
-	}
-	s.sendChunk(openaifmt.BuildChatStreamChunk(
-		s.completionID,
-		s.created,
-		s.model,
-		[]map[string]any{openaifmt.BuildChatStreamFinishChoice(0, finishReason)},
-		openaifmt.BuildChatUsage(s.finalPrompt, finalThinking, finalText),
-	))
-	s.sendDone()
-}
-
-func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision {
-	if !parsed.Parsed {
-		return streamengine.ParsedDecision{}
-	}
-	if parsed.ContentFilter || parsed.ErrorMessage != "" {
-		return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("content_filter")}
-	}
-	if parsed.Stop {
-		return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReasonHandlerRequested}
-	}
-
-	newChoices := make([]map[string]any, 0, len(parsed.Parts))
-	contentSeen := false
-	for _, p := range parsed.Parts {
-		if s.searchEnabled && sse.IsCitation(p.Text) {
-			continue
-		}
-		if p.Text == "" {
-			continue
-		}
-		contentSeen = true
-		delta := map[string]any{}
-		if !s.firstChunkSent {
-			delta["role"] = "assistant"
-			s.firstChunkSent = true
-		}
-		if p.Type == "thinking" {
-			if s.thinkingEnabled {
-				s.thinking.WriteString(p.Text)
-				delta["reasoning_content"] = p.Text
-			}
-		} else {
-			s.text.WriteString(p.Text)
-			if !s.bufferToolContent {
-				delta["content"] = p.Text
-			} else {
-				events := processToolSieveChunk(&s.toolSieve, p.Text, s.toolNames)
-				for _, evt := range events {
-					if len(evt.ToolCallDeltas) > 0 {
-						if !s.emitEarlyToolDeltas {
-							continue
-						}
-						filtered := filterIncrementalToolCallDeltasByAllowed(evt.ToolCallDeltas, s.toolNames, s.streamToolNames)
-						if len(filtered) == 0 {
-							continue
-						}
-						formatted := formatIncrementalStreamToolCallDeltas(filtered, s.streamToolCallIDs)
-						if len(formatted) == 0 {
-							continue
-						}
-						tcDelta := map[string]any{
-							"tool_calls": formatted,
-						}
-						s.toolCallsEmitted = true
-						if !s.firstChunkSent {
-							tcDelta["role"] = "assistant"
-							s.firstChunkSent = true
-						}
-						newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, tcDelta))
-						continue
-					}
-					if len(evt.ToolCalls) > 0 {
-						s.toolCallsEmitted = true
-						s.toolCallsDoneEmitted = true
-						tcDelta := map[string]any{
-							"tool_calls": formatFinalStreamToolCallsWithStableIDs(evt.ToolCalls, s.streamToolCallIDs),
-						}
-						if !s.firstChunkSent {
-							tcDelta["role"] = "assistant"
-							s.firstChunkSent = true
-						}
-						newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, tcDelta))
-						continue
-					}
-					if evt.Content != "" {
-						cleaned := sanitizeLeakedOutput(evt.Content)
-						if cleaned == "" {
-							continue
-						}
-						contentDelta := map[string]any{
-							"content": cleaned,
-						}
-						if !s.firstChunkSent {
-							contentDelta["role"] = "assistant"
-							s.firstChunkSent = true
-						}
-						newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, contentDelta))
-					}
-				}
-			}
-		}
-		if len(delta) > 0 {
-			newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, delta))
-		}
-	}
-
-	if len(newChoices) > 0 {
-		s.sendChunk(openaifmt.BuildChatStreamChunk(s.completionID, s.created, s.model, newChoices, nil))
-	}
-	return streamengine.ParsedDecision{ContentSeen: contentSeen}
-}
--- a/internal/adapter/openai/deps.go
+++ b/internal/adapter/openai/deps.go
@@ -1,37 +0,0 @@
-package openai
-
-import (
-	"context"
-	"net/http"
-
-	"ds2api/internal/auth"
-	"ds2api/internal/config"
-	"ds2api/internal/deepseek"
-)
-
-type AuthResolver interface {
-	Determine(req *http.Request) (*auth.RequestAuth, error)
-	DetermineCaller(req *http.Request) (*auth.RequestAuth, error)
-	Release(a *auth.RequestAuth)
-}
-
-type DeepSeekCaller interface {
-	CreateSession(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
-	GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
-	CallCompletion(ctx context.Context, a *auth.RequestAuth, payload map[string]any, powResp string, maxAttempts int) (*http.Response, error)
-	DeleteAllSessionsForToken(ctx context.Context, token string) error
-}
-
-type ConfigReader interface {
-	ModelAliases() map[string]string
-	CompatWideInputStrictOutput() bool
-	ToolcallMode() string
-	ToolcallEarlyEmitConfidence() string
-	ResponsesStoreTTLSeconds() int
-	EmbeddingsProvider() string
-	AutoDeleteSessions() bool
-}
-
-var _ AuthResolver = (*auth.Resolver)(nil)
-var _ DeepSeekCaller = (*deepseek.Client)(nil)
-var _ ConfigReader = (*config.Store)(nil)
--- a/internal/adapter/openai/deps_injection_test.go
+++ b/internal/adapter/openai/deps_injection_test.go
@@ -1,71 +0,0 @@
-package openai
-
-import "testing"
-
-type mockOpenAIConfig struct {
-	aliases      map[string]string
-	wideInput    bool
-	toolMode     string
-	earlyEmit    string
-	responsesTTL int
-	embedProv    string
-}
-
-func (m mockOpenAIConfig) ModelAliases() map[string]string { return m.aliases }
-func (m mockOpenAIConfig) CompatWideInputStrictOutput() bool {
-	return m.wideInput
-}
-func (m mockOpenAIConfig) ToolcallMode() string                { return m.toolMode }
-func (m mockOpenAIConfig) ToolcallEarlyEmitConfidence() string { return m.earlyEmit }
-func (m mockOpenAIConfig) ResponsesStoreTTLSeconds() int       { return m.responsesTTL }
-func (m mockOpenAIConfig) EmbeddingsProvider() string          { return m.embedProv }
-func (m mockOpenAIConfig) AutoDeleteSessions() bool            { return false }
-
-func TestNormalizeOpenAIChatRequestWithConfigInterface(t *testing.T) {
-	cfg := mockOpenAIConfig{
-		aliases: map[string]string{
-			"my-model": "deepseek-chat-search",
-		},
-		wideInput: true,
-	}
-	req := map[string]any{
-		"model":    "my-model",
-		"messages": []any{map[string]any{"role": "user", "content": "hello"}},
-	}
-	out, err := normalizeOpenAIChatRequest(cfg, req, "")
-	if err != nil {
-		t.Fatalf("normalizeOpenAIChatRequest error: %v", err)
-	}
-	if out.ResolvedModel != "deepseek-chat-search" {
-		t.Fatalf("resolved model mismatch: got=%q", out.ResolvedModel)
-	}
-	if !out.Search || out.Thinking {
-		t.Fatalf("unexpected model flags: thinking=%v search=%v", out.Thinking, out.Search)
-	}
-}
-
-func TestNormalizeOpenAIResponsesRequestWideInputPolicyFromInterface(t *testing.T) {
-	req := map[string]any{
-		"model": "deepseek-chat",
-		"input": "hi",
-	}
-
-	_, err := normalizeOpenAIResponsesRequest(mockOpenAIConfig{
-		aliases:   map[string]string{},
-		wideInput: false,
-	}, req, "")
-	if err == nil {
-		t.Fatal("expected error when wide input is disabled and only input is provided")
-	}
-
-	out, err := normalizeOpenAIResponsesRequest(mockOpenAIConfig{
-		aliases:   map[string]string{},
-		wideInput: true,
-	}, req, "")
-	if err != nil {
-		t.Fatalf("unexpected error when wide input is enabled: %v", err)
-	}
-	if out.Surface != "openai_responses" {
-		t.Fatalf("unexpected surface: %q", out.Surface)
-	}
-}
--- a/internal/adapter/openai/handler_chat.go
+++ b/internal/adapter/openai/handler_chat.go
@@ -1,174 +0,0 @@
-package openai
-
-import (
-	"context"
-	"encoding/json"
-	"io"
-	"net/http"
-	"time"
-
-	"ds2api/internal/auth"
-	"ds2api/internal/config"
-	"ds2api/internal/deepseek"
-	openaifmt "ds2api/internal/format/openai"
-	"ds2api/internal/sse"
-	streamengine "ds2api/internal/stream"
-)
-
-func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
-	if isVercelStreamReleaseRequest(r) {
-		h.handleVercelStreamRelease(w, r)
-		return
-	}
-	if isVercelStreamPrepareRequest(r) {
-		h.handleVercelStreamPrepare(w, r)
-		return
-	}
-
-	a, err := h.Auth.Determine(r)
-	if err != nil {
-		status := http.StatusUnauthorized
-		detail := err.Error()
-		if err == auth.ErrNoAccount {
-			status = http.StatusTooManyRequests
-		}
-		writeOpenAIError(w, status, detail)
-		return
-	}
-	defer func() {
-		// 自动删除会话（同步）
-		// 必须在 Release 之前同步删除，否则：
-		// 1. 异步删除时账号已被 Release
-		// 2. 新请求可能获取到同一账号并开始使用
-		// 3. 异步删除仍在进行，会截断新请求正在使用的会话
-		if h.Store.AutoDeleteSessions() && a.DeepSeekToken != "" {
-			deleteCtx, cancel := context.WithTimeout(r.Context(), 10*time.Second)
-			defer cancel()
-			err := h.DS.DeleteAllSessionsForToken(deleteCtx, a.DeepSeekToken)
-			if err != nil {
-				config.Logger.Warn("[auto_delete_sessions] failed", "account", a.AccountID, "error", err)
-			} else {
-				config.Logger.Debug("[auto_delete_sessions] success", "account", a.AccountID)
-			}
-		}
-		h.Auth.Release(a)
-	}()
-
-	r = r.WithContext(auth.WithAuth(r.Context(), a))
-
-	var req map[string]any
-	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-		writeOpenAIError(w, http.StatusBadRequest, "invalid json")
-		return
-	}
-	stdReq, err := normalizeOpenAIChatRequest(h.Store, req, requestTraceID(r))
-	if err != nil {
-		writeOpenAIError(w, http.StatusBadRequest, err.Error())
-		return
-	}
-
-	sessionID, err := h.DS.CreateSession(r.Context(), a, 3)
-	if err != nil {
-		if a.UseConfigToken {
-			writeOpenAIError(w, http.StatusUnauthorized, "Account token is invalid. Please re-login the account in admin.")
-		} else {
-			writeOpenAIError(w, http.StatusUnauthorized, "Invalid token. If this should be a DS2API key, add it to config.keys first.")
-		}
-		return
-	}
-	pow, err := h.DS.GetPow(r.Context(), a, 3)
-	if err != nil {
-		writeOpenAIError(w, http.StatusUnauthorized, "Failed to get PoW (invalid token or unknown error).")
-		return
-	}
-	payload := stdReq.CompletionPayload(sessionID)
-	resp, err := h.DS.CallCompletion(r.Context(), a, payload, pow, 3)
-	if err != nil {
-		writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
-		return
-	}
-	if stdReq.Stream {
-		h.handleStream(w, r, resp, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames)
-		return
-	}
-	h.handleNonStream(w, r.Context(), resp, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.ToolNames)
-}
-
-func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, resp *http.Response, completionID, model, finalPrompt string, thinkingEnabled bool, toolNames []string) {
-	if resp.StatusCode != http.StatusOK {
-		defer resp.Body.Close()
-		body, _ := io.ReadAll(resp.Body)
-		writeOpenAIError(w, resp.StatusCode, string(body))
-		return
-	}
-	_ = ctx
-	result := sse.CollectStream(resp, thinkingEnabled, true)
-
-	finalThinking := result.Thinking
-	finalText := sanitizeLeakedOutput(result.Text)
-	respBody := openaifmt.BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText, toolNames)
-	writeJSON(w, http.StatusOK, respBody)
-}
-
-func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *http.Response, completionID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string) {
-	defer resp.Body.Close()
-	if resp.StatusCode != http.StatusOK {
-		body, _ := io.ReadAll(resp.Body)
-		writeOpenAIError(w, resp.StatusCode, string(body))
-		return
-	}
-	w.Header().Set("Content-Type", "text/event-stream")
-	w.Header().Set("Cache-Control", "no-cache, no-transform")
-	w.Header().Set("Connection", "keep-alive")
-	w.Header().Set("X-Accel-Buffering", "no")
-	rc := http.NewResponseController(w)
-	_, canFlush := w.(http.Flusher)
-	if !canFlush {
-		config.Logger.Warn("[stream] response writer does not support flush; streaming may be buffered")
-	}
-
-	created := time.Now().Unix()
-	bufferToolContent := len(toolNames) > 0
-	emitEarlyToolDeltas := h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence()
-	initialType := "text"
-	if thinkingEnabled {
-		initialType = "thinking"
-	}
-
-	streamRuntime := newChatStreamRuntime(
-		w,
-		rc,
-		canFlush,
-		completionID,
-		created,
-		model,
-		finalPrompt,
-		thinkingEnabled,
-		searchEnabled,
-		toolNames,
-		bufferToolContent,
-		emitEarlyToolDeltas,
-	)
-
-	streamengine.ConsumeSSE(streamengine.ConsumeConfig{
-		Context:             r.Context(),
-		Body:                resp.Body,
-		ThinkingEnabled:     thinkingEnabled,
-		InitialType:         initialType,
-		KeepAliveInterval:   time.Duration(deepseek.KeepAliveTimeout) * time.Second,
-		IdleTimeout:         time.Duration(deepseek.StreamIdleTimeout) * time.Second,
-		MaxKeepAliveNoInput: deepseek.MaxKeepaliveCount,
-	}, streamengine.ConsumeHooks{
-		OnKeepAlive: func() {
-			streamRuntime.sendKeepAlive()
-		},
-		OnParsed: streamRuntime.onParsed,
-		OnFinalize: func(reason streamengine.StopReason, _ error) {
-			if string(reason) == "content_filter" {
-				streamRuntime.finalize("content_filter")
-				return
-			}
-			streamRuntime.finalize("stop")
-		},
-	})
-}
--- a/internal/adapter/openai/handler_routes.go
+++ b/internal/adapter/openai/handler_routes.go
@@ -1,57 +0,0 @@
-package openai
-
-import (
-	"net/http"
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/go-chi/chi/v5"
-
-	"ds2api/internal/auth"
-	"ds2api/internal/config"
-	"ds2api/internal/util"
-)
-
-// writeJSON is a package-internal alias kept to avoid mass-renaming across
-// every call-site in this package.
-var writeJSON = util.WriteJSON
-
-type Handler struct {
-	Store ConfigReader
-	Auth  AuthResolver
-	DS    DeepSeekCaller
-
-	leaseMu      sync.Mutex
-	streamLeases map[string]streamLease
-	responsesMu  sync.Mutex
-	responses    *responseStore
-}
-
-type streamLease struct {
-	Auth      *auth.RequestAuth
-	ExpiresAt time.Time
-}
-
-func RegisterRoutes(r chi.Router, h *Handler) {
-	r.Get("/v1/models", h.ListModels)
-	r.Get("/v1/models/{model_id}", h.GetModel)
-	r.Post("/v1/chat/completions", h.ChatCompletions)
-	r.Post("/v1/responses", h.Responses)
-	r.Get("/v1/responses/{response_id}", h.GetResponseByID)
-	r.Post("/v1/embeddings", h.Embeddings)
-}
-
-func (h *Handler) ListModels(w http.ResponseWriter, _ *http.Request) {
-	writeJSON(w, http.StatusOK, config.OpenAIModelsResponse())
-}
-
-func (h *Handler) GetModel(w http.ResponseWriter, r *http.Request) {
-	modelID := strings.TrimSpace(chi.URLParam(r, "model_id"))
-	model, ok := config.OpenAIModelByID(h.Store, modelID)
-	if !ok {
-		writeOpenAIError(w, http.StatusNotFound, "Model not found.")
-		return
-	}
-	writeJSON(w, http.StatusOK, model)
-}
--- a/internal/adapter/openai/handler_toolcall_format.go
+++ b/internal/adapter/openai/handler_toolcall_format.go
@@ -1,169 +0,0 @@
-package openai
-
-import (
-	"encoding/json"
-	"fmt"
-	"strings"
-
-	"github.com/google/uuid"
-
-	"ds2api/internal/util"
-)
-
-func injectToolPrompt(messages []map[string]any, tools []any, policy util.ToolChoicePolicy) ([]map[string]any, []string) {
-	if policy.IsNone() {
-		return messages, nil
-	}
-	toolSchemas := make([]string, 0, len(tools))
-	names := make([]string, 0, len(tools))
-	isAllowed := func(name string) bool {
-		if strings.TrimSpace(name) == "" {
-			return false
-		}
-		if len(policy.Allowed) == 0 {
-			return true
-		}
-		_, ok := policy.Allowed[name]
-		return ok
-	}
-
-	for _, t := range tools {
-		tool, ok := t.(map[string]any)
-		if !ok {
-			continue
-		}
-		fn, _ := tool["function"].(map[string]any)
-		if len(fn) == 0 {
-			fn = tool
-		}
-		name, _ := fn["name"].(string)
-		desc, _ := fn["description"].(string)
-		schema, _ := fn["parameters"].(map[string]any)
-		name = strings.TrimSpace(name)
-		if !isAllowed(name) {
-			continue
-		}
-		names = append(names, name)
-		if desc == "" {
-			desc = "No description available"
-		}
-		b, _ := json.Marshal(schema)
-		toolSchemas = append(toolSchemas, fmt.Sprintf("Tool: %s\nDescription: %s\nParameters: %s", name, desc, string(b)))
-	}
-	if len(toolSchemas) == 0 {
-		return messages, names
-	}
-	toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\n" + buildToolCallInstructions(names)
-	if policy.Mode == util.ToolChoiceRequired {
-		toolPrompt += "\n7) For this response, you MUST call at least one tool from the allowed list."
-	}
-	if policy.Mode == util.ToolChoiceForced && strings.TrimSpace(policy.ForcedName) != "" {
-		toolPrompt += "\n7) For this response, you MUST call exactly this tool name: " + strings.TrimSpace(policy.ForcedName)
-		toolPrompt += "\n8) Do not call any other tool."
-	}
-
-	for i := range messages {
-		if messages[i]["role"] == "system" {
-			old, _ := messages[i]["content"].(string)
-			messages[i]["content"] = strings.TrimSpace(old + "\n\n" + toolPrompt)
-			return messages, names
-		}
-	}
-	messages = append([]map[string]any{{"role": "system", "content": toolPrompt}}, messages...)
-	return messages, names
-}
-
-// buildToolCallInstructions delegates to the shared util implementation.
-func buildToolCallInstructions(toolNames []string) string {
-	return util.BuildToolCallInstructions(toolNames)
-}
-
-func formatIncrementalStreamToolCallDeltas(deltas []toolCallDelta, ids map[int]string) []map[string]any {
-	if len(deltas) == 0 {
-		return nil
-	}
-	out := make([]map[string]any, 0, len(deltas))
-	for _, d := range deltas {
-		if d.Name == "" && d.Arguments == "" {
-			continue
-		}
-		callID, ok := ids[d.Index]
-		if !ok || callID == "" {
-			callID = "call_" + strings.ReplaceAll(uuid.NewString(), "-", "")
-			ids[d.Index] = callID
-		}
-		item := map[string]any{
-			"index": d.Index,
-			"id":    callID,
-			"type":  "function",
-		}
-		fn := map[string]any{}
-		if d.Name != "" {
-			fn["name"] = d.Name
-		}
-		if d.Arguments != "" {
-			fn["arguments"] = d.Arguments
-		}
-		if len(fn) > 0 {
-			item["function"] = fn
-		}
-		out = append(out, item)
-	}
-	return out
-}
-
-func filterIncrementalToolCallDeltasByAllowed(deltas []toolCallDelta, allowedNames []string, seenNames map[int]string) []toolCallDelta {
-	if len(deltas) == 0 {
-		return nil
-	}
-	out := make([]toolCallDelta, 0, len(deltas))
-	for _, d := range deltas {
-		if d.Name != "" {
-			if seenNames != nil {
-				seenNames[d.Index] = d.Name
-			}
-			out = append(out, d)
-			continue
-		}
-		if seenNames == nil {
-			out = append(out, d)
-			continue
-		}
-		name := strings.TrimSpace(seenNames[d.Index])
-		if name == "" {
-			continue
-		}
-		out = append(out, d)
-	}
-	return out
-}
-
-func formatFinalStreamToolCallsWithStableIDs(calls []util.ParsedToolCall, ids map[int]string) []map[string]any {
-	if len(calls) == 0 {
-		return nil
-	}
-	out := make([]map[string]any, 0, len(calls))
-	for i, c := range calls {
-		callID := ""
-		if ids != nil {
-			callID = strings.TrimSpace(ids[i])
-		}
-		if callID == "" {
-			callID = "call_" + strings.ReplaceAll(uuid.NewString(), "-", "")
-			if ids != nil {
-				ids[i] = callID
-			}
-		}
-		args, _ := json.Marshal(c.Input)
-		out = append(out, map[string]any{
-			"index": i,
-			"id":    callID,
-			"type":  "function",
-			"function": map[string]any{
-				"name":      c.Name,
-				"arguments": string(args),
-			},
-		})
-	}
-	return out
-}
--- a/internal/adapter/openai/handler_toolcall_policy.go
+++ b/internal/adapter/openai/handler_toolcall_policy.go
@@ -1,9 +0,0 @@
-package openai
-
-func (h *Handler) toolcallFeatureMatchEnabled() bool {
-	return true
-}
-
-func (h *Handler) toolcallEarlyEmitHighConfidence() bool {
-	return true
-}
--- a/internal/adapter/openai/handler_toolcall_test.go
+++ b/internal/adapter/openai/handler_toolcall_test.go
@@ -1,921 +0,0 @@
-package openai
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"io"
-	"net/http"
-	"net/http/httptest"
-	"strings"
-	"testing"
-)
-
-func makeSSEHTTPResponse(lines ...string) *http.Response {
-	body := strings.Join(lines, "\n")
-	if !strings.HasSuffix(body, "\n") {
-		body += "\n"
-	}
-	return &http.Response{
-		StatusCode: http.StatusOK,
-		Header:     make(http.Header),
-		Body:       io.NopCloser(strings.NewReader(body)),
-	}
-}
-
-func decodeJSONBody(t *testing.T, body string) map[string]any {
-	t.Helper()
-	var out map[string]any
-	if err := json.Unmarshal([]byte(body), &out); err != nil {
-		t.Fatalf("decode json failed: %v, body=%s", err, body)
-	}
-	return out
-}
-
-func parseSSEDataFrames(t *testing.T, body string) ([]map[string]any, bool) {
-	t.Helper()
-	lines := strings.Split(body, "\n")
-	frames := make([]map[string]any, 0, len(lines))
-	done := false
-	for _, line := range lines {
-		line = strings.TrimSpace(line)
-		if !strings.HasPrefix(line, "data:") {
-			continue
-		}
-		payload := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
-		if payload == "" {
-			continue
-		}
-		if payload == "[DONE]" {
-			done = true
-			continue
-		}
-		var frame map[string]any
-		if err := json.Unmarshal([]byte(payload), &frame); err != nil {
-			t.Fatalf("decode sse frame failed: %v, payload=%s", err, payload)
-		}
-		frames = append(frames, frame)
-	}
-	return frames, done
-}
-
-func streamHasRawToolJSONContent(frames []map[string]any) bool {
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			content, _ := delta["content"].(string)
-			if strings.Contains(content, `"tool_calls"`) {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-func streamHasToolCallsDelta(frames []map[string]any) bool {
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			if _, ok := delta["tool_calls"]; ok {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-func streamFinishReason(frames []map[string]any) string {
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			if reason, ok := choice["finish_reason"].(string); ok && reason != "" {
-				return reason
-			}
-		}
-	}
-	return ""
-}
-
-func streamToolCallArgumentChunks(frames []map[string]any) []string {
-	out := make([]string, 0, 4)
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			toolCalls, _ := delta["tool_calls"].([]any)
-			for _, tc := range toolCalls {
-				tcm, _ := tc.(map[string]any)
-				fn, _ := tcm["function"].(map[string]any)
-				if args, ok := fn["arguments"].(string); ok && args != "" {
-					out = append(out, args)
-				}
-			}
-		}
-	}
-	return out
-}
-
-func TestHandleNonStreamToolCallInterceptsChatModel(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-
-	h.handleNonStream(rec, context.Background(), resp, "cid1", "deepseek-chat", "prompt", false, []string{"search"})
-	if rec.Code != http.StatusOK {
-		t.Fatalf("unexpected status: %d", rec.Code)
-	}
-
-	out := decodeJSONBody(t, rec.Body.String())
-	choices, _ := out["choices"].([]any)
-	if len(choices) != 1 {
-		t.Fatalf("unexpected choices: %#v", out["choices"])
-	}
-	choice, _ := choices[0].(map[string]any)
-	if choice["finish_reason"] != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
-	}
-	msg, _ := choice["message"].(map[string]any)
-	if msg["content"] != nil {
-		t.Fatalf("expected content nil, got %#v", msg["content"])
-	}
-	toolCalls, _ := msg["tool_calls"].([]any)
-	if len(toolCalls) != 1 {
-		t.Fatalf("expected 1 tool call, got %#v", msg["tool_calls"])
-	}
-}
-
-func TestHandleNonStreamToolCallInterceptsReasonerModel(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/thinking_content","v":"先想一下"}`,
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-
-	h.handleNonStream(rec, context.Background(), resp, "cid2", "deepseek-reasoner", "prompt", true, []string{"search"})
-	if rec.Code != http.StatusOK {
-		t.Fatalf("unexpected status: %d", rec.Code)
-	}
-
-	out := decodeJSONBody(t, rec.Body.String())
-	choices, _ := out["choices"].([]any)
-	choice, _ := choices[0].(map[string]any)
-	msg, _ := choice["message"].(map[string]any)
-	if msg["reasoning_content"] != "先想一下" {
-		t.Fatalf("expected reasoning_content, got %#v", msg["reasoning_content"])
-	}
-	if msg["content"] != nil {
-		t.Fatalf("expected content nil, got %#v", msg["content"])
-	}
-	if choice["finish_reason"] != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
-	}
-}
-
-func TestHandleNonStreamUnknownToolIntercepted(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"not_in_schema\",\"input\":{\"q\":\"go\"}}]}"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-
-	h.handleNonStream(rec, context.Background(), resp, "cid2b", "deepseek-chat", "prompt", false, []string{"search"})
-	if rec.Code != http.StatusOK {
-		t.Fatalf("unexpected status: %d", rec.Code)
-	}
-
-	out := decodeJSONBody(t, rec.Body.String())
-	choices, _ := out["choices"].([]any)
-	choice, _ := choices[0].(map[string]any)
-	if choice["finish_reason"] != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
-	}
-	msg, _ := choice["message"].(map[string]any)
-	toolCalls, _ := msg["tool_calls"].([]any)
-	if len(toolCalls) != 1 {
-		t.Fatalf("expected tool_calls for unknown schema name, got %#v", msg["tool_calls"])
-	}
-}
-
-func TestHandleNonStreamEmbeddedToolCallExamplePromotesToolCall(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"下面是示例："}`,
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"}`,
-		`data: {"p":"response/content","v":"请勿执行。"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-
-	h.handleNonStream(rec, context.Background(), resp, "cid2c", "deepseek-chat", "prompt", false, []string{"search"})
-	if rec.Code != http.StatusOK {
-		t.Fatalf("unexpected status: %d", rec.Code)
-	}
-
-	out := decodeJSONBody(t, rec.Body.String())
-	choices, _ := out["choices"].([]any)
-	choice, _ := choices[0].(map[string]any)
-	if choice["finish_reason"] != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
-	}
-	msg, _ := choice["message"].(map[string]any)
-	toolCalls, _ := msg["tool_calls"].([]any)
-	if len(toolCalls) != 1 {
-		t.Fatalf("expected one tool_call field for embedded example: %#v", msg["tool_calls"])
-	}
-	content, _ := msg["content"].(string)
-	if strings.Contains(content, `"tool_calls"`) {
-		t.Fatalf("expected raw tool_calls json stripped from content, got %#v", content)
-	}
-}
-
-func TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		"data: {\"p\":\"response/content\",\"v\":\"```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"search\\\",\\\"input\\\":{\\\"q\\\":\\\"go\\\"}}]}\\n```\"}",
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-
-	h.handleNonStream(rec, context.Background(), resp, "cid2d", "deepseek-chat", "prompt", false, []string{"search"})
-	if rec.Code != http.StatusOK {
-		t.Fatalf("unexpected status: %d", rec.Code)
-	}
-
-	out := decodeJSONBody(t, rec.Body.String())
-	choices, _ := out["choices"].([]any)
-	choice, _ := choices[0].(map[string]any)
-	if choice["finish_reason"] == "tool_calls" {
-		t.Fatalf("expected fenced example to remain content-only, got finish_reason=%#v", choice["finish_reason"])
-	}
-	msg, _ := choice["message"].(map[string]any)
-	toolCalls, _ := msg["tool_calls"].([]any)
-	if len(toolCalls) != 0 {
-		t.Fatalf("expected no tool_call field for fenced example: %#v", msg["tool_calls"])
-	}
-	content, _ := msg["content"].(string)
-	if !strings.Contains(content, `"tool_calls"`) {
-		t.Fatalf("expected fenced example content preserved, got %q", content)
-	}
-}
-
-// Backward-compatible alias for historical test name used in CI logs.
-func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
-	TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t)
-}
-
-func TestHandleStreamToolCallInterceptsWithoutRawContentLeak(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\""}`,
-		`data: {"p":"response/content","v":",\"input\":{\"q\":\"go\"}}]}"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid3", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta, body=%s", rec.Body.String())
-	}
-	foundToolIndex := false
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			toolCalls, _ := delta["tool_calls"].([]any)
-			for _, tc := range toolCalls {
-				tcm, _ := tc.(map[string]any)
-				if _, ok := tcm["index"].(float64); ok {
-					foundToolIndex = true
-				}
-			}
-		}
-	}
-	if !foundToolIndex {
-		t.Fatalf("expected stream tool_calls item with index, body=%s", rec.Body.String())
-	}
-	if streamHasRawToolJSONContent(frames) {
-		t.Fatalf("raw tool_calls JSON leaked in content delta: %s", rec.Body.String())
-	}
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamToolCallLargeArgumentsStillIntercepted(t *testing.T) {
-	h := &Handler{}
-	large := strings.Repeat("a", 9000)
-	payload := fmt.Sprintf(`{"tool_calls":[{"name":"search","input":{"q":"%s"}}]}`, large)
-	splitAt := len(payload) / 2
-	resp := makeSSEHTTPResponse(
-		fmt.Sprintf(`data: {"p":"response/content","v":%q}`, payload[:splitAt]),
-		fmt.Sprintf(`data: {"p":"response/content","v":%q}`, payload[splitAt:]),
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid3-large", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta, body=%s", rec.Body.String())
-	}
-	if streamHasRawToolJSONContent(frames) {
-		t.Fatalf("raw tool_calls JSON leaked in content delta: %s", rec.Body.String())
-	}
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamReasonerToolCallInterceptsWithoutRawContentLeak(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/thinking_content","v":"思考中"}`,
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid4", "deepseek-reasoner", "prompt", true, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta, body=%s", rec.Body.String())
-	}
-	foundToolIndex := false
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			toolCalls, _ := delta["tool_calls"].([]any)
-			for _, tc := range toolCalls {
-				tcm, _ := tc.(map[string]any)
-				if _, ok := tcm["index"].(float64); ok {
-					foundToolIndex = true
-				}
-			}
-		}
-	}
-	if !foundToolIndex {
-		t.Fatalf("expected stream tool_calls item with index, body=%s", rec.Body.String())
-	}
-	if streamHasRawToolJSONContent(frames) {
-		t.Fatalf("raw tool_calls JSON leaked in content delta: %s", rec.Body.String())
-	}
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
-	}
-
-	hasThinkingDelta := false
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			if _, ok := delta["reasoning_content"]; ok {
-				hasThinkingDelta = true
-			}
-		}
-	}
-	if !hasThinkingDelta {
-		t.Fatalf("expected reasoning_content delta in reasoner stream: %s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamUnknownToolEmitsToolCall(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"not_in_schema\",\"input\":{\"q\":\"go\"}}]}"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid5", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta for unknown schema name, body=%s", rec.Body.String())
-	}
-	if streamHasRawToolJSONContent(frames) {
-		t.Fatalf("did not expect raw tool_calls json leak for unknown schema name: %s", rec.Body.String())
-	}
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamUnknownToolNoArgsEmitsToolCall(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"not_in_schema\"}]}"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid5b", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta for unknown schema name (no args), body=%s", rec.Body.String())
-	}
-	if streamHasRawToolJSONContent(frames) {
-		t.Fatalf("did not expect raw tool_calls json leak for unknown schema name (no args): %s", rec.Body.String())
-	}
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamToolsPlainTextStreamsBeforeFinish(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"你好，"}`,
-		`data: {"p":"response/content","v":"这是普通文本回复。"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid6", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if streamHasToolCallsDelta(frames) {
-		t.Fatalf("did not expect tool_calls delta for plain text: %s", rec.Body.String())
-	}
-	content := strings.Builder{}
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			if c, ok := delta["content"].(string); ok {
-				content.WriteString(c)
-			}
-		}
-	}
-	if got := content.String(); got == "" {
-		t.Fatalf("expected streamed content in tool mode plain text, body=%s", rec.Body.String())
-	}
-	if streamFinishReason(frames) != "stop" {
-		t.Fatalf("expected finish_reason=stop, body=%s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"下面是示例："}`,
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"}`,
-		`data: {"p":"response/content","v":"请勿执行。"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid7", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta in mixed prose stream, body=%s", rec.Body.String())
-	}
-	content := strings.Builder{}
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			if c, ok := delta["content"].(string); ok {
-				content.WriteString(c)
-			}
-		}
-	}
-	got := content.String()
-	if !strings.Contains(got, "下面是示例：") || !strings.Contains(got, "请勿执行。") {
-		t.Fatalf("expected pre/post plain text to pass sieve, got=%q", got)
-	}
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls for mixed prose, body=%s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamToolCallAfterLeadingTextRemainsText(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"我将调用工具。"}`,
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid7b", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta, body=%s", rec.Body.String())
-	}
-	content := strings.Builder{}
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			if c, ok := delta["content"].(string); ok {
-				content.WriteString(c)
-			}
-		}
-	}
-	got := content.String()
-	if !strings.Contains(got, "我将调用工具。") {
-		t.Fatalf("expected leading text to keep streaming, got=%q", got)
-	}
-
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamToolCallWithSameChunkTrailingTextRemainsText(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}接下来我会继续说明。"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid7c", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta, body=%s", rec.Body.String())
-	}
-	content := strings.Builder{}
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			if c, ok := delta["content"].(string); ok {
-				content.WriteString(c)
-			}
-		}
-	}
-	got := content.String()
-	if !strings.Contains(got, "接下来我会继续说明。") {
-		t.Fatalf("expected trailing plain text to be preserved, got=%q", got)
-	}
-
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamFencedToolCallSnippetPromotesToolCall(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		fmt.Sprintf(`data: {"p":"response/content","v":%q}`, "下面是调用示例：\n```json\n"),
-		fmt.Sprintf(`data: {"p":"response/content","v":%q}`, "{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}\n```\n仅示例，不要执行。"),
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid7f", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta for fenced snippet, body=%s", rec.Body.String())
-	}
-	content := strings.Builder{}
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			if c, ok := delta["content"].(string); ok {
-				content.WriteString(c)
-			}
-		}
-	}
-	got := content.String()
-	if strings.Contains(strings.ToLower(got), "tool_calls") {
-		t.Fatalf("expected raw fenced tool_calls snippet stripped from content, got=%q", got)
-	}
-	if strings.Contains(strings.ToLower(got), "```json") || strings.Contains(got, "\n```\n") {
-		t.Fatalf("expected consumed fenced tool payload to not leave empty code fence, got=%q", got)
-	}
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamStandaloneToolCallAfterClosedFenceKeepsFence(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		fmt.Sprintf(`data: {"p":"response/content","v":%q}`, "先给一个代码示例：\n```text\nhello\n```\n"),
-		fmt.Sprintf(`data: {"p":"response/content","v":%q}`, "{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"),
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid7g", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta for standalone payload, body=%s", rec.Body.String())
-	}
-	content := strings.Builder{}
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			if c, ok := delta["content"].(string); ok {
-				content.WriteString(c)
-			}
-		}
-	}
-	got := content.String()
-	if !strings.Contains(got, "```") {
-		t.Fatalf("expected closed fence before standalone tool json to be preserved, got=%q", got)
-	}
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamToolCallKeyAppearsLateRemainsText(t *testing.T) {
-	h := &Handler{}
-	spaces := strings.Repeat(" ", 200)
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"{`+spaces+`"}`,
-		`data: {"p":"response/content","v":"\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"}`,
-		`data: {"p":"response/content","v":"后置正文C。"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid8", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta, body=%s", rec.Body.String())
-	}
-	content := strings.Builder{}
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			if c, ok := delta["content"].(string); ok {
-				content.WriteString(c)
-			}
-		}
-	}
-	got := content.String()
-	if !strings.Contains(got, "后置正文C。") {
-		t.Fatalf("expected stream to continue after tool json convergence, got=%q", got)
-	}
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamInvalidToolJSONDoesNotLeakRawObject(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"前置正文D。"}`,
-		`data: {"p":"response/content","v":"{'tool_calls':[{'name':'search','input':{'q':'go'}}]}"}`,
-		`data: {"p":"response/content","v":"后置正文E。"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid9", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if streamHasToolCallsDelta(frames) {
-		t.Fatalf("did not expect tool_calls delta for invalid json, body=%s", rec.Body.String())
-	}
-	content := strings.Builder{}
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			if c, ok := delta["content"].(string); ok {
-				content.WriteString(c)
-			}
-		}
-	}
-	got := content.String()
-	if !strings.Contains(got, "前置正文D。") || !strings.Contains(got, "后置正文E。") {
-		t.Fatalf("expected pre/post plain text to remain, got=%q", content.String())
-	}
-	if !strings.Contains(strings.ToLower(got), "tool_calls") {
-		t.Fatalf("expected invalid embedded tool-like json to pass through as text, got=%q", got)
-	}
-}
-
-func TestHandleStreamIncompleteCapturedToolJSONFlushesAsTextOnFinalize(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\""}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid10", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if streamHasToolCallsDelta(frames) {
-		t.Fatalf("did not expect tool_calls delta for incomplete json, body=%s", rec.Body.String())
-	}
-	content := strings.Builder{}
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			if c, ok := delta["content"].(string); ok {
-				content.WriteString(c)
-			}
-		}
-	}
-	if !strings.Contains(strings.ToLower(content.String()), "tool_calls") || !strings.Contains(content.String(), "{") {
-		t.Fatalf("expected incomplete capture to flush as plain text instead of stalling, got=%q", content.String())
-	}
-}
-
-func TestHandleStreamToolCallArgumentsEmitAsSingleCompletedChunk(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go"}`,
-		`data: {"p":"response/content","v":"lang\",\"page\":1}}]}"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid11", "deepseek-chat", "prompt", false, false, []string{"search"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta, body=%s", rec.Body.String())
-	}
-	if streamHasRawToolJSONContent(frames) {
-		t.Fatalf("raw tool_calls JSON leaked in content delta: %s", rec.Body.String())
-	}
-	argChunks := streamToolCallArgumentChunks(frames)
-	if len(argChunks) == 0 {
-		t.Fatalf("expected tool call arguments chunk, got=%v body=%s", argChunks, rec.Body.String())
-	}
-	joined := strings.Join(argChunks, "")
-	if !strings.Contains(joined, `"q":"golang"`) || !strings.Contains(joined, `"page":1`) {
-		t.Fatalf("unexpected merged arguments stream: %q", joined)
-	}
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
-	}
-}
-
-func TestHandleStreamMultiToolCallDoesNotMergeNamesOrArguments(t *testing.T) {
-	h := &Handler{}
-	resp := makeSSEHTTPResponse(
-		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search_web\",\"input\":{\"query\":\"latest ai news\"}},{"}`,
-		`data: {"p":"response/content","v":"\"name\":\"eval_javascript\",\"input\":{\"code\":\"1+1\"}}]}"}`,
-		`data: [DONE]`,
-	)
-	rec := httptest.NewRecorder()
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
-
-	h.handleStream(rec, req, resp, "cid12", "deepseek-chat", "prompt", false, false, []string{"search_web", "eval_javascript"})
-
-	frames, done := parseSSEDataFrames(t, rec.Body.String())
-	if !done {
-		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
-	}
-	if !streamHasToolCallsDelta(frames) {
-		t.Fatalf("expected tool_calls delta, body=%s", rec.Body.String())
-	}
-
-	foundSearch := false
-	foundEval := false
-	foundIndex1 := false
-	toolCallsDeltaLens := make([]int, 0, 2)
-	for _, frame := range frames {
-		choices, _ := frame["choices"].([]any)
-		for _, item := range choices {
-			choice, _ := item.(map[string]any)
-			delta, _ := choice["delta"].(map[string]any)
-			rawToolCalls, hasToolCalls := delta["tool_calls"]
-			if !hasToolCalls {
-				continue
-			}
-			toolCalls, _ := rawToolCalls.([]any)
-			toolCallsDeltaLens = append(toolCallsDeltaLens, len(toolCalls))
-			for _, tc := range toolCalls {
-				tcm, _ := tc.(map[string]any)
-				if idx, ok := tcm["index"].(float64); ok && int(idx) == 1 {
-					foundIndex1 = true
-				}
-				fn, _ := tcm["function"].(map[string]any)
-				name, _ := fn["name"].(string)
-				switch name {
-				case "search_web":
-					foundSearch = true
-				case "eval_javascript":
-					foundEval = true
-				case "search_webeval_javascript":
-					t.Fatalf("unexpected merged tool name: %s, body=%s", name, rec.Body.String())
-				}
-				if args, ok := fn["arguments"].(string); ok && strings.Contains(args, `}{"`) {
-					t.Fatalf("unexpected concatenated tool arguments: %q, body=%s", args, rec.Body.String())
-				}
-			}
-		}
-	}
-	if !foundSearch || !foundEval {
-		t.Fatalf("expected both tool names in stream deltas, foundSearch=%v foundEval=%v body=%s", foundSearch, foundEval, rec.Body.String())
-	}
-	if len(toolCallsDeltaLens) != 1 || toolCallsDeltaLens[0] != 2 {
-		t.Fatalf("expected exactly one tool_calls delta with two calls, got lens=%v body=%s", toolCallsDeltaLens, rec.Body.String())
-	}
-	if !foundIndex1 {
-		t.Fatalf("expected second tool call index in stream deltas, body=%s", rec.Body.String())
-	}
-	if streamFinishReason(frames) != "tool_calls" {
-		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
-	}
-}
--- a/internal/adapter/openai/message_normalize.go
+++ b/internal/adapter/openai/message_normalize.go
@@ -1,96 +0,0 @@
-package openai
-
-import (
-	"strings"
-
-	"ds2api/internal/prompt"
-)
-
-func normalizeOpenAIMessagesForPrompt(raw []any, traceID string) []map[string]any {
-	_ = traceID
-	out := make([]map[string]any, 0, len(raw))
-	for _, item := range raw {
-		msg, ok := item.(map[string]any)
-		if !ok {
-			continue
-		}
-		role := strings.ToLower(strings.TrimSpace(asString(msg["role"])))
-		switch role {
-		case "assistant":
-			content := buildAssistantContentForPrompt(msg)
-			if content == "" {
-				continue
-			}
-			out = append(out, map[string]any{
-				"role":    "assistant",
-				"content": content,
-			})
-		case "tool", "function":
-			content := buildToolContentForPrompt(msg)
-			out = append(out, map[string]any{
-				"role":    "tool",
-				"content": content,
-			})
-		case "user", "system", "developer":
-			out = append(out, map[string]any{
-				"role":    normalizeOpenAIRoleForPrompt(role),
-				"content": normalizeOpenAIContentForPrompt(msg["content"]),
-			})
-		default:
-			content := normalizeOpenAIContentForPrompt(msg["content"])
-			if content == "" {
-				continue
-			}
-			if role == "" {
-				role = "user"
-			}
-			out = append(out, map[string]any{
-				"role":    normalizeOpenAIRoleForPrompt(role),
-				"content": content,
-			})
-		}
-	}
-	return out
-}
-
-func buildAssistantContentForPrompt(msg map[string]any) string {
-	content := strings.TrimSpace(normalizeOpenAIContentForPrompt(msg["content"]))
-	toolHistory := prompt.FormatToolCallsForPrompt(msg["tool_calls"])
-	switch {
-	case content == "" && toolHistory == "":
-		return ""
-	case content == "":
-		return toolHistory
-	case toolHistory == "":
-		return content
-	default:
-		return content + "\n\n" + toolHistory
-	}
-}
-
-func buildToolContentForPrompt(msg map[string]any) string {
-	content := normalizeOpenAIContentForPrompt(msg["content"])
-	if strings.TrimSpace(content) == "" {
-		return "null"
-	}
-	return content
-}
-
-func normalizeOpenAIContentForPrompt(v any) string {
-	return prompt.NormalizeContent(v)
-}
-
-func normalizeOpenAIRoleForPrompt(role string) string {
-	role = strings.ToLower(strings.TrimSpace(role))
-	if role == "developer" {
-		return "system"
-	}
-	return role
-}
-
-func asString(v any) string {
-	if s, ok := v.(string); ok {
-		return s
-	}
-	return ""
-}
--- a/internal/adapter/openai/models_route_test.go
+++ b/internal/adapter/openai/models_route_test.go
@@ -1,46 +0,0 @@
-package openai
-
-import (
-	"net/http"
-	"net/http/httptest"
-	"testing"
-
-	"github.com/go-chi/chi/v5"
-)
-
-func TestGetModelRouteDirectAndAlias(t *testing.T) {
-	h := &Handler{}
-	r := chi.NewRouter()
-	RegisterRoutes(r, h)
-
-	t.Run("direct", func(t *testing.T) {
-		req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-chat", nil)
-		rec := httptest.NewRecorder()
-		r.ServeHTTP(rec, req)
-		if rec.Code != http.StatusOK {
-			t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
-		}
-	})
-
-	t.Run("alias", func(t *testing.T) {
-		req := httptest.NewRequest(http.MethodGet, "/v1/models/gpt-4.1", nil)
-		rec := httptest.NewRecorder()
-		r.ServeHTTP(rec, req)
-		if rec.Code != http.StatusOK {
-			t.Fatalf("expected 200 for alias, got %d body=%s", rec.Code, rec.Body.String())
-		}
-	})
-}
-
-func TestGetModelRouteNotFound(t *testing.T) {
-	h := &Handler{}
-	r := chi.NewRouter()
-	RegisterRoutes(r, h)
-
-	req := httptest.NewRequest(http.MethodGet, "/v1/models/not-exists", nil)
-	rec := httptest.NewRecorder()
-	r.ServeHTTP(rec, req)
-	if rec.Code != http.StatusNotFound {
-		t.Fatalf("expected 404, got %d body=%s", rec.Code, rec.Body.String())
-	}
-}
--- a/internal/adapter/openai/prompt_build.go
+++ b/internal/adapter/openai/prompt_build.go
@@ -1,26 +0,0 @@
-package openai
-
-import (
-	"ds2api/internal/deepseek"
-	"ds2api/internal/util"
-)
-
-func buildOpenAIFinalPrompt(messagesRaw []any, toolsRaw any, traceID string) (string, []string) {
-	return buildOpenAIFinalPromptWithPolicy(messagesRaw, toolsRaw, traceID, util.DefaultToolChoicePolicy())
-}
-
-func buildOpenAIFinalPromptWithPolicy(messagesRaw []any, toolsRaw any, traceID string, toolPolicy util.ToolChoicePolicy) (string, []string) {
-	messages := normalizeOpenAIMessagesForPrompt(messagesRaw, traceID)
-	toolNames := []string{}
-	if tools, ok := toolsRaw.([]any); ok && len(tools) > 0 {
-		messages, toolNames = injectToolPrompt(messages, tools, toolPolicy)
-	}
-	return deepseek.MessagesPrepare(messages), toolNames
-}
-
-// BuildPromptForAdapter exposes the OpenAI-compatible prompt building flow so
-// other protocol adapters (for example Gemini) can reuse the same tool/history
-// normalization logic and remain behavior-compatible with chat/completions.
-func BuildPromptForAdapter(messagesRaw []any, toolsRaw any, traceID string) (string, []string) {
-	return buildOpenAIFinalPrompt(messagesRaw, toolsRaw, traceID)
-}
--- a/internal/adapter/openai/prompt_build_test.go
+++ b/internal/adapter/openai/prompt_build_test.go
@@ -1,92 +0,0 @@
-package openai
-
-import (
-	"strings"
-	"testing"
-)
-
-func TestBuildOpenAIFinalPrompt_HandlerPathIncludesToolRoundtripSemantics(t *testing.T) {
-	messages := []any{
-		map[string]any{"role": "user", "content": "查北京天气"},
-		map[string]any{
-			"role": "assistant",
-			"tool_calls": []any{
-				map[string]any{
-					"id": "call_1",
-					"function": map[string]any{
-						"name":      "get_weather",
-						"arguments": "{\"city\":\"beijing\"}",
-					},
-				},
-			},
-		},
-		map[string]any{
-			"role":         "tool",
-			"tool_call_id": "call_1",
-			"name":         "get_weather",
-			"content":      map[string]any{"temp": 18, "condition": "sunny"},
-		},
-	}
-	tools := []any{
-		map[string]any{
-			"type": "function",
-			"function": map[string]any{
-				"name":        "get_weather",
-				"description": "Get weather",
-				"parameters": map[string]any{
-					"type": "object",
-				},
-			},
-		},
-	}
-
-	finalPrompt, toolNames := buildOpenAIFinalPrompt(messages, tools, "")
-	if len(toolNames) != 1 || toolNames[0] != "get_weather" {
-		t.Fatalf("unexpected tool names: %#v", toolNames)
-	}
-	if !strings.Contains(finalPrompt, `"condition":"sunny"`) {
-		t.Fatalf("handler finalPrompt should preserve tool output content: %q", finalPrompt)
-	}
-	if !strings.Contains(finalPrompt, "<tool_calls>") {
-		t.Fatalf("handler finalPrompt should preserve assistant tool history: %q", finalPrompt)
-	}
-	if !strings.Contains(finalPrompt, "<tool_name>get_weather</tool_name>") {
-		t.Fatalf("handler finalPrompt should include tool name history: %q", finalPrompt)
-	}
-}
-
-func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *testing.T) {
-	messages := []any{
-		map[string]any{"role": "system", "content": "You are helpful"},
-		map[string]any{"role": "user", "content": "请调用工具"},
-	}
-	tools := []any{
-		map[string]any{
-			"type": "function",
-			"function": map[string]any{
-				"name":        "search",
-				"description": "search docs",
-				"parameters": map[string]any{
-					"type": "object",
-				},
-			},
-		},
-	}
-
-	finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "")
-	if !strings.Contains(finalPrompt, "After receiving a tool result, use it directly.") {
-		t.Fatalf("vercel prepare finalPrompt missing final-answer instruction: %q", finalPrompt)
-	}
-	if !strings.Contains(finalPrompt, "Only call another tool if the result is insufficient.") {
-		t.Fatalf("vercel prepare finalPrompt missing retry guard instruction: %q", finalPrompt)
-	}
-	if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") {
-		t.Fatalf("vercel prepare finalPrompt missing xml format instruction: %q", finalPrompt)
-	}
-	if !strings.Contains(finalPrompt, "Do NOT wrap the XML in markdown code fences") {
-		t.Fatalf("vercel prepare finalPrompt missing no-fence xml instruction: %q", finalPrompt)
-	}
-	if strings.Contains(finalPrompt, "```json") {
-		t.Fatalf("vercel prepare finalPrompt should not require fenced tool calls: %q", finalPrompt)
-	}
-}
--- a/internal/adapter/openai/responses_stream_runtime_core.go
+++ b/internal/adapter/openai/responses_stream_runtime_core.go
@@ -1,210 +0,0 @@
-package openai
-
-import (
-	"net/http"
-	"strings"
-
-	"ds2api/internal/config"
-	openaifmt "ds2api/internal/format/openai"
-	"ds2api/internal/sse"
-	streamengine "ds2api/internal/stream"
-	"ds2api/internal/util"
-)
-
-type responsesStreamRuntime struct {
-	w        http.ResponseWriter
-	rc       *http.ResponseController
-	canFlush bool
-
-	responseID  string
-	model       string
-	finalPrompt string
-	toolNames   []string
-	traceID     string
-	toolChoice  util.ToolChoicePolicy
-
-	thinkingEnabled bool
-	searchEnabled   bool
-
-	bufferToolContent    bool
-	emitEarlyToolDeltas  bool
-	toolCallsEmitted     bool
-	toolCallsDoneEmitted bool
-
-	sieve             toolStreamSieveState
-	thinking          strings.Builder
-	text              strings.Builder
-	visibleText       strings.Builder
-	streamToolCallIDs map[int]string
-	functionItemIDs   map[int]string
-	functionOutputIDs map[int]int
-	functionArgs      map[int]string
-	functionDone      map[int]bool
-	functionAdded     map[int]bool
-	functionNames     map[int]string
-	messageItemID     string
-	messageOutputID   int
-	nextOutputID      int
-	messageAdded      bool
-	messagePartAdded  bool
-	sequence          int
-	failed            bool
-
-	persistResponse func(obj map[string]any)
-}
-
-func newResponsesStreamRuntime(
-	w http.ResponseWriter,
-	rc *http.ResponseController,
-	canFlush bool,
-	responseID string,
-	model string,
-	finalPrompt string,
-	thinkingEnabled bool,
-	searchEnabled bool,
-	toolNames []string,
-	bufferToolContent bool,
-	emitEarlyToolDeltas bool,
-	toolChoice util.ToolChoicePolicy,
-	traceID string,
-	persistResponse func(obj map[string]any),
-) *responsesStreamRuntime {
-	return &responsesStreamRuntime{
-		w:                   w,
-		rc:                  rc,
-		canFlush:            canFlush,
-		responseID:          responseID,
-		model:               model,
-		finalPrompt:         finalPrompt,
-		thinkingEnabled:     thinkingEnabled,
-		searchEnabled:       searchEnabled,
-		toolNames:           toolNames,
-		bufferToolContent:   bufferToolContent,
-		emitEarlyToolDeltas: emitEarlyToolDeltas,
-		streamToolCallIDs:   map[int]string{},
-		functionItemIDs:     map[int]string{},
-		functionOutputIDs:   map[int]int{},
-		functionArgs:        map[int]string{},
-		functionDone:        map[int]bool{},
-		functionAdded:       map[int]bool{},
-		functionNames:       map[int]string{},
-		messageOutputID:     -1,
-		toolChoice:          toolChoice,
-		traceID:             traceID,
-		persistResponse:     persistResponse,
-	}
-}
-
-func (s *responsesStreamRuntime) finalize() {
-	finalThinking := s.thinking.String()
-	finalText := sanitizeLeakedOutput(s.text.String())
-
-	if s.bufferToolContent {
-		s.processToolStreamEvents(flushToolSieve(&s.sieve, s.toolNames), true)
-	}
-
-	textParsed := util.ParseStandaloneToolCallsDetailed(finalText, s.toolNames)
-	detected := textParsed.Calls
-	s.logToolPolicyRejections(textParsed)
-
-	if len(detected) > 0 {
-		s.toolCallsEmitted = true
-		if !s.toolCallsDoneEmitted {
-			s.emitFunctionCallDoneEvents(detected)
-		}
-	}
-
-	s.closeMessageItem()
-
-	if s.toolChoice.IsRequired() && len(detected) == 0 {
-		s.failed = true
-		message := "tool_choice requires at least one valid tool call."
-		failedResp := map[string]any{
-			"id":          s.responseID,
-			"type":        "response",
-			"object":      "response",
-			"model":       s.model,
-			"status":      "failed",
-			"output":      []any{},
-			"output_text": "",
-			"error": map[string]any{
-				"message": message,
-				"type":    "invalid_request_error",
-				"code":    "tool_choice_violation",
-				"param":   nil,
-			},
-		}
-		if s.persistResponse != nil {
-			s.persistResponse(failedResp)
-		}
-		s.sendEvent("response.failed", openaifmt.BuildResponsesFailedPayload(s.responseID, s.model, message, "tool_choice_violation"))
-		s.sendDone()
-		return
-	}
-	s.closeIncompleteFunctionItems()
-
-	obj := s.buildCompletedResponseObject(finalThinking, finalText, detected)
-	if s.persistResponse != nil {
-		s.persistResponse(obj)
-	}
-	s.sendEvent("response.completed", openaifmt.BuildResponsesCompletedPayload(obj))
-	s.sendDone()
-}
-
-func (s *responsesStreamRuntime) logToolPolicyRejections(textParsed util.ToolCallParseResult) {
-	logRejected := func(parsed util.ToolCallParseResult, channel string) {
-		rejected := filteredRejectedToolNamesForLog(parsed.RejectedToolNames)
-		if !parsed.RejectedByPolicy || len(rejected) == 0 {
-			return
-		}
-		config.Logger.Warn(
-			"[responses] rejected tool calls by policy",
-			"trace_id", strings.TrimSpace(s.traceID),
-			"channel", channel,
-			"tool_choice_mode", s.toolChoice.Mode,
-			"rejected_tool_names", strings.Join(rejected, ","),
-		)
-	}
-	logRejected(textParsed, "text")
-}
-
-func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision {
-	if !parsed.Parsed {
-		return streamengine.ParsedDecision{}
-	}
-	if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
-		return streamengine.ParsedDecision{Stop: true}
-	}
-
-	contentSeen := false
-	for _, p := range parsed.Parts {
-		if p.Text == "" {
-			continue
-		}
-		if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(p.Text) {
-			continue
-		}
-		contentSeen = true
-		if p.Type == "thinking" {
-			if !s.thinkingEnabled {
-				continue
-			}
-			s.thinking.WriteString(p.Text)
-			s.sendEvent("response.reasoning.delta", openaifmt.BuildResponsesReasoningDeltaPayload(s.responseID, p.Text))
-			continue
-		}
-
-		cleanedText := sanitizeLeakedOutput(p.Text)
-		if cleanedText == "" {
-			continue
-		}
-		s.text.WriteString(cleanedText)
-		if !s.bufferToolContent {
-			s.emitTextDelta(cleanedText)
-			continue
-		}
-		s.processToolStreamEvents(processToolSieveChunk(&s.sieve, cleanedText, s.toolNames), true)
-	}
-
-	return streamengine.ParsedDecision{ContentSeen: contentSeen}
-}
--- a/internal/adapter/openai/responses_stream_test.go
+++ b/internal/adapter/openai/responses_stream_test.go
@@ -1,681 +0,0 @@
-package openai
-
-import (
-	"bufio"
-	"encoding/json"
-	"io"
-	"net/http"
-	"net/http/httptest"
-	"strings"
-	"testing"
-
-	"ds2api/internal/util"
-)
-
-func TestHandleResponsesStreamToolCallsHideRawOutputTextInCompleted(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	sseLine := func(v string) string {
-		b, _ := json.Marshal(map[string]any{
-			"p": "response/content",
-			"v": v,
-		})
-		return "data: " + string(b) + "\n"
-	}
-
-	rawToolJSON := `{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}`
-	streamBody := sseLine(rawToolJSON) + "data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"read_file"}, util.DefaultToolChoicePolicy(), "")
-
-	completed, ok := extractSSEEventPayload(rec.Body.String(), "response.completed")
-	if !ok {
-		t.Fatalf("expected response.completed event, body=%s", rec.Body.String())
-	}
-	responseObj, _ := completed["response"].(map[string]any)
-	outputText, _ := responseObj["output_text"].(string)
-	if outputText != "" {
-		t.Fatalf("expected empty output_text for tool_calls response, got output_text=%q", outputText)
-	}
-	output, _ := responseObj["output"].([]any)
-	if len(output) == 0 {
-		t.Fatalf("expected structured output entries, got %#v", responseObj["output"])
-	}
-	hasFunctionCall := false
-	hasLegacyWrapper := false
-	for _, item := range output {
-		m, _ := item.(map[string]any)
-		if m == nil {
-			continue
-		}
-		if m["type"] == "function_call" {
-			hasFunctionCall = true
-		}
-		if m["type"] == "tool_calls" {
-			hasLegacyWrapper = true
-		}
-	}
-	if !hasFunctionCall {
-		t.Fatalf("expected function_call item, got %#v", responseObj["output"])
-	}
-	if hasLegacyWrapper {
-		t.Fatalf("did not expect legacy tool_calls wrapper, got %#v", responseObj["output"])
-	}
-	if strings.Contains(outputText, `"tool_calls"`) {
-		t.Fatalf("raw tool_calls JSON leaked in output_text: %q", outputText)
-	}
-}
-
-func TestHandleResponsesStreamUsesOfficialOutputItemEvents(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	sseLine := func(v string) string {
-		b, _ := json.Marshal(map[string]any{
-			"p": "response/content",
-			"v": v,
-		})
-		return "data: " + string(b) + "\n"
-	}
-
-	streamBody := sseLine(`{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}`) + "data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"read_file"}, util.DefaultToolChoicePolicy(), "")
-	body := rec.Body.String()
-	if !strings.Contains(body, "event: response.output_item.added") {
-		t.Fatalf("expected response.output_item.added event, body=%s", body)
-	}
-	if !strings.Contains(body, "event: response.output_item.done") {
-		t.Fatalf("expected response.output_item.done event, body=%s", body)
-	}
-	if !strings.Contains(body, "event: response.function_call_arguments.done") {
-		t.Fatalf("expected response.function_call_arguments.done event, body=%s", body)
-	}
-	if strings.Contains(body, "event: response.output_tool_call.delta") || strings.Contains(body, "event: response.output_tool_call.done") {
-		t.Fatalf("legacy response.output_tool_call.* event must not appear, body=%s", body)
-	}
-
-	addedPayloads := extractAllSSEEventPayloads(body, "response.output_item.added")
-	hasFunctionCallAdded := false
-	for _, payload := range addedPayloads {
-		item, _ := payload["item"].(map[string]any)
-		if item == nil || asString(item["type"]) != "function_call" {
-			continue
-		}
-		hasFunctionCallAdded = true
-		if asString(item["arguments"]) != "" {
-			t.Fatalf("expected in-progress function_call.arguments to start empty string, got %#v", item["arguments"])
-		}
-	}
-	if !hasFunctionCallAdded {
-		t.Fatalf("expected function_call output_item.added payload, body=%s", body)
-	}
-
-	donePayload, ok := extractSSEEventPayload(body, "response.function_call_arguments.done")
-	if !ok {
-		t.Fatalf("expected to parse response.function_call_arguments.done payload, body=%s", body)
-	}
-	doneCallID := strings.TrimSpace(asString(donePayload["call_id"]))
-	if doneCallID == "" {
-		t.Fatalf("expected non-empty call_id in done payload, payload=%#v", donePayload)
-	}
-	completed, ok := extractSSEEventPayload(body, "response.completed")
-	if !ok {
-		t.Fatalf("expected response.completed payload, body=%s", body)
-	}
-	responseObj, _ := completed["response"].(map[string]any)
-	output, _ := responseObj["output"].([]any)
-	var completedCallID string
-	for _, item := range output {
-		m, _ := item.(map[string]any)
-		if m == nil || m["type"] != "function_call" {
-			continue
-		}
-		completedCallID = strings.TrimSpace(asString(m["call_id"]))
-		if completedCallID != "" {
-			break
-		}
-	}
-	if completedCallID == "" {
-		t.Fatalf("expected function_call.call_id in completed output, output=%#v", output)
-	}
-	if completedCallID != doneCallID {
-		t.Fatalf("expected completed call_id to match stream done call_id, done=%q completed=%q", doneCallID, completedCallID)
-	}
-}
-
-func TestHandleResponsesStreamDoesNotEmitReasoningTextCompatEvents(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	b, _ := json.Marshal(map[string]any{
-		"p": "response/thinking_content",
-		"v": "thought",
-	})
-	streamBody := "data: " + string(b) + "\n" + "data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-reasoner", "prompt", true, false, nil, util.DefaultToolChoicePolicy(), "")
-
-	body := rec.Body.String()
-	if !strings.Contains(body, "event: response.reasoning.delta") {
-		t.Fatalf("expected response.reasoning.delta event, body=%s", body)
-	}
-	if strings.Contains(body, "event: response.reasoning_text.delta") || strings.Contains(body, "event: response.reasoning_text.done") {
-		t.Fatalf("did not expect response.reasoning_text.* compatibility events, body=%s", body)
-	}
-}
-
-func TestHandleResponsesStreamMultiToolCallKeepsNameAndCallIDAligned(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	sseLine := func(v string) string {
-		b, _ := json.Marshal(map[string]any{
-			"p": "response/content",
-			"v": v,
-		})
-		return "data: " + string(b) + "\n"
-	}
-
-	streamBody := sseLine(`{"tool_calls":[{"name":"search_web","input":{"query":"latest ai news"}},`) +
-		sseLine(`{"name":"eval_javascript","input":{"code":"1+1"}}]}`) +
-		"data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"search_web", "eval_javascript"}, util.DefaultToolChoicePolicy(), "")
-
-	body := rec.Body.String()
-	donePayloads := extractAllSSEEventPayloads(body, "response.function_call_arguments.done")
-	if len(donePayloads) != 2 {
-		t.Fatalf("expected two response.function_call_arguments.done events, got %d body=%s", len(donePayloads), body)
-	}
-	seenNames := map[string]string{}
-	for _, payload := range donePayloads {
-		name := strings.TrimSpace(asString(payload["name"]))
-		callID := strings.TrimSpace(asString(payload["call_id"]))
-		if name != "search_web" && name != "eval_javascript" {
-			t.Fatalf("unexpected tool name in done payload: %#v", payload)
-		}
-		if callID == "" {
-			t.Fatalf("expected non-empty call_id in done payload: %#v", payload)
-		}
-		seenNames[name] = callID
-	}
-	if seenNames["search_web"] == seenNames["eval_javascript"] {
-		t.Fatalf("expected distinct call_id per tool, got %#v", seenNames)
-	}
-}
-
-func TestHandleResponsesStreamEmitsOutputTextDoneBeforeContentPartDone(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	sseLine := func(v string) string {
-		b, _ := json.Marshal(map[string]any{
-			"p": "response/content",
-			"v": v,
-		})
-		return "data: " + string(b) + "\n"
-	}
-
-	streamBody := sseLine("hello") + "data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, nil, util.DefaultToolChoicePolicy(), "")
-	body := rec.Body.String()
-	if !strings.Contains(body, "event: response.output_text.done") {
-		t.Fatalf("expected response.output_text.done payload, body=%s", body)
-	}
-	textDoneIdx := strings.Index(body, "event: response.output_text.done")
-	partDoneIdx := strings.Index(body, "event: response.content_part.done")
-	if textDoneIdx < 0 || partDoneIdx < 0 {
-		t.Fatalf("expected output_text.done + content_part.done, body=%s", body)
-	}
-	if textDoneIdx > partDoneIdx {
-		t.Fatalf("expected output_text.done before content_part.done, body=%s", body)
-	}
-}
-
-func TestHandleResponsesStreamOutputTextDeltaCarriesItemIndexes(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	sseLine := func(v string) string {
-		b, _ := json.Marshal(map[string]any{
-			"p": "response/content",
-			"v": v,
-		})
-		return "data: " + string(b) + "\n"
-	}
-
-	streamBody := sseLine("hello") + "data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, nil, util.DefaultToolChoicePolicy(), "")
-	body := rec.Body.String()
-
-	deltaPayload, ok := extractSSEEventPayload(body, "response.output_text.delta")
-	if !ok {
-		t.Fatalf("expected response.output_text.delta payload, body=%s", body)
-	}
-	if strings.TrimSpace(asString(deltaPayload["item_id"])) == "" {
-		t.Fatalf("expected non-empty item_id in output_text.delta, payload=%#v", deltaPayload)
-	}
-	if _, ok := deltaPayload["output_index"]; !ok {
-		t.Fatalf("expected output_index in output_text.delta, payload=%#v", deltaPayload)
-	}
-	if _, ok := deltaPayload["content_index"]; !ok {
-		t.Fatalf("expected content_index in output_text.delta, payload=%#v", deltaPayload)
-	}
-}
-
-func TestHandleResponsesStreamThinkingAndMixedToolExampleEmitsFunctionCall(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	sseLine := func(path, value string) string {
-		b, _ := json.Marshal(map[string]any{
-			"p": path,
-			"v": value,
-		})
-		return "data: " + string(b) + "\n"
-	}
-
-	streamBody := sseLine("response/thinking_content", "thinking...") +
-		sseLine("response/content", "先读取文件。") +
-		sseLine("response/content", `{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}`) +
-		"data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-reasoner", "prompt", true, false, []string{"read_file"}, util.DefaultToolChoicePolicy(), "")
-
-	addedPayloads := extractAllSSEEventPayloads(rec.Body.String(), "response.output_item.added")
-	if len(addedPayloads) < 1 {
-		t.Fatalf("expected at least one output_item.added event, got %d body=%s", len(addedPayloads), rec.Body.String())
-	}
-
-	completedPayload, ok := extractSSEEventPayload(rec.Body.String(), "response.completed")
-	if !ok {
-		t.Fatalf("expected response.completed payload, body=%s", rec.Body.String())
-	}
-	responseObj, _ := completedPayload["response"].(map[string]any)
-	output, _ := responseObj["output"].([]any)
-	hasMessage := false
-	hasFunctionCall := false
-	for _, item := range output {
-		m, _ := item.(map[string]any)
-		if m == nil {
-			continue
-		}
-		if asString(m["type"]) == "message" {
-			hasMessage = true
-		}
-		if asString(m["type"]) == "function_call" {
-			hasFunctionCall = true
-		}
-	}
-	if !hasMessage {
-		t.Fatalf("expected message output for mixed prose tool example, output=%#v", output)
-	}
-	if !hasFunctionCall {
-		t.Fatalf("expected function_call output for mixed prose tool example, output=%#v", output)
-	}
-}
-
-func TestHandleResponsesStreamToolChoiceNoneStillAllowsFunctionCall(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	sseLine := func(v string) string {
-		b, _ := json.Marshal(map[string]any{
-			"p": "response/content",
-			"v": v,
-		})
-		return "data: " + string(b) + "\n"
-	}
-
-	streamBody := sseLine(`{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}`) + "data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-	policy := util.ToolChoicePolicy{Mode: util.ToolChoiceNone}
-
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, nil, policy, "")
-	body := rec.Body.String()
-	if !strings.Contains(body, "event: response.function_call_arguments.done") {
-		t.Fatalf("expected function_call events for tool_choice=none, body=%s", body)
-	}
-}
-
-func TestHandleResponsesStreamMalformedToolJSONFallsBackToText(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	sseLine := func(v string) string {
-		b, _ := json.Marshal(map[string]any{
-			"p": "response/content",
-			"v": v,
-		})
-		return "data: " + string(b) + "\n"
-	}
-
-	// invalid JSON (NaN) should remain plain text in strict mode.
-	streamBody := sseLine(`{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"},"x":NaN}]}`) + "data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"read_file"}, util.DefaultToolChoicePolicy(), "")
-	body := rec.Body.String()
-	if strings.Contains(body, "event: response.function_call_arguments.delta") || strings.Contains(body, "event: response.function_call_arguments.done") {
-		t.Fatalf("did not expect function_call events for malformed payload in strict mode, body=%s", body)
-	}
-	if !strings.Contains(body, "event: response.output_text.delta") {
-		t.Fatalf("expected response.output_text.delta for malformed payload, body=%s", body)
-	}
-	if !strings.Contains(body, "event: response.completed") {
-		t.Fatalf("expected response.completed event, body=%s", body)
-	}
-}
-
-func TestHandleResponsesStreamRequiredToolChoiceFailure(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	sseLine := func(v string) string {
-		b, _ := json.Marshal(map[string]any{
-			"p": "response/content",
-			"v": v,
-		})
-		return "data: " + string(b) + "\n"
-	}
-
-	streamBody := sseLine("plain text only") + "data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-
-	policy := util.ToolChoicePolicy{
-		Mode:    util.ToolChoiceRequired,
-		Allowed: map[string]struct{}{"read_file": {}},
-	}
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"read_file"}, policy, "")
-
-	body := rec.Body.String()
-	if !strings.Contains(body, "event: response.failed") {
-		t.Fatalf("expected response.failed event for required tool_choice violation, body=%s", body)
-	}
-	if strings.Contains(body, "event: response.completed") {
-		t.Fatalf("did not expect response.completed after failure, body=%s", body)
-	}
-}
-
-func TestHandleResponsesStreamRequiredToolChoiceIgnoresThinkingToolPayload(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	sseLine := func(path, value string) string {
-		b, _ := json.Marshal(map[string]any{
-			"p": path,
-			"v": value,
-		})
-		return "data: " + string(b) + "\n"
-	}
-
-	streamBody := sseLine("response/thinking_content", `{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}`) +
-		sseLine("response/content", "plain text only") +
-		"data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-
-	policy := util.ToolChoicePolicy{
-		Mode:    util.ToolChoiceRequired,
-		Allowed: map[string]struct{}{"read_file": {}},
-	}
-
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", true, false, []string{"read_file"}, policy, "")
-	body := rec.Body.String()
-	if !strings.Contains(body, "event: response.failed") {
-		t.Fatalf("expected response.failed event for required tool_choice violation, body=%s", body)
-	}
-	if strings.Contains(body, "event: response.completed") {
-		t.Fatalf("did not expect response.completed after failure, body=%s", body)
-	}
-}
-
-func TestHandleResponsesStreamRequiredMalformedToolPayloadFails(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	sseLine := func(v string) string {
-		b, _ := json.Marshal(map[string]any{
-			"p": "response/content",
-			"v": v,
-		})
-		return "data: " + string(b) + "\n"
-	}
-
-	streamBody := sseLine(`{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"},"x":NaN}]}`) + "data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-	policy := util.ToolChoicePolicy{
-		Mode:    util.ToolChoiceRequired,
-		Allowed: map[string]struct{}{"read_file": {}},
-	}
-
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"read_file"}, policy, "")
-
-	body := rec.Body.String()
-	if !strings.Contains(body, "event: response.failed") {
-		t.Fatalf("expected response.failed event, body=%s", body)
-	}
-	if strings.Contains(body, "event: response.completed") {
-		t.Fatalf("did not expect response.completed, body=%s", body)
-	}
-}
-
-func TestHandleResponsesStreamAllowsUnknownToolName(t *testing.T) {
-	h := &Handler{}
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
-	rec := httptest.NewRecorder()
-
-	sseLine := func(v string) string {
-		b, _ := json.Marshal(map[string]any{
-			"p": "response/content",
-			"v": v,
-		})
-		return "data: " + string(b) + "\n"
-	}
-
-	streamBody := sseLine(`{"tool_calls":[{"name":"not_in_schema","input":{"q":"go"}}]}`) + "data: [DONE]\n"
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body:       io.NopCloser(strings.NewReader(streamBody)),
-	}
-
-	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"read_file"}, util.DefaultToolChoicePolicy(), "")
-	body := rec.Body.String()
-	if !strings.Contains(body, "event: response.function_call_arguments.done") {
-		t.Fatalf("expected function_call events for unknown tool, body=%s", body)
-	}
-}
-
-func TestHandleResponsesNonStreamRequiredToolChoiceViolation(t *testing.T) {
-	h := &Handler{}
-	rec := httptest.NewRecorder()
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body: io.NopCloser(strings.NewReader(
-			`data: {"p":"response/content","v":"plain text only"}` + "\n" +
-				`data: [DONE]` + "\n",
-		)),
-	}
-	policy := util.ToolChoicePolicy{
-		Mode:    util.ToolChoiceRequired,
-		Allowed: map[string]struct{}{"read_file": {}},
-	}
-
-	h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, []string{"read_file"}, policy, "")
-	if rec.Code != http.StatusUnprocessableEntity {
-		t.Fatalf("expected 422 for required tool_choice violation, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	out := decodeJSONBody(t, rec.Body.String())
-	errObj, _ := out["error"].(map[string]any)
-	if asString(errObj["code"]) != "tool_choice_violation" {
-		t.Fatalf("expected code=tool_choice_violation, got %#v", out)
-	}
-}
-
-func TestHandleResponsesNonStreamRequiredToolChoiceIgnoresThinkingToolPayload(t *testing.T) {
-	h := &Handler{}
-	rec := httptest.NewRecorder()
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body: io.NopCloser(strings.NewReader(
-			`data: {"p":"response/thinking_content","v":"{\"tool_calls\":[{\"name\":\"read_file\",\"input\":{\"path\":\"README.MD\"}}]}"}` + "\n" +
-				`data: {"p":"response/content","v":"plain text only"}` + "\n" +
-				`data: [DONE]` + "\n",
-		)),
-	}
-	policy := util.ToolChoicePolicy{
-		Mode:    util.ToolChoiceRequired,
-		Allowed: map[string]struct{}{"read_file": {}},
-	}
-
-	h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", true, []string{"read_file"}, policy, "")
-	if rec.Code != http.StatusUnprocessableEntity {
-		t.Fatalf("expected 422 for required tool_choice violation, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	out := decodeJSONBody(t, rec.Body.String())
-	errObj, _ := out["error"].(map[string]any)
-	if asString(errObj["code"]) != "tool_choice_violation" {
-		t.Fatalf("expected code=tool_choice_violation, got %#v", out)
-	}
-}
-
-func TestHandleResponsesNonStreamToolChoiceNoneStillAllowsFunctionCall(t *testing.T) {
-	h := &Handler{}
-	rec := httptest.NewRecorder()
-	resp := &http.Response{
-		StatusCode: http.StatusOK,
-		Body: io.NopCloser(strings.NewReader(
-			`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"read_file\",\"input\":{\"path\":\"README.MD\"}}]}"}` + "\n" +
-				`data: [DONE]` + "\n",
-		)),
-	}
-	policy := util.ToolChoicePolicy{Mode: util.ToolChoiceNone}
-
-	h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, nil, policy, "")
-	if rec.Code != http.StatusOK {
-		t.Fatalf("expected 200 for tool_choice=none handling, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	out := decodeJSONBody(t, rec.Body.String())
-	output, _ := out["output"].([]any)
-	foundFunctionCall := false
-	for _, item := range output {
-		m, _ := item.(map[string]any)
-		if m != nil && m["type"] == "function_call" {
-			foundFunctionCall = true
-		}
-	}
-	if !foundFunctionCall {
-		t.Fatalf("expected function_call output item for tool_choice=none, got %#v", output)
-	}
-}
-
-func extractSSEEventPayload(body, targetEvent string) (map[string]any, bool) {
-	scanner := bufio.NewScanner(strings.NewReader(body))
-	matched := false
-	for scanner.Scan() {
-		line := strings.TrimSpace(scanner.Text())
-		if strings.HasPrefix(line, "event: ") {
-			evt := strings.TrimSpace(strings.TrimPrefix(line, "event: "))
-			matched = evt == targetEvent
-			continue
-		}
-		if !matched || !strings.HasPrefix(line, "data: ") {
-			continue
-		}
-		raw := strings.TrimSpace(strings.TrimPrefix(line, "data: "))
-		if raw == "" || raw == "[DONE]" {
-			continue
-		}
-		var payload map[string]any
-		if err := json.Unmarshal([]byte(raw), &payload); err != nil {
-			return nil, false
-		}
-		return payload, true
-	}
-	return nil, false
-}
-
-func extractAllSSEEventPayloads(body, targetEvent string) []map[string]any {
-	scanner := bufio.NewScanner(strings.NewReader(body))
-	matched := false
-	out := make([]map[string]any, 0, 2)
-	for scanner.Scan() {
-		line := strings.TrimSpace(scanner.Text())
-		if strings.HasPrefix(line, "event: ") {
-			evt := strings.TrimSpace(strings.TrimPrefix(line, "event: "))
-			matched = evt == targetEvent
-			continue
-		}
-		if !matched || !strings.HasPrefix(line, "data: ") {
-			continue
-		}
-		raw := strings.TrimSpace(strings.TrimPrefix(line, "data: "))
-		if raw == "" || raw == "[DONE]" {
-			continue
-		}
-		var payload map[string]any
-		if err := json.Unmarshal([]byte(raw), &payload); err != nil {
-			continue
-		}
-		out = append(out, payload)
-	}
-	return out
-}
--- a/internal/adapter/openai/standard_request_test.go
+++ b/internal/adapter/openai/standard_request_test.go
@@ -1,180 +0,0 @@
-package openai
-
-import (
-	"testing"
-
-	"ds2api/internal/config"
-	"ds2api/internal/util"
-)
-
-func newEmptyStoreForNormalizeTest(t *testing.T) *config.Store {
-	t.Helper()
-	t.Setenv("DS2API_CONFIG_JSON", `{}`)
-	return config.LoadStore()
-}
-
-func TestNormalizeOpenAIChatRequest(t *testing.T) {
-	store := newEmptyStoreForNormalizeTest(t)
-	req := map[string]any{
-		"model": "gpt-5-codex",
-		"messages": []any{
-			map[string]any{"role": "user", "content": "hello"},
-		},
-		"temperature": 0.3,
-		"stream":      true,
-	}
-	n, err := normalizeOpenAIChatRequest(store, req, "")
-	if err != nil {
-		t.Fatalf("normalize failed: %v", err)
-	}
-	if n.ResolvedModel != "deepseek-reasoner" {
-		t.Fatalf("unexpected resolved model: %s", n.ResolvedModel)
-	}
-	if !n.Stream {
-		t.Fatalf("expected stream=true")
-	}
-	if _, ok := n.PassThrough["temperature"]; !ok {
-		t.Fatalf("expected temperature passthrough")
-	}
-	if n.FinalPrompt == "" {
-		t.Fatalf("expected non-empty final prompt")
-	}
-}
-
-func TestNormalizeOpenAIResponsesRequestInput(t *testing.T) {
-	store := newEmptyStoreForNormalizeTest(t)
-	req := map[string]any{
-		"model":        "gpt-4o",
-		"input":        "ping",
-		"instructions": "system",
-	}
-	n, err := normalizeOpenAIResponsesRequest(store, req, "")
-	if err != nil {
-		t.Fatalf("normalize failed: %v", err)
-	}
-	if n.ResolvedModel != "deepseek-chat" {
-		t.Fatalf("unexpected resolved model: %s", n.ResolvedModel)
-	}
-	if len(n.Messages) != 2 {
-		t.Fatalf("expected 2 normalized messages, got %d", len(n.Messages))
-	}
-}
-
-func TestNormalizeOpenAIResponsesRequestToolChoiceRequired(t *testing.T) {
-	store := newEmptyStoreForNormalizeTest(t)
-	req := map[string]any{
-		"model": "gpt-4o",
-		"input": "ping",
-		"tools": []any{
-			map[string]any{
-				"type": "function",
-				"function": map[string]any{
-					"name": "search",
-					"parameters": map[string]any{
-						"type": "object",
-					},
-				},
-			},
-		},
-		"tool_choice": "required",
-	}
-	n, err := normalizeOpenAIResponsesRequest(store, req, "")
-	if err != nil {
-		t.Fatalf("normalize failed: %v", err)
-	}
-	if n.ToolChoice.Mode != util.ToolChoiceRequired {
-		t.Fatalf("expected tool choice mode required, got %q", n.ToolChoice.Mode)
-	}
-	if len(n.ToolNames) != 1 || n.ToolNames[0] != "search" {
-		t.Fatalf("unexpected tool names: %#v", n.ToolNames)
-	}
-}
-
-func TestNormalizeOpenAIResponsesRequestToolChoiceForcedFunction(t *testing.T) {
-	store := newEmptyStoreForNormalizeTest(t)
-	req := map[string]any{
-		"model": "gpt-4o",
-		"input": "ping",
-		"tools": []any{
-			map[string]any{
-				"type": "function",
-				"function": map[string]any{
-					"name": "search",
-				},
-			},
-			map[string]any{
-				"type": "function",
-				"function": map[string]any{
-					"name": "read_file",
-				},
-			},
-		},
-		"tool_choice": map[string]any{
-			"type": "function",
-			"name": "read_file",
-		},
-	}
-	n, err := normalizeOpenAIResponsesRequest(store, req, "")
-	if err != nil {
-		t.Fatalf("normalize failed: %v", err)
-	}
-	if n.ToolChoice.Mode != util.ToolChoiceForced {
-		t.Fatalf("expected tool choice mode forced, got %q", n.ToolChoice.Mode)
-	}
-	if n.ToolChoice.ForcedName != "read_file" {
-		t.Fatalf("expected forced tool name read_file, got %q", n.ToolChoice.ForcedName)
-	}
-	if len(n.ToolNames) != 1 || n.ToolNames[0] != "read_file" {
-		t.Fatalf("expected filtered tool names [read_file], got %#v", n.ToolNames)
-	}
-}
-
-func TestNormalizeOpenAIResponsesRequestToolChoiceForcedUndeclaredFails(t *testing.T) {
-	store := newEmptyStoreForNormalizeTest(t)
-	req := map[string]any{
-		"model": "gpt-4o",
-		"input": "ping",
-		"tools": []any{
-			map[string]any{
-				"type": "function",
-				"function": map[string]any{
-					"name": "search",
-				},
-			},
-		},
-		"tool_choice": map[string]any{
-			"type": "function",
-			"name": "read_file",
-		},
-	}
-	if _, err := normalizeOpenAIResponsesRequest(store, req, ""); err == nil {
-		t.Fatalf("expected forced undeclared tool to fail")
-	}
-}
-
-func TestNormalizeOpenAIResponsesRequestToolChoiceNoneKeepsToolDetectionEnabled(t *testing.T) {
-	store := newEmptyStoreForNormalizeTest(t)
-	req := map[string]any{
-		"model": "gpt-4o",
-		"input": "ping",
-		"tools": []any{
-			map[string]any{
-				"type": "function",
-				"function": map[string]any{
-					"name": "search",
-				},
-			},
-		},
-		"tool_choice": "none",
-	}
-	n, err := normalizeOpenAIResponsesRequest(store, req, "")
-	if err != nil {
-		t.Fatalf("normalize failed: %v", err)
-	}
-	if n.ToolChoice.Mode != util.ToolChoiceNone {
-		t.Fatalf("expected tool choice mode none, got %q", n.ToolChoice.Mode)
-	}
-	if len(n.ToolNames) == 0 {
-		t.Fatalf("expected tool detection sentinel when tool_choice=none, got %#v", n.ToolNames)
-	}
-}
--- a/internal/adapter/openai/stream_status_test.go
+++ b/internal/adapter/openai/stream_status_test.go
@@ -1,185 +0,0 @@
-package openai
-
-import (
-	"context"
-	"encoding/json"
-	"io"
-	"net/http"
-	"net/http/httptest"
-	"strings"
-	"testing"
-
-	"github.com/go-chi/chi/v5"
-	chimw "github.com/go-chi/chi/v5/middleware"
-
-	"ds2api/internal/auth"
-)
-
-type streamStatusAuthStub struct{}
-
-func (streamStatusAuthStub) Determine(_ *http.Request) (*auth.RequestAuth, error) {
-	return &auth.RequestAuth{
-		UseConfigToken: false,
-		DeepSeekToken:  "direct-token",
-		CallerID:       "caller:test",
-		TriedAccounts:  map[string]bool{},
-	}, nil
-}
-
-func (streamStatusAuthStub) DetermineCaller(_ *http.Request) (*auth.RequestAuth, error) {
-	return &auth.RequestAuth{
-		UseConfigToken: false,
-		DeepSeekToken:  "direct-token",
-		CallerID:       "caller:test",
-		TriedAccounts:  map[string]bool{},
-	}, nil
-}
-
-func (streamStatusAuthStub) Release(_ *auth.RequestAuth) {}
-
-type streamStatusDSStub struct {
-	resp *http.Response
-}
-
-func (m streamStatusDSStub) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
-	return "session-id", nil
-}
-
-func (m streamStatusDSStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
-	return "pow", nil
-}
-
-func (m streamStatusDSStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) {
-	return m.resp, nil
-}
-
-func (m streamStatusDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error {
-	return nil
-}
-
-func makeOpenAISSEHTTPResponse(lines ...string) *http.Response {
-	body := strings.Join(lines, "\n")
-	if !strings.HasSuffix(body, "\n") {
-		body += "\n"
-	}
-	return &http.Response{
-		StatusCode: http.StatusOK,
-		Header:     make(http.Header),
-		Body:       io.NopCloser(strings.NewReader(body)),
-	}
-}
-
-func captureStatusMiddleware(statuses *[]int) func(http.Handler) http.Handler {
-	return func(next http.Handler) http.Handler {
-		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-			ww := chimw.NewWrapResponseWriter(w, r.ProtoMajor)
-			next.ServeHTTP(ww, r)
-			*statuses = append(*statuses, ww.Status())
-		})
-	}
-}
-
-func TestChatCompletionsStreamStatusCapturedAs200(t *testing.T) {
-	statuses := make([]int, 0, 1)
-	h := &Handler{
-		Store: mockOpenAIConfig{wideInput: true},
-		Auth:  streamStatusAuthStub{},
-		DS:    streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"hello"}`, "data: [DONE]")},
-	}
-	r := chi.NewRouter()
-	r.Use(captureStatusMiddleware(&statuses))
-	RegisterRoutes(r, h)
-
-	reqBody := `{"model":"deepseek-chat","messages":[{"role":"user","content":"hi"}],"stream":true}`
-	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody))
-	req.Header.Set("Authorization", "Bearer direct-token")
-	req.Header.Set("Content-Type", "application/json")
-	rec := httptest.NewRecorder()
-	r.ServeHTTP(rec, req)
-
-	if rec.Code != http.StatusOK {
-		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	if len(statuses) != 1 {
-		t.Fatalf("expected one captured status, got %d", len(statuses))
-	}
-	if statuses[0] != http.StatusOK {
-		t.Fatalf("expected captured status 200 (not 000), got %d", statuses[0])
-	}
-}
-
-func TestResponsesStreamStatusCapturedAs200(t *testing.T) {
-	statuses := make([]int, 0, 1)
-	h := &Handler{
-		Store: mockOpenAIConfig{wideInput: true},
-		Auth:  streamStatusAuthStub{},
-		DS:    streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"hello"}`, "data: [DONE]")},
-	}
-	r := chi.NewRouter()
-	r.Use(captureStatusMiddleware(&statuses))
-	RegisterRoutes(r, h)
-
-	reqBody := `{"model":"deepseek-chat","input":"hi","stream":true}`
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody))
-	req.Header.Set("Authorization", "Bearer direct-token")
-	req.Header.Set("Content-Type", "application/json")
-	rec := httptest.NewRecorder()
-	r.ServeHTTP(rec, req)
-
-	if rec.Code != http.StatusOK {
-		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	if len(statuses) != 1 {
-		t.Fatalf("expected one captured status, got %d", len(statuses))
-	}
-	if statuses[0] != http.StatusOK {
-		t.Fatalf("expected captured status 200 (not 000), got %d", statuses[0])
-	}
-}
-
-func TestResponsesNonStreamMixedProseToolPayloadHandlerPath(t *testing.T) {
-	statuses := make([]int, 0, 1)
-	content, _ := json.Marshal(map[string]any{
-		"p": "response/content",
-		"v": "我来调用工具\n{\"tool_calls\":[{\"name\":\"read_file\",\"input\":{\"path\":\"README.MD\"}}]}",
-	})
-	h := &Handler{
-		Store: mockOpenAIConfig{wideInput: true},
-		Auth:  streamStatusAuthStub{},
-		DS:    streamStatusDSStub{resp: makeOpenAISSEHTTPResponse("data: "+string(content), "data: [DONE]")},
-	}
-	r := chi.NewRouter()
-	r.Use(captureStatusMiddleware(&statuses))
-	RegisterRoutes(r, h)
-
-	reqBody := `{"model":"deepseek-chat","input":"请调用工具","tools":[{"type":"function","function":{"name":"read_file","description":"read","parameters":{"type":"object","properties":{"path":{"type":"string"}}}}}],"stream":false}`
-	req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody))
-	req.Header.Set("Authorization", "Bearer direct-token")
-	req.Header.Set("Content-Type", "application/json")
-	rec := httptest.NewRecorder()
-	r.ServeHTTP(rec, req)
-
-	if rec.Code != http.StatusOK {
-		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	if len(statuses) != 1 || statuses[0] != http.StatusOK {
-		t.Fatalf("expected captured status 200, got %#v", statuses)
-	}
-
-	var out map[string]any
-	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
-		t.Fatalf("decode response failed: %v body=%s", err, rec.Body.String())
-	}
-	outputText, _ := out["output_text"].(string)
-	if outputText != "" {
-		t.Fatalf("expected output_text hidden for mixed prose tool payload, got %q", outputText)
-	}
-	output, _ := out["output"].([]any)
-	if len(output) != 1 {
-		t.Fatalf("expected one output item, got %#v", output)
-	}
-	first, _ := output[0].(map[string]any)
-	if first["type"] != "function_call" {
-		t.Fatalf("expected function_call output item, got %#v", output)
-	}
-}
--- a/internal/adapter/openai/tool_sieve_core.go
+++ b/internal/adapter/openai/tool_sieve_core.go
@@ -1,278 +0,0 @@
-package openai
-
-import (
-	"strings"
-
-	"ds2api/internal/util"
-)
-
-func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames []string) []toolStreamEvent {
-	if state == nil {
-		return nil
-	}
-	if chunk != "" {
-		state.pending.WriteString(chunk)
-	}
-	events := make([]toolStreamEvent, 0, 2)
-	if len(state.pendingToolCalls) > 0 {
-		events = append(events, toolStreamEvent{ToolCalls: state.pendingToolCalls})
-		state.pendingToolRaw = ""
-		state.pendingToolCalls = nil
-	}
-
-	for {
-		if state.capturing {
-			if state.pending.Len() > 0 {
-				state.capture.WriteString(state.pending.String())
-				state.pending.Reset()
-			}
-			prefix, calls, suffix, ready := consumeToolCapture(state, toolNames)
-			if !ready {
-				break
-			}
-			captured := state.capture.String()
-			state.capture.Reset()
-			state.capturing = false
-			state.resetIncrementalToolState()
-			if len(calls) > 0 {
-				if prefix != "" {
-					state.noteText(prefix)
-					events = append(events, toolStreamEvent{Content: prefix})
-				}
-				if suffix != "" {
-					state.pending.WriteString(suffix)
-				}
-				_ = captured
-				state.pendingToolCalls = calls
-				continue
-			}
-			if prefix != "" {
-				state.noteText(prefix)
-				events = append(events, toolStreamEvent{Content: prefix})
-			}
-			if suffix != "" {
-				state.pending.WriteString(suffix)
-			}
-			continue
-		}
-
-		pending := state.pending.String()
-		if pending == "" {
-			break
-		}
-		start := findToolSegmentStart(pending)
-		if start >= 0 {
-			prefix := pending[:start]
-			if prefix != "" {
-				state.noteText(prefix)
-				events = append(events, toolStreamEvent{Content: prefix})
-			}
-			state.pending.Reset()
-			state.capture.WriteString(pending[start:])
-			state.capturing = true
-			state.resetIncrementalToolState()
-			continue
-		}
-
-		safe, hold := splitSafeContentForToolDetection(pending)
-		if safe == "" {
-			break
-		}
-		state.pending.Reset()
-		state.pending.WriteString(hold)
-		state.noteText(safe)
-		events = append(events, toolStreamEvent{Content: safe})
-	}
-
-	return events
-}
-
-func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStreamEvent {
-	if state == nil {
-		return nil
-	}
-	events := processToolSieveChunk(state, "", toolNames)
-	if len(state.pendingToolCalls) > 0 {
-		events = append(events, toolStreamEvent{ToolCalls: state.pendingToolCalls})
-		state.pendingToolRaw = ""
-		state.pendingToolCalls = nil
-	}
-	if state.capturing {
-		consumedPrefix, consumedCalls, consumedSuffix, ready := consumeToolCapture(state, toolNames)
-		if ready {
-			if consumedPrefix != "" {
-				state.noteText(consumedPrefix)
-				events = append(events, toolStreamEvent{Content: consumedPrefix})
-			}
-			if len(consumedCalls) > 0 {
-				events = append(events, toolStreamEvent{ToolCalls: consumedCalls})
-			}
-			if consumedSuffix != "" {
-				state.noteText(consumedSuffix)
-				events = append(events, toolStreamEvent{Content: consumedSuffix})
-			}
-		} else {
-			content := state.capture.String()
-			if content != "" {
-				// If the captured text looks like an incomplete XML tool call block,
-				// swallow it to prevent leaking raw XML tags to the client.
-				if hasOpenXMLToolTag(content) {
-					// Drop it silently — incomplete tool call.
-				} else {
-					state.noteText(content)
-					events = append(events, toolStreamEvent{Content: content})
-				}
-			}
-		}
-		state.capture.Reset()
-		state.capturing = false
-		state.resetIncrementalToolState()
-	}
-	if state.pending.Len() > 0 {
-		content := state.pending.String()
-		// Safety: if pending contains XML tool tag fragments (e.g. "tool_calls>"
-		// from a split closing tag), swallow them instead of leaking.
-		if hasOpenXMLToolTag(content) || looksLikeXMLToolTagFragment(content) {
-			// Drop it — likely an incomplete tool call fragment.
-		} else {
-			state.noteText(content)
-			events = append(events, toolStreamEvent{Content: content})
-		}
-		state.pending.Reset()
-	}
-	return events
-}
-
-func splitSafeContentForToolDetection(s string) (safe, hold string) {
-	if s == "" {
-		return "", ""
-	}
-	suspiciousStart := findSuspiciousPrefixStart(s)
-	if suspiciousStart < 0 {
-		return s, ""
-	}
-	if suspiciousStart > 0 {
-		return s[:suspiciousStart], s[suspiciousStart:]
-	}
-	// If suspicious content starts at position 0, keep holding until we can
-	// parse a complete tool JSON block or reach stream flush.
-	return "", s
-}
-
-func findSuspiciousPrefixStart(s string) int {
-	start := -1
-	indices := []int{
-		strings.LastIndex(s, "{"),
-		strings.LastIndex(s, "["),
-		strings.LastIndex(s, "```"),
-	}
-	for _, idx := range indices {
-		if idx > start {
-			start = idx
-		}
-	}
-	// Also check for partial XML tool tag at end of string.
-	if xmlIdx := findPartialXMLToolTagStart(s); xmlIdx >= 0 && xmlIdx > start {
-		start = xmlIdx
-	}
-	return start
-}
-
-func findToolSegmentStart(s string) int {
-	if s == "" {
-		return -1
-	}
-	lower := strings.ToLower(s)
-	keywords := []string{"tool_calls", "\"function\"", "function.name:"}
-	bestKeyIdx := -1
-	for _, kw := range keywords {
-		idx := strings.Index(lower, kw)
-		if idx >= 0 && (bestKeyIdx < 0 || idx < bestKeyIdx) {
-			bestKeyIdx = idx
-		}
-	}
-	// Also detect XML tool call tags.
-	for _, tag := range xmlToolTagsToDetect {
-		idx := strings.Index(lower, tag)
-		if idx >= 0 && (bestKeyIdx < 0 || idx < bestKeyIdx) {
-			bestKeyIdx = idx
-		}
-	}
-	if bestKeyIdx < 0 {
-		return -1
-	}
-	// For XML tags, the '<' is itself the segment start.
-	if bestKeyIdx < len(s) && s[bestKeyIdx] == '<' {
-		if fenceStart, ok := openFenceStartBefore(s, bestKeyIdx); ok {
-			return fenceStart
-		}
-		return bestKeyIdx
-	}
-	start := strings.LastIndex(s[:bestKeyIdx], "{")
-	if start < 0 {
-		start = bestKeyIdx
-	}
-	// If the keyword matched inside an XML tag (e.g. "tool_calls" in "<tool_calls>"),
-	// back up past the '<' to capture the full tag.
-	if start > 0 && s[start-1] == '<' {
-		start--
-	}
-	if fenceStart, ok := openFenceStartBefore(s, start); ok {
-		return fenceStart
-	}
-	return start
-}
-
-func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) {
-	captured := state.capture.String()
-	if captured == "" {
-		return "", nil, "", false
-	}
-
-	// Try XML tool call extraction first.
-	if xmlPrefix, xmlCalls, xmlSuffix, xmlReady := consumeXMLToolCapture(captured, toolNames); xmlReady {
-		return xmlPrefix, xmlCalls, xmlSuffix, true
-	}
-	// If XML tags are present but block is incomplete, keep buffering.
-	if hasOpenXMLToolTag(captured) {
-		return "", nil, "", false
-	}
-
-	lower := strings.ToLower(captured)
-	keyIdx := -1
-	keywords := []string{"tool_calls", "\"function\"", "function.name:"}
-	for _, kw := range keywords {
-		idx := strings.Index(lower, kw)
-		if idx >= 0 && (keyIdx < 0 || idx < keyIdx) {
-			keyIdx = idx
-		}
-	}
-
-	if keyIdx < 0 {
-		return "", nil, "", false
-	}
-	start := strings.LastIndex(captured[:keyIdx], "{")
-	if start < 0 {
-		start = keyIdx
-	}
-	obj, end, ok := extractJSONObjectFrom(captured, start)
-	if !ok {
-		return "", nil, "", false
-	}
-	prefixPart := captured[:start]
-	suffixPart := captured[end:]
-	parsed := util.ParseStandaloneToolCallsDetailed(obj, toolNames)
-	if len(parsed.Calls) == 0 {
-		if parsed.SawToolCallSyntax && parsed.RejectedByPolicy {
-			// Parsed as tool-call payload but rejected by schema/policy:
-			// consume it to avoid leaking raw tool_calls JSON to user content.
-			return prefixPart, nil, suffixPart, true
-		}
-		// If it has obvious keywords but failed to parse even after loose repair,
-		// we still might want to intercept it if it looks like an attempt at tool call.
-		// For now, keep the original logic but rely on loose JSON repair.
-		return captured, nil, "", true
-	}
-	prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
-	return prefixPart, parsed.Calls, suffixPart, true
-}
--- a/internal/adapter/openai/tool_sieve_jsonscan.go
+++ b/internal/adapter/openai/tool_sieve_jsonscan.go
@@ -1,84 +0,0 @@
-package openai
-
-import "strings"
-
-func extractJSONObjectFrom(text string, start int) (string, int, bool) {
-	if start < 0 || start >= len(text) || text[start] != '{' {
-		return "", 0, false
-	}
-	depth := 0
-	quote := byte(0)
-	escaped := false
-	for i := start; i < len(text); i++ {
-		ch := text[i]
-		if quote != 0 {
-			if escaped {
-				escaped = false
-				continue
-			}
-			if ch == '\\' {
-				escaped = true
-				continue
-			}
-			if ch == quote {
-				quote = 0
-			}
-			continue
-		}
-		if ch == '"' || ch == '\'' {
-			quote = ch
-			continue
-		}
-		if ch == '{' {
-			depth++
-			continue
-		}
-		if ch == '}' {
-			depth--
-			if depth == 0 {
-				end := i + 1
-				return text[start:end], end, true
-			}
-		}
-	}
-	return "", 0, false
-}
-
-func trimWrappingJSONFence(prefix, suffix string) (string, string) {
-	trimmedPrefix := strings.TrimRight(prefix, " \t\r\n")
-	fenceIdx := strings.LastIndex(trimmedPrefix, "```")
-	if fenceIdx < 0 {
-		return prefix, suffix
-	}
-	// Only strip when the trailing fence in prefix behaves like an opening fence.
-	// A legitimate closing fence before a standalone tool JSON must be preserved.
-	if strings.Count(trimmedPrefix[:fenceIdx+3], "```")%2 == 0 {
-		return prefix, suffix
-	}
-	fenceHeader := strings.TrimSpace(trimmedPrefix[fenceIdx+3:])
-	if fenceHeader != "" && !strings.EqualFold(fenceHeader, "json") {
-		return prefix, suffix
-	}
-
-	trimmedSuffix := strings.TrimLeft(suffix, " \t\r\n")
-	if !strings.HasPrefix(trimmedSuffix, "```") {
-		return prefix, suffix
-	}
-	consumedLeading := len(suffix) - len(trimmedSuffix)
-	return trimmedPrefix[:fenceIdx], suffix[consumedLeading+3:]
-}
-
-func openFenceStartBefore(s string, pos int) (int, bool) {
-	if pos <= 0 || pos > len(s) {
-		return -1, false
-	}
-	segment := s[:pos]
-	lastFence := strings.LastIndex(segment, "```")
-	if lastFence < 0 {
-		return -1, false
-	}
-	if strings.Count(segment, "```")%2 == 1 {
-		return lastFence, true
-	}
-	return -1, false
-}
--- a/internal/adapter/openai/tool_sieve_state.go
+++ b/internal/adapter/openai/tool_sieve_state.go
@@ -1,66 +0,0 @@
-package openai
-
-import (
-	"strings"
-
-	"ds2api/internal/util"
-)
-
-type toolStreamSieveState struct {
-	pending          strings.Builder
-	capture          strings.Builder
-	capturing        bool
-	recentTextTail   string
-	pendingToolRaw   string
-	pendingToolCalls []util.ParsedToolCall
-	disableDeltas    bool
-	toolNameSent     bool
-	toolName         string
-	toolArgsStart    int
-	toolArgsSent     int
-	toolArgsString   bool
-	toolArgsDone     bool
-}
-
-type toolStreamEvent struct {
-	Content        string
-	ToolCalls      []util.ParsedToolCall
-	ToolCallDeltas []toolCallDelta
-}
-
-type toolCallDelta struct {
-	Index     int
-	Name      string
-	Arguments string
-}
-
-// Keep in sync with JS TOOL_SIEVE_CONTEXT_TAIL_LIMIT.
-const toolSieveContextTailLimit = 2048
-
-func (s *toolStreamSieveState) resetIncrementalToolState() {
-	s.disableDeltas = false
-	s.toolNameSent = false
-	s.toolName = ""
-	s.toolArgsStart = -1
-	s.toolArgsSent = -1
-	s.toolArgsString = false
-	s.toolArgsDone = false
-}
-
-func (s *toolStreamSieveState) noteText(content string) {
-	if strings.TrimSpace(content) == "" {
-		return
-	}
-	s.recentTextTail = appendTail(s.recentTextTail, content, toolSieveContextTailLimit)
-}
-
-func appendTail(prev, next string, max int) string {
-	if max <= 0 {
-		return ""
-	}
-	combined := prev + next
-	if len(combined) <= max {
-		return combined
-	}
-	return combined[len(combined)-max:]
-}
--- a/internal/adapter/openai/tool_sieve_xml.go
+++ b/internal/adapter/openai/tool_sieve_xml.go
@@ -1,147 +0,0 @@
-package openai
-
-import (
-	"regexp"
-	"strings"
-
-	"ds2api/internal/util"
-)
-
-// --- XML tool call support for the streaming sieve ---
-
-var xmlToolCallClosingTags = []string{"</tool_calls>", "</tool_call>", "</invoke>", "</function_call>", "</function_calls>", "</tool_use>",
-	// Agent-style XML tags (Roo Code, Cline, etc.)
-	"</attempt_completion>", "</ask_followup_question>", "</new_task>", "</result>"}
-var xmlToolCallOpeningTags = []string{"<tool_calls", "<tool_call", "<invoke", "<function_call", "<function_calls", "<tool_use",
-	// Agent-style XML tags
-	"<attempt_completion", "<ask_followup_question", "<new_task", "<result"}
-
-// xmlToolCallTagPairs maps each opening tag to its expected closing tag.
-// Order matters: longer/wrapper tags must be checked first.
-var xmlToolCallTagPairs = []struct{ open, close string }{
-	{"<tool_calls", "</tool_calls>"},
-	{"<tool_call", "</tool_call>"},
-	{"<function_calls", "</function_calls>"},
-	{"<function_call", "</function_call>"},
-	{"<invoke", "</invoke>"},
-	{"<tool_use", "</tool_use>"},
-	// Agent-style: these are XML "tool call" patterns from coding agents.
-	// They get captured → parsed. If parsing fails, the block is consumed
-	// (swallowed) to prevent raw XML from leaking to the client.
-	{"<attempt_completion", "</attempt_completion>"},
-	{"<ask_followup_question", "</ask_followup_question>"},
-	{"<new_task", "</new_task>"},
-}
-
-// xmlToolCallBlockPattern matches a complete XML tool call block (wrapper or standalone).
-var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(<tool_calls>\s*(?:.*?)\s*</tool_calls>|<tool_call>\s*(?:.*?)\s*</tool_call>|<invoke\b[^>]*>(?:.*?)</invoke>|<function_calls?\b[^>]*>(?:.*?)</function_calls?>|<tool_use>(?:.*?)</tool_use>|<attempt_completion>(?:.*?)</attempt_completion>|<ask_followup_question>(?:.*?)</ask_followup_question>|<new_task>(?:.*?)</new_task>)`)
-
-// xmlToolTagsToDetect is the set of XML tag prefixes used by findToolSegmentStart.
-var xmlToolTagsToDetect = []string{"<tool_calls>", "<tool_calls\n", "<tool_call>", "<tool_call\n",
-	"<invoke ", "<invoke>", "<function_call", "<function_calls", "<tool_use>",
-	// Agent-style tags
-	"<attempt_completion>", "<ask_followup_question>", "<new_task>"}
-
-// consumeXMLToolCapture tries to extract complete XML tool call blocks from captured text.
-func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) {
-	lower := strings.ToLower(captured)
-	// Find the FIRST matching open/close pair, preferring wrapper tags.
-	// Tag pairs are ordered longest-first (e.g. <tool_calls before <tool_call)
-	// so wrapper tags are checked before inner tags.
-	for _, pair := range xmlToolCallTagPairs {
-		openIdx := strings.Index(lower, pair.open)
-		if openIdx < 0 {
-			continue
-		}
-		// Find the LAST occurrence of the specific closing tag to get the outermost block.
-		closeIdx := strings.LastIndex(lower, pair.close)
-		if closeIdx < openIdx {
-			// Opening tag is present but its specific closing tag hasn't arrived.
-			// Return not-ready so we keep buffering — do NOT fall through to
-			// try inner pairs (e.g. <tool_call inside <tool_calls).
-			return "", nil, "", false
-		}
-		closeEnd := closeIdx + len(pair.close)
-
-		xmlBlock := captured[openIdx:closeEnd]
-		prefixPart := captured[:openIdx]
-		suffixPart := captured[closeEnd:]
-		parsed := util.ParseToolCalls(xmlBlock, toolNames)
-		if len(parsed) > 0 {
-			prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
-			return prefixPart, parsed, suffixPart, true
-		}
-		// Looks like XML tool syntax but failed to parse — consume it to avoid leak.
-		return prefixPart, nil, suffixPart, true
-	}
-	return "", nil, "", false
-}
-
-// hasOpenXMLToolTag returns true if captured text contains an XML tool opening tag
-// whose SPECIFIC closing tag has not appeared yet.
-func hasOpenXMLToolTag(captured string) bool {
-	lower := strings.ToLower(captured)
-	for _, pair := range xmlToolCallTagPairs {
-		if strings.Contains(lower, pair.open) {
-			if !strings.Contains(lower, pair.close) {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-// findPartialXMLToolTagStart checks if the string ends with a partial XML tool tag
-// (e.g., "<tool_ca" or "<inv") and returns the position of the '<'.
-func findPartialXMLToolTagStart(s string) int {
-	lastLT := strings.LastIndex(s, "<")
-	if lastLT < 0 {
-		return -1
-	}
-	tail := s[lastLT:]
-	// If there's a '>' in the tail, the tag is closed — not partial.
-	if strings.Contains(tail, ">") {
-		return -1
-	}
-	lowerTail := strings.ToLower(tail)
-	// Check if the tail is a prefix of any known XML tool tag.
-	for _, tag := range xmlToolCallOpeningTags {
-		tagWithLT := tag
-		if !strings.HasPrefix(tagWithLT, "<") {
-			tagWithLT = "<" + tagWithLT
-		}
-		if strings.HasPrefix(tagWithLT, lowerTail) {
-			return lastLT
-		}
-	}
-	return -1
-}
-
-// looksLikeXMLToolTagFragment returns true if s looks like a fragment from a
-// split XML tool call tag — for example "tool_calls>" or "/tool_call>\n".
-// These fragments arise when '<' was consumed separately and the tail remains.
-func looksLikeXMLToolTagFragment(s string) bool {
-	trimmed := strings.TrimSpace(s)
-	if trimmed == "" {
-		return false
-	}
-	lower := strings.ToLower(trimmed)
-	// Check for closing tag tails like "tool_calls>" or "/tool_calls>"
-	fragments := []string{
-		"tool_calls>", "tool_call>", "/tool_calls>", "/tool_call>",
-		"function_calls>", "function_call>", "/function_calls>", "/function_call>",
-		"invoke>", "/invoke>", "tool_use>", "/tool_use>",
-		"tool_name>", "/tool_name>", "parameters>", "/parameters>",
-		// Agent-style tag fragments
-		"attempt_completion>", "/attempt_completion>",
-		"ask_followup_question>", "/ask_followup_question>",
-		"new_task>", "/new_task>",
-		"result>", "/result>",
-	}
-	for _, f := range fragments {
-		if strings.Contains(lower, f) {
-			return true
-		}
-	}
-	return false
-}
--- a/internal/adapter/openai/tool_sieve_xml_test.go
+++ b/internal/adapter/openai/tool_sieve_xml_test.go
@@ -1,319 +0,0 @@
-package openai
-
-import (
-	"strings"
-	"testing"
-)
-
-func TestProcessToolSieveInterceptsXMLToolCallWithoutLeak(t *testing.T) {
-	var state toolStreamSieveState
-	// Simulate a model producing XML tool call output chunk by chunk.
-	chunks := []string{
-		"<tool_calls>\n",
-		"  <tool_call>\n",
-		"    <tool_name>read_file</tool_name>\n",
-		`    <parameters>{"path":"README.MD"}</parameters>` + "\n",
-		"  </tool_call>\n",
-		"</tool_calls>",
-	}
-	var events []toolStreamEvent
-	for _, c := range chunks {
-		events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...)
-	}
-	events = append(events, flushToolSieve(&state, []string{"read_file"})...)
-
-	var textContent string
-	var toolCalls int
-	for _, evt := range events {
-		if evt.Content != "" {
-			textContent += evt.Content
-		}
-		toolCalls += len(evt.ToolCalls)
-	}
-
-	if strings.Contains(textContent, "<tool_call") {
-		t.Fatalf("XML tool call content leaked to text: %q", textContent)
-	}
-	if strings.Contains(textContent, "read_file") {
-		t.Fatalf("tool name leaked to text: %q", textContent)
-	}
-	if toolCalls == 0 {
-		t.Fatal("expected tool calls to be extracted, got none")
-	}
-}
-
-func TestProcessToolSieveXMLWithLeadingText(t *testing.T) {
-	var state toolStreamSieveState
-	// Model outputs some prose then an XML tool call.
-	chunks := []string{
-		"Let me check the file.\n",
-		"<tool_calls>\n  <tool_call>\n    <tool_name>read_file</tool_name>\n",
-		`    <parameters>{"path":"go.mod"}</parameters>` + "\n  </tool_call>\n</tool_calls>",
-	}
-	var events []toolStreamEvent
-	for _, c := range chunks {
-		events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...)
-	}
-	events = append(events, flushToolSieve(&state, []string{"read_file"})...)
-
-	var textContent string
-	var toolCalls int
-	for _, evt := range events {
-		if evt.Content != "" {
-			textContent += evt.Content
-		}
-		toolCalls += len(evt.ToolCalls)
-	}
-
-	// Leading text should be emitted.
-	if !strings.Contains(textContent, "Let me check the file.") {
-		t.Fatalf("expected leading text to be emitted, got %q", textContent)
-	}
-	// The XML itself should NOT leak.
-	if strings.Contains(textContent, "<tool_call") {
-		t.Fatalf("XML tool call content leaked to text: %q", textContent)
-	}
-	if toolCalls == 0 {
-		t.Fatal("expected tool calls to be extracted, got none")
-	}
-}
-
-func TestProcessToolSievePartialXMLTagHeldBack(t *testing.T) {
-	var state toolStreamSieveState
-	// Chunk ends with a partial XML tool tag.
-	events := processToolSieveChunk(&state, "Hello <tool_ca", []string{"read_file"})
-
-	var textContent string
-	for _, evt := range events {
-		textContent += evt.Content
-	}
-
-	// "Hello " should be emitted, but "<tool_ca" should be held back.
-	if strings.Contains(textContent, "<tool_ca") {
-		t.Fatalf("partial XML tag should not be emitted, got %q", textContent)
-	}
-	if !strings.Contains(textContent, "Hello") {
-		t.Fatalf("expected 'Hello' text to be emitted, got %q", textContent)
-	}
-}
-
-func TestFindToolSegmentStartDetectsXMLToolCalls(t *testing.T) {
-	cases := []struct {
-		name  string
-		input string
-		want  int
-	}{
-		{"tool_calls_tag", "some text <tool_calls>\n", 10},
-		{"tool_call_tag", "prefix <tool_call>\n", 7},
-		{"invoke_tag", "text <invoke name=\"foo\">body</invoke>", 5},
-		{"function_call_tag", "<function_call name=\"foo\">body</function_call>", 0},
-		{"no_xml", "just plain text", -1},
-	}
-	for _, tc := range cases {
-		t.Run(tc.name, func(t *testing.T) {
-			got := findToolSegmentStart(tc.input)
-			if got != tc.want {
-				t.Fatalf("findToolSegmentStart(%q) = %d, want %d", tc.input, got, tc.want)
-			}
-		})
-	}
-}
-
-func TestFindPartialXMLToolTagStart(t *testing.T) {
-	cases := []struct {
-		name  string
-		input string
-		want  int
-	}{
-		{"partial_tool_call", "Hello <tool_ca", 6},
-		{"partial_invoke", "Prefix <inv", 7},
-		{"partial_lt_only", "Text <", 5},
-		{"complete_tag", "Text <tool_call>done", -1},
-		{"no_lt", "plain text", -1},
-		{"closed_lt", "a < b > c", -1},
-	}
-	for _, tc := range cases {
-		t.Run(tc.name, func(t *testing.T) {
-			got := findPartialXMLToolTagStart(tc.input)
-			if got != tc.want {
-				t.Fatalf("findPartialXMLToolTagStart(%q) = %d, want %d", tc.input, got, tc.want)
-			}
-		})
-	}
-}
-
-func TestHasOpenXMLToolTag(t *testing.T) {
-	if !hasOpenXMLToolTag("<tool_call>\n<tool_name>foo</tool_name>") {
-		t.Fatal("should detect open XML tool tag without closing tag")
-	}
-	if hasOpenXMLToolTag("<tool_call>\n<tool_name>foo</tool_name></tool_call>") {
-		t.Fatal("should return false when closing tag is present")
-	}
-	if hasOpenXMLToolTag("plain text without any XML") {
-		t.Fatal("should return false for plain text")
-	}
-}
-
-// Test the EXACT scenario the user reports: token-by-token streaming where
-// <tool_calls> tag arrives in small pieces.
-func TestProcessToolSieveTokenByTokenXMLNoLeak(t *testing.T) {
-	var state toolStreamSieveState
-	// Simulate DeepSeek model generating tokens one at a time.
-	chunks := []string{
-		"<",
-		"tool",
-		"_calls",
-		">\n",
-		"  <",
-		"tool",
-		"_call",
-		">\n",
-		"    <",
-		"tool",
-		"_name",
-		">",
-		"read",
-		"_file",
-		"</",
-		"tool",
-		"_name",
-		">\n",
-		"    <",
-		"parameters",
-		">",
-		`{"path"`,
-		`: "README.MD"`,
-		`}`,
-		"</",
-		"parameters",
-		">\n",
-		"  </",
-		"tool",
-		"_call",
-		">\n",
-		"</",
-		"tool",
-		"_calls",
-		">",
-	}
-	var events []toolStreamEvent
-	for _, c := range chunks {
-		events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...)
-	}
-	events = append(events, flushToolSieve(&state, []string{"read_file"})...)
-
-	var textContent string
-	var toolCalls int
-	for _, evt := range events {
-		if evt.Content != "" {
-			textContent += evt.Content
-		}
-		toolCalls += len(evt.ToolCalls)
-	}
-
-	if strings.Contains(textContent, "<tool_call") {
-		t.Fatalf("XML tool call content leaked to text in token-by-token mode: %q", textContent)
-	}
-	if strings.Contains(textContent, "tool_calls>") {
-		t.Fatalf("closing tag fragment leaked to text: %q", textContent)
-	}
-	if strings.Contains(textContent, "read_file") {
-		t.Fatalf("tool name leaked to text: %q", textContent)
-	}
-	if toolCalls == 0 {
-		t.Fatal("expected tool calls to be extracted, got none")
-	}
-}
-
-// Test that flushToolSieve on incomplete XML does NOT leak the raw XML content.
-func TestFlushToolSieveIncompleteXMLDoesNotLeak(t *testing.T) {
-	var state toolStreamSieveState
-	// XML block starts but stream ends before completion.
-	chunks := []string{
-		"<tool_calls>\n",
-		"  <tool_call>\n",
-		"    <tool_name>read_file</tool_name>\n",
-	}
-	var events []toolStreamEvent
-	for _, c := range chunks {
-		events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...)
-	}
-	// Stream ends abruptly - flush should NOT dump raw XML.
-	events = append(events, flushToolSieve(&state, []string{"read_file"})...)
-
-	var textContent string
-	for _, evt := range events {
-		if evt.Content != "" {
-			textContent += evt.Content
-		}
-	}
-
-	if strings.Contains(textContent, "<tool_call") {
-		t.Fatalf("incomplete XML leaked on flush: %q", textContent)
-	}
-}
-
-// Test that the opening tag "<tool_calls>\n  " is NOT emitted as text content.
-func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) {
-	var state toolStreamSieveState
-	// First chunk is the opening tag - should be held, not emitted.
-	evts1 := processToolSieveChunk(&state, "<tool_calls>\n  ", []string{"read_file"})
-	for _, evt := range evts1 {
-		if strings.Contains(evt.Content, "<tool_calls>") {
-			t.Fatalf("opening tag leaked on first chunk: %q", evt.Content)
-		}
-	}
-
-	// Remaining content arrives.
-	evts2 := processToolSieveChunk(&state, "<tool_call>\n    <tool_name>read_file</tool_name>\n    <parameters>{\"path\":\"README.MD\"}</parameters>\n  </tool_call>\n</tool_calls>", []string{"read_file"})
-	evts2 = append(evts2, flushToolSieve(&state, []string{"read_file"})...)
-
-	var textContent string
-	var toolCalls int
-	allEvents := append(evts1, evts2...)
-	for _, evt := range allEvents {
-		if evt.Content != "" {
-			textContent += evt.Content
-		}
-		toolCalls += len(evt.ToolCalls)
-	}
-
-	if strings.Contains(textContent, "<tool_call") {
-		t.Fatalf("XML content leaked: %q", textContent)
-	}
-	if toolCalls == 0 {
-		t.Fatal("expected tool calls to be extracted")
-	}
-}
-
-func TestProcessToolSieveInterceptsAttemptCompletionLeak(t *testing.T) {
-	var state toolStreamSieveState
-	// Simulate an agent outputting attempt_completion XML tag 
-	// which shouldn't leak to text output, even if it fails to parse as a valid tool.
-	chunks := []string{
-		"Done with task.\n",
-		"<attempt_completion>\n",
-		"  <result>Here is the answer</result>\n",
-		"</attempt_completion>",
-	}
-	var events []toolStreamEvent
-	for _, c := range chunks {
-		events = append(events, processToolSieveChunk(&state, c, []string{"attempt_completion"})...)
-	}
-	events = append(events, flushToolSieve(&state, []string{"attempt_completion"})...)
-
-	var textContent string
-	for _, evt := range events {
-		if evt.Content != "" {
-			textContent += evt.Content
-		}
-	}
-
-	if !strings.Contains(textContent, "Done with task.\n") {
-		t.Fatalf("expected leading text to be emitted, got %q", textContent)
-	}
-
-	if strings.Contains(textContent, "<attempt_completion>") || strings.Contains(textContent, "result>") {
-		t.Fatalf("agent XML tag content leaked to text: %q", textContent)
-	}
-}
--- a/internal/adapter/openai/vercel_prepare_test.go
+++ b/internal/adapter/openai/vercel_prepare_test.go
@@ -1,83 +0,0 @@
-package openai
-
-import (
-	"ds2api/internal/auth"
-	"net/http/httptest"
-	"testing"
-	"time"
-)
-
-func TestIsVercelStreamPrepareRequest(t *testing.T) {
-	req := httptest.NewRequest("POST", "/v1/chat/completions?__stream_prepare=1", nil)
-	if !isVercelStreamPrepareRequest(req) {
-		t.Fatalf("expected prepare request to be detected")
-	}
-
-	req2 := httptest.NewRequest("POST", "/v1/chat/completions", nil)
-	if isVercelStreamPrepareRequest(req2) {
-		t.Fatalf("expected non-prepare request")
-	}
-}
-
-func TestIsVercelStreamReleaseRequest(t *testing.T) {
-	req := httptest.NewRequest("POST", "/v1/chat/completions?__stream_release=1", nil)
-	if !isVercelStreamReleaseRequest(req) {
-		t.Fatalf("expected release request to be detected")
-	}
-
-	req2 := httptest.NewRequest("POST", "/v1/chat/completions", nil)
-	if isVercelStreamReleaseRequest(req2) {
-		t.Fatalf("expected non-release request")
-	}
-}
-
-func TestVercelInternalSecret(t *testing.T) {
-	t.Run("prefer explicit secret", func(t *testing.T) {
-		t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret")
-		t.Setenv("DS2API_ADMIN_KEY", "admin-fallback")
-		if got := vercelInternalSecret(); got != "stream-secret" {
-			t.Fatalf("expected explicit secret, got %q", got)
-		}
-	})
-
-	t.Run("fallback to admin key", func(t *testing.T) {
-		t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "")
-		t.Setenv("DS2API_ADMIN_KEY", "admin-fallback")
-		if got := vercelInternalSecret(); got != "admin-fallback" {
-			t.Fatalf("expected admin key fallback, got %q", got)
-		}
-	})
-
-	t.Run("default admin when env missing", func(t *testing.T) {
-		t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "")
-		t.Setenv("DS2API_ADMIN_KEY", "")
-		if got := vercelInternalSecret(); got != "admin" {
-			t.Fatalf("expected default admin fallback, got %q", got)
-		}
-	})
-}
-
-func TestStreamLeaseLifecycle(t *testing.T) {
-	h := &Handler{}
-	leaseID := h.holdStreamLease(&auth.RequestAuth{UseConfigToken: false})
-	if leaseID == "" {
-		t.Fatalf("expected non-empty lease id")
-	}
-	if ok := h.releaseStreamLease(leaseID); !ok {
-		t.Fatalf("expected lease release success")
-	}
-	if ok := h.releaseStreamLease(leaseID); ok {
-		t.Fatalf("expected duplicate release to fail")
-	}
-}
-
-func TestStreamLeaseTTL(t *testing.T) {
-	t.Setenv("DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS", "120")
-	if got := streamLeaseTTL(); got != 120*time.Second {
-		t.Fatalf("expected ttl=120s, got %v", got)
-	}
-	t.Setenv("DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS", "invalid")
-	if got := streamLeaseTTL(); got != 15*time.Minute {
-		t.Fatalf("expected default ttl on invalid value, got %v", got)
-	}
-}
--- a/internal/admin/handler.go
+++ b/internal/admin/handler.go
@@ -1,45 +0,0 @@
-package admin
-
-import (
-	"github.com/go-chi/chi/v5"
-)
-
-type Handler struct {
-	Store ConfigStore
-	Pool  PoolController
-	DS    DeepSeekCaller
-}
-
-func RegisterRoutes(r chi.Router, h *Handler) {
-	r.Post("/login", h.login)
-	r.Get("/verify", h.verify)
-	r.Group(func(pr chi.Router) {
-		pr.Use(h.requireAdmin)
-		pr.Get("/vercel/config", h.getVercelConfig)
-		pr.Get("/config", h.getConfig)
-		pr.Post("/config", h.updateConfig)
-		pr.Get("/settings", h.getSettings)
-		pr.Put("/settings", h.updateSettings)
-		pr.Post("/settings/password", h.updateSettingsPassword)
-		pr.Post("/config/import", h.configImport)
-		pr.Get("/config/export", h.configExport)
-		pr.Post("/keys", h.addKey)
-		pr.Delete("/keys/{key}", h.deleteKey)
-		pr.Get("/accounts", h.listAccounts)
-		pr.Post("/accounts", h.addAccount)
-		pr.Delete("/accounts/{identifier}", h.deleteAccount)
-		pr.Get("/queue/status", h.queueStatus)
-		pr.Post("/accounts/test", h.testSingleAccount)
-		pr.Post("/accounts/test-all", h.testAllAccounts)
-		pr.Post("/accounts/sessions/delete-all", h.deleteAllSessions)
-		pr.Post("/import", h.batchImport)
-		pr.Post("/test", h.testAPI)
-		pr.Post("/vercel/sync", h.syncVercel)
-		pr.Get("/vercel/status", h.vercelStatus)
-		pr.Post("/vercel/status", h.vercelStatus)
-		pr.Get("/export", h.exportConfig)
-		pr.Get("/dev/captures", h.getDevCaptures)
-		pr.Delete("/dev/captures", h.clearDevCaptures)
-		pr.Get("/version", h.getVersion)
-	})
-}
--- a/internal/admin/handler_settings_parse.go
+++ b/internal/admin/handler_settings_parse.go
@@ -1,137 +0,0 @@
-package admin
-
-import (
-	"fmt"
-	"strings"
-
-	"ds2api/internal/config"
-)
-
-func boolFrom(v any) bool {
-	if v == nil {
-		return false
-	}
-	switch x := v.(type) {
-	case bool:
-		return x
-	case string:
-		return strings.ToLower(strings.TrimSpace(x)) == "true"
-	default:
-		return false
-	}
-}
-
-func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, *config.AutoDeleteConfig, map[string]string, map[string]string, error) {
-	var (
-		adminCfg      *config.AdminConfig
-		runtimeCfg    *config.RuntimeConfig
-		respCfg       *config.ResponsesConfig
-		embCfg        *config.EmbeddingsConfig
-		autoDeleteCfg *config.AutoDeleteConfig
-		claudeMap     map[string]string
-		aliasMap      map[string]string
-	)
-
-	if raw, ok := req["admin"].(map[string]any); ok {
-		cfg := &config.AdminConfig{}
-		if v, exists := raw["jwt_expire_hours"]; exists {
-			n := intFrom(v)
-			if n < 1 || n > 720 {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("admin.jwt_expire_hours must be between 1 and 720")
-			}
-			cfg.JWTExpireHours = n
-		}
-		adminCfg = cfg
-	}
-
-	if raw, ok := req["runtime"].(map[string]any); ok {
-		cfg := &config.RuntimeConfig{}
-		if v, exists := raw["account_max_inflight"]; exists {
-			n := intFrom(v)
-			if n < 1 || n > 256 {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.account_max_inflight must be between 1 and 256")
-			}
-			cfg.AccountMaxInflight = n
-		}
-		if v, exists := raw["account_max_queue"]; exists {
-			n := intFrom(v)
-			if n < 1 || n > 200000 {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.account_max_queue must be between 1 and 200000")
-			}
-			cfg.AccountMaxQueue = n
-		}
-		if v, exists := raw["global_max_inflight"]; exists {
-			n := intFrom(v)
-			if n < 1 || n > 200000 {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be between 1 and 200000")
-			}
-			cfg.GlobalMaxInflight = n
-		}
-		if v, exists := raw["token_refresh_interval_hours"]; exists {
-			n := intFrom(v)
-			if n < 1 || n > 720 {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.token_refresh_interval_hours must be between 1 and 720")
-			}
-			cfg.TokenRefreshIntervalHours = n
-		}
-		if cfg.AccountMaxInflight > 0 && cfg.GlobalMaxInflight > 0 && cfg.GlobalMaxInflight < cfg.AccountMaxInflight {
-			return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
-		}
-		runtimeCfg = cfg
-	}
-
-	if raw, ok := req["responses"].(map[string]any); ok {
-		cfg := &config.ResponsesConfig{}
-		if v, exists := raw["store_ttl_seconds"]; exists {
-			n := intFrom(v)
-			if n < 30 || n > 86400 {
-				return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("responses.store_ttl_seconds must be between 30 and 86400")
-			}
-			cfg.StoreTTLSeconds = n
-		}
-		respCfg = cfg
-	}
-
-	if raw, ok := req["embeddings"].(map[string]any); ok {
-		cfg := &config.EmbeddingsConfig{}
-		if v, exists := raw["provider"]; exists {
-			p := strings.TrimSpace(fmt.Sprintf("%v", v))
-			cfg.Provider = p
-		}
-		embCfg = cfg
-	}
-
-	if raw, ok := req["claude_mapping"].(map[string]any); ok {
-		claudeMap = map[string]string{}
-		for k, v := range raw {
-			key := strings.TrimSpace(k)
-			val := strings.TrimSpace(fmt.Sprintf("%v", v))
-			if key == "" || val == "" {
-				continue
-			}
-			claudeMap[key] = val
-		}
-	}
-
-	if raw, ok := req["model_aliases"].(map[string]any); ok {
-		aliasMap = map[string]string{}
-		for k, v := range raw {
-			key := strings.TrimSpace(k)
-			val := strings.TrimSpace(fmt.Sprintf("%v", v))
-			if key == "" || val == "" {
-				continue
-			}
-			aliasMap[key] = val
-		}
-	}
-
-	if raw, ok := req["auto_delete"].(map[string]any); ok {
-		cfg := &config.AutoDeleteConfig{}
-		if v, exists := raw["sessions"]; exists {
-			cfg.Sessions = boolFrom(v)
-		}
-		autoDeleteCfg = cfg
-	}
-
-	return adminCfg, runtimeCfg, respCfg, embCfg, autoDeleteCfg, claudeMap, aliasMap, nil
-}
--- a/internal/admin/handler_settings_test.go
+++ b/internal/admin/handler_settings_test.go
@@ -1,406 +0,0 @@
-package admin
-
-import (
-	"bytes"
-	"encoding/json"
-	"net/http"
-	"net/http/httptest"
-	"testing"
-
-	authn "ds2api/internal/auth"
-)
-
-func TestGetSettingsDefaultPasswordWarning(t *testing.T) {
-	t.Setenv("DS2API_ADMIN_KEY", "")
-	h := newAdminTestHandler(t, `{"keys":["k1"]}`)
-	req := httptest.NewRequest(http.MethodGet, "/admin/settings", nil)
-	rec := httptest.NewRecorder()
-	h.getSettings(rec, req)
-	if rec.Code != http.StatusOK {
-		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
-	}
-	var body map[string]any
-	_ = json.Unmarshal(rec.Body.Bytes(), &body)
-	admin, _ := body["admin"].(map[string]any)
-	warn, _ := admin["default_password_warning"].(bool)
-	if !warn {
-		t.Fatalf("expected default password warning true, body=%v", body)
-	}
-}
-
-func TestGetSettingsIncludesTokenRefreshInterval(t *testing.T) {
-	h := newAdminTestHandler(t, `{
-		"keys":["k1"],
-		"runtime":{"token_refresh_interval_hours":9}
-	}`)
-	req := httptest.NewRequest(http.MethodGet, "/admin/settings", nil)
-	rec := httptest.NewRecorder()
-	h.getSettings(rec, req)
-	if rec.Code != http.StatusOK {
-		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
-	}
-	var body map[string]any
-	_ = json.Unmarshal(rec.Body.Bytes(), &body)
-	runtime, _ := body["runtime"].(map[string]any)
-	if got := intFrom(runtime["token_refresh_interval_hours"]); got != 9 {
-		t.Fatalf("expected token_refresh_interval_hours=9, got %d body=%v", got, body)
-	}
-}
-
-func TestUpdateSettingsValidation(t *testing.T) {
-	h := newAdminTestHandler(t, `{"keys":["k1"]}`)
-	payload := map[string]any{
-		"runtime": map[string]any{
-			"account_max_inflight": 0,
-		},
-	}
-	b, _ := json.Marshal(payload)
-	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
-	rec := httptest.NewRecorder()
-	h.updateSettings(rec, req)
-	if rec.Code != http.StatusBadRequest {
-		t.Fatalf("expected 400, got %d body=%s", rec.Code, rec.Body.String())
-	}
-}
-
-func TestUpdateSettingsValidationRejectsTokenRefreshInterval(t *testing.T) {
-	h := newAdminTestHandler(t, `{"keys":["k1"]}`)
-	payload := map[string]any{
-		"runtime": map[string]any{
-			"token_refresh_interval_hours": 0,
-		},
-	}
-	b, _ := json.Marshal(payload)
-	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
-	rec := httptest.NewRecorder()
-	h.updateSettings(rec, req)
-	if rec.Code != http.StatusBadRequest {
-		t.Fatalf("expected 400, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	if !bytes.Contains(rec.Body.Bytes(), []byte("runtime.token_refresh_interval_hours")) {
-		t.Fatalf("expected token refresh validation detail, got %s", rec.Body.String())
-	}
-}
-
-func TestUpdateSettingsValidationWithMergedRuntimeSnapshot(t *testing.T) {
-	h := newAdminTestHandler(t, `{
-		"keys":["k1"],
-		"runtime":{
-			"account_max_inflight":8,
-			"global_max_inflight":8
-		}
-	}`)
-	payload := map[string]any{
-		"runtime": map[string]any{
-			"account_max_inflight": 16,
-		},
-	}
-	b, _ := json.Marshal(payload)
-	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
-	rec := httptest.NewRecorder()
-	h.updateSettings(rec, req)
-	if rec.Code != http.StatusBadRequest {
-		t.Fatalf("expected 400, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	if !bytes.Contains(rec.Body.Bytes(), []byte("runtime.global_max_inflight")) {
-		t.Fatalf("expected merged runtime validation detail, got %s", rec.Body.String())
-	}
-}
-
-func TestUpdateSettingsWithoutRuntimeSkipsMergedRuntimeValidation(t *testing.T) {
-	h := newAdminTestHandler(t, `{
-		"keys":["k1"],
-		"runtime":{
-			"account_max_inflight":8,
-			"global_max_inflight":4
-		}
-	}`)
-	payload := map[string]any{
-		"responses": map[string]any{
-			"store_ttl_seconds": 600,
-		},
-	}
-	b, _ := json.Marshal(payload)
-	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
-	rec := httptest.NewRecorder()
-	h.updateSettings(rec, req)
-	if rec.Code != http.StatusOK {
-		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	if got := h.Store.Snapshot().Responses.StoreTTLSeconds; got != 600 {
-		t.Fatalf("store_ttl_seconds=%d want=600", got)
-	}
-}
-
-func TestUpdateSettingsHotReloadRuntime(t *testing.T) {
-	h := newAdminTestHandler(t, `{
-		"keys":["k1"],
-		"accounts":[{"email":"a@test.com","token":"t1"},{"email":"b@test.com","token":"t2"}]
-	}`)
-
-	payload := map[string]any{
-		"runtime": map[string]any{
-			"account_max_inflight": 3,
-			"account_max_queue":    20,
-			"global_max_inflight":  5,
-		},
-	}
-	b, _ := json.Marshal(payload)
-	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
-	rec := httptest.NewRecorder()
-	h.updateSettings(rec, req)
-	if rec.Code != http.StatusOK {
-		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
-	}
-	status := h.Pool.Status()
-	if got := intFrom(status["max_inflight_per_account"]); got != 3 {
-		t.Fatalf("max_inflight_per_account=%d want=3", got)
-	}
-	if got := intFrom(status["max_queue_size"]); got != 20 {
-		t.Fatalf("max_queue_size=%d want=20", got)
-	}
-	if got := intFrom(status["global_max_inflight"]); got != 5 {
-		t.Fatalf("global_max_inflight=%d want=5", got)
-	}
-}
-
-func TestUpdateSettingsHotReloadTokenRefreshInterval(t *testing.T) {
-	h := newAdminTestHandler(t, `{
-		"keys":["k1"],
-		"runtime":{"token_refresh_interval_hours":6}
-	}`)
-
-	payload := map[string]any{
-		"runtime": map[string]any{
-			"token_refresh_interval_hours": 12,
-		},
-	}
-	b, _ := json.Marshal(payload)
-	req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
-	rec := httptest.NewRecorder()
-	h.updateSettings(rec, req)
-	if rec.Code != http.StatusOK {
-		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
-	}
-	if got := h.Store.RuntimeTokenRefreshIntervalHours(); got != 12 {
-		t.Fatalf("token_refresh_interval_hours=%d want=12", got)
-	}
-}
-
-func TestUpdateSettingsPasswordInvalidatesOldJWT(t *testing.T) {
-	hash := authn.HashAdminPassword("old-password")
-	h := newAdminTestHandler(t, `{"admin":{"password_hash":"`+hash+`"}}`)
-
-	token, err := authn.CreateJWTWithStore(1, h.Store)
-	if err != nil {
-		t.Fatalf("create jwt failed: %v", err)
-	}
-	if _, err := authn.VerifyJWTWithStore(token, h.Store); err != nil {
-		t.Fatalf("verify before update failed: %v", err)
-	}
-
-	body := map[string]any{"new_password": "new-password"}
-	b, _ := json.Marshal(body)
-	req := httptest.NewRequest(http.MethodPost, "/admin/settings/password", bytes.NewReader(b))
-	rec := httptest.NewRecorder()
-	h.updateSettingsPassword(rec, req)
-	if rec.Code != http.StatusOK {
-		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
-	}
-
-	if _, err := authn.VerifyJWTWithStore(token, h.Store); err == nil {
-		t.Fatal("expected old token to be invalid after password update")
-	}
-	if !authn.VerifyAdminCredential("new-password", h.Store) {
-		t.Fatal("expected new password credential to be accepted")
-	}
-}
-
-func TestConfigImportMergeAndReplace(t *testing.T) {
-	h := newAdminTestHandler(t, `{
-		"keys":["k1"],
-		"accounts":[{"email":"a@test.com","password":"p1"}]
-	}`)
-
-	merge := map[string]any{
-		"mode": "merge",
-		"config": map[string]any{
-			"keys": []any{"k1", "k2"},
-			"accounts": []any{
-				map[string]any{"email": "a@test.com", "password": "p1"},
-				map[string]any{"email": "b@test.com", "password": "p2"},
-			},
-		},
-	}
-	mergeBytes, _ := json.Marshal(merge)
-	mergeReq := httptest.NewRequest(http.MethodPost, "/admin/config/import?mode=merge", bytes.NewReader(mergeBytes))
-	mergeRec := httptest.NewRecorder()
-	h.configImport(mergeRec, mergeReq)
-	if mergeRec.Code != http.StatusOK {
-		t.Fatalf("merge status=%d body=%s", mergeRec.Code, mergeRec.Body.String())
-	}
-	if got := len(h.Store.Keys()); got != 2 {
-		t.Fatalf("keys after merge=%d want=2", got)
-	}
-	if got := len(h.Store.Accounts()); got != 2 {
-		t.Fatalf("accounts after merge=%d want=2", got)
-	}
-
-	replace := map[string]any{
-		"mode": "replace",
-		"config": map[string]any{
-			"keys": []any{"k9"},
-		},
-	}
-	replaceBytes, _ := json.Marshal(replace)
-	replaceReq := httptest.NewRequest(http.MethodPost, "/admin/config/import?mode=replace", bytes.NewReader(replaceBytes))
-	replaceRec := httptest.NewRecorder()
-	h.configImport(replaceRec, replaceReq)
-	if replaceRec.Code != http.StatusOK {
-		t.Fatalf("replace status=%d body=%s", replaceRec.Code, replaceRec.Body.String())
-	}
-	keys := h.Store.Keys()
-	if len(keys) != 1 || keys[0] != "k9" {
-		t.Fatalf("unexpected keys after replace: %#v", keys)
-	}
-	if got := len(h.Store.Accounts()); got != 0 {
-		t.Fatalf("accounts after replace=%d want=0", got)
-	}
-}
-
-func TestConfigImportAppliesTokenRefreshInterval(t *testing.T) {
-	h := newAdminTestHandler(t, `{"keys":["k1"]}`)
-
-	replace := map[string]any{
-		"mode": "replace",
-		"config": map[string]any{
-			"keys": []any{"k9"},
-			"runtime": map[string]any{
-				"token_refresh_interval_hours": 11,
-			},
-		},
-	}
-	replaceBytes, _ := json.Marshal(replace)
-	replaceReq := httptest.NewRequest(http.MethodPost, "/admin/config/import?mode=replace", bytes.NewReader(replaceBytes))
-	replaceRec := httptest.NewRecorder()
-	h.configImport(replaceRec, replaceReq)
-	if replaceRec.Code != http.StatusOK {
-		t.Fatalf("replace status=%d body=%s", replaceRec.Code, replaceRec.Body.String())
-	}
-	if got := h.Store.RuntimeTokenRefreshIntervalHours(); got != 11 {
-		t.Fatalf("token_refresh_interval_hours=%d want=11", got)
-	}
-}
-
-func TestConfigImportRejectsInvalidRuntimeBounds(t *testing.T) {
-	h := newAdminTestHandler(t, `{"keys":["k1"]}`)
-	payload := map[string]any{
-		"mode": "replace",
-		"config": map[string]any{
-			"keys": []any{"k2"},
-			"runtime": map[string]any{
-				"account_max_inflight": 300,
-			},
-		},
-	}
-	b, _ := json.Marshal(payload)
-	req := httptest.NewRequest(http.MethodPost, "/admin/config/import?mode=replace", bytes.NewReader(b))
-	rec := httptest.NewRecorder()
-	h.configImport(rec, req)
-	if rec.Code != http.StatusBadRequest {
-		t.Fatalf("expected 400, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	if !bytes.Contains(rec.Body.Bytes(), []byte("runtime.account_max_inflight")) {
-		t.Fatalf("expected runtime bound detail, got %s", rec.Body.String())
-	}
-	keys := h.Store.Keys()
-	if len(keys) != 1 || keys[0] != "k1" {
-		t.Fatalf("store should remain unchanged, keys=%v", keys)
-	}
-}
-
-func TestConfigImportRejectsMergedRuntimeConflict(t *testing.T) {
-	h := newAdminTestHandler(t, `{
-		"keys":["k1"],
-		"runtime":{
-			"account_max_inflight":8,
-			"global_max_inflight":8
-		}
-	}`)
-	payload := map[string]any{
-		"mode": "merge",
-		"config": map[string]any{
-			"runtime": map[string]any{
-				"account_max_inflight": 16,
-			},
-		},
-	}
-	b, _ := json.Marshal(payload)
-	req := httptest.NewRequest(http.MethodPost, "/admin/config/import?mode=merge", bytes.NewReader(b))
-	rec := httptest.NewRecorder()
-	h.configImport(rec, req)
-	if rec.Code != http.StatusBadRequest {
-		t.Fatalf("expected 400, got %d body=%s", rec.Code, rec.Body.String())
-	}
-	if !bytes.Contains(rec.Body.Bytes(), []byte("runtime.global_max_inflight")) {
-		t.Fatalf("expected merged runtime validation detail, got %s", rec.Body.String())
-	}
-	snap := h.Store.Snapshot()
-	if snap.Runtime.AccountMaxInflight != 8 || snap.Runtime.GlobalMaxInflight != 8 {
-		t.Fatalf("runtime should remain unchanged, runtime=%+v", snap.Runtime)
-	}
-}
-
-func TestConfigImportMergeDedupesMobileAliases(t *testing.T) {
-	h := newAdminTestHandler(t, `{
-		"keys":["k1"],
-		"accounts":[{"mobile":"+8613800138000","password":"p1"}]
-	}`)
-
-	merge := map[string]any{
-		"mode": "merge",
-		"config": map[string]any{
-			"accounts": []any{
-				map[string]any{"mobile": "13800138000", "password": "p2"},
-			},
-		},
-	}
-	b, _ := json.Marshal(merge)
-	req := httptest.NewRequest(http.MethodPost, "/admin/config/import?mode=merge", bytes.NewReader(b))
-	rec := httptest.NewRecorder()
-	h.configImport(rec, req)
-	if rec.Code != http.StatusOK {
-		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
-	}
-	if got := len(h.Store.Accounts()); got != 1 {
-		t.Fatalf("expected merge dedupe by canonical mobile, got=%d", got)
-	}
-}
-
-func TestUpdateConfigDedupesMobileAliases(t *testing.T) {
-	h := newAdminTestHandler(t, `{
-		"keys":["k1"],
-		"accounts":[{"mobile":"+8613800138000","password":"old"}]
-	}`)
-
-	reqBody := map[string]any{
-		"accounts": []any{
-			map[string]any{"mobile": "+8613800138000"},
-			map[string]any{"mobile": "13800138000"},
-		},
-	}
-	b, _ := json.Marshal(reqBody)
-	req := httptest.NewRequest(http.MethodPost, "/admin/config", bytes.NewReader(b))
-	rec := httptest.NewRecorder()
-	h.updateConfig(rec, req)
-	if rec.Code != http.StatusOK {
-		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
-	}
-	accounts := h.Store.Accounts()
-	if len(accounts) != 1 {
-		t.Fatalf("expected update dedupe by canonical mobile, got=%d", len(accounts))
-	}
-	if accounts[0].Identifier() != "+8613800138000" {
-		t.Fatalf("unexpected identifier: %q", accounts[0].Identifier())
-	}
-}
--- a/internal/admin/helpers.go
+++ b/internal/admin/helpers.go
@@ -1,155 +0,0 @@
-package admin
-
-import (
-	"fmt"
-	"net/http"
-	"strconv"
-	"strings"
-
-	"ds2api/internal/config"
-	"ds2api/internal/util"
-)
-
-// writeJSON and intFrom are package-internal aliases for the shared util versions.
-var writeJSON = util.WriteJSON
-var intFrom = util.IntFrom
-
-func reverseAccounts(a []config.Account) {
-	for i, j := 0, len(a)-1; i < j; i, j = i+1, j-1 {
-		a[i], a[j] = a[j], a[i]
-	}
-}
-
-func intFromQuery(r *http.Request, key string, d int) int {
-	v := r.URL.Query().Get(key)
-	if v == "" {
-		return d
-	}
-	n, err := strconv.Atoi(v)
-	if err != nil {
-		return d
-	}
-	return n
-}
-
-func nilIfEmpty(s string) any {
-	if s == "" {
-		return nil
-	}
-	return s
-}
-
-func nilIfZero(v int64) any {
-	if v == 0 {
-		return nil
-	}
-	return v
-}
-
-func toStringSlice(v any) ([]string, bool) {
-	arr, ok := v.([]any)
-	if !ok {
-		return nil, false
-	}
-	out := make([]string, 0, len(arr))
-	for _, item := range arr {
-		out = append(out, strings.TrimSpace(fmt.Sprintf("%v", item)))
-	}
-	return out, true
-}
-
-func toAccount(m map[string]any) config.Account {
-	email := fieldString(m, "email")
-	mobile := config.NormalizeMobileForStorage(fieldString(m, "mobile"))
-	return config.Account{
-		Email:    email,
-		Mobile:   mobile,
-		Password: fieldString(m, "password"),
-	}
-}
-
-func fieldString(m map[string]any, key string) string {
-	v, ok := m[key]
-	if !ok || v == nil {
-		return ""
-	}
-	return strings.TrimSpace(fmt.Sprintf("%v", v))
-}
-
-func statusOr(v int, d int) int {
-	if v == 0 {
-		return d
-	}
-	return v
-}
-
-func accountMatchesIdentifier(acc config.Account, identifier string) bool {
-	id := strings.TrimSpace(identifier)
-	if id == "" {
-		return false
-	}
-	if strings.TrimSpace(acc.Email) == id {
-		return true
-	}
-	if mobileKey := config.CanonicalMobileKey(id); mobileKey != "" && mobileKey == config.CanonicalMobileKey(acc.Mobile) {
-		return true
-	}
-	return acc.Identifier() == id
-}
-
-func normalizeAccountForStorage(acc config.Account) config.Account {
-	acc.Email = strings.TrimSpace(acc.Email)
-	acc.Mobile = config.NormalizeMobileForStorage(acc.Mobile)
-	return acc
-}
-
-func accountDedupeKey(acc config.Account) string {
-	if email := strings.TrimSpace(acc.Email); email != "" {
-		return "email:" + email
-	}
-	if mobile := config.CanonicalMobileKey(acc.Mobile); mobile != "" {
-		return "mobile:" + mobile
-	}
-	if id := strings.TrimSpace(acc.Identifier()); id != "" {
-		return "id:" + id
-	}
-	return ""
-}
-
-func normalizeAndDedupeAccounts(accounts []config.Account) []config.Account {
-	if len(accounts) == 0 {
-		return nil
-	}
-	out := make([]config.Account, 0, len(accounts))
-	seen := make(map[string]struct{}, len(accounts))
-	for _, acc := range accounts {
-		acc = normalizeAccountForStorage(acc)
-		key := accountDedupeKey(acc)
-		if key == "" {
-			continue
-		}
-		if _, ok := seen[key]; ok {
-			continue
-		}
-		seen[key] = struct{}{}
-		out = append(out, acc)
-	}
-	return out
-}
-
-func findAccountByIdentifier(store ConfigStore, identifier string) (config.Account, bool) {
-	id := strings.TrimSpace(identifier)
-	if id == "" {
-		return config.Account{}, false
-	}
-	if acc, ok := store.FindAccount(id); ok {
-		return acc, true
-	}
-	accounts := store.Snapshot().Accounts
-	for _, acc := range accounts {
-		if accountMatchesIdentifier(acc, id) {
-			return acc, true
-		}
-	}
-	return config.Account{}, false
-}
--- a/internal/admin/settings_validation.go
+++ b/internal/admin/settings_validation.go
@@ -1,51 +0,0 @@
-package admin
-
-import (
-	"fmt"
-	"strings"
-
-	"ds2api/internal/config"
-)
-
-func normalizeSettingsConfig(c *config.Config) {
-	if c == nil {
-		return
-	}
-	c.Admin.PasswordHash = strings.TrimSpace(c.Admin.PasswordHash)
-	c.Embeddings.Provider = strings.TrimSpace(c.Embeddings.Provider)
-}
-
-func validateSettingsConfig(c config.Config) error {
-	if c.Admin.JWTExpireHours != 0 && (c.Admin.JWTExpireHours < 1 || c.Admin.JWTExpireHours > 720) {
-		return fmt.Errorf("admin.jwt_expire_hours must be between 1 and 720")
-	}
-	if err := validateRuntimeSettings(c.Runtime); err != nil {
-		return err
-	}
-	if c.Responses.StoreTTLSeconds != 0 && (c.Responses.StoreTTLSeconds < 30 || c.Responses.StoreTTLSeconds > 86400) {
-		return fmt.Errorf("responses.store_ttl_seconds must be between 30 and 86400")
-	}
-	if c.Embeddings.Provider != "" && strings.TrimSpace(c.Embeddings.Provider) == "" {
-		return fmt.Errorf("embeddings.provider cannot be empty")
-	}
-	return nil
-}
-
-func validateRuntimeSettings(runtime config.RuntimeConfig) error {
-	if runtime.AccountMaxInflight != 0 && (runtime.AccountMaxInflight < 1 || runtime.AccountMaxInflight > 256) {
-		return fmt.Errorf("runtime.account_max_inflight must be between 1 and 256")
-	}
-	if runtime.AccountMaxQueue != 0 && (runtime.AccountMaxQueue < 1 || runtime.AccountMaxQueue > 200000) {
-		return fmt.Errorf("runtime.account_max_queue must be between 1 and 200000")
-	}
-	if runtime.GlobalMaxInflight != 0 && (runtime.GlobalMaxInflight < 1 || runtime.GlobalMaxInflight > 200000) {
-		return fmt.Errorf("runtime.global_max_inflight must be between 1 and 200000")
-	}
-	if runtime.TokenRefreshIntervalHours != 0 && (runtime.TokenRefreshIntervalHours < 1 || runtime.TokenRefreshIntervalHours > 720) {
-		return fmt.Errorf("runtime.token_refresh_interval_hours must be between 1 and 720")
-	}
-	if runtime.AccountMaxInflight > 0 && runtime.GlobalMaxInflight > 0 && runtime.GlobalMaxInflight < runtime.AccountMaxInflight {
-		return fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
-	}
-	return nil
-}
--- a/internal/auth/auth_edge_test.go
+++ b/internal/auth/auth_edge_test.go
@@ -130,9 +130,7 @@ func TestMarkTokenInvalidNotConfigToken(t *testing.T) {
 	a := &RequestAuth{UseConfigToken: false, DeepSeekToken: "direct", resolver: r}
 	r.MarkTokenInvalid(a)
 	// Should not panic, token should be unchanged for non-config
-	if a.DeepSeekToken != "" {
-		// Actually it does clear it; that's fine - let's check behavior
-	}
+	_ = a.DeepSeekToken // Actual behavior may clear it; this test only asserts no panic.
 }

 func TestMarkTokenInvalidEmptyAccountID(t *testing.T) {
@@ -204,6 +202,45 @@ func TestSwitchAccountNilTriedAccounts(t *testing.T) {
 	r.Release(a)
 }

+func TestSwitchAccountSkipsLoginFailureAndContinues(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["managed-key"],
+		"accounts":[
+			{"email":"acc1@test.com","password":"pwd","token":"t1"},
+			{"email":"acc2@test.com","password":"pwd"},
+			{"email":"acc3@test.com","password":"pwd","token":"t3"}
+		]
+	}`)
+	store := config.LoadStore()
+	pool := account.NewPool(store)
+	r := NewResolver(store, pool, func(_ context.Context, acc config.Account) (string, error) {
+		if acc.Email == "acc2@test.com" {
+			return "", errors.New("login failed")
+		}
+		return "new-token", nil
+	})
+
+	req, _ := http.NewRequest("POST", "/", nil)
+	req.Header.Set("Authorization", "Bearer managed-key")
+	a, err := r.Determine(req)
+	if err != nil {
+		t.Fatalf("determine failed: %v", err)
+	}
+	defer r.Release(a)
+	if a.AccountID != "acc1@test.com" {
+		t.Fatalf("expected first account, got %q", a.AccountID)
+	}
+	if !r.SwitchAccount(context.Background(), a) {
+		t.Fatal("expected switch to succeed after skipping failed account")
+	}
+	if a.AccountID != "acc3@test.com" {
+		t.Fatalf("expected fallback to third account, got %q", a.AccountID)
+	}
+	if !a.TriedAccounts["acc2@test.com"] {
+		t.Fatalf("expected failed account to be marked as tried")
+	}
+}
+
 // ─── Release edge cases ─────────────────────────────────────────────

 func TestReleaseNilAuth(t *testing.T) {
--- a/internal/auth/request.go
+++ b/internal/auth/request.go
@@ -70,25 +70,53 @@ func (r *Resolver) Determine(req *http.Request) (*RequestAuth, error) {
 		}, nil
 	}
 	target := strings.TrimSpace(req.Header.Get("X-Ds2-Target-Account"))
-	acc, ok := r.Pool.AcquireWait(ctx, target, nil)
-	if !ok {
-		return nil, ErrNoAccount
-	}
-	a := &RequestAuth{
-		UseConfigToken: true,
-		CallerID:       callerID,
-		AccountID:      acc.Identifier(),
-		Account:        acc,
-		TriedAccounts:  map[string]bool{},
-		resolver:       r,
-	}
-	if err := r.ensureManagedToken(ctx, a); err != nil {
-		r.Pool.Release(a.AccountID)
+	a, err := r.acquireManagedRequestAuth(ctx, callerID, target)
+	if err != nil {
 		return nil, err
 	}
 	return a, nil
 }

+func (r *Resolver) acquireManagedRequestAuth(ctx context.Context, callerID, target string) (*RequestAuth, error) {
+	tried := map[string]bool{}
+	var lastEnsureErr error
+	for {
+		if target == "" && len(tried) >= len(r.Store.Accounts()) {
+			if lastEnsureErr != nil {
+				return nil, lastEnsureErr
+			}
+			return nil, ErrNoAccount
+		}
+		acc, ok := r.Pool.AcquireWait(ctx, target, tried)
+		if !ok {
+			if lastEnsureErr != nil {
+				return nil, lastEnsureErr
+			}
+			return nil, ErrNoAccount
+		}
+
+		a := &RequestAuth{
+			UseConfigToken: true,
+			CallerID:       callerID,
+			AccountID:      acc.Identifier(),
+			Account:        acc,
+			TriedAccounts:  tried,
+			resolver:       r,
+		}
+
+		if err := r.ensureManagedToken(ctx, a); err != nil {
+			lastEnsureErr = err
+			tried[a.AccountID] = true
+			r.Pool.Release(a.AccountID)
+			if target != "" {
+				return nil, err
+			}
+			continue
+		}
+		return a, nil
+	}
+}
+
 // DetermineCaller resolves caller identity without acquiring any pooled account.
 // Use this for local-cache lookup routes that only need tenant isolation.
 func (r *Resolver) DetermineCaller(req *http.Request) (*RequestAuth, error) {
@@ -164,16 +192,20 @@ func (r *Resolver) SwitchAccount(ctx context.Context, a *RequestAuth) bool {
 		a.TriedAccounts[a.AccountID] = true
 		r.Pool.Release(a.AccountID)
 	}
-	acc, ok := r.Pool.Acquire("", a.TriedAccounts)
-	if !ok {
-		return false
+	for {
+		acc, ok := r.Pool.Acquire("", a.TriedAccounts)
+		if !ok {
+			return false
+		}
+		a.Account = acc
+		a.AccountID = acc.Identifier()
+		if err := r.ensureManagedToken(ctx, a); err != nil {
+			a.TriedAccounts[a.AccountID] = true
+			r.Pool.Release(a.AccountID)
+			continue
+		}
+		return true
 	}
-	a.Account = acc
-	a.AccountID = acc.Identifier()
-	if err := r.ensureManagedToken(ctx, a); err != nil {
-		return false
-	}
-	return true
 }

 func (r *Resolver) Release(a *RequestAuth) {
--- a/internal/auth/request_test.go
+++ b/internal/auth/request_test.go
@@ -2,6 +2,7 @@ package auth

 import (
 	"context"
+	"errors"
 	"net/http"
 	"sync/atomic"
 	"testing"
@@ -301,3 +302,96 @@ func TestDetermineManagedAccountUsesUpdatedRefreshInterval(t *testing.T) {
 		t.Fatalf("expected exactly one login after runtime update, got %d", got)
 	}
 }
+
+func TestDetermineManagedAccountRetriesOtherAccountOnLoginFailure(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["managed-key"],
+		"accounts":[
+			{"email":"bad@example.com","password":"pwd"},
+			{"email":"good@example.com","password":"pwd","token":"good-token"}
+		]
+	}`)
+	store := config.LoadStore()
+	pool := account.NewPool(store)
+	resolver := NewResolver(store, pool, func(_ context.Context, acc config.Account) (string, error) {
+		if acc.Email == "bad@example.com" {
+			return "", errors.New("stale account")
+		}
+		return "fresh-good-token", nil
+	})
+
+	req, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
+	req.Header.Set("x-api-key", "managed-key")
+
+	a, err := resolver.Determine(req)
+	if err != nil {
+		t.Fatalf("determine failed: %v", err)
+	}
+	defer resolver.Release(a)
+	if a.AccountID != "good@example.com" {
+		t.Fatalf("expected fallback to good account, got %q", a.AccountID)
+	}
+	if a.DeepSeekToken == "" {
+		t.Fatal("expected non-empty token from fallback account")
+	}
+	if !a.TriedAccounts["bad@example.com"] {
+		t.Fatalf("expected bad account to be tracked as tried")
+	}
+}
+
+func TestDetermineTargetAccountDoesNotFallbackOnLoginFailure(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["managed-key"],
+		"accounts":[
+			{"email":"bad@example.com","password":"pwd"},
+			{"email":"good@example.com","password":"pwd","token":"good-token"}
+		]
+	}`)
+	store := config.LoadStore()
+	pool := account.NewPool(store)
+	resolver := NewResolver(store, pool, func(_ context.Context, acc config.Account) (string, error) {
+		if acc.Email == "bad@example.com" {
+			return "", errors.New("stale account")
+		}
+		return "fresh-good-token", nil
+	})
+
+	req, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
+	req.Header.Set("x-api-key", "managed-key")
+	req.Header.Set("X-Ds2-Target-Account", "bad@example.com")
+
+	_, err := resolver.Determine(req)
+	if err == nil {
+		t.Fatal("expected determine to fail for broken target account")
+	}
+}
+
+func TestDetermineManagedAccountReturnsLastEnsureErrorWhenAllFail(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["managed-key"],
+		"accounts":[
+			{"email":"bad1@example.com","password":"pwd"},
+			{"email":"bad2@example.com","password":"pwd"}
+		]
+	}`)
+	store := config.LoadStore()
+	pool := account.NewPool(store)
+	ensureErr := errors.New("all credentials stale")
+	resolver := NewResolver(store, pool, func(_ context.Context, _ config.Account) (string, error) {
+		return "", ensureErr
+	})
+
+	req, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
+	req.Header.Set("x-api-key", "managed-key")
+
+	_, err := resolver.Determine(req)
+	if err == nil {
+		t.Fatal("expected determine to fail")
+	}
+	if !errors.Is(err, ensureErr) {
+		t.Fatalf("expected ensure error, got %v", err)
+	}
+	if errors.Is(err, ErrNoAccount) {
+		t.Fatalf("expected auth-style ensure error, got ErrNoAccount")
+	}
+}
--- a/internal/chathistory/store.go
+++ b/internal/chathistory/store.go
@@ -0,0 +1,802 @@
+package chathistory
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/google/uuid"
+
+	"ds2api/internal/config"
+	"ds2api/internal/util"
+)
+
+const (
+	FileVersion      = 2
+	DisabledLimit    = 0
+	DefaultLimit     = 20
+	MaxLimit         = 50
+	defaultPreviewAt = 160
+)
+
+var allowedLimits = map[int]struct{}{
+	DisabledLimit: {},
+	10:            {},
+	20:            {},
+	50:            {},
+}
+
+var ErrDisabled = errors.New("chat history disabled")
+
+type Entry struct {
+	ID               string         `json:"id"`
+	Revision         int64          `json:"revision"`
+	CreatedAt        int64          `json:"created_at"`
+	UpdatedAt        int64          `json:"updated_at"`
+	CompletedAt      int64          `json:"completed_at,omitempty"`
+	Status           string         `json:"status"`
+	CallerID         string         `json:"caller_id,omitempty"`
+	AccountID        string         `json:"account_id,omitempty"`
+	Model            string         `json:"model,omitempty"`
+	Stream           bool           `json:"stream"`
+	UserInput        string         `json:"user_input,omitempty"`
+	Messages         []Message      `json:"messages,omitempty"`
+	HistoryText      string         `json:"history_text,omitempty"`
+	FinalPrompt      string         `json:"final_prompt,omitempty"`
+	ReasoningContent string         `json:"reasoning_content,omitempty"`
+	Content          string         `json:"content,omitempty"`
+	Error            string         `json:"error,omitempty"`
+	StatusCode       int            `json:"status_code,omitempty"`
+	ElapsedMs        int64          `json:"elapsed_ms,omitempty"`
+	FinishReason     string         `json:"finish_reason,omitempty"`
+	Usage            map[string]any `json:"usage,omitempty"`
+}
+
+type Message struct {
+	Role    string `json:"role"`
+	Content string `json:"content"`
+}
+
+type SummaryEntry struct {
+	ID             string `json:"id"`
+	Revision       int64  `json:"revision"`
+	CreatedAt      int64  `json:"created_at"`
+	UpdatedAt      int64  `json:"updated_at"`
+	CompletedAt    int64  `json:"completed_at,omitempty"`
+	Status         string `json:"status"`
+	CallerID       string `json:"caller_id,omitempty"`
+	AccountID      string `json:"account_id,omitempty"`
+	Model          string `json:"model,omitempty"`
+	Stream         bool   `json:"stream"`
+	UserInput      string `json:"user_input,omitempty"`
+	Preview        string `json:"preview,omitempty"`
+	StatusCode     int    `json:"status_code,omitempty"`
+	ElapsedMs      int64  `json:"elapsed_ms,omitempty"`
+	FinishReason   string `json:"finish_reason,omitempty"`
+	DetailRevision int64  `json:"detail_revision"`
+}
+
+type File struct {
+	Version  int            `json:"version"`
+	Limit    int            `json:"limit"`
+	Revision int64          `json:"revision"`
+	Items    []SummaryEntry `json:"items"`
+}
+
+type StartParams struct {
+	CallerID    string
+	AccountID   string
+	Model       string
+	Stream      bool
+	UserInput   string
+	Messages    []Message
+	HistoryText string
+	FinalPrompt string
+}
+
+type UpdateParams struct {
+	Status           string
+	ReasoningContent string
+	Content          string
+	Error            string
+	StatusCode       int
+	ElapsedMs        int64
+	FinishReason     string
+	Usage            map[string]any
+	Completed        bool
+}
+
+type detailEnvelope struct {
+	Version int   `json:"version"`
+	Item    Entry `json:"item"`
+}
+
+type legacyFile struct {
+	Version int     `json:"version"`
+	Limit   int     `json:"limit"`
+	Items   []Entry `json:"items"`
+}
+
+type legacyProbe struct {
+	Items []map[string]json.RawMessage `json:"items"`
+}
+
+type Store struct {
+	mu        sync.Mutex
+	path      string
+	detailDir string
+	state     File
+	details   map[string]Entry
+	dirty     map[string]struct{}
+	deleted   map[string]struct{}
+	err       error
+}
+
+func New(path string) *Store {
+	s := &Store{
+		path:      strings.TrimSpace(path),
+		detailDir: strings.TrimSpace(path) + ".d",
+		state: File{
+			Version:  FileVersion,
+			Limit:    DefaultLimit,
+			Revision: 0,
+			Items:    []SummaryEntry{},
+		},
+		details: map[string]Entry{},
+		dirty:   map[string]struct{}{},
+		deleted: map[string]struct{}{},
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.err = s.loadLocked()
+	return s
+}
+
+func (s *Store) Path() string {
+	if s == nil {
+		return ""
+	}
+	return s.path
+}
+
+func (s *Store) DetailDir() string {
+	if s == nil {
+		return ""
+	}
+	return s.detailDir
+}
+
+func (s *Store) Err() error {
+	if s == nil {
+		return errors.New("chat history store is nil")
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	return s.err
+}
+
+func (s *Store) Snapshot() (File, error) {
+	if s == nil {
+		return File{}, errors.New("chat history store is nil")
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.err != nil {
+		return File{}, s.err
+	}
+	return cloneFile(s.state), nil
+}
+
+// Revision returns the index revision counter. It fails for a nil store or
+// when the initial load recorded an error.
+func (s *Store) Revision() (int64, error) {
+	if s == nil {
+		return 0, errors.New("chat history store is nil")
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if loadErr := s.err; loadErr != nil {
+		return 0, loadErr
+	}
+	current := s.state.Revision
+	return current, nil
+}
+
+// Enabled reports whether new entries may be recorded: the store must be
+// non-nil, have loaded without error, and not be set to DisabledLimit.
+func (s *Store) Enabled() bool {
+	if s == nil {
+		return false
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	return s.err == nil && s.state.Limit != DisabledLimit
+}
+
+// Get returns a copy of the full entry stored under id (whitespace-trimmed).
+// It fails for a nil store, a recorded load error, or an unknown id.
+func (s *Store) Get(id string) (Entry, error) {
+	if s == nil {
+		return Entry{}, errors.New("chat history store is nil")
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.err != nil {
+		return Entry{}, s.err
+	}
+	key := strings.TrimSpace(id)
+	if found, exists := s.details[key]; exists {
+		return cloneEntry(found), nil
+	}
+	return Entry{}, errors.New("chat history entry not found")
+}
+
+// DetailRevision returns the revision of the entry stored under id
+// (whitespace-trimmed). It fails for a nil store, a recorded load error, or
+// an unknown id.
+func (s *Store) DetailRevision(id string) (int64, error) {
+	if s == nil {
+		return 0, errors.New("chat history store is nil")
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.err != nil {
+		return 0, s.err
+	}
+	key := strings.TrimSpace(id)
+	if found, exists := s.details[key]; exists {
+		return found.Revision, nil
+	}
+	return 0, errors.New("chat history entry not found")
+}
+
+// Start records a new entry in "streaming" status built from params and
+// persists it. It returns ErrDisabled when the limit is DisabledLimit.
+//
+// When persisting fails the entry is still kept in memory and a copy is
+// returned together with the save error, so callers can keep updating it.
+func (s *Store) Start(params StartParams) (Entry, error) {
+	if s == nil {
+		return Entry{}, errors.New("chat history store is nil")
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	switch {
+	case s.err != nil:
+		return Entry{}, s.err
+	case s.state.Limit == DisabledLimit:
+		return Entry{}, ErrDisabled
+	}
+
+	stamp := time.Now().UnixMilli()
+	rev := s.nextRevisionLocked()
+
+	var created Entry
+	created.ID = "chat_" + strings.ReplaceAll(uuid.NewString(), "-", "")
+	created.Revision = rev
+	created.CreatedAt = stamp
+	created.UpdatedAt = stamp
+	created.Status = "streaming"
+	created.CallerID = strings.TrimSpace(params.CallerID)
+	created.AccountID = strings.TrimSpace(params.AccountID)
+	created.Model = strings.TrimSpace(params.Model)
+	created.Stream = params.Stream
+	created.UserInput = strings.TrimSpace(params.UserInput)
+	created.Messages = cloneMessages(params.Messages)
+	created.HistoryText = params.HistoryText
+	created.FinalPrompt = strings.TrimSpace(params.FinalPrompt)
+
+	s.details[created.ID] = created
+	s.markDetailDirtyLocked(created.ID)
+	s.rebuildIndexLocked()
+
+	// The entry is handed back whether or not the save succeeded.
+	saveErr := s.saveLocked()
+	return cloneEntry(created), saveErr
+}
+
+// Update applies params to the entry stored under id (whitespace-trimmed),
+// bumps its revision and UpdatedAt, and persists the change.
+//
+// Status is replaced only when non-empty. ReasoningContent and Content are
+// replaced unless the new value is empty while the stored one is not. Error,
+// StatusCode, ElapsedMs and FinishReason are always overwritten. Usage is
+// copied when non-nil, and Completed stamps CompletedAt. On a save failure
+// the in-memory entry keeps the change but an empty Entry is returned with
+// the error.
+func (s *Store) Update(id string, params UpdateParams) (Entry, error) {
+	if s == nil {
+		return Entry{}, errors.New("chat history store is nil")
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.err != nil {
+		return Entry{}, s.err
+	}
+	key := strings.TrimSpace(id)
+	if key == "" {
+		return Entry{}, errors.New("history id is required")
+	}
+	current, found := s.details[key]
+	if !found {
+		return Entry{}, errors.New("chat history entry not found")
+	}
+
+	stamp := time.Now().UnixMilli()
+	current.Revision = s.nextRevisionLocked()
+	current.UpdatedAt = stamp
+	if status := params.Status; status != "" {
+		current.Status = status
+	}
+	// An empty incoming text never wipes text that was already captured.
+	if !(params.ReasoningContent == "" && current.ReasoningContent != "") {
+		current.ReasoningContent = params.ReasoningContent
+	}
+	if !(params.Content == "" && current.Content != "") {
+		current.Content = params.Content
+	}
+	current.Error = strings.TrimSpace(params.Error)
+	current.StatusCode = params.StatusCode
+	current.ElapsedMs = params.ElapsedMs
+	current.FinishReason = strings.TrimSpace(params.FinishReason)
+	if usage := params.Usage; usage != nil {
+		current.Usage = cloneMap(usage)
+	}
+	if params.Completed {
+		current.CompletedAt = stamp
+	}
+
+	s.details[key] = current
+	s.markDetailDirtyLocked(key)
+	s.rebuildIndexLocked()
+	if saveErr := s.saveLocked(); saveErr != nil {
+		return Entry{}, saveErr
+	}
+	return cloneEntry(current), nil
+}
+
+// Delete removes the entry stored under id (whitespace-trimmed), bumps the
+// index revision and persists the change. It fails for a nil store, a
+// recorded load error, an empty id, or an unknown id.
+func (s *Store) Delete(id string) error {
+	if s == nil {
+		return errors.New("chat history store is nil")
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.err != nil {
+		return s.err
+	}
+	key := strings.TrimSpace(id)
+	if key == "" {
+		return errors.New("history id is required")
+	}
+	if _, known := s.details[key]; !known {
+		return errors.New("chat history entry not found")
+	}
+	s.markDetailDeletedLocked(key)
+	delete(s.details, key)
+	s.nextRevisionLocked()
+	s.rebuildIndexLocked()
+	return s.saveLocked()
+}
+
+// Clear drops every entry, schedules all of their detail files for removal,
+// bumps the index revision and persists the now-empty index.
+func (s *Store) Clear() error {
+	if s == nil {
+		return errors.New("chat history store is nil")
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.err != nil {
+		return s.err
+	}
+	for key := range s.details {
+		s.markDetailDeletedLocked(key)
+	}
+	s.details = make(map[string]Entry)
+	s.nextRevisionLocked()
+	s.rebuildIndexLocked()
+	return s.saveLocked()
+}
+
+// SetLimit changes the retention limit, rebuilds the index under the new
+// limit, persists it, and returns a copy of the resulting index. Values
+// rejected by isAllowedLimit produce an error and leave the store unchanged.
+func (s *Store) SetLimit(limit int) (File, error) {
+	if s == nil {
+		return File{}, errors.New("chat history store is nil")
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	switch {
+	case s.err != nil:
+		return File{}, s.err
+	case !isAllowedLimit(limit):
+		return File{}, fmt.Errorf("unsupported chat history limit: %d", limit)
+	}
+	s.state.Limit = limit
+	s.nextRevisionLocked()
+	s.rebuildIndexLocked()
+	if saveErr := s.saveLocked(); saveErr != nil {
+		return File{}, saveErr
+	}
+	return cloneFile(s.state), nil
+}
+
+// loadLocked populates the store from disk. The caller must hold s.mu.
+//
+// It creates the index's parent directory and the detail directory, then
+// reads the index file. A missing index is not an error: the empty state is
+// written out (best effort) and nil is returned. An index that parseLegacy
+// identifies as the legacy layout is migrated in memory and written back
+// (best effort). Otherwise the index is decoded and every listed item's
+// detail file is read; a failure to read or decode any of them fails the
+// whole load. Write-back failures on these paths are only logged.
+func (s *Store) loadLocked() error {
+	if strings.TrimSpace(s.path) == "" {
+		return errors.New("chat history path is required")
+	}
+	// A MkdirAll failure is ignored when the index lives in the current directory.
+	if err := os.MkdirAll(filepath.Dir(s.path), 0o755); err != nil && filepath.Dir(s.path) != "." {
+		return fmt.Errorf("create chat history dir: %w", err)
+	}
+	if err := os.MkdirAll(s.detailDir, 0o755); err != nil {
+		return fmt.Errorf("create chat history detail dir: %w", err)
+	}
+
+	raw, err := os.ReadFile(s.path)
+	if err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			// First run: try to bootstrap an empty index, but stay usable if that fails.
+			if saveErr := s.saveLocked(); saveErr != nil {
+				config.Logger.Warn("[chat_history] bootstrap write failed", "path", s.path, "error", saveErr)
+			}
+			return nil
+		}
+		return fmt.Errorf("read chat history index: %w", err)
+	}
+
+	legacy, legacyOK, legacyErr := parseLegacy(raw)
+	if legacyErr != nil {
+		return legacyErr
+	}
+	if legacyOK {
+		// Migrated entries stay in memory even when the write-back fails.
+		s.loadLegacyLocked(legacy)
+		if err := s.saveLocked(); err != nil {
+			config.Logger.Warn("[chat_history] legacy migration writeback failed", "path", s.path, "error", err)
+		}
+		return nil
+	}
+
+	var state File
+	if err := json.Unmarshal(raw, &state); err != nil {
+		return fmt.Errorf("decode chat history index: %w", err)
+	}
+	if state.Version == 0 {
+		state.Version = FileVersion
+	}
+	if !isAllowedLimit(state.Limit) {
+		state.Limit = DefaultLimit
+	}
+	s.state = cloneFile(state)
+	s.details = map[string]Entry{}
+	for _, item := range state.Items {
+		detail, err := readDetailFile(filepath.Join(s.detailDir, item.ID+".json"))
+		if err != nil {
+			return err
+		}
+		s.details[item.ID] = detail
+	}
+	// Summaries are regenerated from the detail files rather than trusted from the index.
+	s.rebuildIndexLocked()
+	if saveErr := s.saveLocked(); saveErr != nil {
+		config.Logger.Warn("[chat_history] index rewrite failed", "path", s.path, "error", saveErr)
+	}
+	return nil
+}
+
+// loadLegacyLocked replaces the in-memory state with the contents of a
+// legacy index. The caller must hold s.mu.
+//
+// Entries with a blank ID are skipped. An entry without a revision gets its
+// UpdatedAt as revision, or the current UnixNano time when UpdatedAt is not
+// positive. Every imported entry is marked dirty so the next save writes its
+// detail file, and the index revision becomes the highest entry revision.
+func (s *Store) loadLegacyLocked(legacy legacyFile) {
+	s.state.Version = FileVersion
+	if isAllowedLimit(legacy.Limit) {
+		s.state.Limit = legacy.Limit
+	} else {
+		s.state.Limit = DefaultLimit
+	}
+	s.details = make(map[string]Entry)
+	s.dirty = make(map[string]struct{})
+	s.deleted = make(map[string]struct{})
+
+	var newest int64
+	for _, entry := range legacy.Items {
+		if strings.TrimSpace(entry.ID) == "" {
+			continue
+		}
+		entry.Messages = cloneMessages(entry.Messages)
+		switch {
+		case entry.Revision != 0:
+			// Already carries a revision; keep it.
+		case entry.UpdatedAt > 0:
+			entry.Revision = entry.UpdatedAt
+		default:
+			entry.Revision = time.Now().UnixNano()
+		}
+		if newest < entry.Revision {
+			newest = entry.Revision
+		}
+		s.details[entry.ID] = entry
+		s.markDetailDirtyLocked(entry.ID)
+	}
+	s.state.Revision = newest
+	s.rebuildIndexLocked()
+}
+
+// saveLocked persists pending changes. The caller must hold s.mu.
+//
+// After normalising version and limit and rebuilding the index, it removes
+// the detail files of deleted entries, rewrites the detail files of dirty
+// entries, and finally rewrites the index file. The pending dirty/deleted
+// sets are cleared only when every step succeeds, so a failed save leaves
+// them in place for the next attempt.
+func (s *Store) saveLocked() error {
+	s.state.Version = FileVersion
+	if !isAllowedLimit(s.state.Limit) {
+		s.state.Limit = DefaultLimit
+	}
+	// Rebuilding may trim entries over the limit and mark them deleted.
+	s.rebuildIndexLocked()
+
+	if err := os.MkdirAll(s.detailDir, 0o755); err != nil {
+		return fmt.Errorf("create chat history detail dir: %w", err)
+	}
+	for _, id := range sortedDetailIDs(s.deleted) {
+		path := filepath.Join(s.detailDir, id+".json")
+		// A detail file that is already gone counts as removed.
+		if err := os.Remove(path); err != nil && !errors.Is(err, os.ErrNotExist) {
+			return fmt.Errorf("remove stale chat history detail: %w", err)
+		}
+	}
+	for _, id := range sortedDetailIDs(s.dirty) {
+		item, ok := s.details[id]
+		if !ok {
+			// Marked dirty but no longer held in memory; nothing to write.
+			continue
+		}
+		path := filepath.Join(s.detailDir, id+".json")
+		payload, err := json.MarshalIndent(detailEnvelope{
+			Version: FileVersion,
+			Item:    item,
+		}, "", "  ")
+		if err != nil {
+			return fmt.Errorf("encode chat history detail: %w", err)
+		}
+		if err := writeFileAtomic(path, append(payload, '\n')); err != nil {
+			return err
+		}
+	}
+
+	// The index is written last, after the detail files it refers to.
+	payload, err := json.MarshalIndent(s.state, "", "  ")
+	if err != nil {
+		return fmt.Errorf("encode chat history index: %w", err)
+	}
+	if err := writeFileAtomic(s.path, append(payload, '\n')); err != nil {
+		return err
+	}
+	s.clearPendingDetailChangesLocked()
+	return nil
+}
+
+// rebuildIndexLocked regenerates s.state.Items from s.details and enforces
+// the retention limit. The caller must hold s.mu.
+//
+// Summaries are ordered newest first by UpdatedAt, then CreatedAt. Both are
+// millisecond timestamps, so entries written in quick succession can tie;
+// ties are broken by Revision (higher first) and finally by ID. Without the
+// tie-breaks the order would depend on map iteration and on sort.Slice,
+// which is not stable, making both the persisted order and the choice of
+// trimmed entries nondeterministic.
+//
+// With DisabledLimit every entry is kept. Otherwise entries beyond the limit
+// are dropped from s.details and marked deleted so the next save removes
+// their detail files.
+func (s *Store) rebuildIndexLocked() {
+	summaries := make([]SummaryEntry, 0, len(s.details))
+	for _, item := range s.details {
+		summaries = append(summaries, summaryFromEntry(item))
+	}
+	sort.Slice(summaries, func(i, j int) bool {
+		a, b := &summaries[i], &summaries[j]
+		switch {
+		case a.UpdatedAt != b.UpdatedAt:
+			return a.UpdatedAt > b.UpdatedAt
+		case a.CreatedAt != b.CreatedAt:
+			return a.CreatedAt > b.CreatedAt
+		case a.Revision != b.Revision:
+			return a.Revision > b.Revision
+		default:
+			return a.ID < b.ID
+		}
+	})
+	if s.state.Limit < DisabledLimit || !isAllowedLimit(s.state.Limit) {
+		s.state.Limit = DefaultLimit
+	}
+	if s.state.Limit == DisabledLimit {
+		s.state.Items = summaries
+		return
+	}
+	if len(summaries) > s.state.Limit {
+		keep := make(map[string]struct{}, s.state.Limit)
+		for _, item := range summaries[:s.state.Limit] {
+			keep[item.ID] = struct{}{}
+		}
+		// Deleting from a map while ranging over it is permitted in Go.
+		for id := range s.details {
+			if _, ok := keep[id]; !ok {
+				s.markDetailDeletedLocked(id)
+				delete(s.details, id)
+			}
+		}
+		summaries = summaries[:s.state.Limit]
+	}
+	s.state.Items = summaries
+}
+
+// nextRevisionLocked advances and returns the index revision. The caller
+// must hold s.mu. The new value is the current UnixNano time when that is
+// greater than the stored revision, and the stored revision plus one
+// otherwise, so revisions always increase.
+func (s *Store) nextRevisionLocked() int64 {
+	if now := time.Now().UnixNano(); now > s.state.Revision {
+		s.state.Revision = now
+	} else {
+		s.state.Revision++
+	}
+	return s.state.Revision
+}
+
+// summaryFromEntry projects a full entry onto the summary stored in the
+// index. Preview comes from buildPreview, and DetailRevision mirrors the
+// entry's own revision.
+func summaryFromEntry(item Entry) SummaryEntry {
+	var out SummaryEntry
+	out.ID = item.ID
+	out.Revision = item.Revision
+	out.CreatedAt = item.CreatedAt
+	out.UpdatedAt = item.UpdatedAt
+	out.CompletedAt = item.CompletedAt
+	out.Status = item.Status
+	out.CallerID = item.CallerID
+	out.AccountID = item.AccountID
+	out.Model = item.Model
+	out.Stream = item.Stream
+	out.UserInput = item.UserInput
+	out.Preview = buildPreview(item)
+	out.StatusCode = item.StatusCode
+	out.ElapsedMs = item.ElapsedMs
+	out.FinishReason = item.FinishReason
+	out.DetailRevision = item.Revision
+	return out
+}
+
+// buildPreview picks the text shown in the summary list: the first of
+// Content, ReasoningContent, Error and UserInput that is non-blank after
+// trimming. When util.TruncateRunes reports that it shortened the text to
+// defaultPreviewAt, "..." is appended.
+func buildPreview(item Entry) string {
+	preview := ""
+	for _, source := range []string{item.Content, item.ReasoningContent, item.Error, item.UserInput} {
+		if preview = strings.TrimSpace(source); preview != "" {
+			break
+		}
+	}
+	short, cut := util.TruncateRunes(preview, defaultPreviewAt)
+	if !cut {
+		return preview
+	}
+	return short + "..."
+}
+
+// readDetailFile reads one detail file and returns a copy of the entry inside
+// its envelope. Read and decode failures are wrapped with context.
+func readDetailFile(path string) (Entry, error) {
+	data, readErr := os.ReadFile(path)
+	if readErr != nil {
+		return Entry{}, fmt.Errorf("read chat history detail: %w", readErr)
+	}
+	envelope := detailEnvelope{}
+	if decodeErr := json.Unmarshal(data, &envelope); decodeErr != nil {
+		return Entry{}, fmt.Errorf("decode chat history detail: %w", decodeErr)
+	}
+	return cloneEntry(envelope.Item), nil
+}
+
+// parseLegacy decides whether raw is a legacy index. The boolean is true
+// only when raw decodes, contains at least one item, and no item carries a
+// "detail_revision" key. Undecodable input is reported as "not legacy"
+// rather than as an error; the returned error is always nil.
+func parseLegacy(raw []byte) (legacyFile, bool, error) {
+	var parsed legacyFile
+	if json.Unmarshal(raw, &parsed) != nil {
+		return legacyFile{}, false, nil
+	}
+	if len(parsed.Items) == 0 {
+		return parsed, false, nil
+	}
+	var shape legacyProbe
+	if json.Unmarshal(raw, &shape) != nil {
+		// The key probe could not be decoded; fall back to treating it as legacy.
+		return parsed, true, nil
+	}
+	for _, fields := range shape.Items {
+		if _, split := fields["detail_revision"]; split {
+			return parsed, false, nil
+		}
+	}
+	return parsed, true, nil
+}
+
+// writeFileAtomic writes body to path by creating a temporary file in the
+// same directory, writing and syncing it, closing it, and renaming it over
+// path. On any failure the temporary file is removed, and the returned error
+// joins the primary failure with any close or cleanup error.
+func writeFileAtomic(path string, body []byte) error {
+	dir := filepath.Dir(path)
+	if dir == "" {
+		dir = "."
+	}
+	if dir != "." {
+		if err := os.MkdirAll(dir, 0o755); err != nil {
+			return fmt.Errorf("create chat history dir: %w", err)
+		}
+	}
+	// The temp file is created next to the target so the rename stays within one directory.
+	tmpFile, err := os.CreateTemp(dir, ".chat-history-*.tmp")
+	if err != nil {
+		return fmt.Errorf("create temp chat history: %w", err)
+	}
+	tmpPath := tmpFile.Name()
+	// cleanup removes the temp file; an already-missing file is not an error.
+	cleanup := func() error {
+		if err := os.Remove(tmpPath); err != nil && !errors.Is(err, os.ErrNotExist) {
+			return fmt.Errorf("remove temp chat history: %w", err)
+		}
+		return nil
+	}
+	// withCleanup joins the primary error with any close and cleanup errors.
+	withCleanup := func(primary error, closeErr error) error {
+		errs := []error{primary}
+		if closeErr != nil {
+			errs = append(errs, fmt.Errorf("close temp chat history: %w", closeErr))
+		}
+		if cleanupErr := cleanup(); cleanupErr != nil {
+			errs = append(errs, cleanupErr)
+		}
+		return errors.Join(errs...)
+	}
+	if _, err := tmpFile.Write(body); err != nil {
+		return withCleanup(fmt.Errorf("write temp chat history: %w", err), tmpFile.Close())
+	}
+	if err := tmpFile.Sync(); err != nil {
+		return withCleanup(fmt.Errorf("sync temp chat history: %w", err), tmpFile.Close())
+	}
+	if err := tmpFile.Close(); err != nil {
+		if cleanupErr := cleanup(); cleanupErr != nil {
+			return errors.Join(fmt.Errorf("close temp chat history: %w", err), cleanupErr)
+		}
+		return fmt.Errorf("close temp chat history: %w", err)
+	}
+	if err := os.Rename(tmpPath, path); err != nil {
+		if cleanupErr := cleanup(); cleanupErr != nil {
+			return errors.Join(fmt.Errorf("promote temp chat history: %w", err), cleanupErr)
+		}
+		return fmt.Errorf("promote temp chat history: %w", err)
+	}
+	return nil
+}
+
+// ListETag formats the weak ETag for the history list at the given revision.
+func ListETag(revision int64) string {
+	const layout = `W/"chat-history-list-%d"`
+	return fmt.Sprintf(layout, revision)
+}
+
+// DetailETag formats the weak ETag for one entry's detail view from its
+// whitespace-trimmed id and revision.
+func DetailETag(id string, revision int64) string {
+	trimmed := strings.TrimSpace(id)
+	return fmt.Sprintf(`W/"chat-history-detail-%s-%d"`, trimmed, revision)
+}
+
+// isAllowedLimit reports whether limit is a key of allowedLimits.
+func isAllowedLimit(limit int) bool {
+	if _, known := allowedLimits[limit]; known {
+		return true
+	}
+	return false
+}
+
+// markDetailDirtyLocked schedules the detail file for id to be written on
+// the next save and cancels any pending removal of it. Blank ids are
+// ignored. The caller must hold s.mu.
+func (s *Store) markDetailDirtyLocked(id string) {
+	key := strings.TrimSpace(id)
+	if key == "" {
+		return
+	}
+	if s.dirty == nil {
+		s.dirty = make(map[string]struct{})
+	}
+	if s.deleted == nil {
+		s.deleted = make(map[string]struct{})
+	}
+	delete(s.deleted, key)
+	s.dirty[key] = struct{}{}
+}
+
+// markDetailDeletedLocked schedules the detail file for id to be removed on
+// the next save and cancels any pending write of it. Blank ids are ignored.
+// The caller must hold s.mu.
+func (s *Store) markDetailDeletedLocked(id string) {
+	key := strings.TrimSpace(id)
+	if key == "" {
+		return
+	}
+	if s.dirty == nil {
+		s.dirty = make(map[string]struct{})
+	}
+	if s.deleted == nil {
+		s.deleted = make(map[string]struct{})
+	}
+	delete(s.dirty, key)
+	s.deleted[key] = struct{}{}
+}
+
+// clearPendingDetailChangesLocked resets the pending write and removal sets
+// to fresh empty maps. The caller must hold s.mu.
+func (s *Store) clearPendingDetailChangesLocked() {
+	s.dirty, s.deleted = map[string]struct{}{}, map[string]struct{}{}
+}
+
+// sortedDetailIDs returns the keys of ids in ascending order, or nil when
+// the set is empty.
+func sortedDetailIDs(ids map[string]struct{}) []string {
+	count := len(ids)
+	if count == 0 {
+		return nil
+	}
+	keys := make([]string, count)
+	next := 0
+	for id := range ids {
+		keys[next] = id
+		next++
+	}
+	sort.Strings(keys)
+	return keys
+}
+
+// cloneFile copies the index so the caller cannot mutate the store's Items
+// slice. Version, Limit and Revision are copied by value.
+func cloneFile(in File) File {
+	items := make([]SummaryEntry, len(in.Items))
+	copy(items, in.Items)
+	return File{
+		Version:  in.Version,
+		Limit:    in.Limit,
+		Revision: in.Revision,
+		Items:    items,
+	}
+}
+
+// cloneEntry returns a copy of item whose Usage map and Messages slice do
+// not share storage with the original at the top level.
+func cloneEntry(item Entry) Entry {
+	dup := item
+	dup.Usage = cloneMap(item.Usage)
+	dup.Messages = cloneMessages(item.Messages)
+	return dup
+}
+
+// cloneMap returns a shallow copy of in: keys and values are copied, but
+// nested maps or slices held as values are still shared. A nil map stays nil.
+func cloneMap(in map[string]any) map[string]any {
+	if in == nil {
+		return nil
+	}
+	dup := make(map[string]any, len(in))
+	for key := range in {
+		dup[key] = in[key]
+	}
+	return dup
+}
+
+// cloneMessages returns a copy of messages in a new backing array. The
+// result is never nil: nil or empty input yields an empty, non-nil slice.
+func cloneMessages(messages []Message) []Message {
+	dup := make([]Message, len(messages))
+	copy(dup, messages)
+	return dup
+}
--- a/internal/chathistory/store_test.go
+++ b/internal/chathistory/store_test.go
@@ -0,0 +1,604 @@
+package chathistory
+
+import (
+	"bytes"
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"testing"
+	"unicode/utf8"
+)
+
+// blockDetailDir makes detail writes fail by moving the detail directory
+// aside to detailDir + ".blocked" and putting a regular file at detailDir.
+// The returned function undoes this by removing the file and moving the
+// directory back; it runs its body at most once, so it can be both called
+// directly and registered with t.Cleanup.
+func blockDetailDir(t *testing.T, detailDir string) func() {
+	t.Helper()
+	blockedDir := detailDir + ".blocked"
+	if err := os.RemoveAll(blockedDir); err != nil {
+		t.Fatalf("remove blocked detail dir failed: %v", err)
+	}
+	if err := os.Rename(detailDir, blockedDir); err != nil {
+		t.Fatalf("move detail dir aside failed: %v", err)
+	}
+	if err := os.RemoveAll(detailDir); err != nil {
+		t.Fatalf("remove blocked detail path failed: %v", err)
+	}
+	// A plain file at the directory path is what blocks later writes.
+	if err := os.WriteFile(detailDir, []byte("blocked"), 0o644); err != nil {
+		t.Fatalf("write blocked detail path failed: %v", err)
+	}
+	var once sync.Once
+	return func() {
+		t.Helper()
+		once.Do(func() {
+			if err := os.RemoveAll(detailDir); err != nil {
+				t.Fatalf("remove blocking detail path failed: %v", err)
+			}
+			if err := os.Rename(blockedDir, detailDir); err != nil {
+				t.Fatalf("restore detail dir failed: %v", err)
+			}
+		})
+	}
+}
+
+// TestStoreCreatesAndPersistsEntries starts and completes one entry, checks
+// the in-memory summary (default limit, completed_at, preview), then opens a
+// second Store on the same path and checks that the summary and the full
+// detail were reloaded from disk.
+func TestStoreCreatesAndPersistsEntries(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	store := New(path)
+
+	started, err := store.Start(StartParams{
+		CallerID:  "caller:abc",
+		AccountID: "user@example.com",
+		Model:     "deepseek-v4-flash",
+		Stream:    true,
+		UserInput: "hello",
+	})
+	if err != nil {
+		t.Fatalf("start entry failed: %v", err)
+	}
+
+	updated, err := store.Update(started.ID, UpdateParams{
+		Status:           "success",
+		ReasoningContent: "thinking",
+		Content:          "answer",
+		StatusCode:       200,
+		ElapsedMs:        321,
+		FinishReason:     "stop",
+		Usage:            map[string]any{"total_tokens": 9},
+		Completed:        true,
+	})
+	if err != nil {
+		t.Fatalf("update entry failed: %v", err)
+	}
+	if updated.Status != "success" || updated.Content != "answer" {
+		t.Fatalf("unexpected updated entry: %#v", updated)
+	}
+
+	snapshot, err := store.Snapshot()
+	if err != nil {
+		t.Fatalf("snapshot failed: %v", err)
+	}
+	if snapshot.Limit != DefaultLimit {
+		t.Fatalf("unexpected default limit: %d", snapshot.Limit)
+	}
+	if len(snapshot.Items) != 1 {
+		t.Fatalf("expected one item, got %d", len(snapshot.Items))
+	}
+	if snapshot.Items[0].CompletedAt == 0 {
+		t.Fatalf("expected completed_at to be populated")
+	}
+	if snapshot.Items[0].Preview != "answer" {
+		t.Fatalf("expected summary preview=answer, got %#v", snapshot.Items[0])
+	}
+
+	// A fresh Store on the same path must see what the first one persisted.
+	reloaded := New(path)
+	reloadedSnapshot, err := reloaded.Snapshot()
+	if err != nil {
+		t.Fatalf("reload snapshot failed: %v", err)
+	}
+	if len(reloadedSnapshot.Items) != 1 {
+		t.Fatalf("unexpected reloaded summaries: %#v", reloadedSnapshot.Items)
+	}
+	full, err := reloaded.Get(started.ID)
+	if err != nil {
+		t.Fatalf("get detail failed: %v", err)
+	}
+	if full.Content != "answer" {
+		t.Fatalf("expected detail content=answer, got %#v", full)
+	}
+}
+
+// TestBuildPreviewPreservesUTF8MB4Characters checks that truncating content
+// made of four-byte runes cuts on rune boundaries: the preview stays valid
+// UTF-8 and holds exactly defaultPreviewAt runes followed by "...".
+func TestBuildPreviewPreservesUTF8MB4Characters(t *testing.T) {
+	long := strings.Repeat("😀", defaultPreviewAt+1)
+	preview := buildPreview(Entry{Content: long})
+	if !utf8.ValidString(preview) {
+		t.Fatalf("expected valid utf-8 preview, got %q", preview)
+	}
+	if preview != strings.Repeat("😀", defaultPreviewAt)+"..." {
+		t.Fatalf("unexpected preview: %q", preview)
+	}
+}
+
+// TestStoreTrimsToConfiguredLimit sets the limit to 10, records 12 completed
+// entries, and checks that only 10 summaries remain.
+func TestStoreTrimsToConfiguredLimit(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	store := New(path)
+	if _, err := store.SetLimit(10); err != nil {
+		t.Fatalf("set limit failed: %v", err)
+	}
+
+	for i := 0; i < 12; i++ {
+		entry, err := store.Start(StartParams{Model: "deepseek-v4-flash", UserInput: "msg"})
+		if err != nil {
+			t.Fatalf("start %d failed: %v", i, err)
+		}
+		if _, err := store.Update(entry.ID, UpdateParams{Status: "success", Content: "ok", Completed: true}); err != nil {
+			t.Fatalf("update %d failed: %v", i, err)
+		}
+	}
+
+	snapshot, err := store.Snapshot()
+	if err != nil {
+		t.Fatalf("snapshot failed: %v", err)
+	}
+	if len(snapshot.Items) != 10 {
+		t.Fatalf("expected 10 items, got %d", len(snapshot.Items))
+	}
+}
+
+// TestStoreDeleteClearAndLimitValidation checks that Delete empties the
+// index, that SetLimit rejects an unsupported value, and that Clear succeeds
+// on an already-empty store.
+func TestStoreDeleteClearAndLimitValidation(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	store := New(path)
+	entry, err := store.Start(StartParams{UserInput: "hello"})
+	if err != nil {
+		t.Fatalf("start failed: %v", err)
+	}
+	if err := store.Delete(entry.ID); err != nil {
+		t.Fatalf("delete failed: %v", err)
+	}
+	snapshot, err := store.Snapshot()
+	if err != nil {
+		t.Fatalf("snapshot failed: %v", err)
+	}
+	if len(snapshot.Items) != 0 {
+		t.Fatalf("expected empty items after delete, got %d", len(snapshot.Items))
+	}
+	if _, err := store.SetLimit(999); err == nil {
+		t.Fatalf("expected invalid limit error")
+	}
+	if err := store.Clear(); err != nil {
+		t.Fatalf("clear failed: %v", err)
+	}
+}
+
+// TestStoreDisablePreservesHistoryAndBlocksNewEntries checks that switching
+// to DisabledLimit keeps existing summaries, makes Enabled report false, and
+// makes Start return ErrDisabled.
+func TestStoreDisablePreservesHistoryAndBlocksNewEntries(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	store := New(path)
+
+	entry, err := store.Start(StartParams{UserInput: "hello"})
+	if err != nil {
+		t.Fatalf("start failed: %v", err)
+	}
+	if _, err := store.Update(entry.ID, UpdateParams{Status: "success", Content: "world", Completed: true}); err != nil {
+		t.Fatalf("update failed: %v", err)
+	}
+
+	snapshot, err := store.SetLimit(DisabledLimit)
+	if err != nil {
+		t.Fatalf("disable failed: %v", err)
+	}
+	if snapshot.Limit != DisabledLimit {
+		t.Fatalf("expected disabled limit, got %d", snapshot.Limit)
+	}
+	if len(snapshot.Items) != 1 {
+		t.Fatalf("expected disabled mode to preserve summaries, got %d", len(snapshot.Items))
+	}
+	if store.Enabled() {
+		t.Fatalf("expected store to report disabled")
+	}
+	if _, err := store.Start(StartParams{UserInput: "later"}); err != ErrDisabled {
+		t.Fatalf("expected ErrDisabled, got %v", err)
+	}
+}
+
+// TestStoreConcurrentUpdatesKeepSplitFilesValid runs eight goroutines that
+// each start and complete an entry, then checks that the in-memory index,
+// the persisted index file (which must be valid JSON), and the detail
+// directory all hold exactly eight entries.
+func TestStoreConcurrentUpdatesKeepSplitFilesValid(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	store := New(path)
+
+	var wg sync.WaitGroup
+	for i := 0; i < 8; i++ {
+		wg.Add(1)
+		go func(idx int) {
+			defer wg.Done()
+			entry, err := store.Start(StartParams{
+				CallerID:  "caller:test",
+				Model:     "deepseek-v4-flash",
+				UserInput: "hello",
+			})
+			if err != nil {
+				t.Errorf("start failed: %v", err)
+				return
+			}
+			_, err = store.Update(entry.ID, UpdateParams{
+				Status:    "success",
+				Content:   "answer",
+				ElapsedMs: int64(idx),
+				Completed: true,
+			})
+			if err != nil {
+				t.Errorf("update failed: %v", err)
+			}
+		}(i)
+	}
+	wg.Wait()
+
+	snapshot, err := store.Snapshot()
+	if err != nil {
+		t.Fatalf("snapshot failed: %v", err)
+	}
+	if len(snapshot.Items) != 8 {
+		t.Fatalf("expected 8 items, got %d", len(snapshot.Items))
+	}
+
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read index failed: %v", err)
+	}
+	var persisted File
+	if err := json.Unmarshal(raw, &persisted); err != nil {
+		t.Fatalf("persisted index is invalid json: %v", err)
+	}
+	if len(persisted.Items) != 8 {
+		t.Fatalf("expected persisted items=8, got %d", len(persisted.Items))
+	}
+
+	// A count of exactly eight also means no temp files were left in the detail directory.
+	detailFiles, err := os.ReadDir(path + ".d")
+	if err != nil {
+		t.Fatalf("read detail dir failed: %v", err)
+	}
+	if len(detailFiles) != 8 {
+		t.Fatalf("expected 8 detail files, got %d", len(detailFiles))
+	}
+}
+
+// TestStoreAutoMigratesLegacyMonolith writes an index in the legacy layout
+// (full entries inside the index file) and checks that New migrates it: the
+// store reports no error, exposes one summary, and returns the original
+// content through Get.
+func TestStoreAutoMigratesLegacyMonolith(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	legacy := legacyFile{
+		Version: 1,
+		Limit:   20,
+		Items: []Entry{{
+			ID:               "chat_legacy",
+			CreatedAt:        1,
+			UpdatedAt:        2,
+			Status:           "success",
+			UserInput:        "hello",
+			Content:          "world",
+			ReasoningContent: "thinking",
+		}},
+	}
+	// Fail loudly if the fixture cannot be encoded instead of writing an empty file.
+	body, err := json.MarshalIndent(legacy, "", "  ")
+	if err != nil {
+		t.Fatalf("marshal legacy file failed: %v", err)
+	}
+	if err := os.WriteFile(path, body, 0o644); err != nil {
+		t.Fatalf("write legacy file failed: %v", err)
+	}
+
+	store := New(path)
+	if err := store.Err(); err != nil {
+		t.Fatalf("expected legacy migration success, got %v", err)
+	}
+	snapshot, err := store.Snapshot()
+	if err != nil {
+		t.Fatalf("snapshot failed: %v", err)
+	}
+	if len(snapshot.Items) != 1 {
+		t.Fatalf("expected one migrated summary, got %#v", snapshot.Items)
+	}
+	full, err := store.Get("chat_legacy")
+	if err != nil {
+		t.Fatalf("get migrated detail failed: %v", err)
+	}
+	if full.Content != "world" {
+		t.Fatalf("expected migrated detail content preserved, got %#v", full)
+	}
+}
+
+// TestStoreAutoMigratesMetadataOnlyLegacyMonolith migrates a legacy entry
+// that has metadata and an error but no content or reasoning text, and
+// checks that the fields survive and that a detail file is written for it.
+func TestStoreAutoMigratesMetadataOnlyLegacyMonolith(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	legacy := legacyFile{
+		Version: 1,
+		Limit:   20,
+		Items: []Entry{{
+			ID:           "chat_metadata_only",
+			Revision:     0,
+			CreatedAt:    1,
+			UpdatedAt:    2,
+			Status:       "error",
+			CallerID:     "caller:test",
+			AccountID:    "acct:test",
+			Model:        "deepseek-v4-flash",
+			Stream:       true,
+			UserInput:    "hello",
+			Error:        "boom",
+			StatusCode:   500,
+			ElapsedMs:    12,
+			FinishReason: "error",
+		}},
+	}
+	// Fail loudly if the fixture cannot be encoded instead of writing an empty file.
+	body, err := json.MarshalIndent(legacy, "", "  ")
+	if err != nil {
+		t.Fatalf("marshal legacy file failed: %v", err)
+	}
+	if err := os.WriteFile(path, body, 0o644); err != nil {
+		t.Fatalf("write legacy file failed: %v", err)
+	}
+
+	store := New(path)
+	if err := store.Err(); err != nil {
+		t.Fatalf("expected legacy metadata-only migration success, got %v", err)
+	}
+	snapshot, err := store.Snapshot()
+	if err != nil {
+		t.Fatalf("snapshot failed: %v", err)
+	}
+	if len(snapshot.Items) != 1 {
+		t.Fatalf("expected one migrated summary, got %#v", snapshot.Items)
+	}
+	full, err := store.Get("chat_metadata_only")
+	if err != nil {
+		t.Fatalf("get migrated detail failed: %v", err)
+	}
+	if full.Error != "boom" || full.UserInput != "hello" {
+		t.Fatalf("expected metadata-only legacy fields preserved, got %#v", full)
+	}
+	if _, err := os.Stat(filepath.Join(store.DetailDir(), "chat_metadata_only.json")); err != nil {
+		t.Fatalf("expected migrated detail file to exist: %v", err)
+	}
+}
+
+// TestStoreLegacyMigrationBestEffortWhenRewriteFails migrates a legacy entry
+// whose ID is long enough that its detail file is expected not to be
+// writable, and checks that the store still loads without error, stays
+// enabled, and serves the migrated entry from memory.
+func TestStoreLegacyMigrationBestEffortWhenRewriteFails(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	// The ID becomes the detail file name; 320+ characters is meant to make that write fail.
+	longID := "chat_" + strings.Repeat("x", 320)
+	legacy := legacyFile{
+		Version: 1,
+		Limit:   20,
+		Items: []Entry{{
+			ID:        longID,
+			CreatedAt: 1,
+			UpdatedAt: 2,
+			Status:    "success",
+			UserInput: "hello",
+			Content:   "world",
+		}},
+	}
+	body, err := json.MarshalIndent(legacy, "", "  ")
+	if err != nil {
+		t.Fatalf("marshal legacy file failed: %v", err)
+	}
+	if err := os.WriteFile(path, body, 0o644); err != nil {
+		t.Fatalf("write legacy file failed: %v", err)
+	}
+
+	store := New(path)
+	if err := store.Err(); err != nil {
+		t.Fatalf("expected store to stay usable after migration writeback failure, got %v", err)
+	}
+	if !store.Enabled() {
+		t.Fatal("expected store to remain enabled after best-effort migration")
+	}
+
+	snapshot, err := store.Snapshot()
+	if err != nil {
+		t.Fatalf("snapshot failed: %v", err)
+	}
+	if len(snapshot.Items) != 1 || snapshot.Items[0].ID != longID {
+		t.Fatalf("unexpected snapshot after best-effort migration: %#v", snapshot.Items)
+	}
+	full, err := store.Get(longID)
+	if err != nil {
+		t.Fatalf("get migrated detail failed: %v", err)
+	}
+	if full.Content != "world" {
+		t.Fatalf("expected migrated content to stay in memory, got %#v", full)
+	}
+	if _, statErr := os.Stat(filepath.Join(store.DetailDir(), longID+".json")); statErr == nil {
+		t.Fatal("expected detail write to fail for overlong legacy id")
+	}
+}
+
+// TestStoreTransientPersistenceFailureDoesNotLatch blocks the detail
+// directory so Start and Update fail to persist, checks that those failures
+// are returned to the caller without being recorded in Err, then restores
+// the directory and checks that the entry created during the outage can be
+// updated and read back.
+func TestStoreTransientPersistenceFailureDoesNotLatch(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	store := New(path)
+
+	first, err := store.Start(StartParams{UserInput: "first"})
+	if err != nil {
+		t.Fatalf("start first failed: %v", err)
+	}
+	restore := blockDetailDir(t, store.DetailDir())
+	t.Cleanup(restore)
+
+	// Start returns the in-memory entry together with the save error.
+	blocked, err := store.Start(StartParams{UserInput: "blocked"})
+	if err == nil {
+		t.Fatalf("expected start failure while detail dir is blocked")
+	}
+	if blocked.ID == "" {
+		t.Fatalf("expected in-memory entry from failed start")
+	}
+	if err := store.Err(); err != nil {
+		t.Fatalf("transient start failure should not latch store error: %v", err)
+	}
+	if _, err := store.Update(first.ID, UpdateParams{Status: "success", Content: "one", Completed: true}); err == nil {
+		t.Fatalf("expected update failure while detail dir is blocked")
+	}
+	if err := store.Err(); err != nil {
+		t.Fatalf("transient update failure should not latch store error: %v", err)
+	}
+
+	restore()
+
+	if _, err := store.Update(blocked.ID, UpdateParams{Status: "success", Content: "two", Completed: true}); err != nil {
+		t.Fatalf("update after restore failed: %v", err)
+	}
+	if _, err := store.Start(StartParams{UserInput: "later"}); err != nil {
+		t.Fatalf("start after restore failed: %v", err)
+	}
+	full, err := store.Get(blocked.ID)
+	if err != nil {
+		t.Fatalf("get restored entry failed: %v", err)
+	}
+	if full.Content != "two" || full.Status != "success" {
+		t.Fatalf("expected restored entry persisted, got %#v", full)
+	}
+}
+
+// TestStoreWritesOnlyChangedDetailFiles creates two completed entries,
+// updates only the first, and checks that the first detail file's bytes
+// changed while the second detail file stayed byte-identical.
+func TestStoreWritesOnlyChangedDetailFiles(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	store := New(path)
+
+	first, err := store.Start(StartParams{UserInput: "one"})
+	if err != nil {
+		t.Fatalf("start first failed: %v", err)
+	}
+	if _, err := store.Update(first.ID, UpdateParams{Status: "success", Content: "first", Completed: true}); err != nil {
+		t.Fatalf("update first failed: %v", err)
+	}
+	second, err := store.Start(StartParams{UserInput: "two"})
+	if err != nil {
+		t.Fatalf("start second failed: %v", err)
+	}
+	if _, err := store.Update(second.ID, UpdateParams{Status: "success", Content: "second", Completed: true}); err != nil {
+		t.Fatalf("update second failed: %v", err)
+	}
+
+	firstPath := filepath.Join(store.DetailDir(), first.ID+".json")
+	secondPath := filepath.Join(store.DetailDir(), second.ID+".json")
+	beforeFirst, err := os.ReadFile(firstPath)
+	if err != nil {
+		t.Fatalf("read first detail before update failed: %v", err)
+	}
+	beforeSecond, err := os.ReadFile(secondPath)
+	if err != nil {
+		t.Fatalf("read second detail before update failed: %v", err)
+	}
+
+	if _, err := store.Update(first.ID, UpdateParams{Status: "success", Content: "first-updated", Completed: true}); err != nil {
+		t.Fatalf("update first again failed: %v", err)
+	}
+
+	afterFirst, err := os.ReadFile(firstPath)
+	if err != nil {
+		t.Fatalf("read first detail after update failed: %v", err)
+	}
+	afterSecond, err := os.ReadFile(secondPath)
+	if err != nil {
+		t.Fatalf("read second detail after update failed: %v", err)
+	}
+
+	if bytes.Equal(beforeFirst, afterFirst) {
+		t.Fatalf("expected first detail file to change after update")
+	}
+	if !bytes.Equal(beforeSecond, afterSecond) {
+		t.Fatalf("expected untouched detail file to remain byte-identical")
+	}
+}
+
+// TestUpdatePreservesContentWhenNewContentIsEmpty checks that a final update
+// carrying empty Content and ReasoningContent does not erase text stored by
+// an earlier progress update, in both the returned entry and a later Get.
+func TestUpdatePreservesContentWhenNewContentIsEmpty(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	store := New(path)
+
+	started, err := store.Start(StartParams{
+		CallerID:  "caller:abc",
+		Model:     "deepseek-v4-flash",
+		Stream:    true,
+		UserInput: "hello",
+	})
+	if err != nil {
+		t.Fatalf("start entry failed: %v", err)
+	}
+
+	if _, err := store.Update(started.ID, UpdateParams{
+		Status:           "streaming",
+		ReasoningContent: "let me think",
+		Content:          "I'll help you with that.",
+	}); err != nil {
+		t.Fatalf("progress update failed: %v", err)
+	}
+
+	updated, err := store.Update(started.ID, UpdateParams{
+		Status:    "success",
+		Content:   "",
+		Completed: true,
+	})
+	if err != nil {
+		t.Fatalf("success update failed: %v", err)
+	}
+
+	if updated.Content != "I'll help you with that." {
+		t.Fatalf("expected content to be preserved, got %q", updated.Content)
+	}
+	if updated.ReasoningContent != "let me think" {
+		t.Fatalf("expected reasoning content to be preserved, got %q", updated.ReasoningContent)
+	}
+
+	full, err := store.Get(started.ID)
+	if err != nil {
+		t.Fatalf("get entry failed: %v", err)
+	}
+	if full.Content != "I'll help you with that." {
+		t.Fatalf("expected persisted content to be preserved, got %q", full.Content)
+	}
+	if full.ReasoningContent != "let me think" {
+		t.Fatalf("expected persisted reasoning content to be preserved, got %q", full.ReasoningContent)
+	}
+}
+
+// TestUpdateAllowsSettingContentFromEmpty checks that Update stores Content
+// on an entry that had none.
+func TestUpdateAllowsSettingContentFromEmpty(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	store := New(path)
+
+	started, err := store.Start(StartParams{
+		CallerID:  "caller:abc",
+		Model:     "deepseek-v4-flash",
+		Stream:    true,
+		UserInput: "hello",
+	})
+	if err != nil {
+		t.Fatalf("start entry failed: %v", err)
+	}
+
+	updated, err := store.Update(started.ID, UpdateParams{
+		Status:  "success",
+		Content: "final answer",
+	})
+	if err != nil {
+		t.Fatalf("update failed: %v", err)
+	}
+	if updated.Content != "final answer" {
+		t.Fatalf("expected content to be set, got %q", updated.Content)
+	}
+}
+
+// TestUpdateAllowsOverwritingContentWithNewValue checks that a non-empty
+// Content in a later update replaces the Content stored by an earlier one.
+func TestUpdateAllowsOverwritingContentWithNewValue(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "chat_history.json")
+	store := New(path)
+
+	started, err := store.Start(StartParams{
+		CallerID:  "caller:abc",
+		Model:     "deepseek-v4-flash",
+		Stream:    true,
+		UserInput: "hello",
+	})
+	if err != nil {
+		t.Fatalf("start entry failed: %v", err)
+	}
+
+	if _, err := store.Update(started.ID, UpdateParams{
+		Status:  "streaming",
+		Content: "partial",
+	}); err != nil {
+		t.Fatalf("first update failed: %v", err)
+	}
+
+	updated, err := store.Update(started.ID, UpdateParams{
+		Status:  "success",
+		Content: "final answer",
+	})
+	if err != nil {
+		t.Fatalf("second update failed: %v", err)
+	}
+	if updated.Content != "final answer" {
+		t.Fatalf("expected content to be overwritten, got %q", updated.Content)
+	}
+}
--- a/internal/claudeconv/convert.go
+++ b/internal/claudeconv/convert.go
@@ -1,32 +1,21 @@
 package claudeconv

-import "strings"
+import (
+	"strings"

-type ClaudeMappingProvider interface {
-	ClaudeMapping() map[string]string
-}
+	"ds2api/internal/config"
+)

-func ConvertClaudeToDeepSeek(claudeReq map[string]any, mappingProvider ClaudeMappingProvider, defaultClaudeModel string) map[string]any {
+func ConvertClaudeToDeepSeek(claudeReq map[string]any, aliasProvider config.ModelAliasReader, defaultClaudeModel string) map[string]any {
 	messages, _ := claudeReq["messages"].([]any)
 	model, _ := claudeReq["model"].(string)
 	if model == "" {
 		model = defaultClaudeModel
 	}

-	mapping := map[string]string{}
-	if mappingProvider != nil {
-		mapping = mappingProvider.ClaudeMapping()
-	}
-	dsModel := mapping["fast"]
-	if dsModel == "" {
-		dsModel = "deepseek-chat"
-	}
-
-	modelLower := strings.ToLower(model)
-	if strings.Contains(modelLower, "opus") || strings.Contains(modelLower, "reasoner") || strings.Contains(modelLower, "slow") {
-		if slow := mapping["slow"]; slow != "" {
-			dsModel = slow
-		}
+	dsModel, ok := config.ResolveModel(aliasProvider, model)
+	if !ok || strings.TrimSpace(dsModel) == "" {
+		dsModel = "deepseek-v4-flash"
 	}

 	convertedMessages := make([]any, 0, len(messages)+1)
--- a/internal/compat/go_compat_test.go
+++ b/internal/compat/go_compat_test.go
@@ -5,7 +5,6 @@ import (
 	"os"
 	"path/filepath"
 	"reflect"
-	"strings"
 	"testing"

 	"ds2api/internal/sse"
@@ -32,72 +31,34 @@ func TestGoCompatSSEFixtures(t *testing.T) {
 		mustLoadJSON(t, fixturePath, &fixture)

 		var expected struct {
-			Parts    []map[string]any `json:"parts"`
-			Finished bool             `json:"finished"`
-			NewType  string           `json:"new_type"`
+			Parts         []map[string]any `json:"parts"`
+			Finished      bool             `json:"finished"`
+			NewType       string           `json:"new_type"`
+			ContentFilter bool             `json:"content_filter"`
+			ErrorMessage  string           `json:"error_message"`
 		}
 		mustLoadJSON(t, expectedPath, &expected)

-		parts, finished, newType := sse.ParseSSEChunkForContent(fixture.Chunk, fixture.ThinkingEnable, fixture.CurrentType)
-		gotParts := make([]map[string]any, 0, len(parts))
-		for _, p := range parts {
+		raw, err := json.Marshal(fixture.Chunk)
+		if err != nil {
+			t.Fatalf("marshal fixture %s failed: %v", name, err)
+		}
+		res := sse.ParseDeepSeekContentLine(append([]byte("data: "), raw...), fixture.ThinkingEnable, fixture.CurrentType)
+		gotParts := make([]map[string]any, 0, len(res.Parts))
+		for _, p := range res.Parts {
 			gotParts = append(gotParts, map[string]any{
 				"text": p.Text,
 				"type": p.Type,
 			})
 		}
-		if !reflect.DeepEqual(gotParts, expected.Parts) || finished != expected.Finished || newType != expected.NewType {
-			t.Fatalf("fixture %s mismatch:\n got parts=%#v finished=%v newType=%q\nwant parts=%#v finished=%v newType=%q",
-				name, gotParts, finished, newType, expected.Parts, expected.Finished, expected.NewType)
-		}
-	}
-}
-
-func TestGoCompatToolcallFixtures(t *testing.T) {
-	files, err := filepath.Glob(compatPath("fixtures", "toolcalls", "*.json"))
-	if err != nil {
-		t.Fatalf("glob toolcall fixtures failed: %v", err)
-	}
-	if len(files) == 0 {
-		t.Fatal("no toolcall fixtures found")
-	}
-	for _, fixturePath := range files {
-		name := trimExt(filepath.Base(fixturePath))
-		expectedPath := compatPath("expected", "toolcalls_"+name+".json")
-
-		var fixture struct {
-			Text      string   `json:"text"`
-			ToolNames []string `json:"tool_names"`
-			Mode      string   `json:"mode"`
-		}
-		mustLoadJSON(t, fixturePath, &fixture)
-
-		var expected struct {
-			Calls             []util.ParsedToolCall `json:"calls"`
-			SawToolCallSyntax bool                  `json:"sawToolCallSyntax"`
-			RejectedByPolicy  bool                  `json:"rejectedByPolicy"`
-			RejectedToolNames []string              `json:"rejectedToolNames"`
-		}
-		mustLoadJSON(t, expectedPath, &expected)
-
-		var got util.ToolCallParseResult
-		switch strings.ToLower(strings.TrimSpace(fixture.Mode)) {
-		case "standalone":
-			got = util.ParseStandaloneToolCallsDetailed(fixture.Text, fixture.ToolNames)
-		default:
-			got = util.ParseToolCallsDetailed(fixture.Text, fixture.ToolNames)
-		}
-		if got.Calls == nil {
-			got.Calls = []util.ParsedToolCall{}
-		}
-		if got.RejectedToolNames == nil {
-			got.RejectedToolNames = []string{}
-		}
-		if !reflect.DeepEqual(got.Calls, expected.Calls) ||
-			got.SawToolCallSyntax != expected.SawToolCallSyntax ||
-			got.RejectedByPolicy != expected.RejectedByPolicy ||
-			!reflect.DeepEqual(got.RejectedToolNames, expected.RejectedToolNames) {
-			t.Fatalf("toolcall fixture %s mismatch:\n got=%#v\nwant=%#v", name, got, expected)
+		if !reflect.DeepEqual(gotParts, expected.Parts) ||
+			res.Stop != expected.Finished ||
+			res.NextType != expected.NewType ||
+			res.ContentFilter != expected.ContentFilter ||
+			res.ErrorMessage != expected.ErrorMessage {
+			t.Fatalf("fixture %s mismatch:\n got parts=%#v finished=%v newType=%q contentFilter=%v errorMessage=%q\nwant parts=%#v finished=%v newType=%q contentFilter=%v errorMessage=%q",
+				name, gotParts, res.Stop, res.NextType, res.ContentFilter, res.ErrorMessage,
+				expected.Parts, expected.Finished, expected.NewType, expected.ContentFilter, expected.ErrorMessage)
 		}
 	}
 }
--- a/internal/config/codec.go
+++ b/internal/config/codec.go
@@ -17,14 +17,14 @@ func (c Config) MarshalJSON() ([]byte, error) {
 	if len(c.Keys) > 0 {
 		m["keys"] = c.Keys
 	}
+	if len(c.APIKeys) > 0 {
+		m["api_keys"] = c.APIKeys
+	}
 	if len(c.Accounts) > 0 {
 		m["accounts"] = c.Accounts
 	}
-	if len(c.ClaudeMapping) > 0 {
-		m["claude_mapping"] = c.ClaudeMapping
-	}
-	if len(c.ClaudeModelMap) > 0 {
-		m["claude_model_mapping"] = c.ClaudeModelMap
+	if len(c.Proxies) > 0 {
+		m["proxies"] = c.Proxies
 	}
 	if len(c.ModelAliases) > 0 {
 		m["model_aliases"] = c.ModelAliases
@@ -35,7 +35,7 @@ func (c Config) MarshalJSON() ([]byte, error) {
 	if c.Runtime.AccountMaxInflight > 0 || c.Runtime.AccountMaxQueue > 0 || c.Runtime.GlobalMaxInflight > 0 || c.Runtime.TokenRefreshIntervalHours > 0 {
 		m["runtime"] = c.Runtime
 	}
-	if c.Compat.WideInputStrictOutput != nil {
+	if c.Compat.WideInputStrictOutput != nil || c.Compat.StripReferenceMarkers != nil {
 		m["compat"] = c.Compat
 	}
 	if c.Responses.StoreTTLSeconds > 0 {
@@ -45,6 +45,15 @@ func (c Config) MarshalJSON() ([]byte, error) {
 		m["embeddings"] = c.Embeddings
 	}
 	m["auto_delete"] = c.AutoDelete
+	if c.HistorySplit.Enabled != nil || c.HistorySplit.TriggerAfterTurns != nil {
+		m["history_split"] = c.HistorySplit
+	}
+	if c.CurrentInputFile.Enabled != nil || c.CurrentInputFile.MinChars != 0 {
+		m["current_input_file"] = c.CurrentInputFile
+	}
+	if c.ThinkingInjection.Enabled != nil || strings.TrimSpace(c.ThinkingInjection.Prompt) != "" {
+		m["thinking_injection"] = c.ThinkingInjection
+	}
 	if c.VercelSyncHash != "" {
 		m["_vercel_sync_hash"] = c.VercelSyncHash
 	}
@@ -66,18 +75,21 @@ func (c *Config) UnmarshalJSON(b []byte) error {
 			if err := json.Unmarshal(v, &c.Keys); err != nil {
 				return fmt.Errorf("invalid field %q: %w", k, err)
 			}
+		case "api_keys":
+			if err := json.Unmarshal(v, &c.APIKeys); err != nil {
+				return fmt.Errorf("invalid field %q: %w", k, err)
+			}
 		case "accounts":
 			if err := json.Unmarshal(v, &c.Accounts); err != nil {
 				return fmt.Errorf("invalid field %q: %w", k, err)
 			}
+		case "proxies":
+			if err := json.Unmarshal(v, &c.Proxies); err != nil {
+				return fmt.Errorf("invalid field %q: %w", k, err)
+			}
 		case "claude_mapping":
-			if err := json.Unmarshal(v, &c.ClaudeMapping); err != nil {
-				return fmt.Errorf("invalid field %q: %w", k, err)
-			}
 		case "claude_model_mapping":
-			if err := json.Unmarshal(v, &c.ClaudeModelMap); err != nil {
-				return fmt.Errorf("invalid field %q: %w", k, err)
-			}
+			// Removed legacy mapping fields are ignored instead of persisted.
 		case "model_aliases":
 			if err := json.Unmarshal(v, &c.ModelAliases); err != nil {
 				return fmt.Errorf("invalid field %q: %w", k, err)
@@ -108,6 +120,18 @@ func (c *Config) UnmarshalJSON(b []byte) error {
 			if err := json.Unmarshal(v, &c.AutoDelete); err != nil {
 				return fmt.Errorf("invalid field %q: %w", k, err)
 			}
+		case "history_split":
+			if err := json.Unmarshal(v, &c.HistorySplit); err != nil {
+				return fmt.Errorf("invalid field %q: %w", k, err)
+			}
+		case "current_input_file":
+			if err := json.Unmarshal(v, &c.CurrentInputFile); err != nil {
+				return fmt.Errorf("invalid field %q: %w", k, err)
+			}
+		case "thinking_injection":
+			if err := json.Unmarshal(v, &c.ThinkingInjection); err != nil {
+				return fmt.Errorf("invalid field %q: %w", k, err)
+			}
 		case "_vercel_sync_hash":
 			if err := json.Unmarshal(v, &c.VercelSyncHash); err != nil {
 				return fmt.Errorf("invalid field %q: %w", k, err)
@@ -123,24 +147,38 @@ func (c *Config) UnmarshalJSON(b []byte) error {
 			}
 		}
 	}
+	c.NormalizeCredentials()
 	return nil
 }

 func (c Config) Clone() Config {
 	clone := Config{
-		Keys:           slices.Clone(c.Keys),
-		Accounts:       slices.Clone(c.Accounts),
-		ClaudeMapping:  cloneStringMap(c.ClaudeMapping),
-		ClaudeModelMap: cloneStringMap(c.ClaudeModelMap),
-		ModelAliases:   cloneStringMap(c.ModelAliases),
-		Admin:          c.Admin,
-		Runtime:        c.Runtime,
+		Keys:         slices.Clone(c.Keys),
+		APIKeys:      slices.Clone(c.APIKeys),
+		Accounts:     slices.Clone(c.Accounts),
+		Proxies:      slices.Clone(c.Proxies),
+		ModelAliases: cloneStringMap(c.ModelAliases),
+		Admin:        c.Admin,
+		Runtime:      c.Runtime,
 		Compat: CompatConfig{
 			WideInputStrictOutput: cloneBoolPtr(c.Compat.WideInputStrictOutput),
+			StripReferenceMarkers: cloneBoolPtr(c.Compat.StripReferenceMarkers),
+		},
+		Responses:  c.Responses,
+		Embeddings: c.Embeddings,
+		AutoDelete: c.AutoDelete,
+		HistorySplit: HistorySplitConfig{
+			Enabled:           cloneBoolPtr(c.HistorySplit.Enabled),
+			TriggerAfterTurns: cloneIntPtr(c.HistorySplit.TriggerAfterTurns),
+		},
+		CurrentInputFile: CurrentInputFileConfig{
+			Enabled:  cloneBoolPtr(c.CurrentInputFile.Enabled),
+			MinChars: c.CurrentInputFile.MinChars,
+		},
+		ThinkingInjection: ThinkingInjectionConfig{
+			Enabled: cloneBoolPtr(c.ThinkingInjection.Enabled),
+			Prompt:  c.ThinkingInjection.Prompt,
 		},
-		Responses:        c.Responses,
-		Embeddings:       c.Embeddings,
-		AutoDelete:       c.AutoDelete,
 		VercelSyncHash:   c.VercelSyncHash,
 		VercelSyncTime:   c.VercelSyncTime,
 		AdditionalFields: map[string]any{},
@@ -170,6 +208,14 @@ func cloneBoolPtr(in *bool) *bool {
 	return &v
 }

+func cloneIntPtr(in *int) *int {
+	if in == nil {
+		return nil
+	}
+	v := *in
+	return &v
+}
+
 func parseConfigString(raw string) (Config, error) {
 	var cfg Config
 	candidates := []string{raw}
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -1,27 +1,77 @@
 package config

+import (
+	"crypto/sha1"
+	"encoding/hex"
+	"fmt"
+	"strings"
+)
+
 type Config struct {
-	Keys             []string          `json:"keys,omitempty"`
-	Accounts         []Account         `json:"accounts,omitempty"`
-	ClaudeMapping    map[string]string `json:"claude_mapping,omitempty"`
-	ClaudeModelMap   map[string]string `json:"claude_model_mapping,omitempty"`
-	ModelAliases     map[string]string `json:"model_aliases,omitempty"`
-	Admin            AdminConfig       `json:"admin,omitempty"`
-	Runtime          RuntimeConfig     `json:"runtime,omitempty"`
-	Compat           CompatConfig      `json:"compat,omitempty"`
-	Responses        ResponsesConfig   `json:"responses,omitempty"`
-	Embeddings       EmbeddingsConfig  `json:"embeddings,omitempty"`
-	AutoDelete       AutoDeleteConfig  `json:"auto_delete"`
-	VercelSyncHash   string            `json:"_vercel_sync_hash,omitempty"`
-	VercelSyncTime   int64             `json:"_vercel_sync_time,omitempty"`
-	AdditionalFields map[string]any    `json:"-"`
+	Keys              []string                `json:"keys,omitempty"`
+	APIKeys           []APIKey                `json:"api_keys,omitempty"`
+	Accounts          []Account               `json:"accounts,omitempty"`
+	Proxies           []Proxy                 `json:"proxies,omitempty"`
+	ModelAliases      map[string]string       `json:"model_aliases,omitempty"`
+	Admin             AdminConfig             `json:"admin,omitempty"`
+	Runtime           RuntimeConfig           `json:"runtime,omitempty"`
+	Compat            CompatConfig            `json:"compat,omitempty"`
+	Responses         ResponsesConfig         `json:"responses,omitempty"`
+	Embeddings        EmbeddingsConfig        `json:"embeddings,omitempty"`
+	AutoDelete        AutoDeleteConfig        `json:"auto_delete"`
+	HistorySplit      HistorySplitConfig      `json:"history_split"`
+	CurrentInputFile  CurrentInputFileConfig  `json:"current_input_file,omitempty"`
+	ThinkingInjection ThinkingInjectionConfig `json:"thinking_injection,omitempty"`
+	VercelSyncHash    string                  `json:"_vercel_sync_hash,omitempty"`
+	VercelSyncTime    int64                   `json:"_vercel_sync_time,omitempty"`
+	AdditionalFields  map[string]any          `json:"-"`
 }

 type Account struct {
+	Name     string `json:"name,omitempty"`
+	Remark   string `json:"remark,omitempty"`
 	Email    string `json:"email,omitempty"`
 	Mobile   string `json:"mobile,omitempty"`
 	Password string `json:"password,omitempty"`
 	Token    string `json:"token,omitempty"`
+	ProxyID  string `json:"proxy_id,omitempty"`
+}
+
+type APIKey struct {
+	Key    string `json:"key"`
+	Name   string `json:"name,omitempty"`
+	Remark string `json:"remark,omitempty"`
+}
+
+type Proxy struct {
+	ID       string `json:"id,omitempty"`
+	Name     string `json:"name,omitempty"`
+	Type     string `json:"type,omitempty"`
+	Host     string `json:"host,omitempty"`
+	Port     int    `json:"port,omitempty"`
+	Username string `json:"username,omitempty"`
+	Password string `json:"password,omitempty"`
+}
+
+func NormalizeProxy(p Proxy) Proxy {
+	p.ID = strings.TrimSpace(p.ID)
+	p.Name = strings.TrimSpace(p.Name)
+	p.Type = strings.ToLower(strings.TrimSpace(p.Type))
+	p.Host = strings.TrimSpace(p.Host)
+	p.Username = strings.TrimSpace(p.Username)
+	p.Password = strings.TrimSpace(p.Password)
+	if p.ID == "" {
+		p.ID = StableProxyID(p)
+	}
+	if p.Name == "" && p.Host != "" && p.Port > 0 {
+		p.Name = fmt.Sprintf("%s:%d", p.Host, p.Port)
+	}
+	return p
+}
+
+func StableProxyID(p Proxy) string {
+	sum := sha1.Sum([]byte(strings.ToLower(strings.TrimSpace(p.Type)) + "|" + strings.ToLower(strings.TrimSpace(p.Host)) + "|" + fmt.Sprintf("%d", p.Port) + "|" + strings.TrimSpace(p.Username)))
+	return "proxy_" + hex.EncodeToString(sum[:6])
 }

 func (c *Config) ClearAccountTokens() {
@@ -33,6 +83,27 @@ func (c *Config) ClearAccountTokens() {
 	}
 }

+func (c *Config) NormalizeCredentials() {
+	if c == nil {
+		return
+	}
+	normalizedAPIKeys := normalizeAPIKeys(c.APIKeys)
+	if len(normalizedAPIKeys) > 0 {
+		c.APIKeys = normalizedAPIKeys
+		c.Keys = apiKeysToStrings(c.APIKeys)
+	} else {
+		c.Keys = normalizeKeys(c.Keys)
+		c.APIKeys = apiKeysFromStrings(c.Keys, nil)
+	}
+
+	for i := range c.Accounts {
+		c.Accounts[i].Name = strings.TrimSpace(c.Accounts[i].Name)
+		c.Accounts[i].Remark = strings.TrimSpace(c.Accounts[i].Remark)
+	}
+
+	c.normalizeModelAliases()
+}
+
 // DropInvalidAccounts removes accounts that cannot be addressed by admin APIs
 // (no email and no normalizable mobile). This prevents legacy token-only
 // records from becoming orphaned empty entries after token stripping.
@@ -50,8 +121,30 @@ func (c *Config) DropInvalidAccounts() {
 	c.Accounts = kept
 }

+func (c *Config) normalizeModelAliases() {
+	if c == nil {
+		return
+	}
+
+	aliases := map[string]string{}
+	for k, v := range c.ModelAliases {
+		key := strings.TrimSpace(lower(k))
+		val := strings.TrimSpace(lower(v))
+		if key == "" || val == "" {
+			continue
+		}
+		aliases[key] = val
+	}
+	if len(aliases) == 0 {
+		c.ModelAliases = nil
+	} else {
+		c.ModelAliases = aliases
+	}
+}
+
 type CompatConfig struct {
 	WideInputStrictOutput *bool `json:"wide_input_strict_output,omitempty"`
+	StripReferenceMarkers *bool `json:"strip_reference_markers,omitempty"`
 }

 type AdminConfig struct {
@@ -76,5 +169,21 @@ type EmbeddingsConfig struct {
 }

 type AutoDeleteConfig struct {
-	Sessions bool `json:"sessions"`
+	Mode     string `json:"mode,omitempty"`
+	Sessions bool   `json:"sessions,omitempty"`
+}
+
+type HistorySplitConfig struct {
+	Enabled           *bool `json:"enabled,omitempty"`
+	TriggerAfterTurns *int  `json:"trigger_after_turns,omitempty"`
+}
+
+type CurrentInputFileConfig struct {
+	Enabled  *bool `json:"enabled,omitempty"`
+	MinChars int   `json:"min_chars,omitempty"`
+}
+
+type ThinkingInjectionConfig struct {
+	Enabled *bool  `json:"enabled,omitempty"`
+	Prompt  string `json:"prompt,omitempty"`
 }
--- a/internal/config/config_edge_test.go
+++ b/internal/config/config_edge_test.go
@@ -10,19 +10,29 @@ import (
 // ─── GetModelConfig edge cases ───────────────────────────────────────

 func TestGetModelConfigDeepSeekChat(t *testing.T) {
-	thinking, search, ok := GetModelConfig("deepseek-chat")
+	thinking, search, ok := GetModelConfig("deepseek-v4-flash")
 	if !ok {
-		t.Fatal("expected ok for deepseek-chat")
+		t.Fatal("expected ok for deepseek-v4-flash")
+	}
+	if !thinking || search {
+		t.Fatalf("expected thinking=true search=false for deepseek-v4-flash, got thinking=%v search=%v", thinking, search)
+	}
+}
+
+func TestGetModelConfigDeepSeekChatNoThinking(t *testing.T) {
+	thinking, search, ok := GetModelConfig("deepseek-v4-flash-nothinking")
+	if !ok {
+		t.Fatal("expected ok for deepseek-v4-flash-nothinking")
 	}
 	if thinking || search {
-		t.Fatalf("expected no thinking/search for deepseek-chat, got thinking=%v search=%v", thinking, search)
+		t.Fatalf("expected thinking=false search=false for deepseek-v4-flash-nothinking, got thinking=%v search=%v", thinking, search)
 	}
 }

 func TestGetModelConfigDeepSeekReasoner(t *testing.T) {
-	thinking, search, ok := GetModelConfig("deepseek-reasoner")
+	thinking, search, ok := GetModelConfig("deepseek-v4-pro")
 	if !ok {
-		t.Fatal("expected ok for deepseek-reasoner")
+		t.Fatal("expected ok for deepseek-v4-pro")
 	}
 	if !thinking || search {
 		t.Fatalf("expected thinking=true search=false, got thinking=%v search=%v", thinking, search)
@@ -30,32 +40,88 @@ func TestGetModelConfigDeepSeekReasoner(t *testing.T) {
 }

 func TestGetModelConfigDeepSeekChatSearch(t *testing.T) {
-	thinking, search, ok := GetModelConfig("deepseek-chat-search")
+	thinking, search, ok := GetModelConfig("deepseek-v4-flash-search")
 	if !ok {
-		t.Fatal("expected ok for deepseek-chat-search")
+		t.Fatal("expected ok for deepseek-v4-flash-search")
 	}
-	if thinking || !search {
-		t.Fatalf("expected thinking=false search=true, got thinking=%v search=%v", thinking, search)
+	if !thinking || !search {
+		t.Fatalf("expected thinking=true search=true, got thinking=%v search=%v", thinking, search)
 	}
 }

 func TestGetModelConfigDeepSeekReasonerSearch(t *testing.T) {
-	thinking, search, ok := GetModelConfig("deepseek-reasoner-search")
+	thinking, search, ok := GetModelConfig("deepseek-v4-pro-search")
 	if !ok {
-		t.Fatal("expected ok for deepseek-reasoner-search")
+		t.Fatal("expected ok for deepseek-v4-pro-search")
 	}
 	if !thinking || !search {
 		t.Fatalf("expected both true, got thinking=%v search=%v", thinking, search)
 	}
 }

-func TestGetModelConfigCaseInsensitive(t *testing.T) {
-	thinking, search, ok := GetModelConfig("DeepSeek-Chat")
+func TestGetModelConfigDeepSeekExpertChat(t *testing.T) {
+	thinking, search, ok := GetModelConfig("deepseek-v4-pro")
 	if !ok {
-		t.Fatal("expected ok for case-insensitive deepseek-chat")
+		t.Fatal("expected ok for deepseek-v4-pro")
 	}
-	if thinking || search {
-		t.Fatalf("expected no thinking/search for case-insensitive deepseek-chat")
+	if !thinking || search {
+		t.Fatalf("expected thinking=true search=false for deepseek-v4-pro, got thinking=%v search=%v", thinking, search)
+	}
+}
+
+func TestGetModelConfigDeepSeekExpertReasonerSearch(t *testing.T) {
+	thinking, search, ok := GetModelConfig("deepseek-v4-pro-search")
+	if !ok {
+		t.Fatal("expected ok for deepseek-v4-pro-search")
+	}
+	if !thinking || !search {
+		t.Fatalf("expected both true, got thinking=%v search=%v", thinking, search)
+	}
+}
+
+func TestGetModelConfigDeepSeekVision(t *testing.T) {
+	thinking, search, ok := GetModelConfig("deepseek-v4-vision")
+	if !ok {
+		t.Fatal("expected ok for deepseek-v4-vision")
+	}
+	if !thinking || search {
+		t.Fatalf("expected thinking=true search=false, got thinking=%v search=%v", thinking, search)
+	}
+}
+
+func TestGetModelConfigDeepSeekVisionSearchUnsupported(t *testing.T) {
+	_, _, ok := GetModelConfig("deepseek-v4-vision-search")
+	if ok {
+		t.Fatal("expected deepseek-v4-vision-search to be unsupported")
+	}
+}
+
+func TestGetModelTypeDefaultExpertAndVision(t *testing.T) {
+	defaultType, ok := GetModelType("deepseek-v4-flash")
+	if !ok || defaultType != "default" {
+		t.Fatalf("expected default model_type, got ok=%v model_type=%q", ok, defaultType)
+	}
+	defaultNoThinkingType, ok := GetModelType("deepseek-v4-flash-nothinking")
+	if !ok || defaultNoThinkingType != "default" {
+		t.Fatalf("expected default model_type for nothinking, got ok=%v model_type=%q", ok, defaultNoThinkingType)
+	}
+	expertType, ok := GetModelType("deepseek-v4-pro")
+	if !ok || expertType != "expert" {
+		t.Fatalf("expected expert model_type, got ok=%v model_type=%q", ok, expertType)
+	}
+	visionType, ok := GetModelType("deepseek-v4-vision")
+	if !ok || visionType != "vision" {
+		t.Fatalf("expected vision model_type, got ok=%v model_type=%q", ok, visionType)
+	}
+}
+
+func TestGetModelConfigCaseInsensitive(t *testing.T) {
+	thinking, search, ok := GetModelConfig("DeepSeek-V4-Flash")
+	if !ok {
+		t.Fatal("expected ok for case-insensitive deepseek-v4-flash")
+	}
+	if !thinking || search {
+		t.Fatalf("expected thinking=true search=false for case-insensitive deepseek-v4-flash")
 	}
 }

@@ -97,16 +163,26 @@ func TestLowerFunction(t *testing.T) {
 // ─── Config.MarshalJSON / UnmarshalJSON roundtrip ────────────────────

 func TestConfigJSONRoundtrip(t *testing.T) {
+	trueVal := true
+	falseVal := false
 	cfg := Config{
-		Keys:     []string{"key1", "key2"},
-		Accounts: []Account{{Email: "user@example.com", Password: "pass", Token: "tok"}},
-		ClaudeMapping: map[string]string{
-			"fast": "deepseek-chat",
-			"slow": "deepseek-reasoner",
+		Keys:         []string{"key1", "key2"},
+		Accounts:     []Account{{Email: "user@example.com", Password: "pass", Token: "tok"}},
+		ModelAliases: map[string]string{"Claude-Sonnet-4-6": "DeepSeek-V4-Flash"},
+		AutoDelete: AutoDeleteConfig{
+			Mode: "single",
+		},
+		HistorySplit: HistorySplitConfig{
+			Enabled:           &trueVal,
+			TriggerAfterTurns: func() *int { v := 2; return &v }(),
 		},
 		Runtime: RuntimeConfig{
 			TokenRefreshIntervalHours: 12,
 		},
+		Compat: CompatConfig{
+			WideInputStrictOutput: &trueVal,
+			StripReferenceMarkers: &falseVal,
+		},
 		VercelSyncHash: "hash123",
 		VercelSyncTime: 1234567890,
 		AdditionalFields: map[string]any{
@@ -130,12 +206,27 @@ func TestConfigJSONRoundtrip(t *testing.T) {
 	if len(decoded.Accounts) != 1 || decoded.Accounts[0].Email != "user@example.com" {
 		t.Fatalf("unexpected accounts: %#v", decoded.Accounts)
 	}
-	if decoded.ClaudeMapping["fast"] != "deepseek-chat" {
-		t.Fatalf("unexpected claude mapping: %#v", decoded.ClaudeMapping)
+	if decoded.ModelAliases["claude-sonnet-4-6"] != "deepseek-v4-flash" {
+		t.Fatalf("unexpected normalized model aliases: %#v", decoded.ModelAliases)
 	}
 	if decoded.Runtime.TokenRefreshIntervalHours != 12 {
 		t.Fatalf("unexpected runtime refresh interval: %#v", decoded.Runtime.TokenRefreshIntervalHours)
 	}
+	if decoded.AutoDelete.Mode != "single" {
+		t.Fatalf("unexpected auto delete mode: %#v", decoded.AutoDelete.Mode)
+	}
+	if decoded.HistorySplit.Enabled == nil || !*decoded.HistorySplit.Enabled {
+		t.Fatalf("unexpected history split enabled: %#v", decoded.HistorySplit.Enabled)
+	}
+	if decoded.HistorySplit.TriggerAfterTurns == nil || *decoded.HistorySplit.TriggerAfterTurns != 2 {
+		t.Fatalf("unexpected history split trigger_after_turns: %#v", decoded.HistorySplit.TriggerAfterTurns)
+	}
+	if decoded.Compat.WideInputStrictOutput == nil || !*decoded.Compat.WideInputStrictOutput {
+		t.Fatalf("unexpected compat wide_input_strict_output: %#v", decoded.Compat.WideInputStrictOutput)
+	}
+	if decoded.Compat.StripReferenceMarkers == nil || *decoded.Compat.StripReferenceMarkers {
+		t.Fatalf("unexpected compat strip_reference_markers: %#v", decoded.Compat.StripReferenceMarkers)
+	}
 	if decoded.VercelSyncHash != "hash123" {
 		t.Fatalf("unexpected vercel sync hash: %q", decoded.VercelSyncHash)
 	}
@@ -144,6 +235,29 @@ func TestConfigJSONRoundtrip(t *testing.T) {
 	}
 }

+func TestAutoDeleteModeResolution(t *testing.T) {
+	tests := []struct {
+		name string
+		cfg  AutoDeleteConfig
+		want string
+	}{
+		{name: "default", cfg: AutoDeleteConfig{}, want: "none"},
+		{name: "legacy all", cfg: AutoDeleteConfig{Sessions: true}, want: "all"},
+		{name: "single", cfg: AutoDeleteConfig{Mode: "single"}, want: "single"},
+		{name: "all", cfg: AutoDeleteConfig{Mode: "all"}, want: "all"},
+		{name: "none", cfg: AutoDeleteConfig{Mode: "none"}, want: "none"},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			store := &Store{cfg: Config{AutoDelete: tc.cfg}}
+			if got := store.AutoDeleteMode(); got != tc.want {
+				t.Fatalf("AutoDeleteMode()=%q want=%q", got, tc.want)
+			}
+		})
+	}
+}
+
 func TestConfigUnmarshalJSONPreservesUnknownFields(t *testing.T) {
 	raw := `{"keys":["k1"],"accounts":[],"my_custom_field":"hello","number_field":42}`
 	var cfg Config
@@ -159,14 +273,39 @@ func TestConfigUnmarshalJSONPreservesUnknownFields(t *testing.T) {
 	}
 }

+func TestConfigUnmarshalJSONIgnoresRemovedLegacyModelMappings(t *testing.T) {
+	raw := `{"keys":["k1"],"accounts":[],"claude_mapping":{"fast":"deepseek-v4-pro"},"claude_model_mapping":{"slow":"deepseek-v4-pro"}}`
+	var cfg Config
+	if err := json.Unmarshal([]byte(raw), &cfg); err != nil {
+		t.Fatalf("unmarshal error: %v", err)
+	}
+	if len(cfg.ModelAliases) != 0 {
+		t.Fatalf("expected removed legacy mappings to be ignored, got %#v", cfg.ModelAliases)
+	}
+	if _, ok := cfg.AdditionalFields["claude_mapping"]; ok {
+		t.Fatalf("expected removed legacy field not to persist in additional fields: %#v", cfg.AdditionalFields)
+	}
+	if _, ok := cfg.AdditionalFields["claude_model_mapping"]; ok {
+		t.Fatalf("expected removed legacy field not to persist in additional fields: %#v", cfg.AdditionalFields)
+	}
+}
+
 // ─── Config.Clone ────────────────────────────────────────────────────

 func TestConfigCloneIsDeepCopy(t *testing.T) {
+	falseVal := false
+	trueVal := true
+	turns := 2
 	cfg := Config{
-		Keys:     []string{"key1"},
-		Accounts: []Account{{Email: "user@test.com", Token: "token"}},
-		ClaudeMapping: map[string]string{
-			"fast": "deepseek-chat",
+		Keys:         []string{"key1"},
+		Accounts:     []Account{{Email: "user@test.com", Token: "token"}},
+		ModelAliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"},
+		Compat: CompatConfig{
+			StripReferenceMarkers: &falseVal,
+		},
+		HistorySplit: HistorySplitConfig{
+			Enabled:           &trueVal,
+			TriggerAfterTurns: &turns,
 		},
 		AdditionalFields: map[string]any{"custom": "value"},
 	}
@@ -176,7 +315,16 @@ func TestConfigCloneIsDeepCopy(t *testing.T) {
 	// Modify original
 	cfg.Keys[0] = "modified"
 	cfg.Accounts[0].Email = "modified@test.com"
-	cfg.ClaudeMapping["fast"] = "modified-model"
+	cfg.ModelAliases["claude-sonnet-4-6"] = "modified-model"
+	if cfg.Compat.StripReferenceMarkers != nil {
+		*cfg.Compat.StripReferenceMarkers = true
+	}
+	if cfg.HistorySplit.Enabled != nil {
+		*cfg.HistorySplit.Enabled = false
+	}
+	if cfg.HistorySplit.TriggerAfterTurns != nil {
+		*cfg.HistorySplit.TriggerAfterTurns = 5
+	}

 	// Cloned should not be affected
 	if cloned.Keys[0] != "key1" {
@@ -185,8 +333,17 @@ func TestConfigCloneIsDeepCopy(t *testing.T) {
 	if cloned.Accounts[0].Email != "user@test.com" {
 		t.Fatalf("clone accounts was affected: %#v", cloned.Accounts)
 	}
-	if cloned.ClaudeMapping["fast"] != "deepseek-chat" {
-		t.Fatalf("clone claude mapping was affected: %#v", cloned.ClaudeMapping)
+	if cloned.ModelAliases["claude-sonnet-4-6"] != "deepseek-v4-flash" {
+		t.Fatalf("clone model aliases was affected: %#v", cloned.ModelAliases)
+	}
+	if cloned.Compat.StripReferenceMarkers == nil || *cloned.Compat.StripReferenceMarkers {
+		t.Fatalf("clone compat was affected: %#v", cloned.Compat.StripReferenceMarkers)
+	}
+	if cloned.HistorySplit.Enabled == nil || !*cloned.HistorySplit.Enabled {
+		t.Fatalf("clone history split enabled was affected: %#v", cloned.HistorySplit.Enabled)
+	}
+	if cloned.HistorySplit.TriggerAfterTurns == nil || *cloned.HistorySplit.TriggerAfterTurns != 2 {
+		t.Fatalf("clone history split trigger was affected: %#v", cloned.HistorySplit.TriggerAfterTurns)
 	}
 }

@@ -359,6 +516,39 @@ func TestStoreCompatWideInputStrictOutputCanDisable(t *testing.T) {
 	}
 }

+func TestStoreCompatStripReferenceMarkersDefaultTrue(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[]}`)
+	store := LoadStore()
+	if !store.CompatStripReferenceMarkers() {
+		t.Fatal("expected default strip_reference_markers=true when unset")
+	}
+}
+
+func TestStoreCompatStripReferenceMarkersCanDisable(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[],"compat":{"strip_reference_markers":false}}`)
+	store := LoadStore()
+	if store.CompatStripReferenceMarkers() {
+		t.Fatal("expected strip_reference_markers=false when explicitly configured")
+	}
+
+	snap := store.Snapshot()
+	data, err := snap.MarshalJSON()
+	if err != nil {
+		t.Fatalf("marshal failed: %v", err)
+	}
+	var out map[string]any
+	if err := json.Unmarshal(data, &out); err != nil {
+		t.Fatalf("decode failed: %v", err)
+	}
+	rawCompat, ok := out["compat"].(map[string]any)
+	if !ok {
+		t.Fatalf("expected compat in marshaled output, got %#v", out)
+	}
+	if rawCompat["strip_reference_markers"] != false {
+		t.Fatalf("expected explicit false in compat, got %#v", rawCompat)
+	}
+}
+
 func TestStoreIsEnvBacked(t *testing.T) {
 	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[]}`)
 	store := LoadStore()
@@ -400,25 +590,122 @@ func TestStoreUpdate(t *testing.T) {
 	}
 }

-func TestStoreClaudeMapping(t *testing.T) {
-	t.Setenv("DS2API_CONFIG_JSON", `{"keys":[],"accounts":[],"claude_mapping":{"fast":"deepseek-chat","slow":"deepseek-reasoner"}}`)
+// TestStoreUpdateReconcilesAPIKeyMutations checks that adding to and removing
+// from cfg.APIKeys inside Store.Update is reflected in both Snapshot().Keys and
+// Snapshot().APIKeys, and that name/remark metadata is kept for every key.
+func TestStoreUpdateReconcilesAPIKeyMutations(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["k1"],
+		"api_keys":[{"key":"k1","name":"primary","remark":"prod"}],
+		"accounts":[]
+	}`)
 	store := LoadStore()
-	mapping := store.ClaudeMapping()
-	if mapping["fast"] != "deepseek-chat" {
-		t.Fatalf("unexpected fast mapping: %q", mapping["fast"])
+
+	// Step 1: append a second structured key; only APIKeys is touched.
+	if err := store.Update(func(cfg *Config) error {
+		cfg.APIKeys = append(cfg.APIKeys, APIKey{Key: "k2", Name: "secondary", Remark: "staging"})
+		return nil
+	}); err != nil {
+		t.Fatalf("add api key failed: %v", err)
 	}
-	if mapping["slow"] != "deepseek-reasoner" {
-		t.Fatalf("unexpected slow mapping: %q", mapping["slow"])
+
+	// The legacy Keys list is expected to mirror the structured list, in order.
+	snap := store.Snapshot()
+	if len(snap.Keys) != 2 || snap.Keys[0] != "k1" || snap.Keys[1] != "k2" {
+		t.Fatalf("unexpected keys after api key add: %#v", snap.Keys)
+	}
+	if len(snap.APIKeys) != 2 {
+		t.Fatalf("unexpected api keys length after add: %#v", snap.APIKeys)
+	}
+	if snap.APIKeys[0].Name != "primary" || snap.APIKeys[0].Remark != "prod" {
+		t.Fatalf("metadata for existing key was lost: %#v", snap.APIKeys[0])
+	}
+	if snap.APIKeys[1].Name != "secondary" || snap.APIKeys[1].Remark != "staging" {
+		t.Fatalf("metadata for new key was lost: %#v", snap.APIKeys[1])
+	}
+
+	// Step 2: drop the first structured key, copying the tail into a fresh slice.
+	if err := store.Update(func(cfg *Config) error {
+		cfg.APIKeys = append([]APIKey(nil), cfg.APIKeys[1:]...)
+		return nil
+	}); err != nil {
+		t.Fatalf("delete api key failed: %v", err)
+	}
+
+	snap = store.Snapshot()
+	if len(snap.Keys) != 1 || snap.Keys[0] != "k2" {
+		t.Fatalf("unexpected keys after api key delete: %#v", snap.Keys)
+	}
+	if len(snap.APIKeys) != 1 || snap.APIKeys[0].Key != "k2" {
+		t.Fatalf("unexpected api keys after delete: %#v", snap.APIKeys)
 	}
 }

-func TestStoreClaudeMappingEmpty(t *testing.T) {
+// TestStoreUpdateReconcilesLegacyKeyMutations checks that appending to the
+// legacy cfg.Keys list inside Store.Update also yields a matching APIKeys
+// entry, keeps the metadata of the existing key, and leaves the new key without
+// name or remark.
+func TestStoreUpdateReconcilesLegacyKeyMutations(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["k1"],
+		"api_keys":[{"key":"k1","name":"primary","remark":"prod"}],
+		"accounts":[]
+	}`)
+	store := LoadStore()
+
+	appendLegacyKey := func(cfg *Config) error {
+		cfg.Keys = append(cfg.Keys, "k2")
+		return nil
+	}
+	if err := store.Update(appendLegacyKey); err != nil {
+		t.Fatalf("legacy key update failed: %v", err)
+	}
+
+	got := store.Snapshot()
+	if len(got.Keys) != 2 || got.Keys[0] != "k1" || got.Keys[1] != "k2" {
+		t.Fatalf("unexpected keys after legacy update: %#v", got.Keys)
+	}
+	if len(got.APIKeys) != 2 {
+		t.Fatalf("unexpected api keys after legacy update: %#v", got.APIKeys)
+	}
+
+	preserved, added := got.APIKeys[0], got.APIKeys[1]
+	if preserved.Name != "primary" || preserved.Remark != "prod" {
+		t.Fatalf("metadata for preserved key was lost: %#v", preserved)
+	}
+	if added.Key != "k2" || added.Name != "" || added.Remark != "" {
+		t.Fatalf("new legacy key should stay metadata-free: %#v", added)
+	}
+}
+
+// TestNormalizeCredentialsPrefersStructuredAPIKeys checks that when Keys and
+// APIKeys disagree, NormalizeCredentials leaves only the structured key in both
+// lists and keeps its name and remark.
+func TestNormalizeCredentialsPrefersStructuredAPIKeys(t *testing.T) {
+	structured := APIKey{Key: "structured-key", Name: "primary", Remark: "prod"}
+	cfg := Config{
+		Keys:    []string{"legacy-key"},
+		APIKeys: []APIKey{structured},
+	}
+	cfg.NormalizeCredentials()
+
+	if len(cfg.Keys) != 1 || cfg.Keys[0] != "structured-key" {
+		t.Fatalf("unexpected normalized keys: %#v", cfg.Keys)
+	}
+	if len(cfg.APIKeys) != 1 {
+		t.Fatalf("unexpected normalized api keys: %#v", cfg.APIKeys)
+	}
+	only := cfg.APIKeys[0]
+	if only.Key != "structured-key" || only.Name != "primary" || only.Remark != "prod" {
+		t.Fatalf("unexpected structured api key metadata: %#v", only)
+	}
+}
+
+// TestStoreModelAliasesIncludesDefaultsAndOverrides checks that a configured
+// model_aliases entry is returned by Store.ModelAliases alongside the built-in
+// claude-sonnet-4-6 alias.
+func TestStoreModelAliasesIncludesDefaultsAndOverrides(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{"keys":[],"accounts":[],"model_aliases":{"claude-opus-4-6":"deepseek-v4-pro-search"}}`)
+	store := LoadStore()
+	table := store.ModelAliases()
+
+	if builtin := table["claude-sonnet-4-6"]; builtin != "deepseek-v4-flash" {
+		t.Fatalf("expected default alias to remain available, got %q", builtin)
+	}
+	if custom := table["claude-opus-4-6"]; custom != "deepseek-v4-pro-search" {
+		t.Fatalf("expected custom alias override, got %q", custom)
+	}
+}
+
+// TestStoreModelAliasesDefault checks that a config without model_aliases still
+// yields a non-nil alias map containing the built-in claude-sonnet-4-6 entry.
+func TestStoreModelAliasesDefault(t *testing.T) {
 	t.Setenv("DS2API_CONFIG_JSON", `{"keys":[],"accounts":[]}`)
 	store := LoadStore()
-	mapping := store.ClaudeMapping()
-	// Even without config mapping, there are defaults
-	if mapping == nil {
-		t.Fatal("expected non-nil mapping (may contain defaults)")
+	aliases := store.ModelAliases()
+	if aliases == nil {
+		t.Fatal("expected non-nil aliases")
+	}
+	if aliases["claude-sonnet-4-6"] != "deepseek-v4-flash" {
+		t.Fatalf("expected built-in alias, got %q", aliases["claude-sonnet-4-6"])
 	}
 }

@@ -467,6 +754,28 @@ func TestOpenAIModelsResponse(t *testing.T) {
 	if len(data) == 0 {
 		t.Fatal("expected non-empty models list")
 	}
+	expected := map[string]bool{
+		"deepseek-v4-flash":                   false,
+		"deepseek-v4-flash-nothinking":        false,
+		"deepseek-v4-pro":                     false,
+		"deepseek-v4-pro-nothinking":          false,
+		"deepseek-v4-flash-search":            false,
+		"deepseek-v4-flash-search-nothinking": false,
+		"deepseek-v4-pro-search":              false,
+		"deepseek-v4-pro-search-nothinking":   false,
+		"deepseek-v4-vision":                  false,
+		"deepseek-v4-vision-nothinking":       false,
+	}
+	for _, model := range data {
+		if _, ok := expected[model.ID]; ok {
+			expected[model.ID] = true
+		}
+	}
+	for id, seen := range expected {
+		if !seen {
+			t.Fatalf("expected OpenAI model list to include %s", id)
+		}
+	}
 }

 func TestClaudeModelsResponse(t *testing.T) {
--- a/internal/config/config_test.go
+++ b/internal/config/config_test.go
@@ -2,6 +2,7 @@ package config

 import (
 	"encoding/base64"
+	"errors"
 	"os"
 	"strings"
 	"testing"
@@ -31,6 +32,47 @@ func TestLoadStoreClearsTokensFromConfigInput(t *testing.T) {
 	}
 }

+// TestLoadStorePreservesProxiesAndAccountProxyAssignment loads a config that
+// declares one proxy and one account bound to it through proxy_id, then checks
+// that the snapshot still carries the proxy and the account's assignment.
+func TestLoadStorePreservesProxiesAndAccountProxyAssignment(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"proxies":[
+			{
+				"id":"proxy-sh-1",
+				"name":"Shanghai Exit",
+				"type":"socks5h",
+				"host":"127.0.0.1",
+				"port":1080,
+				"username":"demo",
+				"password":"secret"
+			}
+		],
+		"accounts":[
+			{
+				"email":"u@example.com",
+				"password":"p",
+				"proxy_id":"proxy-sh-1"
+			}
+		]
+	}`)
+
+	store := LoadStore()
+	got := store.Snapshot()
+
+	if n := len(got.Proxies); n != 1 {
+		t.Fatalf("expected 1 proxy, got %d", n)
+	}
+	proxy := got.Proxies[0]
+	if proxy.ID != "proxy-sh-1" {
+		t.Fatalf("unexpected proxy id: %#v", proxy)
+	}
+	if proxy.Type != "socks5h" {
+		t.Fatalf("unexpected proxy type: %#v", proxy)
+	}
+
+	if n := len(got.Accounts); n != 1 {
+		t.Fatalf("expected 1 account, got %d", n)
+	}
+	if account := got.Accounts[0]; account.ProxyID != "proxy-sh-1" {
+		t.Fatalf("expected account proxy assignment preserved, got %#v", account)
+	}
+}
+
 func TestLoadStoreDropsLegacyTokenOnlyAccounts(t *testing.T) {
 	t.Setenv("DS2API_CONFIG_JSON", `{
 		"accounts":[
@@ -57,8 +99,7 @@ func TestLoadStorePreservesFileBackedTokensForRuntime(t *testing.T) {
 	if err != nil {
 		t.Fatalf("create temp config: %v", err)
 	}
-	defer tmp.Close()
-
+	defer func() { _ = tmp.Close() }()
 	if _, err := tmp.WriteString(`{
 		"accounts":[{"email":"u@example.com","password":"p","token":"persisted-token"}]
 	}`); err != nil {
@@ -66,7 +107,6 @@ func TestLoadStorePreservesFileBackedTokensForRuntime(t *testing.T) {
 	}

 	t.Setenv("DS2API_CONFIG_JSON", "")
-	t.Setenv("CONFIG_JSON", "")
 	t.Setenv("DS2API_CONFIG_PATH", tmp.Name())

 	store := LoadStore()
@@ -79,6 +119,183 @@ func TestLoadStorePreservesFileBackedTokensForRuntime(t *testing.T) {
 	}
 }

+// TestLoadStoreIgnoresLegacyConfigJSONEnv checks that a config supplied only
+// through the legacy CONFIG_JSON variable is not picked up: the store reports
+// no env source, is not env-backed, and holds no keys or accounts.
+func TestLoadStoreIgnoresLegacyConfigJSONEnv(t *testing.T) {
+	// Reserve a unique name inside the test's temp dir, then delete the file so
+	// DS2API_CONFIG_PATH points at a path that does not exist.
+	placeholder, err := os.CreateTemp(t.TempDir(), "config-*.json")
+	if err != nil {
+		t.Fatalf("create temp config: %v", err)
+	}
+	missingPath := placeholder.Name()
+	_ = placeholder.Close()
+	_ = os.Remove(missingPath)
+
+	t.Setenv("DS2API_CONFIG_JSON", "")
+	t.Setenv("CONFIG_JSON", `{"keys":["legacy-key"],"accounts":[{"email":"legacy@example.com","password":"p"}]}`)
+	t.Setenv("DS2API_CONFIG_PATH", missingPath)
+
+	store := LoadStore()
+	if store.HasEnvConfigSource() {
+		t.Fatal("expected legacy CONFIG_JSON to be ignored")
+	}
+	if store.IsEnvBacked() {
+		t.Fatal("expected store to remain file-backed/empty when only CONFIG_JSON is set")
+	}
+	keyCount, accountCount := len(store.Keys()), len(store.Accounts())
+	if keyCount != 0 || accountCount != 0 {
+		t.Fatalf("expected ignored legacy env to leave store empty, got keys=%d accounts=%d", keyCount, accountCount)
+	}
+}
+
+// TestEnvBackedStoreWritebackBootstrapsMissingConfigFile checks that with
+// DS2API_ENV_WRITEBACK=1 and no config file on disk, a store seeded from
+// DS2API_CONFIG_JSON is not env-backed, persists updates to DS2API_CONFIG_PATH,
+// and that a second LoadStore sees both the seed and the added account.
+func TestEnvBackedStoreWritebackBootstrapsMissingConfigFile(t *testing.T) {
+	// Reserve a unique name, then delete the file so the path starts out missing.
+	placeholder, err := os.CreateTemp(t.TempDir(), "config-*.json")
+	if err != nil {
+		t.Fatalf("create temp config: %v", err)
+	}
+	configPath := placeholder.Name()
+	_ = placeholder.Close()
+	_ = os.Remove(configPath)
+
+	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[{"email":"seed@example.com","password":"p"}]}`)
+	t.Setenv("DS2API_CONFIG_PATH", configPath)
+	t.Setenv("DS2API_ENV_WRITEBACK", "1")
+
+	store := LoadStore()
+	if store.IsEnvBacked() {
+		t.Fatalf("expected writeback bootstrap to become file-backed immediately")
+	}
+
+	addAccount := func(c *Config) error {
+		c.Accounts = append(c.Accounts, Account{Email: "new@example.com", Password: "p2"})
+		return nil
+	}
+	if err := store.Update(addAccount); err != nil {
+		t.Fatalf("update failed: %v", err)
+	}
+
+	content, err := os.ReadFile(configPath)
+	if err != nil {
+		t.Fatalf("read written config: %v", err)
+	}
+	persisted := string(content)
+	if !strings.Contains(persisted, "seed@example.com") {
+		t.Fatalf("expected bootstrapped config to contain seed account, got: %s", content)
+	}
+	if !strings.Contains(persisted, "new@example.com") {
+		t.Fatalf("expected persisted config to contain added account, got: %s", content)
+	}
+
+	reloaded := LoadStore()
+	if reloaded.IsEnvBacked() {
+		t.Fatalf("expected reloaded store to prefer persisted config file")
+	}
+	if n := len(reloaded.Accounts()); n != 2 {
+		t.Fatalf("expected 2 accounts after reload, got %d", n)
+	}
+}
+
+// TestEnvBackedStoreWritebackDoesNotBootstrapOnInvalidEnvJSON checks that
+// malformed DS2API_CONFIG_JSON makes loadConfig return an error with
+// fromEnv=true and an empty config, and that no config file is created at
+// DS2API_CONFIG_PATH even though writeback is enabled.
+func TestEnvBackedStoreWritebackDoesNotBootstrapOnInvalidEnvJSON(t *testing.T) {
+	// Reserve a unique name, then delete the file so the path starts out missing.
+	placeholder, err := os.CreateTemp(t.TempDir(), "config-*.json")
+	if err != nil {
+		t.Fatalf("create temp config: %v", err)
+	}
+	configPath := placeholder.Name()
+	_ = placeholder.Close()
+	_ = os.Remove(configPath)
+
+	t.Setenv("DS2API_CONFIG_JSON", "{invalid-json")
+	t.Setenv("DS2API_CONFIG_PATH", configPath)
+	t.Setenv("DS2API_ENV_WRITEBACK", "1")
+
+	cfg, fromEnv, loadErr := loadConfig()
+	if loadErr == nil {
+		t.Fatalf("expected loadConfig error for invalid env json")
+	}
+	if !fromEnv {
+		t.Fatalf("expected fromEnv=true when parsing env config fails")
+	}
+	keyCount, accountCount := len(cfg.Keys), len(cfg.Accounts)
+	if keyCount != 0 || accountCount != 0 {
+		t.Fatalf("expected empty config on parse failure, got keys=%d accounts=%d", keyCount, accountCount)
+	}
+	_, statErr := os.Stat(configPath)
+	if !errors.Is(statErr, os.ErrNotExist) {
+		t.Fatalf("expected no bootstrapped config file, stat err=%v", statErr)
+	}
+}
+
+// TestEnvBackedStoreWritebackDoesNotBootstrapOnInvalidSemanticConfig checks
+// that env JSON which parses but carries runtime.account_max_inflight=300 makes
+// loadConfig return a validation error naming that field, while still
+// returning the parsed config with fromEnv=true and writing no config file.
+func TestEnvBackedStoreWritebackDoesNotBootstrapOnInvalidSemanticConfig(t *testing.T) {
+	// Reserve a unique name, then delete the file so the path starts out missing.
+	placeholder, err := os.CreateTemp(t.TempDir(), "config-*.json")
+	if err != nil {
+		t.Fatalf("create temp config: %v", err)
+	}
+	configPath := placeholder.Name()
+	_ = placeholder.Close()
+	_ = os.Remove(configPath)
+
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["k1"],
+		"accounts":[{"email":"seed@example.com","password":"p"}],
+		"runtime":{"account_max_inflight":300}
+	}`)
+	t.Setenv("DS2API_CONFIG_PATH", configPath)
+	t.Setenv("DS2API_ENV_WRITEBACK", "1")
+
+	cfg, fromEnv, loadErr := loadConfig()
+	if loadErr == nil {
+		t.Fatalf("expected loadConfig error for invalid runtime config")
+	}
+	if !fromEnv {
+		t.Fatalf("expected fromEnv=true when env config is the source")
+	}
+	if msg := loadErr.Error(); !strings.Contains(msg, "runtime.account_max_inflight") {
+		t.Fatalf("expected runtime validation error, got %v", loadErr)
+	}
+	keyCount, accountCount := len(cfg.Keys), len(cfg.Accounts)
+	if keyCount != 1 || accountCount != 1 {
+		t.Fatalf("expected env config to be parsed before validation, got keys=%d accounts=%d", keyCount, accountCount)
+	}
+	_, statErr := os.Stat(configPath)
+	if !errors.Is(statErr, os.ErrNotExist) {
+		t.Fatalf("expected invalid config not to be bootstrapped, stat err=%v", statErr)
+	}
+}
+
+// TestLoadStoreWithErrorRejectsInvalidRuntimeConfig checks that
+// LoadStoreWithError returns an error mentioning runtime.account_max_inflight
+// when the env config sets that field to 300.
+func TestLoadStoreWithErrorRejectsInvalidRuntimeConfig(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["k1"],
+		"accounts":[{"email":"u@example.com","password":"p"}],
+		"runtime":{"account_max_inflight":300}
+	}`)
+	t.Setenv("DS2API_ENV_WRITEBACK", "0")
+
+	_, err := LoadStoreWithError()
+	if err == nil {
+		t.Fatal("expected LoadStoreWithError to reject invalid runtime config")
+	}
+	if !strings.Contains(err.Error(), "runtime.account_max_inflight") {
+		t.Fatalf("expected runtime validation error, got %v", err)
+	}
+}
+
+// TestEnvBackedStoreWritebackFallsBackToPersistedFileOnInvalidEnvJSON checks
+// that with writeback enabled and a valid config file on disk, malformed
+// DS2API_CONFIG_JSON does not fail loadConfig: the file's keys and accounts are
+// returned and fromEnv is false.
+func TestEnvBackedStoreWritebackFallsBackToPersistedFileOnInvalidEnvJSON(t *testing.T) {
+	persistedFile, err := os.CreateTemp(t.TempDir(), "config-*.json")
+	if err != nil {
+		t.Fatalf("create temp config: %v", err)
+	}
+	configPath := persistedFile.Name()
+	if _, err := persistedFile.WriteString(`{"keys":["file-key"],"accounts":[{"email":"persisted@example.com","password":"p"}]}`); err != nil {
+		t.Fatalf("write temp config: %v", err)
+	}
+	_ = persistedFile.Close()
+
+	t.Setenv("DS2API_CONFIG_JSON", "{invalid-json")
+	t.Setenv("DS2API_CONFIG_PATH", configPath)
+	t.Setenv("DS2API_ENV_WRITEBACK", "1")
+
+	cfg, fromEnv, loadErr := loadConfig()
+	if loadErr != nil {
+		t.Fatalf("expected fallback to persisted file, got error: %v", loadErr)
+	}
+	if fromEnv {
+		t.Fatalf("expected fallback to file-backed mode")
+	}
+	if keys := cfg.Keys; len(keys) != 1 || keys[0] != "file-key" {
+		t.Fatalf("unexpected keys after fallback: %#v", keys)
+	}
+	if accounts := cfg.Accounts; len(accounts) != 1 || accounts[0].Email != "persisted@example.com" {
+		t.Fatalf("unexpected accounts after fallback: %#v", accounts)
+	}
+}
+
 func TestRuntimeTokenRefreshIntervalHoursDefaultsToSix(t *testing.T) {
 	t.Setenv("DS2API_CONFIG_JSON", `{
 		"keys":["k1"],
@@ -159,7 +376,6 @@ func TestParseConfigStringSupportsRawURLBase64(t *testing.T) {
 func TestLoadConfigOnVercelWithoutConfigFileFallsBackToMemory(t *testing.T) {
 	t.Setenv("VERCEL", "1")
 	t.Setenv("DS2API_CONFIG_JSON", "")
-	t.Setenv("CONFIG_JSON", "")
 	t.Setenv("DS2API_CONFIG_PATH", "testdata/does-not-exist.json")

 	cfg, fromEnv, err := loadConfig()
@@ -179,7 +395,7 @@ func TestAccountTestStatusIsRuntimeOnlyAndNotPersisted(t *testing.T) {
 	if err != nil {
 		t.Fatalf("create temp config: %v", err)
 	}
-	defer tmp.Close()
+	defer func() { _ = tmp.Close() }()
 	if _, err := tmp.WriteString(`{
 		"accounts":[{"email":"u@example.com","password":"p","test_status":"ok"}]
 	}`); err != nil {
@@ -187,7 +403,6 @@ func TestAccountTestStatusIsRuntimeOnlyAndNotPersisted(t *testing.T) {
 	}

 	t.Setenv("DS2API_CONFIG_JSON", "")
-	t.Setenv("CONFIG_JSON", "")
 	t.Setenv("DS2API_CONFIG_PATH", tmp.Name())

 	store := LoadStore()
--- a/internal/config/credentials.go
+++ b/internal/config/credentials.go
@@ -0,0 +1,158 @@
+package config
+
+import (
+	"slices"
+	"strings"
+)
+
+// ReconcileCredentials brings c.Keys and c.APIKeys back into agreement after a
+// mutation, using base as the state before that mutation.
+//
+// Both lists of c and base are normalized first. If only the legacy Keys list
+// differs from base, APIKeys is rebuilt from the current keys, reusing the
+// name/remark metadata base held for each key. In every other case the
+// normalized current APIKeys win. Keys is then always derived from the
+// resulting APIKeys. A nil receiver is a no-op.
+func (c *Config) ReconcileCredentials(base Config) {
+	if c == nil {
+		return
+	}
+
+	legacyNow, legacyBefore := normalizeKeys(c.Keys), normalizeKeys(base.Keys)
+	structuredNow, structuredBefore := normalizeAPIKeys(c.APIKeys), normalizeAPIKeys(base.APIKeys)
+
+	onlyLegacyEdited := !slices.Equal(legacyNow, legacyBefore) &&
+		equalAPIKeys(structuredNow, structuredBefore)
+
+	reconciled := structuredNow
+	if onlyLegacyEdited {
+		// The caller edited the plain key list; carry over known metadata.
+		reconciled = apiKeysFromStrings(legacyNow, apiKeyMap(structuredBefore))
+	}
+	c.APIKeys = reconciled
+	c.Keys = apiKeysToStrings(reconciled)
+}
+
+// normalizeKeys returns keys with surrounding whitespace trimmed, empty entries
+// dropped and duplicates removed, keeping the first occurrence of each key in
+// its original position. It returns nil when nothing remains.
+func normalizeKeys(keys []string) []string {
+	cleaned := make([]string, 0, len(keys))
+	visited := make(map[string]struct{}, len(keys))
+	for i := range keys {
+		candidate := strings.TrimSpace(keys[i])
+		_, duplicate := visited[candidate]
+		if candidate == "" || duplicate {
+			continue
+		}
+		visited[candidate] = struct{}{}
+		cleaned = append(cleaned, candidate)
+	}
+	if len(cleaned) > 0 {
+		return cleaned
+	}
+	return nil
+}
+
+// normalizeAPIKeys returns items with Key, Name and Remark trimmed, entries
+// whose key is empty after trimming dropped, and later entries with an already
+// seen key dropped. Order of first occurrences is kept. It returns nil when
+// nothing remains.
+func normalizeAPIKeys(items []APIKey) []APIKey {
+	cleaned := make([]APIKey, 0, len(items))
+	visited := make(map[string]struct{}, len(items))
+	for i := range items {
+		key := strings.TrimSpace(items[i].Key)
+		_, duplicate := visited[key]
+		if key == "" || duplicate {
+			continue
+		}
+		visited[key] = struct{}{}
+		cleaned = append(cleaned, APIKey{
+			Key:    key,
+			Name:   strings.TrimSpace(items[i].Name),
+			Remark: strings.TrimSpace(items[i].Remark),
+		})
+	}
+	if len(cleaned) > 0 {
+		return cleaned
+	}
+	return nil
+}
+
+// apiKeysFromStrings builds one APIKey per distinct, non-empty (after trimming)
+// entry of keys, in order. When meta holds an entry for a key, its trimmed Name
+// and Remark are copied; otherwise only Key is set. It returns nil when nothing
+// remains.
+func apiKeysFromStrings(keys []string, meta map[string]APIKey) []APIKey {
+	built := make([]APIKey, 0, len(keys))
+	visited := make(map[string]struct{}, len(keys))
+	for _, raw := range keys {
+		key := strings.TrimSpace(raw)
+		if _, duplicate := visited[key]; key == "" || duplicate {
+			continue
+		}
+		visited[key] = struct{}{}
+
+		entry := APIKey{Key: key}
+		if known, ok := meta[key]; ok {
+			entry.Name = strings.TrimSpace(known.Name)
+			entry.Remark = strings.TrimSpace(known.Remark)
+		}
+		built = append(built, entry)
+	}
+	if len(built) > 0 {
+		return built
+	}
+	return nil
+}
+
+// apiKeysToStrings returns the trimmed Key of every item whose key is not
+// empty, in order. Duplicates are not removed here. It returns nil when nothing
+// remains.
+func apiKeysToStrings(items []APIKey) []string {
+	plain := make([]string, 0, len(items))
+	for i := range items {
+		if key := strings.TrimSpace(items[i].Key); key != "" {
+			plain = append(plain, key)
+		}
+	}
+	if len(plain) > 0 {
+		return plain
+	}
+	return nil
+}
+
+// apiKeyMap indexes items by trimmed key. Entries with an empty key are
+// skipped and the first entry for a key wins. Stored values have Key, Name and
+// Remark trimmed. It returns nil for an empty input, and a non-nil (possibly
+// empty) map otherwise.
+func apiKeyMap(items []APIKey) map[string]APIKey {
+	if len(items) == 0 {
+		return nil
+	}
+	index := make(map[string]APIKey, len(items))
+	for i := range items {
+		key := strings.TrimSpace(items[i].Key)
+		if _, taken := index[key]; key == "" || taken {
+			continue
+		}
+		index[key] = APIKey{
+			Key:    key,
+			Name:   strings.TrimSpace(items[i].Name),
+			Remark: strings.TrimSpace(items[i].Remark),
+		}
+	}
+	return index
+}
+
+// equalAPIKeys reports whether a and b have the same length and, position by
+// position, the same Key, Name and Remark once surrounding whitespace is
+// ignored.
+func equalAPIKeys(a, b []APIKey) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i := range a {
+		left, right := a[i], b[i]
+		if strings.TrimSpace(left.Key) != strings.TrimSpace(right.Key) ||
+			strings.TrimSpace(left.Name) != strings.TrimSpace(right.Name) ||
+			strings.TrimSpace(left.Remark) != strings.TrimSpace(right.Remark) {
+			return false
+		}
+	}
+	return true
+}
--- a/internal/config/dotenv.go
+++ b/internal/config/dotenv.go
@@ -0,0 +1,137 @@
+package config
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+// LoadDotEnv reads the ".env" file found under BaseDir() and exports its
+// assignments into the process environment. Variables that are already set
+// keep their value, and a missing file is not an error.
+func LoadDotEnv() error {
+	dotEnvPath := filepath.Join(BaseDir(), ".env")
+	return loadDotEnvFromPath(dotEnvPath)
+}
+
+// loadDotEnvFromPath parses the dotenv file at path and exports each
+// assignment into the process environment.
+//
+// A missing file is not an error. Blank lines and lines starting with "#" are
+// skipped, an optional leading "export " is dropped, and a variable that is
+// already present in the environment (even when empty) keeps its value. Values
+// go through trimDotEnvValue and normalizeDotEnvValue before being set.
+//
+// It returns the read error for any failure other than "not exist", and a
+// "path:line" error for a line without "=", for an empty key, or when
+// os.Setenv fails.
+func loadDotEnvFromPath(path string) error {
+	content, err := os.ReadFile(path)
+	if err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			return nil
+		}
+		return err
+	}
+
+	lines := strings.Split(strings.ReplaceAll(string(content), "\r\n", "\n"), "\n")
+	for i, rawLine := range lines {
+		line := strings.TrimSpace(rawLine)
+		if i == 0 {
+			// TrimSpace does not treat the UTF-8 BOM as whitespace, so strip it
+			// explicitly and trim again. Without the second trim a first line
+			// such as "<BOM>  # comment" or "<BOM>" followed by blanks would be
+			// rejected as an invalid assignment.
+			line = strings.TrimSpace(strings.TrimPrefix(line, "\ufeff"))
+		}
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		if strings.HasPrefix(line, "export ") {
+			line = strings.TrimSpace(strings.TrimPrefix(line, "export "))
+		}
+
+		key, value, ok := strings.Cut(line, "=")
+		if !ok {
+			return fmt.Errorf("%s:%d invalid env assignment", path, i+1)
+		}
+		key = strings.TrimSpace(key)
+		if key == "" {
+			return fmt.Errorf("%s:%d empty env key", path, i+1)
+		}
+		// Never override a variable the process already has.
+		if _, exists := os.LookupEnv(key); exists {
+			continue
+		}
+		if err := os.Setenv(key, normalizeDotEnvValue(trimDotEnvValue(strings.TrimSpace(value)))); err != nil {
+			return fmt.Errorf("%s:%d set env %q: %w", path, i+1, key, err)
+		}
+	}
+
+	return nil
+}
+
+// trimDotEnvValue cuts trailing text that is not part of the value.
+//
+// A value that opens with a double or single quote is cut right after its
+// closing quote; if no closing quote is found the input is returned unchanged.
+// Any other value is cut before a Compose-style inline comment (a "#" preceded
+// by a space or tab) and trimmed. The quotes themselves are kept.
+func trimDotEnvValue(raw string) string {
+	if raw == "" {
+		return raw
+	}
+
+	if quote := raw[0]; quote == '"' || quote == '\'' {
+		if quoted, closed := trimQuotedDotEnvValue(raw, quote); closed {
+			return quoted
+		}
+		return raw
+	}
+
+	cut := inlineDotEnvCommentStart(raw)
+	if cut < 0 {
+		return raw
+	}
+	return strings.TrimSpace(raw[:cut])
+}
+
+// trimQuotedDotEnvValue scans raw, whose first byte is the opening quote, for
+// the matching closing quote and returns raw up to and including it with ok
+// true. Inside double quotes a backslash makes the following byte literal;
+// single quotes have no escapes. When no closing quote exists it returns raw
+// unchanged with ok false.
+func trimQuotedDotEnvValue(raw string, quote byte) (string, bool) {
+	honorsEscapes := quote == '"'
+	for pos := 1; pos < len(raw); pos++ {
+		switch c := raw[pos]; {
+		case honorsEscapes && c == '\\':
+			pos++ // skip the escaped byte, whatever it is
+		case c == quote:
+			return strings.TrimSpace(raw[:pos+1]), true
+		}
+	}
+	return raw, false
+}
+
+// inlineDotEnvCommentStart returns the index of the first "#" in raw that is
+// directly preceded by a space or tab, or -1 when there is none. A "#" at
+// index 0 never counts because nothing precedes it.
+func inlineDotEnvCommentStart(raw string) int {
+	for prev := 0; prev+1 < len(raw); prev++ {
+		if isDotEnvCommentSpacer(raw[prev]) && raw[prev+1] == '#' {
+			return prev + 1
+		}
+	}
+	return -1
+}
+
+// isDotEnvCommentSpacer reports whether b is a space or a tab.
+func isDotEnvCommentSpacer(b byte) bool {
+	switch b {
+	case ' ', '\t':
+		return true
+	default:
+		return false
+	}
+}
+
+// normalizeDotEnvValue removes one pair of matching surrounding quotes from
+// raw. Single-quoted content is returned verbatim. Double-quoted content has
+// the escapes \\, \n, \r, \t and \" decoded. Values that are shorter than two
+// bytes or not wrapped in a matching quote pair are returned unchanged.
+func normalizeDotEnvValue(raw string) string {
+	if len(raw) < 2 {
+		return raw
+	}
+	quote := raw[0]
+	if raw[len(raw)-1] != quote {
+		return raw
+	}
+	inner := raw[1 : len(raw)-1]
+
+	switch quote {
+	case '\'':
+		return inner
+	case '"':
+		// Pair order matters: `\\` is listed first so an escaped backslash is
+		// consumed before the byte after it can be read as another escape.
+		unescape := strings.NewReplacer(
+			`\\`, `\`,
+			`\n`, "\n",
+			`\r`, "\r",
+			`\t`, "\t",
+			`\"`, `"`,
+		)
+		return unescape.Replace(inner)
+	default:
+		return raw
+	}
+}
--- a/internal/config/dotenv_test.go
+++ b/internal/config/dotenv_test.go
@@ -0,0 +1,135 @@
+package config
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// TestLoadDotEnvLoadsWorkingDirectoryFileWithoutOverridingExistingEnv runs
+// LoadDotEnv from a temp working directory holding a .env file and checks that
+// an unset variable is loaded, an already-set variable keeps its value, and a
+// double-quoted "\n" escape is decoded into a newline.
+func TestLoadDotEnvLoadsWorkingDirectoryFileWithoutOverridingExistingEnv(t *testing.T) {
+	workDir := t.TempDir()
+	startDir, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("getwd: %v", err)
+	}
+	if err := os.Chdir(workDir); err != nil {
+		t.Fatalf("chdir temp dir: %v", err)
+	}
+	// Restore the original working directory for later tests.
+	t.Cleanup(func() {
+		_ = os.Chdir(startDir)
+	})
+
+	const (
+		newKey    = "DS2API_TEST_DOTENV_NEW"
+		keepKey   = "DS2API_TEST_DOTENV_KEEP"
+		quotedKey = "DS2API_TEST_DOTENV_QUOTED"
+	)
+
+	unsetEnv(t, newKey)
+	unsetEnv(t, quotedKey)
+	t.Setenv(keepKey, "from-env")
+
+	content := "DS2API_TEST_DOTENV_NEW=from-file\n" +
+		"DS2API_TEST_DOTENV_KEEP=from-file\n" +
+		"DS2API_TEST_DOTENV_QUOTED=\"line1\\nline2\"\n"
+	dotEnvPath := filepath.Join(workDir, ".env")
+	if err := os.WriteFile(dotEnvPath, []byte(content), 0o644); err != nil {
+		t.Fatalf("write .env: %v", err)
+	}
+
+	if err := LoadDotEnv(); err != nil {
+		t.Fatalf("LoadDotEnv() error: %v", err)
+	}
+
+	if loaded := os.Getenv(newKey); loaded != "from-file" {
+		t.Fatalf("expected %s from .env, got %q", newKey, loaded)
+	}
+	if kept := os.Getenv(keepKey); kept != "from-env" {
+		t.Fatalf("expected existing env to win, got %q", kept)
+	}
+	if decoded := os.Getenv(quotedKey); decoded != "line1\nline2" {
+		t.Fatalf("expected quoted newline decoding, got %q", decoded)
+	}
+}
+
+// TestLoadDotEnvIgnoresMissingFile runs LoadDotEnv from an empty temp working
+// directory and checks that the absent .env file does not produce an error.
+func TestLoadDotEnvIgnoresMissingFile(t *testing.T) {
+	emptyDir := t.TempDir()
+	startDir, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("getwd: %v", err)
+	}
+	if err := os.Chdir(emptyDir); err != nil {
+		t.Fatalf("chdir temp dir: %v", err)
+	}
+	// Restore the original working directory for later tests.
+	t.Cleanup(func() {
+		_ = os.Chdir(startDir)
+	})
+
+	loadErr := LoadDotEnv()
+	if loadErr != nil {
+		t.Fatalf("expected missing .env to be ignored, got %v", loadErr)
+	}
+}
+
+// TestLoadDotEnvStripsInlineCommentsFromUnquotedValues checks inline-comment
+// handling: " # ..." is removed from unquoted values, a "#" with no preceding
+// whitespace stays part of the value, a "#" inside double quotes is kept while
+// the comment after the closing quote is dropped, and "export KEY=value" lines
+// are loaded.
+func TestLoadDotEnvStripsInlineCommentsFromUnquotedValues(t *testing.T) {
+	workDir := t.TempDir()
+	startDir, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("getwd: %v", err)
+	}
+	if err := os.Chdir(workDir); err != nil {
+		t.Fatalf("chdir temp dir: %v", err)
+	}
+	// Restore the original working directory for later tests.
+	t.Cleanup(func() {
+		_ = os.Chdir(startDir)
+	})
+
+	const (
+		plainKey  = "DS2API_TEST_DOTENV_PLAIN"
+		hashKey   = "DS2API_TEST_DOTENV_HASH"
+		quotedKey = "DS2API_TEST_DOTENV_QUOTED_COMMENT"
+		exportKey = "DS2API_TEST_DOTENV_EXPORT"
+	)
+
+	for _, key := range []string{plainKey, hashKey, quotedKey, exportKey} {
+		unsetEnv(t, key)
+	}
+
+	content := strings.Join([]string{
+		plainKey + "=5001 # local",
+		hashKey + "=5001#local",
+		quotedKey + `="5001 # local" # keep the inner hash`,
+		"export " + exportKey + "=enabled # exported",
+	}, "\n") + "\n"
+	dotEnvPath := filepath.Join(workDir, ".env")
+	if err := os.WriteFile(dotEnvPath, []byte(content), 0o644); err != nil {
+		t.Fatalf("write .env: %v", err)
+	}
+
+	if err := LoadDotEnv(); err != nil {
+		t.Fatalf("LoadDotEnv() error: %v", err)
+	}
+
+	if plain := os.Getenv(plainKey); plain != "5001" {
+		t.Fatalf("expected inline comment to be stripped, got %q", plain)
+	}
+	if hashed := os.Getenv(hashKey); hashed != "5001#local" {
+		t.Fatalf("expected hash without preceding whitespace to remain, got %q", hashed)
+	}
+	if quoted := os.Getenv(quotedKey); quoted != "5001 # local" {
+		t.Fatalf("expected quoted value to preserve hash text, got %q", quoted)
+	}
+	if exported := os.Getenv(exportKey); exported != "enabled" {
+		t.Fatalf("expected export syntax to load, got %q", exported)
+	}
+}
+
+// unsetEnv removes key from the environment for the duration of the test and
+// registers a cleanup that restores the previous value, or removes the variable
+// again when it was not set before. It fails the test if the unset itself
+// fails.
+func unsetEnv(t *testing.T, key string) {
+	t.Helper()
+	previous, wasSet := os.LookupEnv(key)
+	if err := os.Unsetenv(key); err != nil {
+		t.Fatalf("unset %s: %v", key, err)
+	}
+
+	restore := func() { _ = os.Unsetenv(key) }
+	if wasSet {
+		restore = func() { _ = os.Setenv(key, previous) }
+	}
+	t.Cleanup(restore)
+}
--- a/internal/config/logger.go
+++ b/internal/config/logger.go
@@ -23,3 +23,7 @@ func newLogger() *slog.Logger {
 	h := slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: level})
 	return slog.New(h)
 }
+
+// RefreshLogger replaces the package-level Logger with a fresh one built by
+// newLogger. The assignment is not synchronized here.
+func RefreshLogger() {
+	rebuilt := newLogger()
+	Logger = rebuilt
+}
--- a/internal/config/model_alias_test.go
+++ b/internal/config/model_alias_test.go
@@ -2,27 +2,93 @@ package config

 import "testing"

+// mockModelAliasReader is a map-backed test double whose ModelAliases method
+// returns the map itself.
+type mockModelAliasReader map[string]string
+
+// ModelAliases returns the receiver unchanged.
+func (aliases mockModelAliasReader) ModelAliases() map[string]string {
+	return aliases
+}
+
+// TestResolveModelDirectDeepSeek checks that the native ID "deepseek-v4-flash"
+// resolves to itself when no alias reader is supplied.
 func TestResolveModelDirectDeepSeek(t *testing.T) {
-	got, ok := ResolveModel(nil, "deepseek-chat")
-	if !ok || got != "deepseek-chat" {
-		t.Fatalf("expected deepseek-chat, got ok=%v model=%q", ok, got)
+	got, ok := ResolveModel(nil, "deepseek-v4-flash")
+	if !ok || got != "deepseek-v4-flash" {
+		t.Fatalf("expected deepseek-v4-flash, got ok=%v model=%q", ok, got)
+	}
+}
+
+// TestResolveModelDirectDeepSeekNoThinking checks that the native ID
+// "deepseek-v4-flash-nothinking" resolves to itself.
+func TestResolveModelDirectDeepSeekNoThinking(t *testing.T) {
+	got, ok := ResolveModel(nil, "deepseek-v4-flash-nothinking")
+	if !ok || got != "deepseek-v4-flash-nothinking" {
+		t.Fatalf("expected deepseek-v4-flash-nothinking, got ok=%v model=%q", ok, got)
 	}
 }

+// TestResolveModelAlias checks that "gpt-4.1" resolves to "deepseek-v4-flash"
+// when no alias reader is supplied.
 func TestResolveModelAlias(t *testing.T) {
 	got, ok := ResolveModel(nil, "gpt-4.1")
-	if !ok || got != "deepseek-chat" {
-		t.Fatalf("expected alias gpt-4.1 -> deepseek-chat, got ok=%v model=%q", ok, got)
+	if !ok || got != "deepseek-v4-flash" {
+		t.Fatalf("expected alias gpt-4.1 -> deepseek-v4-flash, got ok=%v model=%q", ok, got)
+	}
+}
+
+// TestResolveLatestOpenAIAlias checks that "gpt-5.5" resolves to
+// "deepseek-v4-flash" when no alias reader is supplied.
+func TestResolveLatestOpenAIAlias(t *testing.T) {
+	const want = "deepseek-v4-flash"
+	resolved, found := ResolveModel(nil, "gpt-5.5")
+	if !(found && resolved == want) {
+		t.Fatalf("expected alias gpt-5.5 -> deepseek-v4-flash, got ok=%v model=%q", found, resolved)
+	}
+}
+
+// TestResolveLatestClaudeAlias checks that "claude-sonnet-4-6" resolves to
+// "deepseek-v4-flash" when no alias reader is supplied.
+func TestResolveLatestClaudeAlias(t *testing.T) {
+	const want = "deepseek-v4-flash"
+	resolved, found := ResolveModel(nil, "claude-sonnet-4-6")
+	if !(found && resolved == want) {
+		t.Fatalf("expected alias claude-sonnet-4-6 -> deepseek-v4-flash, got ok=%v model=%q", found, resolved)
+	}
+}
+
+// TestResolveLatestClaudeAliasNoThinking checks that the "-nothinking" form of
+// the claude-sonnet-4-6 alias resolves to "deepseek-v4-flash-nothinking".
+func TestResolveLatestClaudeAliasNoThinking(t *testing.T) {
+	const want = "deepseek-v4-flash-nothinking"
+	resolved, found := ResolveModel(nil, "claude-sonnet-4-6-nothinking")
+	if !(found && resolved == want) {
+		t.Fatalf("expected alias claude-sonnet-4-6-nothinking -> deepseek-v4-flash-nothinking, got ok=%v model=%q", found, resolved)
+	}
+}
+
+// TestResolveExpandedHistoricalAliases is a table-driven check that a sample of
+// older OpenAI, Claude and Gemini model names each resolve to the expected
+// DeepSeek v4 model when no alias reader is supplied. Each case runs as its own
+// subtest.
+func TestResolveExpandedHistoricalAliases(t *testing.T) {
+	cases := []struct {
+		name  string
+		model string
+		want  string
+	}{
+		{name: "openai old chatgpt", model: "chatgpt-4o", want: "deepseek-v4-flash"},
+		{name: "openai codex max", model: "gpt-5.1-codex-max", want: "deepseek-v4-pro"},
+		{name: "openai deep research", model: "o3-deep-research", want: "deepseek-v4-pro-search"},
+		{name: "openai historical reasoning", model: "o1-preview", want: "deepseek-v4-pro"},
+		{name: "claude latest historical", model: "claude-3-5-sonnet-latest", want: "deepseek-v4-flash"},
+		{name: "claude historical opus", model: "claude-3-opus-20240229", want: "deepseek-v4-pro"},
+		{name: "claude historical haiku", model: "claude-3-haiku-20240307", want: "deepseek-v4-flash"},
+		{name: "gemini latest alias", model: "gemini-flash-latest", want: "deepseek-v4-flash"},
+		{name: "gemini historical pro", model: "gemini-1.5-pro", want: "deepseek-v4-pro"},
+		{name: "gemini vision legacy", model: "gemini-pro-vision", want: "deepseek-v4-vision"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, ok := ResolveModel(nil, tc.model)
+			if !ok || got != tc.want {
+				t.Fatalf("expected alias %s -> %s, got ok=%v model=%q", tc.model, tc.want, ok, got)
+			}
+		})
 	}
 }

+// TestResolveModelHeuristicReasoner checks that the unlisted name "o3-super"
+// still resolves, and resolves to "deepseek-v4-pro".
 func TestResolveModelHeuristicReasoner(t *testing.T) {
 	got, ok := ResolveModel(nil, "o3-super")
-	if !ok || got != "deepseek-reasoner" {
+	if !ok || got != "deepseek-v4-pro" {
 		t.Fatalf("expected heuristic reasoner, got ok=%v model=%q", ok, got)
 	}
 }

+// TestResolveModelHeuristicReasonerNoThinking checks that
+// "o3-super-nothinking" resolves to "deepseek-v4-pro-nothinking".
+func TestResolveModelHeuristicReasonerNoThinking(t *testing.T) {
+	const want = "deepseek-v4-pro-nothinking"
+	resolved, found := ResolveModel(nil, "o3-super-nothinking")
+	if !(found && resolved == want) {
+		t.Fatalf("expected heuristic reasoner nothinking, got ok=%v model=%q", found, resolved)
+	}
+}
+
 func TestResolveModelUnknown(t *testing.T) {
 	_, ok := ResolveModel(nil, "totally-custom-model")
 	if ok {
@@ -30,6 +96,68 @@ func TestResolveModelUnknown(t *testing.T) {
 	}
 }

+// TestResolveModelRejectsLegacyDeepSeekIDs checks that none of the pre-v4
+// DeepSeek model IDs listed below resolve any more. It stops at the first ID
+// that is still accepted.
+func TestResolveModelRejectsLegacyDeepSeekIDs(t *testing.T) {
+	for _, legacyID := range []string{
+		"deepseek-chat",
+		"deepseek-reasoner",
+		"deepseek-chat-search",
+		"deepseek-reasoner-search",
+		"deepseek-expert-chat",
+		"deepseek-expert-reasoner",
+		"deepseek-vision-chat",
+	} {
+		resolved, accepted := ResolveModel(nil, legacyID)
+		if accepted {
+			t.Fatalf("expected legacy model %q to be rejected, got %q", legacyID, resolved)
+		}
+	}
+}
+
+// TestResolveModelRejectsRetiredHistoricalModels checks that the retired
+// third-party model names listed below do not resolve. It stops at the first
+// name that is still accepted.
+func TestResolveModelRejectsRetiredHistoricalModels(t *testing.T) {
+	for _, retiredID := range []string{
+		"claude-2.1",
+		"claude-instant-1.2",
+		"gpt-3.5-turbo",
+	} {
+		resolved, accepted := ResolveModel(nil, retiredID)
+		if accepted {
+			t.Fatalf("expected retired model %q to be rejected, got %q", retiredID, resolved)
+		}
+	}
+}
+
+// TestResolveModelDirectDeepSeekExpert checks that the native ID
+// "deepseek-v4-pro" resolves to itself.
+func TestResolveModelDirectDeepSeekExpert(t *testing.T) {
+	const want = "deepseek-v4-pro"
+	resolved, found := ResolveModel(nil, "deepseek-v4-pro")
+	if !(found && resolved == want) {
+		t.Fatalf("expected deepseek-v4-pro, got ok=%v model=%q", found, resolved)
+	}
+}
+
+// TestResolveModelCustomAliasToExpert checks that an alias supplied through a
+// ModelAliases reader is followed to "deepseek-v4-pro-search".
+func TestResolveModelCustomAliasToExpert(t *testing.T) {
+	custom := mockModelAliasReader{"my-expert-model": "deepseek-v4-pro-search"}
+	resolved, found := ResolveModel(custom, "my-expert-model")
+	if !(found && resolved == "deepseek-v4-pro-search") {
+		t.Fatalf("expected alias -> deepseek-v4-pro-search, got ok=%v model=%q", found, resolved)
+	}
+}
+
+// TestResolveModelCustomAliasToVision checks that an alias supplied through a
+// ModelAliases reader is followed to "deepseek-v4-vision".
+func TestResolveModelCustomAliasToVision(t *testing.T) {
+	custom := mockModelAliasReader{"my-vision-model": "deepseek-v4-vision"}
+	resolved, found := ResolveModel(custom, "my-vision-model")
+	if !(found && resolved == "deepseek-v4-vision") {
+		t.Fatalf("expected alias -> deepseek-v4-vision, got ok=%v model=%q", found, resolved)
+	}
+}
+
+// TestResolveModelHeuristicVisionIgnoresSearchSuffix checks that the unlisted
+// name "gemini-vision-search" resolves to plain "deepseek-v4-vision", with no
+// search variant attached.
+func TestResolveModelHeuristicVisionIgnoresSearchSuffix(t *testing.T) {
+	const want = "deepseek-v4-vision"
+	resolved, found := ResolveModel(nil, "gemini-vision-search")
+	if !(found && resolved == want) {
+		t.Fatalf("expected heuristic vision alias to resolve without search variant, got ok=%v model=%q", found, resolved)
+	}
+}
+
 func TestClaudeModelsResponsePaginationFields(t *testing.T) {
 	resp := ClaudeModelsResponse()
 	if _, ok := resp["first_id"]; !ok {
--- a/internal/config/models.go
+++ b/internal/config/models.go
@@ -14,21 +14,26 @@ type ModelAliasReader interface {
 	ModelAliases() map[string]string
 }

-var DeepSeekModels = []ModelInfo{
-	{ID: "deepseek-chat", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
-	{ID: "deepseek-reasoner", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
-	{ID: "deepseek-chat-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
-	{ID: "deepseek-reasoner-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
+const noThinkingModelSuffix = "-nothinking"
+
+var deepSeekBaseModels = []ModelInfo{
+	{ID: "deepseek-v4-flash", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
+	{ID: "deepseek-v4-pro", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
+	{ID: "deepseek-v4-flash-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
+	{ID: "deepseek-v4-pro-search", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
+	{ID: "deepseek-v4-vision", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
 }

-var ClaudeModels = []ModelInfo{
+var DeepSeekModels = appendNoThinkingVariants(deepSeekBaseModels)
+
+var claudeBaseModels = []ModelInfo{
 	// Current aliases
 	{ID: "claude-opus-4-6", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
-	{ID: "claude-sonnet-4-5", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
+	{ID: "claude-sonnet-4-6", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
 	{ID: "claude-haiku-4-5", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},

-	// Current snapshots
-	{ID: "claude-opus-4-5-20251101", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
+	// Claude 4.x snapshots and prior aliases kept for compatibility
+	{ID: "claude-sonnet-4-5", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
 	{ID: "claude-opus-4-1", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
 	{ID: "claude-opus-4-1-20250805", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
 	{ID: "claude-opus-4-0", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
@@ -49,62 +54,150 @@ var ClaudeModels = []ModelInfo{
 	{ID: "claude-3-5-haiku-latest", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
 	{ID: "claude-3-5-haiku-20241022", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
 	{ID: "claude-3-haiku-20240307", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
-
-	// Claude 2.x and 1.x (retired but accepted for compatibility)
-	{ID: "claude-2.1", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
-	{ID: "claude-2.0", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
-	{ID: "claude-1.3", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
-	{ID: "claude-1.2", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
-	{ID: "claude-1.1", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
-	{ID: "claude-1.0", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
-	{ID: "claude-instant-1.2", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
-	{ID: "claude-instant-1.1", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
-	{ID: "claude-instant-1.0", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
 }

+var ClaudeModels = appendNoThinkingVariants(claudeBaseModels)
+
 func GetModelConfig(model string) (thinking bool, search bool, ok bool) {
-	switch lower(model) {
-	case "deepseek-chat":
-		return false, false, true
-	case "deepseek-reasoner":
-		return true, false, true
-	case "deepseek-chat-search":
-		return false, true, true
-	case "deepseek-reasoner-search":
-		return true, true, true
+	baseModel, noThinking := splitNoThinkingModel(model)
+	if baseModel == "" {
+		return false, false, false
+	}
+	switch baseModel {
+	case "deepseek-v4-flash", "deepseek-v4-pro", "deepseek-v4-vision":
+		return !noThinking, false, true
+	case "deepseek-v4-flash-search", "deepseek-v4-pro-search":
+		return !noThinking, true, true
 	default:
 		return false, false, false
 	}
 }

+func GetModelType(model string) (modelType string, ok bool) {
+	baseModel, _ := splitNoThinkingModel(model)
+	switch baseModel {
+	case "deepseek-v4-flash", "deepseek-v4-flash-search":
+		return "default", true
+	case "deepseek-v4-pro", "deepseek-v4-pro-search":
+		return "expert", true
+	case "deepseek-v4-vision":
+		return "vision", true
+	default:
+		return "", false
+	}
+}
+
 func IsSupportedDeepSeekModel(model string) bool {
 	_, _, ok := GetModelConfig(model)
 	return ok
 }

+func IsNoThinkingModel(model string) bool {
+	_, noThinking := splitNoThinkingModel(model)
+	return noThinking
+}
+
 func DefaultModelAliases() map[string]string {
 	return map[string]string{
-		"gpt-4o":                 "deepseek-chat",
-		"gpt-4.1":                "deepseek-chat",
-		"gpt-4.1-mini":           "deepseek-chat",
-		"gpt-4.1-nano":           "deepseek-chat",
-		"gpt-5":                  "deepseek-chat",
-		"gpt-5-mini":             "deepseek-chat",
-		"gpt-5-codex":            "deepseek-reasoner",
-		"o1":                     "deepseek-reasoner",
-		"o1-mini":                "deepseek-reasoner",
-		"o3":                     "deepseek-reasoner",
-		"o3-mini":                "deepseek-reasoner",
-		"claude-sonnet-4-5":      "deepseek-chat",
-		"claude-haiku-4-5":       "deepseek-chat",
-		"claude-opus-4-6":        "deepseek-reasoner",
-		"claude-3-5-sonnet":      "deepseek-chat",
-		"claude-3-5-haiku":       "deepseek-chat",
-		"claude-3-opus":          "deepseek-reasoner",
-		"gemini-2.5-pro":         "deepseek-chat",
-		"gemini-2.5-flash":       "deepseek-chat",
-		"llama-3.1-70b-instruct": "deepseek-chat",
-		"qwen-max":               "deepseek-chat",
+		// OpenAI GPT / ChatGPT families
+		"chatgpt-4o":          "deepseek-v4-flash",
+		"gpt-4":               "deepseek-v4-flash",
+		"gpt-4-turbo":         "deepseek-v4-flash",
+		"gpt-4-turbo-preview": "deepseek-v4-flash",
+		"gpt-4.5-preview":     "deepseek-v4-flash",
+		"gpt-4o":              "deepseek-v4-flash",
+		"gpt-4o-mini":         "deepseek-v4-flash",
+		"gpt-4.1":             "deepseek-v4-flash",
+		"gpt-4.1-mini":        "deepseek-v4-flash",
+		"gpt-4.1-nano":        "deepseek-v4-flash",
+		"gpt-5":               "deepseek-v4-flash",
+		"gpt-5-chat":          "deepseek-v4-flash",
+		"gpt-5.1":             "deepseek-v4-flash",
+		"gpt-5.1-chat":        "deepseek-v4-flash",
+		"gpt-5.2":             "deepseek-v4-flash",
+		"gpt-5.2-chat":        "deepseek-v4-flash",
+		"gpt-5.3-chat":        "deepseek-v4-flash",
+		"gpt-5.4":             "deepseek-v4-flash",
+		"gpt-5.5":             "deepseek-v4-flash",
+		"gpt-5-mini":          "deepseek-v4-flash",
+		"gpt-5-nano":          "deepseek-v4-flash",
+		"gpt-5.4-mini":        "deepseek-v4-flash",
+		"gpt-5.4-nano":        "deepseek-v4-flash",
+		"gpt-5-pro":           "deepseek-v4-pro",
+		"gpt-5.2-pro":         "deepseek-v4-pro",
+		"gpt-5.4-pro":         "deepseek-v4-pro",
+		"gpt-5.5-pro":         "deepseek-v4-pro",
+		"gpt-5-codex":         "deepseek-v4-pro",
+		"gpt-5.1-codex":       "deepseek-v4-pro",
+		"gpt-5.1-codex-mini":  "deepseek-v4-pro",
+		"gpt-5.1-codex-max":   "deepseek-v4-pro",
+		"gpt-5.2-codex":       "deepseek-v4-pro",
+		"gpt-5.3-codex":       "deepseek-v4-pro",
+		"codex-mini-latest":   "deepseek-v4-pro",
+
+		// OpenAI reasoning / research families
+		"o1":                    "deepseek-v4-pro",
+		"o1-preview":            "deepseek-v4-pro",
+		"o1-mini":               "deepseek-v4-pro",
+		"o1-pro":                "deepseek-v4-pro",
+		"o3":                    "deepseek-v4-pro",
+		"o3-mini":               "deepseek-v4-pro",
+		"o3-pro":                "deepseek-v4-pro",
+		"o3-deep-research":      "deepseek-v4-pro-search",
+		"o4-mini":               "deepseek-v4-pro",
+		"o4-mini-deep-research": "deepseek-v4-pro-search",
+
+		// Claude current and historical aliases
+		"claude-opus-4-6":            "deepseek-v4-pro",
+		"claude-opus-4-1":            "deepseek-v4-pro",
+		"claude-opus-4-1-20250805":   "deepseek-v4-pro",
+		"claude-opus-4-0":            "deepseek-v4-pro",
+		"claude-opus-4-20250514":     "deepseek-v4-pro",
+		"claude-sonnet-4-6":          "deepseek-v4-flash",
+		"claude-sonnet-4-5":          "deepseek-v4-flash",
+		"claude-sonnet-4-5-20250929": "deepseek-v4-flash",
+		"claude-sonnet-4-0":          "deepseek-v4-flash",
+		"claude-sonnet-4-20250514":   "deepseek-v4-flash",
+		"claude-haiku-4-5":           "deepseek-v4-flash",
+		"claude-haiku-4-5-20251001":  "deepseek-v4-flash",
+		"claude-3-7-sonnet":          "deepseek-v4-flash",
+		"claude-3-7-sonnet-latest":   "deepseek-v4-flash",
+		"claude-3-7-sonnet-20250219": "deepseek-v4-flash",
+		"claude-3-5-sonnet":          "deepseek-v4-flash",
+		"claude-3-5-sonnet-latest":   "deepseek-v4-flash",
+		"claude-3-5-sonnet-20240620": "deepseek-v4-flash",
+		"claude-3-5-sonnet-20241022": "deepseek-v4-flash",
+		"claude-3-5-haiku":           "deepseek-v4-flash",
+		"claude-3-5-haiku-latest":    "deepseek-v4-flash",
+		"claude-3-5-haiku-20241022":  "deepseek-v4-flash",
+		"claude-3-opus":              "deepseek-v4-pro",
+		"claude-3-opus-20240229":     "deepseek-v4-pro",
+		"claude-3-sonnet":            "deepseek-v4-flash",
+		"claude-3-sonnet-20240229":   "deepseek-v4-flash",
+		"claude-3-haiku":             "deepseek-v4-flash",
+		"claude-3-haiku-20240307":    "deepseek-v4-flash",
+
+		// Gemini current and historical text / multimodal models
+		"gemini-pro":            "deepseek-v4-pro",
+		"gemini-pro-vision":     "deepseek-v4-vision",
+		"gemini-pro-latest":     "deepseek-v4-pro",
+		"gemini-flash-latest":   "deepseek-v4-flash",
+		"gemini-1.5-pro":        "deepseek-v4-pro",
+		"gemini-1.5-flash":      "deepseek-v4-flash",
+		"gemini-1.5-flash-8b":   "deepseek-v4-flash",
+		"gemini-2.0-flash":      "deepseek-v4-flash",
+		"gemini-2.0-flash-lite": "deepseek-v4-flash",
+		"gemini-2.5-pro":        "deepseek-v4-pro",
+		"gemini-2.5-flash":      "deepseek-v4-flash",
+		"gemini-2.5-flash-lite": "deepseek-v4-flash",
+		"gemini-3.1-pro":        "deepseek-v4-pro",
+		"gemini-3-pro":          "deepseek-v4-pro",
+		"gemini-3-flash":        "deepseek-v4-flash",
+		"gemini-3.1-flash":      "deepseek-v4-flash",
+		"gemini-3.1-flash-lite": "deepseek-v4-flash",
+
+		"llama-3.1-70b-instruct": "deepseek-v4-flash",
+		"qwen-max":               "deepseek-v4-flash",
 	}
 }

@@ -113,52 +206,33 @@ func ResolveModel(store ModelAliasReader, requested string) (string, bool) {
 	if model == "" {
 		return "", false
 	}
+	aliases := loadModelAliases(store)
 	if IsSupportedDeepSeekModel(model) {
 		return model, true
 	}
-	aliases := DefaultModelAliases()
-	if store != nil {
-		for k, v := range store.ModelAliases() {
-			aliases[lower(strings.TrimSpace(k))] = lower(strings.TrimSpace(v))
-		}
-	}
 	if mapped, ok := aliases[model]; ok && IsSupportedDeepSeekModel(mapped) {
 		return mapped, true
 	}
-	if strings.HasPrefix(model, "deepseek-") {
+	baseModel, noThinking := splitNoThinkingModel(model)
+	resolvedModel, ok := resolveCanonicalModel(aliases, baseModel)
+	if !ok {
 		return "", false
 	}
+	return withNoThinkingVariant(resolvedModel, noThinking), true
+}

-	knownFamily := false
-	for _, prefix := range []string{
-		"gpt-", "o1", "o3", "claude-", "gemini-", "llama-", "qwen-", "mistral-", "command-",
-	} {
-		if strings.HasPrefix(model, prefix) {
-			knownFamily = true
-			break
-		}
-	}
-	if !knownFamily {
-		return "", false
-	}
-
-	useReasoner := strings.Contains(model, "reason") ||
-		strings.Contains(model, "reasoner") ||
-		strings.HasPrefix(model, "o1") ||
-		strings.HasPrefix(model, "o3") ||
-		strings.Contains(model, "opus") ||
-		strings.Contains(model, "r1")
-	useSearch := strings.Contains(model, "search")
-
+func isRetiredHistoricalModel(model string) bool {
 	switch {
-	case useReasoner && useSearch:
-		return "deepseek-reasoner-search", true
-	case useReasoner:
-		return "deepseek-reasoner", true
-	case useSearch:
-		return "deepseek-chat-search", true
+	case strings.HasPrefix(model, "claude-1."):
+		return true
+	case strings.HasPrefix(model, "claude-2."):
+		return true
+	case strings.HasPrefix(model, "claude-instant-"):
+		return true
+	case strings.HasPrefix(model, "gpt-3.5"):
+		return true
 	default:
-		return "deepseek-chat", true
+		return false
 	}
 }

@@ -201,3 +275,110 @@ func ClaudeModelsResponse() map[string]any {
 	resp["has_more"] = false
 	return resp
 }
+
+// appendNoThinkingVariants returns a new slice in which every entry of models
+// is immediately followed by a copy whose ID has the "-nothinking" suffix.
+func appendNoThinkingVariants(models []ModelInfo) []ModelInfo {
+	out := make([]ModelInfo, 0, len(models)*2)
+	for _, model := range models {
+		variant := model
+		variant.ID = withNoThinkingVariant(model.ID, true)
+		out = append(out, model, variant)
+	}
+	return out
+}
+
+// splitNoThinkingModel normalizes model with lower(strings.TrimSpace(...)) and
+// returns it without a trailing "-nothinking" suffix, plus whether the suffix
+// was present.
+func splitNoThinkingModel(model string) (string, bool) {
+	return strings.CutSuffix(lower(strings.TrimSpace(model)), noThinkingModelSuffix)
+}
+
+// withNoThinkingVariant returns the normalized base name of model, with the
+// "-nothinking" suffix appended when enabled is true. An empty base name is
+// returned as "" in either case.
+func withNoThinkingVariant(model string, enabled bool) string {
+	baseModel, _ := splitNoThinkingModel(model)
+	if !enabled || baseModel == "" {
+		return baseModel
+	}
+	return baseModel + noThinkingModelSuffix
+}
+
+// loadModelAliases returns DefaultModelAliases overlaid with the aliases from
+// store when store is non-nil. Overlay keys and values are normalized with
+// lower(strings.TrimSpace(...)) and replace default entries with the same key.
+func loadModelAliases(store ModelAliasReader) map[string]string {
+	aliases := DefaultModelAliases()
+	if store == nil {
+		return aliases
+	}
+	for name, target := range store.ModelAliases() {
+		aliases[lower(strings.TrimSpace(name))] = lower(strings.TrimSpace(target))
+	}
+	return aliases
+}
+
+// resolveCanonicalModel maps a requested model name to a supported DeepSeek
+// model ID and reports whether it could be resolved. Checks run in order:
+// retired historical names are rejected, supported IDs are returned as-is,
+// aliases are followed when their target is supported, any other "deepseek-"
+// name is rejected, and names from a known vendor family fall back to keyword
+// heuristics.
+func resolveCanonicalModel(aliases map[string]string, model string) (string, bool) {
+	model = lower(strings.TrimSpace(model))
+	if model == "" {
+		return "", false
+	}
+	if isRetiredHistoricalModel(model) {
+		return "", false
+	}
+	if IsSupportedDeepSeekModel(model) {
+		return model, true
+	}
+	if mapped, ok := aliases[model]; ok && IsSupportedDeepSeekModel(mapped) {
+		return mapped, true
+	}
+	if strings.HasPrefix(model, "deepseek-") {
+		// Unknown deepseek-* names are never passed to the heuristics below.
+		return "", false
+	}
+
+	knownFamily := false
+	for _, prefix := range []string{
+		"gpt-", "o1", "o3", "claude-", "gemini-", "llama-", "qwen-", "mistral-", "command-",
+	} {
+		if strings.HasPrefix(model, prefix) {
+			knownFamily = true
+			break
+		}
+	}
+	if !knownFamily {
+		return "", false
+	}
+
+	useVision := strings.Contains(model, "vision")
+	// The "reason" check also matches names containing "reasoner".
+	useReasoner := strings.Contains(model, "reason") ||
+		strings.HasPrefix(model, "o1") ||
+		strings.HasPrefix(model, "o3") ||
+		strings.Contains(model, "opus") ||
+		strings.Contains(model, "slow") ||
+		strings.Contains(model, "r1")
+	useSearch := strings.Contains(model, "search")
+
+	switch {
+	case useVision:
+		// Vision takes precedence over the reasoner and search keywords.
+		return "deepseek-v4-vision", true
+	case useReasoner && useSearch:
+		return "deepseek-v4-pro-search", true
+	case useReasoner:
+		return "deepseek-v4-pro", true
+	case useSearch:
+		return "deepseek-v4-flash-search", true
+	default:
+		return "deepseek-v4-flash", true
+	}
+}
--- a/internal/config/paths.go
+++ b/internal/config/paths.go
@@ -30,11 +30,35 @@ func ResolvePath(envKey, defaultRel string) string {
 }

 func ConfigPath() string {
+	if strings.TrimSpace(os.Getenv("DS2API_CONFIG_PATH")) == "" && BaseDir() == "/app" {
+		return containerDefaultConfigPath()
+	}
 	return ResolvePath("DS2API_CONFIG_PATH", "config.json")
 }

-func WASMPath() string {
-	return ResolvePath("DS2API_WASM_PATH", "sha3_wasm_bg.7b9ca65ddd.wasm")
+func containerDefaultConfigPath() string {
+	// Container images run as non-root by default. Only use /data when mounted/provisioned.
+	// Otherwise keep /app/config.json so admin-side save does not fail on MkdirAll("/data").
+	if st, err := os.Stat("/data"); err == nil && st.IsDir() {
+		return "/data/config.json"
+	}
+	return "/app/config.json"
+}
+
+func legacyContainerConfigPath() string {
+	return "/app/config.json"
+}
+
+func shouldTryLegacyContainerConfigPath() bool {
+	return strings.TrimSpace(os.Getenv("DS2API_CONFIG_PATH")) == "" && BaseDir() == "/app"
+}
+
+func RawStreamSampleRoot() string {
+	return ResolvePath("DS2API_RAW_STREAM_SAMPLE_ROOT", "tests/raw_stream_samples")
+}
+
+func ChatHistoryPath() string {
+	return ResolvePath("DS2API_CHAT_HISTORY_PATH", "data/chat_history.json")
 }

 func StaticAdminDir() string {
--- a/internal/config/paths_test.go
+++ b/internal/config/paths_test.go
@@ -0,0 +1,33 @@
+package config
+
+import (
+	"os"
+	"testing"
+)
+
+// TestContainerDefaultConfigPath checks containerDefaultConfigPath against the
+// host filesystem: whichever subtest does not match the current state of /data
+// is skipped.
+func TestContainerDefaultConfigPath(t *testing.T) {
+	t.Run("fallback to /app when /data is missing", func(t *testing.T) {
+		// This test environment does not guarantee a writable/mounted /data.
+		// If /data is absent we must keep /app fallback to avoid persistence failures.
+		if _, err := os.Stat("/data"); err == nil {
+			t.Skip("/data exists in this environment; cannot validate missing-/data fallback")
+		}
+		if got := containerDefaultConfigPath(); got != "/app/config.json" {
+			t.Fatalf("containerDefaultConfigPath() = %q, want %q", got, "/app/config.json")
+		}
+	})
+
+	t.Run("prefer /data when /data directory exists", func(t *testing.T) {
+		// containerDefaultConfigPath only prefers /data when it is a directory,
+		// so a non-directory /data must skip rather than fail this subtest.
+		if st, err := os.Stat("/data"); err != nil || !st.IsDir() {
+			t.Skip("/data is not an existing directory in this environment")
+		}
+		if got := containerDefaultConfigPath(); got != "/data/config.json" {
+			t.Fatalf("containerDefaultConfigPath() = %q, want %q", got, "/data/config.json")
+		}
+	})
+}
--- a/internal/config/store.go
+++ b/internal/config/store.go
@@ -21,49 +21,87 @@ type Store struct {
 }

 func LoadStore() *Store {
-	cfg, fromEnv, err := loadConfig()
+	store, err := loadStore()
 	if err != nil {
 		Logger.Warn("[config] load failed", "error", err)
 	}
-	if len(cfg.Keys) == 0 && len(cfg.Accounts) == 0 {
+	if len(store.cfg.Keys) == 0 && len(store.cfg.Accounts) == 0 {
 		Logger.Warn("[config] empty config loaded")
 	}
-	s := &Store{cfg: cfg, path: ConfigPath(), fromEnv: fromEnv}
-	s.rebuildIndexes()
-	return s
+	store.rebuildIndexes()
+	return store
+}
+
+func LoadStoreWithError() (*Store, error) {
+	store, err := loadStore()
+	if err != nil {
+		return nil, err
+	}
+	store.rebuildIndexes()
+	return store, nil
+}
+
+func loadStore() (*Store, error) {
+	cfg, fromEnv, err := loadConfig()
+	cfg.NormalizeCredentials()
+	if validateErr := ValidateConfig(cfg); validateErr != nil {
+		err = errors.Join(err, validateErr)
+	}
+	return &Store{cfg: cfg, path: ConfigPath(), fromEnv: fromEnv}, err
 }

 func loadConfig() (Config, bool, error) {
 	rawCfg := strings.TrimSpace(os.Getenv("DS2API_CONFIG_JSON"))
-	if rawCfg == "" {
-		rawCfg = strings.TrimSpace(os.Getenv("CONFIG_JSON"))
-	}
 	if rawCfg != "" {
 		cfg, err := parseConfigString(rawCfg)
+		if err != nil {
+			if !IsVercel() && envWritebackEnabled() {
+				if fileCfg, fileErr := loadConfigFromFile(ConfigPath()); fileErr == nil {
+					return fileCfg, false, nil
+				}
+			}
+			return cfg, true, err
+		}
 		cfg.ClearAccountTokens()
 		cfg.DropInvalidAccounts()
+		if IsVercel() || !envWritebackEnabled() {
+			return cfg, true, err
+		}
+		content, fileErr := os.ReadFile(ConfigPath())
+		if fileErr == nil {
+			var fileCfg Config
+			if unmarshalErr := json.Unmarshal(content, &fileCfg); unmarshalErr == nil {
+				fileCfg.DropInvalidAccounts()
+				return fileCfg, false, err
+			}
+		}
+		if errors.Is(fileErr, os.ErrNotExist) {
+			if validateErr := ValidateConfig(cfg); validateErr != nil {
+				return cfg, true, validateErr
+			}
+			if writeErr := writeConfigFile(ConfigPath(), cfg.Clone()); writeErr == nil {
+				return cfg, false, err
+			} else {
+				Logger.Warn("[config] env writeback bootstrap failed", "error", writeErr)
+			}
+		}
 		return cfg, true, err
 	}
-
-	content, err := os.ReadFile(ConfigPath())
+	cfg, err := loadConfigFromFile(ConfigPath())
 	if err != nil {
+		if shouldTryLegacyContainerConfigPath() {
+			legacyPath := legacyContainerConfigPath()
+			if legacyCfg, legacyErr := loadConfigFromFile(legacyPath); legacyErr == nil {
+				Logger.Info("[config] loaded legacy container config path", "path", legacyPath)
+				return legacyCfg, false, nil
+			}
+		}
 		if IsVercel() {
-			// Vercel one-click deploy may start without a writable/present config file.
-			// Keep an in-memory config so users can bootstrap via WebUI then sync env.
+			// Vercel may start without writable/present config; keep in-memory bootstrap config.
 			return Config{}, true, nil
 		}
 		return Config{}, false, err
 	}
-	var cfg Config
-	if err := json.Unmarshal(content, &cfg); err != nil {
-		return Config{}, false, err
-	}
-	cfg.DropInvalidAccounts()
-	if strings.Contains(string(content), `"test_status"`) && !IsVercel() {
-		if b, err := json.MarshalIndent(cfg, "", "  "); err == nil {
-			_ = os.WriteFile(ConfigPath(), b, 0o644)
-		}
-	}
 	if IsVercel() {
 		// Vercel filesystem is ephemeral/read-only for runtime writes; avoid save errors.
 		return cfg, true, nil
@@ -71,6 +109,25 @@ func loadConfig() (Config, bool, error) {
 	return cfg, false, nil
 }

+func loadConfigFromFile(path string) (Config, error) {
+	content, err := os.ReadFile(path)
+	if err != nil {
+		return Config{}, err
+	}
+	var cfg Config
+	if err := json.Unmarshal(content, &cfg); err != nil {
+		return Config{}, err
+	}
+	cfg.NormalizeCredentials()
+	cfg.DropInvalidAccounts()
+	if strings.Contains(string(content), `"test_status"`) && !IsVercel() {
+		if b, err := json.MarshalIndent(cfg, "", "  "); err == nil {
+			_ = os.WriteFile(path, b, 0o644)
+		}
+	}
+	return cfg, nil
+}
+
 func (s *Store) Snapshot() Config {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
@@ -157,6 +214,7 @@ func (s *Store) UpdateAccountToken(identifier, token string) error {
 func (s *Store) Replace(cfg Config) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
+	cfg.NormalizeCredentials()
 	s.cfg = cfg.Clone()
 	s.rebuildIndexes()
 	return s.saveLocked()
@@ -165,10 +223,13 @@ func (s *Store) Replace(cfg Config) error {
 func (s *Store) Update(mutator func(*Config) error) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
-	cfg := s.cfg.Clone()
+	base := s.cfg.Clone()
+	cfg := base.Clone()
 	if err := mutator(&cfg); err != nil {
 		return err
 	}
+	cfg.ReconcileCredentials(base)
+	cfg.NormalizeCredentials()
 	s.cfg = cfg
 	s.rebuildIndexes()
 	return s.saveLocked()
@@ -177,7 +238,7 @@ func (s *Store) Update(mutator func(*Config) error) error {
 func (s *Store) Save() error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
-	if s.fromEnv {
+	if s.fromEnv && (IsVercel() || !envWritebackEnabled()) {
 		Logger.Info("[save_config] source from env, skip write")
 		return nil
 	}
@@ -187,11 +248,15 @@ func (s *Store) Save() error {
 	if err != nil {
 		return err
 	}
-	return os.WriteFile(s.path, b, 0o644)
+	if err := writeConfigBytes(s.path, b); err != nil {
+		return err
+	}
+	s.fromEnv = false
+	return nil
 }

 func (s *Store) saveLocked() error {
-	if s.fromEnv {
+	if s.fromEnv && (IsVercel() || !envWritebackEnabled()) {
 		Logger.Info("[save_config] source from env, skip write")
 		return nil
 	}
@@ -201,7 +266,11 @@ func (s *Store) saveLocked() error {
 	if err != nil {
 		return err
 	}
-	return os.WriteFile(s.path, b, 0o644)
+	if err := writeConfigBytes(s.path, b); err != nil {
+		return err
+	}
+	s.fromEnv = false
+	return nil
 }

 func (s *Store) IsEnvBacked() bool {
--- a/internal/config/store_accessors.go
+++ b/internal/config/store_accessors.go
@@ -6,18 +6,6 @@ import (
 	"strings"
 )

-func (s *Store) ClaudeMapping() map[string]string {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-	if len(s.cfg.ClaudeModelMap) > 0 {
-		return cloneStringMap(s.cfg.ClaudeModelMap)
-	}
-	if len(s.cfg.ClaudeMapping) > 0 {
-		return cloneStringMap(s.cfg.ClaudeMapping)
-	}
-	return map[string]string{"fast": "deepseek-chat", "slow": "deepseek-reasoner"}
-}
-
 func (s *Store) ModelAliases() map[string]string {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
@@ -42,6 +30,15 @@ func (s *Store) CompatWideInputStrictOutput() bool {
 	return *s.cfg.Compat.WideInputStrictOutput
 }

+func (s *Store) CompatStripReferenceMarkers() bool {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	if s.cfg.Compat.StripReferenceMarkers == nil {
+		return true
+	}
+	return *s.cfg.Compat.StripReferenceMarkers
+}
+
 func (s *Store) ToolcallMode() string {
 	return "feature_match"
 }
@@ -65,6 +62,20 @@ func (s *Store) EmbeddingsProvider() string {
 	return strings.TrimSpace(s.cfg.Embeddings.Provider)
 }

+func (s *Store) AutoDeleteMode() string {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	mode := strings.ToLower(strings.TrimSpace(s.cfg.AutoDelete.Mode))
+	switch mode {
+	case "none", "single", "all":
+		return mode
+	}
+	if s.cfg.AutoDelete.Sessions {
+		return "all"
+	}
+	return "none"
+}
+
 func (s *Store) AdminPasswordHash() string {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
@@ -97,13 +108,8 @@ func (s *Store) RuntimeAccountMaxInflight() int {
 	if s.cfg.Runtime.AccountMaxInflight > 0 {
 		return s.cfg.Runtime.AccountMaxInflight
 	}
-	for _, key := range []string{"DS2API_ACCOUNT_MAX_INFLIGHT", "DS2API_ACCOUNT_CONCURRENCY"} {
-		raw := strings.TrimSpace(os.Getenv(key))
-		if raw == "" {
-			continue
-		}
-		n, err := strconv.Atoi(raw)
-		if err == nil && n > 0 {
+	if raw := strings.TrimSpace(os.Getenv("DS2API_ACCOUNT_MAX_INFLIGHT")); raw != "" {
+		if n, err := strconv.Atoi(raw); err == nil && n > 0 {
 			return n
 		}
 	}
@@ -116,13 +122,8 @@ func (s *Store) RuntimeAccountMaxQueue(defaultSize int) int {
 	if s.cfg.Runtime.AccountMaxQueue > 0 {
 		return s.cfg.Runtime.AccountMaxQueue
 	}
-	for _, key := range []string{"DS2API_ACCOUNT_MAX_QUEUE", "DS2API_ACCOUNT_QUEUE_SIZE"} {
-		raw := strings.TrimSpace(os.Getenv(key))
-		if raw == "" {
-			continue
-		}
-		n, err := strconv.Atoi(raw)
-		if err == nil && n >= 0 {
+	if raw := strings.TrimSpace(os.Getenv("DS2API_ACCOUNT_MAX_QUEUE")); raw != "" {
+		if n, err := strconv.Atoi(raw); err == nil && n >= 0 {
 			return n
 		}
 	}
@@ -138,13 +139,8 @@ func (s *Store) RuntimeGlobalMaxInflight(defaultSize int) int {
 	if s.cfg.Runtime.GlobalMaxInflight > 0 {
 		return s.cfg.Runtime.GlobalMaxInflight
 	}
-	for _, key := range []string{"DS2API_GLOBAL_MAX_INFLIGHT", "DS2API_MAX_INFLIGHT"} {
-		raw := strings.TrimSpace(os.Getenv(key))
-		if raw == "" {
-			continue
-		}
-		n, err := strconv.Atoi(raw)
-		if err == nil && n > 0 {
+	if raw := strings.TrimSpace(os.Getenv("DS2API_GLOBAL_MAX_INFLIGHT")); raw != "" {
+		if n, err := strconv.Atoi(raw); err == nil && n > 0 {
 			return n
 		}
 	}
@@ -164,7 +160,43 @@ func (s *Store) RuntimeTokenRefreshIntervalHours() int {
 }

 func (s *Store) AutoDeleteSessions() bool {
+	return s.AutoDeleteMode() != "none"
+}
+
+func (s *Store) HistorySplitEnabled() bool {
+	return false
+}
+
+func (s *Store) HistorySplitTriggerAfterTurns() int {
+	return 1
+}
+
+func (s *Store) CurrentInputFileEnabled() bool {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
-	return s.cfg.AutoDelete.Sessions
+	if s.cfg.CurrentInputFile.Enabled == nil {
+		return true
+	}
+	return *s.cfg.CurrentInputFile.Enabled
+}
+
+func (s *Store) CurrentInputFileMinChars() int {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	return s.cfg.CurrentInputFile.MinChars
+}
+
+func (s *Store) ThinkingInjectionEnabled() bool {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	if s.cfg.ThinkingInjection.Enabled == nil {
+		return true
+	}
+	return *s.cfg.ThinkingInjection.Enabled
+}
+
+func (s *Store) ThinkingInjectionPrompt() string {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	return strings.TrimSpace(s.cfg.ThinkingInjection.Prompt)
 }
--- a/internal/config/store_accessors_test.go
+++ b/internal/config/store_accessors_test.go
@@ -0,0 +1,71 @@
+package config
+
+import "testing"
+
+// TestStoreHistorySplitAccessors verifies that the history split accessors
+// return their fixed values even when the config sets HistorySplit explicitly.
+func TestStoreHistorySplitAccessors(t *testing.T) {
+	enabled, turns := true, 3
+	split := HistorySplitConfig{Enabled: &enabled, TriggerAfterTurns: &turns}
+	store := &Store{cfg: Config{HistorySplit: split}}
+
+	if store.HistorySplitEnabled() {
+		t.Fatal("expected history split to stay disabled")
+	}
+	if got := store.HistorySplitTriggerAfterTurns(); got != 1 {
+		t.Fatalf("history split trigger_after_turns=%d want=1", got)
+	}
+}
+
+// TestStoreCurrentInputFileAccessors covers the default, explicitly disabled
+// and explicitly enabled states of the current input file settings, and checks
+// that enabling history split in the config does not turn the mode off.
+func TestStoreCurrentInputFileAccessors(t *testing.T) {
+	store := &Store{cfg: Config{}}
+	if !store.CurrentInputFileEnabled() {
+		t.Fatal("expected current input file enabled by default")
+	}
+	if got := store.CurrentInputFileMinChars(); got != 0 {
+		t.Fatalf("default current input file min_chars=%d want=0", got)
+	}
+
+	off, on := false, true
+	store.cfg.CurrentInputFile = CurrentInputFileConfig{Enabled: &off, MinChars: 12345}
+	if store.CurrentInputFileEnabled() {
+		t.Fatal("expected current input file disabled")
+	}
+
+	store.cfg.CurrentInputFile.Enabled = &on
+	if !store.CurrentInputFileEnabled() {
+		t.Fatal("expected current input file enabled")
+	}
+	if got := store.CurrentInputFileMinChars(); got != 12345 {
+		t.Fatalf("current input file min_chars=%d want=12345", got)
+	}
+
+	store.cfg.HistorySplit.Enabled = &on
+	if !store.CurrentInputFileEnabled() {
+		t.Fatal("expected history split config to not suppress current input file mode")
+	}
+}
+
+// TestStoreThinkingInjectionAccessors checks that thinking injection is on by
+// default, can be disabled explicitly, and that the prompt is returned trimmed.
+func TestStoreThinkingInjectionAccessors(t *testing.T) {
+	store := &Store{cfg: Config{}}
+	if !store.ThinkingInjectionEnabled() {
+		t.Fatal("expected thinking injection enabled by default")
+	}
+
+	off := false
+	store.cfg.ThinkingInjection.Enabled = &off
+	if store.ThinkingInjectionEnabled() {
+		t.Fatal("expected thinking injection disabled by explicit config")
+	}
+
+	const rawPrompt = "  custom thinking prompt  "
+	store.cfg.ThinkingInjection.Prompt = rawPrompt
+	if got := store.ThinkingInjectionPrompt(); got != "custom thinking prompt" {
+		t.Fatalf("thinking injection prompt=%q want custom thinking prompt", got)
+	}
+}
--- a/internal/config/store_env_writeback.go
+++ b/internal/config/store_env_writeback.go
@@ -0,0 +1,61 @@
+package config
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+// envWritebackEnabled reports whether DS2API_ENV_WRITEBACK is set to one of
+// "1", "true", "yes" or "on" (case-insensitive, surrounding spaces ignored).
+func envWritebackEnabled() bool {
+	switch strings.ToLower(strings.TrimSpace(os.Getenv("DS2API_ENV_WRITEBACK"))) {
+	case "1", "true", "yes", "on":
+		return true
+	default:
+		return false
+	}
+}
+
+// IsEnvWritebackEnabled reports whether env writeback is switched on via
+// DS2API_ENV_WRITEBACK. It does not read any Store state.
+func (s *Store) IsEnvWritebackEnabled() bool {
+	return envWritebackEnabled()
+}
+
+// HasEnvConfigSource reports whether DS2API_CONFIG_JSON is set to a non-blank
+// value. It does not read any Store state.
+func (s *Store) HasEnvConfigSource() bool {
+	return strings.TrimSpace(os.Getenv("DS2API_CONFIG_JSON")) != ""
+}
+
+// ConfigPath returns the path stored on s.
+func (s *Store) ConfigPath() string {
+	return s.path
+}
+
+// writeConfigFile writes cfg to path as indented JSON. It serializes a clone
+// of cfg on which ClearAccountTokens has been called.
+func writeConfigFile(path string, cfg Config) error {
+	persistCfg := cfg.Clone()
+	persistCfg.ClearAccountTokens()
+	b, err := json.MarshalIndent(persistCfg, "", "  ")
+	if err != nil {
+		return err
+	}
+	return writeConfigBytes(path, b)
+}
+
+// writeConfigBytes writes b to path with mode 0o644, first creating the parent
+// directory (mode 0o755) unless path has no directory component.
+func writeConfigBytes(path string, b []byte) error {
+	// filepath.Dir never returns ""; "." means there is nothing to create.
+	if dir := filepath.Dir(path); dir != "." {
+		if err := os.MkdirAll(dir, 0o755); err != nil {
+			return fmt.Errorf("mkdir config dir: %w", err)
+		}
+	}
+	return os.WriteFile(path, b, 0o644)
+}
--- a/internal/config/validation.go
+++ b/internal/config/validation.go
@@ -0,0 +1,153 @@
+package config
+
+import (
+	"fmt"
+	"strings"
+)
+
+// ValidateConfig runs every section validator against c and returns the first
+// error encountered, or nil when all sections pass. Checks run in a fixed
+// order: proxies, admin, runtime, responses, embeddings, auto-delete,
+// current-input-file, and finally account-to-proxy references.
+func ValidateConfig(c Config) error {
+	// Closures keep evaluation lazy, so a later check never runs once an
+	// earlier one has failed.
+	checks := []func() error{
+		func() error { return ValidateProxyConfig(c.Proxies) },
+		func() error { return ValidateAdminConfig(c.Admin) },
+		func() error { return ValidateRuntimeConfig(c.Runtime) },
+		func() error { return ValidateResponsesConfig(c.Responses) },
+		func() error { return ValidateEmbeddingsConfig(c.Embeddings) },
+		func() error { return ValidateAutoDeleteConfig(c.AutoDelete) },
+		func() error { return ValidateCurrentInputFileConfig(c.CurrentInputFile) },
+		func() error { return ValidateAccountProxyReferences(c.Accounts, c.Proxies) },
+	}
+	for _, check := range checks {
+		if err := check(); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// ValidateProxyConfig checks each proxy after passing it through
+// NormalizeProxy: the id and host must be non-blank, the type must be
+// "socks5" or "socks5h", the port must lie in 1..65535, and no id may repeat.
+// The first violation is returned; nil means every entry passed.
+func ValidateProxyConfig(proxies []Proxy) error {
+	knownIDs := make(map[string]struct{}, len(proxies))
+	for _, raw := range proxies {
+		p := NormalizeProxy(raw)
+		if err := ValidateTrimmedString("proxies.id", p.ID, true); err != nil {
+			return err
+		}
+		if p.Type != "socks5" && p.Type != "socks5h" {
+			return fmt.Errorf("proxies.type must be one of socks5, socks5h")
+		}
+		if err := ValidateTrimmedString("proxies.host", p.Host, true); err != nil {
+			return err
+		}
+		if err := ValidateIntRange("proxies.port", p.Port, 1, 65535, true); err != nil {
+			return err
+		}
+		// Duplicate detection runs last, on the normalized id.
+		if _, dup := knownIDs[p.ID]; dup {
+			return fmt.Errorf("duplicate proxy id: %s", p.ID)
+		}
+		knownIDs[p.ID] = struct{}{}
+	}
+	return nil
+}
+
+// ValidateAccountProxyReferences verifies that every account with a non-blank
+// ProxyID points at a proxy present in proxies. Proxy ids are taken from
+// NormalizeProxy; account ids are compared after trimming whitespace.
+// Accounts without a ProxyID are ignored. The first dangling reference is
+// returned as an error.
+func ValidateAccountProxyReferences(accounts []Account, proxies []Proxy) error {
+	if len(accounts) == 0 {
+		return nil
+	}
+	known := make(map[string]struct{}, len(proxies))
+	for _, p := range proxies {
+		known[NormalizeProxy(p).ID] = struct{}{}
+	}
+	for _, account := range accounts {
+		if ref := strings.TrimSpace(account.ProxyID); ref != "" {
+			if _, found := known[ref]; !found {
+				return fmt.Errorf("account proxy_id references unknown proxy: %s", ref)
+			}
+		}
+	}
+	return nil
+}
+
+// ValidateAdminConfig checks that admin.jwt_expire_hours lies in 1..720. The
+// field is passed as not required, so ValidateIntRange also accepts 0.
+func ValidateAdminConfig(admin AdminConfig) error {
+	return ValidateIntRange("admin.jwt_expire_hours", admin.JWTExpireHours, 1, 720, false)
+}
+
+// ValidateRuntimeConfig range-checks the runtime limits and then their
+// relationship. Each field is optional (0 is accepted); when set it must fall
+// inside its listed bounds. If both inflight limits are positive, the global
+// limit must not be smaller than the per-account limit.
+func ValidateRuntimeConfig(runtime RuntimeConfig) error {
+	limits := []struct {
+		field  string
+		value  int
+		lo, hi int
+	}{
+		{"runtime.account_max_inflight", runtime.AccountMaxInflight, 1, 256},
+		{"runtime.account_max_queue", runtime.AccountMaxQueue, 1, 200000},
+		{"runtime.global_max_inflight", runtime.GlobalMaxInflight, 1, 200000},
+		{"runtime.token_refresh_interval_hours", runtime.TokenRefreshIntervalHours, 1, 720},
+	}
+	for _, l := range limits {
+		if err := ValidateIntRange(l.field, l.value, l.lo, l.hi, false); err != nil {
+			return err
+		}
+	}
+
+	perAccount, global := runtime.AccountMaxInflight, runtime.GlobalMaxInflight
+	if perAccount > 0 && global > 0 && global < perAccount {
+		return fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
+	}
+	return nil
+}
+
+// ValidateResponsesConfig checks that responses.store_ttl_seconds lies in
+// 30..86400. The field is passed as not required, so 0 is also accepted.
+func ValidateResponsesConfig(responses ResponsesConfig) error {
+	return ValidateIntRange("responses.store_ttl_seconds", responses.StoreTTLSeconds, 30, 86400, false)
+}
+
+// ValidateEmbeddingsConfig validates embeddings.provider as an optional
+// string: an empty value passes, but a value consisting only of whitespace is
+// rejected by ValidateTrimmedString.
+func ValidateEmbeddingsConfig(embeddings EmbeddingsConfig) error {
+	return ValidateTrimmedString("embeddings.provider", embeddings.Provider, false)
+}
+
+// ValidateAutoDeleteConfig validates the auto-delete section. Only the Mode
+// field is inspected, via ValidateAutoDeleteMode.
+func ValidateAutoDeleteConfig(autoDelete AutoDeleteConfig) error {
+	return ValidateAutoDeleteMode(autoDelete.Mode)
+}
+
+// ValidateCurrentInputFileConfig checks current_input_file.min_chars. A value
+// of 0 is accepted without further checks; any other value must lie in
+// 1..100000000.
+func ValidateCurrentInputFileConfig(currentInputFile CurrentInputFileConfig) error {
+	minChars := currentInputFile.MinChars
+	if minChars == 0 {
+		return nil
+	}
+	return ValidateIntRange("current_input_file.min_chars", minChars, 1, 100000000, true)
+}
+
+// ValidateIntRange reports an error when value lies outside [min, max]. When
+// required is false, a value of 0 is treated as "not set" and always passes,
+// even if 0 is outside the range. The error text is
+// "<name> must be between <min> and <max>".
+func ValidateIntRange(name string, value, min, max int, required bool) error {
+	optionalAndUnset := !required && value == 0
+	withinBounds := min <= value && value <= max
+	if optionalAndUnset || withinBounds {
+		return nil
+	}
+	return fmt.Errorf("%s must be between %d and %d", name, min, max)
+}
+
+// ValidateTrimmedString reports an error when value is blank. A value with
+// any non-whitespace content always passes. A completely empty value passes
+// only when required is false. A whitespace-only value is rejected in both
+// modes. The error text is "<name> cannot be empty".
+func ValidateTrimmedString(name, value string, required bool) error {
+	switch {
+	case strings.TrimSpace(value) != "":
+		return nil
+	case value == "" && !required:
+		return nil
+	default:
+		return fmt.Errorf("%s cannot be empty", name)
+	}
+}
+
+// ValidateAutoDeleteMode accepts "", "none", "single", or "all" after
+// trimming whitespace and lower-casing mode. Any other value is rejected.
+func ValidateAutoDeleteMode(mode string) error {
+	normalized := strings.ToLower(strings.TrimSpace(mode))
+	for _, allowed := range [...]string{"", "none", "single", "all"} {
+		if normalized == allowed {
+			return nil
+		}
+	}
+	return fmt.Errorf("auto_delete.mode must be one of none, single, all")
+}
--- a/internal/config/validation_test.go
+++ b/internal/config/validation_test.go
@@ -0,0 +1,66 @@
+package config
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestValidateConfigRejectsInvalidValues feeds ValidateConfig one invalid
+// setting per case and checks that validation fails with an error whose text
+// contains the expected fragment.
+func TestValidateConfigRejectsInvalidValues(t *testing.T) {
+	tests := []struct {
+		name string
+		cfg  Config
+		want string // substring expected in the validation error
+	}{
+		{
+			// One above the 720-hour maximum.
+			name: "admin",
+			cfg:  Config{Admin: AdminConfig{JWTExpireHours: 721}},
+			want: "admin.jwt_expire_hours",
+		},
+		{
+			// Both limits are individually in range, but global < per-account.
+			name: "runtime relation",
+			cfg: Config{Runtime: RuntimeConfig{
+				AccountMaxInflight: 8,
+				GlobalMaxInflight:  4,
+			}},
+			want: "runtime.global_max_inflight must be >= runtime.account_max_inflight",
+		},
+		{
+			// Below the 30-second minimum.
+			name: "responses",
+			cfg:  Config{Responses: ResponsesConfig{StoreTTLSeconds: 10}},
+			want: "responses.store_ttl_seconds",
+		},
+		{
+			// Whitespace-only provider: set, but blank once trimmed.
+			name: "embeddings",
+			cfg:  Config{Embeddings: EmbeddingsConfig{Provider: "   "}},
+			want: "embeddings.provider",
+		},
+		{
+			// Not one of none/single/all.
+			name: "auto delete",
+			cfg:  Config{AutoDelete: AutoDeleteConfig{Mode: "maybe"}},
+			want: "auto_delete.mode",
+		},
+		{
+			// Negative values are non-zero and therefore range-checked.
+			name: "current input file",
+			cfg:  Config{CurrentInputFile: CurrentInputFileConfig{MinChars: -1}},
+			want: "current_input_file.min_chars",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			err := ValidateConfig(tc.cfg)
+			if err == nil {
+				t.Fatal("expected validation error")
+			}
+			if !strings.Contains(err.Error(), tc.want) {
+				t.Fatalf("expected %q in error, got %v", tc.want, err)
+			}
+		})
+	}
+}
+
+// TestValidateConfigAcceptsLegacyAutoDeleteSessions checks that a config which
+// sets only AutoDelete.Sessions (leaving Mode empty) still passes
+// ValidateConfig.
+func TestValidateConfigAcceptsLegacyAutoDeleteSessions(t *testing.T) {
+	if err := ValidateConfig(Config{AutoDelete: AutoDeleteConfig{Sessions: true}}); err != nil {
+		t.Fatalf("expected legacy auto_delete.sessions config to remain valid, got %v", err)
+	}
+}
--- a/internal/deepseek/assets/sha3_wasm_bg.7b9ca65ddd.wasm
+++ b/internal/deepseek/assets/sha3_wasm_bg.7b9ca65ddd.wasm
--- a/internal/deepseek/client/client_auth.go
+++ b/internal/deepseek/client/client_auth.go
@@ -1,7 +1,8 @@
-package deepseek
+package client

 import (
 	"context"
+	dsprotocol "ds2api/internal/deepseek/protocol"
 	"errors"
 	"fmt"
 	"net/http"
@@ -13,6 +14,7 @@ import (
 )

 func (c *Client) Login(ctx context.Context, acc config.Account) (string, error) {
+	clients := c.requestClientsForAccount(acc)
 	payload := map[string]any{
 		"password":  strings.TrimSpace(acc.Password),
 		"device_id": "deepseek_to_api",
@@ -27,7 +29,7 @@ func (c *Client) Login(ctx context.Context, acc config.Account) (string, error)
 	} else {
 		return "", errors.New("missing email/mobile")
 	}
-	resp, err := c.postJSON(ctx, c.regular, DeepSeekLoginURL, BaseHeaders, payload)
+	resp, err := c.postJSON(ctx, clients.regular, clients.fallback, dsprotocol.DeepSeekLoginURL, dsprotocol.BaseHeaders, payload)
 	if err != nil {
 		return "", err
 	}
@@ -52,11 +54,12 @@ func (c *Client) CreateSession(ctx context.Context, a *auth.RequestAuth, maxAtte
 	if maxAttempts <= 0 {
 		maxAttempts = c.maxRetries
 	}
+	clients := c.requestClientsForAuth(ctx, a)
 	attempts := 0
 	refreshed := false
 	for attempts < maxAttempts {
 		headers := c.authHeaders(a.DeepSeekToken)
-		resp, status, err := c.postJSONWithStatus(ctx, c.regular, DeepSeekCreateSessionURL, headers, map[string]any{"agent": "chat"})
+		resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, dsprotocol.DeepSeekCreateSessionURL, headers, map[string]any{"agent": "chat"})
 		if err != nil {
 			config.Logger.Warn("[create_session] request error", "error", err, "account", a.AccountID)
 			attempts++
@@ -64,9 +67,7 @@ func (c *Client) CreateSession(ctx context.Context, a *auth.RequestAuth, maxAtte
 		}
 		code, bizCode, msg, bizMsg := extractResponseStatus(resp)
 		if status == http.StatusOK && code == 0 && bizCode == 0 {
-			data, _ := resp["data"].(map[string]any)
-			bizData, _ := data["biz_data"].(map[string]any)
-			sessionID, _ := bizData["id"].(string)
+			sessionID := extractCreateSessionID(resp)
 			if sessionID != "" {
 				return sessionID, nil
 			}
@@ -91,16 +92,29 @@ func (c *Client) CreateSession(ctx context.Context, a *auth.RequestAuth, maxAtte
 }

+// GetPow is GetPowForTarget with the target path fixed to
+// dsprotocol.DeepSeekCompletionTargetPath; ctx, a, and maxAttempts are passed
+// through unchanged.
 func (c *Client) GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error) {
+	return c.GetPowForTarget(ctx, a, dsprotocol.DeepSeekCompletionTargetPath, maxAttempts)
+}
+
+func (c *Client) GetPowForTarget(ctx context.Context, a *auth.RequestAuth, targetPath string, maxAttempts int) (string, error) {
 	if maxAttempts <= 0 {
 		maxAttempts = c.maxRetries
 	}
+	targetPath = strings.TrimSpace(targetPath)
+	if targetPath == "" {
+		targetPath = dsprotocol.DeepSeekCompletionTargetPath
+	}
+	clients := c.requestClientsForAuth(ctx, a)
 	attempts := 0
 	refreshed := false
+	lastFailureKind := FailureUnknown
+	lastFailureMessage := ""
 	for attempts < maxAttempts {
 		headers := c.authHeaders(a.DeepSeekToken)
-		resp, status, err := c.postJSONWithStatus(ctx, c.regular, DeepSeekCreatePowURL, headers, map[string]any{"target_path": "/api/v0/chat/completion"})
+		resp, status, err := c.postJSONWithStatus(ctx, clients.regular, clients.fallback, dsprotocol.DeepSeekCreatePowURL, headers, map[string]any{"target_path": targetPath})
 		if err != nil {
-			config.Logger.Warn("[get_pow] request error", "error", err, "account", a.AccountID)
+			config.Logger.Warn("[get_pow] request error", "error", err, "account", a.AccountID, "target_path", targetPath)
+			lastFailureKind = FailureUnknown
+			lastFailureMessage = err.Error()
 			attempts++
 			continue
 		}
@@ -109,14 +123,20 @@ func (c *Client) GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts in
 			data, _ := resp["data"].(map[string]any)
 			bizData, _ := data["biz_data"].(map[string]any)
 			challenge, _ := bizData["challenge"].(map[string]any)
-			answer, err := c.powSolver.Compute(ctx, challenge)
+			answer, err := ComputePow(ctx, challenge)
 			if err != nil {
 				attempts++
 				continue
 			}
 			return BuildPowHeader(challenge, answer)
 		}
-		config.Logger.Warn("[get_pow] failed", "status", status, "code", code, "biz_code", bizCode, "msg", msg, "biz_msg", bizMsg, "use_config_token", a.UseConfigToken, "account", a.AccountID)
+		config.Logger.Warn("[get_pow] failed", "status", status, "code", code, "biz_code", bizCode, "msg", msg, "biz_msg", bizMsg, "use_config_token", a.UseConfigToken, "account", a.AccountID, "target_path", targetPath)
+		lastFailureMessage = failureMessage(msg, bizMsg, "get pow failed")
+		if isTokenInvalid(status, code, bizCode, msg, bizMsg) || isAuthIndicativeBizFailure(msg, bizMsg) {
+			lastFailureKind = authFailureKind(a.UseConfigToken)
+		} else {
+			lastFailureKind = FailureUnknown
+		}
 		if a.UseConfigToken {
 			if !refreshed && shouldAttemptRefresh(status, code, bizCode, msg, bizMsg) {
 				if c.Auth.RefreshToken(ctx, a) {
@@ -132,12 +152,15 @@ func (c *Client) GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts in
 		}
 		attempts++
 	}
+	if lastFailureKind != FailureUnknown {
+		return "", &RequestFailure{Op: "get pow", Kind: lastFailureKind, Message: lastFailureMessage}
+	}
 	return "", errors.New("get pow failed")
 }

 func (c *Client) authHeaders(token string) map[string]string {
-	headers := make(map[string]string, len(BaseHeaders)+1)
-	for k, v := range BaseHeaders {
+	headers := make(map[string]string, len(dsprotocol.BaseHeaders)+1)
+	for k, v := range dsprotocol.BaseHeaders {
 		headers[k] = v
 	}
 	headers["authorization"] = "Bearer " + token
@@ -201,6 +224,39 @@ func isAuthIndicativeBizFailure(msg string, bizMsg string) bool {
 	return false
 }

+// authFailureKind selects the unauthorized failure kind:
+// FailureManagedUnauthorized when useConfigToken is true,
+// FailureDirectUnauthorized otherwise.
+func authFailureKind(useConfigToken bool) FailureKind {
+	if !useConfigToken {
+		return FailureDirectUnauthorized
+	}
+	return FailureManagedUnauthorized
+}
+
+// failureMessage picks the text to report for a failed call. It returns the
+// first of bizMsg and msg that is non-blank after trimming, and falls back to
+// the trimmed fallback when both are blank.
+func failureMessage(msg string, bizMsg string, fallback string) string {
+	// bizMsg is listed first so it takes precedence over msg.
+	for _, candidate := range []string{bizMsg, msg} {
+		if text := strings.TrimSpace(candidate); text != "" {
+			return text
+		}
+	}
+	return strings.TrimSpace(fallback)
+}
+
+// extractCreateSessionID pulls the session id out of a create-session
+// response. DeepSeek has returned the id in both data.biz_data.id and
+// data.biz_data.chat_session.id across observed response variants, so both are
+// tried in that order. The id is returned trimmed; "" means neither location
+// held a non-blank string.
+func extractCreateSessionID(resp map[string]any) string {
+	data, _ := resp["data"].(map[string]any)
+	bizData, _ := data["biz_data"].(map[string]any)
+
+	// A failed type assertion leaves a nil map or empty string behind; reading
+	// from a nil map is safe, so no explicit ok-checks are needed below.
+	direct, _ := bizData["id"].(string)
+	if id := strings.TrimSpace(direct); id != "" {
+		return id
+	}
+	chatSession, _ := bizData["chat_session"].(map[string]any)
+	nested, _ := chatSession["id"].(string)
+	return strings.TrimSpace(nested)
+}
+
 func extractResponseStatus(resp map[string]any) (code int, bizCode int, msg string, bizMsg string) {
 	code = intFrom(resp["code"])
 	msg, _ = resp["msg"].(string)
--- a/internal/deepseek/client/client_auth_mobile_test.go
+++ b/internal/deepseek/client/client_auth_mobile_test.go
@@ -1,4 +1,4 @@
-package deepseek
+package client

 import "testing"

--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .5.1
 .3.0