Merge pull request #215 from CJackHwang/dev

fix: ignore INCOMPLETE status messages in SSE stream parsing to prevent stream interruption
chore(webui): migrate vite to v8 toolchain
2026-05-03 16:05:26 +08:00 · 2026-04-06 00:31:18 +08:00 · 2026-04-06 00:28:36 +08:00 · 2026-04-06 00:13:22 +08:00 · 2026-04-06 00:00:30 +08:00 · 2026-04-05 23:56:19 +08:00
457 changed files with 57496 additions and 274327 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -10,7 +10,9 @@ __pycache__
 .Python
 build/
 develop-eggs/
-dist/
+dist/*
+!dist/docker-input/
+!dist/docker-input/*.tar.gz
 downloads/
 eggs/
 .eggs/
--- a/.env.example
+++ b/.env.example
@@ -1,74 +1,20 @@
-# DS2API 环境变量配置模板
-# 复制此文件为 .env 并根据需要修改
-# 最后更新：2026-02
-
-# ===============================================================
-#                        核心配置
-# ===============================================================
-
-# ----- 服务配置 -----
-# 服务端口（默认 5001）
+# DS2API runtime
+# Runtime listen port inside the app/container
 PORT=5001
-
-# 服务监听地址
-HOST=0.0.0.0
-
-# 日志级别 (DEBUG, INFO, WARNING, ERROR)
+# Docker Compose host port (compose only; container still listens on PORT)
+DS2API_HOST_PORT=6011
 LOG_LEVEL=INFO

+# Admin authentication
+DS2API_ADMIN_KEY=change-me

-# ===============================================================
-#                    数据配置（三选一）
-# ===============================================================
+# Config loading (choose one)
+# 1) file-based config
+DS2API_CONFIG_PATH=/app/config.json
+# 2) inline JSON or Base64 JSON
+# DS2API_CONFIG_JSON=
+# 3) legacy compatibility alias
+# CONFIG_JSON=

-# 方式1: JSON 字符串（适合简单配置）
-# DS2API_CONFIG_JSON={"keys":["your-api-key"],"accounts":[{"email":"user@example.com","password":"xxx","token":""}]}
-
-# 方式2: Base64 编码的 JSON（推荐用于 Vercel，避免特殊字符转义问题）
-# 生成方式: echo '{"keys":["your-api-key"],"accounts":[...]}' | base64
-# DS2API_CONFIG_JSON=eyJrZXlzIjpbInlvdXItYXBpLWtleSJdLCJhY2NvdW50cyI6W3siZW1haWwiOiJ1c2VyQGV4YW1wbGUuY29tIiwicGFzc3dvcmQiOiJ4eHgiLCJ0b2tlbiI6IiJ9XX0=
-
-# 方式3: 配置文件路径（本地开发推荐）
-# DS2API_CONFIG_PATH=config.json
-
-
-# ===============================================================
-#                    管理界面配置
-# ===============================================================
-
-# Admin API 密钥（Vercel 部署必填！）
-# 用于保护 WebUI 管理界面，首次访问 /admin 时需要输入此密钥登录
-DS2API_ADMIN_KEY=your-admin-secret-key
-
-# JWT Token 过期时间（秒，默认 86400 = 24小时）
-# DS2API_SESSION_EXPIRE=86400
-
-
-# ===============================================================
-#                    Vercel 集成（可选）
-# ===============================================================
-
-# Vercel API Token
-# 获取方式: https://vercel.com/account/tokens
-# VERCEL_TOKEN=your-vercel-token
-
-# Vercel Project ID
-# 获取方式: Vercel 控制台 -> 项目设置 -> General -> Project ID
-# VERCEL_PROJECT_ID=prj_xxxxxxxxxxxx
-
-# Vercel Team ID（个人项目无需填写，团队项目才需要）
-# VERCEL_TEAM_ID=
-
-
-# ===============================================================
-#                    高级配置（可选）
-# ===============================================================
-
-# Tokenizer 目录（留空使用项目根目录）
-# DS2API_TOKENIZER_DIR=
-
-# 模板目录
-# DS2API_TEMPLATES_DIR=templates
-
-# WASM 文件路径（PoW 计算用）
-# DS2API_WASM_PATH=sha3_wasm_bg.7b9ca65ddd.wasm
+# Optional: static admin assets path
+# DS2API_STATIC_ADMIN_DIR=/app/static/admin
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,24 +1,20 @@
-#### 💻 变更类型 | Change Type
-
-<!-- For change type, change [ ] to [x]. -->
-
-- [ ] ✨ feat
-- [ ] 🐛 fix
-- [ ] ♻️ refactor
-- [ ] 💄 style
-- [ ] 👷 build
-- [ ] ⚡️ perf
-- [ ] 📝 docs
-- [ ] 🔨 chore
-
-#### 🔀 变更说明 | Description of Change
-
-<!-- Thank you for your Pull Request. Please provide a description above. -->
-
-#### 📝 补充信息 | Additional Information
-
-<!-- Add any other context about the Pull Request here. -->
-
----
-
-> 💡 **提示**：如果修改了 `webui/` 目录下的文件，PR 合并后 CI 会自动构建并提交产物，无需手动构建。
+#### 💻 变更类型 | Change Type
+
+<!-- For change type, change [ ] to [x]. -->
+
+- [ ] ✨ feat
+- [ ] 🐛 fix
+- [ ] ♻️ refactor
+- [ ] 💄 style
+- [ ] 👷 build
+- [ ] ⚡️ perf
+- [ ] 📝 docs
+- [ ] 🔨 chore
+
+#### 🔀 变更说明 | Description of Change
+
+<!-- Thank you for your Pull Request. Please provide a description above. -->
+
+#### 📝 补充信息 | Additional Information
+
+<!-- Add any other context about the Pull Request here. -->
--- a/.github/workflows/build-webui.yml
+++ b/.github/workflows/build-webui.yml
@@ -1,76 +0,0 @@
-# 自动构建 WebUI 并提交构建产物
-# 触发条件：webui 目录下的文件变更
-
-name: Build WebUI
-
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - 'webui/**'
-      - '.github/workflows/build-webui.yml'
-  pull_request:
-    branches:
-      - main
-    paths:
-      - 'webui/**'
-  # 允许手动触发
-  workflow_dispatch:
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    
-    # 只在主仓库运行，避免 fork 仓库运行
-    if: github.repository == 'CJackHwang/ds2api'
-    
-    permissions:
-      contents: write
-    
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-      
-      - name: Setup Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: '20'
-          cache: 'npm'
-          cache-dependency-path: webui/package-lock.json
-      
-      - name: Install dependencies
-        working-directory: webui
-        run: npm ci
-      
-      - name: Build WebUI
-        working-directory: webui
-        run: npm run build
-      
-      - name: Check for changes
-        id: check_changes
-        run: |
-          git add static/admin
-          if git diff --staged --quiet; then
-            echo "changed=false" >> $GITHUB_OUTPUT
-          else
-            echo "changed=true" >> $GITHUB_OUTPUT
-          fi
-      
-      - name: Commit and push changes
-        if: steps.check_changes.outputs.changed == 'true' && github.event_name == 'push'
-        run: |
-          git config --local user.email "github-actions[bot]@users.noreply.github.com"
-          git config --local user.name "github-actions[bot]"
-          git commit -m "chore: auto-build WebUI [skip ci]"
-          git push
-      
-      - name: Upload build artifacts (for PR review)
-        if: github.event_name == 'pull_request'
-        uses: actions/upload-artifact@v4
-        with:
-          name: webui-build
-          path: static/admin
-          retention-days: 7
--- a/.github/workflows/quality-gates.yml
+++ b/.github/workflows/quality-gates.yml
@@ -0,0 +1,40 @@
+name: Quality Gates
+
+on:
+  pull_request:
+  push:
+    branches:
+      - dev
+
+permissions:
+  contents: read
+
+jobs:
+  quality-gates:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: "1.26.x"
+
+      - name: Setup Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: "24"
+          cache: "npm"
+          cache-dependency-path: webui/package-lock.json
+
+      - name: Refactor Line Gate
+        run: ./tests/scripts/check-refactor-line-gate.sh
+
+      - name: Unit Gates (Go + Node)
+        run: ./tests/scripts/run-unit-all.sh
+
+      - name: WebUI Build Gate
+        run: |
+          npm ci --prefix webui
+          npm run build --prefix webui
--- a/.github/workflows/release-artifacts.yml
+++ b/.github/workflows/release-artifacts.yml
@@ -0,0 +1,207 @@
+name: Release Artifacts
+
+on:
+  release:
+    types:
+      - published
+  workflow_dispatch:
+    inputs:
+      release_tag:
+        description: "Release tag to build/publish (e.g. v2.1.6)"
+        required: true
+        type: string
+
+permissions:
+  contents: write
+  packages: write
+
+jobs:
+  build-and-upload:
+    runs-on: ubuntu-latest
+    env:
+      RELEASE_TAG: ${{ github.event.release.tag_name || github.event.inputs.release_tag }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: "1.26.x"
+
+      - name: Setup Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: "24"
+          cache: "npm"
+          cache-dependency-path: webui/package-lock.json
+
+      - name: Release Blocking Gates
+        run: |
+          ./tests/scripts/check-stage6-manual-smoke.sh
+          ./tests/scripts/check-refactor-line-gate.sh
+          ./tests/scripts/run-unit-all.sh
+
+      - name: Build WebUI
+        run: |
+          npm ci --prefix webui
+          npm run build --prefix webui
+
+      - name: Build Multi-Platform Archives
+        run: |
+          set -euo pipefail
+          TAG="${RELEASE_TAG}"
+          BUILD_VERSION="${TAG}"
+          if [ -z "${BUILD_VERSION}" ] && [ -f VERSION ]; then
+            BUILD_VERSION="$(cat VERSION | tr -d '[:space:]')"
+          fi
+          mkdir -p dist
+
+          targets=(
+            "linux/amd64"
+            "linux/arm64"
+            "darwin/amd64"
+            "darwin/arm64"
+            "windows/amd64"
+          )
+
+          for target in "${targets[@]}"; do
+            GOOS="${target%/*}"
+            GOARCH="${target#*/}"
+            PKG="ds2api_${TAG}_${GOOS}_${GOARCH}"
+            STAGE="dist/${PKG}"
+            BIN="ds2api"
+            if [ "${GOOS}" = "windows" ]; then
+              BIN="ds2api.exe"
+            fi
+
+            mkdir -p "${STAGE}/static"
+            CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" \
+              go build -trimpath -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION}" -o "${STAGE}/${BIN}" ./cmd/ds2api
+
+            cp config.example.json .env.example internal/deepseek/assets/sha3_wasm_bg.7b9ca65ddd.wasm LICENSE README.MD README.en.md "${STAGE}/"
+            cp -R static/admin "${STAGE}/static/admin"
+
+            if [ "${GOOS}" = "windows" ]; then
+              (cd dist && zip -rq "${PKG}.zip" "${PKG}")
+            else
+              tar -C dist -czf "dist/${PKG}.tar.gz" "${PKG}"
+            fi
+
+            rm -rf "${STAGE}"
+          done
+
+      - name: Prepare Docker release inputs
+        run: |
+          set -euo pipefail
+          TAG="${RELEASE_TAG}"
+          mkdir -p dist/docker-input
+          cp "dist/ds2api_${TAG}_linux_amd64.tar.gz" "dist/docker-input/linux_amd64.tar.gz"
+          cp "dist/ds2api_${TAG}_linux_arm64.tar.gz" "dist/docker-input/linux_arm64.tar.gz"
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Wait for GHCR endpoint
+        run: |
+          set -euo pipefail
+          for i in {1..6}; do
+            code="$(curl -sS -o /dev/null -w '%{http_code}' --max-time 15 https://ghcr.io/v2/ || true)"
+            if [ "${code}" = "200" ] || [ "${code}" = "401" ] || [ "${code}" = "405" ]; then
+              exit 0
+            fi
+            sleep "$((i * 10))"
+          done
+          echo "GHCR endpoint is unreachable after multiple retries (last status: ${code:-unknown})." >&2
+          exit 1
+
+      - name: Log in to GHCR (with retry)
+        run: |
+          set -euo pipefail
+          for i in {1..6}; do
+            if echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin; then
+              exit 0
+            fi
+            sleep "$((i * 10))"
+          done
+          echo "Failed to login to GHCR after multiple retries." >&2
+          exit 1
+
+      - name: Extract Docker metadata
+        id: meta_release
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ghcr.io/${{ github.repository }}
+          tags: |
+            type=raw,value=${{ env.RELEASE_TAG }}
+            type=raw,value=latest
+
+      - name: Build and Push Docker Image
+        uses: docker/build-push-action@v6
+        env:
+          DOCKER_BUILD_RECORD_UPLOAD: "false"
+          DOCKER_BUILD_SUMMARY: "false"
+        with:
+          context: .
+          file: ./Dockerfile
+          target: runtime-from-dist
+          push: true
+          platforms: linux/amd64,linux/arm64
+          tags: ${{ steps.meta_release.outputs.tags }}
+          labels: ${{ steps.meta_release.outputs.labels }}
+
+      - name: Export Docker image archives for release assets
+        run: |
+          set -euo pipefail
+          TAG="${RELEASE_TAG}"
+
+          docker buildx build \
+            --platform linux/amd64 \
+            --target runtime-from-dist \
+            --output type=docker,dest="dist/ds2api_${TAG}_docker_linux_amd64.tar" \
+            .
+
+          docker buildx build \
+            --platform linux/arm64 \
+            --target runtime-from-dist \
+            --output type=docker,dest="dist/ds2api_${TAG}_docker_linux_arm64.tar" \
+            .
+
+          gzip -f "dist/ds2api_${TAG}_docker_linux_amd64.tar"
+          gzip -f "dist/ds2api_${TAG}_docker_linux_arm64.tar"
+
+      - name: Generate checksums
+        run: |
+          set -euo pipefail
+          (cd dist && sha256sum *.tar.gz *.zip > sha256sums.txt)
+
+      - name: Validate release tag
+        run: |
+          set -euo pipefail
+          TAG="${RELEASE_TAG}"
+          if [ -z "${TAG}" ]; then
+            echo "release tag is empty; set release_tag when using workflow_dispatch." >&2
+            exit 1
+          fi
+
+      - name: Upload Release Assets
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          set -euo pipefail
+          TAG="${RELEASE_TAG}"
+          FILES=(
+            dist/*.tar.gz
+            dist/*.zip
+            dist/sha256sums.txt
+          )
+
+          if gh release view "${TAG}" >/dev/null 2>&1; then
+            gh release upload "${TAG}" "${FILES[@]}" --clobber
+          else
+            gh release create "${TAG}" "${FILES[@]}" --title "${TAG}" --notes ""
+          fi
--- a/.github/workflows/release-dockerhub.yml
+++ b/.github/workflows/release-dockerhub.yml
@@ -0,0 +1,129 @@
+name: Release to Docker Hub
+
+on:
+  workflow_dispatch:
+    inputs:
+      version_type:
+        description: '版本类型'
+        required: true
+        default: 'patch'
+        type: choice
+        options:
+          - patch
+          - minor
+          - major
+
+permissions:
+  contents: write
+
+jobs:
+  release:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Get current version
+        id: get_version
+        run: |
+          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0")
+          TAG_VERSION=${LATEST_TAG#v}
+
+          if [ -f VERSION ]; then
+            FILE_VERSION=$(cat VERSION | tr -d '[:space:]')
+          else
+            FILE_VERSION="0.0.0"
+          fi
+
+          function version_gt() { test "$(printf '%s\n' "$@" | sort -V | head -n 1)" != "$1"; }
+
+          if version_gt "$FILE_VERSION" "$TAG_VERSION"; then
+            VERSION="$FILE_VERSION"
+          else
+            VERSION="$TAG_VERSION"
+          fi
+
+          echo "Current version: $VERSION"
+          echo "current_version=$VERSION" >> $GITHUB_OUTPUT
+
+      - name: Calculate next version
+        id: next_version
+        env:
+          VERSION_TYPE: ${{ github.event.inputs.version_type }}
+        run: |
+          VERSION="${{ steps.get_version.outputs.current_version }}"
+          BASE_VERSION=$(echo "$VERSION" | sed 's/-.*$//')
+
+          IFS='.' read -r -a version_parts <<< "$BASE_VERSION"
+          MAJOR="${version_parts[0]:-0}"
+          MINOR="${version_parts[1]:-0}"
+          PATCH="${version_parts[2]:-0}"
+
+          case "$VERSION_TYPE" in
+            major)
+              NEW_VERSION="$((MAJOR + 1)).0.0"
+              ;;
+            minor)
+              NEW_VERSION="${MAJOR}.$((MINOR + 1)).0"
+              ;;
+            *)
+              NEW_VERSION="${MAJOR}.${MINOR}.$((PATCH + 1))"
+              ;;
+          esac
+
+          echo "New version: $NEW_VERSION"
+          echo "new_version=$NEW_VERSION" >> $GITHUB_OUTPUT
+          echo "new_tag=v$NEW_VERSION" >> $GITHUB_OUTPUT
+
+      - name: Update VERSION file
+        run: |
+          echo "${{ steps.next_version.outputs.new_version }}" > VERSION
+
+      - name: Commit VERSION and create tag
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+          git add VERSION
+          if ! git diff --cached --quiet; then
+            git commit -m "chore: bump version to ${{ steps.next_version.outputs.new_tag }} [skip ci]"
+          fi
+
+          NEW_TAG="${{ steps.next_version.outputs.new_tag }}"
+          git tag -a "$NEW_TAG" -m "Release $NEW_TAG"
+          git push origin HEAD:main "$NEW_TAG"
+
+      # Docker 构建并推送到 Docker Hub
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: |
+            ${{ secrets.DOCKERHUB_USERNAME }}/ds2api:${{ steps.next_version.outputs.new_tag }}
+            ${{ secrets.DOCKERHUB_USERNAME }}/ds2api:${{ steps.next_version.outputs.new_version }}
+            ${{ secrets.DOCKERHUB_USERNAME }}/ds2api:latest
+          labels: |
+            org.opencontainers.image.version=${{ steps.next_version.outputs.new_version }}
+            org.opencontainers.image.revision=${{ github.sha }}
+          build-args: |
+            BUILD_VERSION=${{ steps.next_version.outputs.new_tag }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -0,0 +1,130 @@
+name: Release to Aliyun CR
+
+on:
+  workflow_dispatch:
+    inputs:
+      version_type:
+        description: '版本类型'
+        required: true
+        default: 'patch'
+        type: choice
+        options:
+          - patch
+          - minor
+          - major
+
+permissions:
+  contents: write
+
+jobs:
+  release:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Get current version
+        id: get_version
+        run: |
+          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0")
+          TAG_VERSION=${LATEST_TAG#v}
+
+          if [ -f VERSION ]; then
+            FILE_VERSION=$(cat VERSION | tr -d '[:space:]')
+          else
+            FILE_VERSION="0.0.0"
+          fi
+
+          function version_gt() { test "$(printf '%s\n' "$@" | sort -V | head -n 1)" != "$1"; }
+
+          if version_gt "$FILE_VERSION" "$TAG_VERSION"; then
+            VERSION="$FILE_VERSION"
+          else
+            VERSION="$TAG_VERSION"
+          fi
+
+          echo "Current version: $VERSION"
+          echo "current_version=$VERSION" >> $GITHUB_OUTPUT
+
+      - name: Calculate next version
+        id: next_version
+        env:
+          VERSION_TYPE: ${{ github.event.inputs.version_type }}
+        run: |
+          VERSION="${{ steps.get_version.outputs.current_version }}"
+          BASE_VERSION=$(echo "$VERSION" | sed 's/-.*$//')
+
+          IFS='.' read -r -a version_parts <<< "$BASE_VERSION"
+          MAJOR="${version_parts[0]:-0}"
+          MINOR="${version_parts[1]:-0}"
+          PATCH="${version_parts[2]:-0}"
+
+          case "$VERSION_TYPE" in
+            major)
+              NEW_VERSION="$((MAJOR + 1)).0.0"
+              ;;
+            minor)
+              NEW_VERSION="${MAJOR}.$((MINOR + 1)).0"
+              ;;
+            *)
+              NEW_VERSION="${MAJOR}.${MINOR}.$((PATCH + 1))"
+              ;;
+          esac
+
+          echo "New version: $NEW_VERSION"
+          echo "new_version=$NEW_VERSION" >> $GITHUB_OUTPUT
+          echo "new_tag=v$NEW_VERSION" >> $GITHUB_OUTPUT
+
+      - name: Update VERSION file
+        run: |
+          echo "${{ steps.next_version.outputs.new_version }}" > VERSION
+
+      - name: Commit VERSION and create tag
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+          git add VERSION
+          if ! git diff --cached --quiet; then
+            git commit -m "chore: bump version to ${{ steps.next_version.outputs.new_tag }} [skip ci]"
+          fi
+
+          NEW_TAG="${{ steps.next_version.outputs.new_tag }}"
+          git tag -a "$NEW_TAG" -m "Release $NEW_TAG"
+          git push origin HEAD:main "$NEW_TAG"
+
+      # Docker 构建并推送到阿里云
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Aliyun Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ secrets.ALIYUN_REGISTRY }}
+          username: ${{ secrets.ALIYUN_REGISTRY_USER }}
+          password: ${{ secrets.ALIYUN_REGISTRY_PASSWORD }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: |
+            ${{ secrets.ALIYUN_REGISTRY }}/${{ secrets.ALIYUN_REGISTRY_NAMESPACE }}/ds2api:${{ steps.next_version.outputs.new_tag }}
+            ${{ secrets.ALIYUN_REGISTRY }}/${{ secrets.ALIYUN_REGISTRY_NAMESPACE }}/ds2api:${{ steps.next_version.outputs.new_version }}
+            ${{ secrets.ALIYUN_REGISTRY }}/${{ secrets.ALIYUN_REGISTRY_NAMESPACE }}/ds2api:latest
+          labels: |
+            org.opencontainers.image.version=${{ steps.next_version.outputs.new_version }}
+            org.opencontainers.image.revision=${{ github.sha }}
+          build-args: |
+            BUILD_VERSION=${{ steps.next_version.outputs.new_tag }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
--- a/.gitignore
+++ b/.gitignore
@@ -2,37 +2,6 @@
 config.json
 .env

-# Python
-__pycache__/
-*.py[cod]
-*$py.class
-*.so
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# Virtual environments
-venv/
-ENV/
-env/
-.venv
-
 # IDE
 .vscode/
 .idea/
@@ -40,11 +9,12 @@ env/
 *.swo
 *~
 .DS_Store
+opencode.json

 # Logs
 *.log
 logs/
-uvicorn.log
+artifacts/

 # Vercel
 .vercel
@@ -55,8 +25,6 @@ webui/node_modules/
 webui/dist/
 .npm
 .pnpm-store/
-# 保留 webui/package-lock.json 用于 CI 缓存
-# package-lock.json  # 如果有根目录的可以忽略
 yarn.lock
 pnpm-lock.yaml

@@ -64,6 +32,12 @@ pnpm-lock.yaml
 *.tsbuildinfo
 .cache/
 .parcel-cache/
+static/admin/
+internal/webui/assets/admin/
+
+# Go compiled binaries
+ds2api
+ds2api-tests

 # Environment
 .env.local
@@ -74,9 +48,14 @@ pnpm-lock.yaml
 htmlcov/
 .pytest_cache/
 .tox/
+*.coverprofile
+coverage*.out
+cover/

 # Misc
-*.pyc
-*.pyo
 .git/
 Thumbs.db
+
+# Claude Code
+.claude/
+CLAUDE.local.md
--- a/API.en.md
+++ b/API.en.md
--- a/API.md
+++ b/API.md
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,90 +0,0 @@
-# 贡献指南
-
-感谢你对 DS2API 的贡献！
-
-## 开发环境设置
-
-### 后端
-
-```bash
-# 1. 克隆仓库
-git clone https://github.com/CJackHwang/ds2api.git
-cd ds2api
-
-# 2. 创建虚拟环境（推荐）
-python -m venv venv
-source venv/bin/activate  # Windows: venv\Scripts\activate
-
-# 3. 安装依赖
-pip install -r requirements.txt
-
-# 4. 配置
-cp config.example.json config.json
-# 编辑 config.json
-
-# 5. 启动
-python dev.py
-```
-
-### 前端 (WebUI)
-
-```bash
-cd webui
-npm install
-npm run dev
-```
-
-## 代码规范
-
-- **Python**: 遵循 PEP 8，使用 4 空格缩进
-- **JavaScript/React**: 使用 4 空格缩进，使用函数组件
-- **提交信息**: 使用语义化提交格式（如 `feat:`, `fix:`, `docs:`）
-
-## 提交 PR
-
-1. Fork 本仓库
-2. 创建功能分支 (`git checkout -b feature/xxx`)
-3. 提交更改 (`git commit -m 'feat: 添加xxx功能'`)
-4. 推送分支 (`git push origin feature/xxx`)
-5. 创建 Pull Request
-
-## WebUI 构建
-
-> **重要**: 修改 `webui/` 目录后 **无需手动构建**！
-
-当 PR 合并到 `main` 分支后，GitHub Actions 会自动：
-1. 构建 WebUI
-2. 提交构建产物到 `static/admin/`
-
-如果需要本地构建（测试用）：
-```bash
-./scripts/build-webui.sh
-```
-
-## 项目结构
-
-```
-ds2api/
-├── app.py              # FastAPI 应用入口
-├── dev.py              # 开发服务器
-├── core/               # 核心模块
-│   ├── auth.py         # 账号认证与轮询
-│   ├── config.py       # 配置管理
-│   ├── deepseek.py     # DeepSeek API 调用
-│   ├── models.py       # 模型定义
-│   ├── pow.py          # PoW 计算
-│   └── sse_parser.py   # SSE 解析
-├── routes/             # API 路由
-│   ├── openai.py       # OpenAI 兼容接口
-│   ├── claude.py       # Claude 兼容接口
-│   ├── home.py         # 首页路由
-│   └── admin/          # 管理接口
-├── webui/              # React WebUI 源码
-├── static/admin/       # WebUI 构建产物（自动生成）
-└── scripts/            # 辅助脚本
-```
-
-## 问题反馈
-
-- 使用 [GitHub Issues](https://github.com/CJackHwang/ds2api/issues) 报告问题
-- 提供详细的复现步骤和日志信息
--- a/DEPLOY.md
+++ b/DEPLOY.md
@@ -1,406 +0,0 @@
-# DS2API 部署指南
-
-本文档详细介绍 DS2API 的各种部署方式。
-
----
-
-## 目录
-
-- [Vercel 部署（推荐）](#vercel-部署推荐)
-- [Docker 部署（推荐）](#docker-部署推荐)
-- [本地开发](#本地开发)
-- [生产环境部署](#生产环境部署)
-- [常见问题](#常见问题)
-
----
-
-## Vercel 部署（推荐）
-
-### 一键部署
-
-[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2FCJackHwang%2Fds2api&env=DS2API_ADMIN_KEY&envDescription=管理面板访问密码（必填）&envLink=https%3A%2F%2Fgithub.com%2FCJackHwang%2Fds2api%23环境变量&project-name=ds2api&repository-name=ds2api)
-
-### 部署步骤
-
-1. **点击部署按钮**
-   - 登录你的 GitHub 账号
-   - 授权 Vercel 访问
-
-2. **设置环境变量**
-   - `DS2API_ADMIN_KEY`: 管理面板密码（**必填**）
-
-3. **等待部署完成**
-   - Vercel 会自动构建并部署项目
-   - 部署完成后获得访问 URL
-
-4. **配置账号**
-   - 访问 `https://your-project.vercel.app/admin`
-   - 输入管理密码登录
-   - 添加 DeepSeek 账号
-   - 设置自定义 API Key
-
-5. **同步配置**
-   - 点击「同步到 Vercel」按钮
-   - 首次需要输入 Vercel Token 和 Project ID
-   - 同步成功后配置会持久化
-
-### 获取 Vercel 凭证
-
-**Vercel Token**:
-1. 访问 https://vercel.com/account/tokens
-2. 点击 "Create Token"
-3. 设置名称和有效期
-4. 复制生成的 Token
-
-**Project ID**:
-1. 进入 Vercel 项目页面
-2. 点击 Settings -> General
-3. 复制 "Project ID"
-
----
-
-## 本地开发
-
-### 环境要求
-
-- Python 3.9+
-- Node.js 18+ (WebUI 开发)
-- pip
-
-### 快速开始
-
-```bash
-# 1. 克隆项目
-git clone https://github.com/CJackHwang/ds2api.git
-cd ds2api
-
-# 2. 安装 Python 依赖
-pip install -r requirements.txt
-
-# 3. 配置账号
-cp config.example.json config.json
-# 编辑 config.json，填入 DeepSeek 账号信息
-
-# 4. 启动服务
-python dev.py
-```
-
-### 配置文件示例
-
-```json
-{
-  "keys": ["my-api-key-1", "my-api-key-2"],
-  "accounts": [
-    {
-      "email": "your-email@example.com",
-      "password": "your-password",
-      "token": ""
-    },
-    {
-      "mobile": "12345678901",
-      "password": "your-password",
-      "token": ""
-    }
-  ]
-}
-```
-
-**说明**：
-- `keys`: 自定义 API Key，用于调用本服务的接口
-- `accounts`: DeepSeek 网页版账号
-  - 支持 `email` 或 `mobile` 登录
-  - `token` 留空，系统会自动获取
-
-### WebUI 开发
-
-```bash
-# 进入 WebUI 目录
-cd webui
-
-# 安装依赖
-npm install
-
-# 启动开发服务器
-npm run dev
-```
-
-WebUI 开发服务器会启动在 `http://localhost:5173`，并自动代理 API 请求到后端 `http://localhost:5001`。
-
-### WebUI 构建
-
-WebUI 构建产物位于 `static/admin/` 目录。
-
-**自动构建（推荐）**：
-- 当 `webui/` 目录下的文件变更并推送到 `main` 分支时，GitHub Actions 会自动构建并提交产物
-- PR 合并时会自动触发构建
-
-**手动构建**：
-```bash
-# 方式1：使用脚本
-./scripts/build-webui.sh
-
-# 方式2：直接执行
-cd webui
-npm install
-npm run build
-```
-
-> **贡献者注意**：修改 WebUI 后无需手动构建，CI 会自动处理。
-
----
-
-## Docker 部署（推荐）
-
-Docker 部署采用**零侵入、解耦设计**：
-- Dockerfile 仅执行标准 Python 项目操作，不硬编码任何项目特定配置
-- 所有配置通过环境变量和 `.env` 文件管理
-- **主代码更新时只需重新构建镜像，无需修改 Docker 配置**
-
-### 快速开始（Docker Compose）
-
-```bash
-# 1. 复制环境变量模板
-cp .env.example .env
-# 编辑 .env，填写 DS2API_ADMIN_KEY 和 DS2API_CONFIG_JSON
-
-# 2. 启动服务
-docker-compose up -d
-
-# 3. 查看日志
-docker-compose logs -f
-
-# 4. 主代码更新后重新构建
-docker-compose up -d --build
-```
-
-### 配置文件挂载方式
-
-如需使用 `config.json` 而非环境变量：
-
-```yaml
-# docker-compose.yml
-services:
-  ds2api:
-    build: .
-    ports:
-      - "5001:5001"
-    environment:
-      - DS2API_ADMIN_KEY=your-admin-key
-    volumes:
-      - ./config.json:/app/config.json:ro
-    restart: unless-stopped
-```
-
-### Docker 命令行部署
-
-```bash
-# 构建镜像
-docker build -t ds2api:latest .
-
-# 使用环境变量运行
-docker run -d \
-  --name ds2api \
-  -p 5001:5001 \
-  -e DS2API_ADMIN_KEY=your-admin-key \
-  -e DS2API_CONFIG_JSON='{"keys":["api-key"],"accounts":[...]}' \
-  --restart unless-stopped \
-  ds2api:latest
-
-# 或使用配置文件挂载
-docker run -d \
-  --name ds2api \
-  -p 5001:5001 \
-  -e DS2API_ADMIN_KEY=your-admin-key \
-  -v $(pwd)/config.json:/app/config.json:ro \
-  --restart unless-stopped \
-  ds2api:latest
-```
-
-### 开发模式（热重载）
-
-```bash
-# 使用开发配置启动，代码修改实时生效
-docker-compose -f docker-compose.dev.yml up
-```
-
-开发模式特性：
-- 源代码挂载到容器，修改即时生效
-- 日志级别设为 DEBUG
-- 自动读取本地 `config.json`
-
-### 维护命令
-
-```bash
-# 查看容器状态
-docker-compose ps
-
-# 查看日志
-docker-compose logs -f ds2api
-
-# 重启服务
-docker-compose restart
-
-# 停止服务
-docker-compose down
-
-# 完全重建（清除缓存）
-docker-compose down
-docker-compose build --no-cache
-docker-compose up -d
-```
-
----
-
-## 生产环境部署
-
-### 使用 systemd (Linux)
-
-1. **创建服务文件**
-
-```bash
-sudo nano /etc/systemd/system/ds2api.service
-```
-
-```ini
-[Unit]
-Description=DS2API Service
-After=network.target
-
-[Service]
-Type=simple
-User=www-data
-WorkingDirectory=/opt/ds2api
-ExecStart=/usr/bin/python3 app.py
-Restart=always
-RestartSec=10
-Environment=PORT=5001
-Environment=DS2API_ADMIN_KEY=your-admin-key
-
-[Install]
-WantedBy=multi-user.target
-```
-
-2. **启动服务**
-
-```bash
-sudo systemctl daemon-reload
-sudo systemctl enable ds2api
-sudo systemctl start ds2api
-```
-
-3. **查看状态**
-
-```bash
-sudo systemctl status ds2api
-sudo journalctl -u ds2api -f
-```
-
-### Nginx 反向代理
-
-```nginx
-server {
-    listen 80;
-    server_name api.yourdomain.com;
-
-    # SSL 配置（推荐）
-    # listen 443 ssl http2;
-    # ssl_certificate /path/to/cert.pem;
-    # ssl_certificate_key /path/to/key.pem;
-
-    location / {
-        proxy_pass http://127.0.0.1:5001;
-        proxy_http_version 1.1;
-        
-        # 关闭缓冲，支持 SSE
-        proxy_buffering off;
-        proxy_cache off;
-        
-        # 连接设置
-        proxy_set_header Host $host;
-        proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-        
-        # SSE 超时设置
-        proxy_read_timeout 300s;
-        proxy_send_timeout 300s;
-        
-        # 分块传输
-        chunked_transfer_encoding on;
-        tcp_nopush on;
-        tcp_nodelay on;
-        keepalive_timeout 120;
-    }
-}
-```
-
----
-
-## 常见问题
-
-### Q: 账号验证失败怎么办？
-
-**A**: 检查以下几点：
-1. 确认 DeepSeek 账号密码正确
-2. 检查账号是否被封禁或需要验证
-3. 尝试在浏览器中手动登录一次
-4. 查看日志获取详细错误信息
-
-### Q: 流式响应断开怎么办？
-
-**A**: 
-1. 检查 Nginx/反向代理配置，确保关闭了 `proxy_buffering`
-2. 增加 `proxy_read_timeout` 超时时间
-3. 检查网络连接稳定性
-
-### Q: Vercel 部署后配置丢失？
-
-**A**: 
-1. 确保点击了「同步到 Vercel」按钮
-2. 检查 Vercel Token 是否正确且未过期
-3. 确认 Project ID 正确
-
-### Q: 如何更新到新版本？
-
-**本地部署**:
-```bash
-git pull origin main
-pip install -r requirements.txt
-# 重启服务
-```
-
-**Docker 部署**:
-```bash
-# 拉取最新代码
-git pull origin main
-
-# 重新构建并启动（无需修改 Docker 配置）
-docker-compose up -d --build
-```
-
-**Vercel 部署**:
-- 项目会自动从 GitHub 同步更新
-- 或在 Vercel 控制台手动触发重新部署
-
-### Q: 如何查看日志？
-
-**本地开发**:
-```bash
-# 设置日志级别
-export LOG_LEVEL=DEBUG
-python dev.py
-```
-
-**Vercel**:
-- 访问 Vercel 控制台 -> 项目 -> Deployments -> Logs
-
-### Q: Token 计数不准确？
-
-**A**: DS2API 使用估算方式计算 token 数量（字符数 / 4），与 OpenAI 官方的 tokenizer 可能有差异，仅供参考。
-
----
-
-## 获取帮助
-
-- **GitHub Issues**: https://github.com/CJackHwang/ds2api/issues
-- **文档**: https://github.com/CJackHwang/ds2api
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,20 +1,66 @@
-# DS2API Docker 镜像
-# 采用极简、零侵入设计，所有配置通过环境变量传递
-# 主代码更新时只需重新构建镜像，无需修改 Dockerfile
+FROM node:24 AS webui-builder

-FROM python:3.11-slim
+WORKDIR /app/webui
+COPY webui/package.json webui/package-lock.json ./
+RUN npm ci
+COPY webui ./
+RUN npm run build

+FROM golang:1.26 AS go-builder
 WORKDIR /app
-
-# 安装依赖（利用 Docker 缓存层）
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-
-# 复制整个项目（保留原始目录结构）
+ARG TARGETOS
+ARG TARGETARCH
+ARG BUILD_VERSION
+COPY go.mod go.sum* ./
+RUN go mod download
 COPY . .
+RUN set -eux; \
+    GOOS="${TARGETOS:-$(go env GOOS)}"; \
+    GOARCH="${TARGETARCH:-$(go env GOARCH)}"; \
+    BUILD_VERSION_RESOLVED="${BUILD_VERSION:-}"; \
+    if [ -z "${BUILD_VERSION_RESOLVED}" ] && [ -f VERSION ]; then BUILD_VERSION_RESOLVED="$(cat VERSION | tr -d "[:space:]")"; fi; \
+    CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" go build -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION_RESOLVED}" -o /out/ds2api ./cmd/ds2api

-# 暴露服务端口
+FROM busybox:1.36.1-musl AS busybox-tools
+
+FROM debian:bookworm-slim AS runtime-base
+WORKDIR /app
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+COPY --from=busybox-tools /bin/busybox /usr/local/bin/busybox
 EXPOSE 5001
+CMD ["/usr/local/bin/ds2api"]

-# 启动命令（依赖项目自身的启动逻辑）
-CMD ["python", "app.py"]
+FROM runtime-base AS runtime-from-source
+COPY --from=go-builder /out/ds2api /usr/local/bin/ds2api
+COPY --from=go-builder /app/internal/deepseek/assets/sha3_wasm_bg.7b9ca65ddd.wasm /app/sha3_wasm_bg.7b9ca65ddd.wasm
+COPY --from=go-builder /app/config.example.json /app/config.example.json
+COPY --from=webui-builder /app/static/admin /app/static/admin
+
+FROM busybox-tools AS dist-extract
+ARG TARGETARCH
+COPY dist/docker-input/linux_amd64.tar.gz /tmp/ds2api_linux_amd64.tar.gz
+COPY dist/docker-input/linux_arm64.tar.gz /tmp/ds2api_linux_arm64.tar.gz
+RUN set -eux; \
+    case "${TARGETARCH}" in \
+      amd64) ARCHIVE="/tmp/ds2api_linux_amd64.tar.gz" ;; \
+      arm64) ARCHIVE="/tmp/ds2api_linux_arm64.tar.gz" ;; \
+      *) echo "unsupported TARGETARCH: ${TARGETARCH}" >&2; exit 1 ;; \
+    esac; \
+    tar -xzf "${ARCHIVE}" -C /tmp; \
+    PKG_DIR="$(find /tmp -maxdepth 1 -type d -name "ds2api_*_linux_${TARGETARCH}" | head -n1)"; \
+    test -n "${PKG_DIR}"; \
+    mkdir -p /out/static; \
+    cp "${PKG_DIR}/ds2api" /out/ds2api; \
+    cp "${PKG_DIR}/sha3_wasm_bg.7b9ca65ddd.wasm" /out/sha3_wasm_bg.7b9ca65ddd.wasm; \
+    cp "${PKG_DIR}/config.example.json" /out/config.example.json; \
+    cp -R "${PKG_DIR}/static/admin" /out/static/admin
+
+FROM runtime-base AS runtime-from-dist
+COPY --from=dist-extract /out/ds2api /usr/local/bin/ds2api
+COPY --from=dist-extract /out/sha3_wasm_bg.7b9ca65ddd.wasm /app/sha3_wasm_bg.7b9ca65ddd.wasm
+COPY --from=dist-extract /out/config.example.json /app/config.example.json
+COPY --from=dist-extract /out/static/admin /app/static/admin
+
+FROM runtime-from-source AS final
--- a/README.MD
+++ b/README.MD
@@ -1,99 +1,275 @@
+<p align="center">
+  <img src="webui/public/ds2api-favicon.svg" width="128" height="128" alt="DS2API icon" />
+</p>
+
 # DS2API

 [![License](https://img.shields.io/github/license/CJackHwang/ds2api.svg)](LICENSE)
 ![Stars](https://img.shields.io/github/stars/CJackHwang/ds2api.svg)
 ![Forks](https://img.shields.io/github/forks/CJackHwang/ds2api.svg)
-[![Version](https://img.shields.io/badge/version-1.6.11-blue.svg)](version.txt)
-[![Docker](https://img.shields.io/badge/docker-ready-blue.svg)](DEPLOY.md#docker-部署推荐)
+[![Release](https://img.shields.io/github/v/release/CJackHwang/ds2api?display_name=tag)](https://github.com/CJackHwang/ds2api/releases)
+[![Docker](https://img.shields.io/badge/docker-ready-blue.svg)](docs/DEPLOY.md)
+[![Deploy on Zeabur](https://zeabur.com/button.svg)](https://zeabur.com/templates/L4CFHP)
+[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https://github.com/CJackHwang/ds2api)

-将 DeepSeek 免费对话版转换为 **OpenAI & Claude 兼容 API**，支持多账号轮询、自动 Token 刷新、可视化管理界面。
+语言 / Language: [中文](README.MD) | [English](README.en.md)

-![p1](https://github.com/user-attachments/assets/07296a50-50d4-4f05-a9e5-280df14e9532)
-![p2](https://github.com/user-attachments/assets/03b4a763-766f-4050-aea8-1a183e70ae6a)
-![p3](https://github.com/user-attachments/assets/beb9e41d-4c12-45d1-a26c-154280211185)
-![p4](https://github.com/user-attachments/assets/fc8b9836-11e3-4c38-a684-eb2c79b80fe9)
-![p5](https://github.com/user-attachments/assets/513e9ca7-aa9e-45a6-8f7e-f362b1650675)
+将 DeepSeek Web 对话能力转换为 OpenAI、Claude 与 Gemini 兼容 API。后端为 **Go 全量实现**，前端为 React WebUI 管理台（源码在 `webui/`，部署时自动构建到 `static/admin`）。
+
+> **重要免责声明**
+>
+> 本仓库仅供学习、研究、个人实验和内部验证使用，不提供任何形式的商业授权、适用性保证或结果保证。
+>
+> 作者及仓库维护者不对因使用、修改、分发、部署或依赖本项目而产生的任何直接或间接损失、账号封禁、数据丢失、法律风险或第三方索赔负责。
+>
+> 请勿将本项目用于违反服务条款、协议、法律法规或平台规则的场景。商业使用前请自行确认 `LICENSE`、相关协议以及你是否获得了作者的书面许可。
+
+## 架构概览
+
+```mermaid
+flowchart LR
+    Client["🖥️ 客户端 / SDK\n(OpenAI / Claude / Gemini)"]
+    Upstream["☁️ DeepSeek API"]
+
+    subgraph DS2API["DS2API 3.x（统一 OpenAI 内核）"]
+        Router["chi Router + 中间件\n(RequestID / RealIP / Logger / Recoverer / CORS)"]
+
+        subgraph Adapters["协议适配层"]
+            OA["OpenAI\n/v1/*"]
+            CA["Claude\n/anthropic/* + /v1/messages"]
+            GA["Gemini\n/v1beta/models/* + /v1/models/*"]
+            Admin["Admin API\n/admin/*"]
+            WebUI["WebUI\n/admin（静态托管）"]
+        end
+
+        subgraph Runtime["运行时核心能力"]
+            Bridge["CLIProxy 转换桥\n(多协议 <-> OpenAI)"]
+            OAEngine["OpenAI ChatCompletions\n(统一工具调用与流式语义)"]
+            Auth["Auth Resolver\n(API key / bearer / x-goog-api-key)"]
+            Pool["Account Pool + Queue\n(并发槽位 + 等待队列)"]
+            DSClient["DeepSeek Client\n(Session / Auth / HTTP)"]
+            Pow["PoW WASM\n(wazero 预加载)"]
+            Tool["Tool Sieve\n(Go/Node 语义对齐)"]
+        end
+    end
+
+    Client --> Router
+    Router --> OA & CA & GA
+    Router --> Admin
+    Router --> WebUI
+
+    OA --> OAEngine
+    CA & GA --> Bridge
+    Bridge --> OAEngine
+    OAEngine --> Auth
+    OAEngine -.账号轮询.-> Pool
+    OAEngine -.工具调用解析.-> Tool
+    OAEngine -.PoW 计算.-> Pow
+    Auth --> DSClient
+    DSClient --> Upstream
+    Upstream --> DSClient
+    OAEngine --> Bridge
+    Bridge --> Client
+```
+
+- **后端**：Go（`cmd/ds2api/`、`api/`、`internal/`），不依赖 Python 运行时
+- **前端**：React 管理台（`webui/`），运行时托管静态构建产物
+- **部署**：本地运行、Docker、Vercel Serverless、Linux systemd
+
+### 3.X 底层架构调整（相较旧版本）
+
+- **统一路由内核**：所有协议入口统一汇聚到 `internal/server/router.go`，并在同一路由树中注册 OpenAI / Claude / Gemini / Admin / WebUI 路由，避免多入口行为漂移。
+- **统一执行链路**：Claude / Gemini 入口先经 `internal/translatorcliproxy` 做协议转换，再进入 `openai.ChatCompletions` 统一处理工具调用与流式语义，最后再转换回原协议响应。
+- **适配器分层更清晰**：`internal/adapter/{claude,gemini}` 负责入口/出口协议封装，`internal/adapter/openai` 负责核心执行，DeepSeek 侧调用只保留在 OpenAI 内核中。
+- **Tool Calling 双运行时对齐**：Go 侧（`internal/util`）与 Vercel Node 侧（`internal/js/helpers/stream-tool-sieve`）保持一致的解析/防泄漏语义，覆盖 JSON / XML / invoke / text-kv 多风格输入。
+- **配置与运行时设置解耦**：静态配置（`config`）与运行时策略（`settings`）通过 Admin API 分离管理，支持热更新和密码轮换失效旧 JWT。
+- **流式能力升级**：`/v1/responses` 与 `/v1/chat/completions` 共享更一致的工具调用增量输出策略，降低不同 SDK 下的行为差异。
+- **可观测与可运维增强**：`/healthz`、`/readyz`、`/admin/version`、`/admin/dev/captures` 形成排障闭环，便于发布后验证。
+
+## 核心能力
+
+| 能力 | 说明 |
+| --- | --- |
+| OpenAI 兼容 | `GET /v1/models`、`GET /v1/models/{id}`、`POST /v1/chat/completions`、`POST /v1/responses`、`GET /v1/responses/{response_id}`、`POST /v1/embeddings` |
+| Claude 兼容 | `GET /anthropic/v1/models`、`POST /anthropic/v1/messages`、`POST /anthropic/v1/messages/count_tokens`（及快捷路径 `/v1/messages`、`/messages`） |
+| Gemini 兼容 | `POST /v1beta/models/{model}:generateContent`、`POST /v1beta/models/{model}:streamGenerateContent`（及 `/v1/models/{model}:*` 路径） |
+| 多账号轮询 | 自动 token 刷新、邮箱/手机号双登录方式 |
+| 并发队列控制 | 每账号 in-flight 上限 + 等待队列，动态计算建议并发值 |
+| DeepSeek PoW | WASM 计算（`wazero`），无需外部 Node.js 依赖 |
+| Tool Calling | 防泄漏处理：非代码块高置信特征识别、`delta.tool_calls` 早发、结构化增量输出 |
+| Admin API | 配置管理、运行时设置热更新、账号测试 / 批量测试、会话清理、导入导出、Vercel 同步、版本检查 |
+| WebUI 管理台 | `/admin` 单页应用（中英文双语、深色模式） |
+| 运维探针 | `GET /healthz`（存活）、`GET /readyz`（就绪） |
+
+## 平台兼容矩阵
+
+| 级别 | 平台 | 当前状态 |
+| --- | --- | --- |
+| P0 | Codex CLI/SDK（`wire_api=chat` / `wire_api=responses`） | ✅ |
+| P0 | OpenAI SDK（JS/Python，chat + responses） | ✅ |
+| P0 | Vercel AI SDK（openai-compatible） | ✅ |
+| P0 | Anthropic SDK（messages） | ✅ |
+| P0 | Google Gemini SDK（generateContent） | ✅ |
+| P1 | LangChain / LlamaIndex / OpenWebUI（OpenAI 兼容接入） | ✅ |
+
+## 模型支持
+
+### OpenAI 接口
+
+| 模型 | thinking | search |
+| --- | --- | --- |
+| `deepseek-chat` | ❌ | ❌ |
+| `deepseek-reasoner` | ✅ | ❌ |
+| `deepseek-chat-search` | ❌ | ✅ |
+| `deepseek-reasoner-search` | ✅ | ✅ |
+
+### Claude 接口
+
+| 模型 | 默认映射 |
+| --- | --- |
+| `claude-sonnet-4-5` | `deepseek-chat` |
+| `claude-haiku-4-5`（兼容 `claude-3-5-haiku-latest`） | `deepseek-chat` |
+| `claude-opus-4-6` | `deepseek-reasoner` |
+
+可通过配置中的 `claude_mapping` 或 `claude_model_mapping` 覆盖映射关系。
+另外，`/anthropic/v1/models` 现已包含 Claude 1.x/2.x/3.x/4.x 历史模型 ID 与常见别名，便于旧客户端直接兼容。


+#### Claude Code 接入避坑（实测）

-## ✨ 特性
+- `ANTHROPIC_BASE_URL` 推荐直接指向 DS2API 根地址（例如 `http://127.0.0.1:5001`），Claude Code 会请求 `/v1/messages?beta=true`。
+- `ANTHROPIC_API_KEY` 需要与 `config.json` 中 `keys` 一致；建议同时保留常规 key 与 `sk-ant-*` 形态 key，兼容不同客户端校验习惯。
+- 若系统设置了代理，建议对 DS2API 地址配置 `NO_PROXY=127.0.0.1,localhost,<你的主机IP>`，避免本地回环请求被代理拦截。
+- 如遇“工具调用输出成文本、未执行”问题，请升级到包含 Claude 工具调用多格式解析（JSON/XML/ANTML/invoke）的版本。

- 🔄 **双协议兼容** - 同时支持 OpenAI 和 Claude (Anthropic) API 格式
- 🚀 **多账号轮询** - Round-Robin 负载均衡，支持高并发场景
- 🔐 **Token 自动刷新** - 过期自动重新登录，无需手动维护
- 🌐 **WebUI 管理** - 可视化添加账号、测试 API、同步 Vercel 配置
- 🔍 **联网搜索** - 支持 DeepSeek 原生搜索增强模式
- 🧠 **深度思考** - 支持推理模式，输出思考过程
- 🛠️ **工具调用** - 兼容 OpenAI Function Calling 格式
- ☁️ **Vercel 一键部署** - 无需服务器，快速上线
+### Gemini 接口

-## 📋 模型支持
+Gemini 适配器将模型名通过 `model_aliases` 或内置规则映射到 DeepSeek 原生模型，支持 `generateContent` 和 `streamGenerateContent` 两种调用方式，并完整支持 Tool Calling（`functionDeclarations` → `functionCall` 输出）。

-### OpenAI 兼容接口 (`/v1/chat/completions`)
+## 快速开始

-| 模型 | 深度思考 | 联网搜索 | 说明 |
-|-----|:--------:|:--------:|------|
-| `deepseek-chat` | ❌ | ❌ | 标准对话模式 |
-| `deepseek-reasoner` | ✅ | ❌ | 推理模式（输出思考过程） |
-| `deepseek-chat-search` | ❌ | ✅ | 联网搜索模式 |
-| `deepseek-reasoner-search` | ✅ | ✅ | 推理 + 联网搜索 |
+### 通用第一步（所有部署方式）

-### Claude 兼容接口 (`/anthropic/v1/messages`)
+把 `config.json` 作为唯一配置源（推荐做法）：

-| 模型 | 说明 |
-|-----|------|
-| `claude-sonnet-4-20250514` | 映射到 deepseek-chat（标准模式） |
-| `claude-sonnet-4-20250514-fast` | 映射到 deepseek-chat（快速模式） |
-| `claude-sonnet-4-20250514-slow` | 映射到 deepseek-reasoner（推理模式） |
+```bash
+cp config.example.json config.json
+# 编辑 config.json
+```

-> **提示**：Claude 接口实际调用的是 DeepSeek，响应格式会自动转换为 Anthropic 标准格式。
+后续部署建议：
+- 本地运行：直接读取 `config.json`
+- Docker / Vercel：由 `config.json` 生成 `DS2API_CONFIG_JSON`（Base64）注入环境变量，也可以直接写原始 JSON

-## 🚀 快速开始
+### 方式一：本地运行

-### 方式一：Vercel 部署（推荐）
-
-[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2FCJackHwang%2Fds2api&env=DS2API_ADMIN_KEY&envDescription=管理面板访问密码（必填）&envLink=https%3A%2F%2Fgithub.com%2FCJackHwang%2Fds2api%23环境变量&project-name=ds2api&repository-name=ds2api)
-
-1. 点击上方按钮，设置管理密码 `DS2API_ADMIN_KEY`
-2. 部署完成后访问 `/admin` 管理界面
-3. 添加 DeepSeek 账号和自定义 API Key
-4. 点击「同步到 Vercel」保存配置
-
-> **首次同步会自动验证账号并保存 Token，后续操作无需重复输入凭证。**
-
-### 方式二：本地开发
+**前置要求**：Go 1.26+，Node.js `20.19+` 或 `22.12+`（仅在需要构建 WebUI 时）

 ```bash
 # 1. 克隆仓库
 git clone https://github.com/CJackHwang/ds2api.git
 cd ds2api

-# 2. 安装依赖
-pip install -r requirements.txt
-
-# 3. 配置账号
+# 2. 配置
 cp config.example.json config.json
-# 编辑 config.json，添加 DeepSeek 账号信息
+# 编辑 config.json，填入你的 DeepSeek 账号信息和 API key

-# 4. 启动服务
-python dev.py
+# 3. 启动
+go run ./cmd/ds2api
 ```

-服务启动后访问 `http://localhost:5001`
+默认本地访问地址：`http://127.0.0.1:5001`

-## ⚙️ 配置说明
+服务实际绑定：`0.0.0.0:5001`，因此同一局域网设备通常也可以通过你的内网 IP 访问。

-### 环境变量
+> **WebUI 自动构建**：本地首次启动时，若 `static/admin` 不存在，会自动尝试执行 `npm ci`（仅在缺少依赖时）和 `npm run build -- --outDir static/admin --emptyOutDir`（需要本机有 Node.js）。你也可以手动构建：`./scripts/build-webui.sh`

-| 变量 | 说明 | 必填 |
-|-----|------|:----:|
-| `DS2API_ADMIN_KEY` | 管理面板密码 | Vercel 必填 |
-| `DS2API_CONFIG_JSON` | 配置 JSON 或 Base64 编码 | 可选 |
-| `VERCEL_TOKEN` | Vercel API Token（用于同步） | 可选 |
-| `VERCEL_PROJECT_ID` | Vercel 项目 ID | 可选 |
-| `PORT` | 服务端口（默认 5001） | 可选 |
+### 方式二：Docker 运行

-### 配置文件格式 (`config.json`)
+```bash
+# 1. 准备环境变量和配置文件
+cp .env.example .env
+cp config.example.json config.json
+
+# 2. 编辑 .env（至少设置 DS2API_ADMIN_KEY；如需修改宿主机端口，可额外设置 DS2API_HOST_PORT）
+#    DS2API_ADMIN_KEY=请替换为强密码
+
+# 3. 启动
+docker-compose up -d
+
+# 4. 查看日志
+docker-compose logs -f
+```
+
+默认 `docker-compose.yml` 会把宿主机 `6011` 映射到容器内的 `5001`。如果你希望直接对外暴露 `5001`，请设置 `DS2API_HOST_PORT=5001`（或者手动调整 `ports` 配置）。
+
+更新镜像：`docker-compose up -d --build`
+
+#### Zeabur 一键部署（Dockerfile）
+
+1. 点击上方 “Deploy on Zeabur” 按钮，一键部署。
+2. 部署完成后访问 `/admin`，使用 Zeabur 环境变量/模板指引中的 `DS2API_ADMIN_KEY` 登录。
+3. 在管理台导入/编辑配置（会写入并持久化到 `/data/config.json`）。
+
+说明：Zeabur 使用仓库内 `Dockerfile` 直接构建时，不需要额外传入 `BUILD_VERSION`；镜像会优先读取该构建参数，未提供时自动回退到仓库根目录的 `VERSION` 文件。
+
+### 方式三：Vercel 部署
+
+1. Fork 仓库到自己的 GitHub
+2. 在 Vercel 上导入项目
+3. 配置环境变量（最少设置 `DS2API_ADMIN_KEY`；推荐同时设置 `DS2API_CONFIG_JSON`）
+4. 部署
+
+建议先在仓库目录复制模板并填写：
+
+```bash
+cp config.example.json config.json
+# 编辑 config.json
+```
+
+推荐：先本地把 `config.json` 转成 Base64，再粘贴到 `DS2API_CONFIG_JSON`，避免 JSON 格式错误：
+
+```bash
+base64 < config.json | tr -d '\n'
+```
+
+> **流式说明**：`/v1/chat/completions` 在 Vercel 上默认走 `api/chat-stream.js`（Node Runtime）以保证实时 SSE。鉴权、账号选择、会话/PoW 准备仍由 Go 内部 prepare 接口完成；流式响应（含 `tools`）在 Node 侧执行与 Go 对齐的输出组装与防泄漏处理。
+
+详细部署说明请参阅 [部署指南](docs/DEPLOY.md)。
+
+### 方式四：下载 Release 构建包
+
+每次发布 Release 时，GitHub Actions 会自动构建多平台二进制包：
+
+```bash
+# 下载对应平台的压缩包后
+tar -xzf ds2api_<tag>_linux_amd64.tar.gz
+cd ds2api_<tag>_linux_amd64
+cp config.example.json config.json
+# 编辑 config.json
+./ds2api
+```
+
+### 方式五：OpenCode CLI 接入
+
+1. 复制示例配置：
+
+```bash
+cp opencode.json.example opencode.json
+```
+
+2. 编辑 `opencode.json`：
+- 将 `baseURL` 改为你的 DS2API 地址（例如 `https://your-domain.com/v1`）
+- 将 `apiKey` 改为你的 DS2API key（对应 `config.keys`）
+
+3. 在项目目录启动 OpenCode CLI（按你的安装方式运行 `opencode`）。
+
+> 建议优先使用 OpenAI 兼容路径（`/v1/*`），即示例里的 `@ai-sdk/openai-compatible` provider。
+> 若客户端支持 `wire_api`，可分别测试 `responses` 与 `chat`，DS2API 两条链路都兼容。
+
+## 配置说明
+
+### `config.json` 示例

 ```json
 {
@@ -101,133 +277,282 @@ python dev.py
  "accounts": [
    {
      "email": "user@example.com",
-      "password": "your-password",
-      "token": ""
+      "password": "your-password"
    },
    {
      "mobile": "12345678901",
-      "password": "your-password",
-      "token": ""
+      "password": "your-password"
    }
-  ]
+  ],
+  "model_aliases": {
+    "gpt-4o": "deepseek-chat",
+    "gpt-5-codex": "deepseek-reasoner",
+    "o3": "deepseek-reasoner"
+  },
+  "compat": {
+    "wide_input_strict_output": true,
+    "strip_reference_markers": true
+  },
+  "responses": {
+    "store_ttl_seconds": 900
+  },
+  "embeddings": {
+    "provider": "deterministic"
+  },
+  "claude_mapping": {
+    "fast": "deepseek-chat",
+    "slow": "deepseek-reasoner"
+  },
+  "admin": {
+    "jwt_expire_hours": 24
+  },
+  "runtime": {
+    "account_max_inflight": 2,
+    "account_max_queue": 0,
+    "global_max_inflight": 0,
+    "token_refresh_interval_hours": 6
+  },
+  "auto_delete": {
+    "mode": "none"
+  }
 }
 ```

-> **说明**：
-> - `keys`: 自定义的 API 密钥，用于调用本服务
-> - `accounts`: DeepSeek 网页版账号，支持邮箱或手机号登录
-> - `token`: 留空即可，系统会自动获取并刷新
+- `keys`：API 访问密钥列表，客户端通过 `Authorization: Bearer <key>` 鉴权
+- `accounts`：DeepSeek 账号列表，支持 `email` 或 `mobile` 登录
+- `token`：配置文件中即使填写也会在加载时被清空（不会从 `config.json` 读取 token）；实际 token 仅在运行时内存中维护并自动刷新
+- `model_aliases`：常见模型名（如 GPT/Codex/Claude）到 DeepSeek 模型的映射
+- `compat.wide_input_strict_output`：建议保持 `true`（当前实现默认宽进严出）
+- `compat.strip_reference_markers`：建议保持 `true`，用于清理可见输出中的引用/标记
+- `toolcall`：旧字段，当前实现已固定为特征匹配 + 高置信早发；即使保留在配置里也会被忽略
+- `responses.store_ttl_seconds`：`/v1/responses/{id}` 的内存缓存 TTL
+- `embeddings.provider`：embedding 提供方（当前内置 `deterministic/mock/builtin`）
+- `claude_mapping`：字典，键为 `fast`/`slow` 后缀，值为对应的 DeepSeek 模型（兼容读取 `claude_model_mapping`）
+- `admin`：管理后台设置（JWT 过期时间、密码哈希等），可通过 Admin Settings API 热更新
+- `runtime`：运行时参数（并发限制、队列大小、托管账号 token 刷新间隔），可通过 Admin Settings API 热更新；`account_max_queue=0`/`global_max_inflight=0` 表示按推荐值自动计算，`token_refresh_interval_hours=6` 为默认强制重登间隔
+- `auto_delete.mode`：请求结束后如何清理 DeepSeek 远端聊天记录，支持 `none`（默认，不删除）、`single`（仅删除当前会话）、`all`（清空全部会话）；旧配置里的 `auto_delete.sessions=true` 仍会被视为 `all`

-## 📡 API 使用
+### 环境变量

-完整 API 文档请参阅 **[API.md](API.md)**
+| 变量 | 用途 | 默认值 |
+| --- | --- | --- |
+| `PORT` | 服务端口 | `5001` |
+| `LOG_LEVEL` | 日志级别 | `INFO`（可选：`DEBUG`/`WARN`/`ERROR`） |
+| `DS2API_ADMIN_KEY` | Admin 登录密钥 | `admin` |
+| `DS2API_JWT_SECRET` | Admin JWT 签名密钥 | 等同 `DS2API_ADMIN_KEY` |
+| `DS2API_JWT_EXPIRE_HOURS` | Admin JWT 过期小时数 | `24` |
+| `DS2API_CONFIG_PATH` | 配置文件路径 | `config.json` |
+| `DS2API_CONFIG_JSON` | 直接注入配置（JSON 或 Base64） | — |
+| `DS2API_ENV_WRITEBACK` | 环境变量模式下自动写回配置文件并切换文件模式（`1/true/yes/on`） | 关闭 |
+| `DS2API_WASM_PATH` | PoW WASM 文件路径 | 自动查找 |
+| `DS2API_STATIC_ADMIN_DIR` | 管理台静态文件目录 | `static/admin` |
+| `DS2API_AUTO_BUILD_WEBUI` | 启动时自动构建 WebUI | 本地开启，Vercel 关闭 |
+| `DS2API_DEV_PACKET_CAPTURE` | 本地开发抓包开关（记录最近会话请求/响应体） | 本地非 Vercel 默认开启 |
+| `DS2API_DEV_PACKET_CAPTURE_LIMIT` | 本地抓包保留条数（超出自动淘汰） | `5` |
+| `DS2API_DEV_PACKET_CAPTURE_MAX_BODY_BYTES` | 单条响应体最大记录字节数 | `2097152` |
+| `DS2API_ACCOUNT_MAX_INFLIGHT` | 每账号最大并发 in-flight 请求数 | `2` |
+| `DS2API_ACCOUNT_MAX_QUEUE` | 等待队列上限 | `recommended_concurrency` |
+| `DS2API_GLOBAL_MAX_INFLIGHT` | 全局最大 in-flight 请求数 | `recommended_concurrency` |
+| `DS2API_VERCEL_INTERNAL_SECRET` | Vercel 混合流式内部鉴权密钥 | 回退用 `DS2API_ADMIN_KEY` |
+| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | 流式 lease 过期秒数 | `900` |
+| `VERCEL_TOKEN` | Vercel 同步 token | — |
+| `VERCEL_PROJECT_ID` | Vercel 项目 ID | — |
+| `VERCEL_TEAM_ID` | Vercel 团队 ID | — |
+| `DS2API_VERCEL_PROTECTION_BYPASS` | Vercel 部署保护绕过密钥（内部 Node→Go 调用） | — |

-### 快速示例
+> 提示：当检测到 `DS2API_CONFIG_JSON` 时，管理台会显示当前模式风险与自动持久化状态（含 `DS2API_CONFIG_PATH` 路径与模式切换说明）。

-**获取模型列表**：
-```bash
-curl http://localhost:5001/v1/models
+## 鉴权模式
+
+调用业务接口（`/v1/*`、`/anthropic/*`、Gemini 路由）时支持两种模式：
+
+| 模式 | 说明 |
+| --- | --- |
+| **托管账号模式** | `Bearer` 或 `x-api-key` 传入 `config.keys` 中的 key，由服务自动轮询选择账号 |
+| **直通 token 模式** | 传入 token 不在 `config.keys` 中时，直接作为 DeepSeek token 使用 |
+
+可选请求头 `X-Ds2-Target-Account`：指定使用某个托管账号（值为 email 或 mobile）。
+Gemini 路由还可以使用 `x-goog-api-key`，或在没有认证头时使用 `?key=` / `?api_key=` 作为调用方凭据。
+
+## 并发模型
+
+```text
+每账号可用并发 = DS2API_ACCOUNT_MAX_INFLIGHT（默认 2）
+建议并发值 = 账号数量 × 每账号并发上限
+等待队列上限 = DS2API_ACCOUNT_MAX_QUEUE（默认 = 建议并发值）
+429 阈值 = in-flight + 等待队列 ≈ 账号数量 × 4（默认配置下）
 ```

-**OpenAI 格式调用**：
-```bash
-curl http://localhost:5001/v1/chat/completions \
-  -H "Authorization: Bearer your-api-key" \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "deepseek-chat",
-    "messages": [{"role": "user", "content": "你好"}],
-    "stream": true
-  }'
-```
+- 当 in-flight 槽位满时，请求进入等待队列，**不会立即 429**
+- 超出总承载上限后才返回 `429 Too Many Requests`
+- `GET /admin/queue/status` 返回实时并发状态

-**Claude 格式调用**：
-```bash
-curl http://localhost:5001/anthropic/v1/messages \
-  -H "x-api-key: your-api-key" \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4-20250514",
-    "max_tokens": 1024,
-    "messages": [{"role": "user", "content": "你好"}]
-  }'
-```
+## Tool Call 适配

-### Python SDK 使用
+当请求中带 `tools` 时，DS2API 会做防泄漏处理与结构化转译：

-```python
-from openai import OpenAI
+1. 只在**非代码块上下文**启用执行型 toolcall 识别（代码块示例默认不触发）
+2. 解析层以 XML/Markup 为最高优先级，同时兼容 JSON / ANTML / invoke / text-kv，并统一归一到内部工具调用结构
+3. `responses` 流式严格使用官方 item 生命周期事件（`response.output_item.*`、`response.content_part.*`、`response.function_call_arguments.*`）
+4. `responses` 支持并执行 `tool_choice`（`auto`/`none`/`required`/强制函数）；`required` 违规时非流式返回 `422`，流式返回 `response.failed`
+5. 客户端请求哪种协议，就按该协议返回工具调用（OpenAI/Claude/Gemini 各自原生结构）；模型侧优先约束输出规范 XML，再由兼容层转译

-client = OpenAI(
-    api_key="your-api-key",
-    base_url="http://localhost:5001/v1"
-)
+> 说明：当前版本在 parser 层仍以“尽量解析成功”为优先，未启用基于 allow-list 的工具名硬拒绝。
+>
+> 想评估“把工具调用封装成 XML 再输入模型”的方案，可参考：`docs/toolcall-semantics.md`。

-response = client.chat.completions.create(
-    model="deepseek-reasoner",
-    messages=[{"role": "user", "content": "请解释量子纠缠"}],
-    stream=True
-)
+## 本地开发抓包工具

-for chunk in response:
-    if chunk.choices[0].delta.content:
-        print(chunk.choices[0].delta.content, end="")
-```
+用于定位「responses 思考流/工具调用」等问题。开启后会自动记录最近 N 条 DeepSeek 对话上游请求体与响应体（默认 5 条，超出自动淘汰）。

-## 🔧 部署配置
-
-### Nginx 反向代理
-
-```nginx
-location / {
-    proxy_pass http://localhost:5001;
-    proxy_http_version 1.1;
-    proxy_set_header Connection "";
-    proxy_buffering off;
-    proxy_cache off;
-    chunked_transfer_encoding on;
-    tcp_nopush on;
-    tcp_nodelay on;
-    keepalive_timeout 120;
-}
-```
-
-### 方式三：Docker 部署
+启用示例：

 ```bash
-# 1. 克隆仓库并进入目录
-git clone https://github.com/CJackHwang/ds2api.git
-cd ds2api
-
-# 2. 配置环境变量
-cp .env.example .env
-# 编辑 .env，填写 DS2API_ADMIN_KEY 和 DS2API_CONFIG_JSON
-
-# 3. 启动服务
-docker-compose up -d
-
-# 4. 查看日志
-docker-compose logs -f
+DS2API_DEV_PACKET_CAPTURE=true \
+DS2API_DEV_PACKET_CAPTURE_LIMIT=5 \
+go run ./cmd/ds2api
 ```

-> **Docker 优势**：零侵入设计，主代码更新只需 `docker-compose up -d --build`，无需修改 Docker 配置。详见 [DEPLOY.md](DEPLOY.md#docker-部署推荐)。
+查询/清空（需 Admin JWT）：

-## ⚠️ 免责声明
+- `GET /admin/dev/captures`：查看抓包列表（最新在前）
+- `DELETE /admin/dev/captures`：清空抓包

-**本项目基于逆向工程实现，服务稳定性无法保证。**
+返回字段包含：

- 仅供学习研究使用，**禁止商业用途或对外提供服务**
- 建议正式项目使用 [DeepSeek 官方 API](https://platform.deepseek.com/)
- 使用本项目产生的任何风险由用户自行承担
+- `request_body`：发送给 DeepSeek 的完整请求体
+- `response_body`：上游返回的原始流式内容拼接文本
+- `response_truncated`：是否触发单条大小截断

-## 📜 鸣谢
+## 项目结构

-本项目基于以下开源项目：
+```text
+ds2api/
+├── app/                     # 统一 HTTP Handler 组装层（供本地与 Serverless 复用）
+├── cmd/
+│   ├── ds2api/              # 本地 / 容器启动入口
+│   └── ds2api-tests/        # 端到端测试集入口
+├── api/
+│   ├── index.go             # Vercel Serverless Go 入口
+│   ├── chat-stream.js       # Vercel Node.js 流式转发
+│   └── (rewrite targets in vercel.json)
+├── internal/
+│   ├── account/             # 账号池与并发队列
+│   ├── adapter/
+│   │   ├── openai/          # OpenAI 兼容适配器（含 Tool Call 解析、Vercel 流式 prepare/release）
+│   │   ├── claude/          # Claude 兼容适配器
+│   │   └── gemini/          # Gemini 兼容适配器（generateContent / streamGenerateContent）
+│   ├── admin/               # Admin API handlers（含 Settings 热更新）
+│   ├── auth/                # 鉴权与 JWT
+│   ├── claudeconv/          # Claude 消息格式转换
+│   ├── compat/              # Go 版本兼容与回归测试辅助
+│   ├── config/              # 配置加载、校验与热更新
+│   ├── deepseek/            # DeepSeek API 客户端、PoW WASM
+│   ├── js/                  # Node 运行时流式处理与兼容逻辑
+│   ├── devcapture/          # 开发抓包模块
+│   ├── rawsample/           # 原始流样本可见文本提取与回放辅助
+│   ├── format/              # 输出格式化
+│   ├── prompt/              # Prompt 构建
+│   ├── server/              # HTTP 路由与中间件（chi router）
+│   ├── sse/                 # SSE 解析工具
+│   ├── stream/              # 统一流式消费引擎
+│   ├── testsuite/           # 端到端测试框架与用例编排
+│   ├── translatorcliproxy/  # CLIProxy 桥接与流写入组件
+│   ├── util/                # 通用工具函数
+│   ├── version/             # 版本解析 / 比较与 tag 规范化
+│   └── webui/               # WebUI 静态文件托管与自动构建
+├── webui/                   # React WebUI 源码（Vite + Tailwind）
+│   └── src/
+│       ├── app/             # 路由、鉴权、配置状态管理
+│       ├── features/        # 业务功能模块（account/settings/vercel/apiTester）
+│       ├── components/      # 登录/落地页等通用组件
+│       └── locales/         # 中英文语言包（zh.json / en.json）
+├── scripts/
+│   └── build-webui.sh       # WebUI 手动构建脚本
+├── tests/
+│   ├── compat/              # 兼容性测试夹具与期望输出
+│   ├── node/                # Node 侧单元测试（chat-stream / tool-sieve）
+│   ├── raw_stream_samples/  # 原始 SSE 样本与回放元数据
+│   └── scripts/             # 统一测试脚本入口（unit/e2e）
+├── docs/                    # 部署 / 贡献 / 测试等辅助文档
+├── static/admin/            # WebUI 构建产物（不提交到 Git）
+├── .github/
+│   ├── workflows/           # GitHub Actions（质量门禁 + Release 自动构建）
+│   ├── ISSUE_TEMPLATE/      # Issue 模板
+│   └── PULL_REQUEST_TEMPLATE.md
+├── config.example.json      # 配置文件示例
+├── .env.example             # 环境变量示例
+├── Dockerfile               # 多阶段构建（WebUI + Go）
+├── docker-compose.yml       # 生产环境 Docker Compose
+├── docker-compose.dev.yml   # 开发环境 Docker Compose
+├── vercel.json              # Vercel 路由与构建配置
+└── go.mod / go.sum          # Go 模块依赖
+```

- [iidamie/deepseek2api](https://github.com/iidamie/deepseek2api)
- [LLM-Red-Team/deepseek-free-api](https://github.com/LLM-Red-Team/deepseek-free-api)
+## 文档索引

-## 📊 Star History
+| 文档 | 说明 |
+| --- | --- |
+| [API.md](API.md) / [API.en.md](API.en.md) | API 接口文档（含请求/响应示例） |
+| [DEPLOY.md](docs/DEPLOY.md) / [DEPLOY.en.md](docs/DEPLOY.en.md) | 部署指南（本地/Docker/Vercel/systemd） |
+| [CONTRIBUTING.md](docs/CONTRIBUTING.md) / [CONTRIBUTING.en.md](docs/CONTRIBUTING.en.md) | 贡献指南 |
+| [TESTING.md](docs/TESTING.md) | 测试集使用指南 |

-[![Star History Chart](https://api.star-history.com/svg?repos=CJackHwang/ds2api&type=Date)](https://star-history.com/#CJackHwang/ds2api&Date)
+## 测试
+
+```bash
+# 单元测试（Go + Node）
+./tests/scripts/run-unit-all.sh
+
+# 一键端到端全链路测试（真实账号，生成完整请求/响应日志）
+./tests/scripts/run-live.sh
+
+# 或自定义参数
+go run ./cmd/ds2api-tests \
+  --config config.json \
+  --admin-key admin \
+  --out artifacts/testsuite \
+  --timeout 120 \
+  --retries 2
+```
+
+```bash
+# 发布前阻断门禁
+./tests/scripts/check-stage6-manual-smoke.sh
+./tests/scripts/check-refactor-line-gate.sh
+./tests/scripts/run-unit-all.sh
+npm ci --prefix webui && npm run build --prefix webui
+```
+
+## 测试指南
+
+详细测试指南请参阅 [docs/TESTING.md](docs/TESTING.md)。
+
+### 快速测试命令
+
+```bash
+# 运行所有单元测试
+go test ./...
+
+# 运行 tool calls 相关测试（调试工具调用问题）
+go test -v -run 'TestParseToolCalls|TestRepair' ./internal/util/
+
+# 运行端到端测试
+./tests/scripts/run-live.sh
+```
+
+## Release 自动构建（GitHub Actions）
+
+工作流文件：`.github/workflows/release-artifacts.yml`
+
+- **触发条件**：仅在 GitHub Release `published` 时触发（普通 push 不会触发）
+- **构建产物**：多平台二进制包（`linux/amd64`、`linux/arm64`、`darwin/amd64`、`darwin/arm64`、`windows/amd64`）+ `sha256sums.txt`
+- **容器镜像发布**：仅推送到 GHCR（`ghcr.io/cjackhwang/ds2api`）
+- **每个压缩包包含**：`ds2api` 可执行文件、`static/admin`、WASM 文件（同时支持内置 fallback）、配置示例、README、LICENSE
+
+## 免责声明
+
+本项目基于逆向方式实现，仅供学习、研究、个人实验和内部验证使用，不提供任何商业授权、稳定性保证或可用性保证。
+作者及仓库维护者不对因使用、修改、分发、部署或依赖本项目而产生的任何直接或间接损失、账号封禁、数据丢失、法律风险或第三方索赔负责。
+
+请勿将本项目用于违反服务条款、协议、法律法规或平台规则的场景。商业使用前请自行确认 `LICENSE`、相关协议以及你是否获得了作者的书面许可。
--- a/README.en.md
+++ b/README.en.md
@@ -0,0 +1,538 @@
+<p align="center">
+  <img src="webui/public/ds2api-favicon.svg" width="128" height="128" alt="DS2API icon" />
+</p>
+
+# DS2API
+
+[![License](https://img.shields.io/github/license/CJackHwang/ds2api.svg)](LICENSE)
+![Stars](https://img.shields.io/github/stars/CJackHwang/ds2api.svg)
+![Forks](https://img.shields.io/github/forks/CJackHwang/ds2api.svg)
+[![Release](https://img.shields.io/github/v/release/CJackHwang/ds2api?display_name=tag)](https://github.com/CJackHwang/ds2api/releases)
+[![Docker](https://img.shields.io/badge/docker-ready-blue.svg)](docs/DEPLOY.en.md)
+[![Deploy on Zeabur](https://zeabur.com/button.svg)](https://zeabur.com/templates/L4CFHP)
+[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https://github.com/CJackHwang/ds2api)
+
+Language: [中文](README.MD) | [English](README.en.md)
+
+DS2API converts DeepSeek Web chat capability into OpenAI-compatible, Claude-compatible, and Gemini-compatible APIs. The backend is a **pure Go implementation**, with a React WebUI admin panel (source in `webui/`, build output auto-generated to `static/admin` during deployment).
+
+> **Important Disclaimer**
+>
+> This repository is provided for learning, research, personal experimentation, and internal validation only. It does not grant any commercial authorization and comes with no warranty of fitness, stability, or results.
+>
+> The author and repository maintainers are not responsible for any direct or indirect loss, account suspension, data loss, legal risk, or third-party claims arising from use, modification, distribution, deployment, or reliance on this project.
+>
+> Do not use this project in ways that violate service terms, agreements, laws, or platform rules. Before any commercial use, review the `LICENSE`, the relevant terms, and confirm that you have the author's written permission.
+
+## Architecture Overview
+
+```mermaid
+flowchart LR
+    Client["🖥️ Clients / SDKs\n(OpenAI / Claude / Gemini)"]
+    Upstream["☁️ DeepSeek API"]
+
+    subgraph DS2API["DS2API 3.x (Unified OpenAI Core)"]
+        Router["chi Router + Middleware\n(RequestID / RealIP / Logger / Recoverer / CORS)"]
+
+        subgraph Adapters["Protocol Adapters"]
+            OA["OpenAI\n/v1/*"]
+            CA["Claude\n/anthropic/* + /v1/messages"]
+            GA["Gemini\n/v1beta/models/* + /v1/models/*"]
+            Admin["Admin API\n/admin/*"]
+            WebUI["WebUI\n/admin (static hosting)"]
+        end
+
+        subgraph Runtime["Runtime + Core Capabilities"]
+            Bridge["CLIProxy Bridge\n(multi-protocol <-> OpenAI)"]
+            OAEngine["OpenAI ChatCompletions\n(unified tools + stream semantics)"]
+            Auth["Auth Resolver\n(API key / bearer / x-goog-api-key)"]
+            Pool["Account Pool + Queue\n(in-flight slots + wait queue)"]
+            DSClient["DeepSeek Client\n(session / auth / HTTP)"]
+            Pow["PoW WASM\n(wazero preload)"]
+            Tool["Tool Sieve\n(Go/Node semantic parity)"]
+        end
+    end
+
+    Client --> Router
+    Router --> OA & CA & GA
+    Router --> Admin
+    Router --> WebUI
+
+    OA --> OAEngine
+    CA & GA --> Bridge
+    Bridge --> OAEngine
+    OAEngine --> Auth
+    OAEngine -.account rotation.-> Pool
+    OAEngine -.tool-call parsing.-> Tool
+    OAEngine -.PoW solving.-> Pow
+    Auth --> DSClient
+    DSClient --> Upstream
+    Upstream --> DSClient
+    OAEngine --> Bridge
+    Bridge --> Client
+```
+
+- **Backend**: Go (`cmd/ds2api/`, `api/`, `internal/`), no Python runtime
+- **Frontend**: React admin panel (`webui/`), served as static build at runtime
+- **Deployment**: local run, Docker, Vercel serverless, Linux systemd
+
+### 3.X Architecture Changes (vs older releases)
+
+- **Unified routing core**: all protocol entries are now centralized through `internal/server/router.go`, with OpenAI / Claude / Gemini / Admin / WebUI routes registered in one tree to avoid multi-entry drift.
+- **Unified execution chain**: Claude/Gemini entries are translated by `internal/translatorcliproxy`, then executed through `openai.ChatCompletions` for shared tool-calling and stream semantics, then translated back to the client protocol.
+- **Cleaner adapter boundaries**: `internal/adapter/{claude,gemini}` handles protocol wrappers, while `internal/adapter/openai` remains the execution core; upstream DeepSeek calls are retained only in the OpenAI core.
+- **Tool-calling parity across runtimes**: Go (`internal/util`) and Vercel Node (`internal/js/helpers/stream-tool-sieve`) follow aligned parsing/anti-leak semantics across JSON / XML / invoke / text-kv inputs.
+- **Config/runtime separation**: static config (`config`) and runtime policy (`settings`) are managed independently via Admin APIs, enabling hot updates and password rotation with JWT invalidation.
+- **Streaming behavior upgrade**: `/v1/responses` and `/v1/chat/completions` now share a more consistent incremental tool-call emission strategy across SDK ecosystems.
+- **Improved operability**: `/healthz`, `/readyz`, `/admin/version`, and `/admin/dev/captures` form a tighter post-deploy diagnostics loop.
+
+## Key Capabilities
+
+| Capability | Details |
+| --- | --- |
+| OpenAI compatible | `GET /v1/models`, `GET /v1/models/{id}`, `POST /v1/chat/completions`, `POST /v1/responses`, `GET /v1/responses/{response_id}`, `POST /v1/embeddings` |
+| Claude compatible | `GET /anthropic/v1/models`, `POST /anthropic/v1/messages`, `POST /anthropic/v1/messages/count_tokens` (plus shortcut paths `/v1/messages`, `/messages`) |
+| Gemini compatible | `POST /v1beta/models/{model}:generateContent`, `POST /v1beta/models/{model}:streamGenerateContent` (plus `/v1/models/{model}:*` paths) |
+| Multi-account rotation | Auto token refresh, email/mobile dual login |
+| Concurrency control | Per-account in-flight limit + waiting queue, dynamic recommended concurrency |
+| DeepSeek PoW | WASM solving via `wazero`, no external Node.js dependency |
+| Tool Calling | Anti-leak handling: non-code-block feature match, early `delta.tool_calls`, structured incremental output |
+| Admin API | Config management, runtime settings hot-reload, account testing/batch test, session cleanup, import/export, Vercel sync, version check |
+| WebUI Admin Panel | SPA at `/admin` (bilingual Chinese/English, dark mode) |
+| Health Probes | `GET /healthz` (liveness), `GET /readyz` (readiness) |
+
+## Platform Compatibility Matrix
+
+| Tier | Platform | Status |
+| --- | --- | --- |
+| P0 | Codex CLI/SDK (`wire_api=chat` / `wire_api=responses`) | ✅ |
+| P0 | OpenAI SDK (JS/Python, chat + responses) | ✅ |
+| P0 | Vercel AI SDK (openai-compatible) | ✅ |
+| P0 | Anthropic SDK (messages) | ✅ |
+| P0 | Google Gemini SDK (generateContent) | ✅ |
+| P1 | LangChain / LlamaIndex / OpenWebUI (OpenAI-compatible integration) | ✅ |
+
+## Model Support
+
+### OpenAI Endpoint
+
+| Model | thinking | search |
+| --- | --- | --- |
+| `deepseek-chat` | ❌ | ❌ |
+| `deepseek-reasoner` | ✅ | ❌ |
+| `deepseek-chat-search` | ❌ | ✅ |
+| `deepseek-reasoner-search` | ✅ | ✅ |
+
+### Claude Endpoint
+
+| Model | Default Mapping |
+| --- | --- |
+| `claude-sonnet-4-5` | `deepseek-chat` |
+| `claude-haiku-4-5` (compatible with `claude-3-5-haiku-latest`) | `deepseek-chat` |
+| `claude-opus-4-6` | `deepseek-reasoner` |
+
+Override mapping via `claude_mapping` or `claude_model_mapping` in config.
+In addition, `/anthropic/v1/models` now includes historical Claude 1.x/2.x/3.x/4.x IDs and common aliases for legacy client compatibility.
+
+
+#### Claude Code integration pitfalls (validated)
+
+- Set `ANTHROPIC_BASE_URL` to the DS2API root URL (for example `http://127.0.0.1:5001`). Claude Code sends requests to `/v1/messages?beta=true`.
+- `ANTHROPIC_API_KEY` must match an entry in `keys` from `config.json`. Keeping both a regular key and an `sk-ant-*` style key improves client compatibility.
+- If your environment has proxy variables, set `NO_PROXY=127.0.0.1,localhost,<your_host_ip>` for DS2API to avoid proxy interception of local traffic.
+- If tool calls are rendered as plain text and not executed, upgrade to a build that includes multi-format Claude tool-call parsing (JSON/XML/ANTML/invoke).
+
+### Gemini Endpoint
+
+The Gemini adapter maps model names to DeepSeek native models via `model_aliases` or built-in heuristics, supporting both `generateContent` and `streamGenerateContent` call patterns with full Tool Calling support (`functionDeclarations` → `functionCall` output).
+
+## Quick Start
+
+### Universal First Step (all deployment modes)
+
+Use `config.json` as the single source of truth (recommended):
+
+```bash
+cp config.example.json config.json
+# Edit config.json
+```
+
+Recommended per deployment mode:
+- Local run: read `config.json` directly
+- Docker / Vercel: generate Base64 from `config.json` and inject as `DS2API_CONFIG_JSON`, or paste raw JSON directly
+
+### Option 1: Local Run
+
+**Prerequisites**: Go 1.26+, Node.js `20.19+` or `22.12+` (only if building WebUI locally)
+
+```bash
+# 1. Clone
+git clone https://github.com/CJackHwang/ds2api.git
+cd ds2api
+
+# 2. Configure
+cp config.example.json config.json
+# Edit config.json with your DeepSeek account info and API keys
+
+# 3. Start
+go run ./cmd/ds2api
+```
+
+Default local URL: `http://127.0.0.1:5001`
+
+The server actually binds to `0.0.0.0:5001`, so devices on the same LAN can usually reach it through your private IP as well.
+
+> **WebUI auto-build**: On first local startup, if `static/admin` is missing, DS2API will auto-run `npm ci` (only when dependencies are missing) and `npm run build -- --outDir static/admin --emptyOutDir` (requires Node.js). You can also build manually: `./scripts/build-webui.sh`
+
+### Option 2: Docker
+
+```bash
+# 1. Prepare env file and config file
+cp .env.example .env
+cp config.example.json config.json
+
+# 2. Edit .env (at least set DS2API_ADMIN_KEY; optionally set DS2API_HOST_PORT to change the host port)
+#    DS2API_ADMIN_KEY=replace-with-a-strong-secret
+
+# 3. Start
+docker-compose up -d
+
+# 4. View logs
+docker-compose logs -f
+```
+
+The default `docker-compose.yml` maps host port `6011` to container port `5001`. If you want `5001` exposed directly, set `DS2API_HOST_PORT=5001` (or adjust the `ports` mapping).
+
+Rebuild after updates: `docker-compose up -d --build`
+
+#### Zeabur One-Click (Dockerfile)
+
+1. Click the “Deploy on Zeabur” button above to deploy.
+2. After deployment, open `/admin` and log in with the `DS2API_ADMIN_KEY` shown in the Zeabur environment variables / template instructions.
+3. Import / edit config in Admin UI (it will be written and persisted to `/data/config.json`).
+
+Note: when Zeabur builds directly from the repo `Dockerfile`, you do not need to pass `BUILD_VERSION`. The image prefers that build arg when provided, and automatically falls back to the repo-root `VERSION` file when it is absent.
+
+### Option 3: Vercel
+
+1. Fork this repo to your GitHub account
+2. Import the project on Vercel
+3. Set environment variables (minimum: `DS2API_ADMIN_KEY`; recommended to also set `DS2API_CONFIG_JSON`)
+4. Deploy
+
+Recommended first step in repo root:
+
+```bash
+cp config.example.json config.json
+# Edit config.json
+```
+
+Recommended: convert `config.json` to Base64 locally, then paste into `DS2API_CONFIG_JSON` to avoid JSON formatting mistakes:
+
+```bash
+base64 < config.json | tr -d '\n'
+```
+
+> **Streaming note**: `/v1/chat/completions` on Vercel is routed to `api/chat-stream.js` (Node Runtime) for real-time SSE. Auth, account selection, and session/PoW preparation are still handled by the Go internal prepare endpoint; streaming output (including `tools`) is assembled on Node with Go-aligned anti-leak handling.
+
+For detailed deployment instructions, see the [Deployment Guide](docs/DEPLOY.en.md).
+
+### Option 4: Download Release Binaries
+
+GitHub Actions automatically builds multi-platform archives on each Release:
+
+```bash
+# After downloading the archive for your platform
+tar -xzf ds2api_<tag>_linux_amd64.tar.gz
+cd ds2api_<tag>_linux_amd64
+cp config.example.json config.json
+# Edit config.json
+./ds2api
+```
+
+### Option 5: OpenCode CLI
+
+1. Copy the example config:
+
+```bash
+cp opencode.json.example opencode.json
+```
+
+2. Edit `opencode.json`:
+- Set `baseURL` to your DS2API endpoint (for example, `https://your-domain.com/v1`)
+- Set `apiKey` to your DS2API key (from `config.keys`)
+
+3. Start OpenCode CLI in the project directory (run `opencode` using your installed method).
+
+> Recommended: use the OpenAI-compatible path (`/v1/*`) via `@ai-sdk/openai-compatible` as shown in the example.
+> If your client supports `wire_api`, test both `responses` and `chat`; DS2API supports both paths.
+
+## Configuration
+
+### `config.json` Example
+
+```json
+{
+  "keys": ["your-api-key-1", "your-api-key-2"],
+  "accounts": [
+    {
+      "email": "user@example.com",
+      "password": "your-password"
+    },
+    {
+      "mobile": "12345678901",
+      "password": "your-password"
+    }
+  ],
+  "model_aliases": {
+    "gpt-4o": "deepseek-chat",
+    "gpt-5-codex": "deepseek-reasoner",
+    "o3": "deepseek-reasoner"
+  },
+  "compat": {
+    "wide_input_strict_output": true,
+    "strip_reference_markers": true
+  },
+  "responses": {
+    "store_ttl_seconds": 900
+  },
+  "embeddings": {
+    "provider": "deterministic"
+  },
+  "claude_mapping": {
+    "fast": "deepseek-chat",
+    "slow": "deepseek-reasoner"
+  },
+  "admin": {
+    "jwt_expire_hours": 24
+  },
+  "runtime": {
+    "account_max_inflight": 2,
+    "account_max_queue": 0,
+    "global_max_inflight": 0,
+    "token_refresh_interval_hours": 6
+  },
+  "auto_delete": {
+    "mode": "none"
+  }
+}
+```
+
+- `keys`: API access keys; clients authenticate via `Authorization: Bearer <key>`
+- `accounts`: DeepSeek account list, supports `email` or `mobile` login
+- `token`: Even if set in `config.json`, it is cleared during load (DS2API does not read persisted tokens from config); runtime tokens are maintained/refreshed in memory only
+- `model_aliases`: Map common model names (GPT/Codex/Claude) to DeepSeek models
+- `compat.wide_input_strict_output`: Keep `true` (current default policy)
+- `compat.strip_reference_markers`: Keep `true`; it strips reference markers from visible output
+- `toolcall`: Legacy field; the current behavior is fixed to feature matching + high-confidence early emit, and any config value is ignored
+- `responses.store_ttl_seconds`: In-memory TTL for `/v1/responses/{id}`
+- `embeddings.provider`: Embeddings provider (built-in values: `deterministic` / `mock` / `builtin`)
+- `claude_mapping`: Maps `fast`/`slow` suffixes to corresponding DeepSeek models (still compatible with `claude_model_mapping`)
+- `admin`: Admin panel settings (JWT expiry, password hash, etc.), hot-reloadable via Admin Settings API
+- `runtime`: Runtime parameters (concurrency limits, queue sizes, managed token refresh interval), hot-reloadable via Admin Settings API; setting `account_max_queue` or `global_max_inflight` to `0` auto-calculates that limit from recommended values, and `token_refresh_interval_hours=6` is the default forced re-login interval
+- `auto_delete.mode`: How to clean up DeepSeek remote chat records after each request completes. Supported values: `none` (default, no deletion), `single` (delete only the current session), `all` (delete all sessions); legacy `auto_delete.sessions=true` is still treated as `all`
+
+### Environment Variables
+
+| Variable | Purpose | Default |
+| --- | --- | --- |
+| `PORT` | Service port | `5001` |
+| `LOG_LEVEL` | Log level (`DEBUG`/`INFO`/`WARN`/`ERROR`) | `INFO` |
+| `DS2API_ADMIN_KEY` | Admin login key | `admin` |
+| `DS2API_JWT_SECRET` | Admin JWT signing secret | Same as `DS2API_ADMIN_KEY` |
+| `DS2API_JWT_EXPIRE_HOURS` | Admin JWT TTL in hours | `24` |
+| `DS2API_CONFIG_PATH` | Config file path | `config.json` |
+| `DS2API_CONFIG_JSON` | Inline config (JSON or Base64) | — |
+| `DS2API_ENV_WRITEBACK` | Auto-write env-backed config to file and transition to file mode (`1/true/yes/on`) | Disabled |
+| `DS2API_WASM_PATH` | PoW WASM file path | Auto-detect |
+| `DS2API_STATIC_ADMIN_DIR` | Admin static assets dir | `static/admin` |
+| `DS2API_AUTO_BUILD_WEBUI` | Auto-build WebUI on startup | Enabled locally, disabled on Vercel |
+| `DS2API_ACCOUNT_MAX_INFLIGHT` | Max in-flight requests per account | `2` |
+| `DS2API_ACCOUNT_MAX_QUEUE` | Waiting queue limit | `recommended_concurrency` |
+| `DS2API_GLOBAL_MAX_INFLIGHT` | Global max in-flight requests | `recommended_concurrency` |
+| `DS2API_VERCEL_INTERNAL_SECRET` | Vercel hybrid streaming internal auth | Falls back to `DS2API_ADMIN_KEY` |
+| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | Stream lease TTL seconds | `900` |
+| `DS2API_DEV_PACKET_CAPTURE` | Local dev packet capture switch (record recent request/response bodies) | Enabled by default on non-Vercel local runtime |
+| `DS2API_DEV_PACKET_CAPTURE_LIMIT` | Number of captured sessions to retain (auto-evict overflow) | `5` |
+| `DS2API_DEV_PACKET_CAPTURE_MAX_BODY_BYTES` | Max recorded bytes per captured response body | `2097152` |
+| `VERCEL_TOKEN` | Vercel sync token | — |
+| `VERCEL_PROJECT_ID` | Vercel project ID | — |
+| `VERCEL_TEAM_ID` | Vercel team ID | — |
+| `DS2API_VERCEL_PROTECTION_BYPASS` | Vercel deployment protection bypass for internal Node→Go calls | — |
+
+> Note: when `DS2API_CONFIG_JSON` is detected, the Admin UI shows mode risk and auto-persistence status (including `DS2API_CONFIG_PATH` and mode-transition hints).
+
+## Authentication Modes
+
+For business endpoints (`/v1/*`, `/anthropic/*`, Gemini routes), DS2API supports two modes:
+
+| Mode | Description |
+| --- | --- |
+| **Managed account** | Use a key from `config.keys` via `Authorization: Bearer ...` or `x-api-key`; DS2API auto-selects an account |
+| **Direct token** | If the token is not in `config.keys`, DS2API treats it as a DeepSeek token directly |
+
+Optional header `X-Ds2-Target-Account`: Pin a specific managed account (value is email or mobile).
+Gemini routes also accept `x-goog-api-key`, or `?key=` / `?api_key=` when no auth header is present.
+
+## Concurrency Model
+
+```
+Per-account inflight = DS2API_ACCOUNT_MAX_INFLIGHT (default 2)
+Recommended concurrency = account_count × per_account_inflight
+Queue limit = DS2API_ACCOUNT_MAX_QUEUE (default = recommended concurrency)
+429 threshold = inflight + queue ≈ account_count × 4
+```
+
+- When inflight slots are full, requests enter a waiting queue — **no immediate 429**
+- 429 is returned only when total load exceeds inflight + queue capacity
+- `GET /admin/queue/status` returns real-time concurrency state
+
+## Tool Call Adaptation
+
+When `tools` is present in the request, DS2API performs anti-leak handling:
+
+1. Toolcall feature matching is enabled only in **non-code-block context** (fenced examples are ignored)
+   - In non-code-block context, tool JSON may still be recognized even when mixed with normal prose; surrounding prose can remain as text output.
+2. `responses` streaming strictly uses official item lifecycle events (`response.output_item.*`, `response.content_part.*`, `response.function_call_arguments.*`)
+3. Tool names not declared in the `tools` schema are strictly rejected and will not be emitted as valid tool calls
+4. `responses` supports and enforces `tool_choice` (`auto`/`none`/`required`/forced function); `required` violations return `422` for non-stream and `response.failed` for stream
+5. Valid tool call events are only emitted after passing policy validation, preventing invalid tool names from entering the client execution chain
+
+## Local Dev Packet Capture
+
+This is for debugging issues such as Responses reasoning streaming and tool-call handoff. When enabled, DS2API stores the latest N DeepSeek conversation payload pairs (request body + upstream response body), defaulting to 5 entries with auto-eviction.
+
+Enable example:
+
+```bash
+DS2API_DEV_PACKET_CAPTURE=true \
+DS2API_DEV_PACKET_CAPTURE_LIMIT=5 \
+go run ./cmd/ds2api
+```
+
+Inspect/clear (Admin JWT required):
+
+- `GET /admin/dev/captures`: list captured items (newest first)
+- `DELETE /admin/dev/captures`: clear captured items
+
+Response fields include:
+
+- `request_body`: full payload sent to DeepSeek
+- `response_body`: concatenated raw upstream stream body text
+- `response_truncated`: whether body-size truncation happened
+
+## Project Structure
+
+```text
+ds2api/
+├── app/                     # Unified HTTP handler assembly (shared by local + serverless)
+├── cmd/
+│   ├── ds2api/              # Local / container entrypoint
+│   └── ds2api-tests/        # End-to-end testsuite entrypoint
+├── api/
+│   ├── index.go             # Vercel Serverless Go entry
+│   ├── chat-stream.js       # Vercel Node.js stream relay
+│   └── (rewrite targets in vercel.json)
+├── internal/
+│   ├── account/             # Account pool and concurrency queue
+│   ├── adapter/
+│   │   ├── openai/          # OpenAI adapter (incl. tool call parsing, Vercel stream prepare/release)
+│   │   ├── claude/          # Claude adapter
+│   │   └── gemini/          # Gemini adapter (generateContent / streamGenerateContent)
+│   ├── admin/               # Admin API handlers (incl. Settings hot-reload)
+│   ├── auth/                # Auth and JWT
+│   ├── claudeconv/          # Claude message format conversion
+│   ├── compat/              # Go-version compatibility and regression helpers
+│   ├── config/              # Config loading, validation, and hot-reload
+│   ├── deepseek/            # DeepSeek API client, PoW WASM
+│   ├── js/                  # Node runtime stream/compat logic
+│   ├── devcapture/          # Dev packet capture module
+│   ├── rawsample/           # Visible-text extraction and replay helpers for raw stream samples
+│   ├── format/              # Output formatting
+│   ├── prompt/              # Prompt construction
+│   ├── server/              # HTTP routing and middleware (chi router)
+│   ├── sse/                 # SSE parsing utilities
+│   ├── stream/              # Unified stream consumption engine
+│   ├── testsuite/           # End-to-end testsuite framework and case orchestration
+│   ├── translatorcliproxy/  # CLIProxy bridge and stream writer components
+│   ├── util/                # Common utilities
+│   ├── version/             # Version parsing/comparison and tag normalization
+│   └── webui/               # WebUI static file serving and auto-build
+├── webui/                   # React WebUI source (Vite + Tailwind)
+│   └── src/
+│       ├── app/             # Routing, auth, config state
+│       ├── features/        # Feature modules (account/settings/vercel/apiTester)
+│       ├── components/      # Shared UI pieces (login/landing, etc.)
+│       └── locales/         # Language packs (zh.json / en.json)
+├── scripts/
+│   └── build-webui.sh       # Manual WebUI build script
+├── tests/
+│   ├── compat/              # Compatibility fixtures and expected outputs
+│   ├── node/                # Node-side unit tests (chat-stream / tool-sieve)
+│   ├── raw_stream_samples/  # Raw SSE samples and replay metadata
+│   └── scripts/             # Unified test script entrypoints (unit/e2e)
+├── docs/                    # Deployment / contributing / testing docs
+├── static/admin/            # WebUI build output (not committed to Git)
+├── .github/
+│   ├── workflows/           # GitHub Actions (quality gates + release automation)
+│   ├── ISSUE_TEMPLATE/      # Issue templates
+│   └── PULL_REQUEST_TEMPLATE.md
+├── config.example.json      # Config file template
+├── .env.example             # Environment variable template
+├── Dockerfile               # Multi-stage build (WebUI + Go)
+├── docker-compose.yml       # Production Docker Compose
+├── docker-compose.dev.yml   # Development Docker Compose
+├── vercel.json              # Vercel routing and build config
+└── go.mod / go.sum          # Go module dependencies
+```
+
+## Documentation Index
+
+| Document | Description |
+| --- | --- |
+| [API.md](API.md) / [API.en.md](API.en.md) | API reference with request/response examples |
+| [DEPLOY.md](docs/DEPLOY.md) / [DEPLOY.en.md](docs/DEPLOY.en.md) | Deployment guide (local/Docker/Vercel/systemd) |
+| [CONTRIBUTING.md](docs/CONTRIBUTING.md) / [CONTRIBUTING.en.md](docs/CONTRIBUTING.en.md) | Contributing guide |
+| [TESTING.md](docs/TESTING.md) | Testsuite guide |
+
+## Testing
+
+```bash
+# Unit tests (Go + Node)
+./tests/scripts/run-unit-all.sh
+
+# One-command live end-to-end tests (real accounts, full request/response logs)
+./tests/scripts/run-live.sh
+
+# Or with custom flags
+go run ./cmd/ds2api-tests \
+  --config config.json \
+  --admin-key admin \
+  --out artifacts/testsuite \
+  --timeout 120 \
+  --retries 2
+```
+
+```bash
+# Release-blocking gates
+./tests/scripts/check-stage6-manual-smoke.sh
+./tests/scripts/check-refactor-line-gate.sh
+./tests/scripts/run-unit-all.sh
+npm ci --prefix webui && npm run build --prefix webui
+```
+
+## Release Artifact Automation (GitHub Actions)
+
+Workflow: `.github/workflows/release-artifacts.yml`
+
+- **Trigger**: only on GitHub Release `published` (normal pushes do not trigger builds)
+- **Outputs**: multi-platform archives (`linux/amd64`, `linux/arm64`, `darwin/amd64`, `darwin/arm64`, `windows/amd64`) + `sha256sums.txt`
+- **Container publishing**: GHCR only (`ghcr.io/cjackhwang/ds2api`)
+- **Each archive includes**: `ds2api` executable, `static/admin`, WASM file (with embedded fallback support), config template, README, LICENSE
+
+## Disclaimer
+
+This project is built through reverse engineering and is provided for learning, research, personal experimentation, and internal validation only. No commercial authorization is granted, and no warranty of stability, fitness, or results is provided.
+The author and repository maintainers are not responsible for any direct or indirect loss, account suspension, data loss, legal risk, or third-party claims arising from use, modification, distribution, deployment, or reliance on this project.
+
+Do not use this project in ways that violate service terms, agreements, laws, or platform rules. Before any commercial use, review the `LICENSE`, the relevant terms, and confirm that you have the author's written permission.
--- a/1
+++ b/1
@@ -0,0 +1 @@
+3.1.0
--- a/api/chat-stream.js
+++ b/api/chat-stream.js
@@ -0,0 +1,3 @@
+'use strict';
+
+module.exports = require('../internal/js/chat-stream/index.js');
--- a/api/index.go
+++ b/api/index.go
@@ -0,0 +1,20 @@
+package handler
+
+import (
+	"net/http"
+	"sync"
+
+	"ds2api/app"
+)
+
+var (
+	once sync.Once
+	h    http.Handler
+)
+
+func Handler(w http.ResponseWriter, r *http.Request) { // Handler serves a request through the package-level handler h, building h on first use.
+	once.Do(func() { // once is a sync.Once, so this initializer runs a single time; later calls go straight to ServeHTTP.
+		h = app.NewHandler() // cache the handler so every subsequent request reuses it
+	})
+	h.ServeHTTP(w, r) // delegate the actual request handling to the cached handler
+}
--- a/app.py
+++ b/app.py
@@ -1,69 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-DS2API - DeepSeek to OpenAI API 转换服务
-
-支持:
- OpenAI 兼容接口: /v1/chat/completions, /v1/models
- Claude 兼容接口: /anthropic/v1/messages, /anthropic/v1/models
-
-使用方法:
-    本地开发: python dev.py
-    生产环境: uvicorn app:app --host 0.0.0.0 --port 5001
-    Vercel: 自动部署
-"""
-import os
-
-from fastapi import FastAPI, Request
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
-
-from core.config import IS_VERCEL, logger
-
-# 创建 FastAPI 应用
-app = FastAPI(
-    title="DS2API",
-    description="DeepSeek to OpenAI/Claude API",
-    version="1.0.0",
-)
-
-
-# 全局异常处理
-@app.exception_handler(Exception)
-async def unhandled_exception_handler(request: Request, exc: Exception):
-    logger.exception(f"[unhandled_exception] {request.method} {request.url.path}: {exc}")
-    return JSONResponse(
-        status_code=500,
-        content={"error": {"type": "api_error", "message": "Internal Server Error"}},
-    )
-
-
-# CORS 中间件
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["GET", "POST", "OPTIONS", "PUT", "DELETE"],
-    allow_headers=["Content-Type", "Authorization"],
-)
-
-# 注册路由
-from routes.openai import router as openai_router
-from routes.claude import router as claude_router
-from routes.home import router as home_router
-from routes.admin import router as admin_router
-
-app.include_router(openai_router)
-app.include_router(claude_router)
-# admin_router 必须在 home_router 之前，否则 home.py 的 /admin/{path:path} 会拦截 admin API
-app.include_router(admin_router)
-app.include_router(home_router)
-
-
-# ----------------------------------------------------------------------
-# 本地运行入口
-# ----------------------------------------------------------------------
-if __name__ == "__main__" and not IS_VERCEL:
-    import uvicorn
-
-    port = int(os.getenv("PORT", "5001"))
-    uvicorn.run(app, host="0.0.0.0", port=port)
--- a/app/handler.go
+++ b/app/handler.go
@@ -0,0 +1,19 @@
+package app
+
+import (
+	"net/http"
+
+	"ds2api/internal/config"
+	"ds2api/internal/server"
+)
+
+func NewHandler() http.Handler { // NewHandler returns the router built by server.NewApp, or a fallback handler when that initialization fails.
+	app, err := server.NewApp()
+	if err != nil {
+		config.Logger.Error("[app] init failed", "error", err) // record the initialization failure before falling back
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // fallback: every request is answered via server.WriteUnhandledError with the captured init error
+			server.WriteUnhandledError(w, err)
+		})
+	}
+	return app.Router
+}
--- a/cmd/ds2api-tests/main.go
+++ b/cmd/ds2api-tests/main.go
@@ -0,0 +1,37 @@
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"os"
+	"time"
+
+	"ds2api/internal/testsuite"
+)
+
+func main() { // main parses command-line flags into testsuite options, runs the suite, and exits with status 1 if it returns an error.
+	opts := testsuite.DefaultOptions() // the defaults double as the default values of the flags registered below
+	var timeoutSeconds int // -timeout is read as whole seconds and converted to a time.Duration after parsing
+
+	flag.StringVar(&opts.ConfigPath, "config", opts.ConfigPath, "Path to config file (default: config.json)")
+	flag.StringVar(&opts.AdminKey, "admin-key", opts.AdminKey, "Admin key (default: DS2API_ADMIN_KEY or admin)")
+	flag.StringVar(&opts.OutputDir, "out", opts.OutputDir, "Output artifact directory")
+	flag.IntVar(&opts.Port, "port", opts.Port, "Server port (0 means auto-select free port)")
+	flag.IntVar(&timeoutSeconds, "timeout", int(opts.Timeout.Seconds()), "Per-request timeout in seconds")
+	flag.IntVar(&opts.Retries, "retries", opts.Retries, "Retry count for network/5xx requests")
+	flag.BoolVar(&opts.NoPreflight, "no-preflight", opts.NoPreflight, "Skip preflight checks")
+	flag.IntVar(&opts.MaxKeepRuns, "keep", opts.MaxKeepRuns, "Max test runs to keep (0 = keep all)")
+	flag.Parse()
+
+	if timeoutSeconds <= 0 { // a zero or negative -timeout falls back to 120 seconds
+		timeoutSeconds = 120
+	}
+	opts.Timeout = time.Duration(timeoutSeconds) * time.Second
+
+	if err := testsuite.Run(context.Background(), opts); err != nil {
+		fmt.Fprintln(os.Stderr, err.Error()) // report the failure on stderr before the non-zero exit
+		os.Exit(1)
+	}
+	fmt.Fprintln(os.Stdout, "testsuite completed successfully")
+}
--- a/cmd/ds2api/main.go
+++ b/cmd/ds2api/main.go
@@ -0,0 +1,110 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"net"
+	"net/http"
+	"os"
+	"os/signal"
+	"strings"
+	"syscall"
+	"time"
+
+	"ds2api/internal/auth"
+	"ds2api/internal/config"
+	"ds2api/internal/server"
+	"ds2api/internal/webui"
+)
+
+func main() {
+	if err := config.LoadDotEnv(); err != nil {
+		config.Logger.Warn("[dotenv] load failed", "error", err)
+	}
+	config.RefreshLogger()
+	webui.EnsureBuiltOnStartup()
+	_ = auth.AdminKey()
+	app, err := server.NewApp()
+	if err != nil {
+		config.Logger.Error("server initialization failed", "error", err)
+		os.Exit(1)
+	}
+	port := strings.TrimSpace(os.Getenv("PORT"))
+	if port == "" {
+		port = "5001"
+	}
+
+	srv := &http.Server{
+		Addr:    "0.0.0.0:" + port,
+		Handler: app.Router,
+	}
+	localURL := fmt.Sprintf("http://127.0.0.1:%s", port)
+	lanIP := detectLANIPv4()
+	lanURL := ""
+	if lanIP != "" {
+		lanURL = fmt.Sprintf("http://%s:%s", lanIP, port)
+	}
+
+	// Start server in a goroutine so we can listen for shutdown signals.
+	go func() {
+		if lanURL != "" {
+			config.Logger.Info("starting ds2api", "bind", srv.Addr, "port", port, "local_url", localURL, "lan_url", lanURL, "lan_ip", lanIP)
+		} else {
+			config.Logger.Info("starting ds2api", "bind", srv.Addr, "port", port, "local_url", localURL)
+			config.Logger.Warn("lan ip not detected; check active network interfaces")
+		}
+		if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
+			config.Logger.Error("server stopped unexpectedly", "error", err)
+			os.Exit(1)
+		}
+	}()
+
+	// Wait for interrupt signal (Ctrl+C / SIGTERM).
+	quit := make(chan os.Signal, 1)
+	signal.Notify(quit, os.Interrupt, syscall.SIGTERM)
+	sig := <-quit
+	config.Logger.Info("shutdown signal received", "signal", sig.String())
+
+	// Graceful shutdown: allow up to 10 seconds for in-flight requests to complete.
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	if err := srv.Shutdown(ctx); err != nil {
+		config.Logger.Error("graceful shutdown failed, forcing exit", "error", err)
+		os.Exit(1)
+	}
+	config.Logger.Info("server gracefully stopped")
+}
+
+func detectLANIPv4() string {
+	ifaces, err := net.Interfaces()
+	if err != nil {
+		return ""
+	}
+	for _, iface := range ifaces {
+		if iface.Flags&net.FlagUp == 0 || iface.Flags&net.FlagLoopback != 0 {
+			continue
+		}
+		addrs, err := iface.Addrs()
+		if err != nil {
+			continue
+		}
+		for _, addr := range addrs {
+			var ip net.IP
+			switch v := addr.(type) {
+			case *net.IPNet:
+				ip = v.IP
+			case *net.IPAddr:
+				ip = v.IP
+			default:
+				continue
+			}
+			ip = ip.To4()
+			if ip == nil || !ip.IsPrivate() {
+				continue
+			}
+			return ip.String()
+		}
+	}
+	return ""
+}
--- a/config.example.json
+++ b/config.example.json
@@ -9,20 +9,48 @@
    {
      "_comment": "邮箱登录方式",
      "email": "example1@example.com",
-      "password": "your-password-1",
-      "token": ""
+      "password": "your-password-1"
    },
    {
      "_comment": "邮箱登录方式 - 账号2",
      "email": "example2@example.com",
-      "password": "your-password-2",
-      "token": ""
+      "password": "your-password-2"
    },
    {
      "_comment": "手机号登录方式（中国大陆）",
      "mobile": "12345678901",
-      "password": "your-password-3",
-      "token": ""
+      "password": "your-password-3"
    }
-  ]
-}
+  ],
+  "model_aliases": {
+    "gpt-4o": "deepseek-chat",
+    "gpt-5-codex": "deepseek-reasoner",
+    "o3": "deepseek-reasoner"
+  },
+  "compat": {
+    "wide_input_strict_output": true,
+    "strip_reference_markers": true
+  },
+  "responses": {
+    "store_ttl_seconds": 900
+  },
+  "embeddings": {
+    "provider": "deterministic"
+  },
+  "claude_mapping": {
+    "fast": "deepseek-chat",
+    "slow": "deepseek-reasoner"
+  },
+  "admin": {
+    "jwt_expire_hours": 24
+  },
+  "runtime": {
+    "account_max_inflight": 2,
+    "account_max_queue": 0,
+    "global_max_inflight": 0,
+    "token_refresh_interval_hours": 6
+  },
+  "auto_delete": {
+    "mode": "none"
+  }
+}
--- a/core/init.py
+++ b/core/init.py
@@ -1 +0,0 @@
-# DS2API Core Modules
--- a/core/auth.py
+++ b/core/auth.py
@@ -1,223 +0,0 @@
-# -*- coding: utf-8 -*-
-"""账号认证与管理模块 - 轮询(Round-Robin)策略"""
-import threading
-from fastapi import HTTPException, Request
-
-from .config import CONFIG, logger
-from .deepseek import login_deepseek_via_account, BASE_HEADERS
-from .utils import get_account_identifier
-
-# -------------------------- 全局账号队列 --------------------------
-# 使用列表实现轮询队列，配合线程锁保证并发安全
-account_queue = []  # 可用账号队列
-in_use_accounts = {}  # 正在使用的账号 {account_id: account}
-_queue_lock = threading.Lock()  # 线程锁
-
-claude_api_key_queue = []  # 维护所有可用的Claude API keys
-
-
-def init_account_queue():
-    """初始化时从配置加载账号（不再随机排序，保持配置顺序）"""
-    global account_queue, in_use_accounts
-    with _queue_lock:
-        account_queue = CONFIG.get("accounts", [])[:]  # 深拷贝
-        in_use_accounts = {}
-        # 按 token 有无排序：有 token 的账号优先
-        account_queue.sort(key=lambda a: 0 if a.get("token", "").strip() else 1)
-        logger.info(f"[init_account_queue] 初始化 {len(account_queue)} 个账号，轮询模式")
-
-
-def init_claude_api_key_queue():
-    """Claude API keys由用户自己的token提供，这里初始化为空"""
-    global claude_api_key_queue
-    claude_api_key_queue = []
-
-
-# 初始化
-init_account_queue()
-init_claude_api_key_queue()
-
-
-# get_account_identifier 已移至 core.utils
-
-
-def get_queue_status() -> dict:
-    """获取账号队列状态（用于监控）"""
-    with _queue_lock:
-        # total 应该是配置中的账号总数，而非队列相加（避免状态不一致导致重复计数）
-        total_accounts = len(CONFIG.get("accounts", []))
-        return {
-            "available": len(account_queue),
-            "in_use": len(in_use_accounts),
-            "total": total_accounts,
-            "available_accounts": [get_account_identifier(a) for a in account_queue],
-            "in_use_accounts": list(in_use_accounts.keys()),
-        }
-
-
-# ----------------------------------------------------------------------
-# 账号选择与释放 - 轮询(Round-Robin)策略
-# ----------------------------------------------------------------------
-def choose_new_account(exclude_ids=None):
-    """轮询选择策略：
-    1. 使用线程锁保证并发安全
-    2. 优先选择队首的有 token 账号
-    3. 从队列头部取出账号（FIFO）
-    4. 请求完成后调用 release_account 将账号放回队尾
-    """
-    if exclude_ids is None:
-        exclude_ids = []
-
-    with _queue_lock:
-        # 第一轮：优先选择已有 token 的账号
-        for i in range(len(account_queue)):
-            acc = account_queue[i]
-            acc_id = get_account_identifier(acc)
-            if acc_id and acc_id not in exclude_ids:
-                if acc.get("token", "").strip():  # 已有 token
-                    selected = account_queue.pop(i)
-                    in_use_accounts[acc_id] = selected
-                    logger.info(f"[choose_new_account] 轮询选择(有token): {acc_id} | 队列剩余: {len(account_queue)}")
-                    return selected
-
-        # 第二轮：选择任意账号（需要登录）
-        for i in range(len(account_queue)):
-            acc = account_queue[i]
-            acc_id = get_account_identifier(acc)
-            if acc_id and acc_id not in exclude_ids:
-                selected = account_queue.pop(i)
-                in_use_accounts[acc_id] = selected
-                logger.info(f"[choose_new_account] 轮询选择(需登录): {acc_id} | 队列剩余: {len(account_queue)}")
-                return selected
-
-        logger.warning(f"[choose_new_account] 没有可用账号 | 队列: {len(account_queue)}, 使用中: {len(in_use_accounts)}")
-        return None
-
-
-def release_account(account: dict):
-    """将账号重新加入队列末尾（轮询核心：用完放队尾）"""
-    if not account:
-        return
-    
-    acc_id = get_account_identifier(account)
-    with _queue_lock:
-        # 从使用中移除
-        if acc_id in in_use_accounts:
-            del in_use_accounts[acc_id]
-            # 放回队尾
-            account_queue.append(account)
-            logger.debug(f"[release_account] 释放账号: {acc_id} | 队列长度: {len(account_queue)}")
-        else:
-            logger.warning(f"[release_account] 账号 {acc_id} 不在使用列表中 (可能是因为重置了队列)，跳过释放")
-
-
-# ----------------------------------------------------------------------
-# Claude API key 管理函数（简化版本）
-# ----------------------------------------------------------------------
-def choose_claude_api_key():
-    """选择一个可用的Claude API key - 现在直接由用户提供"""
-    return None
-
-
-def release_claude_api_key(api_key):
-    """释放Claude API key - 现在无需操作"""
-    pass
-
-
-# ----------------------------------------------------------------------
-# 判断调用模式：配置模式 vs 用户自带 token
-# ----------------------------------------------------------------------
-def determine_mode_and_token(request: Request):
-    """
-    根据请求头 Authorization 判断使用哪种模式：
-    - 如果 Bearer token 出现在 CONFIG["keys"] 中，则为配置模式，从 CONFIG["accounts"] 中随机选择一个账号（排除已尝试账号），
-      检查该账号是否已有 token，否则调用登录接口获取；
-    - 否则，直接使用请求中的 Bearer 值作为 DeepSeek token。
-    结果存入 request.state.deepseek_token；配置模式下同时存入 request.state.account 与 request.state.tried_accounts。
-    """
-    auth_header = request.headers.get("Authorization", "")
-    if not auth_header.startswith("Bearer "):
-        raise HTTPException(
-            status_code=401, detail="Unauthorized: missing Bearer token."
-        )
-    caller_key = auth_header.replace("Bearer ", "", 1).strip()
-    config_keys = CONFIG.get("keys", [])
-    if caller_key in config_keys:
-        request.state.use_config_token = True
-        request.state.tried_accounts = []  # 初始化已尝试账号
-        selected_account = choose_new_account()
-        if not selected_account:
-            raise HTTPException(
-                status_code=429,
-                detail="No accounts configured or all accounts are busy.",
-            )
-        if not selected_account.get("token", "").strip():
-            try:
-                login_deepseek_via_account(selected_account)
-            except Exception as e:
-                logger.error(
-                    f"[determine_mode_and_token] 账号 {get_account_identifier(selected_account)} 登录失败：{e}"
-                )
-                raise HTTPException(status_code=500, detail="Account login failed.")
-
-        request.state.deepseek_token = selected_account.get("token")
-        request.state.account = selected_account
-
-    else:
-        request.state.use_config_token = False
-        request.state.deepseek_token = caller_key
-
-
-def get_auth_headers(request: Request) -> dict:
-    """返回 DeepSeek 请求所需的公共请求头"""
-    return {**BASE_HEADERS, "authorization": f"Bearer {request.state.deepseek_token}"}
-
-
-# determine_claude_mode_and_token 已移除（直接使用 determine_mode_and_token）
-
-
-# ----------------------------------------------------------------------
-# Token 刷新机制
-# ----------------------------------------------------------------------
-def refresh_account_token(request: Request) -> bool:
-    """当 token 过期时，刷新账号 token。
-    
-    返回 True 表示刷新成功，False 表示刷新失败。
-    调用后 request.state.deepseek_token 会被更新。
-    """
-    if not getattr(request.state, 'use_config_token', False):
-        # 用户自带 token，无法刷新
-        return False
-    
-    account = getattr(request.state, 'account', None)
-    if not account:
-        return False
-    
-    acc_id = get_account_identifier(account)
-    logger.info(f"[refresh_account_token] 尝试刷新账号 {acc_id} 的 token")
-    
-    try:
-        # 清除旧 token
-        account["token"] = ""
-        # 重新登录
-        login_deepseek_via_account(account)
-        # 更新 request 状态
-        request.state.deepseek_token = account.get("token")
-        logger.info(f"[refresh_account_token] 账号 {acc_id} token 刷新成功")
-        return True
-    except Exception as e:
-        logger.error(f"[refresh_account_token] 账号 {acc_id} token 刷新失败: {e}")
-        return False
-
-
-def mark_token_invalid(request: Request):
-    """标记当前账号的 token 为无效，清除它以便下次重新登录"""
-    if not getattr(request.state, 'use_config_token', False):
-        return
-    
-    account = getattr(request.state, 'account', None)
-    if account:
-        acc_id = get_account_identifier(account)
-        logger.warning(f"[mark_token_invalid] 标记账号 {acc_id} 的 token 为无效")
-        account["token"] = ""
-
--- a/core/config.py
+++ b/core/config.py
@@ -1,106 +0,0 @@
-# -*- coding: utf-8 -*-
-"""配置管理模块"""
-import base64
-import json
-import logging
-import os
-import sys
-
-import transformers
-
-# -------------------------- 获取项目根目录 --------------------------
-BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-IS_VERCEL = bool(os.getenv("VERCEL")) or bool(os.getenv("NOW_REGION"))
-
-
-def resolve_path(env_key: str, default_rel: str) -> str:
-    """解析路径，支持环境变量覆盖"""
-    raw = os.getenv(env_key)
-    if raw:
-        return raw if os.path.isabs(raw) else os.path.join(BASE_DIR, raw)
-    return os.path.join(BASE_DIR, default_rel)
-
-
-# -------------------------- 日志配置 --------------------------
-logging.basicConfig(
-    level=os.getenv("LOG_LEVEL", "INFO").upper(),
-    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
-    handlers=[logging.StreamHandler(sys.stdout)],
-    force=True,
-)
-logger = logging.getLogger("ds2api")
-
-# -------------------------- 初始化 tokenizer --------------------------
-chat_tokenizer_dir = resolve_path("DS2API_TOKENIZER_DIR", "")
-tokenizer = transformers.AutoTokenizer.from_pretrained(
-    chat_tokenizer_dir, trust_remote_code=True
-)
-
-# ----------------------------------------------------------------------
-# 配置文件的读写函数
-# ----------------------------------------------------------------------
-CONFIG_PATH = resolve_path("DS2API_CONFIG_PATH", "config.json")
-
-
-def load_config() -> dict:
-    """加载配置。
-
-    优先从环境变量读取：
-      - DS2API_CONFIG_JSON / CONFIG_JSON: 直接 JSON 字符串，或 base64 编码后的 JSON
-
-    若未提供环境变量，再从 CONFIG_PATH 指向的文件读取。
-    """
-    raw_cfg = os.getenv("DS2API_CONFIG_JSON") or os.getenv("CONFIG_JSON")
-    if raw_cfg:
-        try:
-            return json.loads(raw_cfg)
-        except json.JSONDecodeError:
-            try:
-                decoded = base64.b64decode(raw_cfg).decode("utf-8")
-                return json.loads(decoded)
-            except Exception as e:
-                logger.warning(f"[load_config] 环境变量配置解析失败: {e}")
-                return {}
-
-    try:
-        with open(CONFIG_PATH, "r", encoding="utf-8") as f:
-            return json.load(f)
-    except Exception as e:
-        logger.warning(f"[load_config] 无法读取配置文件({CONFIG_PATH}): {e}")
-        return {}
-
-
-def save_config(cfg: dict) -> None:
-    """将配置写回 config.json。
-
-    Vercel 环境文件系统通常是只读的；且如果配置来自环境变量，也无法回写。
-    所以这里失败不应影响主流程。
-    """
-    if os.getenv("DS2API_CONFIG_JSON") or os.getenv("CONFIG_JSON"):
-        logger.info("[save_config] 配置来自环境变量，跳过写回")
-        return
-
-    try:
-        with open(CONFIG_PATH, "w", encoding="utf-8") as f:
-            json.dump(cfg, f, ensure_ascii=False, indent=2)
-    except PermissionError as e:
-        logger.warning(f"[save_config] 配置文件不可写({CONFIG_PATH}): {e}")
-    except Exception as e:
-        logger.exception(f"[save_config] 写入 config.json 失败: {e}")
-
-
-# 全局配置
-CONFIG = load_config()
-if not CONFIG:
-    logger.warning(
-        "[config] 未加载到有效配置，请提供 config.json（路径可用 DS2API_CONFIG_PATH 指定）或设置环境变量 DS2API_CONFIG_JSON"
-    )
-
-# WASM 模块文件路径
-WASM_PATH = resolve_path("DS2API_WASM_PATH", "sha3_wasm_bg.7b9ca65ddd.wasm")
-
-# 模板目录
-TEMPLATES_DIR = resolve_path("DS2API_TEMPLATES_DIR", "templates")
-
-# WebUI 静态文件目录
-STATIC_ADMIN_DIR = resolve_path("DS2API_STATIC_ADMIN_DIR", "static/admin")
--- a/core/constants.py
+++ b/core/constants.py
@@ -1,43 +0,0 @@
-# -*- coding: utf-8 -*-
-"""常量定义模块 - 统一管理项目中的所有常量"""
-
-# ----------------------------------------------------------------------
-# 网络和超时配置
-# ----------------------------------------------------------------------
-KEEP_ALIVE_TIMEOUT = 5  # 保活超时（秒）
-STREAM_IDLE_TIMEOUT = 30  # 流无新内容超时（秒）
-MAX_KEEPALIVE_COUNT = 10  # 最大连续 keepalive 次数
-
-# ----------------------------------------------------------------------
-# DeepSeek API 配置
-# ----------------------------------------------------------------------
-DEEPSEEK_HOST = "chat.deepseek.com"
-DEEPSEEK_LOGIN_URL = f"https://{DEEPSEEK_HOST}/api/v0/users/login"
-DEEPSEEK_CREATE_SESSION_URL = f"https://{DEEPSEEK_HOST}/api/v0/chat_session/create"
-DEEPSEEK_CREATE_POW_URL = f"https://{DEEPSEEK_HOST}/api/v0/chat/create_pow_challenge"
-DEEPSEEK_COMPLETION_URL = f"https://{DEEPSEEK_HOST}/api/v0/chat/completion"
-
-# ----------------------------------------------------------------------
-# 请求头配置
-# ----------------------------------------------------------------------
-BASE_HEADERS = {
-    "Host": "chat.deepseek.com",
-    "User-Agent": "DeepSeek/1.6.11 Android/35",
-    "Accept": "application/json",
-    "Accept-Encoding": "gzip",
-    "Content-Type": "application/json",
-    "x-client-platform": "android",
-    "x-client-version": "1.6.11",
-    "x-client-locale": "zh_CN",
-    "accept-charset": "UTF-8",
-}
-
-# ----------------------------------------------------------------------
-# SSE 解析配置
-# ----------------------------------------------------------------------
-# 跳过的路径模式（状态相关，不是内容）
-SKIP_PATTERNS = [
-    "quasi_status", "elapsed_secs", "token_usage", 
-    "pending_fragment", "conversation_mode",
-    "fragments/-1/status", "fragments/-2/status", "fragments/-3/status"
-]
--- a/core/deepseek.py
+++ b/core/deepseek.py
@@ -1,138 +0,0 @@
-# -*- coding: utf-8 -*-
-"""DeepSeek API 相关逻辑"""
-import time
-from curl_cffi import requests
-from fastapi import HTTPException
-
-from .config import CONFIG, save_config, logger
-from .utils import get_account_identifier
-from .constants import (
-    DEEPSEEK_HOST,
-    DEEPSEEK_LOGIN_URL,
-    DEEPSEEK_CREATE_SESSION_URL,
-    DEEPSEEK_CREATE_POW_URL,
-    DEEPSEEK_COMPLETION_URL,
-    BASE_HEADERS,
-)
-
-
-# get_account_identifier 已移至 core.utils
-
-
-
-
-# ----------------------------------------------------------------------
-# 登录函数：支持使用 email 或 mobile 登录
-# ----------------------------------------------------------------------
-def login_deepseek_via_account(account: dict) -> str:
-    """使用 account 中的 email 或 mobile 登录 DeepSeek，
-    成功后将返回的 token 写入 account 并保存至配置文件，返回新 token。
-    """
-    email = account.get("email", "").strip()
-    mobile = account.get("mobile", "").strip()
-    password = account.get("password", "").strip()
-    if not password or (not email and not mobile):
-        raise HTTPException(
-            status_code=400,
-            detail="账号缺少必要的登录信息（必须提供 email 或 mobile 以及 password）",
-        )
-    if email:
-        payload = {
-            "email": email,
-            "password": password,
-            "device_id": "deepseek_to_api",
-            "os": "android",
-        }
-    else:
-        payload = {
-            "mobile": mobile,
-            "area_code": None,
-            "password": password,
-            "device_id": "deepseek_to_api",
-            "os": "android",
-        }
-    try:
-        resp = requests.post(
-            DEEPSEEK_LOGIN_URL, headers=BASE_HEADERS, json=payload, impersonate="safari15_3"
-        )
-        resp.raise_for_status()
-    except Exception as e:
-        logger.error(f"[login_deepseek_via_account] 登录请求异常: {e}")
-        raise HTTPException(status_code=500, detail="Account login failed: 请求异常")
-    try:
-        logger.warning(f"[login_deepseek_via_account] {resp.text}")
-        data = resp.json()
-    except Exception as e:
-        logger.error(f"[login_deepseek_via_account] JSON解析失败: {e}")
-        raise HTTPException(
-            status_code=500, detail="Account login failed: invalid JSON response"
-        )
-    
-    # 检查 API 错误码
-    if data.get("code") != 0:
-        error_msg = data.get("msg", "Unknown error")
-        logger.error(f"[login_deepseek_via_account] API错误: {error_msg}")
-        raise HTTPException(
-            status_code=500, detail=f"Account login failed: {error_msg}"
-        )
-    
-    # 检查业务错误码
-    biz_code = data.get("data", {}).get("biz_code")
-    biz_msg = data.get("data", {}).get("biz_msg", "")
-    if biz_code != 0:
-        logger.error(f"[login_deepseek_via_account] 业务错误: {biz_msg}")
-        raise HTTPException(
-            status_code=500, detail=f"Account login failed: {biz_msg}"
-        )
-    
-    # 校验响应数据格式是否正确
-    if (
-        data.get("data") is None
-        or data["data"].get("biz_data") is None
-        or data["data"]["biz_data"].get("user") is None
-    ):
-        logger.error(f"[login_deepseek_via_account] 登录响应格式错误: {data}")
-        raise HTTPException(
-            status_code=500, detail="Account login failed: invalid response format"
-        )
-    new_token = data["data"]["biz_data"]["user"].get("token")
-    if not new_token:
-        logger.error(f"[login_deepseek_via_account] 登录响应中缺少 token: {data}")
-        raise HTTPException(
-            status_code=500, detail="Account login failed: missing token"
-        )
-    account["token"] = new_token
-    save_config(CONFIG)
-    return new_token
-
-
-# ----------------------------------------------------------------------
-# 封装对话接口调用的重试机制
-# ----------------------------------------------------------------------
-def call_completion_endpoint(payload: dict, headers: dict, max_attempts: int = 3):
-    """调用 DeepSeek 对话接口，支持重试"""
-    attempts = 0
-    while attempts < max_attempts:
-        try:
-            deepseek_resp = requests.post(
-                DEEPSEEK_COMPLETION_URL,
-                headers=headers,
-                json=payload,
-                stream=True,
-                impersonate="safari15_3",
-            )
-        except Exception as e:
-            logger.warning(f"[call_completion_endpoint] 请求异常: {e}")
-            time.sleep(1)
-            attempts += 1
-            continue
-        if deepseek_resp.status_code == 200:
-            return deepseek_resp
-        else:
-            logger.warning(
-                f"[call_completion_endpoint] 调用对话接口失败, 状态码: {deepseek_resp.status_code}"
-            )
-            deepseek_resp.close()
-            time.sleep(1)
-            attempts += 1
-    return None
--- a/core/messages.py
+++ b/core/messages.py
@@ -1,118 +0,0 @@
-# -*- coding: utf-8 -*-
-"""消息处理模块"""
-import re
-
-from .config import CONFIG, logger
-
-# Claude 默认模型
-CLAUDE_DEFAULT_MODEL = "claude-sonnet-4-20250514"
-
-# 预编译正则表达式（性能优化）
-_MARKDOWN_IMAGE_PATTERN = re.compile(r"!\[(.*?)\]\((.*?)\)")
-
-
-# ----------------------------------------------------------------------
-# 消息预处理函数，将多轮对话合并成最终 prompt
-# ----------------------------------------------------------------------
-def messages_prepare(messages: list) -> str:
-    """处理消息列表，合并连续相同角色的消息，并添加角色标签：
-    - 对于 assistant 消息，加上 <｜Assistant｜> 前缀及 <｜end▁of▁sentence｜> 结束标签；
-    - 对于 user/system 消息（除第一条外）加上 <｜User｜> 前缀；
-    - 如果消息 content 为数组，则提取其中 type 为 "text" 的部分；
-    - 最后移除 markdown 图片格式的内容。
-    """
-    processed = []
-    for m in messages:
-        role = m.get("role", "")
-        content = m.get("content", "")
-        if isinstance(content, list):
-            texts = [
-                item.get("text", "") for item in content if item.get("type") == "text"
-            ]
-            text = "\n".join(texts)
-        else:
-            text = str(content)
-        processed.append({"role": role, "text": text})
-    if not processed:
-        return ""
-    # 合并连续同一角色的消息
-    merged = [processed[0]]
-    for msg in processed[1:]:
-        if msg["role"] == merged[-1]["role"]:
-            merged[-1]["text"] += "\n\n" + msg["text"]
-        else:
-            merged.append(msg)
-    # 添加标签
-    parts = []
-    for idx, block in enumerate(merged):
-        role = block["role"]
-        text = block["text"]
-        if role == "assistant":
-            parts.append(f"<｜Assistant｜>{text}<｜end▁of▁sentence｜>")
-        elif role in ("user", "system"):
-            if idx > 0:
-                parts.append(f"<｜User｜>{text}")
-            else:
-                parts.append(text)
-        else:
-            parts.append(text)
-    final_prompt = "".join(parts)
-    # 仅移除 markdown 图片格式(不全部移除 !）- 使用预编译的正则表达式
-    final_prompt = _MARKDOWN_IMAGE_PATTERN.sub(r"[\1](\2)", final_prompt)
-    return final_prompt
-
-
-# ----------------------------------------------------------------------
-# OpenAI到Claude格式转换函数
-# ----------------------------------------------------------------------
-def convert_claude_to_deepseek(claude_request: dict) -> dict:
-    """将Claude格式的请求转换为DeepSeek格式（基于现有OpenAI接口）"""
-    messages = claude_request.get("messages", [])
-    model = claude_request.get("model", CLAUDE_DEFAULT_MODEL)
-
-    # 从配置文件读取Claude模型映射
-    claude_mapping = CONFIG.get(
-        "claude_model_mapping", {"fast": "deepseek-chat", "slow": "deepseek-chat"}
-    )
-
-    # Claude模型映射到DeepSeek模型 - 基于配置和模型特征判断
-    if (
-        "opus" in model.lower()
-        or "reasoner" in model.lower()
-        or "slow" in model.lower()
-    ):
-        deepseek_model = claude_mapping.get("slow", "deepseek-chat")
-    else:
-        deepseek_model = claude_mapping.get("fast", "deepseek-chat")
-
-    deepseek_request = {"model": deepseek_model, "messages": messages.copy()}
-
-    # 处理system消息 - 将system参数转换为system role消息
-    if "system" in claude_request:
-        system_msg = {"role": "system", "content": claude_request["system"]}
-        deepseek_request["messages"].insert(0, system_msg)
-
-    # 添加可选参数
-    if "temperature" in claude_request:
-        deepseek_request["temperature"] = claude_request["temperature"]
-    if "top_p" in claude_request:
-        deepseek_request["top_p"] = claude_request["top_p"]
-    if "stop_sequences" in claude_request:
-        deepseek_request["stop"] = claude_request["stop_sequences"]
-    if "stream" in claude_request:
-        deepseek_request["stream"] = claude_request["stream"]
-
-    return deepseek_request
-
-
-def convert_deepseek_to_claude_format(
-    deepseek_response: dict, original_claude_model: str = CLAUDE_DEFAULT_MODEL
-) -> dict:
-    """将DeepSeek响应转换为Claude格式的OpenAI响应"""
-    # DeepSeek响应已经是OpenAI格式，只需要修改模型名称
-    if isinstance(deepseek_response, dict):
-        claude_response = deepseek_response.copy()
-        claude_response["model"] = original_claude_model
-        return claude_response
-
-    return deepseek_response
--- a/core/models.py
+++ b/core/models.py
@@ -1,90 +0,0 @@
-# -*- coding: utf-8 -*-
-"""模型定义模块 - 集中管理所有支持的模型"""
-
-# DeepSeek 模型列表（官方模型名称）
-DEEPSEEK_MODELS = [
-    {
-        "id": "deepseek-chat",
-        "object": "model",
-        "created": 1677610602,
-        "owned_by": "deepseek",
-        "permission": [],
-    },
-    {
-        "id": "deepseek-reasoner",
-        "object": "model",
-        "created": 1677610602,
-        "owned_by": "deepseek",
-        "permission": [],
-    },
-    {
-        "id": "deepseek-chat-search",
-        "object": "model",
-        "created": 1677610602,
-        "owned_by": "deepseek",
-        "permission": [],
-    },
-    {
-        "id": "deepseek-reasoner-search",
-        "object": "model",
-        "created": 1677610602,
-        "owned_by": "deepseek",
-        "permission": [],
-    },
-]
-
-# Claude 模型映射列表
-CLAUDE_MODELS = [
-    {
-        "id": "claude-sonnet-4-20250514",
-        "object": "model",
-        "created": 1715635200,
-        "owned_by": "anthropic",
-    },
-    {
-        "id": "claude-sonnet-4-20250514-fast",
-        "object": "model",
-        "created": 1715635200,
-        "owned_by": "anthropic",
-    },
-    {
-        "id": "claude-sonnet-4-20250514-slow",
-        "object": "model",
-        "created": 1715635200,
-        "owned_by": "anthropic",
-    },
-]
-
-
-def get_model_config(model: str) -> tuple[bool, bool]:
-    """根据模型名称获取配置
-    
-    Args:
-        model: 模型名称
-        
-    Returns:
-        (thinking_enabled, search_enabled) 元组
-    """
-    model_lower = model.lower()
-    
-    if model_lower == "deepseek-chat":
-        return False, False
-    elif model_lower == "deepseek-reasoner":
-        return True, False
-    elif model_lower == "deepseek-chat-search":
-        return False, True
-    elif model_lower == "deepseek-reasoner-search":
-        return True, True
-    else:
-        return None, None  # 不支持的模型
-
-
-def get_openai_models_response() -> dict:
-    """获取 OpenAI 格式的模型列表响应"""
-    return {"object": "list", "data": DEEPSEEK_MODELS}
-
-
-def get_claude_models_response() -> dict:
-    """获取 Claude 格式的模型列表响应"""
-    return {"object": "list", "data": CLAUDE_MODELS}
-
--- a/core/pow.py
+++ b/core/pow.py
@@ -1,253 +0,0 @@
-# -*- coding: utf-8 -*-
-"""PoW (Proof of Work) 计算模块"""
-import base64
-import ctypes
-import json
-import struct
-import threading
-import time
-
-from curl_cffi import requests
-from wasmtime import Engine, Linker, Module, Store
-
-from .config import CONFIG, WASM_PATH, logger
-from .utils import get_account_identifier
-
-# ----------------------------------------------------------------------
-# WASM 模块缓存 - 避免每次请求都重新加载
-# ----------------------------------------------------------------------
-_wasm_cache_lock = threading.Lock()
-_wasm_engine = None
-_wasm_module = None
-
-
-def _get_cached_wasm_module(wasm_path: str):
-    """获取缓存的 WASM 模块，首次调用时加载"""
-    global _wasm_engine, _wasm_module
-    
-    if _wasm_module is not None:
-        return _wasm_engine, _wasm_module
-    
-    with _wasm_cache_lock:
-        # 双重检查锁定
-        if _wasm_module is not None:
-            return _wasm_engine, _wasm_module
-        
-        try:
-            with open(wasm_path, "rb") as f:
-                wasm_bytes = f.read()
-            _wasm_engine = Engine()
-            _wasm_module = Module(_wasm_engine, wasm_bytes)
-            logger.info(f"[WASM] 已缓存 WASM 模块: {wasm_path}")
-        except Exception as e:
-            logger.error(f"[WASM] 加载 WASM 模块失败: {e}")
-            raise RuntimeError(f"加载 wasm 文件失败: {wasm_path}, 错误: {e}")
-    
-    return _wasm_engine, _wasm_module
-
-
-# 启动时预加载 WASM 模块
-try:
-    _get_cached_wasm_module(WASM_PATH)
-except Exception as e:
-    logger.warning(f"[WASM] 启动时预加载失败（将在首次使用时重试）: {e}")
-
-# get_account_identifier 已移至 core.utils
-
-
-# ----------------------------------------------------------------------
-# 使用 WASM 模块计算 PoW 答案的辅助函数
-# ----------------------------------------------------------------------
-def compute_pow_answer(
-    algorithm: str,
-    challenge_str: str,
-    salt: str,
-    difficulty: int,
-    expire_at: int,
-    signature: str,
-    target_path: str,
-    wasm_path: str,
-) -> int:
-    """
-    使用 WASM 模块计算 DeepSeekHash 答案（answer）。
-    根据 JS 逻辑：
-      - 拼接前缀： "{salt}_{expire_at}_"
-      - 将 challenge 与前缀写入 wasm 内存后调用 wasm_solve 进行求解，
-      - 从 wasm 内存中读取状态与求解结果，
-      - 若状态非 0，则返回整数形式的答案，否则返回 None。
-    
-    优化：使用缓存的 WASM 模块，避免每次请求都重新加载文件。
-    """
-    if algorithm != "DeepSeekHashV1":
-        raise ValueError(f"不支持的算法：{algorithm}")
-    
-    prefix = f"{salt}_{expire_at}_"
-    
-    # 获取缓存的 WASM 模块（避免重复加载文件）
-    engine, module = _get_cached_wasm_module(wasm_path)
-    
-    # 每次调用创建新的 Store 和实例（必须的，因为 Store 不是线程安全的）
-    store = Store(engine)
-    linker = Linker(engine)
-    instance = linker.instantiate(store, module)
-    exports = instance.exports(store)
-    
-    try:
-        memory = exports["memory"]
-        add_to_stack = exports["__wbindgen_add_to_stack_pointer"]
-        alloc = exports["__wbindgen_export_0"]
-        wasm_solve = exports["wasm_solve"]
-    except KeyError as e:
-        raise RuntimeError(f"缺少 wasm 导出函数: {e}")
-
-    def write_memory(offset: int, data: bytes):
-        size = len(data)
-        base_addr = ctypes.cast(memory.data_ptr(store), ctypes.c_void_p).value
-        ctypes.memmove(base_addr + offset, data, size)
-
-    def read_memory(offset: int, size: int) -> bytes:
-        base_addr = ctypes.cast(memory.data_ptr(store), ctypes.c_void_p).value
-        return ctypes.string_at(base_addr + offset, size)
-
-    def encode_string(text: str):
-        data = text.encode("utf-8")
-        length = len(data)
-        ptr_val = alloc(store, length, 1)
-        ptr = int(ptr_val.value) if hasattr(ptr_val, "value") else int(ptr_val)
-        write_memory(ptr, data)
-        return ptr, length
-
-    # 1. 申请 16 字节栈空间
-    retptr = add_to_stack(store, -16)
-    # 2. 编码 challenge 与 prefix 到 wasm 内存中
-    ptr_challenge, len_challenge = encode_string(challenge_str)
-    ptr_prefix, len_prefix = encode_string(prefix)
-    # 3. 调用 wasm_solve（注意：difficulty 以 float 形式传入）
-    wasm_solve(
-        store,
-        retptr,
-        ptr_challenge,
-        len_challenge,
-        ptr_prefix,
-        len_prefix,
-        float(difficulty),
-    )
-    # 4. 从 retptr 处读取 4 字节状态和 8 字节求解结果
-    status_bytes = read_memory(retptr, 4)
-    if len(status_bytes) != 4:
-        add_to_stack(store, 16)
-        raise RuntimeError("读取状态字节失败")
-    status = struct.unpack("<i", status_bytes)[0]
-    value_bytes = read_memory(retptr + 8, 8)
-    if len(value_bytes) != 8:
-        add_to_stack(store, 16)
-        raise RuntimeError("读取结果字节失败")
-    value = struct.unpack("<d", value_bytes)[0]
-    # 5. 恢复栈指针
-    add_to_stack(store, 16)
-    if status == 0:
-        return None
-    return int(value)
-
-
-def get_pow_response(request, max_attempts: int = 3):
-    """获取 PoW 响应
-    
-    Args:
-        request: FastAPI 请求对象
-        max_attempts: 最大重试次数
-        
-    Returns:
-        Base64 编码的 PoW 响应，如果失败返回 None
-    """
-    from .auth import get_auth_headers, choose_new_account
-    from .deepseek import BASE_HEADERS, login_deepseek_via_account, DEEPSEEK_CREATE_POW_URL
-    
-    pow_url = DEEPSEEK_CREATE_POW_URL
-    
-    attempts = 0
-    while attempts < max_attempts:
-        headers = get_auth_headers(request)
-        try:
-            resp = requests.post(
-                pow_url,
-                headers=headers,
-                json={"target_path": "/api/v0/chat/completion"},
-                timeout=30,
-                impersonate="safari15_3",
-            )
-        except Exception as e:
-            logger.error(f"[get_pow_response] 请求异常: {e}")
-            attempts += 1
-            continue
-        try:
-            data = resp.json()
-        except Exception as e:
-            logger.error(f"[get_pow_response] JSON解析异常: {e}")
-            data = {}
-        if resp.status_code == 200 and data.get("code") == 0:
-            challenge = data["data"]["biz_data"]["challenge"]
-            difficulty = challenge.get("difficulty", 144000)
-            expire_at = challenge.get("expire_at", 1680000000)
-            try:
-                answer = compute_pow_answer(
-                    challenge["algorithm"],
-                    challenge["challenge"],
-                    challenge["salt"],
-                    difficulty,
-                    expire_at,
-                    challenge["signature"],
-                    challenge["target_path"],
-                    WASM_PATH,
-                )
-            except Exception as e:
-                logger.error(f"[get_pow_response] PoW 答案计算异常: {e}")
-                answer = None
-            if answer is None:
-                logger.warning("[get_pow_response] PoW 答案计算失败，重试中...")
-                resp.close()
-                attempts += 1
-                continue
-            pow_dict = {
-                "algorithm": challenge["algorithm"],
-                "challenge": challenge["challenge"],
-                "salt": challenge["salt"],
-                "answer": answer,
-                "signature": challenge["signature"],
-                "target_path": challenge["target_path"],
-            }
-            pow_str = json.dumps(pow_dict, separators=(",", ":"), ensure_ascii=False)
-            encoded = base64.b64encode(pow_str.encode("utf-8")).decode("utf-8").rstrip()
-            resp.close()
-            return encoded
-        else:
-            code = data.get("code")
-            logger.warning(
-                f"[get_pow_response] 获取 PoW 失败, code={code}, msg={data.get('msg')}"
-            )
-            resp.close()
-            if request.state.use_config_token:
-                current_id = get_account_identifier(request.state.account)
-                if not hasattr(request.state, "tried_accounts"):
-                    request.state.tried_accounts = []
-                if current_id not in request.state.tried_accounts:
-                    request.state.tried_accounts.append(current_id)
-                new_account = choose_new_account(request.state.tried_accounts)
-                if new_account is None:
-                    break
-                try:
-                    login_deepseek_via_account(new_account)
-                except Exception as e:
-                    logger.error(
-                        f"[get_pow_response] 账号 {get_account_identifier(new_account)} 登录失败：{e}"
-                    )
-                    attempts += 1
-                    continue
-                request.state.account = new_account
-                request.state.deepseek_token = new_account.get("token")
-            else:
-                attempts += 1
-                continue
-            attempts += 1
-    return None
-
--- a/core/session_manager.py
+++ b/core/session_manager.py
@@ -1,165 +0,0 @@
-# -*- coding: utf-8 -*-
-"""会话管理模块 - 封装公共的会话创建和 PoW 获取逻辑"""
-from curl_cffi import requests as cffi_requests
-from fastapi import HTTPException, Request
-
-from .config import logger
-from .utils import get_account_identifier
-from .models import get_model_config
-from .auth import (
-    get_auth_headers,
-    choose_new_account,
-    release_account,
-    refresh_account_token,
-)
-from .deepseek import (
-    DEEPSEEK_CREATE_SESSION_URL,
-    DEEPSEEK_CREATE_POW_URL,
-    login_deepseek_via_account,
-    call_completion_endpoint,
-)
-from .pow import get_pow_response
-
-
-def create_session(request: Request, max_attempts: int = 3) -> str | None:
-    """创建 DeepSeek 会话
-    
-    Args:
-        request: FastAPI 请求对象
-        max_attempts: 最大重试次数
-        
-    Returns:
-        会话 ID，如果失败返回 None
-    """
-    attempts = 0
-    token_refreshed = False  # 标记是否已尝试刷新 token
-    
-    while attempts < max_attempts:
-        headers = get_auth_headers(request)
-        try:
-            resp = cffi_requests.post(
-                DEEPSEEK_CREATE_SESSION_URL,
-                headers=headers,
-                json={"agent": "chat"},
-                impersonate="safari15_3",
-            )
-        except Exception as e:
-            logger.error(f"[create_session] 请求异常: {e}")
-            attempts += 1
-            continue
-        
-        try:
-            data = resp.json()
-        except Exception as e:
-            logger.error(f"[create_session] JSON解析异常: {e}")
-            data = {}
-        
-        if resp.status_code == 200 and data.get("code") == 0:
-            session_id = data["data"]["biz_data"]["id"]
-            resp.close()
-            return session_id
-        else:
-            code = data.get("code")
-            msg = data.get("msg", "")
-            logger.warning(
-                f"[create_session] 创建会话失败, code={code}, msg={msg}"
-            )
-            resp.close()
-            
-            # 配置模式下尝试处理 token 问题
-            if request.state.use_config_token:
-                # token 无效（认证失败）时，先尝试刷新当前账号的 token
-                if code in [40001, 40002, 40003] or "token" in msg.lower() or "unauthorized" in msg.lower():
-                    if not token_refreshed:
-                        logger.info("[create_session] 检测到 token 可能过期，尝试刷新")
-                        if refresh_account_token(request):
-                            token_refreshed = True
-                            continue  # 使用新 token 重试
-                        else:
-                            logger.warning("[create_session] token 刷新失败，尝试切换账号")
-                
-                # token 刷新失败或其他错误，尝试切换账号
-                current_id = get_account_identifier(request.state.account)
-                if not hasattr(request.state, "tried_accounts"):
-                    request.state.tried_accounts = []
-                if current_id not in request.state.tried_accounts:
-                    request.state.tried_accounts.append(current_id)
-                new_account = choose_new_account(request.state.tried_accounts)
-                if new_account is None:
-                    break
-                try:
-                    login_deepseek_via_account(new_account)
-                except Exception as e:
-                    logger.error(
-                        f"[create_session] 账号 {get_account_identifier(new_account)} 登录失败：{e}"
-                    )
-                    attempts += 1
-                    continue
-                request.state.account = new_account
-                request.state.deepseek_token = new_account.get("token")
-                token_refreshed = False  # 新账号重置刷新标记
-            else:
-                attempts += 1
-                continue
-        attempts += 1
-    return None
-
-
-def get_pow(request: Request, max_attempts: int = 3) -> str | None:
-    """获取 PoW 响应的包装函数
-    
-    Args:
-        request: FastAPI 请求对象
-        max_attempts: 最大重试次数
-        
-    Returns:
-        Base64 编码的 PoW 响应，如果失败返回 None
-    """
-    return get_pow_response(request, max_attempts)
-
-
-def prepare_completion_request(
-    request: Request,
-    session_id: str,
-    prompt: str,
-    thinking_enabled: bool = False,
-    search_enabled: bool = False,
-    max_attempts: int = 3,
-):
-    """准备并执行对话补全请求
-    
-    Args:
-        request: FastAPI 请求对象
-        session_id: 会话 ID
-        prompt: 处理后的提示词
-        thinking_enabled: 是否启用思考模式
-        search_enabled: 是否启用搜索
-        max_attempts: 最大重试次数
-        
-    Returns:
-        DeepSeek 响应对象，如果失败返回 None
-    """
-    pow_resp = get_pow(request, max_attempts)
-    if not pow_resp:
-        return None
-    
-    headers = {**get_auth_headers(request), "x-ds-pow-response": pow_resp}
-    payload = {
-        "chat_session_id": session_id,
-        "parent_message_id": None,
-        "prompt": prompt,
-        "ref_file_ids": [],
-        "thinking_enabled": thinking_enabled,
-        "search_enabled": search_enabled,
-    }
-    
-    return call_completion_endpoint(payload, headers, max_attempts)
-
-
-# get_model_config 已移至 core.models
-
-
-def cleanup_account(request: Request):
-    """清理账号资源（将账号放回队列）"""
-    if getattr(request.state, "use_config_token", False) and hasattr(request.state, "account"):
-        release_account(request.state.account)
--- a/core/sse_parser.py
+++ b/core/sse_parser.py
@@ -1,450 +0,0 @@
-# -*- coding: utf-8 -*-
-"""DeepSeek SSE 流解析模块
-
-这个模块包含解析 DeepSeek SSE 响应的公共逻辑，供 openai.py、claude.py 和 accounts.py 共用。
-合并了原 sse_parser.py 和 stream_parser.py 的功能。
-"""
-import json
-import re
-from typing import List, Tuple, Optional, Dict, Any, Generator
-
-from .config import logger
-from .constants import SKIP_PATTERNS
-
-# 预编译正则表达式
-_TOOL_CALL_PATTERN = re.compile(r'\{\s*["\']tool_calls["\']\s*:\s*\[(.*?)\]\s*\}', re.DOTALL)
-_CITATION_PATTERN = re.compile(r"^\[citation:")
-
-
-# ----------------------------------------------------------------------
-# 基础解析函数
-# ----------------------------------------------------------------------
-
-def parse_deepseek_sse_line(raw_line: bytes) -> Optional[Dict[str, Any]]:
-    """解析 DeepSeek SSE 行
-    
-    Args:
-        raw_line: 原始字节行
-        
-    Returns:
-        解析后的 chunk 字典，如果解析失败或应跳过则返回 None
-    """
-    try:
-        line = raw_line.decode("utf-8")
-    except Exception as e:
-        logger.warning(f"[parse_deepseek_sse_line] 解码失败: {e}")
-        return None
-    
-    if not line or not line.startswith("data:"):
-        return None
-    
-    data_str = line[5:].strip()
-    
-    if data_str == "[DONE]":
-        return {"type": "done"}
-    
-    try:
-        chunk = json.loads(data_str)
-        return chunk
-    except json.JSONDecodeError as e:
-        logger.warning(f"[parse_deepseek_sse_line] JSON解析失败: {e}")
-        return None
-
-
-def should_skip_chunk(chunk_path: str) -> bool:
-    """判断是否应该跳过这个 chunk（状态相关，不是内容）"""
-    if chunk_path == "response/search_status":
-        return True
-    return any(kw in chunk_path for kw in SKIP_PATTERNS)
-
-
-def is_response_finished(chunk_path: str, v_value: Any) -> bool:
-    """判断是否是响应结束信号"""
-    return chunk_path == "response/status" and isinstance(v_value, str) and v_value == "FINISHED"
-
-
-def is_finished_signal(chunk_path: str, v_value: str) -> bool:
-    """判断字符串 v_value 是否是结束信号"""
-    return v_value == "FINISHED" and (not chunk_path or chunk_path == "status")
-
-
-def is_search_result(item: dict) -> bool:
-    """判断是否是搜索结果项（url/title/snippet）"""
-    return "url" in item and "title" in item
-
-
-# ----------------------------------------------------------------------
-# 内容提取函数
-# ----------------------------------------------------------------------
-
-def extract_content_from_item(item: dict, default_type: str = "text") -> Optional[Tuple[str, str]]:
-    """从包含 content 和 type 的项中提取内容
-    
-    返回 (content, content_type) 或 None
-    """
-    if "content" in item and "type" in item:
-        inner_type = item.get("type", "").upper()
-        content = item.get("content", "")
-        if content:
-            if inner_type == "THINK" or inner_type == "THINKING":
-                return (content, "thinking")
-            elif inner_type == "RESPONSE":
-                return (content, "text")
-            else:
-                return (content, default_type)
-    return None
-
-
-def extract_content_recursive(items: List[Dict], default_type: str = "text") -> Optional[List[Tuple[str, str]]]:
-    """递归提取列表中的内容
-    
-    返回 [(content, content_type), ...] 列表，
-    如果遇到 FINISHED 信号返回 None
-    """
-    extracted: List[Tuple[str, str]] = []
-    for item in items:
-        if not isinstance(item, dict):
-            continue
-        
-        item_p = item.get("p", "")
-        item_v = item.get("v")
-        
-        # 跳过搜索结果项
-        if is_search_result(item):
-            continue
-        
-        # 只有当 p="status" (精确匹配) 且 v="FINISHED" 才认为是真正结束
-        if item_p == "status" and item_v == "FINISHED":
-            return None  # 信号结束
-        
-        # 跳过状态相关
-        if should_skip_chunk(item_p):
-            continue
-        
-        # 直接处理包含 content 和 type 的项
-        result = extract_content_from_item(item, default_type)
-        if result:
-            extracted.append(result)
-            continue
-        
-        # 确定类型（基于 p 字段）
-        if "thinking" in item_p:
-            content_type = "thinking"
-        elif "content" in item_p or item_p == "response" or item_p == "fragments":
-            content_type = "text"
-        else:
-            content_type = default_type
-        
-        # 处理不同的 v 类型
-        if isinstance(item_v, str):
-            if item_v and item_v != "FINISHED":
-                extracted.append((item_v, content_type))
-        elif isinstance(item_v, list):
-            # 内层可能是 [{"content": "text", "type": "THINK/RESPONSE", ...}] 格式
-            for inner in item_v:
-                if isinstance(inner, dict):
-                    # 检查内层的 type 字段
-                    inner_type = inner.get("type", "").upper()
-                    # DeepSeek 使用 THINK 而不是 THINKING
-                    if inner_type == "THINK" or inner_type == "THINKING":
-                        final_type = "thinking"
-                    elif inner_type == "RESPONSE":
-                        final_type = "text"
-                    else:
-                        final_type = content_type  # 继承外层类型
-                    
-                    content = inner.get("content", "")
-                    if content:
-                        extracted.append((content, final_type))
-                elif isinstance(inner, str) and inner:
-                    extracted.append((inner, content_type))
-    return extracted
-
-
-# ----------------------------------------------------------------------
-# 高级解析函数
-# ----------------------------------------------------------------------
-
-def parse_sse_chunk_for_content(
-    chunk: Dict[str, Any], 
-    thinking_enabled: bool = False, 
-    current_fragment_type: str = "thinking"
-) -> Tuple[List[Tuple[str, str]], bool, str]:
-    """解析单个 SSE chunk 并提取内容
-    
-    Args:
-        chunk: 解析后的 JSON chunk
-        thinking_enabled: 是否启用思考模式
-        current_fragment_type: 当前活跃的 fragment 类型 ("thinking" 或 "text")
-                              用于处理没有明确路径的空 p 字段内容
-    
-    Returns:
-        (contents, is_finished, new_fragment_type)
-        - contents: [(content, content_type), ...] 列表
-        - is_finished: 是否是结束信号
-        - new_fragment_type: 更新后的 fragment 类型，供下一个 chunk 使用
-    """
-    if "v" not in chunk:
-        return ([], False, current_fragment_type)
-    
-    v_value = chunk["v"]
-    chunk_path = chunk.get("p", "")
-    contents: List[Tuple[str, str]] = []
-    new_fragment_type = current_fragment_type
-    
-    # 跳过状态相关 chunk
-    if should_skip_chunk(chunk_path):
-        return ([], False, current_fragment_type)
-    
-    # 检查是否是真正的响应结束信号
-    if is_response_finished(chunk_path, v_value):
-        return ([], True, current_fragment_type)
-    
-    # 检测 fragment 类型变化（来自 APPEND 操作）
-    # 格式: {'p': 'response', 'o': 'BATCH', 'v': [{'p': 'fragments', 'o': 'APPEND', 'v': [{'type': 'THINK/RESPONSE', ...}]}]}
-    if chunk_path == "response" and isinstance(v_value, list):
-        for batch_item in v_value:
-            if isinstance(batch_item, dict) and batch_item.get("p") == "fragments" and batch_item.get("o") == "APPEND":
-                fragments = batch_item.get("v", [])
-                for frag in fragments:
-                    if isinstance(frag, dict):
-                        frag_type = frag.get("type", "").upper()
-                        if frag_type == "THINK" or frag_type == "THINKING":
-                            new_fragment_type = "thinking"
-                        elif frag_type == "RESPONSE":
-                            new_fragment_type = "text"
-    
-    # 也检测直接的 fragments 路径
-    if "response/fragments" in chunk_path and isinstance(v_value, list):
-        for frag in v_value:
-            if isinstance(frag, dict):
-                frag_type = frag.get("type", "").upper()
-                if frag_type == "THINK" or frag_type == "THINKING":
-                    new_fragment_type = "thinking"
-                elif frag_type == "RESPONSE":
-                    new_fragment_type = "text"
-    
-    # 确定当前内容类型
-    if chunk_path == "response/thinking_content":
-        ptype = "thinking"
-    elif chunk_path == "response/content":
-        ptype = "text"
-    elif "response/fragments" in chunk_path and "/content" in chunk_path:
-        # 如 response/fragments/-1/content - 使用当前 fragment 类型
-        ptype = new_fragment_type
-    elif not chunk_path:
-        # 空路径内容：使用当前活跃的 fragment 类型
-        if thinking_enabled:
-            ptype = new_fragment_type
-        else:
-            ptype = "text"
-    else:
-        ptype = "text"
-    
-    # 处理字符串值
-    if isinstance(v_value, str):
-        if is_finished_signal(chunk_path, v_value):
-            return ([], True, new_fragment_type)
-        if v_value:
-            contents.append((v_value, ptype))
-    
-    # 处理列表值
-    elif isinstance(v_value, list):
-        result = extract_content_recursive(v_value, ptype)
-        if result is None:
-            return ([], True, new_fragment_type)
-        contents.extend(result)
-    
-    return (contents, False, new_fragment_type)
-
-
-def extract_content_from_chunk(chunk: Dict[str, Any]) -> Tuple[str, str, bool]:
-    """从 DeepSeek chunk 中提取内容（简化版本，兼容旧接口）
-    
-    Args:
-        chunk: 解析后的 chunk 字典
-        
-    Returns:
-        (content, content_type, is_finished) 元组
-        content_type 为 "thinking" 或 "text"
-        is_finished 为 True 表示响应结束
-    """
-    if chunk.get("type") == "done":
-        return "", "text", True
-    
-    # 检测内容审核/敏感词阻止
-    if "error" in chunk or chunk.get("code") == "content_filter":
-        logger.warning(f"[extract_content_from_chunk] 检测到内容过滤: {chunk}")
-        return "", "text", True
-    
-    if "v" not in chunk:
-        return "", "text", False
-    
-    v_value = chunk["v"]
-    ptype = "text"
-    
-    # 检查路径确定类型
-    path = chunk.get("p", "")
-    if path == "response/search_status":
-        return "", "text", False  # 跳过搜索状态
-    elif path == "response/thinking_content":
-        ptype = "thinking"
-    elif path == "response/content":
-        ptype = "text"
-    
-    if isinstance(v_value, str):
-        if v_value == "FINISHED":
-            return "", ptype, True
-        return v_value, ptype, False
-    elif isinstance(v_value, list):
-        for item in v_value:
-            if isinstance(item, dict):
-                if item.get("p") == "status" and item.get("v") == "FINISHED":
-                    return "", ptype, True
-        return "", ptype, False
-    
-    return "", ptype, False
-
-
-# ----------------------------------------------------------------------
-# 响应收集函数
-# ----------------------------------------------------------------------
-
-def collect_deepseek_response(response: Any) -> Tuple[str, str]:
-    """收集 DeepSeek 流响应的完整内容
-    
-    Args:
-        response: DeepSeek 流响应对象
-        
-    Returns:
-        (reasoning_content, text_content) 元组
-    """
-    thinking_parts: List[str] = []
-    text_parts: List[str] = []
-    
-    try:
-        for raw_line in response.iter_lines():
-            chunk = parse_deepseek_sse_line(raw_line)
-            if not chunk:
-                continue
-            
-            content, content_type, is_finished = extract_content_from_chunk(chunk)
-            
-            if is_finished:
-                break
-            
-            if content:
-                if content_type == "thinking":
-                    thinking_parts.append(content)
-                else:
-                    text_parts.append(content)
-    except Exception as e:
-        logger.error(f"[collect_deepseek_response] 收集响应失败: {e}")
-    finally:
-        try:
-            response.close()
-        except Exception:
-            pass
-    
-    return "".join(thinking_parts), "".join(text_parts)
-
-
-# ----------------------------------------------------------------------
-# 工具调用解析
-# ----------------------------------------------------------------------
-
-def parse_tool_calls(text: str, tools_requested: List[Dict]) -> List[Dict[str, Any]]:
-    """从响应文本中解析工具调用
-    
-    Args:
-        text: 响应文本
-        tools_requested: 请求中定义的工具列表
-        
-    Returns:
-        检测到的工具调用列表，每项包含 name 和 input
-    """
-    detected_tools: List[Dict[str, Any]] = []
-    cleaned_text = text.strip()
-    
-    # 尝试直接解析完整 JSON
-    if cleaned_text.startswith('{"tool_calls":') and cleaned_text.endswith("]}"):
-        try:
-            tool_data = json.loads(cleaned_text)
-            for tool_call in tool_data.get("tool_calls", []):
-                tool_name = tool_call.get("name")
-                tool_input = tool_call.get("input", {})
-                if any(tool.get("name") == tool_name for tool in tools_requested):
-                    detected_tools.append({"name": tool_name, "input": tool_input})
-            if detected_tools:
-                return detected_tools
-        except json.JSONDecodeError:
-            pass
-    
-    # 使用正则匹配
-    matches = _TOOL_CALL_PATTERN.findall(cleaned_text)
-    for match in matches:
-        try:
-            tool_calls_json = f'{{"tool_calls": [{match}]}}'
-            tool_data = json.loads(tool_calls_json)
-            for tool_call in tool_data.get("tool_calls", []):
-                tool_name = tool_call.get("name")
-                tool_input = tool_call.get("input", {})
-                if any(tool.get("name") == tool_name for tool in tools_requested):
-                    detected_tools.append({"name": tool_name, "input": tool_input})
-        except json.JSONDecodeError:
-            continue
-    
-    return detected_tools
-
-
-# ----------------------------------------------------------------------
-# 引用过滤
-# ----------------------------------------------------------------------
-
-def should_filter_citation(text: str, search_enabled: bool) -> bool:
-    """检查是否应该过滤引用内容
-    
-    Args:
-        text: 内容文本
-        search_enabled: 是否启用搜索
-        
-    Returns:
-        是否应该过滤
-    """
-    if not search_enabled:
-        return False
-    return _CITATION_PATTERN.match(text) is not None
-
-
-# ----------------------------------------------------------------------
-# 工具调用格式化
-# ----------------------------------------------------------------------
-
-def format_openai_tool_calls(
-    detected_tools: List[Dict[str, Any]], 
-    base_id: str = ""
-) -> List[Dict[str, Any]]:
-    """将检测到的工具调用格式化为 OpenAI API 格式
-    
-    Args:
-        detected_tools: parse_tool_calls 返回的工具调用列表
-        base_id: 用于生成唯一 ID 的基础字符串（可选）
-        
-    Returns:
-        OpenAI 格式的 tool_calls 数组，例如：
-        [{"id": "call_xxx", "type": "function", "function": {"name": "...", "arguments": "..."}}]
-    """
-    import random
-    import time
-    
-    tool_calls_data = []
-    for idx, tool_info in enumerate(detected_tools):
-        tool_calls_data.append({
-            "id": f"call_{base_id or int(time.time())}_{random.randint(1000,9999)}_{idx}",
-            "type": "function",
-            "function": {
-                "name": tool_info["name"],
-                "arguments": json.dumps(tool_info.get("input", {}), ensure_ascii=False)
-            }
-        })
-    return tool_calls_data
--- a/core/utils.py
+++ b/core/utils.py
@@ -1,29 +0,0 @@
-# -*- coding: utf-8 -*-
-"""公共工具函数模块"""
-
-
-def get_account_identifier(account: dict) -> str:
-    """返回账号的唯一标识，优先使用 email，否则使用 mobile"""
-    return account.get("email", "").strip() or account.get("mobile", "").strip()
-
-
-def estimate_tokens(text) -> int:
-    """估算文本的 token 数量（简单估算：字符数/4）
-    
-    Args:
-        text: 字符串或其他类型
-        
-    Returns:
-        估算的 token 数量，最小为 1
-    """
-    if isinstance(text, str):
-        return max(1, len(text) // 4)
-    elif isinstance(text, list):
-        return sum(
-            estimate_tokens(item.get("text", ""))
-            if isinstance(item, dict)
-            else estimate_tokens(str(item))
-            for item in text
-        )
-    else:
-        return max(1, len(str(text)) // 4)
--- a/dev.py
+++ b/dev.py
@@ -1,151 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-DS2API 开发服务器 - 统一启动后端和前端
-
-使用方法:
-    python dev.py             # 同时启动后端和前端
-    python dev.py --backend   # 仅启动后端
-    python dev.py --frontend  # 仅启动前端
-    python dev.py --install   # 安装所有依赖
-
-环境变量:
-    PORT - 后端服务端口，默认 5001
-    LOG_LEVEL - 日志级别，默认 INFO
-"""
-import os
-import sys
-import signal
-import subprocess
-import time
-from pathlib import Path
-
-# 配置
-BACKEND_PORT = int(os.getenv("PORT", "5001"))
-FRONTEND_PORT = 5173
-HOST = os.getenv("HOST", "0.0.0.0")
-LOG_LEVEL = os.getenv("LOG_LEVEL", "info").lower()
-PROJECT_DIR = Path(__file__).parent
-WEBUI_DIR = PROJECT_DIR / "webui"
-REQUIREMENTS_FILE = PROJECT_DIR / "requirements.txt"
-
-processes = []
-
-
-def install_dependencies():
-    """安装所有 Python 和 Node.js 依赖"""
-    print("\n📦 安装 Python 依赖...")
-    subprocess.run([
-        sys.executable, "-m", "pip", "install", "-r", str(REQUIREMENTS_FILE), "-q"
-    ], check=True)
-    print("✅ Python 依赖安装完成")
-    
-    if WEBUI_DIR.exists():
-        print("\n📦 安装前端依赖...")
-        subprocess.run(["npm", "install"], cwd=WEBUI_DIR, check=True)
-        print("✅ 前端依赖安装完成")
-    
-    print("\n🎉 所有依赖安装完成！运行 `python dev.py` 启动服务\n")
-
-
-def signal_handler(sig, frame):
-    """处理退出信号，终止所有子进程"""
-    print("\n\n🛑 正在关闭所有服务...")
-    for proc in processes:
-        if proc.poll() is None:
-            proc.terminate()
-            try:
-                proc.wait(timeout=3)
-            except subprocess.TimeoutExpired:
-                proc.kill()
-    print("👋 已退出\n")
-    sys.exit(0)
-
-
-def start_backend():
-    """启动后端服务"""
-    print(f"🚀 启动后端服务... http://localhost:{BACKEND_PORT}")
-    proc = subprocess.Popen(
-        [
-            sys.executable, "-m", "uvicorn",
-            "app:app",
-            "--host", HOST,
-            "--port", str(BACKEND_PORT),
-            "--reload",
-            "--reload-dir", str(PROJECT_DIR),
-            "--log-level", LOG_LEVEL,
-        ],
-        cwd=PROJECT_DIR,
-    )
-    processes.append(proc)
-    return proc
-
-
-def start_frontend():
-    """启动前端开发服务器"""
-    if not WEBUI_DIR.exists():
-        print("⚠️  webui 目录不存在，跳过前端启动")
-        return None
-    
-    node_modules = WEBUI_DIR / "node_modules"
-    if not node_modules.exists():
-        print("📦 安装前端依赖...")
-        subprocess.run(["npm", "install"], cwd=WEBUI_DIR, check=True)
-    
-    print(f"🎨 启动前端服务... http://localhost:{FRONTEND_PORT}")
-    proc = subprocess.Popen(
-        ["npm", "run", "dev"],
-        cwd=WEBUI_DIR,
-    )
-    processes.append(proc)
-    return proc
-
-
-def main():
-    # 解析参数
-    if "--install" in sys.argv or "-i" in sys.argv:
-        install_dependencies()
-        return
-    
-    backend_only = "--backend" in sys.argv or "-b" in sys.argv
-    frontend_only = "--frontend" in sys.argv or "-f" in sys.argv
-    
-    # 注册信号处理
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-    
-    print("\n" + "=" * 50)
-    print("       DS2API 开发服务器")
-    print("=" * 50)
-    
-    if frontend_only:
-        start_frontend()
-    elif backend_only:
-        start_backend()
-    else:
-        # 同时启动
-        start_backend()
-        time.sleep(1)  # 等待后端启动
-        start_frontend()
-    
-    print("\n" + "-" * 50)
-    if not frontend_only:
-        print(f"📡 后端 API:  http://localhost:{BACKEND_PORT}")
-    if not backend_only:
-        print(f"🎨 管理界面: http://localhost:{FRONTEND_PORT}")
-    print("-" * 50)
-    print("按 Ctrl+C 停止所有服务\n")
-    
-    # 等待进程结束
-    try:
-        while processes:
-            for proc in processes[:]:
-                if proc.poll() is not None:
-                    processes.remove(proc)
-            time.sleep(0.5)
-    except KeyboardInterrupt:
-        signal_handler(None, None)
-
-
-if __name__ == "__main__":
-    main()
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -9,11 +9,15 @@

 services:
  ds2api:
-    build: .
+    build:
+      context: .
+      target: go-builder
    image: ds2api:dev
    container_name: ds2api-dev
+    command: ["go", "run", "./cmd/ds2api"]
    ports:
-      - "${PORT:-5001}:5001"
+      # Host port is configurable via DS2API_HOST_PORT; container port stays fixed at 5001.
+      - "${DS2API_HOST_PORT:-6011}:5001"
    env_file:
      - .env
    environment:
@@ -21,12 +25,9 @@ services:
      - LOG_LEVEL=DEBUG
    volumes:
      # 源代码挂载（开发时实时生效）
-      - ./app.py:/app/app.py:ro
-      - ./core:/app/core:ro
-      - ./routes:/app/routes:ro
-      - ./static:/app/static:ro
+      - ./:/app
      # 配置文件挂载（便于本地修改）
-      - ./config.json:/app/config.json:ro
+      - ./config.json:/app/config.json
    restart: "no"
    stdin_open: true
    tty: true
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,29 +1,16 @@
-# DS2API 生产环境配置
-# 使用说明：
-#   1. 复制 .env.example 为 .env 并填写配置
-#   2. docker-compose up -d
-#   3. 主代码更新后：docker-compose up -d --build
-#
-# 设计原则：
-#   - 零侵入：所有项目配置通过 .env 文件传递
-#   - 易维护：主代码更新只需重新构建镜像
-
 services:
  ds2api:
-    build: .
-    image: ds2api:latest
+    image: ghcr.io/cjackhwang/ds2api:latest
    container_name: ds2api
-    ports:
-      - "${PORT:-5001}:5001"
+    restart: always
    env_file:
      - .env
+    ports:
+      # Host port is configurable via DS2API_HOST_PORT; container port stays fixed at 5001.
+      - "${DS2API_HOST_PORT:-6011}:5001"
+    volumes:
+      - ./config.json:/app/config.json    # 配置文件
    environment:
-      # 确保容器内使用正确的主机绑定
-      - HOST=0.0.0.0
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5001/v1/models')"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 10s
+      - TZ=Asia/Shanghai
+      - LOG_LEVEL=INFO
+      - DS2API_ADMIN_KEY=${DS2API_ADMIN_KEY:-ds2api}
--- a/docs/CONTRIBUTING.en.md
+++ b/docs/CONTRIBUTING.en.md
@@ -0,0 +1,155 @@
+# Contributing Guide
+
+Language: [中文](CONTRIBUTING.md) | [English](CONTRIBUTING.en.md)
+
+Thanks for your interest in contributing to DS2API!
+
+## Development Setup
+
+### Prerequisites
+
+- Go 1.26+
+- Node.js `20.19+` or `22.12+` (for WebUI development)
+- npm (bundled with Node.js)
+
+### Backend Development
+
+```bash
+# 1. Clone
+git clone https://github.com/CJackHwang/ds2api.git
+cd ds2api
+
+# 2. Configure
+cp config.example.json config.json
+# Edit config.json with test accounts
+
+# 3. Run backend
+go run ./cmd/ds2api
+# Local access: http://127.0.0.1:5001
+# Actual bind: 0.0.0.0:5001, so LAN access is available via your private IP
+```
+
+### Frontend Development (WebUI)
+
+```bash
+# 1. Navigate to WebUI directory
+cd webui
+
+# 2. Install dependencies
+npm install
+
+# 3. Start dev server (hot reload)
+npm run dev
+# Default: http://localhost:5173, auto-proxies API to backend
+```
+
+WebUI tech stack:
+- React + Vite
+- Tailwind CSS
+- Bilingual language packs: `webui/src/locales/zh.json` / `en.json`
+
+### Docker Development
+
+```bash
+docker-compose -f docker-compose.dev.yml up
+```
+
+## Code Standards
+
+| Language | Standards |
+| --- | --- |
+| **Go** | Run `gofmt` and ensure `go test ./...` passes before committing |
+| **JavaScript/React** | Follow existing project style (functional components) |
+| **Commit messages** | Use semantic prefixes: `feat:`, `fix:`, `docs:`, `refactor:`, `style:`, `perf:`, `chore:` |
+
+## Submitting a PR
+
+1. Fork the repo
+2. Create a branch (e.g. `feature/xxx` or `fix/xxx`)
+3. Commit changes
+4. Push your branch
+5. Open a Pull Request
+
+> 💡 If you modify files under `webui/`, no manual build is needed — CI handles it automatically.
+> If you want to verify the generated `static/admin/` assets locally, you can still run `./scripts/build-webui.sh`.
+
+## Build WebUI
+
+Manually build WebUI to `static/admin/`:
+
+```bash
+./scripts/build-webui.sh
+```
+
+## Running Tests
+
+```bash
+# Go + Node unit tests (recommended)
+./tests/scripts/run-unit-all.sh
+
+# End-to-end live tests (real accounts)
+./tests/scripts/run-live.sh
+```
+
+## Project Structure
+
+```text
+ds2api/
+├── app/                     # Shared HTTP handler assembly (local + serverless)
+├── cmd/
+│   ├── ds2api/              # Local/container entrypoint
+│   └── ds2api-tests/        # End-to-end testsuite entrypoint
+├── api/
+│   ├── index.go             # Vercel Serverless Go entry
+│   ├── chat-stream.js       # Vercel Node.js stream relay
+│   └── (rewrite targets in vercel.json)
+├── internal/
+│   ├── account/             # Account pool and concurrency queue
+│   ├── adapter/
+│   │   ├── openai/          # OpenAI adapter
+│   │   ├── claude/          # Claude adapter
+│   │   └── gemini/          # Gemini adapter
+│   ├── admin/               # Admin API handlers
+│   ├── auth/                # Auth and JWT
+│   ├── claudeconv/          # Claude message conversion
+│   ├── compat/              # Go-version compatibility and regression helpers
+│   ├── config/              # Config loading, validation, and hot-reload
+│   ├── deepseek/            # DeepSeek client, PoW WASM
+│   ├── js/                  # Node runtime stream/compat logic
+│   ├── devcapture/          # Dev packet capture
+│   ├── format/              # Output formatting
+│   ├── prompt/              # Prompt building
+│   ├── server/              # HTTP routing (chi router)
+│   ├── sse/                 # SSE parsing utilities
+│   ├── stream/              # Unified stream consumption engine
+│   ├── testsuite/           # Testsuite framework and scenario orchestration
+│   ├── translatorcliproxy/  # CLIProxy bridge and stream writer
+│   ├── util/                # Common utilities
+│   ├── version/             # Version parsing and comparison
+│   └── webui/               # WebUI static hosting
+├── webui/                   # React WebUI source
+│   └── src/
+│       ├── app/             # Routing, auth, config state
+│       ├── features/        # Feature modules
+│       ├── components/      # Shared components
+│       └── locales/         # Language packs
+├── scripts/                 # Build and test scripts
+├── tests/
+│   ├── compat/              # Compatibility fixtures and expected outputs
+│   ├── node/                # Node-side unit tests
+│   └── scripts/             # Test script entrypoints (unit/e2e)
+├── plans/                   # Plans, gates, and manual smoke-test records
+├── static/admin/            # WebUI build output (not committed)
+├── Dockerfile               # Multi-stage build
+├── docker-compose.yml       # Production
+├── docker-compose.dev.yml   # Development
+└── vercel.json              # Vercel config
+```
+
+## Reporting Issues
+
+Please use [GitHub Issues](https://github.com/CJackHwang/ds2api/issues) and include:
+
+- Steps to reproduce
+- Relevant log output
+- Environment info (OS, Go version, deployment method)
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -0,0 +1,155 @@
+# 贡献指南
+
+语言 / Language: [中文](CONTRIBUTING.md) | [English](CONTRIBUTING.en.md)
+
+感谢你对 DS2API 的关注与贡献！
+
+## 开发环境设置
+
+### 前置要求
+
+- Go 1.26+
+- Node.js `20.19+` 或 `22.12+`（WebUI 开发时）
+- npm（随 Node.js 提供）
+
+### 后端开发
+
+```bash
+# 1. 克隆仓库
+git clone https://github.com/CJackHwang/ds2api.git
+cd ds2api
+
+# 2. 配置
+cp config.example.json config.json
+# 编辑 config.json，填入测试账号
+
+# 3. 启动后端
+go run ./cmd/ds2api
+# 本地访问 http://127.0.0.1:5001
+# 实际绑定 0.0.0.0:5001，可通过局域网 IP 访问
+```
+
+### 前端开发（WebUI）
+
+```bash
+# 1. 进入 WebUI 目录
+cd webui
+
+# 2. 安装依赖
+npm install
+
+# 3. 启动开发服务器（热更新）
+npm run dev
+# 默认监听 http://localhost:5173，自动代理 API 到后端
+```
+
+WebUI 技术栈：
+- React + Vite
+- Tailwind CSS
+- 中英文语言包：`webui/src/locales/zh.json` / `en.json`
+
+### Docker 开发环境
+
+```bash
+docker-compose -f docker-compose.dev.yml up
+```
+
+## 代码规范
+
+| 语言 | 规范 |
+| --- | --- |
+| **Go** | 提交前运行 `gofmt`，确保 `go test ./...` 通过 |
+| **JavaScript/React** | 保持现有代码风格（函数组件） |
+| **提交信息** | 使用语义化前缀：`feat:`、`fix:`、`docs:`、`refactor:`、`style:`、`perf:`、`chore:` |
+
+## 提交 PR
+
+1. Fork 仓库
+2. 创建分支（如 `feature/xxx` 或 `fix/xxx`）
+3. 提交更改
+4. 推送分支
+5. 发起 Pull Request
+
+> 💡 如果修改了 `webui/` 目录下的文件，无需手动构建——CI 会自动处理。
+> 但如果你本地想验证 `static/admin/` 产物，还是可以手动运行 `./scripts/build-webui.sh`。
+
+## WebUI 构建
+
+手动构建 WebUI 到 `static/admin/`：
+
+```bash
+./scripts/build-webui.sh
+```
+
+## 运行测试
+
+```bash
+# Go + Node 单元测试（推荐）
+./tests/scripts/run-unit-all.sh
+
+# 端到端全链路测试（真实账号）
+./tests/scripts/run-live.sh
+```
+
+## 项目结构
+
+```text
+ds2api/
+├── app/                     # 统一 HTTP Handler 装配（本地 + Serverless）
+├── cmd/
+│   ├── ds2api/              # 本地/容器启动入口
+│   └── ds2api-tests/        # 端到端测试集入口
+├── api/
+│   ├── index.go             # Vercel Serverless Go 入口
+│   ├── chat-stream.js       # Vercel Node.js 流式转发
+│   └── (rewrite targets in vercel.json)
+├── internal/
+│   ├── account/             # 账号池与并发队列
+│   ├── adapter/
+│   │   ├── openai/          # OpenAI 兼容适配器
+│   │   ├── claude/          # Claude 兼容适配器
+│   │   └── gemini/          # Gemini 兼容适配器
+│   ├── admin/               # Admin API handlers
+│   ├── auth/                # 鉴权与 JWT
+│   ├── claudeconv/          # Claude 消息格式转换
+│   ├── compat/              # Go 版本兼容与回归测试辅助
+│   ├── config/              # 配置加载、校验与热更新
+│   ├── deepseek/            # DeepSeek 客户端、PoW WASM
+│   ├── js/                  # Node 运行时流式/兼容逻辑
+│   ├── devcapture/          # 开发抓包
+│   ├── format/              # 输出格式化
+│   ├── prompt/              # Prompt 构建
+│   ├── server/              # HTTP 路由（chi router）
+│   ├── sse/                 # SSE 解析工具
+│   ├── stream/              # 统一流式消费引擎
+│   ├── testsuite/           # 测试集框架与场景编排
+│   ├── translatorcliproxy/  # CLIProxy 桥接与流式写入
+│   ├── util/                # 通用工具
+│   ├── version/             # 版本解析与比较
+│   └── webui/               # WebUI 静态托管
+├── webui/                   # React WebUI 源码
+│   └── src/
+│       ├── app/             # 路由、鉴权、配置状态
+│       ├── features/        # 业务功能模块
+│       ├── components/      # 通用组件
+│       └── locales/         # 语言包
+├── scripts/                 # 构建与测试脚本
+├── tests/
+│   ├── compat/              # 兼容夹具与期望输出
+│   ├── node/                # Node 侧单元测试
+│   └── scripts/             # 测试脚本入口（unit/e2e）
+├── plans/                   # 计划、门禁和手工烟测记录
+├── static/admin/            # WebUI 构建产物（不提交）
+├── Dockerfile               # 多阶段构建
+├── docker-compose.yml       # 生产环境
+├── docker-compose.dev.yml   # 开发环境
+└── vercel.json              # Vercel 配置
+```
+
+## 问题反馈
+
+请使用 [GitHub Issues](https://github.com/CJackHwang/ds2api/issues) 并附上：
+
+- 复现步骤
+- 相关日志输出
+- 运行环境信息（OS、Go 版本、部署方式）
--- a/docs/DEPLOY.en.md
+++ b/docs/DEPLOY.en.md
@@ -0,0 +1,570 @@
+# DS2API Deployment Guide
+
+Language: [中文](DEPLOY.md) | [English](DEPLOY.en.md)
+
+This guide covers all deployment methods for the current Go-based codebase.
+
+---
+
+## Table of Contents
+
+- [Prerequisites](#0-prerequisites)
+- [1. Local Run](#1-local-run)
+- [2. Docker Deployment](#2-docker-deployment)
+- [3. Vercel Deployment](#3-vercel-deployment)
+- [4. Download Release Binaries](#4-download-release-binaries)
+- [5. Reverse Proxy (Nginx)](#5-reverse-proxy-nginx)
+- [6. Linux systemd Service](#6-linux-systemd-service)
+- [7. Post-Deploy Checks](#7-post-deploy-checks)
+- [8. Pre-Release Local Regression](#8-pre-release-local-regression)
+
+---
+
+## 0. Prerequisites
+
+| Dependency | Minimum Version | Notes |
+| --- | --- | --- |
+| Go | 1.26+ | Build backend |
+| Node.js | `20.19+` or `22.12+` | Only needed to build WebUI locally |
+| npm | Bundled with Node.js | Install WebUI dependencies |
+
+Config source (choose one):
+
+- **File**: `config.json` (recommended for local/Docker)
+- **Environment variable**: `DS2API_CONFIG_JSON` (recommended for Vercel; supports raw JSON or Base64)
+
+Unified recommendation (best practice):
+
+```bash
+cp config.example.json config.json
+# Edit config.json
+```
+
+Use `config.json` as the single source of truth:
+- Local run: read `config.json` directly
+- Docker / Vercel: generate `DS2API_CONFIG_JSON` (Base64) from `config.json` and inject it
+
+---
+
+## 1. Local Run
+
+### 1.1 Basic Steps
+
+```bash
+# Clone
+git clone https://github.com/CJackHwang/ds2api.git
+cd ds2api
+
+# Copy and edit config
+cp config.example.json config.json
+# Open config.json and fill in:
+#   - keys: your API access keys
+#   - accounts: DeepSeek accounts (email or mobile + password)
+
+# Start
+go run ./cmd/ds2api
+```
+
+Default address: `http://0.0.0.0:5001` (override with `PORT`).
+
+### 1.2 WebUI Build
+
+On first local startup, if `static/admin/` is missing, DS2API will automatically attempt to build the WebUI (requires Node.js/npm; when dependencies are missing it runs `npm ci` first, then `npm run build -- --outDir static/admin --emptyOutDir`).
+
+Manual build:
+
+```bash
+./scripts/build-webui.sh
+```
+
+Or step by step:
+
+```bash
+cd webui
+npm install
+npm run build
+# Output goes to static/admin/
+```
+
+Control auto-build via environment variable:
+
+```bash
+# Disable auto-build
+DS2API_AUTO_BUILD_WEBUI=false go run ./cmd/ds2api
+
+# Force enable auto-build
+DS2API_AUTO_BUILD_WEBUI=true go run ./cmd/ds2api
+```
+
+### 1.3 Compile to Binary
+
+```bash
+go build -o ds2api ./cmd/ds2api
+./ds2api
+```
+
+---
+
+## 2. Docker Deployment
+
+### 2.1 Basic Steps
+
+```bash
+# Copy env template and config file
+cp .env.example .env
+cp config.example.json config.json
+
+# Edit .env and set at least:
+#   DS2API_ADMIN_KEY=your-admin-key
+# Optionally set the host port:
+#   DS2API_HOST_PORT=6011
+
+# Start
+docker-compose up -d
+
+# View logs
+docker-compose logs -f
+```
+
+The default `docker-compose.yml` maps host port `6011` to container port `5001`. If you want `5001` exposed directly, set `DS2API_HOST_PORT=5001` (or adjust the `ports` mapping).
+
+### 2.2 Update
+
+```bash
+docker-compose pull && docker-compose up -d
+```
+
+### 2.3 Docker Architecture
+
+The `Dockerfile` now provides two image paths:
+
+1. **Default local/dev path (`runtime-from-source`)**: a three-stage build (WebUI build + Go build + runtime).
+2. **Release path (`runtime-from-dist`)**: the release workflow first creates tag-named release archives, then copies the Linux bundles to `dist/docker-input/linux_amd64.tar.gz` / `linux_arm64.tar.gz`; Docker consumes those prepared inputs directly, without rerunning `npm build`/`go build`.
+
+The release path keeps Docker images aligned with release archives and reduces duplicate build work.
+
+Container entry command: `/usr/local/bin/ds2api`, default exposed port: `5001`.
+
+### 2.4 Development Mode
+
+```bash
+docker-compose -f docker-compose.dev.yml up
+```
+
+Development features:
+- Source code mounted (live changes)
+- `LOG_LEVEL=DEBUG`
+- No auto-restart
+
+### 2.5 Health Check
+
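+The default `docker-compose.yml` does not declare a `healthcheck`; to define one at the Compose level, add the following under the `ds2api` service: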
+
+```yaml
+healthcheck:
+  test: ["CMD", "/usr/local/bin/busybox", "wget", "-qO-", "http://localhost:${PORT:-5001}/healthz"]
+  interval: 30s
+  timeout: 10s
+  retries: 3
+  start_period: 10s
+```
+
+### 2.6 Docker Troubleshooting
+
+If container logs look normal but the admin panel is unreachable, check these first:
+
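+1. **Port alignment**: Compose publishes container port `5001` on host port `DS2API_HOST_PORT` (default `6011`), so use that host port in your URL (for example `http://localhost:6011/admin`). If you change `PORT`, update the container side of the `ports` mapping to match.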
+2. **WebUI assets in dev compose**: `docker-compose.dev.yml` runs `go run` in a dev image and does not auto-install Node.js inside the container; if `static/admin` is missing in your repo, `/admin` will return 404. Build once on host: `./scripts/build-webui.sh`.
+
+### 2.7 Zeabur One-Click (Dockerfile)
+
+This repo includes a `zeabur.yaml` template for one-click deployment on Zeabur:
+
+[![Deploy on Zeabur](https://zeabur.com/button.svg)](https://zeabur.com/templates/L4CFHP)
+
+Notes:
+
+- **Port**: DS2API listens on `5001` by default; the template sets `PORT=5001`.
+- **Persistent config**: the template mounts `/data` and sets `DS2API_CONFIG_PATH=/data/config.json`. After importing config in Admin UI, it will be written and persisted to this path.
+- **Build version**: Zeabur / regular `docker build` does not require `BUILD_VERSION` by default. The image prefers that build arg when provided, and automatically falls back to the repo-root `VERSION` file when it is absent.
+- **First login**: after deployment, open `/admin` and log in with the `DS2API_ADMIN_KEY` shown in the Zeabur env/template instructions (recommended: rotate to a strong secret after first login).
+
+---
+
+## 3. Vercel Deployment
+
+### 3.1 Steps
+
+1. **Fork** the repo to your GitHub account
+2. **Import** the project on Vercel
+3. **Set environment variables** (only `DS2API_ADMIN_KEY` is required):
+
+| Variable | Description |
+| --- | --- |
+| `DS2API_ADMIN_KEY` | Admin key (required) |
+| `DS2API_CONFIG_JSON` | Config content, raw JSON or Base64 (optional, recommended) |
+
+4. **Deploy**
+
+### 3.1.1 Recommended Input (avoid `DS2API_CONFIG_JSON` mistakes)
+
+If you prefer a faster one-click bootstrap, you can leave `DS2API_CONFIG_JSON` empty at first, then open `/admin` after deployment, import your config, and sync it back to the Vercel env vars from the "Vercel Sync" page.
+
+Recommended: in repo root, copy the template first and fill your real accounts:
+
+```bash
+cp config.example.json config.json
+# Edit config.json
+```
+
+Do not hand-edit large JSON directly in Vercel. Generate Base64 locally and paste it:
+
+```bash
+# Run in repo root
+DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
+echo "$DS2API_CONFIG_JSON"
+```
+
+If you choose to preconfigure before first deploy, set these vars in Vercel Project Settings -> Environment Variables:
+
+```text
+DS2API_ADMIN_KEY=replace-with-a-strong-secret
+DS2API_CONFIG_JSON=<the single-line Base64 output above>
+```
+
+Optional but recommended (for WebUI one-click Vercel sync):
+
+```text
+VERCEL_TOKEN=your-vercel-token
+VERCEL_PROJECT_ID=prj_xxxxxxxxxxxx
+VERCEL_TEAM_ID=team_xxxxxxxxxxxx   # optional for personal accounts
+```
+
+### 3.2 Optional Environment Variables
+
+| Variable | Description | Default |
+| --- | --- | --- |
+| `DS2API_ACCOUNT_MAX_INFLIGHT` | Per-account inflight limit | `2` |
+| `DS2API_ACCOUNT_MAX_QUEUE` | Waiting queue limit | `recommended_concurrency` |
+| `DS2API_GLOBAL_MAX_INFLIGHT` | Global inflight limit | `recommended_concurrency` |
+| `DS2API_ENV_WRITEBACK` | When `DS2API_CONFIG_JSON` is present, auto-write to `DS2API_CONFIG_PATH` and switch to file-backed mode after success (`1/true/yes/on`) | Disabled |
+| `DS2API_VERCEL_INTERNAL_SECRET` | Hybrid streaming internal auth | Falls back to `DS2API_ADMIN_KEY` |
+| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | Stream lease TTL | `900` |
+| `VERCEL_TOKEN` | Vercel sync token | — |
+| `VERCEL_PROJECT_ID` | Vercel project ID | — |
+| `VERCEL_TEAM_ID` | Vercel team ID | — |
+| `DS2API_VERCEL_PROTECTION_BYPASS` | Deployment protection bypass for internal Node→Go calls | — |
+
+### 3.3 Vercel Architecture
+
+```text
+Request ──────┐
+              │
+              ▼
+         vercel.json routing
+              │
+        ┌─────┴─────┐
+        │           │
+        ▼           ▼
+  api/index.go   api/chat-stream.js
+  (Go Runtime)   (Node Runtime)
+```
+
+- **Go entry**: `api/index.go` (Serverless Go)
+- **Stream entry**: `api/chat-stream.js` (Node Runtime for real-time SSE)
+- **Routing**: `vercel.json`
+- **Build command**: `npm ci --prefix webui && npm run build --prefix webui` (automatic)
+
+#### Streaming Pipeline
+
+Vercel Go Runtime applies platform-level response buffering, so this project uses a hybrid "**Go prepare + Node stream**" path on Vercel:
+
+1. `api/chat-stream.js` receives `/v1/chat/completions` request
+2. Node calls Go internal prepare endpoint (`?__stream_prepare=1`) for session ID, PoW, token
+3. Go prepare creates a stream lease, locking the account
+4. Node connects directly to DeepSeek upstream, relays SSE in real-time to client (including OpenAI chunk framing and tools anti-leak sieve)
+5. After stream ends, Node calls Go release endpoint (`?__stream_release=1`) to free the account
+
+> This adaptation is **Vercel-only**; local and Docker remain pure Go.
+
+#### Non-Stream Fallback and Tool Call Handling
+
+- `api/chat-stream.js` falls back to Go entry (`?__go=1`) for non-stream requests only
+- Streaming requests (including requests with `tools`) stay on the Node path and use Go-aligned tool-call anti-leak handling
+- WebUI non-stream test calls `?__go=1` directly to avoid Node hop timeout on long requests
+
+#### Function Duration
+
+`vercel.json` sets `maxDuration: 300` for both `api/chat-stream.js` and `api/index.go` (subject to your Vercel plan limits).
+
+### 3.4 Vercel Troubleshooting
+
+#### Go Build Failure
+
+```text
+Error: Command failed: go build -ldflags -s -w -o .../bootstrap ...
+```
+
+**Cause**: Invalid Go build flag settings in Vercel (`-ldflags` not passed as a single argument).
+
+**Fix**:
+
+1. Open Vercel Project Settings → Build and Development Settings
+2. **Clear** custom Go Build Flags / Build Command (recommended)
+3. If ldflags must be used, set `-ldflags="-s -w"` (ensure it's one argument)
+4. Verify `go.mod` uses a supported version (currently `go 1.26.0`)
+5. Redeploy (recommended: clear cache)
+
+#### Internal Package Import Error
+
+```text
+use of internal package ds2api/internal/server not allowed
+```
+
+**Cause**: Vercel Go entrypoint directly imports `internal/...`.
+
+**Fix**: This repo uses a public bridge package: `api/index.go` → `ds2api/app` → `internal/server`.
+
+#### Output Directory Error
+
+```text
+No Output Directory named "public" found after the Build completed.
+```
+
+**Fix**: This repo uses `static` as output directory (`"outputDirectory": "static"` in `vercel.json`). If you manually changed Output Directory in Project Settings, set it to `static` or clear it.
+
+#### Deployment Protection Blocking
+
+If API responses return Vercel HTML `Authentication Required`:
+
+- **Option A**: Disable Deployment Protection for that environment (recommended for public APIs)
+- **Option B**: Add `x-vercel-protection-bypass` header to requests
+- **Option C**: Set `VERCEL_AUTOMATION_BYPASS_SECRET` (or `DS2API_VERCEL_PROTECTION_BYPASS`) for internal Node→Go calls
+
+### 3.5 Build Artifacts Not Committed
+
+- `static/admin` directory is not in Git
+- Vercel / Docker automatically generate WebUI assets during build
+
+---
+
+## 4. Download Release Binaries
+
+Built-in GitHub Actions workflow: `.github/workflows/release-artifacts.yml`
+
+- **Trigger**: only on Release `published` (no build on normal push)
+- **Outputs**: multi-platform binary archives + `sha256sums.txt`
+- **Container publishing**: GHCR only (`ghcr.io/cjackhwang/ds2api`)
+
+| Platform | Architecture | Format |
+| --- | --- | --- |
+| Linux | amd64, arm64 | `.tar.gz` |
+| macOS | amd64, arm64 | `.tar.gz` |
+| Windows | amd64 | `.zip` |
+
+Each archive includes:
+
+- `ds2api` executable (`ds2api.exe` on Windows)
+- `static/admin/` (built WebUI assets)
+- `sha3_wasm_bg.7b9ca65ddd.wasm` (optional; binary has embedded fallback)
+- `config.example.json`, `.env.example`
+- `README.MD`, `README.en.md`, `LICENSE`
+
+### Usage
+
+```bash
+# 1. Download the archive for your platform
+# 2. Extract
+tar -xzf ds2api_<tag>_linux_amd64.tar.gz
+cd ds2api_<tag>_linux_amd64
+
+# 3. Configure
+cp config.example.json config.json
+# Edit config.json
+
+# 4. Start
+./ds2api
+```
+
+### Maintainer Release Flow
+
+1. Create and publish a GitHub Release (with tag, for example `vX.Y.Z`)
+2. Wait for the `Release Artifacts` workflow to complete
+3. Download the matching archive from Release Assets
+
+### Pull from GHCR (Optional)
+
+```bash
+# latest
+docker pull ghcr.io/cjackhwang/ds2api:latest
+
+# specific version (example)
+docker pull ghcr.io/cjackhwang/ds2api:v3.0.0
+```
+
+---
+
+## 5. Reverse Proxy (Nginx)
+
+When deploying behind Nginx, **you must disable buffering** for SSE streaming to work:
+
+```nginx
+location / {
+    proxy_pass http://127.0.0.1:5001;
+    proxy_http_version 1.1;
+    proxy_set_header Connection "";
+    proxy_buffering off;
+    proxy_cache off;
+    chunked_transfer_encoding on;
+    tcp_nodelay on;
+}
+```
+
+For HTTPS, add SSL at the Nginx layer:
+
+```nginx
+server {
+    listen 443 ssl;
+    server_name api.example.com;
+
+    ssl_certificate /path/to/cert.pem;
+    ssl_certificate_key /path/to/key.pem;
+
+    location / {
+        proxy_pass http://127.0.0.1:5001;
+        proxy_http_version 1.1;
+        proxy_set_header Connection "";
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_buffering off;
+        proxy_cache off;
+        chunked_transfer_encoding on;
+        tcp_nodelay on;
+    }
+}
+```
+
+---
+
+## 6. Linux systemd Service
+
+### 6.1 Installation
+
+```bash
+# Copy compiled binary and related files to target directory
+sudo mkdir -p /opt/ds2api/static
+sudo cp ds2api config.json /opt/ds2api/
+# Optional: if you want to use an external WASM file (override the embedded one, from a release package or build output)
+# sudo cp /path/to/sha3_wasm_bg.7b9ca65ddd.wasm /opt/ds2api/
+sudo cp -r static/admin /opt/ds2api/static/admin
+```
+
+### 6.2 Create systemd Service File
+
+```ini
+# /etc/systemd/system/ds2api.service
+
+[Unit]
+Description=DS2API (Go)
+After=network.target
+
+[Service]
+Type=simple
+WorkingDirectory=/opt/ds2api
+Environment=PORT=5001
+Environment=DS2API_CONFIG_PATH=/opt/ds2api/config.json
+Environment=DS2API_ADMIN_KEY=your-admin-key-here
+ExecStart=/opt/ds2api/ds2api
+Restart=always
+RestartSec=5
+
+[Install]
+WantedBy=multi-user.target
+```
+
+### 6.3 Common Commands
+
+```bash
+# Reload service config
+sudo systemctl daemon-reload
+
+# Enable on boot
+sudo systemctl enable ds2api
+
+# Start
+sudo systemctl start ds2api
+
+# Check status
+sudo systemctl status ds2api
+
+# View logs
+sudo journalctl -u ds2api -f
+
+# Restart
+sudo systemctl restart ds2api
+
+# Stop
+sudo systemctl stop ds2api
+```
+
+---
+
+## 7. Post-Deploy Checks
+
+After deployment (any method), verify in order:
+
+```bash
+# 1. Liveness probe
+curl -s http://127.0.0.1:5001/healthz
+# Expected: {"status":"ok"}
+
+# 2. Readiness probe
+curl -s http://127.0.0.1:5001/readyz
+# Expected: {"status":"ready"}
+
+# 3. Model list
+curl -s http://127.0.0.1:5001/v1/models
+# Expected: {"object":"list","data":[...]}
+
+# 4. Admin panel (if WebUI is built)
+curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin
+# Expected: 200
+
+# 5. Test API call
+curl http://127.0.0.1:5001/v1/chat/completions \
+  -H "Authorization: Bearer your-api-key" \
+  -H "Content-Type: application/json" \
+  -d '{"model":"deepseek-chat","messages":[{"role":"user","content":"hello"}]}'
+```
+
+---
+
+## 8. Pre-Release Local Regression
+
+Run the full live testsuite before release (real account tests):
+
+```bash
+./tests/scripts/run-live.sh
+```
+
+With custom flags:
+
+```bash
+go run ./cmd/ds2api-tests \
+  --config config.json \
+  --admin-key admin \
+  --out artifacts/testsuite \
+  --timeout 120 \
+  --retries 2
+```
+
+The testsuite automatically performs:
+
+- ✅ Preflight checks (syntax/build/unit tests)
+- ✅ Isolated config copy startup (no mutation to your original `config.json`)
+- ✅ Live scenario verification (OpenAI/Claude/Admin/concurrency/toolcall/streaming)
+- ✅ Full request/response artifact logging for debugging
+
+For detailed testsuite documentation, see [TESTING.md](TESTING.md).
--- a/docs/DEPLOY.md
+++ b/docs/DEPLOY.md
@@ -0,0 +1,570 @@
+# DS2API 部署指南
+
+语言 / Language: [中文](DEPLOY.md) | [English](DEPLOY.en.md)
+
+本指南基于当前 Go 代码库，详细说明各种部署方式。
+
+---
+
+## 目录
+
+- [前置要求](#0-前置要求)
+- [一、本地运行](#一本地运行)
+- [二、Docker 部署](#二docker-部署)
+- [三、Vercel 部署](#三vercel-部署)
+- [四、下载 Release 构建包](#四下载-release-构建包)
+- [五、反向代理（Nginx）](#五反向代理nginx)
+- [六、Linux systemd 服务化](#六linux-systemd-服务化)
+- [七、部署后检查](#七部署后检查)
+- [八、发布前进行本地回归](#八发布前进行本地回归)
+
+---
+
+## 0. 前置要求
+
+| 依赖 | 最低版本 | 说明 |
+| --- | --- | --- |
+| Go | 1.26+ | 编译后端 |
+| Node.js | `20.19+` 或 `22.12+` | 仅在需要本地构建 WebUI 时 |
+| npm | 随 Node.js 提供 | 安装 WebUI 依赖 |
+
+配置来源（任选其一）：
+
+- **文件方式**：`config.json`（推荐本地/Docker 使用）
+- **环境变量方式**：`DS2API_CONFIG_JSON`（推荐 Vercel 使用，支持直接填写原始 JSON 字符串，或填写其 Base64 编码）
+
+统一建议（最佳实践）：
+
+```bash
+cp config.example.json config.json
+# 编辑 config.json
+```
+
+建议把 `config.json` 作为唯一配置源：
+- 本地运行：直接读 `config.json`
+- Docker / Vercel：从 `config.json` 生成 `DS2API_CONFIG_JSON`（Base64）注入环境变量
+
+---
+
+## 一、本地运行
+
+### 1.1 基本步骤
+
+```bash
+# 克隆仓库
+git clone https://github.com/CJackHwang/ds2api.git
+cd ds2api
+
+# 复制并编辑配置
+cp config.example.json config.json
+# 使用你喜欢的编辑器打开 config.json，填入：
+#   - keys: 你的 API 访问密钥
+#   - accounts: DeepSeek 账号（email 或 mobile + password）
+
+# 启动服务
+go run ./cmd/ds2api
+```
+
+默认监听 `http://0.0.0.0:5001`，可通过 `PORT` 环境变量覆盖。
+
+### 1.2 WebUI 构建
+
+本地首次启动时，若 `static/admin/` 不存在，服务会自动尝试构建 WebUI（需要 Node.js/npm；缺依赖时会先执行 `npm ci`，再执行 `npm run build -- --outDir static/admin --emptyOutDir`）。
+
+你也可以手动构建：
+
+```bash
+./scripts/build-webui.sh
+```
+
+或手动执行：
+
+```bash
+cd webui
+npm install
+npm run build
+# 产物输出到 static/admin/
+```
+
+通过环境变量控制自动构建行为：
+
+```bash
+# 强制关闭自动构建
+DS2API_AUTO_BUILD_WEBUI=false go run ./cmd/ds2api
+
+# 强制开启自动构建
+DS2API_AUTO_BUILD_WEBUI=true go run ./cmd/ds2api
+```
+
+### 1.3 编译为二进制文件
+
+```bash
+go build -o ds2api ./cmd/ds2api
+./ds2api
+```
+
+---
+
+## 二、Docker 部署
+
+### 2.1 基本步骤
+
+```bash
+# 复制环境变量模板和配置文件
+cp .env.example .env
+cp config.example.json config.json
+
+# 编辑 .env（请改成你的强密码），至少设置：
+#   DS2API_ADMIN_KEY=your-admin-key
+# 如需修改宿主机端口，可额外设置：
+#   DS2API_HOST_PORT=6011
+
+# 启动
+docker-compose up -d
+
+# 查看日志
+docker-compose logs -f
+```
+
+默认 `docker-compose.yml` 会把宿主机 `6011` 映射到容器内的 `5001`。如果你希望直接对外暴露 `5001`，请设置 `DS2API_HOST_PORT=5001`（或者手动调整 `ports` 配置）。
+
+### 2.2 更新
+
+```bash
+docker-compose up -d --build
+```
+
+### 2.3 Docker 架构说明
+
+`Dockerfile` 提供两条构建路径：
+
+1. **本地/开发默认路径（`runtime-from-source`）**：三阶段构建（WebUI 构建 + Go 构建 + 运行阶段）。
+2. **Release 路径（`runtime-from-dist`）**：发布工作流先生成 tag 命名的 Release 压缩包，再把 Linux 产物复制成 `dist/docker-input/linux_amd64.tar.gz` / `linux_arm64.tar.gz`；Docker 构建阶段直接消费这些输入，不再重复执行 `npm build`/`go build`。
+
+Release 路径可确保 Docker 镜像与 release 压缩包使用同一套产物，减少重复构建带来的差异。
+
+容器内启动命令：`/usr/local/bin/ds2api`，默认暴露端口 `5001`。
+
+### 2.4 开发环境
+
+```bash
+docker-compose -f docker-compose.dev.yml up
+```
+
+开发模式特性：
+- 源代码挂载（修改即生效）
+- `LOG_LEVEL=DEBUG`
+- 不自动重启
+
+### 2.5 健康检查
+
+Docker Compose 已配置内置健康检查：
+
+```yaml
+healthcheck:
+  test: ["CMD", "/usr/local/bin/busybox", "wget", "-qO-", "http://localhost:${PORT:-5001}/healthz"]
+  interval: 30s
+  timeout: 10s
+  retries: 3
+  start_period: 10s
+```
+
+### 2.6 Docker 常见排查
+
+如果容器日志正常但面板打不开，优先检查：
+
+1. **端口是否一致**：`PORT` 改成非 `5001` 时，访问地址也要改成对应端口（如 `http://localhost:8080/admin`）。
+2. **开发 compose 的 WebUI 静态文件**：`docker-compose.dev.yml` 使用 `go run` 开发镜像，不会在容器内自动安装 Node.js；若仓库里没有 `static/admin`，`/admin` 会返回 404。可先在宿主机构建一次：`./scripts/build-webui.sh`。
+
+### 2.7 Zeabur 一键部署（Dockerfile）
+
+仓库提供 `zeabur.yaml` 模板，可在 Zeabur 上一键部署：
+
+[![Deploy on Zeabur](https://zeabur.com/button.svg)](https://zeabur.com/templates/L4CFHP)
+
+部署要点：
+
+- **端口**：服务默认监听 `5001`，模板会固定设置 `PORT=5001`。
+- **配置持久化**：模板挂载卷 `/data`，并设置 `DS2API_CONFIG_PATH=/data/config.json`；在管理台导入配置后，会写入并持久化到该路径。
+- **构建版本号**：Zeabur / 普通 `docker build` 默认不需要传 `BUILD_VERSION`；镜像会优先使用该构建参数，未提供时自动回退到仓库根目录的 `VERSION` 文件。
+- **首次登录**：部署完成后访问 `/admin`，使用 Zeabur 环境变量/模板指引中的 `DS2API_ADMIN_KEY` 登录（建议首次登录后自行更换为强密码）。
+
+---
+
+## 三、Vercel 部署
+
+### 3.1 部署步骤
+
+1. **Fork 仓库**到你的 GitHub 账号
+2. **在 Vercel 上导入项目**
+3. **配置环境变量**（最少只需设置 `DS2API_ADMIN_KEY` 一项）：
+
+| 变量 | 说明 |
+| --- | --- |
+| `DS2API_ADMIN_KEY` | 管理密钥（必填） |
+| `DS2API_CONFIG_JSON` | 配置内容，JSON 字符串或 Base64 编码（可选，建议） |
+
+4. **部署**
+
+### 3.1.1 推荐填写方式（避免 `DS2API_CONFIG_JSON` 填错）
+
+如果你想先完成一键部署，也可以先不填 `DS2API_CONFIG_JSON`，部署后进入 `/admin` 导入配置，再在「Vercel 同步」里写回环境变量。
+
+建议先在仓库目录复制示例配置，再按实际账号填写：
+
+```bash
+cp config.example.json config.json
+# 编辑 config.json
+```
+
+不要在 Vercel 面板里手写复杂 JSON，建议本地生成 Base64 后粘贴：
+
+```bash
+# 在仓库根目录执行
+DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
+echo "$DS2API_CONFIG_JSON"
+```
+
+如果你选择在部署前就预置配置，请在 Vercel Project Settings -> Environment Variables 配置：
+
+```text
+DS2API_ADMIN_KEY=请替换为强密码
+DS2API_CONFIG_JSON=上一步生成的一整行 Base64
+```
+
+可选但推荐（用于 WebUI 一键同步 Vercel 配置）：
+
+```text
+VERCEL_TOKEN=你的 Vercel Token
+VERCEL_PROJECT_ID=prj_xxxxxxxxxxxx
+VERCEL_TEAM_ID=team_xxxxxxxxxxxx   # 个人账号可留空
+```
+
+### 3.2 可选环境变量
+
+| 变量 | 说明 | 默认值 |
+| --- | --- | --- |
+| `DS2API_ACCOUNT_MAX_INFLIGHT` | 每账号并发上限 | `2` |
+| `DS2API_ACCOUNT_MAX_QUEUE` | 等待队列上限 | `recommended_concurrency` |
+| `DS2API_GLOBAL_MAX_INFLIGHT` | 全局并发上限 | `recommended_concurrency` |
+| `DS2API_ENV_WRITEBACK` | 检测到 `DS2API_CONFIG_JSON` 时自动写入 `DS2API_CONFIG_PATH`，并在成功后转为文件模式（`1/true/yes/on`） | 关闭 |
+| `DS2API_VERCEL_INTERNAL_SECRET` | 混合流式内部鉴权 | 回退用 `DS2API_ADMIN_KEY` |
+| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | 流式 lease TTL | `900` |
+| `VERCEL_TOKEN` | Vercel 同步 token | — |
+| `VERCEL_PROJECT_ID` | Vercel 项目 ID | — |
+| `VERCEL_TEAM_ID` | Vercel 团队 ID | — |
+| `DS2API_VERCEL_PROTECTION_BYPASS` | 部署保护绕过密钥（内部 Node→Go 调用） | — |
+
+### 3.3 Vercel 架构说明
+
+```text
+请求 ─────┐
+          │
+          ▼
+     vercel.json 路由规则
+          │
+    ┌─────┴─────┐
+    │           │
+    ▼           ▼
+api/index.go  api/chat-stream.js
+(Go Runtime)  (Node Runtime)
+```
+
+- **入口文件**：`api/index.go`（Serverless Go）
+- **流式入口**：`api/chat-stream.js`（Node Runtime，保证实时 SSE）
+- **路由重写**：`vercel.json`
+- **构建命令**：`npm ci --prefix webui && npm run build --prefix webui`（自动执行）
+
+#### 流式处理链路
+
+由于 Vercel Go Runtime 存在平台层响应缓冲，本项目在 Vercel 上采用"**Go prepare + Node stream**"的混合链路：
+
+1. `api/chat-stream.js` 收到 `/v1/chat/completions` 请求
+2. Node 调用 Go 内部 prepare 接口（`?__stream_prepare=1`），获取会话 ID、PoW、token 等
+3. Go prepare 创建 stream lease，锁定账号
+4. Node 直连 DeepSeek 上游，实时流式转发 SSE 给客户端（含 OpenAI chunk 封装与 tools 防泄漏筛分）
+5. 流结束后 Node 调用 Go release 接口（`?__stream_release=1`），释放账号
+
+> 该适配**仅在 Vercel 环境生效**；本地与 Docker 仍走纯 Go 链路。
+
+#### 非流式回退与 Tool Call 处理
+
+- `api/chat-stream.js` 仅对非流式请求回退到 Go 入口（`?__go=1`）
+- 流式请求（包括带 `tools`）走 Node 路径，并执行与 Go 对齐的 tool-call 防泄漏处理
+- WebUI 的"非流式测试"直接请求 `?__go=1`，避免 Node 中转造成长请求超时
+
+#### 函数时长
+
+`vercel.json` 已将 `api/chat-stream.js` 与 `api/index.go` 的 `maxDuration` 设为 `300`（受 Vercel 套餐上限约束）。
+
+### 3.4 Vercel 常见报错排查
+
+#### Go 构建失败
+
+```text
+Error: Command failed: go build -ldflags -s -w -o .../bootstrap ...
+```
+
+**原因**：Vercel 项目的 Go 构建参数配置不正确（`-ldflags` 没有作为一个整体字符串传递）。
+
+**解决**：
+
+1. 进入 Vercel Project Settings → Build and Development Settings
+2. **清空**自定义 Go Build Flags / Build Command（推荐）
+3. 若必须设置 ldflags，使用 `-ldflags="-s -w"`（保证它是一个参数）
+4. 确认仓库 `go.mod` 为受支持版本（当前为 `go 1.26.0`）
+5. 重新部署（建议清缓存后 Redeploy）
+
+#### Internal 包导入错误
+
+```text
+use of internal package ds2api/internal/server not allowed
+```
+
+**原因**：Vercel Go 入口文件直接 `import internal/...`。
+
+**解决**：当前仓库已通过公开桥接包 `app` 解决：`api/index.go` → `ds2api/app` → `internal/server`。
+
+#### 输出目录错误
+
+```text
+No Output Directory named "public" found after the Build completed.
+```
+
+**解决**：当前仓库使用 `static` 作为输出目录（`vercel.json` 中 `"outputDirectory": "static"`）。若你在项目设置里手动改过 Output Directory，请设为 `static` 或清空让仓库配置生效。
+
+#### 部署保护拦截
+
+如果接口返回 Vercel HTML 页面 `Authentication Required`：
+
+- **方案 A**：关闭该部署/环境的 Deployment Protection（推荐用于公开 API）
+- **方案 B**：请求中添加 `x-vercel-protection-bypass` 头
+- **方案 C**：设置 `VERCEL_AUTOMATION_BYPASS_SECRET`（或 `DS2API_VERCEL_PROTECTION_BYPASS`），仅影响内部 Node→Go 调用
+
+### 3.5 仓库不提交构建产物
+
+- `static/admin` 目录不在 Git 中
+- Vercel / Docker 构建阶段自动生成 WebUI 静态文件
+
+---
+
+## 四、下载 Release 构建包
+
+仓库内置 GitHub Actions 工作流：`.github/workflows/release-artifacts.yml`
+
+- **触发条件**：仅在 Release `published` 时触发（普通 push 不会构建）
+- **构建产物**：多平台二进制压缩包 + `sha256sums.txt`
+- **容器镜像发布**：仅发布到 GHCR（`ghcr.io/cjackhwang/ds2api`）
+
+| 平台 | 架构 | 文件格式 |
+| --- | --- | --- |
+| Linux | amd64, arm64 | `.tar.gz` |
+| macOS | amd64, arm64 | `.tar.gz` |
+| Windows | amd64 | `.zip` |
+
+每个压缩包包含：
+
+- `ds2api` 可执行文件（Windows 为 `ds2api.exe`）
+- `static/admin/`（WebUI 构建产物）
+- `sha3_wasm_bg.7b9ca65ddd.wasm`（可选；程序内置 embed fallback）
+- `config.example.json`、`.env.example`
+- `README.MD`、`README.en.md`、`LICENSE`
+
+### 使用步骤
+
+```bash
+# 1. 下载对应平台的压缩包
+# 2. 解压
+tar -xzf ds2api_<tag>_linux_amd64.tar.gz
+cd ds2api_<tag>_linux_amd64
+
+# 3. 配置
+cp config.example.json config.json
+# 编辑 config.json
+
+# 4. 启动
+./ds2api
+```
+
+### 维护者发布步骤
+
+1. 在 GitHub 创建并发布 Release（带 tag，如 `vX.Y.Z`）
+2. 等待 Actions 工作流 `Release Artifacts` 完成
+3. 在 Release 的 Assets 下载对应平台压缩包
+
+### 拉取 GHCR 镜像（可选）
+
+```bash
+# latest
+docker pull ghcr.io/cjackhwang/ds2api:latest
+
+# 指定版本（示例）
+docker pull ghcr.io/cjackhwang/ds2api:v3.0.0
+```
+
+---
+
+## 五、反向代理（Nginx）
+
+如果在 Nginx 后部署，**必须关闭缓冲**以保证 SSE 流式响应正常工作：
+
+```nginx
+location / {
+    proxy_pass http://127.0.0.1:5001;
+    proxy_http_version 1.1;
+    proxy_set_header Connection "";
+    proxy_buffering off;
+    proxy_cache off;
+    chunked_transfer_encoding on;
+    tcp_nodelay on;
+}
+```
+
+如果需要 HTTPS，可以在 Nginx 层配置 SSL 证书：
+
+```nginx
+server {
+    listen 443 ssl;
+    server_name api.example.com;
+
+    ssl_certificate /path/to/cert.pem;
+    ssl_certificate_key /path/to/key.pem;
+
+    location / {
+        proxy_pass http://127.0.0.1:5001;
+        proxy_http_version 1.1;
+        proxy_set_header Connection "";
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_buffering off;
+        proxy_cache off;
+        chunked_transfer_encoding on;
+        tcp_nodelay on;
+    }
+}
+```
+
+---
+
+## 六、Linux systemd 服务化
+
+### 6.1 安装
+
+```bash
+# 将编译好的二进制文件和相关文件复制到目标目录
+sudo mkdir -p /opt/ds2api/static
+sudo cp ds2api config.json /opt/ds2api/
+# 可选：若你希望使用外置 WASM 文件（覆盖内置版本，来自 release 包或构建产物）
+# sudo cp /path/to/sha3_wasm_bg.7b9ca65ddd.wasm /opt/ds2api/
+sudo cp -r static/admin /opt/ds2api/static/admin
+```
+
+### 6.2 创建 systemd 服务文件
+
+```ini
+# /etc/systemd/system/ds2api.service
+
+[Unit]
+Description=DS2API (Go)
+After=network.target
+
+[Service]
+Type=simple
+WorkingDirectory=/opt/ds2api
+Environment=PORT=5001
+Environment=DS2API_CONFIG_PATH=/opt/ds2api/config.json
+Environment=DS2API_ADMIN_KEY=your-admin-key-here
+ExecStart=/opt/ds2api/ds2api
+Restart=always
+RestartSec=5
+
+[Install]
+WantedBy=multi-user.target
+```
+
+### 6.3 常用命令
+
+```bash
+# 加载服务配置
+sudo systemctl daemon-reload
+
+# 设置开机自启
+sudo systemctl enable ds2api
+
+# 启动服务
+sudo systemctl start ds2api
+
+# 查看状态
+sudo systemctl status ds2api
+
+# 查看日志
+sudo journalctl -u ds2api -f
+
+# 重启服务
+sudo systemctl restart ds2api
+
+# 停止服务
+sudo systemctl stop ds2api
+```
+
+---
+
+## 七、部署后检查
+
+无论使用哪种部署方式，启动后建议依次检查：
+
+```bash
+# 1. 存活探针
+curl -s http://127.0.0.1:5001/healthz
+# 预期: {"status":"ok"}
+
+# 2. 就绪探针
+curl -s http://127.0.0.1:5001/readyz
+# 预期: {"status":"ready"}
+
+# 3. 模型列表
+curl -s http://127.0.0.1:5001/v1/models
+# 预期: {"object":"list","data":[...]}
+
+# 4. 管理台页面（如果已构建 WebUI）
+curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:5001/admin
+# 预期: 200
+
+# 5. 测试 API 调用
+curl http://127.0.0.1:5001/v1/chat/completions \
+  -H "Authorization: Bearer your-api-key" \
+  -H "Content-Type: application/json" \
+  -d '{"model":"deepseek-chat","messages":[{"role":"user","content":"hello"}]}'
+```
+
+---
+
+## 八、发布前进行本地回归
+
+建议在发布前执行完整的端到端测试集（使用真实账号）：
+
+```bash
+./tests/scripts/run-live.sh
+```
+
+可自定义参数：
+
+```bash
+go run ./cmd/ds2api-tests \
+  --config config.json \
+  --admin-key admin \
+  --out artifacts/testsuite \
+  --timeout 120 \
+  --retries 2
+```
+
+测试集自动执行内容：
+
+- ✅ 语法/构建/单测 preflight
+- ✅ 隔离副本配置启动服务（不污染原始 `config.json`）
+- ✅ 真实调用场景验证（OpenAI/Claude/Admin/并发/toolcall/流式）
+- ✅ 全量请求与响应日志落盘（用于故障复盘）
+
+详细测试集说明参阅 [TESTING.md](TESTING.md)。
--- a/docs/DeepSeekSSE行为结构说明-2026-04-05.md
+++ b/docs/DeepSeekSSE行为结构说明-2026-04-05.md
@@ -0,0 +1,313 @@
+# DeepSeek SSE 行为结构说明（第三方逆向版）
+
+> 说明：本文基于当前仓库 `tests/raw_stream_samples/` 下全部 `upstream.stream.sse` 原始流样本整理而成，属于第三方逆向观察文档，不是官方协议。
+> 当前 corpus 由 4 份原始流组成，覆盖搜索+引用、风控终态、Markdown 输出和空格敏感输出等行为。
+> 补充：文末还会注明少量“当前实现已确认、但 corpus 尚未完整覆盖”的行为，例如长思考场景下的自动续写状态。
+
+## 1. 样本覆盖
+
+下列样本共同构成了本文的观察基础：
+
+| 样本 | 观察重点 |
+| --- | --- |
+| [guangzhou-weather-reasoner-search-20260404](../tests/raw_stream_samples/guangzhou-weather-reasoner-search-20260404/upstream.stream.sse) | 搜索+思考流程，包含 `reference:N` 引用标记与工具片段 |
+| [content-filter-trigger-20260405-jwt3](../tests/raw_stream_samples/content-filter-trigger-20260405-jwt3/upstream.stream.sse) | `CONTENT_FILTER` 终态分支，包含拒答模板与 `ban_regenerate` |
+| [markdown-format-example-20260405](../tests/raw_stream_samples/markdown-format-example-20260405/upstream.stream.sse) | Markdown 输出的早期样本，用于观察 token 级输出形态 |
+| [markdown-format-example-20260405-spacefix](../tests/raw_stream_samples/markdown-format-example-20260405-spacefix/upstream.stream.sse) | Markdown 输出修正样本，用于验证空格 chunk 必须保留 |
+
+当前 corpus 的整体特征是 `message` 帧占绝对多数，控制事件只占很小一部分，但它们决定了流的生命周期和最终状态。
+
+## 2. 总体结构
+
+DeepSeek 的这类输出可以分成两层看：
+
+1. SSE 事件层。
+2. JSON 载荷层。
+
+事件层负责传输边界，载荷层负责业务状态。实现时不要把 HTTP chunk、SSE block 和业务 JSON 混为一体。
+
+最常见的时序可以概括为：
+
+```text
+ready
+update_session
+message(初始化 envelope)
+message(正文 / 片段 / 状态增量)
+message(状态收口)
+finish
+update_session
+title
+close
+```
+
+`finish` 表示生成流结束，但不是唯一的终止信号；真正的语义终态通常还要结合 `response/status`、`quasi_status` 和 `close` 一起判断。
+
+## 3. SSE 事件层
+
+当前 corpus 中观察到的事件类型如下：
+
+| 事件 | 作用 | 处理建议 |
+| --- | --- | --- |
+| `ready` | 传输层就绪，通常携带 `request_message_id`、`response_message_id`、`model_type` | 记录元数据即可，不参与正文拼接 |
+| `update_session` | 会话时间戳或心跳更新 | 当作会话状态帧处理 |
+| `message` | 主体载荷，绝大多数业务信息都在这里 | 必须按顺序解析并保序累积 |
+| `finish` | 生成阶段结束 | 作为流结束标记之一 |
+| `title` | 会话标题生成结果 | 元数据帧，不参与正文拼接 |
+| `close` | 连接关闭信息 | 仅用于收尾与审计 |
+
+说明：
+
+- `message` 是默认事件名，SSE 中没有显式 `event:` 时也应按 `message` 处理。
+- 目前样本里大量 `message` 帧没有独立的业务前缀，不能靠事件名区分正文和控制帧。
+- 可能出现空 payload 的 `message` 帧；它们应被视为 no-op，但不能打乱事件顺序。
+
+## 4. 载荷层形态
+
+`message` 的 `data:` 部分不是单一 schema，而是多种结构混合。当前 corpus 里主要见到以下几种形态：
+
+| 形态 | 典型结构 | 作用 |
+| --- | --- | --- |
+| 初始化 envelope | `{"v":{"response":{...}}}` | 给出会话初始状态、模型状态和片段容器 |
+| 纯文本 token | `{"v":"..."}` | 直接输出可见文本 token |
+| 路径补丁 | `{"p":"...","o":"APPEND\|SET\|BATCH","v":...}` | 对某个状态路径做增量更新 |
+| 终态 batch | `{"v":[{"p":"status","v":"CONTENT_FILTER"}, ...]}` | 尾部状态收口，常见于风控终态 |
+
+一个简化后的典型样式如下：
+
+```json
+{"v":"输出"}
+{"p":"response/fragments/-1/content","o":"APPEND","v":"..."}
+{"p":"response/fragments","o":"APPEND","v":[...]}
+{"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":211},{"p":"quasi_status","v":"FINISHED"}]}
+{"p":"response/status","o":"SET","v":"FINISHED"}
+```
+
+注意：
+
+- `v` 可能是字符串、对象、数组、布尔值或数字。
+- `o` 当前样本里主要见到 `APPEND`、`SET`、`BATCH`。
+- `v` 为数组时，通常表示一个批量 patch 集合，而不是正文数组。
+
+## 5. 初始化 envelope
+
+每条流开头，常会先出现一个 `message` 帧，内容是完整的 `response` 初始状态。当前 corpus 中，这个 envelope 常见字段包括：
+
+- `message_id`
+- `parent_id`
+- `model`
+- `role`
+- `thinking_enabled`
+- `ban_edit`
+- `ban_regenerate`
+- `status`
+- `incomplete_message`
+- `accumulated_token_usage`
+- `files`
+- `feedback`
+- `inserted_at`
+- `search_enabled`
+- `fragments`
+- `conversation_mode`
+- `has_pending_fragment`
+- `auto_continue`
+- `search_triggered`
+
+这些字段更像会话状态和策略开关，不是正文内容。第三方实现应把它们保留在内部状态树里，而不是直接拼接到最终答案。
+
+## 6. 路径结构
+
+当前 corpus 里观察到的 `p` 路径可以归成几组：
+
+### 6.1 片段级路径
+
+- `response/fragments/-N/content`
+- `response/fragments/-N/status`
+- `response/fragments/-N/results`
+- `response/fragments/-N/elapsed_secs`
+
+这类路径表示某个片段对象的增量更新。`-N` 只是样本中的索引风格，不应被写死成固定数量。
+
+### 6.2 片段容器路径
+
+- `response/fragments`
+- `fragments`
+
+这两类路径通常承载 fragment 数组。前者更像响应树中的分支，后者更像终态批处理里的片段集合。
+
+### 6.3 语义状态路径
+
+- `response/status`
+- `response/has_pending_fragment`
+- `quasi_status`
+- `status`
+- `ban_regenerate`
+
+这类路径决定流是否结束、是否被风控、是否还有待处理片段。它们不应作为正文输出。
+
+尤其是 `response/status` / `status` 这类路径上的字符串值，应被视为控制信号而不是文本 token。当前已确认需要特殊对待的值包括：
+
+- `FINISHED`：正常完成终态，应触发收口。
+- `CONTENT_FILTER`：风控终态，应走拒答/模板分支。
+- `WIP` / `INCOMPLETE` / `AUTO_CONTINUE`：未完成但可继续生成的中间状态，不应直接输出给客户端。
+
+### 6.4 统计与进度路径
+
+- `accumulated_token_usage`
+
+这类路径用于使用量或进度统计，属于元数据。
+
+### 6.5 非命名空间字段
+
+在片段对象内部，还会看到 `content`、`references`、`result`、`queries`、`stage_id` 等字段。它们不一定带 `response/...` 前缀，但仍然是协议语义的一部分。
+
+## 7. fragment 类型
+
+当前 corpus 里已经观察到的 fragment 类型如下：
+
+| 类型 | 作用 | 是否应直接渲染 |
+| --- | --- | --- |
+| `RESPONSE` | 正常回答片段 | 是，属于正文 |
+| `THINK` | 推理或阶段提示 | 通常否，按产品策略决定是否展示 |
+| `TOOL_SEARCH` | 搜索工具调用元数据 | 否 |
+| `TOOL_OPEN` | 打开 / 抽取结果的工具元数据 | 否 |
+| `TIP` | 提示 / 警告类片段，常带 `style: WARNING` | 视产品策略决定，通常作为附注 |
+| `TEMPLATE_RESPONSE` | 风控拒答模板 | 是，但它属于终态 fallback，不是普通正文 |
+
+观察到的典型片段字段：
+
+- `id`
+- `type`
+- `content`
+- `references`
+- `stage_id`
+- `status`
+- `queries`
+- `results`
+- `result`
+- `elapsed_secs`
+- `style`
+- `hide_on_wip`
+
+第三方实现不要把 `fragment.type` 和 `p` 路径混为一谈。`type` 是语义分类，`p` 是状态树位置。
+
+## 8. 终态行为
+
+当前 corpus 里有两条很重要的终态分支。
+
+### 8.1 正常完成
+
+正常回答通常会出现如下收口顺序：
+
+1. `response` 的 `BATCH` 更新 `accumulated_token_usage`。
+2. `response` 的 `BATCH` 或单独 patch 更新 `quasi_status: FINISHED`。
+3. `response/status` 置为 `FINISHED`。
+4. `finish` 事件到来。
+5. 之后可能还有 `update_session`、`title`、`close`。
+
+### 8.2 风控终态
+
+`content-filter-trigger-20260405-jwt3` 展示了另一种终态路径：
+
+1. 先继续输出一段正常正文。
+2. 出现提示类 fragment，例如 `TIP`。
+3. 可能先把 `quasi_status` 提前收口到 `FINISHED`。
+4. 之后出现一个终态 batch，把 `ban_regenerate` 设为 `true`，把 `status` 置为 `CONTENT_FILTER`，并附带 `TEMPLATE_RESPONSE`。
+5. 最后再出现 `finish`，然后是收尾事件。
+
+这个分支说明：
+
+- `finish` 不等于正常结束。
+- `CONTENT_FILTER` 是一个独立终态，不是普通异常。
+- `TEMPLATE_RESPONSE` 不应被当作常规回答流的中间片段，它是终态 fallback。
+
+一个简化的风控尾部可以写成：
+
+```json
+{"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":1269},{"p":"quasi_status","v":"FINISHED"}]}
+{"v":[{"p":"ban_regenerate","v":true},{"p":"status","v":"CONTENT_FILTER"},{"p":"fragments","v":[{"id":38,"type":"TEMPLATE_RESPONSE","content":"..."}]},{"p":"quasi_status","v":"CONTENT_FILTER"}]}
+{"event":"finish"}
+```
+
+### 8.3 自动续写中间态（实现补充）
+
+这部分不是当前 corpus 的直接覆盖项，而是 2026-04-05 在长思考实测中观察到、且已在当前实现中兼容的行为：
+
+1. 上游可能先把 `response/status` 或 envelope 内的 `response.status` 置为 `WIP` / `INCOMPLETE`。
+2. 有时还会伴随 `auto_continue: true`。
+3. 这表示当前轮输出尚未真正结束，客户端或代理层可以继续调用 continue 接口续写同一条回答。
+4. 续写后的内容会承接之前的思考与正文，不应把前一轮状态值泄露成可见文本。
+
+对第三方实现，建议把这一类状态统一当作“可继续的控制信号”：
+
+- 可以据此决定是否继续拉取后续流。
+- 不能把 `INCOMPLETE`、`WIP`、`AUTO_CONTINUE` 直接拼接到最终文本。
+- `finish` 事件本身也不能单独说明回答已完全结束，仍要结合状态字段判断。
+
+## 9. 文本重建规则
+
+如果你的目标是把流重建成最终可见文本，必须遵守下面这些规则：
+
+- 按接收顺序逐个追加 token。
+- 不要对每个 `v` 做 `trim` 或 `TrimSpace`。
+- 不要丢弃只包含空格的 chunk。
+- 不要合并连续空格、换行或 Markdown 符号附近的空白。
+- 不要把 `[reference:N]` 视为协议元数据，它在当前 corpus 里就是正文的一部分。
+- 如果你要屏蔽引用标记，应当把它做成可配置的后处理，而不是在解析阶段硬删。
+- `response/status` / `status` 路径上的状态字符串不应进入正文，即使它们不是终态。
+
+这点对 Markdown、代码块、引用、表格都很关键。样本里已经证明，`#`、`-`、`>`、`|` 这类符号后面的空格必须原样保留，否则渲染结果会变形。
+
+## 10. 推荐实现方式
+
+对第三方开发者，建议把实现拆成三条线：
+
+1. 原始事件线：保留 SSE block 顺序、事件名和完整 JSON 载荷。
+2. 状态树线：维护 `response`、`fragments`、`status`、`quasi_status` 等结构。
+3. 可见文本线：只从明确应渲染的 token / fragment 中拼接最终文本。
+
+一个简单的处理顺序可以是：
+
+```text
+parse SSE block
+  -> 识别 event
+  -> 解析 JSON payload
+  -> 更新状态树
+  -> 识别 status / quasi_status / auto_continue 等控制信号
+  -> 判定是否有可见文本
+  -> 追加到输出缓冲
+  -> 遇到 WIP / INCOMPLETE / AUTO_CONTINUE 时决定是否续写
+  -> 遇到 FINISHED / CONTENT_FILTER / finish 时收口
+```
+
+实现时的兼容原则：
+
+- 未知路径保留，不要报错中断。
+- 未知 fragment.type 保留在日志里。
+- 不要假设所有模型都一定输出 `thinking_content`，当前 corpus 的推理更多是通过 fragment 类型表达。
+- 不要假设 `title` 一定存在，它只是后置元数据。
+
+## 11. 本 corpus 证明了什么
+
+当前样本足以证明以下行为：
+
+- 搜索类模型会把工具调用、结果、引用和正文混在同一条 SSE 流里。
+- 风控不会简单地“没有输出”，而是会在正常生成后切换到 `CONTENT_FILTER` 终态。
+- Markdown 和代码输出对空格非常敏感，空格 chunk 不能吞。
+- `message` 是主体承载层，`ready` / `update_session` / `finish` / `title` / `close` 是控制层。
+- `fragment.type` 是可视化和工具链分层的关键，不应只靠 `p` 路径判断。
+
+结合 2026-04-05 的长思考实测，还可以补充一条当前实现层面的结论：
+
+- 长思考场景下，上游可能先给出 `INCOMPLETE` / `WIP` / `AUTO_CONTINUE` 状态，再通过 continue 链路续写；这些状态值本身不应作为正文输出。
+
+## 12. 适用边界
+
+本文是基于当前 corpus 的逆向说明，不是恒定协议。
+
+- 新模型可能增加新的 `p` 路径。
+- 新版本可能增加新的 fragment.type。
+- `CONTENT_FILTER` 的终态模板内容可能变化。
+- 自动续写相关状态（如 `INCOMPLETE` / `AUTO_CONTINUE`）当前主要来自实测与实现兼容逻辑，后续字段形态仍可能变化。
+- 解析器应当对未知字段、未知路径、未知事件保持容忍。
+
+如果你要把这份说明用于实际开发，建议同时保留原始流样本、回放脚本和回归测试，不要只依赖本文。
--- a/docs/TESTING.md
+++ b/docs/TESTING.md
@@ -0,0 +1,281 @@
+# DS2API 测试指南
+
+语言 / Language: 中文 + English（同页）
+
+## 概述 | Overview
+
+DS2API 提供两个层级的测试：
+
+| 层级 | 命令 | 说明 |
+| --- | --- | --- |
+| 单元测试（Go） | `./tests/scripts/run-unit-go.sh` | 不需要真实账号 |
+| 单元测试（Node） | `./tests/scripts/run-unit-node.sh` | 不需要真实账号 |
+| 单元测试（全部） | `./tests/scripts/run-unit-all.sh` | 不需要真实账号 |
+| 端到端测试 | `./tests/scripts/run-live.sh` | 使用真实账号执行全链路测试 |
+
+端到端测试集会录制完整的请求/响应日志，用于故障排查。
+Node 单元测试脚本会先做 `node --check` 语法门禁，再以 `--test-concurrency=1` 串行执行测试文件，减少模块级共享状态带来的干扰。
+
+---
+
+## 快速开始 | Quick Start
+
+### 单元测试 | Unit Tests
+
+```bash
+./tests/scripts/run-unit-all.sh
+```
+
+```bash
+# 或按语言拆分执行
+./tests/scripts/run-unit-go.sh
+./tests/scripts/run-unit-node.sh
+```
+
+```bash
+# 结构与流程门禁
+./tests/scripts/check-refactor-line-gate.sh
+./tests/scripts/check-node-split-syntax.sh
+
+# 发布阻断：阶段 6 手工烟测签字检查（默认读取 plans/stage6-manual-smoke.md）
+./tests/scripts/check-stage6-manual-smoke.sh
+```
+
+### 端到端测试 | End-to-End Tests
+
+```bash
+./tests/scripts/run-live.sh
+```
+
+**默认行为**：
+
+1. **Preflight 检查**：
+   - `go test ./... -count=1`（单元测试）
+   - `./tests/scripts/check-node-split-syntax.sh`（Node 拆分模块语法门禁）
+   - `node --test tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js tests/node/js_compat_test.js`
+   - `npm run build --prefix webui`（WebUI 构建检查）
+
+2. **隔离启动**：复制 `config.json` 到临时目录，启动独立服务进程
+
+3. **场景测试**：
+   - ✅ OpenAI 非流式 / 流式
+   - ✅ Claude 非流式 / 流式
+   - ✅ Admin API（登录 / 配置 / 账号管理）
+   - ✅ Tool Calling
+   - ✅ 并发压力测试
+   - ✅ Search 模型
+
+4. **结果收集**：继续执行所有用例（不中断），写入最终汇总
+
+如果你想跳过这些 preflight 检查，可以直接运行 `go run ./cmd/ds2api-tests --no-preflight`。
+
+---
+
+## CLI 参数 | CLI Flags
+
+```bash
+go run ./cmd/ds2api-tests \
+  --config config.json \
+  --admin-key admin \
+  --out artifacts/testsuite \
+  --port 0 \
+  --timeout 120 \
+  --retries 2 \
+  --no-preflight=false \
+  --keep 5
+```
+
+| 参数 | 说明 | 默认值 |
+| --- | --- | --- |
+| `--config` | 配置文件路径 | `config.json` |
+| `--admin-key` | Admin 密钥 | `DS2API_ADMIN_KEY` 环境变量，回退 `admin` |
+| `--out` | 产物输出根目录 | `artifacts/testsuite` |
+| `--port` | 测试服务端口（`0` = 自动分配空闲端口） | `0` |
+| `--timeout` | 单个请求超时秒数 | `120` |
+| `--retries` | 网络/5xx 请求重试次数 | `2` |
+| `--no-preflight` | 跳过 preflight 检查 | `false` |
+| `--keep` | 保留最近几次测试结果（`0` = 全部保留） | `5` |
+
+---
+
+## 自动清理 | Auto Cleanup
+
+每次测试运行完成后，程序会自动扫描输出目录（`--out`），按时间排序保留最近 `--keep` 次运行的结果，超出部分自动删除。
+
+- 默认保留 **5** 次
+- 设置 `--keep 0` 可关闭自动清理
+- 被删除的旧运行目录会打印日志提示
+
+---
+
+## 产物结构 | Artifact Layout
+
+每次运行会创建一个以运行 ID 命名的目录：
+
+```text
+artifacts/testsuite/<run_id>/
+├── summary.json          # 机器可读报告
+├── summary.md            # 人类可读报告
+├── server.log            # 测试期间服务端日志
+├── preflight.log         # Preflight 命令输出
+└── cases/
+    └── <case_id>/
+        ├── request.json      # 请求体
+        ├── response.headers  # 响应头
+        ├── response.body     # 响应体
+        ├── stream.raw        # 原始 SSE 数据（流式用例）
+        ├── assertions.json   # 断言结果
+        └── meta.json         # 元信息（耗时、状态码等）
+```
+
+---
+
+## Trace 关联 | Trace Binding
+
+每个测试请求自动注入 trace 信息，便于快速定位问题：
+
+| 位置 | 格式 |
+| --- | --- |
+| 请求头 | `X-Ds2-Test-Trace: <trace_id>` |
+| 查询参数 | `__trace_id=<trace_id>` |
+
+当用例失败时，`summary.md` 中会包含 trace ID。你可以快速搜索对应的服务端日志：
+
+```bash
+rg "<trace_id>" artifacts/testsuite/<run_id>/server.log
+```
+
+---
+
+## 退出码 | Exit Code
+
+| 退出码 | 含义 |
+| --- | --- |
+| `0` | 所有用例通过 ✅ |
+| `1` | 有用例失败 ❌ |
+
+可将测试集作为本地发布门禁使用（CI/CD 集成）。
+
+---
+
+## 安全提醒 | Sensitive Data Warning
+
+⚠️ 测试集会存储**完整的原始请求/响应载荷**用于调试。
+
+- **不要**将 artifacts 目录上传到公开仓库
+- **不要**在 Issue tracker 中分享未脱敏的 artifact 文件
+- 如需分享日志，请先手动清除敏感信息（token、密码等）
+
+---
+
+## 常见用法 | Common Usage
+
+### 仅跑单元测试
+
+```bash
+go test ./...
+```
+
+### 运行特定模块的单元测试
+
+```bash
+# 运行 tool calls 相关测试（推荐用于调试 tool call 解析问题）
+go test -v -run 'TestParseToolCalls|TestRepair' ./internal/util/
+
+# 运行单个测试用例
+go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/util/
+
+# 运行 format 相关测试
+go test -v ./internal/format/...
+
+# 运行 adapter 相关测试
+go test -v ./internal/adapter/openai/...
+```
+
+### 调试 Tool Call 问题 | Debugging Tool Call Issues
+
+当遇到 DeepSeek 工具调用解析问题时，可以使用以下方法：
+
+```bash
+# 1. 运行 tool calls 相关的所有测试
+go test -v -run 'TestParseToolCalls|TestRepair' ./internal/util/
+
+# 2. 查看测试输出中的详细调试信息
+go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/util/ 2>&1
+
+# 3. 检查具体测试用例的修复效果
+# 测试用例位于 internal/util/toolcalls_test.go，包含：
+# - TestParseToolCallsWithDeepSeekHallucination: DeepSeek 典型幻觉输出
+# - TestRepairLooseJSONWithNestedObjects: 嵌套对象的方括号修复
+# - TestParseToolCallsWithMixedWindowsPaths: Windows 路径处理
+```
+
+### 运行 Node.js 测试
+
+```bash
+# 运行 Node 测试
+node --test tests/node/stream-tool-sieve.test.js
+
+# 或使用脚本
+./tests/scripts/run-unit-node.sh
+```
+
+### 跑端到端测试（跳过 preflight）
+
+```bash
+go run ./cmd/ds2api-tests --no-preflight
+```
+
+### 运行原始流仿真（独立工具）
+
+```bash
+./tests/scripts/run-raw-stream-sim.sh
+```
+
+说明：
+- 该工具默认重放 `tests/raw_stream_samples/manifest.json` 声明的 canonical 样本，按上游 SSE 顺序做 1:1 仿真解析。
+- 默认校验不出现 `FINISHED` 文本泄露，并要求存在结束信号。
+- 每次运行都会把本地派生结果写入 `artifacts/raw-stream-sim/<run-id>/<sample-id>/replay.output.txt`，并输出结构化报告。
+- 如果你有历史基线目录，可以通过 `--baseline-root` 让工具直接做文本对比。
+- 更完整的协议级行为结构说明见 [DeepSeekSSE行为结构说明-2026-04-05.md](./DeepSeekSSE行为结构说明-2026-04-05.md)。
+
+### 对单个样本做回放比对
+
+```bash
+./tests/scripts/compare-raw-stream-sample.sh markdown-format-example-20260405-spacefix
+```
+
+说明：
+- 该脚本会从 raw-only 样本目录读取 `upstream.stream.sse`。
+- 回放结果会写入 `artifacts/raw-stream-sim/<run-id>/<sample-id>/`，便于直接查阅。
+- 如果传入历史基线目录，脚本会自动对比当前回放输出和基线文本。
+
+### 采集永久样本
+
+本地启动服务后，可以直接调用以下接口：
+
+```text
+POST /admin/dev/raw-samples/capture
+```
+
+这个接口会把请求元信息和上游原始流写入 `tests/raw_stream_samples/<sample-id>/`，以后可以直接拿来做回放和字段分析。派生输出会在本地回放时再生成，不再落在样本目录里。
+
+### 指定输出目录和超时
+
+```bash
+go run ./cmd/ds2api-tests \
+  --out /tmp/ds2api-test \
+  --timeout 60
+```
+
+### 在 CI 中使用
+
+```bash
+# 确保 config.json 存在且包含有效测试账号
+./tests/scripts/run-live.sh
+exit_code=$?
+if [ $exit_code -ne 0 ]; then
+  echo "Tests failed! Check artifacts for details."
+  exit 1
+fi
+```
--- a/docs/toolcall-semantics.md
+++ b/docs/toolcall-semantics.md
@@ -0,0 +1,72 @@
+# Tool call parsing semantics（Go/Node 统一语义）
+
+本文档描述当前代码中 `ParseToolCallsDetailed` / `parseToolCallsDetailed` 的**实际行为**，用于对齐 Go 与 Node Runtime。
+
+## 1) 输出结构（当前实现）
+
+- `calls`：解析得到的工具调用列表（`name` + `input`）。
+- `sawToolCallSyntax`：检测到工具调用语法特征时为 `true`（例如 `tool_calls`、`<tool_call>`、`<function_call>`、`<invoke>`、`function.name:`）。
+- `rejectedByPolicy`：当前实现固定为 `false`（预留字段，尚未启用 allow-list 拒绝）。
+- `rejectedToolNames`：当前实现固定为空数组（预留字段）。
+
+> 说明：`filterToolCallsDetailed` 当前仅做结构清洗，不做工具名策略拒绝。
+
+## 2) 解析管线
+
+1. **示例保护**：若判定为 fenced code block 示例上下文，则跳过执行型解析。
+2. **候选片段构建**：从完整文本中构建候选（原文、围绕 `tool_calls` 的 JSON 片段、首尾大括号切片等）。
+3. **按序尝试解析（命中即停）**：
+   - 对“明显 JSON 工具载荷候选”（以 `{`/`[` 开头且包含 `tool_calls`/`"function"`）先走 JSON 解析，避免 JSON 字符串内偶发 XML 片段误命中；
+   - 其余候选优先 XML 解析（`<tool_call>` / `<function_call>` / `<invoke>` / `tool_use` / `antml:function_call` 等）；
+   - JSON 解析（`{"tool_calls": [...]}`、列表、单对象）；
+   - Markup 解析；
+   - Text-KV 回退（如 `function.name:` + `function.arguments:`）。
+4. **兜底**：候选全部失败后，再对全文做 XML / Text-KV 回退。
+
+## 3) XML 能力边界（当前）
+
+当前已支持输入端的“多 XML/标记风格”解析，包括但不限于：
+
+- `<tool_call><tool_name>...</tool_name><parameters>...</parameters></tool_call>`
+- `<function_call>tool</function_call><function parameter name="x">...</function parameter>`
+- `<invoke name="tool"><parameter name="x">...</parameter></invoke>`
+- `antml:function_call` / `antml:argument` / `antml:parameters`
+- `tool_use` 家族标签
+
+但**输出端仍统一转换为 OpenAI 兼容 JSON 事件/对象**（`message.tool_calls`、`delta.tool_calls`、`response.function_call_arguments.*`）。
+
+## 4) 关于“是否可以封装成 XML 再喂给模型”
+
+结论：**可以做，而且当前解析器已经能兼容 XML 作为输入格式之一**，但代码里并没有 `toolcall.prefer_xml_output` 这个开关。现有可调配置只有：
+
+- `toolcall.mode`：`feature_match` / `off`
+- `toolcall.early_emit_confidence`：`high` / `low` / `off`
+
+推荐思路仍然是“输入兼容层 + 输出按客户端协议渲染”：
+
+1. **Prompt 约束层**：如果你要尝试 XML-first，可以在系统提示词里约束模型输出规范 XML tool block（例如 `<tool_calls><tool_call>...</tool_call></tool_calls>`）。
+2. **解析兼容层**：继续在 parser 中同时接受 JSON / XML / ANTML / invoke / text-kv。
+3. **协议归一层**：无论模型输出什么格式，统一落到内部 `ParsedToolCall`。
+4. **对外渲染层**：根据客户端请求协议渲染（OpenAI / Claude / Gemini 各自格式）。
+
+这样可以同时获得：
+
+- 减少模型端 JSON 转义/引号错误；
+- 不破坏现有 SDK / 客户端生态；
+- 逐步灰度（按模型、按租户、按请求开关）。
+
+## 5) 落地建议（低风险迭代）
+
+- 继续使用现有的 `toolcall.mode=feature_match` 和 `toolcall.early_emit_confidence=high` 作为默认策略。
+- 如果要试 XML-first，把它放在 prompt 层或上游模板层，不要假设代码里已有专门的 XML 输出开关。
+- 增加观测指标：
+  - `toolcall_parse_source`（json/xml/markup/textkv）；
+  - `toolcall_parse_success_rate`；
+  - `toolcall_malformed_rate`；
+  - `toolcall_repair_rate`。
+- 先在 `responses` 链路灰度，再扩展 `chat.completions`。
+
+## 6) 兼容性提醒
+
+- 上游模型若输出混合文本 + XML，仍可能出现“半结构化”噪声，需要依赖现有 sieve 增量消费策略。
+- XML 不等于安全：仍需做 tool 名、参数 schema、执行权限的服务端校验。
--- a/go.mod
+++ b/go.mod
@@ -0,0 +1,25 @@
+module ds2api
+
+go 1.26.0
+
+require (
+	github.com/andybalholm/brotli v1.2.1
+	github.com/go-chi/chi/v5 v5.2.5
+	github.com/google/uuid v1.6.0
+	github.com/refraction-networking/utls v1.8.2
+	github.com/router-for-me/CLIProxyAPI/v6 v6.9.14
+	github.com/tetratelabs/wazero v1.11.0
+)
+
+require (
+	github.com/klauspost/compress v1.18.5 // indirect
+	github.com/sirupsen/logrus v1.9.4 // indirect
+	github.com/tidwall/gjson v1.18.0 // indirect
+	github.com/tidwall/match v1.2.0 // indirect
+	github.com/tidwall/pretty v1.2.1 // indirect
+	github.com/tidwall/sjson v1.2.5 // indirect
+	golang.org/x/crypto v0.49.0 // indirect
+	golang.org/x/net v0.52.0 // indirect
+	golang.org/x/sys v0.42.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+)
--- a/go.sum
+++ b/go.sum
@@ -0,0 +1,45 @@
+github.com/andybalholm/brotli v1.2.1 h1:R+f5xP285VArJDRgowrfb9DqL18yVK0gKAW/F+eTWro=
+github.com/andybalholm/brotli v1.2.1/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/go-chi/chi/v5 v5.2.5 h1:Eg4myHZBjyvJmAFjFvWgrqDTXFyOzjj7YIm3L3mu6Ug=
+github.com/go-chi/chi/v5 v5.2.5/go.mod h1:X7Gx4mteadT3eDOMTsXzmI4/rwUpOwBHLpAfupzFJP0=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE=
+github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/refraction-networking/utls v1.8.2 h1:j4Q1gJj0xngdeH+Ox/qND11aEfhpgoEvV+S9iJ2IdQo=
+github.com/refraction-networking/utls v1.8.2/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
+github.com/router-for-me/CLIProxyAPI/v6 v6.9.14 h1:XItUHrPGE9E5xTeZIPjKGmKqfEs1AZbxl1RPfO5xtrc=
+github.com/router-for-me/CLIProxyAPI/v6 v6.9.14/go.mod h1:P1jsIPFXorYGuS2N/3BlZYkpRKi/z7+oR3+1tdG0u4k=
+github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w=
+github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g=
+github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/tetratelabs/wazero v1.11.0 h1:+gKemEuKCTevU4d7ZTzlsvgd1uaToIDtlQlmNbwqYhA=
+github.com/tetratelabs/wazero v1.11.0/go.mod h1:eV28rsN8Q+xwjogd7f4/Pp4xFxO7uOGbLcD/LzB1wiU=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
+github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/match v1.2.0 h1:0pt8FlkOwjN2fPt4bIl4BoNxb98gGHN2ObFEDkrfZnM=
+github.com/tidwall/match v1.2.0/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
+github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
+github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
+github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
+github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
+golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
+golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
+golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
+golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
+golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
+golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/internal/account/pool_acquire.go
+++ b/internal/account/pool_acquire.go
@@ -0,0 +1,99 @@
+package account
+
+import (
+	"context"
+
+	"ds2api/internal/config"
+)
+
+// Acquire tries to reserve an account slot without blocking.
+//
+// A non-empty target requests that specific account; an empty target takes the
+// first eligible account in queue order. Identifiers mapped to true in exclude
+// are skipped. The boolean result is false when no slot could be reserved.
+func (p *Pool) Acquire(target string, exclude map[string]bool) (config.Account, bool) {
+	skip := normalizeExclude(exclude)
+
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	return p.acquireLocked(target, skip)
+}
+
+// AcquireWait reserves an account slot, queueing until a slot is signalled as
+// free when none is immediately available.
+//
+// It returns false when ctx is cancelled, when target is excluded or unknown
+// to the store, or when the wait queue is disabled or already full. A nil ctx
+// is treated as context.Background().
+func (p *Pool) AcquireWait(ctx context.Context, target string, exclude map[string]bool) (config.Account, bool) {
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	exclude = normalizeExclude(exclude)
+
+	// woken records that the previous iteration consumed a wake-up that has
+	// not yet been turned into an acquire attempt.
+	woken := false
+	for {
+		if ctx.Err() != nil {
+			if woken {
+				// Giving up right after being woken: pass the wake-up on so
+				// the free slot is not left idle while other callers block.
+				p.mu.Lock()
+				p.notifyWaiterLocked()
+				p.mu.Unlock()
+			}
+			return config.Account{}, false
+		}
+		woken = false
+
+		p.mu.Lock()
+		if acc, ok := p.acquireLocked(target, exclude); ok {
+			p.mu.Unlock()
+			return acc, true
+		}
+		if !p.canQueueLocked(target, exclude) {
+			p.mu.Unlock()
+			return config.Account{}, false
+		}
+		waiter := make(chan struct{})
+		p.waiters = append(p.waiters, waiter)
+		p.mu.Unlock()
+
+		select {
+		case <-ctx.Done():
+			p.mu.Lock()
+			if !p.removeWaiterLocked(waiter) {
+				// The waiter had already been dequeued, so a wake-up was
+				// spent on this caller just as it was cancelled. Forward it
+				// to the next waiter instead of dropping it.
+				p.notifyWaiterLocked()
+			}
+			p.mu.Unlock()
+			return config.Account{}, false
+		case <-waiter:
+			// The channel was closed by a notify or a drain: retry.
+			woken = true
+		}
+	}
+}
+
+// acquireLocked reserves a slot; the caller must already hold p.mu.
+//
+// An empty target delegates to tryAcquire. Otherwise only the named account is
+// considered, and the call fails if it is excluded, at capacity, or unknown to
+// the store. On success the account's in-use count is incremented and the
+// account is moved to the back of the queue.
+func (p *Pool) acquireLocked(target string, exclude map[string]bool) (config.Account, bool) {
+	if target == "" {
+		return p.tryAcquire(exclude)
+	}
+
+	blocked := exclude[target] || !p.canAcquireIDLocked(target)
+	if blocked {
+		return config.Account{}, false
+	}
+	account, found := p.store.FindAccount(target)
+	if !found {
+		return config.Account{}, false
+	}
+	p.inUse[target]++
+	p.bumpQueue(target)
+	return account, true
+}
+
+// tryAcquire walks the queue front to back and reserves the first account that
+// is not excluded, has capacity, and is still present in the store.
+func (p *Pool) tryAcquire(exclude map[string]bool) (config.Account, bool) {
+	for _, candidate := range p.queue {
+		if exclude[candidate] {
+			continue
+		}
+		if !p.canAcquireIDLocked(candidate) {
+			continue
+		}
+		account, found := p.store.FindAccount(candidate)
+		if found {
+			p.inUse[candidate]++
+			p.bumpQueue(candidate)
+			return account, true
+		}
+	}
+	return config.Account{}, false
+}
+
+// bumpQueue moves the first occurrence of accountID to the back of the queue.
+// It is a no-op when the identifier is not queued.
+func (p *Pool) bumpQueue(accountID string) {
+	last := len(p.queue) - 1
+	for pos := 0; pos <= last; pos++ {
+		if p.queue[pos] == accountID {
+			// Shift the tail one step left, then place the id in the last slot.
+			copy(p.queue[pos:], p.queue[pos+1:])
+			p.queue[last] = accountID
+			return
+		}
+	}
+}
+
+// normalizeExclude returns exclude unchanged when it is non-nil, and a fresh
+// empty map otherwise.
+func normalizeExclude(exclude map[string]bool) map[string]bool {
+	if exclude != nil {
+		return exclude
+	}
+	return make(map[string]bool)
+}
--- a/internal/account/pool_core.go
+++ b/internal/account/pool_core.go
@@ -0,0 +1,132 @@
+package account
+
+import (
+	"sort"
+	"sync"
+
+	"ds2api/internal/config"
+)
+
+// Pool hands out account slots in queue order while enforcing per-account and
+// global in-flight limits, with a bounded list of waiters for blocked callers.
+type Pool struct {
+	// store is the configuration source used to list and look up accounts.
+	store                  *config.Store
+	// mu is held by the pool's methods while they touch the fields below.
+	mu                     sync.Mutex
+	// queue holds account identifiers in acquisition order.
+	queue                  []string
+	// inUse maps an account identifier to the number of slots currently held.
+	inUse                  map[string]int
+	// waiters holds one channel per queued AcquireWait call; closing a channel
+	// wakes that caller.
+	waiters                []chan struct{}
+	// maxInflightPerAccount caps concurrent slots on a single account.
+	maxInflightPerAccount  int
+	// recommendedConcurrency is account count times maxInflightPerAccount.
+	recommendedConcurrency int
+	// maxQueueSize caps len(waiters); values <= 0 disable queueing.
+	maxQueueSize           int
+	// globalMaxInflight caps total slots across accounts when > 0.
+	globalMaxInflight      int
+}
+
+// NewPool builds a Pool backed by store and populates it through Reset.
+func NewPool(store *config.Store) *Pool {
+	perAccount := 2
+	if store != nil {
+		perAccount = store.RuntimeAccountMaxInflight()
+	}
+
+	pool := new(Pool)
+	pool.store = store
+	pool.inUse = make(map[string]int)
+	pool.maxInflightPerAccount = perAccount
+	pool.Reset()
+	return pool
+}
+
+func (p *Pool) Reset() {
+	accounts := p.store.Accounts()
+	sort.SliceStable(accounts, func(i, j int) bool {
+		iHas := accounts[i].Token != ""
+		jHas := accounts[j].Token != ""
+		if iHas == jHas {
+			return i < j
+		}
+		return iHas
+	})
+	ids := make([]string, 0, len(accounts))
+	for _, a := range accounts {
+		id := a.Identifier()
+		if id != "" {
+			ids = append(ids, id)
+		}
+	}
+	if p.store != nil {
+		p.maxInflightPerAccount = p.store.RuntimeAccountMaxInflight()
+	} else {
+		p.maxInflightPerAccount = maxInflightFromEnv()
+	}
+	recommended := defaultRecommendedConcurrency(len(ids), p.maxInflightPerAccount)
+	queueLimit := maxQueueFromEnv(recommended)
+	globalLimit := recommended
+	if p.store != nil {
+		queueLimit = p.store.RuntimeAccountMaxQueue(recommended)
+		globalLimit = p.store.RuntimeGlobalMaxInflight(recommended)
+	}
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.drainWaitersLocked()
+	p.queue = ids
+	p.inUse = map[string]int{}
+	p.recommendedConcurrency = recommended
+	p.maxQueueSize = queueLimit
+	p.globalMaxInflight = globalLimit
+	config.Logger.Info(
+		"[init_account_queue] initialized",
+		"total", len(ids),
+		"max_inflight_per_account", p.maxInflightPerAccount,
+		"global_max_inflight", p.globalMaxInflight,
+		"recommended_concurrency", p.recommendedConcurrency,
+		"max_queue_size", p.maxQueueSize,
+	)
+}
+
+// Release gives back one slot held on accountID and wakes the first waiter.
+// Empty identifiers and accounts with no held slots are ignored.
+func (p *Pool) Release(accountID string) {
+	if accountID == "" {
+		return
+	}
+
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	held := p.inUse[accountID]
+	switch {
+	case held <= 0:
+		return
+	case held == 1:
+		// Last slot: drop the entry entirely rather than storing zero.
+		delete(p.inUse, accountID)
+	default:
+		p.inUse[accountID] = held - 1
+	}
+	p.notifyWaiterLocked()
+}
+
+// Status returns a snapshot of the pool: configured limits, queued waiters,
+// accounts below their per-account limit, and accounts currently holding slots
+// (sorted by identifier).
+func (p *Pool) Status() map[string]any {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	free := make([]string, 0, len(p.queue))
+	for _, id := range p.queue {
+		if p.inUse[id] >= p.maxInflightPerAccount {
+			continue
+		}
+		free = append(free, id)
+	}
+
+	busy := make([]string, 0, len(p.inUse))
+	slots := 0
+	for id, held := range p.inUse {
+		if held <= 0 {
+			continue
+		}
+		busy = append(busy, id)
+		slots += held
+	}
+	sort.Strings(busy)
+
+	return map[string]any{
+		"total":                    len(p.store.Accounts()),
+		"available":                len(free),
+		"available_accounts":       free,
+		"in_use":                   slots,
+		"in_use_accounts":          busy,
+		"waiting":                  len(p.waiters),
+		"max_queue_size":           p.maxQueueSize,
+		"max_inflight_per_account": p.maxInflightPerAccount,
+		"global_max_inflight":      p.globalMaxInflight,
+		"recommended_concurrency":  p.recommendedConcurrency,
+	}
+}
--- a/internal/account/pool_edge_test.go
+++ b/internal/account/pool_edge_test.go
@@ -0,0 +1,232 @@
+package account
+
+import (
+	"context"
+	"sync"
+	"testing"
+	"time"
+
+	"ds2api/internal/config"
+)
+
+// ─── Pool edge cases ─────────────────────────────────────────────────
+
+// TestPoolEmptyNoAccounts checks that a pool built from an empty account list
+// refuses to acquire and reports a total of zero.
+func TestPoolEmptyNoAccounts(t *testing.T) {
+	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "2")
+	t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
+	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[]}`)
+	pool := NewPool(config.LoadStore())
+
+	_, acquired := pool.Acquire("", nil)
+	if acquired {
+		t.Fatal("expected acquire to fail with no accounts")
+	}
+
+	status := pool.Status()
+	total, isInt := status["total"].(int)
+	if !isInt || total != 0 {
+		t.Fatalf("unexpected total: %#v", status["total"])
+	}
+}
+
+func TestPoolReleaseNonExistentAccount(t *testing.T) {
+	pool := newPoolForTest(t, "2")
+	pool.Release("nonexistent@example.com") // should not panic
+}
+
+// TestPoolReleaseAlreadyReleased checks that releasing the same account twice
+// does not panic.
+func TestPoolReleaseAlreadyReleased(t *testing.T) {
+	pool := newPoolForTest(t, "2")
+	account, acquired := pool.Acquire("", nil)
+	if !acquired {
+		t.Fatal("expected acquire success")
+	}
+	id := account.Identifier()
+	pool.Release(id)
+	pool.Release(id)
+}
+
+// TestPoolAcquireTargetNotFound checks that targeting an account that is not
+// in the configuration fails.
+func TestPoolAcquireTargetNotFound(t *testing.T) {
+	pool := newPoolForTest(t, "2")
+	_, acquired := pool.Acquire("nonexistent@example.com", nil)
+	if acquired {
+		t.Fatal("expected acquire to fail for non-existent target")
+	}
+}
+
+// TestPoolAcquireWithExclusionList checks that an excluded account is skipped
+// in favour of the next one in the queue.
+func TestPoolAcquireWithExclusionList(t *testing.T) {
+	pool := newPoolForTest(t, "2")
+	skip := map[string]bool{"acc1@example.com": true}
+
+	account, acquired := pool.Acquire("", skip)
+	if !acquired {
+		t.Fatal("expected acquire success with exclusion")
+	}
+	if got := account.Identifier(); got != "acc2@example.com" {
+		t.Fatalf("expected acc2 when acc1 excluded, got %q", got)
+	}
+	pool.Release(account.Identifier())
+}
+
+// TestPoolAcquireAllExcluded checks that acquire fails when every configured
+// account is excluded.
+func TestPoolAcquireAllExcluded(t *testing.T) {
+	pool := newPoolForTest(t, "2")
+	skip := map[string]bool{}
+	skip["acc1@example.com"] = true
+	skip["acc2@example.com"] = true
+
+	_, acquired := pool.Acquire("", skip)
+	if acquired {
+		t.Fatal("expected acquire to fail when all accounts excluded")
+	}
+}
+
+// TestPoolStatusFields checks that Status reports every key it is expected to
+// expose, including the in_use and global_max_inflight entries.
+func TestPoolStatusFields(t *testing.T) {
+	pool := newPoolForTest(t, "2")
+	status := pool.Status()
+
+	// Keep this list in sync with the map literal returned by Pool.Status.
+	wantKeys := []string{
+		"total",
+		"available",
+		"in_use",
+		"available_accounts",
+		"in_use_accounts",
+		"max_inflight_per_account",
+		"global_max_inflight",
+		"recommended_concurrency",
+		"waiting",
+		"max_queue_size",
+	}
+	for _, key := range wantKeys {
+		if _, ok := status[key]; !ok {
+			t.Fatalf("missing status field: %s", key)
+		}
+	}
+}
+
+// TestPoolStatusAccountDetails checks that an acquired account shows up in
+// in_use_accounts and is counted in in_use.
+func TestPoolStatusAccountDetails(t *testing.T) {
+	pool := newPoolForTest(t, "2")
+	acc, ok := pool.Acquire("acc1@example.com", nil)
+	if !ok {
+		// Without this guard a failed acquire would surface later as a
+		// confusing "not in in_use_accounts" failure.
+		t.Fatal("expected acquire acc1 success")
+	}
+
+	status := pool.Status()
+	inUseAccounts, ok := status["in_use_accounts"].([]string)
+	if !ok {
+		t.Fatalf("unexpected in_use_accounts type: %T", status["in_use_accounts"])
+	}
+	found := false
+	for _, id := range inUseAccounts {
+		if id == "acc1@example.com" {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Fatalf("expected acc1 in in_use_accounts, got %v", inUseAccounts)
+	}
+	// Assert the dynamic type explicitly so a type change cannot slip past an
+	// interface comparison.
+	if got, ok := status["in_use"].(int); !ok || got != 1 {
+		t.Fatalf("expected 1 in_use, got %v", status["in_use"])
+	}
+
+	pool.Release(acc.Identifier())
+}
+
+// TestPoolAcquireWaitContextCancelled checks that a caller queued in
+// AcquireWait returns false once its context is cancelled.
+func TestPoolAcquireWaitContextCancelled(t *testing.T) {
+	pool := newSingleAccountPoolForTest(t, "1")
+	// Exhaust the pool
+	first, ok := pool.Acquire("", nil)
+	if !ok {
+		t.Fatal("expected first acquire to succeed")
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+
+	var wg sync.WaitGroup
+	wg.Add(1)
+	// waitOK is written by the goroutine and read only after wg.Wait returns.
+	var waitOK bool
+	go func() {
+		defer wg.Done()
+		_, waitOK = pool.AcquireWait(ctx, "", nil)
+	}()
+
+	// Wait until queued
+	waitForWaitingCount(t, pool, 1)
+
+	// Cancel context
+	cancel()
+
+	wg.Wait()
+	if waitOK {
+		t.Fatal("expected acquire to fail after context cancellation")
+	}
+
+	pool.Release(first.Identifier())
+}
+
+// TestPoolAcquireWaitTargetAccount checks that AcquireWait returns a free
+// target account immediately even while another account is saturated.
+func TestPoolAcquireWaitTargetAccount(t *testing.T) {
+	pool := newPoolForTest(t, "1")
+
+	// Saturate acc1 so only acc2 has a free slot.
+	busy, acquired := pool.Acquire("acc1@example.com", nil)
+	if !acquired {
+		t.Fatal("expected acquire acc1 success")
+	}
+
+	free, acquired := pool.AcquireWait(context.Background(), "acc2@example.com", nil)
+	if !acquired {
+		t.Fatal("expected acquire acc2 success via AcquireWait")
+	}
+	if got := free.Identifier(); got != "acc2@example.com" {
+		t.Fatalf("expected acc2, got %q", got)
+	}
+
+	pool.Release(busy.Identifier())
+	pool.Release(free.Identifier())
+}
+
+// TestPoolMaxQueueSizeOverride checks that DS2API_ACCOUNT_MAX_QUEUE is
+// reflected in the reported max_queue_size.
+func TestPoolMaxQueueSizeOverride(t *testing.T) {
+	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
+	t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "5")
+	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[{"email":"acc1@example.com","token":"t1"}]}`)
+
+	status := NewPool(config.LoadStore()).Status()
+	size, isInt := status["max_queue_size"].(int)
+	if !isInt || size != 5 {
+		t.Fatalf("expected max_queue_size=5, got %#v", status["max_queue_size"])
+	}
+}
+
+// TestPoolMultipleAcquireReleaseCycles checks that a single-slot pool can be
+// acquired and released repeatedly.
+func TestPoolMultipleAcquireReleaseCycles(t *testing.T) {
+	pool := newSingleAccountPoolForTest(t, "1")
+	for cycle := 0; cycle < 10; cycle++ {
+		account, acquired := pool.Acquire("", nil)
+		if !acquired {
+			t.Fatalf("acquire failed at cycle %d", cycle)
+		}
+		pool.Release(account.Identifier())
+	}
+}
+
+// TestPoolConcurrentAcquireWait starts several AcquireWait callers against a
+// saturated single-slot pool and checks that at least one of them obtains the
+// slot after it is released.
+func TestPoolConcurrentAcquireWait(t *testing.T) {
+	pool := newSingleAccountPoolForTest(t, "1")
+	first, ok := pool.Acquire("", nil)
+	if !ok {
+		t.Fatal("expected first acquire success")
+	}
+
+	const waiters = 3
+	results := make(chan bool, waiters)
+
+	for i := 0; i < waiters; i++ {
+		go func() {
+			ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
+			defer cancel()
+			_, ok := pool.AcquireWait(ctx, "", nil)
+			results <- ok
+		}()
+	}
+
+	// Block until one caller is actually queued instead of sleeping for a
+	// fixed interval; with one account and one slot the queue holds one waiter.
+	waitForWaitingCount(t, pool, 1)
+
+	// Release and allow queued requests to proceed
+	pool.Release(first.Identifier())
+
+	successCount := 0
+	timeoutCount := 0
+	for i := 0; i < waiters; i++ {
+		select {
+		case ok := <-results:
+			if ok {
+				successCount++
+				// Release for next waiter
+				pool.Release("acc1@example.com")
+			} else {
+				timeoutCount++
+			}
+		case <-time.After(3 * time.Second):
+			t.Fatal("timed out waiting for results")
+		}
+	}
+
+	// At least 1 should succeed; 2 may fail due to queue limit
+	if successCount < 1 {
+		t.Fatalf("expected at least 1 success, got success=%d timeout=%d", successCount, timeoutCount)
+	}
+}
--- a/internal/account/pool_limits.go
+++ b/internal/account/pool_limits.go
@@ -0,0 +1,81 @@
+package account
+
+import (
+	"os"
+	"strconv"
+	"strings"
+)
+
+// ApplyRuntimeLimits replaces the pool's limits and wakes the first waiter.
+//
+// Out-of-range inputs are clamped: maxInflightPerAccount to at least 1 and
+// maxQueueSize to at least 0. A non-positive globalMaxInflight is derived as
+// per-account limit times the store's account count, falling back to the
+// per-account limit when that product is not positive.
+func (p *Pool) ApplyRuntimeLimits(maxInflightPerAccount, maxQueueSize, globalMaxInflight int) {
+	if maxInflightPerAccount < 1 {
+		maxInflightPerAccount = 1
+	}
+	if maxQueueSize < 0 {
+		maxQueueSize = 0
+	}
+	if globalMaxInflight <= 0 {
+		derived := maxInflightPerAccount * len(p.store.Accounts())
+		if derived > 0 {
+			globalMaxInflight = derived
+		} else {
+			globalMaxInflight = maxInflightPerAccount
+		}
+	}
+
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	p.maxInflightPerAccount = maxInflightPerAccount
+	p.maxQueueSize = maxQueueSize
+	p.globalMaxInflight = globalMaxInflight
+	p.recommendedConcurrency = defaultRecommendedConcurrency(len(p.queue), maxInflightPerAccount)
+	p.notifyWaiterLocked()
+}
+
+func maxInflightFromEnv() int {
+	if raw := strings.TrimSpace(os.Getenv("DS2API_ACCOUNT_MAX_INFLIGHT")); raw != "" {
+		if n, err := strconv.Atoi(raw); err == nil && n > 0 {
+			return n
+		}
+	}
+	return 2
+}
+
+// defaultRecommendedConcurrency returns accountCount times the per-account
+// limit. A non-positive account count yields 0; a non-positive per-account
+// limit is treated as 2.
+func defaultRecommendedConcurrency(accountCount, maxInflightPerAccount int) int {
+	switch {
+	case accountCount <= 0:
+		return 0
+	case maxInflightPerAccount <= 0:
+		return accountCount * 2
+	default:
+		return accountCount * maxInflightPerAccount
+	}
+}
+
+// maxQueueFromEnv reads DS2API_ACCOUNT_MAX_QUEUE and returns it when it parses
+// as a non-negative integer; otherwise it returns defaultSize clamped to 0.
+func maxQueueFromEnv(defaultSize int) int {
+	raw := strings.TrimSpace(os.Getenv("DS2API_ACCOUNT_MAX_QUEUE"))
+	if raw != "" {
+		n, err := strconv.Atoi(raw)
+		if err == nil && n >= 0 {
+			return n
+		}
+	}
+	if defaultSize >= 0 {
+		return defaultSize
+	}
+	return 0
+}
+
+// canAcquireIDLocked reports whether accountID may take another slot: the
+// identifier must be non-empty, below its per-account limit, and the pool must
+// be below the global limit when one is set (> 0).
+func (p *Pool) canAcquireIDLocked(accountID string) bool {
+	switch {
+	case accountID == "":
+		return false
+	case p.inUse[accountID] >= p.maxInflightPerAccount:
+		return false
+	case p.globalMaxInflight > 0 && p.currentInUseLocked() >= p.globalMaxInflight:
+		return false
+	}
+	return true
+}
+
+func (p *Pool) currentInUseLocked() int {
+	total := 0
+	for _, n := range p.inUse {
+		total += n
+	}
+	return total
+}
--- a/internal/account/pool_test.go
+++ b/internal/account/pool_test.go
@@ -0,0 +1,313 @@
+package account
+
+import (
+	"context"
+	"sync"
+	"testing"
+	"time"
+
+	"ds2api/internal/config"
+)
+
+// newPoolForTest returns a pool with two token-bearing accounts (acc1, acc2),
+// the given DS2API_ACCOUNT_MAX_INFLIGHT value, and an empty queue override.
+func newPoolForTest(t *testing.T, maxInflight string) *Pool {
+	t.Helper()
+	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", maxInflight)
+	t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["k1"],
+		"accounts":[
+			{"email":"acc1@example.com","token":"token1"},
+			{"email":"acc2@example.com","token":"token2"}
+		]
+	}`)
+	return NewPool(config.LoadStore())
+}
+
+// newSingleAccountPoolForTest returns a pool with a single account (acc1), the
+// given DS2API_ACCOUNT_MAX_INFLIGHT value, and an empty queue override.
+func newSingleAccountPoolForTest(t *testing.T, maxInflight string) *Pool {
+	t.Helper()
+	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", maxInflight)
+	t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["k1"],
+		"accounts":[{"email":"acc1@example.com","token":"token1"}]
+	}`)
+	store := config.LoadStore()
+	return NewPool(store)
+}
+
+// waitForWaitingCount polls pool.Status every 10ms for up to 800ms until the
+// "waiting" field equals want, and fails the test if it never does.
+func waitForWaitingCount(t *testing.T, pool *Pool, want int) {
+	t.Helper()
+	for deadline := time.Now().Add(800 * time.Millisecond); time.Now().Before(deadline); time.Sleep(10 * time.Millisecond) {
+		got, isInt := pool.Status()["waiting"].(int)
+		if isInt && got == want {
+			return
+		}
+	}
+	t.Fatalf("waiting count did not reach %d, current status=%v", want, pool.Status())
+}
+
+// TestPoolRoundRobinWithConcurrentSlots checks that untargeted acquires
+// alternate between accounts until every slot is taken, and that a released
+// slot can be taken again.
+func TestPoolRoundRobinWithConcurrentSlots(t *testing.T) {
+	pool := newPoolForTest(t, "2")
+
+	want := []string{"acc1@example.com", "acc2@example.com", "acc1@example.com", "acc2@example.com"}
+	order := make([]string, 0, 4)
+	for step := 1; step <= 4; step++ {
+		account, acquired := pool.Acquire("", nil)
+		if !acquired {
+			t.Fatalf("expected acquire success at step %d", step)
+		}
+		order = append(order, account.Identifier())
+	}
+	for i, expected := range want {
+		if order[i] != expected {
+			t.Fatalf("unexpected order at %d: got %q want %q (full=%v)", i, order[i], expected, order)
+		}
+	}
+
+	_, extra := pool.Acquire("", nil)
+	if extra {
+		t.Fatalf("expected acquire to fail when all inflight slots are occupied")
+	}
+
+	pool.Release("acc1@example.com")
+	again, acquired := pool.Acquire("", nil)
+	if !acquired || again.Identifier() != "acc1@example.com" {
+		t.Fatalf("expected reacquire acc1 after releasing one slot, got ok=%v id=%q", acquired, again.Identifier())
+	}
+}
+
+func TestPoolTargetAccountInflightLimit(t *testing.T) {
+	pool := newPoolForTest(t, "2")
+
+	for i := 0; i < 2; i++ {
+		if _, ok := pool.Acquire("acc1@example.com", nil); !ok {
+			t.Fatalf("expected target acquire success at step %d", i+1)
+		}
+	}
+	if _, ok := pool.Acquire("acc1@example.com", nil); ok {
+		t.Fatalf("expected third acquire on same target to fail due to inflight limit")
+	}
+}
+
+// TestPoolConcurrentAcquireDistribution fires six simultaneous acquires at a
+// pool with two accounts and two slots each, and checks that exactly four
+// succeed without any account exceeding its limit.
+func TestPoolConcurrentAcquireDistribution(t *testing.T) {
+	pool := newPoolForTest(t, "2")
+
+	// start is closed once all goroutines are launched so that they contend
+	// for the pool at roughly the same time.
+	start := make(chan struct{})
+	results := make(chan string, 6)
+	var wg sync.WaitGroup
+	for i := 0; i < 6; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			<-start
+			acc, ok := pool.Acquire("", nil)
+			if !ok {
+				results <- "FAIL"
+				return
+			}
+			results <- acc.Identifier()
+		}()
+	}
+
+	close(start)
+	wg.Wait()
+	close(results)
+
+	success := 0
+	fail := 0
+	perAccount := map[string]int{}
+	for id := range results {
+		if id == "FAIL" {
+			fail++
+			continue
+		}
+		success++
+		perAccount[id]++
+	}
+	if success != 4 || fail != 2 {
+		t.Fatalf("unexpected concurrent acquire result: success=%d fail=%d perAccount=%v", success, fail, perAccount)
+	}
+	for id, n := range perAccount {
+		if n > 2 {
+			t.Fatalf("account %s exceeded inflight limit: %d", id, n)
+		}
+	}
+}
+
+// TestPoolStatusRecommendedConcurrencyDefault checks the reported limits when
+// DS2API_ACCOUNT_MAX_INFLIGHT is empty: 2 per account, 4 recommended, queue 4.
+func TestPoolStatusRecommendedConcurrencyDefault(t *testing.T) {
+	status := newPoolForTest(t, "").Status()
+
+	perAccount, isInt := status["max_inflight_per_account"].(int)
+	if !isInt || perAccount != 2 {
+		t.Fatalf("unexpected max_inflight_per_account: %#v", status["max_inflight_per_account"])
+	}
+	recommended, isInt := status["recommended_concurrency"].(int)
+	if !isInt || recommended != 4 {
+		t.Fatalf("unexpected recommended_concurrency: %#v", status["recommended_concurrency"])
+	}
+	queueSize, isInt := status["max_queue_size"].(int)
+	if !isInt || queueSize != 4 {
+		t.Fatalf("unexpected max_queue_size: %#v", status["max_queue_size"])
+	}
+}
+
+// TestPoolStatusRecommendedConcurrencyRespectsOverride checks the reported
+// limits with a per-account limit of 3: 6 recommended and a queue size of 6.
+func TestPoolStatusRecommendedConcurrencyRespectsOverride(t *testing.T) {
+	status := newPoolForTest(t, "3").Status()
+
+	perAccount, isInt := status["max_inflight_per_account"].(int)
+	if !isInt || perAccount != 3 {
+		t.Fatalf("unexpected max_inflight_per_account: %#v", status["max_inflight_per_account"])
+	}
+	recommended, isInt := status["recommended_concurrency"].(int)
+	if !isInt || recommended != 6 {
+		t.Fatalf("unexpected recommended_concurrency: %#v", status["recommended_concurrency"])
+	}
+	queueSize, isInt := status["max_queue_size"].(int)
+	if !isInt || queueSize != 6 {
+		t.Fatalf("unexpected max_queue_size: %#v", status["max_queue_size"])
+	}
+}
+
+// TestPoolGlobalMaxInflightEnv checks that DS2API_GLOBAL_MAX_INFLIGHT is
+// reported as global_max_inflight without changing the per-account limit or
+// the recommended concurrency.
+func TestPoolGlobalMaxInflightEnv(t *testing.T) {
+	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
+	t.Setenv("DS2API_GLOBAL_MAX_INFLIGHT", "4")
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["k1"],
+		"accounts":[
+			{"email":"acc1@example.com","token":"token1"},
+			{"email":"acc2@example.com","token":"token2"}
+		]
+	}`)
+
+	status := NewPool(config.LoadStore()).Status()
+
+	global, isInt := status["global_max_inflight"].(int)
+	if !isInt || global != 4 {
+		t.Fatalf("unexpected global_max_inflight: %#v", status["global_max_inflight"])
+	}
+	perAccount, isInt := status["max_inflight_per_account"].(int)
+	if !isInt || perAccount != 1 {
+		t.Fatalf("unexpected max_inflight_per_account: %#v", status["max_inflight_per_account"])
+	}
+	recommended, isInt := status["recommended_concurrency"].(int)
+	if !isInt || recommended != 2 {
+		t.Fatalf("unexpected recommended_concurrency: %#v", status["recommended_concurrency"])
+	}
+}
+
+// TestPoolDropsLegacyTokenOnlyAccountOnLoad checks that an account configured
+// with only a token is absent from the pool: total and available are zero and
+// acquire fails.
+func TestPoolDropsLegacyTokenOnlyAccountOnLoad(t *testing.T) {
+	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["k1"],
+		"accounts":[{"token":"token-only-account"}]
+	}`)
+
+	pool := NewPool(config.LoadStore())
+	status := pool.Status()
+
+	total, isInt := status["total"].(int)
+	if !isInt || total != 0 {
+		t.Fatalf("unexpected total in pool status: %#v", status["total"])
+	}
+	available, isInt := status["available"].(int)
+	if !isInt || available != 0 {
+		t.Fatalf("unexpected available in pool status: %#v", status["available"])
+	}
+
+	_, acquired := pool.Acquire("", nil)
+	if acquired {
+		t.Fatalf("expected acquire to fail for token-only account")
+	}
+}
+
+// TestPoolAcquireRotatesIntoTokenlessAccounts checks that successive
+// acquire/release cycles rotate through every account, including those
+// configured with an empty token.
+func TestPoolAcquireRotatesIntoTokenlessAccounts(t *testing.T) {
+	t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
+	t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
+	t.Setenv("DS2API_CONFIG_JSON", `{
+		"keys":["k1"],
+		"accounts":[
+			{"email":"acc1@example.com","token":"token1"},
+			{"email":"acc2@example.com","token":""},
+			{"email":"acc3@example.com","token":""}
+		]
+	}`)
+
+	pool := NewPool(config.LoadStore())
+	expected := []string{"acc1@example.com", "acc2@example.com", "acc3@example.com"}
+	for idx, want := range expected {
+		step := idx + 1
+		account, acquired := pool.Acquire("", nil)
+		if !acquired {
+			t.Fatalf("expected acquire success at step %d", step)
+		}
+		got := account.Identifier()
+		if got != want {
+			t.Fatalf("unexpected account at step %d: got %q want %q", step, got, want)
+		}
+		pool.Release(got)
+	}
+}
+
+// TestPoolAcquireWaitQueuesAndSucceedsAfterRelease checks that a caller queued
+// behind a saturated single-slot pool obtains the account once it is released.
+func TestPoolAcquireWaitQueuesAndSucceedsAfterRelease(t *testing.T) {
+	pool := newSingleAccountPoolForTest(t, "1")
+	first, ok := pool.Acquire("", nil)
+	if !ok {
+		t.Fatal("expected first acquire to succeed")
+	}
+
+	type result struct {
+		id string
+		ok bool
+	}
+	// Buffered so the goroutine can finish even if the test has already failed.
+	resCh := make(chan result, 1)
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
+	defer cancel()
+	go func() {
+		acc, ok := pool.AcquireWait(ctx, "", nil)
+		resCh <- result{id: acc.Identifier(), ok: ok}
+	}()
+
+	// Only release after the goroutine is confirmed to be queued.
+	waitForWaitingCount(t, pool, 1)
+	pool.Release(first.Identifier())
+
+	select {
+	case res := <-resCh:
+		if !res.ok {
+			t.Fatal("expected queued acquire to succeed after release")
+		}
+		if res.id != "acc1@example.com" {
+			t.Fatalf("unexpected account id from queued acquire: %q", res.id)
+		}
+	case <-time.After(time.Second):
+		t.Fatal("timed out waiting for queued acquire result")
+	}
+}
+
+// TestPoolAcquireWaitQueueLimitReturnsFalse checks that, with one waiter
+// already queued on a single-slot pool, a second AcquireWait fails quickly
+// rather than waiting for its context, and that the first waiter still
+// succeeds after the slot is released.
+func TestPoolAcquireWaitQueueLimitReturnsFalse(t *testing.T) {
+	pool := newSingleAccountPoolForTest(t, "1")
+	first, ok := pool.Acquire("", nil)
+	if !ok {
+		t.Fatal("expected first acquire to succeed")
+	}
+
+	type result struct {
+		id string
+		ok bool
+	}
+	firstWaiter := make(chan result, 1)
+	ctx1, cancel1 := context.WithTimeout(context.Background(), 1200*time.Millisecond)
+	defer cancel1()
+	go func() {
+		acc, ok := pool.AcquireWait(ctx1, "", nil)
+		firstWaiter <- result{id: acc.Identifier(), ok: ok}
+	}()
+	// Make sure the first waiter occupies the queue before the second call.
+	waitForWaitingCount(t, pool, 1)
+
+	ctx2, cancel2 := context.WithTimeout(context.Background(), 500*time.Millisecond)
+	defer cancel2()
+	start := time.Now()
+	if _, ok := pool.AcquireWait(ctx2, "", nil); ok {
+		t.Fatal("expected second queued acquire to fail when queue is full")
+	}
+	// Well below ctx2's 500ms timeout: the failure must come from the queue
+	// limit, not from the context expiring.
+	if time.Since(start) > 120*time.Millisecond {
+		t.Fatalf("queue-full acquire should fail fast, took %s", time.Since(start))
+	}
+
+	pool.Release(first.Identifier())
+	select {
+	case res := <-firstWaiter:
+		if !res.ok {
+			t.Fatal("expected first queued acquire to succeed after release")
+		}
+	case <-time.After(time.Second):
+		t.Fatal("timed out waiting for first queued acquire")
+	}
+}
--- a/internal/account/pool_waiters.go
+++ b/internal/account/pool_waiters.go
@@ -0,0 +1,43 @@
+package account
+
+// canQueueLocked reports whether a caller may join the wait queue. A non-empty
+// target must be neither excluded nor missing from the store, queueing must be
+// enabled (maxQueueSize > 0), and the queue must have room.
+func (p *Pool) canQueueLocked(target string, exclude map[string]bool) bool {
+	if target != "" && exclude[target] {
+		return false
+	}
+	if target != "" {
+		_, known := p.store.FindAccount(target)
+		if !known {
+			return false
+		}
+	}
+	return p.maxQueueSize > 0 && len(p.waiters) < p.maxQueueSize
+}
+
+// notifyWaiterLocked removes the first waiter from the queue and wakes it by
+// closing its channel. It does nothing when the queue is empty.
+func (p *Pool) notifyWaiterLocked() {
+	if len(p.waiters) > 0 {
+		head := p.waiters[0]
+		p.waiters = p.waiters[1:]
+		close(head)
+	}
+}
+
+// removeWaiterLocked deletes waiter from the queue without closing it and
+// reports whether it was still queued.
+func (p *Pool) removeWaiterLocked(waiter chan struct{}) bool {
+	for pos := 0; pos < len(p.waiters); pos++ {
+		if p.waiters[pos] == waiter {
+			p.waiters = append(p.waiters[:pos], p.waiters[pos+1:]...)
+			return true
+		}
+	}
+	return false
+}
+
+// drainWaitersLocked wakes every queued waiter by closing its channel and
+// empties the queue.
+func (p *Pool) drainWaitersLocked() {
+	for idx := range p.waiters {
+		close(p.waiters[idx])
+	}
+	p.waiters = nil
+}
--- a/internal/adapter/claude/convert.go
+++ b/internal/adapter/claude/convert.go
@@ -0,0 +1,11 @@
+package claude
+
+import (
+	"ds2api/internal/claudeconv"
+)
+
+// defaultClaudeModel is the model name handed to claudeconv as the default.
+const defaultClaudeModel = "claude-sonnet-4-5"
+
+// convertClaudeToDeepSeek delegates to claudeconv.ConvertClaudeToDeepSeek,
+// supplying defaultClaudeModel as the default-model argument.
+func convertClaudeToDeepSeek(claudeReq map[string]any, store ConfigReader) map[string]any {
+	return claudeconv.ConvertClaudeToDeepSeek(claudeReq, store, defaultClaudeModel)
+}
--- a/internal/adapter/claude/deps.go
+++ b/internal/adapter/claude/deps.go
@@ -0,0 +1,34 @@
+package claude
+
+import (
+	"context"
+	"net/http"
+
+	"ds2api/internal/auth"
+	"ds2api/internal/config"
+	"ds2api/internal/deepseek"
+)
+
+// AuthResolver resolves the auth context for an incoming request and releases
+// it afterwards. *auth.Resolver satisfies it.
+type AuthResolver interface {
+	// Determine derives the request's auth context or returns an error.
+	Determine(req *http.Request) (*auth.RequestAuth, error)
+	// Release gives back an auth context obtained from Determine.
+	Release(a *auth.RequestAuth)
+}
+
+// DeepSeekCaller is the set of upstream calls this package depends on.
+// *deepseek.Client satisfies it.
+type DeepSeekCaller interface {
+	// CreateSession returns a string result (presumably a session id — confirm
+	// against deepseek.Client), trying up to maxAttempts times.
+	CreateSession(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
+	// GetPow returns a string result that callers pass to CallCompletion as
+	// powResp (presumably a proof-of-work response — confirm).
+	GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
+	// CallCompletion sends payload upstream and returns the raw HTTP response.
+	CallCompletion(ctx context.Context, a *auth.RequestAuth, payload map[string]any, powResp string, maxAttempts int) (*http.Response, error)
+}
+
+// ConfigReader is the read-only configuration this package needs.
+// *config.Store satisfies it.
+type ConfigReader interface {
+	// ClaudeMapping returns the configured string-to-string model mapping.
+	ClaudeMapping() map[string]string
+	// CompatStripReferenceMarkers reports a compatibility flag (presumably
+	// whether reference markers are stripped from output — confirm).
+	CompatStripReferenceMarkers() bool
+}
+
+// OpenAIChatRunner is an HTTP handler exposing a ChatCompletions endpoint.
+type OpenAIChatRunner interface {
+	// ChatCompletions handles a single chat-completions HTTP request.
+	ChatCompletions(w http.ResponseWriter, r *http.Request)
+}
+
+// Compile-time checks that the concrete implementations satisfy the
+// interfaces declared in this file.
+var _ AuthResolver = (*auth.Resolver)(nil)
+var _ DeepSeekCaller = (*deepseek.Client)(nil)
+var _ ConfigReader = (*config.Store)(nil)
--- a/internal/adapter/claude/deps_injection_test.go
+++ b/internal/adapter/claude/deps_injection_test.go
@@ -0,0 +1,34 @@
+package claude
+
+import "testing"
+
+// mockClaudeConfig is a test double that implements ConfigReader's methods
+// with a fixed model mapping.
+type mockClaudeConfig struct {
+	// m is returned verbatim by ClaudeMapping.
+	m map[string]string
+}
+
+// ClaudeMapping returns the mapping supplied at construction.
+func (m mockClaudeConfig) ClaudeMapping() map[string]string { return m.m }
+// CompatStripReferenceMarkers always reports true.
+func (mockClaudeConfig) CompatStripReferenceMarkers() bool  { return true }
+
+// TestNormalizeClaudeRequestUsesConfigInterfaceMapping checks that
+// normalizeClaudeRequest resolves the model through the ConfigReader mapping
+// and sets the thinking and search flags for the resolved model.
+func TestNormalizeClaudeRequestUsesConfigInterfaceMapping(t *testing.T) {
+	cfg := mockClaudeConfig{m: map[string]string{
+		"fast": "deepseek-chat",
+		"slow": "deepseek-reasoner-search",
+	}}
+	userMsg := map[string]any{"role": "user", "content": "hello"}
+	req := map[string]any{
+		"model":    "claude-opus-4-6",
+		"messages": []any{userMsg},
+	}
+
+	out, err := normalizeClaudeRequest(cfg, req)
+	if err != nil {
+		t.Fatalf("normalizeClaudeRequest error: %v", err)
+	}
+	std := out.Standard
+	if std.ResolvedModel != "deepseek-reasoner-search" {
+		t.Fatalf("resolved model mismatch: got=%q", std.ResolvedModel)
+	}
+	if !(std.Thinking && std.Search) {
+		t.Fatalf("unexpected flags: thinking=%v search=%v", std.Thinking, std.Search)
+	}
+}
--- a/internal/adapter/claude/error_shape_test.go
+++ b/internal/adapter/claude/error_shape_test.go
@@ -0,0 +1,34 @@
+package claude
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+)
+
+// TestWriteClaudeErrorIncludesUnifiedFields checks the status code and the
+// message, type, code and param fields of the error body written for a 401.
+func TestWriteClaudeErrorIncludesUnifiedFields(t *testing.T) {
+	recorder := httptest.NewRecorder()
+	writeClaudeError(recorder, http.StatusUnauthorized, "bad token")
+
+	if got := recorder.Code; got != http.StatusUnauthorized {
+		t.Fatalf("expected 401, got %d", got)
+	}
+
+	var payload map[string]any
+	err := json.Unmarshal(recorder.Body.Bytes(), &payload)
+	if err != nil {
+		t.Fatalf("decode body: %v", err)
+	}
+
+	detail, _ := payload["error"].(map[string]any)
+	if got := detail["message"]; got != "bad token" {
+		t.Fatalf("unexpected message: %v", got)
+	}
+	if got := detail["type"]; got != "invalid_request_error" {
+		t.Fatalf("unexpected type: %v", got)
+	}
+	if got := detail["code"]; got != "authentication_failed" {
+		t.Fatalf("unexpected code: %v", got)
+	}
+	_, hasParam := detail["param"]
+	if !hasParam {
+		t.Fatal("expected param field")
+	}
+}
--- a/internal/adapter/claude/handler_errors.go
+++ b/internal/adapter/claude/handler_errors.go
@@ -0,0 +1,25 @@
+package claude
+
+import "net/http"
+
+func writeClaudeError(w http.ResponseWriter, status int, message string) {
+	code := "invalid_request"
+	switch status {
+	case http.StatusUnauthorized:
+		code = "authentication_failed"
+	case http.StatusTooManyRequests:
+		code = "rate_limit_exceeded"
+	case http.StatusNotFound:
+		code = "not_found"
+	case http.StatusInternalServerError:
+		code = "internal_error"
+	}
+	writeJSON(w, status, map[string]any{
+		"error": map[string]any{
+			"type":    "invalid_request_error",
+			"message": message,
+			"code":    code,
+			"param":   nil,
+		},
+	})
+}
--- a/internal/adapter/claude/handler_helpers_misc.go
+++ b/internal/adapter/claude/handler_helpers_misc.go
@@ -0,0 +1,97 @@
+package claude
+
+import (
+	"fmt"
+	"strings"
+)
+
+func hasSystemMessage(messages []any) bool {
+	for _, m := range messages {
+		msg, ok := m.(map[string]any)
+		if ok && msg["role"] == "system" {
+			return true
+		}
+	}
+	return false
+}
+
+func extractClaudeToolNames(tools []any) []string {
+	out := make([]string, 0, len(tools))
+	for _, t := range tools {
+		m, ok := t.(map[string]any)
+		if !ok {
+			continue
+		}
+		name, _, _ := extractClaudeToolMeta(m)
+		if name != "" {
+			out = append(out, name)
+		}
+	}
+	return out
+}
+
+func extractClaudeToolMeta(m map[string]any) (string, string, any) {
+	name, _ := m["name"].(string)
+	desc, _ := m["description"].(string)
+	schemaObj := m["input_schema"]
+	if schemaObj == nil {
+		schemaObj = m["parameters"]
+	}
+
+	if fn, ok := m["function"].(map[string]any); ok {
+		if strings.TrimSpace(name) == "" {
+			name, _ = fn["name"].(string)
+		}
+		if strings.TrimSpace(desc) == "" {
+			desc, _ = fn["description"].(string)
+		}
+		if schemaObj == nil {
+			if v, ok := fn["input_schema"]; ok {
+				schemaObj = v
+			}
+		}
+		if schemaObj == nil {
+			if v, ok := fn["parameters"]; ok {
+				schemaObj = v
+			}
+		}
+	}
+	return strings.TrimSpace(name), strings.TrimSpace(desc), schemaObj
+}
+
+func toMessageMaps(v any) []map[string]any {
+	arr, ok := v.([]any)
+	if !ok {
+		return nil
+	}
+	out := make([]map[string]any, 0, len(arr))
+	for _, item := range arr {
+		if m, ok := item.(map[string]any); ok {
+			out = append(out, m)
+		}
+	}
+	return out
+}
+
+func extractMessageContent(v any) string {
+	switch x := v.(type) {
+	case string:
+		return x
+	case []any:
+		parts := make([]string, 0, len(x))
+		for _, it := range x {
+			parts = append(parts, fmt.Sprintf("%v", it))
+		}
+		return strings.Join(parts, "\n")
+	default:
+		return fmt.Sprintf("%v", x)
+	}
+}
+
+func cloneMap(in map[string]any) map[string]any {
+	out := make(map[string]any, len(in))
+	for k, v := range in {
+		out[k] = v
+	}
+	return out
+}
--- a/internal/adapter/claude/handler_messages.go
+++ b/internal/adapter/claude/handler_messages.go
@@ -0,0 +1,176 @@
+package claude
+
+import (
+	"bytes"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+
+	"ds2api/internal/config"
+	streamengine "ds2api/internal/stream"
+	"ds2api/internal/translatorcliproxy"
+	"ds2api/internal/util"
+
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
+)
+
+func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
+	if strings.TrimSpace(r.Header.Get("anthropic-version")) == "" {
+		r.Header.Set("anthropic-version", "2023-06-01")
+	}
+	if h.OpenAI == nil {
+		writeClaudeError(w, http.StatusInternalServerError, "OpenAI proxy backend unavailable.")
+		return
+	}
+	if h.proxyViaOpenAI(w, r, h.Store) {
+		return
+	}
+	writeClaudeError(w, http.StatusBadGateway, "Failed to proxy Claude request.")
+}
+
+func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store ConfigReader) bool {
+	raw, err := io.ReadAll(r.Body)
+	if err != nil {
+		writeClaudeError(w, http.StatusBadRequest, "invalid body")
+		return true
+	}
+	var req map[string]any
+	if err := json.Unmarshal(raw, &req); err != nil {
+		writeClaudeError(w, http.StatusBadRequest, "invalid json")
+		return true
+	}
+	model, _ := req["model"].(string)
+	stream := util.ToBool(req["stream"])
+
+	// Preserve claude_mapping (fast/slow/opus routing) while proxying via OpenAI.
+	translateModel := model
+	if store != nil {
+		if norm, normErr := normalizeClaudeRequest(store, cloneMap(req)); normErr == nil && strings.TrimSpace(norm.Standard.ResolvedModel) != "" {
+			translateModel = strings.TrimSpace(norm.Standard.ResolvedModel)
+		}
+	}
+	translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatClaude, translateModel, raw, stream)
+
+	isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1"
+	isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
+
+	if isVercelRelease {
+		proxyReq := r.Clone(r.Context())
+		proxyReq.URL.Path = "/v1/chat/completions"
+		proxyReq.Body = io.NopCloser(bytes.NewReader(raw))
+		proxyReq.ContentLength = int64(len(raw))
+		rec := httptest.NewRecorder()
+		h.OpenAI.ChatCompletions(rec, proxyReq)
+		res := rec.Result()
+		defer res.Body.Close()
+		body, _ := io.ReadAll(res.Body)
+		for k, vv := range res.Header {
+			for _, v := range vv {
+				w.Header().Add(k, v)
+			}
+		}
+		w.WriteHeader(res.StatusCode)
+		_, _ = w.Write(body)
+		return true
+	}
+
+	proxyReq := r.Clone(r.Context())
+	proxyReq.URL.Path = "/v1/chat/completions"
+	proxyReq.Body = io.NopCloser(bytes.NewReader(translatedReq))
+	proxyReq.ContentLength = int64(len(translatedReq))
+
+	if stream && !isVercelPrepare {
+		w.Header().Set("Content-Type", "text/event-stream")
+		w.Header().Set("Cache-Control", "no-cache, no-transform")
+		w.Header().Set("Connection", "keep-alive")
+		w.Header().Set("X-Accel-Buffering", "no")
+		streamWriter := translatorcliproxy.NewOpenAIStreamTranslatorWriter(w, sdktranslator.FormatClaude, model, raw, translatedReq)
+		h.OpenAI.ChatCompletions(streamWriter, proxyReq)
+		return true
+	}
+
+	rec := httptest.NewRecorder()
+	h.OpenAI.ChatCompletions(rec, proxyReq)
+	res := rec.Result()
+	defer res.Body.Close()
+	body, _ := io.ReadAll(res.Body)
+	if res.StatusCode < 200 || res.StatusCode >= 300 {
+		for k, vv := range res.Header {
+			for _, v := range vv {
+				w.Header().Add(k, v)
+			}
+		}
+		w.WriteHeader(res.StatusCode)
+		_, _ = w.Write(body)
+		return true
+	}
+	if isVercelPrepare {
+		for k, vv := range res.Header {
+			for _, v := range vv {
+				w.Header().Add(k, v)
+			}
+		}
+		w.WriteHeader(res.StatusCode)
+		_, _ = w.Write(body)
+		return true
+	}
+	converted := translatorcliproxy.FromOpenAINonStream(sdktranslator.FormatClaude, model, raw, translatedReq, body)
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_, _ = w.Write(converted)
+	return true
+}
+
+func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Request, resp *http.Response, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string) {
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		writeClaudeError(w, http.StatusInternalServerError, string(body))
+		return
+	}
+
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache, no-transform")
+	w.Header().Set("Connection", "keep-alive")
+	w.Header().Set("X-Accel-Buffering", "no")
+	rc := http.NewResponseController(w)
+	_, canFlush := w.(http.Flusher)
+	if !canFlush {
+		config.Logger.Warn("[claude_stream] response writer does not support flush; streaming may be buffered")
+	}
+
+	streamRuntime := newClaudeStreamRuntime(
+		w,
+		rc,
+		canFlush,
+		model,
+		messages,
+		thinkingEnabled,
+		searchEnabled,
+		h.compatStripReferenceMarkers(),
+		toolNames,
+	)
+	streamRuntime.sendMessageStart()
+
+	initialType := "text"
+	if thinkingEnabled {
+		initialType = "thinking"
+	}
+	streamengine.ConsumeSSE(streamengine.ConsumeConfig{
+		Context:             r.Context(),
+		Body:                resp.Body,
+		ThinkingEnabled:     thinkingEnabled,
+		InitialType:         initialType,
+		KeepAliveInterval:   claudeStreamPingInterval,
+		IdleTimeout:         claudeStreamIdleTimeout,
+		MaxKeepAliveNoInput: claudeStreamMaxKeepaliveCnt,
+	}, streamengine.ConsumeHooks{
+		OnKeepAlive: func() {
+			streamRuntime.sendPing()
+		},
+		OnParsed:   streamRuntime.onParsed,
+		OnFinalize: streamRuntime.onFinalize,
+	})
+}
--- a/internal/adapter/claude/handler_routes.go
+++ b/internal/adapter/claude/handler_routes.go
@@ -0,0 +1,49 @@
+package claude
+
+import (
+	"net/http"
+	"time"
+
+	"github.com/go-chi/chi/v5"
+
+	"ds2api/internal/config"
+	"ds2api/internal/deepseek"
+	"ds2api/internal/util"
+)
+
+// writeJSON is a package-internal alias to avoid mass-renaming all call-sites.
+var writeJSON = util.WriteJSON
+
+type Handler struct {
+	Store  ConfigReader
+	Auth   AuthResolver
+	DS     DeepSeekCaller
+	OpenAI OpenAIChatRunner
+}
+
+func (h *Handler) compatStripReferenceMarkers() bool {
+	if h == nil || h.Store == nil {
+		return true
+	}
+	return h.Store.CompatStripReferenceMarkers()
+}
+
+var (
+	claudeStreamPingInterval    = time.Duration(deepseek.KeepAliveTimeout) * time.Second
+	claudeStreamIdleTimeout     = time.Duration(deepseek.StreamIdleTimeout) * time.Second
+	claudeStreamMaxKeepaliveCnt = deepseek.MaxKeepaliveCount
+)
+
+func RegisterRoutes(r chi.Router, h *Handler) {
+	r.Get("/anthropic/v1/models", h.ListModels)
+	r.Post("/anthropic/v1/messages", h.Messages)
+	r.Post("/anthropic/v1/messages/count_tokens", h.CountTokens)
+	r.Post("/v1/messages", h.Messages)
+	r.Post("/messages", h.Messages)
+	r.Post("/v1/messages/count_tokens", h.CountTokens)
+	r.Post("/messages/count_tokens", h.CountTokens)
+}
+
+func (h *Handler) ListModels(w http.ResponseWriter, _ *http.Request) {
+	writeJSON(w, http.StatusOK, config.ClaudeModelsResponse())
+}
--- a/internal/adapter/claude/handler_stream_test.go
+++ b/internal/adapter/claude/handler_stream_test.go
@@ -0,0 +1,436 @@
+package claude
+
+import (
+	"ds2api/internal/sse"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+)
+
+type claudeFrame struct {
+	Event   string
+	Payload map[string]any
+}
+
+func makeClaudeSSEHTTPResponse(lines ...string) *http.Response {
+	body := strings.Join(lines, "\n")
+	if !strings.HasSuffix(body, "\n") {
+		body += "\n"
+	}
+	return &http.Response{
+		StatusCode: http.StatusOK,
+		Header:     make(http.Header),
+		Body:       io.NopCloser(strings.NewReader(body)),
+	}
+}
+
+func parseClaudeFrames(t *testing.T, body string) []claudeFrame {
+	t.Helper()
+	chunks := strings.Split(body, "\n\n")
+	frames := make([]claudeFrame, 0, len(chunks))
+	for _, chunk := range chunks {
+		chunk = strings.TrimSpace(chunk)
+		if chunk == "" {
+			continue
+		}
+		lines := strings.Split(chunk, "\n")
+		eventName := ""
+		dataPayload := ""
+		for _, line := range lines {
+			line = strings.TrimSpace(line)
+			switch {
+			case strings.HasPrefix(line, "event:"):
+				eventName = strings.TrimSpace(strings.TrimPrefix(line, "event:"))
+			case strings.HasPrefix(line, "data:"):
+				dataPayload = strings.TrimSpace(strings.TrimPrefix(line, "data:"))
+			}
+		}
+		if eventName == "" || dataPayload == "" {
+			continue
+		}
+		var payload map[string]any
+		if err := json.Unmarshal([]byte(dataPayload), &payload); err != nil {
+			t.Fatalf("decode frame failed: %v, payload=%s", err, dataPayload)
+		}
+		frames = append(frames, claudeFrame{Event: eventName, Payload: payload})
+	}
+	return frames
+}
+
+func findClaudeFrames(frames []claudeFrame, event string) []claudeFrame {
+	out := make([]claudeFrame, 0)
+	for _, f := range frames {
+		if f.Event == event {
+			out = append(out, f)
+		}
+	}
+	return out
+}
+
+func TestHandleClaudeStreamRealtimeTextIncrementsWithEventHeaders(t *testing.T) {
+	h := &Handler{}
+	resp := makeClaudeSSEHTTPResponse(
+		`data: {"p":"response/content","v":"Hel"}`,
+		`data: {"p":"response/content","v":"lo"}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+
+	h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "hi"}}, false, false, nil)
+
+	body := rec.Body.String()
+	if !strings.Contains(body, "event: message_start") {
+		t.Fatalf("missing event header: message_start, body=%s", body)
+	}
+	if !strings.Contains(body, "event: content_block_delta") {
+		t.Fatalf("missing event header: content_block_delta, body=%s", body)
+	}
+	if !strings.Contains(body, "event: message_stop") {
+		t.Fatalf("missing event header: message_stop, body=%s", body)
+	}
+
+	frames := parseClaudeFrames(t, body)
+	deltas := findClaudeFrames(frames, "content_block_delta")
+	if len(deltas) < 2 {
+		t.Fatalf("expected at least 2 text deltas, got=%d body=%s", len(deltas), body)
+	}
+	combined := strings.Builder{}
+	for _, f := range deltas {
+		delta, _ := f.Payload["delta"].(map[string]any)
+		if delta["type"] == "text_delta" {
+			combined.WriteString(asString(delta["text"]))
+		}
+	}
+	if combined.String() != "Hello" {
+		t.Fatalf("unexpected combined text: %q body=%s", combined.String(), body)
+	}
+}
+
+func TestHandleClaudeStreamRealtimeThinkingDelta(t *testing.T) {
+	h := &Handler{}
+	resp := makeClaudeSSEHTTPResponse(
+		`data: {"p":"response/thinking_content","v":"思"}`,
+		`data: {"p":"response/thinking_content","v":"考"}`,
+		`data: {"p":"response/content","v":"ok"}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+
+	h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "hi"}}, true, false, nil)
+
+	frames := parseClaudeFrames(t, rec.Body.String())
+	foundThinkingDelta := false
+	for _, f := range findClaudeFrames(frames, "content_block_delta") {
+		delta, _ := f.Payload["delta"].(map[string]any)
+		if delta["type"] == "thinking_delta" {
+			foundThinkingDelta = true
+			break
+		}
+	}
+	if !foundThinkingDelta {
+		t.Fatalf("expected thinking_delta event, body=%s", rec.Body.String())
+	}
+}
+
+func TestHandleClaudeStreamRealtimeToolSafety(t *testing.T) {
+	h := &Handler{}
+	resp := makeClaudeSSEHTTPResponse(
+		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\""}`,
+		`data: {"p":"response/content","v":",\"input\":{\"q\":\"go\"}}]}"}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+
+	h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "use tool"}}, false, false, []string{"search"})
+
+	frames := parseClaudeFrames(t, rec.Body.String())
+	for _, f := range findClaudeFrames(frames, "content_block_delta") {
+		delta, _ := f.Payload["delta"].(map[string]any)
+		if delta["type"] == "text_delta" && strings.Contains(asString(delta["text"]), `"tool_calls"`) {
+			t.Fatalf("raw tool_calls JSON leaked in text delta: body=%s", rec.Body.String())
+		}
+	}
+
+	foundToolUse := false
+	for _, f := range findClaudeFrames(frames, "content_block_start") {
+		contentBlock, _ := f.Payload["content_block"].(map[string]any)
+		if contentBlock["type"] == "tool_use" {
+			foundToolUse = true
+			break
+		}
+	}
+	if !foundToolUse {
+		t.Fatalf("expected tool_use block in stream, body=%s", rec.Body.String())
+	}
+
+	foundToolUseStop := false
+	for _, f := range findClaudeFrames(frames, "message_delta") {
+		delta, _ := f.Payload["delta"].(map[string]any)
+		if delta["stop_reason"] == "tool_use" {
+			foundToolUseStop = true
+			break
+		}
+	}
+	if !foundToolUseStop {
+		t.Fatalf("expected stop_reason=tool_use, body=%s", rec.Body.String())
+	}
+}
+
+func TestHandleClaudeStreamRealtimeToolDetectionFromThinkingFallback(t *testing.T) {
+	h := &Handler{}
+	resp := makeClaudeSSEHTTPResponse(
+		`data: {"p":"response/thinking_content","v":"{\"tool_calls\":[{\"name\":\"search\""}`,
+		`data: {"p":"response/thinking_content","v":",\"input\":{\"q\":\"go\"}}]}"}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+
+	h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "use tool"}}, true, false, []string{"search"})
+
+	frames := parseClaudeFrames(t, rec.Body.String())
+	foundToolUse := false
+	for _, f := range findClaudeFrames(frames, "content_block_start") {
+		contentBlock, _ := f.Payload["content_block"].(map[string]any)
+		if contentBlock["type"] == "tool_use" && contentBlock["name"] == "search" {
+			foundToolUse = true
+			break
+		}
+	}
+	if !foundToolUse {
+		t.Fatalf("expected tool_use block from thinking fallback, body=%s", rec.Body.String())
+	}
+}
+
+func TestHandleClaudeStreamRealtimeSkipsThinkingFallbackWhenFinalTextExists(t *testing.T) {
+	h := &Handler{}
+	resp := makeClaudeSSEHTTPResponse(
+		`data: {"p":"response/thinking_content","v":"{\"tool_calls\":[{\"name\":\"search\""}`,
+		`data: {"p":"response/thinking_content","v":",\"input\":{\"q\":\"go\"}}]}"}`,
+		`data: {"p":"response/content","v":"normal answer"}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+
+	h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "use tool"}}, true, false, []string{"search"})
+
+	frames := parseClaudeFrames(t, rec.Body.String())
+	for _, f := range findClaudeFrames(frames, "content_block_start") {
+		contentBlock, _ := f.Payload["content_block"].(map[string]any)
+		if contentBlock["type"] == "tool_use" {
+			t.Fatalf("unexpected tool_use block when final text exists, body=%s", rec.Body.String())
+		}
+	}
+
+	foundEndTurn := false
+	for _, f := range findClaudeFrames(frames, "message_delta") {
+		delta, _ := f.Payload["delta"].(map[string]any)
+		if delta["stop_reason"] == "end_turn" {
+			foundEndTurn = true
+			break
+		}
+	}
+	if !foundEndTurn {
+		t.Fatalf("expected stop_reason=end_turn, body=%s", rec.Body.String())
+	}
+}
+
+func TestHandleClaudeStreamRealtimeUpstreamErrorEvent(t *testing.T) {
+	h := &Handler{}
+	resp := makeClaudeSSEHTTPResponse(
+		`data: {"error":{"message":"boom"}}`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+
+	h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "hi"}}, false, false, nil)
+
+	frames := parseClaudeFrames(t, rec.Body.String())
+	errFrames := findClaudeFrames(frames, "error")
+	if len(errFrames) == 0 {
+		t.Fatalf("expected error event frame, body=%s", rec.Body.String())
+	}
+	if errFrames[0].Payload["type"] != "error" {
+		t.Fatalf("expected error payload type, body=%s", rec.Body.String())
+	}
+}
+
+func TestHandleClaudeStreamRealtimePingEvent(t *testing.T) {
+	h := &Handler{}
+	oldPing := claudeStreamPingInterval
+	oldIdle := claudeStreamIdleTimeout
+	oldKeepalive := claudeStreamMaxKeepaliveCnt
+	claudeStreamPingInterval = 10 * time.Millisecond
+	claudeStreamIdleTimeout = 300 * time.Millisecond
+	claudeStreamMaxKeepaliveCnt = 50
+	defer func() {
+		claudeStreamPingInterval = oldPing
+		claudeStreamIdleTimeout = oldIdle
+		claudeStreamMaxKeepaliveCnt = oldKeepalive
+	}()
+
+	pr, pw := io.Pipe()
+	resp := &http.Response{StatusCode: http.StatusOK, Header: make(http.Header), Body: pr}
+	go func() {
+		time.Sleep(40 * time.Millisecond)
+		_, _ = io.WriteString(pw, "data: {\"p\":\"response/content\",\"v\":\"hi\"}\n")
+		_, _ = io.WriteString(pw, "data: [DONE]\n")
+		_ = pw.Close()
+	}()
+
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+	h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "hi"}}, false, false, nil)
+
+	frames := parseClaudeFrames(t, rec.Body.String())
+	if len(findClaudeFrames(frames, "ping")) == 0 {
+		t.Fatalf("expected ping event in stream, body=%s", rec.Body.String())
+	}
+}
+
+func TestCollectDeepSeekRegression(t *testing.T) {
+	resp := makeClaudeSSEHTTPResponse(
+		`data: {"p":"response/thinking_content","v":"想"}`,
+		`data: {"p":"response/content","v":"答"}`,
+		`data: [DONE]`,
+	)
+	result := sse.CollectStream(resp, true, true)
+	if result.Thinking != "想" {
+		t.Fatalf("unexpected thinking: %q", result.Thinking)
+	}
+	if result.Text != "答" {
+		t.Fatalf("unexpected text: %q", result.Text)
+	}
+}
+
+func asString(v any) string {
+	s, _ := v.(string)
+	return s
+}
+
+func TestHandleClaudeStreamRealtimeToolSafetyAcrossStructuredFormats(t *testing.T) {
+	tests := []struct {
+		name    string
+		payload string
+	}{
+		{name: "xml_tool_call", payload: `<tool_call><tool_name>Bash</tool_name><parameters><command>pwd</command></parameters></tool_call>`},
+		{name: "xml_json_tool_call", payload: `<tool_call>{"tool":"Bash","params":{"command":"pwd"}}</tool_call>`},
+		{name: "nested_tool_tag_style", payload: `<tool_call><tool name="Bash"><command>pwd</command></tool></tool_call>`},
+		{name: "function_tag_style", payload: `<function_call>Bash</function_call><function parameter name="command">pwd</function parameter>`},
+		{name: "antml_argument_style", payload: `<antml:function_calls><antml:function_call id="1" name="Bash"><antml:argument name="command">pwd</antml:argument></antml:function_call></antml:function_calls>`},
+		{name: "antml_function_attr_parameters", payload: `<antml:function_calls><antml:function_call id="1" function="Bash"><antml:parameters>{"command":"pwd"}</antml:parameters></antml:function_call></antml:function_calls>`},
+		{name: "invoke_parameter_style", payload: `<function_calls><invoke name="Bash"><parameter name="command">pwd</parameter></invoke></function_calls>`},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			h := &Handler{}
+			resp := makeClaudeSSEHTTPResponse(
+				`data: {"p":"response/content","v":"`+strings.ReplaceAll(tc.payload, `"`, `\"`)+`"}`,
+				`data: [DONE]`,
+			)
+			rec := httptest.NewRecorder()
+			req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+
+			h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "use tool"}}, false, false, []string{"Bash"})
+
+			frames := parseClaudeFrames(t, rec.Body.String())
+			foundToolUse := false
+			for _, f := range findClaudeFrames(frames, "content_block_start") {
+				contentBlock, _ := f.Payload["content_block"].(map[string]any)
+				if contentBlock["type"] == "tool_use" {
+					foundToolUse = true
+					break
+				}
+			}
+			if !foundToolUse {
+				t.Fatalf("expected tool_use block for format %s, body=%s", tc.name, rec.Body.String())
+			}
+		})
+	}
+}
+
+func TestHandleClaudeStreamRealtimeDetectsToolUseWithLeadingProse(t *testing.T) {
+	h := &Handler{}
+	payload := "I'll call a tool now.\\n<tool_use><tool_name>write_file</tool_name><parameters>{\\\"path\\\":\\\"/tmp/a.txt\\\",\\\"content\\\":\\\"abc\\\"}</parameters></tool_use>"
+	resp := makeClaudeSSEHTTPResponse(
+		`data: {"p":"response/content","v":"`+payload+`"}`,
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+
+	h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "use tool"}}, false, false, []string{"write_file"})
+
+	frames := parseClaudeFrames(t, rec.Body.String())
+	foundToolUse := false
+	for _, f := range findClaudeFrames(frames, "content_block_start") {
+		contentBlock, _ := f.Payload["content_block"].(map[string]any)
+		if contentBlock["type"] == "tool_use" && contentBlock["name"] == "write_file" {
+			foundToolUse = true
+			break
+		}
+	}
+	if !foundToolUse {
+		t.Fatalf("expected tool_use block with leading prose payload, body=%s", rec.Body.String())
+	}
+
+	for _, f := range findClaudeFrames(frames, "message_delta") {
+		delta, _ := f.Payload["delta"].(map[string]any)
+		if delta["stop_reason"] == "tool_use" {
+			return
+		}
+	}
+	t.Fatalf("expected stop_reason=tool_use, body=%s", rec.Body.String())
+}
+
+func TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t *testing.T) {
+	h := &Handler{}
+	resp := makeClaudeSSEHTTPResponse(
+		"data: {\"p\":\"response/content\",\"v\":\"Here is an example:\\n```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"Bash\\\",\\\"input\\\":{\\\"command\\\":\\\"pwd\\\"}}]}\"}",
+		"data: {\"p\":\"response/content\",\"v\":\"\\n```\\nDo not execute it.\"}",
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", nil)
+
+	h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "show example only"}}, false, false, []string{"Bash"})
+
+	frames := parseClaudeFrames(t, rec.Body.String())
+	foundToolUse := false
+	for _, f := range findClaudeFrames(frames, "content_block_start") {
+		contentBlock, _ := f.Payload["content_block"].(map[string]any)
+		if contentBlock["type"] == "tool_use" {
+			foundToolUse = true
+			break
+		}
+	}
+	if foundToolUse {
+		t.Fatalf("expected no tool_use for fenced example, body=%s", rec.Body.String())
+	}
+
+	foundToolStop := false
+	for _, f := range findClaudeFrames(frames, "message_delta") {
+		delta, _ := f.Payload["delta"].(map[string]any)
+		if delta["stop_reason"] == "tool_use" {
+			foundToolStop = true
+			break
+		}
+	}
+	if foundToolStop {
+		t.Fatalf("expected stop_reason to remain content-only, body=%s", rec.Body.String())
+	}
+}
+
+// Backward-compatible alias for historical test name used in CI logs.
+func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) {
+	TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t)
+}
--- a/internal/adapter/claude/handler_tokens.go
+++ b/internal/adapter/claude/handler_tokens.go
@@ -0,0 +1,51 @@
+package claude
+
+import (
+	"encoding/json"
+	"net/http"
+
+	"ds2api/internal/util"
+)
+
+func (h *Handler) CountTokens(w http.ResponseWriter, r *http.Request) {
+	a, err := h.Auth.Determine(r)
+	if err != nil {
+		writeClaudeError(w, http.StatusUnauthorized, err.Error())
+		return
+	}
+	defer h.Auth.Release(a)
+
+	var req map[string]any
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		writeClaudeError(w, http.StatusBadRequest, "invalid json")
+		return
+	}
+	model, _ := req["model"].(string)
+	messages, _ := req["messages"].([]any)
+	if model == "" || len(messages) == 0 {
+		writeClaudeError(w, http.StatusBadRequest, "Request must include 'model' and 'messages'.")
+		return
+	}
+	inputTokens := 0
+	if sys, ok := req["system"].(string); ok {
+		inputTokens += util.EstimateTokens(sys)
+	}
+	for _, item := range messages {
+		msg, ok := item.(map[string]any)
+		if !ok {
+			continue
+		}
+		inputTokens += 2
+		inputTokens += util.EstimateTokens(extractMessageContent(msg["content"]))
+	}
+	if tools, ok := req["tools"].([]any); ok {
+		for _, t := range tools {
+			b, _ := json.Marshal(t)
+			inputTokens += util.EstimateTokens(string(b))
+		}
+	}
+	if inputTokens < 1 {
+		inputTokens = 1
+	}
+	writeJSON(w, http.StatusOK, map[string]any{"input_tokens": inputTokens})
+}
--- a/internal/adapter/claude/handler_util_test.go
+++ b/internal/adapter/claude/handler_util_test.go
@@ -0,0 +1,560 @@
+package claude
+
+import (
+	"strings"
+	"testing"
+)
+
+// ─── normalizeClaudeMessages ─────────────────────────────────────────
+
+// TestNormalizeClaudeMessagesSimpleString checks that a message with plain
+// string content yields exactly one message that keeps that content.
+func TestNormalizeClaudeMessagesSimpleString(t *testing.T) {
+	normalized := normalizeClaudeMessages([]any{
+		map[string]any{"role": "user", "content": "Hello"},
+	})
+	if count := len(normalized); count != 1 {
+		t.Fatalf("expected 1 message, got %d", count)
+	}
+	first := normalized[0].(map[string]any)
+	if content := first["content"]; content != "Hello" {
+		t.Fatalf("expected 'Hello', got %v", content)
+	}
+}
+
+// TestNormalizeClaudeMessagesArrayContent checks that consecutive text blocks
+// of an array-valued content field are joined with a newline into one message.
+func TestNormalizeClaudeMessagesArrayContent(t *testing.T) {
+	msgs := []any{
+		map[string]any{
+			"role": "user",
+			"content": []any{
+				map[string]any{"type": "text", "text": "line1"},
+				map[string]any{"type": "text", "text": "line2"},
+			},
+		},
+	}
+	got := normalizeClaudeMessages(msgs)
+	// Guard the index and the type assertion so a regression fails with a
+	// readable message instead of a runtime panic.
+	if len(got) != 1 {
+		t.Fatalf("expected 1 message, got %d", len(got))
+	}
+	m, ok := got[0].(map[string]any)
+	if !ok {
+		t.Fatalf("expected map message, got %#v", got[0])
+	}
+	if m["content"] != "line1\nline2" {
+		t.Fatalf("expected joined text, got %q", m["content"])
+	}
+}
+
+// TestNormalizeClaudeMessagesToolResult checks that a tool_result block turns
+// into a single "tool" message carrying the raw tool output.
+func TestNormalizeClaudeMessagesToolResult(t *testing.T) {
+	toolResult := map[string]any{"type": "tool_result", "content": "tool output"}
+	normalized := normalizeClaudeMessages([]any{
+		map[string]any{"role": "user", "content": []any{toolResult}},
+	})
+	if count := len(normalized); count != 1 {
+		t.Fatalf("expected one normalized message, got %d", count)
+	}
+	toolMsg := normalized[0].(map[string]any)
+	if role := toolMsg["role"]; role != "tool" {
+		t.Fatalf("expected tool role preserved, got %#v", role)
+	}
+	if text, _ := toolMsg["content"].(string); text != "tool output" {
+		t.Fatalf("expected raw tool output content preserved, got %q", text)
+	}
+}
+
+// TestNormalizeClaudeMessagesToolUseToAssistantToolCalls checks that an
+// assistant tool_use block becomes one assistant message that carries both a
+// structured tool_calls entry and an XML rendering of the call in its content.
+func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) {
+	toolUse := map[string]any{
+		"type":  "tool_use",
+		"id":    "call_1",
+		"name":  "search_web",
+		"input": map[string]any{"query": "latest"},
+	}
+	normalized := normalizeClaudeMessages([]any{
+		map[string]any{"role": "assistant", "content": []any{toolUse}},
+	})
+	if count := len(normalized); count != 1 {
+		t.Fatalf("expected one normalized tool-call message, got %d", count)
+	}
+
+	assistant := normalized[0].(map[string]any)
+	if role := assistant["role"]; role != "assistant" {
+		t.Fatalf("expected assistant role, got %#v", role)
+	}
+	calls, _ := assistant["tool_calls"].([]any)
+	if len(calls) != 1 {
+		t.Fatalf("expected one tool call, got %#v", assistant["tool_calls"])
+	}
+	firstCall, _ := calls[0].(map[string]any)
+	if firstCall["id"] != "call_1" {
+		t.Fatalf("expected call id preserved, got %#v", firstCall)
+	}
+
+	history, _ := assistant["content"].(string)
+	hasEnvelope := containsStr(history, "<tool_calls>")
+	hasToolName := containsStr(history, "<tool_name>search_web</tool_name>")
+	if !(hasEnvelope && hasToolName) {
+		t.Fatalf("expected assistant content to include XML tool call history, got %q", history)
+	}
+	if !containsStr(history, `<parameters>{"query":"latest"}</parameters>`) {
+		t.Fatalf("expected assistant content to include serialized parameters, got %q", history)
+	}
+}
+
+// TestNormalizeClaudeMessagesDoesNotPromoteUserToolUse checks that a tool_use
+// block sent under the user role stays plain user content and never produces a
+// tool_calls entry.
+func TestNormalizeClaudeMessagesDoesNotPromoteUserToolUse(t *testing.T) {
+	userToolUse := map[string]any{
+		"type":  "tool_use",
+		"id":    "call_unsafe",
+		"name":  "dangerous_tool",
+		"input": map[string]any{"value": "x"},
+	}
+	normalized := normalizeClaudeMessages([]any{
+		map[string]any{"role": "user", "content": []any{userToolUse}},
+	})
+	if count := len(normalized); count != 1 {
+		t.Fatalf("expected one normalized message, got %d", count)
+	}
+
+	userMsg := normalized[0].(map[string]any)
+	if role := userMsg["role"]; role != "user" {
+		t.Fatalf("expected user role preserved, got %#v", role)
+	}
+	if promoted, present := userMsg["tool_calls"]; present {
+		t.Fatalf("expected no tool_calls promotion for user message, got %#v", promoted)
+	}
+	text, _ := userMsg["content"].(string)
+	keepsType := containsStr(text, `"type":"tool_use"`)
+	keepsName := containsStr(text, "dangerous_tool")
+	if !(keepsType && keepsName) {
+		t.Fatalf("expected raw tool_use block preserved in user content, got %q", text)
+	}
+}
+
+// TestNormalizeClaudeMessagesSkipsNonMap checks that entries which are not
+// maps are dropped from the normalized output.
+func TestNormalizeClaudeMessagesSkipsNonMap(t *testing.T) {
+	normalized := normalizeClaudeMessages([]any{"not a map", 42})
+	if count := len(normalized); count != 0 {
+		t.Fatalf("expected 0 messages for non-map items, got %d", count)
+	}
+}
+
+// TestNormalizeClaudeMessagesEmpty checks that nil input yields no messages.
+func TestNormalizeClaudeMessagesEmpty(t *testing.T) {
+	if count := len(normalizeClaudeMessages(nil)); count != 0 {
+		t.Fatalf("expected 0, got %d", count)
+	}
+}
+
+// TestNormalizeClaudeMessagesPreservesRole checks that the role of a plain
+// string-content message survives normalization.
+func TestNormalizeClaudeMessagesPreservesRole(t *testing.T) {
+	msgs := []any{
+		map[string]any{"role": "assistant", "content": "response"},
+	}
+	got := normalizeClaudeMessages(msgs)
+	// Guard the index and the type assertion so a regression fails with a
+	// readable message instead of a runtime panic.
+	if len(got) != 1 {
+		t.Fatalf("expected 1 message, got %d", len(got))
+	}
+	m, ok := got[0].(map[string]any)
+	if !ok {
+		t.Fatalf("expected map message, got %#v", got[0])
+	}
+	if m["role"] != "assistant" {
+		t.Fatalf("expected 'assistant', got %q", m["role"])
+	}
+}
+
+// TestNormalizeClaudeMessagesMixedContentBlocks checks that text blocks are
+// kept, that a non-text (image) block is represented by a marker in the
+// content, and that its base64 payload is replaced by omittedBinaryMarker.
+func TestNormalizeClaudeMessagesMixedContentBlocks(t *testing.T) {
+	msgs := []any{
+		map[string]any{
+			"role": "user",
+			"content": []any{
+				map[string]any{"type": "text", "text": "Hello"},
+				map[string]any{"type": "image", "source": map[string]any{"type": "base64", "data": strings.Repeat("A", 2048)}},
+				map[string]any{"type": "text", "text": "World"},
+			},
+		},
+	}
+	got := normalizeClaudeMessages(msgs)
+	// Guard the index and the type assertion so a regression fails with a
+	// readable message instead of a runtime panic.
+	if len(got) != 1 {
+		t.Fatalf("expected 1 message, got %d", len(got))
+	}
+	m, ok := got[0].(map[string]any)
+	if !ok {
+		t.Fatalf("expected map message, got %#v", got[0])
+	}
+	content, _ := m["content"].(string)
+	if !containsStr(content, "Hello") || !containsStr(content, "World") || !containsStr(content, `"type":"image"`) {
+		t.Fatalf("expected text plus non-text block marker preserved, got %q", content)
+	}
+	if !containsStr(content, omittedBinaryMarker) {
+		t.Fatalf("expected binary payload omitted marker, got %q", content)
+	}
+	if containsStr(content, strings.Repeat("A", 100)) {
+		t.Fatalf("expected raw base64 payload not to be included, got %q", content)
+	}
+}
+
+// TestNormalizeClaudeMessagesToolResultNonTextPayloadStringified checks that a
+// tool_result whose content is a block list is emitted as a "tool" message
+// with JSON-stringified content in which the binary payload is replaced by
+// omittedBinaryMarker.
+func TestNormalizeClaudeMessagesToolResultNonTextPayloadStringified(t *testing.T) {
+	imageBlock := map[string]any{
+		"type":   "image",
+		"source": map[string]any{"type": "base64", "media_type": "image/png", "data": strings.Repeat("B", 2048)},
+	}
+	toolResult := map[string]any{
+		"type":        "tool_result",
+		"tool_use_id": "call_image_1",
+		"name":        "vision_tool",
+		"content": []any{
+			map[string]any{"type": "text", "text": "image analysis"},
+			imageBlock,
+		},
+	}
+	normalized := normalizeClaudeMessages([]any{
+		map[string]any{"role": "user", "content": []any{toolResult}},
+	})
+	if count := len(normalized); count != 1 {
+		t.Fatalf("expected one normalized message, got %d", count)
+	}
+
+	toolMsg := normalized[0].(map[string]any)
+	if role := toolMsg["role"]; role != "tool" {
+		t.Fatalf("expected tool role, got %#v", role)
+	}
+	text, _ := toolMsg["content"].(string)
+	hasResultType := containsStr(text, `"type":"tool_result"`)
+	hasImageType := containsStr(text, `"type":"image"`)
+	if !(hasResultType && hasImageType) {
+		t.Fatalf("expected non-text tool_result payload to be JSON stringified, got %q", text)
+	}
+	if !containsStr(text, omittedBinaryMarker) {
+		t.Fatalf("expected binary data to be sanitized with omitted marker, got %q", text)
+	}
+	if containsStr(text, strings.Repeat("B", 100)) {
+		t.Fatalf("expected raw base64 payload not to be included, got %q", text)
+	}
+}
+
+// TestNormalizeClaudeMessagesBackfillsToolResultCallIDByName checks that a
+// tool_use without an id receives a generated "call_claude_" id and that a
+// later tool_result for the same tool name reuses that id.
+func TestNormalizeClaudeMessagesBackfillsToolResultCallIDByName(t *testing.T) {
+	msgs := []any{
+		map[string]any{
+			"role": "assistant",
+			"content": []any{
+				map[string]any{
+					"type":  "tool_use",
+					"name":  "search_web",
+					"input": map[string]any{"query": "latest"},
+				},
+			},
+		},
+		map[string]any{
+			"role": "user",
+			"content": []any{
+				map[string]any{
+					"type":    "tool_result",
+					"name":    "search_web",
+					"content": "ok",
+				},
+			},
+		},
+	}
+
+	got := normalizeClaudeMessages(msgs)
+	if len(got) != 2 {
+		t.Fatalf("expected 2 messages, got %#v", got)
+	}
+	assistant, _ := got[0].(map[string]any)
+	tc, _ := assistant["tool_calls"].([]any)
+	// Without this guard a missing tool_calls entry would panic on tc[0]
+	// instead of reporting a test failure.
+	if len(tc) == 0 {
+		t.Fatalf("expected assistant tool_calls, got %#v", assistant)
+	}
+	call, _ := tc[0].(map[string]any)
+	callID, _ := call["id"].(string)
+	if !strings.HasPrefix(callID, "call_claude_") {
+		t.Fatalf("expected generated call id, got %#v", call)
+	}
+	toolMsg, _ := got[1].(map[string]any)
+	if toolMsg["tool_call_id"] != callID {
+		t.Fatalf("expected tool_result to reuse generated id, got %#v", toolMsg)
+	}
+}
+
+// ─── buildClaudeToolPrompt ───────────────────────────────────────────
+
+// TestBuildClaudeToolPromptSingleTool checks that the prompt built for one
+// tool mentions its name and description and includes the XML tool-call
+// format section.
+func TestBuildClaudeToolPromptSingleTool(t *testing.T) {
+	searchTool := map[string]any{
+		"name":        "search",
+		"description": "Search the web",
+		"input_schema": map[string]any{
+			"type": "object",
+			"properties": map[string]any{
+				"query": map[string]any{"type": "string"},
+			},
+		},
+	}
+	rendered := buildClaudeToolPrompt([]any{searchTool})
+	if len(rendered) == 0 {
+		t.Fatal("expected non-empty prompt")
+	}
+	// Tool name and description must both appear.
+	if hasName := containsStr(rendered, "search"); !hasName {
+		t.Fatalf("expected 'search' in prompt")
+	}
+	if hasDesc := containsStr(rendered, "Search the web"); !hasDesc {
+		t.Fatalf("expected description in prompt")
+	}
+	if hasXML := containsStr(rendered, "<tool_calls>"); !hasXML {
+		t.Fatalf("expected XML tool_calls format in prompt")
+	}
+	if hasHeader := containsStr(rendered, "TOOL CALL FORMAT"); !hasHeader {
+		t.Fatalf("expected tool call format header in prompt")
+	}
+}
+
+// TestBuildClaudeToolPromptMultipleTools checks that every supplied tool name
+// shows up in the generated prompt.
+func TestBuildClaudeToolPromptMultipleTools(t *testing.T) {
+	rendered := buildClaudeToolPrompt([]any{
+		map[string]any{"name": "tool1", "description": "desc1"},
+		map[string]any{"name": "tool2", "description": "desc2"},
+	})
+	hasFirst := containsStr(rendered, "tool1")
+	hasSecond := containsStr(rendered, "tool2")
+	if !(hasFirst && hasSecond) {
+		t.Fatalf("expected both tools in prompt")
+	}
+}
+
+// TestBuildClaudeToolPromptSupportsOpenAIStyleFunctionTool checks that a tool
+// declared in the OpenAI {"type":"function","function":{...}} shape has its
+// name, description and parameter schema rendered into the prompt.
+func TestBuildClaudeToolPromptSupportsOpenAIStyleFunctionTool(t *testing.T) {
+	functionSpec := map[string]any{
+		"name":        "search",
+		"description": "Search via function tool",
+		"parameters": map[string]any{
+			"type": "object",
+			"properties": map[string]any{
+				"q": map[string]any{"type": "string"},
+			},
+		},
+	}
+	rendered := buildClaudeToolPrompt([]any{
+		map[string]any{"type": "function", "function": functionSpec},
+	})
+	if hasName := containsStr(rendered, "Tool: search"); !hasName {
+		t.Fatalf("expected OpenAI-style function tool name in prompt, got: %q", rendered)
+	}
+	if hasDesc := containsStr(rendered, "Search via function tool"); !hasDesc {
+		t.Fatalf("expected OpenAI-style function tool description in prompt, got: %q", rendered)
+	}
+	if hasSchema := containsStr(rendered, "\"q\""); !hasSchema {
+		t.Fatalf("expected parameters schema serialized in prompt, got: %q", rendered)
+	}
+}
+
+// TestBuildClaudeToolPromptSkipsNonMap checks that a tool list without any
+// usable (map) entry produces an empty prompt.
+func TestBuildClaudeToolPromptSkipsNonMap(t *testing.T) {
+	rendered := buildClaudeToolPrompt([]any{"not a map"})
+	// With no valid tool there is nothing to describe.
+	if len(rendered) != 0 {
+		t.Fatalf("expected empty prompt for non-map tools, got: %q", rendered)
+	}
+}
+
+// ─── hasSystemMessage ────────────────────────────────────────────────
+
+// TestHasSystemMessageTrue checks that a system-role message is detected.
+func TestHasSystemMessageTrue(t *testing.T) {
+	found := hasSystemMessage([]any{
+		map[string]any{"role": "system", "content": "You are a helper"},
+		map[string]any{"role": "user", "content": "Hi"},
+	})
+	if !found {
+		t.Fatal("expected true")
+	}
+}
+
+// TestHasSystemMessageFalse checks that a conversation with only user and
+// assistant messages reports no system message.
+func TestHasSystemMessageFalse(t *testing.T) {
+	found := hasSystemMessage([]any{
+		map[string]any{"role": "user", "content": "Hi"},
+		map[string]any{"role": "assistant", "content": "Hello"},
+	})
+	if found {
+		t.Fatal("expected false")
+	}
+}
+
+// TestHasSystemMessageEmpty checks that nil input reports no system message.
+func TestHasSystemMessageEmpty(t *testing.T) {
+	if found := hasSystemMessage(nil); found {
+		t.Fatal("expected false for nil")
+	}
+}
+
+// TestHasSystemMessageNonMap checks that non-map entries are not treated as
+// system messages.
+func TestHasSystemMessageNonMap(t *testing.T) {
+	if found := hasSystemMessage([]any{"not a map"}); found {
+		t.Fatal("expected false for non-map")
+	}
+}
+
+// ─── extractClaudeToolNames ──────────────────────────────────────────
+
+// TestExtractClaudeToolNamesSingle checks that one named tool yields exactly
+// that name.
+func TestExtractClaudeToolNamesSingle(t *testing.T) {
+	extracted := extractClaudeToolNames([]any{
+		map[string]any{"name": "search"},
+	})
+	if !(len(extracted) == 1 && extracted[0] == "search") {
+		t.Fatalf("expected [search], got %v", extracted)
+	}
+}
+
+// TestExtractClaudeToolNamesMultiple checks that two named tools yield two
+// names.
+func TestExtractClaudeToolNamesMultiple(t *testing.T) {
+	extracted := extractClaudeToolNames([]any{
+		map[string]any{"name": "search"},
+		map[string]any{"name": "calculate"},
+	})
+	if count := len(extracted); count != 2 {
+		t.Fatalf("expected 2 names, got %v", extracted)
+	}
+}
+
+// TestExtractClaudeToolNamesSkipsEmptyName checks that tools with an empty
+// name are left out of the result.
+func TestExtractClaudeToolNamesSkipsEmptyName(t *testing.T) {
+	extracted := extractClaudeToolNames([]any{
+		map[string]any{"name": ""},
+		map[string]any{"name": "valid"},
+	})
+	if !(len(extracted) == 1 && extracted[0] == "valid") {
+		t.Fatalf("expected [valid], got %v", extracted)
+	}
+}
+
+// TestExtractClaudeToolNamesSkipsNonMap checks that non-map tool entries
+// contribute no names.
+func TestExtractClaudeToolNamesSkipsNonMap(t *testing.T) {
+	extracted := extractClaudeToolNames([]any{"not a map", 42})
+	if count := len(extracted); count != 0 {
+		t.Fatalf("expected 0, got %v", extracted)
+	}
+}
+
+// TestExtractClaudeToolNamesNil checks that nil input yields no names.
+func TestExtractClaudeToolNamesNil(t *testing.T) {
+	extracted := extractClaudeToolNames(nil)
+	if count := len(extracted); count != 0 {
+		t.Fatalf("expected 0, got %v", extracted)
+	}
+}
+
+// TestExtractClaudeToolNamesSupportsOpenAIStyleFunctionTool checks that the
+// name nested under "function" in an OpenAI-style tool is extracted.
+func TestExtractClaudeToolNamesSupportsOpenAIStyleFunctionTool(t *testing.T) {
+	functionTool := map[string]any{
+		"type":     "function",
+		"function": map[string]any{"name": "search"},
+	}
+	extracted := extractClaudeToolNames([]any{functionTool})
+	if !(len(extracted) == 1 && extracted[0] == "search") {
+		t.Fatalf("expected [search], got %v", extracted)
+	}
+}
+
+// ─── toMessageMaps ───────────────────────────────────────────────────
+
+// TestToMessageMapsNormal checks that a slice holding one message map
+// converts to one entry.
+func TestToMessageMapsNormal(t *testing.T) {
+	converted := toMessageMaps([]any{
+		map[string]any{"role": "user", "content": "Hello"},
+	})
+	if count := len(converted); count != 1 {
+		t.Fatalf("expected 1, got %d", count)
+	}
+}
+
+// TestToMessageMapsNonSlice checks that a non-slice argument yields nil.
+func TestToMessageMapsNonSlice(t *testing.T) {
+	if converted := toMessageMaps("not a slice"); converted != nil {
+		t.Fatalf("expected nil, got %v", converted)
+	}
+}
+
+// TestToMessageMapsSkipsNonMap checks that only the map entries of a mixed
+// slice are kept.
+func TestToMessageMapsSkipsNonMap(t *testing.T) {
+	mixed := []any{"string", map[string]any{"role": "user"}, 42}
+	if count := len(toMessageMaps(mixed)); count != 1 {
+		t.Fatalf("expected 1 map, got %d", count)
+	}
+}
+
+// TestToMessageMapsNil checks that nil input yields nil.
+func TestToMessageMapsNil(t *testing.T) {
+	if converted := toMessageMaps(nil); converted != nil {
+		t.Fatalf("expected nil, got %v", converted)
+	}
+}
+
+// ─── extractMessageContent ──────────────────────────────────────────
+
+// TestExtractMessageContentString checks that string content is returned
+// as is.
+func TestExtractMessageContentString(t *testing.T) {
+	text := extractMessageContent("hello")
+	if text != "hello" {
+		t.Fatalf("expected 'hello', got %q", text)
+	}
+}
+
+// TestExtractMessageContentArray checks that array parts are joined with a
+// newline.
+func TestExtractMessageContentArray(t *testing.T) {
+	text := extractMessageContent([]any{"part1", "part2"})
+	if text != "part1\npart2" {
+		t.Fatalf("expected joined, got %q", text)
+	}
+}
+
+// TestExtractMessageContentOther checks that a non-string, non-array value is
+// rendered in its default textual form.
+func TestExtractMessageContentOther(t *testing.T) {
+	if text := extractMessageContent(42); text != "42" {
+		t.Fatalf("expected '42', got %q", text)
+	}
+}
+
+// TestExtractMessageContentNil pins the current behavior for nil content: it
+// is rendered as the literal "<nil>".
+func TestExtractMessageContentNil(t *testing.T) {
+	if text := extractMessageContent(nil); text != "<nil>" {
+		t.Fatalf("expected '<nil>', got %q", text)
+	}
+}
+
+// ─── cloneMap ────────────────────────────────────────────────────────
+
+// TestCloneMapBasic checks that the copy keeps the source's values and is not
+// affected by later writes to the source's top-level keys.
+func TestCloneMapBasic(t *testing.T) {
+	source := map[string]any{"a": 1, "b": "hello"}
+	duplicate := cloneMap(source)
+	source["a"] = 999
+	if a := duplicate["a"]; a != 1 {
+		t.Fatalf("expected 1, got %v", a)
+	}
+	if b := duplicate["b"]; b != "hello" {
+		t.Fatalf("expected 'hello', got %v", b)
+	}
+}
+
+// TestCloneMapEmpty checks that cloning an empty map yields an empty map.
+func TestCloneMapEmpty(t *testing.T) {
+	duplicate := cloneMap(map[string]any{})
+	if size := len(duplicate); size != 0 {
+		t.Fatalf("expected empty, got %v", duplicate)
+	}
+}
+
+// TestCloneMapNested pins the shallow-copy contract: nested maps are shared
+// between the source and its clone.
+func TestCloneMapNested(t *testing.T) {
+	shared := map[string]any{"key": "value"}
+	duplicate := cloneMap(map[string]any{"nested": shared})
+	// Mutating the nested map must be visible through the clone, because only
+	// the top level is copied.
+	shared["key"] = "modified"
+	nested := duplicate["nested"].(map[string]any)
+	if value := nested["key"]; value != "modified" {
+		t.Fatal("expected shallow clone to share nested references")
+	}
+}
+
+// containsStr reports whether sub occurs within s. An empty sub is contained
+// in every string. It is a thin wrapper over strings.Contains kept so that
+// existing tests can keep calling it.
+func containsStr(s, sub string) bool {
+	return strings.Contains(s, sub)
+}
+
+// findSubstring reports whether sub occurs within s. It delegates to
+// strings.Contains, which gives the same answers as the previous manual scan
+// (including for an empty sub and for a sub longer than s).
+func findSubstring(s, sub string) bool {
+	return strings.Contains(s, sub)
+}
--- a/internal/adapter/claude/handler_utils.go
+++ b/internal/adapter/claude/handler_utils.go
@@ -0,0 +1,225 @@
+package claude
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"ds2api/internal/prompt"
+	"ds2api/internal/util"
+)
+
+// normalizeClaudeMessages flattens Anthropic-style messages into a list of
+// simple message maps.
+//
+// Entries that are not maps are dropped. A message whose content is an array
+// of blocks is split up: text blocks, and the sanitized raw form of blocks
+// that are not promoted, are collected and joined with newlines into one
+// message per run, while assistant tool_use blocks and tool_result blocks each
+// become their own message, in the original order. A message with any other
+// content shape is copied (shallowly) as is. One claudeToolCallState is shared
+// across the whole list and handed to the tool_use / tool_result helpers.
+func normalizeClaudeMessages(messages []any) []any {
+	out := make([]any, 0, len(messages))
+	state := &claudeToolCallState{
+		nameByID:       map[string]string{},
+		lastIDByName:   map[string]string{},
+		callIDSequence: 0,
+	}
+	for _, m := range messages {
+		msg, ok := m.(map[string]any)
+		if !ok {
+			continue
+		}
+		// NOTE(review): %v renders a missing role as "<nil>"; confirm callers
+		// always supply a role for array-content messages.
+		role := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", msg["role"])))
+		switch content := msg["content"].(type) {
+		case []any:
+			textParts := make([]string, 0, len(content))
+			// flushText emits the text collected so far as one message under
+			// the current role and resets the buffer for reuse.
+			flushText := func() {
+				if len(textParts) == 0 {
+					return
+				}
+				out = append(out, map[string]any{
+					"role":    role,
+					"content": strings.Join(textParts, "\n"),
+				})
+				textParts = textParts[:0]
+			}
+			for _, block := range content {
+				b, ok := block.(map[string]any)
+				if !ok {
+					continue
+				}
+				typeStr := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", b["type"])))
+				switch typeStr {
+				case "text":
+					if t, ok := b["text"].(string); ok {
+						textParts = append(textParts, t)
+					}
+				case "tool_use":
+					if role == "assistant" {
+						// Flush first so preceding text stays ahead of the tool call.
+						flushText()
+						if toolMsg := normalizeClaudeToolUseToAssistant(b, state); toolMsg != nil {
+							out = append(out, toolMsg)
+						}
+						continue
+					}
+					// A tool_use outside an assistant turn is never promoted to
+					// a tool call; it is kept as sanitized raw text.
+					if raw := strings.TrimSpace(formatClaudeUnknownBlockForPrompt(b)); raw != "" {
+						textParts = append(textParts, raw)
+					}
+				case "tool_result":
+					// Tool results are emitted for any role, after pending text.
+					flushText()
+					if toolMsg := normalizeClaudeToolResultToToolMessage(b, state); toolMsg != nil {
+						out = append(out, toolMsg)
+					}
+				default:
+					// Unknown block types are kept as sanitized raw text.
+					if raw := strings.TrimSpace(formatClaudeUnknownBlockForPrompt(b)); raw != "" {
+						textParts = append(textParts, raw)
+					}
+				}
+			}
+			flushText()
+		default:
+			// Non-array content: pass the message through as a copy.
+			copied := cloneMap(msg)
+			out = append(out, copied)
+		}
+	}
+	return out
+}
+
+func buildClaudeToolPrompt(tools []any) string {
+	toolSchemas := make([]string, 0, len(tools))
+	names := make([]string, 0, len(tools))
+	for _, t := range tools {
+		m, ok := t.(map[string]any)
+		if !ok {
+			continue
+		}
+		name, desc, schemaObj := extractClaudeToolMeta(m)
+		if name == "" {
+			continue
+		}
+		names = append(names, name)
+		schema, _ := json.Marshal(schemaObj)
+		toolSchemas = append(toolSchemas, fmt.Sprintf("Tool: %s\nDescription: %s\nParameters: %s", name, desc, schema))
+	}
+	if len(toolSchemas) == 0 {
+		return ""
+	}
+	return "You have access to these tools:\n\n" +
+		strings.Join(toolSchemas, "\n\n") + "\n\n" +
+		util.BuildToolCallInstructions(names)
+}
+
+// formatClaudeToolResultForPrompt serializes a tool_result block to compact
+// JSON of the form {"type":"tool_result","content":...}, adding
+// "tool_call_id" (taken from tool_use_id, falling back to tool_call_id) and
+// "name" when they are present and non-blank. A nil block yields "". If JSON
+// encoding fails, the default textual form of the payload is returned.
+func formatClaudeToolResultForPrompt(block map[string]any) string {
+	if block == nil {
+		return ""
+	}
+	// field renders a value as trimmed text. An absent (nil) value must count
+	// as empty: formatting nil with %v would produce the literal "<nil>",
+	// which used to leak into the payload and also blocked the tool_call_id
+	// fallback below.
+	field := func(key string) string {
+		v := block[key]
+		if v == nil {
+			return ""
+		}
+		return strings.TrimSpace(fmt.Sprintf("%v", v))
+	}
+	payload := map[string]any{
+		"type":    "tool_result",
+		"content": block["content"],
+	}
+	toolCallID := field("tool_use_id")
+	if toolCallID == "" {
+		toolCallID = field("tool_call_id")
+	}
+	if toolCallID != "" {
+		payload["tool_call_id"] = toolCallID
+	}
+	if name := field("name"); name != "" {
+		payload["name"] = name
+	}
+	b, err := json.Marshal(payload)
+	if err != nil {
+		return strings.TrimSpace(fmt.Sprintf("%v", payload))
+	}
+	return string(b)
+}
+
+// normalizeClaudeToolUseToAssistant converts a tool_use block into an
+// assistant message that carries a single function-style tool call.
+//
+// The call id is taken from "id", then "tool_use_id", and otherwise generated
+// by state; the id/name pair is recorded in state so later tool_result blocks
+// can be matched. The tool input is serialized to JSON ("{}" when absent or
+// not serializable). The message content is the prompt rendering of the tool
+// call. It returns nil for a nil block or a block without a usable name.
+func normalizeClaudeToolUseToAssistant(block map[string]any, state *claudeToolCallState) map[string]any {
+	if block == nil {
+		return nil
+	}
+	rawName := block["name"]
+	if rawName == nil {
+		// Formatting a missing name with %v would yield the literal "<nil>"
+		// and produce a bogus tool call, so treat it as "no name".
+		return nil
+	}
+	name := strings.TrimSpace(fmt.Sprintf("%v", rawName))
+	if name == "" {
+		return nil
+	}
+	callID := safeStringValue(block["id"])
+	if callID == "" {
+		callID = safeStringValue(block["tool_use_id"])
+	}
+	if callID == "" {
+		callID = state.nextID()
+	}
+	state.nameByID[callID] = name
+	state.lastIDByName[strings.ToLower(name)] = callID
+	arguments := block["input"]
+	if arguments == nil {
+		arguments = map[string]any{}
+	}
+	argsJSON, err := json.Marshal(arguments)
+	if err != nil || len(argsJSON) == 0 {
+		argsJSON = []byte("{}")
+	}
+	toolCalls := []any{
+		map[string]any{
+			"id":   callID,
+			"type": "function",
+			"function": map[string]any{
+				"name":      name,
+				"arguments": string(argsJSON),
+			},
+		},
+	}
+	return map[string]any{
+		"role":       "assistant",
+		"content":    prompt.FormatToolCallsForPrompt(toolCalls),
+		"tool_calls": toolCalls,
+	}
+}
+
+// normalizeClaudeToolResultToToolMessage converts a tool_result block into a
+// "tool" message.
+//
+// The call id is resolved in order from "tool_use_id", "tool_call_id", the
+// last id recorded in state for the same tool name, and finally a freshly
+// generated id. When the block names its tool the name is stored on the
+// message and recorded in state; otherwise a name previously recorded for the
+// id is used if there is one. It returns nil for a nil block.
+func normalizeClaudeToolResultToToolMessage(block map[string]any, state *claudeToolCallState) map[string]any {
+	if block == nil {
+		return nil
+	}
+	name := safeStringValue(block["name"])
+
+	callID := safeStringValue(block["tool_use_id"])
+	if callID == "" {
+		callID = safeStringValue(block["tool_call_id"])
+	}
+	if callID == "" && name != "" {
+		callID = strings.TrimSpace(state.lastIDByName[strings.ToLower(name)])
+	}
+	if callID == "" {
+		callID = state.nextID()
+	}
+
+	msg := map[string]any{
+		"role":         "tool",
+		"tool_call_id": callID,
+		"content":      normalizeClaudeToolResultContent(block["content"]),
+	}
+	if name == "" {
+		// No explicit name: fall back to whatever was recorded for this id.
+		if inferred := strings.TrimSpace(state.nameByID[callID]); inferred != "" {
+			msg["name"] = inferred
+		}
+		return msg
+	}
+	msg["name"] = name
+	state.nameByID[callID] = name
+	state.lastIDByName[strings.ToLower(name)] = callID
+	return msg
+}
+
+func normalizeClaudeToolResultContent(content any) any {
+	if text, ok := content.(string); ok {
+		return text
+	}
+	payload := map[string]any{
+		"type":    "tool_result",
+		"content": content,
+	}
+	b, err := json.Marshal(sanitizeClaudeBlockForPrompt(payload))
+	if err != nil {
+		return strings.TrimSpace(fmt.Sprintf("%v", content))
+	}
+	return string(b)
+}
+
+// formatClaudeBlockRaw renders a block as compact JSON. A nil block yields "";
+// if the block cannot be encoded, its default textual form is returned.
+func formatClaudeBlockRaw(block map[string]any) string {
+	if block == nil {
+		return ""
+	}
+	if encoded, err := json.Marshal(block); err == nil {
+		return string(encoded)
+	}
+	return strings.TrimSpace(fmt.Sprintf("%v", block))
+}
--- a/internal/adapter/claude/handler_utils_sanitize.go
+++ b/internal/adapter/claude/handler_utils_sanitize.go
@@ -0,0 +1,105 @@
+package claude
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+)
+
+const (
+	// maxClaudeRawPromptChars is the length (measured with len, i.e. in bytes)
+	// above which raw block text and string fields are cut and suffixed with
+	// "...(truncated)".
+	maxClaudeRawPromptChars = 1024
+	// omittedBinaryMarker is the placeholder written in place of values that
+	// look like binary or base64 payloads.
+	omittedBinaryMarker     = "[omitted_binary_payload]"
+)
+
+// formatClaudeUnknownBlockForPrompt renders a block that has no dedicated
+// handling as sanitized compact JSON suitable for inclusion in a prompt.
+// Output longer than maxClaudeRawPromptChars bytes is cut and suffixed with
+// "...(truncated)"; the cut never lands inside a multi-byte UTF-8 character.
+// A nil block, or a block that renders to nothing, yields "".
+func formatClaudeUnknownBlockForPrompt(block map[string]any) string {
+	if block == nil {
+		return ""
+	}
+	safe := sanitizeClaudeBlockForPrompt(block)
+	raw := strings.TrimSpace(formatClaudeBlockRaw(safe))
+	if raw == "" {
+		return ""
+	}
+	if len(raw) > maxClaudeRawPromptChars {
+		cut := maxClaudeRawPromptChars
+		// Step back over UTF-8 continuation bytes (10xxxxxx) so that slicing
+		// by byte index cannot split a character and emit invalid UTF-8.
+		for cut > 0 && raw[cut]&0xC0 == 0x80 {
+			cut--
+		}
+		return raw[:cut] + "...(truncated)"
+	}
+	return raw
+}
+
+func sanitizeClaudeBlockForPrompt(block map[string]any) map[string]any {
+	out := cloneMap(block)
+	for k, v := range out {
+		if looksLikeBinaryFieldName(k) {
+			out[k] = omittedBinaryMarker
+			continue
+		}
+		switch inner := v.(type) {
+		case map[string]any:
+			out[k] = sanitizeClaudeBlockForPrompt(inner)
+		case []any:
+			out[k] = sanitizeClaudeArrayForPrompt(inner)
+		case string:
+			out[k] = sanitizeClaudeStringForPrompt(k, inner)
+		}
+	}
+	return out
+}
+
+func sanitizeClaudeArrayForPrompt(items []any) []any {
+	out := make([]any, 0, len(items))
+	for _, item := range items {
+		switch v := item.(type) {
+		case map[string]any:
+			out = append(out, sanitizeClaudeBlockForPrompt(v))
+		case []any:
+			out = append(out, sanitizeClaudeArrayForPrompt(v))
+		default:
+			out = append(out, v)
+		}
+	}
+	return out
+}
+
+// sanitizeClaudeStringForPrompt prepares one string field for a prompt.
+//
+// The value is trimmed; a blank value yields "". A value stored under a
+// binary-looking key, or one that looks like a base64 payload, is replaced by
+// omittedBinaryMarker. Anything longer than maxClaudeRawPromptChars bytes is
+// cut and suffixed with "...(truncated)"; the cut never lands inside a
+// multi-byte UTF-8 character.
+func sanitizeClaudeStringForPrompt(key, value string) string {
+	trimmed := strings.TrimSpace(value)
+	if trimmed == "" {
+		return ""
+	}
+	if looksLikeBinaryFieldName(key) || looksLikeBase64Payload(trimmed) {
+		return omittedBinaryMarker
+	}
+	if len(trimmed) > maxClaudeRawPromptChars {
+		cut := maxClaudeRawPromptChars
+		// Step back over UTF-8 continuation bytes (10xxxxxx) so that slicing
+		// by byte index cannot split a character and emit invalid UTF-8.
+		for cut > 0 && trimmed[cut]&0xC0 == 0x80 {
+			cut--
+		}
+		return trimmed[:cut] + "...(truncated)"
+	}
+	return trimmed
+}
+
+// looksLikeBinaryFieldName reports whether name, ignoring case and surrounding
+// whitespace, is one of the field names treated as carrying a binary payload.
+func looksLikeBinaryFieldName(name string) bool {
+	switch strings.ToLower(strings.TrimSpace(name)) {
+	case "data", "bytes", "base64", "inline_data", "inlinedata":
+		return true
+	default:
+		return false
+	}
+}
+
+// looksLikeBase64Payload reports whether v is at least 512 bytes long and,
+// once trailing '=' padding is removed, is non-empty and made up only of
+// characters from the standard or URL-safe base64 alphabets.
+func looksLikeBase64Payload(v string) bool {
+	if len(v) < 512 {
+		return false
+	}
+	body := strings.TrimRight(v, "=")
+	if len(body) == 0 {
+		return false
+	}
+	outsideAlphabet := func(ch rune) bool {
+		switch {
+		case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z', '0' <= ch && ch <= '9':
+			return false
+		case ch == '+', ch == '/', ch == '-', ch == '_':
+			return false
+		}
+		return true
+	}
+	return strings.IndexFunc(body, outsideAlphabet) < 0
+}
+
+// marshalCompactJSON renders v as compact JSON, falling back to its trimmed
+// default textual form when v cannot be encoded.
+func marshalCompactJSON(v any) string {
+	if encoded, err := json.Marshal(v); err == nil {
+		return string(encoded)
+	}
+	return strings.TrimSpace(fmt.Sprintf("%v", v))
+}
--- a/internal/adapter/claude/output_clean.go
+++ b/internal/adapter/claude/output_clean.go
@@ -0,0 +1,13 @@
+package claude
+
+import textclean "ds2api/internal/textclean"
+
+// cleanVisibleOutput returns text with reference markers stripped when
+// stripReferenceMarkers is set; empty text, or text for which stripping is
+// disabled, is returned unchanged.
+func cleanVisibleOutput(text string, stripReferenceMarkers bool) string {
+	if text != "" && stripReferenceMarkers {
+		return textclean.StripReferenceMarkers(text)
+	}
+	return text
+}
--- a/internal/adapter/claude/proxy_vercel_test.go
+++ b/internal/adapter/claude/proxy_vercel_test.go
@@ -0,0 +1,86 @@
+package claude
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+)
+
+// claudeProxyStoreStub is a test double used as the handler's Store; it serves
+// a fixed Claude model mapping.
+type claudeProxyStoreStub struct {
+	mapping map[string]string
+}
+
+// ClaudeMapping returns the mapping the stub was constructed with.
+func (s claudeProxyStoreStub) ClaudeMapping() map[string]string {
+	return s.mapping
+}
+
+// CompatStripReferenceMarkers always reports true.
+func (claudeProxyStoreStub) CompatStripReferenceMarkers() bool { return true }
+
+// openAIProxyStub is a test double for the OpenAI-side handler that replies
+// with a canned JSON body and status.
+type openAIProxyStub struct {
+	status int
+	body   string
+}
+
+// ChatCompletions writes the canned body as application/json with the
+// configured status, using 200 when no status was set.
+func (s openAIProxyStub) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
+	code := s.status
+	if code == 0 {
+		code = http.StatusOK
+	}
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(code)
+	_, _ = w.Write([]byte(s.body))
+}
+
+// openAIProxyCaptureStub is a test double for the OpenAI-side handler that
+// remembers the model named in the request it receives.
+type openAIProxyCaptureStub struct {
+	seenModel string
+}
+
+// ChatCompletions decodes the request body (ignoring decode errors), records
+// its "model" when that is a string, and answers 200 with a fixed chat
+// completion JSON body.
+func (s *openAIProxyCaptureStub) ChatCompletions(w http.ResponseWriter, r *http.Request) {
+	var payload map[string]any
+	_ = json.NewDecoder(r.Body).Decode(&payload)
+	if model, isString := payload["model"].(string); isString {
+		s.seenModel = model
+	}
+
+	const reply = `{"id":"ok","choices":[{"message":{"role":"assistant","content":"ok"}}]}`
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_, _ = w.Write([]byte(reply))
+}
+
+// TestClaudeProxyViaOpenAIVercelPreparePassthrough checks that a request with
+// __stream_prepare=1 returns the upstream JSON, including lease_id, to the
+// caller with status 200.
+func TestClaudeProxyViaOpenAIVercelPreparePassthrough(t *testing.T) {
+	upstream := openAIProxyStub{status: 200, body: `{"lease_id":"lease_123","payload":{"a":1}}`}
+	handler := &Handler{OpenAI: upstream}
+	body := strings.NewReader(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":true}`)
+	request := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages?__stream_prepare=1", body)
+	recorder := httptest.NewRecorder()
+
+	handler.Messages(recorder, request)
+
+	if recorder.Code != http.StatusOK {
+		t.Fatalf("unexpected status: %d body=%s", recorder.Code, recorder.Body.String())
+	}
+	var decoded map[string]any
+	if err := json.Unmarshal(recorder.Body.Bytes(), &decoded); err != nil {
+		t.Fatalf("expected json response, got err=%v body=%s", err, recorder.Body.String())
+	}
+	if _, present := decoded["lease_id"]; !present {
+		t.Fatalf("expected lease_id in prepare passthrough, got=%v", decoded)
+	}
+}
+
+// TestClaudeProxyViaOpenAIPreservesClaudeMapping checks that the model
+// forwarded to the OpenAI-side handler is the one selected through the
+// store's Claude mapping (deepseek-reasoner for claude-3-opus here).
+func TestClaudeProxyViaOpenAIPreservesClaudeMapping(t *testing.T) {
+	capture := &openAIProxyCaptureStub{}
+	store := claudeProxyStoreStub{mapping: map[string]string{"fast": "deepseek-chat", "slow": "deepseek-reasoner"}}
+	handler := &Handler{
+		Store:  store,
+		OpenAI: capture,
+	}
+	body := strings.NewReader(`{"model":"claude-3-opus","messages":[{"role":"user","content":"hi"}],"stream":false}`)
+	request := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", body)
+	recorder := httptest.NewRecorder()
+
+	handler.Messages(recorder, request)
+
+	if recorder.Code != http.StatusOK {
+		t.Fatalf("unexpected status: %d body=%s", recorder.Code, recorder.Body.String())
+	}
+	forwarded := strings.TrimSpace(capture.seenModel)
+	if forwarded != "deepseek-reasoner" {
+		t.Fatalf("expected mapped proxy model deepseek-reasoner, got %q", forwarded)
+	}
+}
--- a/internal/adapter/claude/route_alias_test.go
+++ b/internal/adapter/claude/route_alias_test.go
@@ -0,0 +1,44 @@
+package claude
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/go-chi/chi/v5"
+
+	"ds2api/internal/auth"
+)
+
+// routeAliasAuthStub is a test double used as the handler's Auth; it rejects
+// every request.
+type routeAliasAuthStub struct{}
+
+// Determine always fails with auth.ErrUnauthorized.
+func (routeAliasAuthStub) Determine(_ *http.Request) (*auth.RequestAuth, error) {
+	return nil, auth.ErrUnauthorized
+}
+
+// Release is a no-op.
+func (routeAliasAuthStub) Release(_ *auth.RequestAuth) {}
+
+// TestClaudeRouteAliasesDoNot404 checks that every supported alias of the
+// messages and count_tokens endpoints is registered: a POST to each path must
+// answer with something other than 404.
+func TestClaudeRouteAliasesDoNot404(t *testing.T) {
+	router := chi.NewRouter()
+	RegisterRoutes(router, &Handler{Auth: routeAliasAuthStub{}})
+
+	aliases := []string{
+		"/anthropic/v1/messages",
+		"/v1/messages",
+		"/messages",
+		"/anthropic/v1/messages/count_tokens",
+		"/v1/messages/count_tokens",
+		"/messages/count_tokens",
+	}
+	for i := range aliases {
+		recorder := httptest.NewRecorder()
+		router.ServeHTTP(recorder, httptest.NewRequest(http.MethodPost, aliases[i], nil))
+		if recorder.Code == http.StatusNotFound {
+			t.Fatalf("expected route %s to be registered, got 404", aliases[i])
+		}
+	}
+}
--- a/internal/adapter/claude/standard_request.go
+++ b/internal/adapter/claude/standard_request.go
@@ -0,0 +1,116 @@
+package claude
+
+import (
+	"fmt"
+	"strings"
+
+	"ds2api/internal/config"
+	"ds2api/internal/deepseek"
+	"ds2api/internal/util"
+)
+
+// claudeNormalizedRequest is the result of normalizing an incoming Claude
+// request.
+type claudeNormalizedRequest struct {
+	// Standard is the request in the adapter-independent form.
+	Standard           util.StandardRequest
+	// NormalizedMessages holds the output of normalizeClaudeMessages, before
+	// any tool prompt is injected.
+	NormalizedMessages []any
+}
+
+// normalizeClaudeRequest validates a decoded Claude request and converts it
+// into a claudeNormalizedRequest.
+//
+// It fails when the model is blank or there are no messages. Otherwise it
+// defaults max_tokens to 8192 on req when absent, normalizes the messages,
+// injects the tool prompt, converts the payload to its DeepSeek form, and
+// builds the final prompt. When tools were supplied but none yields a name,
+// the tool name list is set to the single placeholder "__any_tool__".
+func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNormalizedRequest, error) {
+	model, _ := req["model"].(string)
+	rawMessages, _ := req["messages"].([]any)
+	requestedModel := strings.TrimSpace(model)
+	if requestedModel == "" || len(rawMessages) == 0 {
+		return claudeNormalizedRequest{}, fmt.Errorf("Request must include 'model' and 'messages'.")
+	}
+	if _, hasMaxTokens := req["max_tokens"]; !hasMaxTokens {
+		req["max_tokens"] = 8192
+	}
+
+	normalized := normalizeClaudeMessages(rawMessages)
+	requestedTools, _ := req["tools"].([]any)
+	payload := cloneMap(req)
+	payload["messages"] = normalized
+	finalMessages := injectClaudeToolPrompt(payload, normalized, requestedTools)
+	payload["messages"] = finalMessages
+
+	dsPayload := convertClaudeToDeepSeek(payload, store)
+	resolvedModel, _ := dsPayload["model"].(string)
+	thinking, search, known := config.GetModelConfig(resolvedModel)
+	if !known {
+		thinking, search = false, false
+	}
+	finalPrompt := deepseek.MessagesPrepare(toMessageMaps(dsPayload["messages"]))
+
+	toolNames := extractClaudeToolNames(requestedTools)
+	if len(requestedTools) > 0 && len(toolNames) == 0 {
+		toolNames = []string{"__any_tool__"}
+	}
+
+	standard := util.StandardRequest{
+		Surface:        "anthropic_messages",
+		RequestedModel: requestedModel,
+		ResolvedModel:  resolvedModel,
+		ResponseModel:  requestedModel,
+		Messages:       finalMessages,
+		FinalPrompt:    finalPrompt,
+		ToolNames:      toolNames,
+		Stream:         util.ToBool(req["stream"]),
+		Thinking:       thinking,
+		Search:         search,
+	}
+	return claudeNormalizedRequest{
+		Standard:           standard,
+		NormalizedMessages: normalized,
+	}, nil
+}
+
+// injectClaudeToolPrompt makes the tool instructions part of the system
+// prompt and returns the message list to use.
+//
+// With no tools, or when no tool prompt can be built, normalizedMessages is
+// returned untouched. Otherwise the tool prompt is appended to, in order of
+// preference: the non-blank top-level payload["system"] string (the messages
+// are then returned unchanged), the first system-role message (on a copy of
+// the list), or a new system message placed in front of the copied list.
+func injectClaudeToolPrompt(payload map[string]any, normalizedMessages []any, tools []any) []any {
+	if len(tools) == 0 {
+		return normalizedMessages
+	}
+	toolPrompt := strings.TrimSpace(buildClaudeToolPrompt(tools))
+	if toolPrompt == "" {
+		return normalizedMessages
+	}
+
+	// Prefer top-level Anthropic-style system prompt when available.
+	if systemText, ok := payload["system"].(string); ok && strings.TrimSpace(systemText) != "" {
+		payload["system"] = mergeSystemPrompt(systemText, toolPrompt)
+		return normalizedMessages
+	}
+
+	messages := cloneAnySlice(normalizedMessages)
+	for i := range messages {
+		msg, ok := messages[i].(map[string]any)
+		if !ok {
+			continue
+		}
+		role, _ := msg["role"].(string)
+		if !strings.EqualFold(strings.TrimSpace(role), "system") {
+			continue
+		}
+		copied := cloneMap(msg)
+		// A system message without content must contribute nothing; formatting
+		// nil with %v would otherwise put the literal "<nil>" into the prompt.
+		base := ""
+		if content := copied["content"]; content != nil {
+			base = strings.TrimSpace(fmt.Sprintf("%v", content))
+		}
+		copied["content"] = mergeSystemPrompt(base, toolPrompt)
+		messages[i] = copied
+		return messages
+	}
+
+	return append([]any{map[string]any{"role": "system", "content": toolPrompt}}, messages...)
+}
+
+// mergeSystemPrompt joins two prompt fragments with a blank line after
+// trimming both. When either fragment is blank the other one is returned on
+// its own.
+func mergeSystemPrompt(base, extra string) string {
+	head := strings.TrimSpace(base)
+	tail := strings.TrimSpace(extra)
+	if head == "" {
+		return tail
+	}
+	if tail == "" {
+		return head
+	}
+	return head + "\n\n" + tail
+}
+
+// cloneAnySlice returns a shallow copy of in backed by its own array, or nil
+// when in is empty.
+func cloneAnySlice(in []any) []any {
+	size := len(in)
+	if size == 0 {
+		return nil
+	}
+	return append(make([]any, 0, size), in...)
+}
--- a/internal/adapter/claude/standard_request_test.go
+++ b/internal/adapter/claude/standard_request_test.go
@@ -0,0 +1,92 @@
+package claude
+
+import (
+	"testing"
+
+	"ds2api/internal/config"
+)
+
+// TestNormalizeClaudeRequest checks that a minimal streaming request carrying
+// one tool normalizes into a standard request with a resolved model, the
+// stream flag set, at least one tool name and a non-empty final prompt.
+func TestNormalizeClaudeRequest(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{}`)
+	store := config.LoadStore()
+
+	messages := []any{
+		map[string]any{"role": "user", "content": "hello"},
+	}
+	tools := []any{
+		map[string]any{"name": "search", "description": "Search"},
+	}
+	got, err := normalizeClaudeRequest(store, map[string]any{
+		"model":    "claude-opus-4-6",
+		"messages": messages,
+		"stream":   true,
+		"tools":    tools,
+	})
+	if err != nil {
+		t.Fatalf("normalize failed: %v", err)
+	}
+
+	// Cases are listed in the order the properties are checked; the first
+	// violated one fails the test.
+	switch {
+	case got.Standard.ResolvedModel == "":
+		t.Fatalf("expected resolved model")
+	case !got.Standard.Stream:
+		t.Fatalf("expected stream=true")
+	case len(got.Standard.ToolNames) == 0:
+		t.Fatalf("expected tool names")
+	case got.Standard.FinalPrompt == "":
+		t.Fatalf("expected non-empty final prompt")
+	}
+}
+
+// TestNormalizeClaudeRequestInjectsToolsIntoExistingSystemMessage verifies
+// that, when the conversation already contains a system message, the final
+// prompt carries both that message's text and the injected tool prompt.
+func TestNormalizeClaudeRequestInjectsToolsIntoExistingSystemMessage(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{}`)
+	store := config.LoadStore()
+
+	conversation := []any{
+		map[string]any{"role": "system", "content": "baseline rule"},
+		map[string]any{"role": "user", "content": "hello"},
+	}
+	got, err := normalizeClaudeRequest(store, map[string]any{
+		"model":    "claude-sonnet-4-5",
+		"messages": conversation,
+		"tools": []any{
+			map[string]any{"name": "search", "description": "Search"},
+		},
+	})
+	if err != nil {
+		t.Fatalf("normalize failed: %v", err)
+	}
+
+	prompt := got.Standard.FinalPrompt
+	if !containsStr(prompt, "You have access to these tools") {
+		t.Fatalf("expected tool prompt injected into final prompt, got=%q", prompt)
+	}
+	if !containsStr(prompt, "baseline rule") {
+		t.Fatalf("expected existing system message preserved, got=%q", prompt)
+	}
+}
+
+// TestNormalizeClaudeRequestInjectsToolsIntoTopLevelSystem verifies that a
+// top-level "system" string survives normalization and that the tool prompt
+// also appears in the final prompt.
+func TestNormalizeClaudeRequestInjectsToolsIntoTopLevelSystem(t *testing.T) {
+	t.Setenv("DS2API_CONFIG_JSON", `{}`)
+	store := config.LoadStore()
+
+	got, err := normalizeClaudeRequest(store, map[string]any{
+		"model":  "claude-sonnet-4-5",
+		"system": "top-level system",
+		"messages": []any{
+			map[string]any{"role": "user", "content": "hello"},
+		},
+		"tools": []any{
+			map[string]any{"name": "search", "description": "Search"},
+		},
+	})
+	if err != nil {
+		t.Fatalf("normalize failed: %v", err)
+	}
+
+	prompt := got.Standard.FinalPrompt
+	if !containsStr(prompt, "top-level system") {
+		t.Fatalf("expected top-level system preserved, got=%q", prompt)
+	}
+	if !containsStr(prompt, "You have access to these tools") {
+		t.Fatalf("expected tool prompt injected, got=%q", prompt)
+	}
+}
--- a/internal/adapter/claude/stream_runtime_core.go
+++ b/internal/adapter/claude/stream_runtime_core.go
@@ -0,0 +1,161 @@
+package claude
+
+import (
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"ds2api/internal/sse"
+	streamengine "ds2api/internal/stream"
+)
+
+// claudeStreamRuntime holds the per-response state used while translating
+// parsed upstream SSE lines into Claude-style streaming events
+// (message_start, content_block_*, message_delta, message_stop).
+type claudeStreamRuntime struct {
+	// Output side: events are written to w and, when canFlush is true,
+	// flushed through rc after every event.
+	w        http.ResponseWriter
+	rc       *http.ResponseController
+	canFlush bool
+
+	model     string   // echoed in the message_start event
+	toolNames []string // passed to the tool-call parser during finalize
+	messages  []any    // request messages; used to estimate input tokens
+
+	thinkingEnabled       bool // when false, "thinking" parts are not emitted
+	searchEnabled         bool // when true, non-thinking citation parts are skipped
+	bufferToolContent     bool // true when tool names were supplied; text is held back until finalize
+	stripReferenceMarkers bool // forwarded to cleanVisibleOutput
+
+	messageID    string
+	thinking     strings.Builder // all emitted thinking text so far
+	text         strings.Builder // all visible text so far
+	outputTokens int             // last positive token count reported upstream
+
+	// Content-block bookkeeping. The *BlockIndex fields are -1 while the
+	// corresponding block is not open.
+	nextBlockIndex     int
+	thinkingBlockOpen  bool
+	thinkingBlockIndex int
+	textBlockOpen      bool
+	textBlockIndex     int
+	ended              bool   // set by finalize so it only runs once
+	upstreamErr        string // upstream error message, reported by onFinalize
+}
+
+// newClaudeStreamRuntime builds the streaming state for one response.
+//
+// Tool-content buffering is switched on whenever toolNames is non-empty, the
+// message ID is derived from the current time in nanoseconds, and both block
+// indexes start at -1 (no block open).
+func newClaudeStreamRuntime(
+	w http.ResponseWriter,
+	rc *http.ResponseController,
+	canFlush bool,
+	model string,
+	messages []any,
+	thinkingEnabled bool,
+	searchEnabled bool,
+	stripReferenceMarkers bool,
+	toolNames []string,
+) *claudeStreamRuntime {
+	rt := new(claudeStreamRuntime)
+
+	rt.w = w
+	rt.rc = rc
+	rt.canFlush = canFlush
+
+	rt.model = model
+	rt.messages = messages
+	rt.toolNames = toolNames
+
+	rt.thinkingEnabled = thinkingEnabled
+	rt.searchEnabled = searchEnabled
+	rt.stripReferenceMarkers = stripReferenceMarkers
+	rt.bufferToolContent = len(toolNames) > 0
+
+	rt.messageID = fmt.Sprintf("msg_%d", time.Now().UnixNano())
+	rt.thinkingBlockIndex = -1
+	rt.textBlockIndex = -1
+	return rt
+}
+
+// onParsed consumes one parsed upstream SSE line and emits the matching
+// Claude content-block events.
+//
+// Unparsed lines are ignored. A positive token count is remembered for the
+// final usage report. An upstream error message is stored and stops the
+// stream with the "upstream_error" reason; an upstream stop flag stops it
+// without a reason. Otherwise each part is cleaned and routed:
+//   - thinking parts (only when thinking is enabled) go to a thinking block,
+//     closing any open text block first;
+//   - text parts go to a text block, closing any open thinking block first,
+//     unless tool buffering is active, in which case the text is only
+//     accumulated and emitted later by finalize.
+//
+// The returned decision reports whether any non-empty, non-citation part was
+// seen on this line.
+func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision {
+	if !parsed.Parsed {
+		return streamengine.ParsedDecision{}
+	}
+	if parsed.OutputTokens > 0 {
+		s.outputTokens = parsed.OutputTokens
+	}
+	if parsed.ErrorMessage != "" {
+		s.upstreamErr = parsed.ErrorMessage
+		return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("upstream_error")}
+	}
+	if parsed.Stop {
+		return streamengine.ParsedDecision{Stop: true}
+	}
+
+	contentSeen := false
+	for _, p := range parsed.Parts {
+		cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
+		if cleanedText == "" {
+			continue
+		}
+		if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(cleanedText) {
+			continue
+		}
+		// Counted before the thinking-enabled check, so suppressed thinking
+		// output still registers as upstream activity.
+		contentSeen = true
+
+		if p.Type == "thinking" {
+			if !s.thinkingEnabled {
+				continue
+			}
+			s.thinking.WriteString(cleanedText)
+			s.closeTextBlock()
+			if !s.thinkingBlockOpen {
+				s.thinkingBlockIndex = s.nextBlockIndex
+				s.nextBlockIndex++
+				s.send("content_block_start", map[string]any{
+					"type":  "content_block_start",
+					"index": s.thinkingBlockIndex,
+					"content_block": map[string]any{
+						"type":     "thinking",
+						"thinking": "",
+					},
+				})
+				s.thinkingBlockOpen = true
+			}
+			s.send("content_block_delta", map[string]any{
+				"type":  "content_block_delta",
+				"index": s.thinkingBlockIndex,
+				"delta": map[string]any{
+					"type":     "thinking_delta",
+					"thinking": cleanedText,
+				},
+			})
+			continue
+		}
+
+		s.text.WriteString(cleanedText)
+		if s.bufferToolContent {
+			// Buffered mode never streams text deltas; finalize decides
+			// whether the accumulated text is a tool call or plain text.
+			// (A previous code-fence check here led to the same outcome on
+			// both branches while rescanning the whole buffer per chunk.)
+			continue
+		}
+		s.closeThinkingBlock()
+		if !s.textBlockOpen {
+			s.textBlockIndex = s.nextBlockIndex
+			s.nextBlockIndex++
+			s.send("content_block_start", map[string]any{
+				"type":  "content_block_start",
+				"index": s.textBlockIndex,
+				"content_block": map[string]any{
+					"type": "text",
+					"text": "",
+				},
+			})
+			s.textBlockOpen = true
+		}
+		s.send("content_block_delta", map[string]any{
+			"type":  "content_block_delta",
+			"index": s.textBlockIndex,
+			"delta": map[string]any{
+				"type": "text_delta",
+				"text": cleanedText,
+			},
+		})
+	}
+
+	return streamengine.ParsedDecision{ContentSeen: contentSeen}
+}
+
+// hasUnclosedCodeFence reports whether text contains an odd number of
+// non-overlapping triple-backtick fences, i.e. a fence was opened but not
+// closed.
+func hasUnclosedCodeFence(text string) bool {
+	fences := strings.Count(text, "```")
+	return fences%2 != 0
+}
--- a/internal/adapter/claude/stream_runtime_emit.go
+++ b/internal/adapter/claude/stream_runtime_emit.go
@@ -0,0 +1,59 @@
+package claude
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"ds2api/internal/util"
+)
+
+// send writes one server-sent event of the form
+//
+//	event: <event>
+//	data: <JSON of v>
+//
+// followed by a blank line, then flushes when flushing is available.
+//
+// If v cannot be JSON-encoded the event is dropped entirely: emitting a
+// "data: " line with no payload would hand clients a frame they cannot parse.
+// The frame is assembled in one buffer and written with a single Write call.
+func (s *claudeStreamRuntime) send(event string, v any) {
+	payload, err := json.Marshal(v)
+	if err != nil {
+		return
+	}
+
+	frame := make([]byte, 0, len("event: \ndata: \n\n")+len(event)+len(payload))
+	frame = append(frame, "event: "...)
+	frame = append(frame, event...)
+	frame = append(frame, "\ndata: "...)
+	frame = append(frame, payload...)
+	frame = append(frame, "\n\n"...)
+
+	// Write and flush errors are deliberately ignored: streaming is
+	// best-effort once the response has started.
+	_, _ = s.w.Write(frame)
+	if s.canFlush {
+		_ = s.rc.Flush()
+	}
+}
+
+// sendError emits an "error" event carrying message as an api_error. A blank
+// message (after trimming) is replaced by a generic upstream-error text.
+func (s *claudeStreamRuntime) sendError(message string) {
+	text := "upstream stream error"
+	if trimmed := strings.TrimSpace(message); trimmed != "" {
+		text = trimmed
+	}
+	detail := map[string]any{
+		"type":    "api_error",
+		"message": text,
+		"code":    "internal_error",
+		"param":   nil,
+	}
+	s.send("error", map[string]any{
+		"type":  "error",
+		"error": detail,
+	})
+}
+
+// sendPing emits a "ping" event whose payload contains only its type.
+func (s *claudeStreamRuntime) sendPing() {
+	const event = "ping"
+	s.send(event, map[string]any{"type": event})
+}
+
+// sendMessageStart emits the opening "message_start" event: an empty
+// assistant message with this runtime's ID and model. Input tokens are
+// estimated from the %v rendering of the request messages; output tokens
+// start at zero.
+func (s *claudeStreamRuntime) sendMessageStart() {
+	usage := map[string]any{
+		"input_tokens":  util.EstimateTokens(fmt.Sprintf("%v", s.messages)),
+		"output_tokens": 0,
+	}
+	message := map[string]any{
+		"id":            s.messageID,
+		"type":          "message",
+		"role":          "assistant",
+		"model":         s.model,
+		"content":       []any{},
+		"stop_reason":   nil,
+		"stop_sequence": nil,
+		"usage":         usage,
+	}
+	s.send("message_start", map[string]any{
+		"type":    "message_start",
+		"message": message,
+	})
+}
--- a/internal/adapter/claude/stream_runtime_finalize.go
+++ b/internal/adapter/claude/stream_runtime_finalize.go
@@ -0,0 +1,137 @@
+package claude
+
+import (
+	"encoding/json"
+	"fmt"
+	"time"
+
+	streamengine "ds2api/internal/stream"
+	"ds2api/internal/util"
+)
+
+// closeThinkingBlock emits content_block_stop for the open thinking block and
+// marks it closed (index reset to -1). It does nothing when no thinking block
+// is open.
+func (s *claudeStreamRuntime) closeThinkingBlock() {
+	if s.thinkingBlockOpen {
+		stop := map[string]any{
+			"type":  "content_block_stop",
+			"index": s.thinkingBlockIndex,
+		}
+		s.send("content_block_stop", stop)
+		s.thinkingBlockOpen, s.thinkingBlockIndex = false, -1
+	}
+}
+
+// closeTextBlock emits content_block_stop for the open text block and marks
+// it closed (index reset to -1). It does nothing when no text block is open.
+func (s *claudeStreamRuntime) closeTextBlock() {
+	if s.textBlockOpen {
+		stop := map[string]any{
+			"type":  "content_block_stop",
+			"index": s.textBlockIndex,
+		}
+		s.send("content_block_stop", stop)
+		s.textBlockOpen, s.textBlockIndex = false, -1
+	}
+}
+
+// finalize ends the streamed message. It runs at most once per runtime
+// (guarded by s.ended) and performs, in order:
+//  1. closes any open thinking/text block;
+//  2. in tool-buffering mode, parses the accumulated text for standalone
+//     tool calls and emits either one tool_use block per call (overriding
+//     stopReason with "tool_use") or the buffered text as a single text
+//     block;
+//  3. emits message_delta with the stop reason and output-token usage,
+//     followed by message_stop.
+func (s *claudeStreamRuntime) finalize(stopReason string) {
+	if s.ended {
+		return
+	}
+	s.ended = true
+
+	s.closeThinkingBlock()
+	s.closeTextBlock()
+
+	finalThinking := s.thinking.String()
+	finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
+
+	if s.bufferToolContent {
+		detected := util.ParseStandaloneToolCalls(finalText, s.toolNames)
+		// Fall back to the thinking text only when there is no visible text
+		// at all.
+		if len(detected) == 0 && finalText == "" && finalThinking != "" {
+			detected = util.ParseStandaloneToolCalls(finalThinking, s.toolNames)
+		}
+		if len(detected) > 0 {
+			stopReason = "tool_use"
+			for i, tc := range detected {
+				idx := s.nextBlockIndex + i
+				// Each tool call is a complete start/delta/stop triple; the
+				// block starts with an empty input and the full JSON input
+				// arrives in one input_json_delta.
+				s.send("content_block_start", map[string]any{
+					"type":  "content_block_start",
+					"index": idx,
+					"content_block": map[string]any{
+						"type":  "tool_use",
+						"id":    fmt.Sprintf("toolu_%d_%d", time.Now().Unix(), idx),
+						"name":  tc.Name,
+						"input": map[string]any{},
+					},
+				})
+
+				inputBytes, _ := json.Marshal(tc.Input)
+				s.send("content_block_delta", map[string]any{
+					"type":  "content_block_delta",
+					"index": idx,
+					"delta": map[string]any{
+						"type":         "input_json_delta",
+						"partial_json": string(inputBytes),
+					},
+				})
+
+				s.send("content_block_stop", map[string]any{
+					"type":  "content_block_stop",
+					"index": idx,
+				})
+			}
+			s.nextBlockIndex += len(detected)
+		} else if finalText != "" {
+			// No tool call found: release the buffered text as one block.
+			idx := s.nextBlockIndex
+			s.nextBlockIndex++
+			s.send("content_block_start", map[string]any{
+				"type":  "content_block_start",
+				"index": idx,
+				"content_block": map[string]any{
+					"type": "text",
+					"text": "",
+				},
+			})
+			s.send("content_block_delta", map[string]any{
+				"type":  "content_block_delta",
+				"index": idx,
+				"delta": map[string]any{
+					"type": "text_delta",
+					"text": finalText,
+				},
+			})
+			s.send("content_block_stop", map[string]any{
+				"type":  "content_block_stop",
+				"index": idx,
+			})
+		}
+	}
+
+	// Prefer the upstream-reported token count; otherwise estimate from the
+	// accumulated thinking and text.
+	outputTokens := util.EstimateTokens(finalThinking) + util.EstimateTokens(finalText)
+	if s.outputTokens > 0 {
+		outputTokens = s.outputTokens
+	}
+	s.send("message_delta", map[string]any{
+		"type": "message_delta",
+		"delta": map[string]any{
+			"stop_reason":   stopReason,
+			"stop_sequence": nil,
+		},
+		"usage": map[string]any{
+			"output_tokens": outputTokens,
+		},
+	})
+	s.send("message_stop", map[string]any{"type": "message_stop"})
+}
+
+// onFinalize is the end-of-stream hook. An "upstream_error" stop reason
+// reports the stored upstream error; otherwise a scanner error is reported;
+// otherwise the message is finalized normally with stop reason "end_turn".
+// The error paths emit only an error event and do not finalize.
+func (s *claudeStreamRuntime) onFinalize(reason streamengine.StopReason, scannerErr error) {
+	switch {
+	case string(reason) == "upstream_error":
+		s.sendError(s.upstreamErr)
+	case scannerErr != nil:
+		s.sendError(scannerErr.Error())
+	default:
+		s.finalize("end_turn")
+	}
+}
--- a/internal/adapter/claude/stream_status_test.go
+++ b/internal/adapter/claude/stream_status_test.go
@@ -0,0 +1,68 @@
+package claude
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/go-chi/chi/v5"
+	chimw "github.com/go-chi/chi/v5/middleware"
+)
+
+// streamStatusClaudeOpenAIStub is a test double for the OpenAI chat backend.
+type streamStatusClaudeOpenAIStub struct{}
+
+// ChatCompletions replies 200 with an event-stream body made of a single
+// "hello" content chunk followed by the [DONE] sentinel.
+func (streamStatusClaudeOpenAIStub) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.WriteHeader(http.StatusOK)
+	frames := [...]string{
+		"data: {\"id\":\"chatcmpl-1\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"hello\"},\"finish_reason\":null}]}\n\n",
+		"data: [DONE]\n\n",
+	}
+	for _, frame := range frames {
+		_, _ = w.Write([]byte(frame))
+	}
+}
+
+// streamStatusClaudeStoreStub is a test double for the handler's config
+// store.
+type streamStatusClaudeStoreStub struct{}
+
+// ClaudeMapping returns a fixed fast/slow model mapping.
+func (streamStatusClaudeStoreStub) ClaudeMapping() map[string]string {
+	mapping := make(map[string]string, 2)
+	mapping["fast"] = "deepseek-chat"
+	mapping["slow"] = "deepseek-reasoner"
+	return mapping
+}
+
+// CompatStripReferenceMarkers always reports true.
+func (streamStatusClaudeStoreStub) CompatStripReferenceMarkers() bool {
+	return true
+}
+
+// captureClaudeStatusMiddleware returns middleware that wraps the response
+// writer, runs the next handler, and then appends the status the wrapper
+// reports to *statuses.
+func captureClaudeStatusMiddleware(statuses *[]int) func(http.Handler) http.Handler {
+	wrap := func(next http.Handler) http.Handler {
+		record := func(w http.ResponseWriter, r *http.Request) {
+			tracked := chimw.NewWrapResponseWriter(w, r.ProtoMajor)
+			next.ServeHTTP(tracked, r)
+			*statuses = append(*statuses, tracked.Status())
+		}
+		return http.HandlerFunc(record)
+	}
+	return wrap
+}
+
+// TestClaudeMessagesStreamStatusCapturedAs200 sends a streaming request
+// through a router with the status-capturing middleware and checks that both
+// the recorder and the middleware observe HTTP 200 exactly once.
+func TestClaudeMessagesStreamStatusCapturedAs200(t *testing.T) {
+	captured := make([]int, 0, 1)
+	handler := &Handler{
+		Store:  streamStatusClaudeStoreStub{},
+		OpenAI: streamStatusClaudeOpenAIStub{},
+	}
+	router := chi.NewRouter()
+	router.Use(captureClaudeStatusMiddleware(&captured))
+	RegisterRoutes(router, handler)
+
+	const payload = `{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":true}`
+	req := httptest.NewRequest(http.MethodPost, "/anthropic/v1/messages", strings.NewReader(payload))
+	req.Header.Set("Content-Type", "application/json")
+	rec := httptest.NewRecorder()
+	router.ServeHTTP(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	if n := len(captured); n != 1 {
+		t.Fatalf("expected one captured status, got %d", n)
+	}
+	if got := captured[0]; got != http.StatusOK {
+		t.Fatalf("expected captured status 200 (not 000), got %d", got)
+	}
+}
--- a/internal/adapter/claude/tool_call_state.go
+++ b/internal/adapter/claude/tool_call_state.go
@@ -0,0 +1,25 @@
+package claude
+
+import (
+	"fmt"
+	"strings"
+)
+
+// claudeToolCallState tracks tool-call identifiers while processing one
+// conversation.
+type claudeToolCallState struct {
+	// NOTE(review): the two maps are not used in the visible code; judging by
+	// their names they map call ID -> tool name and tool name -> most recent
+	// call ID. Confirm against the callers.
+	nameByID       map[string]string
+	lastIDByName   map[string]string
+	callIDSequence int // counter behind nextID; incremented before each use
+}
+
+// nextID advances the sequence counter and returns a fresh identifier of the
+// form "call_claude_<n>", with n starting at 1.
+func (s *claudeToolCallState) nextID() string {
+	seq := s.callIDSequence + 1
+	s.callIDSequence = seq
+	return fmt.Sprintf("call_claude_%d", seq)
+}
+
+// safeStringValue returns v with surrounding whitespace removed when v holds
+// a string, and the empty string for any other dynamic type (including nil).
+func safeStringValue(v any) string {
+	if text, isString := v.(string); isString {
+		return strings.TrimSpace(text)
+	}
+	return ""
+}
--- a/internal/adapter/gemini/convert_messages.go
+++ b/internal/adapter/gemini/convert_messages.go
@@ -0,0 +1,259 @@
+package gemini
+
+import (
+	"fmt"
+	"strings"
+)
+
+// maxGeminiRawPromptChars caps how much of an unrecognized part (or of a
+// single string value inside one) is copied into the prompt. The limit is
+// compared against len(), so it counts bytes.
+const maxGeminiRawPromptChars = 1024
+
+// geminiMessagesFromRequest converts a Gemini-style request body into a flat
+// list of chat messages (maps with "role"/"content", assistant "tool_calls",
+// or "tool" results).
+//
+// Conversion rules visible here:
+//   - a non-blank systemInstruction becomes a leading "system" message;
+//   - each entry of "contents" is mapped by role (unknown roles become
+//     "user"); an entry without parts may carry a plain "text" field;
+//   - consecutive text parts are joined with newlines into one message and
+//     flushed before any functionCall/functionResponse part;
+//   - functionCall parts become assistant tool_calls, functionResponse parts
+//     become "tool" messages; call IDs are taken from the part, else (for
+//     responses) from the last call with the same name, else generated;
+//   - any other part is embedded as sanitized, size-limited JSON text.
+func geminiMessagesFromRequest(req map[string]any) []any {
+	out := make([]any, 0, 8)
+	toolCallCounter := 0
+	// nextToolCallID generates "call_gemini_<n>" identifiers, n starting at 1.
+	nextToolCallID := func() string {
+		toolCallCounter++
+		return fmt.Sprintf("call_gemini_%d", toolCallCounter)
+	}
+	// Most recent call ID per lower-cased function name, used to pair
+	// functionResponse parts that carry no ID of their own.
+	lastToolCallIDByName := map[string]string{}
+	if sys := normalizeGeminiSystemInstruction(req["systemInstruction"]); strings.TrimSpace(sys) != "" {
+		out = append(out, map[string]any{
+			"role":    "system",
+			"content": sys,
+		})
+	}
+
+	contents, _ := req["contents"].([]any)
+	for _, item := range contents {
+		content, ok := item.(map[string]any)
+		if !ok {
+			continue
+		}
+		role := mapGeminiRole(content["role"])
+		if role == "" {
+			role = "user"
+		}
+		parts, _ := content["parts"].([]any)
+		if len(parts) == 0 {
+			// No parts: accept a bare "text" field on the content entry.
+			if text := strings.TrimSpace(asString(content["text"])); text != "" {
+				out = append(out, map[string]any{
+					"role":    role,
+					"content": text,
+				})
+			}
+			continue
+		}
+
+		textParts := make([]string, 0, len(parts))
+		// flushText emits the pending text parts as one message and resets
+		// the buffer, keeping text ordered relative to tool messages.
+		flushText := func() {
+			if len(textParts) == 0 {
+				return
+			}
+			out = append(out, map[string]any{
+				"role":    role,
+				"content": strings.Join(textParts, "\n"),
+			})
+			textParts = textParts[:0]
+		}
+
+		for _, rawPart := range parts {
+			part, ok := rawPart.(map[string]any)
+			if !ok {
+				continue
+			}
+			if text := strings.TrimSpace(asString(part["text"])); text != "" {
+				textParts = append(textParts, text)
+				continue
+			}
+
+			if fnCall, ok := part["functionCall"].(map[string]any); ok {
+				flushText()
+				// A call without a name is dropped.
+				if name := strings.TrimSpace(asString(fnCall["name"])); name != "" {
+					callID := strings.TrimSpace(asString(fnCall["id"]))
+					if callID == "" {
+						if callID = strings.TrimSpace(asString(fnCall["call_id"])); callID == "" {
+							callID = nextToolCallID()
+						}
+					}
+					lastToolCallIDByName[strings.ToLower(name)] = callID
+					// Always emitted with the assistant role, regardless of
+					// the enclosing content's role.
+					out = append(out, map[string]any{
+						"role": "assistant",
+						"tool_calls": []any{
+							map[string]any{
+								"id":   callID,
+								"type": "function",
+								"function": map[string]any{
+									"name":      name,
+									"arguments": stringifyJSON(fnCall["args"]),
+								},
+							},
+						},
+					})
+				}
+				continue
+			}
+
+			if fnResp, ok := part["functionResponse"].(map[string]any); ok {
+				flushText()
+				name := strings.TrimSpace(asString(fnResp["name"]))
+				// ID lookup order: "id", "callId", "tool_call_id", the last
+				// call made with the same name, then a generated ID.
+				callID := strings.TrimSpace(asString(fnResp["id"]))
+				if callID == "" {
+					callID = strings.TrimSpace(asString(fnResp["callId"]))
+				}
+				if callID == "" {
+					callID = strings.TrimSpace(asString(fnResp["tool_call_id"]))
+				}
+				if callID == "" {
+					callID = strings.TrimSpace(lastToolCallIDByName[strings.ToLower(name)])
+				}
+				if callID == "" {
+					callID = nextToolCallID()
+				}
+				// Payload comes from "response", else "output", else "".
+				content := fnResp["response"]
+				if content == nil {
+					content = fnResp["output"]
+				}
+				if content == nil {
+					content = ""
+				}
+				msg := map[string]any{
+					"role":         "tool",
+					"tool_call_id": callID,
+					"content":      content,
+				}
+				if name != "" {
+					msg["name"] = name
+				}
+				out = append(out, msg)
+				continue
+			}
+
+			// Unknown part type: keep a sanitized JSON rendering as text.
+			if raw := strings.TrimSpace(formatGeminiUnknownPartForPrompt(part)); raw != "" && raw != "null" {
+				textParts = append(textParts, raw)
+			}
+		}
+		flushText()
+	}
+	return out
+}
+
+// normalizeGeminiSystemInstruction extracts the system text from a
+// systemInstruction value.
+//
+// Accepted shapes: a plain string (trimmed); an object with a "parts" array,
+// whose non-blank "text" entries are trimmed and joined with newlines; or an
+// object with a "text" field. A "parts" array takes precedence over "text"
+// even when it yields nothing. Any other shape gives "".
+func normalizeGeminiSystemInstruction(raw any) string {
+	if text, isString := raw.(string); isString {
+		return strings.TrimSpace(text)
+	}
+	obj, isObject := raw.(map[string]any)
+	if !isObject {
+		return ""
+	}
+
+	parts, hasParts := obj["parts"].([]any)
+	if !hasParts {
+		return strings.TrimSpace(asString(obj["text"]))
+	}
+
+	collected := make([]string, 0, len(parts))
+	for _, entry := range parts {
+		part, isPart := entry.(map[string]any)
+		if !isPart {
+			continue
+		}
+		text := strings.TrimSpace(asString(part["text"]))
+		if text == "" {
+			continue
+		}
+		collected = append(collected, text)
+	}
+	return strings.Join(collected, "\n")
+}
+
+// mapGeminiRole translates a Gemini role value into a chat role. Matching is
+// case-insensitive and ignores surrounding whitespace: "model" and
+// "assistant" map to "assistant", "user" and "system" map to themselves, and
+// anything else (including non-strings) maps to "".
+func mapGeminiRole(v any) string {
+	role := strings.ToLower(strings.TrimSpace(asString(v)))
+	if role == "model" || role == "assistant" {
+		return "assistant"
+	}
+	if role == "user" || role == "system" {
+		return role
+	}
+	return ""
+}
+
+// formatGeminiUnknownPartForPrompt renders an unrecognized request part as
+// JSON text suitable for embedding in the prompt. The part is sanitized
+// first, and output longer than maxGeminiRawPromptChars bytes is cut and
+// suffixed with "...(truncated)".
+//
+// The cut never leaves a broken character behind: if the byte limit falls
+// inside a multi-byte UTF-8 sequence, the partial sequence is dropped so the
+// result stays valid UTF-8.
+func formatGeminiUnknownPartForPrompt(part map[string]any) string {
+	safe := sanitizeGeminiPartForPrompt(part)
+	raw := strings.TrimSpace(stringifyJSON(safe))
+	if len(raw) <= maxGeminiRawPromptChars {
+		return raw
+	}
+	// ToValidUTF8 with an empty replacement removes the trailing fragment of
+	// a rune that the byte slice may have split.
+	return strings.ToValidUTF8(raw[:maxGeminiRawPromptChars], "") + "...(truncated)"
+}
+
+// sanitizeGeminiPartForPrompt returns a copy of part that is safe to embed in
+// a prompt. Values under binary-looking keys are replaced by a placeholder;
+// nested objects and arrays are sanitized recursively; strings go through the
+// string sanitizer; every other value is copied as-is.
+func sanitizeGeminiPartForPrompt(part map[string]any) map[string]any {
+	cleaned := make(map[string]any, len(part))
+	for key, value := range part {
+		if looksLikeGeminiBinaryField(key) {
+			cleaned[key] = "[omitted_binary_payload]"
+			continue
+		}
+		replacement := value
+		switch typed := value.(type) {
+		case map[string]any:
+			replacement = sanitizeGeminiPartForPrompt(typed)
+		case []any:
+			replacement = sanitizeGeminiArrayForPrompt(typed)
+		case string:
+			replacement = sanitizeGeminiStringForPrompt(key, typed)
+		}
+		cleaned[key] = replacement
+	}
+	return cleaned
+}
+
+// sanitizeGeminiArrayForPrompt returns a sanitized copy of items for prompt
+// embedding. Objects and nested arrays are sanitized recursively, and string
+// elements receive the same treatment as string values inside objects
+// (trimming, base64 redaction, length cap), so a payload cannot bypass
+// redaction merely by sitting inside an array. Other values are copied
+// unchanged.
+func sanitizeGeminiArrayForPrompt(items []any) []any {
+	out := make([]any, 0, len(items))
+	for _, item := range items {
+		switch x := item.(type) {
+		case map[string]any:
+			out = append(out, sanitizeGeminiPartForPrompt(x))
+		case []any:
+			out = append(out, sanitizeGeminiArrayForPrompt(x))
+		case string:
+			// Array elements have no key of their own; pass an empty key so
+			// only the content-based checks apply.
+			out = append(out, sanitizeGeminiStringForPrompt("", x))
+		default:
+			out = append(out, x)
+		}
+	}
+	return out
+}
+
+// sanitizeGeminiStringForPrompt prepares one string value for prompt
+// embedding. The value is trimmed; a blank value yields "". Values under a
+// binary-looking key, or that look like base64, are replaced by
+// "[omitted_binary_payload]". Values longer than maxGeminiRawPromptChars
+// bytes are cut and suffixed with "...(truncated)".
+//
+// The cut keeps the result valid UTF-8: a multi-byte character split by the
+// byte limit is dropped rather than emitted as a broken sequence.
+func sanitizeGeminiStringForPrompt(key, value string) string {
+	trimmed := strings.TrimSpace(value)
+	if trimmed == "" {
+		return ""
+	}
+	if looksLikeGeminiBinaryField(key) || looksLikeGeminiBase64(trimmed) {
+		return "[omitted_binary_payload]"
+	}
+	if len(trimmed) > maxGeminiRawPromptChars {
+		// ToValidUTF8 with an empty replacement strips the partial rune that
+		// byte-index slicing can leave at the end.
+		return strings.ToValidUTF8(trimmed[:maxGeminiRawPromptChars], "") + "...(truncated)"
+	}
+	return trimmed
+}
+
+// looksLikeGeminiBinaryField reports whether a field name (compared
+// case-insensitively, ignoring surrounding whitespace) is one of the names
+// treated as carrying binary payloads.
+func looksLikeGeminiBinaryField(name string) bool {
+	switch strings.ToLower(strings.TrimSpace(name)) {
+	case "data", "bytes", "inlinedata", "inline_data", "base64":
+		return true
+	}
+	return false
+}
+
+// looksLikeGeminiBase64 is a heuristic for base64 payloads: the string must
+// be at least 512 bytes long and, after removing trailing '=' padding, be
+// non-empty and consist solely of characters from the standard or URL-safe
+// base64 alphabets (letters, digits, '+', '/', '-', '_').
+func looksLikeGeminiBase64(v string) bool {
+	const minLength = 512
+	if len(v) < minLength {
+		return false
+	}
+	body := strings.TrimRight(v, "=")
+	if body == "" {
+		return false
+	}
+	outsideAlphabet := func(r rune) bool {
+		switch {
+		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
+			return false
+		case r == '+', r == '/', r == '-', r == '_':
+			return false
+		}
+		return true
+	}
+	return strings.IndexFunc(body, outsideAlphabet) < 0
+}
--- a/internal/adapter/gemini/convert_messages_test.go
+++ b/internal/adapter/gemini/convert_messages_test.go
@@ -0,0 +1,129 @@
+package gemini
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestGeminiMessagesFromRequestPreservesFunctionRoundtrip feeds a model
+// functionCall followed by a user functionResponse sharing the same explicit
+// ID, and expects an assistant tool-call message followed by a tool message
+// bound to that ID.
+func TestGeminiMessagesFromRequestPreservesFunctionRoundtrip(t *testing.T) {
+	callPart := map[string]any{
+		"functionCall": map[string]any{
+			"id":   "call_g1",
+			"name": "search_web",
+			"args": map[string]any{"query": "ai"},
+		},
+	}
+	responsePart := map[string]any{
+		"functionResponse": map[string]any{
+			"id":       "call_g1",
+			"name":     "search_web",
+			"response": "ok",
+		},
+	}
+	req := map[string]any{
+		"contents": []any{
+			map[string]any{"role": "model", "parts": []any{callPart}},
+			map[string]any{"role": "user", "parts": []any{responsePart}},
+		},
+	}
+
+	msgs := geminiMessagesFromRequest(req)
+	if len(msgs) != 2 {
+		t.Fatalf("expected two normalized messages, got %#v", msgs)
+	}
+
+	first, _ := msgs[0].(map[string]any)
+	if first["role"] != "assistant" {
+		t.Fatalf("expected assistant first, got %#v", first)
+	}
+	calls, _ := first["tool_calls"].([]any)
+	if len(calls) != 1 {
+		t.Fatalf("expected one tool call, got %#v", first["tool_calls"])
+	}
+
+	second, _ := msgs[1].(map[string]any)
+	if second["role"] != "tool" || second["tool_call_id"] != "call_g1" {
+		t.Fatalf("expected tool message with call id, got %#v", second)
+	}
+}
+
+// TestGeminiMessagesFromRequestPreservesUnknownPartAsRawJSONText checks that
+// an inlineData part next to a text part is kept in the same message as JSON
+// text, with its payload replaced by the redaction placeholder instead of the
+// raw base64 data.
+func TestGeminiMessagesFromRequestPreservesUnknownPartAsRawJSONText(t *testing.T) {
+	blob := strings.Repeat("A", 2048)
+	parts := []any{
+		map[string]any{"text": "hello"},
+		map[string]any{"inlineData": map[string]any{"mimeType": "image/png", "data": blob}},
+	}
+	req := map[string]any{
+		"contents": []any{
+			map[string]any{"role": "user", "parts": parts},
+		},
+	}
+
+	msgs := geminiMessagesFromRequest(req)
+	if len(msgs) != 1 {
+		t.Fatalf("expected one normalized message, got %#v", msgs)
+	}
+	only, _ := msgs[0].(map[string]any)
+	body, _ := only["content"].(string)
+
+	keepsBoth := strings.Contains(body, "hello") && strings.Contains(body, "inlineData")
+	if !keepsBoth {
+		t.Fatalf("expected unknown part preserved as raw json text, got %q", body)
+	}
+	if !strings.Contains(body, "[omitted_binary_payload]") {
+		t.Fatalf("expected inlineData payload to be redacted, got %q", body)
+	}
+	if strings.Contains(body, strings.Repeat("A", 100)) {
+		t.Fatalf("expected raw base64 payload not to be embedded, got %q", body)
+	}
+}
+
+// TestGeminiMessagesFromRequestBackfillsFunctionResponseCallIDByName omits
+// the ID on both the functionCall and the functionResponse and expects the
+// call to get a generated "call_gemini_" ID that the response then inherits
+// through the shared function name.
+func TestGeminiMessagesFromRequestBackfillsFunctionResponseCallIDByName(t *testing.T) {
+	callPart := map[string]any{
+		"functionCall": map[string]any{
+			"name": "search_web",
+			"args": map[string]any{"query": "docs"},
+		},
+	}
+	responsePart := map[string]any{
+		"functionResponse": map[string]any{
+			"name":     "search_web",
+			"response": map[string]any{"ok": true},
+		},
+	}
+	req := map[string]any{
+		"contents": []any{
+			map[string]any{"role": "model", "parts": []any{callPart}},
+			map[string]any{"role": "user", "parts": []any{responsePart}},
+		},
+	}
+
+	msgs := geminiMessagesFromRequest(req)
+	if len(msgs) != 2 {
+		t.Fatalf("expected two normalized messages, got %#v", msgs)
+	}
+
+	first, _ := msgs[0].(map[string]any)
+	calls, _ := first["tool_calls"].([]any)
+	call, _ := calls[0].(map[string]any)
+	generatedID, _ := call["id"].(string)
+	if !strings.HasPrefix(generatedID, "call_gemini_") {
+		t.Fatalf("expected generated call id prefix, got %#v", call)
+	}
+
+	second, _ := msgs[1].(map[string]any)
+	if second["tool_call_id"] != generatedID {
+		t.Fatalf("expected tool response to inherit generated call id, tool=%#v call=%#v", second, call)
+	}
+}
--- a/internal/adapter/gemini/convert_passthrough.go
+++ b/internal/adapter/gemini/convert_passthrough.go
@@ -0,0 +1,54 @@
+package gemini
+
+import (
+	"encoding/json"
+	"strings"
+)
+
+// collectGeminiPassThrough copies the supported sampling options from the
+// request's generationConfig into a map keyed by their OpenAI-style names:
+// temperature, topP -> top_p, maxOutputTokens -> max_tokens and
+// stopSequences -> stop. Values are copied untouched. It returns nil when
+// generationConfig is absent, empty, or has none of those keys.
+func collectGeminiPassThrough(req map[string]any) map[string]any {
+	cfg, _ := req["generationConfig"].(map[string]any)
+	if len(cfg) == 0 {
+		return nil
+	}
+
+	renames := [...][2]string{
+		{"temperature", "temperature"},
+		{"topP", "top_p"},
+		{"maxOutputTokens", "max_tokens"},
+		{"stopSequences", "stop"},
+	}
+	var out map[string]any
+	for _, pair := range renames {
+		value, present := cfg[pair[0]]
+		if !present {
+			continue
+		}
+		// Allocate lazily so "nothing matched" stays a nil map.
+		if out == nil {
+			out = map[string]any{}
+		}
+		out[pair[1]] = value
+	}
+	return out
+}
+
+// asString returns v unchanged when it holds a string, and "" for any other
+// dynamic type (including nil).
+func asString(v any) string {
+	if text, isString := v.(string); isString {
+		return text
+	}
+	return ""
+}
+
+// stringifyJSON renders v as JSON text. A nil value, a blank string, or a
+// value that fails to marshal yields "{}". A non-blank string is assumed to
+// already be JSON and is returned trimmed, without validation. Everything
+// else is encoded with json.Marshal.
+func stringifyJSON(v any) string {
+	const emptyObject = "{}"
+
+	if v == nil {
+		return emptyObject
+	}
+	if text, isString := v.(string); isString {
+		if trimmed := strings.TrimSpace(text); trimmed != "" {
+			return trimmed
+		}
+		return emptyObject
+	}
+	encoded, err := json.Marshal(v)
+	if err != nil || len(encoded) == 0 {
+		return emptyObject
+	}
+	return string(encoded)
+}
--- a/internal/adapter/gemini/convert_request.go
+++ b/internal/adapter/gemini/convert_request.go
@@ -0,0 +1,46 @@
+package gemini
+
+import (
+	"fmt"
+	"strings"
+
+	"ds2api/internal/adapter/openai"
+	"ds2api/internal/config"
+	"ds2api/internal/util"
+)
+
+// normalizeGeminiRequest turns a Gemini generate request into the adapter's
+// standard request.
+//
+// routeModel is the model name taken from the request path; it is trimmed,
+// must be non-empty and must resolve through the config store. The request's
+// contents are converted to chat messages (at least one is required), its
+// tools are converted and folded into the final prompt, and supported
+// generationConfig options are carried along as pass-through values. The
+// requested model name is also used as the response model.
+//
+// An error is returned when the model is missing or unknown, or when the
+// request yields no messages.
+func normalizeGeminiRequest(store ConfigReader, routeModel string, req map[string]any, stream bool) (util.StandardRequest, error) {
+	var none util.StandardRequest
+
+	model := strings.TrimSpace(routeModel)
+	if model == "" {
+		return none, fmt.Errorf("model is required in request path")
+	}
+	resolved, found := config.ResolveModel(store, model)
+	if !found {
+		return none, fmt.Errorf("Model '%s' is not available.", model)
+	}
+	thinking, search, _ := config.GetModelConfig(resolved)
+
+	messages := geminiMessagesFromRequest(req)
+	if len(messages) == 0 {
+		return none, fmt.Errorf("Request must include non-empty contents.")
+	}
+
+	tools := convertGeminiTools(req["tools"])
+	prompt, toolNames := openai.BuildPromptForAdapter(messages, tools, "")
+
+	normalized := util.StandardRequest{
+		Surface:        "google_gemini",
+		RequestedModel: model,
+		ResolvedModel:  resolved,
+		ResponseModel:  model,
+		Messages:       messages,
+		FinalPrompt:    prompt,
+		ToolNames:      toolNames,
+		Stream:         stream,
+		Thinking:       thinking,
+		Search:         search,
+		PassThrough:    collectGeminiPassThrough(req),
+	}
+	return normalized, nil
+}
--- a/internal/adapter/gemini/convert_tools.go
+++ b/internal/adapter/gemini/convert_tools.go
@@ -0,0 +1,71 @@
+package gemini
+
+import "strings"
+
+// convertGeminiTools converts a Gemini "tools" value into OpenAI-style
+// function tool definitions. Each tool entry is handled by the first rule
+// that applies:
+//  1. a non-empty "functionDeclarations" array: every named declaration
+//     becomes one function tool;
+//  2. an object already carrying an OpenAI-style "function" object is passed
+//     through unchanged;
+//  3. a flattened schema with a top-level "name" becomes one function tool.
+//
+// Entries that are not objects, and declarations without a name, are
+// skipped. The result is nil when nothing usable was found.
+func convertGeminiTools(raw any) []any {
+	tools, _ := raw.([]any)
+	if len(tools) == 0 {
+		return nil
+	}
+
+	converted := make([]any, 0, len(tools))
+	for _, entry := range tools {
+		tool, isObject := entry.(map[string]any)
+		if !isObject {
+			continue
+		}
+
+		if decls, _ := tool["functionDeclarations"].([]any); len(decls) > 0 {
+			for _, rawDecl := range decls {
+				decl, isDecl := rawDecl.(map[string]any)
+				if !isDecl {
+					continue
+				}
+				if wrapped := wrapGeminiFunctionSchema(decl); wrapped != nil {
+					converted = append(converted, wrapped)
+				}
+			}
+			continue
+		}
+
+		// OpenAI-style passthrough fallback.
+		if _, hasFunction := tool["function"].(map[string]any); hasFunction {
+			converted = append(converted, tool)
+			continue
+		}
+
+		// Loose fallback for flattened function schema objects.
+		if wrapped := wrapGeminiFunctionSchema(tool); wrapped != nil {
+			converted = append(converted, wrapped)
+		}
+	}
+
+	if len(converted) == 0 {
+		return nil
+	}
+	return converted
+}
+
+// wrapGeminiFunctionSchema builds an OpenAI-style function tool from an
+// object holding "name", optional "description" and optional object-valued
+// "parameters". It returns nil when the trimmed name is empty.
+func wrapGeminiFunctionSchema(schema map[string]any) map[string]any {
+	name := strings.TrimSpace(asString(schema["name"]))
+	if name == "" {
+		return nil
+	}
+	function := map[string]any{"name": name}
+	if desc := strings.TrimSpace(asString(schema["description"])); desc != "" {
+		function["description"] = desc
+	}
+	if params, ok := schema["parameters"].(map[string]any); ok {
+		function["parameters"] = params
+	}
+	return map[string]any{
+		"type":     "function",
+		"function": function,
+	}
+}
--- a/internal/adapter/gemini/deps.go
+++ b/internal/adapter/gemini/deps.go
@@ -0,0 +1,34 @@
+package gemini
+
+import (
+	"context"
+	"net/http"
+
+	"ds2api/internal/auth"
+	"ds2api/internal/config"
+	"ds2api/internal/deepseek"
+)
+
+// AuthResolver is the authentication dependency of the Gemini adapter. It is
+// satisfied by *auth.Resolver.
+type AuthResolver interface {
+	// Determine derives the auth context for an incoming request.
+	Determine(req *http.Request) (*auth.RequestAuth, error)
+	// Release hands back an auth context obtained from Determine.
+	// NOTE(review): exact release semantics live in the auth package.
+	Release(a *auth.RequestAuth)
+}
+
+// DeepSeekCaller is the upstream client dependency of the Gemini adapter. It
+// is satisfied by *deepseek.Client. Each method takes the request's auth
+// context and a maximum number of attempts.
+type DeepSeekCaller interface {
+	CreateSession(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
+	GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
+	CallCompletion(ctx context.Context, a *auth.RequestAuth, payload map[string]any, powResp string, maxAttempts int) (*http.Response, error)
+}
+
+// ConfigReader exposes the configuration the Gemini adapter reads: the model
+// alias table and the reference-marker stripping switch. It is satisfied by
+// *config.Store.
+type ConfigReader interface {
+	ModelAliases() map[string]string
+	CompatStripReferenceMarkers() bool
+}
+
+// OpenAIChatRunner is the OpenAI-compatible chat handler that translated
+// Gemini requests are proxied to.
+type OpenAIChatRunner interface {
+	ChatCompletions(w http.ResponseWriter, r *http.Request)
+}
+
+// Compile-time checks that the concrete implementations satisfy the adapter's
+// dependency interfaces.
+var _ AuthResolver = (*auth.Resolver)(nil)
+var _ DeepSeekCaller = (*deepseek.Client)(nil)
+var _ ConfigReader = (*config.Store)(nil)
--- a/internal/adapter/gemini/handler_errors.go
+++ b/internal/adapter/gemini/handler_errors.go
@@ -0,0 +1,28 @@
+package gemini
+
+import "net/http"
+
+// writeGeminiError writes a Gemini-style JSON error body with the given HTTP
+// status. The body's "status" field is derived from the HTTP status:
+// 401 UNAUTHENTICATED, 403 PERMISSION_DENIED, 429 RESOURCE_EXHAUSTED,
+// 404 NOT_FOUND, any 5xx-or-above INTERNAL, and INVALID_ARGUMENT otherwise.
+func writeGeminiError(w http.ResponseWriter, status int, message string) {
+	var errorStatus string
+	switch {
+	case status == http.StatusUnauthorized:
+		errorStatus = "UNAUTHENTICATED"
+	case status == http.StatusForbidden:
+		errorStatus = "PERMISSION_DENIED"
+	case status == http.StatusTooManyRequests:
+		errorStatus = "RESOURCE_EXHAUSTED"
+	case status == http.StatusNotFound:
+		errorStatus = "NOT_FOUND"
+	case status >= 500:
+		errorStatus = "INTERNAL"
+	default:
+		errorStatus = "INVALID_ARGUMENT"
+	}
+
+	detail := map[string]any{
+		"code":    status,
+		"message": message,
+		"status":  errorStatus,
+	}
+	writeJSON(w, status, map[string]any{"error": detail})
+}
--- a/internal/adapter/gemini/handler_generate.go
+++ b/internal/adapter/gemini/handler_generate.go
@@ -0,0 +1,211 @@
+package gemini
+
+import (
+	"bytes"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+
+	"github.com/go-chi/chi/v5"
+
+	"ds2api/internal/sse"
+	"ds2api/internal/translatorcliproxy"
+	"ds2api/internal/util"
+
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
+)
+
+// handleGenerateContent is the shared entry point for the streaming and
+// non-streaming Gemini routes. It answers 500 when no OpenAI backend is wired
+// in, and 502 when proxyViaOpenAI reports that it wrote no response.
+func (h *Handler) handleGenerateContent(w http.ResponseWriter, r *http.Request, stream bool) {
+	switch {
+	case h.OpenAI == nil:
+		writeGeminiError(w, http.StatusInternalServerError, "OpenAI proxy backend unavailable.")
+	case !h.proxyViaOpenAI(w, r, stream):
+		writeGeminiError(w, http.StatusBadGateway, "Failed to proxy Gemini request.")
+	}
+}
+
+// proxyViaOpenAI serves a Gemini request by translating it into an OpenAI
+// chat-completions request and running it through h.OpenAI in-process.
+//
+// Three paths exist:
+//   - "__stream_release=1": the raw (untranslated) body is forwarded and the
+//     backend response is relayed verbatim.
+//   - streaming without "__stream_prepare=1": the backend writes through a
+//     translatorcliproxy stream writer configured for the Gemini format.
+//   - otherwise the backend response is buffered in a recorder; non-2xx
+//     results are rewritten as Gemini errors, "__stream_prepare=1" results are
+//     relayed verbatim, and everything else is converted with
+//     translatorcliproxy.FromOpenAINonStream.
+//
+// The return value reports whether a response was written; every current
+// path returns true.
+func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream bool) bool {
+	raw, err := io.ReadAll(r.Body)
+	if err != nil {
+		writeGeminiError(w, http.StatusBadRequest, "invalid body")
+		return true
+	}
+	routeModel := strings.TrimSpace(chi.URLParam(r, "model"))
+	translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatGemini, routeModel, raw, stream)
+	if !bytes.Contains(translatedReq, []byte(`"stream"`)) {
+		var reqMap map[string]any
+		// A JSON `null` decodes without error but leaves reqMap nil, and
+		// assigning into a nil map panics, so only patch real objects.
+		if json.Unmarshal(translatedReq, &reqMap) == nil && reqMap != nil {
+			reqMap["stream"] = stream
+			if b, e := json.Marshal(reqMap); e == nil {
+				translatedReq = b
+			}
+		}
+	}
+
+	isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1"
+	isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
+
+	if isVercelRelease {
+		proxyReq := r.Clone(r.Context())
+		proxyReq.URL.Path = "/v1/chat/completions"
+		proxyReq.Body = io.NopCloser(bytes.NewReader(raw))
+		proxyReq.ContentLength = int64(len(raw))
+		rec := httptest.NewRecorder()
+		h.OpenAI.ChatCompletions(rec, proxyReq)
+		res := rec.Result()
+		defer res.Body.Close()
+		// Reading from an in-memory recorder does not fail.
+		body, _ := io.ReadAll(res.Body)
+		for k, vv := range res.Header {
+			for _, v := range vv {
+				w.Header().Add(k, v)
+			}
+		}
+		w.WriteHeader(res.StatusCode)
+		_, _ = w.Write(body)
+		return true
+	}
+
+	proxyReq := r.Clone(r.Context())
+	proxyReq.URL.Path = "/v1/chat/completions"
+	proxyReq.Body = io.NopCloser(bytes.NewReader(translatedReq))
+	proxyReq.ContentLength = int64(len(translatedReq))
+
+	if stream && !isVercelPrepare {
+		w.Header().Set("Content-Type", "text/event-stream")
+		w.Header().Set("Cache-Control", "no-cache, no-transform")
+		w.Header().Set("Connection", "keep-alive")
+		w.Header().Set("X-Accel-Buffering", "no")
+		streamWriter := translatorcliproxy.NewOpenAIStreamTranslatorWriter(w, sdktranslator.FormatGemini, routeModel, raw, translatedReq)
+		h.OpenAI.ChatCompletions(streamWriter, proxyReq)
+		return true
+	}
+
+	rec := httptest.NewRecorder()
+	h.OpenAI.ChatCompletions(rec, proxyReq)
+	res := rec.Result()
+	defer res.Body.Close()
+	// Reading from an in-memory recorder does not fail.
+	body, _ := io.ReadAll(res.Body)
+	if res.StatusCode < 200 || res.StatusCode >= 300 {
+		for k, vv := range res.Header {
+			// The body is replaced by a re-encoded Gemini error below, so the
+			// backend's Content-Length would no longer match what is written.
+			if strings.EqualFold(k, "Content-Length") {
+				continue
+			}
+			for _, v := range vv {
+				w.Header().Add(k, v)
+			}
+		}
+		writeGeminiErrorFromOpenAI(w, res.StatusCode, body)
+		return true
+	}
+	if isVercelPrepare {
+		for k, vv := range res.Header {
+			for _, v := range vv {
+				w.Header().Add(k, v)
+			}
+		}
+		w.WriteHeader(res.StatusCode)
+		_, _ = w.Write(body)
+		return true
+	}
+	converted := translatorcliproxy.FromOpenAINonStream(sdktranslator.FormatGemini, routeModel, raw, translatedReq, body)
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_, _ = w.Write(converted)
+	return true
+}
+
+// writeGeminiErrorFromOpenAI re-emits an OpenAI-side error body as a Gemini
+// error envelope. The message is taken from "error.message" when raw is a
+// JSON object carrying a non-blank string there; otherwise the trimmed raw
+// body is used, and if that is empty too, the standard text for status.
+func writeGeminiErrorFromOpenAI(w http.ResponseWriter, status int, raw []byte) {
+	message := strings.TrimSpace(string(raw))
+
+	var envelope map[string]any
+	if json.Unmarshal(raw, &envelope) == nil {
+		// Lookups on a nil map are safe, so failed assertions simply yield "".
+		errObj, _ := envelope["error"].(map[string]any)
+		text, _ := errObj["message"].(string)
+		if trimmed := strings.TrimSpace(text); trimmed != "" {
+			message = trimmed
+		}
+	}
+
+	if message == "" {
+		message = http.StatusText(status)
+	}
+	writeGeminiError(w, status, message)
+}
+
+// handleNonStreamGenerateContent turns an upstream completion response into a
+// single Gemini generateContent JSON body. A non-200 upstream status is
+// relayed as a Gemini error carrying the trimmed upstream body as message.
+// resp.Body is always closed.
+func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *http.Response, model, finalPrompt string, thinkingEnabled bool, toolNames []string) {
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		upstreamBody, _ := io.ReadAll(resp.Body)
+		writeGeminiError(w, resp.StatusCode, strings.TrimSpace(string(upstreamBody)))
+		return
+	}
+
+	collected := sse.CollectStream(resp, thinkingEnabled, true)
+	strip := h.compatStripReferenceMarkers()
+	thinking := cleanVisibleOutput(collected.Thinking, strip)
+	text := cleanVisibleOutput(collected.Text, strip)
+
+	payload := buildGeminiGenerateContentResponse(model, finalPrompt, thinking, text, toolNames, collected.OutputTokens)
+	writeJSON(w, http.StatusOK, payload)
+}
+
+// buildGeminiGenerateContentResponse assembles the non-streaming response
+// body: one candidate (index 0, role "model", finishReason "STOP") whose parts
+// come from buildGeminiPartsFromFinal, plus modelVersion and usageMetadata.
+func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, outputTokens int) map[string]any {
+	candidate := map[string]any{
+		"index": 0,
+		"content": map[string]any{
+			"role":  "model",
+			"parts": buildGeminiPartsFromFinal(finalText, finalThinking, toolNames),
+		},
+		"finishReason": "STOP",
+	}
+	response := map[string]any{
+		"candidates":   []map[string]any{candidate},
+		"modelVersion": model,
+	}
+	response["usageMetadata"] = buildGeminiUsage(finalPrompt, finalThinking, finalText, outputTokens)
+	return response
+}
+
+// buildGeminiUsage produces the usageMetadata object. Token counts are
+// estimated from the prompt, thinking and text strings; when outputTokens is
+// positive it replaces the estimated thinking+text total.
+func buildGeminiUsage(finalPrompt, finalThinking, finalText string, outputTokens int) map[string]any {
+	prompt := util.EstimateTokens(finalPrompt)
+	candidates := util.EstimateTokens(finalThinking) + util.EstimateTokens(finalText)
+	if outputTokens > 0 {
+		candidates = outputTokens
+	}
+
+	usage := make(map[string]any, 3)
+	usage["promptTokenCount"] = prompt
+	usage["candidatesTokenCount"] = candidates
+	usage["totalTokenCount"] = prompt + candidates
+	return usage
+}
+
+// buildGeminiPartsFromFinal converts the final output into Gemini content
+// parts. Tool calls are looked for in the text first and, if none are found,
+// in the non-empty thinking; each detected call becomes a "functionCall"
+// part. Without tool calls a single "text" part is returned, holding the text
+// or, when the text is empty, the thinking.
+func buildGeminiPartsFromFinal(finalText, finalThinking string, toolNames []string) []map[string]any {
+	calls := util.ParseToolCalls(finalText, toolNames)
+	if len(calls) == 0 && finalThinking != "" {
+		calls = util.ParseToolCalls(finalThinking, toolNames)
+	}
+
+	if len(calls) == 0 {
+		visible := finalText
+		if visible == "" {
+			visible = finalThinking
+		}
+		return []map[string]any{{"text": visible}}
+	}
+
+	parts := make([]map[string]any, len(calls))
+	for i := range calls {
+		call := map[string]any{
+			"name": calls[i].Name,
+			"args": calls[i].Input,
+		}
+		parts[i] = map[string]any{"functionCall": call}
+	}
+	return parts
+}
--- a/internal/adapter/gemini/handler_routes.go
+++ b/internal/adapter/gemini/handler_routes.go
@@ -0,0 +1,40 @@
+package gemini
+
+import (
+	"net/http"
+
+	"github.com/go-chi/chi/v5"
+
+	"ds2api/internal/util"
+)
+
+// writeJSON is a package-level alias for util.WriteJSON; being a variable it
+// can also be reassigned.
+var writeJSON = util.WriteJSON
+
+// Handler serves the Gemini-compatible routes registered by RegisterRoutes.
+type Handler struct {
+	// Store supplies configuration; a nil Store makes
+	// compatStripReferenceMarkers default to true.
+	Store ConfigReader
+	// Auth resolves request credentials.
+	Auth AuthResolver
+	// DS is the upstream DeepSeek client.
+	DS DeepSeekCaller
+	// OpenAI is the chat-completions backend that generateContent requests
+	// are proxied through; handleGenerateContent answers 500 when it is nil.
+	OpenAI OpenAIChatRunner
+}
+
+// compatStripReferenceMarkers reports whether reference markers should be
+// stripped from output. It defers to the config store and defaults to true
+// when the handler or its store is nil.
+func (h *Handler) compatStripReferenceMarkers() bool {
+	if h != nil && h.Store != nil {
+		return h.Store.CompatStripReferenceMarkers()
+	}
+	return true
+}
+
+// RegisterRoutes mounts the generateContent and streamGenerateContent POST
+// routes on r under both the /v1beta and /v1 prefixes.
+func RegisterRoutes(r chi.Router, h *Handler) {
+	for _, prefix := range []string{"/v1beta", "/v1"} {
+		r.Post(prefix+"/models/{model}:generateContent", h.GenerateContent)
+		r.Post(prefix+"/models/{model}:streamGenerateContent", h.StreamGenerateContent)
+	}
+}
+
+// GenerateContent is the HTTP handler for the non-streaming generateContent
+// routes.
+func (h *Handler) GenerateContent(w http.ResponseWriter, r *http.Request) {
+	const stream = false
+	h.handleGenerateContent(w, r, stream)
+}
+
+// StreamGenerateContent is the HTTP handler for the streamGenerateContent
+// routes.
+func (h *Handler) StreamGenerateContent(w http.ResponseWriter, r *http.Request) {
+	const stream = true
+	h.handleGenerateContent(w, r, stream)
+}
--- a/internal/adapter/gemini/handler_stream_runtime.go
+++ b/internal/adapter/gemini/handler_stream_runtime.go
@@ -0,0 +1,189 @@
+package gemini
+
+import (
+	"encoding/json"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"ds2api/internal/deepseek"
+	"ds2api/internal/sse"
+	streamengine "ds2api/internal/stream"
+)
+
+// handleStreamGenerateContent relays an upstream completion stream to the
+// client as Gemini SSE frames. A non-200 upstream status is answered with a
+// Gemini error carrying the trimmed upstream body. Otherwise SSE headers are
+// set and the upstream body is consumed by streamengine.ConsumeSSE, with a
+// geminiStreamRuntime handling parsed lines and finalization. resp.Body is
+// always closed.
+func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Request, resp *http.Response, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string) {
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		upstreamBody, _ := io.ReadAll(resp.Body)
+		writeGeminiError(w, resp.StatusCode, strings.TrimSpace(string(upstreamBody)))
+		return
+	}
+
+	headers := w.Header()
+	headers.Set("Content-Type", "text/event-stream")
+	headers.Set("Cache-Control", "no-cache, no-transform")
+	headers.Set("Connection", "keep-alive")
+	headers.Set("X-Accel-Buffering", "no")
+
+	controller := http.NewResponseController(w)
+	_, flushable := w.(http.Flusher)
+	rt := newGeminiStreamRuntime(w, controller, flushable, model, finalPrompt, thinkingEnabled, searchEnabled, h.compatStripReferenceMarkers(), toolNames)
+
+	cfg := streamengine.ConsumeConfig{
+		Context:             r.Context(),
+		Body:                resp.Body,
+		ThinkingEnabled:     thinkingEnabled,
+		InitialType:         "text",
+		KeepAliveInterval:   time.Duration(deepseek.KeepAliveTimeout) * time.Second,
+		IdleTimeout:         time.Duration(deepseek.StreamIdleTimeout) * time.Second,
+		MaxKeepAliveNoInput: deepseek.MaxKeepaliveCount,
+	}
+	if thinkingEnabled {
+		cfg.InitialType = "thinking"
+	}
+
+	hooks := streamengine.ConsumeHooks{
+		OnParsed: rt.onParsed,
+		// The stop reason and error are not needed for the Gemini final frame.
+		OnFinalize: func(streamengine.StopReason, error) {
+			rt.finalize()
+		},
+	}
+	streamengine.ConsumeSSE(cfg, hooks)
+}
+
+// geminiStreamRuntime holds the per-request state used while converting an
+// upstream stream into Gemini SSE frames.
+type geminiStreamRuntime struct {
+	// w receives the SSE frames; rc flushes it when canFlush is true.
+	w        http.ResponseWriter
+	rc       *http.ResponseController
+	canFlush bool
+
+	// model is echoed as "modelVersion"; finalPrompt feeds the usage estimate.
+	model       string
+	finalPrompt string
+
+	// thinkingEnabled gates accumulation of thinking parts.
+	// searchEnabled makes onParsed drop non-thinking parts that
+	// sse.IsCitation recognises.
+	// bufferContent (set when tool names are present) withholds text until
+	// finalize so it can be scanned for tool calls.
+	thinkingEnabled       bool
+	searchEnabled         bool
+	bufferContent         bool
+	stripReferenceMarkers bool
+	toolNames             []string
+
+	// thinking and text accumulate the cleaned output seen so far;
+	// outputTokens keeps the latest positive token count reported upstream.
+	thinking     strings.Builder
+	text         strings.Builder
+	outputTokens int
+}
+
+// newGeminiStreamRuntime builds the per-request streaming state. Content
+// buffering is switched on whenever at least one tool name is supplied.
+func newGeminiStreamRuntime(
+	w http.ResponseWriter,
+	rc *http.ResponseController,
+	canFlush bool,
+	model string,
+	finalPrompt string,
+	thinkingEnabled bool,
+	searchEnabled bool,
+	stripReferenceMarkers bool,
+	toolNames []string,
+) *geminiStreamRuntime {
+	rt := new(geminiStreamRuntime)
+
+	rt.w = w
+	rt.rc = rc
+	rt.canFlush = canFlush
+
+	rt.model = model
+	rt.finalPrompt = finalPrompt
+
+	rt.thinkingEnabled = thinkingEnabled
+	rt.searchEnabled = searchEnabled
+	rt.stripReferenceMarkers = stripReferenceMarkers
+	rt.toolNames = toolNames
+	rt.bufferContent = len(toolNames) > 0
+
+	return rt
+}
+
+// sendChunk writes payload as one SSE "data:" frame and flushes when the
+// writer supports it. A payload that cannot be JSON-encoded is dropped rather
+// than emitted as an empty "data: " frame, which clients would fail to parse.
+// Write and flush errors are ignored: the client may already be gone and the
+// stream has no channel to report them on.
+func (s *geminiStreamRuntime) sendChunk(payload map[string]any) {
+	b, err := json.Marshal(payload)
+	if err != nil {
+		return
+	}
+	_, _ = s.w.Write([]byte("data: "))
+	_, _ = s.w.Write(b)
+	_, _ = s.w.Write([]byte("\n\n"))
+	if s.canFlush {
+		_ = s.rc.Flush()
+	}
+}
+
+// onParsed handles one parsed upstream line. Unparsed lines are ignored; a
+// content-filter flag, an error message or a stop flag ends the stream.
+// Otherwise each non-empty part is accumulated: thinking parts only when
+// thinking is enabled, and text parts always. Text parts are also emitted
+// immediately as a frame unless content is being buffered. With search
+// enabled, non-thinking parts recognised by sse.IsCitation are skipped.
+func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision {
+	var decision streamengine.ParsedDecision
+	if !parsed.Parsed {
+		return decision
+	}
+	if parsed.OutputTokens > 0 {
+		s.outputTokens = parsed.OutputTokens
+	}
+	if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
+		decision.Stop = true
+		return decision
+	}
+
+	for _, part := range parsed.Parts {
+		visible := cleanVisibleOutput(part.Text, s.stripReferenceMarkers)
+		isThinking := part.Type == "thinking"
+		switch {
+		case visible == "":
+			continue
+		case !isThinking && s.searchEnabled && sse.IsCitation(visible):
+			continue
+		}
+
+		decision.ContentSeen = true
+		if isThinking {
+			if s.thinkingEnabled {
+				s.thinking.WriteString(visible)
+			}
+			continue
+		}
+
+		s.text.WriteString(visible)
+		if !s.bufferContent {
+			candidate := map[string]any{
+				"index": 0,
+				"content": map[string]any{
+					"role":  "model",
+					"parts": []map[string]any{{"text": visible}},
+				},
+			}
+			s.sendChunk(map[string]any{
+				"candidates":   []map[string]any{candidate},
+				"modelVersion": s.model,
+			})
+		}
+	}
+	return decision
+}
+
+// finalize closes the stream. When content was buffered, the accumulated
+// output is first emitted as a single frame built by
+// buildGeminiPartsFromFinal. A last frame with an empty text part,
+// finishReason "STOP" and usageMetadata is always sent.
+func (s *geminiStreamRuntime) finalize() {
+	thinking := s.thinking.String()
+	text := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
+
+	// candidate wraps parts in the index-0 "model" candidate shared by both frames.
+	candidate := func(parts []map[string]any) map[string]any {
+		return map[string]any{
+			"index": 0,
+			"content": map[string]any{
+				"role":  "model",
+				"parts": parts,
+			},
+		}
+	}
+
+	if s.bufferContent {
+		buffered := candidate(buildGeminiPartsFromFinal(text, thinking, s.toolNames))
+		s.sendChunk(map[string]any{
+			"candidates":   []map[string]any{buffered},
+			"modelVersion": s.model,
+		})
+	}
+
+	closing := candidate([]map[string]any{{"text": ""}})
+	closing["finishReason"] = "STOP"
+	s.sendChunk(map[string]any{
+		"candidates":    []map[string]any{closing},
+		"modelVersion":  s.model,
+		"usageMetadata": buildGeminiUsage(s.finalPrompt, thinking, text, s.outputTokens),
+	})
+}
--- a/internal/adapter/gemini/handler_test.go
+++ b/internal/adapter/gemini/handler_test.go
@@ -0,0 +1,314 @@
+package gemini
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/go-chi/chi/v5"
+
+	"ds2api/internal/auth"
+)
+
+// testGeminiConfig is a fixed ConfigReader for tests: it has no model aliases
+// and always asks for reference markers to be stripped.
+type testGeminiConfig struct{}
+
+func (testGeminiConfig) ModelAliases() map[string]string   { return nil }
+func (testGeminiConfig) CompatStripReferenceMarkers() bool { return true }
+
+// testGeminiAuth is a canned AuthResolver for tests. A configured err takes
+// precedence, then a configured auth value; otherwise a direct-token auth is
+// returned.
+type testGeminiAuth struct {
+	a   *auth.RequestAuth
+	err error
+}
+
+// Determine returns the stubbed error or auth without inspecting the request.
+func (m testGeminiAuth) Determine(_ *http.Request) (*auth.RequestAuth, error) {
+	switch {
+	case m.err != nil:
+		return nil, m.err
+	case m.a != nil:
+		return m.a, nil
+	default:
+		fallback := &auth.RequestAuth{
+			UseConfigToken: false,
+			DeepSeekToken:  "direct-token",
+			CallerID:       "caller:test",
+			TriedAccounts:  map[string]bool{},
+		}
+		return fallback, nil
+	}
+}
+
+// Release does nothing.
+func (testGeminiAuth) Release(_ *auth.RequestAuth) {}
+
+// testGeminiDS is a canned DeepSeekCaller for tests: session and PoW calls
+// always succeed with fixed strings, and CallCompletion returns the
+// configured error or response.
+type testGeminiDS struct {
+	resp *http.Response
+	err  error
+}
+
+// CreateSession always succeeds with a fixed session id.
+func (m testGeminiDS) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
+	const sessionID = "session-id"
+	return sessionID, nil
+}
+
+// GetPow always succeeds with a fixed value.
+func (m testGeminiDS) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
+	const pow = "pow"
+	return pow, nil
+}
+
+// CallCompletion yields the stubbed error when set, else the stubbed response.
+func (m testGeminiDS) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) {
+	if m.err == nil {
+		return m.resp, nil
+	}
+	return nil, m.err
+}
+
+// geminiOpenAIErrorStub is an OpenAIChatRunner that replies with a fixed
+// status, body and extra headers.
+type geminiOpenAIErrorStub struct {
+	status  int
+	body    string
+	headers map[string]string
+}
+
+// ChatCompletions writes the configured headers, then a JSON content type,
+// the status and the body.
+func (s geminiOpenAIErrorStub) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
+	hdr := w.Header()
+	for name, value := range s.headers {
+		hdr.Set(name, value)
+	}
+	hdr.Set("Content-Type", "application/json")
+	w.WriteHeader(s.status)
+	_, _ = io.WriteString(w, s.body)
+}
+
+// geminiOpenAISuccessStub is an OpenAIChatRunner that always answers 200. In
+// stream mode it emits two chat.completion.chunk frames followed by [DONE];
+// otherwise it returns body, or a canned tool-call completion when body is
+// blank.
+type geminiOpenAISuccessStub struct {
+	stream bool
+	body   string
+}
+
+// ChatCompletions writes the canned streaming or non-streaming reply.
+func (s geminiOpenAISuccessStub) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
+	if !s.stream {
+		payload := s.body
+		if strings.TrimSpace(payload) == "" {
+			payload = `{"id":"chatcmpl-1","object":"chat.completion","choices":[{"index":0,"message":{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"eval_javascript","arguments":"{\"code\":\"1+1\"}"}}]},"finish_reason":"tool_calls"}]}`
+		}
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte(payload))
+		return
+	}
+
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.WriteHeader(http.StatusOK)
+	frames := []string{
+		"data: {\"id\":\"chatcmpl-1\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"hello \"},\"finish_reason\":null}]}\n\n",
+		"data: {\"id\":\"chatcmpl-1\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"world\"},\"finish_reason\":\"stop\"}]}\n\n",
+		"data: [DONE]\n\n",
+	}
+	for _, frame := range frames {
+		_, _ = w.Write([]byte(frame))
+	}
+}
+
+// makeGeminiUpstreamResponse builds a 200 response whose body is the given
+// lines joined by newlines and terminated by exactly one trailing newline
+// more than the join provides when it does not already end in one.
+func makeGeminiUpstreamResponse(lines ...string) *http.Response {
+	// Trimming one trailing newline and re-adding it guarantees the suffix
+	// without doubling a newline that is already there.
+	payload := strings.TrimSuffix(strings.Join(lines, "\n"), "\n") + "\n"
+
+	resp := new(http.Response)
+	resp.StatusCode = http.StatusOK
+	resp.Header = make(http.Header)
+	resp.Body = io.NopCloser(strings.NewReader(payload))
+	return resp
+}
+
+// TestGeminiRoutesRegistered checks that all four Gemini endpoints resolve to
+// a handler, i.e. none of them answers 404.
+func TestGeminiRoutesRegistered(t *testing.T) {
+	router := chi.NewRouter()
+	RegisterRoutes(router, &Handler{
+		Store: testGeminiConfig{},
+		Auth:  testGeminiAuth{err: auth.ErrUnauthorized},
+	})
+
+	const payload = `{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`
+	for _, path := range []string{
+		"/v1beta/models/gemini-2.5-pro:generateContent",
+		"/v1beta/models/gemini-2.5-pro:streamGenerateContent",
+		"/v1/models/gemini-2.5-pro:generateContent",
+		"/v1/models/gemini-2.5-pro:streamGenerateContent",
+	} {
+		rec := httptest.NewRecorder()
+		router.ServeHTTP(rec, httptest.NewRequest(http.MethodPost, path, strings.NewReader(payload)))
+		if rec.Code == http.StatusNotFound {
+			t.Fatalf("expected route %s to be registered, got 404", path)
+		}
+	}
+}
+
+// TestGenerateContentReturnsFunctionCallParts verifies that an OpenAI
+// tool_calls completion is returned to the Gemini client as a functionCall
+// part carrying the tool name.
+func TestGenerateContentReturnsFunctionCallParts(t *testing.T) {
+	const backendReply = `{"id":"chatcmpl-1","object":"chat.completion","choices":[{"index":0,"message":{"role":"assistant","tool_calls":[{"id":"call_1","type":"function","function":{"name":"eval_javascript","arguments":"{\"code\":\"1+1\"}"}}]},"finish_reason":"tool_calls"}]}`
+	router := chi.NewRouter()
+	RegisterRoutes(router, &Handler{
+		Store:  testGeminiConfig{},
+		OpenAI: geminiOpenAISuccessStub{body: backendReply},
+	})
+
+	payload := `{
+		"contents":[{"role":"user","parts":[{"text":"call tool"}]}],
+		"tools":[{"functionDeclarations":[{"name":"eval_javascript","description":"eval","parameters":{"type":"object","properties":{"code":{"type":"string"}}}}]}]
+	}`
+	rec := httptest.NewRecorder()
+	router.ServeHTTP(rec, httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-pro:generateContent", strings.NewReader(payload)))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+
+	var decoded map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &decoded); err != nil {
+		t.Fatalf("decode response failed: %v", err)
+	}
+	candidates, _ := decoded["candidates"].([]any)
+	if len(candidates) == 0 {
+		t.Fatalf("expected non-empty candidates: %#v", decoded)
+	}
+	first, _ := candidates[0].(map[string]any)
+	content, _ := first["content"].(map[string]any)
+	parts, _ := content["parts"].([]any)
+	if len(parts) == 0 {
+		t.Fatalf("expected non-empty parts: %#v", content)
+	}
+	firstPart, _ := parts[0].(map[string]any)
+	call, _ := firstPart["functionCall"].(map[string]any)
+	if call["name"] != "eval_javascript" {
+		t.Fatalf("expected functionCall name eval_javascript, got %#v", call)
+	}
+}
+
+// TestGenerateContentMixedToolSnippetAlsoTriggersFunctionCall verifies that
+// the stub's default tool-call completion reaches the Gemini client as a
+// functionCall part. Empty candidates or parts are reported as test failures
+// instead of panicking on an out-of-range index.
+func TestGenerateContentMixedToolSnippetAlsoTriggersFunctionCall(t *testing.T) {
+	h := &Handler{Store: testGeminiConfig{}, OpenAI: geminiOpenAISuccessStub{}}
+	r := chi.NewRouter()
+	RegisterRoutes(r, h)
+
+	body := `{
+		"contents":[{"role":"user","parts":[{"text":"call tool"}]}],
+		"tools":[{"functionDeclarations":[{"name":"eval_javascript","description":"eval","parameters":{"type":"object","properties":{"code":{"type":"string"}}}}]}]
+	}`
+	req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-pro:generateContent", strings.NewReader(body))
+	rec := httptest.NewRecorder()
+	r.ServeHTTP(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	var out map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
+		t.Fatalf("decode response failed: %v", err)
+	}
+	candidates, _ := out["candidates"].([]any)
+	if len(candidates) == 0 {
+		t.Fatalf("expected non-empty candidates: %#v", out)
+	}
+	c0, _ := candidates[0].(map[string]any)
+	content, _ := c0["content"].(map[string]any)
+	parts, _ := content["parts"].([]any)
+	if len(parts) == 0 {
+		t.Fatalf("expected non-empty parts: %#v", content)
+	}
+	part0, _ := parts[0].(map[string]any)
+	functionCall, _ := part0["functionCall"].(map[string]any)
+	if functionCall["name"] != "eval_javascript" {
+		t.Fatalf("expected functionCall name eval_javascript for mixed snippet, got %#v", functionCall)
+	}
+}
+
+// TestStreamGenerateContentEmitsSSE verifies that the streaming route produces
+// SSE frames and that the last frame carries a candidate with non-null
+// content and at least one part.
+func TestStreamGenerateContentEmitsSSE(t *testing.T) {
+	router := chi.NewRouter()
+	RegisterRoutes(router, &Handler{
+		Store:  testGeminiConfig{},
+		OpenAI: geminiOpenAISuccessStub{stream: true},
+	})
+
+	const payload = `{"contents":[{"role":"user","parts":[{"text":"hello"}]}]}`
+	rec := httptest.NewRecorder()
+	router.ServeHTTP(rec, httptest.NewRequest(http.MethodPost, "/v1/models/gemini-2.5-pro:streamGenerateContent?alt=sse", strings.NewReader(payload)))
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+
+	frames := extractGeminiSSEFrames(t, rec.Body.String())
+	if len(frames) == 0 {
+		t.Fatalf("expected non-empty stream frames, body=%s", rec.Body.String())
+	}
+
+	finish := frames[len(frames)-1]
+	candidates, _ := finish["candidates"].([]any)
+	if len(candidates) == 0 {
+		t.Fatalf("expected finish frame candidates, got %#v", finish)
+	}
+	first, _ := candidates[0].(map[string]any)
+	content, _ := first["content"].(map[string]any)
+	if content == nil {
+		t.Fatalf("expected non-null content in finish frame, got %#v", first)
+	}
+	if parts, _ := content["parts"].([]any); len(parts) == 0 {
+		t.Fatalf("expected non-empty parts in finish frame content, got %#v", content)
+	}
+}
+
+// TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope verifies that a
+// backend error keeps its status code and headers while its body is rewritten
+// into the Gemini error envelope with the parsed message.
+func TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope(t *testing.T) {
+	backend := geminiOpenAIErrorStub{
+		status: http.StatusUnauthorized,
+		body:   `{"error":{"message":"invalid api key"}}`,
+		headers: map[string]string{
+			"WWW-Authenticate":      `Bearer realm="example"`,
+			"Retry-After":           "30",
+			"X-RateLimit-Remaining": "0",
+		},
+	}
+	router := chi.NewRouter()
+	RegisterRoutes(router, &Handler{Store: testGeminiConfig{}, OpenAI: backend})
+
+	rec := httptest.NewRecorder()
+	router.ServeHTTP(rec, httptest.NewRequest(http.MethodPost, "/v1/models/gemini-2.5-pro:generateContent", strings.NewReader(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`)))
+
+	if rec.Code != http.StatusUnauthorized {
+		t.Fatalf("expected 401, got %d body=%s", rec.Code, rec.Body.String())
+	}
+
+	var decoded map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &decoded); err != nil {
+		t.Fatalf("expected json body: %v", err)
+	}
+	envelope, _ := decoded["error"].(map[string]any)
+	if envelope["status"] != "UNAUTHENTICATED" {
+		t.Fatalf("expected Gemini status UNAUTHENTICATED, got=%v", envelope["status"])
+	}
+	if envelope["message"] != "invalid api key" {
+		t.Fatalf("expected parsed error message, got=%v", envelope["message"])
+	}
+
+	respHeaders := rec.Header()
+	if respHeaders.Get("WWW-Authenticate") == "" {
+		t.Fatalf("expected WWW-Authenticate header to be preserved")
+	}
+	if retry := respHeaders.Get("Retry-After"); retry != "30" {
+		t.Fatalf("expected Retry-After header 30, got=%q", retry)
+	}
+	if remaining := respHeaders.Get("X-RateLimit-Remaining"); remaining != "0" {
+		t.Fatalf("expected X-RateLimit-Remaining header 0, got=%q", remaining)
+	}
+}
+
+// extractGeminiSSEFrames returns every line of body that decodes as a JSON
+// object, in order. An optional "data: " prefix is removed first; blank lines
+// and lines that are not JSON objects (such as "[DONE]") are skipped.
+//
+// The scanner buffer is raised to 1 MiB so long frames are not cut off at the
+// default 64 KiB token limit, and a scan failure aborts the test instead of
+// silently returning a truncated frame list.
+func extractGeminiSSEFrames(t *testing.T, body string) []map[string]any {
+	t.Helper()
+	scanner := bufio.NewScanner(strings.NewReader(body))
+	scanner.Buffer(make([]byte, 0, 64*1024), 1<<20)
+	out := make([]map[string]any, 0, 4)
+	for scanner.Scan() {
+		raw := strings.TrimSpace(scanner.Text())
+		if rest, ok := strings.CutPrefix(raw, "data: "); ok {
+			raw = strings.TrimSpace(rest)
+		}
+		if raw == "" {
+			continue
+		}
+		var frame map[string]any
+		if err := json.Unmarshal([]byte(raw), &frame); err != nil {
+			continue
+		}
+		out = append(out, frame)
+	}
+	if err := scanner.Err(); err != nil {
+		t.Fatalf("scan SSE body failed: %v", err)
+	}
+	return out
+}
--- a/internal/adapter/gemini/output_clean.go
+++ b/internal/adapter/gemini/output_clean.go
@@ -0,0 +1,13 @@
+package gemini
+
+import textclean "ds2api/internal/textclean"
+
+// cleanVisibleOutput returns text with reference markers removed when
+// stripReferenceMarkers is set. Empty input and the non-stripping case are
+// returned unchanged.
+func cleanVisibleOutput(text string, stripReferenceMarkers bool) string {
+	if text != "" && stripReferenceMarkers {
+		return textclean.StripReferenceMarkers(text)
+	}
+	return text
+}
--- a/internal/adapter/gemini/proxy_vercel_test.go
+++ b/internal/adapter/gemini/proxy_vercel_test.go
@@ -0,0 +1,42 @@
+package gemini
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+)
+
+// openAIProxyStub is an OpenAIChatRunner that replies with a fixed JSON body
+// and status; a zero status means 200.
+type openAIProxyStub struct {
+	status int
+	body   string
+}
+
+// ChatCompletions writes the canned reply and ignores the request.
+func (s openAIProxyStub) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
+	code := s.status
+	if code == 0 {
+		code = http.StatusOK
+	}
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(code)
+	_, _ = w.Write([]byte(s.body))
+}
+
+// TestGeminiProxyViaOpenAIVercelReleasePassthrough verifies that a request
+// carrying __stream_release=1 gets the backend's JSON reply back unchanged.
+func TestGeminiProxyViaOpenAIVercelReleasePassthrough(t *testing.T) {
+	handler := &Handler{OpenAI: openAIProxyStub{status: 200, body: `{"success":true}`}}
+	rec := httptest.NewRecorder()
+
+	handler.StreamGenerateContent(rec, httptest.NewRequest(
+		http.MethodPost,
+		"/v1beta/models/gemini-2.5-pro:streamGenerateContent?__stream_release=1",
+		strings.NewReader(`{"lease_id":"lease_123"}`),
+	))
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
+	}
+	var decoded map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &decoded); err != nil {
+		t.Fatalf("expected json response, got err=%v body=%s", err, rec.Body.String())
+	}
+	success, isBool := decoded["success"].(bool)
+	if !(isBool && success) {
+		t.Fatalf("expected success=true passthrough, got=%v", decoded)
+	}
+}
--- a/internal/adapter/openai/chat_stream_runtime.go
+++ b/internal/adapter/openai/chat_stream_runtime.go
@@ -0,0 +1,296 @@
+package openai
+
+import (
+	"encoding/json"
+	"net/http"
+	"strings"
+
+	openaifmt "ds2api/internal/format/openai"
+	"ds2api/internal/sse"
+	streamengine "ds2api/internal/stream"
+	"ds2api/internal/util"
+)
+
+// chatStreamRuntime holds the per-request state used while converting an
+// upstream stream into OpenAI chat.completion.chunk SSE frames.
+type chatStreamRuntime struct {
+	// w receives the SSE frames; rc flushes it when canFlush is true.
+	w        http.ResponseWriter
+	rc       *http.ResponseController
+	canFlush bool
+
+	// completionID, created and model are stamped on every chunk;
+	// finalPrompt feeds the usage block; toolNames is the set handed to the
+	// tool-call parsers.
+	completionID string
+	created      int64
+	model        string
+	finalPrompt  string
+	toolNames    []string
+
+	// thinkingEnabled gates reasoning_content output; searchEnabled makes
+	// onParsed drop parts recognised by sse.IsCitation.
+	thinkingEnabled       bool
+	searchEnabled         bool
+	stripReferenceMarkers bool
+
+	// firstChunkSent records that the "assistant" role has been emitted.
+	// bufferToolContent routes text through the tool sieve instead of sending
+	// it directly; emitEarlyToolDeltas allows incremental tool-call deltas.
+	// toolCallsEmitted / toolCallsDoneEmitted record that tool-call deltas /
+	// complete tool calls were already sent.
+	firstChunkSent       bool
+	bufferToolContent    bool
+	emitEarlyToolDeltas  bool
+	toolCallsEmitted     bool
+	toolCallsDoneEmitted bool
+
+	// toolSieve is the incremental tool-call detector state.
+	// streamToolCallIDs and streamToolNames are index-keyed maps passed to
+	// the tool-call formatting and filtering helpers.
+	// thinking and text accumulate cleaned output; outputTokens keeps the
+	// latest positive token count reported upstream.
+	toolSieve         toolStreamSieveState
+	streamToolCallIDs map[int]string
+	streamToolNames   map[int]string
+	thinking          strings.Builder
+	text              strings.Builder
+	outputTokens      int
+}
+
+// newChatStreamRuntime builds the per-request streaming state from the given
+// settings and starts with empty tool-call id and name maps.
+func newChatStreamRuntime(
+	w http.ResponseWriter,
+	rc *http.ResponseController,
+	canFlush bool,
+	completionID string,
+	created int64,
+	model string,
+	finalPrompt string,
+	thinkingEnabled bool,
+	searchEnabled bool,
+	stripReferenceMarkers bool,
+	toolNames []string,
+	bufferToolContent bool,
+	emitEarlyToolDeltas bool,
+) *chatStreamRuntime {
+	rt := new(chatStreamRuntime)
+
+	rt.w = w
+	rt.rc = rc
+	rt.canFlush = canFlush
+
+	rt.completionID = completionID
+	rt.created = created
+	rt.model = model
+	rt.finalPrompt = finalPrompt
+	rt.toolNames = toolNames
+
+	rt.thinkingEnabled = thinkingEnabled
+	rt.searchEnabled = searchEnabled
+	rt.stripReferenceMarkers = stripReferenceMarkers
+
+	rt.bufferToolContent = bufferToolContent
+	rt.emitEarlyToolDeltas = emitEarlyToolDeltas
+
+	rt.streamToolCallIDs = make(map[int]string)
+	rt.streamToolNames = make(map[int]string)
+	return rt
+}
+
+// sendKeepAlive emits an SSE comment line and flushes it. It does nothing
+// when the writer cannot flush.
+func (s *chatStreamRuntime) sendKeepAlive() {
+	if s.canFlush {
+		const comment = ": keep-alive\n\n"
+		_, _ = s.w.Write([]byte(comment))
+		_ = s.rc.Flush()
+	}
+}
+
+// sendChunk writes v as one SSE "data:" frame and flushes when the writer
+// supports it. A value that cannot be JSON-encoded is dropped rather than
+// emitted as an empty "data: " frame, which clients would fail to parse.
+// Write and flush errors are ignored: the client may already be gone and the
+// stream has no channel to report them on.
+func (s *chatStreamRuntime) sendChunk(v any) {
+	b, err := json.Marshal(v)
+	if err != nil {
+		return
+	}
+	_, _ = s.w.Write([]byte("data: "))
+	_, _ = s.w.Write(b)
+	_, _ = s.w.Write([]byte("\n\n"))
+	if s.canFlush {
+		_ = s.rc.Flush()
+	}
+}
+
+// sendDone writes the terminating "[DONE]" frame and flushes when possible.
+func (s *chatStreamRuntime) sendDone() {
+	const doneFrame = "data: [DONE]\n\n"
+	_, _ = s.w.Write([]byte(doneFrame))
+	if !s.canFlush {
+		return
+	}
+	_ = s.rc.Flush()
+}
+
+// finalize ends the stream: it emits any tool calls or buffered content that
+// has not been sent yet, then a finish chunk with usage, then "[DONE]".
+// finishReason is the caller's proposed reason; it is overridden with
+// "tool_calls" whenever tool calls were detected or already emitted.
+func (s *chatStreamRuntime) finalize(finishReason string) {
+	finalThinking := s.thinking.String()
+	finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
+	detected := util.ParseStandaloneToolCallsDetailed(finalText, s.toolNames)
+	if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted {
+		// The full text parses as tool calls that were not sent as complete
+		// calls during streaming: send them now in one chunk.
+		finishReason = "tool_calls"
+		delta := map[string]any{
+			"tool_calls": formatFinalStreamToolCallsWithStableIDs(detected.Calls, s.streamToolCallIDs),
+		}
+		if !s.firstChunkSent {
+			delta["role"] = "assistant"
+			s.firstChunkSent = true
+		}
+		s.sendChunk(openaifmt.BuildChatStreamChunk(
+			s.completionID,
+			s.created,
+			s.model,
+			[]map[string]any{openaifmt.BuildChatStreamDeltaChoice(0, delta)},
+			nil,
+		))
+		s.toolCallsEmitted = true
+		s.toolCallsDoneEmitted = true
+	} else if s.bufferToolContent {
+		// Otherwise drain whatever the sieve is still holding; each event may
+		// carry tool calls, plain content, or both.
+		for _, evt := range flushToolSieve(&s.toolSieve, s.toolNames) {
+			if len(evt.ToolCalls) > 0 {
+				finishReason = "tool_calls"
+				s.toolCallsEmitted = true
+				s.toolCallsDoneEmitted = true
+				tcDelta := map[string]any{
+					"tool_calls": formatFinalStreamToolCallsWithStableIDs(evt.ToolCalls, s.streamToolCallIDs),
+				}
+				if !s.firstChunkSent {
+					tcDelta["role"] = "assistant"
+					s.firstChunkSent = true
+				}
+				s.sendChunk(openaifmt.BuildChatStreamChunk(
+					s.completionID,
+					s.created,
+					s.model,
+					[]map[string]any{openaifmt.BuildChatStreamDeltaChoice(0, tcDelta)},
+					nil,
+				))
+			}
+			if evt.Content == "" {
+				continue
+			}
+			cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
+			if cleaned == "" {
+				continue
+			}
+			delta := map[string]any{
+				"content": cleaned,
+			}
+			if !s.firstChunkSent {
+				delta["role"] = "assistant"
+				s.firstChunkSent = true
+			}
+			s.sendChunk(openaifmt.BuildChatStreamChunk(
+				s.completionID,
+				s.created,
+				s.model,
+				[]map[string]any{openaifmt.BuildChatStreamDeltaChoice(0, delta)},
+				nil,
+			))
+		}
+	}
+
+	// Tool calls seen at any point force the finish reason, including calls
+	// that were emitted earlier during streaming.
+	if len(detected.Calls) > 0 || s.toolCallsEmitted {
+		finishReason = "tool_calls"
+	}
+	usage := openaifmt.BuildChatUsage(s.finalPrompt, finalThinking, finalText)
+	if s.outputTokens > 0 {
+		// Prefer the upstream-reported output token count over the estimate.
+		usage["completion_tokens"] = s.outputTokens
+		if prompt, ok := usage["prompt_tokens"].(int); ok {
+			usage["total_tokens"] = prompt + s.outputTokens
+		}
+	}
+	s.sendChunk(openaifmt.BuildChatStreamChunk(
+		s.completionID,
+		s.created,
+		s.model,
+		[]map[string]any{openaifmt.BuildChatStreamFinishChoice(0, finishReason)},
+		usage,
+	))
+	s.sendDone()
+}
+
+// onParsed handles one parsed upstream line and emits the resulting deltas as
+// a single chunk.
+//
+// Unparsed lines are ignored. A content-filter flag stops the stream with the
+// "content_filter" stop reason; an upstream error message stops it with the
+// same reason; an ordinary stop flag stops it with
+// StopReasonHandlerRequested.
+//
+// For content lines, each non-empty, non-citation part is accumulated.
+// Thinking parts become reasoning_content when thinking is enabled. Text parts
+// are sent directly, or routed through the tool sieve when tool buffering is
+// on, in which case the sieve's events decide whether tool-call deltas,
+// complete tool calls or plain content are emitted.
+func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision {
+	if !parsed.Parsed {
+		return streamengine.ParsedDecision{}
+	}
+	if parsed.OutputTokens > 0 {
+		s.outputTokens = parsed.OutputTokens
+	}
+	if parsed.ContentFilter {
+		// A filtered response must be reported as such so the finish reason
+		// does not look like a normal stop.
+		return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("content_filter")}
+	}
+	if parsed.ErrorMessage != "" {
+		return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("content_filter")}
+	}
+	if parsed.Stop {
+		return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReasonHandlerRequested}
+	}
+
+	newChoices := make([]map[string]any, 0, len(parsed.Parts))
+	contentSeen := false
+	for _, p := range parsed.Parts {
+		cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
+		if s.searchEnabled && sse.IsCitation(cleanedText) {
+			continue
+		}
+		if cleanedText == "" {
+			continue
+		}
+		contentSeen = true
+		delta := map[string]any{}
+		if !s.firstChunkSent {
+			delta["role"] = "assistant"
+			s.firstChunkSent = true
+		}
+		if p.Type == "thinking" {
+			if s.thinkingEnabled {
+				s.thinking.WriteString(cleanedText)
+				delta["reasoning_content"] = cleanedText
+			}
+		} else {
+			s.text.WriteString(cleanedText)
+			if !s.bufferToolContent {
+				delta["content"] = cleanedText
+			} else {
+				events := processToolSieveChunk(&s.toolSieve, cleanedText, s.toolNames)
+				for _, evt := range events {
+					if len(evt.ToolCallDeltas) > 0 {
+						// Incremental deltas are only forwarded when early
+						// emission is enabled and they survive filtering.
+						if !s.emitEarlyToolDeltas {
+							continue
+						}
+						filtered := filterIncrementalToolCallDeltasByAllowed(evt.ToolCallDeltas, s.toolNames, s.streamToolNames)
+						if len(filtered) == 0 {
+							continue
+						}
+						formatted := formatIncrementalStreamToolCallDeltas(filtered, s.streamToolCallIDs)
+						if len(formatted) == 0 {
+							continue
+						}
+						tcDelta := map[string]any{
+							"tool_calls": formatted,
+						}
+						s.toolCallsEmitted = true
+						if !s.firstChunkSent {
+							tcDelta["role"] = "assistant"
+							s.firstChunkSent = true
+						}
+						newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, tcDelta))
+						continue
+					}
+					if len(evt.ToolCalls) > 0 {
+						s.toolCallsEmitted = true
+						s.toolCallsDoneEmitted = true
+						tcDelta := map[string]any{
+							"tool_calls": formatFinalStreamToolCallsWithStableIDs(evt.ToolCalls, s.streamToolCallIDs),
+						}
+						if !s.firstChunkSent {
+							tcDelta["role"] = "assistant"
+							s.firstChunkSent = true
+						}
+						newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, tcDelta))
+						continue
+					}
+					if evt.Content != "" {
+						cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
+						if cleaned == "" {
+							continue
+						}
+						contentDelta := map[string]any{
+							"content": cleaned,
+						}
+						if !s.firstChunkSent {
+							contentDelta["role"] = "assistant"
+							s.firstChunkSent = true
+						}
+						newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, contentDelta))
+					}
+				}
+			}
+		}
+		// delta is empty when the part produced nothing to send directly
+		// (e.g. sieved text, or thinking while thinking is disabled after the
+		// role was already sent).
+		if len(delta) > 0 {
+			newChoices = append(newChoices, openaifmt.BuildChatStreamDeltaChoice(0, delta))
+		}
+	}
+
+	if len(newChoices) > 0 {
+		s.sendChunk(openaifmt.BuildChatStreamChunk(s.completionID, s.created, s.model, newChoices, nil))
+	}
+	return streamengine.ParsedDecision{ContentSeen: contentSeen}
+}
--- a/internal/adapter/openai/deps.go
+++ b/internal/adapter/openai/deps.go
@@ -0,0 +1,40 @@
+package openai
+
+import (
+	"context"
+	"net/http"
+
+	"ds2api/internal/auth"
+	"ds2api/internal/config"
+	"ds2api/internal/deepseek"
+)
+
+// AuthResolver abstracts request authentication for the OpenAI adapter.
+// *auth.Resolver is the production implementation (asserted below).
+type AuthResolver interface {
+	// Determine resolves the credentials for req. Handlers in this package
+	// pair a successful call with Release.
+	Determine(req *http.Request) (*auth.RequestAuth, error)
+	// DetermineCaller has the same signature as Determine.
+	// NOTE(review): presumably identifies the caller without reserving an
+	// account; confirm against auth.Resolver.
+	DetermineCaller(req *http.Request) (*auth.RequestAuth, error)
+	// Release hands a back to the resolver once the request is finished.
+	Release(a *auth.RequestAuth)
+}
+
+// DeepSeekCaller is the subset of the upstream DeepSeek client used by the
+// OpenAI adapter. *deepseek.Client is the production implementation
+// (asserted below); tests supply stubs.
+type DeepSeekCaller interface {
+	// CreateSession returns the ID of a new upstream session.
+	// NOTE(review): maxAttempts presumably bounds retries; confirm against
+	// deepseek.Client.
+	CreateSession(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
+	// GetPow returns the proof-of-work response string that is later passed
+	// to CallCompletion as powResp.
+	GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
+	// CallCompletion sends payload upstream and returns the raw HTTP
+	// response; callers in this package inspect its status and read its body.
+	CallCompletion(ctx context.Context, a *auth.RequestAuth, payload map[string]any, powResp string, maxAttempts int) (*http.Response, error)
+	// DeleteSessionForToken deletes one upstream session of the given token.
+	DeleteSessionForToken(ctx context.Context, token string, sessionID string) (*deepseek.DeleteSessionResult, error)
+	// DeleteAllSessionsForToken deletes every upstream session of the token.
+	DeleteAllSessionsForToken(ctx context.Context, token string) error
+}
+
+// ConfigReader is the read-only view of configuration consumed by the OpenAI
+// adapter. *config.Store is the production implementation (asserted below).
+type ConfigReader interface {
+	// ModelAliases maps caller-facing model names to resolved model names.
+	ModelAliases() map[string]string
+	CompatWideInputStrictOutput() bool
+	// CompatStripReferenceMarkers controls whether reference markers are
+	// stripped from visible output.
+	CompatStripReferenceMarkers() bool
+	ToolcallMode() string
+	ToolcallEarlyEmitConfidence() string
+	ResponsesStoreTTLSeconds() int
+	// EmbeddingsProvider names the embeddings backend; the Embeddings handler
+	// accepts "mock", "deterministic" and "builtin" (case-insensitive).
+	EmbeddingsProvider() string
+	// AutoDeleteMode selects post-request session cleanup; the chat handler
+	// understands "none", "single" and "all".
+	AutoDeleteMode() string
+	AutoDeleteSessions() bool
+}
+
+// Compile-time assertions that the production types satisfy the adapter's
+// dependency interfaces.
+var _ AuthResolver = (*auth.Resolver)(nil)
+var _ DeepSeekCaller = (*deepseek.Client)(nil)
+var _ ConfigReader = (*config.Store)(nil)
--- a/internal/adapter/openai/deps_injection_test.go
+++ b/internal/adapter/openai/deps_injection_test.go
@@ -0,0 +1,79 @@
+package openai
+
+import "testing"
+
+// mockOpenAIConfig is an in-memory ConfigReader used by tests. Most getters
+// echo the matching field; CompatStripReferenceMarkers always reports true,
+// AutoDeleteSessions always reports false, and AutoDeleteMode falls back to
+// "none" when the field is empty.
+type mockOpenAIConfig struct {
+	aliases        map[string]string
+	wideInput      bool
+	autoDeleteMode string
+	toolMode       string
+	earlyEmit      string
+	responsesTTL   int
+	embedProv      string
+}
+
+func (m mockOpenAIConfig) ModelAliases() map[string]string { return m.aliases }
+func (m mockOpenAIConfig) CompatWideInputStrictOutput() bool {
+	return m.wideInput
+}
+func (m mockOpenAIConfig) CompatStripReferenceMarkers() bool   { return true }
+func (m mockOpenAIConfig) ToolcallMode() string                { return m.toolMode }
+func (m mockOpenAIConfig) ToolcallEarlyEmitConfidence() string { return m.earlyEmit }
+func (m mockOpenAIConfig) ResponsesStoreTTLSeconds() int       { return m.responsesTTL }
+func (m mockOpenAIConfig) EmbeddingsProvider() string          { return m.embedProv }
+
+// AutoDeleteMode returns the configured mode, defaulting to "none" so tests
+// that leave the field unset do not trigger session deletion.
+func (m mockOpenAIConfig) AutoDeleteMode() string {
+	if m.autoDeleteMode == "" {
+		return "none"
+	}
+	return m.autoDeleteMode
+}
+func (m mockOpenAIConfig) AutoDeleteSessions() bool { return false }
+
+// TestNormalizeOpenAIChatRequestWithConfigInterface verifies that chat request
+// normalization works against any ConfigReader (here a mock): the alias
+// "my-model" must resolve to "deepseek-chat-search", with search enabled and
+// thinking disabled.
+func TestNormalizeOpenAIChatRequestWithConfigInterface(t *testing.T) {
+	cfg := mockOpenAIConfig{
+		aliases: map[string]string{
+			"my-model": "deepseek-chat-search",
+		},
+		wideInput: true,
+	}
+	req := map[string]any{
+		"model":    "my-model",
+		"messages": []any{map[string]any{"role": "user", "content": "hello"}},
+	}
+	out, err := normalizeOpenAIChatRequest(cfg, req, "")
+	if err != nil {
+		t.Fatalf("normalizeOpenAIChatRequest error: %v", err)
+	}
+	if out.ResolvedModel != "deepseek-chat-search" {
+		t.Fatalf("resolved model mismatch: got=%q", out.ResolvedModel)
+	}
+	// The resolved model is expected to turn search on and leave thinking off.
+	if !out.Search || out.Thinking {
+		t.Fatalf("unexpected model flags: thinking=%v search=%v", out.Thinking, out.Search)
+	}
+}
+
+// TestNormalizeOpenAIResponsesRequestWideInputPolicyFromInterface checks that
+// the wide-input policy is read from the ConfigReader: a request carrying only
+// "input" is rejected when wideInput is false and accepted when it is true.
+func TestNormalizeOpenAIResponsesRequestWideInputPolicyFromInterface(t *testing.T) {
+	req := map[string]any{
+		"model": "deepseek-chat",
+		"input": "hi",
+	}
+
+	// Wide input disabled: normalization must fail.
+	_, err := normalizeOpenAIResponsesRequest(mockOpenAIConfig{
+		aliases:   map[string]string{},
+		wideInput: false,
+	}, req, "")
+	if err == nil {
+		t.Fatal("expected error when wide input is disabled and only input is provided")
+	}
+
+	// Wide input enabled: the same request must normalize successfully.
+	out, err := normalizeOpenAIResponsesRequest(mockOpenAIConfig{
+		aliases:   map[string]string{},
+		wideInput: true,
+	}, req, "")
+	if err != nil {
+		t.Fatalf("unexpected error when wide input is enabled: %v", err)
+	}
+	if out.Surface != "openai_responses" {
+		t.Fatalf("unexpected surface: %q", out.Surface)
+	}
+}
--- a/internal/adapter/openai/embeddings_handler.go
+++ b/internal/adapter/openai/embeddings_handler.go
@@ -0,0 +1,138 @@
+package openai
+
+import (
+	"crypto/sha256"
+	"encoding/binary"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"strings"
+
+	"ds2api/internal/auth"
+	"ds2api/internal/config"
+	"ds2api/internal/util"
+)
+
+// Embeddings handles POST /v1/embeddings.
+//
+// The request is authenticated, then validated in this order: JSON body,
+// non-empty "model" that resolves via config.ResolveModel, non-empty "input",
+// and a configured, supported embeddings provider. Only the local
+// deterministic provider (aliases "mock", "deterministic", "builtin") is
+// implemented; a missing or unknown provider yields 501. On success the
+// response is a list object with one embedding per input and an estimated
+// token usage.
+func (h *Handler) Embeddings(w http.ResponseWriter, r *http.Request) {
+	a, err := h.Auth.Determine(r)
+	if err != nil {
+		status := http.StatusUnauthorized
+		detail := err.Error()
+		// auth.ErrNoAccount is reported as 429 instead of 401.
+		if err == auth.ErrNoAccount {
+			status = http.StatusTooManyRequests
+		}
+		writeOpenAIError(w, status, detail)
+		return
+	}
+	defer h.Auth.Release(a)
+
+	var req map[string]any
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		writeOpenAIError(w, http.StatusBadRequest, "invalid json")
+		return
+	}
+	model, _ := req["model"].(string)
+	model = strings.TrimSpace(model)
+	if model == "" {
+		writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model'.")
+		return
+	}
+	if _, ok := config.ResolveModel(h.Store, model); !ok {
+		writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model))
+		return
+	}
+
+	inputs := extractEmbeddingInputs(req["input"])
+	if len(inputs) == 0 {
+		writeOpenAIError(w, http.StatusBadRequest, "Request must include non-empty 'input'.")
+		return
+	}
+
+	// Provider names are compared case-insensitively and ignoring padding.
+	provider := ""
+	if h.Store != nil {
+		provider = strings.ToLower(strings.TrimSpace(h.Store.EmbeddingsProvider()))
+	}
+	if provider == "" {
+		writeOpenAIError(w, http.StatusNotImplemented, "Embeddings provider is not configured. Set embeddings.provider in config.")
+		return
+	}
+	switch provider {
+	case "mock", "deterministic", "builtin":
+		// supported local deterministic provider
+	default:
+		writeOpenAIError(w, http.StatusNotImplemented, fmt.Sprintf("Embeddings provider '%s' is not supported.", provider))
+		return
+	}
+
+	data := make([]map[string]any, 0, len(inputs))
+	totalTokens := 0
+	for i, input := range inputs {
+		totalTokens += util.EstimateTokens(input)
+		data = append(data, map[string]any{
+			"object":    "embedding",
+			"index":     i,
+			"embedding": deterministicEmbedding(input),
+		})
+	}
+	// The response echoes the requested model name, not the resolved one.
+	writeJSON(w, http.StatusOK, map[string]any{
+		"object": "list",
+		"data":   data,
+		"model":  model,
+		"usage": map[string]any{
+			"prompt_tokens": totalTokens,
+			"total_tokens":  totalTokens,
+		},
+	})
+}
+
+// extractEmbeddingInputs normalizes the "input" field of an embeddings request
+// into a list of strings, one per embedding to produce.
+//
+// Accepted shapes (as produced by encoding/json decoding into any):
+//   - string: a single input; a whitespace-only string yields nil.
+//   - []any consisting solely of numbers: a single tokenized input, rendered
+//     with fmt's %v so identical tokens always map to the same string.
+//   - []any of strings and/or nested []any token arrays: one input per
+//     element; blank strings are dropped, other element types are rendered
+//     with %v.
+//
+// Any other top-level type yields nil.
+func extractEmbeddingInputs(raw any) []string {
+	switch v := raw.(type) {
+	case string:
+		s := strings.TrimSpace(v)
+		if s == "" {
+			return nil
+		}
+		return []string{s}
+	case []any:
+		// A flat array of token IDs is ONE input, not one input per token.
+		if isEmbeddingTokenArray(v) {
+			return []string{fmt.Sprintf("%v", v)}
+		}
+		out := make([]string, 0, len(v))
+		for _, item := range v {
+			switch iv := item.(type) {
+			case string:
+				s := strings.TrimSpace(iv)
+				if s != "" {
+					out = append(out, s)
+				}
+			case []any:
+				// Token array input support: convert to stable string form.
+				out = append(out, fmt.Sprintf("%v", iv))
+			default:
+				s := strings.TrimSpace(fmt.Sprintf("%v", iv))
+				if s != "" {
+					out = append(out, s)
+				}
+			}
+		}
+		return out
+	default:
+		return nil
+	}
+}
+
+// isEmbeddingTokenArray reports whether items is a non-empty list made up
+// only of JSON numbers (float64 after decoding), i.e. one tokenized input.
+func isEmbeddingTokenArray(items []any) bool {
+	if len(items) == 0 {
+		return false
+	}
+	for _, item := range items {
+		if _, ok := item.(float64); !ok {
+			return false
+		}
+	}
+	return true
+}
+
+// deterministicEmbedding derives a fixed-size (64-dimension) pseudo-embedding
+// from input using only SHA-256, so the response shape stays stable without
+// external dependencies. The same input always yields the same vector, and
+// every component lies in [-1, 1].
+//
+// Each digest supplies eight 32-bit values. When a digest is used up, the next
+// one is the hash of the previous digest, so every dimension depends on input.
+func deterministicEmbedding(input string) []float64 {
+	const dims = 64
+	out := make([]float64, dims)
+	block := sha256.Sum256([]byte(input))
+	buf := block[:]
+	for i := range out {
+		if len(buf) < 4 {
+			// Chain from the previous digest. Hashing the exhausted (empty)
+			// remainder would give the same constant for every input.
+			block = sha256.Sum256(block[:])
+			buf = block[:]
+		}
+		v := binary.BigEndian.Uint32(buf[:4])
+		buf = buf[4:]
+		// map [0, 2^32) -> [-1, 1]
+		out[i] = float64(v)/2147483647.5 - 1.0
+	}
+	return out
+}
--- a/internal/adapter/openai/embeddings_route_test.go
+++ b/internal/adapter/openai/embeddings_route_test.go
@@ -0,0 +1,96 @@
+package openai
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/go-chi/chi/v5"
+
+	"ds2api/internal/account"
+	"ds2api/internal/auth"
+	"ds2api/internal/config"
+)
+
+// newResolverWithConfigJSON builds a config store and an auth resolver from
+// cfgJSON for the duration of the test. The config is injected through the
+// DS2API_CONFIG_JSON environment variable (restored by t.Setenv on cleanup),
+// and the resolver's callback always returns the placeholder token "unused".
+func newResolverWithConfigJSON(t *testing.T, cfgJSON string) (*config.Store, *auth.Resolver) {
+	t.Helper()
+	t.Setenv("DS2API_CONFIG_JSON", cfgJSON)
+	store := config.LoadStore()
+	pool := account.NewPool(store)
+	resolver := auth.NewResolver(store, pool, func(_ context.Context, _ config.Account) (string, error) {
+		return "unused", nil
+	})
+	return store, resolver
+}
+
+// TestEmbeddingsRouteContract exercises POST /v1/embeddings through the real
+// router with the "deterministic" provider configured: a request without
+// credentials must get 401, and an authorized request with two inputs must
+// get a 200 list response containing two embeddings.
+func TestEmbeddingsRouteContract(t *testing.T) {
+	store, resolver := newResolverWithConfigJSON(t, `{"embeddings":{"provider":"deterministic"}}`)
+	h := &Handler{Store: store, Auth: resolver}
+	r := chi.NewRouter()
+	RegisterRoutes(r, h)
+
+	t.Run("unauthorized", func(t *testing.T) {
+		// No Authorization header is set on this request.
+		body := bytes.NewBufferString(`{"model":"gpt-4o","input":"hello"}`)
+		req := httptest.NewRequest(http.MethodPost, "/v1/embeddings", body)
+		req.Header.Set("Content-Type", "application/json")
+		rec := httptest.NewRecorder()
+		r.ServeHTTP(rec, req)
+		if rec.Code != http.StatusUnauthorized {
+			t.Fatalf("expected 401, got %d body=%s", rec.Code, rec.Body.String())
+		}
+	})
+
+	t.Run("ok", func(t *testing.T) {
+		body := bytes.NewBufferString(`{"model":"gpt-4o","input":["a","b"]}`)
+		req := httptest.NewRequest(http.MethodPost, "/v1/embeddings", body)
+		req.Header.Set("Authorization", "Bearer test-token")
+		req.Header.Set("Content-Type", "application/json")
+		rec := httptest.NewRecorder()
+		r.ServeHTTP(rec, req)
+		if rec.Code != http.StatusOK {
+			t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+		}
+		var out map[string]any
+		if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
+			t.Fatalf("decode response failed: %v", err)
+		}
+		if out["object"] != "list" {
+			t.Fatalf("unexpected object: %#v", out["object"])
+		}
+		// One embedding entry is expected per input string.
+		data, _ := out["data"].([]any)
+		if len(data) != 2 {
+			t.Fatalf("expected 2 embeddings, got %d", len(data))
+		}
+	})
+}
+
+// TestEmbeddingsRouteProviderMissing verifies that, with an empty config (no
+// embeddings provider), an authorized embeddings request gets 501 and that
+// the error object carries both "code" and "param" keys.
+func TestEmbeddingsRouteProviderMissing(t *testing.T) {
+	store, resolver := newResolverWithConfigJSON(t, `{}`)
+	h := &Handler{Store: store, Auth: resolver}
+	r := chi.NewRouter()
+	RegisterRoutes(r, h)
+
+	body := bytes.NewBufferString(`{"model":"gpt-4o","input":"hello"}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/embeddings", body)
+	req.Header.Set("Authorization", "Bearer test-token")
+	req.Header.Set("Content-Type", "application/json")
+	rec := httptest.NewRecorder()
+	r.ServeHTTP(rec, req)
+	if rec.Code != http.StatusNotImplemented {
+		t.Fatalf("expected 501, got %d body=%s", rec.Code, rec.Body.String())
+	}
+	var out map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
+		t.Fatalf("decode response failed: %v", err)
+	}
+	// Only key presence is checked; "param" may be JSON null.
+	errObj, _ := out["error"].(map[string]any)
+	if _, ok := errObj["code"]; !ok {
+		t.Fatalf("expected error.code in response: %#v", out)
+	}
+	if _, ok := errObj["param"]; !ok {
+		t.Fatalf("expected error.param in response: %#v", out)
+	}
+}
--- a/internal/adapter/openai/error_shape_test.go
+++ b/internal/adapter/openai/error_shape_test.go
@@ -0,0 +1,34 @@
+package openai
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+)
+
+// TestWriteOpenAIErrorIncludesUnifiedFields checks the error envelope written
+// by writeOpenAIError for a 400: the status code, the message, the derived
+// type ("invalid_request_error") and code ("invalid_request"), and the
+// presence of the "param" key.
+func TestWriteOpenAIErrorIncludesUnifiedFields(t *testing.T) {
+	rec := httptest.NewRecorder()
+	writeOpenAIError(rec, http.StatusBadRequest, "invalid input")
+	if rec.Code != http.StatusBadRequest {
+		t.Fatalf("expected 400, got %d", rec.Code)
+	}
+
+	var body map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
+		t.Fatalf("decode body: %v", err)
+	}
+	errObj, _ := body["error"].(map[string]any)
+	if errObj["message"] != "invalid input" {
+		t.Fatalf("unexpected message: %v", errObj["message"])
+	}
+	if errObj["type"] != "invalid_request_error" {
+		t.Fatalf("unexpected type: %v", errObj["type"])
+	}
+	if errObj["code"] != "invalid_request" {
+		t.Fatalf("unexpected code: %v", errObj["code"])
+	}
+	// "param" is serialized as null, so only its presence is asserted.
+	if _, ok := errObj["param"]; !ok {
+		t.Fatal("expected param field")
+	}
+}
--- a/internal/adapter/openai/handler_chat.go
+++ b/internal/adapter/openai/handler_chat.go
@@ -0,0 +1,208 @@
+package openai
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"time"
+
+	"ds2api/internal/auth"
+	"ds2api/internal/config"
+	"ds2api/internal/deepseek"
+	openaifmt "ds2api/internal/format/openai"
+	"ds2api/internal/sse"
+	streamengine "ds2api/internal/stream"
+)
+
+// ChatCompletions handles POST /v1/chat/completions.
+//
+// Vercel stream release/prepare requests are dispatched to their dedicated
+// handlers first. Otherwise the request is authenticated, normalized, and
+// forwarded upstream (create session, get PoW, call completion). The upstream
+// response is then rendered as an SSE stream or as a single JSON body,
+// depending on the normalized request's Stream flag.
+func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
+	if isVercelStreamReleaseRequest(r) {
+		h.handleVercelStreamRelease(w, r)
+		return
+	}
+	if isVercelStreamPrepareRequest(r) {
+		h.handleVercelStreamPrepare(w, r)
+		return
+	}
+
+	a, err := h.Auth.Determine(r)
+	if err != nil {
+		status := http.StatusUnauthorized
+		detail := err.Error()
+		// auth.ErrNoAccount is reported as 429 instead of 401.
+		if err == auth.ErrNoAccount {
+			status = http.StatusTooManyRequests
+		}
+		writeOpenAIError(w, status, detail)
+		return
+	}
+	// sessionID is declared before the defer so the cleanup closure sees the
+	// value assigned later; it is still empty if session creation never
+	// succeeded. Cleanup runs before the auth is released.
+	var sessionID string
+	defer func() {
+		h.autoDeleteRemoteSession(r.Context(), a, sessionID)
+		h.Auth.Release(a)
+	}()
+
+	r = r.WithContext(auth.WithAuth(r.Context(), a))
+
+	var req map[string]any
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		writeOpenAIError(w, http.StatusBadRequest, "invalid json")
+		return
+	}
+	stdReq, err := normalizeOpenAIChatRequest(h.Store, req, requestTraceID(r))
+	if err != nil {
+		writeOpenAIError(w, http.StatusBadRequest, err.Error())
+		return
+	}
+
+	sessionID, err = h.DS.CreateSession(r.Context(), a, 3)
+	if err != nil {
+		// The hint differs depending on whether the token came from a
+		// configured account or directly from the caller.
+		if a.UseConfigToken {
+			writeOpenAIError(w, http.StatusUnauthorized, "Account token is invalid. Please re-login the account in admin.")
+		} else {
+			writeOpenAIError(w, http.StatusUnauthorized, "Invalid token. If this should be a DS2API key, add it to config.keys first.")
+		}
+		return
+	}
+	pow, err := h.DS.GetPow(r.Context(), a, 3)
+	if err != nil {
+		writeOpenAIError(w, http.StatusUnauthorized, "Failed to get PoW (invalid token or unknown error).")
+		return
+	}
+	payload := stdReq.CompletionPayload(sessionID)
+	resp, err := h.DS.CallCompletion(r.Context(), a, payload, pow, 3)
+	if err != nil {
+		writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
+		return
+	}
+	// The upstream session ID is passed on as the completion ID.
+	if stdReq.Stream {
+		h.handleStream(w, r, resp, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.Search, stdReq.ToolNames)
+		return
+	}
+	h.handleNonStream(w, r.Context(), resp, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.ToolNames)
+}
+
+// autoDeleteRemoteSession performs the configured post-request cleanup of
+// upstream sessions for the account behind a.
+//
+// Nothing happens when the mode is "none" or a carries no DeepSeek token.
+// "single" deletes sessionID (skipped with a warning if it is empty), "all"
+// deletes every session of the token, and any other mode is logged as
+// unknown. The delete call runs on a context detached from ctx's cancellation
+// and capped at 10 seconds, so cleanup still happens after the client has
+// disconnected. Failures are logged, never returned.
+func (h *Handler) autoDeleteRemoteSession(ctx context.Context, a *auth.RequestAuth, sessionID string) {
+	mode := h.Store.AutoDeleteMode()
+	if mode == "none" {
+		return
+	}
+	if a.DeepSeekToken == "" {
+		return
+	}
+
+	// Detach from the request's cancellation but keep a hard upper bound.
+	deleteCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 10*time.Second)
+	defer cancel()
+
+	if mode == "single" {
+		if sessionID == "" {
+			config.Logger.Warn("[auto_delete_sessions] skipped single-session delete because session_id is empty", "account", a.AccountID)
+			return
+		}
+		if _, err := h.DS.DeleteSessionForToken(deleteCtx, a.DeepSeekToken, sessionID); err != nil {
+			config.Logger.Warn("[auto_delete_sessions] failed", "account", a.AccountID, "mode", mode, "session_id", sessionID, "error", err)
+			return
+		}
+		config.Logger.Debug("[auto_delete_sessions] success", "account", a.AccountID, "mode", mode, "session_id", sessionID)
+		return
+	}
+
+	if mode == "all" {
+		err := h.DS.DeleteAllSessionsForToken(deleteCtx, a.DeepSeekToken)
+		if err != nil {
+			config.Logger.Warn("[auto_delete_sessions] failed", "account", a.AccountID, "mode", mode, "error", err)
+			return
+		}
+		config.Logger.Debug("[auto_delete_sessions] success", "account", a.AccountID, "mode", mode)
+		return
+	}
+
+	config.Logger.Warn("[auto_delete_sessions] unknown mode", "account", a.AccountID, "mode", mode)
+}
+
+// handleNonStream turns an upstream completion response into a single OpenAI
+// chat completion JSON body.
+//
+// A non-200 upstream status is relayed with the upstream body as the error
+// message. Otherwise the SSE stream is collected, visible output is cleaned,
+// and an empty result is reported through writeUpstreamEmptyOutputError. When
+// the upstream reported an output token count, it replaces
+// usage.completion_tokens, and usage.total_tokens is recomputed if
+// prompt_tokens is an int. ctx is accepted but unused.
+func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, resp *http.Response, completionID, model, finalPrompt string, thinkingEnabled bool, toolNames []string) {
+	_ = ctx
+	if resp.StatusCode != http.StatusOK {
+		defer resp.Body.Close()
+		raw, _ := io.ReadAll(resp.Body)
+		writeOpenAIError(w, resp.StatusCode, string(raw))
+		return
+	}
+
+	collected := sse.CollectStream(resp, thinkingEnabled, true)
+	strip := h.compatStripReferenceMarkers()
+	thinking := cleanVisibleOutput(collected.Thinking, strip)
+	text := cleanVisibleOutput(collected.Text, strip)
+	if writeUpstreamEmptyOutputError(w, thinking, text, collected.ContentFilter) {
+		return
+	}
+
+	out := openaifmt.BuildChatCompletion(completionID, model, finalPrompt, thinking, text, toolNames)
+	// Prefer the upstream-reported completion token count when there is one.
+	if usage, ok := out["usage"].(map[string]any); ok && collected.OutputTokens > 0 {
+		usage["completion_tokens"] = collected.OutputTokens
+		if promptTokens, isInt := usage["prompt_tokens"].(int); isInt {
+			usage["total_tokens"] = promptTokens + collected.OutputTokens
+		}
+	}
+	writeJSON(w, http.StatusOK, out)
+}
+
+// handleStream relays an upstream completion response to the client as an
+// OpenAI-style SSE stream.
+//
+// A non-200 upstream status is returned as a regular JSON error with the
+// upstream body as the message. Otherwise SSE headers are set, a chat stream
+// runtime is created, and streamengine.ConsumeSSE drives it through the
+// keep-alive, parsed-event and finalize hooks. The upstream body is always
+// closed on return.
+func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *http.Response, completionID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string) {
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		writeOpenAIError(w, resp.StatusCode, string(body))
+		return
+	}
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache, no-transform")
+	w.Header().Set("Connection", "keep-alive")
+	w.Header().Set("X-Accel-Buffering", "no")
+	rc := http.NewResponseController(w)
+	// Streaming continues even without flush support; it is only logged.
+	_, canFlush := w.(http.Flusher)
+	if !canFlush {
+		config.Logger.Warn("[stream] response writer does not support flush; streaming may be buffered")
+	}
+
+	created := time.Now().Unix()
+	// Tool content is buffered only when the request declared tools.
+	bufferToolContent := len(toolNames) > 0
+	emitEarlyToolDeltas := h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence()
+	stripReferenceMarkers := h.compatStripReferenceMarkers()
+	// The stream starts in "thinking" mode when thinking is enabled.
+	initialType := "text"
+	if thinkingEnabled {
+		initialType = "thinking"
+	}
+
+	streamRuntime := newChatStreamRuntime(
+		w,
+		rc,
+		canFlush,
+		completionID,
+		created,
+		model,
+		finalPrompt,
+		thinkingEnabled,
+		searchEnabled,
+		stripReferenceMarkers,
+		toolNames,
+		bufferToolContent,
+		emitEarlyToolDeltas,
+	)
+
+	streamengine.ConsumeSSE(streamengine.ConsumeConfig{
+		Context:             r.Context(),
+		Body:                resp.Body,
+		ThinkingEnabled:     thinkingEnabled,
+		InitialType:         initialType,
+		KeepAliveInterval:   time.Duration(deepseek.KeepAliveTimeout) * time.Second,
+		IdleTimeout:         time.Duration(deepseek.StreamIdleTimeout) * time.Second,
+		MaxKeepAliveNoInput: deepseek.MaxKeepaliveCount,
+	}, streamengine.ConsumeHooks{
+		OnKeepAlive: func() {
+			streamRuntime.sendKeepAlive()
+		},
+		OnParsed: streamRuntime.onParsed,
+		OnFinalize: func(reason streamengine.StopReason, _ error) {
+			// Only a content-filter stop gets its own finish reason; every
+			// other stop reason is finalized as "stop".
+			if string(reason) == "content_filter" {
+				streamRuntime.finalize("content_filter")
+				return
+			}
+			streamRuntime.finalize("stop")
+		},
+	})
+}
--- a/internal/adapter/openai/handler_chat_auto_delete_test.go
+++ b/internal/adapter/openai/handler_chat_auto_delete_test.go
@@ -0,0 +1,139 @@
+package openai
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"ds2api/internal/auth"
+	"ds2api/internal/deepseek"
+)
+
+// autoDeleteModeDSStub is a DeepSeekCaller test double that returns canned
+// values for session/PoW/completion calls and counts delete calls so tests
+// can assert which auto-delete path ran.
+type autoDeleteModeDSStub struct {
+	resp          *http.Response // returned verbatim by CallCompletion
+	singleCalls   int            // single-session delete calls
+	allCalls      int            // delete-all calls
+	lastSessionID string         // session ID of the latest single delete
+	lastCtxErr    error          // ctx.Err() seen by DeleteSessionForTokenCtx
+}
+
+// CreateSession always succeeds with the fixed ID "session-id".
+func (m *autoDeleteModeDSStub) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
+	return "session-id", nil
+}
+
+// GetPow always succeeds with the fixed value "pow".
+func (m *autoDeleteModeDSStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
+	return "pow", nil
+}
+
+// CallCompletion returns the preconfigured response.
+func (m *autoDeleteModeDSStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) {
+	return m.resp, nil
+}
+
+// DeleteSessionForToken records the call and reports success.
+func (m *autoDeleteModeDSStub) DeleteSessionForToken(_ context.Context, _ string, sessionID string) (*deepseek.DeleteSessionResult, error) {
+	m.singleCalls++
+	m.lastSessionID = sessionID
+	return &deepseek.DeleteSessionResult{SessionID: sessionID, Success: true}, nil
+}
+
+// DeleteAllSessionsForToken records the call and reports success.
+func (m *autoDeleteModeDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error {
+	m.allCalls++
+	return nil
+}
+
+// DeleteSessionForTokenCtx behaves like DeleteSessionForToken and also
+// captures ctx.Err() at call time, letting tests check whether the delete
+// context was already canceled.
+func (m *autoDeleteModeDSStub) DeleteSessionForTokenCtx(ctx context.Context, _ string, sessionID string) (*deepseek.DeleteSessionResult, error) {
+	m.singleCalls++
+	m.lastSessionID = sessionID
+	m.lastCtxErr = ctx.Err()
+	return &deepseek.DeleteSessionResult{SessionID: sessionID, Success: true}, nil
+}
+
+// TestChatCompletionsAutoDeleteModes runs a non-streaming chat completion
+// under each auto-delete mode and asserts which upstream delete call was
+// made: none for "none", one single-session delete (for "session-id") for
+// "single", and one delete-all for "all".
+func TestChatCompletionsAutoDeleteModes(t *testing.T) {
+	tests := []struct {
+		name       string
+		mode       string
+		wantSingle int
+		wantAll    int
+	}{
+		{name: "none", mode: "none"},
+		{name: "single", mode: "single", wantSingle: 1},
+		{name: "all", mode: "all", wantAll: 1},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			// Fresh stub per case so call counters start at zero.
+			ds := &autoDeleteModeDSStub{
+				resp: makeOpenAISSEHTTPResponse(
+					`data: {"p":"response/content","v":"hello"}`,
+					"data: [DONE]",
+				),
+			}
+			h := &Handler{
+				Store: mockOpenAIConfig{
+					wideInput:      true,
+					autoDeleteMode: tc.mode,
+				},
+				Auth: streamStatusAuthStub{},
+				DS:   ds,
+			}
+
+			reqBody := `{"model":"deepseek-chat","messages":[{"role":"user","content":"hi"}],"stream":false}`
+			req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody))
+			req.Header.Set("Authorization", "Bearer direct-token")
+			req.Header.Set("Content-Type", "application/json")
+			rec := httptest.NewRecorder()
+
+			h.ChatCompletions(rec, req)
+
+			if rec.Code != http.StatusOK {
+				t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+			}
+			if ds.singleCalls != tc.wantSingle {
+				t.Fatalf("single delete calls=%d want=%d", ds.singleCalls, tc.wantSingle)
+			}
+			if ds.allCalls != tc.wantAll {
+				t.Fatalf("all delete calls=%d want=%d", ds.allCalls, tc.wantAll)
+			}
+			// The single delete must target the session the stub created.
+			if tc.wantSingle > 0 && ds.lastSessionID != "session-id" {
+				t.Fatalf("expected single delete for session-id, got %q", ds.lastSessionID)
+			}
+		})
+	}
+}
+
+// autoDeleteCtxDSStub embeds autoDeleteModeDSStub and routes single-session
+// deletes through the context-recording variant, so tests can inspect the
+// state of the context passed to the delete call.
+type autoDeleteCtxDSStub struct {
+	autoDeleteModeDSStub
+}
+
+// DeleteSessionForToken delegates to DeleteSessionForTokenCtx to capture
+// ctx.Err().
+func (m *autoDeleteCtxDSStub) DeleteSessionForToken(ctx context.Context, token string, sessionID string) (*deepseek.DeleteSessionResult, error) {
+	return m.autoDeleteModeDSStub.DeleteSessionForTokenCtx(ctx, token, sessionID)
+}
+
+// DeleteAllSessionsForToken records the call and reports success.
+func (m *autoDeleteCtxDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error {
+	m.allCalls++
+	return nil
+}
+
+// TestAutoDeleteRemoteSessionIgnoresCanceledParentContext verifies that the
+// cleanup still issues the delete call when the parent context is already
+// canceled, and that the context handed to the delete call is not itself
+// canceled.
+func TestAutoDeleteRemoteSessionIgnoresCanceledParentContext(t *testing.T) {
+	ds := &autoDeleteCtxDSStub{}
+	h := &Handler{
+		Store: mockOpenAIConfig{
+			wideInput:      true,
+			autoDeleteMode: "single",
+		},
+		DS: ds,
+	}
+	a := &auth.RequestAuth{DeepSeekToken: "token", AccountID: "acct"}
+	// Cancel up front to simulate a client that has already disconnected.
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	h.autoDeleteRemoteSession(ctx, a, "session-id")
+
+	if ds.singleCalls != 1 {
+		t.Fatalf("single delete calls=%d want=1", ds.singleCalls)
+	}
+	if ds.lastCtxErr != nil {
+		t.Fatalf("delete ctx should not inherit cancellation, got %v", ds.lastCtxErr)
+	}
+}
--- a/internal/adapter/openai/handler_errors.go
+++ b/internal/adapter/openai/handler_errors.go
@@ -0,0 +1,63 @@
+package openai
+
+import "net/http"
+
+// writeOpenAIError writes an OpenAI-style JSON error with the given HTTP
+// status and message, deriving the error code from the status.
+func writeOpenAIError(w http.ResponseWriter, status int, message string) {
+	writeOpenAIErrorWithCode(w, status, message, "")
+}
+
+// writeOpenAIErrorWithCode writes an OpenAI-style error envelope
+// {"error": {message, type, code, param}} with the given HTTP status. The
+// type is always derived from status; an empty code is replaced by the
+// status-derived default. "param" is always emitted as null.
+func writeOpenAIErrorWithCode(w http.ResponseWriter, status int, message, code string) {
+	resolvedCode := code
+	if resolvedCode == "" {
+		resolvedCode = openAIErrorCode(status)
+	}
+	detail := map[string]any{
+		"message": message,
+		"type":    openAIErrorType(status),
+		"code":    resolvedCode,
+		"param":   nil,
+	}
+	writeJSON(w, status, map[string]any{"error": detail})
+}
+
+// openAIErrorType maps an HTTP status to the "type" field of an OpenAI-style
+// error. 401, 403, 429 and 503 have dedicated types; any other status of 500
+// or above is "api_error"; everything else (including 400) is
+// "invalid_request_error".
+func openAIErrorType(status int) string {
+	switch {
+	case status == http.StatusUnauthorized:
+		return "authentication_error"
+	case status == http.StatusForbidden:
+		return "permission_error"
+	case status == http.StatusTooManyRequests:
+		return "rate_limit_error"
+	case status == http.StatusServiceUnavailable:
+		// Must precede the generic 5xx case below.
+		return "service_unavailable_error"
+	case status >= 500:
+		return "api_error"
+	}
+	return "invalid_request_error"
+}
+
+// openAIErrorCode maps an HTTP status to the default "code" field of an
+// OpenAI-style error. 401, 403, 404, 429 and 503 have dedicated codes; any
+// other status of 500 or above is "internal_error"; everything else
+// (including 400) is "invalid_request".
+func openAIErrorCode(status int) string {
+	specific := ""
+	switch status {
+	case http.StatusUnauthorized:
+		specific = "authentication_failed"
+	case http.StatusForbidden:
+		specific = "forbidden"
+	case http.StatusTooManyRequests:
+		specific = "rate_limit_exceeded"
+	case http.StatusNotFound:
+		specific = "not_found"
+	case http.StatusServiceUnavailable:
+		specific = "service_unavailable"
+	}
+	if specific != "" {
+		return specific
+	}
+	if status >= 500 {
+		return "internal_error"
+	}
+	return "invalid_request"
+}
--- a/internal/adapter/openai/handler_routes.go
+++ b/internal/adapter/openai/handler_routes.go
@@ -0,0 +1,64 @@
+package openai
+
+import (
+	"net/http"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/go-chi/chi/v5"
+
+	"ds2api/internal/auth"
+	"ds2api/internal/config"
+	"ds2api/internal/util"
+)
+
+// writeJSON is a package-local alias for util.WriteJSON, kept so that the
+// many call sites in this package do not have to be renamed.
+var writeJSON = util.WriteJSON
+
+// Handler serves the OpenAI-compatible HTTP endpoints. Its three exported
+// dependencies are interfaces so tests can inject stubs.
+type Handler struct {
+	Store ConfigReader   // configuration source
+	Auth  AuthResolver   // request authentication
+	DS    DeepSeekCaller // upstream DeepSeek client
+
+	// NOTE(review): leaseMu presumably guards streamLeases and responsesMu
+	// guards responses; confirm against the code that uses them.
+	leaseMu      sync.Mutex
+	streamLeases map[string]streamLease
+	responsesMu  sync.Mutex
+	responses    *responseStore
+}
+
+// compatStripReferenceMarkers reports whether reference markers should be
+// stripped from visible output. It is safe on a nil Handler or a Handler
+// without a Store; in both cases it defaults to true.
+func (h *Handler) compatStripReferenceMarkers() bool {
+	if h != nil && h.Store != nil {
+		return h.Store.CompatStripReferenceMarkers()
+	}
+	return true
+}
+
+// streamLease pairs a request's auth with an expiry time. Handler keeps
+// leases in a map keyed by string.
+type streamLease struct {
+	Auth      *auth.RequestAuth
+	ExpiresAt time.Time
+}
+
+// RegisterRoutes mounts the OpenAI-compatible endpoints of h on r: model
+// listing and lookup, chat completions, responses (create and fetch by ID),
+// and embeddings.
+func RegisterRoutes(r chi.Router, h *Handler) {
+	r.Get("/v1/models", h.ListModels)
+	r.Get("/v1/models/{model_id}", h.GetModel)
+	r.Post("/v1/chat/completions", h.ChatCompletions)
+	r.Post("/v1/responses", h.Responses)
+	r.Get("/v1/responses/{response_id}", h.GetResponseByID)
+	r.Post("/v1/embeddings", h.Embeddings)
+}
+
+// ListModels handles GET /v1/models by returning
+// config.OpenAIModelsResponse() as JSON; the request itself is not inspected.
+func (h *Handler) ListModels(w http.ResponseWriter, _ *http.Request) {
+	writeJSON(w, http.StatusOK, config.OpenAIModelsResponse())
+}
+
+// GetModel handles GET /v1/models/{model_id}. The path parameter is trimmed
+// and looked up via config.OpenAIModelByID; a hit is returned as JSON and a
+// miss produces a 404 OpenAI-style error.
+func (h *Handler) GetModel(w http.ResponseWriter, r *http.Request) {
+	id := strings.TrimSpace(chi.URLParam(r, "model_id"))
+	if found, ok := config.OpenAIModelByID(h.Store, id); ok {
+		writeJSON(w, http.StatusOK, found)
+		return
+	}
+	writeOpenAIError(w, http.StatusNotFound, "Model not found.")
+}
--- a/internal/adapter/openai/handler_toolcall_format.go
+++ b/internal/adapter/openai/handler_toolcall_format.go
@@ -0,0 +1,169 @@
+package openai
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"github.com/google/uuid"
+
+	"ds2api/internal/util"
+)
+
+// injectToolPrompt adds a textual description of the allowed tools, plus
+// tool-call instructions, to the system prompt of messages.
+//
+// It returns the (possibly extended) message list and the names of the tools
+// that were advertised. When policy.IsNone() the messages are returned
+// untouched with nil names. Tools are accepted both in the nested
+// {"function": {...}} shape and in a flat shape; entries that are not maps,
+// have a blank name, or are excluded by a non-empty policy.Allowed set are
+// skipped.
+//
+// The prompt is appended to the first message whose role is "system",
+// mutating that message map in place. If there is no system message, a new
+// one is prepended.
+func injectToolPrompt(messages []map[string]any, tools []any, policy util.ToolChoicePolicy) ([]map[string]any, []string) {
+	if policy.IsNone() {
+		return messages, nil
+	}
+	toolSchemas := make([]string, 0, len(tools))
+	names := make([]string, 0, len(tools))
+	// An empty Allowed set means "every named tool is allowed".
+	isAllowed := func(name string) bool {
+		if strings.TrimSpace(name) == "" {
+			return false
+		}
+		if len(policy.Allowed) == 0 {
+			return true
+		}
+		_, ok := policy.Allowed[name]
+		return ok
+	}
+
+	for _, t := range tools {
+		tool, ok := t.(map[string]any)
+		if !ok {
+			continue
+		}
+		// Fall back to the flat shape when there is no "function" object.
+		fn, _ := tool["function"].(map[string]any)
+		if len(fn) == 0 {
+			fn = tool
+		}
+		name, _ := fn["name"].(string)
+		desc, _ := fn["description"].(string)
+		schema, _ := fn["parameters"].(map[string]any)
+		name = strings.TrimSpace(name)
+		if !isAllowed(name) {
+			continue
+		}
+		names = append(names, name)
+		if desc == "" {
+			desc = "No description available"
+		}
+		// A missing "parameters" object marshals as "null".
+		b, _ := json.Marshal(schema)
+		toolSchemas = append(toolSchemas, fmt.Sprintf("Tool: %s\nDescription: %s\nParameters: %s", name, desc, string(b)))
+	}
+	if len(toolSchemas) == 0 {
+		return messages, names
+	}
+	toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\n" + buildToolCallInstructions(names)
+	// NOTE(review): the hard-coded rule numbers 7 and 8 presumably continue a
+	// list ending at 6 in util.BuildToolCallInstructions; confirm they stay
+	// in sync.
+	if policy.Mode == util.ToolChoiceRequired {
+		toolPrompt += "\n7) For this response, you MUST call at least one tool from the allowed list."
+	}
+	if policy.Mode == util.ToolChoiceForced && strings.TrimSpace(policy.ForcedName) != "" {
+		toolPrompt += "\n7) For this response, you MUST call exactly this tool name: " + strings.TrimSpace(policy.ForcedName)
+		toolPrompt += "\n8) Do not call any other tool."
+	}
+
+	for i := range messages {
+		if messages[i]["role"] == "system" {
+			// NOTE(review): if the system content is not a plain string, old
+			// is "" and the content is replaced by the tool prompt alone.
+			old, _ := messages[i]["content"].(string)
+			messages[i]["content"] = strings.TrimSpace(old + "\n\n" + toolPrompt)
+			return messages, names
+		}
+	}
+	messages = append([]map[string]any{{"role": "system", "content": toolPrompt}}, messages...)
+	return messages, names
+}
+
+// buildToolCallInstructions returns the tool-call instruction text for the
+// given tool names by delegating to the shared util.BuildToolCallInstructions.
+func buildToolCallInstructions(toolNames []string) string {
+	return util.BuildToolCallInstructions(toolNames)
+}
+
+// formatIncrementalStreamToolCallDeltas converts incremental tool-call deltas
+// into OpenAI streaming "tool_calls" entries.
+//
+// Deltas carrying neither a name nor arguments are skipped. Each entry gets
+// the call ID stored in ids for its index; if none is stored, a fresh
+// "call_<uuid without dashes>" ID is generated and, when ids is non-nil,
+// remembered so later deltas of the same index reuse it. A nil ids map is
+// tolerated: IDs are still generated but not remembered.
+//
+// It returns nil for empty input; otherwise a (possibly empty) slice.
+func formatIncrementalStreamToolCallDeltas(deltas []toolCallDelta, ids map[int]string) []map[string]any {
+	if len(deltas) == 0 {
+		return nil
+	}
+	out := make([]map[string]any, 0, len(deltas))
+	for _, d := range deltas {
+		if d.Name == "" && d.Arguments == "" {
+			continue
+		}
+		// Reading from a nil map is safe and yields "".
+		callID := ids[d.Index]
+		if callID == "" {
+			callID = "call_" + strings.ReplaceAll(uuid.NewString(), "-", "")
+			// Writing to a nil map would panic, so only persist when possible.
+			if ids != nil {
+				ids[d.Index] = callID
+			}
+		}
+		// The guard above guarantees at least one of name/arguments is set,
+		// so fn is never empty.
+		fn := make(map[string]any, 2)
+		if d.Name != "" {
+			fn["name"] = d.Name
+		}
+		if d.Arguments != "" {
+			fn["arguments"] = d.Arguments
+		}
+		out = append(out, map[string]any{
+			"index":    d.Index,
+			"id":       callID,
+			"type":     "function",
+			"function": fn,
+		})
+	}
+	return out
+}
+
+// filterIncrementalToolCallDeltasByAllowed drops argument-only deltas whose
+// tool name has not been seen yet for their index.
+//
+// A delta with a name is always kept, and its name is recorded in seenNames
+// (when non-nil). A nameless delta is kept if seenNames is nil, or if
+// seenNames holds a non-blank name for its index.
+//
+// NOTE(review): allowedNames is currently not consulted; despite the
+// function name, no allow-list filtering happens here.
+//
+// It returns nil for empty input; otherwise a (possibly empty) slice.
+func filterIncrementalToolCallDeltasByAllowed(deltas []toolCallDelta, allowedNames []string, seenNames map[int]string) []toolCallDelta {
+	if len(deltas) == 0 {
+		return nil
+	}
+	kept := make([]toolCallDelta, 0, len(deltas))
+	for _, delta := range deltas {
+		switch {
+		case delta.Name != "":
+			if seenNames != nil {
+				seenNames[delta.Index] = delta.Name
+			}
+		case seenNames == nil:
+			// No name tracking available: pass the delta through.
+		case strings.TrimSpace(seenNames[delta.Index]) == "":
+			// Arguments arrived before any name for this index: drop them.
+			continue
+		}
+		kept = append(kept, delta)
+	}
+	return kept
+}
+
+// formatFinalStreamToolCallsWithStableIDs renders fully parsed tool calls as
+// OpenAI streaming "tool_calls" entries, indexed by position.
+//
+// Each call reuses the (trimmed) ID stored in ids for its position. When none
+// is stored, a fresh "call_<uuid without dashes>" ID is generated and, if ids
+// is non-nil, remembered. Arguments are the JSON encoding of the call's
+// Input; an encoding error is ignored.
+//
+// It returns nil for empty input.
+func formatFinalStreamToolCallsWithStableIDs(calls []util.ParsedToolCall, ids map[int]string) []map[string]any {
+	if len(calls) == 0 {
+		return nil
+	}
+	formatted := make([]map[string]any, len(calls))
+	for idx := range calls {
+		// Reading from a nil map is safe and yields "".
+		id := strings.TrimSpace(ids[idx])
+		if id == "" {
+			id = "call_" + strings.ReplaceAll(uuid.NewString(), "-", "")
+			if ids != nil {
+				ids[idx] = id
+			}
+		}
+		encoded, _ := json.Marshal(calls[idx].Input)
+		function := map[string]any{
+			"name":      calls[idx].Name,
+			"arguments": string(encoded),
+		}
+		formatted[idx] = map[string]any{
+			"index":    idx,
+			"id":       id,
+			"type":     "function",
+			"function": function,
+		}
+	}
+	return formatted
+}
--- a/Show More
+++ b/Show More