Compare commits

..

45 Commits

Author SHA1 Message Date
CJACK.
3b29cf3da4 Merge pull request #215 from CJackHwang/dev
fix: ignore INCOMPLETE status messages in SSE stream parsing to prevent stream interruption
2026-04-06 00:31:18 +08:00
CJACK
a608a4bd95 chore(webui): migrate vite to v8 toolchain 2026-04-06 00:28:36 +08:00
CJACK
d9eee5fd2d docs: clarify server binding address and LAN accessibility in documentation and startup logs 2026-04-06 00:13:22 +08:00
CJACK
bac7345309 chore: remove unused file and associated references 2026-04-06 00:00:30 +08:00
CJACK
b699077b43 fix: make FINISHED status check case-insensitive in chat stream parsing 2026-04-05 23:56:19 +08:00
CJACK
49430123d8 fix: ignore INCOMPLETE status messages in SSE stream parsing to prevent stream interruption 2026-04-05 23:38:47 +08:00
CJACK.
6b5af06c19 Merge pull request #214 from CJackHwang/dev
Dev
2026-04-05 23:06:05 +08:00
CJACK
84813eca80 feat: support multi-round upstream captures in raw sample generation 2026-04-05 22:48:41 +08:00
CJACK
2a6b787f38 feat: implement mandatory DeepSeek turn termination markers for system, user, assistant, and tool roles 2026-04-05 21:48:18 +08:00
CJACK
a28c9fb67f feat: implement comprehensive configuration validation and integrate into store loading and server initialization. 2026-04-05 21:18:51 +08:00
CJACK
585d35e592 refactor: standardize prompt markers and remove legacy EOS and system instructions tags 2026-04-05 20:50:12 +08:00
CJACK
dc912419c4 Revert "refactor: rename tool XML wrapper from tool_calls to tool_batch and add schema attention blocks to tool prompts"
This reverts commit ade648033d.
2026-04-05 20:08:04 +08:00
CJACK
ade648033d refactor: rename tool XML wrapper from tool_calls to tool_batch and add schema attention blocks to tool prompts 2026-04-05 19:22:43 +08:00
CJACK
b8e9ca2028 refactor: stop stripping _raw and _xml fields from tool call inputs to preserve raw parameter data 2026-04-05 18:33:15 +08:00
CJACK
f2ad888de4 refactor: clean up config schema by removing legacy toolcall fields, standardizing auto_delete mode, and updating admin API documentation. 2026-04-05 18:16:31 +08:00
CJACK
e37ed38dc8 refactor: remove instruction regarding text-before-tool output in tool prompt documentation 2026-04-05 17:51:26 +08:00
CJACK
5d59775051 refactor: enforce strict XML-only output for tool calls and remove mixed-content instructions 2026-04-05 17:25:52 +08:00
CJACK
22efd8178b refactor: clarify tool call XML placement and add strict rules against result fabrication and internal narration 2026-04-05 17:13:59 +08:00
CJACK
088a750338 feat: add strict schema enforcement and strip hallucinated fields from tool call parameters 2026-04-05 16:59:39 +08:00
CJACK
298a6f27cc refactor: extract SSE parsing and Vercel stream logic into dedicated implementation modules 2026-04-05 16:32:13 +08:00
CJACK
1d80f644d4 refactor: remove legacy environment variable aliases for configuration and concurrency settings 2026-04-05 16:27:58 +08:00
CJACK
0b0cf60982 feat: propagate Proof-of-Work header to auto-continue requests and ensure remote session deletion ignores parent context cancellation 2026-04-05 14:33:09 +08:00
CJACK
32b9cbb61f feat: implement automatic completion continuation for incomplete DeepSeek responses 2026-04-05 13:59:39 +08:00
CJACK
eff979d9ef feat: enhance content filtering, token usage tracking, and stream error handling in chat-stream modules 2026-04-05 13:41:38 +08:00
CJACK
47dc121690 refactor: improve prompt construction by enforcing explicit newline boundaries between role markers and message content 2026-04-05 04:44:46 +08:00
CJACK
b94a16eca9 style: update AutoDeleteSection layout spacing and input styling 2026-04-05 04:37:16 +08:00
CJACK
97e72fb174 feat: add configurable auto-delete modes (none, single, all) for remote chat sessions 2026-04-05 04:18:34 +08:00
CJACK
f7261bec0d docs: remove obsolete raw stream samples and update testing documentation 2026-04-05 03:07:10 +08:00
CJACK
0bebb4b28d refactor: replace processed output comparison with baseline-based validation in SSE simulator 2026-04-05 01:34:06 +08:00
CJACK
93879c9808 feat: implement rawsample package for automated capture persistence and add admin handlers for sample management 2026-04-05 01:12:31 +08:00
CJACK
c9201174f6 feat: add compatibility setting to strip reference markers from model output and update stream handlers accordingly 2026-04-05 00:50:30 +08:00
CJACK
a6836455dc feat: add support for stripping inline comments in .env files and make Docker host port configurable via DS2API_HOST_PORT 2026-04-04 22:30:57 +08:00
CJACK
eab197f4d9 chore: upgrade Go to 1.26.x and update project dependencies 2026-04-04 22:00:18 +08:00
CJACK.
c8b6dfc290 Merge pull request #213 from Kazakiri220/main
Load .env and config.json for local go runs
2026-04-04 21:44:36 +08:00
Kazakiri220
541bd67c7c Load .env and config.json for local go runs 2026-04-04 20:21:17 +08:00
CJACK.
068f4b0df6 Merge pull request #211 from CJackHwang/dev
Merge pull request #209 from CJackHwang/codex/investigate-command-output-loss-issue

Preserve trailing newlines when stripping leaked CONTENT_FILTER and strengthen XML tool-call parsing
2026-04-04 02:20:39 +08:00
CJACK.
5a51045ba4 Merge pull request #212 from CJackHwang/codex/address-handling-of-upstream-vacuum-returns
fix: classify empty upstream output correctly and harden XML tool-name parsing
2026-04-04 02:20:01 +08:00
CJACK.
3497d5d019 fix: classify empty upstream and tighten xml tool-name parsing 2026-04-04 02:14:39 +08:00
CJACK.
95a9d16843 Merge pull request #209 from CJackHwang/codex/investigate-command-output-loss-issue
Preserve trailing newlines when stripping leaked CONTENT_FILTER and strengthen XML tool-call parsing
2026-04-04 01:53:39 +08:00
CJACK.
0847091864 fix: avoid taking generic <name> as xml tool name 2026-04-04 01:52:57 +08:00
CJACK.
c6340354ec fix: keep execute_command args from xml parameters blocks 2026-04-04 01:42:31 +08:00
CJACK.
6bf08e00cd Update VERSION 2026-04-03 23:38:34 +08:00
CJACK.
35221002d5 Merge pull request #207 from CJackHwang/codex/svg
Fix XML passthrough in tool sieve and return 429 on empty upstream output
2026-04-03 23:37:27 +08:00
CJACK.
4b1f1ea550 Preserve suffix after non-tool XML passthrough 2026-04-03 23:36:28 +08:00
CJACK.
0258f83d10 Fix XML passthrough and empty-upstream handling 2026-04-03 22:31:15 +08:00
152 changed files with 12050 additions and 3434 deletions

View File

@@ -1,5 +1,8 @@
# DS2API runtime
# Runtime listen port inside the app/container
PORT=5001
# Docker Compose host port (compose only; container still listens on PORT)
DS2API_HOST_PORT=6011
LOG_LEVEL=INFO
# Admin authentication

View File

@@ -19,7 +19,7 @@ jobs:
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: "1.24.x"
go-version: "1.26.x"
- name: Setup Node
uses: actions/setup-node@v4

View File

@@ -27,7 +27,7 @@ jobs:
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: "1.24.x"
go-version: "1.26.x"
- name: Setup Node
uses: actions/setup-node@v4

1
.gitignore vendored
View File

@@ -9,6 +9,7 @@ config.json
*.swo
*~
.DS_Store
opencode.json
# Logs
*.log

View File

@@ -52,8 +52,7 @@ cp config.example.json config.json
Use it per deployment mode:
- Local run: read `config.json` directly
- Docker / Vercel: generate Base64 from `config.json`, then set `DS2API_CONFIG_JSON`
- Compatibility note: `DS2API_CONFIG_JSON` may also contain raw JSON directly; `CONFIG_JSON` is the legacy fallback variable
- Docker / Vercel: generate Base64 from `config.json`, then set `DS2API_CONFIG_JSON`, or paste raw JSON directly
```bash
DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
@@ -356,7 +355,8 @@ data: [DONE]
```
If `tool_choice=required` is violated in stream mode, DS2API emits `response.failed` then `[DONE]` (no `response.completed`).
Unknown tool names (outside declared `tools`) are rejected and will not be emitted as valid tool calls.
> Current behavior: the parser tries to extract structured tool calls but does not enforce a hard allow-list rejection; your tool executor should still validate tool names against a whitelist before executing them.
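A minimal client-side sketch of that whitelist check, assuming the response was saved to `response.json`, the standard OpenAI-compatible `choices[].message.tool_calls` shape, and placeholder tool names:
```bash
# Hypothetical allow-list; replace with the tool names you actually declare in `tools`.
ALLOWED='["get_weather","search_docs"]'
# Print any tool-call names that are NOT in the allow-list; non-empty output means
# the call should be rejected instead of executed.
jq --argjson allowed "$ALLOWED" \
  '[.choices[0].message.tool_calls[]?.function.name
    | select(. as $n | $allowed | index($n) | not)]' response.json
```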
### `GET /v1/responses/{response_id}`
@@ -642,8 +642,9 @@ Reads runtime settings and status, including:
- `success`
- `admin` (`has_password_hash`, `jwt_expire_hours`, `jwt_valid_after_unix`, `default_password_warning`)
- `runtime` (`account_max_inflight`, `account_max_queue`, `global_max_inflight`, `token_refresh_interval_hours`)
- `compat` (`wide_input_strict_output`, `strip_reference_markers`)
- `responses` / `embeddings`
- `auto_delete` (`sessions`)
- `auto_delete` (`mode`: `none` / `single` / `all`; legacy `sessions=true` is still treated as `all`)
- `claude_mapping` / `model_aliases`
- `env_backed`, `needs_vercel_sync`
- `toolcall` policy is fixed to `feature_match + high` and is no longer returned or editable via settings
@@ -654,9 +655,10 @@ Hot-updates runtime settings. Supported fields:
- `admin.jwt_expire_hours`
- `runtime.account_max_inflight` / `runtime.account_max_queue` / `runtime.global_max_inflight` / `runtime.token_refresh_interval_hours`
- `compat.wide_input_strict_output` / `compat.strip_reference_markers`
- `responses.store_ttl_seconds`
- `embeddings.provider`
- `auto_delete.sessions`
- `auto_delete.mode`
- `claude_mapping`
- `model_aliases`
- `toolcall` policy is fixed and is no longer writable through settings
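A minimal sketch of hot-updating one of the fields listed above, for example switching the auto-delete mode. The `/admin/settings` path is referenced elsewhere in these docs; the HTTP method and `Authorization` header shape are assumptions here:
```bash
# Assumed method and auth header; adjust to your deployment.
curl -X POST http://127.0.0.1:5001/admin/settings \
  -H "Authorization: Bearer $ADMIN_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{"auto_delete": {"mode": "single"}}'
```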
@@ -684,6 +686,8 @@ The request can send config directly, or wrapped as `{"config": {...}, "mode":"m
Query params `?mode=merge` / `?mode=replace` are also supported.
Import accepts `keys`, `accounts`, `claude_mapping` / `claude_model_mapping`, `model_aliases`, `admin`, `runtime`, `responses`, `embeddings`, and `auto_delete`; legacy `toolcall` fields are ignored.
> `compat` fields are managed via `/admin/settings` or the config file; this import endpoint does not update `compat`.
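A hedged sketch of a merge import; the accepted fields and `?mode=` parameter come from the text above, but the import path itself is not shown in this excerpt and is assumed here:
```bash
# The /admin/config/import path is an assumption for illustration only.
curl -X POST "http://127.0.0.1:5001/admin/config/import?mode=merge" \
  -H "Authorization: Bearer $ADMIN_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{"config": {"model_aliases": {"gpt-4o": "deepseek-chat"}}}'
```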
### `GET /admin/config/export`
Exports full config in three forms: `config`, `json`, and `base64`.
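For example, to grab just the Base64 form for reuse as `DS2API_CONFIG_JSON` (a sketch: the top-level `.base64` field path is inferred from the three forms listed above, and the auth header shape is an assumption):
```bash
curl -s -H "Authorization: Bearer $ADMIN_TOKEN" \
  http://127.0.0.1:5001/admin/config/export | jq -r '.base64'
```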
@@ -758,17 +762,25 @@ Returned items also include `test_status`, usually `ok` or `failed`.
"available_accounts": ["a@example.com"],
"in_use_accounts": ["b@example.com"],
"max_inflight_per_account": 2,
"recommended_concurrency": 8
"global_max_inflight": 8,
"recommended_concurrency": 8,
"waiting": 0,
"max_queue_size": 8
}
```
| Field | Description |
| --- | --- |
| `available` | Currently available accounts |
| `in_use` | Currently in-use accounts |
| `available` | Accounts that still have spare inflight capacity |
| `in_use` | Number of occupied in-flight slots |
| `total` | Total accounts |
| `available_accounts` | List of account IDs with remaining inflight capacity |
| `in_use_accounts` | List of account IDs currently in use |
| `max_inflight_per_account` | Per-account inflight limit |
| `global_max_inflight` | Global inflight limit |
| `recommended_concurrency` | Suggested concurrency (`total × max_inflight_per_account`) |
| `waiting` | Number of queued requests currently waiting |
| `max_queue_size` | Waiting queue limit |
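A small sketch of reading queue pressure from the fields above, assuming the status payload was saved to `status.json` (the fetch itself is omitted because the endpoint path is not shown in this excerpt):
```bash
jq -r '"recommended_concurrency=\(.recommended_concurrency) waiting=\(.waiting)/\(.max_queue_size)"' status.json
```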
### `POST /admin/accounts/test`

27
API.md
View File

@@ -52,8 +52,7 @@ cp config.example.json config.json
按部署方式使用:
- 本地运行:直接读取 `config.json`
- Docker / Vercel`config.json` 生成 Base64填入 `DS2API_CONFIG_JSON`
- 兼容写法:`DS2API_CONFIG_JSON` 也可直接填原始 JSON`CONFIG_JSON` 是旧版兼容回退变量
- Docker / Vercel`config.json` 生成 Base64填入 `DS2API_CONFIG_JSON`,也可以直接填原始 JSON
```bash
DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
@@ -509,8 +508,6 @@ data: {"type":"message_stop"}
}
```
返回项还会包含 `test_status`,当前值通常为 `ok``failed`
---
## Gemini 兼容接口
@@ -651,8 +648,9 @@ data: {"type":"message_stop"}
- `success`
- `admin``has_password_hash``jwt_expire_hours``jwt_valid_after_unix``default_password_warning`
- `runtime``account_max_inflight``account_max_queue``global_max_inflight``token_refresh_interval_hours`
- `compat``wide_input_strict_output``strip_reference_markers`
- `responses` / `embeddings`
- `auto_delete``sessions`
- `auto_delete``mode``none` / `single` / `all`;旧配置 `sessions=true` 仍按 `all` 处理
- `claude_mapping` / `model_aliases`
- `env_backed``needs_vercel_sync`
- `toolcall` 策略已固定为 `feature_match + high`,不再通过 settings 返回或修改
@@ -663,9 +661,10 @@ data: {"type":"message_stop"}
- `admin.jwt_expire_hours`
- `runtime.account_max_inflight` / `runtime.account_max_queue` / `runtime.global_max_inflight` / `runtime.token_refresh_interval_hours`
- `compat.wide_input_strict_output` / `compat.strip_reference_markers`
- `responses.store_ttl_seconds`
- `embeddings.provider`
- `auto_delete.sessions`
- `auto_delete.mode`
- `claude_mapping`
- `model_aliases`
- `toolcall` 策略已固定,不再作为可写入字段
@@ -693,6 +692,8 @@ data: {"type":"message_stop"}
也支持在查询参数里传 `?mode=merge` / `?mode=replace`
导入时会接受 `keys``accounts``claude_mapping` / `claude_model_mapping``model_aliases``admin``runtime``responses``embeddings``auto_delete` 等字段;`toolcall` 相关字段会被忽略。
> `compat` 相关字段请通过 `/admin/settings` 或配置文件管理;该导入接口不会更新 `compat`。
### `GET /admin/config/export`
导出完整配置,返回 `config``json``base64` 三种格式。
@@ -765,17 +766,25 @@ data: {"type":"message_stop"}
"available_accounts": ["a@example.com"],
"in_use_accounts": ["b@example.com"],
"max_inflight_per_account": 2,
"recommended_concurrency": 8
"global_max_inflight": 8,
"recommended_concurrency": 8,
"waiting": 0,
"max_queue_size": 8
}
```
| 字段 | 说明 |
| --- | --- |
| `available` | 当前可用账号数 |
| `in_use` | 当前使用中的账号数 |
| `available` | 仍有剩余并发槽位的账号数 |
| `in_use` | 当前已占用的 in-flight 槽位数 |
| `total` | 总账号数 |
| `available_accounts` | 仍有剩余并发槽位的账号 ID 列表 |
| `in_use_accounts` | 当前处于使用中的账号 ID 列表 |
| `max_inflight_per_account` | 每账号并发上限 |
| `global_max_inflight` | 全局并发上限 |
| `recommended_concurrency` | 建议并发值(`total × max_inflight_per_account` |
| `waiting` | 当前等待中的请求数 |
| `max_queue_size` | 等待队列上限 |
### `POST /admin/accounts/test`

View File

@@ -76,7 +76,7 @@ flowchart LR
- **前端**React 管理台(`webui/`),运行时托管静态构建产物
- **部署**本地运行、Docker、Vercel Serverless、Linux systemd
### 3.0 底层架构调整(相较旧版本)
### 3.X 底层架构调整(相较旧版本)
- **统一路由内核**:所有协议入口统一汇聚到 `internal/server/router.go`,并在同一路由树中注册 OpenAI / Claude / Gemini / Admin / WebUI 路由,避免多入口行为漂移。
- **统一执行链路**Claude / Gemini 入口先经 `internal/translatorcliproxy` 做协议转换,再进入 `openai.ChatCompletions` 统一处理工具调用与流式语义,最后再转换回原协议响应。
@@ -111,7 +111,6 @@ flowchart LR
| P0 | Anthropic SDKmessages | ✅ |
| P0 | Google Gemini SDKgenerateContent | ✅ |
| P1 | LangChain / LlamaIndex / OpenWebUIOpenAI 兼容接入) | ✅ |
| P2 | MCP 独立桥接层 | 规划中 |
## 模型支持
@@ -160,12 +159,11 @@ cp config.example.json config.json
后续部署建议:
- 本地运行:直接读取 `config.json`
- Docker / Vercel由 `config.json` 生成 `DS2API_CONFIG_JSON`Base64注入环境变量
- 兼容写法:`DS2API_CONFIG_JSON` 也可以直接写原始 JSON`CONFIG_JSON` 是旧版回退变量
- Docker / Vercel由 `config.json` 生成 `DS2API_CONFIG_JSON`Base64注入环境变量,也可以直接写原始 JSON
### 方式一:本地运行
**前置要求**Go 1.26+Node.js 20+(仅在需要构建 WebUI 时)
**前置要求**Go 1.26+Node.js `20.19+` 或 `22.12+`(仅在需要构建 WebUI 时)
```bash
# 1. 克隆仓库
@@ -180,7 +178,9 @@ cp config.example.json config.json
go run ./cmd/ds2api
```
默认监听地址:`http://localhost:5001`
默认本地访问地址:`http://127.0.0.1:5001`
服务实际绑定:`0.0.0.0:5001`,因此同一局域网设备通常也可以通过你的内网 IP 访问。
> **WebUI 自动构建**:本地首次启动时,若 `static/admin` 不存在,会自动尝试执行 `npm ci`(仅在缺少依赖时)和 `npm run build -- --outDir static/admin --emptyOutDir`(需要本机有 Node.js。你也可以手动构建`./scripts/build-webui.sh`
@@ -191,7 +191,7 @@ go run ./cmd/ds2api
cp .env.example .env
cp config.example.json config.json
# 2. 编辑 .env至少设置 DS2API_ADMIN_KEY
# 2. 编辑 .env至少设置 DS2API_ADMIN_KEY;如需修改宿主机端口,可额外设置 DS2API_HOST_PORT
# DS2API_ADMIN_KEY=请替换为强密码
# 3. 启动
@@ -201,7 +201,7 @@ docker-compose up -d
docker-compose logs -f
```
默认 `docker-compose.yml` 会把宿主机 `6011` 映射到容器内的 `5001`。如果你希望直接对外暴露 `5001`,请调整 `ports` 配置。
默认 `docker-compose.yml` 会把宿主机 `6011` 映射到容器内的 `5001`。如果你希望直接对外暴露 `5001`,请设置 `DS2API_HOST_PORT=5001`(或者手动调整 `ports` 配置
更新镜像:`docker-compose up -d --build`
@@ -290,7 +290,8 @@ cp opencode.json.example opencode.json
"o3": "deepseek-reasoner"
},
"compat": {
"wide_input_strict_output": true
"wide_input_strict_output": true,
"strip_reference_markers": true
},
"responses": {
"store_ttl_seconds": 900
@@ -312,7 +313,7 @@ cp opencode.json.example opencode.json
"token_refresh_interval_hours": 6
},
"auto_delete": {
"sessions": false
"mode": "none"
}
}
```
@@ -322,13 +323,14 @@ cp opencode.json.example opencode.json
- `token`:配置文件中即使填写也会在加载时被清空(不会从 `config.json` 读取 token实际 token 仅在运行时内存中维护并自动刷新
- `model_aliases`:常见模型名(如 GPT/Codex/Claude到 DeepSeek 模型的映射
- `compat.wide_input_strict_output`:建议保持 `true`(当前实现默认宽进严出)
- `toolcall`:策略已固定为特征匹配 + 高置信早发,不再作为可配置项
- `compat.strip_reference_markers`:建议保持 `true`,用于清理可见输出中的引用/标记
- `toolcall`:旧字段,当前实现已固定为特征匹配 + 高置信早发;即使保留在配置里也会被忽略
- `responses.store_ttl_seconds``/v1/responses/{id}` 的内存缓存 TTL
- `embeddings.provider`embedding 提供方(当前内置 `deterministic/mock/builtin`
- `claude_mapping`:字典中 `fast`/`slow` 后缀映射到对应 DeepSeek 模型(兼容读取 `claude_model_mapping`
- `admin`管理后台设置JWT 过期时间、密码哈希等),可通过 Admin Settings API 热更新
- `runtime`:运行时参数(并发限制、队列大小、托管账号 token 刷新间隔),可通过 Admin Settings API 热更新;`account_max_queue=0`/`global_max_inflight=0` 表示按推荐值自动计算,`token_refresh_interval_hours=6` 为默认强制重登间隔
- `auto_delete.sessions`:是否在请求结束后自动清理 DeepSeek 会话(默认 `false`,可在 Settings 热更新)
- `auto_delete.mode`请求结束后如何清理 DeepSeek 远端聊天记录,支持 `none`(默认,不删除)、`single`(仅删除当前会话)、`all`(清空全部会话);旧配置里的 `auto_delete.sessions=true` 仍会被视为 `all`
### 环境变量
@@ -341,7 +343,6 @@ cp opencode.json.example opencode.json
| `DS2API_JWT_EXPIRE_HOURS` | Admin JWT 过期小时数 | `24` |
| `DS2API_CONFIG_PATH` | 配置文件路径 | `config.json` |
| `DS2API_CONFIG_JSON` | 直接注入配置JSON 或 Base64 | — |
| `CONFIG_JSON` | 旧版兼容配置注入 | — |
| `DS2API_ENV_WRITEBACK` | 环境变量模式下自动写回配置文件并切换文件模式(`1/true/yes/on` | 关闭 |
| `DS2API_WASM_PATH` | PoW WASM 文件路径 | 自动查找 |
| `DS2API_STATIC_ADMIN_DIR` | 管理台静态文件目录 | `static/admin` |
@@ -350,22 +351,16 @@ cp opencode.json.example opencode.json
| `DS2API_DEV_PACKET_CAPTURE_LIMIT` | 本地抓包保留条数(超出自动淘汰) | `5` |
| `DS2API_DEV_PACKET_CAPTURE_MAX_BODY_BYTES` | 单条响应体最大记录字节数 | `2097152` |
| `DS2API_ACCOUNT_MAX_INFLIGHT` | 每账号最大并发 in-flight 请求数 | `2` |
| `DS2API_ACCOUNT_CONCURRENCY` | 同上(兼容旧名) | — |
| `DS2API_ACCOUNT_MAX_QUEUE` | 等待队列上限 | `recommended_concurrency` |
| `DS2API_ACCOUNT_QUEUE_SIZE` | 同上(兼容旧名) | — |
| `DS2API_GLOBAL_MAX_INFLIGHT` | 全局最大 in-flight 请求数 | `recommended_concurrency` |
| `DS2API_MAX_INFLIGHT` | 同上(兼容旧名) | — |
| `DS2API_VERCEL_INTERNAL_SECRET` | Vercel 混合流式内部鉴权密钥 | 回退用 `DS2API_ADMIN_KEY` |
| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | 流式 lease 过期秒数 | `900` |
| `DS2API_DEV_PACKET_CAPTURE` | 本地开发抓包开关(记录最近会话请求/响应体) | 本地非 Vercel 默认开启 |
| `DS2API_DEV_PACKET_CAPTURE_LIMIT` | 本地抓包保留条数(超出自动淘汰) | `5` |
| `DS2API_DEV_PACKET_CAPTURE_MAX_BODY_BYTES` | 单条响应体最大记录字节数 | `2097152` |
| `VERCEL_TOKEN` | Vercel 同步 token | — |
| `VERCEL_PROJECT_ID` | Vercel 项目 ID | — |
| `VERCEL_TEAM_ID` | Vercel 团队 ID | — |
| `DS2API_VERCEL_PROTECTION_BYPASS` | Vercel 部署保护绕过密钥(内部 Node→Go 调用) | — |
> 提示:当检测到 `DS2API_CONFIG_JSON/CONFIG_JSON` 时,管理台会显示当前模式风险与自动持久化状态(含 `DS2API_CONFIG_PATH` 路径与模式切换说明)。
> 提示:当检测到 `DS2API_CONFIG_JSON` 时,管理台会显示当前模式风险与自动持久化状态(含 `DS2API_CONFIG_PATH` 路径与模式切换说明)。
## 鉴权模式
@@ -455,6 +450,7 @@ ds2api/
│ ├── deepseek/ # DeepSeek API 客户端、PoW WASM
│ ├── js/ # Node 运行时流式处理与兼容逻辑
│ ├── devcapture/ # 开发抓包模块
│ ├── rawsample/ # 原始流样本可见文本提取与回放辅助
│ ├── format/ # 输出格式化
│ ├── prompt/ # Prompt 构建
│ ├── server/ # HTTP 路由与中间件chi router
@@ -476,6 +472,7 @@ ds2api/
├── tests/
│ ├── compat/ # 兼容性测试夹具与期望输出
│ ├── node/ # Node 侧单元测试chat-stream / tool-sieve
│ ├── raw_stream_samples/ # 原始 SSE 样本与回放元数据
│ └── scripts/ # 统一测试脚本入口unit/e2e
├── docs/ # 部署 / 贡献 / 测试等辅助文档
├── static/admin/ # WebUI 构建产物(不提交到 Git

View File

@@ -76,7 +76,7 @@ flowchart LR
- **Frontend**: React admin panel (`webui/`), served as static build at runtime
- **Deployment**: local run, Docker, Vercel serverless, Linux systemd
### 3.0 Architecture Changes (vs older releases)
### 3.X Architecture Changes (vs older releases)
- **Unified routing core**: all protocol entries are now centralized through `internal/server/router.go`, with OpenAI / Claude / Gemini / Admin / WebUI routes registered in one tree to avoid multi-entry drift.
- **Unified execution chain**: Claude/Gemini entries are translated by `internal/translatorcliproxy`, then executed through `openai.ChatCompletions` for shared tool-calling and stream semantics, then translated back to the client protocol.
@@ -111,7 +111,6 @@ flowchart LR
| P0 | Anthropic SDK (messages) | ✅ |
| P0 | Google Gemini SDK (generateContent) | ✅ |
| P1 | LangChain / LlamaIndex / OpenWebUI (OpenAI-compatible integration) | ✅ |
| P2 | MCP standalone bridge | Planned |
## Model Support
@@ -160,12 +159,11 @@ cp config.example.json config.json
Recommended per deployment mode:
- Local run: read `config.json` directly
- Docker / Vercel: generate Base64 from `config.json` and inject as `DS2API_CONFIG_JSON`
- Compatibility note: `DS2API_CONFIG_JSON` may also contain raw JSON directly; `CONFIG_JSON` is the legacy fallback variable
- Docker / Vercel: generate Base64 from `config.json` and inject as `DS2API_CONFIG_JSON`, or paste raw JSON directly
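Both forms noted in the last bullet work, for example:
```bash
# Base64-encoded config (same command as in API.md):
DS2API_CONFIG_JSON="$(base64 < config.json | tr -d '\n')"
# or raw JSON pasted directly:
DS2API_CONFIG_JSON="$(cat config.json)"
```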
### Option 1: Local Run
**Prerequisites**: Go 1.26+, Node.js 20+ (only if building WebUI locally)
**Prerequisites**: Go 1.26+, Node.js `20.19+` or `22.12+` (only if building WebUI locally)
```bash
# 1. Clone
@@ -180,7 +178,9 @@ cp config.example.json config.json
go run ./cmd/ds2api
```
Default URL: `http://localhost:5001`
Default local URL: `http://127.0.0.1:5001`
The server actually binds to `0.0.0.0:5001`, so devices on the same LAN can usually reach it through your private IP as well.
> **WebUI auto-build**: On first local startup, if `static/admin` is missing, DS2API will auto-run `npm ci` (only when dependencies are missing) and `npm run build -- --outDir static/admin --emptyOutDir` (requires Node.js). You can also build manually: `./scripts/build-webui.sh`
@@ -191,7 +191,7 @@ Default URL: `http://localhost:5001`
cp .env.example .env
cp config.example.json config.json
# 2. Edit .env (at least set DS2API_ADMIN_KEY)
# 2. Edit .env (at least set DS2API_ADMIN_KEY; optionally set DS2API_HOST_PORT to change the host port)
# DS2API_ADMIN_KEY=replace-with-a-strong-secret
# 3. Start
@@ -201,7 +201,7 @@ docker-compose up -d
docker-compose logs -f
```
The default `docker-compose.yml` maps host port `6011` to container port `5001`. If you want `5001` exposed directly, adjust the `ports` mapping.
The default `docker-compose.yml` maps host port `6011` to container port `5001`. If you want `5001` exposed directly, set `DS2API_HOST_PORT=5001` (or adjust the `ports` mapping).
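Because Compose also resolves the variable from the shell environment, a one-off override works without editing `.env`:
```bash
DS2API_HOST_PORT=5001 docker-compose up -d
```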
Rebuild after updates: `docker-compose up -d --build`
@@ -290,7 +290,8 @@ cp opencode.json.example opencode.json
"o3": "deepseek-reasoner"
},
"compat": {
"wide_input_strict_output": true
"wide_input_strict_output": true,
"strip_reference_markers": true
},
"responses": {
"store_ttl_seconds": 900
@@ -312,7 +313,7 @@ cp opencode.json.example opencode.json
"token_refresh_interval_hours": 6
},
"auto_delete": {
"sessions": false
"mode": "none"
}
}
```
@@ -322,13 +323,14 @@ cp opencode.json.example opencode.json
- `token`: Even if set in `config.json`, it is cleared during load (DS2API does not read persisted tokens from config); runtime tokens are maintained/refreshed in memory only
- `model_aliases`: Map common model names (GPT/Codex/Claude) to DeepSeek models
- `compat.wide_input_strict_output`: Keep `true` (current default policy)
- `toolcall`: Fixed to feature matching + high-confidence early emit, no longer configurable
- `compat.strip_reference_markers`: Keep `true`; it strips reference markers from visible output
- `toolcall`: Legacy field; the current behavior is fixed to feature matching + high-confidence early emit, and any config value is ignored
- `responses.store_ttl_seconds`: In-memory TTL for `/v1/responses/{id}`
- `embeddings.provider`: Embeddings provider (`deterministic/mock/builtin` built-in)
- `claude_mapping`: Maps `fast`/`slow` suffixes to corresponding DeepSeek models (still compatible with `claude_model_mapping`)
- `admin`: Admin panel settings (JWT expiry, password hash, etc.), hot-reloadable via Admin Settings API
- `runtime`: Runtime parameters (concurrency limits, queue sizes, managed token refresh interval), hot-reloadable via Admin Settings API; `account_max_queue=0`/`global_max_inflight=0` means auto-calculate from recommended values, `token_refresh_interval_hours=6` is the default forced re-login interval
- `auto_delete.sessions`: Whether to auto-delete DeepSeek sessions after request completion (default `false`, hot-reloadable via Settings)
- `auto_delete.mode`: How to clean up DeepSeek remote chat records after each request completes. Supported values: `none` (default, no deletion), `single` (delete only the current session), `all` (delete all sessions); legacy `auto_delete.sessions=true` is still treated as `all`
### Environment Variables
@@ -341,17 +343,13 @@ cp opencode.json.example opencode.json
| `DS2API_JWT_EXPIRE_HOURS` | Admin JWT TTL in hours | `24` |
| `DS2API_CONFIG_PATH` | Config file path | `config.json` |
| `DS2API_CONFIG_JSON` | Inline config (JSON or Base64) | — |
| `CONFIG_JSON` | Legacy compatibility config input | — |
| `DS2API_ENV_WRITEBACK` | Auto-write env-backed config to file and transition to file mode (`1/true/yes/on`) | Disabled |
| `DS2API_WASM_PATH` | PoW WASM file path | Auto-detect |
| `DS2API_STATIC_ADMIN_DIR` | Admin static assets dir | `static/admin` |
| `DS2API_AUTO_BUILD_WEBUI` | Auto-build WebUI on startup | Enabled locally, disabled on Vercel |
| `DS2API_ACCOUNT_MAX_INFLIGHT` | Max in-flight requests per account | `2` |
| `DS2API_ACCOUNT_CONCURRENCY` | Alias (legacy compat) | — |
| `DS2API_ACCOUNT_MAX_QUEUE` | Waiting queue limit | `recommended_concurrency` |
| `DS2API_ACCOUNT_QUEUE_SIZE` | Alias (legacy compat) | — |
| `DS2API_GLOBAL_MAX_INFLIGHT` | Global max in-flight requests | `recommended_concurrency` |
| `DS2API_MAX_INFLIGHT` | Alias (legacy compat) | — |
| `DS2API_VERCEL_INTERNAL_SECRET` | Vercel hybrid streaming internal auth | Falls back to `DS2API_ADMIN_KEY` |
| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | Stream lease TTL seconds | `900` |
| `DS2API_DEV_PACKET_CAPTURE` | Local dev packet capture switch (record recent request/response bodies) | Enabled by default on non-Vercel local runtime |
@@ -362,7 +360,7 @@ cp opencode.json.example opencode.json
| `VERCEL_TEAM_ID` | Vercel team ID | — |
| `DS2API_VERCEL_PROTECTION_BYPASS` | Vercel deployment protection bypass for internal Node→Go calls | — |
> Note: when `DS2API_CONFIG_JSON/CONFIG_JSON` is detected, the Admin UI shows mode risk and auto-persistence status (including `DS2API_CONFIG_PATH` and mode-transition hints).
> Note: when `DS2API_CONFIG_JSON` is detected, the Admin UI shows mode risk and auto-persistence status (including `DS2API_CONFIG_PATH` and mode-transition hints).
## Authentication Modes
@@ -449,6 +447,7 @@ ds2api/
│ ├── deepseek/ # DeepSeek API client, PoW WASM
│ ├── js/ # Node runtime stream/compat logic
│ ├── devcapture/ # Dev packet capture module
│ ├── rawsample/ # Visible-text extraction and replay helpers for raw stream samples
│ ├── format/ # Output formatting
│ ├── prompt/ # Prompt construction
│ ├── server/ # HTTP routing and middleware (chi router)
@@ -470,6 +469,7 @@ ds2api/
├── tests/
│ ├── compat/ # Compatibility fixtures and expected outputs
│ ├── node/ # Node-side unit tests (chat-stream / tool-sieve)
│ ├── raw_stream_samples/ # Raw SSE samples and replay metadata
│ └── scripts/ # Unified test script entrypoints (unit/e2e)
├── docs/ # Deployment / contributing / testing docs
├── static/admin/ # WebUI build output (not committed to Git)

View File

@@ -1 +1 @@
3.0.0
3.1.0

View File

@@ -3,9 +3,17 @@ package app
import (
"net/http"
"ds2api/internal/config"
"ds2api/internal/server"
)
func NewHandler() http.Handler {
return server.NewApp().Router
app, err := server.NewApp()
if err != nil {
config.Logger.Error("[app] init failed", "error", err)
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
server.WriteUnhandledError(w, err)
})
}
return app.Router
}

View File

@@ -18,9 +18,17 @@ import (
)
func main() {
if err := config.LoadDotEnv(); err != nil {
config.Logger.Warn("[dotenv] load failed", "error", err)
}
config.RefreshLogger()
webui.EnsureBuiltOnStartup()
_ = auth.AdminKey()
app := server.NewApp()
app, err := server.NewApp()
if err != nil {
config.Logger.Error("server initialization failed", "error", err)
os.Exit(1)
}
port := strings.TrimSpace(os.Getenv("PORT"))
if port == "" {
port = "5001"

View File

@@ -28,11 +28,8 @@
"o3": "deepseek-reasoner"
},
"compat": {
"wide_input_strict_output": true
},
"toolcall": {
"mode": "feature_match",
"early_emit_confidence": "high"
"wide_input_strict_output": true,
"strip_reference_markers": true
},
"responses": {
"store_ttl_seconds": 900
@@ -50,9 +47,10 @@
"runtime": {
"account_max_inflight": 2,
"account_max_queue": 0,
"global_max_inflight": 0
"global_max_inflight": 0,
"token_refresh_interval_hours": 6
},
"auto_delete": {
"sessions": false
"mode": "none"
}
}

View File

@@ -16,7 +16,8 @@ services:
container_name: ds2api-dev
command: ["go", "run", "./cmd/ds2api"]
ports:
- "${PORT:-5001}:${PORT:-5001}"
# Host port is configurable via DS2API_HOST_PORT; container port stays fixed at 5001.
- "${DS2API_HOST_PORT:-6011}:5001"
env_file:
- .env
environment:

View File

@@ -1,14 +1,16 @@
services:
ds2api:
image: ghcr.io/cjackhwang/ds2api:latest
container_name: ds2api
restart: always
ports:
- "6011:5001"
volumes:
- ./config.json:/app/config.json # 配置文件
- ./.env:/app/.env # 环境变量
environment:
- TZ=Asia/Shanghai
- LOG_LEVEL=INFO
- DS2API_ADMIN_KEY=${DS2API_ADMIN_KEY:-ds2api}
container_name: ds2api
restart: always
env_file:
- .env
ports:
# Host port is configurable via DS2API_HOST_PORT; container port stays fixed at 5001.
- "${DS2API_HOST_PORT:-6011}:5001"
volumes:
- ./config.json:/app/config.json # 配置文件
environment:
- TZ=Asia/Shanghai
- LOG_LEVEL=INFO
- DS2API_ADMIN_KEY=${DS2API_ADMIN_KEY:-ds2api}

View File

@@ -9,7 +9,7 @@ Thanks for your interest in contributing to DS2API!
### Prerequisites
- Go 1.26+
- Node.js 20+ (for WebUI development)
- Node.js `20.19+` or `22.12+` (for WebUI development)
- npm (bundled with Node.js)
### Backend Development
@@ -25,7 +25,8 @@ cp config.example.json config.json
# 3. Run backend
go run ./cmd/ds2api
# Default: http://localhost:5001
# Local access: http://127.0.0.1:5001
# Actual bind: 0.0.0.0:5001, so LAN access is available via your private IP
```
### Frontend Development (WebUI)

View File

@@ -9,7 +9,7 @@
### 前置要求
- Go 1.26+
- Node.js 20+WebUI 开发时)
- Node.js `20.19+``22.12+`WebUI 开发时)
- npm随 Node.js 提供)
### 后端开发
@@ -25,7 +25,8 @@ cp config.example.json config.json
# 3. 启动后端
go run ./cmd/ds2api
# 默认监听 http://localhost:5001
# 本地访问 http://127.0.0.1:5001
# 实际绑定 0.0.0.0:5001可通过局域网 IP 访问
```
### 前端开发WebUI

View File

@@ -25,14 +25,13 @@ This guide covers all deployment methods for the current Go-based codebase.
| Dependency | Minimum Version | Notes |
| --- | --- | --- |
| Go | 1.26+ | Build backend |
| Node.js | 20+ | Only needed to build WebUI locally |
| Node.js | `20.19+` or `22.12+` | Only needed to build WebUI locally |
| npm | Bundled with Node.js | Install WebUI dependencies |
Config source (choose one):
- **File**: `config.json` (recommended for local/Docker)
- **Environment variable**: `DS2API_CONFIG_JSON` (recommended for Vercel; supports raw JSON or Base64)
- Compatibility note: `CONFIG_JSON` is the legacy fallback variable; `DS2API_CONFIG_JSON` may also contain raw JSON directly
Unified recommendation (best practice):
@@ -117,6 +116,8 @@ cp config.example.json config.json
# Edit .env and set at least:
# DS2API_ADMIN_KEY=your-admin-key
# Optionally set the host port:
# DS2API_HOST_PORT=6011
# Start
docker-compose up -d
@@ -125,7 +126,7 @@ docker-compose up -d
docker-compose logs -f
```
The default `docker-compose.yml` maps host port `6011` to container port `5001`. If you want `5001` exposed directly, adjust the `ports` mapping.
The default `docker-compose.yml` maps host port `6011` to container port `5001`. If you want `5001` exposed directly, set `DS2API_HOST_PORT=5001` (or adjust the `ports` mapping).
### 2.2 Update
@@ -138,7 +139,7 @@ docker-compose up -d --build
The `Dockerfile` now provides two image paths:
1. **Default local/dev path (`runtime-from-source`)**: a three-stage build (WebUI build + Go build + runtime).
2. **Release path (`runtime-from-dist`)**: CI first creates `dist/ds2api_<tag>_linux_<arch>.tar.gz`, then Docker directly reuses the binary and `static/admin` assets from those release archives, without running `npm build`/`go build` again.
2. **Release path (`runtime-from-dist`)**: the release workflow first creates tag-named release archives, then copies the Linux bundles to `dist/docker-input/linux_amd64.tar.gz` / `linux_arm64.tar.gz`; Docker consumes those prepared inputs directly, without rerunning `npm build`/`go build`.
The release path keeps Docker images aligned with release archives and reduces duplicate build work.
@@ -198,10 +199,10 @@ Notes:
2. **Import** the project on Vercel
3. **Set environment variables** (minimum required: one variable):
| Variable | Description |
| --- | --- |
| `DS2API_ADMIN_KEY` | Admin key (required) |
| `DS2API_CONFIG_JSON` | Config content, raw JSON or Base64 (optional, recommended) |
| Variable | Description |
| --- | --- |
| `DS2API_ADMIN_KEY` | Admin key (required) |
| `DS2API_CONFIG_JSON` | Config content, raw JSON or Base64 (optional, recommended) |
4. **Deploy**
@@ -244,11 +245,8 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx # optional for personal accounts
| Variable | Description | Default |
| --- | --- | --- |
| `DS2API_ACCOUNT_MAX_INFLIGHT` | Per-account inflight limit | `2` |
| `DS2API_ACCOUNT_CONCURRENCY` | Alias (legacy compat) | — |
| `DS2API_ACCOUNT_MAX_QUEUE` | Waiting queue limit | `recommended_concurrency` |
| `DS2API_ACCOUNT_QUEUE_SIZE` | Alias (legacy compat) | — |
| `DS2API_GLOBAL_MAX_INFLIGHT` | Global inflight limit | `recommended_concurrency` |
| `DS2API_MAX_INFLIGHT` | Alias (legacy compat) | — |
| `DS2API_ENV_WRITEBACK` | When `DS2API_CONFIG_JSON` is present, auto-write to `DS2API_CONFIG_PATH` and switch to file-backed mode after success (`1/true/yes/on`) | Disabled |
| `DS2API_VERCEL_INTERNAL_SECRET` | Hybrid streaming internal auth | Falls back to `DS2API_ADMIN_KEY` |
| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | Stream lease TTL | `900` |
@@ -314,7 +312,7 @@ Error: Command failed: go build -ldflags -s -w -o .../bootstrap ...
1. Open Vercel Project Settings → Build and Development Settings
2. **Clear** custom Go Build Flags / Build Command (recommended)
3. If ldflags must be used, set `-ldflags="-s -w"` (ensure it's one argument)
4. Verify `go.mod` uses a supported version (currently `go 1.24`)
4. Verify `go.mod` uses a supported version (currently `go 1.26.0`)
5. Redeploy (recommended: clear cache)
#### Internal Package Import Error

View File

@@ -25,14 +25,13 @@
| 依赖 | 最低版本 | 说明 |
| --- | --- | --- |
| Go | 1.26+ | 编译后端 |
| Node.js | 20+ | 仅在需要本地构建 WebUI 时 |
| Node.js | `20.19+``22.12+` | 仅在需要本地构建 WebUI 时 |
| npm | 随 Node.js 提供 | 安装 WebUI 依赖 |
配置来源(任选其一):
- **文件方式**`config.json`(推荐本地/Docker 使用)
- **环境变量方式**`DS2API_CONFIG_JSON`(推荐 Vercel 使用,支持 JSON 字符串或 Base64 编码)
- 兼容写法:`CONFIG_JSON` 是旧版回退变量;`DS2API_CONFIG_JSON` 也可以直接写原始 JSON
- **环境变量方式**`DS2API_CONFIG_JSON`(推荐 Vercel 使用,支持 JSON 字符串或 Base64 编码,也可以直接写原始 JSON
统一建议(最优实践):
@@ -117,6 +116,8 @@ cp config.example.json config.json
# 编辑 .env请改成你的强密码至少设置
# DS2API_ADMIN_KEY=your-admin-key
# 如需修改宿主机端口,可额外设置:
# DS2API_HOST_PORT=6011
# 启动
docker-compose up -d
@@ -125,7 +126,7 @@ docker-compose up -d
docker-compose logs -f
```
默认 `docker-compose.yml` 会把宿主机 `6011` 映射到容器内的 `5001`。如果你希望直接对外暴露 `5001`,请调整 `ports` 配置。
默认 `docker-compose.yml` 会把宿主机 `6011` 映射到容器内的 `5001`。如果你希望直接对外暴露 `5001`,请设置 `DS2API_HOST_PORT=5001`(或者手动调整 `ports` 配置
### 2.2 更新
@@ -138,7 +139,7 @@ docker-compose up -d --build
`Dockerfile` 提供两条构建路径:
1. **本地/开发默认路径(`runtime-from-source`**三阶段构建WebUI 构建 + Go 构建 + 运行阶段)。
2. **Release 路径(`runtime-from-dist`**CI 先生成 `dist/ds2api_<tag>_linux_<arch>.tar.gz`,再由 Docker 直接复用该发布包内的二进制和 `static/admin` 产物组装运行镜像,不再重复执行 `npm build`/`go build`
2. **Release 路径(`runtime-from-dist`**发布工作流先生成 tag 命名的 Release 压缩包,再把 Linux 产物复制成 `dist/docker-input/linux_amd64.tar.gz` / `linux_arm64.tar.gz`Docker 构建阶段直接消费这些输入,不再重复执行 `npm build`/`go build`
Release 路径可确保 Docker 镜像与 release 压缩包使用同一套产物,减少重复构建带来的差异。
@@ -198,10 +199,10 @@ healthcheck:
2. **在 Vercel 上导入项目**
3. **配置环境变量**(最少只需设置以下一项):
| 变量 | 说明 |
| --- | --- |
| `DS2API_ADMIN_KEY` | 管理密钥(必填) |
| `DS2API_CONFIG_JSON` | 配置内容JSON 字符串或 Base64 编码(可选,建议) |
| 变量 | 说明 |
| --- | --- |
| `DS2API_ADMIN_KEY` | 管理密钥(必填) |
| `DS2API_CONFIG_JSON` | 配置内容JSON 字符串或 Base64 编码(可选,建议) |
4. **部署**
@@ -244,11 +245,8 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx # 个人账号可留空
| 变量 | 说明 | 默认值 |
| --- | --- | --- |
| `DS2API_ACCOUNT_MAX_INFLIGHT` | 每账号并发上限 | `2` |
| `DS2API_ACCOUNT_CONCURRENCY` | 同上(兼容别名) | — |
| `DS2API_ACCOUNT_MAX_QUEUE` | 等待队列上限 | `recommended_concurrency` |
| `DS2API_ACCOUNT_QUEUE_SIZE` | 同上(兼容别名) | — |
| `DS2API_GLOBAL_MAX_INFLIGHT` | 全局并发上限 | `recommended_concurrency` |
| `DS2API_MAX_INFLIGHT` | 同上(兼容别名) | — |
| `DS2API_ENV_WRITEBACK` | 检测到 `DS2API_CONFIG_JSON` 时自动写入 `DS2API_CONFIG_PATH`,并在成功后转为文件模式(`1/true/yes/on` | 关闭 |
| `DS2API_VERCEL_INTERNAL_SECRET` | 混合流式内部鉴权 | 回退用 `DS2API_ADMIN_KEY` |
| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | 流式 lease TTL | `900` |
@@ -314,7 +312,7 @@ Error: Command failed: go build -ldflags -s -w -o .../bootstrap ...
1. 进入 Vercel Project Settings → Build and Development Settings
2. **清空**自定义 Go Build Flags / Build Command推荐
3. 若必须设置 ldflags使用 `-ldflags="-s -w"`(保证它是一个参数)
4. 确认仓库 `go.mod` 为受支持版本(当前为 `go 1.24`
4. 确认仓库 `go.mod` 为受支持版本(当前为 `go 1.26.0`
5. 重新部署(建议清缓存后 Redeploy
#### Internal 包导入错误

View File

@@ -1,82 +0,0 @@
# DeepSeek SSE 流格式字段分析2026-04-03
> 日期2026-04-03UTC
>
> 样本:`tests/raw_stream_samples/guangzhou-weather-reasoner-search-20260403/upstream.stream.sse`
>
> 模型:`deepseek-reasoner-search`(搜索 + 思考)
## 1. SSE 事件层结构
原始流由标准 SSE 帧组成,常见形态:
```text
event: <type>
data: <json or text>
```
样本中主要 `event` 类型:
- `ready`:流建立后返回请求/响应消息 ID。
- `update_session`:会话时间戳更新。
- `finish`:流式阶段结束。
- (无 `event` 时)默认为 message 事件,`data:` 中承载主要增量数据。
## 2. `data` JSON 常见字段
上游增量主体多为 JSON Patch 风格对象:
- `p`path字段路径`response/fragments/-1/content`
- `o`op可选操作类型常见 `SET` / `APPEND` / `BATCH`
- `v`value字符串、布尔、对象、数组都可能
示例(语义):
- `{"p":"response/fragments/-1/content","o":"APPEND","v":"..."}`
- `{"p":"response/fragments/-16/status","v":"FINISHED"}`
- `{"p":"response/status","o":"SET","v":"FINISHED"}`
## 3. 搜索+思考场景关键路径
### 3.1 文本内容
- `response/fragments/<idx>/content`
- `response/content`
- `response/thinking_content`
- `response/fragments``APPEND` + fragment 数组)
### 3.2 搜索相关
- `response/fragments/<idx>/results`(检索结果数组)
- `response/search_status`(检索状态,建议跳过展示)
### 3.3 状态相关(重点)
- `response/status = FINISHED`**最终结束信号**(需要保留用于结束判定)
- `response/fragments/<idx>/status = FINISHED`**分片级状态**(高频,建议跳过输出)
- `response/quasi_status`:过程状态(建议跳过输出)
## 4. 泄露问题根因FINISHED 重复)
在搜索 + 思考模型中,`response/fragments/<idx>/status` 会出现大量不同 `<idx>`(例如 `-1/-2/-3/-16...`)的 `FINISHED`
若只过滤固定少量索引(例如仅 `-1/-2/-3`),其他索引的状态会当普通文本透传,导致前端出现:
- `FINISHEDFINISHEDFINISHED...`
## 5. 适配建议(已落地)
1. 跳过所有 `response/fragments/-?\d+/status`
2. 继续保留 `response/status=FINISHED` 作为真正结束判定。
3. 通过独立仿真工具持续回放全部样本,作为回归门禁:
```bash
./tests/scripts/run-raw-stream-sim.sh
```
## 6. 后续扩展建议
- 增加不同模型(`deepseek-chat-search` / 非 search / 非 thinking样本。
- 增加异常样本限流、中断、content_filter、空结果
- 为仿真报告加入字段覆盖率统计(路径频次、事件频次、终止路径命中率)。

View File

@@ -0,0 +1,313 @@
# DeepSeek SSE 行为结构说明(第三方逆向版)
> 说明:本文基于当前仓库 `tests/raw_stream_samples/` 下全部 `upstream.stream.sse` 原始流样本整理而成,属于第三方逆向观察文档,不是官方协议。
> 当前 corpus 由 4 份原始流组成,覆盖搜索+引用、风控终态、Markdown 输出和空格敏感输出等行为。
> 补充:文末还会注明少量“当前实现已确认、但 corpus 尚未完整覆盖”的行为,例如长思考场景下的自动续写状态。
## 1. 样本覆盖
下列样本共同构成了本文的观察基础:
| 样本 | 观察重点 |
| --- | --- |
| [guangzhou-weather-reasoner-search-20260404](../tests/raw_stream_samples/guangzhou-weather-reasoner-search-20260404/upstream.stream.sse) | 搜索+思考流程,包含 `reference:N` 引用标记与工具片段 |
| [content-filter-trigger-20260405-jwt3](../tests/raw_stream_samples/content-filter-trigger-20260405-jwt3/upstream.stream.sse) | `CONTENT_FILTER` 终态分支,包含拒答模板与 `ban_regenerate` |
| [markdown-format-example-20260405](../tests/raw_stream_samples/markdown-format-example-20260405/upstream.stream.sse) | Markdown 输出的早期样本,用于观察 token 级输出形态 |
| [markdown-format-example-20260405-spacefix](../tests/raw_stream_samples/markdown-format-example-20260405-spacefix/upstream.stream.sse) | Markdown 输出修正样本,用于验证空格 chunk 必须保留 |
当前 corpus 的整体特征是 `message` 帧占绝对多数,控制事件只占很小一部分,但它们决定了流的生命周期和最终状态。
## 2. 总体结构
DeepSeek 的这类输出可以分成两层看:
1. SSE 事件层。
2. JSON 载荷层。
事件层负责传输边界,载荷层负责业务状态。实现时不要把 HTTP chunk、SSE block 和业务 JSON 混为一体。
最常见的时序可以概括为:
```text
ready
update_session
message(初始化 envelope)
message(正文 / 片段 / 状态增量)
message(状态收口)
finish
update_session
title
close
```
`finish` 表示生成流结束,但不是唯一的终止信号;真正的语义终态通常还要结合 `response/status``quasi_status``close` 一起判断。
## 3. SSE 事件层
当前 corpus 中观察到的事件类型如下:
| 事件 | 作用 | 处理建议 |
| --- | --- | --- |
| `ready` | 传输层就绪,通常携带 `request_message_id``response_message_id``model_type` | 记录元数据即可,不参与正文拼接 |
| `update_session` | 会话时间戳或心跳更新 | 当作会话状态帧处理 |
| `message` | 主体载荷,绝大多数业务信息都在这里 | 必须按顺序解析并保序累积 |
| `finish` | 生成阶段结束 | 作为流结束标记之一 |
| `title` | 会话标题生成结果 | 元数据帧,不参与正文拼接 |
| `close` | 连接关闭信息 | 仅用于收尾与审计 |
说明:
- `message` 是默认事件名SSE 中没有显式 `event:` 时也应按 `message` 处理。
- 目前样本里大量 `message` 帧没有独立的业务前缀,不能靠事件名区分正文和控制帧。
- 可能出现空 payload 的 `message` 帧;它们应被视为 no-op但不能打乱事件顺序。
## 4. 载荷层形态
`message``data:` 部分不是单一 schema而是多种结构混合。当前 corpus 里主要见到以下几种形态:
| 形态 | 典型结构 | 作用 |
| --- | --- | --- |
| 初始化 envelope | `{"v":{"response":{...}}}` | 给出会话初始状态、模型状态和片段容器 |
| 纯文本 token | `{"v":"..."}` | 直接输出可见文本 token |
| 路径补丁 | `{"p":"...","o":"APPEND|SET|BATCH","v":...}` | 对某个状态路径做增量更新 |
| 终态 batch | `{"v":[{"p":"status","v":"CONTENT_FILTER"}, ...]}` | 尾部状态收口,常见于风控终态 |
一个简化后的典型样式如下:
```json
{"v":"输出"}
{"p":"response/fragments/-1/content","o":"APPEND","v":"..."}
{"p":"response/fragments","o":"APPEND","v":[...]}
{"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":211},{"p":"quasi_status","v":"FINISHED"}]}
{"p":"response/status","o":"SET","v":"FINISHED"}
```
注意:
- `v` 可能是字符串、对象、数组、布尔值或数字。
- `o` 当前样本里主要见到 `APPEND``SET``BATCH`
- `v` 为数组时,通常表示一个批量 patch 集合,而不是正文数组。
## 5. 初始化 envelope
每条流开头,常会先出现一个 `message` 帧,内容是完整的 `response` 初始状态。当前 corpus 中,这个 envelope 常见字段包括:
- `message_id`
- `parent_id`
- `model`
- `role`
- `thinking_enabled`
- `ban_edit`
- `ban_regenerate`
- `status`
- `incomplete_message`
- `accumulated_token_usage`
- `files`
- `feedback`
- `inserted_at`
- `search_enabled`
- `fragments`
- `conversation_mode`
- `has_pending_fragment`
- `auto_continue`
- `search_triggered`
这些字段更像会话状态和策略开关,不是正文内容。第三方实现应把它们保留在内部状态树里,而不是直接拼接到最终答案。
## 6. 路径结构
当前 corpus 里观察到的 `p` 路径可以归成几组:
### 6.1 片段级路径
- `response/fragments/-N/content`
- `response/fragments/-N/status`
- `response/fragments/-N/results`
- `response/fragments/-N/elapsed_secs`
这类路径表示某个片段对象的增量更新。`-N` 只是样本中的索引风格,不应被写死成固定数量。
### 6.2 片段容器路径
- `response/fragments`
- `fragments`
这两类路径通常承载 fragment 数组。前者更像响应树中的分支,后者更像终态批处理里的片段集合。
### 6.3 语义状态路径
- `response/status`
- `response/has_pending_fragment`
- `quasi_status`
- `status`
- `ban_regenerate`
这类路径决定流是否结束、是否被风控、是否还有待处理片段。它们不应作为正文输出。
尤其是 `response/status` / `status` 这类路径上的字符串值,应被视为控制信号而不是文本 token。当前已确认需要特殊对待的值包括
- `FINISHED`:正常完成终态,应触发收口。
- `CONTENT_FILTER`:风控终态,应走拒答/模板分支。
- `WIP` / `INCOMPLETE` / `AUTO_CONTINUE`:未完成但可继续生成的中间状态,不应直接输出给客户端。
### 6.4 统计与进度路径
- `accumulated_token_usage`
这类路径用于使用量或进度统计,属于元数据。
### 6.5 非命名空间字段
在片段对象内部,还会看到 `content``references``result``queries``stage_id` 等字段。它们不一定带 `response/...` 前缀,但仍然是协议语义的一部分。
## 7. fragment 类型
当前 corpus 里已经观察到的 fragment 类型如下:
| 类型 | 作用 | 是否应直接渲染 |
| --- | --- | --- |
| `RESPONSE` | 正常回答片段 | 是,属于正文 |
| `THINK` | 推理或阶段提示 | 通常否,按产品策略决定是否展示 |
| `TOOL_SEARCH` | 搜索工具调用元数据 | 否 |
| `TOOL_OPEN` | 打开 / 抽取结果的工具元数据 | 否 |
| `TIP` | 提示 / 警告类片段,常带 `style: WARNING` | 视产品策略决定,通常作为附注 |
| `TEMPLATE_RESPONSE` | 风控拒答模板 | 是,但它属于终态 fallback不是普通正文 |
观察到的典型片段字段:
- `id`
- `type`
- `content`
- `references`
- `stage_id`
- `status`
- `queries`
- `results`
- `result`
- `elapsed_secs`
- `style`
- `hide_on_wip`
第三方实现不要把 `fragment.type``p` 路径混为一谈。`type` 是语义分类,`p` 是状态树位置。
## 8. 终态行为
当前 corpus 里有两条很重要的终态分支。
### 8.1 正常完成
正常回答通常会出现如下收口顺序:
1. `response``BATCH` 更新 `accumulated_token_usage`
2. `response``BATCH` 或单独 patch 更新 `quasi_status: FINISHED`
3. `response/status` 置为 `FINISHED`
4. `finish` 事件到来。
5. 之后可能还有 `update_session``title``close`
### 8.2 风控终态
`content-filter-trigger-20260405-jwt3` 展示了另一种终态路径:
1. 先继续输出一段正常正文。
2. 出现提示类 fragment例如 `TIP`
3. 可能先把 `quasi_status` 提前收口到 `FINISHED`
4. 之后出现一个终态 batch`ban_regenerate` 设为 `true`,把 `status` 置为 `CONTENT_FILTER`,并附带 `TEMPLATE_RESPONSE`
5. 最后再出现 `finish`,然后是收尾事件。
这个分支说明:
- `finish` 不等于正常结束。
- `CONTENT_FILTER` 是一个独立终态,不是普通异常。
- `TEMPLATE_RESPONSE` 不应被当作常规回答流的中间片段,它是终态 fallback。
一个简化的风控尾部可以写成:
```json
{"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":1269},{"p":"quasi_status","v":"FINISHED"}]}
{"v":[{"p":"ban_regenerate","v":true},{"p":"status","v":"CONTENT_FILTER"},{"p":"fragments","v":[{"id":38,"type":"TEMPLATE_RESPONSE","content":"..."}]},{"p":"quasi_status","v":"CONTENT_FILTER"}]}
{"event":"finish"}
```
### 8.3 自动续写中间态(实现补充)
这部分不是当前 corpus 的直接覆盖项,而是 2026-04-05 在长思考实测中观察到、且已在当前实现中兼容的行为:
1. 上游可能先把 `response/status` 或 envelope 内的 `response.status` 置为 `WIP` / `INCOMPLETE`
2. 有时还会伴随 `auto_continue: true`
3. 这表示当前轮输出尚未真正结束,客户端或代理层可以继续调用 continue 接口续写同一条回答。
4. 续写后的内容会承接之前的思考与正文,不应把前一轮状态值泄露成可见文本。
对第三方实现,建议把这一类状态统一当作“可继续的控制信号”:
- 可以据此决定是否继续拉取后续流。
- 不能把 `INCOMPLETE``WIP``AUTO_CONTINUE` 直接拼接到最终文本。
- `finish` 事件本身也不能单独说明回答已完全结束,仍要结合状态字段判断。
## 9. 文本重建规则
如果你的目标是把流重建成最终可见文本,必须遵守下面这些规则:
- 按接收顺序逐个追加 token。
- 不要对每个 `v``trim``TrimSpace`
- 不要丢弃只包含空格的 chunk。
- 不要合并连续空格、换行或 Markdown 符号附近的空白。
- 不要把 `[reference:N]` 视为协议元数据,它在当前 corpus 里就是正文的一部分。
- 如果你要屏蔽引用标记,应当把它做成可配置的后处理,而不是在解析阶段硬删。
- `response/status` / `status` 路径上的状态字符串不应进入正文,即使它们不是终态。
这点对 Markdown、代码块、引用、表格都很关键。样本里已经证明`#``-``>``|` 这类符号后面的空格必须原样保留,否则渲染结果会变形。
## 10. 推荐实现方式
对第三方开发者,建议把实现拆成三条线:
1. 原始事件线:保留 SSE block 顺序、事件名和完整 JSON 载荷。
2. 状态树线:维护 `response``fragments``status``quasi_status` 等结构。
3. 可见文本线:只从明确应渲染的 token / fragment 中拼接最终文本。
一个简单的处理顺序可以是:
```text
parse SSE block
-> 识别 event
-> 解析 JSON payload
-> 更新状态树
-> 识别 status / quasi_status / auto_continue 等控制信号
-> 判定是否有可见文本
-> 追加到输出缓冲
-> 遇到 WIP / INCOMPLETE / AUTO_CONTINUE 时决定是否续写
-> 遇到 FINISHED / CONTENT_FILTER / finish 时收口
```
实现时的兼容原则:
- 未知路径保留,不要报错中断。
- 未知 fragment.type 保留在日志里。
- 不要假设所有模型都一定输出 `thinking_content`,当前 corpus 的推理更多是通过 fragment 类型表达。
- 不要假设 `title` 一定存在,它只是后置元数据。
## 11. 本 corpus 证明了什么
当前样本足以证明以下行为:
- 搜索类模型会把工具调用、结果、引用和正文混在同一条 SSE 流里。
- 风控不会简单地“没有输出”,而是会在正常生成后切换到 `CONTENT_FILTER` 终态。
- Markdown 和代码输出对空格非常敏感,空格 chunk 不能吞。
- `message` 是主体承载层,`ready` / `update_session` / `finish` / `title` / `close` 是控制层。
- `fragment.type` 是可视化和工具链分层的关键,不应只靠 `p` 路径判断。
结合 2026-04-05 的长思考实测,还可以补充一条当前实现层面的结论:
- 长思考场景下,上游可能先给出 `INCOMPLETE` / `WIP` / `AUTO_CONTINUE` 状态,再通过 continue 链路续写;这些状态值本身不应作为正文输出。
## 12. 适用边界
本文是基于当前 corpus 的逆向说明,不是恒定协议。
- 新模型可能增加新的 `p` 路径。
- 新版本可能增加新的 fragment.type。
- `CONTENT_FILTER` 的终态模板内容可能变化。
- 自动续写相关状态(如 `INCOMPLETE` / `AUTO_CONTINUE`)当前主要来自实测与实现兼容逻辑,后续字段形态仍可能变化。
- 解析器应当对未知字段、未知路径、未知事件保持容忍。
如果你要把这份说明用于实际开发,建议同时保留原始流样本、回放脚本和回归测试,不要只依赖本文。

View File

@@ -233,9 +233,32 @@ go run ./cmd/ds2api-tests --no-preflight
```
说明:
- 该工具重放 `tests/raw_stream_samples` 下全部样本,按上游 SSE 顺序做 1:1 仿真解析。
- 该工具默认重放 `tests/raw_stream_samples/manifest.json` 声明的 canonical 样本,按上游 SSE 顺序做 1:1 仿真解析。
- 默认校验不出现 `FINISHED` 文本泄露,并要求存在结束信号。
- 结果写入 `artifacts/raw-stream-sim/*.json`,可供其他测试脚本或排障流程复用
- 每次运行都会把本地派生结果写入 `artifacts/raw-stream-sim/<run-id>/<sample-id>/replay.output.txt`,并输出结构化报告
- 如果你有历史基线目录,可以通过 `--baseline-root` 让工具直接做文本对比。
- 更完整的协议级行为结构说明见 [DeepSeekSSE行为结构说明-2026-04-05.md](./DeepSeekSSE行为结构说明-2026-04-05.md)。
### 对单个样本做回放比对
```bash
./tests/scripts/compare-raw-stream-sample.sh markdown-format-example-20260405-spacefix
```
说明:
- 该脚本会从 raw-only 样本目录读取 `upstream.stream.sse`
- 回放结果会写入 `artifacts/raw-stream-sim/<run-id>/<sample-id>/`,便于直接查阅。
- 如果传入历史基线目录,脚本会自动对比当前回放输出和基线文本。
### 采集永久样本
本地启动服务后,可以直接打:
```bash
POST /admin/dev/raw-samples/capture
```
这个接口会把请求元信息和上游原始流写入 `tests/raw_stream_samples/<sample-id>/`,以后可以直接拿来做回放和字段分析。派生输出会在本地回放时再生成,不再落在样本目录里。
### 指定输出目录和超时

22
go.mod
View File

@@ -3,23 +3,23 @@ module ds2api
go 1.26.0
require (
github.com/andybalholm/brotli v1.0.6
github.com/go-chi/chi/v5 v5.2.3
github.com/andybalholm/brotli v1.2.1
github.com/go-chi/chi/v5 v5.2.5
github.com/google/uuid v1.6.0
github.com/refraction-networking/utls v1.8.2
github.com/tetratelabs/wazero v1.9.0
github.com/router-for-me/CLIProxyAPI/v6 v6.9.14
github.com/tetratelabs/wazero v1.11.0
)
require (
github.com/klauspost/compress v1.17.4 // indirect
github.com/router-for-me/CLIProxyAPI/v6 v6.9.8 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/klauspost/compress v1.18.5 // indirect
github.com/sirupsen/logrus v1.9.4 // indirect
github.com/tidwall/gjson v1.18.0 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/tidwall/match v1.2.0 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
github.com/tidwall/sjson v1.2.5 // indirect
golang.org/x/crypto v0.45.0 // indirect
golang.org/x/net v0.47.0 // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/crypto v0.49.0 // indirect
golang.org/x/net v0.52.0 // indirect
golang.org/x/sys v0.42.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

60
go.sum
View File

@@ -1,47 +1,45 @@
github.com/andybalholm/brotli v1.0.6 h1:Yf9fFpf49Zrxb9NlQaluyE92/+X7UVHlhMNJN2sxfOI=
github.com/andybalholm/brotli v1.0.6/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/andybalholm/brotli v1.2.1 h1:R+f5xP285VArJDRgowrfb9DqL18yVK0gKAW/F+eTWro=
github.com/andybalholm/brotli v1.2.1/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-chi/chi/v5 v5.2.3 h1:WQIt9uxdsAbgIYgid+BpYc+liqQZGMHRaUwp0JUcvdE=
github.com/go-chi/chi/v5 v5.2.3/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
github.com/go-chi/chi/v5 v5.2.5 h1:Eg4myHZBjyvJmAFjFvWgrqDTXFyOzjj7YIm3L3mu6Ug=
github.com/go-chi/chi/v5 v5.2.5/go.mod h1:X7Gx4mteadT3eDOMTsXzmI4/rwUpOwBHLpAfupzFJP0=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE=
github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/refraction-networking/utls v1.8.1 h1:yNY1kapmQU8JeM1sSw2H2asfTIwWxIkrMJI0pRUOCAo=
github.com/refraction-networking/utls v1.8.1/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
github.com/refraction-networking/utls v1.8.2 h1:j4Q1gJj0xngdeH+Ox/qND11aEfhpgoEvV+S9iJ2IdQo=
github.com/refraction-networking/utls v1.8.2/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
github.com/router-for-me/CLIProxyAPI/v6 v6.9.8 h1:O65R38THenp8E1IK0paQlOfop3Y6UYlfqSdLlepidSY=
github.com/router-for-me/CLIProxyAPI/v6 v6.9.8/go.mod h1:P1jsIPFXorYGuS2N/3BlZYkpRKi/z7+oR3+1tdG0u4k=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I=
github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM=
github.com/router-for-me/CLIProxyAPI/v6 v6.9.14 h1:XItUHrPGE9E5xTeZIPjKGmKqfEs1AZbxl1RPfO5xtrc=
github.com/router-for-me/CLIProxyAPI/v6 v6.9.14/go.mod h1:P1jsIPFXorYGuS2N/3BlZYkpRKi/z7+oR3+1tdG0u4k=
github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w=
github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tetratelabs/wazero v1.11.0 h1:+gKemEuKCTevU4d7ZTzlsvgd1uaToIDtlQlmNbwqYhA=
github.com/tetratelabs/wazero v1.11.0/go.mod h1:eV28rsN8Q+xwjogd7f4/Pp4xFxO7uOGbLcD/LzB1wiU=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/match v1.2.0 h1:0pt8FlkOwjN2fPt4bIl4BoNxb98gGHN2ObFEDkrfZnM=
github.com/tidwall/match v1.2.0/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -13,9 +13,7 @@ import (
func TestPoolEmptyNoAccounts(t *testing.T) {
t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "2")
t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "")
t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
t.Setenv("DS2API_ACCOUNT_QUEUE_SIZE", "")
t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[]}`)
pool := NewPool(config.LoadStore())
if _, ok := pool.Acquire("", nil); ok {
@@ -165,9 +163,7 @@ func TestPoolAcquireWaitTargetAccount(t *testing.T) {
func TestPoolMaxQueueSizeOverride(t *testing.T) {
t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "")
t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "5")
t.Setenv("DS2API_ACCOUNT_QUEUE_SIZE", "")
t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[{"email":"acc1@example.com","token":"t1"}]}`)
pool := NewPool(config.LoadStore())
status := pool.Status()
@@ -176,19 +172,6 @@ func TestPoolMaxQueueSizeOverride(t *testing.T) {
}
}
func TestPoolQueueSizeAliasEnv(t *testing.T) {
t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "")
t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
t.Setenv("DS2API_ACCOUNT_QUEUE_SIZE", "7")
t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[{"email":"acc1@example.com","token":"t1"}]}`)
pool := NewPool(config.LoadStore())
status := pool.Status()
if got, ok := status["max_queue_size"].(int); !ok || got != 7 {
t.Fatalf("expected max_queue_size=7, got %#v", status["max_queue_size"])
}
}
func TestPoolMultipleAcquireReleaseCycles(t *testing.T) {
pool := newSingleAccountPoolForTest(t, "1")
for i := 0; i < 10; i++ {

View File

@@ -29,13 +29,8 @@ func (p *Pool) ApplyRuntimeLimits(maxInflightPerAccount, maxQueueSize, globalMax
}
func maxInflightFromEnv() int {
for _, key := range []string{"DS2API_ACCOUNT_MAX_INFLIGHT", "DS2API_ACCOUNT_CONCURRENCY"} {
raw := strings.TrimSpace(os.Getenv(key))
if raw == "" {
continue
}
n, err := strconv.Atoi(raw)
if err == nil && n > 0 {
if raw := strings.TrimSpace(os.Getenv("DS2API_ACCOUNT_MAX_INFLIGHT")); raw != "" {
if n, err := strconv.Atoi(raw); err == nil && n > 0 {
return n
}
}
@@ -53,13 +48,8 @@ func defaultRecommendedConcurrency(accountCount, maxInflightPerAccount int) int
}
func maxQueueFromEnv(defaultSize int) int {
for _, key := range []string{"DS2API_ACCOUNT_MAX_QUEUE", "DS2API_ACCOUNT_QUEUE_SIZE"} {
raw := strings.TrimSpace(os.Getenv(key))
if raw == "" {
continue
}
n, err := strconv.Atoi(raw)
if err == nil && n >= 0 {
if raw := strings.TrimSpace(os.Getenv("DS2API_ACCOUNT_MAX_QUEUE")); raw != "" {
if n, err := strconv.Atoi(raw); err == nil && n >= 0 {
return n
}
}

View File

@@ -12,9 +12,7 @@ import (
func newPoolForTest(t *testing.T, maxInflight string) *Pool {
t.Helper()
t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", maxInflight)
t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "")
t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
t.Setenv("DS2API_ACCOUNT_QUEUE_SIZE", "")
t.Setenv("DS2API_CONFIG_JSON", `{
"keys":["k1"],
"accounts":[
@@ -29,9 +27,7 @@ func newPoolForTest(t *testing.T, maxInflight string) *Pool {
func newSingleAccountPoolForTest(t *testing.T, maxInflight string) *Pool {
t.Helper()
t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", maxInflight)
t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "")
t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
t.Setenv("DS2API_ACCOUNT_QUEUE_SIZE", "")
t.Setenv("DS2API_CONFIG_JSON", `{
"keys":["k1"],
"accounts":[{"email":"acc1@example.com","token":"token1"}]
@@ -170,9 +166,9 @@ func TestPoolStatusRecommendedConcurrencyRespectsOverride(t *testing.T) {
}
}
func TestPoolAccountConcurrencyAliasEnv(t *testing.T) {
t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "")
t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "4")
func TestPoolGlobalMaxInflightEnv(t *testing.T) {
t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
t.Setenv("DS2API_GLOBAL_MAX_INFLIGHT", "4")
t.Setenv("DS2API_CONFIG_JSON", `{
"keys":["k1"],
"accounts":[
@@ -183,15 +179,15 @@ func TestPoolAccountConcurrencyAliasEnv(t *testing.T) {
pool := NewPool(config.LoadStore())
status := pool.Status()
if got, ok := status["max_inflight_per_account"].(int); !ok || got != 4 {
if got, ok := status["global_max_inflight"].(int); !ok || got != 4 {
t.Fatalf("unexpected global_max_inflight: %#v", status["global_max_inflight"])
}
if got, ok := status["max_inflight_per_account"].(int); !ok || got != 1 {
t.Fatalf("unexpected max_inflight_per_account: %#v", status["max_inflight_per_account"])
}
if got, ok := status["recommended_concurrency"].(int); !ok || got != 8 {
if got, ok := status["recommended_concurrency"].(int); !ok || got != 2 {
t.Fatalf("unexpected recommended_concurrency: %#v", status["recommended_concurrency"])
}
if got, ok := status["max_queue_size"].(int); !ok || got != 8 {
t.Fatalf("unexpected max_queue_size: %#v", status["max_queue_size"])
}
}
func TestPoolDropsLegacyTokenOnlyAccountOnLoad(t *testing.T) {
@@ -217,9 +213,7 @@ func TestPoolDropsLegacyTokenOnlyAccountOnLoad(t *testing.T) {
func TestPoolAcquireRotatesIntoTokenlessAccounts(t *testing.T) {
t.Setenv("DS2API_ACCOUNT_MAX_INFLIGHT", "1")
t.Setenv("DS2API_ACCOUNT_CONCURRENCY", "")
t.Setenv("DS2API_ACCOUNT_MAX_QUEUE", "")
t.Setenv("DS2API_ACCOUNT_QUEUE_SIZE", "")
t.Setenv("DS2API_CONFIG_JSON", `{
"keys":["k1"],
"accounts":[

View File

@@ -22,6 +22,7 @@ type DeepSeekCaller interface {
type ConfigReader interface {
ClaudeMapping() map[string]string
CompatStripReferenceMarkers() bool
}
type OpenAIChatRunner interface {

View File

@@ -7,6 +7,7 @@ type mockClaudeConfig struct {
}
func (m mockClaudeConfig) ClaudeMapping() map[string]string { return m.m }
func (mockClaudeConfig) CompatStripReferenceMarkers() bool { return true }
func TestNormalizeClaudeRequestUsesConfigInterfaceMapping(t *testing.T) {
req := map[string]any{

View File

@@ -149,6 +149,7 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ
messages,
thinkingEnabled,
searchEnabled,
h.compatStripReferenceMarkers(),
toolNames,
)
streamRuntime.sendMessageStart()

View File

@@ -21,6 +21,13 @@ type Handler struct {
OpenAI OpenAIChatRunner
}
func (h *Handler) compatStripReferenceMarkers() bool {
if h == nil || h.Store == nil {
return true
}
return h.Store.CompatStripReferenceMarkers()
}
var (
claudeStreamPingInterval = time.Duration(deepseek.KeepAliveTimeout) * time.Second
claudeStreamIdleTimeout = time.Duration(deepseek.StreamIdleTimeout) * time.Second

View File

@@ -0,0 +1,13 @@
package claude
import textclean "ds2api/internal/textclean"
func cleanVisibleOutput(text string, stripReferenceMarkers bool) string {
if text == "" {
return text
}
if stripReferenceMarkers {
text = textclean.StripReferenceMarkers(text)
}
return text
}

View File

@@ -16,6 +16,8 @@ func (s claudeProxyStoreStub) ClaudeMapping() map[string]string {
return s.mapping
}
func (claudeProxyStoreStub) CompatStripReferenceMarkers() bool { return true }
type openAIProxyStub struct {
status int
body string

View File

@@ -19,13 +19,14 @@ type claudeStreamRuntime struct {
toolNames []string
messages []any
thinkingEnabled bool
searchEnabled bool
bufferToolContent bool
thinkingEnabled bool
searchEnabled bool
bufferToolContent bool
stripReferenceMarkers bool
messageID string
thinking strings.Builder
text strings.Builder
messageID string
thinking strings.Builder
text strings.Builder
outputTokens int
nextBlockIndex int
@@ -45,21 +46,23 @@ func newClaudeStreamRuntime(
messages []any,
thinkingEnabled bool,
searchEnabled bool,
stripReferenceMarkers bool,
toolNames []string,
) *claudeStreamRuntime {
return &claudeStreamRuntime{
w: w,
rc: rc,
canFlush: canFlush,
model: model,
messages: messages,
thinkingEnabled: thinkingEnabled,
searchEnabled: searchEnabled,
bufferToolContent: len(toolNames) > 0,
toolNames: toolNames,
messageID: fmt.Sprintf("msg_%d", time.Now().UnixNano()),
thinkingBlockIndex: -1,
textBlockIndex: -1,
w: w,
rc: rc,
canFlush: canFlush,
model: model,
messages: messages,
thinkingEnabled: thinkingEnabled,
searchEnabled: searchEnabled,
bufferToolContent: len(toolNames) > 0,
stripReferenceMarkers: stripReferenceMarkers,
toolNames: toolNames,
messageID: fmt.Sprintf("msg_%d", time.Now().UnixNano()),
thinkingBlockIndex: -1,
textBlockIndex: -1,
}
}
@@ -80,10 +83,11 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
contentSeen := false
for _, p := range parsed.Parts {
if p.Text == "" {
cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
if cleanedText == "" {
continue
}
if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(p.Text) {
if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(cleanedText) {
continue
}
contentSeen = true
@@ -92,7 +96,7 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
if !s.thinkingEnabled {
continue
}
s.thinking.WriteString(p.Text)
s.thinking.WriteString(cleanedText)
s.closeTextBlock()
if !s.thinkingBlockOpen {
s.thinkingBlockIndex = s.nextBlockIndex
@@ -112,13 +116,13 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
"index": s.thinkingBlockIndex,
"delta": map[string]any{
"type": "thinking_delta",
"thinking": p.Text,
"thinking": cleanedText,
},
})
continue
}
s.text.WriteString(p.Text)
s.text.WriteString(cleanedText)
if s.bufferToolContent {
if hasUnclosedCodeFence(s.text.String()) {
continue
@@ -144,7 +148,7 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
"index": s.textBlockIndex,
"delta": map[string]any{
"type": "text_delta",
"text": p.Text,
"text": cleanedText,
},
})
}

View File

@@ -43,7 +43,7 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
s.closeTextBlock()
finalThinking := s.thinking.String()
finalText := s.text.String()
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
if s.bufferToolContent {
detected := util.ParseStandaloneToolCalls(finalText, s.toolNames)
@@ -64,7 +64,7 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
"input": map[string]any{},
},
})
inputBytes, _ := json.Marshal(tc.Input)
s.send("content_block_delta", map[string]any{
"type": "content_block_delta",

View File

@@ -28,6 +28,8 @@ func (streamStatusClaudeStoreStub) ClaudeMapping() map[string]string {
}
}
func (streamStatusClaudeStoreStub) CompatStripReferenceMarkers() bool { return true }
func captureClaudeStatusMiddleware(statuses *[]int) func(http.Handler) http.Handler {
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {

View File

@@ -22,6 +22,7 @@ type DeepSeekCaller interface {
type ConfigReader interface {
ModelAliases() map[string]string
CompatStripReferenceMarkers() bool
}
type OpenAIChatRunner interface {

View File

@@ -140,7 +140,15 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
}
result := sse.CollectStream(resp, thinkingEnabled, true)
writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(model, finalPrompt, result.Thinking, result.Text, toolNames, result.OutputTokens))
stripReferenceMarkers := h.compatStripReferenceMarkers()
writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(
model,
finalPrompt,
cleanVisibleOutput(result.Thinking, stripReferenceMarkers),
cleanVisibleOutput(result.Text, stripReferenceMarkers),
toolNames,
result.OutputTokens,
))
}
func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string, outputTokens int) map[string]any {
@@ -179,7 +187,7 @@ func buildGeminiUsage(finalPrompt, finalThinking, finalText string, outputTokens
func buildGeminiPartsFromFinal(finalText, finalThinking string, toolNames []string) []map[string]any {
detected := util.ParseToolCalls(finalText, toolNames)
if len(detected) == 0 && strings.TrimSpace(finalThinking) != "" {
if len(detected) == 0 && finalThinking != "" {
detected = util.ParseToolCalls(finalThinking, toolNames)
}
if len(detected) > 0 {
@@ -196,7 +204,7 @@ func buildGeminiPartsFromFinal(finalText, finalThinking string, toolNames []stri
}
text := finalText
if strings.TrimSpace(text) == "" {
if text == "" {
text = finalThinking
}
return []map[string]any{{"text": text}}

View File

@@ -17,6 +17,13 @@ type Handler struct {
OpenAI OpenAIChatRunner
}
func (h *Handler) compatStripReferenceMarkers() bool {
if h == nil || h.Store == nil {
return true
}
return h.Store.CompatStripReferenceMarkers()
}
func RegisterRoutes(r chi.Router, h *Handler) {
r.Post("/v1beta/models/{model}:generateContent", h.GenerateContent)
r.Post("/v1beta/models/{model}:streamGenerateContent", h.StreamGenerateContent)

View File

@@ -27,7 +27,7 @@ func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Req
rc := http.NewResponseController(w)
_, canFlush := w.(http.Flusher)
runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, toolNames)
runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, h.compatStripReferenceMarkers(), toolNames)
initialType := "text"
if thinkingEnabled {
@@ -57,13 +57,14 @@ type geminiStreamRuntime struct {
model string
finalPrompt string
thinkingEnabled bool
searchEnabled bool
bufferContent bool
toolNames []string
thinkingEnabled bool
searchEnabled bool
bufferContent bool
stripReferenceMarkers bool
toolNames []string
thinking strings.Builder
text strings.Builder
thinking strings.Builder
text strings.Builder
outputTokens int
}
@@ -75,18 +76,20 @@ func newGeminiStreamRuntime(
finalPrompt string,
thinkingEnabled bool,
searchEnabled bool,
stripReferenceMarkers bool,
toolNames []string,
) *geminiStreamRuntime {
return &geminiStreamRuntime{
w: w,
rc: rc,
canFlush: canFlush,
model: model,
finalPrompt: finalPrompt,
thinkingEnabled: thinkingEnabled,
searchEnabled: searchEnabled,
bufferContent: len(toolNames) > 0,
toolNames: toolNames,
w: w,
rc: rc,
canFlush: canFlush,
model: model,
finalPrompt: finalPrompt,
thinkingEnabled: thinkingEnabled,
searchEnabled: searchEnabled,
bufferContent: len(toolNames) > 0,
stripReferenceMarkers: stripReferenceMarkers,
toolNames: toolNames,
}
}
@@ -113,20 +116,21 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
contentSeen := false
for _, p := range parsed.Parts {
if p.Text == "" {
cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
if cleanedText == "" {
continue
}
if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(p.Text) {
if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(cleanedText) {
continue
}
contentSeen = true
if p.Type == "thinking" {
if s.thinkingEnabled {
s.thinking.WriteString(p.Text)
s.thinking.WriteString(cleanedText)
}
continue
}
s.text.WriteString(p.Text)
s.text.WriteString(cleanedText)
if s.bufferContent {
continue
}
@@ -136,7 +140,7 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
"index": 0,
"content": map[string]any{
"role": "model",
"parts": []map[string]any{{"text": p.Text}},
"parts": []map[string]any{{"text": cleanedText}},
},
},
},
@@ -148,7 +152,7 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
func (s *geminiStreamRuntime) finalize() {
finalThinking := s.thinking.String()
finalText := s.text.String()
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
if s.bufferContent {
parts := buildGeminiPartsFromFinal(finalText, finalThinking, s.toolNames)

View File

@@ -17,7 +17,8 @@ import (
type testGeminiConfig struct{}
func (testGeminiConfig) ModelAliases() map[string]string { return nil }
func (testGeminiConfig) ModelAliases() map[string]string { return nil }
func (testGeminiConfig) CompatStripReferenceMarkers() bool { return true }
type testGeminiAuth struct {
a *auth.RequestAuth
@@ -62,8 +63,8 @@ func (m testGeminiDS) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ m
}
type geminiOpenAIErrorStub struct {
status int
body string
status int
body string
headers map[string]string
}
@@ -247,7 +248,7 @@ func TestStreamGenerateContentEmitsSSE(t *testing.T) {
func TestGenerateContentOpenAIProxyErrorUsesGeminiEnvelope(t *testing.T) {
h := &Handler{
Store: testGeminiConfig{},
Store: testGeminiConfig{},
OpenAI: geminiOpenAIErrorStub{
status: http.StatusUnauthorized,
body: `{"error":{"message":"invalid api key"}}`,

View File

@@ -0,0 +1,13 @@
package gemini
import textclean "ds2api/internal/textclean"
func cleanVisibleOutput(text string, stripReferenceMarkers bool) string {
if text == "" {
return text
}
if stripReferenceMarkers {
text = textclean.StripReferenceMarkers(text)
}
return text
}

View File

@@ -22,8 +22,9 @@ type chatStreamRuntime struct {
finalPrompt string
toolNames []string
thinkingEnabled bool
searchEnabled bool
thinkingEnabled bool
searchEnabled bool
stripReferenceMarkers bool
firstChunkSent bool
bufferToolContent bool
@@ -49,25 +50,27 @@ func newChatStreamRuntime(
finalPrompt string,
thinkingEnabled bool,
searchEnabled bool,
stripReferenceMarkers bool,
toolNames []string,
bufferToolContent bool,
emitEarlyToolDeltas bool,
) *chatStreamRuntime {
return &chatStreamRuntime{
w: w,
rc: rc,
canFlush: canFlush,
completionID: completionID,
created: created,
model: model,
finalPrompt: finalPrompt,
toolNames: toolNames,
thinkingEnabled: thinkingEnabled,
searchEnabled: searchEnabled,
bufferToolContent: bufferToolContent,
emitEarlyToolDeltas: emitEarlyToolDeltas,
streamToolCallIDs: map[int]string{},
streamToolNames: map[int]string{},
w: w,
rc: rc,
canFlush: canFlush,
completionID: completionID,
created: created,
model: model,
finalPrompt: finalPrompt,
toolNames: toolNames,
thinkingEnabled: thinkingEnabled,
searchEnabled: searchEnabled,
stripReferenceMarkers: stripReferenceMarkers,
bufferToolContent: bufferToolContent,
emitEarlyToolDeltas: emitEarlyToolDeltas,
streamToolCallIDs: map[int]string{},
streamToolNames: map[int]string{},
}
}
@@ -98,7 +101,7 @@ func (s *chatStreamRuntime) sendDone() {
func (s *chatStreamRuntime) finalize(finishReason string) {
finalThinking := s.thinking.String()
finalText := sanitizeLeakedOutput(s.text.String())
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
detected := util.ParseStandaloneToolCallsDetailed(finalText, s.toolNames)
if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted {
finishReason = "tool_calls"
@@ -142,7 +145,7 @@ func (s *chatStreamRuntime) finalize(finishReason string) {
if evt.Content == "" {
continue
}
cleaned := sanitizeLeakedOutput(evt.Content)
cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
if cleaned == "" {
continue
}
@@ -203,10 +206,11 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
newChoices := make([]map[string]any, 0, len(parsed.Parts))
contentSeen := false
for _, p := range parsed.Parts {
if s.searchEnabled && sse.IsCitation(p.Text) {
cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
if s.searchEnabled && sse.IsCitation(cleanedText) {
continue
}
if p.Text == "" {
if cleanedText == "" {
continue
}
contentSeen = true
@@ -217,15 +221,15 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
}
if p.Type == "thinking" {
if s.thinkingEnabled {
s.thinking.WriteString(p.Text)
delta["reasoning_content"] = p.Text
s.thinking.WriteString(cleanedText)
delta["reasoning_content"] = cleanedText
}
} else {
s.text.WriteString(p.Text)
s.text.WriteString(cleanedText)
if !s.bufferToolContent {
delta["content"] = p.Text
delta["content"] = cleanedText
} else {
events := processToolSieveChunk(&s.toolSieve, p.Text, s.toolNames)
events := processToolSieveChunk(&s.toolSieve, cleanedText, s.toolNames)
for _, evt := range events {
if len(evt.ToolCallDeltas) > 0 {
if !s.emitEarlyToolDeltas {
@@ -264,7 +268,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
continue
}
if evt.Content != "" {
cleaned := sanitizeLeakedOutput(evt.Content)
cleaned := cleanVisibleOutput(evt.Content, s.stripReferenceMarkers)
if cleaned == "" {
continue
}

View File

@@ -19,16 +19,19 @@ type DeepSeekCaller interface {
CreateSession(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
GetPow(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)
CallCompletion(ctx context.Context, a *auth.RequestAuth, payload map[string]any, powResp string, maxAttempts int) (*http.Response, error)
DeleteSessionForToken(ctx context.Context, token string, sessionID string) (*deepseek.DeleteSessionResult, error)
DeleteAllSessionsForToken(ctx context.Context, token string) error
}
type ConfigReader interface {
ModelAliases() map[string]string
CompatWideInputStrictOutput() bool
CompatStripReferenceMarkers() bool
ToolcallMode() string
ToolcallEarlyEmitConfidence() string
ResponsesStoreTTLSeconds() int
EmbeddingsProvider() string
AutoDeleteMode() string
AutoDeleteSessions() bool
}

View File

@@ -3,23 +3,31 @@ package openai
import "testing"
type mockOpenAIConfig struct {
aliases map[string]string
wideInput bool
toolMode string
earlyEmit string
responsesTTL int
embedProv string
aliases map[string]string
wideInput bool
autoDeleteMode string
toolMode string
earlyEmit string
responsesTTL int
embedProv string
}
func (m mockOpenAIConfig) ModelAliases() map[string]string { return m.aliases }
func (m mockOpenAIConfig) CompatWideInputStrictOutput() bool {
return m.wideInput
}
func (m mockOpenAIConfig) CompatStripReferenceMarkers() bool { return true }
func (m mockOpenAIConfig) ToolcallMode() string { return m.toolMode }
func (m mockOpenAIConfig) ToolcallEarlyEmitConfidence() string { return m.earlyEmit }
func (m mockOpenAIConfig) ResponsesStoreTTLSeconds() int { return m.responsesTTL }
func (m mockOpenAIConfig) EmbeddingsProvider() string { return m.embedProv }
func (m mockOpenAIConfig) AutoDeleteSessions() bool { return false }
func (m mockOpenAIConfig) AutoDeleteMode() string {
if m.autoDeleteMode == "" {
return "none"
}
return m.autoDeleteMode
}
func (m mockOpenAIConfig) AutoDeleteSessions() bool { return false }
func TestNormalizeOpenAIChatRequestWithConfigInterface(t *testing.T) {
cfg := mockOpenAIConfig{

View File

@@ -35,22 +35,9 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
writeOpenAIError(w, status, detail)
return
}
var sessionID string
defer func() {
// Auto-delete the session (synchronously)
// Deletion must happen synchronously, before Release, because otherwise:
// 1. by the time the async delete runs, the account has already been Released
// 2. a new request may acquire the same account and start using it
// 3. the async delete, still in progress, would truncate the session the new request is using
if h.Store.AutoDeleteSessions() && a.DeepSeekToken != "" {
deleteCtx, cancel := context.WithTimeout(r.Context(), 10*time.Second)
defer cancel()
err := h.DS.DeleteAllSessionsForToken(deleteCtx, a.DeepSeekToken)
if err != nil {
config.Logger.Warn("[auto_delete_sessions] failed", "account", a.AccountID, "error", err)
} else {
config.Logger.Debug("[auto_delete_sessions] success", "account", a.AccountID)
}
}
h.autoDeleteRemoteSession(r.Context(), a, sessionID)
h.Auth.Release(a)
}()
@@ -67,7 +54,7 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
return
}
sessionID, err := h.DS.CreateSession(r.Context(), a, 3)
sessionID, err = h.DS.CreateSession(r.Context(), a, 3)
if err != nil {
if a.UseConfigToken {
writeOpenAIError(w, http.StatusUnauthorized, "Account token is invalid. Please re-login the account in admin.")
@@ -94,6 +81,39 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
h.handleNonStream(w, r.Context(), resp, sessionID, stdReq.ResponseModel, stdReq.FinalPrompt, stdReq.Thinking, stdReq.ToolNames)
}
func (h *Handler) autoDeleteRemoteSession(ctx context.Context, a *auth.RequestAuth, sessionID string) {
mode := h.Store.AutoDeleteMode()
if mode == "none" || a.DeepSeekToken == "" {
return
}
deleteBaseCtx := context.WithoutCancel(ctx)
deleteCtx, cancel := context.WithTimeout(deleteBaseCtx, 10*time.Second)
defer cancel()
switch mode {
case "single":
if sessionID == "" {
config.Logger.Warn("[auto_delete_sessions] skipped single-session delete because session_id is empty", "account", a.AccountID)
return
}
_, err := h.DS.DeleteSessionForToken(deleteCtx, a.DeepSeekToken, sessionID)
if err != nil {
config.Logger.Warn("[auto_delete_sessions] failed", "account", a.AccountID, "mode", mode, "session_id", sessionID, "error", err)
return
}
config.Logger.Debug("[auto_delete_sessions] success", "account", a.AccountID, "mode", mode, "session_id", sessionID)
case "all":
if err := h.DS.DeleteAllSessionsForToken(deleteCtx, a.DeepSeekToken); err != nil {
config.Logger.Warn("[auto_delete_sessions] failed", "account", a.AccountID, "mode", mode, "error", err)
return
}
config.Logger.Debug("[auto_delete_sessions] success", "account", a.AccountID, "mode", mode)
default:
config.Logger.Warn("[auto_delete_sessions] unknown mode", "account", a.AccountID, "mode", mode)
}
}
func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, resp *http.Response, completionID, model, finalPrompt string, thinkingEnabled bool, toolNames []string) {
if resp.StatusCode != http.StatusOK {
defer resp.Body.Close()
@@ -104,8 +124,12 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, re
_ = ctx
result := sse.CollectStream(resp, thinkingEnabled, true)
finalThinking := result.Thinking
finalText := sanitizeLeakedOutput(result.Text)
stripReferenceMarkers := h.compatStripReferenceMarkers()
finalThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
finalText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
if writeUpstreamEmptyOutputError(w, finalThinking, finalText, result.ContentFilter) {
return
}
respBody := openaifmt.BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText, toolNames)
if result.OutputTokens > 0 {
if usage, ok := respBody["usage"].(map[string]any); ok {
@@ -138,6 +162,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
created := time.Now().Unix()
bufferToolContent := len(toolNames) > 0
emitEarlyToolDeltas := h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence()
stripReferenceMarkers := h.compatStripReferenceMarkers()
initialType := "text"
if thinkingEnabled {
initialType = "thinking"
@@ -153,6 +178,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
finalPrompt,
thinkingEnabled,
searchEnabled,
stripReferenceMarkers,
toolNames,
bufferToolContent,
emitEarlyToolDeltas,

View File

@@ -0,0 +1,139 @@
package openai
import (
"context"
"net/http"
"net/http/httptest"
"strings"
"testing"
"ds2api/internal/auth"
"ds2api/internal/deepseek"
)
type autoDeleteModeDSStub struct {
resp *http.Response
singleCalls int
allCalls int
lastSessionID string
lastCtxErr error
}
func (m *autoDeleteModeDSStub) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
return "session-id", nil
}
func (m *autoDeleteModeDSStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) {
return "pow", nil
}
func (m *autoDeleteModeDSStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) {
return m.resp, nil
}
func (m *autoDeleteModeDSStub) DeleteSessionForToken(_ context.Context, _ string, sessionID string) (*deepseek.DeleteSessionResult, error) {
m.singleCalls++
m.lastSessionID = sessionID
return &deepseek.DeleteSessionResult{SessionID: sessionID, Success: true}, nil
}
func (m *autoDeleteModeDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error {
m.allCalls++
return nil
}
func (m *autoDeleteModeDSStub) DeleteSessionForTokenCtx(ctx context.Context, _ string, sessionID string) (*deepseek.DeleteSessionResult, error) {
m.singleCalls++
m.lastSessionID = sessionID
m.lastCtxErr = ctx.Err()
return &deepseek.DeleteSessionResult{SessionID: sessionID, Success: true}, nil
}
func TestChatCompletionsAutoDeleteModes(t *testing.T) {
tests := []struct {
name string
mode string
wantSingle int
wantAll int
}{
{name: "none", mode: "none"},
{name: "single", mode: "single", wantSingle: 1},
{name: "all", mode: "all", wantAll: 1},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
ds := &autoDeleteModeDSStub{
resp: makeOpenAISSEHTTPResponse(
`data: {"p":"response/content","v":"hello"}`,
"data: [DONE]",
),
}
h := &Handler{
Store: mockOpenAIConfig{
wideInput: true,
autoDeleteMode: tc.mode,
},
Auth: streamStatusAuthStub{},
DS: ds,
}
reqBody := `{"model":"deepseek-chat","messages":[{"role":"user","content":"hi"}],"stream":false}`
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody))
req.Header.Set("Authorization", "Bearer direct-token")
req.Header.Set("Content-Type", "application/json")
rec := httptest.NewRecorder()
h.ChatCompletions(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
}
if ds.singleCalls != tc.wantSingle {
t.Fatalf("single delete calls=%d want=%d", ds.singleCalls, tc.wantSingle)
}
if ds.allCalls != tc.wantAll {
t.Fatalf("all delete calls=%d want=%d", ds.allCalls, tc.wantAll)
}
if tc.wantSingle > 0 && ds.lastSessionID != "session-id" {
t.Fatalf("expected single delete for session-id, got %q", ds.lastSessionID)
}
})
}
}
type autoDeleteCtxDSStub struct {
autoDeleteModeDSStub
}
func (m *autoDeleteCtxDSStub) DeleteSessionForToken(ctx context.Context, token string, sessionID string) (*deepseek.DeleteSessionResult, error) {
return m.autoDeleteModeDSStub.DeleteSessionForTokenCtx(ctx, token, sessionID)
}
func (m *autoDeleteCtxDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error {
m.allCalls++
return nil
}
func TestAutoDeleteRemoteSessionIgnoresCanceledParentContext(t *testing.T) {
ds := &autoDeleteCtxDSStub{}
h := &Handler{
Store: mockOpenAIConfig{
wideInput: true,
autoDeleteMode: "single",
},
DS: ds,
}
a := &auth.RequestAuth{DeepSeekToken: "token", AccountID: "acct"}
ctx, cancel := context.WithCancel(context.Background())
cancel()
h.autoDeleteRemoteSession(ctx, a, "session-id")
if ds.singleCalls != 1 {
t.Fatalf("single delete calls=%d want=1", ds.singleCalls)
}
if ds.lastCtxErr != nil {
t.Fatalf("delete ctx should not inherit cancellation, got %v", ds.lastCtxErr)
}
}
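The test above pins down the design choice behind autoDeleteRemoteSession: when the deferred cleanup runs, the request context may already be canceled (for example when the client disconnected mid-stream), so the handler detaches from it with context.WithoutCancel and applies its own 10-second budget. A minimal editorial sketch of that pattern, using a hypothetical helper that is not part of this change and only the standard library:

import (
    "context"
    "time"
)

// runDetachedCleanup keeps the parent's values but drops its cancellation,
// so the cleanup still gets its full timeout after the client goes away.
// context.WithoutCancel requires Go 1.21 or newer.
func runDetachedCleanup(parent context.Context, do func(context.Context) error) error {
    ctx, cancel := context.WithTimeout(context.WithoutCancel(parent), 10*time.Second)
    defer cancel()
    return do(ctx)
}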

View File

@@ -28,6 +28,13 @@ type Handler struct {
responses *responseStore
}
func (h *Handler) compatStripReferenceMarkers() bool {
if h == nil || h.Store == nil {
return true
}
return h.Store.CompatStripReferenceMarkers()
}
type streamLease struct {
Auth *auth.RequestAuth
ExpiresAt time.Time

View File

@@ -275,6 +275,44 @@ func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t)
}
func TestHandleNonStreamReturns502WhenUpstreamOutputEmpty(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"p":"response/content","v":""}`,
`data: [DONE]`,
)
rec := httptest.NewRecorder()
h.handleNonStream(rec, context.Background(), resp, "cid-empty", "deepseek-chat", "prompt", false, nil)
if rec.Code != http.StatusBadGateway {
t.Fatalf("expected status 502 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
}
out := decodeJSONBody(t, rec.Body.String())
errObj, _ := out["error"].(map[string]any)
if asString(errObj["code"]) != "upstream_empty_output" {
t.Fatalf("expected code=upstream_empty_output, got %#v", out)
}
}
func TestHandleNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"code":"content_filter"}`,
`data: [DONE]`,
)
rec := httptest.NewRecorder()
h.handleNonStream(rec, context.Background(), resp, "cid-empty-filtered", "deepseek-chat", "prompt", false, nil)
if rec.Code != http.StatusBadRequest {
t.Fatalf("expected status 400 for filtered upstream output, got %d body=%s", rec.Code, rec.Body.String())
}
out := decodeJSONBody(t, rec.Body.String())
errObj, _ := out["error"].(map[string]any)
if asString(errObj["code"]) != "content_filter" {
t.Fatalf("expected code=content_filter, got %#v", out)
}
}
func TestHandleStreamToolCallInterceptsWithoutRawContentLeak(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(

View File

@@ -9,9 +9,9 @@ var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\
var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`)
// leakedMetaMarkerPattern matches DeepSeek special tokens in BOTH forms:
// - ASCII underscore: <end_of_sentence>
// - U+2581 variant: <end▁of▁sentence> (used in some DeepSeek outputs)
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking)\s*[\|]>`)
// - ASCII underscore: <end_of_sentence>, <end_of_toolresults>, <end_of_instructions>
// - U+2581 variant: <end▁of▁sentence>, <end▁of▁toolresults>, <end▁of▁instructions>
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[\|]>`)
// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
// when the sieve fails to capture them. These are applied only to complete

View File

@@ -19,9 +19,9 @@ func TestSanitizeLeakedOutputRemovesLeakedWireToolCallAndResult(t *testing.T) {
}
func TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers(t *testing.T) {
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<end▁of▁thinking>D<end▁of▁sentence>E"
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<end▁of▁thinking>D<end▁of▁sentence>E<| end_of_toolresults |>F<end▁of▁instructions>G"
got := sanitizeLeakedOutput(raw)
if got != "ABCDE" {
if got != "ABCDEFG" {
t.Fatalf("unexpected sanitize result for meta markers: %q", got)
}
}

View File

@@ -0,0 +1,13 @@
package openai
import textclean "ds2api/internal/textclean"
func cleanVisibleOutput(text string, stripReferenceMarkers bool) string {
if text == "" {
return text
}
if stripReferenceMarkers {
text = textclean.StripReferenceMarkers(text)
}
return sanitizeLeakedOutput(text)
}
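All three cleanVisibleOutput helpers delegate the actual marker removal to textclean.StripReferenceMarkers, whose implementation is not part of this diff. As a rough illustration only (the token form is an assumption based on the "[reference:1]" fixture used in the capture tests below; the real internal/textclean code may match more variants), such a stripper could look like:

import "regexp"

// Hypothetical sketch of a reference-marker stripper; not the project's
// actual textclean.StripReferenceMarkers implementation.
var refMarkerPattern = regexp.MustCompile(`\[reference:\d+\]`)

func stripReferenceMarkersSketch(text string) string {
    return refMarkerPattern.ReplaceAllString(text, "")
}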

View File

@@ -113,7 +113,12 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
return
}
result := sse.CollectStream(resp, thinkingEnabled, true)
sanitizedText := sanitizeLeakedOutput(result.Text)
stripReferenceMarkers := h.compatStripReferenceMarkers()
sanitizedThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
sanitizedText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
if writeUpstreamEmptyOutputError(w, sanitizedThinking, sanitizedText, result.ContentFilter) {
return
}
textParsed := util.ParseStandaloneToolCallsDetailed(sanitizedText, toolNames)
logResponsesToolPolicyRejection(traceID, toolChoice, textParsed, "text")
@@ -123,7 +128,7 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
return
}
responseObj := openaifmt.BuildResponseObject(responseID, model, finalPrompt, result.Thinking, sanitizedText, toolNames)
responseObj := openaifmt.BuildResponseObject(responseID, model, finalPrompt, sanitizedThinking, sanitizedText, toolNames)
if result.OutputTokens > 0 {
if usage, ok := responseObj["usage"].(map[string]any); ok {
usage["output_tokens"] = result.OutputTokens
@@ -156,6 +161,7 @@ func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request,
}
bufferToolContent := len(toolNames) > 0
emitEarlyToolDeltas := h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence()
stripReferenceMarkers := h.compatStripReferenceMarkers()
streamRuntime := newResponsesStreamRuntime(
w,
@@ -166,6 +172,7 @@ func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request,
finalPrompt,
thinkingEnabled,
searchEnabled,
stripReferenceMarkers,
toolNames,
bufferToolContent,
emitEarlyToolDeltas,

View File

@@ -23,8 +23,9 @@ type responsesStreamRuntime struct {
traceID string
toolChoice util.ToolChoicePolicy
thinkingEnabled bool
searchEnabled bool
thinkingEnabled bool
searchEnabled bool
stripReferenceMarkers bool
bufferToolContent bool
emitEarlyToolDeltas bool
@@ -63,6 +64,7 @@ func newResponsesStreamRuntime(
finalPrompt string,
thinkingEnabled bool,
searchEnabled bool,
stripReferenceMarkers bool,
toolNames []string,
bufferToolContent bool,
emitEarlyToolDeltas bool,
@@ -71,34 +73,35 @@ func newResponsesStreamRuntime(
persistResponse func(obj map[string]any),
) *responsesStreamRuntime {
return &responsesStreamRuntime{
w: w,
rc: rc,
canFlush: canFlush,
responseID: responseID,
model: model,
finalPrompt: finalPrompt,
thinkingEnabled: thinkingEnabled,
searchEnabled: searchEnabled,
toolNames: toolNames,
bufferToolContent: bufferToolContent,
emitEarlyToolDeltas: emitEarlyToolDeltas,
streamToolCallIDs: map[int]string{},
functionItemIDs: map[int]string{},
functionOutputIDs: map[int]int{},
functionArgs: map[int]string{},
functionDone: map[int]bool{},
functionAdded: map[int]bool{},
functionNames: map[int]string{},
messageOutputID: -1,
toolChoice: toolChoice,
traceID: traceID,
persistResponse: persistResponse,
w: w,
rc: rc,
canFlush: canFlush,
responseID: responseID,
model: model,
finalPrompt: finalPrompt,
thinkingEnabled: thinkingEnabled,
searchEnabled: searchEnabled,
stripReferenceMarkers: stripReferenceMarkers,
toolNames: toolNames,
bufferToolContent: bufferToolContent,
emitEarlyToolDeltas: emitEarlyToolDeltas,
streamToolCallIDs: map[int]string{},
functionItemIDs: map[int]string{},
functionOutputIDs: map[int]int{},
functionArgs: map[int]string{},
functionDone: map[int]bool{},
functionAdded: map[int]bool{},
functionNames: map[int]string{},
messageOutputID: -1,
toolChoice: toolChoice,
traceID: traceID,
persistResponse: persistResponse,
}
}
func (s *responsesStreamRuntime) finalize() {
finalThinking := s.thinking.String()
finalText := sanitizeLeakedOutput(s.text.String())
finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers)
if s.bufferToolContent {
s.processToolStreamEvents(flushToolSieve(&s.sieve, s.toolNames), true)
@@ -190,10 +193,11 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
contentSeen := false
for _, p := range parsed.Parts {
if p.Text == "" {
cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers)
if cleanedText == "" {
continue
}
if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(p.Text) {
if p.Type != "thinking" && s.searchEnabled && sse.IsCitation(cleanedText) {
continue
}
contentSeen = true
@@ -201,15 +205,11 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
if !s.thinkingEnabled {
continue
}
s.thinking.WriteString(p.Text)
s.sendEvent("response.reasoning.delta", openaifmt.BuildResponsesReasoningDeltaPayload(s.responseID, p.Text))
s.thinking.WriteString(cleanedText)
s.sendEvent("response.reasoning.delta", openaifmt.BuildResponsesReasoningDeltaPayload(s.responseID, cleanedText))
continue
}
cleanedText := sanitizeLeakedOutput(p.Text)
if cleanedText == "" {
continue
}
s.text.WriteString(cleanedText)
if !s.bufferToolContent {
s.emitTextDelta(cleanedText)

View File

@@ -69,7 +69,7 @@ func (s *responsesStreamRuntime) ensureMessageContentPartAdded() {
}
func (s *responsesStreamRuntime) emitTextDelta(content string) {
if strings.TrimSpace(content) == "" {
if content == "" {
return
}
s.ensureMessageContentPartAdded()

View File

@@ -83,13 +83,13 @@ func (s *responsesStreamRuntime) buildCompletedResponseObject(finalThinking, fin
})
} else if len(calls) == 0 {
content := make([]map[string]any, 0, 2)
if strings.TrimSpace(finalThinking) != "" {
if finalThinking != "" {
content = append(content, map[string]any{
"type": "reasoning",
"text": finalThinking,
})
}
if strings.TrimSpace(finalText) != "" {
if finalText != "" {
content = append(content, map[string]any{
"type": "output_text",
"text": finalText,
@@ -136,10 +136,10 @@ func (s *responsesStreamRuntime) buildCompletedResponseObject(finalThinking, fin
}
outputText := s.visibleText.String()
if strings.TrimSpace(outputText) == "" && len(calls) == 0 {
if strings.TrimSpace(finalText) != "" {
if outputText == "" && len(calls) == 0 {
if finalText != "" {
outputText = finalText
} else if strings.TrimSpace(finalThinking) != "" {
} else if finalThinking != "" {
outputText = finalThinking
}
}

View File

@@ -627,6 +627,50 @@ func TestHandleResponsesNonStreamToolChoiceNoneStillAllowsFunctionCall(t *testin
}
}
func TestHandleResponsesNonStreamReturns502WhenUpstreamOutputEmpty(t *testing.T) {
h := &Handler{}
rec := httptest.NewRecorder()
resp := &http.Response{
StatusCode: http.StatusOK,
Body: io.NopCloser(strings.NewReader(
`data: {"p":"response/content","v":""}` + "\n" +
`data: [DONE]` + "\n",
)),
}
h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, nil, util.DefaultToolChoicePolicy(), "")
if rec.Code != http.StatusBadGateway {
t.Fatalf("expected 502 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
}
out := decodeJSONBody(t, rec.Body.String())
errObj, _ := out["error"].(map[string]any)
if asString(errObj["code"]) != "upstream_empty_output" {
t.Fatalf("expected code=upstream_empty_output, got %#v", out)
}
}
func TestHandleResponsesNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
h := &Handler{}
rec := httptest.NewRecorder()
resp := &http.Response{
StatusCode: http.StatusOK,
Body: io.NopCloser(strings.NewReader(
`data: {"code":"content_filter"}` + "\n" +
`data: [DONE]` + "\n",
)),
}
h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, nil, util.DefaultToolChoicePolicy(), "")
if rec.Code != http.StatusBadRequest {
t.Fatalf("expected 400 for filtered empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
}
out := decodeJSONBody(t, rec.Body.String())
errObj, _ := out["error"].(map[string]any)
if asString(errObj["code"]) != "content_filter" {
t.Fatalf("expected code=content_filter, got %#v", out)
}
}
func extractSSEEventPayload(body, targetEvent string) (map[string]any, bool) {
scanner := bufio.NewScanner(strings.NewReader(body))
matched := false

View File

@@ -13,6 +13,7 @@ import (
chimw "github.com/go-chi/chi/v5/middleware"
"ds2api/internal/auth"
"ds2api/internal/deepseek"
)
type streamStatusAuthStub struct{}
@@ -53,6 +54,10 @@ func (m streamStatusDSStub) CallCompletion(_ context.Context, _ *auth.RequestAut
return m.resp, nil
}
func (m streamStatusDSStub) DeleteSessionForToken(_ context.Context, _ string, _ string) (*deepseek.DeleteSessionResult, error) {
return &deepseek.DeleteSessionResult{Success: true}, nil
}
func (m streamStatusDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error {
return nil
}

View File

@@ -48,7 +48,7 @@ func (s *toolStreamSieveState) resetIncrementalToolState() {
}
func (s *toolStreamSieveState) noteText(content string) {
if strings.TrimSpace(content) == "" {
if content == "" {
return
}
s.recentTextTail = appendTail(s.recentTextTail, content, toolSieveContextTailLimit)

View File

@@ -71,12 +71,31 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
return prefixPart, parsed, suffixPart, true
}
// If this block does not look like an executable tool-call payload,
// pass it through as normal content (e.g. user-requested XML snippets).
if !looksLikeExecutableXMLToolCallBlock(xmlBlock, pair.open) {
return prefixPart + xmlBlock, nil, suffixPart, true
}
// Looks like XML tool syntax but failed to parse — consume it to avoid leak.
return prefixPart, nil, suffixPart, true
}
return "", nil, "", false
}
func looksLikeExecutableXMLToolCallBlock(xmlBlock, openTag string) bool {
lower := strings.ToLower(xmlBlock)
// Agent wrapper tags are always treated as internal tool-call wrappers.
switch openTag {
case "<attempt_completion", "<ask_followup_question", "<new_task":
return true
}
return strings.Contains(lower, "<tool_name") ||
strings.Contains(lower, "<parameters") ||
strings.Contains(lower, `"tool"`) ||
strings.Contains(lower, `"tool_name"`) ||
strings.Contains(lower, `"name"`)
}
// hasOpenXMLToolTag returns true if captured text contains an XML tool opening tag
// whose SPECIFIC closing tag has not appeared yet.
func hasOpenXMLToolTag(captured string) bool {

View File

@@ -78,6 +78,49 @@ func TestProcessToolSieveXMLWithLeadingText(t *testing.T) {
}
}
func TestProcessToolSievePassesThroughNonToolXMLBlock(t *testing.T) {
var state toolStreamSieveState
chunk := `<tool_call><title>示例 XML</title><body>plain text xml payload</body></tool_call>`
events := processToolSieveChunk(&state, chunk, []string{"read_file"})
events = append(events, flushToolSieve(&state, []string{"read_file"})...)
var textContent strings.Builder
toolCalls := 0
for _, evt := range events {
textContent.WriteString(evt.Content)
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 0 {
t.Fatalf("expected no tool calls for plain XML payload, got %d events=%#v", toolCalls, events)
}
if textContent.String() != chunk {
t.Fatalf("expected XML payload to pass through unchanged, got %q", textContent.String())
}
}
func TestProcessToolSieveNonToolXMLKeepsSuffixForToolParsing(t *testing.T) {
var state toolStreamSieveState
chunk := `<tool_call><title>plain xml</title></tool_call><invoke name="read_file"><parameters>{"path":"README.MD"}</parameters></invoke>`
events := processToolSieveChunk(&state, chunk, []string{"read_file"})
events = append(events, flushToolSieve(&state, []string{"read_file"})...)
var textContent strings.Builder
toolCalls := 0
for _, evt := range events {
textContent.WriteString(evt.Content)
toolCalls += len(evt.ToolCalls)
}
if !strings.Contains(textContent.String(), `<tool_call><title>plain xml</title></tool_call>`) {
t.Fatalf("expected leading non-tool XML to be preserved, got %q", textContent.String())
}
if strings.Contains(textContent.String(), `<invoke name="read_file">`) {
t.Fatalf("expected invoke tool XML to be intercepted, got %q", textContent.String())
}
if toolCalls != 1 {
t.Fatalf("expected exactly one parsed tool call from suffix, got %d events=%#v", toolCalls, events)
}
}
func TestProcessToolSievePartialXMLTagHeldBack(t *testing.T) {
var state toolStreamSieveState
// Chunk ends with a partial XML tool tag.
@@ -364,7 +407,7 @@ func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) {
func TestProcessToolSieveInterceptsAttemptCompletionLeak(t *testing.T) {
var state toolStreamSieveState
// Simulate an agent outputting attempt_completion XML tag
// Simulate an agent outputting attempt_completion XML tag
// which shouldn't leak to text output, even if it fails to parse as a valid tool.
chunks := []string{
"Done with task.\n",

View File

@@ -0,0 +1,15 @@
package openai
import "net/http"
func writeUpstreamEmptyOutputError(w http.ResponseWriter, thinking, text string, contentFilter bool) bool {
if thinking != "" || text != "" {
return false
}
if contentFilter {
writeOpenAIErrorWithCode(w, http.StatusBadRequest, "Upstream content filtered the response and returned no output.", "content_filter")
return true
}
writeOpenAIErrorWithCode(w, http.StatusBadGateway, "Upstream model returned empty output.", "upstream_empty_output")
return true
}

View File

@@ -99,10 +99,13 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque
"final_prompt": stdReq.FinalPrompt,
"thinking_enabled": stdReq.Thinking,
"search_enabled": stdReq.Search,
"tool_names": stdReq.ToolNames,
"deepseek_token": a.DeepSeekToken,
"pow_header": powHeader,
"payload": payload,
"compat": map[string]any{
"strip_reference_markers": h.compatStripReferenceMarkers(),
},
"tool_names": stdReq.ToolNames,
"deepseek_token": a.DeepSeekToken,
"pow_header": powHeader,
"payload": payload,
})
}

View File

@@ -32,6 +32,8 @@ type ConfigStore interface {
RuntimeAccountMaxQueue(defaultSize int) int
RuntimeGlobalMaxInflight(defaultSize int) int
RuntimeTokenRefreshIntervalHours() int
AutoDeleteMode() string
CompatStripReferenceMarkers() bool
AutoDeleteSessions() bool
}
@@ -41,6 +43,10 @@ type PoolController interface {
ApplyRuntimeLimits(maxInflightPerAccount, maxQueueSize, globalMaxInflight int)
}
type OpenAIChatCaller interface {
ChatCompletions(w http.ResponseWriter, r *http.Request)
}
type DeepSeekCaller interface {
Login(ctx context.Context, acc config.Account) (string, error)
CreateSession(ctx context.Context, a *auth.RequestAuth, maxAttempts int) (string, error)

View File

@@ -5,9 +5,10 @@ import (
)
type Handler struct {
Store ConfigStore
Pool PoolController
DS DeepSeekCaller
Store ConfigStore
Pool PoolController
DS DeepSeekCaller
OpenAI OpenAIChatCaller
}
func RegisterRoutes(r chi.Router, h *Handler) {
@@ -34,6 +35,7 @@ func RegisterRoutes(r chi.Router, h *Handler) {
pr.Post("/accounts/sessions/delete-all", h.deleteAllSessions)
pr.Post("/import", h.batchImport)
pr.Post("/test", h.testAPI)
pr.Post("/dev/raw-samples/capture", h.captureRawSample)
pr.Post("/vercel/sync", h.syncVercel)
pr.Get("/vercel/status", h.vercelStatus)
pr.Post("/vercel/status", h.vercelStatus)

View File

@@ -17,7 +17,6 @@ import (
func newAdminTestHandler(t *testing.T, raw string) *Handler {
t.Helper()
t.Setenv("DS2API_CONFIG_JSON", raw)
t.Setenv("CONFIG_JSON", "")
store := config.LoadStore()
return &Handler{
Store: store,

View File

@@ -13,6 +13,7 @@ import (
authn "ds2api/internal/auth"
"ds2api/internal/config"
"ds2api/internal/deepseek"
"ds2api/internal/sse"
)
@@ -157,7 +158,7 @@ func (h *Handler) testAccount(ctx context.Context, acc config.Account, model, me
result["message"] = "获取 PoW 失败: " + err.Error()
return result
}
payload := map[string]any{"chat_session_id": sessionID, "prompt": "<User>" + message, "ref_file_ids": []any{}, "thinking_enabled": thinking, "search_enabled": search}
payload := map[string]any{"chat_session_id": sessionID, "prompt": deepseek.MessagesPrepare([]map[string]any{{"role": "user", "content": message}}), "ref_file_ids": []any{}, "thinking_enabled": thinking, "search_enabled": search}
resp, err := h.DS.CallCompletion(ctx, authCtx, payload, pow, 1)
if err != nil {
result["message"] = "请求失败: " + err.Error()

View File

@@ -0,0 +1,233 @@
package admin
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"ds2api/internal/config"
"ds2api/internal/devcapture"
"ds2api/internal/rawsample"
)
func (h *Handler) captureRawSample(w http.ResponseWriter, r *http.Request) {
if h.OpenAI == nil {
writeJSON(w, http.StatusInternalServerError, map[string]any{"detail": "OpenAI handler is not configured"})
return
}
var req map[string]any
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeJSON(w, http.StatusBadRequest, map[string]any{"detail": "invalid json"})
return
}
payload, sampleID, apiKey, err := prepareRawSampleCaptureRequest(h.Store, req)
if err != nil {
writeJSON(w, http.StatusBadRequest, map[string]any{"detail": err.Error()})
return
}
body, err := json.Marshal(payload)
if err != nil {
writeJSON(w, http.StatusInternalServerError, map[string]any{"detail": "failed to encode capture request"})
return
}
traceID := rawsample.NormalizeSampleID(sampleID)
if traceID == "" {
traceID = rawsample.DefaultSampleID("capture")
}
before := devcapture.Global().Snapshot()
rec := httptest.NewRecorder()
captureReq := httptest.NewRequest(http.MethodPost, "/v1/chat/completions?__trace_id="+url.QueryEscape(traceID), bytes.NewReader(body))
captureReq.Header.Set("Authorization", "Bearer "+apiKey)
captureReq.Header.Set("Content-Type", "application/json")
h.OpenAI.ChatCompletions(rec, captureReq)
after := devcapture.Global().Snapshot()
if rec.Code >= http.StatusBadRequest {
copyHeader(w.Header(), rec.Header())
w.WriteHeader(rec.Code)
_, _ = io.Copy(w, bytes.NewReader(rec.Body.Bytes()))
return
}
captureEntries, err := collectNewCaptureEntries(before, after)
if err != nil {
writeJSON(w, http.StatusInternalServerError, map[string]any{"detail": err.Error()})
return
}
saved, err := rawsample.Persist(rawsample.PersistOptions{
RootDir: config.RawStreamSampleRoot(),
SampleID: sampleID,
Source: "admin/dev/raw-samples/capture",
Request: payload,
Capture: captureSummaryFromEntries(captureEntries),
UpstreamBody: combineCaptureBodies(captureEntries),
})
if err != nil {
writeJSON(w, http.StatusInternalServerError, map[string]any{"detail": err.Error()})
return
}
copyHeader(w.Header(), rec.Header())
w.Header().Set("X-Ds2-Sample-Id", saved.SampleID)
w.Header().Set("X-Ds2-Sample-Dir", saved.Dir)
w.Header().Set("X-Ds2-Sample-Meta", saved.MetaPath)
w.Header().Set("X-Ds2-Sample-Upstream", saved.UpstreamPath)
w.WriteHeader(rec.Code)
_, _ = io.Copy(w, bytes.NewReader(rec.Body.Bytes()))
}
func prepareRawSampleCaptureRequest(store ConfigStore, req map[string]any) (map[string]any, string, string, error) {
payload := cloneMap(req)
sampleID := strings.TrimSpace(fieldString(payload, "sample_id"))
apiKey := strings.TrimSpace(fieldString(payload, "api_key"))
for _, k := range []string{"sample_id", "api_key", "promote_default", "persist", "source"} {
delete(payload, k)
}
if apiKey == "" {
if store == nil {
return nil, "", "", fmt.Errorf("no api key provided")
}
keys := store.Keys()
if len(keys) == 0 {
return nil, "", "", fmt.Errorf("no api key available")
}
apiKey = strings.TrimSpace(keys[0])
}
if model := strings.TrimSpace(fieldString(payload, "model")); model == "" {
payload["model"] = "deepseek-chat"
}
if _, ok := payload["stream"]; !ok {
payload["stream"] = true
}
if messagesRaw, ok := payload["messages"].([]any); !ok || len(messagesRaw) == 0 {
message := strings.TrimSpace(fieldString(payload, "message"))
if message == "" {
message = "你好"
}
payload["messages"] = []map[string]any{{"role": "user", "content": message}}
}
delete(payload, "message")
if sampleID == "" {
model := strings.TrimSpace(fieldString(payload, "model"))
if model == "" {
model = "capture"
}
sampleID = rawsample.DefaultSampleID(model)
}
return payload, sampleID, apiKey, nil
}
func collectNewCaptureEntries(before, after []devcapture.Entry) ([]devcapture.Entry, error) {
beforeIDs := make(map[string]struct{}, len(before))
for _, entry := range before {
beforeIDs[entry.ID] = struct{}{}
}
entries := make([]devcapture.Entry, 0, len(after))
for _, entry := range after {
if _, ok := beforeIDs[entry.ID]; ok {
continue
}
if strings.TrimSpace(entry.ResponseBody) == "" {
continue
}
entries = append(entries, entry)
}
if len(entries) == 0 {
return nil, fmt.Errorf("no upstream capture was recorded")
}
// Snapshot order is newest-first; reverse to preserve the actual request order.
for i, j := 0, len(entries)-1; i < j; i, j = i+1, j-1 {
entries[i], entries[j] = entries[j], entries[i]
}
return entries, nil
}
func captureSummaryFromEntries(entries []devcapture.Entry) rawsample.CaptureSummary {
if len(entries) == 0 {
return rawsample.CaptureSummary{}
}
// Primary metadata comes from the first (initial) capture.
summary := rawsample.CaptureSummary{
Label: strings.TrimSpace(entries[0].Label),
URL: strings.TrimSpace(entries[0].URL),
StatusCode: entries[0].StatusCode,
}
// Record every round (initial + continuations) so replay/debug
// can reconstruct the full multi-round interaction.
totalBytes := 0
rounds := make([]rawsample.CaptureRound, 0, len(entries))
for _, entry := range entries {
n := len(entry.ResponseBody)
totalBytes += n
rounds = append(rounds, rawsample.CaptureRound{
Label: strings.TrimSpace(entry.Label),
URL: strings.TrimSpace(entry.URL),
StatusCode: entry.StatusCode,
ResponseBytes: n,
})
}
summary.ResponseBytes = totalBytes
if len(rounds) > 1 {
summary.Rounds = rounds
}
return summary
}
func combineCaptureBodies(entries []devcapture.Entry) []byte {
if len(entries) == 0 {
return nil
}
var buf bytes.Buffer
for _, entry := range entries {
if buf.Len() > 0 {
last := buf.Bytes()[buf.Len()-1]
if last != '\n' {
buf.WriteByte('\n')
}
}
buf.WriteString(entry.ResponseBody)
}
return buf.Bytes()
}
func copyHeader(dst, src http.Header) {
for k, vv := range src {
dst.Del(k)
for _, v := range vv {
dst.Add(k, v)
}
}
}
func cloneMap(in map[string]any) map[string]any {
if len(in) == 0 {
return map[string]any{}
}
out := make(map[string]any, len(in))
for k, v := range in {
out[k] = v
}
return out
}
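A quick illustration of how the capture helpers compose, as a sketch that reuses the devcapture.Entry field names seen above (the literal SSE bodies are made up for the example) and mirrors the two rounds exercised by the continuation test in the next file:

// Sketch only: collapse two upstream rounds into one SSE transcript.
func exampleCombineCaptures() []byte {
    entries := []devcapture.Entry{
        {Label: "deepseek_completion", ResponseBody: "data: {\"v\":\"hello\"}\n\n"},
        {Label: "deepseek_continue", ResponseBody: "data: {\"v\":\"continued\"}\n\n"},
    }
    // The bodies are written back to back; a separating newline is added only
    // when the previous body does not already end with one.
    return combineCaptureBodies(entries)
}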

View File

@@ -0,0 +1,232 @@
package admin
import (
"bytes"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"ds2api/internal/devcapture"
)
type stubOpenAIChatCaller struct{}
func (stubOpenAIChatCaller) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
store := devcapture.Global()
session := store.Start("deepseek_completion", "https://chat.deepseek.com/api/v0/chat/completion", "acct-test", map[string]any{"model": "deepseek-chat"})
raw := io.NopCloser(strings.NewReader(
"data: {\"v\":\"hello [reference:1]\"}\n\n" +
"data: {\"v\":\"FINISHED\",\"p\":\"response/status\"}\n\n",
))
if session != nil {
raw = session.WrapBody(raw, http.StatusOK)
}
_, _ = io.ReadAll(raw)
_ = raw.Close()
w.Header().Set("Content-Type", "text/event-stream")
w.WriteHeader(http.StatusOK)
_, _ = io.WriteString(w, "data: {\"choices\":[{\"delta\":{\"content\":\"hello\"},\"index\":0}],\"created\":1,\"id\":\"id\",\"model\":\"m\",\"object\":\"chat.completion.chunk\"}\n\n")
}
type stubOpenAIChatCallerWithContinuations struct{}
func (stubOpenAIChatCallerWithContinuations) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
recordCapturedResponse("deepseek_completion", "https://chat.deepseek.com/api/v0/chat/completion", http.StatusOK, map[string]any{"model": "deepseek-chat"}, "data: {\"v\":\"hello [reference:1]\"}\n\n"+"data: [DONE]\n\n")
recordCapturedResponse("deepseek_continue", "https://chat.deepseek.com/api/v0/chat/continue", http.StatusOK, map[string]any{"chat_session_id": "session-1", "message_id": 2}, "data: {\"v\":\"continued\"}\n\n"+"data: [DONE]\n\n")
w.Header().Set("Content-Type", "text/event-stream")
w.WriteHeader(http.StatusOK)
_, _ = io.WriteString(w, "data: {\"choices\":[{\"delta\":{\"content\":\"hello continued\"},\"index\":0}],\"created\":1,\"id\":\"id\",\"model\":\"m\",\"object\":\"chat.completion.chunk\"}\n\n")
}
type stubOpenAIChatCallerWithoutCapture struct{}
func (stubOpenAIChatCallerWithoutCapture) ChatCompletions(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("Content-Type", "text/event-stream")
w.WriteHeader(http.StatusOK)
_, _ = io.WriteString(w, "data: {\"choices\":[{\"delta\":{\"content\":\"hello\"},\"index\":0}],\"created\":1,\"id\":\"id\",\"model\":\"m\",\"object\":\"chat.completion.chunk\"}\n\n")
}
func recordCapturedResponse(label, rawURL string, statusCode int, request any, body string) {
store := devcapture.Global()
session := store.Start(label, rawURL, "acct-test", request)
raw := io.NopCloser(strings.NewReader(body))
if session != nil {
raw = session.WrapBody(raw, statusCode)
}
_, _ = io.ReadAll(raw)
_ = raw.Close()
}
func TestCaptureRawSampleWritesPersistentSample(t *testing.T) {
t.Setenv("DS2API_RAW_STREAM_SAMPLE_ROOT", t.TempDir())
devcapture.Global().Clear()
defer devcapture.Global().Clear()
h := &Handler{OpenAI: stubOpenAIChatCaller{}}
reqBody := `{
"sample_id":"My Sample 01",
"api_key":"local-key",
"model":"deepseek-chat",
"message":"广州天气",
"stream":true
}`
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/admin/dev/raw-samples/capture", strings.NewReader(reqBody))
h.captureRawSample(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
}
if got := rec.Header().Get("X-Ds2-Sample-Id"); got != "my-sample-01" {
t.Fatalf("expected sample id header my-sample-01, got %q", got)
}
if got := rec.Header().Get("X-Ds2-Sample-Upstream"); got != filepath.Join(os.Getenv("DS2API_RAW_STREAM_SAMPLE_ROOT"), "my-sample-01", "upstream.stream.sse") {
t.Fatalf("unexpected sample upstream header: %q", got)
}
if !strings.Contains(rec.Body.String(), `"content":"hello"`) {
t.Fatalf("expected proxied openai output, got %s", rec.Body.String())
}
sampleDir := filepath.Join(os.Getenv("DS2API_RAW_STREAM_SAMPLE_ROOT"), "my-sample-01")
if _, err := os.Stat(sampleDir); err != nil {
t.Fatalf("sample dir missing: %v", err)
}
metaBytes, err := os.ReadFile(filepath.Join(sampleDir, "meta.json"))
if err != nil {
t.Fatalf("read meta: %v", err)
}
var meta map[string]any
if err := json.Unmarshal(metaBytes, &meta); err != nil {
t.Fatalf("decode meta: %v", err)
}
if meta["sample_id"] != "my-sample-01" {
t.Fatalf("unexpected meta sample_id: %#v", meta["sample_id"])
}
capture, _ := meta["capture"].(map[string]any)
if capture == nil {
t.Fatalf("missing capture meta: %#v", meta)
}
if got := int(capture["response_bytes"].(float64)); got == 0 {
t.Fatalf("expected capture bytes to be recorded, got %#v", capture)
}
if _, ok := meta["processed"]; ok {
t.Fatalf("unexpected processed meta: %#v", meta["processed"])
}
}
func TestCaptureRawSampleCombinesContinuationCaptures(t *testing.T) {
t.Setenv("DS2API_RAW_STREAM_SAMPLE_ROOT", t.TempDir())
devcapture.Global().Clear()
defer devcapture.Global().Clear()
h := &Handler{OpenAI: stubOpenAIChatCallerWithContinuations{}}
reqBody := `{
"sample_id":"My Sample 02",
"api_key":"local-key",
"model":"deepseek-chat",
"message":"广州天气",
"stream":true
}`
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/admin/dev/raw-samples/capture", strings.NewReader(reqBody))
h.captureRawSample(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
}
sampleDir := filepath.Join(os.Getenv("DS2API_RAW_STREAM_SAMPLE_ROOT"), "my-sample-02")
upstreamBytes, err := os.ReadFile(filepath.Join(sampleDir, "upstream.stream.sse"))
if err != nil {
t.Fatalf("read upstream: %v", err)
}
upstream := string(upstreamBytes)
if !strings.Contains(upstream, "hello [reference:1]") {
t.Fatalf("expected initial capture in combined upstream, got %s", upstream)
}
if !strings.Contains(upstream, "continued") {
t.Fatalf("expected continuation capture in combined upstream, got %s", upstream)
}
if strings.Index(upstream, "hello [reference:1]") > strings.Index(upstream, "continued") {
t.Fatalf("expected initial capture before continuation, got %s", upstream)
}
metaBytes, err := os.ReadFile(filepath.Join(sampleDir, "meta.json"))
if err != nil {
t.Fatalf("read meta: %v", err)
}
var meta map[string]any
if err := json.Unmarshal(metaBytes, &meta); err != nil {
t.Fatalf("decode meta: %v", err)
}
capture, _ := meta["capture"].(map[string]any)
if capture == nil {
t.Fatalf("missing capture meta: %#v", meta)
}
if got := int(capture["response_bytes"].(float64)); got != len(upstreamBytes) {
t.Fatalf("expected combined response_bytes %d, got %#v", len(upstreamBytes), capture["response_bytes"])
}
rounds, _ := capture["rounds"].([]any)
if len(rounds) != 2 {
t.Fatalf("expected 2 capture rounds, got %d: %#v", len(rounds), capture)
}
r0, _ := rounds[0].(map[string]any)
r1, _ := rounds[1].(map[string]any)
if r0["label"] != "deepseek_completion" {
t.Fatalf("expected first round label deepseek_completion, got %v", r0["label"])
}
if r1["label"] != "deepseek_continue" {
t.Fatalf("expected second round label deepseek_continue, got %v", r1["label"])
}
}
func TestCaptureRawSampleReturnsErrorWhenNoNewCaptureRecorded(t *testing.T) {
root := t.TempDir()
t.Setenv("DS2API_RAW_STREAM_SAMPLE_ROOT", root)
devcapture.Global().Clear()
defer devcapture.Global().Clear()
recordCapturedResponse("preexisting", "https://chat.deepseek.com/api/v0/chat/completion", http.StatusOK, map[string]any{"model": "deepseek-chat"}, "data: {\"v\":\"old\"}\n\n")
h := &Handler{OpenAI: stubOpenAIChatCallerWithoutCapture{}}
reqBody := `{
"sample_id":"My Sample 03",
"api_key":"local-key",
"model":"deepseek-chat",
"message":"广州天气",
"stream":true
}`
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/admin/dev/raw-samples/capture", strings.NewReader(reqBody))
h.captureRawSample(rec, req)
if rec.Code != http.StatusInternalServerError {
t.Fatalf("expected 500, got %d body=%s", rec.Code, rec.Body.String())
}
if !strings.Contains(rec.Body.String(), "no upstream capture was recorded") {
t.Fatalf("expected no-capture error, got %s", rec.Body.String())
}
if _, err := os.Stat(filepath.Join(root, "my-sample-03")); !os.IsNotExist(err) {
t.Fatalf("expected no sample dir to be created, stat err=%v", err)
}
}
func TestCombineCaptureBodiesPreservesOrderAndSeparators(t *testing.T) {
entries := []devcapture.Entry{
{ResponseBody: "first"},
{ResponseBody: "second"},
}
got := combineCaptureBodies(entries)
if !bytes.Equal(got, []byte("first\nsecond")) {
t.Fatalf("unexpected combined body: %q", string(got))
}
}

View File

@@ -21,10 +21,11 @@ func boolFrom(v any) bool {
}
}
func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, *config.AutoDeleteConfig, map[string]string, map[string]string, error) {
func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *config.RuntimeConfig, *config.CompatConfig, *config.ResponsesConfig, *config.EmbeddingsConfig, *config.AutoDeleteConfig, map[string]string, map[string]string, error) {
var (
adminCfg *config.AdminConfig
runtimeCfg *config.RuntimeConfig
compatCfg *config.CompatConfig
respCfg *config.ResponsesConfig
embCfg *config.EmbeddingsConfig
autoDeleteCfg *config.AutoDeleteConfig
@@ -36,8 +37,8 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
cfg := &config.AdminConfig{}
if v, exists := raw["jwt_expire_hours"]; exists {
n := intFrom(v)
if n < 1 || n > 720 {
return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("admin.jwt_expire_hours must be between 1 and 720")
if err := config.ValidateIntRange("admin.jwt_expire_hours", n, 1, 720, true); err != nil {
return nil, nil, nil, nil, nil, nil, nil, nil, err
}
cfg.JWTExpireHours = n
}
@@ -48,44 +49,57 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
cfg := &config.RuntimeConfig{}
if v, exists := raw["account_max_inflight"]; exists {
n := intFrom(v)
if n < 1 || n > 256 {
return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.account_max_inflight must be between 1 and 256")
if err := config.ValidateIntRange("runtime.account_max_inflight", n, 1, 256, true); err != nil {
return nil, nil, nil, nil, nil, nil, nil, nil, err
}
cfg.AccountMaxInflight = n
}
if v, exists := raw["account_max_queue"]; exists {
n := intFrom(v)
if n < 1 || n > 200000 {
return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.account_max_queue must be between 1 and 200000")
if err := config.ValidateIntRange("runtime.account_max_queue", n, 1, 200000, true); err != nil {
return nil, nil, nil, nil, nil, nil, nil, nil, err
}
cfg.AccountMaxQueue = n
}
if v, exists := raw["global_max_inflight"]; exists {
n := intFrom(v)
if n < 1 || n > 200000 {
return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be between 1 and 200000")
if err := config.ValidateIntRange("runtime.global_max_inflight", n, 1, 200000, true); err != nil {
return nil, nil, nil, nil, nil, nil, nil, nil, err
}
cfg.GlobalMaxInflight = n
}
if v, exists := raw["token_refresh_interval_hours"]; exists {
n := intFrom(v)
if n < 1 || n > 720 {
return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.token_refresh_interval_hours must be between 1 and 720")
if err := config.ValidateIntRange("runtime.token_refresh_interval_hours", n, 1, 720, true); err != nil {
return nil, nil, nil, nil, nil, nil, nil, nil, err
}
cfg.TokenRefreshIntervalHours = n
}
if cfg.AccountMaxInflight > 0 && cfg.GlobalMaxInflight > 0 && cfg.GlobalMaxInflight < cfg.AccountMaxInflight {
return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
}
runtimeCfg = cfg
}
if raw, ok := req["compat"].(map[string]any); ok {
cfg := &config.CompatConfig{}
if v, exists := raw["wide_input_strict_output"]; exists {
b := boolFrom(v)
cfg.WideInputStrictOutput = &b
}
if v, exists := raw["strip_reference_markers"]; exists {
b := boolFrom(v)
cfg.StripReferenceMarkers = &b
}
compatCfg = cfg
}
if raw, ok := req["responses"].(map[string]any); ok {
cfg := &config.ResponsesConfig{}
if v, exists := raw["store_ttl_seconds"]; exists {
n := intFrom(v)
if n < 30 || n > 86400 {
return nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("responses.store_ttl_seconds must be between 30 and 86400")
if err := config.ValidateIntRange("responses.store_ttl_seconds", n, 30, 86400, true); err != nil {
return nil, nil, nil, nil, nil, nil, nil, nil, err
}
cfg.StoreTTLSeconds = n
}
@@ -96,6 +110,9 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
cfg := &config.EmbeddingsConfig{}
if v, exists := raw["provider"]; exists {
p := strings.TrimSpace(fmt.Sprintf("%v", v))
if err := config.ValidateTrimmedString("embeddings.provider", p, false); err != nil {
return nil, nil, nil, nil, nil, nil, nil, nil, err
}
cfg.Provider = p
}
embCfg = cfg
@@ -127,11 +144,21 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi
if raw, ok := req["auto_delete"].(map[string]any); ok {
cfg := &config.AutoDeleteConfig{}
if v, exists := raw["mode"]; exists {
mode := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", v)))
if err := config.ValidateAutoDeleteMode(mode); err != nil {
return nil, nil, nil, nil, nil, nil, nil, nil, err
}
if mode == "" {
mode = "none"
}
cfg.Mode = mode
}
if v, exists := raw["sessions"]; exists {
cfg.Sessions = boolFrom(v)
}
autoDeleteCfg = cfg
}
return adminCfg, runtimeCfg, respCfg, embCfg, autoDeleteCfg, claudeMap, aliasMap, nil
return adminCfg, runtimeCfg, compatCfg, respCfg, embCfg, autoDeleteCfg, claudeMap, aliasMap, nil
}
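For reference, a minimal client-side sketch of a settings payload exercising the new compat and auto_delete fields; the listen address is a placeholder and the admin authentication step is omitted, but the field names follow the parser above:
package main
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
)
func main() {
// Field names mirror parseSettingsUpdateRequest; values are illustrative.
payload := map[string]any{
"compat": map[string]any{
"wide_input_strict_output": true,
"strip_reference_markers":  false,
},
"auto_delete": map[string]any{"mode": "single"},
}
b, _ := json.Marshal(payload)
// Placeholder address; a real request also needs admin auth.
req, _ := http.NewRequest(http.MethodPut, "http://127.0.0.1:5001/admin/settings", bytes.NewReader(b))
req.Header.Set("Content-Type", "application/json")
fmt.Println(req.Method, req.URL.Path, string(b))
}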

View File

@@ -26,6 +26,7 @@ func (h *Handler) getSettings(w http.ResponseWriter, _ *http.Request) {
"global_max_inflight": h.Store.RuntimeGlobalMaxInflight(recommended),
"token_refresh_interval_hours": h.Store.RuntimeTokenRefreshIntervalHours(),
},
"compat": snap.Compat,
"responses": snap.Responses,
"embeddings": snap.Embeddings,
"auto_delete": snap.AutoDelete,

View File

@@ -82,6 +82,28 @@ func TestUpdateSettingsValidationRejectsTokenRefreshInterval(t *testing.T) {
}
}
func TestUpdateSettingsAllowsEmptyEmbeddingsProvider(t *testing.T) {
h := newAdminTestHandler(t, `{"keys":["k1"]}`)
payload := map[string]any{
"responses": map[string]any{
"store_ttl_seconds": 600,
},
"embeddings": map[string]any{
"provider": "",
},
}
b, _ := json.Marshal(payload)
req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
rec := httptest.NewRecorder()
h.updateSettings(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
}
if got := h.Store.Snapshot().Responses.StoreTTLSeconds; got != 600 {
t.Fatalf("store_ttl_seconds=%d want=600", got)
}
}
func TestUpdateSettingsValidationWithMergedRuntimeSnapshot(t *testing.T) {
h := newAdminTestHandler(t, `{
"keys":["k1"],
@@ -132,6 +154,31 @@ func TestUpdateSettingsWithoutRuntimeSkipsMergedRuntimeValidation(t *testing.T)
}
}
func TestUpdateSettingsAutoDeleteMode(t *testing.T) {
h := newAdminTestHandler(t, `{"keys":["k1"],"auto_delete":{"sessions":true}}`)
payload := map[string]any{
"auto_delete": map[string]any{
"mode": "single",
},
}
b, _ := json.Marshal(payload)
req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b))
rec := httptest.NewRecorder()
h.updateSettings(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
}
snap := h.Store.Snapshot()
if got := snap.AutoDelete.Mode; got != "single" {
t.Fatalf("auto_delete.mode=%q want=single", got)
}
if got := h.Store.AutoDeleteMode(); got != "single" {
t.Fatalf("AutoDeleteMode()=%q want=single", got)
}
}
func TestUpdateSettingsHotReloadRuntime(t *testing.T) {
h := newAdminTestHandler(t, `{
"keys":["k1"],

View File

@@ -17,7 +17,7 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
return
}
adminCfg, runtimeCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, claudeMap, aliasMap, err := parseSettingsUpdateRequest(req)
adminCfg, runtimeCfg, compatCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, claudeMap, aliasMap, err := parseSettingsUpdateRequest(req)
if err != nil {
writeJSON(w, http.StatusBadRequest, map[string]any{"detail": err.Error()})
return
@@ -49,6 +49,14 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
c.Runtime.TokenRefreshIntervalHours = runtimeCfg.TokenRefreshIntervalHours
}
}
if compatCfg != nil {
if compatCfg.WideInputStrictOutput != nil {
c.Compat.WideInputStrictOutput = compatCfg.WideInputStrictOutput
}
if compatCfg.StripReferenceMarkers != nil {
c.Compat.StripReferenceMarkers = compatCfg.StripReferenceMarkers
}
}
if responsesCfg != nil && responsesCfg.StoreTTLSeconds > 0 {
c.Responses.StoreTTLSeconds = responsesCfg.StoreTTLSeconds
}
@@ -56,6 +64,7 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) {
c.Embeddings.Provider = strings.TrimSpace(embeddingsCfg.Provider)
}
if autoDeleteCfg != nil {
c.AutoDelete.Mode = autoDeleteCfg.Mode
c.AutoDelete.Sessions = autoDeleteCfg.Sessions
}
if claudeMap != nil {

View File

@@ -1,7 +1,6 @@
package admin
import (
"fmt"
"strings"
"ds2api/internal/config"
@@ -16,36 +15,9 @@ func normalizeSettingsConfig(c *config.Config) {
}
func validateSettingsConfig(c config.Config) error {
if c.Admin.JWTExpireHours != 0 && (c.Admin.JWTExpireHours < 1 || c.Admin.JWTExpireHours > 720) {
return fmt.Errorf("admin.jwt_expire_hours must be between 1 and 720")
}
if err := validateRuntimeSettings(c.Runtime); err != nil {
return err
}
if c.Responses.StoreTTLSeconds != 0 && (c.Responses.StoreTTLSeconds < 30 || c.Responses.StoreTTLSeconds > 86400) {
return fmt.Errorf("responses.store_ttl_seconds must be between 30 and 86400")
}
if c.Embeddings.Provider != "" && strings.TrimSpace(c.Embeddings.Provider) == "" {
return fmt.Errorf("embeddings.provider cannot be empty")
}
return nil
return config.ValidateConfig(c)
}
func validateRuntimeSettings(runtime config.RuntimeConfig) error {
if runtime.AccountMaxInflight != 0 && (runtime.AccountMaxInflight < 1 || runtime.AccountMaxInflight > 256) {
return fmt.Errorf("runtime.account_max_inflight must be between 1 and 256")
}
if runtime.AccountMaxQueue != 0 && (runtime.AccountMaxQueue < 1 || runtime.AccountMaxQueue > 200000) {
return fmt.Errorf("runtime.account_max_queue must be between 1 and 200000")
}
if runtime.GlobalMaxInflight != 0 && (runtime.GlobalMaxInflight < 1 || runtime.GlobalMaxInflight > 200000) {
return fmt.Errorf("runtime.global_max_inflight must be between 1 and 200000")
}
if runtime.TokenRefreshIntervalHours != 0 && (runtime.TokenRefreshIntervalHours < 1 || runtime.TokenRefreshIntervalHours > 720) {
return fmt.Errorf("runtime.token_refresh_interval_hours must be between 1 and 720")
}
if runtime.AccountMaxInflight > 0 && runtime.GlobalMaxInflight > 0 && runtime.GlobalMaxInflight < runtime.AccountMaxInflight {
return fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
}
return nil
return config.ValidateRuntimeConfig(runtime)
}

View File

@@ -17,7 +17,6 @@ import (
func newHTTPAdminHarness(t *testing.T, rawConfig string, ds DeepSeekCaller) http.Handler {
t.Helper()
t.Setenv("DS2API_CONFIG_JSON", rawConfig)
t.Setenv("CONFIG_JSON", "")
store := config.LoadStore()
h := &Handler{
Store: store,

View File

@@ -32,23 +32,36 @@ func TestGoCompatSSEFixtures(t *testing.T) {
mustLoadJSON(t, fixturePath, &fixture)
var expected struct {
Parts []map[string]any `json:"parts"`
Finished bool `json:"finished"`
NewType string `json:"new_type"`
Parts []map[string]any `json:"parts"`
Finished bool `json:"finished"`
NewType string `json:"new_type"`
ContentFilter bool `json:"content_filter"`
OutputTokens int `json:"output_tokens"`
ErrorMessage string `json:"error_message"`
}
mustLoadJSON(t, expectedPath, &expected)
parts, finished, newType := sse.ParseSSEChunkForContent(fixture.Chunk, fixture.ThinkingEnable, fixture.CurrentType)
gotParts := make([]map[string]any, 0, len(parts))
for _, p := range parts {
raw, err := json.Marshal(fixture.Chunk)
if err != nil {
t.Fatalf("marshal fixture %s failed: %v", name, err)
}
res := sse.ParseDeepSeekContentLine(append([]byte("data: "), raw...), fixture.ThinkingEnable, fixture.CurrentType)
gotParts := make([]map[string]any, 0, len(res.Parts))
for _, p := range res.Parts {
gotParts = append(gotParts, map[string]any{
"text": p.Text,
"type": p.Type,
})
}
if !reflect.DeepEqual(gotParts, expected.Parts) || finished != expected.Finished || newType != expected.NewType {
t.Fatalf("fixture %s mismatch:\n got parts=%#v finished=%v newType=%q\nwant parts=%#v finished=%v newType=%q",
name, gotParts, finished, newType, expected.Parts, expected.Finished, expected.NewType)
if !reflect.DeepEqual(gotParts, expected.Parts) ||
res.Stop != expected.Finished ||
res.NextType != expected.NewType ||
res.ContentFilter != expected.ContentFilter ||
res.OutputTokens != expected.OutputTokens ||
res.ErrorMessage != expected.ErrorMessage {
t.Fatalf("fixture %s mismatch:\n got parts=%#v finished=%v newType=%q contentFilter=%v outputTokens=%d errorMessage=%q\nwant parts=%#v finished=%v newType=%q contentFilter=%v outputTokens=%d errorMessage=%q",
name, gotParts, res.Stop, res.NextType, res.ContentFilter, res.OutputTokens, res.ErrorMessage,
expected.Parts, expected.Finished, expected.NewType, expected.ContentFilter, expected.OutputTokens, expected.ErrorMessage)
}
}
}

View File

@@ -35,7 +35,7 @@ func (c Config) MarshalJSON() ([]byte, error) {
if c.Runtime.AccountMaxInflight > 0 || c.Runtime.AccountMaxQueue > 0 || c.Runtime.GlobalMaxInflight > 0 || c.Runtime.TokenRefreshIntervalHours > 0 {
m["runtime"] = c.Runtime
}
if c.Compat.WideInputStrictOutput != nil {
if c.Compat.WideInputStrictOutput != nil || c.Compat.StripReferenceMarkers != nil {
m["compat"] = c.Compat
}
if c.Responses.StoreTTLSeconds > 0 {
@@ -137,6 +137,7 @@ func (c Config) Clone() Config {
Runtime: c.Runtime,
Compat: CompatConfig{
WideInputStrictOutput: cloneBoolPtr(c.Compat.WideInputStrictOutput),
StripReferenceMarkers: cloneBoolPtr(c.Compat.StripReferenceMarkers),
},
Responses: c.Responses,
Embeddings: c.Embeddings,

View File

@@ -52,6 +52,7 @@ func (c *Config) DropInvalidAccounts() {
type CompatConfig struct {
WideInputStrictOutput *bool `json:"wide_input_strict_output,omitempty"`
StripReferenceMarkers *bool `json:"strip_reference_markers,omitempty"`
}
type AdminConfig struct {
@@ -76,5 +77,6 @@ type EmbeddingsConfig struct {
}
type AutoDeleteConfig struct {
Sessions bool `json:"sessions"`
Mode string `json:"mode,omitempty"`
Sessions bool `json:"sessions,omitempty"`
}

View File

@@ -97,6 +97,8 @@ func TestLowerFunction(t *testing.T) {
// ─── Config.MarshalJSON / UnmarshalJSON roundtrip ────────────────────
func TestConfigJSONRoundtrip(t *testing.T) {
trueVal := true
falseVal := false
cfg := Config{
Keys: []string{"key1", "key2"},
Accounts: []Account{{Email: "user@example.com", Password: "pass", Token: "tok"}},
@@ -104,9 +106,16 @@ func TestConfigJSONRoundtrip(t *testing.T) {
"fast": "deepseek-chat",
"slow": "deepseek-reasoner",
},
AutoDelete: AutoDeleteConfig{
Mode: "single",
},
Runtime: RuntimeConfig{
TokenRefreshIntervalHours: 12,
},
Compat: CompatConfig{
WideInputStrictOutput: &trueVal,
StripReferenceMarkers: &falseVal,
},
VercelSyncHash: "hash123",
VercelSyncTime: 1234567890,
AdditionalFields: map[string]any{
@@ -136,6 +145,15 @@ func TestConfigJSONRoundtrip(t *testing.T) {
if decoded.Runtime.TokenRefreshIntervalHours != 12 {
t.Fatalf("unexpected runtime refresh interval: %#v", decoded.Runtime.TokenRefreshIntervalHours)
}
if decoded.AutoDelete.Mode != "single" {
t.Fatalf("unexpected auto delete mode: %#v", decoded.AutoDelete.Mode)
}
if decoded.Compat.WideInputStrictOutput == nil || !*decoded.Compat.WideInputStrictOutput {
t.Fatalf("unexpected compat wide_input_strict_output: %#v", decoded.Compat.WideInputStrictOutput)
}
if decoded.Compat.StripReferenceMarkers == nil || *decoded.Compat.StripReferenceMarkers {
t.Fatalf("unexpected compat strip_reference_markers: %#v", decoded.Compat.StripReferenceMarkers)
}
if decoded.VercelSyncHash != "hash123" {
t.Fatalf("unexpected vercel sync hash: %q", decoded.VercelSyncHash)
}
@@ -144,6 +162,29 @@ func TestConfigJSONRoundtrip(t *testing.T) {
}
}
func TestAutoDeleteModeResolution(t *testing.T) {
tests := []struct {
name string
cfg AutoDeleteConfig
want string
}{
{name: "default", cfg: AutoDeleteConfig{}, want: "none"},
{name: "legacy all", cfg: AutoDeleteConfig{Sessions: true}, want: "all"},
{name: "single", cfg: AutoDeleteConfig{Mode: "single"}, want: "single"},
{name: "all", cfg: AutoDeleteConfig{Mode: "all"}, want: "all"},
{name: "none", cfg: AutoDeleteConfig{Mode: "none"}, want: "none"},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
store := &Store{cfg: Config{AutoDelete: tc.cfg}}
if got := store.AutoDeleteMode(); got != tc.want {
t.Fatalf("AutoDeleteMode()=%q want=%q", got, tc.want)
}
})
}
}
func TestConfigUnmarshalJSONPreservesUnknownFields(t *testing.T) {
raw := `{"keys":["k1"],"accounts":[],"my_custom_field":"hello","number_field":42}`
var cfg Config
@@ -162,12 +203,16 @@ func TestConfigUnmarshalJSONPreservesUnknownFields(t *testing.T) {
// ─── Config.Clone ────────────────────────────────────────────────────
func TestConfigCloneIsDeepCopy(t *testing.T) {
falseVal := false
cfg := Config{
Keys: []string{"key1"},
Accounts: []Account{{Email: "user@test.com", Token: "token"}},
ClaudeMapping: map[string]string{
"fast": "deepseek-chat",
},
Compat: CompatConfig{
StripReferenceMarkers: &falseVal,
},
AdditionalFields: map[string]any{"custom": "value"},
}
@@ -177,6 +222,9 @@ func TestConfigCloneIsDeepCopy(t *testing.T) {
cfg.Keys[0] = "modified"
cfg.Accounts[0].Email = "modified@test.com"
cfg.ClaudeMapping["fast"] = "modified-model"
if cfg.Compat.StripReferenceMarkers != nil {
*cfg.Compat.StripReferenceMarkers = true
}
// Cloned should not be affected
if cloned.Keys[0] != "key1" {
@@ -188,6 +236,9 @@ func TestConfigCloneIsDeepCopy(t *testing.T) {
if cloned.ClaudeMapping["fast"] != "deepseek-chat" {
t.Fatalf("clone claude mapping was affected: %#v", cloned.ClaudeMapping)
}
if cloned.Compat.StripReferenceMarkers == nil || *cloned.Compat.StripReferenceMarkers {
t.Fatalf("clone compat was affected: %#v", cloned.Compat.StripReferenceMarkers)
}
}
func TestConfigCloneNilMaps(t *testing.T) {
@@ -359,6 +410,39 @@ func TestStoreCompatWideInputStrictOutputCanDisable(t *testing.T) {
}
}
func TestStoreCompatStripReferenceMarkersDefaultTrue(t *testing.T) {
t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[]}`)
store := LoadStore()
if !store.CompatStripReferenceMarkers() {
t.Fatal("expected default strip_reference_markers=true when unset")
}
}
func TestStoreCompatStripReferenceMarkersCanDisable(t *testing.T) {
t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[],"compat":{"strip_reference_markers":false}}`)
store := LoadStore()
if store.CompatStripReferenceMarkers() {
t.Fatal("expected strip_reference_markers=false when explicitly configured")
}
snap := store.Snapshot()
data, err := snap.MarshalJSON()
if err != nil {
t.Fatalf("marshal failed: %v", err)
}
var out map[string]any
if err := json.Unmarshal(data, &out); err != nil {
t.Fatalf("decode failed: %v", err)
}
rawCompat, ok := out["compat"].(map[string]any)
if !ok {
t.Fatalf("expected compat in marshaled output, got %#v", out)
}
if rawCompat["strip_reference_markers"] != false {
t.Fatalf("expected explicit false in compat, got %#v", rawCompat)
}
}
func TestStoreIsEnvBacked(t *testing.T) {
t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[]}`)
store := LoadStore()

View File

@@ -67,7 +67,6 @@ func TestLoadStorePreservesFileBackedTokensForRuntime(t *testing.T) {
}
t.Setenv("DS2API_CONFIG_JSON", "")
t.Setenv("CONFIG_JSON", "")
t.Setenv("DS2API_CONFIG_PATH", tmp.Name())
store := LoadStore()
@@ -80,6 +79,31 @@ func TestLoadStorePreservesFileBackedTokensForRuntime(t *testing.T) {
}
}
func TestLoadStoreIgnoresLegacyConfigJSONEnv(t *testing.T) {
tmp, err := os.CreateTemp(t.TempDir(), "config-*.json")
if err != nil {
t.Fatalf("create temp config: %v", err)
}
path := tmp.Name()
_ = tmp.Close()
_ = os.Remove(path)
t.Setenv("DS2API_CONFIG_JSON", "")
t.Setenv("CONFIG_JSON", `{"keys":["legacy-key"],"accounts":[{"email":"legacy@example.com","password":"p"}]}`)
t.Setenv("DS2API_CONFIG_PATH", path)
store := LoadStore()
if store.HasEnvConfigSource() {
t.Fatal("expected legacy CONFIG_JSON to be ignored")
}
if store.IsEnvBacked() {
t.Fatal("expected store to remain file-backed/empty when only CONFIG_JSON is set")
}
if len(store.Keys()) != 0 || len(store.Accounts()) != 0 {
t.Fatalf("expected ignored legacy env to leave store empty, got keys=%d accounts=%d", len(store.Keys()), len(store.Accounts()))
}
}
func TestEnvBackedStoreWritebackBootstrapsMissingConfigFile(t *testing.T) {
tmp, err := os.CreateTemp(t.TempDir(), "config-*.json")
if err != nil {
@@ -90,7 +114,6 @@ func TestEnvBackedStoreWritebackBootstrapsMissingConfigFile(t *testing.T) {
_ = os.Remove(path)
t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[{"email":"seed@example.com","password":"p"}]}`)
t.Setenv("CONFIG_JSON", "")
t.Setenv("DS2API_CONFIG_PATH", path)
t.Setenv("DS2API_ENV_WRITEBACK", "1")
@@ -135,7 +158,6 @@ func TestEnvBackedStoreWritebackDoesNotBootstrapOnInvalidEnvJSON(t *testing.T) {
_ = os.Remove(path)
t.Setenv("DS2API_CONFIG_JSON", "{invalid-json")
t.Setenv("CONFIG_JSON", "")
t.Setenv("DS2API_CONFIG_PATH", path)
t.Setenv("DS2API_ENV_WRITEBACK", "1")
@@ -154,6 +176,56 @@ func TestEnvBackedStoreWritebackDoesNotBootstrapOnInvalidEnvJSON(t *testing.T) {
}
}
func TestEnvBackedStoreWritebackDoesNotBootstrapOnInvalidSemanticConfig(t *testing.T) {
tmp, err := os.CreateTemp(t.TempDir(), "config-*.json")
if err != nil {
t.Fatalf("create temp config: %v", err)
}
path := tmp.Name()
_ = tmp.Close()
_ = os.Remove(path)
t.Setenv("DS2API_CONFIG_JSON", `{
"keys":["k1"],
"accounts":[{"email":"seed@example.com","password":"p"}],
"runtime":{"account_max_inflight":300}
}`)
t.Setenv("DS2API_CONFIG_PATH", path)
t.Setenv("DS2API_ENV_WRITEBACK", "1")
cfg, fromEnv, loadErr := loadConfig()
if loadErr == nil {
t.Fatalf("expected loadConfig error for invalid runtime config")
}
if !fromEnv {
t.Fatalf("expected fromEnv=true when env config is the source")
}
if !strings.Contains(loadErr.Error(), "runtime.account_max_inflight") {
t.Fatalf("expected runtime validation error, got %v", loadErr)
}
if len(cfg.Keys) != 1 || len(cfg.Accounts) != 1 {
t.Fatalf("expected env config to be parsed before validation, got keys=%d accounts=%d", len(cfg.Keys), len(cfg.Accounts))
}
if _, statErr := os.Stat(path); !errors.Is(statErr, os.ErrNotExist) {
t.Fatalf("expected invalid config not to be bootstrapped, stat err=%v", statErr)
}
}
func TestLoadStoreWithErrorRejectsInvalidRuntimeConfig(t *testing.T) {
t.Setenv("DS2API_CONFIG_JSON", `{
"keys":["k1"],
"accounts":[{"email":"u@example.com","password":"p"}],
"runtime":{"account_max_inflight":300}
}`)
t.Setenv("DS2API_ENV_WRITEBACK", "0")
if _, err := LoadStoreWithError(); err == nil {
t.Fatal("expected LoadStoreWithError to reject invalid runtime config")
} else if !strings.Contains(err.Error(), "runtime.account_max_inflight") {
t.Fatalf("expected runtime validation error, got %v", err)
}
}
func TestEnvBackedStoreWritebackFallsBackToPersistedFileOnInvalidEnvJSON(t *testing.T) {
tmp, err := os.CreateTemp(t.TempDir(), "config-*.json")
if err != nil {
@@ -166,7 +238,6 @@ func TestEnvBackedStoreWritebackFallsBackToPersistedFileOnInvalidEnvJSON(t *test
_ = tmp.Close()
t.Setenv("DS2API_CONFIG_JSON", "{invalid-json")
t.Setenv("CONFIG_JSON", "")
t.Setenv("DS2API_CONFIG_PATH", path)
t.Setenv("DS2API_ENV_WRITEBACK", "1")
@@ -265,7 +336,6 @@ func TestParseConfigStringSupportsRawURLBase64(t *testing.T) {
func TestLoadConfigOnVercelWithoutConfigFileFallsBackToMemory(t *testing.T) {
t.Setenv("VERCEL", "1")
t.Setenv("DS2API_CONFIG_JSON", "")
t.Setenv("CONFIG_JSON", "")
t.Setenv("DS2API_CONFIG_PATH", "testdata/does-not-exist.json")
cfg, fromEnv, err := loadConfig()
@@ -293,7 +363,6 @@ func TestAccountTestStatusIsRuntimeOnlyAndNotPersisted(t *testing.T) {
}
t.Setenv("DS2API_CONFIG_JSON", "")
t.Setenv("CONFIG_JSON", "")
t.Setenv("DS2API_CONFIG_PATH", tmp.Name())
store := LoadStore()

137
internal/config/dotenv.go Normal file
View File

@@ -0,0 +1,137 @@
package config
import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
)
// LoadDotEnv loads environment variables from the .env file resolved under
// BaseDir(), without overriding variables that are already set.
func LoadDotEnv() error {
return loadDotEnvFromPath(filepath.Join(BaseDir(), ".env"))
}
func loadDotEnvFromPath(path string) error {
content, err := os.ReadFile(path)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return nil
}
return err
}
lines := strings.Split(strings.ReplaceAll(string(content), "\r\n", "\n"), "\n")
for i, rawLine := range lines {
line := strings.TrimSpace(rawLine)
if i == 0 {
line = strings.TrimPrefix(line, "\ufeff")
}
if line == "" || strings.HasPrefix(line, "#") {
continue
}
if strings.HasPrefix(line, "export ") {
line = strings.TrimSpace(strings.TrimPrefix(line, "export "))
}
key, value, ok := strings.Cut(line, "=")
if !ok {
return fmt.Errorf("%s:%d invalid env assignment", path, i+1)
}
key = strings.TrimSpace(key)
if key == "" {
return fmt.Errorf("%s:%d empty env key", path, i+1)
}
if _, exists := os.LookupEnv(key); exists {
continue
}
if err := os.Setenv(key, normalizeDotEnvValue(trimDotEnvValue(strings.TrimSpace(value)))); err != nil {
return fmt.Errorf("%s:%d set env %q: %w", path, i+1, key, err)
}
}
return nil
}
// trimDotEnvValue preserves quoted values but drops Compose-style inline
// comments from unquoted values.
func trimDotEnvValue(raw string) string {
if raw == "" {
return raw
}
switch raw[0] {
case '"':
if trimmed, ok := trimQuotedDotEnvValue(raw, '"'); ok {
return trimmed
}
case '\'':
if trimmed, ok := trimQuotedDotEnvValue(raw, '\''); ok {
return trimmed
}
default:
if idx := inlineDotEnvCommentStart(raw); idx >= 0 {
return strings.TrimSpace(raw[:idx])
}
}
return raw
}
func trimQuotedDotEnvValue(raw string, quote byte) (string, bool) {
escaped := false
for i := 1; i < len(raw); i++ {
ch := raw[i]
if quote == '"' && escaped {
escaped = false
continue
}
if quote == '"' && ch == '\\' {
escaped = true
continue
}
if ch == quote {
return strings.TrimSpace(raw[:i+1]), true
}
}
return raw, false
}
func inlineDotEnvCommentStart(raw string) int {
for i := 1; i < len(raw); i++ {
if raw[i] == '#' && isDotEnvCommentSpacer(raw[i-1]) {
return i
}
}
return -1
}
func isDotEnvCommentSpacer(b byte) bool {
return b == ' ' || b == '\t'
}
func normalizeDotEnvValue(raw string) string {
if len(raw) < 2 {
return raw
}
first := raw[0]
last := raw[len(raw)-1]
if (first != '"' || last != '"') && (first != '\'' || last != '\'') {
return raw
}
raw = raw[1 : len(raw)-1]
if first == '\'' {
return raw
}
replacer := strings.NewReplacer(
`\\`, `\`,
`\n`, "\n",
`\r`, "\r",
`\t`, "\t",
`\"`, `"`,
)
return replacer.Replace(raw)
}
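A minimal in-package test sketch of how these helpers treat representative values (it would have to sit next to dotenv.go, since the helpers are unexported):
package config
import "testing"
// Sketch only: value handling of trimDotEnvValue + normalizeDotEnvValue,
// applied in the same order as loadDotEnvFromPath.
func TestDotEnvValueHandlingSketch(t *testing.T) {
cases := []struct{ in, want string }{
{`5001 # local`, "5001"},           // unquoted: inline comment stripped
{`5001#local`, "5001#local"},       // no spacer before '#': kept verbatim
{`"a # b" # note`, "a # b"},        // double-quoted: inner hash kept, trailing comment dropped
{`"line1\nline2"`, "line1\nline2"}, // double-quoted: escape sequences decoded
{`'raw\nvalue'`, `raw\nvalue`},     // single-quoted: taken literally
}
for _, tc := range cases {
if got := normalizeDotEnvValue(trimDotEnvValue(tc.in)); got != tc.want {
t.Fatalf("value %q: got %q want %q", tc.in, got, tc.want)
}
}
}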

View File

@@ -0,0 +1,135 @@
package config
import (
"os"
"path/filepath"
"strings"
"testing"
)
func TestLoadDotEnvLoadsWorkingDirectoryFileWithoutOverridingExistingEnv(t *testing.T) {
dir := t.TempDir()
oldWD, err := os.Getwd()
if err != nil {
t.Fatalf("getwd: %v", err)
}
if err := os.Chdir(dir); err != nil {
t.Fatalf("chdir temp dir: %v", err)
}
t.Cleanup(func() {
_ = os.Chdir(oldWD)
})
const newKey = "DS2API_TEST_DOTENV_NEW"
const keepKey = "DS2API_TEST_DOTENV_KEEP"
const quotedKey = "DS2API_TEST_DOTENV_QUOTED"
unsetEnv(t, newKey)
unsetEnv(t, quotedKey)
t.Setenv(keepKey, "from-env")
content := "DS2API_TEST_DOTENV_NEW=from-file\n" +
"DS2API_TEST_DOTENV_KEEP=from-file\n" +
"DS2API_TEST_DOTENV_QUOTED=\"line1\\nline2\"\n"
if err := os.WriteFile(filepath.Join(dir, ".env"), []byte(content), 0o644); err != nil {
t.Fatalf("write .env: %v", err)
}
if err := LoadDotEnv(); err != nil {
t.Fatalf("LoadDotEnv() error: %v", err)
}
if got := os.Getenv(newKey); got != "from-file" {
t.Fatalf("expected %s from .env, got %q", newKey, got)
}
if got := os.Getenv(keepKey); got != "from-env" {
t.Fatalf("expected existing env to win, got %q", got)
}
if got := os.Getenv(quotedKey); got != "line1\nline2" {
t.Fatalf("expected quoted newline decoding, got %q", got)
}
}
func TestLoadDotEnvIgnoresMissingFile(t *testing.T) {
dir := t.TempDir()
oldWD, err := os.Getwd()
if err != nil {
t.Fatalf("getwd: %v", err)
}
if err := os.Chdir(dir); err != nil {
t.Fatalf("chdir temp dir: %v", err)
}
t.Cleanup(func() {
_ = os.Chdir(oldWD)
})
if err := LoadDotEnv(); err != nil {
t.Fatalf("expected missing .env to be ignored, got %v", err)
}
}
func TestLoadDotEnvStripsInlineCommentsFromUnquotedValues(t *testing.T) {
dir := t.TempDir()
oldWD, err := os.Getwd()
if err != nil {
t.Fatalf("getwd: %v", err)
}
if err := os.Chdir(dir); err != nil {
t.Fatalf("chdir temp dir: %v", err)
}
t.Cleanup(func() {
_ = os.Chdir(oldWD)
})
const plainKey = "DS2API_TEST_DOTENV_PLAIN"
const hashKey = "DS2API_TEST_DOTENV_HASH"
const quotedKey = "DS2API_TEST_DOTENV_QUOTED_COMMENT"
const exportKey = "DS2API_TEST_DOTENV_EXPORT"
unsetEnv(t, plainKey)
unsetEnv(t, hashKey)
unsetEnv(t, quotedKey)
unsetEnv(t, exportKey)
content := strings.Join([]string{
plainKey + "=5001 # local",
hashKey + "=5001#local",
quotedKey + `="5001 # local" # keep the inner hash`,
"export " + exportKey + "=enabled # exported",
}, "\n") + "\n"
if err := os.WriteFile(filepath.Join(dir, ".env"), []byte(content), 0o644); err != nil {
t.Fatalf("write .env: %v", err)
}
if err := LoadDotEnv(); err != nil {
t.Fatalf("LoadDotEnv() error: %v", err)
}
if got := os.Getenv(plainKey); got != "5001" {
t.Fatalf("expected inline comment to be stripped, got %q", got)
}
if got := os.Getenv(hashKey); got != "5001#local" {
t.Fatalf("expected hash without preceding whitespace to remain, got %q", got)
}
if got := os.Getenv(quotedKey); got != "5001 # local" {
t.Fatalf("expected quoted value to preserve hash text, got %q", got)
}
if got := os.Getenv(exportKey); got != "enabled" {
t.Fatalf("expected export syntax to load, got %q", got)
}
}
func unsetEnv(t *testing.T, key string) {
t.Helper()
old, had := os.LookupEnv(key)
if err := os.Unsetenv(key); err != nil {
t.Fatalf("unset %s: %v", key, err)
}
t.Cleanup(func() {
if had {
_ = os.Setenv(key, old)
return
}
_ = os.Unsetenv(key)
})
}

View File

@@ -23,3 +23,7 @@ func newLogger() *slog.Logger {
h := slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: level})
return slog.New(h)
}
func RefreshLogger() {
Logger = newLogger()
}

View File

@@ -37,6 +37,10 @@ func WASMPath() string {
return ResolvePath("DS2API_WASM_PATH", "sha3_wasm_bg.7b9ca65ddd.wasm")
}
func RawStreamSampleRoot() string {
return ResolvePath("DS2API_RAW_STREAM_SAMPLE_ROOT", "tests/raw_stream_samples")
}
func StaticAdminDir() string {
return ResolvePath("DS2API_STATIC_ADMIN_DIR", "static/admin")
}

View File

@@ -21,23 +21,36 @@ type Store struct {
}
func LoadStore() *Store {
cfg, fromEnv, err := loadConfig()
store, err := loadStore()
if err != nil {
Logger.Warn("[config] load failed", "error", err)
}
if len(cfg.Keys) == 0 && len(cfg.Accounts) == 0 {
if len(store.cfg.Keys) == 0 && len(store.cfg.Accounts) == 0 {
Logger.Warn("[config] empty config loaded")
}
s := &Store{cfg: cfg, path: ConfigPath(), fromEnv: fromEnv}
s.rebuildIndexes()
return s
store.rebuildIndexes()
return store
}
func LoadStoreWithError() (*Store, error) {
store, err := loadStore()
if err != nil {
return nil, err
}
store.rebuildIndexes()
return store, nil
}
func loadStore() (*Store, error) {
cfg, fromEnv, err := loadConfig()
if validateErr := ValidateConfig(cfg); validateErr != nil {
err = errors.Join(err, validateErr)
}
return &Store{cfg: cfg, path: ConfigPath(), fromEnv: fromEnv}, err
}
func loadConfig() (Config, bool, error) {
rawCfg := strings.TrimSpace(os.Getenv("DS2API_CONFIG_JSON"))
if rawCfg == "" {
rawCfg = strings.TrimSpace(os.Getenv("CONFIG_JSON"))
}
if rawCfg != "" {
cfg, err := parseConfigString(rawCfg)
if err != nil {
@@ -62,6 +75,9 @@ func loadConfig() (Config, bool, error) {
}
}
if errors.Is(fileErr, os.ErrNotExist) {
if validateErr := ValidateConfig(cfg); validateErr != nil {
return cfg, true, validateErr
}
if writeErr := writeConfigFile(ConfigPath(), cfg.Clone()); writeErr == nil {
return cfg, false, err
} else {

View File

@@ -42,6 +42,15 @@ func (s *Store) CompatWideInputStrictOutput() bool {
return *s.cfg.Compat.WideInputStrictOutput
}
func (s *Store) CompatStripReferenceMarkers() bool {
s.mu.RLock()
defer s.mu.RUnlock()
if s.cfg.Compat.StripReferenceMarkers == nil {
return true
}
return *s.cfg.Compat.StripReferenceMarkers
}
func (s *Store) ToolcallMode() string {
return "feature_match"
}
@@ -65,6 +74,20 @@ func (s *Store) EmbeddingsProvider() string {
return strings.TrimSpace(s.cfg.Embeddings.Provider)
}
func (s *Store) AutoDeleteMode() string {
s.mu.RLock()
defer s.mu.RUnlock()
mode := strings.ToLower(strings.TrimSpace(s.cfg.AutoDelete.Mode))
switch mode {
case "none", "single", "all":
return mode
}
if s.cfg.AutoDelete.Sessions {
return "all"
}
return "none"
}
func (s *Store) AdminPasswordHash() string {
s.mu.RLock()
defer s.mu.RUnlock()
@@ -97,13 +120,8 @@ func (s *Store) RuntimeAccountMaxInflight() int {
if s.cfg.Runtime.AccountMaxInflight > 0 {
return s.cfg.Runtime.AccountMaxInflight
}
for _, key := range []string{"DS2API_ACCOUNT_MAX_INFLIGHT", "DS2API_ACCOUNT_CONCURRENCY"} {
raw := strings.TrimSpace(os.Getenv(key))
if raw == "" {
continue
}
n, err := strconv.Atoi(raw)
if err == nil && n > 0 {
if raw := strings.TrimSpace(os.Getenv("DS2API_ACCOUNT_MAX_INFLIGHT")); raw != "" {
if n, err := strconv.Atoi(raw); err == nil && n > 0 {
return n
}
}
@@ -116,13 +134,8 @@ func (s *Store) RuntimeAccountMaxQueue(defaultSize int) int {
if s.cfg.Runtime.AccountMaxQueue > 0 {
return s.cfg.Runtime.AccountMaxQueue
}
for _, key := range []string{"DS2API_ACCOUNT_MAX_QUEUE", "DS2API_ACCOUNT_QUEUE_SIZE"} {
raw := strings.TrimSpace(os.Getenv(key))
if raw == "" {
continue
}
n, err := strconv.Atoi(raw)
if err == nil && n >= 0 {
if raw := strings.TrimSpace(os.Getenv("DS2API_ACCOUNT_MAX_QUEUE")); raw != "" {
if n, err := strconv.Atoi(raw); err == nil && n >= 0 {
return n
}
}
@@ -138,13 +151,8 @@ func (s *Store) RuntimeGlobalMaxInflight(defaultSize int) int {
if s.cfg.Runtime.GlobalMaxInflight > 0 {
return s.cfg.Runtime.GlobalMaxInflight
}
for _, key := range []string{"DS2API_GLOBAL_MAX_INFLIGHT", "DS2API_MAX_INFLIGHT"} {
raw := strings.TrimSpace(os.Getenv(key))
if raw == "" {
continue
}
n, err := strconv.Atoi(raw)
if err == nil && n > 0 {
if raw := strings.TrimSpace(os.Getenv("DS2API_GLOBAL_MAX_INFLIGHT")); raw != "" {
if n, err := strconv.Atoi(raw); err == nil && n > 0 {
return n
}
}
@@ -164,7 +172,5 @@ func (s *Store) RuntimeTokenRefreshIntervalHours() int {
}
func (s *Store) AutoDeleteSessions() bool {
s.mu.RLock()
defer s.mu.RUnlock()
return s.cfg.AutoDelete.Sessions
return s.AutoDeleteMode() != "none"
}

View File

@@ -19,9 +19,6 @@ func (s *Store) IsEnvWritebackEnabled() bool {
func (s *Store) HasEnvConfigSource() bool {
rawCfg := strings.TrimSpace(os.Getenv("DS2API_CONFIG_JSON"))
if rawCfg == "" {
rawCfg = strings.TrimSpace(os.Getenv("CONFIG_JSON"))
}
return rawCfg != ""
}

View File

@@ -0,0 +1,91 @@
package config
import (
"fmt"
"strings"
)
func ValidateConfig(c Config) error {
if err := ValidateAdminConfig(c.Admin); err != nil {
return err
}
if err := ValidateRuntimeConfig(c.Runtime); err != nil {
return err
}
if err := ValidateResponsesConfig(c.Responses); err != nil {
return err
}
if err := ValidateEmbeddingsConfig(c.Embeddings); err != nil {
return err
}
if err := ValidateAutoDeleteConfig(c.AutoDelete); err != nil {
return err
}
return nil
}
func ValidateAdminConfig(admin AdminConfig) error {
return ValidateIntRange("admin.jwt_expire_hours", admin.JWTExpireHours, 1, 720, false)
}
func ValidateRuntimeConfig(runtime RuntimeConfig) error {
if err := ValidateIntRange("runtime.account_max_inflight", runtime.AccountMaxInflight, 1, 256, false); err != nil {
return err
}
if err := ValidateIntRange("runtime.account_max_queue", runtime.AccountMaxQueue, 1, 200000, false); err != nil {
return err
}
if err := ValidateIntRange("runtime.global_max_inflight", runtime.GlobalMaxInflight, 1, 200000, false); err != nil {
return err
}
if err := ValidateIntRange("runtime.token_refresh_interval_hours", runtime.TokenRefreshIntervalHours, 1, 720, false); err != nil {
return err
}
if runtime.AccountMaxInflight > 0 && runtime.GlobalMaxInflight > 0 && runtime.GlobalMaxInflight < runtime.AccountMaxInflight {
return fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight")
}
return nil
}
func ValidateResponsesConfig(responses ResponsesConfig) error {
return ValidateIntRange("responses.store_ttl_seconds", responses.StoreTTLSeconds, 30, 86400, false)
}
func ValidateEmbeddingsConfig(embeddings EmbeddingsConfig) error {
return ValidateTrimmedString("embeddings.provider", embeddings.Provider, false)
}
func ValidateAutoDeleteConfig(autoDelete AutoDeleteConfig) error {
return ValidateAutoDeleteMode(autoDelete.Mode)
}
func ValidateIntRange(name string, value, min, max int, required bool) error {
if value == 0 && !required {
return nil
}
if value < min || value > max {
return fmt.Errorf("%s must be between %d and %d", name, min, max)
}
return nil
}
func ValidateTrimmedString(name, value string, required bool) error {
trimmed := strings.TrimSpace(value)
if trimmed == "" {
if !required && value == "" {
return nil
}
return fmt.Errorf("%s cannot be empty", name)
}
return nil
}
func ValidateAutoDeleteMode(mode string) error {
mode = strings.ToLower(strings.TrimSpace(mode))
switch mode {
case "", "none", "single", "all":
return nil
default:
return fmt.Errorf("auto_delete.mode must be one of none, single, all")
}
}
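A short sketch of how the exported validators behave; it has to live inside the ds2api module, since internal packages are not importable from outside it:
package main
import (
"fmt"
"ds2api/internal/config"
)
func main() {
// Out-of-range runtime value: rejected with the same message the admin API returns.
bad := config.Config{Runtime: config.RuntimeConfig{AccountMaxInflight: 300}}
fmt.Println(config.ValidateConfig(bad)) // runtime.account_max_inflight must be between 1 and 256
// Zero means "unset" and passes unless the field is marked required.
fmt.Println(config.ValidateIntRange("responses.store_ttl_seconds", 0, 30, 86400, false)) // <nil>
fmt.Println(config.ValidateAutoDeleteMode("single"))                                     // <nil>
}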

View File

@@ -0,0 +1,61 @@
package config
import (
"strings"
"testing"
)
func TestValidateConfigRejectsInvalidValues(t *testing.T) {
tests := []struct {
name string
cfg Config
want string
}{
{
name: "admin",
cfg: Config{Admin: AdminConfig{JWTExpireHours: 721}},
want: "admin.jwt_expire_hours",
},
{
name: "runtime relation",
cfg: Config{Runtime: RuntimeConfig{
AccountMaxInflight: 8,
GlobalMaxInflight: 4,
}},
want: "runtime.global_max_inflight must be >= runtime.account_max_inflight",
},
{
name: "responses",
cfg: Config{Responses: ResponsesConfig{StoreTTLSeconds: 10}},
want: "responses.store_ttl_seconds",
},
{
name: "embeddings",
cfg: Config{Embeddings: EmbeddingsConfig{Provider: " "}},
want: "embeddings.provider",
},
{
name: "auto delete",
cfg: Config{AutoDelete: AutoDeleteConfig{Mode: "maybe"}},
want: "auto_delete.mode",
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
err := ValidateConfig(tc.cfg)
if err == nil {
t.Fatal("expected validation error")
}
if !strings.Contains(err.Error(), tc.want) {
t.Fatalf("expected %q in error, got %v", tc.want, err)
}
})
}
}
func TestValidateConfigAcceptsLegacyAutoDeleteSessions(t *testing.T) {
if err := ValidateConfig(Config{AutoDelete: AutoDeleteConfig{Sessions: true}}); err != nil {
t.Fatalf("expected legacy auto_delete.sessions config to remain valid, got %v", err)
}
}

View File

@@ -31,6 +31,7 @@ func (c *Client) CallCompletion(ctx context.Context, a *auth.RequestAuth, payloa
if captureSession != nil {
resp.Body = captureSession.WrapBody(resp.Body, resp.StatusCode)
}
resp = c.wrapCompletionWithAutoContinue(ctx, a, payload, powResp, resp)
return resp, nil
}
if captureSession != nil {
@@ -60,7 +61,7 @@ func (c *Client) streamPost(ctx context.Context, url string, headers map[string]
config.Logger.Warn("[deepseek] fingerprint stream request failed, fallback to std transport", "url", url, "error", err)
req2, reqErr := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(b))
if reqErr != nil {
return nil, err
return nil, reqErr
}
for k, v := range headers {
req2.Header.Set(k, v)

View File

@@ -0,0 +1,241 @@
package deepseek
import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"io"
"net/http"
"strings"
"ds2api/internal/auth"
"ds2api/internal/config"
)
const defaultAutoContinueLimit = 8
type continueOpenFunc func(context.Context, string, int) (*http.Response, error)
type continueState struct {
sessionID string
responseMessageID int
lastStatus string
finished bool
}
// wrapCompletionWithAutoContinue wraps the completion response body so that
// if the upstream indicates the response is incomplete (WIP / INCOMPLETE /
// AUTO_CONTINUE), ds2api will automatically call the DeepSeek continue
// endpoint and splice the continuation SSE stream onto the original.
// The caller sees a single, seamless SSE stream.
func (c *Client) wrapCompletionWithAutoContinue(ctx context.Context, a *auth.RequestAuth, payload map[string]any, powResp string, resp *http.Response) *http.Response {
if resp == nil || resp.Body == nil {
return resp
}
sessionID, _ := payload["chat_session_id"].(string)
sessionID = strings.TrimSpace(sessionID)
if sessionID == "" {
return resp
}
config.Logger.Debug("[auto_continue] wrapping completion response", "session_id", sessionID)
resp.Body = newAutoContinueBody(ctx, resp.Body, sessionID, defaultAutoContinueLimit, func(ctx context.Context, sessionID string, responseMessageID int) (*http.Response, error) {
return c.callContinue(ctx, a, sessionID, responseMessageID, powResp)
})
return resp
}
// callContinue sends a continue request to DeepSeek to resume generation.
func (c *Client) callContinue(ctx context.Context, a *auth.RequestAuth, sessionID string, responseMessageID int, powResp string) (*http.Response, error) {
if strings.TrimSpace(sessionID) == "" || responseMessageID <= 0 {
return nil, errors.New("missing continue identifiers")
}
headers := c.authHeaders(a.DeepSeekToken)
headers["x-ds-pow-response"] = powResp
payload := map[string]any{
"chat_session_id": sessionID,
"message_id": responseMessageID,
"fallback_to_resume": true,
}
config.Logger.Info("[auto_continue] calling continue", "session_id", sessionID, "message_id", responseMessageID)
captureSession := c.capture.Start("deepseek_continue", DeepSeekContinueURL, a.AccountID, payload)
resp, err := c.streamPost(ctx, DeepSeekContinueURL, headers, payload)
if err != nil {
return nil, err
}
if captureSession != nil {
resp.Body = captureSession.WrapBody(resp.Body, resp.StatusCode)
}
if resp.StatusCode != http.StatusOK {
_ = resp.Body.Close()
return nil, errors.New("continue failed")
}
return resp, nil
}
// newAutoContinueBody returns a new ReadCloser that transparently pumps
// continuation rounds via an io.Pipe.
func newAutoContinueBody(ctx context.Context, initial io.ReadCloser, sessionID string, maxRounds int, openContinue continueOpenFunc) io.ReadCloser {
if initial == nil || strings.TrimSpace(sessionID) == "" || openContinue == nil {
return initial
}
if maxRounds <= 0 {
maxRounds = defaultAutoContinueLimit
}
pr, pw := io.Pipe()
go pumpAutoContinue(ctx, pw, initial, continueState{sessionID: sessionID}, maxRounds, openContinue)
return pr
}
// pumpAutoContinue is the goroutine that drives the auto-continue loop.
// It reads the initial SSE body, checks whether a continue is required,
// and if so opens a new continue stream and splices it onto the pipe writer.
func pumpAutoContinue(ctx context.Context, pw *io.PipeWriter, initial io.ReadCloser, state continueState, maxRounds int, openContinue continueOpenFunc) {
defer func() { _ = pw.Close() }()
current := initial
rounds := 0
for {
hadDone, err := streamBodyWithContinueState(ctx, pw, current, &state)
_ = current.Close()
if err != nil {
_ = pw.CloseWithError(err)
return
}
if state.shouldContinue() && rounds < maxRounds {
rounds++
config.Logger.Info("[auto_continue] continuing", "round", rounds, "session_id", state.sessionID, "message_id", state.responseMessageID, "status", state.lastStatus)
nextResp, err := openContinue(ctx, state.sessionID, state.responseMessageID)
if err != nil {
config.Logger.Warn("[auto_continue] continue request failed", "round", rounds, "error", err)
_ = pw.CloseWithError(err)
return
}
current = nextResp.Body
state.prepareForNextRound()
continue
}
// Emit the final [DONE] sentinel if the upstream had one.
if hadDone {
if _, err := io.Copy(pw, bytes.NewBufferString("data: [DONE]\n")); err != nil {
_ = pw.CloseWithError(err)
}
}
return
}
}
// streamBodyWithContinueState scans an SSE body line-by-line, writing each
// line through to pw while observing state signals. Intermediate [DONE]
// sentinels are consumed (not forwarded) so that the downstream only sees
// one final [DONE] at the very end.
func streamBodyWithContinueState(ctx context.Context, pw *io.PipeWriter, body io.Reader, state *continueState) (bool, error) {
scanner := bufio.NewScanner(body)
scanner.Buffer(make([]byte, 0, 64*1024), 2*1024*1024)
hadDone := false
for scanner.Scan() {
select {
case <-ctx.Done():
return hadDone, ctx.Err()
default:
}
line := append([]byte{}, scanner.Bytes()...)
trimmed := strings.TrimSpace(string(line))
if trimmed == "" {
continue
}
if strings.HasPrefix(trimmed, "data:") {
data := strings.TrimSpace(strings.TrimPrefix(trimmed, "data:"))
if data == "[DONE]" {
hadDone = true
continue
}
state.observe(data)
}
if _, err := io.Copy(pw, bytes.NewReader(append(line, '\n'))); err != nil {
return hadDone, err
}
}
return hadDone, scanner.Err()
}
// observe extracts continue-relevant signals from an SSE JSON chunk.
func (s *continueState) observe(data string) {
if s == nil || strings.TrimSpace(data) == "" {
return
}
var chunk map[string]any
if err := json.Unmarshal([]byte(data), &chunk); err != nil {
return
}
// Top-level response_message_id
if id := intFrom(chunk["response_message_id"]); id > 0 {
s.responseMessageID = id
}
// Path-based status: {"p": "response/status", "v": "FINISHED"}
if p, _ := chunk["p"].(string); p == "response/status" {
if status, _ := chunk["v"].(string); status != "" {
s.lastStatus = strings.TrimSpace(status)
if strings.EqualFold(s.lastStatus, "FINISHED") {
s.finished = true
}
}
}
// Nested v.response
v, _ := chunk["v"].(map[string]any)
if response, _ := v["response"].(map[string]any); response != nil {
if id := intFrom(response["message_id"]); id > 0 {
s.responseMessageID = id
}
if status, _ := response["status"].(string); status != "" {
s.lastStatus = strings.TrimSpace(status)
if strings.EqualFold(s.lastStatus, "FINISHED") {
s.finished = true
}
}
if autoContinue, ok := response["auto_continue"].(bool); ok && autoContinue {
s.lastStatus = "AUTO_CONTINUE"
}
}
// Nested message.response
if message, _ := chunk["message"].(map[string]any); message != nil {
if response, _ := message["response"].(map[string]any); response != nil {
if id := intFrom(response["message_id"]); id > 0 {
s.responseMessageID = id
}
if status, _ := response["status"].(string); status != "" {
s.lastStatus = strings.TrimSpace(status)
if strings.EqualFold(s.lastStatus, "FINISHED") {
s.finished = true
}
}
}
}
}
// shouldContinue returns true when the upstream indicates the response is
// not yet finished and we have enough information to issue a continue request.
func (s *continueState) shouldContinue() bool {
if s == nil {
return false
}
if s.finished || s.responseMessageID <= 0 || strings.TrimSpace(s.sessionID) == "" {
return false
}
switch strings.ToUpper(strings.TrimSpace(s.lastStatus)) {
case "WIP", "INCOMPLETE", "AUTO_CONTINUE":
return true
default:
return false
}
}
// prepareForNextRound resets ephemeral state before processing the next
// continuation stream.
func (s *continueState) prepareForNextRound() {
if s == nil {
return
}
s.finished = false
s.lastStatus = ""
}
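A minimal in-package sketch of this state machine (placed next to the file above, since continueState is unexported), showing which upstream signals schedule another round:
package deepseek
import "testing"
// Sketch only: a WIP status with a known message id triggers a continue;
// a FINISHED status stops the loop.
func TestContinueStateSketch(t *testing.T) {
s := &continueState{sessionID: "session-123"}
s.observe(`{"response_message_id":321,"v":{"response":{"message_id":321,"status":"WIP"}}}`)
if !s.shouldContinue() {
t.Fatal("expected WIP + message id to trigger a continue round")
}
s.observe(`{"p":"response/status","v":"FINISHED"}`)
if s.shouldContinue() {
t.Fatal("expected FINISHED to stop auto-continue")
}
}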

View File

@@ -0,0 +1,137 @@
package deepseek
import (
"bytes"
"context"
"errors"
"io"
"net/http"
"strings"
"testing"
"ds2api/internal/auth"
)
type failingDoer struct {
err error
}
func (d failingDoer) Do(_ *http.Request) (*http.Response, error) {
return nil, d.err
}
type roundTripperFunc func(*http.Request) (*http.Response, error)
func (f roundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) {
return f(req)
}
func TestCallContinuePropagatesPowHeaderToFallbackRequest(t *testing.T) {
var seenPow string
var seenURL string
client := &Client{
stream: failingDoer{err: errors.New("stream transport failed")},
fallbackS: &http.Client{
Transport: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
seenPow = req.Header.Get("x-ds-pow-response")
seenURL = req.URL.String()
body := io.NopCloser(strings.NewReader("data: {\"p\":\"response/content\",\"v\":\"continued\"}\n" + "data: [DONE]\n"))
return &http.Response{
StatusCode: http.StatusOK,
Header: make(http.Header),
Body: body,
Request: req,
}, nil
}),
},
}
resp, err := client.callContinue(context.Background(), &auth.RequestAuth{
DeepSeekToken: "token",
AccountID: "acct",
}, "session-123", 99, "pow-response-abc")
if err != nil {
t.Fatalf("callContinue returned error: %v", err)
}
defer resp.Body.Close()
if seenPow != "pow-response-abc" {
t.Fatalf("continue request pow header=%q want=%q", seenPow, "pow-response-abc")
}
if seenURL != DeepSeekContinueURL {
t.Fatalf("continue request url=%q want=%q", seenURL, DeepSeekContinueURL)
}
}
func TestCallCompletionAutoContinueThreadsPowHeader(t *testing.T) {
var seenPow string
var seenContinueURL string
initialBody := strings.Join([]string{
`data: {"response_message_id":321,"v":{"response":{"message_id":321,"status":"WIP","auto_continue":true}}}`,
`data: [DONE]`,
}, "\n") + "\n"
client := &Client{
stream: failingOrCompletionDoer{
completionResp: &http.Response{
StatusCode: http.StatusOK,
Header: make(http.Header),
Body: io.NopCloser(strings.NewReader(initialBody)),
},
},
fallbackS: &http.Client{
Transport: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
seenPow = req.Header.Get("x-ds-pow-response")
seenContinueURL = req.URL.String()
body := io.NopCloser(strings.NewReader("data: {\"response_message_id\":322,\"v\":{\"response\":{\"message_id\":322,\"status\":\"FINISHED\"}}}\n" + "data: [DONE]\n"))
return &http.Response{
StatusCode: http.StatusOK,
Header: make(http.Header),
Body: body,
Request: req,
}, nil
}),
},
}
resp, err := client.CallCompletion(context.Background(), &auth.RequestAuth{
DeepSeekToken: "token",
AccountID: "acct",
}, map[string]any{
"chat_session_id": "session-123",
}, "pow-response-xyz", 1)
if err != nil {
t.Fatalf("CallCompletion returned error: %v", err)
}
defer resp.Body.Close()
out, err := io.ReadAll(resp.Body)
if err != nil {
t.Fatalf("read auto-continued body failed: %v", err)
}
if seenPow != "pow-response-xyz" {
t.Fatalf("threaded continue pow header=%q want=%q", seenPow, "pow-response-xyz")
}
if seenContinueURL != DeepSeekContinueURL {
t.Fatalf("continue url=%q want=%q", seenContinueURL, DeepSeekContinueURL)
}
if !bytes.Contains(out, []byte(`"status":"WIP"`)) {
t.Fatalf("expected initial stream content in body, got=%s", string(out))
}
if !bytes.Contains(out, []byte(`data: [DONE]`)) {
t.Fatalf("expected final DONE sentinel in body, got=%s", string(out))
}
}
type failingOrCompletionDoer struct {
completionResp *http.Response
}
func (d failingOrCompletionDoer) Do(req *http.Request) (*http.Response, error) {
if strings.Contains(req.URL.Path, "/chat/completion") {
return d.completionResp, nil
}
return nil, errors.New("forced stream failure")
}

View File

@@ -39,7 +39,7 @@ func (c *Client) postJSONWithStatus(ctx context.Context, doer trans.Doer, url st
config.Logger.Warn("[deepseek] fingerprint request failed, fallback to std transport", "url", url, "error", err)
req2, reqErr := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(b))
if reqErr != nil {
return nil, 0, err
return nil, 0, reqErr
}
for k, v := range headers {
req2.Header.Set(k, v)
@@ -76,7 +76,7 @@ func (c *Client) getJSONWithStatus(ctx context.Context, doer trans.Doer, url str
config.Logger.Warn("[deepseek] fingerprint GET request failed, fallback to std transport", "url", url, "error", err)
req2, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if reqErr != nil {
return nil, 0, err
return nil, 0, reqErr
}
for k, v := range headers {
req2.Header.Set(k, v)

View File

@@ -11,6 +11,7 @@ const (
DeepSeekCreateSessionURL = "https://chat.deepseek.com/api/v0/chat_session/create"
DeepSeekCreatePowURL = "https://chat.deepseek.com/api/v0/chat/create_pow_challenge"
DeepSeekCompletionURL = "https://chat.deepseek.com/api/v0/chat/completion"
DeepSeekContinueURL = "https://chat.deepseek.com/api/v0/chat/continue"
DeepSeekFetchSessionURL = "https://chat.deepseek.com/api/v0/chat_session/fetch_page"
DeepSeekDeleteSessionURL = "https://chat.deepseek.com/api/v0/chat_session/delete"
DeepSeekDeleteAllSessionsURL = "https://chat.deepseek.com/api/v0/chat_session/delete_all"

View File

@@ -6,7 +6,11 @@ const {
const {
parseChunkForContent,
extractContentRecursive,
filterLeakedContentFilterParts,
hasContentFilterStatus,
extractAccumulatedTokenUsage,
shouldSkipPath,
stripReferenceMarkers,
} = require('./sse_parse');
const {
resolveToolcallPolicy,
@@ -17,6 +21,7 @@ const {
} = require('./toolcall_policy');
const {
estimateTokens,
buildUsage,
} = require('./token_usage');
const {
setCorsHeaders,
@@ -100,6 +105,7 @@ module.exports.__test = {
parseChunkForContent,
extractContentRecursive,
shouldSkipPath,
stripReferenceMarkers,
asString,
resolveToolcallPolicy,
formatIncrementalToolCallDeltas,
@@ -107,6 +113,10 @@ module.exports.__test = {
boolDefaultTrue,
filterIncrementalToolCallDeltasByAllowed,
estimateTokens,
buildUsage,
filterLeakedContentFilterParts,
hasContentFilterStatus,
extractAccumulatedTokenUsage,
isNodeStreamSupportedPath,
extractPathname,
};

View File

@@ -1,240 +1,3 @@
'use strict';
const {
SKIP_PATTERNS,
SKIP_EXACT_PATHS,
} = require('../shared/deepseek-constants');
function parseChunkForContent(chunk, thinkingEnabled, currentType) {
if (!chunk || typeof chunk !== 'object' || !Object.prototype.hasOwnProperty.call(chunk, 'v')) {
return { parts: [], finished: false, newType: currentType };
}
const pathValue = asString(chunk.p);
if (shouldSkipPath(pathValue)) {
return { parts: [], finished: false, newType: currentType };
}
if (pathValue === 'response/status' && asString(chunk.v) === 'FINISHED') {
return { parts: [], finished: true, newType: currentType };
}
let newType = currentType;
const parts = [];
if (pathValue === 'response/fragments' && asString(chunk.o).toUpperCase() === 'APPEND' && Array.isArray(chunk.v)) {
for (const frag of chunk.v) {
if (!frag || typeof frag !== 'object') {
continue;
}
const fragType = asString(frag.type).toUpperCase();
const content = asString(frag.content);
if (!content) {
continue;
}
if (fragType === 'THINK' || fragType === 'THINKING') {
newType = 'thinking';
parts.push({ text: content, type: 'thinking' });
} else if (fragType === 'RESPONSE') {
newType = 'text';
parts.push({ text: content, type: 'text' });
} else {
parts.push({ text: content, type: 'text' });
}
}
}
if (pathValue === 'response' && Array.isArray(chunk.v)) {
for (const item of chunk.v) {
if (!item || typeof item !== 'object') {
continue;
}
if (item.p === 'fragments' && item.o === 'APPEND' && Array.isArray(item.v)) {
for (const frag of item.v) {
const fragType = asString(frag && frag.type).toUpperCase();
if (fragType === 'THINK' || fragType === 'THINKING') {
newType = 'thinking';
} else if (fragType === 'RESPONSE') {
newType = 'text';
}
}
}
}
}
let partType = 'text';
if (pathValue === 'response/thinking_content') {
partType = 'thinking';
} else if (pathValue === 'response/content') {
partType = 'text';
} else if (pathValue.includes('response/fragments') && pathValue.includes('/content')) {
partType = newType;
} else if (!pathValue && thinkingEnabled) {
partType = newType;
}
const val = chunk.v;
if (typeof val === 'string') {
if (val === 'FINISHED' && (!pathValue || pathValue === 'status')) {
return { parts: [], finished: true, newType };
}
if (val) {
parts.push({ text: val, type: partType });
}
return { parts, finished: false, newType };
}
if (Array.isArray(val)) {
const extracted = extractContentRecursive(val, partType);
if (extracted.finished) {
return { parts: [], finished: true, newType };
}
parts.push(...extracted.parts);
return { parts, finished: false, newType };
}
if (val && typeof val === 'object') {
const resp = val.response && typeof val.response === 'object' ? val.response : val;
if (Array.isArray(resp.fragments)) {
for (const frag of resp.fragments) {
if (!frag || typeof frag !== 'object') {
continue;
}
const content = asString(frag.content);
if (!content) {
continue;
}
const t = asString(frag.type).toUpperCase();
if (t === 'THINK' || t === 'THINKING') {
newType = 'thinking';
parts.push({ text: content, type: 'thinking' });
} else if (t === 'RESPONSE') {
newType = 'text';
parts.push({ text: content, type: 'text' });
} else {
parts.push({ text: content, type: partType });
}
}
}
}
return { parts, finished: false, newType };
}
function extractContentRecursive(items, defaultType) {
const parts = [];
for (const it of items) {
if (!it || typeof it !== 'object') {
continue;
}
if (!Object.prototype.hasOwnProperty.call(it, 'v')) {
continue;
}
const itemPath = asString(it.p);
const itemV = it.v;
if (itemPath === 'status' && asString(itemV) === 'FINISHED') {
return { parts: [], finished: true };
}
if (shouldSkipPath(itemPath)) {
continue;
}
const content = asString(it.content);
if (content) {
const typeName = asString(it.type).toUpperCase();
if (typeName === 'THINK' || typeName === 'THINKING') {
parts.push({ text: content, type: 'thinking' });
} else if (typeName === 'RESPONSE') {
parts.push({ text: content, type: 'text' });
} else {
parts.push({ text: content, type: defaultType });
}
continue;
}
let partType = defaultType;
if (itemPath.includes('thinking')) {
partType = 'thinking';
} else if (itemPath.includes('content') || itemPath === 'response' || itemPath === 'fragments') {
partType = 'text';
}
if (typeof itemV === 'string') {
if (itemV && itemV !== 'FINISHED') {
parts.push({ text: itemV, type: partType });
}
continue;
}
if (!Array.isArray(itemV)) {
continue;
}
for (const inner of itemV) {
if (typeof inner === 'string') {
if (inner) {
parts.push({ text: inner, type: partType });
}
continue;
}
if (!inner || typeof inner !== 'object') {
continue;
}
const ct = asString(inner.content);
if (!ct) {
continue;
}
const typeName = asString(inner.type).toUpperCase();
if (typeName === 'THINK' || typeName === 'THINKING') {
parts.push({ text: ct, type: 'thinking' });
} else if (typeName === 'RESPONSE') {
parts.push({ text: ct, type: 'text' });
} else {
parts.push({ text: ct, type: partType });
}
}
}
return { parts, finished: false };
}
function shouldSkipPath(pathValue) {
if (isFragmentStatusPath(pathValue)) {
return true;
}
if (SKIP_EXACT_PATHS.has(pathValue)) {
return true;
}
for (const p of SKIP_PATTERNS) {
if (pathValue.includes(p)) {
return true;
}
}
return false;
}
function isFragmentStatusPath(pathValue) {
if (!pathValue || pathValue === 'response/status') {
return false;
}
return /^response\/fragments\/-?\d+\/status$/i.test(pathValue);
}
function isCitation(text) {
return asString(text).trim().startsWith('[citation:');
}
function asString(v) {
if (typeof v === 'string') {
return v.trim();
}
if (Array.isArray(v)) {
return asString(v[0]);
}
if (v == null) {
return '';
}
return String(v).trim();
}
module.exports = {
parseChunkForContent,
extractContentRecursive,
shouldSkipPath,
isFragmentStatusPath,
isCitation,
};
module.exports = require('./sse_parse_impl');

View File

@@ -0,0 +1,571 @@
'use strict';
// Implementation moved here to keep the line-gate wrapper tiny.
const {
SKIP_PATTERNS,
SKIP_EXACT_PATHS,
} = require('../shared/deepseek-constants');
function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenceMarkers = true) {
if (!chunk || typeof chunk !== 'object') {
return {
parsed: false,
parts: [],
finished: false,
contentFilter: false,
errorMessage: '',
outputTokens: 0,
newType: currentType,
};
}
if (Object.prototype.hasOwnProperty.call(chunk, 'error')) {
return {
parsed: true,
parts: [],
finished: true,
contentFilter: false,
errorMessage: formatErrorMessage(chunk.error),
outputTokens: 0,
newType: currentType,
};
}
const pathValue = asString(chunk.p);
const outputTokens = extractAccumulatedTokenUsage(chunk);
if (hasContentFilterStatus(chunk)) {
return {
parsed: true,
parts: [],
finished: true,
contentFilter: true,
errorMessage: '',
outputTokens,
newType: currentType,
};
}
if (shouldSkipPath(pathValue)) {
return {
parsed: true,
parts: [],
finished: false,
contentFilter: false,
errorMessage: '',
outputTokens,
newType: currentType,
};
}
if (isStatusPath(pathValue)) {
if (isFinishedStatus(chunk.v)) {
return {
parsed: true,
parts: [],
finished: true,
contentFilter: false,
errorMessage: '',
outputTokens,
newType: currentType,
};
}
return {
parsed: true,
parts: [],
finished: false,
contentFilter: false,
errorMessage: '',
outputTokens,
newType: currentType,
};
}
if (!Object.prototype.hasOwnProperty.call(chunk, 'v')) {
return {
parsed: true,
parts: [],
finished: false,
contentFilter: false,
errorMessage: '',
outputTokens,
newType: currentType,
};
}
let newType = currentType;
const parts = [];
if (pathValue === 'response/fragments' && asString(chunk.o).toUpperCase() === 'APPEND' && Array.isArray(chunk.v)) {
for (const frag of chunk.v) {
if (!frag || typeof frag !== 'object') {
continue;
}
const fragType = asString(frag.type).toUpperCase();
const content = asContentString(frag.content, stripReferenceMarkers);
if (!content) {
continue;
}
if (fragType === 'THINK' || fragType === 'THINKING') {
newType = 'thinking';
parts.push({ text: content, type: 'thinking' });
} else if (fragType === 'RESPONSE') {
newType = 'text';
parts.push({ text: content, type: 'text' });
} else {
parts.push({ text: content, type: 'text' });
}
}
}
if (pathValue === 'response' && Array.isArray(chunk.v)) {
for (const item of chunk.v) {
if (!item || typeof item !== 'object') {
continue;
}
if (item.p === 'fragments' && item.o === 'APPEND' && Array.isArray(item.v)) {
for (const frag of item.v) {
const fragType = asString(frag && frag.type).toUpperCase();
if (fragType === 'THINK' || fragType === 'THINKING') {
newType = 'thinking';
} else if (fragType === 'RESPONSE') {
newType = 'text';
}
}
}
}
}
let partType = 'text';
if (pathValue === 'response/thinking_content') {
partType = 'thinking';
} else if (pathValue === 'response/content') {
partType = 'text';
} else if (pathValue.includes('response/fragments') && pathValue.includes('/content')) {
partType = newType;
} else if (!pathValue && thinkingEnabled) {
partType = newType;
}
const val = chunk.v;
if (typeof val === 'string') {
if (isFinishedStatus(val) && (!pathValue || pathValue === 'status')) {
return {
parsed: true,
parts: [],
finished: true,
contentFilter: false,
errorMessage: '',
outputTokens,
newType,
};
}
if (isStatusPath(pathValue)) {
return {
parsed: true,
parts: [],
finished: false,
contentFilter: false,
errorMessage: '',
outputTokens,
newType,
};
}
const content = asContentString(val, stripReferenceMarkers);
if (content) {
parts.push({ text: content, type: partType });
}
return {
parsed: true,
parts: filterLeakedContentFilterParts(parts),
finished: false,
contentFilter: false,
errorMessage: '',
outputTokens,
newType,
};
}
if (Array.isArray(val)) {
const extracted = extractContentRecursive(val, partType, stripReferenceMarkers);
if (extracted.finished) {
return {
parsed: true,
parts: [],
finished: true,
contentFilter: false,
errorMessage: '',
outputTokens,
newType,
};
}
parts.push(...extracted.parts);
return {
parsed: true,
parts: filterLeakedContentFilterParts(parts),
finished: false,
contentFilter: false,
errorMessage: '',
outputTokens,
newType,
};
}
if (val && typeof val === 'object') {
const resp = val.response && typeof val.response === 'object' ? val.response : val;
if (Array.isArray(resp.fragments)) {
for (const frag of resp.fragments) {
if (!frag || typeof frag !== 'object') {
continue;
}
const content = asContentString(frag.content, stripReferenceMarkers);
if (!content) {
continue;
}
const t = asString(frag.type).toUpperCase();
if (t === 'THINK' || t === 'THINKING') {
newType = 'thinking';
parts.push({ text: content, type: 'thinking' });
} else if (t === 'RESPONSE') {
newType = 'text';
parts.push({ text: content, type: 'text' });
} else {
parts.push({ text: content, type: partType });
}
}
}
}
return {
parsed: true,
parts: filterLeakedContentFilterParts(parts),
finished: false,
contentFilter: false,
errorMessage: '',
outputTokens,
newType,
};
}
function extractContentRecursive(items, defaultType, stripReferenceMarkers = true) {
const parts = [];
for (const it of items) {
if (!it || typeof it !== 'object') {
continue;
}
if (!Object.prototype.hasOwnProperty.call(it, 'v')) {
continue;
}
const itemPath = asString(it.p);
const itemV = it.v;
if (isStatusPath(itemPath)) {
if (isFinishedStatus(itemV)) {
return { parts: [], finished: true };
}
continue;
}
if (shouldSkipPath(itemPath)) {
continue;
}
const content = asContentString(it.content, stripReferenceMarkers);
if (content) {
const typeName = asString(it.type).toUpperCase();
if (typeName === 'THINK' || typeName === 'THINKING') {
parts.push({ text: content, type: 'thinking' });
} else if (typeName === 'RESPONSE') {
parts.push({ text: content, type: 'text' });
} else {
parts.push({ text: content, type: defaultType });
}
continue;
}
let partType = defaultType;
if (itemPath.includes('thinking')) {
partType = 'thinking';
} else if (itemPath.includes('content') || itemPath === 'response' || itemPath === 'fragments') {
partType = 'text';
}
if (typeof itemV === 'string') {
if (isStatusPath(itemPath)) {
continue;
}
if (itemV && itemV !== 'FINISHED') {
const content = asContentString(itemV, stripReferenceMarkers);
if (content) {
parts.push({ text: content, type: partType });
}
}
continue;
}
if (!Array.isArray(itemV)) {
continue;
}
for (const inner of itemV) {
if (typeof inner === 'string') {
if (inner) {
const content = asContentString(inner, stripReferenceMarkers);
if (content) {
parts.push({ text: content, type: partType });
}
}
continue;
}
if (!inner || typeof inner !== 'object') {
continue;
}
const ct = asContentString(inner.content, stripReferenceMarkers);
if (!ct) {
continue;
}
const typeName = asString(inner.type).toUpperCase();
if (typeName === 'THINK' || typeName === 'THINKING') {
parts.push({ text: ct, type: 'thinking' });
} else if (typeName === 'RESPONSE') {
parts.push({ text: ct, type: 'text' });
} else {
parts.push({ text: ct, type: partType });
}
}
}
return { parts, finished: false };
}
function isStatusPath(pathValue) {
return pathValue === 'response/status' || pathValue === 'status';
}
function isFinishedStatus(value) {
return asString(value).toUpperCase() === 'FINISHED';
}
function filterLeakedContentFilterParts(parts) {
if (!Array.isArray(parts) || parts.length === 0) {
return parts;
}
const out = [];
for (const p of parts) {
if (!p || typeof p !== 'object') {
continue;
}
const { text, stripped } = stripLeakedContentFilterSuffix(p.text);
if (stripped && shouldDropCleanedLeakedChunk(text)) {
continue;
}
if (stripped) {
out.push({ ...p, text });
continue;
}
out.push(p);
}
return out;
}
function stripLeakedContentFilterSuffix(text) {
if (typeof text !== 'string' || text === '') {
return { text, stripped: false };
}
const upperText = text.toUpperCase();
const idx = upperText.indexOf('CONTENT_FILTER');
if (idx < 0) {
return { text, stripped: false };
}
return {
text: text.slice(0, idx).replace(/[ \t\r]+$/g, ''),
stripped: true,
};
}
function shouldDropCleanedLeakedChunk(cleaned) {
if (cleaned === '') {
return true;
}
if (typeof cleaned === 'string' && cleaned.includes('\n')) {
return false;
}
return asString(cleaned).trim() === '';
}
function hasContentFilterStatus(chunk) {
if (!chunk || typeof chunk !== 'object') {
return false;
}
const code = asString(chunk.code);
if (code && code.toLowerCase() === 'content_filter') {
return true;
}
return hasContentFilterStatusValue(chunk);
}
function hasContentFilterStatusValue(v) {
if (Array.isArray(v)) {
for (const item of v) {
if (hasContentFilterStatusValue(item)) {
return true;
}
}
return false;
}
if (!v || typeof v !== 'object') {
return false;
}
const pathValue = asString(v.p);
if (pathValue && pathValue.toLowerCase().includes('status')) {
if (asString(v.v).toLowerCase() === 'content_filter') {
return true;
}
}
if (asString(v.code).toLowerCase() === 'content_filter') {
return true;
}
for (const value of Object.values(v)) {
if (hasContentFilterStatusValue(value)) {
return true;
}
}
return false;
}
function extractAccumulatedTokenUsage(chunk) {
return findAccumulatedTokenUsage(chunk);
}
function findAccumulatedTokenUsage(v) {
if (Array.isArray(v)) {
for (const item of v) {
const n = findAccumulatedTokenUsage(item);
if (n > 0) {
return n;
}
}
return 0;
}
if (!v || typeof v !== 'object') {
return 0;
}
const pathValue = asString(v.p);
if (pathValue && pathValue.toLowerCase().includes('accumulated_token_usage')) {
const n = toInt(v.v);
if (n > 0) {
return n;
}
}
const direct = toInt(v.accumulated_token_usage);
if (direct > 0) {
return direct;
}
for (const value of Object.values(v)) {
const n = findAccumulatedTokenUsage(value);
if (n > 0) {
return n;
}
}
return 0;
}
function toInt(v) {
if (typeof v !== 'number' || !Number.isFinite(v)) {
return 0;
}
return Math.trunc(v);
}
function formatErrorMessage(v) {
if (typeof v === 'string') {
return v;
}
if (v == null) {
return String(v);
}
try {
return JSON.stringify(v);
} catch (_err) {
return String(v);
}
}
function shouldSkipPath(pathValue) {
if (isFragmentStatusPath(pathValue)) {
return true;
}
if (SKIP_EXACT_PATHS.has(pathValue)) {
return true;
}
for (const p of SKIP_PATTERNS) {
if (pathValue.includes(p)) {
return true;
}
}
return false;
}
function isFragmentStatusPath(pathValue) {
if (!pathValue || pathValue === 'response/status') {
return false;
}
return /^response\/fragments\/-?\d+\/status$/i.test(pathValue);
}
function isCitation(text) {
return asString(text).trim().startsWith('[citation:');
}
function asContentString(v, stripReferenceMarkers = true) {
if (typeof v === 'string') {
return stripReferenceMarkers ? stripReferenceMarkersText(v) : v;
}
if (Array.isArray(v)) {
let out = '';
for (const item of v) {
out += asContentString(item, stripReferenceMarkers);
}
return out;
}
if (v && typeof v === 'object') {
if (Object.prototype.hasOwnProperty.call(v, 'content')) {
return asContentString(v.content, stripReferenceMarkers);
}
if (Object.prototype.hasOwnProperty.call(v, 'v')) {
return asContentString(v.v, stripReferenceMarkers);
}
return '';
}
if (v == null) {
return '';
}
const text = String(v);
return stripReferenceMarkers ? stripReferenceMarkersText(text) : text;
}
function stripReferenceMarkersText(text) {
if (!text) {
return text;
}
return text.replace(/\[reference:\s*\d+\]/gi, '');
}
function asString(v) {
if (typeof v === 'string') {
return v.trim();
}
if (Array.isArray(v)) {
return asString(v[0]);
}
if (v == null) {
return '';
}
return String(v).trim();
}
module.exports = {
parseChunkForContent,
extractContentRecursive,
filterLeakedContentFilterParts,
hasContentFilterStatus,
extractAccumulatedTokenUsage,
shouldSkipPath,
isFragmentStatusPath,
isCitation,
stripReferenceMarkers: stripReferenceMarkersText,
};
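For orientation only (not part of this diff), a minimal sketch of how a caller might consume the richer result object returned by the reworked parser; the sample chunk, its path value, and the require path (same directory as the stream handlers below) are hypothetical stand-ins for a real upstream SSE event:
// Hypothetical consumer of the reworked parser; the chunk below is made up for illustration.
const { parseChunkForContent } = require('./sse_parse');
let currentType = 'thinking';
const chunk = { p: 'response/content', v: 'Hello[reference: 3] world' };
const result = parseChunkForContent(chunk, true, currentType, true);
// result.parsed === true, result.finished === false, result.contentFilter === false
// result.parts -> [{ text: 'Hello world', type: 'text' }]  (the [reference: N] marker is stripped)
// result.outputTokens stays 0 until a chunk carries an accumulated_token_usage value
currentType = result.newType;
This is the same loop shape the rewritten stream handler uses further down: copy newType back, then act on errorMessage, contentFilter, and finished before emitting parts.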

View File

@@ -1,13 +1,15 @@
'use strict';
function buildUsage(prompt, thinking, output) {
function buildUsage(prompt, thinking, output, outputTokens = 0) {
const promptTokens = estimateTokens(prompt);
const reasoningTokens = estimateTokens(thinking);
const completionTokens = estimateTokens(output);
const overriddenCompletionTokens = Number.isFinite(outputTokens) && outputTokens > 0 ? Math.trunc(outputTokens) : 0;
const finalCompletionTokens = overriddenCompletionTokens > 0 ? overriddenCompletionTokens : reasoningTokens + completionTokens;
return {
prompt_tokens: promptTokens,
completion_tokens: reasoningTokens + completionTokens,
total_tokens: promptTokens + reasoningTokens + completionTokens,
completion_tokens: finalCompletionTokens,
total_tokens: promptTokens + finalCompletionTokens,
completion_tokens_details: {
reasoning_tokens: reasoningTokens,
},
@@ -15,7 +17,7 @@ function buildUsage(prompt, thinking, output) {
}
function estimateTokens(text) {
const t = asString(text);
const t = asTokenString(text);
if (!t) {
return 0;
}
@@ -32,17 +34,17 @@ function estimateTokens(text) {
return n < 1 ? 1 : n;
}
function asString(v) {
function asTokenString(v) {
if (typeof v === 'string') {
return v.trim();
return v;
}
if (Array.isArray(v)) {
return asString(v[0]);
return asTokenString(v[0]);
}
if (v == null) {
return '';
}
return String(v).trim();
return String(v);
}
module.exports = {

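To illustrate the new override path, a brief usage sketch of the extended buildUsage shown above; the strings and the 128 are illustrative only, and estimateTokens keeps its heuristic behavior:
// Hypothetical call sites for the extended buildUsage; values are illustrative only.
const { buildUsage } = require('./token_usage');
// Without an upstream count, completion_tokens falls back to the reasoning + output estimate.
const estimated = buildUsage('prompt text', 'reasoning text', 'answer text');
// With accumulated_token_usage reported upstream, that count wins for completion/total tokens,
// while completion_tokens_details.reasoning_tokens still comes from the local estimate.
const reported = buildUsage('prompt text', 'reasoning text', 'answer text', 128);
// reported.completion_tokens === 128
// reported.total_tokens === reported.prompt_tokens + 128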
View File

@@ -1,295 +1,3 @@
'use strict';
const {
createToolSieveState,
processToolSieveChunk,
flushToolSieve,
parseStandaloneToolCalls,
formatOpenAIStreamToolCalls,
} = require('../helpers/stream-tool-sieve');
const { BASE_HEADERS } = require('../shared/deepseek-constants');
const { writeOpenAIError } = require('./error_shape');
const { parseChunkForContent, isCitation } = require('./sse_parse');
const { buildUsage } = require('./token_usage');
const {
resolveToolcallPolicy,
formatIncrementalToolCallDeltas,
filterIncrementalToolCallDeltasByAllowed,
} = require('./toolcall_policy');
const { createChatCompletionEmitter } = require('./stream_emitter');
const {
asString,
isAbortError,
fetchStreamPrepare,
relayPreparedFailure,
safeReadText,
createLeaseReleaser,
} = require('./http_internal');
const DEEPSEEK_COMPLETION_URL = 'https://chat.deepseek.com/api/v0/chat/completion';
async function handleVercelStream(req, res, rawBody, payload) {
const prep = await fetchStreamPrepare(req, rawBody);
if (!prep.ok) {
relayPreparedFailure(res, prep);
return;
}
const model = asString(prep.body.model) || asString(payload.model);
const sessionID = asString(prep.body.session_id) || `chatcmpl-${Date.now()}`;
const leaseID = asString(prep.body.lease_id);
const deepseekToken = asString(prep.body.deepseek_token);
const powHeader = asString(prep.body.pow_header);
const completionPayload = prep.body.payload && typeof prep.body.payload === 'object' ? prep.body.payload : null;
const finalPrompt = asString(prep.body.final_prompt);
const thinkingEnabled = toBool(prep.body.thinking_enabled);
const searchEnabled = toBool(prep.body.search_enabled);
const toolPolicy = resolveToolcallPolicy(prep.body, payload.tools);
const toolNames = toolPolicy.toolNames;
const emitEarlyToolDeltas = toolPolicy.emitEarlyToolDeltas;
if (!model || !leaseID || !deepseekToken || !powHeader || !completionPayload) {
writeOpenAIError(res, 500, 'invalid vercel prepare response');
return;
}
const releaseLease = createLeaseReleaser(req, leaseID);
const upstreamController = new AbortController();
let clientClosed = false;
let reader = null;
const markClientClosed = () => {
if (clientClosed) {
return;
}
clientClosed = true;
upstreamController.abort();
if (reader && typeof reader.cancel === 'function') {
Promise.resolve(reader.cancel()).catch(() => {});
}
};
const onReqAborted = () => markClientClosed();
const onResClose = () => {
if (!res.writableEnded) {
markClientClosed();
}
};
req.on('aborted', onReqAborted);
res.on('close', onResClose);
try {
let completionRes;
try {
completionRes = await fetch(DEEPSEEK_COMPLETION_URL, {
method: 'POST',
headers: {
...BASE_HEADERS,
authorization: `Bearer ${deepseekToken}`,
'x-ds-pow-response': powHeader,
},
body: JSON.stringify(completionPayload),
signal: upstreamController.signal,
});
} catch (err) {
if (clientClosed || isAbortError(err)) {
return;
}
throw err;
}
if (clientClosed) {
return;
}
if (!completionRes.ok || !completionRes.body) {
const detail = await safeReadText(completionRes);
writeOpenAIError(res, 500, detail ? `Failed to get completion: ${detail}` : 'Failed to get completion.');
return;
}
res.statusCode = 200;
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache, no-transform');
res.setHeader('Connection', 'keep-alive');
res.setHeader('X-Accel-Buffering', 'no');
if (typeof res.flushHeaders === 'function') {
res.flushHeaders();
}
const created = Math.floor(Date.now() / 1000);
let currentType = thinkingEnabled ? 'thinking' : 'text';
let thinkingText = '';
let outputText = '';
const toolSieveEnabled = toolPolicy.toolSieveEnabled;
const toolSieveState = createToolSieveState();
let toolCallsEmitted = false;
const streamToolCallIDs = new Map();
const streamToolNames = new Map();
const decoder = new TextDecoder();
reader = completionRes.body.getReader();
let buffered = '';
let ended = false;
const { sendFrame, sendDeltaFrame } = createChatCompletionEmitter({
res,
sessionID,
created,
model,
isClosed: () => clientClosed,
});
const finish = async (reason) => {
if (ended) {
return;
}
ended = true;
if (clientClosed || res.writableEnded || res.destroyed) {
await releaseLease();
return;
}
const detected = parseStandaloneToolCalls(outputText, toolNames);
if (detected.length > 0 && !toolCallsEmitted) {
toolCallsEmitted = true;
sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(detected, streamToolCallIDs) });
} else if (toolSieveEnabled) {
const tailEvents = flushToolSieve(toolSieveState, toolNames);
for (const evt of tailEvents) {
if (evt.type === 'tool_calls' && Array.isArray(evt.calls) && evt.calls.length > 0) {
toolCallsEmitted = true;
sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls, streamToolCallIDs) });
continue;
}
if (evt.text) {
sendDeltaFrame({ content: evt.text });
}
}
}
if (detected.length > 0 || toolCallsEmitted) {
reason = 'tool_calls';
}
sendFrame({
id: sessionID,
object: 'chat.completion.chunk',
created,
model,
choices: [{ delta: {}, index: 0, finish_reason: reason }],
usage: buildUsage(finalPrompt, thinkingText, outputText),
});
if (!res.writableEnded && !res.destroyed) {
res.write('data: [DONE]\n\n');
}
await releaseLease();
if (!res.writableEnded && !res.destroyed) {
res.end();
}
};
try {
// eslint-disable-next-line no-constant-condition
while (true) {
if (clientClosed) {
await finish('stop');
return;
}
const { value, done } = await reader.read();
if (done) {
break;
}
buffered += decoder.decode(value, { stream: true });
const lines = buffered.split('\n');
buffered = lines.pop() || '';
for (const rawLine of lines) {
const line = rawLine.trim();
if (!line.startsWith('data:')) {
continue;
}
const dataStr = line.slice(5).trim();
if (!dataStr) {
continue;
}
if (dataStr === '[DONE]') {
await finish('stop');
return;
}
let chunk;
try {
chunk = JSON.parse(dataStr);
} catch (_err) {
continue;
}
if (chunk.error || chunk.code === 'content_filter') {
await finish('content_filter');
return;
}
const parsed = parseChunkForContent(chunk, thinkingEnabled, currentType);
currentType = parsed.newType;
if (parsed.finished) {
await finish('stop');
return;
}
for (const p of parsed.parts) {
if (!p.text) {
continue;
}
if (searchEnabled && isCitation(p.text)) {
continue;
}
if (p.type === 'thinking') {
if (thinkingEnabled) {
thinkingText += p.text;
sendDeltaFrame({ reasoning_content: p.text });
}
} else {
outputText += p.text;
if (!toolSieveEnabled) {
sendDeltaFrame({ content: p.text });
continue;
}
const events = processToolSieveChunk(toolSieveState, p.text, toolNames);
for (const evt of events) {
if (evt.type === 'tool_call_deltas') {
if (!emitEarlyToolDeltas) {
continue;
}
const filtered = filterIncrementalToolCallDeltasByAllowed(evt.deltas, toolNames, streamToolNames);
const formatted = formatIncrementalToolCallDeltas(filtered, streamToolCallIDs);
if (formatted.length > 0) {
toolCallsEmitted = true;
sendDeltaFrame({ tool_calls: formatted });
}
continue;
}
if (evt.type === 'tool_calls') {
toolCallsEmitted = true;
sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls, streamToolCallIDs) });
continue;
}
if (evt.text) {
sendDeltaFrame({ content: evt.text });
}
}
}
}
}
}
await finish('stop');
} catch (err) {
if (clientClosed || isAbortError(err)) {
await finish('stop');
return;
}
await finish('stop');
}
} finally {
req.removeListener('aborted', onReqAborted);
res.removeListener('close', onResClose);
await releaseLease();
}
}
function toBool(v) {
return v === true;
}
module.exports = {
handleVercelStream,
};
module.exports = require('./vercel_stream_impl');

View File

@@ -0,0 +1,310 @@
'use strict';
// Implementation moved here to keep the line-gate wrapper tiny.
const {
createToolSieveState,
processToolSieveChunk,
flushToolSieve,
parseStandaloneToolCalls,
formatOpenAIStreamToolCalls,
} = require('../helpers/stream-tool-sieve');
const { BASE_HEADERS } = require('../shared/deepseek-constants');
const { writeOpenAIError } = require('./error_shape');
const { parseChunkForContent, isCitation } = require('./sse_parse');
const { buildUsage } = require('./token_usage');
const {
resolveToolcallPolicy,
formatIncrementalToolCallDeltas,
filterIncrementalToolCallDeltasByAllowed,
boolDefaultTrue,
} = require('./toolcall_policy');
const { createChatCompletionEmitter } = require('./stream_emitter');
const {
asString,
isAbortError,
fetchStreamPrepare,
relayPreparedFailure,
createLeaseReleaser,
} = require('./http_internal');
const DEEPSEEK_COMPLETION_URL = 'https://chat.deepseek.com/api/v0/chat/completion';
async function handleVercelStream(req, res, rawBody, payload) {
const prep = await fetchStreamPrepare(req, rawBody);
if (!prep.ok) {
relayPreparedFailure(res, prep);
return;
}
const model = asString(prep.body.model) || asString(payload.model);
const sessionID = asString(prep.body.session_id) || `chatcmpl-${Date.now()}`;
const leaseID = asString(prep.body.lease_id);
const deepseekToken = asString(prep.body.deepseek_token);
const powHeader = asString(prep.body.pow_header);
const completionPayload = prep.body.payload && typeof prep.body.payload === 'object' ? prep.body.payload : null;
const finalPrompt = asString(prep.body.final_prompt);
const thinkingEnabled = toBool(prep.body.thinking_enabled);
const searchEnabled = toBool(prep.body.search_enabled);
const toolPolicy = resolveToolcallPolicy(prep.body, payload.tools);
const toolNames = toolPolicy.toolNames;
const emitEarlyToolDeltas = toolPolicy.emitEarlyToolDeltas;
const stripReferenceMarkers = boolDefaultTrue(prep.body.compat && prep.body.compat.strip_reference_markers);
if (!model || !leaseID || !deepseekToken || !powHeader || !completionPayload) {
writeOpenAIError(res, 500, 'invalid vercel prepare response');
return;
}
const releaseLease = createLeaseReleaser(req, leaseID);
const upstreamController = new AbortController();
let clientClosed = false;
let reader = null;
const markClientClosed = () => {
if (clientClosed) {
return;
}
clientClosed = true;
upstreamController.abort();
if (reader && typeof reader.cancel === 'function') {
Promise.resolve(reader.cancel()).catch(() => {});
}
};
const onReqAborted = () => markClientClosed();
const onResClose = () => {
if (!res.writableEnded) {
markClientClosed();
}
};
req.on('aborted', onReqAborted);
res.on('close', onResClose);
try {
let completionRes;
try {
completionRes = await fetch(DEEPSEEK_COMPLETION_URL, {
method: 'POST',
headers: {
...BASE_HEADERS,
authorization: `Bearer ${deepseekToken}`,
'x-ds-pow-response': powHeader,
},
body: JSON.stringify(completionPayload),
signal: upstreamController.signal,
});
} catch (err) {
if (clientClosed || isAbortError(err)) {
return;
}
throw err;
}
if (clientClosed) {
return;
}
if (!completionRes.ok || !completionRes.body) {
const detail = completionRes.body ? await completionRes.text() : '';
const status = completionRes.ok ? 500 : completionRes.status || 500;
writeOpenAIError(res, status, detail);
return;
}
res.statusCode = 200;
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache, no-transform');
res.setHeader('Connection', 'keep-alive');
res.setHeader('X-Accel-Buffering', 'no');
if (typeof res.flushHeaders === 'function') {
res.flushHeaders();
}
const created = Math.floor(Date.now() / 1000);
let currentType = thinkingEnabled ? 'thinking' : 'text';
let thinkingText = '';
let outputText = '';
let outputTokens = 0;
const toolSieveEnabled = toolPolicy.toolSieveEnabled;
const toolSieveState = createToolSieveState();
let toolCallsEmitted = false;
const streamToolCallIDs = new Map();
const streamToolNames = new Map();
const decoder = new TextDecoder();
reader = completionRes.body.getReader();
let buffered = '';
let ended = false;
const { sendFrame, sendDeltaFrame } = createChatCompletionEmitter({
res,
sessionID,
created,
model,
isClosed: () => clientClosed,
});
const finish = async (reason) => {
if (ended) {
return;
}
ended = true;
if (clientClosed || res.writableEnded || res.destroyed) {
await releaseLease();
return;
}
const detected = parseStandaloneToolCalls(outputText, toolNames);
if (detected.length > 0 && !toolCallsEmitted) {
toolCallsEmitted = true;
sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(detected, streamToolCallIDs) });
} else if (toolSieveEnabled) {
const tailEvents = flushToolSieve(toolSieveState, toolNames);
for (const evt of tailEvents) {
if (evt.type === 'tool_calls' && Array.isArray(evt.calls) && evt.calls.length > 0) {
toolCallsEmitted = true;
sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls, streamToolCallIDs) });
continue;
}
if (evt.text) {
sendDeltaFrame({ content: evt.text });
}
}
}
if (detected.length > 0 || toolCallsEmitted) {
reason = 'tool_calls';
}
sendFrame({
id: sessionID,
object: 'chat.completion.chunk',
created,
model,
choices: [{ delta: {}, index: 0, finish_reason: reason }],
usage: buildUsage(finalPrompt, thinkingText, outputText, outputTokens),
});
if (!res.writableEnded && !res.destroyed) {
res.write('data: [DONE]\n\n');
}
await releaseLease();
if (!res.writableEnded && !res.destroyed) {
res.end();
}
};
try {
// eslint-disable-next-line no-constant-condition
while (true) {
if (clientClosed) {
await finish('stop');
return;
}
const { value, done } = await reader.read();
if (done) {
break;
}
buffered += decoder.decode(value, { stream: true });
const lines = buffered.split('\n');
buffered = lines.pop() || '';
for (const rawLine of lines) {
const line = rawLine.trim();
if (!line.startsWith('data:')) {
continue;
}
const dataStr = line.slice(5).trim();
if (!dataStr) {
continue;
}
if (dataStr === '[DONE]') {
await finish('stop');
return;
}
let chunk;
try {
chunk = JSON.parse(dataStr);
} catch (_err) {
continue;
}
const parsed = parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenceMarkers);
if (!parsed.parsed) {
continue;
}
if (parsed.outputTokens > 0) {
outputTokens = parsed.outputTokens;
}
currentType = parsed.newType;
if (parsed.errorMessage) {
await finish('content_filter');
return;
}
if (parsed.contentFilter) {
await finish('stop');
return;
}
if (parsed.finished) {
await finish('stop');
return;
}
for (const p of parsed.parts) {
if (!p.text) {
continue;
}
if (searchEnabled && isCitation(p.text)) {
continue;
}
if (p.type === 'thinking') {
if (thinkingEnabled) {
thinkingText += p.text;
sendDeltaFrame({ reasoning_content: p.text });
}
} else {
outputText += p.text;
if (!toolSieveEnabled) {
sendDeltaFrame({ content: p.text });
continue;
}
const events = processToolSieveChunk(toolSieveState, p.text, toolNames);
for (const evt of events) {
if (evt.type === 'tool_call_deltas') {
if (!emitEarlyToolDeltas) {
continue;
}
const filtered = filterIncrementalToolCallDeltasByAllowed(evt.deltas, toolNames, streamToolNames);
const formatted = formatIncrementalToolCallDeltas(filtered, streamToolCallIDs);
if (formatted.length > 0) {
toolCallsEmitted = true;
sendDeltaFrame({ tool_calls: formatted });
}
continue;
}
if (evt.type === 'tool_calls') {
toolCallsEmitted = true;
sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls, streamToolCallIDs) });
continue;
}
if (evt.text) {
sendDeltaFrame({ content: evt.text });
}
}
}
}
}
}
await finish('stop');
} catch (err) {
if (clientClosed || isAbortError(err)) {
await finish('stop');
return;
}
await finish('stop');
}
} finally {
req.removeListener('aborted', onReqAborted);
res.removeListener('close', onResClose);
await releaseLease();
}
}
function toBool(v) {
return v === true;
}
module.exports = {
handleVercelStream,
};
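The only new knob this module reads from the prepare response is compat.strip_reference_markers. A hypothetical fragment of that body, using the field names consumed by handleVercelStream above with made-up values, might look like:
// Hypothetical prepare-response fragment; only compat.strip_reference_markers is new here.
// Assuming boolDefaultTrue treats a missing value as true (as its name suggests),
// omitting compat keeps reference-marker stripping enabled by default.
const prepBody = {
  model: 'deepseek-chat',
  session_id: 'chatcmpl-123',
  lease_id: 'lease-1',
  deepseek_token: '<token>',
  pow_header: '<pow>',
  payload: { chat_session_id: 'session-123' },
  thinking_enabled: true,
  search_enabled: false,
  compat: { strip_reference_markers: false }, // opt out of [reference: N] stripping
};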

View File

@@ -9,6 +9,16 @@ import (
var markdownImagePattern = regexp.MustCompile(`!\[(.*?)\]\((.*?)\)`)
const (
systemMarker = "<System>"
userMarker = "<User>"
assistantMarker = "<Assistant>"
toolMarker = "<Tool>"
endSentenceMarker = "<end▁of▁sentence>"
endToolResultsMarker = "<end▁of▁toolresults>"
endInstructionsMarker = "<end▁of▁instructions>"
)
func MessagesPrepare(messages []map[string]any) string {
type block struct {
Role string
@@ -32,33 +42,39 @@ func MessagesPrepare(messages []map[string]any) string {
merged = append(merged, msg)
}
parts := make([]string, 0, len(merged))
for i, m := range merged {
for _, m := range merged {
switch m.Role {
case "assistant":
parts = append(parts, "<Assistant>"+m.Text+"<end▁of▁sentence>")
parts = append(parts, formatRoleBlock(assistantMarker, m.Text, endSentenceMarker))
case "tool":
if i > 0 {
parts = append(parts, "<Tool>"+m.Text)
} else {
parts = append(parts, m.Text)
if strings.TrimSpace(m.Text) != "" {
parts = append(parts, formatRoleBlock(toolMarker, m.Text, endToolResultsMarker))
}
case "system":
// Clear system boundary improves R1 and V3 context understanding significantly
if strings.TrimSpace(m.Text) != "" {
parts = append(parts, "<system_instructions>\n"+strings.TrimSpace(m.Text)+"\n</system_instructions>\n\n")
if text := strings.TrimSpace(m.Text); text != "" {
parts = append(parts, formatRoleBlock(systemMarker, text, endInstructionsMarker))
}
case "user":
// Always prepend <User> to user messages. DeepSeek R1 reasoning triggers best
// and aligns context perfectly when the user turn is explicitly marked.
parts = append(parts, "<User>"+m.Text)
parts = append(parts, formatRoleBlock(userMarker, m.Text, endSentenceMarker))
default:
parts = append(parts, m.Text)
if strings.TrimSpace(m.Text) != "" {
parts = append(parts, m.Text)
}
}
}
out := strings.Join(parts, "")
out := strings.Join(parts, "\n\n")
return markdownImagePattern.ReplaceAllString(out, `[${1}](${2})`)
}
// DeepSeek-style turn suffixes stay attached to the same block as the role content.
func formatRoleBlock(marker, text, endMarker string) string {
out := marker + "\n" + text
if strings.TrimSpace(endMarker) != "" {
out += endMarker
}
return out
}
func NormalizeContent(v any) string {
if v == nil {
return ""

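For orientation only: given the markers defined above, a hypothetical three-turn exchange (system, user, assistant, all plain string contents) would be rendered by the MessagesPrepare shown here roughly as follows, with blocks joined by blank lines and each role suffix kept on the same block as its content:
<System>
You are a terse assistant.<end▁of▁instructions>

<User>
Hi<end▁of▁sentence>

<Assistant>
Hello!<end▁of▁sentence>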
Some files were not shown because too many files have changed in this diff.