diff --git a/AGENTS.md b/AGENTS.md index 1c71307..664f3f0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -22,6 +22,13 @@ These rules apply to all agent-made changes in this repository. - Keep changes additive and tightly scoped to the requested feature or bugfix. - Do not mix unrelated refactors into feature PRs unless they are required to make the change pass gates. +## Protocol Adapter Boundary + +- Do not let OpenAI Chat, OpenAI Responses, Claude, Gemini, or other interface protocol formatting own shared business behavior. +- Normalize protocol-specific request shapes into the project standard request/turn model first, run shared business logic in one place, then render back to the target protocol at the boundary. +- Business logic that must stay globally consistent includes empty-output retry, thinking/reasoning handling, tool-call detection and policy, usage accounting, current-input-file injection, history persistence, file/reference handling, and completion payload assembly. +- If a behavior must differ by protocol, keep the difference as an explicit adapter/rendering concern and document why it cannot live in the shared normalized path. + ## Documentation Sync - When business logic or user-visible behavior changes, update the corresponding documentation in the same change. 
diff --git a/API.en.md b/API.en.md index 6e93202..19368ec 100644 --- a/API.en.md +++ b/API.en.md @@ -32,7 +32,7 @@ Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Depl | Base URL | `http://localhost:5001` or your deployment domain | | Default Content-Type | `application/json` | | Health probes | `GET /healthz`, `GET /readyz` | -| CORS | Enabled (uniformly covers `/v1/*`, `/anthropic/*`, `/v1beta/models/*`, and `/admin/*`; echoes the browser `Origin` when present, otherwise `*`; default allow-list includes `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`, `X-Goog-Api-Key`, `Anthropic-Version`, `Anthropic-Beta`, and also accepts third-party preflight-requested headers such as `x-stainless-*`; `/v1/chat/completions` on Vercel Node Runtime matches the same behavior; internal-only `X-Ds2-Internal-Token` remains blocked) | +| CORS | Enabled (uniformly covers `/v1/*`, `/anthropic/*`, `/v1beta/models/*`, `/api/*`, and `/admin/*`; echoes the browser `Origin` when present, otherwise `*`; default allow-list includes `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`, `X-Goog-Api-Key`, `Anthropic-Version`, `Anthropic-Beta`, and also accepts third-party preflight-requested headers such as `x-stainless-*`; `/v1/chat/completions` on Vercel Node Runtime matches the same behavior; internal-only `X-Ds2-Internal-Token` remains blocked) | - All JSON request bodies must be valid UTF-8; malformed byte sequences are rejected on ingress with `400 invalid json`. @@ -40,8 +40,10 @@ Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Depl - OpenAI / Claude / Gemini protocols are now mounted on one shared `chi` router tree assembled in `internal/server/router.go`. 
- Adapter responsibilities are streamlined to: **request normalization → DeepSeek invocation → protocol-shaped rendering**, reducing legacy split-logic paths. -- Tool-calling semantics are aligned between Go and Node runtime: models should output the DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts legacy canonical XML `` → `` → ``. DSML is normalized back to XML at the parser entry, so internal parsing remains XML-based, with stream-time anti-leak filtering. +- Tool-calling semantics are aligned between Go and Node runtime: models should output the fullwidth-separator DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts the halfwidth DSML wrapper `<|DSML|tool_calls>`, DSML wrapper aliases such as ``, `<|tool_calls>`, `<|tool_calls>`, common DSML separator drift such as `<|DSML tool_calls>`, collapsed DSML local names such as ``, control-separator drift such as `` / raw STX `\x02`, CJK angle bracket and trailing attribute separator drift such as `...〈/DSM|parameter〉`, arbitrary protocol prefixes such as ``, and legacy canonical XML `` → `` → ``. The scanner normalizes fixed local names (`tool_calls` / `invoke` / `parameter`) back to XML before parsing; only wrapped tool blocks or the narrow missing-opening-wrapper repair path enter the tool path, while bare `` does not count as supported syntax. JSON literal parameter bodies are preserved as structured values, explicit empty or whitespace-only parameters are preserved as empty strings, malformed complete wrappers are released as plain text, and loose CDATA is narrowly repaired at final parse/flush when it can preserve a complete outer tool call. - `Admin API` separates static config from runtime policy: `/admin/config*` for configuration state, `/admin/settings*` for runtime behavior. 
+- When upstream returns a thinking-only response with no visible text, the Go main path for both streaming and non-streaming completions retries once in the same DeepSeek session: it appends the prompt suffix `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` and sets `parent_message_id`. If that same-account retry would still end as `429 upstream_empty_output`, managed-account mode switches to the next available account, creates a fresh session, and retries the original payload once before returning 429. +- Citation/reference marker boundary: streaming output hides upstream `[citation:N]` / `[reference:N]` placeholders by default; non-stream output converts DeepSeek search reference markers into Markdown links. --- @@ -84,7 +86,7 @@ Two header formats accepted: - Token is in `config.keys` → **Managed account mode**: DS2API auto-selects an account via rotation - Token is not in `config.keys` → **Direct token mode**: treated as a DeepSeek token directly -**Optional header**: `X-Ds2-Target-Account: ` — Pin a specific managed account; if the target account does not exist or the managed-account queue is exhausted, the request returns `429`, and current responses do not include `Retry-After`. If the account exists but login/refresh fails, the request returns the underlying `401` or upstream error. +**Optional header**: `X-Ds2-Target-Account: ` — Pin a specific managed account; if the target account does not exist or the managed-account queue is exhausted, the request returns `429`, and current responses do not include `Retry-After`. If the account exists but login/refresh fails, the request returns the underlying `401` or upstream error. Without a pinned target, managed-account completion requests try one alternate-account fresh retry before returning an empty-output 429; pinned-target requests and requests with no other available account do not switch. 
Gemini-compatible clients can also send `x-goog-api-key`, `?key=`, or `?api_key=` as the caller credential source. ### Admin Endpoints (`/admin/*`) @@ -226,16 +228,18 @@ For `chat` / `responses` / `embeddings`, DS2API follows a wide-input/strict-outp 1. Match DeepSeek native model IDs first. 2. Then match exact keys in `model_aliases`. -3. If still unmatched, fall back by known family heuristics (`o*`, `gpt-*`, `claude-*`, etc.). -4. If still unmatched, return `invalid_request_error`. +3. If the request name ends with `-nothinking`, resolve the base alias and append the corresponding no-thinking variant. +4. If still unmatched, return `invalid_request_error`. Unknown model families are not guessed heuristically; add explicit compatibility names through `model_aliases`. Built-in aliases come from `internal/config/models.go`; `config.model_aliases` can override or add mappings at runtime. Excerpt: - OpenAI / Codex: `gpt-4o`, `gpt-4.1`, `gpt-5`, `gpt-5.5`, `gpt-5-codex`, `gpt-5.3-codex`, `codex-mini-latest` - OpenAI reasoning: `o1`, `o3`, `o3-deep-research`, `o4-mini` - Claude: `claude-opus-4-6`, `claude-sonnet-4-6`, `claude-haiku-4-5`, `claude-3-5-sonnet-latest` -- Gemini: `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-pro-vision` -- Other compatibility families: `llama-*`, `qwen-*`, `mistral-*`, and `command-*` fall back through family heuristics +- Gemini: `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-3.1-pro`, `gemini-3-pro`, `gemini-3-flash`, `gemini-3.1-flash-lite`, `gemini-pro-vision` +- Other exact built-in aliases: `llama-3.1-70b-instruct`, `qwen-max` + +Aliases with a `-nothinking` suffix also map to the corresponding forced no-thinking DeepSeek model. Current vision support resolves only to `deepseek-v4-vision` and does not expose a separate `vision-search` variant. 
@@ -243,7 +247,7 @@ Retired historical families such as `claude-1.*`, `claude-2.*`, `claude-instant- ### `POST /v1/chat/completions` -> Path note: besides the canonical `/v1/chat/completions`, DS2API also accepts the root shortcut `/chat/completions`. On Vercel Runtime, `stream=true` on either path is handled by the Node streaming bridge, while non-stream stays on the Go primary path. +> Path note: besides the canonical `/v1/chat/completions`, DS2API also accepts the root shortcut `/chat/completions`. On Vercel Runtime, `vercel.json` rewrites only the canonical `/v1/chat/completions` path to the Node streaming bridge; the root shortcut stays on the Go primary path. Use `/v1/chat/completions` on Vercel when real-time streaming is required. **Headers**: @@ -256,7 +260,7 @@ Content-Type: application/json | Field | Type | Required | Notes | | --- | --- | --- | --- | -| `model` | string | ✅ | DeepSeek native models + common aliases (`gpt-5.5`, `gpt-5.4-mini`, `gpt-5.3-codex`, `o3`, `claude-opus-4-6`, `gemini-2.5-pro`, `gemini-2.5-flash`, etc.) | +| `model` | string | ✅ | DeepSeek native models + common aliases (`gpt-5.5`, `gpt-5.4-mini`, `gpt-5.3-codex`, `o3`, `claude-opus-4-6`, `gemini-2.5-pro`, `gemini-3.1-pro`, `gemini-3-flash`, etc.); `-nothinking` suffixes force thinking / reasoning off | | `messages` | array | ✅ | OpenAI-style messages | | `stream` | boolean | ❌ | Default `false` | | `tools` | array | ❌ | Function calling schema | @@ -351,7 +355,8 @@ When `tools` is present, DS2API performs anti-leak handling: Additional notes: -- The parser treats DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`) and legacy canonical XML tool blocks (`` / `` / ``) as executable tool calls. DSML is normalized back to XML at the parser entry; internal parsing remains XML-based. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text. 
+- The parser treats the recommended DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), halfwidth DSML shell blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), DSML wrapper aliases (``, `<|tool_calls>`, `<|tool_calls>`), common DSML separator drift (`<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`), collapsed DSML local names (`` / `` / ``), control-separator drift (`` / raw STX `\x02`), CJK angle bracket and trailing attribute separator drift (`...〈/DSM|parameter〉`), arbitrary protocol prefixes (``), and legacy canonical XML tool blocks (`` / `` / ``) as executable tool calls. These shells normalize back to XML first, while internal parsing remains XML-based. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text; complete but malformed wrappers are also released as plain text. +- The parser no longer drops tool calls solely because parameter values are empty; explicit empty strings or whitespace-only parameters become empty strings in structured `tool_calls`. Prompting still tells the model not to emit blank parameters, and missing/empty argument rejection belongs in the tool executor or client schema validation. - If the final visible response text is empty but the reasoning stream contains an executable tool call, Chat / Responses emits a standard OpenAI `tool_calls` / `function_call` output during finalization. If thinking/reasoning was not enabled by the client, that reasoning text is used only for detection and is not exposed as visible text or `reasoning_content`. - `tool_calls` shown inside fenced markdown code blocks (for example, ```json ... ```) are treated as examples, not executable calls. 
@@ -764,6 +769,7 @@ Reads runtime settings and status, including: - `responses` / `embeddings` - `auto_delete` (`mode`: `none` / `single` / `all`; legacy `sessions=true` is still treated as `all`) - `current_input_file` (`enabled` defaults to `true`, plus `min_chars`) +- `thinking_injection` (`enabled` defaults to `true`, `prompt`, and `default_prompt`) - `model_aliases` - `env_backed`, `needs_vercel_sync` - `toolcall` policy is fixed to `feature_match + high` and is no longer returned or editable via settings @@ -778,6 +784,7 @@ Hot-updates runtime settings. Supported fields: - `embeddings.provider` - `auto_delete.mode` - `current_input_file.enabled` / `current_input_file.min_chars` +- `thinking_injection.enabled` / `thinking_injection.prompt` - `model_aliases` - `toolcall` policy is fixed and is no longer writable through settings @@ -1258,7 +1265,7 @@ Clients should handle HTTP status code plus `error` / `detail` fields. | Code | Meaning | | --- | --- | | `401` | Authentication failed (invalid key/token, or expired admin JWT) | -| `429` | Too many requests (exceeded inflight + queue capacity; current responses do not include `Retry-After`) | +| `429` | Too many requests (exceeded inflight + queue capacity, or upstream thinking-only output with no visible answer; managed-account mode first tries one alternate-account fresh retry; current responses do not include `Retry-After`) | | `503` | Model unavailable or upstream error | --- diff --git a/API.md b/API.md index 0470fd5..8d8c827 100644 --- a/API.md +++ b/API.md @@ -32,7 +32,7 @@ | Base URL | `http://localhost:5001` 或你的部署域名 | | 默认 Content-Type | `application/json` | | 健康检查 | `GET /healthz`、`GET /readyz` | -| CORS | 已启用(统一覆盖 `/v1/*`、`/anthropic/*`、`/v1beta/models/*`、`/admin/*`;浏览器有 `Origin` 时回显该 Origin,否则为 `*`;默认允许 `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`, `X-Goog-Api-Key`, `Anthropic-Version`, `Anthropic-Beta`,并会放行预检里声明的第三方请求头,如 
`x-stainless-*`;Vercel 上 `/v1/chat/completions` 的 Node Runtime 也对齐相同行为;内部专用头 `X-Ds2-Internal-Token` 仍被拦截) | +| CORS | 已启用(统一覆盖 `/v1/*`、`/anthropic/*`、`/v1beta/models/*`、`/api/*`、`/admin/*`;浏览器有 `Origin` 时回显该 Origin,否则为 `*`;默认允许 `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`, `X-Goog-Api-Key`, `Anthropic-Version`, `Anthropic-Beta`,并会放行预检里声明的第三方请求头,如 `x-stainless-*`;Vercel 上 `/v1/chat/completions` 的 Node Runtime 也对齐相同行为;内部专用头 `X-Ds2-Internal-Token` 仍被拦截) | - 所有 JSON 请求体都必须是合法 UTF-8;非法字节序列会在入站阶段被拒绝为 `400 invalid json`。 @@ -40,9 +40,9 @@ - OpenAI / Claude / Gemini 三套协议已统一挂在同一 `chi` 路由树上,由 `internal/server/router.go` 负责装配。 - 适配器层职责收敛为:**请求归一化 → DeepSeek 调用 → 协议形态渲染**,减少历史版本中“同能力多处实现”的分叉。 -- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受 DSML wrapper 别名 ``、`<|tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`)、`DSML` 与工具标签名黏连的常见 typo(如 ``),以及旧式 canonical XML `` → `` → ``。实现上采用窄容错结构扫描:只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径,裸 `` 不计为已支持语法;流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量(如 `123`、`true`、`null`、数组或对象),会按结构化值输出,不再一律当作字符串;若 CDATA 偶发漏闭合,则会在最终 parse / flush 恢复阶段做窄修复,尽量保住已完整包裹的外层工具调用。 +- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出全角分隔符 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受半角 DSML wrapper `<|DSML|tool_calls>`、DSML wrapper 别名 ``、`<|tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`)、`DSML` 与工具标签名黏连的常见 typo(如 ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉`)、任意协议前缀壳(如 ``),以及旧式 canonical XML `` → `` → ``。实现上采用结构扫描:只要固定本地标签名是 `tool_calls` / `invoke` / `parameter`,前缀壳会在解析入口归一化;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径,裸 `` 不计为已支持语法;流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量(如 `123`、`true`、`null`、数组或对象),会按结构化值输出,不再一律当作字符串;显式空字符串和纯空白参数会结构化保留为空字符串,是否拒绝缺参由工具执行侧决定;完整但 
malformed 的 wrapper 会作为普通文本释放,不会吞掉或伪造成工具调用;若 CDATA 偶发漏闭合,则会在最终 parse / flush 恢复阶段做窄修复,尽量保住已完整包裹的外层工具调用。 - `Admin API` 将配置与运行时策略分开:`/admin/config*` 管静态配置,`/admin/settings*` 管运行时行为。 -- 当上游返回 thinking-only 响应(模型输出了推理链但无可见文本)时,非流式补全会自动重试一次:以多轮对话 follow-up 方式追加 prompt 后缀 `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` 并设置 `parent_message_id` 在同一 DeepSeek session 内让模型重新输出;重试最大 1 次。 +- 当上游返回 thinking-only 响应(模型输出了推理链但无可见文本)时,Go 主路径的流式与非流式补全都会先自动重试一次:以多轮对话 follow-up 方式追加 prompt 后缀 `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` 并设置 `parent_message_id` 在同一 DeepSeek session 内让模型重新输出;同账号重试最大 1 次。若同账号重试后仍即将返回 `429 upstream_empty_output`,托管账号模式会在返回 429 前自动切换到下一个可用账号,新建 session,用原始 payload 再 fresh retry 一次。 - 引用标记处理边界:流式输出默认隐藏 `[citation:N]` / `[reference:N]` 这类上游内部占位符;非流式输出默认把 DeepSeek 搜索引用标记转换为 Markdown 引用链接。 --- @@ -86,7 +86,7 @@ Vercel 一键部署可先只填 `DS2API_ADMIN_KEY`,部署后在 `/admin` 导 - token 在 `config.keys` 中 → **托管账号模式**,自动轮询选择账号 - token 不在 `config.keys` 中 → **直通 token 模式**,直接作为 DeepSeek token 使用 -**可选请求头**:`X-Ds2-Target-Account: ` — 指定使用某个托管账号;如果目标账号不存在,或管理账号队列已耗尽,相关业务请求会返回 `429`,当前不会附带 `Retry-After` 头。若账号存在但登录/刷新失败,则返回对应的 `401` 或上游错误。 +**可选请求头**:`X-Ds2-Target-Account: ` — 指定使用某个托管账号;如果目标账号不存在,或管理账号队列已耗尽,相关业务请求会返回 `429`,当前不会附带 `Retry-After` 头。若账号存在但登录/刷新失败,则返回对应的 `401` 或上游错误。未指定目标账号时,托管账号模式的 completion 空输出 429 会先尝试切到另一个可用账号 fresh retry 一次;指定目标账号或无其他可用账号时不会切号。 Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=` 作为凭据来源。 ### Admin 接口(`/admin/*`) @@ -172,12 +172,12 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=` | GET | `/admin/chat-history/{id}` | Admin | 查看单条服务器端对话记录 | | DELETE | `/admin/chat-history/{id}` | Admin | 删除单条服务器端对话记录 | | PUT | `/admin/chat-history/settings` | Admin | 更新对话记录保留条数 | - -服务器端记录本质上是 DeepSeek 上游响应归档:OpenAI Chat、OpenAI Responses、Claude Messages、Gemini GenerateContent 等直连 DeepSeek 
的生成接口,在收到上游响应后会于各协议回译/裁剪前写入记录;列表按请求创建时间倒序展示,流式请求会在生成过程中持续刷新状态与详情。WebUI「API 测试」发出的请求也会进入该记录。 | GET | `/admin/version` | Admin | 查询当前版本与最新 Release | OpenAI `/v1/*` 仍是规范路径。对于只配置 DS2API 根地址的客户端,同一套 OpenAI handler 也通过根路径快捷路由暴露:`/models`、`/models/{id}`、`/chat/completions`、`/responses`、`/responses/{response_id}`、`/embeddings`、`/files`、`/files/{file_id}`。 +服务器端记录本质上是 DeepSeek 上游响应归档:OpenAI Chat、OpenAI Responses、Claude Messages、Gemini GenerateContent 等直连 DeepSeek 的生成接口,在收到上游响应后会于各协议回译/裁剪前写入记录;列表按请求创建时间倒序展示,流式请求会在生成过程中持续刷新状态与详情。WebUI「API 测试」发出的请求也会进入该记录。 + --- ## 健康检查 @@ -231,16 +231,15 @@ OpenAI `/v1/*` 仍是规范路径。对于只配置 DS2API 根地址的客户端 1. 先匹配 DeepSeek 原生模型。 2. 再匹配 `model_aliases` 精确映射。 3. 如果请求名以 `-nothinking` 结尾,则在最终解析出的规范模型上追加对应的无思考变体。 -4. 未命中时按模型家族规则回退(如 `o*`、`gpt-*`、`claude-*`)。 -5. 仍未命中则返回 `invalid_request_error`。 +4. 仍未命中则返回 `invalid_request_error`。当前不会按未知模型家族做启发式兜底;需要新增兼容名时请通过 `model_aliases` 明确配置。 当前内置默认 alias 来自 `internal/config/models.go`,`config.model_aliases` 会在运行时覆盖或补充同名映射。节选: - OpenAI / Codex:`gpt-4o`、`gpt-4.1`、`gpt-5`、`gpt-5.5`、`gpt-5-codex`、`gpt-5.3-codex`、`codex-mini-latest` - OpenAI reasoning:`o1`、`o3`、`o3-deep-research`、`o4-mini` - Claude:`claude-opus-4-6`、`claude-sonnet-4-6`、`claude-haiku-4-5`、`claude-3-5-sonnet-latest` -- Gemini:`gemini-2.5-pro`、`gemini-2.5-flash`、`gemini-pro-vision` -- 其他兼容族:`llama-*`、`qwen-*`、`mistral-*`、`command-*` 会按家族启发式回退 +- Gemini:`gemini-2.5-pro`、`gemini-2.5-flash`、`gemini-3.1-pro`、`gemini-3-pro`、`gemini-3-flash`、`gemini-3.1-flash-lite`、`gemini-pro-vision` +- 其他内置精确 alias:`llama-3.1-70b-instruct`、`qwen-max` 上述 alias 若在请求名后追加 `-nothinking` 后缀,也会映射到对应的强制关闭 thinking 版本。 当前视觉能力仅对应 `deepseek-v4-vision` / `deepseek-v4-vision-nothinking`,不会解析出独立的 `vision-search` 变体。 @@ -249,7 +248,7 @@ OpenAI `/v1/*` 仍是规范路径。对于只配置 DS2API 根地址的客户端 ### `POST /v1/chat/completions` -> 路径说明:除规范路径 `/v1/chat/completions` 外,也支持根路径快捷别名 `/chat/completions`;在 Vercel Runtime 上,这两个路径的 `stream=true` 请求都会进入 Node 流式桥接逻辑,非流式仍走 Go 主链路。 +> 路径说明:除规范路径 `/v1/chat/completions` 
外,也支持根路径快捷别名 `/chat/completions`。在 Vercel Runtime 上,`vercel.json` 仅把规范路径 `/v1/chat/completions` 重写到 Node 流式桥接;根路径快捷别名仍走 Go 主链路。因此 Vercel 上需要实时流式时请使用 `/v1/chat/completions`。 **请求头**: @@ -262,7 +261,7 @@ Content-Type: application/json | 字段 | 类型 | 必填 | 说明 | | --- | --- | --- | --- | -| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias(如 `gpt-5.5`、`gpt-5.4-mini`、`gpt-5.3-codex`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-6`、`gemini-2.5-pro`、`gemini-2.5-flash` 等);若模型名带 `-nothinking` 后缀,则强制关闭 thinking / reasoning | +| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias(如 `gpt-5.5`、`gpt-5.4-mini`、`gpt-5.3-codex`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-6`、`gemini-2.5-pro`、`gemini-3.1-pro`、`gemini-3-flash` 等);若模型名带 `-nothinking` 后缀,则强制关闭 thinking / reasoning | | `messages` | array | ✅ | OpenAI 风格消息数组 | | `stream` | boolean | ❌ | 默认 `false` | | `tools` | array | ❌ | Function Calling 定义 | @@ -358,7 +357,8 @@ data: [DONE] 补充说明: - **非代码块上下文**下,工具负载即使与普通文本混合,也会按特征识别并产出可执行 tool call(前后普通文本仍可透传)。 -- 解析器当前把 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(``、`<|tool_calls>`、`<|tool_calls>`)、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`)、`DSML` 与工具标签名黏连的常见 typo(如 `` / `` / ``)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;DSML 会先归一化回 XML,内部仍以 XML 解析语义为准。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理。 +- 解析器当前把推荐 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、半角 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(``、`<|tool_calls>`、`<|tool_calls>`)、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`)、`DSML` 与工具标签名黏连的常见 typo(如 `` / `` / ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉`)、任意协议前缀壳(如 ``)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;这些前缀壳会先归一化回 XML,内部仍以 XML 解析语义为准。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON 
`tool_calls` 片段默认都会按普通文本处理;完整但 malformed 的 wrapper 同样会作为普通文本释放。 +- 解析层不会因为参数值为空而丢弃工具调用;显式空字符串或纯空白参数会按空字符串进入结构化 `tool_calls`。Prompt 会要求模型不要主动输出空参数,缺参/空命令的拒绝应由工具执行侧或客户端 schema 校验负责。 - 当最终可见正文为空但思维链里包含可执行工具调用时,Chat / Responses 会在收尾阶段补发标准 OpenAI `tool_calls` / `function_call` 输出;如果客户端未开启 thinking / reasoning,该思维链只用于检测,不会作为可见正文或 `reasoning_content` 暴露。 - Markdown fenced code block(例如 ```json ... ```)中的 `tool_calls` 仅视为示例文本,不会被执行。 @@ -775,6 +775,7 @@ data: {"type":"message_stop"} - `responses` / `embeddings` - `auto_delete`(`mode`:`none` / `single` / `all`;旧配置 `sessions=true` 仍按 `all` 处理) - `current_input_file`(`enabled` 默认返回 `true`、`min_chars`) +- `thinking_injection`(`enabled` 默认返回 `true`、`prompt`、`default_prompt`) - `model_aliases` - `env_backed`、`needs_vercel_sync` - `toolcall` 策略已固定为 `feature_match + high`,不再通过 settings 返回或修改 @@ -789,6 +790,7 @@ data: {"type":"message_stop"} - `embeddings.provider` - `auto_delete.mode` - `current_input_file.enabled` / `current_input_file.min_chars` +- `thinking_injection.enabled` / `thinking_injection.prompt` - `model_aliases` - `toolcall` 策略已固定,不再作为可写入字段 @@ -1271,7 +1273,7 @@ Gemini 路由使用 Google 风格错误结构: | 状态码 | 说明 | | --- | --- | | `401` | 鉴权失败(key/token 无效,或 Admin JWT 过期) | -| `429` | 请求过多(超出并发上限 + 等待队列;当前不附带 `Retry-After` 头) | +| `429` | 请求过多(超出并发上限 + 等待队列,或上游账号 thinking-only 后仍无可见输出;托管账号模式会先尝试一次切号 fresh retry;当前不附带 `Retry-After` 头) | | `503` | 模型不可用或上游服务异常 | --- diff --git a/README.MD b/README.MD index 3edf3b8..ae5eafc 100644 --- a/README.MD +++ b/README.MD @@ -134,7 +134,8 @@ flowchart LR | OpenAI 兼容 | `GET /v1/models`、`GET /v1/models/{id}`、`POST /v1/chat/completions`、`POST /v1/responses`、`GET /v1/responses/{response_id}`、`POST /v1/embeddings`、`POST /v1/files`、`GET /v1/files/{file_id}` | | Claude 兼容 | `GET /anthropic/v1/models`、`POST /anthropic/v1/messages`、`POST /anthropic/v1/messages/count_tokens`(及快捷路径 `/v1/messages`、`/messages`) | | Gemini 兼容 | `POST /v1beta/models/{model}:generateContent`、`POST 
/v1beta/models/{model}:streamGenerateContent`(及 `/v1/models/{model}:*` 路径) | -| 统一 CORS 兼容 | `/v1/*`、`/anthropic/*`、`/v1beta/models/*`、`/admin/*` 统一走同一套 CORS 策略;Vercel 上 `/v1/chat/completions` 的 Node Runtime 也对齐相同放行规则,尽量减少第三方预检请求头限制 | +| Ollama 兼容 | `GET /api/version`、`GET /api/tags`、`POST /api/show` | +| 统一 CORS 兼容 | `/v1/*`、`/anthropic/*`、`/v1beta/models/*`、`/api/*`、`/admin/*` 统一走同一套 CORS 策略;Vercel 上 `/v1/chat/completions` 的 Node Runtime 也对齐相同放行规则,尽量减少第三方预检请求头限制 | | 多账号轮询 | 自动 token 刷新、邮箱/手机号双登录方式 | | 并发队列控制 | 每账号 in-flight 上限 + 等待队列,动态计算建议并发值 | | DeepSeek PoW | 纯 Go 高性能实现(DeepSeekHashV1),毫秒级响应 | @@ -195,11 +196,11 @@ OpenAI `/v1/*` 仍是推荐的规范路径;同时支持 `/models`、`/chat/com - `ANTHROPIC_BASE_URL` 推荐直接指向 DS2API 根地址(例如 `http://127.0.0.1:5001`),Claude Code 会请求 `/v1/messages?beta=true`。 - `ANTHROPIC_API_KEY` 需要与 `config.json` 中 `keys` 一致;建议同时保留常规 key 与 `sk-ant-*` 形态 key,兼容不同客户端校验习惯。 - 若系统设置了代理,建议对 DS2API 地址配置 `NO_PROXY=127.0.0.1,localhost,<你的主机IP>`,避免本地回环请求被代理拦截。 -- 如遇“工具调用输出成文本、未执行”问题,请优先检查模型输出是否为推荐的 DSML 工具块:`<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`。兼容层也接受旧式 canonical XML:`...`;旧式 `` / `` / `` / ``、``、`tool_use` 或纯 JSON `tool_calls` 片段不会执行。 +- 如遇“工具调用输出成文本、未执行”问题,请优先检查模型输出是否为推荐的全角分隔符 DSML 工具块:`<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`。兼容层也接受半角 DSML 与旧式 canonical XML:`...`;旧式 `` / `` / `` / ``、``、`tool_use` 或纯 JSON `tool_calls` 片段不会执行,会作为普通文本处理。 ### Gemini 接口 -Gemini 适配器将模型名通过 `model_aliases` 或内置规则映射到 DeepSeek 原生模型,支持 `generateContent` 和 `streamGenerateContent` 两种调用方式,并完整支持 Tool Calling(`functionDeclarations` → `functionCall` 输出)。若 Gemini 模型名带 `-nothinking` 后缀,例如 `gemini-2.5-pro-nothinking`,会映射到对应的强制关闭思考模型。 +Gemini 适配器将模型名通过 `model_aliases` 或内置精确 alias 映射到 DeepSeek 原生模型(覆盖 `gemini-2.5-*`、`gemini-3*`、`gemini-pro-vision` 等常见名称),支持 `generateContent` 和 `streamGenerateContent` 两种调用方式,并完整支持 Tool Calling(`functionDeclarations` → `functionCall` 输出)。若 Gemini 模型名带 `-nothinking` 后缀,例如 
`gemini-2.5-pro-nothinking`,会映射到对应的强制关闭思考模型。 ## 快速开始 @@ -295,13 +296,13 @@ cp config.example.json config.json base64 < config.json | tr -d '\n' ``` -> **流式说明**:OpenAI Chat 流式在 Vercel 上会由 `api/chat-stream.js`(Node Runtime)承接,支持规范路径 `/v1/chat/completions` 与根路径快捷别名 `/chat/completions`。鉴权、账号选择、会话/PoW 准备仍由 Go 内部 prepare 接口完成;流式响应(含 `tools`)在 Node 侧执行与 Go 对齐的输出组装与防泄漏处理。虽然这里只有 OpenAI chat 流式走 Node,但 CORS 放行策略仍与 Go 主路由保持一致,统一覆盖第三方客户端预检场景。 +> **流式说明**:OpenAI Chat 流式在 Vercel 上会由 `api/chat-stream.js`(Node Runtime)承接,但 `vercel.json` 只把规范路径 `/v1/chat/completions` 重写到 Node;根路径快捷别名 `/chat/completions` 仍走 Go 主链路。鉴权、账号选择、会话/PoW 准备仍由 Go 内部 prepare 接口完成;流式响应(含 `tools`)在 Node 侧执行与 Go 对齐的输出组装与防泄漏处理。Vercel 上需要实时流式时请使用 `/v1/chat/completions`。 详细部署说明请参阅 [部署指南](docs/DEPLOY.md)。 ### 方式四:本地源码运行 -**前置要求**:Go 1.26+,Node.js `20.19+` 或 `22.12+`(仅在需要构建 WebUI 时);同时确保 `npm` 可用,建议 `npm 10+` +**前置要求**:Go 1.26+,Node.js `20.19+` 或 `22.12+`(仅在需要构建 WebUI 时;CI / Docker 构建使用 Node 24);同时确保 `npm` 可用,建议 `npm 10+` ```bash # 1. 克隆仓库 @@ -320,7 +321,7 @@ go run ./cmd/ds2api 服务实际绑定:`0.0.0.0:5001`,因此同一局域网设备通常也可以通过你的内网 IP 访问。 -> **WebUI 自动构建**:本地首次启动时,若 `static/admin` 不存在,会自动尝试执行 `npm ci`(仅在缺少依赖时)和 `npm run build -- --outDir static/admin --emptyOutDir`(需要本机有 Node.js 和 npm)。你也可以手动构建:`./scripts/build-webui.sh` +> **WebUI 自动构建**:本地首次启动时,若 WebUI 静态目录不存在,会自动尝试执行 `npm ci --prefix webui`(仅在缺少依赖时)和 `npm run build --prefix webui -- --outDir static/admin --emptyOutDir`(需要本机有 Node.js 和 npm;静态目录可用 `DS2API_STATIC_ADMIN_DIR` 覆盖)。你也可以手动构建:`./scripts/build-webui.sh` ## 配置说明 @@ -350,6 +351,7 @@ go run ./cmd/ds2api 可选请求头 `X-Ds2-Target-Account`:指定使用某个托管账号(值为 email 或 mobile)。 如果指定账号不存在,或者当前管理账号队列已满,请求会返回 `429`;当前 `429` 不附带 `Retry-After` 头。若账号存在但登录/刷新失败,则返回对应的鉴权错误。 +未指定目标账号时,如果 completion 因上游 thinking-only 空输出在同账号补偿重试后仍将返回 `429 upstream_empty_output`,托管账号模式会自动切到下一个可用账号,新建 session,并用原始 payload 再 fresh retry 一次。 Gemini 路由还可以使用 `x-goog-api-key`,或在没有认证头时使用 `?key=` / `?api_key=` 作为调用方凭据。 ## 并发模型 @@ -363,6 +365,7 @@ Gemini 路由还可以使用 
`x-goog-api-key`,或在没有认证头时使用 ` - 当 in-flight 槽位满时,请求进入等待队列,**不会立即 429** - 超出总承载上限后才返回 `429 Too Many Requests`,当前响应不附带 `Retry-After` +- completion 空输出类 429 会先做同账号补偿重试;托管账号模式还会在最终返回 429 前切到另一个可用账号 fresh retry 一次 - `GET /admin/queue/status` 返回实时并发状态 ## Tool Call 适配 @@ -370,12 +373,13 @@ Gemini 路由还可以使用 `x-goog-api-key`,或在没有认证头时使用 ` 当请求中带 `tools` 时,DS2API 会做防泄漏处理与结构化转译: 1. 只在**非代码块上下文**启用执行型 toolcall 识别(代码块示例默认不触发) -2. 解析层当前把 DSML 外壳视为推荐可执行调用:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容旧式 canonical XML `` → `` → ``。DSML 只是外壳别名,内部仍以 XML 解析语义为准;旧式 `` / `` / `` / ``、``、`tool_use` / antml 变体与纯 JSON `tool_calls` 片段都会按普通文本处理 +2. 解析层当前把全角分隔符 DSML 外壳视为推荐可执行调用:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容半角 DSML、旧式 canonical XML `` → `` → ``,以及若干 DSML 前缀/分隔符漂移。DSML 只是外壳别名,内部仍以 XML 解析语义为准;旧式 `` / `` / `` / ``、``、`tool_use` / antml 变体与纯 JSON `tool_calls` 片段都会按普通文本处理,完整但 malformed 的 wrapper 也会作为普通文本释放 3. `responses` 流式严格使用官方 item 生命周期事件(`response.output_item.*`、`response.content_part.*`、`response.function_call_arguments.*`) 4. `responses` 支持并执行 `tool_choice`(`auto`/`none`/`required`/强制函数);`required` 违规时非流式返回 `422`,流式返回 `response.failed` 5. 
客户端请求哪种协议,就按该协议返回工具调用(OpenAI/Claude/Gemini 各自原生结构);模型侧优先约束输出规范 XML,再由兼容层转译 > 说明:当前版本 parser 层以”尽量解析成功”为优先,所有格式合法的 XML 工具调用都会通过,不做工具名 allow-list 过滤。 +> 解析层会保留显式空字符串或纯空白参数;Prompt 会要求模型不要主动输出空参数,缺参/空命令的拒绝应由工具执行侧或客户端 schema 校验负责。 > > 想评估”把工具调用封装成 XML 再输入模型”的方案,可参考:`docs/toolcall-semantics.md`。 diff --git a/README.en.md b/README.en.md index 62503b6..81ef313 100644 --- a/README.en.md +++ b/README.en.md @@ -131,7 +131,8 @@ For the full module-by-module architecture and directory responsibilities, see [ | OpenAI compatible | `GET /v1/models`, `GET /v1/models/{id}`, `POST /v1/chat/completions`, `POST /v1/responses`, `GET /v1/responses/{response_id}`, `POST /v1/embeddings`, `POST /v1/files`, `GET /v1/files/{file_id}` | | Claude compatible | `GET /anthropic/v1/models`, `POST /anthropic/v1/messages`, `POST /anthropic/v1/messages/count_tokens` (plus shortcut paths `/v1/messages`, `/messages`) | | Gemini compatible | `POST /v1beta/models/{model}:generateContent`, `POST /v1beta/models/{model}:streamGenerateContent` (plus `/v1/models/{model}:*` paths) | -| Unified CORS compatibility | `/v1/*`, `/anthropic/*`, `/v1beta/models/*`, and `/admin/*` share one CORS policy; on Vercel, the Node Runtime for `/v1/chat/completions` mirrors the same relaxed preflight behavior for third-party clients | +| Ollama compatible | `GET /api/version`, `GET /api/tags`, `POST /api/show` | +| Unified CORS compatibility | `/v1/*`, `/anthropic/*`, `/v1beta/models/*`, `/api/*`, and `/admin/*` share one CORS policy; on Vercel, the Node Runtime for `/v1/chat/completions` mirrors the same relaxed preflight behavior for third-party clients | | Multi-account rotation | Auto token refresh, email/mobile dual login | | Concurrency control | Per-account in-flight limit + waiting queue, dynamic recommended concurrency | | DeepSeek PoW | Pure Go high-performance solver (DeepSeekHashV1), ms-level response | @@ -184,11 +185,11 @@ Besides the primary aliases above, `/anthropic/v1/models` also returns Claude 4. 
- Set `ANTHROPIC_BASE_URL` to the DS2API root URL (for example `http://127.0.0.1:5001`). Claude Code sends requests to `/v1/messages?beta=true`. - `ANTHROPIC_API_KEY` must match an entry in `keys` from `config.json`. Keeping both a regular key and an `sk-ant-*` style key improves client compatibility. - If your environment has proxy variables, set `NO_PROXY=127.0.0.1,localhost,` for DS2API to avoid proxy interception of local traffic. -- If tool calls are rendered as plain text and not executed, first verify the model output uses the recommended DSML block: `<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`. DS2API also accepts legacy canonical XML: `...`; legacy `` / `` / `` / ``, ``, `tool_use`, or standalone JSON `tool_calls` are not executed. +- If tool calls are rendered as plain text and not executed, first verify the model output uses the recommended fullwidth-separator DSML block: `<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`. DS2API also accepts halfwidth DSML and legacy canonical XML: `...`; legacy `` / `` / `` / ``, ``, `tool_use`, or standalone JSON `tool_calls` are not executed and stay plain text. ### Gemini Endpoint -The Gemini adapter maps model names to DeepSeek native models via `model_aliases` or built-in heuristics, supporting both `generateContent` and `streamGenerateContent` call patterns with full Tool Calling support (`functionDeclarations` → `functionCall` output). +The Gemini adapter maps model names to DeepSeek native models via `model_aliases` or exact built-in aliases (covering common `gemini-2.5-*`, `gemini-3*`, and `gemini-pro-vision` names), supporting both `generateContent` and `streamGenerateContent` call patterns with full Tool Calling support (`functionDeclarations` → `functionCall` output). If the Gemini model name has a `-nothinking` suffix, such as `gemini-2.5-pro-nothinking`, it maps to the corresponding forced no-thinking model. 
## Quick Start @@ -283,13 +284,13 @@ Recommended: convert `config.json` to Base64 locally, then paste into `DS2API_CO base64 < config.json | tr -d '\n' ``` -> **Streaming note**: OpenAI Chat streaming on Vercel is routed to `api/chat-stream.js` (Node Runtime), with both the canonical `/v1/chat/completions` path and the root shortcut `/chat/completions` supported. Auth, account selection, and session/PoW preparation are still handled by the Go internal prepare endpoint; streaming output (including `tools`) is assembled on Node with Go-aligned anti-leak handling. This is the only interface family currently routed through Node, and its CORS allow behavior is kept aligned with the Go router so third-party preflight handling stays unified. +> **Streaming note**: OpenAI Chat streaming on Vercel is routed to `api/chat-stream.js` (Node Runtime), but `vercel.json` rewrites only the canonical `/v1/chat/completions` path to Node; the root shortcut `/chat/completions` stays on the Go main path. Auth, account selection, and session/PoW preparation are still handled by the Go internal prepare endpoint; streaming output (including `tools`) is assembled on Node with Go-aligned anti-leak handling. Use `/v1/chat/completions` on Vercel when real-time streaming is required. For detailed deployment instructions, see the [Deployment Guide](docs/DEPLOY.en.md). ### Option 4: Local Run -**Prerequisites**: Go 1.26+, Node.js `20.19+` or `22.12+` (only if building WebUI locally) +**Prerequisites**: Go 1.26+, Node.js `20.19+` or `22.12+` (only if building WebUI locally; CI / Docker builds use Node 24), and npm available; npm 10+ is recommended ```bash # 1. Clone @@ -308,7 +309,7 @@ Default local URL: `http://127.0.0.1:5001` The server actually binds to `0.0.0.0:5001`, so devices on the same LAN can usually reach it through your private IP as well. 
-> **WebUI auto-build**: On first local startup, if `static/admin` is missing, DS2API will auto-run `npm ci` (only when dependencies are missing) and `npm run build -- --outDir static/admin --emptyOutDir` (requires Node.js). You can also build manually: `./scripts/build-webui.sh` +> **WebUI auto-build**: On first local startup, if the WebUI static directory is missing, DS2API auto-runs `npm ci --prefix webui` (only when dependencies are missing) and `npm run build --prefix webui -- --outDir static/admin --emptyOutDir` (requires Node.js; `DS2API_STATIC_ADMIN_DIR` can override the static directory). You can also build manually: `./scripts/build-webui.sh` ## Configuration @@ -336,6 +337,7 @@ For business endpoints (`/v1/*`, `/anthropic/*`, Gemini routes), DS2API supports | **Direct token** | If the token is not in `config.keys`, DS2API treats it as a DeepSeek token directly | Optional header `X-Ds2-Target-Account`: Pin a specific managed account (value is email or mobile). +When no target account is pinned, if a completion would end as `429 upstream_empty_output` after the same-account empty-output retry, managed-account mode switches to the next available account, creates a fresh session, and retries the original payload once. Gemini routes also accept `x-goog-api-key`, or `?key=` / `?api_key=` when no auth header is present. 
## Concurrency Model @@ -348,7 +350,8 @@ Queue limit = DS2API_ACCOUNT_MAX_QUEUE (default = recommended concurrency) ``` - When inflight slots are full, requests enter a waiting queue — **no immediate 429** -- 429 is returned only when total load exceeds inflight + queue capacity +- 429 is returned only when total load exceeds inflight + queue capacity; current responses do not include `Retry-After` +- Completion empty-output 429s first get the same-account compensation retry; managed-account mode also tries one alternate-account fresh retry before returning the final 429 - `GET /admin/queue/status` returns real-time concurrency state ## Tool Call Adaptation @@ -356,12 +359,13 @@ Queue limit = DS2API_ACCOUNT_MAX_QUEUE (default = recommended concurrency) When `tools` is present in the request, DS2API performs anti-leak handling: 1. Toolcall feature matching is enabled only in **non-code-block context** (fenced examples are ignored) -2. The parser now treats the DSML shell as the recommended executable tool-calling syntax: `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; it also accepts legacy canonical XML `` → `` → ``. DSML is a shell alias and internal parsing remains XML-based; legacy `` / `` / `` / ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text +2. The parser treats the fullwidth-separator DSML shell as the recommended executable tool-calling syntax: `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; it also accepts halfwidth DSML, legacy canonical XML `` → `` → ``, plus common DSML prefix/separator drift. DSML is a shell alias and internal parsing remains XML-based; legacy `` / `` / `` / ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text, and complete but malformed wrappers are released as plain text too 3. 
`responses` streaming strictly uses official item lifecycle events (`response.output_item.*`, `response.content_part.*`, `response.function_call_arguments.*`) 4. `responses` supports and enforces `tool_choice` (`auto`/`none`/`required`/forced function); `required` violations return `422` for non-stream and `response.failed` for stream 5. The output protocol follows the client request (OpenAI / Claude / Gemini native shapes); model-side prompting can prefer XML, and the compatibility layer handles the protocol-specific translation > Note: the current parser still prioritizes “parse successfully whenever possible”; hard allow-list rejection for undeclared tool names is not enabled yet. +> Explicit empty strings or whitespace-only parameters are preserved by the parser; prompting tells the model not to emit blank parameters, and missing/empty argument rejection belongs in the tool executor or client schema validation. ## Local Dev Packet Capture diff --git a/VERSION b/VERSION index b98ff4c..a84947d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.4.6 +4.5.0 diff --git a/docs/ARCHITECTURE.en.md b/docs/ARCHITECTURE.en.md index aa93142..22fb904 100644 --- a/docs/ARCHITECTURE.en.md +++ b/docs/ARCHITECTURE.en.md @@ -27,7 +27,7 @@ ds2api/ │ ├── claudeconv/ # Claude message conversion helpers │ ├── compat/ # Compatibility and regression helpers │ ├── assistantturn/ # Upstream output to canonical assistant turn / stream event semantics -│ ├── completionruntime/ # Shared Go DeepSeek completion startup, non-stream collection, and retry +│ ├── completionruntime/ # Shared Go DeepSeek completion startup, collection, empty-output/account-switch retry │ ├── config/ # Config loading/validation/hot reload │ ├── deepseek/ # DeepSeek upstream client/protocol/transport │ │ ├── client/ # Login/session/completion/upload/delete calls @@ -41,6 +41,7 @@ ds2api/ │ │ ├── admin/ # Admin API root assembly and resource packages │ │ ├── claude/ # Claude HTTP protocol adapter │ │ ├── gemini/ # 
Gemini HTTP protocol adapter +│ │ ├── ollama/ # Ollama-compatible model/capability query endpoints │ │ ├── openai/ # OpenAI HTTP surface │ │ │ ├── chat/ # Chat Completions execution entrypoint │ │ │ ├── responses/ # Responses API and response store @@ -57,6 +58,7 @@ ds2api/ │ ├── prompt/ # Prompt composition │ ├── promptcompat/ # API request -> DeepSeek web-chat plain-text compatibility │ ├── rawsample/ # Raw sample read/write and management +│ ├── responsehistory/ # DeepSeek upstream response archive and session snapshots │ ├── server/ # Router and middleware assembly │ │ └── data/ # Router/runtime helper data │ ├── sse/ # SSE parsing utilities @@ -188,10 +190,11 @@ flowchart LR - `internal/server`: router tree + middlewares (health, protocol routes, Admin/WebUI). - `internal/httpapi/openai/*`: OpenAI HTTP surface split into chat, responses, files, embeddings, history, and shared packages; chat/responses share the promptcompat, stream, and toolcall semantics. - `internal/httpapi/{claude,gemini}`: protocol adapters that normalize into the same prompt compatibility semantics; normal direct paths must share DeepSeek session/PoW/completion execution through `completionruntime`, while `translatorcliproxy` is reserved for Vercel prepare/release, missing-backend fallback, and regression tests. +- `internal/httpapi/ollama`: Ollama-compatible model list and capability query endpoints. - `internal/httpapi/requestbody`: shared HTTP body reading, JSON pre-validation, and UTF-8 error helpers across protocol adapters. - `internal/promptcompat`: compatibility core for turning OpenAI/Claude/Gemini requests into DeepSeek web-chat plain-text context. - `internal/assistantturn`: Go output-side canonical semantics, converting DeepSeek SSE collection results and stream finalization state into assistant turns and centralizing thinking, tool call, citation, usage, stop/error behavior. 
-- `internal/completionruntime`: shared Go completion execution helpers for DeepSeek session/PoW/call startup, non-stream collection, and empty-output retry; streaming paths use it to start upstream requests, continue to use `internal/stream` for real-time consumption, and use `assistantturn` during finalization. +- `internal/completionruntime`: shared Go completion execution helpers for DeepSeek session/PoW/call startup, non-stream collection, empty-output retry, and one managed-account fresh retry before a final 429; streaming paths use it to start upstream requests, continue to use `internal/stream` for real-time consumption, and use `assistantturn` during finalization. - `internal/translatorcliproxy`: bridge compatibility layer for Claude/Gemini and OpenAI shape translation; it is not the main business protocol conversion center. - `internal/deepseek/{client,protocol,transport}`: upstream requests, sessions, PoW adaptation, protocol constants, and transport details. - `internal/js/chat-stream` + `api/chat-stream.js`: Vercel Node streaming bridge; Go prepare/release owns auth, account lease, and completion payload assembly, while Node relays real-time SSE with Go-aligned finalization and tool sieve semantics. @@ -199,6 +202,7 @@ flowchart LR - `internal/toolcall` + `internal/toolstream`: DSML shell compatibility plus canonical XML tool-call parsing and anti-leak sieve; DSML is normalized back to XML at the entrypoint, and internal parsing remains XML-based. - `internal/httpapi/admin/*`: Admin API root assembly plus auth/accounts/config/settings/proxies/rawsamples/vercel/history/devcapture/version resource packages. - `internal/chathistory`: server-side conversation history persistence, pagination, detail lookup, and retention policy. +- `internal/responsehistory`: DeepSeek upstream response archive, saving assistant text, thinking, raw tool-call fragments, and streaming detail before protocol rendering/trimming. 
- `internal/config`: config loading/validation + runtime settings hot-reload. - `internal/account`: managed account pool, inflight slots, waiting queue. - `internal/textclean`: text cleanup helpers, e.g. stripping `[reference: N]` markers. diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index a4da59e..1cc3b9f 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -27,7 +27,7 @@ ds2api/ │ ├── claudeconv/ # Claude 消息格式转换工具 │ ├── compat/ # 兼容性辅助与回归支持 │ ├── assistantturn/ # 上游输出到统一 assistant turn / stream event 的语义层 -│ ├── completionruntime/ # Go 主路径共享 DeepSeek completion 启动、非流式收集与 retry +│ ├── completionruntime/ # Go 主路径共享 DeepSeek completion 启动、收集、空输出/切号 retry │ ├── config/ # 配置加载、校验、热更新 │ ├── deepseek/ # DeepSeek 上游 client/protocol/transport │ │ ├── client/ # 登录、会话、completion、上传/删除等上游调用 @@ -41,6 +41,7 @@ ds2api/ │ │ ├── admin/ # Admin API 根装配与资源子包 │ │ ├── claude/ # Claude HTTP 协议适配 │ │ ├── gemini/ # Gemini HTTP 协议适配 +│ │ ├── ollama/ # Ollama 兼容模型/能力查询接口 │ │ ├── openai/ # OpenAI HTTP surface │ │ │ ├── chat/ # Chat Completions 执行入口 │ │ │ ├── responses/ # Responses API 与 response store @@ -57,6 +58,7 @@ ds2api/ │ ├── prompt/ # Prompt 组装 │ ├── promptcompat/ # API 请求到 DeepSeek 网页纯文本上下文兼容层 │ ├── rawsample/ # raw sample 读写与管理 +│ ├── responsehistory/ # DeepSeek 上游响应归档与会话快照 │ ├── server/ # 路由与中间件装配 │ │ └── data/ # 路由/运行时辅助数据 │ ├── sse/ # SSE 解析工具 @@ -188,10 +190,11 @@ flowchart LR - `internal/server`:路由树和中间件挂载(健康检查、协议入口、Admin/WebUI)。 - `internal/httpapi/openai/*`:OpenAI HTTP surface,按 chat、responses、files、embeddings、history、shared 拆分;chat/responses 共享 promptcompat、stream、toolcall 等核心语义。 - `internal/httpapi/{claude,gemini}`:协议输入输出适配,归一到同一套 prompt compatibility 语义;正常直连路径必须通过 `completionruntime` 共享 DeepSeek session/PoW/completion 调用,`translatorcliproxy` 仅保留给 Vercel prepare/release、后端缺失 fallback 和回归测试。 +- `internal/httpapi/ollama`:Ollama 兼容的模型列表与能力查询入口。 - `internal/httpapi/requestbody`:跨协议复用的请求体读取、JSON 解码前置校验与 UTF-8 错误处理辅助。 - 
`internal/promptcompat`:OpenAI/Claude/Gemini 请求到 DeepSeek 网页纯文本上下文的兼容内核。 - `internal/assistantturn`:Go 输出侧统一语义层,把 DeepSeek SSE 收集结果和流式收尾状态归一成 assistant turn,集中处理 thinking、tool call、citation、usage、stop/error 语义。 -- `internal/completionruntime`:Go surface 共享的 completion 执行辅助,负责 DeepSeek session/PoW/call 启动、非流式 collect 和 empty-output retry;流式路径复用它启动上游请求,继续用 `internal/stream` 做实时消费,并在最终收尾阶段接入 `assistantturn`。 +- `internal/completionruntime`:Go surface 共享的 completion 执行辅助,负责 DeepSeek session/PoW/call 启动、非流式 collect、empty-output retry,以及托管账号在最终 429 前的一次切号 fresh retry;流式路径复用它启动上游请求,继续用 `internal/stream` 做实时消费,并在最终收尾阶段接入 `assistantturn`。 - `internal/translatorcliproxy`:Claude/Gemini 与 OpenAI 结构互转的桥接兼容层,不作为主业务协议转换中心。 - `internal/deepseek/{client,protocol,transport}`:上游请求、会话、PoW 适配、协议常量与传输层。 - `internal/js/chat-stream` + `api/chat-stream.js`:Vercel Node 流式桥;Go prepare/release 管理鉴权、账号租约和 completion payload,Node 侧负责实时 SSE 转发并保持 Go 对齐的终结态和 tool sieve 语义。 @@ -199,6 +202,7 @@ flowchart LR - `internal/toolcall` + `internal/toolstream`:DSML 外壳兼容与 canonical XML 工具调用解析、防泄漏筛分;DSML 会在入口归一化回 XML,内部仍按 XML 语义解析。 - `internal/httpapi/admin/*`:Admin API 根装配与 auth/accounts/config/settings/proxies/rawsamples/vercel/history/devcapture/version 等资源子包。 - `internal/chathistory`:服务器端对话记录持久化、分页、单条详情和保留策略。 +- `internal/responsehistory`:DeepSeek 上游响应归档,会在协议回译/裁剪前保存 assistant text、thinking、tool-call 原始片段和流式详情。 - `internal/config`:配置加载、校验、运行时 settings 热更新。 - `internal/account`:托管账号池、并发槽位、等待队列。 - `internal/textclean`:文本清洗,移除 `[reference: N]` 标记等噪声。 diff --git a/docs/CONTRIBUTING.en.md b/docs/CONTRIBUTING.en.md index 94cade1..e0e9c18 100644 --- a/docs/CONTRIBUTING.en.md +++ b/docs/CONTRIBUTING.en.md @@ -9,8 +9,8 @@ Thanks for your interest in contributing to DS2API! 
### Prerequisites - Go 1.26+ -- Node.js `20.19+` or `22.12+` (for WebUI development) -- npm (bundled with Node.js) +- Node.js `20.19+` or `22.12+` (for WebUI development; CI / Docker builds use Node 24) +- npm (bundled with Node.js; 10+ recommended) ### Backend Development diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 69424d4..9a32868 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -9,8 +9,8 @@ ### 前置要求 - Go 1.26+ -- Node.js `20.19+` 或 `22.12+`(WebUI 开发时) -- npm(随 Node.js 提供) +- Node.js `20.19+` 或 `22.12+`(WebUI 开发时;CI / Docker 构建使用 Node 24) +- npm(随 Node.js 提供,建议 10+) ### 后端开发 diff --git a/docs/DEPLOY.en.md b/docs/DEPLOY.en.md index 94ba958..9d658d1 100644 --- a/docs/DEPLOY.en.md +++ b/docs/DEPLOY.en.md @@ -39,8 +39,8 @@ Recommended order when choosing a deployment method: | Dependency | Minimum Version | Notes | | --- | --- | --- | | Go | 1.26+ | Build backend | -| Node.js | `20.19+` or `22.12+` | Only needed to build WebUI locally | -| npm | Bundled with Node.js | Install WebUI dependencies | +| Node.js | `20.19+` or `22.12+` (CI / Docker builds use Node 24) | Only needed to build WebUI locally | +| npm | Bundled with Node.js; 10+ recommended | Install WebUI dependencies | Config source (choose one): @@ -299,6 +299,8 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx # optional for personal accounts | `DS2API_VERCEL_INTERNAL_SECRET` | Hybrid streaming internal auth | Falls back to `DS2API_ADMIN_KEY` | | `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | Stream lease TTL | `900` | | `DS2API_RAW_STREAM_SAMPLE_ROOT` | Raw stream sample root for saving/reading samples | `tests/raw_stream_samples` | +| `DS2API_STATIC_ADMIN_DIR` | WebUI static asset directory | `static/admin` | +| `DS2API_AUTO_BUILD_WEBUI` | Whether local startup auto-builds missing WebUI assets (`1/true/yes/on` or `0/false/no/off`) | Enabled outside Vercel | | `VERCEL_TOKEN` | Vercel sync token | — | | `VERCEL_PROJECT_ID` | Vercel project ID | — | | `VERCEL_TEAM_ID` | Vercel team ID | — | 
@@ -321,7 +323,7 @@ Request ──────┐ ``` - **Go entry**: `api/index.go` (Serverless Go) -- **Stream entry**: `api/chat-stream.js` (Node Runtime for real-time SSE) +- **Stream entry**: `api/chat-stream.js` (Node Runtime for real-time SSE; `vercel.json` rewrites only the canonical `/v1/chat/completions` path here, while the root shortcut `/chat/completions` stays on the Go entry) - **Routing**: `vercel.json` - **Build command**: `npm ci --prefix webui && npm run build --prefix webui` (automatic) @@ -438,7 +440,7 @@ Default local access URL: `http://127.0.0.1:5001`; the server actually binds to ### 4.2 WebUI Build -On first local startup, if `static/admin/` is missing, DS2API will automatically attempt to build the WebUI (requires Node.js/npm; when dependencies are missing it runs `npm ci` first, then `npm run build -- --outDir static/admin --emptyOutDir`). +On first local startup, if the WebUI static directory is missing, DS2API automatically attempts to build it (requires Node.js/npm; when dependencies are missing it runs `npm ci --prefix webui`, then `npm run build --prefix webui -- --outDir <static dir> --emptyOutDir`). The default static directory is `static/admin/`, and `DS2API_STATIC_ADMIN_DIR` can override it.
Manual build: diff --git a/docs/DEPLOY.md b/docs/DEPLOY.md index f640210..d0f23de 100644 --- a/docs/DEPLOY.md +++ b/docs/DEPLOY.md @@ -39,8 +39,8 @@ | 依赖 | 最低版本 | 说明 | | --- | --- | --- | | Go | 1.26+ | 编译后端 | -| Node.js | `20.19+` 或 `22.12+` | 仅在需要本地构建 WebUI 时 | -| npm | 随 Node.js 提供 | 安装 WebUI 依赖 | +| Node.js | `20.19+` 或 `22.12+`(CI / Docker 构建使用 Node 24) | 仅在需要本地构建 WebUI 时 | +| npm | 随 Node.js 提供,建议 10+ | 安装 WebUI 依赖 | 配置来源(任选其一): @@ -299,6 +299,8 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx # 个人账号可留空 | `DS2API_VERCEL_INTERNAL_SECRET` | 混合流式内部鉴权 | 回退用 `DS2API_ADMIN_KEY` | | `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | 流式 lease TTL | `900` | | `DS2API_RAW_STREAM_SAMPLE_ROOT` | raw stream 样本保存/读取根目录 | `tests/raw_stream_samples` | +| `DS2API_STATIC_ADMIN_DIR` | WebUI 静态资源目录 | `static/admin` | +| `DS2API_AUTO_BUILD_WEBUI` | 本地启动时是否自动构建缺失的 WebUI(`1/true/yes/on` 或 `0/false/no/off`) | 非 Vercel 默认开启 | | `VERCEL_TOKEN` | Vercel 同步 token | — | | `VERCEL_PROJECT_ID` | Vercel 项目 ID | — | | `VERCEL_TEAM_ID` | Vercel 团队 ID | — | @@ -331,7 +333,7 @@ api/index.go api/chat-stream.js ``` - **入口文件**:`api/index.go`(Serverless Go) -- **流式入口**:`api/chat-stream.js`(Node Runtime,保证实时 SSE) +- **流式入口**:`api/chat-stream.js`(Node Runtime,保证实时 SSE;`vercel.json` 仅把规范路径 `/v1/chat/completions` 重写到这里,根路径快捷别名 `/chat/completions` 仍走 Go 入口) - **路由重写**:`vercel.json` - **构建命令**:`npm ci --prefix webui && npm run build --prefix webui`(自动执行) @@ -448,7 +450,7 @@ go run ./cmd/ds2api ### 4.2 WebUI 构建 -本地首次启动时,若 `static/admin/` 不存在,服务会自动尝试构建 WebUI(需要 Node.js/npm;缺依赖时会先执行 `npm ci`,再执行 `npm run build -- --outDir static/admin --emptyOutDir`)。 +本地首次启动时,若 WebUI 静态目录不存在,服务会自动尝试构建 WebUI(需要 Node.js/npm;缺依赖时会先执行 `npm ci --prefix webui`,再执行 `npm run build --prefix webui -- --outDir <静态目录> --emptyOutDir`)。默认静态目录为 `static/admin/`,可用 `DS2API_STATIC_ADMIN_DIR` 覆盖。 你也可以手动构建: diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md index dee5b07..1556547 100644 --- a/docs/DEVELOPMENT.md +++ b/docs/DEVELOPMENT.md @@ -81,7 +81,7 @@ 
Tool call 问题优先跑: ```bash go test -v ./internal/toolcall ./internal/toolstream -count=1 -node --test tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js +./tests/scripts/run-unit-node.sh ``` ## 5. 测试选择 diff --git a/docs/TESTING.md b/docs/TESTING.md index fc19f11..28b099e 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -75,7 +75,7 @@ npm run build --prefix webui 1. **Preflight 检查**: - `go test ./... -count=1`(单元测试) - `./tests/scripts/check-node-split-syntax.sh`(Node 拆分模块语法门禁) - - `node --test tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js tests/node/js_compat_test.js` + - `node --test --test-concurrency=1 tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js tests/node/chat-history-utils.test.js tests/node/js_compat_test.js` - `npm run build --prefix webui`(WebUI 构建检查) 2. **隔离启动**:复制 `config.json` 到临时目录,启动独立服务进程 @@ -203,10 +203,10 @@ go test ./... ```bash # 运行 tool calls 相关测试(推荐用于调试 tool call 解析问题) -go test -v -run 'TestParseToolCalls|TestRepair' ./internal/toolcall/ +go test -v -run 'TestParseToolCalls|TestProcessToolSieve|TestRepair' ./internal/toolcall ./internal/toolstream # 运行单个测试用例 -go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/toolcall/ +go test -v -run TestParseToolCallsAllowsAllEmptyParameterPayload ./internal/toolcall # 运行 format 相关测试 go test -v ./internal/format/... @@ -221,23 +221,23 @@ go test -v ./internal/httpapi/openai/... ```bash # 1. 运行 tool calls 相关的所有测试 -go test -v -run 'TestParseToolCalls|TestRepair' ./internal/toolcall/ +go test -v -run 'TestParseToolCalls|TestProcessToolSieve|TestRepair' ./internal/toolcall ./internal/toolstream # 2. 查看测试输出中的详细调试信息 -go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/toolcall/ 2>&1 +go test -v -run TestProcessToolSieveReleasesMalformedExecutableXMLBlock ./internal/toolstream 2>&1 # 3. 
检查具体测试用例的修复效果 -# 测试用例位于 internal/toolcall/toolcalls_test.go,包含: -# - TestParseToolCallsWithDeepSeekHallucination: DeepSeek 典型幻觉输出 +# 重点测试位于 internal/toolcall/toolcalls_test.go 与 internal/toolstream/tool_sieve_xml_test.go,包含: +# - TestParseToolCallsAllowsAllEmptyParameterPayload: 空参数结构化保留 +# - TestProcessToolSieveReleasesMalformedExecutableXMLBlock: malformed XML wrapper 释放为文本 # - TestRepairLooseJSONWithNestedObjects: 嵌套对象的方括号修复 -# - TestParseToolCallsWithMixedWindowsPaths: Windows 路径处理 ``` ### 运行 Node.js 测试 ```bash # 运行 Node 测试 -node --test tests/node/stream-tool-sieve.test.js +node --test --test-concurrency=1 tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js tests/node/chat-history-utils.test.js tests/node/js_compat_test.js # 或使用脚本 ./tests/scripts/run-unit-node.sh diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index ee97b2a..dd8196a 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -111,8 +111,8 @@ DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools` - OpenAI Chat / Responses 原生走统一 OpenAI 标准化与 DeepSeek payload 组装;Claude / Gemini 会尽量复用 OpenAI prompt/tool 语义,其中 Gemini 直接复用 `promptcompat.BuildOpenAIPromptForAdapter`。Go 主服务新增 `completionruntime` 启动层,统一执行 DeepSeek session/PoW/call;输出侧新增 `assistantturn` 语义层:非流式 OpenAI Chat / Responses / Claude / Gemini 会把 DeepSeek SSE 收集结果先归一成同一份 assistant turn,再分别渲染成各协议原生外形;流式 OpenAI Chat / Responses / Claude / Gemini 继续保持各协议实时 SSE framing,但最终收尾的 tool fallback、schema 归一、usage、empty-output / content-filter 错误语义同样由 `assistantturn` 判定。Claude / Gemini 的常规 Go 主路径不再依赖内部 `httptest` 转发到 OpenAI handler;`translatorcliproxy` 仅保留用于 Vercel bridge、后端缺失 fallback 和回归测试,不作为主业务协议转换中心。 - Vercel Node 流式路径本轮不迁移,仍使用现有 Node bridge / stream-tool-sieve 实现;后续若变更 Node 流式语义,需要按 `assistantturn` 的 Go canonical 输出语义同步对齐。 - 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关;关闭时即使上游返回 
`response/thinking_content`,兼容层也不会把它当作可见正文输出。若最终解析出的模型名带 `-nothinking` 后缀,则会无条件强制关闭 thinking,优先级高于请求体中的 `thinking` / `reasoning` / `reasoning_effort`。未显式关闭时,各 surface 会按解析后的 DeepSeek 模型默认能力开启 thinking,并用各自协议的原生形态暴露:OpenAI Chat 为 `reasoning_content`,OpenAI Responses 为 `response.reasoning.delta` / `reasoning` content,Claude 为 `thinking` block / `thinking_delta`,Gemini 为 `thought: true` part。 -- 对 OpenAI Chat / Responses 的非流式收尾,如果最终可见正文为空,兼容层会优先尝试把思维链中的独立 DSML / XML 工具块当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测,但不会因为思维链内容去中途拦截或改写流式输出;真正的工具识别始终基于原始上游文本,而不是基于“已经做过可见输出清洗”的版本,因此即使最终可见层会剥离完整 leaked DSML / XML `tool_calls` wrapper、并抑制全空参数或无效 wrapper 块,也不会影响真实工具调用转成结构化 `tool_calls` / `function_call`。补发结果会作为本轮 assistant 的结构化 `tool_calls` / `function_call` 输出返回,而不是塞进 `content` 文本;如果客户端没有开启 thinking / reasoning,思维链只用于检测,不会作为 `reasoning_content` 或可见正文暴露。只有正文为空且思维链里也没有可执行工具调用时,才继续按空回复错误处理。 -- OpenAI Chat / Responses 的空回复错误处理之前会默认做一次内部补偿重试:第一次上游完整结束后,如果最终可见正文为空、没有解析到工具调用、也没有已经向客户端流式发出工具调用,并且终止原因不是 `content_filter`,兼容层会复用同一个 `chat_session_id`、账号、token 与工具策略,把原始 completion `prompt` 追加固定后缀 `Previous reply had no visible output. 
Please regenerate the visible final answer or tool call now.` 后重新提交一次。重试遵循 DeepSeek 多轮对话协议:从第一次上游 SSE 流中提取 `response_message_id`,并在重试 payload 中设置 `parent_message_id` 为该值,使重试成为同一会话的后续轮次而非断裂的根消息;同时重新获取一次 PoW(若 PoW 获取失败则回退到原始 PoW)。该重试不会重新标准化消息、不会新建 session、不会切换账号,也不会向流式客户端插入重试标记;第二次 thinking / reasoning 会按正常增量直接接到第一次之后,并继续使用 overlap trim 去重。若第二次仍为空,终端错误码仍保持现有 `upstream_empty_output`;若任一尝试触发空 `content_filter`,不做补偿重试并保持 `content_filter` 错误。JS Vercel 运行时同样设置 `parent_message_id`,但因无法直接调用 PoW API 而复用原始 PoW。 +- 对 OpenAI Chat / Responses 的非流式收尾,如果最终可见正文为空,兼容层会优先尝试把思维链中的独立 DSML / XML 工具块当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测,但不会因为思维链内容去中途拦截或改写流式输出;真正的工具识别始终基于原始上游文本,而不是基于“已经做过可见输出清洗”的版本。最终可见层会剥离已经成功解析成工具调用的完整 leaked DSML / XML `tool_calls` wrapper;如果遇到完整 wrapper 但内部形态不符合可执行工具调用语义(例如 `` 这类 malformed XML 工具壳),流式 sieve 会把该块作为普通文本释放,而不是吞掉或伪造成工具调用。补发结果会作为本轮 assistant 的结构化 `tool_calls` / `function_call` 输出返回,而不是塞进 `content` 文本;如果客户端没有开启 thinking / reasoning,思维链只用于检测,不会作为 `reasoning_content` 或可见正文暴露。只有正文为空且思维链里也没有可执行工具调用时,才继续按空回复错误处理。 +- OpenAI Chat / Responses、Claude Messages、Gemini generateContent 的空回复错误处理之前会默认做一次内部补偿重试:第一次上游完整结束后,如果最终可见正文为空、没有解析到工具调用、也没有已经向客户端流式发出工具调用,并且终止原因不是 `content_filter`,兼容层会复用同一个 `chat_session_id`、账号、token 与工具策略,把原始 completion `prompt` 追加固定后缀 `Previous reply had no visible output. 
Please regenerate the visible final answer or tool call now.` 后重新提交一次。Go 主路径的非流式重试由 `completionruntime.ExecuteNonStreamWithRetry` 统一处理;流式重试由 `completionruntime.ExecuteStreamWithRetry` 统一处理,各协议 runtime 只负责消费/渲染本协议 SSE framing。重试遵循 DeepSeek 多轮对话协议:从第一次上游 SSE 流中提取 `response_message_id`,并在重试 payload 中设置 `parent_message_id` 为该值,使重试成为同一会话的后续轮次而非断裂的根消息;同时重新获取一次 PoW(若 PoW 获取失败则回退到原始 PoW)。该同账号重试不会重新标准化消息、不会新建 session,也不会向流式客户端插入重试标记;第二次 thinking / reasoning 会按正常增量直接接到第一次之后,并继续使用 overlap trim 去重。若同账号补偿重试后即将返回 429 `upstream_empty_output`,并且当前是托管账号模式,Go 主路径会在返回 429 前切换到下一个可用账号,新建 `chat_session_id`,使用原始 completion payload 再做一次 fresh retry;该切号重试不携带空回复 prompt 后缀,也不设置上一账号的 `parent_message_id`。如果没有可切换账号,或切号后的 fresh retry 仍没有可见正文或工具调用,则继续按原错误返回:无任何输出为 503 `upstream_unavailable`,有 reasoning 但没有可见正文或工具调用为 429 `upstream_empty_output`。若任一尝试触发空 `content_filter`,不做补偿重试并保持 `content_filter` 错误。JS Vercel 运行时同样设置 `parent_message_id`,但因无法直接调用 PoW API 而复用原始 PoW;切号 fresh retry 目前由 Go 主路径提供。 - 非流式 OpenAI Chat / Responses、Claude Messages、Gemini generateContent 在最终可见正文渲染阶段,会把 DeepSeek 搜索返回中的 `[citation:N]` / `[reference:N]` 标记替换成对应 Markdown 链接。`citation` 标记按一基序号解析;`reference` 标记只有在同一段正文中出现 `[reference:0]`(允许冒号后有空格)时才按零基序号映射,并且不会影响同段正文里的 `citation` 标记。 - 流式输出仍默认隐藏 `[citation:N]` / `[reference:N]` 这类上游内部标记,避免分片输出中泄漏尚未完成映射的引用占位符。 @@ -167,14 +167,15 @@ OpenAI Chat / Responses 在标准化后、current input file 之前,会默认 3. 再附上统一的 DSML tool call 外壳格式约束。 4. 
把这整段内容并入 system prompt。 -工具调用正例现在优先示范官方 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。 -兼容层仍接受旧式纯 `` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `` / `` / ``;但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意:这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现;DSML 标签会在解析入口归一化回现有 XML 标签后继续走同一套 parser。 +工具调用正例现在优先示范全角分隔符 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。 +兼容层仍接受旧式纯 `` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `` / `` / ``、下划线形式 `` / `` / ``,以及其他前缀分隔形态如 `` / `` / ``;标签壳扫描还会把全角 ASCII 漂移归一化,例如 `<dSML|tool_calls>` 与全角 `>` 结束符,也会容错 CJK 尖括号和属性尾部分隔符漂移,例如 `...〈/DSM|parameter〉`。更一般地,Go / Node tag 扫描以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,标签名前任意协议前缀壳都会在解析入口剥离,例如 ``、`` 这类控制符或非 ASCII 分隔符漂移也会归一化回现有 XML 标签后继续走同一套 parser。但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意:这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现。解析器会先截获非代码块中的疑似工具 wrapper,完整解析失败或工具语义无效时再按普通文本放行。 数组参数使用 `...` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... 
}` 对象。除此之外,解析器还会回收一些更松散的列表写法,例如 JSON array 字面量或逗号分隔的 JSON 项序列,只要它们足够明确;但 `` 仍然是首选形态。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过,如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `urgent` 这种行内标记,兼容层会保留原始字符串,不会强行升成 object / array;只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。对 `command` / `content` 等长文本参数,CDATA 内部的 Markdown fenced DSML / XML 示例会作为原文保护;示例里的 `]]>` 或 `` 不会截断外层工具调用,解析器会继续等待围栏外真正的参数 / wrapper 结束标签。 Go 侧读取 DeepSeek SSE 时不再依赖 `bufio.Scanner` 的固定 2MiB 单行上限;当写文件类工具把很长的 `content` 放在单个 `data:` 行里返回时,非流式收集、流式解析和 auto-continue 透传都会保留完整行,再进入同一套工具解析与序列化流程。 在 assistant 最终回包阶段,如果某个 tool 参数在声明 schema 中明确是 `string`,兼容层会在把解析后的 `tool_calls` / `function_call` 重新序列化成 OpenAI / Responses / Claude 可见参数前,递归把该路径上的 number / bool / object / array 统一转成字符串;其中 object / array 会压成紧凑 JSON 字符串。这个保护只对 schema 明确声明为 string 的路径生效,不会改写本来就是 `number` / `boolean` / `object` / `array` 的参数。这样可以兼容 DeepSeek 输出了结构化片段、但上游客户端工具 schema 又严格要求字符串参数的场景(例如 `content`、`prompt`、`path`、`taskId` 等)。 工具 schema 的权威来源始终是**当前请求实际携带的 schema**,而不是同名工具在其他 runtime(Claude Code / OpenCode / Codex 等)里的默认印象。兼容层现在会同时兼容 OpenAI 风格 `function.parameters`、直接工具对象上的 `parameters` / `input_schema`、以及 camelCase 的 `inputSchema` / `schema`,并在最终输出阶段按这份请求内 schema 决定是保留 array/object,还是仅对明确声明为 `string` 的路径做字符串化。该规则同样适用于 Claude 的流式收尾和 Vercel Node 流式 tool-call formatter,避免不同 runtime 因 schema shape 差异而出现同名工具参数类型漂移。 正例中的工具名只会来自当前请求实际声明的工具;如果当前请求没有足够的已知工具形态,就省略对应的单工具、多工具或嵌套示例,避免把不可用工具名写进 prompt。 对执行类工具,脚本内容必须进入执行参数本身:`Bash` / `execute_command` 使用 `command`,`exec_command` 使用 `cmd`;不要把脚本示范成 `path` / `content` 文件写入参数。 +工具提示词也会明确要求模型按本次调用实际需要填写参数,禁止输出 placeholder、空字符串或纯空白参数;如果必填参数未知,应先追问用户或正常文字回复,而不是输出空工具壳。对 `Bash` / `execute_command` 这类 shell 工具,命令或脚本必须写入 `command` 参数。解析层仍会把空字符串参数结构化返回;是否拒绝空 `command` 由后续工具执行侧 / 客户端 schema 校验决定。 如果当前请求声明了 `Read` / `read_file` 这类读取工具,兼容层会额外注入一条 read-tool cache guard:当读取结果只表示“文件未变更 / 已在历史中 / 请引用先前上下文 / 
没有正文内容”时,模型必须把它视为内容不可用,不能反复调用同一个无正文读取;应改为请求完整正文读取能力,或向用户说明需要重新提供文件内容。这个约束只缓解客户端缓存返回空内容导致的死循环,DS2API 不会也无法凭空恢复客户端本地文件正文。 OpenAI 路径实现: @@ -205,16 +206,20 @@ assistant 的 reasoning 会变成一个显式标签块: 然后再接可见回答正文。 +对最终返回给客户端的 assistant 轮次,reasoning 不会因为本轮输出了工具调用而被丢弃。OpenAI Chat 会在同一个 assistant message 上同时返回 `reasoning_content` 和 `tool_calls`;OpenAI Responses 会先返回一个包含 `reasoning` content 的 assistant message item,再返回后续 `function_call` item;Claude / Gemini 也会在各自原生 thinking / thought 结构后继续返回 tool_use / functionCall。 + +对进入后续 prompt / `DS2API_HISTORY.txt` 的历史轮次,兼容层也会把同一轮工具调用前的 reasoning 绑定到 assistant tool call 历史上。OpenAI Chat 原生 `reasoning_content + tool_calls` 会直接保留;OpenAI Responses 若以 `reasoning` message item 后接 `function_call` item 的形式回放历史,会在归一化时合并为同一个 assistant 历史块;Claude 的 `thinking` block 会绑定到后续 `tool_use`;Gemini 的 `thought: true` part 会绑定到后续 `functionCall`。最终 prompt 中的顺序固定为 `[reasoning_content]...[/reasoning_content]`,再接 DSML tool call 外壳。 + ### 7.2 历史 tool_calls 保留方式 assistant 历史 `tool_calls` 不会保留成 OpenAI 原生 JSON,而会转成 prompt 可见的 DSML 外壳: ```xml -<|DSML|tool_calls> - <|DSML|invoke name="read_file"> - <|DSML|parameter name="path"> - - +<|DSML|tool_calls> + <|DSML|invoke name="read_file"> + <|DSML|parameter name="path"> + + ``` 解析层同时兼容旧式纯 XML 形态:`` / `` / ``。两者都会先归一到现有 XML 解析语义;其他旧格式都会作为普通文本保留,不会作为可执行调用语法。 @@ -419,7 +424,8 @@ Prior conversation history and tool progress. 如果改的是 tool call 相关兼容语义,还应同时检查: - `go test ./internal/toolcall/...` -- `node --test tests/node/stream-tool-sieve.test.js` +- `go test ./internal/toolstream/...` +- `./tests/scripts/run-unit-node.sh` ## 14. 
文档同步约定 diff --git a/docs/toolcall-semantics.md b/docs/toolcall-semantics.md index e84945d..f2c148f 100644 --- a/docs/toolcall-semantics.md +++ b/docs/toolcall-semantics.md @@ -6,14 +6,14 @@ ## 1) 当前可执行格式 -当前版本推荐模型输出 DSML 外壳: +当前版本推荐模型输出全角分隔符 DSML 外壳: ```xml -<|DSML|tool_calls> - <|DSML|invoke name="read_file"> - <|DSML|parameter name="path"> - - +<|DSML|tool_calls> + <|DSML|invoke name="read_file"> + <|DSML|parameter name="path"> + + ``` 兼容层仍接受旧式 canonical XML: @@ -30,17 +30,17 @@ 约束: -- 必须有 `<|DSML|tool_calls>...` 或 `...` wrapper -- 每个调用必须在 `<|DSML|invoke name="...">...` 或 `...` 内 +- 必须有 `<|DSML|tool_calls>...` 或 `...` wrapper +- 每个调用必须在 `<|DSML|invoke name="...">...` 或 `...` 内 - 工具名必须放在 `invoke` 的 `name` 属性 -- 参数必须使用 `<|DSML|parameter name="...">...` 或 `...` +- 参数必须使用 `<|DSML|parameter name="...">...` 或 `...` - 同一个工具块内不要混用 DSML 标签和旧 XML 工具标签;混搭会被视为非法工具块 兼容修复: - 如果模型漏掉 opening wrapper,但后面仍输出了一个或多个 invoke 并以 closing wrapper 收尾,Go 解析链路会在解析前补回缺失的 opening wrapper。 -- Go / Node 解析层不再枚举每一种 DSML typo。它会把工具标签名前的 `DSML`、管道符 `|` / `|`、空白、重复 leading `<` 视为可容忍的协议噪声,然后只匹配固定本地标签名 `tool_calls` / `invoke` / `parameter`。例如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、`<` 都会归一化;相似但非固定标签名(如 `tool_calls_extra`)仍按普通文本处理。 -- 如果模型在固定工具标签名后多输出一个尾部管道符,例如 `<|DSML|tool_calls|` / `<|DSML|invoke|` / `<|DSML|parameter|`,兼容层会把这个尾部 `|` 当作异常标签终止符并补齐缺失的 `>`;如果后面已经有 `>`,也会消费这个多余 `|` 后再归一化。 +- Go / Node 解析层不再枚举每一种 DSML typo。它以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,把标签名前的任意协议前缀壳视为可容忍噪声,并继续兼容管道符 `|` / `|`、空白、重复 leading `<`、可视控制符 `␂`、原始 STX `\x02`、非 ASCII 分隔符、CJK 尖括号 `〈` / `〉` 等漂移。例如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、`<`、``、``、`...〈/DSM|tool_calls〉` 都会归一化;相似但非固定标签名(如 `tool_calls_extra`)仍按普通文本处理。 +- 如果模型在固定工具标签名后多输出一个尾部管道符,例如 `<|DSML|tool_calls|` / `<|DSML|invoke|` / `<|DSML|parameter|`,或在带属性标签的结束符前多输出一个尾部管道符(如 ``),兼容层会把这个尾部 `|` / `|` 当作异常标签终止符并补齐或归一化;如果后面已经有 `>` / `〉`,也会消费这个多余分隔符后再归一化。 - 这是一个针对常见模型失误的窄修复,不改变推荐输出格式;prompt 仍要求模型直接输出完整 DSML 外壳。 - 裸 `` / `` 不会被当成“已支持的工具语法”;只有 
`tool_calls` wrapper 或可修复的缺失 opening wrapper 才会进入工具调用路径。 @@ -54,7 +54,7 @@ 在流式链路中(Go / Node 一致): -- DSML `<|DSML|tool_calls>` wrapper、短横线形式(如 `` / `` / ``)、基于固定本地标签名的 DSML 噪声容错形态、尾部管道符形态(如 `<|DSML|tool_calls|`)和 canonical `` wrapper 都会进入结构化捕获 +- DSML `<|DSML|tool_calls>` wrapper、短横线形式(如 `` / `` / ``)、基于固定本地标签名的 DSML 噪声容错形态、尾部管道符形态(如 `<|DSML|tool_calls|`)和 canonical `` wrapper 都会进入结构化捕获 - 如果流里直接从 invoke 开始,但后面补上了 closing wrapper,Go 流式筛分也会按缺失 opening wrapper 的修复路径尝试恢复 - 已识别成功的工具调用不会再次回流到普通文本 - 不符合新格式的块不会执行,并继续按原样文本透传 @@ -78,11 +78,16 @@ - `rejectedByPolicy`:当前固定为 `false` - `rejectedToolNames`:当前固定为空数组 +解析层不会因为参数值为空而丢弃工具调用。若模型输出了显式空字符串或纯空白参数,它们会按空字符串进入结构化 `tool_calls`;是否拒绝缺参或空命令应由后续工具执行侧 / 客户端 schema 校验决定。Prompt 层仍会要求模型不要主动输出空参数。 + +完整的 DSML / XML wrapper 只有在成功解析出有效 `invoke name`,并且参数节点(如存在)符合 `parameter` 语义后,才会变成结构化工具调用;真正的零参数工具调用仍然有效。如果 wrapper 完整但内部不是可执行工具调用形态(例如使用 ``、缺少有效 `invoke name`、或其他 malformed XML 工具壳),流式 sieve 会把原始 wrapper 作为普通文本释放,不会吞掉内容,也不会生成空的工具调用。 + ## 5) 落地建议 1. Prompt 里只示范 DSML 外壳语法。 2. 上游客户端应直接输出完整 DSML 外壳;DS2API 兼容旧式 canonical XML,并只对“closing tag 在、opening tag 漏掉”的常见失误做窄修复,不会泛化接受其他旧格式。 -3. 不要依赖 parser 做安全控制;执行器侧仍应做工具名和参数校验。 +3. 模型只有在知道本次调用所需参数值时才应输出工具调用;不要输出 placeholder、空字符串或纯空白参数。对 `Bash` / `execute_command`,实际命令必须在 `command` 参数里。 +4. 不要依赖 parser 做安全控制;执行器侧仍应做工具名和参数校验。 ## 6) 回归验证 @@ -90,17 +95,18 @@ ```bash go test -v -run 'TestParseToolCalls|TestProcessToolSieve' ./internal/toolcall ./internal/toolstream ./internal/httpapi/openai/... 
-node --test tests/node/stream-tool-sieve.test.js +./tests/scripts/run-unit-node.sh ``` 重点覆盖: -- DSML `<|DSML|tool_calls>` wrapper 正常解析 +- DSML `<|DSML|tool_calls>` wrapper 正常解析 - legacy canonical `` wrapper 正常解析 -- 固定本地标签名的 DSML 噪声容错形态(如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、`<`)正常解析 +- 固定本地标签名的 DSML 噪声容错形态(如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、`<`、`...〈/DSM|tool_calls〉`)正常解析 - 混搭标签(DSML wrapper + canonical inner)归一化后正常解析 - 波浪线围栏 `~~~` 内的示例不执行 - 嵌套围栏(4 反引号嵌套 3 反引号)内的示例不执行 - 文本 mention 标签名后紧跟真正工具调用的场景(含同一 wrapper 变体) +- 空参数结构化保留,malformed executable-looking XML wrapper 作为文本释放 - 非兼容内容按普通文本透传 - 代码块示例不执行 diff --git a/internal/assistantturn/turn.go b/internal/assistantturn/turn.go index bc8bd19..b329e65 100644 --- a/internal/assistantturn/turn.go +++ b/internal/assistantturn/turn.go @@ -218,7 +218,7 @@ func UpstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int, if strings.TrimSpace(thinking) != "" { return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned reasoning without visible output.", "upstream_empty_output" } - return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned empty output.", "upstream_empty_output" + return http.StatusServiceUnavailable, "Upstream service is unavailable and returned no output.", "upstream_unavailable" } // ShouldRetryEmptyOutput returns true when the turn produced no visible text diff --git a/internal/assistantturn/turn_test.go b/internal/assistantturn/turn_test.go index 4fa6c99..b2f9445 100644 --- a/internal/assistantturn/turn_test.go +++ b/internal/assistantturn/turn_test.go @@ -1,6 +1,7 @@ package assistantturn import ( + "net/http" "testing" "ds2api/internal/promptcompat" @@ -70,6 +71,13 @@ func TestBuildTurnFromCollectedThinkingOnlyIsEmptyOutput(t *testing.T) { } } +func TestBuildTurnFromCollectedPureEmptyOutputIsUpstreamUnavailable(t *testing.T) { + turn := BuildTurnFromCollected(sse.CollectResult{}, BuildOptions{}) + if turn.Error == 
nil || turn.Error.Status != http.StatusServiceUnavailable || turn.Error.Code != "upstream_unavailable" { + t.Fatalf("expected upstream unavailable error, got %#v", turn.Error) + } +} + func TestBuildTurnFromCollectedToolChoiceRequired(t *testing.T) { turn := BuildTurnFromCollected(sse.CollectResult{Text: "hello"}, BuildOptions{ ToolChoice: promptcompat.ToolChoicePolicy{Mode: promptcompat.ToolChoiceRequired}, diff --git a/internal/auth/auth_edge_test.go b/internal/auth/auth_edge_test.go index 0dad649..73e970d 100644 --- a/internal/auth/auth_edge_test.go +++ b/internal/auth/auth_edge_test.go @@ -241,6 +241,36 @@ func TestSwitchAccountSkipsLoginFailureAndContinues(t *testing.T) { } } +func TestSwitchAccountRespectsPinnedTargetAccount(t *testing.T) { + t.Setenv("DS2API_CONFIG_JSON", `{ + "keys":["managed-key"], + "accounts":[ + {"email":"acc1@test.com","token":"t1"}, + {"email":"acc2@test.com","token":"t2"} + ] + }`) + store := config.LoadStore() + pool := account.NewPool(store) + r := NewResolver(store, pool, func(_ context.Context, _ config.Account) (string, error) { + return "new-token", nil + }) + + req, _ := http.NewRequest("POST", "/", nil) + req.Header.Set("Authorization", "Bearer managed-key") + req.Header.Set("X-Ds2-Target-Account", "acc1@test.com") + a, err := r.Determine(req) + if err != nil { + t.Fatalf("determine failed: %v", err) + } + defer r.Release(a) + if r.SwitchAccount(context.Background(), a) { + t.Fatal("expected switch to be disabled for pinned target account") + } + if a.AccountID != "acc1@test.com" { + t.Fatalf("expected pinned account to remain selected, got %q", a.AccountID) + } +} + // ─── Release edge cases ───────────────────────────────────────────── func TestReleaseNilAuth(t *testing.T) { diff --git a/internal/auth/request.go b/internal/auth/request.go index e6a0d88..fd84a12 100644 --- a/internal/auth/request.go +++ b/internal/auth/request.go @@ -28,6 +28,7 @@ type RequestAuth struct { DeepSeekToken string CallerID string AccountID 
string + TargetAccount string Account config.Account TriedAccounts map[string]bool resolver *Resolver @@ -99,6 +100,7 @@ func (r *Resolver) acquireManagedRequestAuth(ctx context.Context, callerID, targ UseConfigToken: true, CallerID: callerID, AccountID: acc.Identifier(), + TargetAccount: target, Account: acc, TriedAccounts: tried, resolver: r, @@ -185,6 +187,9 @@ func (r *Resolver) SwitchAccount(ctx context.Context, a *RequestAuth) bool { if !a.UseConfigToken { return false } + if strings.TrimSpace(a.TargetAccount) != "" { + return false + } if a.TriedAccounts == nil { a.TriedAccounts = map[string]bool{} } @@ -208,6 +213,13 @@ func (r *Resolver) SwitchAccount(ctx context.Context, a *RequestAuth) bool { } } +func (a *RequestAuth) SwitchAccount(ctx context.Context) bool { + if a == nil || a.resolver == nil { + return false + } + return a.resolver.SwitchAccount(ctx, a) +} + func (r *Resolver) Release(a *RequestAuth) { if a == nil || !a.UseConfigToken || a.AccountID == "" { return diff --git a/internal/completionruntime/nonstream.go b/internal/completionruntime/nonstream.go index 83709ca..921d3b4 100644 --- a/internal/completionruntime/nonstream.go +++ b/internal/completionruntime/nonstream.go @@ -90,7 +90,11 @@ func ExecuteNonStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.R if startErr != nil { return NonStreamResult{SessionID: start.SessionID, Payload: start.Payload}, startErr } - stdReq = start.Request + return ExecuteNonStreamStartedWithRetry(ctx, ds, a, start, opts) +} + +func ExecuteNonStreamStartedWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, start StartResult, opts Options) (NonStreamResult, *assistantturn.OutputError) { + stdReq := start.Request maxAttempts := opts.MaxAttempts if maxAttempts <= 0 { maxAttempts = 3 @@ -100,6 +104,7 @@ func ExecuteNonStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.R pow := start.Pow attempts := 0 + accountSwitchAttempted := false currentResp := start.Response usagePrompt 
:= stdReq.PromptTokenText accumulatedThinking := "" @@ -108,6 +113,24 @@ func ExecuteNonStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.R for { turn, outErr := collectAttempt(currentResp, stdReq, usagePrompt, opts) if outErr != nil { + if canRetryOnAlternateAccount(ctx, a, outErr, opts.RetryEnabled, &accountSwitchAttempted) { + switched, switchErr := startStandardCompletionOnAlternateAccount(ctx, ds, a, stdReq, maxAttempts) + if switchErr != nil { + return NonStreamResult{SessionID: sessionID, Payload: payload, Attempts: attempts}, switchErr + } + if switched.Response != nil { + config.Logger.Info("[completion_runtime_account_switch_retry] retrying after 429", "surface", stdReq.Surface, "stream", false, "account", a.AccountID) + sessionID = switched.SessionID + payload = switched.Payload + pow = switched.Pow + currentResp = switched.Response + usagePrompt = stdReq.PromptTokenText + accumulatedThinking = "" + accumulatedRawThinking = "" + accumulatedToolDetectionThinking = "" + continue + } + } return NonStreamResult{SessionID: sessionID, Payload: payload, Attempts: attempts}, outErr } accumulatedThinking += sse.TrimContinuationOverlap(accumulatedThinking, turn.Thinking) @@ -130,6 +153,24 @@ func ExecuteNonStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.R retryMax = shared.EmptyOutputRetryMaxAttempts() } if !opts.RetryEnabled || !assistantturn.ShouldRetryEmptyOutput(turn, attempts, retryMax) { + if canRetryOnAlternateAccount(ctx, a, turn.Error, opts.RetryEnabled, &accountSwitchAttempted) { + switched, switchErr := startStandardCompletionOnAlternateAccount(ctx, ds, a, stdReq, maxAttempts) + if switchErr != nil { + return NonStreamResult{SessionID: sessionID, Payload: payload, Turn: turn, Attempts: attempts}, switchErr + } + if switched.Response != nil { + config.Logger.Info("[completion_runtime_account_switch_retry] retrying after 429", "surface", stdReq.Surface, "stream", false, "account", a.AccountID) + sessionID = switched.SessionID 
+ payload = switched.Payload + pow = switched.Pow + currentResp = switched.Response + usagePrompt = stdReq.PromptTokenText + accumulatedThinking = "" + accumulatedRawThinking = "" + accumulatedToolDetectionThinking = "" + continue + } + } return NonStreamResult{SessionID: sessionID, Payload: payload, Turn: turn, Attempts: attempts}, turn.Error } @@ -150,6 +191,37 @@ func ExecuteNonStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.R } } +func canRetryOnAlternateAccount(ctx context.Context, a *auth.RequestAuth, outErr *assistantturn.OutputError, retryEnabled bool, attempted *bool) bool { + if outErr == nil || outErr.Status != http.StatusTooManyRequests { + return false + } + if !retryEnabled || attempted == nil || *attempted { + return false + } + if a == nil || !a.UseConfigToken { + return false + } + *attempted = true + return a.SwitchAccount(ctx) +} + +func startStandardCompletionOnAlternateAccount(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, stdReq promptcompat.StandardRequest, maxAttempts int) (StartResult, *assistantturn.OutputError) { + sessionID, err := ds.CreateSession(ctx, a, maxAttempts) + if err != nil { + return StartResult{}, authOutputError(a) + } + pow, err := ds.GetPow(ctx, a, maxAttempts) + if err != nil { + return StartResult{SessionID: sessionID}, &assistantturn.OutputError{Status: http.StatusUnauthorized, Message: "Failed to get PoW (invalid token or unknown error).", Code: "error"} + } + payload := stdReq.CompletionPayload(sessionID) + resp, err := ds.CallCompletion(ctx, a, payload, pow, maxAttempts) + if err != nil { + return StartResult{SessionID: sessionID, Payload: payload, Pow: pow}, &assistantturn.OutputError{Status: http.StatusInternalServerError, Message: "Failed to get completion.", Code: "error"} + } + return StartResult{SessionID: sessionID, Payload: payload, Pow: pow, Response: resp, Request: stdReq}, nil +} + func collectAttempt(resp *http.Response, stdReq promptcompat.StandardRequest, usagePrompt 
string, opts Options) (assistantturn.Turn, *assistantturn.OutputError) { defer func() { if err := resp.Body.Close(); err != nil { diff --git a/internal/completionruntime/nonstream_test.go b/internal/completionruntime/nonstream_test.go index e10b927..7c5959a 100644 --- a/internal/completionruntime/nonstream_test.go +++ b/internal/completionruntime/nonstream_test.go @@ -7,15 +7,19 @@ import ( "strings" "testing" + "ds2api/internal/account" "ds2api/internal/auth" + "ds2api/internal/config" dsclient "ds2api/internal/deepseek/client" "ds2api/internal/promptcompat" ) type fakeDeepSeekCaller struct { - responses []*http.Response - payloads []map[string]any - uploads []dsclient.UploadFileRequest + responses []*http.Response + payloads []map[string]any + uploads []dsclient.UploadFileRequest + completionAccounts []string + sessionByAccount bool } type currentInputRuntimeConfig struct{} @@ -23,7 +27,10 @@ type currentInputRuntimeConfig struct{} func (currentInputRuntimeConfig) CurrentInputFileEnabled() bool { return true } func (currentInputRuntimeConfig) CurrentInputFileMinChars() int { return 0 } -func (f *fakeDeepSeekCaller) CreateSession(context.Context, *auth.RequestAuth, int) (string, error) { +func (f *fakeDeepSeekCaller) CreateSession(_ context.Context, a *auth.RequestAuth, _ int) (string, error) { + if f.sessionByAccount && a != nil && a.AccountID != "" { + return "session-" + a.AccountID, nil + } return "session-1", nil } @@ -36,8 +43,11 @@ func (f *fakeDeepSeekCaller) UploadFile(_ context.Context, _ *auth.RequestAuth, return &dsclient.UploadFileResult{ID: "file-runtime-1"}, nil } -func (f *fakeDeepSeekCaller) CallCompletion(_ context.Context, _ *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) { +func (f *fakeDeepSeekCaller) CallCompletion(_ context.Context, a *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) { f.payloads = append(f.payloads, payload) + if a != nil { + f.completionAccounts = 
append(f.completionAccounts, a.AccountID) + } if len(f.responses) == 0 { return sseHTTPResponse(http.StatusOK, `data: {"p":"response/content","v":"fallback"}`), nil } @@ -89,9 +99,72 @@ func TestExecuteNonStreamWithRetryBuildsCanonicalTurn(t *testing.T) { } } +func TestExecuteNonStreamWithRetrySwitchesManagedAccountBeforeFinal429(t *testing.T) { + t.Setenv("DS2API_CONFIG_JSON", `{ + "keys":["managed-key"], + "accounts":[ + {"email":"acc1@test.com","password":"pwd"}, + {"email":"acc2@test.com","password":"pwd"} + ] + }`) + store := config.LoadStore() + resolver := auth.NewResolver(store, account.NewPool(store), func(_ context.Context, acc config.Account) (string, error) { + return "token-" + acc.Identifier(), nil + }) + req, _ := http.NewRequest(http.MethodPost, "/", nil) + req.Header.Set("Authorization", "Bearer managed-key") + a, err := resolver.Determine(req) + if err != nil { + t.Fatalf("determine failed: %v", err) + } + defer resolver.Release(a) + + ds := &fakeDeepSeekCaller{ + sessionByAccount: true, + responses: []*http.Response{ + sseHTTPResponse(http.StatusOK, `data: {"response_message_id":11,"p":"response/thinking_content","v":"first empty"}`), + sseHTTPResponse(http.StatusOK, `data: {"response_message_id":12,"p":"response/thinking_content","v":"retry empty"}`), + sseHTTPResponse(http.StatusOK, `data: {"response_message_id":21,"p":"response/content","v":"ok from second account"}`), + }, + } + stdReq := promptcompat.StandardRequest{ + Surface: "test", + ResponseModel: "deepseek-v4-flash", + PromptTokenText: "prompt", + FinalPrompt: "final prompt", + Thinking: true, + } + + result, outErr := ExecuteNonStreamWithRetry(context.Background(), ds, a, stdReq, Options{RetryEnabled: true}) + if outErr != nil { + t.Fatalf("unexpected output error after account switch retry: %#v", outErr) + } + if result.Turn.Text != "ok from second account" { + t.Fatalf("text mismatch after switch retry: %q", result.Turn.Text) + } + if result.SessionID != "session-acc2@test.com" { + 
t.Fatalf("expected switched account session, got %q", result.SessionID) + } + wantAccounts := []string{"acc1@test.com", "acc1@test.com", "acc2@test.com"} + if len(ds.completionAccounts) != len(wantAccounts) { + t.Fatalf("completion account count mismatch: got %v want %v", ds.completionAccounts, wantAccounts) + } + for i, want := range wantAccounts { + if ds.completionAccounts[i] != want { + t.Fatalf("completion account %d = %q want %q (all=%v)", i, ds.completionAccounts[i], want, ds.completionAccounts) + } + } + if got := ds.payloads[2]["chat_session_id"]; got != "session-acc2@test.com" { + t.Fatalf("switched payload session mismatch: %#v", got) + } + if prompt, _ := ds.payloads[2]["prompt"].(string); strings.Contains(prompt, "Previous reply had no visible output") { + t.Fatalf("expected fresh switched-account prompt without empty-output suffix, got %q", prompt) + } +} + func TestExecuteNonStreamWithRetryUsesParentMessageForEmptyRetry(t *testing.T) { ds := &fakeDeepSeekCaller{responses: []*http.Response{ - sseHTTPResponse(http.StatusOK, `data: {"response_message_id":77,"p":"response/status","v":"FINISHED"}`), + sseHTTPResponse(http.StatusOK, `data: {"response_message_id":77,"p":"response/thinking_content","v":"plan"}`), sseHTTPResponse(http.StatusOK, `data: {"response_message_id":78,"p":"response/content","v":"ok"}`), }} stdReq := promptcompat.StandardRequest{ diff --git a/internal/completionruntime/stream_retry.go b/internal/completionruntime/stream_retry.go new file mode 100644 index 0000000..03c9dc7 --- /dev/null +++ b/internal/completionruntime/stream_retry.go @@ -0,0 +1,179 @@ +package completionruntime + +import ( + "context" + "io" + "net/http" + "strings" + + "ds2api/internal/assistantturn" + "ds2api/internal/auth" + "ds2api/internal/config" + "ds2api/internal/httpapi/openai/shared" +) + +type StreamRetryOptions struct { + Surface string + Stream bool + RetryEnabled bool + RetryMaxAttempts int + MaxAttempts int + UsagePrompt string +} + +type 
StreamRetryHooks struct { + ConsumeAttempt func(resp *http.Response, allowDeferEmpty bool) (terminalWritten bool, retryable bool) + Finalize func(attempts int) + ParentMessageID func() int + OnRetry func(attempts int) + OnRetryPrompt func(prompt string) + OnRetryFailure func(status int, message, code string) + OnAccountSwitch func(sessionID string) + OnTerminal func(attempts int) +} + +func ExecuteStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, initialResp *http.Response, payload map[string]any, pow string, opts StreamRetryOptions, hooks StreamRetryHooks) { + if hooks.ConsumeAttempt == nil { + return + } + surface := strings.TrimSpace(opts.Surface) + if surface == "" { + surface = "completion" + } + maxAttempts := opts.MaxAttempts + if maxAttempts <= 0 { + maxAttempts = 3 + } + retryMax := opts.RetryMaxAttempts + if retryMax <= 0 { + retryMax = shared.EmptyOutputRetryMaxAttempts() + } + + attempts := 0 + accountSwitchAttempted := false + currentResp := initialResp + currentPayload := clonePayload(payload) + for { + allowAccountSwitch := opts.RetryEnabled && attempts >= retryMax && !accountSwitchAttempted && a != nil && a.UseConfigToken + terminalWritten, retryable := hooks.ConsumeAttempt(currentResp, opts.RetryEnabled && (attempts < retryMax || allowAccountSwitch)) + if terminalWritten { + if hooks.OnTerminal != nil { + hooks.OnTerminal(attempts) + } + return + } + if !retryable || !opts.RetryEnabled { + if hooks.Finalize != nil { + hooks.Finalize(attempts) + } + return + } + + if attempts >= retryMax { + if canRetryOnAlternateAccount(ctx, a, &assistantturn.OutputError{Status: http.StatusTooManyRequests}, opts.RetryEnabled, &accountSwitchAttempted) { + switched, switchErr := startPayloadCompletionOnAlternateAccount(ctx, ds, a, payload, maxAttempts) + if switchErr != nil { + if hooks.OnRetryFailure != nil { + hooks.OnRetryFailure(switchErr.Status, switchErr.Message, switchErr.Code) + } + return + } + if switched.Response != nil { + 
config.Logger.Info("[completion_runtime_account_switch_retry] retrying after 429", "surface", surface, "stream", opts.Stream, "account", a.AccountID) + currentResp = switched.Response + currentPayload = switched.Payload + pow = switched.Pow + if hooks.OnAccountSwitch != nil { + hooks.OnAccountSwitch(switched.SessionID) + } + if hooks.OnRetryPrompt != nil { + hooks.OnRetryPrompt(opts.UsagePrompt) + } + continue + } + } + if hooks.Finalize != nil { + hooks.Finalize(attempts) + } + return + } + + attempts++ + parentMessageID := 0 + if hooks.ParentMessageID != nil { + parentMessageID = hooks.ParentMessageID() + } + config.Logger.Info("[completion_runtime_empty_retry] attempting synthetic retry", "surface", surface, "stream", opts.Stream, "retry_attempt", attempts, "parent_message_id", parentMessageID) + retryPow, powErr := ds.GetPow(ctx, a, maxAttempts) + if powErr != nil { + config.Logger.Warn("[completion_runtime_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", surface, "stream", opts.Stream, "retry_attempt", attempts, "error", powErr) + retryPow = pow + } + nextResp, err := ds.CallCompletion(ctx, a, shared.ClonePayloadForEmptyOutputRetry(currentPayload, parentMessageID), retryPow, maxAttempts) + if err != nil { + if hooks.OnRetryFailure != nil { + hooks.OnRetryFailure(http.StatusInternalServerError, "Failed to get completion.", "error") + } + config.Logger.Warn("[completion_runtime_empty_retry] retry request failed", "surface", surface, "stream", opts.Stream, "retry_attempt", attempts, "error", err) + return + } + if nextResp.StatusCode != http.StatusOK { + body, readErr := io.ReadAll(nextResp.Body) + if readErr != nil { + config.Logger.Warn("[completion_runtime_empty_retry] retry error body read failed", "surface", surface, "stream", opts.Stream, "retry_attempt", attempts, "error", readErr) + } + closeRetryBody(surface, nextResp.Body) + msg := strings.TrimSpace(string(body)) + if msg == "" { + msg = http.StatusText(nextResp.StatusCode) 
+ } + if hooks.OnRetryFailure != nil { + hooks.OnRetryFailure(nextResp.StatusCode, msg, "error") + } + return + } + if hooks.OnRetry != nil { + hooks.OnRetry(attempts) + } + if hooks.OnRetryPrompt != nil { + hooks.OnRetryPrompt(shared.UsagePromptWithEmptyOutputRetry(opts.UsagePrompt, attempts)) + } + currentResp = nextResp + } +} + +func startPayloadCompletionOnAlternateAccount(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, payload map[string]any, maxAttempts int) (StartResult, *assistantturn.OutputError) { + sessionID, err := ds.CreateSession(ctx, a, maxAttempts) + if err != nil { + return StartResult{}, authOutputError(a) + } + pow, err := ds.GetPow(ctx, a, maxAttempts) + if err != nil { + return StartResult{SessionID: sessionID}, &assistantturn.OutputError{Status: http.StatusUnauthorized, Message: "Failed to get PoW (invalid token or unknown error).", Code: "error"} + } + nextPayload := clonePayload(payload) + nextPayload["chat_session_id"] = sessionID + delete(nextPayload, "parent_message_id") + resp, err := ds.CallCompletion(ctx, a, nextPayload, pow, maxAttempts) + if err != nil { + return StartResult{SessionID: sessionID, Payload: nextPayload, Pow: pow}, &assistantturn.OutputError{Status: http.StatusInternalServerError, Message: "Failed to get completion.", Code: "error"} + } + return StartResult{SessionID: sessionID, Payload: nextPayload, Pow: pow, Response: resp}, nil +} + +func clonePayload(payload map[string]any) map[string]any { + clone := make(map[string]any, len(payload)) + for k, v := range payload { + clone[k] = v + } + return clone +} + +func closeRetryBody(surface string, body io.Closer) { + if body == nil { + return + } + if err := body.Close(); err != nil { + config.Logger.Warn("[completion_runtime_empty_retry] retry response body close failed", "surface", surface, "error", err) + } +} diff --git a/internal/completionruntime/stream_retry_test.go b/internal/completionruntime/stream_retry_test.go new file mode 100644 index 
0000000..655016c --- /dev/null +++ b/internal/completionruntime/stream_retry_test.go @@ -0,0 +1,150 @@ +package completionruntime + +import ( + "context" + "io" + "net/http" + "strings" + "testing" + + "ds2api/internal/account" + "ds2api/internal/auth" + "ds2api/internal/config" + "ds2api/internal/httpapi/openai/shared" +) + +func TestExecuteStreamWithRetryUsesSharedRetryPayloadAndUsagePrompt(t *testing.T) { + ds := &fakeDeepSeekCaller{responses: []*http.Response{ + sseHTTPResponse(http.StatusOK, `data: {"p":"response/content","v":"ok"}`), + }} + initial := sseHTTPResponse(http.StatusOK, `data: {"response_message_id":77,"p":"response/thinking_content","v":"plan"}`) + payload := map[string]any{"prompt": "original prompt"} + attemptsSeen := 0 + retryPrompt := "" + + ExecuteStreamWithRetry(context.Background(), ds, &auth.RequestAuth{}, initial, payload, "pow", StreamRetryOptions{ + Surface: "test.stream", + Stream: true, + RetryEnabled: true, + UsagePrompt: "original prompt", + }, StreamRetryHooks{ + ConsumeAttempt: func(resp *http.Response, allowDeferEmpty bool) (bool, bool) { + defer func() { + if err := resp.Body.Close(); err != nil { + t.Fatalf("close failed: %v", err) + } + }() + _, _ = io.ReadAll(resp.Body) + attemptsSeen++ + return attemptsSeen == 2, attemptsSeen == 1 && allowDeferEmpty + }, + ParentMessageID: func() int { + return 77 + }, + OnRetryPrompt: func(prompt string) { + retryPrompt = prompt + }, + }) + + if attemptsSeen != 2 { + t.Fatalf("expected two stream attempts, got %d", attemptsSeen) + } + if len(ds.payloads) != 1 { + t.Fatalf("expected one retry completion call, got %d", len(ds.payloads)) + } + if got := ds.payloads[0]["parent_message_id"]; got != 77 { + t.Fatalf("retry parent_message_id mismatch: %#v", got) + } + if prompt, _ := ds.payloads[0]["prompt"].(string); !strings.Contains(prompt, shared.EmptyOutputRetrySuffix) { + t.Fatalf("expected retry suffix in payload prompt, got %q", prompt) + } + if !strings.Contains(retryPrompt, 
shared.EmptyOutputRetrySuffix) { + t.Fatalf("expected retry suffix in usage prompt, got %q", retryPrompt) + } +} + +func TestExecuteStreamWithRetrySwitchesManagedAccountBeforeFinal429(t *testing.T) { + t.Setenv("DS2API_CONFIG_JSON", `{ + "keys":["managed-key"], + "accounts":[ + {"email":"acc1@test.com","password":"pwd"}, + {"email":"acc2@test.com","password":"pwd"} + ] + }`) + store := config.LoadStore() + resolver := auth.NewResolver(store, account.NewPool(store), func(_ context.Context, acc config.Account) (string, error) { + return "token-" + acc.Identifier(), nil + }) + req, _ := http.NewRequest(http.MethodPost, "/", nil) + req.Header.Set("Authorization", "Bearer managed-key") + a, err := resolver.Determine(req) + if err != nil { + t.Fatalf("determine failed: %v", err) + } + defer resolver.Release(a) + + ds := &fakeDeepSeekCaller{ + sessionByAccount: true, + responses: []*http.Response{ + sseHTTPResponse(http.StatusOK, `data: {"response_message_id":12,"p":"response/thinking_content","v":"retry empty"}`), + sseHTTPResponse(http.StatusOK, `data: {"response_message_id":21,"p":"response/content","v":"ok from second account"}`), + }, + } + initial := sseHTTPResponse(http.StatusOK, `data: {"response_message_id":11,"p":"response/thinking_content","v":"first empty"}`) + payload := map[string]any{"prompt": "original prompt", "chat_session_id": "session-acc1@test.com"} + attemptsSeen := 0 + switchedSession := "" + + ExecuteStreamWithRetry(context.Background(), ds, a, initial, payload, "pow", StreamRetryOptions{ + Surface: "test.stream", + Stream: true, + RetryEnabled: true, + RetryMaxAttempts: 1, + UsagePrompt: "original prompt", + }, StreamRetryHooks{ + ConsumeAttempt: func(resp *http.Response, allowDeferEmpty bool) (bool, bool) { + defer func() { + if err := resp.Body.Close(); err != nil { + t.Fatalf("close failed: %v", err) + } + }() + body, _ := io.ReadAll(resp.Body) + attemptsSeen++ + if strings.Contains(string(body), "ok from second account") { + return true, false 
+ } + if !allowDeferEmpty { + t.Fatalf("expected empty attempt %d to be deferred before final 429", attemptsSeen) + } + return false, true + }, + ParentMessageID: func() int { + return 11 + attemptsSeen + }, + OnAccountSwitch: func(sessionID string) { + switchedSession = sessionID + }, + }) + + if attemptsSeen != 3 { + t.Fatalf("expected three stream attempts, got %d", attemptsSeen) + } + if switchedSession != "session-acc2@test.com" { + t.Fatalf("expected switched session id, got %q", switchedSession) + } + wantAccounts := []string{"acc1@test.com", "acc2@test.com"} + if len(ds.completionAccounts) != len(wantAccounts) { + t.Fatalf("completion accounts mismatch: got %v want %v", ds.completionAccounts, wantAccounts) + } + for i, want := range wantAccounts { + if ds.completionAccounts[i] != want { + t.Fatalf("completion account %d = %q want %q (all=%v)", i, ds.completionAccounts[i], want, ds.completionAccounts) + } + } + if got := ds.payloads[1]["chat_session_id"]; got != "session-acc2@test.com" { + t.Fatalf("switched payload session mismatch: %#v", got) + } + if prompt, _ := ds.payloads[1]["prompt"].(string); strings.Contains(prompt, shared.EmptyOutputRetrySuffix) { + t.Fatalf("expected switched-account prompt without empty-output suffix, got %q", prompt) + } +} diff --git a/internal/format/openai/render_responses.go b/internal/format/openai/render_responses.go index 0c7dfbe..5635f96 100644 --- a/internal/format/openai/render_responses.go +++ b/internal/format/openai/render_responses.go @@ -21,6 +21,18 @@ func BuildResponseObjectWithToolCalls(responseID, model, finalPrompt, finalThink output := make([]any, 0, 2) if len(detected) > 0 { exposedOutputText = "" + if strings.TrimSpace(finalThinking) != "" { + output = append(output, map[string]any{ + "type": "message", + "id": "msg_" + strings.ReplaceAll(uuid.NewString(), "-", ""), + "role": "assistant", + "status": "completed", + "content": []any{map[string]any{ + "type": "reasoning", + "text": finalThinking, + }}, + }) 
+ } output = append(output, toResponsesFunctionCallItems(detected, toolsRaw)...) } else { content := make([]any, 0, 2) diff --git a/internal/format/openai/render_test.go b/internal/format/openai/render_test.go index e174bd6..1c14f51 100644 --- a/internal/format/openai/render_test.go +++ b/internal/format/openai/render_test.go @@ -85,12 +85,24 @@ func TestBuildResponseObjectPromotesToolCallFromThinkingWhenTextEmpty(t *testing ) output, _ := obj["output"].([]any) - if len(output) != 1 { - t.Fatalf("expected one output item, got %#v", obj["output"]) + if len(output) != 2 { + t.Fatalf("expected reasoning message plus function_call output, got %#v", obj["output"]) } first, _ := output[0].(map[string]any) - if first["type"] != "function_call" { - t.Fatalf("expected function_call output, got %#v", first["type"]) + if first["type"] != "message" { + t.Fatalf("expected reasoning message output first, got %#v", first["type"]) + } + content, _ := first["content"].([]any) + if len(content) != 1 { + t.Fatalf("expected reasoning content, got %#v", first["content"]) + } + block0, _ := content[0].(map[string]any) + if block0["type"] != "reasoning" { + t.Fatalf("expected reasoning block, got %#v", block0["type"]) + } + second, _ := output[1].(map[string]any) + if second["type"] != "function_call" { + t.Fatalf("expected function_call output, got %#v", second["type"]) } } diff --git a/internal/httpapi/claude/handler_messages.go b/internal/httpapi/claude/handler_messages.go index 8478dc7..e22a1ed 100644 --- a/internal/httpapi/claude/handler_messages.go +++ b/internal/httpapi/claude/handler_messages.go @@ -145,7 +145,7 @@ func (h *Handler) handleClaudeDirectStream(w http.ResponseWriter, r *http.Reques return } streamReq := start.Request - h.handleClaudeStreamRealtime(w, r, start.Response, streamReq.ResponseModel, streamReq.Messages, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, historySession) + h.handleClaudeStreamRealtimeWithRetry(w, r, a, 
start.Response, start.Payload, start.Pow, streamReq.ResponseModel, streamReq.Messages, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.PromptTokenText, historySession) } func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store ConfigReader) bool { @@ -360,3 +360,112 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ OnFinalize: streamRuntime.onFinalize, }) } + +func (h *Handler) handleClaudeStreamRealtimeWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, promptTokenText string, historySession *responsehistory.Session) { + if resp.StatusCode != http.StatusOK { + defer func() { _ = resp.Body.Close() }() + body, _ := io.ReadAll(resp.Body) + if historySession != nil { + historySession.Error(resp.StatusCode, strings.TrimSpace(string(body)), "error", "", "") + } + writeClaudeError(w, http.StatusInternalServerError, string(body)) + return + } + + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache, no-transform") + w.Header().Set("Connection", "keep-alive") + w.Header().Set("X-Accel-Buffering", "no") + rc := http.NewResponseController(w) + _, canFlush := w.(http.Flusher) + if !canFlush { + config.Logger.Warn("[claude_stream] response writer does not support flush; streaming may be buffered") + } + + streamRuntime := newClaudeStreamRuntime( + w, + rc, + canFlush, + model, + messages, + thinkingEnabled, + searchEnabled, + stripReferenceMarkersEnabled(), + toolNames, + toolsRaw, + promptTokenText, + historySession, + ) + streamRuntime.sendMessageStart() + + completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{ + Surface: "claude.messages", + Stream: true, + RetryEnabled: true, + MaxAttempts: 3, + 
UsagePrompt: promptTokenText, + }, completionruntime.StreamRetryHooks{ + ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) { + return h.consumeClaudeStreamAttempt(r, currentResp, streamRuntime, thinkingEnabled, allowDeferEmpty) + }, + Finalize: func(_ int) { + streamRuntime.finalize("end_turn", false) + }, + ParentMessageID: func() int { + return streamRuntime.responseMessageID + }, + OnRetryPrompt: func(prompt string) { + streamRuntime.promptTokenText = prompt + }, + OnRetryFailure: func(status int, message, code string) { + streamRuntime.sendErrorWithCode(status, strings.TrimSpace(message), code) + }, + }) +} + +func (h *Handler) consumeClaudeStreamAttempt(r *http.Request, resp *http.Response, streamRuntime *claudeStreamRuntime, thinkingEnabled bool, allowDeferEmpty bool) (bool, bool) { + defer func() { _ = resp.Body.Close() }() + initialType := "text" + if thinkingEnabled { + initialType = "thinking" + } + finalReason := streamengine.StopReason("") + var scannerErr error + streamengine.ConsumeSSE(streamengine.ConsumeConfig{ + Context: r.Context(), + Body: resp.Body, + ThinkingEnabled: thinkingEnabled, + InitialType: initialType, + KeepAliveInterval: claudeStreamPingInterval, + IdleTimeout: claudeStreamIdleTimeout, + MaxKeepAliveNoInput: claudeStreamMaxKeepaliveCnt, + }, streamengine.ConsumeHooks{ + OnKeepAlive: func() { + streamRuntime.sendPing() + }, + OnParsed: streamRuntime.onParsed, + OnFinalize: func(reason streamengine.StopReason, err error) { + finalReason = reason + scannerErr = err + }, + }) + if string(finalReason) == "upstream_error" { + if streamRuntime.history != nil { + streamRuntime.history.Error(500, streamRuntime.upstreamErr, "upstream_error", responsehistory.ThinkingForArchive(streamRuntime.rawThinking.String(), streamRuntime.toolDetectionThinking.String(), streamRuntime.thinking.String()), responsehistory.TextForArchive(streamRuntime.rawText.String(), streamRuntime.text.String())) + } + 
streamRuntime.sendError(streamRuntime.upstreamErr) + return true, false + } + if scannerErr != nil { + if streamRuntime.history != nil { + streamRuntime.history.Error(500, scannerErr.Error(), "error", responsehistory.ThinkingForArchive(streamRuntime.rawThinking.String(), streamRuntime.toolDetectionThinking.String(), streamRuntime.thinking.String()), responsehistory.TextForArchive(streamRuntime.rawText.String(), streamRuntime.text.String())) + } + streamRuntime.sendError(scannerErr.Error()) + return true, false + } + terminalWritten := streamRuntime.finalize("end_turn", allowDeferEmpty) + if terminalWritten { + return true, false + } + return false, true +} diff --git a/internal/httpapi/claude/handler_util_test.go b/internal/httpapi/claude/handler_util_test.go index d69dc25..a624b01 100644 --- a/internal/httpapi/claude/handler_util_test.go +++ b/internal/httpapi/claude/handler_util_test.go @@ -93,14 +93,51 @@ func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) { t.Fatalf("expected call id preserved, got %#v", call) } content, _ := m["content"].(string) - if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) { + if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) { t.Fatalf("expected assistant content to include DSML tool call history, got %q", content) } - if !containsStr(content, `<|DSML|parameter name="query">`) { + if !containsStr(content, `<|DSML|parameter name="query">`) { t.Fatalf("expected assistant content to include serialized parameters, got %q", content) } } +func TestNormalizeClaudeMessagesPreservesThinkingOnToolUseHistory(t *testing.T) { + msgs := []any{ + map[string]any{ + "role": "assistant", + "content": []any{ + map[string]any{"type": "thinking", "thinking": "need live search before answering"}, + map[string]any{ + "type": "tool_use", + "id": "call_1", + "name": "search_web", + "input": map[string]any{"query": 
"latest"}, + }, + }, + }, + } + + got := normalizeClaudeMessages(msgs) + if len(got) != 1 { + t.Fatalf("expected one normalized tool-call message, got %#v", got) + } + m := got[0].(map[string]any) + if m["reasoning_content"] != "need live search before answering" { + t.Fatalf("expected thinking preserved as reasoning_content, got %#v", m) + } + tc, _ := m["tool_calls"].([]any) + if len(tc) != 1 { + t.Fatalf("expected one tool call, got %#v", m["tool_calls"]) + } + prompt := buildClaudePromptTokenText(got, true) + if !containsStr(prompt, "[reasoning_content]\nneed live search before answering\n[/reasoning_content]") { + t.Fatalf("expected thinking in prompt history, got %q", prompt) + } + if !containsStr(prompt, `<|DSML|invoke name="search_web">`) { + t.Fatalf("expected tool call in prompt history, got %q", prompt) + } +} + func TestNormalizeClaudeMessagesDoesNotPromoteUserToolUse(t *testing.T) { msgs := []any{ map[string]any{ diff --git a/internal/httpapi/claude/handler_utils.go b/internal/httpapi/claude/handler_utils.go index 5c53958..e3537b4 100644 --- a/internal/httpapi/claude/handler_utils.go +++ b/internal/httpapi/claude/handler_utils.go @@ -25,14 +25,21 @@ func normalizeClaudeMessages(messages []any) []any { switch content := msg["content"].(type) { case []any: textParts := make([]string, 0, len(content)) + pendingThinking := "" flushText := func() { if len(textParts) == 0 { return } - out = append(out, map[string]any{ + message := map[string]any{ "role": role, "content": strings.Join(textParts, "\n"), - }) + } + if role == "assistant" && strings.TrimSpace(pendingThinking) != "" { + message["reasoning_content"] = pendingThinking + message["content"] = prependClaudeReasoningForPrompt(pendingThinking, safeStringValue(message["content"])) + pendingThinking = "" + } + out = append(out, message) textParts = textParts[:0] } for _, block := range content { @@ -46,10 +53,29 @@ func normalizeClaudeMessages(messages []any) []any { if t, ok := b["text"].(string); ok { 
textParts = append(textParts, t) } + case "thinking": + if role == "assistant" { + if thinking := extractClaudeThinkingBlockText(b); thinking != "" { + if pendingThinking == "" { + pendingThinking = thinking + } else { + pendingThinking += "\n" + thinking + } + } + continue + } + if raw := strings.TrimSpace(formatClaudeUnknownBlockForPrompt(b)); raw != "" { + textParts = append(textParts, raw) + } case "tool_use": if role == "assistant" { flushText() if toolMsg := normalizeClaudeToolUseToAssistant(b, state); toolMsg != nil { + if strings.TrimSpace(pendingThinking) != "" { + toolMsg["reasoning_content"] = pendingThinking + toolMsg["content"] = prependClaudeReasoningForPrompt(pendingThinking, safeStringValue(toolMsg["content"])) + pendingThinking = "" + } out = append(out, toolMsg) } continue @@ -69,6 +95,13 @@ func normalizeClaudeMessages(messages []any) []any { } } flushText() + if role == "assistant" && strings.TrimSpace(pendingThinking) != "" { + out = append(out, map[string]any{ + "role": "assistant", + "reasoning_content": pendingThinking, + "content": formatClaudeReasoningForPrompt(pendingThinking), + }) + } default: copied := cloneMap(msg) out = append(out, copied) @@ -77,6 +110,39 @@ func normalizeClaudeMessages(messages []any) []any { return out } +func prependClaudeReasoningForPrompt(reasoning, content string) string { + reasoning = strings.TrimSpace(reasoning) + content = strings.TrimSpace(content) + if reasoning == "" { + return content + } + block := formatClaudeReasoningForPrompt(reasoning) + if content == "" { + return block + } + return block + "\n\n" + content +} + +func formatClaudeReasoningForPrompt(reasoning string) string { + reasoning = strings.TrimSpace(reasoning) + if reasoning == "" { + return "" + } + return "[reasoning_content]\n" + reasoning + "\n[/reasoning_content]" +} + +func extractClaudeThinkingBlockText(block map[string]any) string { + if block == nil { + return "" + } + for _, key := range []string{"thinking", "text", "content"} { 
+ if text := strings.TrimSpace(safeStringValue(block[key])); text != "" { + return text + } + } + return "" +} + func buildClaudeToolPrompt(tools []any) string { toolSchemas := make([]string, 0, len(tools)) names := make([]string, 0, len(tools)) diff --git a/internal/httpapi/claude/stream_runtime_core.go b/internal/httpapi/claude/stream_runtime_core.go index 9c9e656..c558601 100644 --- a/internal/httpapi/claude/stream_runtime_core.go +++ b/internal/httpapi/claude/stream_runtime_core.go @@ -29,9 +29,10 @@ type claudeStreamRuntime struct { bufferToolContent bool stripReferenceMarkers bool - messageID string - thinking strings.Builder - text strings.Builder + messageID string + thinking strings.Builder + text strings.Builder + responseMessageID int sieve toolstream.State rawText strings.Builder @@ -92,6 +93,9 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse s.upstreamErr = parsed.ErrorMessage return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("upstream_error")} } + if parsed.ResponseMessageID > 0 { + s.responseMessageID = parsed.ResponseMessageID + } if parsed.Stop { return streamengine.ParsedDecision{Stop: true} } diff --git a/internal/httpapi/claude/stream_runtime_emit.go b/internal/httpapi/claude/stream_runtime_emit.go index e071cdc..7425a55 100644 --- a/internal/httpapi/claude/stream_runtime_emit.go +++ b/internal/httpapi/claude/stream_runtime_emit.go @@ -22,16 +22,27 @@ func (s *claudeStreamRuntime) send(event string, v any) { } func (s *claudeStreamRuntime) sendError(message string) { + s.sendErrorWithCode(500, message, "internal_error") +} + +func (s *claudeStreamRuntime) sendErrorWithCode(status int, message, code string) { msg := strings.TrimSpace(message) if msg == "" { msg = "upstream stream error" } + if code == "" { + code = "internal_error" + } + errType := "api_error" + if status == 429 { + errType = "rate_limit_error" + } s.send("error", map[string]any{ "type": "error", "error": 
map[string]any{ - "type": "api_error", + "type": errType, "message": msg, - "code": "internal_error", + "code": code, "param": nil, }, }) diff --git a/internal/httpapi/claude/stream_runtime_finalize.go b/internal/httpapi/claude/stream_runtime_finalize.go index f63b125..07be629 100644 --- a/internal/httpapi/claude/stream_runtime_finalize.go +++ b/internal/httpapi/claude/stream_runtime_finalize.go @@ -63,13 +63,10 @@ func (s *claudeStreamRuntime) sendToolUseBlock(idx int, tc toolcall.ParsedToolCa }) } -func (s *claudeStreamRuntime) finalize(stopReason string) { +func (s *claudeStreamRuntime) finalize(stopReason string, deferEmptyOutput bool) bool { if s.ended { - return + return true } - s.ended = true - - s.closeThinkingBlock() if s.bufferToolContent { for _, evt := range toolstream.Flush(&s.sieve, s.toolNames) { @@ -123,6 +120,7 @@ func (s *claudeStreamRuntime) finalize(stopReason string) { RawThinking: s.rawThinking.String(), VisibleThinking: s.thinking.String(), DetectionThinking: s.toolDetectionThinking.String(), + ResponseMessageID: s.responseMessageID, AlreadyEmittedCalls: s.toolCallsDetected, AlreadyEmittedToolRaw: s.toolCallsDetected, }, assistantturn.BuildOptions{ @@ -137,6 +135,22 @@ func (s *claudeStreamRuntime) finalize(stopReason string) { outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{ AlreadyEmittedToolCalls: s.toolCallsDetected, }) + if outcome.ShouldFail { + if deferEmptyOutput { + return false + } + s.ended = true + s.closeThinkingBlock() + s.closeTextBlock() + if s.history != nil { + s.history.Error(outcome.Error.Status, outcome.Error.Message, outcome.Error.Code, responsehistory.ThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking), responsehistory.TextForArchive(turn.RawText, turn.Text)) + } + s.sendErrorWithCode(outcome.Error.Status, outcome.Error.Message, outcome.Error.Code) + return true + } + + s.ended = true + s.closeThinkingBlock() if s.bufferToolContent && !s.toolCallsDetected { if 
len(turn.ToolCalls) > 0 { @@ -197,6 +211,7 @@ func (s *claudeStreamRuntime) finalize(stopReason string) { }, }) s.send("message_stop", map[string]any{"type": "message_stop"}) + return true } func (s *claudeStreamRuntime) onFinalize(reason streamengine.StopReason, scannerErr error) { @@ -214,5 +229,5 @@ func (s *claudeStreamRuntime) onFinalize(reason streamengine.StopReason, scanner s.sendError(scannerErr.Error()) return } - s.finalize("end_turn") + s.finalize("end_turn", false) } diff --git a/internal/httpapi/gemini/convert_messages.go b/internal/httpapi/gemini/convert_messages.go index f6af145..6dd8f50 100644 --- a/internal/httpapi/gemini/convert_messages.go +++ b/internal/httpapi/gemini/convert_messages.go @@ -44,14 +44,20 @@ func geminiMessagesFromRequest(req map[string]any) []any { } textParts := make([]string, 0, len(parts)) + pendingThinking := "" flushText := func() { if len(textParts) == 0 { return } - out = append(out, map[string]any{ + msg := map[string]any{ "role": role, "content": strings.Join(textParts, "\n"), - }) + } + if role == "assistant" && strings.TrimSpace(pendingThinking) != "" { + msg["reasoning_content"] = pendingThinking + pendingThinking = "" + } + out = append(out, msg) textParts = textParts[:0] } @@ -61,6 +67,14 @@ func geminiMessagesFromRequest(req map[string]any) []any { continue } if text := strings.TrimSpace(asString(part["text"])); text != "" { + if role == "assistant" && isGeminiThoughtPart(part) { + if pendingThinking == "" { + pendingThinking = text + } else { + pendingThinking += "\n" + text + } + continue + } textParts = append(textParts, text) continue } @@ -75,7 +89,7 @@ func geminiMessagesFromRequest(req map[string]any) []any { } } lastToolCallIDByName[strings.ToLower(name)] = callID - out = append(out, map[string]any{ + msg := map[string]any{ "role": "assistant", "tool_calls": []any{ map[string]any{ @@ -87,7 +101,12 @@ func geminiMessagesFromRequest(req map[string]any) []any { }, }, }, - }) + } + if 
strings.TrimSpace(pendingThinking) != "" { + msg["reasoning_content"] = pendingThinking + pendingThinking = "" + } + out = append(out, msg) } continue } @@ -132,10 +151,29 @@ func geminiMessagesFromRequest(req map[string]any) []any { } } flushText() + if role == "assistant" && strings.TrimSpace(pendingThinking) != "" { + out = append(out, map[string]any{ + "role": "assistant", + "reasoning_content": pendingThinking, + }) + } } return out } +func isGeminiThoughtPart(part map[string]any) bool { + if part == nil { + return false + } + if v, ok := part["thought"].(bool); ok { + return v + } + if v, ok := part["thoughtSignature"].(string); ok && strings.TrimSpace(v) != "" { + return true + } + return false +} + func normalizeGeminiSystemInstruction(raw any) string { switch v := raw.(type) { case string: diff --git a/internal/httpapi/gemini/convert_messages_test.go b/internal/httpapi/gemini/convert_messages_test.go index a5191b9..a429325 100644 --- a/internal/httpapi/gemini/convert_messages_test.go +++ b/internal/httpapi/gemini/convert_messages_test.go @@ -1,6 +1,7 @@ package gemini import ( + "ds2api/internal/promptcompat" "strings" "testing" ) @@ -53,6 +54,46 @@ func TestGeminiMessagesFromRequestPreservesFunctionRoundtrip(t *testing.T) { } } +func TestGeminiMessagesFromRequestPreservesThoughtOnFunctionCallHistory(t *testing.T) { + req := map[string]any{ + "contents": []any{ + map[string]any{ + "role": "model", + "parts": []any{ + map[string]any{"text": "need current state before answering", "thought": true}, + map[string]any{ + "functionCall": map[string]any{ + "id": "call_g1", + "name": "search_web", + "args": map[string]any{"query": "ai"}, + }, + }, + }, + }, + }, + } + + got := geminiMessagesFromRequest(req) + if len(got) != 1 { + t.Fatalf("expected one normalized message, got %#v", got) + } + assistant, _ := got[0].(map[string]any) + if assistant["reasoning_content"] != "need current state before answering" { + t.Fatalf("expected thought preserved as 
reasoning_content, got %#v", assistant) + } + tc, _ := assistant["tool_calls"].([]any) + if len(tc) != 1 { + t.Fatalf("expected one tool call, got %#v", assistant["tool_calls"]) + } + prompt, _ := promptcompat.BuildOpenAIPromptForAdapter(got, nil, "", true) + if !strings.Contains(prompt, "[reasoning_content]\nneed current state before answering\n[/reasoning_content]") { + t.Fatalf("expected thought in prompt history, got %q", prompt) + } + if !strings.Contains(prompt, `<|DSML|invoke name="search_web">`) { + t.Fatalf("expected tool call in prompt history, got %q", prompt) + } +} + func TestGeminiMessagesFromRequestPreservesUnknownPartAsRawJSONText(t *testing.T) { req := map[string]any{ "contents": []any{ diff --git a/internal/httpapi/gemini/handler_generate.go b/internal/httpapi/gemini/handler_generate.go index b2a4114..784ff75 100644 --- a/internal/httpapi/gemini/handler_generate.go +++ b/internal/httpapi/gemini/handler_generate.go @@ -137,7 +137,7 @@ func (h *Handler) handleGeminiDirectStream(w http.ResponseWriter, r *http.Reques return } streamReq := start.Request - h.handleStreamGenerateContent(w, r, start.Response, streamReq.ResponseModel, streamReq.PromptTokenText, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, historySession) + h.handleStreamGenerateContentWithRetry(w, r, a, start.Response, start.Payload, start.Pow, streamReq.ResponseModel, streamReq.PromptTokenText, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, historySession) } func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream bool) bool { diff --git a/internal/httpapi/gemini/handler_stream_runtime.go b/internal/httpapi/gemini/handler_stream_runtime.go index de80fab..a1244ad 100644 --- a/internal/httpapi/gemini/handler_stream_runtime.go +++ b/internal/httpapi/gemini/handler_stream_runtime.go @@ -1,6 +1,7 @@ package gemini import ( + "context" "encoding/json" "io" "net/http" @@ -8,6 +9,8 @@ import ( "time" 
"ds2api/internal/assistantturn" + "ds2api/internal/auth" + "ds2api/internal/completionruntime" dsprotocol "ds2api/internal/deepseek/protocol" "ds2api/internal/responsehistory" "ds2api/internal/sse" @@ -54,7 +57,7 @@ func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Req }, streamengine.ConsumeHooks{ OnParsed: runtime.onParsed, OnFinalize: func(_ streamengine.StopReason, _ error) { - runtime.finalize() + runtime.finalize(false) }, }) } @@ -78,9 +81,83 @@ type geminiStreamRuntime struct { accumulator *assistantturn.Accumulator contentFilter bool responseMessageID int + finalErrorStatus int + finalErrorMessage string + finalErrorCode string history *responsehistory.Session } +func (h *Handler) handleStreamGenerateContentWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, historySession *responsehistory.Session) { + if resp.StatusCode != http.StatusOK { + defer func() { _ = resp.Body.Close() }() + body, _ := io.ReadAll(resp.Body) + if historySession != nil { + historySession.Error(resp.StatusCode, strings.TrimSpace(string(body)), "error", "", "") + } + writeGeminiError(w, resp.StatusCode, strings.TrimSpace(string(body))) + return + } + + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache, no-transform") + w.Header().Set("Connection", "keep-alive") + w.Header().Set("X-Accel-Buffering", "no") + + rc := http.NewResponseController(w) + _, canFlush := w.(http.Flusher) + runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, stripReferenceMarkersEnabled(), toolNames, toolsRaw, historySession) + + completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{ + Surface: "gemini.generate_content", + Stream: true, + RetryEnabled: true, + 
MaxAttempts: 3, + UsagePrompt: finalPrompt, + }, completionruntime.StreamRetryHooks{ + ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) { + return h.consumeGeminiStreamAttempt(r.Context(), currentResp, runtime, thinkingEnabled, allowDeferEmpty) + }, + Finalize: func(_ int) { + runtime.finalize(false) + }, + ParentMessageID: func() int { + return runtime.responseMessageID + }, + OnRetryPrompt: func(prompt string) { + runtime.finalPrompt = prompt + }, + OnRetryFailure: func(status int, message, _ string) { + runtime.sendErrorChunk(status, strings.TrimSpace(message)) + }, + }) +} + +func (h *Handler) consumeGeminiStreamAttempt(ctx context.Context, resp *http.Response, runtime *geminiStreamRuntime, thinkingEnabled bool, allowDeferEmpty bool) (bool, bool) { + defer func() { _ = resp.Body.Close() }() + initialType := "text" + if thinkingEnabled { + initialType = "thinking" + } + streamengine.ConsumeSSE(streamengine.ConsumeConfig{ + Context: ctx, + Body: resp.Body, + ThinkingEnabled: thinkingEnabled, + InitialType: initialType, + KeepAliveInterval: time.Duration(dsprotocol.KeepAliveTimeout) * time.Second, + IdleTimeout: time.Duration(dsprotocol.StreamIdleTimeout) * time.Second, + MaxKeepAliveNoInput: dsprotocol.MaxKeepaliveCount, + }, streamengine.ConsumeHooks{ + OnParsed: runtime.onParsed, + OnFinalize: func(_ streamengine.StopReason, _ error) { + }, + }) + terminalWritten := runtime.finalize(allowDeferEmpty) + if terminalWritten { + return true, false + } + return false, true +} + //nolint:unused // retained for native Gemini stream handling path. 
func newGeminiStreamRuntime( w http.ResponseWriter, @@ -127,6 +204,35 @@ func (s *geminiStreamRuntime) sendChunk(payload map[string]any) { } } +func (s *geminiStreamRuntime) sendErrorChunk(status int, message string) { + msg := strings.TrimSpace(message) + if msg == "" { + msg = http.StatusText(status) + } + errorStatus := "INVALID_ARGUMENT" + switch status { + case http.StatusUnauthorized: + errorStatus = "UNAUTHENTICATED" + case http.StatusForbidden: + errorStatus = "PERMISSION_DENIED" + case http.StatusTooManyRequests: + errorStatus = "RESOURCE_EXHAUSTED" + case http.StatusNotFound: + errorStatus = "NOT_FOUND" + default: + if status >= 500 { + errorStatus = "INTERNAL" + } + } + s.sendChunk(map[string]any{ + "error": map[string]any{ + "code": status, + "message": msg, + "status": errorStatus, + }, + }) +} + //nolint:unused // retained for native Gemini stream handling path. func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision { if !parsed.Parsed { @@ -192,7 +298,7 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse } //nolint:unused // retained for native Gemini stream handling path. 
-func (s *geminiStreamRuntime) finalize() { +func (s *geminiStreamRuntime) finalize(deferEmptyOutput bool) bool { rawText, text, rawThinking, thinking, detectionThinking := s.accumulator.Snapshot() turn := assistantturn.BuildTurnFromStreamSnapshot(assistantturn.StreamSnapshot{ RawText: rawText, @@ -211,6 +317,19 @@ func (s *geminiStreamRuntime) finalize() { ToolsRaw: s.toolsRaw, }) outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{}) + if outcome.ShouldFail { + if deferEmptyOutput { + s.finalErrorStatus = outcome.Error.Status + s.finalErrorMessage = outcome.Error.Message + s.finalErrorCode = outcome.Error.Code + return false + } + if s.history != nil { + s.history.Error(outcome.Error.Status, outcome.Error.Message, outcome.Error.Code, responsehistory.ThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking), responsehistory.TextForArchive(turn.RawText, turn.Text)) + } + s.sendErrorChunk(outcome.Error.Status, outcome.Error.Message) + return true + } if s.history != nil { s.history.Success( http.StatusOK, @@ -257,4 +376,5 @@ func (s *geminiStreamRuntime) finalize() { "totalTokenCount": outcome.Usage.TotalTokens, }, }) + return true } diff --git a/internal/httpapi/openai/chat/empty_retry_runtime.go b/internal/httpapi/openai/chat/empty_retry_runtime.go index 748a39b..1dc8ca9 100644 --- a/internal/httpapi/openai/chat/empty_retry_runtime.go +++ b/internal/httpapi/openai/chat/empty_retry_runtime.go @@ -4,11 +4,11 @@ import ( "context" "io" "net/http" - "strings" "time" "ds2api/internal/assistantturn" "ds2api/internal/auth" + "ds2api/internal/completionruntime" "ds2api/internal/config" dsprotocol "ds2api/internal/deepseek/protocol" openaifmt "ds2api/internal/format/openai" @@ -17,191 +17,94 @@ import ( streamengine "ds2api/internal/stream" ) -type chatNonStreamResult struct { - rawThinking string - rawText string - thinking string - toolDetectionThinking string - text string - contentFilter bool - detectedCalls int - body 
map[string]any - finishReason string - responseMessageID int - outputError *assistantturn.OutputError -} - -func (r chatNonStreamResult) historyText() string { - return historyTextForArchive(r.rawText, r.text) -} - -func (r chatNonStreamResult) historyThinking() string { - return historyThinkingForArchive(r.rawThinking, r.toolDetectionThinking, r.thinking) -} - func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Context, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, historySession *chatHistorySession) { - attempts := 0 - currentResp := resp - usagePrompt := finalPrompt - accumulatedThinking := "" - accumulatedRawThinking := "" - accumulatedToolDetectionThinking := "" - for { - result, ok := h.collectChatNonStreamAttempt(w, currentResp, completionID, model, usagePrompt, thinkingEnabled, searchEnabled, toolNames, toolsRaw) - if !ok { - return - } - accumulatedThinking += sse.TrimContinuationOverlap(accumulatedThinking, result.thinking) - accumulatedRawThinking += sse.TrimContinuationOverlap(accumulatedRawThinking, result.rawThinking) - accumulatedToolDetectionThinking += sse.TrimContinuationOverlap(accumulatedToolDetectionThinking, result.toolDetectionThinking) - result.thinking = accumulatedThinking - result.rawThinking = accumulatedRawThinking - result.toolDetectionThinking = accumulatedToolDetectionThinking - detected := detectAssistantToolCalls(result.rawText, result.text, result.rawThinking, result.toolDetectionThinking, toolNames) - result.detectedCalls = len(detected.Calls) - result.body = openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, result.thinking, result.text, detected.Calls, toolsRaw) - addRefFileTokensToUsage(result.body, refFileTokens) - result.finishReason = chatFinishReason(result.body) - if !shouldRetryChatNonStream(result, attempts) { - 
h.finishChatNonStreamResult(w, result, attempts, usagePrompt, refFileTokens, historySession) - return - } - - attempts++ - config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "parent_message_id", result.responseMessageID) - retryPow, powErr := h.DS.GetPow(ctx, a, 3) - if powErr != nil { - config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", powErr) - retryPow = pow - } - retryPayload := clonePayloadForEmptyOutputRetry(payload, result.responseMessageID) - nextResp, err := h.DS.CallCompletion(ctx, a, retryPayload, retryPow, 3) - if err != nil { - if historySession != nil { - historySession.error(http.StatusInternalServerError, "Failed to get completion.", "error", result.historyThinking(), result.historyText()) - } - writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.") - config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", err) - return - } - usagePrompt = usagePromptWithEmptyOutputRetry(usagePrompt, attempts) - currentResp = nextResp - } -} - -func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http.Response, completionID, model, usagePrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any) (chatNonStreamResult, bool) { if resp.StatusCode != http.StatusOK { defer func() { _ = resp.Body.Close() }() body, _ := io.ReadAll(resp.Body) - writeOpenAIError(w, resp.StatusCode, string(body)) - return chatNonStreamResult{}, false - } - result := sse.CollectStream(resp, thinkingEnabled, true) - turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{ - Model: model, - Prompt: usagePrompt, - SearchEnabled: searchEnabled, - ToolNames: toolNames, - ToolsRaw: toolsRaw, - 
}) - respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, turn.Thinking, turn.Text, turn.ToolCalls, toolsRaw) - return chatNonStreamResult{ - rawThinking: result.Thinking, - rawText: result.Text, - thinking: turn.Thinking, - toolDetectionThinking: result.ToolDetectionThinking, - text: turn.Text, - contentFilter: result.ContentFilter, - detectedCalls: len(turn.ToolCalls), - body: respBody, - finishReason: chatFinishReason(respBody), - responseMessageID: result.ResponseMessageID, - outputError: turn.Error, - }, true -} - -func (h *Handler) finishChatNonStreamResult(w http.ResponseWriter, result chatNonStreamResult, attempts int, usagePrompt string, refFileTokens int, historySession *chatHistorySession) { - if result.detectedCalls == 0 && strings.TrimSpace(result.text) == "" { - status, message, code := upstreamEmptyOutputDetail(result.contentFilter, result.text, result.thinking) - if result.outputError != nil { - status, message, code = result.outputError.Status, result.outputError.Message, result.outputError.Code - } if historySession != nil { - historySession.error(status, message, code, result.historyThinking(), result.historyText()) + historySession.error(resp.StatusCode, string(body), "error", "", "") } - writeOpenAIErrorWithCode(w, status, message, code) - config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", false, "retry_attempts", attempts, "success_source", "none", "content_filter", result.contentFilter) + writeOpenAIError(w, resp.StatusCode, string(body)) return } - if historySession != nil { - historySession.success(http.StatusOK, result.historyThinking(), result.historyText(), result.finishReason, openaifmt.BuildChatUsageForModel("", usagePrompt, result.thinking, result.text, refFileTokens)) + stdReq := promptcompat.StandardRequest{ + Surface: "chat.completions", + ResponseModel: model, + PromptTokenText: finalPrompt, + FinalPrompt: finalPrompt, + RefFileTokens: 
refFileTokens, + Thinking: thinkingEnabled, + Search: searchEnabled, + ToolNames: toolNames, + ToolsRaw: toolsRaw, + ToolChoice: promptcompat.DefaultToolChoicePolicy(), } - writeJSON(w, http.StatusOK, result.body) - source := "first_attempt" - if attempts > 0 { - source = "synthetic_retry" - } - config.Logger.Info("[openai_empty_retry] completed", "surface", "chat.completions", "stream", false, "retry_attempts", attempts, "success_source", source) -} - -func chatFinishReason(respBody map[string]any) string { - if choices, ok := respBody["choices"].([]map[string]any); ok && len(choices) > 0 { - if fr, _ := choices[0]["finish_reason"].(string); strings.TrimSpace(fr) != "" { - return fr + retryEnabled := h != nil && h.DS != nil && emptyOutputRetryEnabled() + result, outErr := completionruntime.ExecuteNonStreamStartedWithRetry(ctx, h.DS, a, completionruntime.StartResult{ + SessionID: completionID, + Payload: payload, + Pow: pow, + Response: resp, + Request: stdReq, + }, completionruntime.Options{ + RetryEnabled: retryEnabled, + RetryMaxAttempts: emptyOutputRetryMaxAttempts(), + }) + if outErr != nil { + if historySession != nil { + historySession.error(outErr.Status, outErr.Message, outErr.Code, historyThinkingForArchive(result.Turn.RawThinking, result.Turn.DetectionThinking, result.Turn.Thinking), historyTextForArchive(result.Turn.RawText, result.Turn.Text)) } + writeOpenAIErrorWithCode(w, outErr.Status, outErr.Message, outErr.Code) + return } - return "stop" + respBody := openaifmt.BuildChatCompletionWithToolCalls(result.SessionID, model, result.Turn.Prompt, result.Turn.Thinking, result.Turn.Text, result.Turn.ToolCalls, toolsRaw) + respBody["usage"] = assistantturn.OpenAIChatUsage(result.Turn) + outcome := assistantturn.FinalizeTurn(result.Turn, assistantturn.FinalizeOptions{}) + if historySession != nil { + historySession.success(http.StatusOK, historyThinkingForArchive(result.Turn.RawThinking, result.Turn.DetectionThinking, result.Turn.Thinking), 
historyTextForArchive(result.Turn.RawText, result.Turn.Text), outcome.FinishReason, assistantturn.OpenAIChatUsage(result.Turn)) + } + writeJSON(w, http.StatusOK, respBody) } -func shouldRetryChatNonStream(result chatNonStreamResult, attempts int) bool { - return emptyOutputRetryEnabled() && - attempts < emptyOutputRetryMaxAttempts() && - !result.contentFilter && - result.detectedCalls == 0 && - strings.TrimSpace(result.text) == "" -} - -func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, historySession *chatHistorySession) { +func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID string, sessionIDRef *string, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, historySession *chatHistorySession) { streamRuntime, initialType, ok := h.prepareChatStreamRuntime(w, resp, completionID, model, finalPrompt, refFileTokens, thinkingEnabled, searchEnabled, toolNames, toolsRaw, toolChoice, historySession) if !ok { return } - attempts := 0 - currentResp := resp - for { - terminalWritten, retryable := h.consumeChatStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, historySession, attempts < emptyOutputRetryMaxAttempts()) - if terminalWritten { - logChatStreamTerminal(streamRuntime, attempts) - return - } - if !retryable || !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() { + completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{ + Surface: "chat.completions", + Stream: true, + 
RetryEnabled: emptyOutputRetryEnabled(), + RetryMaxAttempts: emptyOutputRetryMaxAttempts(), + MaxAttempts: 3, + UsagePrompt: finalPrompt, + }, completionruntime.StreamRetryHooks{ + ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) { + return h.consumeChatStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, historySession, allowDeferEmpty) + }, + Finalize: func(attempts int) { streamRuntime.finalize("stop", false) recordChatStreamHistory(streamRuntime, historySession) config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", true, "retry_attempts", attempts, "success_source", "none") - return - } - attempts++ - config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID) - retryPow, powErr := h.DS.GetPow(r.Context(), a, 3) - if powErr != nil { - config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", powErr) - retryPow = pow - } - nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3) - if err != nil { - failChatStreamRetry(streamRuntime, historySession, http.StatusInternalServerError, "Failed to get completion.", "error") - config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", err) - return - } - if nextResp.StatusCode != http.StatusOK { - defer func() { _ = nextResp.Body.Close() }() - body, _ := io.ReadAll(nextResp.Body) - failChatStreamRetry(streamRuntime, historySession, nextResp.StatusCode, string(body), "error") - return - } - streamRuntime.finalPrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts) - currentResp = 
nextResp - } + }, + ParentMessageID: func() int { + return streamRuntime.responseMessageID + }, + OnRetryPrompt: func(prompt string) { + streamRuntime.finalPrompt = prompt + }, + OnRetryFailure: func(status int, message, code string) { + failChatStreamRetry(streamRuntime, historySession, status, message, code) + }, + OnAccountSwitch: func(sessionID string) { + if sessionIDRef != nil { + *sessionIDRef = sessionID + } + }, + OnTerminal: func(attempts int) { + logChatStreamTerminal(streamRuntime, attempts) + }, + }) } func (h *Handler) prepareChatStreamRuntime(w http.ResponseWriter, resp *http.Response, completionID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, historySession *chatHistorySession) (*chatStreamRuntime, string, bool) { diff --git a/internal/httpapi/openai/chat/handler.go b/internal/httpapi/openai/chat/handler.go index f3b4584..da0ad4a 100644 --- a/internal/httpapi/openai/chat/handler.go +++ b/internal/httpapi/openai/chat/handler.go @@ -106,10 +106,6 @@ func cleanVisibleOutput(text string, stripReferenceMarkers bool) string { return shared.CleanVisibleOutput(text, stripReferenceMarkers) } -func upstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int, string, string) { - return shared.UpstreamEmptyOutputDetail(contentFilter, text, thinking) -} - func emptyOutputRetryEnabled() bool { return shared.EmptyOutputRetryEnabled() } @@ -118,14 +114,6 @@ func emptyOutputRetryMaxAttempts() int { return shared.EmptyOutputRetryMaxAttempts() } -func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any { - return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID) -} - -func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string { - return shared.UsagePromptWithEmptyOutputRetry(originalPrompt, retryAttempts) -} - func formatIncrementalStreamToolCallDeltas(deltas 
[]toolstream.ToolCallDelta, ids map[int]string) []map[string]any { return shared.FormatIncrementalStreamToolCallDeltas(deltas, ids) } @@ -137,7 +125,3 @@ func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, func formatFinalStreamToolCallsWithStableIDs(calls []toolcall.ParsedToolCall, ids map[int]string, toolsRaw any) []map[string]any { return shared.FormatFinalStreamToolCallsWithStableIDs(calls, ids, toolsRaw) } - -func detectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult { - return shared.DetectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking, toolNames) -} diff --git a/internal/httpapi/openai/chat/handler_chat.go b/internal/httpapi/openai/chat/handler_chat.go index 61703a0..9d86cf7 100644 --- a/internal/httpapi/openai/chat/handler_chat.go +++ b/internal/httpapi/openai/chat/handler_chat.go @@ -114,7 +114,7 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) { } streamReq := start.Request refFileTokens := streamReq.RefFileTokens - h.handleStreamWithRetry(w, r, a, start.Response, start.Payload, start.Pow, sessionID, streamReq.ResponseModel, streamReq.PromptTokenText, refFileTokens, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.ToolChoice, historySession) + h.handleStreamWithRetry(w, r, a, start.Response, start.Payload, start.Pow, sessionID, &sessionID, streamReq.ResponseModel, streamReq.PromptTokenText, refFileTokens, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.ToolChoice, historySession) } func (h *Handler) autoDeleteRemoteSession(ctx context.Context, a *auth.RequestAuth, sessionID string) { diff --git a/internal/httpapi/openai/chat/handler_toolcall_test.go b/internal/httpapi/openai/chat/handler_toolcall_test.go index 446b480..a42d7d4 100644 --- a/internal/httpapi/openai/chat/handler_toolcall_test.go +++ 
b/internal/httpapi/openai/chat/handler_toolcall_test.go @@ -85,8 +85,7 @@ func streamFinishReason(frames []map[string]any) string { return "" } -// Backward-compatible alias for historical test name used in CI logs. -func TestHandleNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) { +func TestHandleNonStreamSingleAttemptReturns503WhenUpstreamOutputEmpty(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( `data: {"p":"response/content","v":""}`, @@ -95,17 +94,17 @@ func TestHandleNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) { rec := httptest.NewRecorder() h.handleNonStream(rec, resp, "cid-empty", "deepseek-v4-flash", "prompt", 0, false, false, nil, nil, nil) - if rec.Code != http.StatusTooManyRequests { - t.Fatalf("expected status 429 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String()) + if rec.Code != http.StatusServiceUnavailable { + t.Fatalf("expected status 503 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String()) } out := decodeJSONBody(t, rec.Body.String()) errObj, _ := out["error"].(map[string]any) - if asString(errObj["code"]) != "upstream_empty_output" { - t.Fatalf("expected code=upstream_empty_output, got %#v", out) + if asString(errObj["code"]) != "upstream_unavailable" { + t.Fatalf("expected code=upstream_unavailable, got %#v", out) } } -func TestHandleNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) { +func TestHandleNonStreamSingleAttemptReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( `data: {"code":"content_filter"}`, @@ -124,7 +123,7 @@ func TestHandleNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutp } } -func TestHandleNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testing.T) { +func TestHandleNonStreamSingleAttemptReturns429WhenUpstreamHasOnlyThinking(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( `data: {"p":"response/thinking_content","v":"Only 
thinking"}`, diff --git a/internal/httpapi/openai/chat/ref_file_tokens.go b/internal/httpapi/openai/chat/ref_file_tokens.go deleted file mode 100644 index e5da36a..0000000 --- a/internal/httpapi/openai/chat/ref_file_tokens.go +++ /dev/null @@ -1,26 +0,0 @@ -package chat - -// addRefFileTokensToUsage adds inline-uploaded file token estimates to an existing -// usage map inside a response object. This keeps the token accounting aware of file -// content that the upstream model processes but that is not part of the prompt text. -func addRefFileTokensToUsage(obj map[string]any, refFileTokens int) { - if refFileTokens <= 0 || obj == nil { - return - } - usage, ok := obj["usage"].(map[string]any) - if !ok || usage == nil { - return - } - for _, key := range []string{"input_tokens", "prompt_tokens"} { - if v, ok := usage[key]; ok { - if n, ok := v.(int); ok { - usage[key] = n + refFileTokens - } - } - } - if v, ok := usage["total_tokens"]; ok { - if n, ok := v.(int); ok { - usage["total_tokens"] = n + refFileTokens - } - } -} diff --git a/internal/httpapi/openai/history_split_test.go b/internal/httpapi/openai/history_split_test.go index 3e69b17..97100f4 100644 --- a/internal/httpapi/openai/history_split_test.go +++ b/internal/httpapi/openai/history_split_test.go @@ -84,7 +84,7 @@ func TestBuildOpenAICurrentInputContextTranscriptUsesNumberedHistorySections(t * "latest user turn", "[reasoning_content]", "hidden reasoning", - "<|DSML|tool_calls>", + "<|DSML|tool_calls>", } { if !strings.Contains(transcript, want) { t.Fatalf("expected transcript to contain %q, got %q", want, transcript) diff --git a/internal/httpapi/openai/responses/empty_retry_runtime.go b/internal/httpapi/openai/responses/empty_retry_runtime.go index b0cb205..80422f5 100644 --- a/internal/httpapi/openai/responses/empty_retry_runtime.go +++ b/internal/httpapi/openai/responses/empty_retry_runtime.go @@ -7,6 +7,7 @@ import ( "time" "ds2api/internal/auth" + "ds2api/internal/completionruntime" 
"ds2api/internal/config" dsprotocol "ds2api/internal/deepseek/protocol" "ds2api/internal/promptcompat" @@ -19,41 +20,34 @@ func (h *Handler) handleResponsesStreamWithRetry(w http.ResponseWriter, r *http. if !ok { return } - attempts := 0 - currentResp := resp - for { - terminalWritten, retryable := h.consumeResponsesStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, attempts < emptyOutputRetryMaxAttempts()) - if terminalWritten { - logResponsesStreamTerminal(streamRuntime, attempts) - return - } - if !retryable || !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() { + completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{ + Surface: "responses", + Stream: true, + RetryEnabled: emptyOutputRetryEnabled(), + RetryMaxAttempts: emptyOutputRetryMaxAttempts(), + MaxAttempts: 3, + UsagePrompt: finalPrompt, + }, completionruntime.StreamRetryHooks{ + ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) { + return h.consumeResponsesStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, allowDeferEmpty) + }, + Finalize: func(attempts int) { streamRuntime.finalize("stop", false) config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "responses", "stream", true, "retry_attempts", attempts, "success_source", "none", "error_code", streamRuntime.finalErrorCode) - return - } - attempts++ - config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID) - retryPow, powErr := h.DS.GetPow(r.Context(), a, 3) - if powErr != nil { - config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", powErr) - retryPow = pow - } - nextResp, err := h.DS.CallCompletion(r.Context(), a, 
clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3) - if err != nil { - streamRuntime.failResponse(http.StatusInternalServerError, "Failed to get completion.", "error") - config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", err) - return - } - if nextResp.StatusCode != http.StatusOK { - defer func() { _ = nextResp.Body.Close() }() - body, _ := io.ReadAll(nextResp.Body) - streamRuntime.failResponse(nextResp.StatusCode, strings.TrimSpace(string(body)), "error") - return - } - streamRuntime.finalPrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts) - currentResp = nextResp - } + }, + ParentMessageID: func() int { + return streamRuntime.responseMessageID + }, + OnRetryPrompt: func(prompt string) { + streamRuntime.finalPrompt = prompt + }, + OnRetryFailure: func(status int, message, code string) { + streamRuntime.failResponse(status, strings.TrimSpace(message), code) + }, + OnTerminal: func(attempts int) { + logResponsesStreamTerminal(streamRuntime, attempts) + }, + }) } func (h *Handler) prepareResponsesStreamRuntime(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, traceID string, historySession *responsehistory.Session) (*responsesStreamRuntime, string, bool) { diff --git a/internal/httpapi/openai/responses/handler.go b/internal/httpapi/openai/responses/handler.go index 445c6f5..da8e2e1 100644 --- a/internal/httpapi/openai/responses/handler.go +++ b/internal/httpapi/openai/responses/handler.go @@ -103,14 +103,6 @@ func emptyOutputRetryMaxAttempts() int { return shared.EmptyOutputRetryMaxAttempts() } -func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any { - return shared.ClonePayloadForEmptyOutputRetry(payload, 
parentMessageID) -} - -func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string { - return shared.UsagePromptWithEmptyOutputRetry(originalPrompt, retryAttempts) -} - func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, seenNames map[int]string) []toolstream.ToolCallDelta { return shared.FilterIncrementalToolCallDeltasByAllowed(deltas, seenNames) } diff --git a/internal/httpapi/openai/responses/responses_stream_runtime_toolcalls_finalize.go b/internal/httpapi/openai/responses/responses_stream_runtime_toolcalls_finalize.go index 2f03dd3..06d3673 100644 --- a/internal/httpapi/openai/responses/responses_stream_runtime_toolcalls_finalize.go +++ b/internal/httpapi/openai/responses/responses_stream_runtime_toolcalls_finalize.go @@ -81,6 +81,22 @@ func (s *responsesStreamRuntime) buildCompletedResponseObject(finalThinking, fin }, }, }) + } else if len(calls) > 0 && strings.TrimSpace(finalThinking) != "" { + indexed = append(indexed, indexedItem{ + index: s.ensureMessageOutputIndex(), + item: map[string]any{ + "id": s.ensureMessageItemID(), + "type": "message", + "role": "assistant", + "status": "completed", + "content": []map[string]any{ + { + "type": "reasoning", + "text": finalThinking, + }, + }, + }, + }) } else if len(calls) == 0 { content := make([]map[string]any, 0, 2) if finalThinking != "" { diff --git a/internal/httpapi/openai/responses/responses_stream_test.go b/internal/httpapi/openai/responses/responses_stream_test.go index fa06bd5..dac0e54 100644 --- a/internal/httpapi/openai/responses/responses_stream_test.go +++ b/internal/httpapi/openai/responses/responses_stream_test.go @@ -397,7 +397,7 @@ func TestHandleResponsesNonStreamRequiredToolChoiceIgnoresThinkingToolPayloadWhe } } -func TestHandleResponsesNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) { +func TestHandleResponsesNonStreamSingleAttemptReturns503WhenUpstreamOutputEmpty(t *testing.T) { h := &Handler{} rec := httptest.NewRecorder() resp 
:= &http.Response{ @@ -409,17 +409,17 @@ func TestHandleResponsesNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) } h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-v4-flash", "prompt", 0, false, false, nil, nil, promptcompat.DefaultToolChoicePolicy(), "") - if rec.Code != http.StatusTooManyRequests { - t.Fatalf("expected 429 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String()) + if rec.Code != http.StatusServiceUnavailable { + t.Fatalf("expected 503 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String()) } out := decodeJSONBody(t, rec.Body.String()) errObj, _ := out["error"].(map[string]any) - if asString(errObj["code"]) != "upstream_empty_output" { - t.Fatalf("expected code=upstream_empty_output, got %#v", out) + if asString(errObj["code"]) != "upstream_unavailable" { + t.Fatalf("expected code=upstream_unavailable, got %#v", out) } } -func TestHandleResponsesNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) { +func TestHandleResponsesNonStreamSingleAttemptReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) { h := &Handler{} rec := httptest.NewRecorder() resp := &http.Response{ @@ -441,7 +441,7 @@ func TestHandleResponsesNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWi } } -func TestHandleResponsesNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testing.T) { +func TestHandleResponsesNonStreamSingleAttemptReturns429WhenUpstreamHasOnlyThinking(t *testing.T) { h := &Handler{} rec := httptest.NewRecorder() resp := &http.Response{ diff --git a/internal/httpapi/openai/shared/upstream_empty.go b/internal/httpapi/openai/shared/upstream_empty.go index d2e396c..3660f78 100644 --- a/internal/httpapi/openai/shared/upstream_empty.go +++ b/internal/httpapi/openai/shared/upstream_empty.go @@ -17,7 +17,7 @@ func UpstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int, if thinking != "" { return http.StatusTooManyRequests, "Upstream 
account hit a rate limit and returned reasoning without visible output.", "upstream_empty_output" } - return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned empty output.", "upstream_empty_output" + return http.StatusServiceUnavailable, "Upstream service is unavailable and returned no output.", "upstream_unavailable" } func WriteUpstreamEmptyOutputError(w http.ResponseWriter, text, thinking string, contentFilter bool) bool { diff --git a/internal/httpapi/openai/stream_status_test.go b/internal/httpapi/openai/stream_status_test.go index 3c11d57..2e54f3d 100644 --- a/internal/httpapi/openai/stream_status_test.go +++ b/internal/httpapi/openai/stream_status_test.go @@ -274,12 +274,12 @@ func TestChatCompletionsStreamEmitsFailureFrameWhenUpstreamOutputEmpty(t *testin } last := frames[0] statusCode, ok := last["status_code"].(float64) - if !ok || int(statusCode) != http.StatusTooManyRequests { - t.Fatalf("expected status_code=429, got %#v body=%s", last["status_code"], rec.Body.String()) + if !ok || int(statusCode) != http.StatusServiceUnavailable { + t.Fatalf("expected status_code=503, got %#v body=%s", last["status_code"], rec.Body.String()) } errObj, _ := last["error"].(map[string]any) - if asString(errObj["code"]) != "upstream_empty_output" { - t.Fatalf("expected code=upstream_empty_output, got %#v", last) + if asString(errObj["code"]) != "upstream_unavailable" { + t.Fatalf("expected code=upstream_unavailable, got %#v", last) } } @@ -345,7 +345,7 @@ func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) { func TestChatCompletionsNonStreamRetriesThinkingOnlyOutput(t *testing.T) { ds := &streamStatusDSSeqStub{resps: []*http.Response{ - makeOpenAISSEHTTPResponse(`data: {"response_message_id":99}`, "data: [DONE]"), + makeOpenAISSEHTTPResponse(`data: {"response_message_id":99,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"), makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: 
[DONE]"), }} h := &openAITestSurface{ @@ -496,7 +496,7 @@ func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) { func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) { ds := &streamStatusDSSeqStub{resps: []*http.Response{ - makeOpenAISSEHTTPResponse(`data: {"response_message_id":88}`, "data: [DONE]"), + makeOpenAISSEHTTPResponse(`data: {"response_message_id":88,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"), makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"), }} h := &openAITestSurface{ @@ -537,8 +537,15 @@ func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) { if len(content) == 0 { t.Fatalf("expected content entries, got %#v", item) } - textEntry, _ := content[0].(map[string]any) - if asString(textEntry["type"]) != "output_text" || asString(textEntry["text"]) != "visible" { + var textEntry map[string]any + for _, entry := range content { + obj, _ := entry.(map[string]any) + if asString(obj["type"]) == "output_text" { + textEntry = obj + break + } + } + if asString(textEntry["text"]) != "visible" { t.Fatalf("expected visible text entry, got %#v", content) } } diff --git a/internal/js/chat-stream/vercel_stream_impl.js b/internal/js/chat-stream/vercel_stream_impl.js index f28598e..9a9bb0b 100644 --- a/internal/js/chat-stream/vercel_stream_impl.js +++ b/internal/js/chat-stream/vercel_stream_impl.js @@ -641,9 +641,9 @@ function upstreamEmptyOutputDetail(contentFilter, _text, thinking) { }; } return { - status: 429, - message: 'Upstream account hit a rate limit and returned empty output.', - code: 'upstream_empty_output', + status: 503, + message: 'Upstream service is unavailable and returned no output.', + code: 'upstream_unavailable', }; } diff --git a/internal/js/helpers/stream-tool-sieve/parse.js b/internal/js/helpers/stream-tool-sieve/parse.js index 82f8f94..f2ba3dc 100644 --- a/internal/js/helpers/stream-tool-sieve/parse.js +++ 
b/internal/js/helpers/stream-tool-sieve/parse.js @@ -113,9 +113,10 @@ function filterToolCallsDetailed(parsed, toolNames) { if (!tc || !tc.name) { continue; } + const input = tc.input && typeof tc.input === 'object' && !Array.isArray(tc.input) ? tc.input : {}; calls.push({ name: tc.name, - input: tc.input && typeof tc.input === 'object' && !Array.isArray(tc.input) ? tc.input : {}, + input, }); } return { calls, rejectedToolNames: [] }; diff --git a/internal/js/helpers/stream-tool-sieve/parse_payload.js b/internal/js/helpers/stream-tool-sieve/parse_payload.js index 6b8077e..ffd7742 100644 --- a/internal/js/helpers/stream-tool-sieve/parse_payload.js +++ b/internal/js/helpers/stream-tool-sieve/parse_payload.js @@ -1,6 +1,6 @@ 'use strict'; -const CDATA_PATTERN = /^$/i; +const CDATA_PATTERN = /^(?:<|〈)!\[CDATA\[([\s\S]*?)]](?:>|>|〉)$/i; const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi; const TOOL_MARKUP_NAMES = [ { raw: 'tool_calls', canonical: 'tool_calls' }, @@ -102,9 +102,10 @@ function updateCDATAStateLine(inCDATA, line) { let state = inCDATA; while (pos < lower.length) { if (state) { - const end = lower.indexOf(']]>', pos); + const cdataEnd = findCDATAEnd(lower, pos); + const end = cdataEnd.index; if (end < 0) return true; - pos = end + ']]>'.length; + pos = end + cdataEnd.len; state = false; continue; } @@ -252,8 +253,9 @@ function replaceDSMLToolMarkupOutsideIgnored(text) { const tag = scanToolMarkupTagAt(raw, i); if (tag) { if (tag.dsmlLike) { - out += `<${tag.closing ? '/' : ''}${tag.name}${raw.slice(tag.nameEnd, tag.end + 1)}`; - if (raw[tag.end] !== '>') { + const tail = normalizeToolMarkupTagTailForXML(raw.slice(tag.nameEnd, tag.end + 1)); + out += `<${tag.closing ? 
'/' : ''}${tag.name}${tail}`; + if (!tail.endsWith('>')) { out += '>'; } } else { @@ -409,11 +411,12 @@ function findMatchingXmlEndTagOutsideCDATA(text, tag, from) { function skipXmlIgnoredSection(lower, i) { if (lower.startsWith('', i + ''.length }; + return { advanced: true, blocked: false, next: end + cdataEnd.len }; } if (lower.startsWith('', i + '") if end < 0 { @@ -227,15 +228,26 @@ func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked } func hasASCIIPrefixFoldAt(text string, start int, prefix string) bool { - if start < 0 || len(text)-start < len(prefix) { - return false + _, ok := matchASCIIPrefixFoldAt(text, start, prefix) + return ok +} + +func matchASCIIPrefixFoldAt(text string, start int, prefix string) (int, bool) { + if start < 0 || start >= len(text) && prefix != "" { + return 0, false } + idx := start for j := 0; j < len(prefix); j++ { - if asciiLower(text[start+j]) != asciiLower(prefix[j]) { - return false + if idx >= len(text) { + return 0, false } + ch, size := normalizedASCIIAt(text, idx) + if size <= 0 || asciiLower(ch) != asciiLower(prefix[j]) { + return 0, false + } + idx += size } - return true + return idx - start, true } func asciiLower(b byte) byte { @@ -266,15 +278,14 @@ func findToolCDATAEnd(text string, from int) int { if from < 0 || from >= len(text) { return -1 } - const closeMarker = "]]>" firstNonFenceEnd := -1 for searchFrom := from; searchFrom < len(text); { - rel := strings.Index(text[searchFrom:], closeMarker) - if rel < 0 { + end := indexToolCDATAClose(text, searchFrom) + if end < 0 { break } - end := searchFrom + rel - searchFrom = end + len(closeMarker) + closeLen := toolCDATACloseLenAt(text, end) + searchFrom = end + closeLen if cdataOffsetIsInsideMarkdownFence(text[from:end]) { continue } @@ -288,6 +299,35 @@ func findToolCDATAEnd(text string, from int) int { return firstNonFenceEnd } +func indexToolCDATAClose(text string, from int) int { + if from < 0 { + from = 0 + } + asciiIdx := 
strings.Index(text[from:], "]]>") + fullIdx := strings.Index(text[from:], "]]>") + cjkIdx := strings.Index(text[from:], "]]〉") + if asciiIdx < 0 && fullIdx < 0 && cjkIdx < 0 { + return -1 + } + best := -1 + for _, idx := range []int{asciiIdx, fullIdx, cjkIdx} { + if idx >= 0 && (best < 0 || idx < best) { + best = idx + } + } + return from + best +} + +func toolCDATACloseLenAt(text string, idx int) int { + if strings.HasPrefix(text[idx:], "]]〉") { + return len("]]〉") + } + if strings.HasPrefix(text[idx:], "]]>") { + return len("]]>") + } + return len("]]>") +} + func cdataEndLooksStructural(text string, after int) bool { for after < len(text) { switch { @@ -327,22 +367,29 @@ func cdataOffsetIsInsideMarkdownFence(fragment string) bool { } func findXMLTagEnd(text string, from int) int { - quote := byte(0) - for i := maxInt(from, 0); i < len(text); i++ { - ch := text[i] + quote := rune(0) + for i := maxInt(from, 0); i < len(text); { + r, size := utf8.DecodeRuneInString(text[i:]) + if r == utf8.RuneError && size == 0 { + break + } + ch := normalizeFullwidthASCII(r) if quote != 0 { if ch == quote { quote = 0 } + i += size continue } if ch == '"' || ch == '\'' { quote = ch + i += size continue } if ch == '>' { - return i + return i + size - 1 } + i += size } return -1 } @@ -355,7 +402,8 @@ func hasXMLTagBoundary(text string, idx int) bool { case ' ', '\t', '\n', '\r', '>', '/': return true default: - return false + r, _ := utf8.DecodeRuneInString(text[idx:]) + return normalizeFullwidthASCII(r) == '>' } } diff --git a/internal/toolcall/toolcalls_scan.go b/internal/toolcall/toolcalls_scan.go index 6acff6e..f8001fd 100644 --- a/internal/toolcall/toolcalls_scan.go +++ b/internal/toolcall/toolcalls_scan.go @@ -1,6 +1,9 @@ package toolcall -import "strings" +import ( + "strings" + "unicode/utf8" +) type toolMarkupNameAlias struct { raw string @@ -131,22 +134,35 @@ func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag } func scanToolMarkupTagAt(text 
string, start int) (ToolMarkupTag, bool) { - if start < 0 || start >= len(text) || text[start] != '<' { + next, ok := consumeToolMarkupLessThan(text, start) + if !ok { return ToolMarkupTag{}, false } - i := start + 1 - for i < len(text) && text[i] == '<' { - i++ + i := next + for { + next, ok := consumeToolMarkupLessThan(text, i) + if !ok { + break + } + i = next } closing := false if i < len(text) && text[i] == '/' { closing = true i++ } + prefixStart := i i, dsmlLike := consumeToolMarkupNamePrefix(text, i) name, nameLen := matchToolMarkupName(text, i, dsmlLike) if nameLen == 0 { - return ToolMarkupTag{}, false + fallbackName, fallbackStart, fallbackLen, ok := matchToolMarkupNameAfterArbitraryPrefix(text, prefixStart) + if !ok { + return ToolMarkupTag{}, false + } + name = fallbackName + i = fallbackStart + nameLen = fallbackLen + dsmlLike = true } nameEnd := i + nameLen nameEndBeforePipes := nameEnd @@ -184,7 +200,7 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) { } func IsPartialToolMarkupTagPrefix(text string) bool { - if text == "" || text[0] != '<' || strings.Contains(text, ">") { + if text == "" || text[0] != '<' || strings.Contains(text, ">") || strings.Contains(text, ">") { return false } i := 1 @@ -207,6 +223,9 @@ func IsPartialToolMarkupTagPrefix(text string) bool { if hasASCIIPartialPrefixFoldAt(text, i, "dsml") { return true } + if hasPartialToolMarkupNameAfterArbitraryPrefix(text, i) { + return true + } next, ok := consumeToolMarkupNamePrefixOnce(text, i) if !ok { return false @@ -236,26 +255,81 @@ func consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) { return idx + 1, true } if hasASCIIPrefixFoldAt(text, idx, "dsml") { - next := idx + len("dsml") - if next < len(text) && (text[next] == '-' || text[next] == '_') { - next++ + dsmlLen, _ := matchASCIIPrefixFoldAt(text, idx, "dsml") + next := idx + dsmlLen + if sep, size := normalizedASCIIAt(text, next); sep == '-' || sep == '_' { + next += size } return next, 
true } + if next, ok := consumeArbitraryToolMarkupNamePrefix(text, idx); ok { + return next, true + } return idx, false } -func hasASCIIPartialPrefixFoldAt(text string, start int, prefix string) bool { - remain := len(text) - start - if remain <= 0 || remain > len(prefix) { - return false +func consumeArbitraryToolMarkupNamePrefix(text string, idx int) (int, bool) { + nextSegment, ok := consumeToolMarkupPrefixSegment(text, idx) + if !ok { + return idx, false } - for j := 0; j < remain; j++ { - if asciiLower(text[start+j]) != asciiLower(prefix[j]) { - return false + j := nextSegment + for { + nextSegment, ok = consumeToolMarkupPrefixSegment(text, j) + if !ok { + break + } + j = nextSegment + } + k := j + for k < len(text) && (text[k] == ' ' || text[k] == '\t' || text[k] == '\r' || text[k] == '\n') { + k++ + } + next, ok := consumeToolMarkupPipe(text, k) + if !ok { + if sep, size := normalizedASCIIAt(text, k); sep == '_' || sep == '-' { + next = k + size + ok = true } } - return true + if !ok { + return idx, false + } + for next < len(text) && (text[next] == ' ' || text[next] == '\t' || text[next] == '\r' || text[next] == '\n') { + next++ + } + if !hasToolMarkupNamePrefix(text, next) { + return idx, false + } + return next, true +} + +func consumeToolMarkupPrefixSegment(text string, idx int) (int, bool) { + ch, size := normalizedASCIIAt(text, idx) + if size <= 0 { + return idx, false + } + if (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') { + return idx + size, true + } + return idx, false +} + +func hasASCIIPartialPrefixFoldAt(text string, start int, prefix string) bool { + if start < 0 || start >= len(text) { + return false + } + idx := start + matched := 0 + for matched < len(prefix) && idx < len(text) { + ch, size := normalizedASCIIAt(text, idx) + if size <= 0 || asciiLower(ch) != asciiLower(prefix[matched]) { + return false + } + idx += size + matched++ + } + return matched > 0 && matched < len(prefix) && idx == len(text) } func 
hasToolMarkupNamePrefix(text string, start int) bool { @@ -275,13 +349,102 @@ func matchToolMarkupName(text string, start int, dsmlLike bool) (string, int) { if name.dsmlOnly && !dsmlLike { continue } - if hasASCIIPrefixFoldAt(text, start, name.raw) { - return name.canonical, len(name.raw) + if nameLen, ok := matchASCIIPrefixFoldAt(text, start, name.raw); ok { + return name.canonical, nameLen } } return "", 0 } +func matchToolMarkupNameAfterArbitraryPrefix(text string, start int) (string, int, int, bool) { + for idx := start; idx < len(text); { + if isToolMarkupTagTerminator(text, idx) { + return "", 0, 0, false + } + for _, name := range toolMarkupNames { + nameLen, ok := matchASCIIPrefixFoldAt(text, idx, name.raw) + if !ok { + continue + } + if !toolMarkupPrefixAllowsLocalName(text[start:idx]) { + continue + } + return name.canonical, idx, nameLen, true + } + _, size := utf8.DecodeRuneInString(text[idx:]) + if size <= 0 { + size = 1 + } + idx += size + } + return "", 0, 0, false +} + +func hasPartialToolMarkupNameAfterArbitraryPrefix(text string, start int) bool { + for idx := start; idx < len(text); { + if isToolMarkupTagTerminator(text, idx) { + return false + } + if toolMarkupPrefixAllowsLocalName(text[start:idx]) && hasToolMarkupNamePrefix(text, idx) { + return true + } + if toolMarkupPrefixAllowsLocalName(text[start:idx]) && hasDSMLNamePrefixOrPartial(text, idx) { + return true + } + _, size := utf8.DecodeRuneInString(text[idx:]) + if size <= 0 { + size = 1 + } + idx += size + } + return toolMarkupPrefixAllowsLocalName(text[start:]) +} + +func hasDSMLNamePrefixOrPartial(text string, start int) bool { + return hasASCIIPrefixFoldAt(text, start, "dsml") || hasASCIIPartialPrefixFoldAt(text, start, "dsml") +} + +func toolMarkupPrefixAllowsLocalName(prefix string) bool { + if prefix == "" { + return false + } + if strings.Contains(normalizedASCIILowerString(prefix), "dsml") { + return true + } + if strings.ContainsAny(prefix, "=\"'") { + return false + } + r, _ := 
utf8.DecodeLastRuneInString(prefix) + r = normalizeFullwidthASCII(r) + return (r < 'a' || r > 'z') && (r < 'A' || r > 'Z') && (r < '0' || r > '9') +} + +func normalizedASCIILowerString(text string) string { + var b strings.Builder + b.Grow(len(text)) + for _, r := range text { + r = normalizeFullwidthASCII(r) + if r >= 'A' && r <= 'Z' { + r += 'a' - 'A' + } + if r <= 0x7f { + b.WriteRune(r) + } + } + return b.String() +} + +func isToolMarkupTagTerminator(text string, idx int) bool { + if idx >= len(text) { + return false + } + if text[idx] == '>' { + return true + } + r, _ := utf8.DecodeRuneInString(text[idx:]) + return normalizeFullwidthASCII(r) == '>' +} + func consumeToolMarkupPipe(text string, idx int) (int, bool) { if idx >= len(text) { return idx, false @@ -289,12 +452,26 @@ func consumeToolMarkupPipe(text string, idx int) (int, bool) { if text[idx] == '|' { return idx + 1, true } + if text[idx] == '\x02' { + return idx + 1, true + } if strings.HasPrefix(text[idx:], "|") { return idx + len("|"), true } + if strings.HasPrefix(text[idx:], "␂") { + return idx + len("␂"), true + } return idx, false } +func consumeToolMarkupLessThan(text string, idx int) (int, bool) { + ch, size := normalizedASCIIAt(text, idx) + if size <= 0 || ch != '<' { + return idx, false + } + return idx + size, true +} + func hasToolMarkupBoundary(text string, idx int) bool { if idx >= len(text) { return true @@ -303,6 +480,35 @@ func hasToolMarkupBoundary(text string, idx int) bool { case ' ', '\t', '\n', '\r', '>', '/': return true default: - return false + r, _ := utf8.DecodeRuneInString(text[idx:]) + return normalizeFullwidthASCII(r) == '>' } } + +func normalizedASCIIAt(text string, idx int) (byte, int) { + if idx < 0 || idx >= len(text) { + return 0, 0 + } + r, size := utf8.DecodeRuneInString(text[idx:]) + if r == utf8.RuneError && size == 0 { + return 0, 0 + } + normalized := normalizeFullwidthASCII(r) + if normalized > 0x7f { + return 0, 0 + } + return byte(normalized), size +} + 
+func normalizeFullwidthASCII(r rune) rune { + switch r { + case '〈': + return '<' + case '〉': + return '>' + } + if r >= '!' && r <= '~' { + return r - 0xFEE0 + } + return r +} diff --git a/internal/toolcall/toolcalls_test.go b/internal/toolcall/toolcalls_test.go index 3cad720..b66f047 100644 --- a/internal/toolcall/toolcalls_test.go +++ b/internal/toolcall/toolcalls_test.go @@ -72,6 +72,97 @@ EOF } } +func TestParseToolCallsSupportsUnderscoredDSMLShell(t *testing.T) { + text := ` + + + + + + + +` + calls := ParseToolCalls(text, []string{"search_web", "eval_javascript"}) + if len(calls) != 2 { + t.Fatalf("expected two underscored DSML calls, got %#v", calls) + } + if calls[0].Name != "search_web" || calls[0].Input["query"] != "2026年5月 热点事件" || calls[0].Input["topic"] != "news" { + t.Fatalf("unexpected first underscored DSML call: %#v", calls[0]) + } + if calls[1].Name != "eval_javascript" || calls[1].Input["code"] != "1 + 1" { + t.Fatalf("unexpected second underscored DSML call: %#v", calls[1]) + } +} + +func TestParseToolCallsSupportsArbitraryPrefixedToolMarkup(t *testing.T) { + cases := []string{ + `README.md`, + `README.md`, + `README.md`, + } + for _, text := range cases { + calls := ParseToolCalls(text, []string{"Read"}) + if len(calls) != 1 { + t.Fatalf("expected one arbitrary-prefixed tool call for %q, got %#v", text, calls) + } + if calls[0].Name != "Read" || calls[0].Input["file_path"] != "README.md" { + t.Fatalf("unexpected arbitrary-prefixed parse result: %#v", calls[0]) + } + } +} + +func TestParseToolCallsSupportsFullwidthDSMLShell(t *testing.T) { + text := `<dSML|tool_calls> + <dSML|invoke name="Read"> + <dSML|parameter name="file_path"> + + <dSML|invoke name="Read"> + <dSML|parameter name="file_path"> + +` + calls := ParseToolCalls(text, []string{"Read"}) + if len(calls) != 2 { + t.Fatalf("expected two fullwidth DSML calls, got %#v", calls) + } + if calls[0].Name != "Read" || calls[0].Input["file_path"] != 
"/Users/aq/Desktop/myproject/Personal_Blog/README.md" { + t.Fatalf("unexpected first fullwidth DSML call: %#v", calls[0]) + } + if calls[1].Name != "Read" || calls[1].Input["file_path"] != "/Users/aq/Desktop/myproject/Personal_Blog/index.html" { + t.Fatalf("unexpected second fullwidth DSML call: %#v", calls[1]) + } +} + +func TestParseToolCallsSupportsCJKAngleDSMDrift(t *testing.T) { + text := ` + +〈![CDATA[Show commits on local dev not on origin/dev]]〉〈/DSM|parameter〉 +〈![CDATA[git log --oneline origin/dev..dev]]〉〈/DSM|parameter〉 +〈/DSM|invoke〉 + +〈![CDATA[Show commits on origin/dev not on local dev]]〉〈/DSM|parameter〉 +〈![CDATA[git log --oneline dev..origin/dev]]〉〈/DSM|parameter〉 +〈/DSM|invoke〉 + +〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉 +〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉 +〈/DSM|invoke〉 +〈/DSM|tool_calls〉` + + calls := ParseToolCalls(text, []string{"Bash"}) + if len(calls) != 3 { + t.Fatalf("expected three CJK-angle DSM drift calls, got %#v", calls) + } + if calls[0].Name != "Bash" || calls[0].Input["command"] != "git log --oneline origin/dev..dev" { + t.Fatalf("unexpected first CJK-angle DSM drift call: %#v", calls[0]) + } + if calls[1].Name != "Bash" || calls[1].Input["description"] != "Show commits on origin/dev not on local dev" { + t.Fatalf("unexpected second CJK-angle DSM drift call: %#v", calls[1]) + } + if calls[2].Name != "Bash" || calls[2].Input["command"] != "git status -b --short" { + t.Fatalf("unexpected third CJK-angle DSM drift call: %#v", calls[2]) + } +} + func TestParseToolCallsIgnoresBareHyphenatedToolCallsLookalike(t *testing.T) { text := `pwd` calls := ParseToolCalls(text, []string{"Bash"}) @@ -516,14 +607,17 @@ func TestParseToolCallsDetailedMarksToolCallsSyntax(t *testing.T) { } } -func TestParseToolCallsRejectsAllEmptyParameterPayload(t *testing.T) { +func TestParseToolCallsAllowsAllEmptyParameterPayload(t *testing.T) { text := ` ` res := ParseToolCallsDetailed(text, []string{"Bash"}) if 
!res.SawToolCallSyntax { t.Fatalf("expected tool syntax to be detected, got %#v", res) } - if len(res.Calls) != 0 { - t.Fatalf("expected all-empty payload to be rejected, got %#v", res.Calls) + if len(res.Calls) != 1 { + t.Fatalf("expected all-empty payload to be parsed, got %#v", res.Calls) + } + if res.Calls[0].Input["command"] != "" || res.Calls[0].Input["description"] != "" || res.Calls[0].Input["timeout"] != "" { + t.Fatalf("expected empty parameters to be preserved, got %#v", res.Calls[0].Input) } } diff --git a/internal/toolstream/tool_sieve_xml.go b/internal/toolstream/tool_sieve_xml.go index 8e728e3..11294bb 100644 --- a/internal/toolstream/tool_sieve_xml.go +++ b/internal/toolstream/tool_sieve_xml.go @@ -54,7 +54,7 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, } if parsed.SawToolCallSyntax { if rejected == nil || tag.Start < rejected.start { - rejected = &rejectedBlock{start: tag.Start, prefix: prefixPart, suffix: suffixPart} + rejected = &rejectedBlock{start: tag.Start, prefix: prefixPart + xmlBlock, suffix: suffixPart} } searchFrom = tag.End + 1 continue @@ -88,7 +88,7 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, return prefixPart, parsed.Calls, suffixPart, true } if parsed.SawToolCallSyntax { - return prefixPart, nil, suffixPart, true + return prefixPart + captured[invokeTag.Start:closeTag.End+1], nil, suffixPart, true } return prefixPart + captured[invokeTag.Start:closeTag.End+1], nil, suffixPart, true } diff --git a/internal/toolstream/tool_sieve_xml_test.go b/internal/toolstream/tool_sieve_xml_test.go index ce2ee77..e207969 100644 --- a/internal/toolstream/tool_sieve_xml_test.go +++ b/internal/toolstream/tool_sieve_xml_test.go @@ -1,6 +1,7 @@ package toolstream import ( + "ds2api/internal/toolcall" "strings" "testing" ) @@ -104,6 +105,99 @@ func TestProcessToolSieveInterceptsDSMLTrailingPipeToolCallWithoutLeak(t *testin } } +func 
TestProcessToolSieveInterceptsDSMLControlSeparatorWithoutLeak(t *testing.T) { + for _, tc := range []struct { + name string + sep string + }{ + {name: "control_picture", sep: "␂"}, + {name: "raw_stx", sep: "\x02"}, + } { + t.Run(tc.name, func(t *testing.T) { + sep := tc.sep + var state State + chunks := []string{ + "\n", + ` ` + "\n", + ` ` + "\n", + " \n", + "", + } + var events []Event + for _, c := range chunks { + events = append(events, ProcessChunk(&state, c, []string{"Read"})...) + } + events = append(events, Flush(&state, []string{"Read"})...) + + var textContent strings.Builder + var calls []any + for _, evt := range events { + textContent.WriteString(evt.Content) + for _, call := range evt.ToolCalls { + calls = append(calls, call) + } + } + if text := textContent.String(); strings.Contains(strings.ToLower(text), "dsml") || strings.Contains(text, "Read") || strings.Contains(text, sep) { + t.Fatalf("control-separator DSML tool call leaked to text: %q events=%#v", text, events) + } + if len(calls) != 1 { + t.Fatalf("expected one control-separator DSML tool call, got %d events=%#v", len(calls), events) + } + }) + } +} + +func TestProcessToolSieveInterceptsArbitraryPrefixedToolTagsWithoutLeak(t *testing.T) { + var state State + chunks := []string{ + "\n", + ` ` + "\n", + ` ` + "\n", + " \n", + "", + } + var events []Event + for _, c := range chunks { + events = append(events, ProcessChunk(&state, c, []string{"Read"})...) + } + events = append(events, Flush(&state, []string{"Read"})...) 
+ + var textContent strings.Builder + var calls []any + for _, evt := range events { + textContent.WriteString(evt.Content) + for _, call := range evt.ToolCalls { + calls = append(calls, call) + } + } + if text := textContent.String(); strings.Contains(text, "proto") || strings.Contains(text, "Read") || strings.Contains(text, "💥") { + t.Fatalf("arbitrary-prefixed tool call leaked to text: %q events=%#v", text, events) + } + if len(calls) != 1 { + t.Fatalf("expected one arbitrary-prefixed tool call, got %d events=%#v", len(calls), events) + } +} + +func TestProcessToolSieveEmitsEmptyDSMLControlSeparatorBlockWithoutLeak(t *testing.T) { + sep := "␂" + chunks := []string{ + "\n", + ` ` + "\n", + ` ` + "\n", + " \n", + "", + } + calls := collectToolCallsForChunks(t, chunks, []string{"Read"}) + if len(calls) != 1 { + t.Fatalf("expected empty control-separator block to produce one call, got %#v", calls) + } + if calls[0].Name != "Read" || calls[0].Input["file_path"] != "" { + t.Fatalf("expected empty file_path parameter to be preserved, got %#v", calls) + } +} + func TestProcessToolSieveInterceptsExtraLeadingLessThanDSMLToolCallWithoutLeak(t *testing.T) { var state State chunks := []string{ @@ -490,7 +584,7 @@ func TestProcessToolSieveNonToolXMLKeepsSuffixForToolParsing(t *testing.T) { } } -func TestProcessToolSieveSuppressesMalformedExecutableXMLBlock(t *testing.T) { +func TestProcessToolSieveReleasesMalformedExecutableXMLBlock(t *testing.T) { var state State chunk := `{"path":"README.md"}` events := ProcessChunk(&state, chunk, []string{"read_file"}) @@ -506,13 +600,12 @@ func TestProcessToolSieveSuppressesMalformedExecutableXMLBlock(t *testing.T) { if toolCalls != 0 { t.Fatalf("expected malformed executable-looking XML not to become a tool call, got %d events=%#v", toolCalls, events) } - if textContent.Len() != 0 { - t.Fatalf("expected malformed executable-looking XML to be suppressed, got %q", textContent.String()) + if textContent.String() != chunk { + 
t.Fatalf("expected malformed executable-looking XML to be released as text, got %q", textContent.String()) } } -func TestProcessToolSieveSuppressesAllEmptyDSMLToolBlock(t *testing.T) { - var state State +func TestProcessToolSieveEmitsAllEmptyDSMLToolBlock(t *testing.T) { chunk := strings.Join([]string{ `<|DSML|tool_calls>`, `<|DSML|invoke name="Bash">`, @@ -522,22 +615,69 @@ func TestProcessToolSieveSuppressesAllEmptyDSMLToolBlock(t *testing.T) { ``, ``, }, "\n") - events := ProcessChunk(&state, chunk, []string{"Bash"}) - events = append(events, Flush(&state, []string{"Bash"})...) + calls := collectToolCallsForChunks(t, []string{chunk}, []string{"Bash"}) + if len(calls) != 1 { + t.Fatalf("expected all-empty DSML block to produce one tool call, got %#v", calls) + } + if calls[0].Input["command"] != "" || calls[0].Input["description"] != "" || calls[0].Input["timeout"] != "" { + t.Fatalf("expected empty parameters to be preserved, got %#v", calls[0].Input) + } +} + +func TestProcessToolSieveEmitsChunkedAllEmptyArbitraryPrefixedToolBlock(t *testing.T) { + chunk := strings.Join([]string{ + ``, + ` `, + ` `, + ` `, + ` `, + ` `, + ` `, + }, "\n") + calls := collectToolCallsForChunks(t, splitEveryNRBytes(chunk, 8), []string{"TaskOutput"}) + if len(calls) != 1 { + t.Fatalf("expected chunked all-empty arbitrary-prefixed block to produce one tool call, got %#v", calls) + } + if calls[0].Name != "TaskOutput" || calls[0].Input["task_id"] != "" || calls[0].Input["block"] != "" || calls[0].Input["timeout"] != "" { + t.Fatalf("expected empty TaskOutput parameters to be preserved, got %#v", calls) + } +} + +func collectToolCallsForChunks(t *testing.T, chunks []string, toolNames []string) []toolcall.ParsedToolCall { + t.Helper() + var state State + var events []Event + for _, chunk := range chunks { + events = append(events, ProcessChunk(&state, chunk, toolNames)...) + } + events = append(events, Flush(&state, toolNames)...) 
var textContent strings.Builder - toolCalls := 0 + var calls []toolcall.ParsedToolCall for _, evt := range events { textContent.WriteString(evt.Content) - toolCalls += len(evt.ToolCalls) - } - - if toolCalls != 0 { - t.Fatalf("expected all-empty DSML block not to produce tool calls, got %d events=%#v", toolCalls, events) + calls = append(calls, evt.ToolCalls...) } if textContent.Len() != 0 { - t.Fatalf("expected all-empty DSML block not to leak as text, got %q", textContent.String()) + t.Fatalf("expected tool block not to leak as text, got %q", textContent.String()) } + return calls +} + +func splitEveryNRBytes(s string, n int) []string { + if n <= 0 { + return []string{s} + } + out := make([]string, 0, len(s)/n+1) + for len(s) > 0 { + if len(s) <= n { + out = append(out, s) + break + } + out = append(out, s[:n]) + s = s[n:] + } + return out } func TestProcessToolSievePassesThroughFencedXMLToolCallExamples(t *testing.T) { @@ -671,6 +811,8 @@ func TestFindPartialXMLToolTagStart(t *testing.T) { {"partial_tool_calls", "Hello \n", + "\n", + "〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉\n", + "〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉\n", + "〈/DSM|invoke〉\n", + "〈/DSM|tool_calls〉", + } + var events []Event + for _, c := range chunks { + events = append(events, ProcessChunk(&state, c, []string{"Bash"})...) + } + events = append(events, Flush(&state, []string{"Bash"})...) + + var textContent string + var calls []toolcall.ParsedToolCall + for _, evt := range events { + textContent += evt.Content + calls = append(calls, evt.ToolCalls...) 
+ } + + if strings.Contains(textContent, "DSM") || strings.Contains(textContent, "git status") { + t.Fatalf("CJK-angle DSM drift leaked to text: %q events=%#v", textContent, events) + } + if len(calls) != 1 { + t.Fatalf("expected one CJK-angle DSM drift tool call, got %d events=%#v", len(calls), events) + } + if calls[0].Name != "Bash" || calls[0].Input["command"] != "git status -b --short" { + t.Fatalf("unexpected CJK-angle DSM drift call: %#v", calls[0]) + } +} diff --git a/tests/node/chat-stream.test.js b/tests/node/chat-stream.test.js index cf49fa1..1146113 100644 --- a/tests/node/chat-stream.test.js +++ b/tests/node/chat-stream.test.js @@ -187,9 +187,9 @@ test('vercel stream emits Go-parity empty-output failure on DONE', async () => { const { frames } = await runMockVercelStream(['data: [DONE]\n\n']); assert.equal(frames.length, 2); const failed = JSON.parse(frames[0]); - assert.equal(failed.status_code, 429); - assert.equal(failed.error.type, 'rate_limit_error'); - assert.equal(failed.error.code, 'upstream_empty_output'); + assert.equal(failed.status_code, 503); + assert.equal(failed.error.type, 'service_unavailable_error'); + assert.equal(failed.error.code, 'upstream_unavailable'); assert.equal(frames[1], '[DONE]'); }); @@ -209,6 +209,21 @@ test('vercel stream retries empty output once and keeps one terminal frame', asy assert.match(completionBodies[1].prompt, /Previous reply had no visible output\. 
Please regenerate the visible final answer or tool call now\.$/); }); +test('vercel stream retries thinking-only output once', async () => { + const { frames, fetchURLs, fetchBodies } = await runMockVercelStreamSequence([ + ['data: {"response_message_id":42,"p":"response/thinking_content","v":"plan"}\n\n', 'data: [DONE]\n\n'], + ['data: {"p":"response/content","v":"visible"}\n\n', 'data: [DONE]\n\n'], + ], { thinking_enabled: true }); + const parsed = frames.filter((frame) => frame !== '[DONE]').map((frame) => JSON.parse(frame)); + const completionBodies = fetchBodies.filter((body) => Object.hasOwn(body, 'prompt')); + assert.equal(fetchURLs.filter((url) => url === 'https://chat.deepseek.com/api/v0/chat/completion').length, 2); + assert.equal(frames.filter((frame) => frame === '[DONE]').length, 1); + assert.equal(completionBodies[1].parent_message_id, 42); + assert.equal(parsed[0].choices[0].delta.reasoning_content, 'plan'); + assert.equal(parsed[1].choices[0].delta.content, 'visible'); + assert.equal(parsed[2].choices[0].finish_reason, 'stop'); +}); + test('vercel stream coalesces many small content deltas while keeping one choice', async () => { const lines = Array.from({ length: 100 }, () => `data: ${JSON.stringify({ p: 'response/content', v: '字' })}\n\n`); lines.push('data: [DONE]\n\n'); diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index eb9b5f3..e6a07e5 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -80,6 +80,118 @@ EOF assert.equal(calls[0].input.command.includes('Co-Authored-By: Claude Opus 4.7'), true); }); +test('parseToolCalls parses underscored DSML shell (Vercel parity)', () => { + const payload = ` + + + + + + + +`; + const calls = parseToolCalls(payload, ['search_web', 'eval_javascript']); + assert.equal(calls.length, 2); + assert.equal(calls[0].name, 'search_web'); + assert.deepEqual(calls[0].input, { query: '2026年5月 热点事件', topic: 'news' }); + 
assert.equal(calls[1].name, 'eval_javascript'); + assert.deepEqual(calls[1].input, { code: '1 + 1' }); +}); + +test('parseToolCalls parses arbitrary-prefixed tool markup shells', () => { + const samples = [ + 'README.md', + 'README.md', + 'README.md', + ]; + for (const payload of samples) { + const calls = parseToolCalls(payload, ['Read']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'Read'); + assert.deepEqual(calls[0].input, { file_path: 'README.md' }); + } +}); + +test('parseToolCalls parses fullwidth DSML shell drift', () => { + const payload = `<dSML|tool_calls> + <dSML|invoke name="Read"> + <dSML|parameter name="file_path"> + + <dSML|invoke name="Read"> + <dSML|parameter name="file_path"> + +`; + const calls = parseToolCalls(payload, ['Read']); + assert.equal(calls.length, 2); + assert.equal(calls[0].name, 'Read'); + assert.deepEqual(calls[0].input, { file_path: '/Users/aq/Desktop/myproject/Personal_Blog/README.md' }); + assert.equal(calls[1].name, 'Read'); + assert.deepEqual(calls[1].input, { file_path: '/Users/aq/Desktop/myproject/Personal_Blog/index.html' }); +}); + +test('parseToolCalls parses CJK-angle DSM drift', () => { + const payload = ` + +〈![CDATA[Show commits on local dev not on origin/dev]]〉〈/DSM|parameter〉 +〈![CDATA[git log --oneline origin/dev..dev]]〉〈/DSM|parameter〉 +〈/DSM|invoke〉 + +〈![CDATA[Show commits on origin/dev not on local dev]]〉〈/DSM|parameter〉 +〈![CDATA[git log --oneline dev..origin/dev]]〉〈/DSM|parameter〉 +〈/DSM|invoke〉 + +〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉 +〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉 +〈/DSM|invoke〉 +〈/DSM|tool_calls〉`; + const calls = parseToolCalls(payload, ['Bash']); + assert.equal(calls.length, 3); + assert.equal(calls[0].name, 'Bash'); + assert.equal(calls[0].input.command, 'git log --oneline origin/dev..dev'); + assert.equal(calls[1].input.description, 'Show commits on origin/dev not on local dev'); + assert.equal(calls[2].input.command, 'git status -b --short'); 
+}); + +test('parseToolCalls parses DSML control separator drift', () => { + for (const sep of ['␂', '\x02']) { + const payload = ` + + + +`; + const calls = parseToolCalls(payload, ['Read']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'Read'); + assert.deepEqual(calls[0].input, { file_path: '/tmp/input.txt' }); + } +}); + +test('parseToolCalls parses arbitrary-prefixed tool tags', () => { + const payload = ` + + + +`; + const calls = parseToolCalls(payload, ['Read']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'Read'); + assert.deepEqual(calls[0].input, { file_path: '/tmp/input.txt' }); +}); + +test('parseToolCalls allows all-empty parameter payloads', () => { + const payload = ` + + + + + +`; + const calls = parseToolCalls(payload, ['TaskOutput']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'TaskOutput'); + assert.deepEqual(calls[0].input, { task_id: '', block: '', timeout: '' }); +}); + test('parseToolCalls ignores bare hyphenated tool_calls lookalike', () => { const payload = 'pwd'; const calls = parseToolCalls(payload, ['Bash']); @@ -396,6 +508,80 @@ test('sieve emits tool_calls for DSML trailing pipe tag terminator', () => { assert.equal(text.toLowerCase().includes('dsml'), false); }); +test('sieve emits tool_calls for DSML control separator drift', () => { + for (const sep of ['␂', '\x02']) { + const events = runSieve([ + `\n', + `\n`, + `\n`, + `\n`, + ``, + ], ['Read']); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].name, 'Read'); + assert.equal(finalCalls[0].input.file_path, '/tmp/input.txt'); + const text = collectText(events); + assert.equal(text.toLowerCase().includes('dsml'), false); + assert.equal(text.includes(sep), false); + } +}); + +test('sieve emits tool_calls for arbitrary-prefixed tool tags', () => { + const events = runSieve([ + '\n', + '\n', + '\n', + '\n', 
+ '', + ], ['Read']); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].name, 'Read'); + assert.equal(finalCalls[0].input.file_path, '/tmp/input.txt'); + const text = collectText(events); + assert.equal(text.includes('proto'), false); + assert.equal(text.includes('💥'), false); +}); + +test('sieve emits tool_calls for CJK-angle DSM drift', () => { + const events = runSieve([ + '\n', + '\n', + '〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉\n', + '〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉\n', + '〈/DSM|invoke〉\n', + '〈/DSM|tool_calls〉', + ], ['Bash']); + const finalCalls = events.flatMap((evt) => (evt.type === 'tool_calls' ? evt.calls : [])); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].name, 'Bash'); + assert.equal(finalCalls[0].input.command, 'git status -b --short'); + assert.equal(collectText(events), ''); +}); + +test('sieve emits all-empty arbitrary-prefixed tool tags without leaking text', () => { + const payload = [ + '\n', + ' \n', + ' \n', + ' \n', + ' \n', + ' \n', + '', + ].join(''); + for (const chunks of [[payload], payload.match(/.{1,8}/gs)]) { + const events = runSieve(chunks, ['TaskOutput']); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].name, 'TaskOutput'); + assert.deepEqual(finalCalls[0].input, { task_id: '', block: '', timeout: '' }); + assert.equal(collectText(events), ''); + } +}); + test('sieve emits tool_calls for extra leading less-than DSML tags without leaking prefix', () => { const events = runSieve([ '<<|DSML|tool_calls>\n', @@ -734,7 +920,7 @@ test('sieve keeps embedded invalid tool-like json as normal text to avoid stream assert.equal(leakedText.toLowerCase().includes('tool_calls'), true); }); -test('sieve passes malformed executable-looking XML 
through as text', () => { +test('sieve releases malformed executable-looking XML wrappers as text', () => { const chunk = '{"path":"README.MD"}'; const events = runSieve([chunk], ['read_file']); const leakedText = collectText(events);