mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-10 03:07:41 +08:00
Compare commits
60 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
61d42f8b72 | ||
|
|
77b6d83266 | ||
|
|
740a78ad5a | ||
|
|
ddd42e532e | ||
|
|
3cc7f469f3 | ||
|
|
7c66742a19 | ||
|
|
067cf465bb | ||
|
|
9e9a7f1bec | ||
|
|
96691aa37a | ||
|
|
a3ce8008af | ||
|
|
23a79df687 | ||
|
|
e251a7ee29 | ||
|
|
30cca7cda0 | ||
|
|
ab163edee7 | ||
|
|
1201c3773f | ||
|
|
595ddf52af | ||
|
|
0adffccd46 | ||
|
|
0670d5acb4 | ||
|
|
239c4faa97 | ||
|
|
f33789399e | ||
|
|
1e00e482a6 | ||
|
|
7ab5a0e66d | ||
|
|
410efbd70b | ||
|
|
7179b995bb | ||
|
|
fef3798e5e | ||
|
|
00fe18b505 | ||
|
|
9b746e32d8 | ||
|
|
ace440481a | ||
|
|
66e0fa568f | ||
|
|
fa489248bc | ||
|
|
657b9379ed | ||
|
|
9062330104 | ||
|
|
d0d61a5d77 | ||
|
|
ffef451f7a | ||
|
|
a68a79e087 | ||
|
|
c8db66615c | ||
|
|
79ae9c8970 | ||
|
|
2378f0fbe7 | ||
|
|
aa29084038 | ||
|
|
21c1527c79 | ||
|
|
7ec0d99702 | ||
|
|
7e639667f8 | ||
|
|
066c48c107 | ||
|
|
d69b0658ea | ||
|
|
4315b424bf | ||
|
|
4678a061d0 | ||
|
|
70076c217f | ||
|
|
554fae6b3f | ||
|
|
76884c0d94 | ||
|
|
269d7cd8f9 | ||
|
|
7870a61bb0 | ||
|
|
ec4f178908 | ||
|
|
f413d42b0c | ||
|
|
5406f07938 | ||
|
|
fe87ded82b | ||
|
|
8ace349f84 | ||
|
|
112bedb05d | ||
|
|
c099a6f7bf | ||
|
|
5e55cf36d8 | ||
|
|
837dc74ffc |
@@ -22,6 +22,13 @@ These rules apply to all agent-made changes in this repository.
|
||||
- Keep changes additive and tightly scoped to the requested feature or bugfix.
|
||||
- Do not mix unrelated refactors into feature PRs unless they are required to make the change pass gates.
|
||||
|
||||
## Protocol Adapter Boundary
|
||||
|
||||
- Do not let OpenAI Chat, OpenAI Responses, Claude, Gemini, or other interface protocol formatting own shared business behavior.
|
||||
- Normalize protocol-specific request shapes into the project standard request/turn model first, run shared business logic in one place, then render back to the target protocol at the boundary.
|
||||
- Business logic that must stay globally consistent includes empty-output retry, thinking/reasoning handling, tool-call detection and policy, usage accounting, current-input-file injection, history persistence, file/reference handling, and completion payload assembly.
|
||||
- If a behavior must differ by protocol, keep the difference as an explicit adapter/rendering concern and document why it cannot live in the shared normalized path.
|
||||
|
||||
## Documentation Sync
|
||||
|
||||
- When business logic or user-visible behavior changes, update the corresponding documentation in the same change.
|
||||
|
||||
65
API.en.md
65
API.en.md
@@ -18,6 +18,7 @@ Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Depl
|
||||
- [OpenAI-Compatible API](#openai-compatible-api)
|
||||
- [Claude-Compatible API](#claude-compatible-api)
|
||||
- [Gemini-Compatible API](#gemini-compatible-api)
|
||||
- [Ollama API](#ollama-api)
|
||||
- [Admin API](#admin-api)
|
||||
- [Error Payloads](#error-payloads)
|
||||
- [cURL Examples](#curl-examples)
|
||||
@@ -31,7 +32,7 @@ Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Depl
|
||||
| Base URL | `http://localhost:5001` or your deployment domain |
|
||||
| Default Content-Type | `application/json` |
|
||||
| Health probes | `GET /healthz`, `GET /readyz` |
|
||||
| CORS | Enabled (uniformly covers `/v1/*`, `/anthropic/*`, `/v1beta/models/*`, and `/admin/*`; echoes the browser `Origin` when present, otherwise `*`; default allow-list includes `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`, `X-Goog-Api-Key`, `Anthropic-Version`, `Anthropic-Beta`, and also accepts third-party preflight-requested headers such as `x-stainless-*`; `/v1/chat/completions` on Vercel Node Runtime matches the same behavior; internal-only `X-Ds2-Internal-Token` remains blocked) |
|
||||
| CORS | Enabled (uniformly covers `/v1/*`, `/anthropic/*`, `/v1beta/models/*`, `/api/*`, and `/admin/*`; echoes the browser `Origin` when present, otherwise `*`; default allow-list includes `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`, `X-Goog-Api-Key`, `Anthropic-Version`, `Anthropic-Beta`, and also accepts third-party preflight-requested headers such as `x-stainless-*`; `/v1/chat/completions` on Vercel Node Runtime matches the same behavior; internal-only `X-Ds2-Internal-Token` remains blocked) |
|
||||
|
||||
- All JSON request bodies must be valid UTF-8; malformed byte sequences are rejected on ingress with `400 invalid json`.
|
||||
|
||||
@@ -39,8 +40,10 @@ Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Depl
|
||||
|
||||
- OpenAI / Claude / Gemini protocols are now mounted on one shared `chi` router tree assembled in `internal/server/router.go`.
|
||||
- Adapter responsibilities are streamlined to: **request normalization → DeepSeek invocation → protocol-shaped rendering**, reducing legacy split-logic paths.
|
||||
- Tool-calling semantics are aligned between Go and Node runtime: models should output the DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts legacy canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`. DSML is normalized back to XML at the parser entry, so internal parsing remains XML-based, with stream-time anti-leak filtering.
|
||||
- Tool-calling semantics are aligned between Go and Node runtime: models should output the fullwidth-separator DSML shell `<｜DSML｜tool_calls>` → `<｜DSML｜invoke name="...">` → `<｜DSML｜parameter name="...">`; DS2API also accepts the halfwidth DSML wrapper `<|DSML|tool_calls>`, DSML wrapper aliases such as `<dsml|tool_calls>`, `<｜tool_calls>`, `<|tool_calls>`, common DSML separator drift such as `<|DSML tool_calls>`, collapsed DSML local names such as `<DSMLtool_calls>`, control-separator drift such as `<DSML␂tool_calls>` / raw STX `\x02`, CJK angle bracket and trailing attribute separator drift such as `<DSM|parameter name="command"|>...〈/DSM|parameter〉`, arbitrary protocol prefixes such as `<proto💥tool_calls>`, and legacy canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`. The scanner normalizes fixed local names (`tool_calls` / `invoke` / `parameter`) back to XML before parsing; only wrapped tool blocks or the narrow missing-opening-wrapper repair path enter the tool path, while bare `<invoke>` does not count as supported syntax. JSON literal parameter bodies are preserved as structured values, explicit empty or whitespace-only parameters are preserved as empty strings, malformed complete wrappers are released as plain text, and loose CDATA is narrowly repaired at final parse/flush when it can preserve a complete outer tool call.
|
||||
- `Admin API` separates static config from runtime policy: `/admin/config*` for configuration state, `/admin/settings*` for runtime behavior.
|
||||
- When upstream returns a thinking-only response with no visible text, the Go main path for both streaming and non-streaming completions retries once in the same DeepSeek session: it appends the prompt suffix `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` and sets `parent_message_id`. If that same-account retry would still end as `429 upstream_empty_output`, managed-account mode switches to the next available account, creates a fresh session, and retries the original payload once before returning 429.
|
||||
- Citation/reference marker boundary: streaming output hides upstream `[citation:N]` / `[reference:N]` placeholders by default; non-stream output converts DeepSeek search reference markers into Markdown links.
|
||||
|
||||
---
|
||||
|
||||
@@ -83,7 +86,7 @@ Two header formats accepted:
|
||||
- Token is in `config.keys` → **Managed account mode**: DS2API auto-selects an account via rotation
|
||||
- Token is not in `config.keys` → **Direct token mode**: treated as a DeepSeek token directly
|
||||
|
||||
**Optional header**: `X-Ds2-Target-Account: <email_or_mobile>` — Pin a specific managed account; if the target account does not exist or the managed-account queue is exhausted, the request returns `429`, and current responses do not include `Retry-After`. If the account exists but login/refresh fails, the request returns the underlying `401` or upstream error.
|
||||
**Optional header**: `X-Ds2-Target-Account: <email_or_mobile>` — Pin a specific managed account; if the target account does not exist or the managed-account queue is exhausted, the request returns `429`, and current responses do not include `Retry-After`. If the account exists but login/refresh fails, the request returns the underlying `401` or upstream error. Without a pinned target, managed-account completion requests try one alternate-account fresh retry before returning an empty-output 429; pinned-target requests and requests with no other available account do not switch.
|
||||
Gemini-compatible clients can also send `x-goog-api-key`, `?key=`, or `?api_key=` as the caller credential source.
|
||||
|
||||
### Admin Endpoints (`/admin/*`)
|
||||
@@ -123,6 +126,9 @@ Gemini-compatible clients can also send `x-goog-api-key`, `?key=`, or `?api_key=
|
||||
| POST | `/v1beta/models/{model}:streamGenerateContent` | Business | Gemini stream |
|
||||
| POST | `/v1/models/{model}:generateContent` | Business | Gemini non-stream compat path |
|
||||
| POST | `/v1/models/{model}:streamGenerateContent` | Business | Gemini stream compat path |
|
||||
| GET | `/api/version` | None | Ollama version endpoint |
|
||||
| GET | `/api/tags` | None | Ollama model list |
|
||||
| POST | `/api/show` | None | Ollama model capability query (returns `id` + `capabilities`) |
|
||||
| POST | `/admin/login` | None | Admin login |
|
||||
| GET | `/admin/verify` | JWT | Verify admin JWT |
|
||||
| GET | `/admin/vercel/config` | Admin | Read preconfigured Vercel creds |
|
||||
@@ -222,16 +228,18 @@ For `chat` / `responses` / `embeddings`, DS2API follows a wide-input/strict-outp
|
||||
|
||||
1. Match DeepSeek native model IDs first.
|
||||
2. Then match exact keys in `model_aliases`.
|
||||
3. If still unmatched, fall back by known family heuristics (`o*`, `gpt-*`, `claude-*`, etc.).
|
||||
4. If still unmatched, return `invalid_request_error`.
|
||||
3. If the request name ends with `-nothinking`, resolve the base alias and append the corresponding no-thinking variant.
|
||||
4. If still unmatched, return `invalid_request_error`. Unknown model families are not guessed heuristically; add explicit compatibility names through `model_aliases`.
|
||||
|
||||
Built-in aliases come from `internal/config/models.go`; `config.model_aliases` can override or add mappings at runtime. Excerpt:
|
||||
|
||||
- OpenAI / Codex: `gpt-4o`, `gpt-4.1`, `gpt-5`, `gpt-5.5`, `gpt-5-codex`, `gpt-5.3-codex`, `codex-mini-latest`
|
||||
- OpenAI reasoning: `o1`, `o3`, `o3-deep-research`, `o4-mini`
|
||||
- Claude: `claude-opus-4-6`, `claude-sonnet-4-6`, `claude-haiku-4-5`, `claude-3-5-sonnet-latest`
|
||||
- Gemini: `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-pro-vision`
|
||||
- Other compatibility families: `llama-*`, `qwen-*`, `mistral-*`, and `command-*` fall back through family heuristics
|
||||
- Gemini: `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-3.1-pro`, `gemini-3-pro`, `gemini-3-flash`, `gemini-3.1-flash-lite`, `gemini-pro-vision`
|
||||
- Other exact built-in aliases: `llama-3.1-70b-instruct`, `qwen-max`
|
||||
|
||||
Aliases with a `-nothinking` suffix also map to the corresponding forced no-thinking DeepSeek model.
|
||||
|
||||
Current vision support resolves only to `deepseek-v4-vision` and does not expose a separate `vision-search` variant.
|
||||
|
||||
@@ -239,6 +247,8 @@ Retired historical families such as `claude-1.*`, `claude-2.*`, `claude-instant-
|
||||
|
||||
### `POST /v1/chat/completions`
|
||||
|
||||
> Path note: besides the canonical `/v1/chat/completions`, DS2API also accepts the root shortcut `/chat/completions`. On Vercel Runtime, `vercel.json` rewrites only the canonical `/v1/chat/completions` path to the Node streaming bridge; the root shortcut stays on the Go primary path. Use `/v1/chat/completions` on Vercel when real-time streaming is required.
|
||||
|
||||
**Headers**:
|
||||
|
||||
```http
|
||||
@@ -250,7 +260,7 @@ Content-Type: application/json
|
||||
|
||||
| Field | Type | Required | Notes |
|
||||
| --- | --- | --- | --- |
|
||||
| `model` | string | ✅ | DeepSeek native models + common aliases (`gpt-5.5`, `gpt-5.4-mini`, `gpt-5.3-codex`, `o3`, `claude-opus-4-6`, `gemini-2.5-pro`, `gemini-2.5-flash`, etc.) |
|
||||
| `model` | string | ✅ | DeepSeek native models + common aliases (`gpt-5.5`, `gpt-5.4-mini`, `gpt-5.3-codex`, `o3`, `claude-opus-4-6`, `gemini-2.5-pro`, `gemini-3.1-pro`, `gemini-3-flash`, etc.); `-nothinking` suffixes force thinking / reasoning off |
|
||||
| `messages` | array | ✅ | OpenAI-style messages |
|
||||
| `stream` | boolean | ❌ | Default `false` |
|
||||
| `tools` | array | ❌ | Function calling schema |
|
||||
@@ -345,7 +355,8 @@ When `tools` is present, DS2API performs anti-leak handling:
|
||||
|
||||
Additional notes:
|
||||
|
||||
- The parser treats DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`) and legacy canonical XML tool blocks (`<tool_calls>` / `<invoke name="...">` / `<parameter name="...">`) as executable tool calls. DSML is normalized back to XML at the parser entry; internal parsing remains XML-based. Legacy `<tools>`, `<tool_call>`, `<tool_name>`, `<param>`, `<function_call>`, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text.
|
||||
- The parser treats the recommended DSML shell tool blocks (`<｜DSML｜tool_calls>` / `<｜DSML｜invoke name="...">` / `<｜DSML｜parameter name="...">`), halfwidth DSML shell blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), DSML wrapper aliases (`<dsml|tool_calls>`, `<｜tool_calls>`, `<|tool_calls>`), common DSML separator drift (`<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`), collapsed DSML local names (`<DSMLtool_calls>` / `<DSMLinvoke>` / `<DSMLparameter>`), control-separator drift (`<DSML␂tool_calls>` / raw STX `\x02`), CJK angle bracket and trailing attribute separator drift (`<DSM|parameter name="command"|>...〈/DSM|parameter〉`), arbitrary protocol prefixes (`<proto💥tool_calls>`), and legacy canonical XML tool blocks (`<tool_calls>` / `<invoke name="...">` / `<parameter name="...">`) as executable tool calls. These shells normalize back to XML first, while internal parsing remains XML-based. Legacy `<tools>`, `<tool_call>`, `<tool_name>`, `<param>`, `<function_call>`, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text; complete but malformed wrappers are also released as plain text.
|
||||
- The parser no longer drops tool calls solely because parameter values are empty; explicit empty strings or whitespace-only parameters become empty strings in structured `tool_calls`. Prompting still tells the model not to emit blank parameters, and missing/empty argument rejection belongs in the tool executor or client schema validation.
|
||||
- If the final visible response text is empty but the reasoning stream contains an executable tool call, Chat / Responses emits a standard OpenAI `tool_calls` / `function_call` output during finalization. If thinking/reasoning was not enabled by the client, that reasoning text is used only for detection and is not exposed as visible text or `reasoning_content`.
|
||||
- `tool_calls` shown inside fenced markdown code blocks (for example, ```json ... ```) are treated as examples, not executable calls.
|
||||
|
||||
@@ -617,6 +628,20 @@ Returns SSE (`text/event-stream`), each chunk as `data: <json>`:
|
||||
|
||||
---
|
||||
|
||||
## Ollama API
|
||||
|
||||
- `POST /api/show` request body: `{"model":"<model-id>"}`.
|
||||
- Response uses lowercase `id` (not `ID`) and includes `capabilities` for Ollama-style clients and strict schemas.
|
||||
|
||||
Example response:
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "deepseek-v4-flash",
|
||||
"capabilities": ["tools", "thinking"]
|
||||
}
|
||||
```
|
||||
|
||||
## Admin API
|
||||
|
||||
### `POST /admin/login`
|
||||
@@ -660,11 +685,13 @@ Requires JWT: `Authorization: Bearer <jwt>`
|
||||
|
||||
### `GET /admin/vercel/config`
|
||||
|
||||
Returns Vercel preconfiguration status.
|
||||
Returns Vercel preconfiguration status. Environment variables are preferred, then the saved `vercel` config block is used as a fallback.
|
||||
|
||||
```json
|
||||
{
|
||||
"has_token": true,
|
||||
"token_preview": "vc****en",
|
||||
"token_source": "config",
|
||||
"project_id": "prj_xxx",
|
||||
"team_id": null
|
||||
}
|
||||
@@ -685,6 +712,12 @@ Returns sanitized config, including both `keys` and `api_keys`.
|
||||
"env_source_present": true,
|
||||
"env_writeback_enabled": true,
|
||||
"config_path": "/data/config.json",
|
||||
"vercel": {
|
||||
"has_token": true,
|
||||
"token_preview": "vc****en",
|
||||
"project_id": "prj_xxx",
|
||||
"team_id": ""
|
||||
},
|
||||
"accounts": [
|
||||
{
|
||||
"identifier": "user@example.com",
|
||||
@@ -736,6 +769,7 @@ Reads runtime settings and status, including:
|
||||
- `responses` / `embeddings`
|
||||
- `auto_delete` (`mode`: `none` / `single` / `all`; legacy `sessions=true` is still treated as `all`)
|
||||
- `current_input_file` (`enabled` defaults to `true`, plus `min_chars`)
|
||||
- `thinking_injection` (`enabled` defaults to `true`, `prompt`, and `default_prompt`)
|
||||
- `model_aliases`
|
||||
- `env_backed`, `needs_vercel_sync`
|
||||
- `toolcall` policy is fixed to `feature_match + high` and is no longer returned or editable via settings
|
||||
@@ -750,6 +784,7 @@ Hot-updates runtime settings. Supported fields:
|
||||
- `embeddings.provider`
|
||||
- `auto_delete.mode`
|
||||
- `current_input_file.enabled` / `current_input_file.min_chars`
|
||||
- `thinking_injection.enabled` / `thinking_injection.prompt`
|
||||
- `model_aliases`
|
||||
- `toolcall` policy is fixed and is no longer writable through settings
|
||||
|
||||
@@ -1096,11 +1131,11 @@ The success payload includes `sample_id`, `dir`, `meta_path`, and `upstream_path
|
||||
|
||||
| Field | Required | Notes |
|
||||
| --- | --- | --- |
|
||||
| `vercel_token` | ❌ | If empty or `__USE_PRECONFIG__`, read env |
|
||||
| `project_id` | ❌ | Fallback: `VERCEL_PROJECT_ID` |
|
||||
| `team_id` | ❌ | Fallback: `VERCEL_TEAM_ID` |
|
||||
| `vercel_token` | ❌ | If empty or `__USE_PRECONFIG__`, read env, then saved config |
|
||||
| `project_id` | ❌ | Fallback: `VERCEL_PROJECT_ID`, then saved config |
|
||||
| `team_id` | ❌ | Fallback: `VERCEL_TEAM_ID`, then saved config |
|
||||
| `auto_validate` | ❌ | Default `true` |
|
||||
| `save_credentials` | ❌ | Default `true` |
|
||||
| `save_credentials` | ❌ | Default `true`; saves explicitly supplied Vercel credentials for the next sync |
|
||||
|
||||
**Success response**:
|
||||
|
||||
@@ -1230,7 +1265,7 @@ Clients should handle HTTP status code plus `error` / `detail` fields.
|
||||
| Code | Meaning |
|
||||
| --- | --- |
|
||||
| `401` | Authentication failed (invalid key/token, or expired admin JWT) |
|
||||
| `429` | Too many requests (exceeded inflight + queue capacity; current responses do not include `Retry-After`) |
|
||||
| `429` | Too many requests (exceeded inflight + queue capacity, or upstream thinking-only output with no visible answer; managed-account mode first tries one alternate-account fresh retry; current responses do not include `Retry-After`) |
|
||||
| `503` | Model unavailable or upstream error |
|
||||
|
||||
---
|
||||
|
||||
66
API.md
66
API.md
@@ -18,6 +18,7 @@
|
||||
- [OpenAI 兼容接口](#openai-兼容接口)
|
||||
- [Claude 兼容接口](#claude-兼容接口)
|
||||
- [Gemini 兼容接口](#gemini-兼容接口)
|
||||
- [Ollama 兼容接口](#ollama-兼容接口)
|
||||
- [Admin 接口](#admin-接口)
|
||||
- [错误响应格式](#错误响应格式)
|
||||
- [cURL 示例](#curl-示例)
|
||||
@@ -31,7 +32,7 @@
|
||||
| Base URL | `http://localhost:5001` 或你的部署域名 |
|
||||
| 默认 Content-Type | `application/json` |
|
||||
| 健康检查 | `GET /healthz`、`GET /readyz` |
|
||||
| CORS | 已启用(统一覆盖 `/v1/*`、`/anthropic/*`、`/v1beta/models/*`、`/admin/*`;浏览器有 `Origin` 时回显该 Origin,否则为 `*`;默认允许 `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`, `X-Goog-Api-Key`, `Anthropic-Version`, `Anthropic-Beta`,并会放行预检里声明的第三方请求头,如 `x-stainless-*`;Vercel 上 `/v1/chat/completions` 的 Node Runtime 也对齐相同行为;内部专用头 `X-Ds2-Internal-Token` 仍被拦截) |
|
||||
| CORS | 已启用(统一覆盖 `/v1/*`、`/anthropic/*`、`/v1beta/models/*`、`/api/*`、`/admin/*`;浏览器有 `Origin` 时回显该 Origin,否则为 `*`;默认允许 `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Ds2-Source`, `X-Vercel-Protection-Bypass`, `X-Goog-Api-Key`, `Anthropic-Version`, `Anthropic-Beta`,并会放行预检里声明的第三方请求头,如 `x-stainless-*`;Vercel 上 `/v1/chat/completions` 的 Node Runtime 也对齐相同行为;内部专用头 `X-Ds2-Internal-Token` 仍被拦截) |
|
||||
|
||||
- 所有 JSON 请求体都必须是合法 UTF-8;非法字节序列会在入站阶段被拒绝为 `400 invalid json`。
|
||||
|
||||
@@ -39,10 +40,10 @@
|
||||
|
||||
- OpenAI / Claude / Gemini 三套协议已统一挂在同一 `chi` 路由树上,由 `internal/server/router.go` 负责装配。
|
||||
- 适配器层职责收敛为:**请求归一化 → DeepSeek 调用 → 协议形态渲染**,减少历史版本中“同能力多处实现”的分叉。
|
||||
- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受 DSML wrapper 别名 `<dsml|tool_calls>`、`<|tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`)、`DSML` 与工具标签名黏连的常见 typo(如 `<DSMLtool_calls>`),以及旧式 canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`。实现上采用窄容错结构扫描:只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径,裸 `<invoke>` 不计为已支持语法;流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量(如 `123`、`true`、`null`、数组或对象),会按结构化值输出,不再一律当作字符串;若 CDATA 偶发漏闭合,则会在最终 parse / flush 恢复阶段做窄修复,尽量保住已完整包裹的外层工具调用。
|
||||
- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致：推荐模型输出全角分隔符 DSML 外壳 `<｜DSML｜tool_calls>` → `<｜DSML｜invoke name="...">` → `<｜DSML｜parameter name="...">`；兼容层也接受半角 DSML wrapper `<|DSML|tool_calls>`、DSML wrapper 别名 `<dsml|tool_calls>`、`<｜tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态（如 `<|DSML tool_calls>`）、`DSML` 与工具标签名黏连的常见 typo（如 `<DSMLtool_calls>`）、控制分隔符漂移（如 `<DSML␂tool_calls>` / 原始 STX `\x02`）、CJK 尖括号与属性尾部分隔符漂移（如 `<DSM|parameter name="command"|>...〈/DSM|parameter〉`）、任意协议前缀壳（如 `<proto💥tool_calls>`），以及旧式 canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`。实现上采用结构扫描：只要固定本地标签名是 `tool_calls` / `invoke` / `parameter`，前缀壳会在解析入口归一化；只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径，裸 `<invoke>` 不计为已支持语法；流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量（如 `123`、`true`、`null`、数组或对象），会按结构化值输出，不再一律当作字符串；显式空字符串和纯空白参数会结构化保留为空字符串，是否拒绝缺参由工具执行侧决定；完整但 malformed 的 wrapper 会作为普通文本释放，不会吞掉或伪造成工具调用；若 CDATA 偶发漏闭合，则会在最终 parse / flush 恢复阶段做窄修复，尽量保住已完整包裹的外层工具调用。
|
||||
- `Admin API` 将配置与运行时策略分开:`/admin/config*` 管静态配置,`/admin/settings*` 管运行时行为。
|
||||
- 当上游返回 thinking-only 响应(模型输出了推理链但无可见文本)时,非流式补全会自动重试一次:以多轮对话 follow-up 方式追加 prompt 后缀 `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` 并设置 `parent_message_id` 在同一 DeepSeek session 内让模型重新输出;重试最大 1 次。
|
||||
- 引用标记剥离(strip reference markers)当前为固定开启的运行时行为,所有协议适配层统一生效。
|
||||
- 当上游返回 thinking-only 响应(模型输出了推理链但无可见文本)时,Go 主路径的流式与非流式补全都会先自动重试一次:以多轮对话 follow-up 方式追加 prompt 后缀 `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` 并设置 `parent_message_id` 在同一 DeepSeek session 内让模型重新输出;同账号重试最大 1 次。若同账号重试后仍即将返回 `429 upstream_empty_output`,托管账号模式会在返回 429 前自动切换到下一个可用账号,新建 session,用原始 payload 再 fresh retry 一次。
|
||||
- 引用标记处理边界:流式输出默认隐藏 `[citation:N]` / `[reference:N]` 这类上游内部占位符;非流式输出默认把 DeepSeek 搜索引用标记转换为 Markdown 引用链接。
|
||||
|
||||
---
|
||||
|
||||
@@ -85,7 +86,7 @@ Vercel 一键部署可先只填 `DS2API_ADMIN_KEY`,部署后在 `/admin` 导
|
||||
- token 在 `config.keys` 中 → **托管账号模式**,自动轮询选择账号
|
||||
- token 不在 `config.keys` 中 → **直通 token 模式**,直接作为 DeepSeek token 使用
|
||||
|
||||
**可选请求头**:`X-Ds2-Target-Account: <email_or_mobile>` — 指定使用某个托管账号;如果目标账号不存在,或管理账号队列已耗尽,相关业务请求会返回 `429`,当前不会附带 `Retry-After` 头。若账号存在但登录/刷新失败,则返回对应的 `401` 或上游错误。
|
||||
**可选请求头**:`X-Ds2-Target-Account: <email_or_mobile>` — 指定使用某个托管账号;如果目标账号不存在,或管理账号队列已耗尽,相关业务请求会返回 `429`,当前不会附带 `Retry-After` 头。若账号存在但登录/刷新失败,则返回对应的 `401` 或上游错误。未指定目标账号时,托管账号模式的 completion 空输出 429 会先尝试切到另一个可用账号 fresh retry 一次;指定目标账号或无其他可用账号时不会切号。
|
||||
Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=` 作为凭据来源。
|
||||
|
||||
### Admin 接口(`/admin/*`)
|
||||
@@ -125,6 +126,9 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=`
|
||||
| POST | `/v1beta/models/{model}:streamGenerateContent` | 业务 | Gemini 流式 |
|
||||
| POST | `/v1/models/{model}:generateContent` | 业务 | Gemini 非流式兼容路径 |
|
||||
| POST | `/v1/models/{model}:streamGenerateContent` | 业务 | Gemini 流式兼容路径 |
|
||||
| GET | `/api/version` | 无 | Ollama 版本接口 |
|
||||
| GET | `/api/tags` | 无 | Ollama 模型列表 |
|
||||
| POST | `/api/show` | 无 | Ollama 单模型能力查询(返回 `id` 与 `capabilities`) |
|
||||
| POST | `/admin/login` | 无 | 管理登录 |
|
||||
| GET | `/admin/verify` | JWT | 校验管理 JWT |
|
||||
| GET | `/admin/vercel/config` | Admin | 读取 Vercel 预配置 |
|
||||
@@ -172,6 +176,8 @@ Gemini 兼容客户端还可以使用 `x-goog-api-key`、`?key=` 或 `?api_key=`
|
||||
|
||||
OpenAI `/v1/*` 仍是规范路径。对于只配置 DS2API 根地址的客户端,同一套 OpenAI handler 也通过根路径快捷路由暴露:`/models`、`/models/{id}`、`/chat/completions`、`/responses`、`/responses/{response_id}`、`/embeddings`、`/files`、`/files/{file_id}`。
|
||||
|
||||
服务器端记录本质上是 DeepSeek 上游响应归档:OpenAI Chat、OpenAI Responses、Claude Messages、Gemini GenerateContent 等直连 DeepSeek 的生成接口,在收到上游响应后会于各协议回译/裁剪前写入记录;列表按请求创建时间倒序展示,流式请求会在生成过程中持续刷新状态与详情。WebUI「API 测试」发出的请求也会进入该记录。
|
||||
|
||||
---
|
||||
|
||||
## 健康检查
|
||||
@@ -225,16 +231,15 @@ OpenAI `/v1/*` 仍是规范路径。对于只配置 DS2API 根地址的客户端
|
||||
1. 先匹配 DeepSeek 原生模型。
|
||||
2. 再匹配 `model_aliases` 精确映射。
|
||||
3. 如果请求名以 `-nothinking` 结尾,则在最终解析出的规范模型上追加对应的无思考变体。
|
||||
4. 未命中时按模型家族规则回退(如 `o*`、`gpt-*`、`claude-*`)。
|
||||
5. 仍未命中则返回 `invalid_request_error`。
|
||||
4. 仍未命中则返回 `invalid_request_error`。当前不会按未知模型家族做启发式兜底;需要新增兼容名时请通过 `model_aliases` 明确配置。
|
||||
|
||||
当前内置默认 alias 来自 `internal/config/models.go`,`config.model_aliases` 会在运行时覆盖或补充同名映射。节选:
|
||||
|
||||
- OpenAI / Codex:`gpt-4o`、`gpt-4.1`、`gpt-5`、`gpt-5.5`、`gpt-5-codex`、`gpt-5.3-codex`、`codex-mini-latest`
|
||||
- OpenAI reasoning:`o1`、`o3`、`o3-deep-research`、`o4-mini`
|
||||
- Claude:`claude-opus-4-6`、`claude-sonnet-4-6`、`claude-haiku-4-5`、`claude-3-5-sonnet-latest`
|
||||
- Gemini:`gemini-2.5-pro`、`gemini-2.5-flash`、`gemini-pro-vision`
|
||||
- 其他兼容族:`llama-*`、`qwen-*`、`mistral-*`、`command-*` 会按家族启发式回退
|
||||
- Gemini:`gemini-2.5-pro`、`gemini-2.5-flash`、`gemini-3.1-pro`、`gemini-3-pro`、`gemini-3-flash`、`gemini-3.1-flash-lite`、`gemini-pro-vision`
|
||||
- 其他内置精确 alias:`llama-3.1-70b-instruct`、`qwen-max`
|
||||
|
||||
上述 alias 若在请求名后追加 `-nothinking` 后缀,也会映射到对应的强制关闭 thinking 版本。
|
||||
当前视觉能力仅对应 `deepseek-v4-vision` / `deepseek-v4-vision-nothinking`,不会解析出独立的 `vision-search` 变体。
|
||||
@@ -243,6 +248,8 @@ OpenAI `/v1/*` 仍是规范路径。对于只配置 DS2API 根地址的客户端
|
||||
|
||||
### `POST /v1/chat/completions`
|
||||
|
||||
> 路径说明:除规范路径 `/v1/chat/completions` 外,也支持根路径快捷别名 `/chat/completions`。在 Vercel Runtime 上,`vercel.json` 仅把规范路径 `/v1/chat/completions` 重写到 Node 流式桥接;根路径快捷别名仍走 Go 主链路。因此 Vercel 上需要实时流式时请使用 `/v1/chat/completions`。
|
||||
|
||||
**请求头**:
|
||||
|
||||
```http
|
||||
@@ -254,7 +261,7 @@ Content-Type: application/json
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
| --- | --- | --- | --- |
|
||||
| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias(如 `gpt-5.5`、`gpt-5.4-mini`、`gpt-5.3-codex`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-6`、`gemini-2.5-pro`、`gemini-2.5-flash` 等);若模型名带 `-nothinking` 后缀,则强制关闭 thinking / reasoning |
|
||||
| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias(如 `gpt-5.5`、`gpt-5.4-mini`、`gpt-5.3-codex`、`o3`、`claude-opus-4-6`、`claude-sonnet-4-6`、`gemini-2.5-pro`、`gemini-3.1-pro`、`gemini-3-flash` 等);若模型名带 `-nothinking` 后缀,则强制关闭 thinking / reasoning |
|
||||
| `messages` | array | ✅ | OpenAI 风格消息数组 |
|
||||
| `stream` | boolean | ❌ | 默认 `false` |
|
||||
| `tools` | array | ❌ | Function Calling 定义 |
|
||||
@@ -350,7 +357,8 @@ data: [DONE]
|
||||
补充说明:
|
||||
|
||||
- **非代码块上下文**下,工具负载即使与普通文本混合,也会按特征识别并产出可执行 tool call(前后普通文本仍可透传)。
|
||||
- 解析器当前把 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(`<dsml|tool_calls>`、`<|tool_calls>`、`<|tool_calls>`)、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`)、`DSML` 与工具标签名黏连的常见 typo(如 `<DSMLtool_calls>` / `<DSMLinvoke>` / `<DSMLparameter>`)和旧式 canonical XML 工具块(`<tool_calls>` / `<invoke name="...">` / `<parameter name="...">`)作为可执行调用解析;DSML 会先归一化回 XML,内部仍以 XML 解析语义为准。旧式 `<tools>`、`<tool_call>`、`<tool_name>`、`<param>`、`<function_call>`、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理。
|
||||
- 解析器当前把推荐 DSML 外壳（`<｜DSML｜tool_calls>` / `<｜DSML｜invoke name="...">` / `<｜DSML｜parameter name="...">`）、半角 DSML 外壳（`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`）、DSML wrapper 别名（`<dsml|tool_calls>`、`<｜tool_calls>`、`<|tool_calls>`）、常见 DSML 分隔符漏写形态（如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`）、`DSML` 与工具标签名黏连的常见 typo（如 `<DSMLtool_calls>` / `<DSMLinvoke>` / `<DSMLparameter>`）、控制分隔符漂移（如 `<DSML␂tool_calls>` / 原始 STX `\x02`）、CJK 尖括号与属性尾部分隔符漂移（如 `<DSM|parameter name="command"|>...〈/DSM|parameter〉`）、任意协议前缀壳（如 `<proto💥tool_calls>`）和旧式 canonical XML 工具块（`<tool_calls>` / `<invoke name="...">` / `<parameter name="...">`）作为可执行调用解析；这些前缀壳会先归一化回 XML，内部仍以 XML 解析语义为准。旧式 `<tools>`、`<tool_call>`、`<tool_name>`、`<param>`、`<function_call>`、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理；完整但 malformed 的 wrapper 同样会作为普通文本释放。
|
||||
- 解析层不会因为参数值为空而丢弃工具调用;显式空字符串或纯空白参数会按空字符串进入结构化 `tool_calls`。Prompt 会要求模型不要主动输出空参数,缺参/空命令的拒绝应由工具执行侧或客户端 schema 校验负责。
|
||||
- 当最终可见正文为空但思维链里包含可执行工具调用时,Chat / Responses 会在收尾阶段补发标准 OpenAI `tool_calls` / `function_call` 输出;如果客户端未开启 thinking / reasoning,该思维链只用于检测,不会作为可见正文或 `reasoning_content` 暴露。
|
||||
- Markdown fenced code block(例如 ```json ... ```)中的 `tool_calls` 仅视为示例文本,不会被执行。
|
||||
|
||||
@@ -626,6 +634,20 @@ data: {"type":"message_stop"}
|
||||
|
||||
---
|
||||
|
||||
## Ollama 兼容接口
|
||||
|
||||
- `POST /api/show` 请求体:`{"model":"<model-id>"}`。
|
||||
- 响应字段使用小写 `id`(不是 `ID`),并返回 `capabilities` 数组,便于与 Ollama 风格客户端/严格 schema 对齐。
|
||||
|
||||
示例响应:
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "deepseek-v4-flash",
|
||||
"capabilities": ["tools", "thinking"]
|
||||
}
|
||||
```
|
||||
|
||||
## Admin 接口
|
||||
|
||||
### `POST /admin/login`
|
||||
@@ -669,11 +691,13 @@ data: {"type":"message_stop"}
|
||||
|
||||
### `GET /admin/vercel/config`
|
||||
|
||||
返回 Vercel 预配置状态。
|
||||
返回 Vercel 预配置状态。优先读取环境变量,其次回退到已保存的 `vercel` 配置块。
|
||||
|
||||
```json
|
||||
{
|
||||
"has_token": true,
|
||||
"token_preview": "vc****en",
|
||||
"token_source": "config",
|
||||
"project_id": "prj_xxx",
|
||||
"team_id": null
|
||||
}
|
||||
@@ -694,6 +718,12 @@ data: {"type":"message_stop"}
|
||||
"env_source_present": true,
|
||||
"env_writeback_enabled": true,
|
||||
"config_path": "/data/config.json",
|
||||
"vercel": {
|
||||
"has_token": true,
|
||||
"token_preview": "vc****en",
|
||||
"project_id": "prj_xxx",
|
||||
"team_id": ""
|
||||
},
|
||||
"accounts": [
|
||||
{
|
||||
"identifier": "user@example.com",
|
||||
@@ -745,6 +775,7 @@ data: {"type":"message_stop"}
|
||||
- `responses` / `embeddings`
|
||||
- `auto_delete`(`mode`:`none` / `single` / `all`;旧配置 `sessions=true` 仍按 `all` 处理)
|
||||
- `current_input_file`(`enabled` 默认返回 `true`、`min_chars`)
|
||||
- `thinking_injection`(`enabled` 默认返回 `true`、`prompt`、`default_prompt`)
|
||||
- `model_aliases`
|
||||
- `env_backed`、`needs_vercel_sync`
|
||||
- `toolcall` 策略已固定为 `feature_match + high`,不再通过 settings 返回或修改
|
||||
@@ -759,6 +790,7 @@ data: {"type":"message_stop"}
|
||||
- `embeddings.provider`
|
||||
- `auto_delete.mode`
|
||||
- `current_input_file.enabled` / `current_input_file.min_chars`
|
||||
- `thinking_injection.enabled` / `thinking_injection.prompt`
|
||||
- `model_aliases`
|
||||
- `toolcall` 策略已固定,不再作为可写入字段
|
||||
|
||||
@@ -1107,11 +1139,11 @@ data: {"type":"message_stop"}
|
||||
|
||||
| 字段 | 必填 | 说明 |
|
||||
| --- | --- | --- |
|
||||
| `vercel_token` | ❌ | 空或 `__USE_PRECONFIG__` 则读环境变量 |
|
||||
| `project_id` | ❌ | 空则读 `VERCEL_PROJECT_ID` |
|
||||
| `team_id` | ❌ | 空则读 `VERCEL_TEAM_ID` |
|
||||
| `vercel_token` | ❌ | 空或 `__USE_PRECONFIG__` 则读环境变量,再回退到已保存配置 |
|
||||
| `project_id` | ❌ | 空则读 `VERCEL_PROJECT_ID`,再回退到已保存配置 |
|
||||
| `team_id` | ❌ | 空则读 `VERCEL_TEAM_ID`,再回退到已保存配置 |
|
||||
| `auto_validate` | ❌ | 默认 `true` |
|
||||
| `save_credentials` | ❌ | 默认 `true` |
|
||||
| `save_credentials` | ❌ | 默认 `true`;保存本次显式填写的 Vercel 凭据,供下次同步复用 |
|
||||
|
||||
**成功响应**:
|
||||
|
||||
@@ -1241,7 +1273,7 @@ Gemini 路由使用 Google 风格错误结构:
|
||||
| 状态码 | 说明 |
|
||||
| --- | --- |
|
||||
| `401` | 鉴权失败(key/token 无效,或 Admin JWT 过期) |
|
||||
| `429` | 请求过多(超出并发上限 + 等待队列;当前不附带 `Retry-After` 头) |
|
||||
| `429` | 请求过多(超出并发上限 + 等待队列,或上游账号 thinking-only 后仍无可见输出;托管账号模式会先尝试一次切号 fresh retry;当前不附带 `Retry-After` 头) |
|
||||
| `503` | 模型不可用或上游服务异常 |
|
||||
|
||||
---
|
||||
|
||||
28
README.MD
28
README.MD
@@ -23,6 +23,16 @@
|
||||
|
||||
【感谢Linux.do社区及GitHub社区各位开发者对项目的支持与贡献】
|
||||
|
||||
## Star History
|
||||
|
||||
<a href="https://www.star-history.com/?repos=cjackhwang%2Fds2api&type=date&legend=top-left">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/chart?repos=cjackhwang/ds2api&type=date&theme=dark&legend=top-left" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/chart?repos=cjackhwang/ds2api&type=date&legend=top-left" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/chart?repos=cjackhwang/ds2api&type=date&legend=top-left" />
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
> **重要免责声明**
|
||||
>
|
||||
> 本仓库仅供学习、研究、个人实验和内部验证使用,不提供任何形式的商业授权、适用性保证或结果保证。
|
||||
@@ -124,7 +134,8 @@ flowchart LR
|
||||
| OpenAI 兼容 | `GET /v1/models`、`GET /v1/models/{id}`、`POST /v1/chat/completions`、`POST /v1/responses`、`GET /v1/responses/{response_id}`、`POST /v1/embeddings`、`POST /v1/files`、`GET /v1/files/{file_id}` |
|
||||
| Claude 兼容 | `GET /anthropic/v1/models`、`POST /anthropic/v1/messages`、`POST /anthropic/v1/messages/count_tokens`(及快捷路径 `/v1/messages`、`/messages`) |
|
||||
| Gemini 兼容 | `POST /v1beta/models/{model}:generateContent`、`POST /v1beta/models/{model}:streamGenerateContent`(及 `/v1/models/{model}:*` 路径) |
|
||||
| 统一 CORS 兼容 | `/v1/*`、`/anthropic/*`、`/v1beta/models/*`、`/admin/*` 统一走同一套 CORS 策略;Vercel 上 `/v1/chat/completions` 的 Node Runtime 也对齐相同放行规则,尽量减少第三方预检请求头限制 |
|
||||
| Ollama 兼容 | `GET /api/version`、`GET /api/tags`、`POST /api/show` |
|
||||
| 统一 CORS 兼容 | `/v1/*`、`/anthropic/*`、`/v1beta/models/*`、`/api/*`、`/admin/*` 统一走同一套 CORS 策略;Vercel 上 `/v1/chat/completions` 的 Node Runtime 也对齐相同放行规则,尽量减少第三方预检请求头限制 |
|
||||
| 多账号轮询 | 自动 token 刷新、邮箱/手机号双登录方式 |
|
||||
| 并发队列控制 | 每账号 in-flight 上限 + 等待队列,动态计算建议并发值 |
|
||||
| DeepSeek PoW | 纯 Go 高性能实现(DeepSeekHashV1),毫秒级响应 |
|
||||
@@ -185,11 +196,11 @@ OpenAI `/v1/*` 仍是推荐的规范路径;同时支持 `/models`、`/chat/com
|
||||
- `ANTHROPIC_BASE_URL` 推荐直接指向 DS2API 根地址(例如 `http://127.0.0.1:5001`),Claude Code 会请求 `/v1/messages?beta=true`。
|
||||
- `ANTHROPIC_API_KEY` 需要与 `config.json` 中 `keys` 一致;建议同时保留常规 key 与 `sk-ant-*` 形态 key,兼容不同客户端校验习惯。
|
||||
- 若系统设置了代理,建议对 DS2API 地址配置 `NO_PROXY=127.0.0.1,localhost,<你的主机IP>`,避免本地回环请求被代理拦截。
|
||||
- 如遇“工具调用输出成文本、未执行”问题,请优先检查模型输出是否为推荐的 DSML 工具块:`<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`。兼容层也接受旧式 canonical XML:`<tool_calls><invoke name="..."><parameter name="...">...`;旧式 `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`、`<function_call>`、`tool_use` 或纯 JSON `tool_calls` 片段不会执行。
|
||||
- 如遇“工具调用输出成文本、未执行”问题,请优先检查模型输出是否为推荐的全角分隔符 DSML 工具块:`<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`。兼容层也接受半角 DSML 与旧式 canonical XML:`<tool_calls><invoke name="..."><parameter name="...">...`;旧式 `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`、`<function_call>`、`tool_use` 或纯 JSON `tool_calls` 片段不会执行,会作为普通文本处理。
|
||||
|
||||
### Gemini 接口
|
||||
|
||||
Gemini 适配器将模型名通过 `model_aliases` 或内置规则映射到 DeepSeek 原生模型,支持 `generateContent` 和 `streamGenerateContent` 两种调用方式,并完整支持 Tool Calling(`functionDeclarations` → `functionCall` 输出)。若 Gemini 模型名带 `-nothinking` 后缀,例如 `gemini-2.5-pro-nothinking`,会映射到对应的强制关闭思考模型。
|
||||
Gemini 适配器将模型名通过 `model_aliases` 或内置精确 alias 映射到 DeepSeek 原生模型(覆盖 `gemini-2.5-*`、`gemini-3*`、`gemini-pro-vision` 等常见名称),支持 `generateContent` 和 `streamGenerateContent` 两种调用方式,并完整支持 Tool Calling(`functionDeclarations` → `functionCall` 输出)。若 Gemini 模型名带 `-nothinking` 后缀,例如 `gemini-2.5-pro-nothinking`,会映射到对应的强制关闭思考模型。
|
||||
|
||||
## 快速开始
|
||||
|
||||
@@ -285,13 +296,13 @@ cp config.example.json config.json
|
||||
base64 < config.json | tr -d '\n'
|
||||
```
|
||||
|
||||
> **流式说明**:`/v1/chat/completions` 在 Vercel 上默认走 `api/chat-stream.js`(Node Runtime)以保证实时 SSE。鉴权、账号选择、会话/PoW 准备仍由 Go 内部 prepare 接口完成;流式响应(含 `tools`)在 Node 侧执行与 Go 对齐的输出组装与防泄漏处理。虽然这里只有 OpenAI chat 流式走 Node,但 CORS 放行策略仍与 Go 主路由保持一致,统一覆盖第三方客户端预检场景。
|
||||
> **流式说明**:OpenAI Chat 流式在 Vercel 上会由 `api/chat-stream.js`(Node Runtime)承接,但 `vercel.json` 只把规范路径 `/v1/chat/completions` 重写到 Node;根路径快捷别名 `/chat/completions` 仍走 Go 主链路。鉴权、账号选择、会话/PoW 准备仍由 Go 内部 prepare 接口完成;流式响应(含 `tools`)在 Node 侧执行与 Go 对齐的输出组装与防泄漏处理。Vercel 上需要实时流式时请使用 `/v1/chat/completions`。
|
||||
|
||||
详细部署说明请参阅 [部署指南](docs/DEPLOY.md)。
|
||||
|
||||
### 方式四:本地源码运行
|
||||
|
||||
**前置要求**:Go 1.26+,Node.js `20.19+` 或 `22.12+`(仅在需要构建 WebUI 时);同时确保 `npm` 可用,建议 `npm 10+`
|
||||
**前置要求**:Go 1.26+,Node.js `20.19+` 或 `22.12+`(仅在需要构建 WebUI 时;CI / Docker 构建使用 Node 24);同时确保 `npm` 可用,建议 `npm 10+`
|
||||
|
||||
```bash
|
||||
# 1. 克隆仓库
|
||||
@@ -310,7 +321,7 @@ go run ./cmd/ds2api
|
||||
|
||||
服务实际绑定:`0.0.0.0:5001`,因此同一局域网设备通常也可以通过你的内网 IP 访问。
|
||||
|
||||
> **WebUI 自动构建**:本地首次启动时,若 `static/admin` 不存在,会自动尝试执行 `npm ci`(仅在缺少依赖时)和 `npm run build -- --outDir static/admin --emptyOutDir`(需要本机有 Node.js 和 npm)。你也可以手动构建:`./scripts/build-webui.sh`
|
||||
> **WebUI 自动构建**:本地首次启动时,若 WebUI 静态目录不存在,会自动尝试执行 `npm ci --prefix webui`(仅在缺少依赖时)和 `npm run build --prefix webui -- --outDir static/admin --emptyOutDir`(需要本机有 Node.js 和 npm;静态目录可用 `DS2API_STATIC_ADMIN_DIR` 覆盖)。你也可以手动构建:`./scripts/build-webui.sh`
|
||||
|
||||
## 配置说明
|
||||
|
||||
@@ -340,6 +351,7 @@ go run ./cmd/ds2api
|
||||
|
||||
可选请求头 `X-Ds2-Target-Account`:指定使用某个托管账号(值为 email 或 mobile)。
|
||||
如果指定账号不存在,或者当前管理账号队列已满,请求会返回 `429`;当前 `429` 不附带 `Retry-After` 头。若账号存在但登录/刷新失败,则返回对应的鉴权错误。
|
||||
未指定目标账号时,如果 completion 因上游 thinking-only 空输出在同账号补偿重试后仍将返回 `429 upstream_empty_output`,托管账号模式会自动切到下一个可用账号,新建 session,并用原始 payload 再 fresh retry 一次。
|
||||
Gemini 路由还可以使用 `x-goog-api-key`,或在没有认证头时使用 `?key=` / `?api_key=` 作为调用方凭据。
|
||||
|
||||
## 并发模型
|
||||
@@ -353,6 +365,7 @@ Gemini 路由还可以使用 `x-goog-api-key`,或在没有认证头时使用 `
|
||||
|
||||
- 当 in-flight 槽位满时,请求进入等待队列,**不会立即 429**
|
||||
- 超出总承载上限后才返回 `429 Too Many Requests`,当前响应不附带 `Retry-After`
|
||||
- completion 空输出类 429 会先做同账号补偿重试;托管账号模式还会在最终返回 429 前切到另一个可用账号 fresh retry 一次
|
||||
- `GET /admin/queue/status` 返回实时并发状态
|
||||
|
||||
## Tool Call 适配
|
||||
@@ -360,12 +373,13 @@ Gemini 路由还可以使用 `x-goog-api-key`,或在没有认证头时使用 `
|
||||
当请求中带 `tools` 时,DS2API 会做防泄漏处理与结构化转译:
|
||||
|
||||
1. 只在**非代码块上下文**启用执行型 toolcall 识别(代码块示例默认不触发)
|
||||
2. 解析层当前把 DSML 外壳视为推荐可执行调用:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容旧式 canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`。DSML 只是外壳别名,内部仍以 XML 解析语义为准;旧式 `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`、`<function_call>`、`tool_use` / antml 变体与纯 JSON `tool_calls` 片段都会按普通文本处理
|
||||
2. 解析层当前把全角分隔符 DSML 外壳视为推荐可执行调用:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容半角 DSML、旧式 canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`,以及若干 DSML 前缀/分隔符漂移。DSML 只是外壳别名,内部仍以 XML 解析语义为准;旧式 `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`、`<function_call>`、`tool_use` / antml 变体与纯 JSON `tool_calls` 片段都会按普通文本处理,完整但 malformed 的 wrapper 也会作为普通文本释放
|
||||
3. `responses` 流式严格使用官方 item 生命周期事件(`response.output_item.*`、`response.content_part.*`、`response.function_call_arguments.*`)
|
||||
4. `responses` 支持并执行 `tool_choice`(`auto`/`none`/`required`/强制函数);`required` 违规时非流式返回 `422`,流式返回 `response.failed`
|
||||
5. 客户端请求哪种协议,就按该协议返回工具调用(OpenAI/Claude/Gemini 各自原生结构);模型侧优先约束输出规范 XML,再由兼容层转译
|
||||
|
||||
> 说明:当前版本 parser 层以“尽量解析成功”为优先,所有格式合法的 XML 工具调用都会通过,不做工具名 allow-list 过滤。
|
||||
> 解析层会保留显式空字符串或纯空白参数;Prompt 会要求模型不要主动输出空参数,缺参/空命令的拒绝应由工具执行侧或客户端 schema 校验负责。
|
||||
>
|
||||
> 想评估“把工具调用封装成 XML 再输入模型”的方案,可参考:`docs/toolcall-semantics.md`。
|
||||
|
||||
|
||||
30
README.en.md
30
README.en.md
@@ -20,6 +20,16 @@ DS2API converts DeepSeek Web chat capability into OpenAI-compatible, Claude-comp
|
||||
|
||||
Documentation entry: [Docs Index](docs/README.md) / [Architecture](docs/ARCHITECTURE.en.md) / [API Reference](API.en.md)
|
||||
|
||||
## Star History
|
||||
|
||||
<a href="https://www.star-history.com/?repos=cjackhwang%2Fds2api&type=date&legend=top-left">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/chart?repos=cjackhwang/ds2api&type=date&theme=dark&legend=top-left" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/chart?repos=cjackhwang/ds2api&type=date&legend=top-left" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/chart?repos=cjackhwang/ds2api&type=date&legend=top-left" />
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
> **Important Disclaimer**
|
||||
>
|
||||
> This repository is provided for learning, research, personal experimentation, and internal validation only. It does not grant any commercial authorization and comes with no warranty of fitness, stability, or results.
|
||||
@@ -121,7 +131,8 @@ For the full module-by-module architecture and directory responsibilities, see [
|
||||
| OpenAI compatible | `GET /v1/models`, `GET /v1/models/{id}`, `POST /v1/chat/completions`, `POST /v1/responses`, `GET /v1/responses/{response_id}`, `POST /v1/embeddings`, `POST /v1/files`, `GET /v1/files/{file_id}` |
|
||||
| Claude compatible | `GET /anthropic/v1/models`, `POST /anthropic/v1/messages`, `POST /anthropic/v1/messages/count_tokens` (plus shortcut paths `/v1/messages`, `/messages`) |
|
||||
| Gemini compatible | `POST /v1beta/models/{model}:generateContent`, `POST /v1beta/models/{model}:streamGenerateContent` (plus `/v1/models/{model}:*` paths) |
|
||||
| Unified CORS compatibility | `/v1/*`, `/anthropic/*`, `/v1beta/models/*`, and `/admin/*` share one CORS policy; on Vercel, the Node Runtime for `/v1/chat/completions` mirrors the same relaxed preflight behavior for third-party clients |
|
||||
| Ollama compatible | `GET /api/version`, `GET /api/tags`, `POST /api/show` |
|
||||
| Unified CORS compatibility | `/v1/*`, `/anthropic/*`, `/v1beta/models/*`, `/api/*`, and `/admin/*` share one CORS policy; on Vercel, the Node Runtime for `/v1/chat/completions` mirrors the same relaxed preflight behavior for third-party clients |
|
||||
| Multi-account rotation | Auto token refresh, email/mobile dual login |
|
||||
| Concurrency control | Per-account in-flight limit + waiting queue, dynamic recommended concurrency |
|
||||
| DeepSeek PoW | Pure Go high-performance solver (DeepSeekHashV1), ms-level response |
|
||||
@@ -174,11 +185,11 @@ Besides the primary aliases above, `/anthropic/v1/models` also returns Claude 4.
|
||||
- Set `ANTHROPIC_BASE_URL` to the DS2API root URL (for example `http://127.0.0.1:5001`). Claude Code sends requests to `/v1/messages?beta=true`.
|
||||
- `ANTHROPIC_API_KEY` must match an entry in `keys` from `config.json`. Keeping both a regular key and an `sk-ant-*` style key improves client compatibility.
|
||||
- If your environment has proxy variables, set `NO_PROXY=127.0.0.1,localhost,<your_host_ip>` for DS2API to avoid proxy interception of local traffic.
|
||||
- If tool calls are rendered as plain text and not executed, first verify the model output uses the recommended DSML block: `<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`. DS2API also accepts legacy canonical XML: `<tool_calls><invoke name="..."><parameter name="...">...`; legacy `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`, `<function_call>`, `tool_use`, or standalone JSON `tool_calls` are not executed.
|
||||
- If tool calls are rendered as plain text and not executed, first verify the model output uses the recommended fullwidth-separator DSML block: `<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`. DS2API also accepts halfwidth DSML and legacy canonical XML: `<tool_calls><invoke name="..."><parameter name="...">...`; legacy `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`, `<function_call>`, `tool_use`, or standalone JSON `tool_calls` are not executed and stay plain text.
|
||||
|
||||
### Gemini Endpoint
|
||||
|
||||
The Gemini adapter maps model names to DeepSeek native models via `model_aliases` or built-in heuristics, supporting both `generateContent` and `streamGenerateContent` call patterns with full Tool Calling support (`functionDeclarations` → `functionCall` output).
|
||||
The Gemini adapter maps model names to DeepSeek native models via `model_aliases` or exact built-in aliases (covering common `gemini-2.5-*`, `gemini-3*`, and `gemini-pro-vision` names), supporting both `generateContent` and `streamGenerateContent` call patterns with full Tool Calling support (`functionDeclarations` → `functionCall` output). If the Gemini model name has a `-nothinking` suffix, such as `gemini-2.5-pro-nothinking`, it maps to the corresponding forced no-thinking model.
|
||||
|
||||
## Quick Start
|
||||
|
||||
@@ -273,13 +284,13 @@ Recommended: convert `config.json` to Base64 locally, then paste into `DS2API_CO
|
||||
base64 < config.json | tr -d '\n'
|
||||
```
|
||||
|
||||
> **Streaming note**: `/v1/chat/completions` on Vercel is routed to `api/chat-stream.js` (Node Runtime) for real-time SSE. Auth, account selection, and session/PoW preparation are still handled by the Go internal prepare endpoint; streaming output (including `tools`) is assembled on Node with Go-aligned anti-leak handling. This is the only interface family currently routed through Node, and its CORS allow behavior is kept aligned with the Go router so third-party preflight handling stays unified.
|
||||
> **Streaming note**: OpenAI Chat streaming on Vercel is routed to `api/chat-stream.js` (Node Runtime), but `vercel.json` rewrites only the canonical `/v1/chat/completions` path to Node; the root shortcut `/chat/completions` stays on the Go main path. Auth, account selection, and session/PoW preparation are still handled by the Go internal prepare endpoint; streaming output (including `tools`) is assembled on Node with Go-aligned anti-leak handling. Use `/v1/chat/completions` on Vercel when real-time streaming is required.
|
||||
|
||||
For detailed deployment instructions, see the [Deployment Guide](docs/DEPLOY.en.md).
|
||||
|
||||
### Option 4: Local Run
|
||||
|
||||
**Prerequisites**: Go 1.26+, Node.js `20.19+` or `22.12+` (only if building WebUI locally)
|
||||
**Prerequisites**: Go 1.26+, Node.js `20.19+` or `22.12+` (only if building WebUI locally; CI / Docker builds use Node 24), and a working `npm` (10+ recommended)
|
||||
|
||||
```bash
|
||||
# 1. Clone
|
||||
@@ -298,7 +309,7 @@ Default local URL: `http://127.0.0.1:5001`
|
||||
|
||||
The server actually binds to `0.0.0.0:5001`, so devices on the same LAN can usually reach it through your private IP as well.
|
||||
|
||||
> **WebUI auto-build**: On first local startup, if `static/admin` is missing, DS2API will auto-run `npm ci` (only when dependencies are missing) and `npm run build -- --outDir static/admin --emptyOutDir` (requires Node.js). You can also build manually: `./scripts/build-webui.sh`
|
||||
> **WebUI auto-build**: On first local startup, if the WebUI static directory is missing, DS2API auto-runs `npm ci --prefix webui` (only when dependencies are missing) and `npm run build --prefix webui -- --outDir static/admin --emptyOutDir` (requires Node.js and npm; `DS2API_STATIC_ADMIN_DIR` can override the static directory). You can also build manually: `./scripts/build-webui.sh`
|
||||
|
||||
## Configuration
|
||||
|
||||
@@ -326,6 +337,7 @@ For business endpoints (`/v1/*`, `/anthropic/*`, Gemini routes), DS2API supports
|
||||
| **Direct token** | If the token is not in `config.keys`, DS2API treats it as a DeepSeek token directly |
|
||||
|
||||
Optional header `X-Ds2-Target-Account`: Pin a specific managed account (value is email or mobile).
|
||||
When no target account is pinned, if a completion would end as `429 upstream_empty_output` after the same-account empty-output retry, managed-account mode switches to the next available account, creates a fresh session, and retries the original payload once.
|
||||
Gemini routes also accept `x-goog-api-key`, or `?key=` / `?api_key=` when no auth header is present.
|
||||
|
||||
## Concurrency Model
|
||||
@@ -338,7 +350,8 @@ Queue limit = DS2API_ACCOUNT_MAX_QUEUE (default = recommended concurrency)
|
||||
```
|
||||
|
||||
- When inflight slots are full, requests enter a waiting queue — **no immediate 429**
|
||||
- 429 is returned only when total load exceeds inflight + queue capacity
|
||||
- 429 is returned only when total load exceeds inflight + queue capacity; current responses do not include `Retry-After`
|
||||
- Completion empty-output 429s first get the same-account compensation retry; managed-account mode also tries one alternate-account fresh retry before returning the final 429
|
||||
- `GET /admin/queue/status` returns real-time concurrency state
|
||||
|
||||
## Tool Call Adaptation
|
||||
@@ -346,12 +359,13 @@ Queue limit = DS2API_ACCOUNT_MAX_QUEUE (default = recommended concurrency)
|
||||
When `tools` is present in the request, DS2API performs anti-leak handling and structured translation:
|
||||
|
||||
1. Toolcall feature matching is enabled only in **non-code-block context** (fenced examples are ignored)
|
||||
2. The parser now treats the DSML shell as the recommended executable tool-calling syntax: `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; it also accepts legacy canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`. DSML is a shell alias and internal parsing remains XML-based; legacy `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`, `<function_call>`, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text
|
||||
2. The parser treats the fullwidth-separator DSML shell as the recommended executable tool-calling syntax: `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; it also accepts halfwidth DSML, legacy canonical XML `<tool_calls>` → `<invoke name="...">` → `<parameter name="...">`, plus common DSML prefix/separator drift. DSML is a shell alias and internal parsing remains XML-based; legacy `<tools>` / `<tool_call>` / `<tool_name>` / `<param>`, `<function_call>`, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text, and complete but malformed wrappers are released as plain text too
|
||||
3. `responses` streaming strictly uses official item lifecycle events (`response.output_item.*`, `response.content_part.*`, `response.function_call_arguments.*`)
|
||||
4. `responses` supports and enforces `tool_choice` (`auto`/`none`/`required`/forced function); `required` violations return `422` for non-stream and `response.failed` for stream
|
||||
5. The output protocol follows the client request (OpenAI / Claude / Gemini native shapes); model-side prompting can prefer XML, and the compatibility layer handles the protocol-specific translation
|
||||
|
||||
> Note: the current parser still prioritizes “parse successfully whenever possible”; hard allow-list rejection for undeclared tool names is not enabled yet.
|
||||
> Explicit empty strings or whitespace-only parameters are preserved by the parser; prompting tells the model not to emit blank parameters, and missing/empty argument rejection belongs in the tool executor or client schema validation.
|
||||
|
||||
## Local Dev Packet Capture
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ ds2api/
|
||||
│ ├── claudeconv/ # Claude message conversion helpers
|
||||
│ ├── compat/ # Compatibility and regression helpers
|
||||
│ ├── assistantturn/ # Upstream output to canonical assistant turn / stream event semantics
|
||||
│ ├── completionruntime/ # Shared Go DeepSeek completion startup, non-stream collection, and retry
|
||||
│ ├── completionruntime/ # Shared Go DeepSeek completion startup, collection, empty-output/account-switch retry
|
||||
│ ├── config/ # Config loading/validation/hot reload
|
||||
│ ├── deepseek/ # DeepSeek upstream client/protocol/transport
|
||||
│ │ ├── client/ # Login/session/completion/upload/delete calls
|
||||
@@ -41,6 +41,7 @@ ds2api/
|
||||
│ │ ├── admin/ # Admin API root assembly and resource packages
|
||||
│ │ ├── claude/ # Claude HTTP protocol adapter
|
||||
│ │ ├── gemini/ # Gemini HTTP protocol adapter
|
||||
│ │ ├── ollama/ # Ollama-compatible model/capability query endpoints
|
||||
│ │ ├── openai/ # OpenAI HTTP surface
|
||||
│ │ │ ├── chat/ # Chat Completions execution entrypoint
|
||||
│ │ │ ├── responses/ # Responses API and response store
|
||||
@@ -57,6 +58,7 @@ ds2api/
|
||||
│ ├── prompt/ # Prompt composition
|
||||
│ ├── promptcompat/ # API request -> DeepSeek web-chat plain-text compatibility
|
||||
│ ├── rawsample/ # Raw sample read/write and management
|
||||
│ ├── responsehistory/ # DeepSeek upstream response archive and session snapshots
|
||||
│ ├── server/ # Router and middleware assembly
|
||||
│ │ └── data/ # Router/runtime helper data
|
||||
│ ├── sse/ # SSE parsing utilities
|
||||
@@ -188,10 +190,11 @@ flowchart LR
|
||||
- `internal/server`: router tree + middlewares (health, protocol routes, Admin/WebUI).
|
||||
- `internal/httpapi/openai/*`: OpenAI HTTP surface split into chat, responses, files, embeddings, history, and shared packages; chat/responses share the promptcompat, stream, and toolcall semantics.
|
||||
- `internal/httpapi/{claude,gemini}`: protocol adapters that normalize into the same prompt compatibility semantics; normal direct paths must share DeepSeek session/PoW/completion execution through `completionruntime`, while `translatorcliproxy` is reserved for Vercel prepare/release, missing-backend fallback, and regression tests.
|
||||
- `internal/httpapi/ollama`: Ollama-compatible model list and capability query endpoints.
|
||||
- `internal/httpapi/requestbody`: shared HTTP body reading, JSON pre-validation, and UTF-8 error helpers across protocol adapters.
|
||||
- `internal/promptcompat`: compatibility core for turning OpenAI/Claude/Gemini requests into DeepSeek web-chat plain-text context.
|
||||
- `internal/assistantturn`: Go output-side canonical semantics, converting DeepSeek SSE collection results and stream finalization state into assistant turns and centralizing thinking, tool call, citation, usage, stop/error behavior.
|
||||
- `internal/completionruntime`: shared Go completion execution helpers for DeepSeek session/PoW/call startup, non-stream collection, and empty-output retry; streaming paths use it to start upstream requests, continue to use `internal/stream` for real-time consumption, and use `assistantturn` during finalization.
|
||||
- `internal/completionruntime`: shared Go completion execution helpers for DeepSeek session/PoW/call startup, non-stream collection, empty-output retry, and one managed-account fresh retry before a final 429; streaming paths use it to start upstream requests, continue to use `internal/stream` for real-time consumption, and use `assistantturn` during finalization.
|
||||
- `internal/translatorcliproxy`: bridge compatibility layer for Claude/Gemini and OpenAI shape translation; it is not the main business protocol conversion center.
|
||||
- `internal/deepseek/{client,protocol,transport}`: upstream requests, sessions, PoW adaptation, protocol constants, and transport details.
|
||||
- `internal/js/chat-stream` + `api/chat-stream.js`: Vercel Node streaming bridge; Go prepare/release owns auth, account lease, and completion payload assembly, while Node relays real-time SSE with Go-aligned finalization and tool sieve semantics.
|
||||
@@ -199,6 +202,7 @@ flowchart LR
|
||||
- `internal/toolcall` + `internal/toolstream`: DSML shell compatibility plus canonical XML tool-call parsing and anti-leak sieve; DSML is normalized back to XML at the entrypoint, and internal parsing remains XML-based.
|
||||
- `internal/httpapi/admin/*`: Admin API root assembly plus auth/accounts/config/settings/proxies/rawsamples/vercel/history/devcapture/version resource packages.
|
||||
- `internal/chathistory`: server-side conversation history persistence, pagination, detail lookup, and retention policy.
|
||||
- `internal/responsehistory`: DeepSeek upstream response archive, saving assistant text, thinking, raw tool-call fragments, and streaming detail before protocol rendering/trimming.
|
||||
- `internal/config`: config loading/validation + runtime settings hot-reload.
|
||||
- `internal/account`: managed account pool, inflight slots, waiting queue.
|
||||
- `internal/textclean`: text cleanup helpers, e.g. stripping `[reference: N]` markers.
|
||||
|
||||
@@ -27,7 +27,7 @@ ds2api/
|
||||
│ ├── claudeconv/ # Claude 消息格式转换工具
|
||||
│ ├── compat/ # 兼容性辅助与回归支持
|
||||
│ ├── assistantturn/ # 上游输出到统一 assistant turn / stream event 的语义层
|
||||
│ ├── completionruntime/ # Go 主路径共享 DeepSeek completion 启动、非流式收集与 retry
|
||||
│ ├── completionruntime/ # Go 主路径共享 DeepSeek completion 启动、收集、空输出/切号 retry
|
||||
│ ├── config/ # 配置加载、校验、热更新
|
||||
│ ├── deepseek/ # DeepSeek 上游 client/protocol/transport
|
||||
│ │ ├── client/ # 登录、会话、completion、上传/删除等上游调用
|
||||
@@ -41,6 +41,7 @@ ds2api/
|
||||
│ │ ├── admin/ # Admin API 根装配与资源子包
|
||||
│ │ ├── claude/ # Claude HTTP 协议适配
|
||||
│ │ ├── gemini/ # Gemini HTTP 协议适配
|
||||
│ │ ├── ollama/ # Ollama 兼容模型/能力查询接口
|
||||
│ │ ├── openai/ # OpenAI HTTP surface
|
||||
│ │ │ ├── chat/ # Chat Completions 执行入口
|
||||
│ │ │ ├── responses/ # Responses API 与 response store
|
||||
@@ -57,6 +58,7 @@ ds2api/
|
||||
│ ├── prompt/ # Prompt 组装
|
||||
│ ├── promptcompat/ # API 请求到 DeepSeek 网页纯文本上下文兼容层
|
||||
│ ├── rawsample/ # raw sample 读写与管理
|
||||
│ ├── responsehistory/ # DeepSeek 上游响应归档与会话快照
|
||||
│ ├── server/ # 路由与中间件装配
|
||||
│ │ └── data/ # 路由/运行时辅助数据
|
||||
│ ├── sse/ # SSE 解析工具
|
||||
@@ -188,10 +190,11 @@ flowchart LR
|
||||
- `internal/server`:路由树和中间件挂载(健康检查、协议入口、Admin/WebUI)。
|
||||
- `internal/httpapi/openai/*`:OpenAI HTTP surface,按 chat、responses、files、embeddings、history、shared 拆分;chat/responses 共享 promptcompat、stream、toolcall 等核心语义。
|
||||
- `internal/httpapi/{claude,gemini}`:协议输入输出适配,归一到同一套 prompt compatibility 语义;正常直连路径必须通过 `completionruntime` 共享 DeepSeek session/PoW/completion 调用,`translatorcliproxy` 仅保留给 Vercel prepare/release、后端缺失 fallback 和回归测试。
|
||||
- `internal/httpapi/ollama`:Ollama 兼容的模型列表与能力查询入口。
|
||||
- `internal/httpapi/requestbody`:跨协议复用的请求体读取、JSON 解码前置校验与 UTF-8 错误处理辅助。
|
||||
- `internal/promptcompat`:OpenAI/Claude/Gemini 请求到 DeepSeek 网页纯文本上下文的兼容内核。
|
||||
- `internal/assistantturn`:Go 输出侧统一语义层,把 DeepSeek SSE 收集结果和流式收尾状态归一成 assistant turn,集中处理 thinking、tool call、citation、usage、stop/error 语义。
|
||||
- `internal/completionruntime`:Go surface 共享的 completion 执行辅助,负责 DeepSeek session/PoW/call 启动、非流式 collect 和 empty-output retry;流式路径复用它启动上游请求,继续用 `internal/stream` 做实时消费,并在最终收尾阶段接入 `assistantturn`。
|
||||
- `internal/completionruntime`:Go surface 共享的 completion 执行辅助,负责 DeepSeek session/PoW/call 启动、非流式 collect、empty-output retry,以及托管账号在最终 429 前的一次切号 fresh retry;流式路径复用它启动上游请求,继续用 `internal/stream` 做实时消费,并在最终收尾阶段接入 `assistantturn`。
|
||||
- `internal/translatorcliproxy`:Claude/Gemini 与 OpenAI 结构互转的桥接兼容层,不作为主业务协议转换中心。
|
||||
- `internal/deepseek/{client,protocol,transport}`:上游请求、会话、PoW 适配、协议常量与传输层。
|
||||
- `internal/js/chat-stream` + `api/chat-stream.js`:Vercel Node 流式桥;Go prepare/release 管理鉴权、账号租约和 completion payload,Node 侧负责实时 SSE 转发并保持 Go 对齐的终结态和 tool sieve 语义。
|
||||
@@ -199,6 +202,7 @@ flowchart LR
|
||||
- `internal/toolcall` + `internal/toolstream`:DSML 外壳兼容与 canonical XML 工具调用解析、防泄漏筛分;DSML 会在入口归一化回 XML,内部仍按 XML 语义解析。
|
||||
- `internal/httpapi/admin/*`:Admin API 根装配与 auth/accounts/config/settings/proxies/rawsamples/vercel/history/devcapture/version 等资源子包。
|
||||
- `internal/chathistory`:服务器端对话记录持久化、分页、单条详情和保留策略。
|
||||
- `internal/responsehistory`:DeepSeek 上游响应归档,会在协议回译/裁剪前保存 assistant text、thinking、tool-call 原始片段和流式详情。
|
||||
- `internal/config`:配置加载、校验、运行时 settings 热更新。
|
||||
- `internal/account`:托管账号池、并发槽位、等待队列。
|
||||
- `internal/textclean`:文本清洗,移除 `[reference: N]` 标记等噪声。
|
||||
|
||||
@@ -9,8 +9,8 @@ Thanks for your interest in contributing to DS2API!
|
||||
### Prerequisites
|
||||
|
||||
- Go 1.26+
|
||||
- Node.js `20.19+` or `22.12+` (for WebUI development)
|
||||
- npm (bundled with Node.js)
|
||||
- Node.js `20.19+` or `22.12+` (for WebUI development; CI / Docker builds use Node 24)
|
||||
- npm (bundled with Node.js; 10+ recommended)
|
||||
|
||||
### Backend Development
|
||||
|
||||
|
||||
@@ -9,8 +9,8 @@
|
||||
### 前置要求
|
||||
|
||||
- Go 1.26+
|
||||
- Node.js `20.19+` 或 `22.12+`(WebUI 开发时)
|
||||
- npm(随 Node.js 提供)
|
||||
- Node.js `20.19+` 或 `22.12+`(WebUI 开发时;CI / Docker 构建使用 Node 24)
|
||||
- npm(随 Node.js 提供,建议 10+)
|
||||
|
||||
### 后端开发
|
||||
|
||||
|
||||
@@ -39,8 +39,8 @@ Recommended order when choosing a deployment method:
|
||||
| Dependency | Minimum Version | Notes |
|
||||
| --- | --- | --- |
|
||||
| Go | 1.26+ | Build backend |
|
||||
| Node.js | `20.19+` or `22.12+` | Only needed to build WebUI locally |
|
||||
| npm | Bundled with Node.js | Install WebUI dependencies |
|
||||
| Node.js | `20.19+` or `22.12+` (CI / Docker builds use Node 24) | Only needed to build WebUI locally |
|
||||
| npm | Bundled with Node.js; 10+ recommended | Install WebUI dependencies |
|
||||
|
||||
Config source (choose one):
|
||||
|
||||
@@ -299,6 +299,8 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx # optional for personal accounts
|
||||
| `DS2API_VERCEL_INTERNAL_SECRET` | Hybrid streaming internal auth | Falls back to `DS2API_ADMIN_KEY` |
|
||||
| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | Stream lease TTL | `900` |
|
||||
| `DS2API_RAW_STREAM_SAMPLE_ROOT` | Raw stream sample root for saving/reading samples | `tests/raw_stream_samples` |
|
||||
| `DS2API_STATIC_ADMIN_DIR` | WebUI static asset directory | `static/admin` |
|
||||
| `DS2API_AUTO_BUILD_WEBUI` | Whether local startup auto-builds missing WebUI assets (`1/true/yes/on` or `0/false/no/off`) | Enabled outside Vercel |
|
||||
| `VERCEL_TOKEN` | Vercel sync token | — |
|
||||
| `VERCEL_PROJECT_ID` | Vercel project ID | — |
|
||||
| `VERCEL_TEAM_ID` | Vercel team ID | — |
|
||||
@@ -321,7 +323,7 @@ Request ──────┐
|
||||
```
|
||||
|
||||
- **Go entry**: `api/index.go` (Serverless Go)
|
||||
- **Stream entry**: `api/chat-stream.js` (Node Runtime for real-time SSE)
|
||||
- **Stream entry**: `api/chat-stream.js` (Node Runtime for real-time SSE; `vercel.json` rewrites only the canonical `/v1/chat/completions` path here, while the root shortcut `/chat/completions` stays on the Go entry)
|
||||
- **Routing**: `vercel.json`
|
||||
- **Build command**: `npm ci --prefix webui && npm run build --prefix webui` (automatic)
|
||||
|
||||
@@ -438,7 +440,7 @@ Default local access URL: `http://127.0.0.1:5001`; the server actually binds to
|
||||
|
||||
### 4.2 WebUI Build
|
||||
|
||||
On first local startup, if `static/admin/` is missing, DS2API will automatically attempt to build the WebUI (requires Node.js/npm; when dependencies are missing it runs `npm ci` first, then `npm run build -- --outDir static/admin --emptyOutDir`).
|
||||
On first local startup, if the WebUI static directory is missing, DS2API automatically attempts to build it (requires Node.js/npm; when dependencies are missing it runs `npm ci --prefix webui`, then `npm run build --prefix webui -- --outDir <static-dir> --emptyOutDir`). The default static directory is `static/admin/`, and `DS2API_STATIC_ADMIN_DIR` can override it.
|
||||
|
||||
Manual build:
|
||||
|
||||
|
||||
@@ -39,8 +39,8 @@
|
||||
| 依赖 | 最低版本 | 说明 |
|
||||
| --- | --- | --- |
|
||||
| Go | 1.26+ | 编译后端 |
|
||||
| Node.js | `20.19+` 或 `22.12+` | 仅在需要本地构建 WebUI 时 |
|
||||
| npm | 随 Node.js 提供 | 安装 WebUI 依赖 |
|
||||
| Node.js | `20.19+` 或 `22.12+`(CI / Docker 构建使用 Node 24) | 仅在需要本地构建 WebUI 时 |
|
||||
| npm | 随 Node.js 提供,建议 10+ | 安装 WebUI 依赖 |
|
||||
|
||||
配置来源(任选其一):
|
||||
|
||||
@@ -299,6 +299,8 @@ VERCEL_TEAM_ID=team_xxxxxxxxxxxx # 个人账号可留空
|
||||
| `DS2API_VERCEL_INTERNAL_SECRET` | 混合流式内部鉴权 | 回退用 `DS2API_ADMIN_KEY` |
|
||||
| `DS2API_VERCEL_STREAM_LEASE_TTL_SECONDS` | 流式 lease TTL | `900` |
|
||||
| `DS2API_RAW_STREAM_SAMPLE_ROOT` | raw stream 样本保存/读取根目录 | `tests/raw_stream_samples` |
|
||||
| `DS2API_STATIC_ADMIN_DIR` | WebUI 静态资源目录 | `static/admin` |
|
||||
| `DS2API_AUTO_BUILD_WEBUI` | 本地启动时是否自动构建缺失的 WebUI(`1/true/yes/on` 或 `0/false/no/off`) | 非 Vercel 默认开启 |
|
||||
| `VERCEL_TOKEN` | Vercel 同步 token | — |
|
||||
| `VERCEL_PROJECT_ID` | Vercel 项目 ID | — |
|
||||
| `VERCEL_TEAM_ID` | Vercel 团队 ID | — |
|
||||
@@ -331,7 +333,7 @@ api/index.go api/chat-stream.js
|
||||
```
|
||||
|
||||
- **入口文件**:`api/index.go`(Serverless Go)
|
||||
- **流式入口**:`api/chat-stream.js`(Node Runtime,保证实时 SSE)
|
||||
- **流式入口**:`api/chat-stream.js`(Node Runtime,保证实时 SSE;`vercel.json` 仅把规范路径 `/v1/chat/completions` 重写到这里,根路径快捷别名 `/chat/completions` 仍走 Go 入口)
|
||||
- **路由重写**:`vercel.json`
|
||||
- **构建命令**:`npm ci --prefix webui && npm run build --prefix webui`(自动执行)
|
||||
|
||||
@@ -448,7 +450,7 @@ go run ./cmd/ds2api
|
||||
|
||||
### 4.2 WebUI 构建
|
||||
|
||||
本地首次启动时,若 `static/admin/` 不存在,服务会自动尝试构建 WebUI(需要 Node.js/npm;缺依赖时会先执行 `npm ci`,再执行 `npm run build -- --outDir static/admin --emptyOutDir`)。
|
||||
本地首次启动时,若 WebUI 静态目录不存在,服务会自动尝试构建 WebUI(需要 Node.js/npm;缺依赖时会先执行 `npm ci --prefix webui`,再执行 `npm run build --prefix webui -- --outDir <静态目录> --emptyOutDir`)。默认静态目录为 `static/admin/`,可用 `DS2API_STATIC_ADMIN_DIR` 覆盖。
|
||||
|
||||
你也可以手动构建:
|
||||
|
||||
|
||||
@@ -74,13 +74,14 @@ gofmt -w <changed-go-files>
|
||||
|
||||
- Admin API:`/admin/chat-history`、`/admin/chat-history/{id}`。
|
||||
- 后端存储:`internal/chathistory/store.go`。
|
||||
- 输出归档:`internal/responsehistory` 在协议回译/裁剪前记录 DeepSeek 上游 assistant text / thinking;即使工具调用已被对外响应转成结构化 `tool_calls` 并从可见正文剔除,后台历史仍应保留原始 DSML / XML 片段,方便排查格式漂移。
|
||||
- 前端轮询和 ETag:`webui/src/features/chatHistory/ChatHistoryContainer.jsx`。
|
||||
|
||||
Tool call 问题优先跑:
|
||||
|
||||
```bash
|
||||
go test -v ./internal/toolcall ./internal/toolstream -count=1
|
||||
node --test tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js
|
||||
./tests/scripts/run-unit-node.sh
|
||||
```
|
||||
|
||||
## 5. 测试选择
|
||||
|
||||
@@ -75,7 +75,7 @@ npm run build --prefix webui
|
||||
1. **Preflight 检查**:
|
||||
- `go test ./... -count=1`(单元测试)
|
||||
- `./tests/scripts/check-node-split-syntax.sh`(Node 拆分模块语法门禁)
|
||||
- `node --test tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js tests/node/js_compat_test.js`
|
||||
- `node --test --test-concurrency=1 tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js tests/node/chat-history-utils.test.js tests/node/js_compat_test.js`
|
||||
- `npm run build --prefix webui`(WebUI 构建检查)
|
||||
|
||||
2. **隔离启动**:复制 `config.json` 到临时目录,启动独立服务进程
|
||||
@@ -203,10 +203,10 @@ go test ./...
|
||||
|
||||
```bash
|
||||
# 运行 tool calls 相关测试(推荐用于调试 tool call 解析问题)
|
||||
go test -v -run 'TestParseToolCalls|TestRepair' ./internal/toolcall/
|
||||
go test -v -run 'TestParseToolCalls|TestProcessToolSieve|TestRepair' ./internal/toolcall ./internal/toolstream
|
||||
|
||||
# 运行单个测试用例
|
||||
go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/toolcall/
|
||||
go test -v -run TestParseToolCallsAllowsAllEmptyParameterPayload ./internal/toolcall
|
||||
|
||||
# 运行 format 相关测试
|
||||
go test -v ./internal/format/...
|
||||
@@ -221,23 +221,23 @@ go test -v ./internal/httpapi/openai/...
|
||||
|
||||
```bash
|
||||
# 1. 运行 tool calls 相关的所有测试
|
||||
go test -v -run 'TestParseToolCalls|TestRepair' ./internal/toolcall/
|
||||
go test -v -run 'TestParseToolCalls|TestProcessToolSieve|TestRepair' ./internal/toolcall ./internal/toolstream
|
||||
|
||||
# 2. 查看测试输出中的详细调试信息
|
||||
go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/toolcall/ 2>&1
|
||||
go test -v -run TestProcessToolSieveReleasesMalformedExecutableXMLBlock ./internal/toolstream 2>&1
|
||||
|
||||
# 3. 检查具体测试用例的修复效果
|
||||
# 测试用例位于 internal/toolcall/toolcalls_test.go,包含:
|
||||
# - TestParseToolCallsWithDeepSeekHallucination: DeepSeek 典型幻觉输出
|
||||
# 重点测试位于 internal/toolcall/toolcalls_test.go 与 internal/toolstream/tool_sieve_xml_test.go,包含:
|
||||
# - TestParseToolCallsAllowsAllEmptyParameterPayload: 空参数结构化保留
|
||||
# - TestProcessToolSieveReleasesMalformedExecutableXMLBlock: malformed XML wrapper 释放为文本
|
||||
# - TestRepairLooseJSONWithNestedObjects: 嵌套对象的方括号修复
|
||||
# - TestParseToolCallsWithMixedWindowsPaths: Windows 路径处理
|
||||
```
|
||||
|
||||
### 运行 Node.js 测试
|
||||
|
||||
```bash
|
||||
# 运行 Node 测试
|
||||
node --test tests/node/stream-tool-sieve.test.js
|
||||
node --test --test-concurrency=1 tests/node/stream-tool-sieve.test.js tests/node/chat-stream.test.js tests/node/chat-history-utils.test.js tests/node/js_compat_test.js
|
||||
|
||||
# 或使用脚本
|
||||
./tests/scripts/run-unit-node.sh
|
||||
|
||||
@@ -111,10 +111,11 @@ DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools`
|
||||
- OpenAI Chat / Responses 原生走统一 OpenAI 标准化与 DeepSeek payload 组装;Claude / Gemini 会尽量复用 OpenAI prompt/tool 语义,其中 Gemini 直接复用 `promptcompat.BuildOpenAIPromptForAdapter`。Go 主服务新增 `completionruntime` 启动层,统一执行 DeepSeek session/PoW/call;输出侧新增 `assistantturn` 语义层:非流式 OpenAI Chat / Responses / Claude / Gemini 会把 DeepSeek SSE 收集结果先归一成同一份 assistant turn,再分别渲染成各协议原生外形;流式 OpenAI Chat / Responses / Claude / Gemini 继续保持各协议实时 SSE framing,但最终收尾的 tool fallback、schema 归一、usage、empty-output / content-filter 错误语义同样由 `assistantturn` 判定。Claude / Gemini 的常规 Go 主路径不再依赖内部 `httptest` 转发到 OpenAI handler;`translatorcliproxy` 仅保留用于 Vercel bridge、后端缺失 fallback 和回归测试,不作为主业务协议转换中心。
|
||||
- Vercel Node 流式路径本轮不迁移,仍使用现有 Node bridge / stream-tool-sieve 实现;后续若变更 Node 流式语义,需要按 `assistantturn` 的 Go canonical 输出语义同步对齐。
|
||||
- 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关;关闭时即使上游返回 `response/thinking_content`,兼容层也不会把它当作可见正文输出。若最终解析出的模型名带 `-nothinking` 后缀,则会无条件强制关闭 thinking,优先级高于请求体中的 `thinking` / `reasoning` / `reasoning_effort`。未显式关闭时,各 surface 会按解析后的 DeepSeek 模型默认能力开启 thinking,并用各自协议的原生形态暴露:OpenAI Chat 为 `reasoning_content`,OpenAI Responses 为 `response.reasoning.delta` / `reasoning` content,Claude 为 `thinking` block / `thinking_delta`,Gemini 为 `thought: true` part。
|
||||
- 对 OpenAI Chat / Responses 的非流式收尾,如果最终可见正文为空,兼容层会优先尝试把思维链中的独立 DSML / XML 工具块当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测,但不会因为思维链内容去中途拦截或改写流式输出;真正的工具识别始终基于原始上游文本,而不是基于“已经做过可见输出清洗”的版本,因此即使最终可见层会剥离完整 leaked DSML / XML `tool_calls` wrapper、并抑制全空参数或无效 wrapper 块,也不会影响真实工具调用转成结构化 `tool_calls` / `function_call`。补发结果会作为本轮 assistant 的结构化 `tool_calls` / `function_call` 输出返回,而不是塞进 `content` 文本;如果客户端没有开启 thinking / reasoning,思维链只用于检测,不会作为 `reasoning_content` 或可见正文暴露。只有正文为空且思维链里也没有可执行工具调用时,才继续按空回复错误处理。
|
||||
- OpenAI Chat / Responses 的空回复错误处理之前会默认做一次内部补偿重试:第一次上游完整结束后,如果最终可见正文为空、没有解析到工具调用、也没有已经向客户端流式发出工具调用,并且终止原因不是 `content_filter`,兼容层会复用同一个 `chat_session_id`、账号、token 与工具策略,把原始 completion `prompt` 追加固定后缀 `Previous reply had no visible output. Please regenerate the visible final answer or tool call now.` 后重新提交一次。重试遵循 DeepSeek 多轮对话协议:从第一次上游 SSE 流中提取 `response_message_id`,并在重试 payload 中设置 `parent_message_id` 为该值,使重试成为同一会话的后续轮次而非断裂的根消息;同时重新获取一次 PoW(若 PoW 获取失败则回退到原始 PoW)。该重试不会重新标准化消息、不会新建 session、不会切换账号,也不会向流式客户端插入重试标记;第二次 thinking / reasoning 会按正常增量直接接到第一次之后,并继续使用 overlap trim 去重。若第二次仍为空,终端错误码仍保持现有 `upstream_empty_output`;若任一尝试触发空 `content_filter`,不做补偿重试并保持 `content_filter` 错误。JS Vercel 运行时同样设置 `parent_message_id`,但因无法直接调用 PoW API 而复用原始 PoW。
|
||||
- 对 OpenAI Chat / Responses 的非流式收尾,如果最终可见正文为空,兼容层会优先尝试把思维链中的独立 DSML / XML 工具块当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测,但不会因为思维链内容去中途拦截或改写流式输出;真正的工具识别始终基于原始上游文本,而不是基于“已经做过可见输出清洗”的版本。最终可见层会剥离已经成功解析成工具调用的完整 leaked DSML / XML `tool_calls` wrapper;如果遇到完整 wrapper 但内部形态不符合可执行工具调用语义(例如 `<param>` 这类 malformed XML 工具壳),流式 sieve 会把该块作为普通文本释放,而不是吞掉或伪造成工具调用。补发结果会作为本轮 assistant 的结构化 `tool_calls` / `function_call` 输出返回,而不是塞进 `content` 文本;如果客户端没有开启 thinking / reasoning,思维链只用于检测,不会作为 `reasoning_content` 或可见正文暴露。只有正文为空且思维链里也没有可执行工具调用时,才继续按空回复错误处理。
|
||||
- OpenAI Chat / Responses、Claude Messages、Gemini generateContent 的空回复错误处理之前会默认做一次内部补偿重试:第一次上游完整结束后,如果最终可见正文为空、没有解析到工具调用、也没有已经向客户端流式发出工具调用,并且终止原因不是 `content_filter`,兼容层会复用同一个 `chat_session_id`、账号、token 与工具策略,把原始 completion `prompt` 追加固定后缀 `Previous reply had no visible output. Please regenerate the visible final answer or tool call now.` 后重新提交一次。Go 主路径的非流式重试由 `completionruntime.ExecuteNonStreamWithRetry` 统一处理;流式重试由 `completionruntime.ExecuteStreamWithRetry` 统一处理,各协议 runtime 只负责消费/渲染本协议 SSE framing。重试遵循 DeepSeek 多轮对话协议:从第一次上游 SSE 流中提取 `response_message_id`,并在重试 payload 中设置 `parent_message_id` 为该值,使重试成为同一会话的后续轮次而非断裂的根消息;同时重新获取一次 PoW(若 PoW 获取失败则回退到原始 PoW)。该同账号重试不会重新标准化消息、不会新建 session,也不会向流式客户端插入重试标记;第二次 thinking / reasoning 会按正常增量直接接到第一次之后,并继续使用 overlap trim 去重。若同账号补偿重试后即将返回 429 `upstream_empty_output`,并且当前是托管账号模式,Go 主路径会在返回 429 前切换到下一个可用账号,新建 `chat_session_id`,使用原始 completion payload 再做一次 fresh retry;该切号重试不携带空回复 prompt 后缀,也不设置上一账号的 `parent_message_id`。如果没有可切换账号,或切号后的 fresh retry 仍没有可见正文或工具调用,则继续按原错误返回:无任何输出为 503 `upstream_unavailable`,有 reasoning 但没有可见正文或工具调用为 429 `upstream_empty_output`。若任一尝试触发空 `content_filter`,不做补偿重试并保持 `content_filter` 错误。JS Vercel 运行时同样设置 `parent_message_id`,但因无法直接调用 PoW API 而复用原始 PoW;切号 fresh retry 目前由 Go 主路径提供。
|
||||
|
||||
- OpenAI Chat / Responses 在最终可见正文渲染阶段,会把 DeepSeek 搜索返回中的 `[citation:N]` / `[reference:N]` 标记替换成对应 Markdown 链接。`citation` 标记按一基序号解析;`reference` 标记只有在同一段正文中出现 `[reference:0]`(允许冒号后有空格)时才按零基序号映射,并且不会影响同段正文里的 `citation` 标记。
|
||||
- 非流式 OpenAI Chat / Responses、Claude Messages、Gemini generateContent 在最终可见正文渲染阶段,会把 DeepSeek 搜索返回中的 `[citation:N]` / `[reference:N]` 标记替换成对应 Markdown 链接。`citation` 标记按一基序号解析;`reference` 标记只有在同一段正文中出现 `[reference:0]`(允许冒号后有空格)时才按零基序号映射,并且不会影响同段正文里的 `citation` 标记。
|
||||
- 流式输出仍默认隐藏 `[citation:N]` / `[reference:N]` 这类上游内部标记,避免分片输出中泄漏尚未完成映射的引用占位符。
|
||||
|
||||
## 5. prompt 是怎么拼出来的
|
||||
|
||||
@@ -166,14 +167,15 @@ OpenAI Chat / Responses 在标准化后、current input file 之前,会默认
|
||||
3. 再附上统一的 DSML tool call 外壳格式约束。
|
||||
4. 把这整段内容并入 system prompt。
|
||||
|
||||
工具调用正例现在优先示范官方 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。
|
||||
兼容层仍接受旧式纯 `<tool_calls>` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `<dsml-tool-calls>` / `<dsml-invoke>` / `<dsml-parameter>`;但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意:这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现;DSML 标签会在解析入口归一化回现有 XML 标签后继续走同一套 parser。
|
||||
工具调用正例现在优先示范全角分隔符 DSML 风格:`<｜DSML｜tool_calls>` → `<｜DSML｜invoke name="...">` → `<｜DSML｜parameter name="...">`。
|
||||
兼容层仍接受旧式纯 `<tool_calls>` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `<dsml-tool-calls>` / `<dsml-invoke>` / `<dsml-parameter>`、下划线形式 `<dsml_tool_calls>` / `<dsml_invoke>` / `<dsml_parameter>`,以及其他前缀分隔形态如 `<vendor|tool_calls>` / `<vendor_tool_calls>` / `<vendor - tool_calls>`;标签壳扫描还会把全角 ASCII 漂移归一化,例如 `<dSML|tool_calls>` 与全角 `>` 结束符,也会容错 CJK 尖括号和属性尾部分隔符漂移,例如 `<DSM|parameter name="command"|>...〈/DSM|parameter〉`。更一般地,Go / Node tag 扫描以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,标签名前任意协议前缀壳都会在解析入口剥离,例如 `<DSML␂tool_calls>`、`<proto💥tool_calls>` 这类控制符或非 ASCII 分隔符漂移也会归一化回现有 XML 标签后继续走同一套 parser。但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意:这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现。解析器会先截获非代码块中的疑似工具 wrapper,完整解析失败或工具语义无效时再按普通文本放行。
|
||||
数组参数使用 `<item>...</item>` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... }` 对象。除此之外,解析器还会回收一些更松散的列表写法,例如 JSON array 字面量或逗号分隔的 JSON 项序列,只要它们足够明确;但 `<item>` 仍然是首选形态。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过,如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `<b>urgent</b>` 这种行内标记,兼容层会保留原始字符串,不会强行升成 object / array;只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。对 `command` / `content` 等长文本参数,CDATA 内部的 Markdown fenced DSML / XML 示例会作为原文保护;示例里的 `]]></parameter>` 或 `</tool_calls>` 不会截断外层工具调用,解析器会继续等待围栏外真正的参数 / wrapper 结束标签。
|
||||
Go 侧读取 DeepSeek SSE 时不再依赖 `bufio.Scanner` 的固定 2MiB 单行上限;当写文件类工具把很长的 `content` 放在单个 `data:` 行里返回时,非流式收集、流式解析和 auto-continue 透传都会保留完整行,再进入同一套工具解析与序列化流程。
|
||||
在 assistant 最终回包阶段,如果某个 tool 参数在声明 schema 中明确是 `string`,兼容层会在把解析后的 `tool_calls` / `function_call` 重新序列化成 OpenAI / Responses / Claude 可见参数前,递归把该路径上的 number / bool / object / array 统一转成字符串;其中 object / array 会压成紧凑 JSON 字符串。这个保护只对 schema 明确声明为 string 的路径生效,不会改写本来就是 `number` / `boolean` / `object` / `array` 的参数。这样可以兼容 DeepSeek 输出了结构化片段、但上游客户端工具 schema 又严格要求字符串参数的场景(例如 `content`、`prompt`、`path`、`taskId` 等)。
|
||||
工具 schema 的权威来源始终是**当前请求实际携带的 schema**,而不是同名工具在其他 runtime(Claude Code / OpenCode / Codex 等)里的默认印象。兼容层现在会同时兼容 OpenAI 风格 `function.parameters`、直接工具对象上的 `parameters` / `input_schema`、以及 camelCase 的 `inputSchema` / `schema`,并在最终输出阶段按这份请求内 schema 决定是保留 array/object,还是仅对明确声明为 `string` 的路径做字符串化。该规则同样适用于 Claude 的流式收尾和 Vercel Node 流式 tool-call formatter,避免不同 runtime 因 schema shape 差异而出现同名工具参数类型漂移。
|
||||
正例中的工具名只会来自当前请求实际声明的工具;如果当前请求没有足够的已知工具形态,就省略对应的单工具、多工具或嵌套示例,避免把不可用工具名写进 prompt。
|
||||
对执行类工具,脚本内容必须进入执行参数本身:`Bash` / `execute_command` 使用 `command`,`exec_command` 使用 `cmd`;不要把脚本示范成 `path` / `content` 文件写入参数。
|
||||
工具提示词也会明确要求模型按本次调用实际需要填写参数,禁止输出 placeholder、空字符串或纯空白参数;如果必填参数未知,应先追问用户或正常文字回复,而不是输出空工具壳。对 `Bash` / `execute_command` 这类 shell 工具,命令或脚本必须写入 `command` 参数。解析层仍会把空字符串参数结构化返回;是否拒绝空 `command` 由后续工具执行侧 / 客户端 schema 校验决定。
|
||||
如果当前请求声明了 `Read` / `read_file` 这类读取工具,兼容层会额外注入一条 read-tool cache guard:当读取结果只表示“文件未变更 / 已在历史中 / 请引用先前上下文 / 没有正文内容”时,模型必须把它视为内容不可用,不能反复调用同一个无正文读取;应改为请求完整正文读取能力,或向用户说明需要重新提供文件内容。这个约束只缓解客户端缓存返回空内容导致的死循环,DS2API 不会也无法凭空恢复客户端本地文件正文。
|
||||
|
||||
OpenAI 路径实现:
|
||||
@@ -204,16 +206,20 @@ assistant 的 reasoning 会变成一个显式标签块:
|
||||
|
||||
然后再接可见回答正文。
|
||||
|
||||
对最终返回给客户端的 assistant 轮次,reasoning 不会因为本轮输出了工具调用而被丢弃。OpenAI Chat 会在同一个 assistant message 上同时返回 `reasoning_content` 和 `tool_calls`;OpenAI Responses 会先返回一个包含 `reasoning` content 的 assistant message item,再返回后续 `function_call` item;Claude / Gemini 也会在各自原生 thinking / thought 结构后继续返回 tool_use / functionCall。
|
||||
|
||||
对进入后续 prompt / `DS2API_HISTORY.txt` 的历史轮次,兼容层也会把同一轮工具调用前的 reasoning 绑定到 assistant tool call 历史上。OpenAI Chat 原生 `reasoning_content + tool_calls` 会直接保留;OpenAI Responses 若以 `reasoning` message item 后接 `function_call` item 的形式回放历史,会在归一化时合并为同一个 assistant 历史块;Claude 的 `thinking` block 会绑定到后续 `tool_use`;Gemini 的 `thought: true` part 会绑定到后续 `functionCall`。最终 prompt 中的顺序固定为 `[reasoning_content]...[/reasoning_content]`,再接 DSML tool call 外壳。
|
||||
|
||||
### 7.2 历史 tool_calls 保留方式
|
||||
|
||||
assistant 历史 `tool_calls` 不会保留成 OpenAI 原生 JSON,而会转成 prompt 可见的 DSML 外壳:
|
||||
|
||||
```xml
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="read_file">
|
||||
<|DSML|parameter name="path"><![CDATA[src/main.go]]></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="read_file">
|
||||
<|DSML|parameter name="path"><![CDATA[src/main.go]]></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
```
|
||||
|
||||
解析层同时兼容旧式纯 XML 形态:`<tool_calls>` / `<invoke>` / `<parameter>`。两者都会先归一到现有 XML 解析语义;其他旧格式都会作为普通文本保留,不会作为可执行调用语法。
|
||||
@@ -418,7 +424,8 @@ Prior conversation history and tool progress.
|
||||
如果改的是 tool call 相关兼容语义,还应同时检查:
|
||||
|
||||
- `go test ./internal/toolcall/...`
|
||||
- `node --test tests/node/stream-tool-sieve.test.js`
|
||||
- `go test ./internal/toolstream/...`
|
||||
- `./tests/scripts/run-unit-node.sh`
|
||||
|
||||
## 14. 文档同步约定
|
||||
|
||||
|
||||
@@ -6,14 +6,14 @@
|
||||
|
||||
## 1) 当前可执行格式
|
||||
|
||||
当前版本推荐模型输出 DSML 外壳:
|
||||
当前版本推荐模型输出全角分隔符 DSML 外壳:
|
||||
|
||||
```xml
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="read_file">
|
||||
<|DSML|parameter name="path"><![CDATA[README.MD]]></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="read_file">
|
||||
<|DSML|parameter name="path"><![CDATA[README.MD]]></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
```
|
||||
|
||||
兼容层仍接受旧式 canonical XML:
|
||||
@@ -30,17 +30,17 @@
|
||||
|
||||
约束:
|
||||
|
||||
- 必须有 `<|DSML|tool_calls>...</|DSML|tool_calls>` 或 `<tool_calls>...</tool_calls>` wrapper
|
||||
- 每个调用必须在 `<|DSML|invoke name="...">...</|DSML|invoke>` 或 `<invoke name="...">...</invoke>` 内
|
||||
- 必须有 `<｜DSML｜tool_calls>...</｜DSML｜tool_calls>` 或 `<tool_calls>...</tool_calls>` wrapper
|
||||
- 每个调用必须在 `<｜DSML｜invoke name="...">...</｜DSML｜invoke>` 或 `<invoke name="...">...</invoke>` 内
|
||||
- 工具名必须放在 `invoke` 的 `name` 属性
|
||||
- 参数必须使用 `<|DSML|parameter name="...">...</|DSML|parameter>` 或 `<parameter name="...">...</parameter>`
|
||||
- 参数必须使用 `<｜DSML｜parameter name="...">...</｜DSML｜parameter>` 或 `<parameter name="...">...</parameter>`
|
||||
- 同一个工具块内不要混用 DSML 标签和旧 XML 工具标签;混搭会被视为非法工具块
|
||||
|
||||
兼容修复:
|
||||
|
||||
- 如果模型漏掉 opening wrapper,但后面仍输出了一个或多个 invoke 并以 closing wrapper 收尾,Go 解析链路会在解析前补回缺失的 opening wrapper。
|
||||
- Go / Node 解析层不再枚举每一种 DSML typo。它会把工具标签名前的 `DSML`、管道符 `|` / `｜`、空白、重复 leading `<` 视为可容忍的协议噪声,然后只匹配固定本地标签名 `tool_calls` / `invoke` / `parameter`。例如 `<DSML|tool_calls>`、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、`<DSMLtool_calls>`、`<<DSML|DSML|tool_calls>` 都会归一化;相似但非固定标签名(如 `tool_calls_extra`)仍按普通文本处理。
|
||||
- 如果模型在固定工具标签名后多输出一个尾部管道符,例如 `<|DSML|tool_calls|` / `<|DSML|invoke|` / `<|DSML|parameter|`,兼容层会把这个尾部 `|` 当作异常标签终止符并补齐缺失的 `>`;如果后面已经有 `>`,也会消费这个多余 `|` 后再归一化。
|
||||
- Go / Node 解析层不再枚举每一种 DSML typo。它以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,把标签名前的任意协议前缀壳视为可容忍噪声,并继续兼容管道符 `|` / `｜`、空白、重复 leading `<`、可视控制符 `␂`、原始 STX `\x02`、非 ASCII 分隔符、CJK 尖括号 `〈` / `〉` 等漂移。例如 `<DSML|tool_calls>`、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、`<DSMLtool_calls>`、`<<DSML|DSML|tool_calls>`、`<DSML␂tool_calls>`、`<proto💥tool_calls>`、`<DSM|tool_calls>...〈/DSM|tool_calls〉` 都会归一化;相似但非固定标签名(如 `tool_calls_extra`)仍按普通文本处理。
|
||||
- 如果模型在固定工具标签名后多输出一个尾部管道符,例如 `<|DSML|tool_calls|` / `<|DSML|invoke|` / `<|DSML|parameter|`,或在带属性标签的结束符前多输出一个尾部管道符(如 `<DSM|parameter name="command"|>`),兼容层会把这个尾部 `|` / `｜` 当作异常标签终止符并补齐或归一化;如果后面已经有 `>` / `〉`,也会消费这个多余分隔符后再归一化。
|
||||
- 这是一个针对常见模型失误的窄修复,不改变推荐输出格式;prompt 仍要求模型直接输出完整 DSML 外壳。
|
||||
- 裸 `<invoke ...>` / `<parameter ...>` 不会被当成“已支持的工具语法”;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 才会进入工具调用路径。
|
||||
|
||||
@@ -54,7 +54,7 @@
|
||||
|
||||
在流式链路中(Go / Node 一致):
|
||||
|
||||
- DSML `<|DSML|tool_calls>` wrapper、短横线形式(如 `<dsml-tool-calls>` / `<dsml-invoke>` / `<dsml-parameter>`)、基于固定本地标签名的 DSML 噪声容错形态、尾部管道符形态(如 `<|DSML|tool_calls|`)和 canonical `<tool_calls>` wrapper 都会进入结构化捕获
|
||||
- DSML `<|DSML|tool_calls>` wrapper、短横线形式(如 `<dsml-tool-calls>` / `<dsml-invoke>` / `<dsml-parameter>`)、基于固定本地标签名的 DSML 噪声容错形态、尾部管道符形态(如 `<|DSML|tool_calls|`)和 canonical `<tool_calls>` wrapper 都会进入结构化捕获
|
||||
- 如果流里直接从 invoke 开始,但后面补上了 closing wrapper,Go 流式筛分也会按缺失 opening wrapper 的修复路径尝试恢复
|
||||
- 已识别成功的工具调用不会再次回流到普通文本
|
||||
- 不符合新格式的块不会执行,并继续按原样文本透传
|
||||
@@ -78,11 +78,16 @@
|
||||
- `rejectedByPolicy`:当前固定为 `false`
|
||||
- `rejectedToolNames`:当前固定为空数组
|
||||
|
||||
解析层不会因为参数值为空而丢弃工具调用。若模型输出了显式空字符串或纯空白参数,它们会按空字符串进入结构化 `tool_calls`;是否拒绝缺参或空命令应由后续工具执行侧 / 客户端 schema 校验决定。Prompt 层仍会要求模型不要主动输出空参数。
|
||||
|
||||
完整的 DSML / XML wrapper 只有在成功解析出有效 `invoke name`,并且参数节点(如存在)符合 `parameter` 语义后,才会变成结构化工具调用;真正的零参数工具调用仍然有效。如果 wrapper 完整但内部不是可执行工具调用形态(例如使用 `<param>`、缺少有效 `invoke name`、或其他 malformed XML 工具壳),流式 sieve 会把原始 wrapper 作为普通文本释放,不会吞掉内容,也不会生成空的工具调用。
|
||||
|
||||
## 5) 落地建议
|
||||
|
||||
1. Prompt 里只示范 DSML 外壳语法。
|
||||
2. 上游客户端应直接输出完整 DSML 外壳;DS2API 兼容旧式 canonical XML,并只对“closing tag 在、opening tag 漏掉”的常见失误做窄修复,不会泛化接受其他旧格式。
|
||||
3. 不要依赖 parser 做安全控制;执行器侧仍应做工具名和参数校验。
|
||||
3. 模型只有在知道本次调用所需参数值时才应输出工具调用;不要输出 placeholder、空字符串或纯空白参数。对 `Bash` / `execute_command`,实际命令必须在 `command` 参数里。
|
||||
4. 不要依赖 parser 做安全控制;执行器侧仍应做工具名和参数校验。
|
||||
|
||||
## 6) 回归验证
|
||||
|
||||
@@ -90,17 +95,18 @@
|
||||
|
||||
```bash
|
||||
go test -v -run 'TestParseToolCalls|TestProcessToolSieve' ./internal/toolcall ./internal/toolstream ./internal/httpapi/openai/...
|
||||
node --test tests/node/stream-tool-sieve.test.js
|
||||
./tests/scripts/run-unit-node.sh
|
||||
```
|
||||
|
||||
重点覆盖:
|
||||
|
||||
- DSML `<|DSML|tool_calls>` wrapper 正常解析
|
||||
- DSML `<｜DSML｜tool_calls>` wrapper 正常解析
|
||||
- legacy canonical `<tool_calls>` wrapper 正常解析
|
||||
- 固定本地标签名的 DSML 噪声容错形态(如 `<DSML|tool_calls>`、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、`<DSMLtool_calls>`、`<<DSML|DSML|tool_calls>`)正常解析
|
||||
- 固定本地标签名的 DSML 噪声容错形态(如 `<DSML|tool_calls>`、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、`<DSMLtool_calls>`、`<<DSML|DSML|tool_calls>`、`<DSM|tool_calls>...〈/DSM|tool_calls〉`)正常解析
|
||||
- 混搭标签(DSML wrapper + canonical inner)归一化后正常解析
|
||||
- 波浪线围栏 `~~~` 内的示例不执行
|
||||
- 嵌套围栏(4 反引号嵌套 3 反引号)内的示例不执行
|
||||
- 文本 mention 标签名后紧跟真正工具调用的场景(含同一 wrapper 变体)
|
||||
- 空参数结构化保留,malformed executable-looking XML wrapper 作为文本释放
|
||||
- 非兼容内容按普通文本透传
|
||||
- 代码块示例不执行
|
||||
|
||||
@@ -218,7 +218,7 @@ func UpstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int,
|
||||
if strings.TrimSpace(thinking) != "" {
|
||||
return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned reasoning without visible output.", "upstream_empty_output"
|
||||
}
|
||||
return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned empty output.", "upstream_empty_output"
|
||||
return http.StatusServiceUnavailable, "Upstream service is unavailable and returned no output.", "upstream_unavailable"
|
||||
}
|
||||
|
||||
// ShouldRetryEmptyOutput returns true when the turn produced no visible text
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package assistantturn
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"testing"
|
||||
|
||||
"ds2api/internal/promptcompat"
|
||||
@@ -11,7 +12,7 @@ func TestBuildTurnFromCollectedTextCitation(t *testing.T) {
|
||||
turn := BuildTurnFromCollected(sse.CollectResult{
|
||||
Text: "See [citation:1]",
|
||||
CitationLinks: map[int]string{1: "https://example.com"},
|
||||
}, BuildOptions{Model: "deepseek-v4-flash", Prompt: "prompt", SearchEnabled: true, StripReferenceMarkers: true})
|
||||
}, BuildOptions{Model: "deepseek-v4-flash", Prompt: "prompt", SearchEnabled: true})
|
||||
if turn.Text != "See [1](https://example.com)" {
|
||||
t.Fatalf("text mismatch: %q", turn.Text)
|
||||
}
|
||||
@@ -23,6 +24,20 @@ func TestBuildTurnFromCollectedTextCitation(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildTurnFromCollectedKeepsNonStreamReferenceLinks(t *testing.T) {
|
||||
turn := BuildTurnFromCollected(sse.CollectResult{
|
||||
Text: "结论[reference:0],补充[reference:1]。",
|
||||
CitationLinks: map[int]string{
|
||||
1: "https://example.com/a",
|
||||
2: "https://example.com/b",
|
||||
},
|
||||
}, BuildOptions{Model: "deepseek-v4-flash-search", Prompt: "prompt", SearchEnabled: true})
|
||||
want := "结论[0](https://example.com/a),补充[1](https://example.com/b)。"
|
||||
if turn.Text != want {
|
||||
t.Fatalf("text mismatch: got %q want %q", turn.Text, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildTurnFromCollectedToolCall(t *testing.T) {
|
||||
turn := BuildTurnFromCollected(sse.CollectResult{
|
||||
Text: `<tool_calls><invoke name="Write"><parameter name="content">{"x":1}</parameter></invoke></tool_calls>`,
|
||||
@@ -56,6 +71,13 @@ func TestBuildTurnFromCollectedThinkingOnlyIsEmptyOutput(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildTurnFromCollectedPureEmptyOutputIsUpstreamUnavailable(t *testing.T) {
|
||||
turn := BuildTurnFromCollected(sse.CollectResult{}, BuildOptions{})
|
||||
if turn.Error == nil || turn.Error.Status != http.StatusServiceUnavailable || turn.Error.Code != "upstream_unavailable" {
|
||||
t.Fatalf("expected upstream unavailable error, got %#v", turn.Error)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildTurnFromCollectedToolChoiceRequired(t *testing.T) {
|
||||
turn := BuildTurnFromCollected(sse.CollectResult{Text: "hello"}, BuildOptions{
|
||||
ToolChoice: promptcompat.ToolChoicePolicy{Mode: promptcompat.ToolChoiceRequired},
|
||||
|
||||
@@ -241,6 +241,36 @@ func TestSwitchAccountSkipsLoginFailureAndContinues(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSwitchAccountRespectsPinnedTargetAccount(t *testing.T) {
|
||||
t.Setenv("DS2API_CONFIG_JSON", `{
|
||||
"keys":["managed-key"],
|
||||
"accounts":[
|
||||
{"email":"acc1@test.com","token":"t1"},
|
||||
{"email":"acc2@test.com","token":"t2"}
|
||||
]
|
||||
}`)
|
||||
store := config.LoadStore()
|
||||
pool := account.NewPool(store)
|
||||
r := NewResolver(store, pool, func(_ context.Context, _ config.Account) (string, error) {
|
||||
return "new-token", nil
|
||||
})
|
||||
|
||||
req, _ := http.NewRequest("POST", "/", nil)
|
||||
req.Header.Set("Authorization", "Bearer managed-key")
|
||||
req.Header.Set("X-Ds2-Target-Account", "acc1@test.com")
|
||||
a, err := r.Determine(req)
|
||||
if err != nil {
|
||||
t.Fatalf("determine failed: %v", err)
|
||||
}
|
||||
defer r.Release(a)
|
||||
if r.SwitchAccount(context.Background(), a) {
|
||||
t.Fatal("expected switch to be disabled for pinned target account")
|
||||
}
|
||||
if a.AccountID != "acc1@test.com" {
|
||||
t.Fatalf("expected pinned account to remain selected, got %q", a.AccountID)
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Release edge cases ─────────────────────────────────────────────
|
||||
|
||||
func TestReleaseNilAuth(t *testing.T) {
|
||||
|
||||
@@ -28,6 +28,7 @@ type RequestAuth struct {
|
||||
DeepSeekToken string
|
||||
CallerID string
|
||||
AccountID string
|
||||
TargetAccount string
|
||||
Account config.Account
|
||||
TriedAccounts map[string]bool
|
||||
resolver *Resolver
|
||||
@@ -99,6 +100,7 @@ func (r *Resolver) acquireManagedRequestAuth(ctx context.Context, callerID, targ
|
||||
UseConfigToken: true,
|
||||
CallerID: callerID,
|
||||
AccountID: acc.Identifier(),
|
||||
TargetAccount: target,
|
||||
Account: acc,
|
||||
TriedAccounts: tried,
|
||||
resolver: r,
|
||||
@@ -185,6 +187,9 @@ func (r *Resolver) SwitchAccount(ctx context.Context, a *RequestAuth) bool {
|
||||
if !a.UseConfigToken {
|
||||
return false
|
||||
}
|
||||
if strings.TrimSpace(a.TargetAccount) != "" {
|
||||
return false
|
||||
}
|
||||
if a.TriedAccounts == nil {
|
||||
a.TriedAccounts = map[string]bool{}
|
||||
}
|
||||
@@ -208,6 +213,13 @@ func (r *Resolver) SwitchAccount(ctx context.Context, a *RequestAuth) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func (a *RequestAuth) SwitchAccount(ctx context.Context) bool {
|
||||
if a == nil || a.resolver == nil {
|
||||
return false
|
||||
}
|
||||
return a.resolver.SwitchAccount(ctx, a)
|
||||
}
|
||||
|
||||
func (r *Resolver) Release(a *RequestAuth) {
|
||||
if a == nil || !a.UseConfigToken || a.AccountID == "" {
|
||||
return
|
||||
|
||||
@@ -43,6 +43,7 @@ type Entry struct {
|
||||
Status string `json:"status"`
|
||||
CallerID string `json:"caller_id,omitempty"`
|
||||
AccountID string `json:"account_id,omitempty"`
|
||||
Surface string `json:"surface,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
Stream bool `json:"stream"`
|
||||
UserInput string `json:"user_input,omitempty"`
|
||||
@@ -72,6 +73,7 @@ type SummaryEntry struct {
|
||||
Status string `json:"status"`
|
||||
CallerID string `json:"caller_id,omitempty"`
|
||||
AccountID string `json:"account_id,omitempty"`
|
||||
Surface string `json:"surface,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
Stream bool `json:"stream"`
|
||||
UserInput string `json:"user_input,omitempty"`
|
||||
@@ -92,6 +94,7 @@ type File struct {
|
||||
type StartParams struct {
|
||||
CallerID string
|
||||
AccountID string
|
||||
Surface string
|
||||
Model string
|
||||
Stream bool
|
||||
UserInput string
|
||||
@@ -271,6 +274,7 @@ func (s *Store) Start(params StartParams) (Entry, error) {
|
||||
Status: "streaming",
|
||||
CallerID: strings.TrimSpace(params.CallerID),
|
||||
AccountID: strings.TrimSpace(params.AccountID),
|
||||
Surface: strings.TrimSpace(params.Surface),
|
||||
Model: strings.TrimSpace(params.Model),
|
||||
Stream: params.Stream,
|
||||
UserInput: strings.TrimSpace(params.UserInput),
|
||||
@@ -546,10 +550,13 @@ func (s *Store) rebuildIndexLocked() {
|
||||
summaries = append(summaries, summaryFromEntry(item))
|
||||
}
|
||||
sort.Slice(summaries, func(i, j int) bool {
|
||||
if summaries[i].UpdatedAt == summaries[j].UpdatedAt {
|
||||
return summaries[i].CreatedAt > summaries[j].CreatedAt
|
||||
if summaries[i].CreatedAt == summaries[j].CreatedAt {
|
||||
if summaries[i].Revision == summaries[j].Revision {
|
||||
return summaries[i].UpdatedAt > summaries[j].UpdatedAt
|
||||
}
|
||||
return summaries[i].Revision > summaries[j].Revision
|
||||
}
|
||||
return summaries[i].UpdatedAt > summaries[j].UpdatedAt
|
||||
return summaries[i].CreatedAt > summaries[j].CreatedAt
|
||||
})
|
||||
if s.state.Limit < DisabledLimit || !isAllowedLimit(s.state.Limit) {
|
||||
s.state.Limit = DefaultLimit
|
||||
@@ -593,6 +600,7 @@ func summaryFromEntry(item Entry) SummaryEntry {
|
||||
Status: item.Status,
|
||||
CallerID: item.CallerID,
|
||||
AccountID: item.AccountID,
|
||||
Surface: item.Surface,
|
||||
Model: item.Model,
|
||||
Stream: item.Stream,
|
||||
UserInput: item.UserInput,
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
@@ -494,6 +495,36 @@ func TestStoreWritesOnlyChangedDetailFiles(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStoreOrdersByCreationTimeNotStreamingUpdates(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "chat_history.json")
|
||||
store := New(path)
|
||||
|
||||
first, err := store.Start(StartParams{UserInput: "first"})
|
||||
if err != nil {
|
||||
t.Fatalf("start first failed: %v", err)
|
||||
}
|
||||
time.Sleep(time.Millisecond)
|
||||
second, err := store.Start(StartParams{UserInput: "second"})
|
||||
if err != nil {
|
||||
t.Fatalf("start second failed: %v", err)
|
||||
}
|
||||
time.Sleep(time.Millisecond)
|
||||
if _, err := store.Update(first.ID, UpdateParams{Status: "streaming", Content: "still running"}); err != nil {
|
||||
t.Fatalf("update first failed: %v", err)
|
||||
}
|
||||
|
||||
snapshot, err := store.Snapshot()
|
||||
if err != nil {
|
||||
t.Fatalf("snapshot failed: %v", err)
|
||||
}
|
||||
if len(snapshot.Items) != 2 {
|
||||
t.Fatalf("expected two items, got %#v", snapshot.Items)
|
||||
}
|
||||
if snapshot.Items[0].ID != second.ID || snapshot.Items[1].ID != first.ID {
|
||||
t.Fatalf("expected creation-time order to stay stable, got %#v", snapshot.Items)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdatePreservesContentWhenNewContentIsEmpty(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "chat_history.json")
|
||||
store := New(path)
|
||||
|
||||
@@ -90,7 +90,11 @@ func ExecuteNonStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.R
|
||||
if startErr != nil {
|
||||
return NonStreamResult{SessionID: start.SessionID, Payload: start.Payload}, startErr
|
||||
}
|
||||
stdReq = start.Request
|
||||
return ExecuteNonStreamStartedWithRetry(ctx, ds, a, start, opts)
|
||||
}
|
||||
|
||||
func ExecuteNonStreamStartedWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, start StartResult, opts Options) (NonStreamResult, *assistantturn.OutputError) {
|
||||
stdReq := start.Request
|
||||
maxAttempts := opts.MaxAttempts
|
||||
if maxAttempts <= 0 {
|
||||
maxAttempts = 3
|
||||
@@ -100,6 +104,7 @@ func ExecuteNonStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.R
|
||||
pow := start.Pow
|
||||
|
||||
attempts := 0
|
||||
accountSwitchAttempted := false
|
||||
currentResp := start.Response
|
||||
usagePrompt := stdReq.PromptTokenText
|
||||
accumulatedThinking := ""
|
||||
@@ -108,6 +113,24 @@ func ExecuteNonStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.R
|
||||
for {
|
||||
turn, outErr := collectAttempt(currentResp, stdReq, usagePrompt, opts)
|
||||
if outErr != nil {
|
||||
if canRetryOnAlternateAccount(ctx, a, outErr, opts.RetryEnabled, &accountSwitchAttempted) {
|
||||
switched, switchErr := startStandardCompletionOnAlternateAccount(ctx, ds, a, stdReq, maxAttempts)
|
||||
if switchErr != nil {
|
||||
return NonStreamResult{SessionID: sessionID, Payload: payload, Attempts: attempts}, switchErr
|
||||
}
|
||||
if switched.Response != nil {
|
||||
config.Logger.Info("[completion_runtime_account_switch_retry] retrying after 429", "surface", stdReq.Surface, "stream", false, "account", a.AccountID)
|
||||
sessionID = switched.SessionID
|
||||
payload = switched.Payload
|
||||
pow = switched.Pow
|
||||
currentResp = switched.Response
|
||||
usagePrompt = stdReq.PromptTokenText
|
||||
accumulatedThinking = ""
|
||||
accumulatedRawThinking = ""
|
||||
accumulatedToolDetectionThinking = ""
|
||||
continue
|
||||
}
|
||||
}
|
||||
return NonStreamResult{SessionID: sessionID, Payload: payload, Attempts: attempts}, outErr
|
||||
}
|
||||
accumulatedThinking += sse.TrimContinuationOverlap(accumulatedThinking, turn.Thinking)
|
||||
@@ -130,6 +153,24 @@ func ExecuteNonStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.R
|
||||
retryMax = shared.EmptyOutputRetryMaxAttempts()
|
||||
}
|
||||
if !opts.RetryEnabled || !assistantturn.ShouldRetryEmptyOutput(turn, attempts, retryMax) {
|
||||
if canRetryOnAlternateAccount(ctx, a, turn.Error, opts.RetryEnabled, &accountSwitchAttempted) {
|
||||
switched, switchErr := startStandardCompletionOnAlternateAccount(ctx, ds, a, stdReq, maxAttempts)
|
||||
if switchErr != nil {
|
||||
return NonStreamResult{SessionID: sessionID, Payload: payload, Turn: turn, Attempts: attempts}, switchErr
|
||||
}
|
||||
if switched.Response != nil {
|
||||
config.Logger.Info("[completion_runtime_account_switch_retry] retrying after 429", "surface", stdReq.Surface, "stream", false, "account", a.AccountID)
|
||||
sessionID = switched.SessionID
|
||||
payload = switched.Payload
|
||||
pow = switched.Pow
|
||||
currentResp = switched.Response
|
||||
usagePrompt = stdReq.PromptTokenText
|
||||
accumulatedThinking = ""
|
||||
accumulatedRawThinking = ""
|
||||
accumulatedToolDetectionThinking = ""
|
||||
continue
|
||||
}
|
||||
}
|
||||
return NonStreamResult{SessionID: sessionID, Payload: payload, Turn: turn, Attempts: attempts}, turn.Error
|
||||
}
|
||||
|
||||
@@ -150,6 +191,37 @@ func ExecuteNonStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.R
|
||||
}
|
||||
}
|
||||
|
||||
func canRetryOnAlternateAccount(ctx context.Context, a *auth.RequestAuth, outErr *assistantturn.OutputError, retryEnabled bool, attempted *bool) bool {
|
||||
if outErr == nil || outErr.Status != http.StatusTooManyRequests {
|
||||
return false
|
||||
}
|
||||
if !retryEnabled || attempted == nil || *attempted {
|
||||
return false
|
||||
}
|
||||
if a == nil || !a.UseConfigToken {
|
||||
return false
|
||||
}
|
||||
*attempted = true
|
||||
return a.SwitchAccount(ctx)
|
||||
}
|
||||
|
||||
func startStandardCompletionOnAlternateAccount(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, stdReq promptcompat.StandardRequest, maxAttempts int) (StartResult, *assistantturn.OutputError) {
|
||||
sessionID, err := ds.CreateSession(ctx, a, maxAttempts)
|
||||
if err != nil {
|
||||
return StartResult{}, authOutputError(a)
|
||||
}
|
||||
pow, err := ds.GetPow(ctx, a, maxAttempts)
|
||||
if err != nil {
|
||||
return StartResult{SessionID: sessionID}, &assistantturn.OutputError{Status: http.StatusUnauthorized, Message: "Failed to get PoW (invalid token or unknown error).", Code: "error"}
|
||||
}
|
||||
payload := stdReq.CompletionPayload(sessionID)
|
||||
resp, err := ds.CallCompletion(ctx, a, payload, pow, maxAttempts)
|
||||
if err != nil {
|
||||
return StartResult{SessionID: sessionID, Payload: payload, Pow: pow}, &assistantturn.OutputError{Status: http.StatusInternalServerError, Message: "Failed to get completion.", Code: "error"}
|
||||
}
|
||||
return StartResult{SessionID: sessionID, Payload: payload, Pow: pow, Response: resp, Request: stdReq}, nil
|
||||
}
|
||||
|
||||
func collectAttempt(resp *http.Response, stdReq promptcompat.StandardRequest, usagePrompt string, opts Options) (assistantturn.Turn, *assistantturn.OutputError) {
|
||||
defer func() {
|
||||
if err := resp.Body.Close(); err != nil {
|
||||
|
||||
@@ -7,15 +7,19 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"ds2api/internal/account"
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/config"
|
||||
dsclient "ds2api/internal/deepseek/client"
|
||||
"ds2api/internal/promptcompat"
|
||||
)
|
||||
|
||||
type fakeDeepSeekCaller struct {
|
||||
responses []*http.Response
|
||||
payloads []map[string]any
|
||||
uploads []dsclient.UploadFileRequest
|
||||
responses []*http.Response
|
||||
payloads []map[string]any
|
||||
uploads []dsclient.UploadFileRequest
|
||||
completionAccounts []string
|
||||
sessionByAccount bool
|
||||
}
|
||||
|
||||
type currentInputRuntimeConfig struct{}
|
||||
@@ -23,7 +27,10 @@ type currentInputRuntimeConfig struct{}
|
||||
func (currentInputRuntimeConfig) CurrentInputFileEnabled() bool { return true }
|
||||
func (currentInputRuntimeConfig) CurrentInputFileMinChars() int { return 0 }
|
||||
|
||||
func (f *fakeDeepSeekCaller) CreateSession(context.Context, *auth.RequestAuth, int) (string, error) {
|
||||
func (f *fakeDeepSeekCaller) CreateSession(_ context.Context, a *auth.RequestAuth, _ int) (string, error) {
|
||||
if f.sessionByAccount && a != nil && a.AccountID != "" {
|
||||
return "session-" + a.AccountID, nil
|
||||
}
|
||||
return "session-1", nil
|
||||
}
|
||||
|
||||
@@ -36,8 +43,11 @@ func (f *fakeDeepSeekCaller) UploadFile(_ context.Context, _ *auth.RequestAuth,
|
||||
return &dsclient.UploadFileResult{ID: "file-runtime-1"}, nil
|
||||
}
|
||||
|
||||
func (f *fakeDeepSeekCaller) CallCompletion(_ context.Context, _ *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) {
|
||||
func (f *fakeDeepSeekCaller) CallCompletion(_ context.Context, a *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) {
|
||||
f.payloads = append(f.payloads, payload)
|
||||
if a != nil {
|
||||
f.completionAccounts = append(f.completionAccounts, a.AccountID)
|
||||
}
|
||||
if len(f.responses) == 0 {
|
||||
return sseHTTPResponse(http.StatusOK, `data: {"p":"response/content","v":"fallback"}`), nil
|
||||
}
|
||||
@@ -89,9 +99,72 @@ func TestExecuteNonStreamWithRetryBuildsCanonicalTurn(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteNonStreamWithRetrySwitchesManagedAccountBeforeFinal429(t *testing.T) {
|
||||
t.Setenv("DS2API_CONFIG_JSON", `{
|
||||
"keys":["managed-key"],
|
||||
"accounts":[
|
||||
{"email":"acc1@test.com","password":"pwd"},
|
||||
{"email":"acc2@test.com","password":"pwd"}
|
||||
]
|
||||
}`)
|
||||
store := config.LoadStore()
|
||||
resolver := auth.NewResolver(store, account.NewPool(store), func(_ context.Context, acc config.Account) (string, error) {
|
||||
return "token-" + acc.Identifier(), nil
|
||||
})
|
||||
req, _ := http.NewRequest(http.MethodPost, "/", nil)
|
||||
req.Header.Set("Authorization", "Bearer managed-key")
|
||||
a, err := resolver.Determine(req)
|
||||
if err != nil {
|
||||
t.Fatalf("determine failed: %v", err)
|
||||
}
|
||||
defer resolver.Release(a)
|
||||
|
||||
ds := &fakeDeepSeekCaller{
|
||||
sessionByAccount: true,
|
||||
responses: []*http.Response{
|
||||
sseHTTPResponse(http.StatusOK, `data: {"response_message_id":11,"p":"response/thinking_content","v":"first empty"}`),
|
||||
sseHTTPResponse(http.StatusOK, `data: {"response_message_id":12,"p":"response/thinking_content","v":"retry empty"}`),
|
||||
sseHTTPResponse(http.StatusOK, `data: {"response_message_id":21,"p":"response/content","v":"ok from second account"}`),
|
||||
},
|
||||
}
|
||||
stdReq := promptcompat.StandardRequest{
|
||||
Surface: "test",
|
||||
ResponseModel: "deepseek-v4-flash",
|
||||
PromptTokenText: "prompt",
|
||||
FinalPrompt: "final prompt",
|
||||
Thinking: true,
|
||||
}
|
||||
|
||||
result, outErr := ExecuteNonStreamWithRetry(context.Background(), ds, a, stdReq, Options{RetryEnabled: true})
|
||||
if outErr != nil {
|
||||
t.Fatalf("unexpected output error after account switch retry: %#v", outErr)
|
||||
}
|
||||
if result.Turn.Text != "ok from second account" {
|
||||
t.Fatalf("text mismatch after switch retry: %q", result.Turn.Text)
|
||||
}
|
||||
if result.SessionID != "session-acc2@test.com" {
|
||||
t.Fatalf("expected switched account session, got %q", result.SessionID)
|
||||
}
|
||||
wantAccounts := []string{"acc1@test.com", "acc1@test.com", "acc2@test.com"}
|
||||
if len(ds.completionAccounts) != len(wantAccounts) {
|
||||
t.Fatalf("completion account count mismatch: got %v want %v", ds.completionAccounts, wantAccounts)
|
||||
}
|
||||
for i, want := range wantAccounts {
|
||||
if ds.completionAccounts[i] != want {
|
||||
t.Fatalf("completion account %d = %q want %q (all=%v)", i, ds.completionAccounts[i], want, ds.completionAccounts)
|
||||
}
|
||||
}
|
||||
if got := ds.payloads[2]["chat_session_id"]; got != "session-acc2@test.com" {
|
||||
t.Fatalf("switched payload session mismatch: %#v", got)
|
||||
}
|
||||
if prompt, _ := ds.payloads[2]["prompt"].(string); strings.Contains(prompt, "Previous reply had no visible output") {
|
||||
t.Fatalf("expected fresh switched-account prompt without empty-output suffix, got %q", prompt)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteNonStreamWithRetryUsesParentMessageForEmptyRetry(t *testing.T) {
|
||||
ds := &fakeDeepSeekCaller{responses: []*http.Response{
|
||||
sseHTTPResponse(http.StatusOK, `data: {"response_message_id":77,"p":"response/status","v":"FINISHED"}`),
|
||||
sseHTTPResponse(http.StatusOK, `data: {"response_message_id":77,"p":"response/thinking_content","v":"plan"}`),
|
||||
sseHTTPResponse(http.StatusOK, `data: {"response_message_id":78,"p":"response/content","v":"ok"}`),
|
||||
}}
|
||||
stdReq := promptcompat.StandardRequest{
|
||||
@@ -119,6 +192,29 @@ func TestExecuteNonStreamWithRetryUsesParentMessageForEmptyRetry(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteNonStreamWithRetryConvertsReferenceMarkers(t *testing.T) {
|
||||
ds := &fakeDeepSeekCaller{responses: []*http.Response{sseHTTPResponse(
|
||||
http.StatusOK,
|
||||
`data: {"p":"response/content","v":"答案[reference:0]。","citation":{"cite_index":0,"url":"https://example.com/ref"}}`,
|
||||
)}}
|
||||
stdReq := promptcompat.StandardRequest{
|
||||
Surface: "test",
|
||||
ResponseModel: "deepseek-v4-flash-search",
|
||||
PromptTokenText: "prompt",
|
||||
FinalPrompt: "final prompt",
|
||||
Search: true,
|
||||
}
|
||||
|
||||
result, outErr := ExecuteNonStreamWithRetry(context.Background(), ds, &auth.RequestAuth{}, stdReq, Options{})
|
||||
if outErr != nil {
|
||||
t.Fatalf("unexpected output error: %#v", outErr)
|
||||
}
|
||||
want := "答案[0](https://example.com/ref)。"
|
||||
if result.Turn.Text != want {
|
||||
t.Fatalf("text mismatch: got %q want %q", result.Turn.Text, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartCompletionAppliesCurrentInputFileGlobally(t *testing.T) {
|
||||
ds := &fakeDeepSeekCaller{responses: []*http.Response{sseHTTPResponse(http.StatusOK, `data: {"p":"response/content","v":"ok"}`)}}
|
||||
stdReq := promptcompat.StandardRequest{
|
||||
|
||||
179
internal/completionruntime/stream_retry.go
Normal file
179
internal/completionruntime/stream_retry.go
Normal file
@@ -0,0 +1,179 @@
|
||||
package completionruntime
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"ds2api/internal/assistantturn"
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/config"
|
||||
"ds2api/internal/httpapi/openai/shared"
|
||||
)
|
||||
|
||||
// StreamRetryOptions configures ExecuteStreamWithRetry.
type StreamRetryOptions struct {
	// Surface labels log entries. It is trimmed, and a blank value is logged
	// as "completion".
	Surface string
	// Stream is only reported in log entries by ExecuteStreamWithRetry.
	Stream bool
	// RetryEnabled gates both empty-output retries and the alternate-account
	// retry; when false the first attempt is final.
	RetryEnabled bool
	// RetryMaxAttempts bounds the number of empty-output retries. Values <= 0
	// fall back to shared.EmptyOutputRetryMaxAttempts().
	RetryMaxAttempts int
	// MaxAttempts is forwarded to the DeepSeekCaller methods. Values <= 0 are
	// replaced with 3.
	MaxAttempts int
	// UsagePrompt is the base prompt text handed to OnRetryPrompt: unchanged
	// after an account switch, and passed through
	// shared.UsagePromptWithEmptyOutputRetry after an empty-output retry.
	UsagePrompt string
}
|
||||
|
||||
// StreamRetryHooks are the callbacks ExecuteStreamWithRetry uses to hand
// responses to the caller and to report retry progress. Every hook except
// ConsumeAttempt is optional and is skipped when nil.
type StreamRetryHooks struct {
	// ConsumeAttempt is called once per upstream response. It is required:
	// ExecuteStreamWithRetry returns immediately when it is nil.
	// allowDeferEmpty is true while a retry (or an account switch) is still
	// possible. terminalWritten ends the loop via OnTerminal; retryable asks
	// for another attempt.
	ConsumeAttempt func(resp *http.Response, allowDeferEmpty bool) (terminalWritten bool, retryable bool)
	// Finalize is called with the retry count when the loop ends without a
	// terminal write: the attempt was not retryable, retries are disabled, or
	// the retry budget is used up and no account switch took place.
	Finalize func(attempts int)
	// ParentMessageID is queried before each empty-output retry. Its result is
	// passed to shared.ClonePayloadForEmptyOutputRetry; 0 is used when nil.
	ParentMessageID func() int
	// OnRetry is called with the retry count after a retry request returned
	// HTTP 200.
	OnRetry func(attempts int)
	// OnRetryPrompt receives the usage prompt that applies to the next attempt.
	OnRetryPrompt func(prompt string)
	// OnRetryFailure is called when a retry or account-switch request fails;
	// the loop returns right after it.
	OnRetryFailure func(status int, message, code string)
	// OnAccountSwitch receives the session id created on the alternate account.
	OnAccountSwitch func(sessionID string)
	// OnTerminal is called with the retry count when ConsumeAttempt reports
	// terminalWritten.
	OnTerminal func(attempts int)
}
|
||||
|
||||
// ExecuteStreamWithRetry drives a streaming completion through the
// empty-output retry loop.
//
// initialResp is handed to hooks.ConsumeAttempt first. While ConsumeAttempt
// reports the attempt as retryable and retries are enabled, the function
// re-issues the completion with a retry payload, up to the retry limit. Once
// that limit is reached it tries a single switch to an alternate account
// before giving up. Progress and outcome are reported only through hooks; the
// function returns nothing. It is a no-op when hooks.ConsumeAttempt is nil.
func ExecuteStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, initialResp *http.Response, payload map[string]any, pow string, opts StreamRetryOptions, hooks StreamRetryHooks) {
	if hooks.ConsumeAttempt == nil {
		return
	}
	// Resolve defaults for the log label and both attempt limits.
	surface := strings.TrimSpace(opts.Surface)
	if surface == "" {
		surface = "completion"
	}
	maxAttempts := opts.MaxAttempts
	if maxAttempts <= 0 {
		maxAttempts = 3
	}
	retryMax := opts.RetryMaxAttempts
	if retryMax <= 0 {
		retryMax = shared.EmptyOutputRetryMaxAttempts()
	}

	attempts := 0
	accountSwitchAttempted := false
	currentResp := initialResp
	// Work on a copy so the caller's payload map is left untouched.
	currentPayload := clonePayload(payload)
	for {
		// An account switch is still possible only after the retry budget is
		// spent, only once, and only for auth with UseConfigToken set.
		allowAccountSwitch := opts.RetryEnabled && attempts >= retryMax && !accountSwitchAttempted && a != nil && a.UseConfigToken
		terminalWritten, retryable := hooks.ConsumeAttempt(currentResp, opts.RetryEnabled && (attempts < retryMax || allowAccountSwitch))
		if terminalWritten {
			if hooks.OnTerminal != nil {
				hooks.OnTerminal(attempts)
			}
			return
		}
		if !retryable || !opts.RetryEnabled {
			if hooks.Finalize != nil {
				hooks.Finalize(attempts)
			}
			return
		}

		if attempts >= retryMax {
			// Retry budget exhausted: the helper is handed a synthetic 429 so
			// its status check passes and the one-time account switch can run.
			if canRetryOnAlternateAccount(ctx, a, &assistantturn.OutputError{Status: http.StatusTooManyRequests}, opts.RetryEnabled, &accountSwitchAttempted) {
				// Restart from the original payload, not the retry-modified one.
				switched, switchErr := startPayloadCompletionOnAlternateAccount(ctx, ds, a, payload, maxAttempts)
				if switchErr != nil {
					if hooks.OnRetryFailure != nil {
						hooks.OnRetryFailure(switchErr.Status, switchErr.Message, switchErr.Code)
					}
					return
				}
				if switched.Response != nil {
					config.Logger.Info("[completion_runtime_account_switch_retry] retrying after 429", "surface", surface, "stream", opts.Stream, "account", a.AccountID)
					currentResp = switched.Response
					currentPayload = switched.Payload
					pow = switched.Pow
					if hooks.OnAccountSwitch != nil {
						hooks.OnAccountSwitch(switched.SessionID)
					}
					// The switched attempt uses the unmodified usage prompt.
					if hooks.OnRetryPrompt != nil {
						hooks.OnRetryPrompt(opts.UsagePrompt)
					}
					continue
				}
			}
			if hooks.Finalize != nil {
				hooks.Finalize(attempts)
			}
			return
		}

		attempts++
		parentMessageID := 0
		if hooks.ParentMessageID != nil {
			parentMessageID = hooks.ParentMessageID()
		}
		config.Logger.Info("[completion_runtime_empty_retry] attempting synthetic retry", "surface", surface, "stream", opts.Stream, "retry_attempt", attempts, "parent_message_id", parentMessageID)
		// Prefer a fresh PoW for the retry; reuse the previous one if fetching fails.
		retryPow, powErr := ds.GetPow(ctx, a, maxAttempts)
		if powErr != nil {
			config.Logger.Warn("[completion_runtime_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", surface, "stream", opts.Stream, "retry_attempt", attempts, "error", powErr)
			retryPow = pow
		}
		nextResp, err := ds.CallCompletion(ctx, a, shared.ClonePayloadForEmptyOutputRetry(currentPayload, parentMessageID), retryPow, maxAttempts)
		if err != nil {
			if hooks.OnRetryFailure != nil {
				hooks.OnRetryFailure(http.StatusInternalServerError, "Failed to get completion.", "error")
			}
			config.Logger.Warn("[completion_runtime_empty_retry] retry request failed", "surface", surface, "stream", opts.Stream, "retry_attempt", attempts, "error", err)
			return
		}
		if nextResp.StatusCode != http.StatusOK {
			// Report the upstream error body (or the status text when the body
			// is blank) and stop; the body is read and closed here because
			// ConsumeAttempt never sees this response.
			body, readErr := io.ReadAll(nextResp.Body)
			if readErr != nil {
				config.Logger.Warn("[completion_runtime_empty_retry] retry error body read failed", "surface", surface, "stream", opts.Stream, "retry_attempt", attempts, "error", readErr)
			}
			closeRetryBody(surface, nextResp.Body)
			msg := strings.TrimSpace(string(body))
			if msg == "" {
				msg = http.StatusText(nextResp.StatusCode)
			}
			if hooks.OnRetryFailure != nil {
				hooks.OnRetryFailure(nextResp.StatusCode, msg, "error")
			}
			return
		}
		if hooks.OnRetry != nil {
			hooks.OnRetry(attempts)
		}
		if hooks.OnRetryPrompt != nil {
			hooks.OnRetryPrompt(shared.UsagePromptWithEmptyOutputRetry(opts.UsagePrompt, attempts))
		}
		currentResp = nextResp
	}
}
|
||||
|
||||
func startPayloadCompletionOnAlternateAccount(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, payload map[string]any, maxAttempts int) (StartResult, *assistantturn.OutputError) {
|
||||
sessionID, err := ds.CreateSession(ctx, a, maxAttempts)
|
||||
if err != nil {
|
||||
return StartResult{}, authOutputError(a)
|
||||
}
|
||||
pow, err := ds.GetPow(ctx, a, maxAttempts)
|
||||
if err != nil {
|
||||
return StartResult{SessionID: sessionID}, &assistantturn.OutputError{Status: http.StatusUnauthorized, Message: "Failed to get PoW (invalid token or unknown error).", Code: "error"}
|
||||
}
|
||||
nextPayload := clonePayload(payload)
|
||||
nextPayload["chat_session_id"] = sessionID
|
||||
delete(nextPayload, "parent_message_id")
|
||||
resp, err := ds.CallCompletion(ctx, a, nextPayload, pow, maxAttempts)
|
||||
if err != nil {
|
||||
return StartResult{SessionID: sessionID, Payload: nextPayload, Pow: pow}, &assistantturn.OutputError{Status: http.StatusInternalServerError, Message: "Failed to get completion.", Code: "error"}
|
||||
}
|
||||
return StartResult{SessionID: sessionID, Payload: nextPayload, Pow: pow, Response: resp}, nil
|
||||
}
|
||||
|
||||
// clonePayload returns a shallow copy of payload. The result is always a
// non-nil map, even for a nil input, so callers can write to it; nested values
// are shared with the original.
func clonePayload(payload map[string]any) map[string]any {
	dup := make(map[string]any, len(payload))
	for key := range payload {
		dup[key] = payload[key]
	}
	return dup
}
|
||||
|
||||
func closeRetryBody(surface string, body io.Closer) {
|
||||
if body == nil {
|
||||
return
|
||||
}
|
||||
if err := body.Close(); err != nil {
|
||||
config.Logger.Warn("[completion_runtime_empty_retry] retry response body close failed", "surface", surface, "error", err)
|
||||
}
|
||||
}
|
||||
150
internal/completionruntime/stream_retry_test.go
Normal file
150
internal/completionruntime/stream_retry_test.go
Normal file
@@ -0,0 +1,150 @@
|
||||
package completionruntime
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"ds2api/internal/account"
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/config"
|
||||
"ds2api/internal/httpapi/openai/shared"
|
||||
)
|
||||
|
||||
func TestExecuteStreamWithRetryUsesSharedRetryPayloadAndUsagePrompt(t *testing.T) {
|
||||
ds := &fakeDeepSeekCaller{responses: []*http.Response{
|
||||
sseHTTPResponse(http.StatusOK, `data: {"p":"response/content","v":"ok"}`),
|
||||
}}
|
||||
initial := sseHTTPResponse(http.StatusOK, `data: {"response_message_id":77,"p":"response/thinking_content","v":"plan"}`)
|
||||
payload := map[string]any{"prompt": "original prompt"}
|
||||
attemptsSeen := 0
|
||||
retryPrompt := ""
|
||||
|
||||
ExecuteStreamWithRetry(context.Background(), ds, &auth.RequestAuth{}, initial, payload, "pow", StreamRetryOptions{
|
||||
Surface: "test.stream",
|
||||
Stream: true,
|
||||
RetryEnabled: true,
|
||||
UsagePrompt: "original prompt",
|
||||
}, StreamRetryHooks{
|
||||
ConsumeAttempt: func(resp *http.Response, allowDeferEmpty bool) (bool, bool) {
|
||||
defer func() {
|
||||
if err := resp.Body.Close(); err != nil {
|
||||
t.Fatalf("close failed: %v", err)
|
||||
}
|
||||
}()
|
||||
_, _ = io.ReadAll(resp.Body)
|
||||
attemptsSeen++
|
||||
return attemptsSeen == 2, attemptsSeen == 1 && allowDeferEmpty
|
||||
},
|
||||
ParentMessageID: func() int {
|
||||
return 77
|
||||
},
|
||||
OnRetryPrompt: func(prompt string) {
|
||||
retryPrompt = prompt
|
||||
},
|
||||
})
|
||||
|
||||
if attemptsSeen != 2 {
|
||||
t.Fatalf("expected two stream attempts, got %d", attemptsSeen)
|
||||
}
|
||||
if len(ds.payloads) != 1 {
|
||||
t.Fatalf("expected one retry completion call, got %d", len(ds.payloads))
|
||||
}
|
||||
if got := ds.payloads[0]["parent_message_id"]; got != 77 {
|
||||
t.Fatalf("retry parent_message_id mismatch: %#v", got)
|
||||
}
|
||||
if prompt, _ := ds.payloads[0]["prompt"].(string); !strings.Contains(prompt, shared.EmptyOutputRetrySuffix) {
|
||||
t.Fatalf("expected retry suffix in payload prompt, got %q", prompt)
|
||||
}
|
||||
if !strings.Contains(retryPrompt, shared.EmptyOutputRetrySuffix) {
|
||||
t.Fatalf("expected retry suffix in usage prompt, got %q", retryPrompt)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteStreamWithRetrySwitchesManagedAccountBeforeFinal429(t *testing.T) {
|
||||
t.Setenv("DS2API_CONFIG_JSON", `{
|
||||
"keys":["managed-key"],
|
||||
"accounts":[
|
||||
{"email":"acc1@test.com","password":"pwd"},
|
||||
{"email":"acc2@test.com","password":"pwd"}
|
||||
]
|
||||
}`)
|
||||
store := config.LoadStore()
|
||||
resolver := auth.NewResolver(store, account.NewPool(store), func(_ context.Context, acc config.Account) (string, error) {
|
||||
return "token-" + acc.Identifier(), nil
|
||||
})
|
||||
req, _ := http.NewRequest(http.MethodPost, "/", nil)
|
||||
req.Header.Set("Authorization", "Bearer managed-key")
|
||||
a, err := resolver.Determine(req)
|
||||
if err != nil {
|
||||
t.Fatalf("determine failed: %v", err)
|
||||
}
|
||||
defer resolver.Release(a)
|
||||
|
||||
ds := &fakeDeepSeekCaller{
|
||||
sessionByAccount: true,
|
||||
responses: []*http.Response{
|
||||
sseHTTPResponse(http.StatusOK, `data: {"response_message_id":12,"p":"response/thinking_content","v":"retry empty"}`),
|
||||
sseHTTPResponse(http.StatusOK, `data: {"response_message_id":21,"p":"response/content","v":"ok from second account"}`),
|
||||
},
|
||||
}
|
||||
initial := sseHTTPResponse(http.StatusOK, `data: {"response_message_id":11,"p":"response/thinking_content","v":"first empty"}`)
|
||||
payload := map[string]any{"prompt": "original prompt", "chat_session_id": "session-acc1@test.com"}
|
||||
attemptsSeen := 0
|
||||
switchedSession := ""
|
||||
|
||||
ExecuteStreamWithRetry(context.Background(), ds, a, initial, payload, "pow", StreamRetryOptions{
|
||||
Surface: "test.stream",
|
||||
Stream: true,
|
||||
RetryEnabled: true,
|
||||
RetryMaxAttempts: 1,
|
||||
UsagePrompt: "original prompt",
|
||||
}, StreamRetryHooks{
|
||||
ConsumeAttempt: func(resp *http.Response, allowDeferEmpty bool) (bool, bool) {
|
||||
defer func() {
|
||||
if err := resp.Body.Close(); err != nil {
|
||||
t.Fatalf("close failed: %v", err)
|
||||
}
|
||||
}()
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
attemptsSeen++
|
||||
if strings.Contains(string(body), "ok from second account") {
|
||||
return true, false
|
||||
}
|
||||
if !allowDeferEmpty {
|
||||
t.Fatalf("expected empty attempt %d to be deferred before final 429", attemptsSeen)
|
||||
}
|
||||
return false, true
|
||||
},
|
||||
ParentMessageID: func() int {
|
||||
return 11 + attemptsSeen
|
||||
},
|
||||
OnAccountSwitch: func(sessionID string) {
|
||||
switchedSession = sessionID
|
||||
},
|
||||
})
|
||||
|
||||
if attemptsSeen != 3 {
|
||||
t.Fatalf("expected three stream attempts, got %d", attemptsSeen)
|
||||
}
|
||||
if switchedSession != "session-acc2@test.com" {
|
||||
t.Fatalf("expected switched session id, got %q", switchedSession)
|
||||
}
|
||||
wantAccounts := []string{"acc1@test.com", "acc2@test.com"}
|
||||
if len(ds.completionAccounts) != len(wantAccounts) {
|
||||
t.Fatalf("completion accounts mismatch: got %v want %v", ds.completionAccounts, wantAccounts)
|
||||
}
|
||||
for i, want := range wantAccounts {
|
||||
if ds.completionAccounts[i] != want {
|
||||
t.Fatalf("completion account %d = %q want %q (all=%v)", i, ds.completionAccounts[i], want, ds.completionAccounts)
|
||||
}
|
||||
}
|
||||
if got := ds.payloads[1]["chat_session_id"]; got != "session-acc2@test.com" {
|
||||
t.Fatalf("switched payload session mismatch: %#v", got)
|
||||
}
|
||||
if prompt, _ := ds.payloads[1]["prompt"].(string); strings.Contains(prompt, shared.EmptyOutputRetrySuffix) {
|
||||
t.Fatalf("expected switched-account prompt without empty-output suffix, got %q", prompt)
|
||||
}
|
||||
}
|
||||
@@ -48,6 +48,9 @@ func (c Config) MarshalJSON() ([]byte, error) {
|
||||
if c.ThinkingInjection.Enabled != nil || strings.TrimSpace(c.ThinkingInjection.Prompt) != "" {
|
||||
m["thinking_injection"] = c.ThinkingInjection
|
||||
}
|
||||
if strings.TrimSpace(c.Vercel.Token) != "" || strings.TrimSpace(c.Vercel.ProjectID) != "" || strings.TrimSpace(c.Vercel.TeamID) != "" {
|
||||
m["vercel"] = NormalizeVercelConfig(c.Vercel)
|
||||
}
|
||||
if c.VercelSyncHash != "" {
|
||||
m["_vercel_sync_hash"] = c.VercelSyncHash
|
||||
}
|
||||
@@ -125,6 +128,10 @@ func (c *Config) UnmarshalJSON(b []byte) error {
|
||||
if err := json.Unmarshal(v, &c.ThinkingInjection); err != nil {
|
||||
return fmt.Errorf("invalid field %q: %w", k, err)
|
||||
}
|
||||
case "vercel":
|
||||
if err := json.Unmarshal(v, &c.Vercel); err != nil {
|
||||
return fmt.Errorf("invalid field %q: %w", k, err)
|
||||
}
|
||||
case "_vercel_sync_hash":
|
||||
if err := json.Unmarshal(v, &c.VercelSyncHash); err != nil {
|
||||
return fmt.Errorf("invalid field %q: %w", k, err)
|
||||
@@ -164,6 +171,7 @@ func (c Config) Clone() Config {
|
||||
Enabled: cloneBoolPtr(c.ThinkingInjection.Enabled),
|
||||
Prompt: c.ThinkingInjection.Prompt,
|
||||
},
|
||||
Vercel: c.Vercel,
|
||||
VercelSyncHash: c.VercelSyncHash,
|
||||
VercelSyncTime: c.VercelSyncTime,
|
||||
AdditionalFields: map[string]any{},
|
||||
|
||||
@@ -20,6 +20,7 @@ type Config struct {
|
||||
AutoDelete AutoDeleteConfig `json:"auto_delete"`
|
||||
CurrentInputFile CurrentInputFileConfig `json:"current_input_file,omitempty"`
|
||||
ThinkingInjection ThinkingInjectionConfig `json:"thinking_injection,omitempty"`
|
||||
Vercel VercelConfig `json:"vercel,omitempty"`
|
||||
VercelSyncHash string `json:"_vercel_sync_hash,omitempty"`
|
||||
VercelSyncTime int64 `json:"_vercel_sync_time,omitempty"`
|
||||
AdditionalFields map[string]any `json:"-"`
|
||||
@@ -99,6 +100,7 @@ func (c *Config) NormalizeCredentials() {
|
||||
c.Accounts[i].Remark = strings.TrimSpace(c.Accounts[i].Remark)
|
||||
}
|
||||
|
||||
c.Vercel = NormalizeVercelConfig(c.Vercel)
|
||||
c.normalizeModelAliases()
|
||||
}
|
||||
|
||||
@@ -175,3 +177,24 @@ type ThinkingInjectionConfig struct {
|
||||
Enabled *bool `json:"enabled,omitempty"`
|
||||
Prompt string `json:"prompt,omitempty"`
|
||||
}
|
||||
|
||||
// VercelConfig holds the Vercel settings stored under the "vercel" key of the
// configuration. Each field is left out of the JSON output when empty.
type VercelConfig struct {
	// Token is serialized as "token"; presumably the Vercel API token — TODO
	// confirm against the code that consumes it.
	Token string `json:"token,omitempty"`
	// ProjectID is serialized as "project_id".
	ProjectID string `json:"project_id,omitempty"`
	// TeamID is serialized as "team_id".
	TeamID string `json:"team_id,omitempty"`
}
|
||||
|
||||
func NormalizeVercelConfig(v VercelConfig) VercelConfig {
|
||||
return VercelConfig{
|
||||
Token: strings.TrimSpace(v.Token),
|
||||
ProjectID: strings.TrimSpace(v.ProjectID),
|
||||
TeamID: strings.TrimSpace(v.TeamID),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Config) ClearVercelCredentials() {
|
||||
if c == nil {
|
||||
return
|
||||
}
|
||||
c.Vercel = VercelConfig{}
|
||||
}
|
||||
|
||||
@@ -173,6 +173,11 @@ func TestConfigJSONRoundtrip(t *testing.T) {
|
||||
Runtime: RuntimeConfig{
|
||||
TokenRefreshIntervalHours: 12,
|
||||
},
|
||||
Vercel: VercelConfig{
|
||||
Token: " vercel-token ",
|
||||
ProjectID: " prj_123 ",
|
||||
TeamID: " team_123 ",
|
||||
},
|
||||
VercelSyncHash: "hash123",
|
||||
VercelSyncTime: 1234567890,
|
||||
AdditionalFields: map[string]any{
|
||||
@@ -205,6 +210,9 @@ func TestConfigJSONRoundtrip(t *testing.T) {
|
||||
if decoded.AutoDelete.Mode != "single" {
|
||||
t.Fatalf("unexpected auto delete mode: %#v", decoded.AutoDelete.Mode)
|
||||
}
|
||||
if decoded.Vercel.Token != "vercel-token" || decoded.Vercel.ProjectID != "prj_123" || decoded.Vercel.TeamID != "team_123" {
|
||||
t.Fatalf("unexpected vercel config: %#v", decoded.Vercel)
|
||||
}
|
||||
if decoded.VercelSyncHash != "hash123" {
|
||||
t.Fatalf("unexpected vercel sync hash: %q", decoded.VercelSyncHash)
|
||||
}
|
||||
|
||||
@@ -75,20 +75,6 @@ func TestResolveExpandedHistoricalAliases(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveModelHeuristicReasoner(t *testing.T) {
|
||||
got, ok := ResolveModel(nil, "o3-super")
|
||||
if !ok || got != "deepseek-v4-pro" {
|
||||
t.Fatalf("expected heuristic reasoner, got ok=%v model=%q", ok, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveModelHeuristicReasonerNoThinking(t *testing.T) {
|
||||
got, ok := ResolveModel(nil, "o3-super-nothinking")
|
||||
if !ok || got != "deepseek-v4-pro-nothinking" {
|
||||
t.Fatalf("expected heuristic reasoner nothinking, got ok=%v model=%q", ok, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveModelUnknown(t *testing.T) {
|
||||
_, ok := ResolveModel(nil, "totally-custom-model")
|
||||
if ok {
|
||||
@@ -96,6 +82,13 @@ func TestResolveModelUnknown(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveModelUnknownKnownFamilyName(t *testing.T) {
|
||||
_, ok := ResolveModel(nil, "gpt-5.5-pro-search")
|
||||
if ok {
|
||||
t.Fatal("expected unknown known-family model to fail resolve without alias")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveModelRejectsLegacyDeepSeekIDs(t *testing.T) {
|
||||
legacyModels := []string{
|
||||
"deepseek-chat",
|
||||
@@ -151,13 +144,6 @@ func TestResolveModelCustomAliasToVision(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveModelHeuristicVisionIgnoresSearchSuffix(t *testing.T) {
|
||||
got, ok := ResolveModel(nil, "gemini-vision-search")
|
||||
if !ok || got != "deepseek-v4-vision" {
|
||||
t.Fatalf("expected heuristic vision alias to resolve without search variant, got ok=%v model=%q", ok, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClaudeModelsResponsePaginationFields(t *testing.T) {
|
||||
resp := ClaudeModelsResponse()
|
||||
if _, ok := resp["first_id"]; !ok {
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package config
|
||||
|
||||
import "strings"
|
||||
import (
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type ModelInfo struct {
|
||||
ID string `json:"id"`
|
||||
@@ -9,6 +12,16 @@ type ModelInfo struct {
|
||||
OwnedBy string `json:"owned_by"`
|
||||
Permission []any `json:"permission,omitempty"`
|
||||
}
|
||||
// OllamaModelInfo is one entry of the "models" list served by
// OllamaModelsResponse. Values are produced by mapToOllamaModels.
type OllamaModelInfo struct {
	// Name and Model are both populated with the model ID.
	Name  string `json:"name"`
	Model string `json:"model"`
	// Size is always reported as 0 by mapToOllamaModels.
	Size int64 `json:"size"`
	// ModifiedAt is an RFC 3339 timestamp, or empty when the source model
	// has no creation time.
	ModifiedAt string `json:"modified_at"`
}
|
||||
// OllamaCapabilitiesModelInfo pairs a model ID with the capability labels
// advertised for it. OllamaModelByID returns values of this type.
type OllamaCapabilitiesModelInfo struct {
	ID           string   `json:"id"`
	Capabilities []string `json:"capabilities"`
}
|
||||
|
||||
type ModelAliasReader interface {
|
||||
ModelAliases() map[string]string
|
||||
@@ -24,8 +37,21 @@ var deepSeekBaseModels = []ModelInfo{
|
||||
{ID: "deepseek-v4-vision", Object: "model", Created: 1677610602, OwnedBy: "deepseek", Permission: []any{}},
|
||||
}
|
||||
|
||||
var DeepSeekModels = appendNoThinkingVariants(deepSeekBaseModels)
|
||||
var OllamaCapabilitiesModels = []OllamaCapabilitiesModelInfo{
|
||||
{ID: "deepseek-v4-flash", Capabilities: []string{"tools", "thinking"}},
|
||||
{ID: "deepseek-v4-pro", Capabilities: []string{"tools", "thinking"}},
|
||||
{ID: "deepseek-v4-flash-search", Capabilities: []string{"tools", "thinking"}},
|
||||
{ID: "deepseek-v4-pro-search", Capabilities: []string{"tools", "thinking"}},
|
||||
{ID: "deepseek-v4-vision", Capabilities: []string{"tools", "thinking", "vision"}},
|
||||
{ID: "deepseek-v4-flash-nothinking", Capabilities: []string{"tools"}},
|
||||
{ID: "deepseek-v4-pro-nothinking", Capabilities: []string{"tools"}},
|
||||
{ID: "deepseek-v4-flash-search-nothinking", Capabilities: []string{"tools"}},
|
||||
{ID: "deepseek-v4-pro-search-nothinking", Capabilities: []string{"tools"}},
|
||||
{ID: "deepseek-v4-vision-nothinking", Capabilities: []string{"tools", "vision"}},
|
||||
}
|
||||
|
||||
var DeepSeekModels = appendNoThinkingVariants(deepSeekBaseModels)
|
||||
var OllamaModels = mapToOllamaModels(DeepSeekModels)
|
||||
var claudeBaseModels = []ModelInfo{
|
||||
// Current aliases
|
||||
{ID: "claude-opus-4-6", Object: "model", Created: 1715635200, OwnedBy: "anthropic"},
|
||||
@@ -214,26 +240,10 @@ func ResolveModel(store ModelAliasReader, requested string) (string, bool) {
|
||||
return mapped, true
|
||||
}
|
||||
baseModel, noThinking := splitNoThinkingModel(model)
|
||||
resolvedModel, ok := resolveCanonicalModel(aliases, baseModel)
|
||||
if !ok {
|
||||
return "", false
|
||||
}
|
||||
return withNoThinkingVariant(resolvedModel, noThinking), true
|
||||
}
|
||||
|
||||
func isRetiredHistoricalModel(model string) bool {
|
||||
switch {
|
||||
case strings.HasPrefix(model, "claude-1."):
|
||||
return true
|
||||
case strings.HasPrefix(model, "claude-2."):
|
||||
return true
|
||||
case strings.HasPrefix(model, "claude-instant-"):
|
||||
return true
|
||||
case strings.HasPrefix(model, "gpt-3.5"):
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
if mapped, ok := aliases[baseModel]; ok && IsSupportedDeepSeekModel(mapped) {
|
||||
return withNoThinkingVariant(mapped, noThinking), true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func lower(s string) string {
|
||||
@@ -263,6 +273,23 @@ func OpenAIModelByID(store ModelAliasReader, id string) (ModelInfo, bool) {
|
||||
return ModelInfo{}, false
|
||||
}
|
||||
|
||||
func OllamaModelsResponse() map[string]any {
|
||||
return map[string]any{"models": OllamaModels}
|
||||
}
|
||||
|
||||
func OllamaModelByID(store ModelAliasReader, id string) (OllamaCapabilitiesModelInfo, bool) {
|
||||
canonical, ok := ResolveModel(store, id)
|
||||
if !ok {
|
||||
return OllamaCapabilitiesModelInfo{}, false
|
||||
}
|
||||
for _, model := range OllamaCapabilitiesModels {
|
||||
if model.ID == canonical {
|
||||
return model, true
|
||||
}
|
||||
}
|
||||
return OllamaCapabilitiesModelInfo{}, false
|
||||
}
|
||||
|
||||
func ClaudeModelsResponse() map[string]any {
|
||||
resp := map[string]any{"object": "list", "data": ClaudeModels}
|
||||
if len(ClaudeModels) > 0 {
|
||||
@@ -286,6 +313,23 @@ func appendNoThinkingVariants(models []ModelInfo) []ModelInfo {
|
||||
}
|
||||
return out
|
||||
}
|
||||
func mapToOllamaModels(models []ModelInfo) []OllamaModelInfo {
|
||||
out := make([]OllamaModelInfo, 0, len(models))
|
||||
for _, model := range models {
|
||||
var modifiedAt string
|
||||
if model.Created > 0 {
|
||||
modifiedAt = time.Unix(model.Created, 0).Format(time.RFC3339)
|
||||
}
|
||||
ollamaModel := OllamaModelInfo{
|
||||
Name: model.ID,
|
||||
Model: model.ID,
|
||||
Size: 0,
|
||||
ModifiedAt: modifiedAt,
|
||||
}
|
||||
out = append(out, ollamaModel)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func splitNoThinkingModel(model string) (string, bool) {
|
||||
model = lower(strings.TrimSpace(model))
|
||||
@@ -315,58 +359,3 @@ func loadModelAliases(store ModelAliasReader) map[string]string {
|
||||
}
|
||||
return aliases
|
||||
}
|
||||
|
||||
func resolveCanonicalModel(aliases map[string]string, model string) (string, bool) {
|
||||
model = lower(strings.TrimSpace(model))
|
||||
if model == "" {
|
||||
return "", false
|
||||
}
|
||||
if isRetiredHistoricalModel(model) {
|
||||
return "", false
|
||||
}
|
||||
if IsSupportedDeepSeekModel(model) {
|
||||
return model, true
|
||||
}
|
||||
if mapped, ok := aliases[model]; ok && IsSupportedDeepSeekModel(mapped) {
|
||||
return mapped, true
|
||||
}
|
||||
if strings.HasPrefix(model, "deepseek-") {
|
||||
return "", false
|
||||
}
|
||||
|
||||
knownFamily := false
|
||||
for _, prefix := range []string{
|
||||
"gpt-", "o1", "o3", "claude-", "gemini-", "llama-", "qwen-", "mistral-", "command-",
|
||||
} {
|
||||
if strings.HasPrefix(model, prefix) {
|
||||
knownFamily = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !knownFamily {
|
||||
return "", false
|
||||
}
|
||||
|
||||
useVision := strings.Contains(model, "vision")
|
||||
useReasoner := strings.Contains(model, "reason") ||
|
||||
strings.Contains(model, "reasoner") ||
|
||||
strings.HasPrefix(model, "o1") ||
|
||||
strings.HasPrefix(model, "o3") ||
|
||||
strings.Contains(model, "opus") ||
|
||||
strings.Contains(model, "slow") ||
|
||||
strings.Contains(model, "r1")
|
||||
useSearch := strings.Contains(model, "search")
|
||||
|
||||
switch {
|
||||
case useVision:
|
||||
return "deepseek-v4-vision", true
|
||||
case useReasoner && useSearch:
|
||||
return "deepseek-v4-pro-search", true
|
||||
case useReasoner:
|
||||
return "deepseek-v4-pro", true
|
||||
case useSearch:
|
||||
return "deepseek-v4-flash-search", true
|
||||
default:
|
||||
return "deepseek-v4-flash", true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,6 +58,11 @@ func RawStreamSampleRoot() string {
|
||||
}
|
||||
|
||||
func ChatHistoryPath() string {
|
||||
// On Vercel, /var/task is read-only at runtime. If no explicit path is set,
|
||||
// default to /tmp/chat_history.json (the only writable directory).
|
||||
if IsVercel() && strings.TrimSpace(os.Getenv("DS2API_CHAT_HISTORY_PATH")) == "" {
|
||||
return "/tmp/chat_history.json"
|
||||
}
|
||||
return ResolvePath("DS2API_CHAT_HISTORY_PATH", "data/chat_history.json")
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,18 @@ func BuildResponseObjectWithToolCalls(responseID, model, finalPrompt, finalThink
|
||||
output := make([]any, 0, 2)
|
||||
if len(detected) > 0 {
|
||||
exposedOutputText = ""
|
||||
if strings.TrimSpace(finalThinking) != "" {
|
||||
output = append(output, map[string]any{
|
||||
"type": "message",
|
||||
"id": "msg_" + strings.ReplaceAll(uuid.NewString(), "-", ""),
|
||||
"role": "assistant",
|
||||
"status": "completed",
|
||||
"content": []any{map[string]any{
|
||||
"type": "reasoning",
|
||||
"text": finalThinking,
|
||||
}},
|
||||
})
|
||||
}
|
||||
output = append(output, toResponsesFunctionCallItems(detected, toolsRaw)...)
|
||||
} else {
|
||||
content := make([]any, 0, 2)
|
||||
|
||||
@@ -85,12 +85,24 @@ func TestBuildResponseObjectPromotesToolCallFromThinkingWhenTextEmpty(t *testing
|
||||
)
|
||||
|
||||
output, _ := obj["output"].([]any)
|
||||
if len(output) != 1 {
|
||||
t.Fatalf("expected one output item, got %#v", obj["output"])
|
||||
if len(output) != 2 {
|
||||
t.Fatalf("expected reasoning message plus function_call output, got %#v", obj["output"])
|
||||
}
|
||||
first, _ := output[0].(map[string]any)
|
||||
if first["type"] != "function_call" {
|
||||
t.Fatalf("expected function_call output, got %#v", first["type"])
|
||||
if first["type"] != "message" {
|
||||
t.Fatalf("expected reasoning message output first, got %#v", first["type"])
|
||||
}
|
||||
content, _ := first["content"].([]any)
|
||||
if len(content) != 1 {
|
||||
t.Fatalf("expected reasoning content, got %#v", first["content"])
|
||||
}
|
||||
block0, _ := content[0].(map[string]any)
|
||||
if block0["type"] != "reasoning" {
|
||||
t.Fatalf("expected reasoning block, got %#v", block0["type"])
|
||||
}
|
||||
second, _ := output[1].(map[string]any)
|
||||
if second["type"] != "function_call" {
|
||||
t.Fatalf("expected function_call output, got %#v", second["type"])
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -15,5 +15,6 @@ type Handler struct {
|
||||
|
||||
var writeJSON = adminshared.WriteJSON
|
||||
var intFrom = adminshared.IntFrom
|
||||
var maskSecretPreview = adminshared.MaskSecretPreview
|
||||
|
||||
func nilIfEmpty(s string) any { return adminshared.NilIfEmpty(s) }
|
||||
|
||||
@@ -61,9 +61,34 @@ func (h *Handler) verify(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (h *Handler) getVercelConfig(w http.ResponseWriter, _ *http.Request) {
|
||||
saved := h.Store.Snapshot().Vercel
|
||||
token, tokenSource := firstConfiguredValue(
|
||||
[2]string{"env", os.Getenv("VERCEL_TOKEN")},
|
||||
[2]string{"config", saved.Token},
|
||||
)
|
||||
projectID, _ := firstConfiguredValue(
|
||||
[2]string{"env", os.Getenv("VERCEL_PROJECT_ID")},
|
||||
[2]string{"config", saved.ProjectID},
|
||||
)
|
||||
teamID, _ := firstConfiguredValue(
|
||||
[2]string{"env", os.Getenv("VERCEL_TEAM_ID")},
|
||||
[2]string{"config", saved.TeamID},
|
||||
)
|
||||
writeJSON(w, http.StatusOK, map[string]any{
|
||||
"has_token": strings.TrimSpace(os.Getenv("VERCEL_TOKEN")) != "",
|
||||
"project_id": strings.TrimSpace(os.Getenv("VERCEL_PROJECT_ID")),
|
||||
"team_id": nilIfEmpty(strings.TrimSpace(os.Getenv("VERCEL_TEAM_ID"))),
|
||||
"has_token": token != "",
|
||||
"token_preview": maskSecretPreview(token),
|
||||
"token_source": nilIfEmpty(tokenSource),
|
||||
"project_id": projectID,
|
||||
"team_id": nilIfEmpty(teamID),
|
||||
})
|
||||
}
|
||||
|
||||
// firstConfiguredValue scans (source, value) pairs in order and returns the
// first value that is non-empty after trimming whitespace, together with its
// trimmed source label. Both results are empty when no pair has a value.
func firstConfiguredValue(values ...[2]string) (string, string) {
	for _, pair := range values {
		value := strings.TrimSpace(pair[1])
		if value != "" {
			return value, strings.TrimSpace(pair[0])
		}
	}
	return "", ""
}
|
||||
|
||||
38
internal/httpapi/admin/auth/handler_auth_test.go
Normal file
38
internal/httpapi/admin/auth/handler_auth_test.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package auth
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"ds2api/internal/config"
|
||||
)
|
||||
|
||||
func TestGetVercelConfigFallsBackToSavedConfig(t *testing.T) {
|
||||
t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"vercel":{"token":"saved-token","project_id":"saved-project","team_id":"saved-team"}}`)
|
||||
t.Setenv("VERCEL_TOKEN", "")
|
||||
t.Setenv("VERCEL_PROJECT_ID", "")
|
||||
t.Setenv("VERCEL_TEAM_ID", "")
|
||||
h := &Handler{Store: config.LoadStore()}
|
||||
|
||||
rec := httptest.NewRecorder()
|
||||
h.getVercelConfig(rec, httptest.NewRequest(http.MethodGet, "/admin/vercel/config", nil))
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String())
|
||||
}
|
||||
var payload map[string]any
|
||||
if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if payload["has_token"] != true {
|
||||
t.Fatalf("expected saved token to be detected: %#v", payload)
|
||||
}
|
||||
if payload["token_source"] != "config" || payload["project_id"] != "saved-project" || payload["team_id"] != "saved-team" {
|
||||
t.Fatalf("unexpected preconfig payload: %#v", payload)
|
||||
}
|
||||
if payload["token_preview"] == "saved-token" {
|
||||
t.Fatal("token preview leaked the full token")
|
||||
}
|
||||
}
|
||||
@@ -94,6 +94,10 @@ func (h *Handler) configImport(w http.ResponseWriter, r *http.Request) {
|
||||
if strings.TrimSpace(incoming.Embeddings.Provider) != "" {
|
||||
next.Embeddings.Provider = incoming.Embeddings.Provider
|
||||
}
|
||||
incomingVercel := config.NormalizeVercelConfig(incoming.Vercel)
|
||||
if strings.TrimSpace(incomingVercel.Token) != "" || strings.TrimSpace(incomingVercel.ProjectID) != "" || strings.TrimSpace(incomingVercel.TeamID) != "" {
|
||||
next.Vercel = incomingVercel
|
||||
}
|
||||
if strings.TrimSpace(incoming.Admin.PasswordHash) != "" {
|
||||
next.Admin.PasswordHash = incoming.Admin.PasswordHash
|
||||
}
|
||||
|
||||
@@ -19,6 +19,12 @@ func (h *Handler) getConfig(w http.ResponseWriter, _ *http.Request) {
|
||||
"env_writeback_enabled": h.Store.IsEnvWritebackEnabled(),
|
||||
"config_path": h.Store.ConfigPath(),
|
||||
"model_aliases": snap.ModelAliases,
|
||||
"vercel": map[string]any{
|
||||
"has_token": strings.TrimSpace(snap.Vercel.Token) != "",
|
||||
"token_preview": maskSecretPreview(snap.Vercel.Token),
|
||||
"project_id": snap.Vercel.ProjectID,
|
||||
"team_id": snap.Vercel.TeamID,
|
||||
},
|
||||
}
|
||||
accounts := make([]map[string]any, 0, len(snap.Accounts))
|
||||
for _, acc := range snap.Accounts {
|
||||
|
||||
@@ -78,6 +78,7 @@ func ComputeSyncHash(store ConfigStore) string {
|
||||
}
|
||||
snap := store.Snapshot().Clone()
|
||||
snap.ClearAccountTokens()
|
||||
snap.ClearVercelCredentials()
|
||||
snap.VercelSyncHash = ""
|
||||
snap.VercelSyncTime = 0
|
||||
b, _ := json.Marshal(snap)
|
||||
@@ -93,6 +94,7 @@ func SyncHashForJSON(s string) string {
|
||||
cfg.VercelSyncHash = ""
|
||||
cfg.VercelSyncTime = 0
|
||||
cfg.ClearAccountTokens()
|
||||
cfg.ClearVercelCredentials()
|
||||
b, err := json.Marshal(cfg)
|
||||
if err != nil {
|
||||
return ""
|
||||
|
||||
@@ -23,7 +23,7 @@ func (h *Handler) syncVercel(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]any{"detail": "invalid json"})
|
||||
return
|
||||
}
|
||||
opts, err := parseVercelSyncOptions(req)
|
||||
opts, err := parseVercelSyncOptions(req, h.Store.Snapshot().Vercel)
|
||||
if err != nil {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]any{"detail": err.Error()})
|
||||
return
|
||||
@@ -50,6 +50,12 @@ func (h *Handler) syncVercel(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
savedCreds := h.saveVercelProjectCredentials(r.Context(), client, opts, params, headers, envs)
|
||||
credentialsWarning := ""
|
||||
if saved, err := h.saveLocalVercelCredentials(opts); err == nil && saved {
|
||||
savedCreds = append(savedCreds, "config.vercel")
|
||||
} else if err != nil {
|
||||
credentialsWarning = "保存 Vercel 凭据到本地配置失败: " + err.Error()
|
||||
}
|
||||
manual, deployURL := triggerVercelDeployment(r.Context(), client, opts.ProjectID, params, headers)
|
||||
_ = h.Store.SetVercelSync(syncHashForJSON(cfgJSON), time.Now().Unix())
|
||||
result := map[string]any{"success": true, "validated_accounts": validated}
|
||||
@@ -66,6 +72,9 @@ func (h *Handler) syncVercel(w http.ResponseWriter, r *http.Request) {
|
||||
if len(savedCreds) > 0 {
|
||||
result["saved_credentials"] = savedCreds
|
||||
}
|
||||
if credentialsWarning != "" {
|
||||
result["credentials_warning"] = credentialsWarning
|
||||
}
|
||||
writeJSON(w, http.StatusOK, result)
|
||||
}
|
||||
|
||||
@@ -78,7 +87,7 @@ type vercelSyncOptions struct {
|
||||
UsePreconfig bool
|
||||
}
|
||||
|
||||
func parseVercelSyncOptions(req map[string]any) (vercelSyncOptions, error) {
|
||||
func parseVercelSyncOptions(req map[string]any, saved config.VercelConfig) (vercelSyncOptions, error) {
|
||||
vercelToken, _ := req["vercel_token"].(string)
|
||||
projectID, _ := req["project_id"].(string)
|
||||
teamID, _ := req["team_id"].(string)
|
||||
@@ -92,13 +101,13 @@ func parseVercelSyncOptions(req map[string]any) (vercelSyncOptions, error) {
|
||||
}
|
||||
usePreconfig := vercelToken == "__USE_PRECONFIG__" || strings.TrimSpace(vercelToken) == ""
|
||||
if usePreconfig {
|
||||
vercelToken = strings.TrimSpace(os.Getenv("VERCEL_TOKEN"))
|
||||
vercelToken = firstNonEmpty(os.Getenv("VERCEL_TOKEN"), saved.Token)
|
||||
}
|
||||
if strings.TrimSpace(projectID) == "" {
|
||||
projectID = strings.TrimSpace(os.Getenv("VERCEL_PROJECT_ID"))
|
||||
projectID = firstNonEmpty(os.Getenv("VERCEL_PROJECT_ID"), saved.ProjectID)
|
||||
}
|
||||
if strings.TrimSpace(teamID) == "" {
|
||||
teamID = strings.TrimSpace(os.Getenv("VERCEL_TEAM_ID"))
|
||||
teamID = firstNonEmpty(os.Getenv("VERCEL_TEAM_ID"), saved.TeamID)
|
||||
}
|
||||
vercelToken = strings.TrimSpace(vercelToken)
|
||||
projectID = strings.TrimSpace(projectID)
|
||||
@@ -116,6 +125,15 @@ func parseVercelSyncOptions(req map[string]any) (vercelSyncOptions, error) {
|
||||
}, nil
|
||||
}
|
||||
|
||||
// firstNonEmpty returns the first argument that is non-empty after trimming
// surrounding whitespace, in its trimmed form. It returns "" when every
// argument is blank or no arguments are given.
func firstNonEmpty(values ...string) string {
	for _, value := range values {
		if trimmed := strings.TrimSpace(value); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
|
||||
|
||||
func buildVercelParams(teamID string) url.Values {
|
||||
params := url.Values{}
|
||||
if strings.TrimSpace(teamID) != "" {
|
||||
@@ -178,6 +196,25 @@ func (h *Handler) saveVercelProjectCredentials(ctx context.Context, client *http
|
||||
return saved
|
||||
}
|
||||
|
||||
func (h *Handler) saveLocalVercelCredentials(opts vercelSyncOptions) (bool, error) {
|
||||
if !opts.SaveCreds {
|
||||
return false, nil
|
||||
}
|
||||
err := h.Store.Update(func(c *config.Config) error {
|
||||
token := opts.VercelToken
|
||||
if opts.UsePreconfig {
|
||||
token = c.Vercel.Token
|
||||
}
|
||||
c.Vercel = config.NormalizeVercelConfig(config.VercelConfig{
|
||||
Token: token,
|
||||
ProjectID: opts.ProjectID,
|
||||
TeamID: opts.TeamID,
|
||||
})
|
||||
return nil
|
||||
})
|
||||
return err == nil, err
|
||||
}
|
||||
|
||||
func triggerVercelDeployment(ctx context.Context, client *http.Client, projectID string, params url.Values, headers map[string]string) (bool, string) {
|
||||
projectResp, status, _ := vercelRequest(ctx, client, http.MethodGet, "https://api.vercel.com/v9/projects/"+projectID, params, headers, nil)
|
||||
if status != http.StatusOK {
|
||||
@@ -243,7 +280,7 @@ func (h *Handler) vercelStatus(w http.ResponseWriter, r *http.Request) {
|
||||
func (h *Handler) exportSyncConfig(req map[string]any) (string, string, error) {
|
||||
override, ok := req["config_override"]
|
||||
if !ok || override == nil {
|
||||
return h.Store.ExportJSONAndBase64()
|
||||
return encodeVercelSyncConfig(h.Store.Snapshot())
|
||||
}
|
||||
raw, err := json.Marshal(override)
|
||||
if err != nil {
|
||||
@@ -253,8 +290,13 @@ func (h *Handler) exportSyncConfig(req map[string]any) (string, string, error) {
|
||||
if err := json.Unmarshal(raw, &cfg); err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
return encodeVercelSyncConfig(cfg)
|
||||
}
|
||||
|
||||
func encodeVercelSyncConfig(cfg config.Config) (string, string, error) {
|
||||
cfg.DropInvalidAccounts()
|
||||
cfg.ClearAccountTokens()
|
||||
cfg.ClearVercelCredentials()
|
||||
cfg.VercelSyncHash = ""
|
||||
cfg.VercelSyncTime = 0
|
||||
b, err := json.Marshal(cfg)
|
||||
@@ -272,6 +314,7 @@ func syncHashForJSON(s string) string {
|
||||
cfg.VercelSyncHash = ""
|
||||
cfg.VercelSyncTime = 0
|
||||
cfg.ClearAccountTokens()
|
||||
cfg.ClearVercelCredentials()
|
||||
b, err := json.Marshal(cfg)
|
||||
if err != nil {
|
||||
return ""
|
||||
|
||||
100
internal/httpapi/admin/vercel/handler_vercel_test.go
Normal file
100
internal/httpapi/admin/vercel/handler_vercel_test.go
Normal file
@@ -0,0 +1,100 @@
|
||||
package vercel
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"ds2api/internal/config"
|
||||
)
|
||||
|
||||
func TestParseVercelSyncOptionsFallsBackToSavedConfig(t *testing.T) {
|
||||
t.Setenv("VERCEL_TOKEN", "")
|
||||
t.Setenv("VERCEL_PROJECT_ID", "")
|
||||
t.Setenv("VERCEL_TEAM_ID", "")
|
||||
|
||||
opts, err := parseVercelSyncOptions(map[string]any{
|
||||
"vercel_token": "__USE_PRECONFIG__",
|
||||
}, config.VercelConfig{
|
||||
Token: " saved-token ",
|
||||
ProjectID: " saved-project ",
|
||||
TeamID: " saved-team ",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("parse options error: %v", err)
|
||||
}
|
||||
if opts.VercelToken != "saved-token" || opts.ProjectID != "saved-project" || opts.TeamID != "saved-team" {
|
||||
t.Fatalf("unexpected options: %#v", opts)
|
||||
}
|
||||
if !opts.UsePreconfig {
|
||||
t.Fatal("expected preconfig mode")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSaveLocalVercelCredentialsStoresExplicitInput(t *testing.T) {
|
||||
t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"]}`)
|
||||
store := config.LoadStore()
|
||||
h := &Handler{Store: store}
|
||||
|
||||
saved, err := h.saveLocalVercelCredentials(vercelSyncOptions{
|
||||
VercelToken: " token ",
|
||||
ProjectID: " project ",
|
||||
TeamID: " team ",
|
||||
SaveCreds: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("save local credentials error: %v", err)
|
||||
}
|
||||
if !saved {
|
||||
t.Fatal("expected credentials to be saved")
|
||||
}
|
||||
got := store.Snapshot().Vercel
|
||||
if got.Token != "token" || got.ProjectID != "project" || got.TeamID != "team" {
|
||||
t.Fatalf("unexpected saved credentials: %#v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSaveLocalVercelCredentialsPreservesPreconfiguredTokenAndUpdatesProject(t *testing.T) {
|
||||
t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"vercel":{"token":"saved-token","project_id":"old-project","team_id":"old-team"}}`)
|
||||
store := config.LoadStore()
|
||||
h := &Handler{Store: store}
|
||||
|
||||
saved, err := h.saveLocalVercelCredentials(vercelSyncOptions{
|
||||
VercelToken: "resolved-token",
|
||||
ProjectID: "new-project",
|
||||
TeamID: "new-team",
|
||||
SaveCreds: true,
|
||||
UsePreconfig: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("save local credentials error: %v", err)
|
||||
}
|
||||
if !saved {
|
||||
t.Fatal("expected project/team updates to be saved")
|
||||
}
|
||||
got := store.Snapshot().Vercel
|
||||
if got.Token != "saved-token" || got.ProjectID != "new-project" || got.TeamID != "new-team" {
|
||||
t.Fatalf("unexpected saved credentials: %#v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExportSyncConfigStripsSavedVercelCredentials(t *testing.T) {
|
||||
t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"vercel":{"token":"secret-token","project_id":"project","team_id":"team"}}`)
|
||||
store := config.LoadStore()
|
||||
h := &Handler{Store: store}
|
||||
|
||||
jsonStr, _, err := h.exportSyncConfig(map[string]any{})
|
||||
if err != nil {
|
||||
t.Fatalf("export sync config error: %v", err)
|
||||
}
|
||||
if strings.Contains(jsonStr, "secret-token") || strings.Contains(jsonStr, `"vercel"`) {
|
||||
t.Fatalf("expected sync export to strip Vercel credentials, got %s", jsonStr)
|
||||
}
|
||||
var exported config.Config
|
||||
if err := json.Unmarshal([]byte(jsonStr), &exported); err != nil {
|
||||
t.Fatalf("exported config is invalid JSON: %v", err)
|
||||
}
|
||||
if len(exported.Keys) != 1 || exported.Keys[0] != "k1" {
|
||||
t.Fatalf("unexpected exported config: %#v", exported)
|
||||
}
|
||||
}
|
||||
@@ -5,15 +5,25 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/chathistory"
|
||||
dsclient "ds2api/internal/deepseek/client"
|
||||
)
|
||||
|
||||
type claudeCurrentInputAuth struct{}
|
||||
|
||||
// claudeHistoryConfig is a test stand-in for the handler's config store. It
// serves a fixed alias table and reports the current-input-file feature as
// disabled.
type claudeHistoryConfig struct {
	aliases map[string]string
}

// ModelAliases returns the alias table the test supplied.
func (m claudeHistoryConfig) ModelAliases() map[string]string { return m.aliases }

// CurrentInputFileEnabled always reports false.
func (claudeHistoryConfig) CurrentInputFileEnabled() bool { return false }

// CurrentInputFileMinChars always returns 0.
func (claudeHistoryConfig) CurrentInputFileMinChars() int { return 0 }
|
||||
|
||||
func (claudeCurrentInputAuth) Determine(*http.Request) (*auth.RequestAuth, error) {
|
||||
return &auth.RequestAuth{
|
||||
DeepSeekToken: "direct-token",
|
||||
@@ -22,6 +32,50 @@ func (claudeCurrentInputAuth) Determine(*http.Request) (*auth.RequestAuth, error
|
||||
}, nil
|
||||
}
|
||||
|
||||
func TestClaudeDirectRecordsResponseHistory(t *testing.T) {
|
||||
ds := &claudeCurrentInputDS{}
|
||||
historyStore := chathistory.New(filepath.Join(t.TempDir(), "history.json"))
|
||||
h := &Handler{
|
||||
Store: claudeHistoryConfig{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
|
||||
Auth: claudeCurrentInputAuth{},
|
||||
DS: ds,
|
||||
ChatHistory: historyStore,
|
||||
}
|
||||
reqBody := `{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hello from claude"}],"max_tokens":1024}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(reqBody))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
h.Messages(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
snapshot, err := historyStore.Snapshot()
|
||||
if err != nil {
|
||||
t.Fatalf("snapshot history: %v", err)
|
||||
}
|
||||
if len(snapshot.Items) != 1 {
|
||||
t.Fatalf("expected one history item, got %d", len(snapshot.Items))
|
||||
}
|
||||
item, err := historyStore.Get(snapshot.Items[0].ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get history item: %v", err)
|
||||
}
|
||||
if item.Surface != "claude.messages" {
|
||||
t.Fatalf("unexpected surface: %q", item.Surface)
|
||||
}
|
||||
if item.Model != "claude-sonnet-4-6" {
|
||||
t.Fatalf("unexpected model: %q", item.Model)
|
||||
}
|
||||
if item.UserInput != "hello from claude" {
|
||||
t.Fatalf("unexpected user input: %q", item.UserInput)
|
||||
}
|
||||
if item.Content != "ok" {
|
||||
t.Fatalf("expected raw upstream content, got %q", item.Content)
|
||||
}
|
||||
}
|
||||
|
||||
func (claudeCurrentInputAuth) Release(*auth.RequestAuth) {}
|
||||
|
||||
type claudeCurrentInputDS struct {
|
||||
@@ -53,10 +107,12 @@ func (d *claudeCurrentInputDS) CallCompletion(_ context.Context, _ *auth.Request
|
||||
|
||||
func TestClaudeDirectAppliesCurrentInputFile(t *testing.T) {
|
||||
ds := &claudeCurrentInputDS{}
|
||||
historyStore := chathistory.New(filepath.Join(t.TempDir(), "history.json"))
|
||||
h := &Handler{
|
||||
Store: mockClaudeConfig{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
|
||||
Auth: claudeCurrentInputAuth{},
|
||||
DS: ds,
|
||||
Store: mockClaudeConfig{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}},
|
||||
Auth: claudeCurrentInputAuth{},
|
||||
DS: ds,
|
||||
ChatHistory: historyStore,
|
||||
}
|
||||
reqBody := `{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hello from claude"}],"max_tokens":1024}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(reqBody))
|
||||
@@ -82,4 +138,21 @@ func TestClaudeDirectAppliesCurrentInputFile(t *testing.T) {
|
||||
if !strings.Contains(prompt, "Continue from the latest state in the attached DS2API_HISTORY.txt context.") {
|
||||
t.Fatalf("expected continuation prompt, got %q", prompt)
|
||||
}
|
||||
snapshot, err := historyStore.Snapshot()
|
||||
if err != nil {
|
||||
t.Fatalf("snapshot history: %v", err)
|
||||
}
|
||||
if len(snapshot.Items) != 1 {
|
||||
t.Fatalf("expected one history item, got %d", len(snapshot.Items))
|
||||
}
|
||||
full, err := historyStore.Get(snapshot.Items[0].ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get history item: %v", err)
|
||||
}
|
||||
if full.HistoryText != string(ds.uploads[0].Data) {
|
||||
t.Fatalf("expected uploaded current input file to be persisted in history text")
|
||||
}
|
||||
if len(full.Messages) != 1 || !strings.Contains(full.Messages[0].Content, "Continue from the latest state in the attached DS2API_HISTORY.txt context.") {
|
||||
t.Fatalf("expected persisted message to match upstream continuation prompt, got %#v", full.Messages)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package claude
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
@@ -15,8 +16,10 @@ import (
|
||||
"ds2api/internal/completionruntime"
|
||||
"ds2api/internal/config"
|
||||
claudefmt "ds2api/internal/format/claude"
|
||||
"ds2api/internal/httpapi/openai/history"
|
||||
"ds2api/internal/httpapi/requestbody"
|
||||
"ds2api/internal/promptcompat"
|
||||
"ds2api/internal/responsehistory"
|
||||
streamengine "ds2api/internal/stream"
|
||||
"ds2api/internal/translatorcliproxy"
|
||||
"ds2api/internal/util"
|
||||
@@ -79,38 +82,70 @@ func (h *Handler) handleClaudeDirect(w http.ResponseWriter, r *http.Request) boo
|
||||
return true
|
||||
}
|
||||
defer h.Auth.Release(a)
|
||||
if norm.Standard.Stream {
|
||||
h.handleClaudeDirectStream(w, r, a, norm.Standard)
|
||||
stdReq, err := h.applyCurrentInputFile(r.Context(), a, norm.Standard)
|
||||
if err != nil {
|
||||
status, message := mapCurrentInputFileError(err)
|
||||
writeClaudeError(w, status, message)
|
||||
return true
|
||||
}
|
||||
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, norm.Standard, completionruntime.Options{
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
historySession := responsehistory.Start(responsehistory.StartParams{
|
||||
Store: h.ChatHistory,
|
||||
Request: r,
|
||||
Auth: a,
|
||||
Surface: "claude.messages",
|
||||
Standard: stdReq,
|
||||
})
|
||||
if stdReq.Stream {
|
||||
h.handleClaudeDirectStream(w, r, a, stdReq, historySession)
|
||||
return true
|
||||
}
|
||||
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
})
|
||||
if outErr != nil {
|
||||
if historySession != nil {
|
||||
historySession.ErrorTurn(outErr.Status, outErr.Message, outErr.Code, result.Turn)
|
||||
}
|
||||
writeClaudeError(w, outErr.Status, outErr.Message)
|
||||
return true
|
||||
}
|
||||
if historySession != nil {
|
||||
historySession.SuccessTurn(http.StatusOK, result.Turn, responsehistory.GenericUsage(result.Turn))
|
||||
}
|
||||
writeJSON(w, http.StatusOK, claudefmt.BuildMessageResponseFromTurn(
|
||||
fmt.Sprintf("msg_%d", time.Now().UnixNano()),
|
||||
norm.Standard.ResponseModel,
|
||||
stdReq.ResponseModel,
|
||||
result.Turn,
|
||||
exposeThinking,
|
||||
))
|
||||
return true
|
||||
}
|
||||
|
||||
func (h *Handler) handleClaudeDirectStream(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) {
|
||||
func (h *Handler) applyCurrentInputFile(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) {
|
||||
if h == nil {
|
||||
return stdReq, nil
|
||||
}
|
||||
return (history.Service{Store: h.Store, DS: h.DS}).ApplyCurrentInputFile(ctx, a, stdReq)
|
||||
}
|
||||
|
||||
func mapCurrentInputFileError(err error) (int, string) {
|
||||
return history.MapError(err)
|
||||
}
|
||||
|
||||
func (h *Handler) handleClaudeDirectStream(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, stdReq promptcompat.StandardRequest, historySession *responsehistory.Session) {
|
||||
start, outErr := completionruntime.StartCompletion(r.Context(), h.DS, a, stdReq, completionruntime.Options{
|
||||
CurrentInputFile: h.Store,
|
||||
})
|
||||
if outErr != nil {
|
||||
if historySession != nil {
|
||||
historySession.Error(outErr.Status, outErr.Message, outErr.Code, "", "")
|
||||
}
|
||||
writeClaudeError(w, outErr.Status, outErr.Message)
|
||||
return
|
||||
}
|
||||
streamReq := start.Request
|
||||
h.handleClaudeStreamRealtime(w, r, start.Response, streamReq.ResponseModel, streamReq.Messages, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw)
|
||||
h.handleClaudeStreamRealtimeWithRetry(w, r, a, start.Response, start.Payload, start.Pow, streamReq.ResponseModel, streamReq.Messages, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.PromptTokenText, historySession)
|
||||
}
|
||||
|
||||
func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store ConfigReader) bool {
|
||||
@@ -264,10 +299,17 @@ func stripClaudeThinkingBlocks(raw []byte) []byte {
|
||||
return out
|
||||
}
|
||||
|
||||
func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Request, resp *http.Response, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any) {
|
||||
func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Request, resp *http.Response, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, historySessions ...*responsehistory.Session) {
|
||||
var historySession *responsehistory.Session
|
||||
if len(historySessions) > 0 {
|
||||
historySession = historySessions[0]
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
if historySession != nil {
|
||||
historySession.Error(resp.StatusCode, strings.TrimSpace(string(body)), "error", "", "")
|
||||
}
|
||||
writeClaudeError(w, http.StatusInternalServerError, string(body))
|
||||
return
|
||||
}
|
||||
@@ -294,6 +336,7 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ
|
||||
toolNames,
|
||||
toolsRaw,
|
||||
buildClaudePromptTokenText(messages, thinkingEnabled),
|
||||
historySession,
|
||||
)
|
||||
streamRuntime.sendMessageStart()
|
||||
|
||||
@@ -317,3 +360,112 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ
|
||||
OnFinalize: streamRuntime.onFinalize,
|
||||
})
|
||||
}
|
||||
|
||||
func (h *Handler) handleClaudeStreamRealtimeWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, promptTokenText string, historySession *responsehistory.Session) {
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
if historySession != nil {
|
||||
historySession.Error(resp.StatusCode, strings.TrimSpace(string(body)), "error", "", "")
|
||||
}
|
||||
writeClaudeError(w, http.StatusInternalServerError, string(body))
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "text/event-stream")
|
||||
w.Header().Set("Cache-Control", "no-cache, no-transform")
|
||||
w.Header().Set("Connection", "keep-alive")
|
||||
w.Header().Set("X-Accel-Buffering", "no")
|
||||
rc := http.NewResponseController(w)
|
||||
_, canFlush := w.(http.Flusher)
|
||||
if !canFlush {
|
||||
config.Logger.Warn("[claude_stream] response writer does not support flush; streaming may be buffered")
|
||||
}
|
||||
|
||||
streamRuntime := newClaudeStreamRuntime(
|
||||
w,
|
||||
rc,
|
||||
canFlush,
|
||||
model,
|
||||
messages,
|
||||
thinkingEnabled,
|
||||
searchEnabled,
|
||||
stripReferenceMarkersEnabled(),
|
||||
toolNames,
|
||||
toolsRaw,
|
||||
promptTokenText,
|
||||
historySession,
|
||||
)
|
||||
streamRuntime.sendMessageStart()
|
||||
|
||||
completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{
|
||||
Surface: "claude.messages",
|
||||
Stream: true,
|
||||
RetryEnabled: true,
|
||||
MaxAttempts: 3,
|
||||
UsagePrompt: promptTokenText,
|
||||
}, completionruntime.StreamRetryHooks{
|
||||
ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) {
|
||||
return h.consumeClaudeStreamAttempt(r, currentResp, streamRuntime, thinkingEnabled, allowDeferEmpty)
|
||||
},
|
||||
Finalize: func(_ int) {
|
||||
streamRuntime.finalize("end_turn", false)
|
||||
},
|
||||
ParentMessageID: func() int {
|
||||
return streamRuntime.responseMessageID
|
||||
},
|
||||
OnRetryPrompt: func(prompt string) {
|
||||
streamRuntime.promptTokenText = prompt
|
||||
},
|
||||
OnRetryFailure: func(status int, message, code string) {
|
||||
streamRuntime.sendErrorWithCode(status, strings.TrimSpace(message), code)
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func (h *Handler) consumeClaudeStreamAttempt(r *http.Request, resp *http.Response, streamRuntime *claudeStreamRuntime, thinkingEnabled bool, allowDeferEmpty bool) (bool, bool) {
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
initialType := "text"
|
||||
if thinkingEnabled {
|
||||
initialType = "thinking"
|
||||
}
|
||||
finalReason := streamengine.StopReason("")
|
||||
var scannerErr error
|
||||
streamengine.ConsumeSSE(streamengine.ConsumeConfig{
|
||||
Context: r.Context(),
|
||||
Body: resp.Body,
|
||||
ThinkingEnabled: thinkingEnabled,
|
||||
InitialType: initialType,
|
||||
KeepAliveInterval: claudeStreamPingInterval,
|
||||
IdleTimeout: claudeStreamIdleTimeout,
|
||||
MaxKeepAliveNoInput: claudeStreamMaxKeepaliveCnt,
|
||||
}, streamengine.ConsumeHooks{
|
||||
OnKeepAlive: func() {
|
||||
streamRuntime.sendPing()
|
||||
},
|
||||
OnParsed: streamRuntime.onParsed,
|
||||
OnFinalize: func(reason streamengine.StopReason, err error) {
|
||||
finalReason = reason
|
||||
scannerErr = err
|
||||
},
|
||||
})
|
||||
if string(finalReason) == "upstream_error" {
|
||||
if streamRuntime.history != nil {
|
||||
streamRuntime.history.Error(500, streamRuntime.upstreamErr, "upstream_error", responsehistory.ThinkingForArchive(streamRuntime.rawThinking.String(), streamRuntime.toolDetectionThinking.String(), streamRuntime.thinking.String()), responsehistory.TextForArchive(streamRuntime.rawText.String(), streamRuntime.text.String()))
|
||||
}
|
||||
streamRuntime.sendError(streamRuntime.upstreamErr)
|
||||
return true, false
|
||||
}
|
||||
if scannerErr != nil {
|
||||
if streamRuntime.history != nil {
|
||||
streamRuntime.history.Error(500, scannerErr.Error(), "error", responsehistory.ThinkingForArchive(streamRuntime.rawThinking.String(), streamRuntime.toolDetectionThinking.String(), streamRuntime.thinking.String()), responsehistory.TextForArchive(streamRuntime.rawText.String(), streamRuntime.text.String()))
|
||||
}
|
||||
streamRuntime.sendError(scannerErr.Error())
|
||||
return true, false
|
||||
}
|
||||
terminalWritten := streamRuntime.finalize("end_turn", allowDeferEmpty)
|
||||
if terminalWritten {
|
||||
return true, false
|
||||
}
|
||||
return false, true
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"ds2api/internal/chathistory"
|
||||
"ds2api/internal/config"
|
||||
dsprotocol "ds2api/internal/deepseek/protocol"
|
||||
"ds2api/internal/textclean"
|
||||
@@ -16,10 +17,11 @@ import (
|
||||
var writeJSON = util.WriteJSON
|
||||
|
||||
type Handler struct {
|
||||
Store ConfigReader
|
||||
Auth AuthResolver
|
||||
DS DeepSeekCaller
|
||||
OpenAI OpenAIChatRunner
|
||||
Store ConfigReader
|
||||
Auth AuthResolver
|
||||
DS DeepSeekCaller
|
||||
OpenAI OpenAIChatRunner
|
||||
ChatHistory *chathistory.Store
|
||||
}
|
||||
|
||||
func stripReferenceMarkersEnabled() bool {
|
||||
|
||||
@@ -93,14 +93,51 @@ func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) {
|
||||
t.Fatalf("expected call id preserved, got %#v", call)
|
||||
}
|
||||
content, _ := m["content"].(string)
|
||||
if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) {
|
||||
if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) {
|
||||
t.Fatalf("expected assistant content to include DSML tool call history, got %q", content)
|
||||
}
|
||||
if !containsStr(content, `<|DSML|parameter name="query"><![CDATA[latest]]></|DSML|parameter>`) {
|
||||
if !containsStr(content, `<|DSML|parameter name="query"><![CDATA[latest]]></|DSML|parameter>`) {
|
||||
t.Fatalf("expected assistant content to include serialized parameters, got %q", content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClaudeMessagesPreservesThinkingOnToolUseHistory(t *testing.T) {
|
||||
msgs := []any{
|
||||
map[string]any{
|
||||
"role": "assistant",
|
||||
"content": []any{
|
||||
map[string]any{"type": "thinking", "thinking": "need live search before answering"},
|
||||
map[string]any{
|
||||
"type": "tool_use",
|
||||
"id": "call_1",
|
||||
"name": "search_web",
|
||||
"input": map[string]any{"query": "latest"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
got := normalizeClaudeMessages(msgs)
|
||||
if len(got) != 1 {
|
||||
t.Fatalf("expected one normalized tool-call message, got %#v", got)
|
||||
}
|
||||
m := got[0].(map[string]any)
|
||||
if m["reasoning_content"] != "need live search before answering" {
|
||||
t.Fatalf("expected thinking preserved as reasoning_content, got %#v", m)
|
||||
}
|
||||
tc, _ := m["tool_calls"].([]any)
|
||||
if len(tc) != 1 {
|
||||
t.Fatalf("expected one tool call, got %#v", m["tool_calls"])
|
||||
}
|
||||
prompt := buildClaudePromptTokenText(got, true)
|
||||
if !containsStr(prompt, "[reasoning_content]\nneed live search before answering\n[/reasoning_content]") {
|
||||
t.Fatalf("expected thinking in prompt history, got %q", prompt)
|
||||
}
|
||||
if !containsStr(prompt, `<|DSML|invoke name="search_web">`) {
|
||||
t.Fatalf("expected tool call in prompt history, got %q", prompt)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClaudeMessagesDoesNotPromoteUserToolUse(t *testing.T) {
|
||||
msgs := []any{
|
||||
map[string]any{
|
||||
@@ -292,7 +329,7 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) {
|
||||
if !containsStr(prompt, "Search the web") {
|
||||
t.Fatalf("expected description in prompt")
|
||||
}
|
||||
if !containsStr(prompt, "<|DSML|tool_calls>") {
|
||||
if !containsStr(prompt, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("expected DSML tool_calls format in prompt")
|
||||
}
|
||||
if !containsStr(prompt, "TOOL CALL FORMAT") {
|
||||
|
||||
@@ -25,14 +25,21 @@ func normalizeClaudeMessages(messages []any) []any {
|
||||
switch content := msg["content"].(type) {
|
||||
case []any:
|
||||
textParts := make([]string, 0, len(content))
|
||||
pendingThinking := ""
|
||||
flushText := func() {
|
||||
if len(textParts) == 0 {
|
||||
return
|
||||
}
|
||||
out = append(out, map[string]any{
|
||||
message := map[string]any{
|
||||
"role": role,
|
||||
"content": strings.Join(textParts, "\n"),
|
||||
})
|
||||
}
|
||||
if role == "assistant" && strings.TrimSpace(pendingThinking) != "" {
|
||||
message["reasoning_content"] = pendingThinking
|
||||
message["content"] = prependClaudeReasoningForPrompt(pendingThinking, safeStringValue(message["content"]))
|
||||
pendingThinking = ""
|
||||
}
|
||||
out = append(out, message)
|
||||
textParts = textParts[:0]
|
||||
}
|
||||
for _, block := range content {
|
||||
@@ -46,10 +53,29 @@ func normalizeClaudeMessages(messages []any) []any {
|
||||
if t, ok := b["text"].(string); ok {
|
||||
textParts = append(textParts, t)
|
||||
}
|
||||
case "thinking":
|
||||
if role == "assistant" {
|
||||
if thinking := extractClaudeThinkingBlockText(b); thinking != "" {
|
||||
if pendingThinking == "" {
|
||||
pendingThinking = thinking
|
||||
} else {
|
||||
pendingThinking += "\n" + thinking
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
if raw := strings.TrimSpace(formatClaudeUnknownBlockForPrompt(b)); raw != "" {
|
||||
textParts = append(textParts, raw)
|
||||
}
|
||||
case "tool_use":
|
||||
if role == "assistant" {
|
||||
flushText()
|
||||
if toolMsg := normalizeClaudeToolUseToAssistant(b, state); toolMsg != nil {
|
||||
if strings.TrimSpace(pendingThinking) != "" {
|
||||
toolMsg["reasoning_content"] = pendingThinking
|
||||
toolMsg["content"] = prependClaudeReasoningForPrompt(pendingThinking, safeStringValue(toolMsg["content"]))
|
||||
pendingThinking = ""
|
||||
}
|
||||
out = append(out, toolMsg)
|
||||
}
|
||||
continue
|
||||
@@ -69,6 +95,13 @@ func normalizeClaudeMessages(messages []any) []any {
|
||||
}
|
||||
}
|
||||
flushText()
|
||||
if role == "assistant" && strings.TrimSpace(pendingThinking) != "" {
|
||||
out = append(out, map[string]any{
|
||||
"role": "assistant",
|
||||
"reasoning_content": pendingThinking,
|
||||
"content": formatClaudeReasoningForPrompt(pendingThinking),
|
||||
})
|
||||
}
|
||||
default:
|
||||
copied := cloneMap(msg)
|
||||
out = append(out, copied)
|
||||
@@ -77,6 +110,39 @@ func normalizeClaudeMessages(messages []any) []any {
|
||||
return out
|
||||
}
|
||||
|
||||
func prependClaudeReasoningForPrompt(reasoning, content string) string {
|
||||
reasoning = strings.TrimSpace(reasoning)
|
||||
content = strings.TrimSpace(content)
|
||||
if reasoning == "" {
|
||||
return content
|
||||
}
|
||||
block := formatClaudeReasoningForPrompt(reasoning)
|
||||
if content == "" {
|
||||
return block
|
||||
}
|
||||
return block + "\n\n" + content
|
||||
}
|
||||
|
||||
// formatClaudeReasoningForPrompt wraps whitespace-trimmed reasoning between
// "[reasoning_content]" and "[/reasoning_content]" marker lines. Reasoning that
// is empty after trimming produces an empty string instead of an empty block.
func formatClaudeReasoningForPrompt(reasoning string) string {
	const (
		openMarker  = "[reasoning_content]\n"
		closeMarker = "\n[/reasoning_content]"
	)
	if body := strings.TrimSpace(reasoning); body != "" {
		return openMarker + body + closeMarker
	}
	return ""
}
|
||||
|
||||
func extractClaudeThinkingBlockText(block map[string]any) string {
|
||||
if block == nil {
|
||||
return ""
|
||||
}
|
||||
for _, key := range []string{"thinking", "text", "content"} {
|
||||
if text := strings.TrimSpace(safeStringValue(block[key])); text != "" {
|
||||
return text
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func buildClaudeToolPrompt(tools []any) string {
|
||||
toolSchemas := make([]string, 0, len(tools))
|
||||
names := make([]string, 0, len(tools))
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"ds2api/internal/responsehistory"
|
||||
"ds2api/internal/sse"
|
||||
streamengine "ds2api/internal/stream"
|
||||
"ds2api/internal/toolcall"
|
||||
@@ -28,9 +29,10 @@ type claudeStreamRuntime struct {
|
||||
bufferToolContent bool
|
||||
stripReferenceMarkers bool
|
||||
|
||||
messageID string
|
||||
thinking strings.Builder
|
||||
text strings.Builder
|
||||
messageID string
|
||||
thinking strings.Builder
|
||||
text strings.Builder
|
||||
responseMessageID int
|
||||
|
||||
sieve toolstream.State
|
||||
rawText strings.Builder
|
||||
@@ -46,6 +48,7 @@ type claudeStreamRuntime struct {
|
||||
textEmitted bool
|
||||
ended bool
|
||||
upstreamErr string
|
||||
history *responsehistory.Session
|
||||
}
|
||||
|
||||
func newClaudeStreamRuntime(
|
||||
@@ -60,6 +63,7 @@ func newClaudeStreamRuntime(
|
||||
toolNames []string,
|
||||
toolsRaw any,
|
||||
promptTokenText string,
|
||||
history *responsehistory.Session,
|
||||
) *claudeStreamRuntime {
|
||||
return &claudeStreamRuntime{
|
||||
w: w,
|
||||
@@ -74,6 +78,7 @@ func newClaudeStreamRuntime(
|
||||
toolNames: toolNames,
|
||||
toolsRaw: toolsRaw,
|
||||
promptTokenText: promptTokenText,
|
||||
history: history,
|
||||
messageID: fmt.Sprintf("msg_%d", time.Now().UnixNano()),
|
||||
thinkingBlockIndex: -1,
|
||||
textBlockIndex: -1,
|
||||
@@ -88,6 +93,9 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
|
||||
s.upstreamErr = parsed.ErrorMessage
|
||||
return streamengine.ParsedDecision{Stop: true, StopReason: streamengine.StopReason("upstream_error")}
|
||||
}
|
||||
if parsed.ResponseMessageID > 0 {
|
||||
s.responseMessageID = parsed.ResponseMessageID
|
||||
}
|
||||
if parsed.Stop {
|
||||
return streamengine.ParsedDecision{Stop: true}
|
||||
}
|
||||
@@ -232,5 +240,11 @@ func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
|
||||
}
|
||||
}
|
||||
|
||||
if s.history != nil {
|
||||
s.history.Progress(
|
||||
responsehistory.ThinkingForArchive(s.rawThinking.String(), s.toolDetectionThinking.String(), s.thinking.String()),
|
||||
responsehistory.TextForArchive(s.rawText.String(), s.text.String()),
|
||||
)
|
||||
}
|
||||
return streamengine.ParsedDecision{ContentSeen: contentSeen}
|
||||
}
|
||||
|
||||
@@ -22,16 +22,27 @@ func (s *claudeStreamRuntime) send(event string, v any) {
|
||||
}
|
||||
|
||||
func (s *claudeStreamRuntime) sendError(message string) {
|
||||
s.sendErrorWithCode(500, message, "internal_error")
|
||||
}
|
||||
|
||||
func (s *claudeStreamRuntime) sendErrorWithCode(status int, message, code string) {
|
||||
msg := strings.TrimSpace(message)
|
||||
if msg == "" {
|
||||
msg = "upstream stream error"
|
||||
}
|
||||
if code == "" {
|
||||
code = "internal_error"
|
||||
}
|
||||
errType := "api_error"
|
||||
if status == 429 {
|
||||
errType = "rate_limit_error"
|
||||
}
|
||||
s.send("error", map[string]any{
|
||||
"type": "error",
|
||||
"error": map[string]any{
|
||||
"type": "api_error",
|
||||
"type": errType,
|
||||
"message": msg,
|
||||
"code": "internal_error",
|
||||
"code": code,
|
||||
"param": nil,
|
||||
},
|
||||
})
|
||||
|
||||
@@ -2,6 +2,7 @@ package claude
|
||||
|
||||
import (
|
||||
"ds2api/internal/assistantturn"
|
||||
"ds2api/internal/responsehistory"
|
||||
"ds2api/internal/sse"
|
||||
"ds2api/internal/toolcall"
|
||||
"ds2api/internal/toolstream"
|
||||
@@ -62,13 +63,10 @@ func (s *claudeStreamRuntime) sendToolUseBlock(idx int, tc toolcall.ParsedToolCa
|
||||
})
|
||||
}
|
||||
|
||||
func (s *claudeStreamRuntime) finalize(stopReason string) {
|
||||
func (s *claudeStreamRuntime) finalize(stopReason string, deferEmptyOutput bool) bool {
|
||||
if s.ended {
|
||||
return
|
||||
return true
|
||||
}
|
||||
s.ended = true
|
||||
|
||||
s.closeThinkingBlock()
|
||||
|
||||
if s.bufferToolContent {
|
||||
for _, evt := range toolstream.Flush(&s.sieve, s.toolNames) {
|
||||
@@ -122,6 +120,7 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
|
||||
RawThinking: s.rawThinking.String(),
|
||||
VisibleThinking: s.thinking.String(),
|
||||
DetectionThinking: s.toolDetectionThinking.String(),
|
||||
ResponseMessageID: s.responseMessageID,
|
||||
AlreadyEmittedCalls: s.toolCallsDetected,
|
||||
AlreadyEmittedToolRaw: s.toolCallsDetected,
|
||||
}, assistantturn.BuildOptions{
|
||||
@@ -136,6 +135,22 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
|
||||
outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{
|
||||
AlreadyEmittedToolCalls: s.toolCallsDetected,
|
||||
})
|
||||
if outcome.ShouldFail {
|
||||
if deferEmptyOutput {
|
||||
return false
|
||||
}
|
||||
s.ended = true
|
||||
s.closeThinkingBlock()
|
||||
s.closeTextBlock()
|
||||
if s.history != nil {
|
||||
s.history.Error(outcome.Error.Status, outcome.Error.Message, outcome.Error.Code, responsehistory.ThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking), responsehistory.TextForArchive(turn.RawText, turn.Text))
|
||||
}
|
||||
s.sendErrorWithCode(outcome.Error.Status, outcome.Error.Message, outcome.Error.Code)
|
||||
return true
|
||||
}
|
||||
|
||||
s.ended = true
|
||||
s.closeThinkingBlock()
|
||||
|
||||
if s.bufferToolContent && !s.toolCallsDetected {
|
||||
if len(turn.ToolCalls) > 0 {
|
||||
@@ -175,6 +190,15 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
|
||||
if outcome.HasToolCalls {
|
||||
stopReason = "tool_use"
|
||||
}
|
||||
if s.history != nil {
|
||||
s.history.Success(
|
||||
200,
|
||||
responsehistory.ThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking),
|
||||
responsehistory.TextForArchive(turn.RawText, turn.Text),
|
||||
stopReason,
|
||||
responsehistory.GenericUsage(turn),
|
||||
)
|
||||
}
|
||||
|
||||
s.send("message_delta", map[string]any{
|
||||
"type": "message_delta",
|
||||
@@ -187,16 +211,23 @@ func (s *claudeStreamRuntime) finalize(stopReason string) {
|
||||
},
|
||||
})
|
||||
s.send("message_stop", map[string]any{"type": "message_stop"})
|
||||
return true
|
||||
}
|
||||
|
||||
func (s *claudeStreamRuntime) onFinalize(reason streamengine.StopReason, scannerErr error) {
|
||||
if string(reason) == "upstream_error" {
|
||||
if s.history != nil {
|
||||
s.history.Error(500, s.upstreamErr, "upstream_error", responsehistory.ThinkingForArchive(s.rawThinking.String(), s.toolDetectionThinking.String(), s.thinking.String()), responsehistory.TextForArchive(s.rawText.String(), s.text.String()))
|
||||
}
|
||||
s.sendError(s.upstreamErr)
|
||||
return
|
||||
}
|
||||
if scannerErr != nil {
|
||||
if s.history != nil {
|
||||
s.history.Error(500, scannerErr.Error(), "error", responsehistory.ThinkingForArchive(s.rawThinking.String(), s.toolDetectionThinking.String(), s.thinking.String()), responsehistory.TextForArchive(s.rawText.String(), s.text.String()))
|
||||
}
|
||||
s.sendError(scannerErr.Error())
|
||||
return
|
||||
}
|
||||
s.finalize("end_turn")
|
||||
s.finalize("end_turn", false)
|
||||
}
|
||||
|
||||
@@ -44,14 +44,20 @@ func geminiMessagesFromRequest(req map[string]any) []any {
|
||||
}
|
||||
|
||||
textParts := make([]string, 0, len(parts))
|
||||
pendingThinking := ""
|
||||
flushText := func() {
|
||||
if len(textParts) == 0 {
|
||||
return
|
||||
}
|
||||
out = append(out, map[string]any{
|
||||
msg := map[string]any{
|
||||
"role": role,
|
||||
"content": strings.Join(textParts, "\n"),
|
||||
})
|
||||
}
|
||||
if role == "assistant" && strings.TrimSpace(pendingThinking) != "" {
|
||||
msg["reasoning_content"] = pendingThinking
|
||||
pendingThinking = ""
|
||||
}
|
||||
out = append(out, msg)
|
||||
textParts = textParts[:0]
|
||||
}
|
||||
|
||||
@@ -61,6 +67,14 @@ func geminiMessagesFromRequest(req map[string]any) []any {
|
||||
continue
|
||||
}
|
||||
if text := strings.TrimSpace(asString(part["text"])); text != "" {
|
||||
if role == "assistant" && isGeminiThoughtPart(part) {
|
||||
if pendingThinking == "" {
|
||||
pendingThinking = text
|
||||
} else {
|
||||
pendingThinking += "\n" + text
|
||||
}
|
||||
continue
|
||||
}
|
||||
textParts = append(textParts, text)
|
||||
continue
|
||||
}
|
||||
@@ -75,7 +89,7 @@ func geminiMessagesFromRequest(req map[string]any) []any {
|
||||
}
|
||||
}
|
||||
lastToolCallIDByName[strings.ToLower(name)] = callID
|
||||
out = append(out, map[string]any{
|
||||
msg := map[string]any{
|
||||
"role": "assistant",
|
||||
"tool_calls": []any{
|
||||
map[string]any{
|
||||
@@ -87,7 +101,12 @@ func geminiMessagesFromRequest(req map[string]any) []any {
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
if strings.TrimSpace(pendingThinking) != "" {
|
||||
msg["reasoning_content"] = pendingThinking
|
||||
pendingThinking = ""
|
||||
}
|
||||
out = append(out, msg)
|
||||
}
|
||||
continue
|
||||
}
|
||||
@@ -132,10 +151,29 @@ func geminiMessagesFromRequest(req map[string]any) []any {
|
||||
}
|
||||
}
|
||||
flushText()
|
||||
if role == "assistant" && strings.TrimSpace(pendingThinking) != "" {
|
||||
out = append(out, map[string]any{
|
||||
"role": "assistant",
|
||||
"reasoning_content": pendingThinking,
|
||||
})
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// isGeminiThoughtPart reports whether a Gemini content part is marked as model
// thinking. A boolean "thought" field is authoritative when present (including
// an explicit false); otherwise a non-blank string "thoughtSignature" marks the
// part as a thought. A nil part is never a thought.
func isGeminiThoughtPart(part map[string]any) bool {
	// Indexing a nil map is safe and yields nil, so no separate nil guard is needed.
	switch flag := part["thought"].(type) {
	case bool:
		return flag
	}
	signature, isString := part["thoughtSignature"].(string)
	return isString && strings.TrimSpace(signature) != ""
}
|
||||
|
||||
func normalizeGeminiSystemInstruction(raw any) string {
|
||||
switch v := raw.(type) {
|
||||
case string:
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"ds2api/internal/promptcompat"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
@@ -53,6 +54,46 @@ func TestGeminiMessagesFromRequestPreservesFunctionRoundtrip(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGeminiMessagesFromRequestPreservesThoughtOnFunctionCallHistory(t *testing.T) {
|
||||
req := map[string]any{
|
||||
"contents": []any{
|
||||
map[string]any{
|
||||
"role": "model",
|
||||
"parts": []any{
|
||||
map[string]any{"text": "need current state before answering", "thought": true},
|
||||
map[string]any{
|
||||
"functionCall": map[string]any{
|
||||
"id": "call_g1",
|
||||
"name": "search_web",
|
||||
"args": map[string]any{"query": "ai"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
got := geminiMessagesFromRequest(req)
|
||||
if len(got) != 1 {
|
||||
t.Fatalf("expected one normalized message, got %#v", got)
|
||||
}
|
||||
assistant, _ := got[0].(map[string]any)
|
||||
if assistant["reasoning_content"] != "need current state before answering" {
|
||||
t.Fatalf("expected thought preserved as reasoning_content, got %#v", assistant)
|
||||
}
|
||||
tc, _ := assistant["tool_calls"].([]any)
|
||||
if len(tc) != 1 {
|
||||
t.Fatalf("expected one tool call, got %#v", assistant["tool_calls"])
|
||||
}
|
||||
prompt, _ := promptcompat.BuildOpenAIPromptForAdapter(got, nil, "", true)
|
||||
if !strings.Contains(prompt, "[reasoning_content]\nneed current state before answering\n[/reasoning_content]") {
|
||||
t.Fatalf("expected thought in prompt history, got %q", prompt)
|
||||
}
|
||||
if !strings.Contains(prompt, `<|DSML|invoke name="search_web">`) {
|
||||
t.Fatalf("expected tool call in prompt history, got %q", prompt)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGeminiMessagesFromRequestPreservesUnknownPartAsRawJSONText(t *testing.T) {
|
||||
req := map[string]any{
|
||||
"contents": []any{
|
||||
|
||||
@@ -2,6 +2,7 @@ package gemini
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
@@ -14,8 +15,10 @@ import (
|
||||
"ds2api/internal/assistantturn"
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/completionruntime"
|
||||
"ds2api/internal/httpapi/openai/history"
|
||||
"ds2api/internal/httpapi/requestbody"
|
||||
"ds2api/internal/promptcompat"
|
||||
"ds2api/internal/responsehistory"
|
||||
"ds2api/internal/sse"
|
||||
"ds2api/internal/toolcall"
|
||||
"ds2api/internal/translatorcliproxy"
|
||||
@@ -76,33 +79,65 @@ func (h *Handler) handleGeminiDirect(w http.ResponseWriter, r *http.Request, str
|
||||
return true
|
||||
}
|
||||
defer h.Auth.Release(a)
|
||||
stdReq, err = h.applyCurrentInputFile(r.Context(), a, stdReq)
|
||||
if err != nil {
|
||||
status, message := mapCurrentInputFileError(err)
|
||||
writeGeminiError(w, status, message)
|
||||
return true
|
||||
}
|
||||
historySession := responsehistory.Start(responsehistory.StartParams{
|
||||
Store: h.ChatHistory,
|
||||
Request: r,
|
||||
Auth: a,
|
||||
Surface: "gemini.generate_content",
|
||||
Standard: stdReq,
|
||||
})
|
||||
if stream {
|
||||
h.handleGeminiDirectStream(w, r, a, stdReq)
|
||||
h.handleGeminiDirectStream(w, r, a, stdReq, historySession)
|
||||
return true
|
||||
}
|
||||
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
})
|
||||
if outErr != nil {
|
||||
if historySession != nil {
|
||||
historySession.ErrorTurn(outErr.Status, outErr.Message, outErr.Code, result.Turn)
|
||||
}
|
||||
writeGeminiError(w, outErr.Status, outErr.Message)
|
||||
return true
|
||||
}
|
||||
if historySession != nil {
|
||||
historySession.SuccessTurn(http.StatusOK, result.Turn, responsehistory.GenericUsage(result.Turn))
|
||||
}
|
||||
writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponseFromTurn(result.Turn))
|
||||
return true
|
||||
}
|
||||
|
||||
func (h *Handler) handleGeminiDirectStream(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) {
|
||||
func (h *Handler) applyCurrentInputFile(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) {
|
||||
if h == nil {
|
||||
return stdReq, nil
|
||||
}
|
||||
return (history.Service{Store: h.Store, DS: h.DS}).ApplyCurrentInputFile(ctx, a, stdReq)
|
||||
}
|
||||
|
||||
func mapCurrentInputFileError(err error) (int, string) {
|
||||
return history.MapError(err)
|
||||
}
|
||||
|
||||
func (h *Handler) handleGeminiDirectStream(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, stdReq promptcompat.StandardRequest, historySession *responsehistory.Session) {
|
||||
start, outErr := completionruntime.StartCompletion(r.Context(), h.DS, a, stdReq, completionruntime.Options{
|
||||
CurrentInputFile: h.Store,
|
||||
})
|
||||
if outErr != nil {
|
||||
if historySession != nil {
|
||||
historySession.Error(outErr.Status, outErr.Message, outErr.Code, "", "")
|
||||
}
|
||||
writeGeminiError(w, outErr.Status, outErr.Message)
|
||||
return
|
||||
}
|
||||
streamReq := start.Request
|
||||
h.handleStreamGenerateContent(w, r, start.Response, streamReq.ResponseModel, streamReq.PromptTokenText, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw)
|
||||
h.handleStreamGenerateContentWithRetry(w, r, a, start.Response, start.Payload, start.Pow, streamReq.ResponseModel, streamReq.PromptTokenText, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, historySession)
|
||||
}
|
||||
|
||||
func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream bool) bool {
|
||||
@@ -294,12 +329,11 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
|
||||
}
|
||||
|
||||
result := sse.CollectStream(resp, thinkingEnabled, true)
|
||||
stripReferenceMarkers := stripReferenceMarkersEnabled()
|
||||
writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(
|
||||
model,
|
||||
finalPrompt,
|
||||
cleanVisibleOutput(result.Thinking, stripReferenceMarkers),
|
||||
cleanVisibleOutput(result.Text, stripReferenceMarkers),
|
||||
cleanVisibleOutput(result.Thinking, false),
|
||||
cleanVisibleOutput(result.Text, false),
|
||||
toolNames,
|
||||
))
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"ds2api/internal/chathistory"
|
||||
"ds2api/internal/textclean"
|
||||
"ds2api/internal/util"
|
||||
)
|
||||
@@ -12,10 +13,11 @@ import (
|
||||
var writeJSON = util.WriteJSON
|
||||
|
||||
type Handler struct {
|
||||
Store ConfigReader
|
||||
Auth AuthResolver
|
||||
DS DeepSeekCaller
|
||||
OpenAI OpenAIChatRunner
|
||||
Store ConfigReader
|
||||
Auth AuthResolver
|
||||
DS DeepSeekCaller
|
||||
OpenAI OpenAIChatRunner
|
||||
ChatHistory *chathistory.Store
|
||||
}
|
||||
|
||||
//nolint:unused // used by native Gemini stream/non-stream runtime helpers.
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -8,16 +9,26 @@ import (
|
||||
"time"
|
||||
|
||||
"ds2api/internal/assistantturn"
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/completionruntime"
|
||||
dsprotocol "ds2api/internal/deepseek/protocol"
|
||||
"ds2api/internal/responsehistory"
|
||||
"ds2api/internal/sse"
|
||||
streamengine "ds2api/internal/stream"
|
||||
)
|
||||
|
||||
//nolint:unused // retained for native Gemini stream handling path.
|
||||
func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Request, resp *http.Response, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any) {
|
||||
func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Request, resp *http.Response, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, historySessions ...*responsehistory.Session) {
|
||||
var historySession *responsehistory.Session
|
||||
if len(historySessions) > 0 {
|
||||
historySession = historySessions[0]
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
if historySession != nil {
|
||||
historySession.Error(resp.StatusCode, strings.TrimSpace(string(body)), "error", "", "")
|
||||
}
|
||||
writeGeminiError(w, resp.StatusCode, strings.TrimSpace(string(body)))
|
||||
return
|
||||
}
|
||||
@@ -29,7 +40,7 @@ func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Req
|
||||
|
||||
rc := http.NewResponseController(w)
|
||||
_, canFlush := w.(http.Flusher)
|
||||
runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, stripReferenceMarkersEnabled(), toolNames, toolsRaw)
|
||||
runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, stripReferenceMarkersEnabled(), toolNames, toolsRaw, historySession)
|
||||
|
||||
initialType := "text"
|
||||
if thinkingEnabled {
|
||||
@@ -46,7 +57,7 @@ func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Req
|
||||
}, streamengine.ConsumeHooks{
|
||||
OnParsed: runtime.onParsed,
|
||||
OnFinalize: func(_ streamengine.StopReason, _ error) {
|
||||
runtime.finalize()
|
||||
runtime.finalize(false)
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -70,6 +81,81 @@ type geminiStreamRuntime struct {
|
||||
accumulator *assistantturn.Accumulator
|
||||
contentFilter bool
|
||||
responseMessageID int
|
||||
finalErrorStatus int
|
||||
finalErrorMessage string
|
||||
finalErrorCode string
|
||||
history *responsehistory.Session
|
||||
}
|
||||
|
||||
func (h *Handler) handleStreamGenerateContentWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, historySession *responsehistory.Session) {
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
if historySession != nil {
|
||||
historySession.Error(resp.StatusCode, strings.TrimSpace(string(body)), "error", "", "")
|
||||
}
|
||||
writeGeminiError(w, resp.StatusCode, strings.TrimSpace(string(body)))
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "text/event-stream")
|
||||
w.Header().Set("Cache-Control", "no-cache, no-transform")
|
||||
w.Header().Set("Connection", "keep-alive")
|
||||
w.Header().Set("X-Accel-Buffering", "no")
|
||||
|
||||
rc := http.NewResponseController(w)
|
||||
_, canFlush := w.(http.Flusher)
|
||||
runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, stripReferenceMarkersEnabled(), toolNames, toolsRaw, historySession)
|
||||
|
||||
completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{
|
||||
Surface: "gemini.generate_content",
|
||||
Stream: true,
|
||||
RetryEnabled: true,
|
||||
MaxAttempts: 3,
|
||||
UsagePrompt: finalPrompt,
|
||||
}, completionruntime.StreamRetryHooks{
|
||||
ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) {
|
||||
return h.consumeGeminiStreamAttempt(r.Context(), currentResp, runtime, thinkingEnabled, allowDeferEmpty)
|
||||
},
|
||||
Finalize: func(_ int) {
|
||||
runtime.finalize(false)
|
||||
},
|
||||
ParentMessageID: func() int {
|
||||
return runtime.responseMessageID
|
||||
},
|
||||
OnRetryPrompt: func(prompt string) {
|
||||
runtime.finalPrompt = prompt
|
||||
},
|
||||
OnRetryFailure: func(status int, message, _ string) {
|
||||
runtime.sendErrorChunk(status, strings.TrimSpace(message))
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func (h *Handler) consumeGeminiStreamAttempt(ctx context.Context, resp *http.Response, runtime *geminiStreamRuntime, thinkingEnabled bool, allowDeferEmpty bool) (bool, bool) {
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
initialType := "text"
|
||||
if thinkingEnabled {
|
||||
initialType = "thinking"
|
||||
}
|
||||
streamengine.ConsumeSSE(streamengine.ConsumeConfig{
|
||||
Context: ctx,
|
||||
Body: resp.Body,
|
||||
ThinkingEnabled: thinkingEnabled,
|
||||
InitialType: initialType,
|
||||
KeepAliveInterval: time.Duration(dsprotocol.KeepAliveTimeout) * time.Second,
|
||||
IdleTimeout: time.Duration(dsprotocol.StreamIdleTimeout) * time.Second,
|
||||
MaxKeepAliveNoInput: dsprotocol.MaxKeepaliveCount,
|
||||
}, streamengine.ConsumeHooks{
|
||||
OnParsed: runtime.onParsed,
|
||||
OnFinalize: func(_ streamengine.StopReason, _ error) {
|
||||
},
|
||||
})
|
||||
terminalWritten := runtime.finalize(allowDeferEmpty)
|
||||
if terminalWritten {
|
||||
return true, false
|
||||
}
|
||||
return false, true
|
||||
}
|
||||
|
||||
//nolint:unused // retained for native Gemini stream handling path.
|
||||
@@ -84,6 +170,7 @@ func newGeminiStreamRuntime(
|
||||
stripReferenceMarkers bool,
|
||||
toolNames []string,
|
||||
toolsRaw any,
|
||||
history *responsehistory.Session,
|
||||
) *geminiStreamRuntime {
|
||||
return &geminiStreamRuntime{
|
||||
w: w,
|
||||
@@ -97,6 +184,7 @@ func newGeminiStreamRuntime(
|
||||
stripReferenceMarkers: stripReferenceMarkers,
|
||||
toolNames: toolNames,
|
||||
toolsRaw: toolsRaw,
|
||||
history: history,
|
||||
accumulator: assistantturn.NewAccumulator(assistantturn.AccumulatorOptions{
|
||||
ThinkingEnabled: thinkingEnabled,
|
||||
SearchEnabled: searchEnabled,
|
||||
@@ -116,6 +204,35 @@ func (s *geminiStreamRuntime) sendChunk(payload map[string]any) {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *geminiStreamRuntime) sendErrorChunk(status int, message string) {
|
||||
msg := strings.TrimSpace(message)
|
||||
if msg == "" {
|
||||
msg = http.StatusText(status)
|
||||
}
|
||||
errorStatus := "INVALID_ARGUMENT"
|
||||
switch status {
|
||||
case http.StatusUnauthorized:
|
||||
errorStatus = "UNAUTHENTICATED"
|
||||
case http.StatusForbidden:
|
||||
errorStatus = "PERMISSION_DENIED"
|
||||
case http.StatusTooManyRequests:
|
||||
errorStatus = "RESOURCE_EXHAUSTED"
|
||||
case http.StatusNotFound:
|
||||
errorStatus = "NOT_FOUND"
|
||||
default:
|
||||
if status >= 500 {
|
||||
errorStatus = "INTERNAL"
|
||||
}
|
||||
}
|
||||
s.sendChunk(map[string]any{
|
||||
"error": map[string]any{
|
||||
"code": status,
|
||||
"message": msg,
|
||||
"status": errorStatus,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
//nolint:unused // retained for native Gemini stream handling path.
|
||||
func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision {
|
||||
if !parsed.Parsed {
|
||||
@@ -170,11 +287,18 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
|
||||
"modelVersion": s.model,
|
||||
})
|
||||
}
|
||||
if s.history != nil {
|
||||
rawText, text, rawThinking, thinking, detectionThinking := s.accumulator.Snapshot()
|
||||
s.history.Progress(
|
||||
responsehistory.ThinkingForArchive(rawThinking, detectionThinking, thinking),
|
||||
responsehistory.TextForArchive(rawText, text),
|
||||
)
|
||||
}
|
||||
return streamengine.ParsedDecision{ContentSeen: accumulated.ContentSeen}
|
||||
}
|
||||
|
||||
//nolint:unused // retained for native Gemini stream handling path.
|
||||
func (s *geminiStreamRuntime) finalize() {
|
||||
func (s *geminiStreamRuntime) finalize(deferEmptyOutput bool) bool {
|
||||
rawText, text, rawThinking, thinking, detectionThinking := s.accumulator.Snapshot()
|
||||
turn := assistantturn.BuildTurnFromStreamSnapshot(assistantturn.StreamSnapshot{
|
||||
RawText: rawText,
|
||||
@@ -193,6 +317,28 @@ func (s *geminiStreamRuntime) finalize() {
|
||||
ToolsRaw: s.toolsRaw,
|
||||
})
|
||||
outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{})
|
||||
if outcome.ShouldFail {
|
||||
if deferEmptyOutput {
|
||||
s.finalErrorStatus = outcome.Error.Status
|
||||
s.finalErrorMessage = outcome.Error.Message
|
||||
s.finalErrorCode = outcome.Error.Code
|
||||
return false
|
||||
}
|
||||
if s.history != nil {
|
||||
s.history.Error(outcome.Error.Status, outcome.Error.Message, outcome.Error.Code, responsehistory.ThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking), responsehistory.TextForArchive(turn.RawText, turn.Text))
|
||||
}
|
||||
s.sendErrorChunk(outcome.Error.Status, outcome.Error.Message)
|
||||
return true
|
||||
}
|
||||
if s.history != nil {
|
||||
s.history.Success(
|
||||
http.StatusOK,
|
||||
responsehistory.ThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking),
|
||||
responsehistory.TextForArchive(turn.RawText, turn.Text),
|
||||
assistantturn.FinishReason(turn),
|
||||
responsehistory.GenericUsage(turn),
|
||||
)
|
||||
}
|
||||
|
||||
if s.bufferContent {
|
||||
parts := buildGeminiPartsFromTurn(turn)
|
||||
@@ -230,4 +376,5 @@ func (s *geminiStreamRuntime) finalize() {
|
||||
"totalTokenCount": outcome.Usage.TotalTokens,
|
||||
},
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -7,12 +7,14 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/chathistory"
|
||||
dsclient "ds2api/internal/deepseek/client"
|
||||
)
|
||||
|
||||
@@ -138,10 +140,12 @@ func TestGeminiDirectAppliesCurrentInputFile(t *testing.T) {
|
||||
ds := &testGeminiDS{
|
||||
resp: makeGeminiUpstreamResponse(`data: {"p":"response/content","v":"ok"}`),
|
||||
}
|
||||
historyStore := chathistory.New(filepath.Join(t.TempDir(), "history.json"))
|
||||
h := &Handler{
|
||||
Store: testGeminiConfig{},
|
||||
Auth: testGeminiAuth{},
|
||||
DS: ds,
|
||||
Store: testGeminiConfig{},
|
||||
Auth: testGeminiAuth{},
|
||||
DS: ds,
|
||||
ChatHistory: historyStore,
|
||||
}
|
||||
reqBody := `{"contents":[{"role":"user","parts":[{"text":"hello from gemini"}]}]}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-pro:generateContent", strings.NewReader(reqBody))
|
||||
@@ -172,6 +176,29 @@ func TestGeminiDirectAppliesCurrentInputFile(t *testing.T) {
|
||||
if !strings.Contains(prompt, "Continue from the latest state in the attached DS2API_HISTORY.txt context.") {
|
||||
t.Fatalf("expected continuation prompt, got %q", prompt)
|
||||
}
|
||||
snapshot, err := historyStore.Snapshot()
|
||||
if err != nil {
|
||||
t.Fatalf("snapshot history: %v", err)
|
||||
}
|
||||
if len(snapshot.Items) != 1 {
|
||||
t.Fatalf("expected one history item, got %d", len(snapshot.Items))
|
||||
}
|
||||
full, err := historyStore.Get(snapshot.Items[0].ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get history item: %v", err)
|
||||
}
|
||||
if full.Surface != "gemini.generate_content" {
|
||||
t.Fatalf("unexpected surface: %q", full.Surface)
|
||||
}
|
||||
if full.Content != "ok" {
|
||||
t.Fatalf("expected raw upstream content, got %q", full.Content)
|
||||
}
|
||||
if full.HistoryText != string(ds.uploadCalls[0].Data) {
|
||||
t.Fatalf("expected uploaded current input file to be persisted in history text")
|
||||
}
|
||||
if len(full.Messages) != 1 || !strings.Contains(full.Messages[0].Content, "Continue from the latest state in the attached DS2API_HISTORY.txt context.") {
|
||||
t.Fatalf("expected persisted message to match upstream continuation prompt, got %#v", full.Messages)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGeminiRoutesRegistered(t *testing.T) {
|
||||
|
||||
58
internal/httpapi/ollama/handler_routes.go
Normal file
58
internal/httpapi/ollama/handler_routes.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package ollama
|
||||
|
||||
import (
|
||||
"ds2api/internal/config"
|
||||
"ds2api/internal/util"
|
||||
"encoding/json"
|
||||
"github.com/go-chi/chi/v5"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
var WriteJSON = util.WriteJSON
|
||||
|
||||
type (
	// ConfigReader is the configuration view the Ollama handlers depend on.
	ConfigReader interface {
		// ModelAliases returns a map of model names.
		// NOTE(review): presumably alias -> canonical model ID; confirm
		// against the config package.
		ModelAliases() map[string]string
	}

	// Handler serves the Ollama-compatible endpoints registered by
	// RegisterRoutes.
	Handler struct {
		// Store is handed to config.OllamaModelByID when resolving a model.
		Store ConfigReader
	}

	// OllamaModelRequest is the JSON request body accepted by POST /api/show.
	OllamaModelRequest struct {
		Model string `json:"model"`
	}
)
|
||||
|
||||
func RegisterRoutes(r chi.Router, h *Handler) {
|
||||
r.Get("/api/version", h.GetVersion)
|
||||
r.Get("/api/tags", h.ListOllamaModels)
|
||||
r.Post("/api/show", h.GetOllamaModel)
|
||||
}
|
||||
|
||||
func (h *Handler) GetVersion(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`{"version":"0.23.1"}`))
|
||||
}
|
||||
func (h *Handler) ListOllamaModels(w http.ResponseWriter, r *http.Request) {
|
||||
WriteJSON(w, http.StatusOK, config.OllamaModelsResponse())
|
||||
}
|
||||
func (h *Handler) GetOllamaModel(w http.ResponseWriter, r *http.Request) {
|
||||
var payload OllamaModelRequest
|
||||
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
|
||||
http.Error(w, "Invalid JSON body: "+err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err := r.Body.Close(); err != nil {
|
||||
slog.Warn("[ollama] failed to close request body", "error", err)
|
||||
}
|
||||
}()
|
||||
modelID := payload.Model
|
||||
model, ok := config.OllamaModelByID(h.Store, modelID)
|
||||
if !ok {
|
||||
http.Error(w, "Model not found.", http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
WriteJSON(w, http.StatusOK, model)
|
||||
}
|
||||
127
internal/httpapi/ollama/handler_routes_test.go
Normal file
127
internal/httpapi/ollama/handler_routes_test.go
Normal file
@@ -0,0 +1,127 @@
|
||||
package ollama
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"github.com/go-chi/chi/v5"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type ollamaTestSurface struct {
|
||||
Store ConfigReader
|
||||
handler *Handler
|
||||
}
|
||||
|
||||
func (h *ollamaTestSurface) apiHandler() *Handler {
|
||||
if h.handler == nil {
|
||||
h.handler = &Handler{Store: h.Store}
|
||||
}
|
||||
return h.handler
|
||||
}
|
||||
|
||||
func registerOllamaTestRoutes(r chi.Router, h *ollamaTestSurface) {
|
||||
r.Get("/api/version", h.apiHandler().GetVersion)
|
||||
r.Get("/api/tags", h.apiHandler().ListOllamaModels)
|
||||
r.Post("/api/show", h.apiHandler().GetOllamaModel)
|
||||
}
|
||||
|
||||
func TestGetOllamaVersionRoute(t *testing.T) {
|
||||
h := &ollamaTestSurface{}
|
||||
r := chi.NewRouter()
|
||||
registerOllamaTestRoutes(r, h)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/version", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetOllamaModelsRoute(t *testing.T) {
|
||||
h := &ollamaTestSurface{}
|
||||
r := chi.NewRouter()
|
||||
registerOllamaTestRoutes(r, h)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/tags", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetOllamaModelRoute(t *testing.T) {
|
||||
h := &ollamaTestSurface{}
|
||||
r := chi.NewRouter()
|
||||
registerOllamaTestRoutes(r, h)
|
||||
|
||||
t.Run("direct", func(t *testing.T) {
|
||||
body := `{"model":"deepseek-v4-flash"}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/show", strings.NewReader(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
var payload map[string]any
|
||||
if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
|
||||
t.Fatalf("expected valid json body, got err=%v body=%s", err, rec.Body.String())
|
||||
}
|
||||
if _, ok := payload["id"]; !ok {
|
||||
t.Fatalf("expected response has lowercase id field, body=%s", rec.Body.String())
|
||||
}
|
||||
if _, ok := payload["ID"]; ok {
|
||||
t.Fatalf("expected response does not expose uppercase ID field, body=%s", rec.Body.String())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("direct_nothinking", func(t *testing.T) {
|
||||
body := `{"model":"deepseek-v4-flash-nothinking"}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/show", strings.NewReader(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("direct_expert", func(t *testing.T) {
|
||||
body := `{"model":"deepseek-v4-pro"}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/show", strings.NewReader(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("direct_vision", func(t *testing.T) {
|
||||
body := `{"model":"deepseek-v4-vision"}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/show", strings.NewReader(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetOllamaModelRouteNotFound(t *testing.T) {
|
||||
h := &ollamaTestSurface{}
|
||||
r := chi.NewRouter()
|
||||
registerOllamaTestRoutes(r, h)
|
||||
|
||||
body := `{"model":"not-exists"}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/show", strings.NewReader(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
if rec.Code != http.StatusNotFound {
|
||||
t.Fatalf("expected 404, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
}
|
||||
@@ -14,9 +14,6 @@ import (
|
||||
"ds2api/internal/promptcompat"
|
||||
)
|
||||
|
||||
const adminWebUISourceHeader = "X-Ds2-Source"
|
||||
const adminWebUISourceValue = "admin-webui-api-tester"
|
||||
|
||||
type chatHistorySession struct {
|
||||
store *chathistory.Store
|
||||
entryID string
|
||||
@@ -40,6 +37,7 @@ func startChatHistory(store *chathistory.Store, r *http.Request, a *auth.Request
|
||||
entry, err := store.Start(chathistory.StartParams{
|
||||
CallerID: strings.TrimSpace(a.CallerID),
|
||||
AccountID: strings.TrimSpace(a.AccountID),
|
||||
Surface: "openai.chat_completions",
|
||||
Model: strings.TrimSpace(stdReq.ResponseModel),
|
||||
Stream: stdReq.Stream,
|
||||
UserInput: extractSingleUserInput(stdReq.Messages),
|
||||
@@ -50,6 +48,7 @@ func startChatHistory(store *chathistory.Store, r *http.Request, a *auth.Request
|
||||
startParams := chathistory.StartParams{
|
||||
CallerID: strings.TrimSpace(a.CallerID),
|
||||
AccountID: strings.TrimSpace(a.AccountID),
|
||||
Surface: "openai.chat_completions",
|
||||
Model: strings.TrimSpace(stdReq.ResponseModel),
|
||||
Stream: stdReq.Stream,
|
||||
UserInput: extractSingleUserInput(stdReq.Messages),
|
||||
@@ -82,7 +81,7 @@ func shouldCaptureChatHistory(r *http.Request) bool {
|
||||
if isVercelStreamPrepareRequest(r) || isVercelStreamReleaseRequest(r) {
|
||||
return false
|
||||
}
|
||||
return strings.TrimSpace(r.Header.Get(adminWebUISourceHeader)) != adminWebUISourceValue
|
||||
return true
|
||||
}
|
||||
|
||||
func extractSingleUserInput(messages []any) string {
|
||||
@@ -188,6 +187,23 @@ func (s *chatHistorySession) stopped(thinking, content, finishReason string) {
|
||||
})
|
||||
}
|
||||
|
||||
// historyTextForArchive picks the text to store in chat history: raw is
// returned unchanged when it contains any non-whitespace character, otherwise
// visible is returned.
func historyTextForArchive(raw, visible string) string {
	if strings.TrimSpace(raw) == "" {
		return visible
	}
	return raw
}
|
||||
|
||||
// historyThinkingForArchive picks the thinking text to store in chat history.
// The first of raw, then detection, that contains a non-whitespace character
// is returned unchanged; if both are blank, visible is returned.
func historyThinkingForArchive(raw, detection, visible string) string {
	switch {
	case strings.TrimSpace(raw) != "":
		return raw
	case strings.TrimSpace(detection) != "":
		return detection
	default:
		return visible
	}
}
|
||||
|
||||
func (s *chatHistorySession) retryMissingEntry() bool {
|
||||
if s == nil || s.store == nil || s.disabled {
|
||||
return false
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
@@ -102,6 +103,86 @@ func TestChatCompletionsNonStreamPersistsHistory(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatHistoryNonStreamArchivesRawToolCallMarkup(t *testing.T) {
|
||||
historyStore := newTestChatHistoryStore(t)
|
||||
entry, err := historyStore.Start(chathistory.StartParams{
|
||||
CallerID: "caller:test",
|
||||
Model: "deepseek-v4-flash",
|
||||
UserInput: "call tool",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start history failed: %v", err)
|
||||
}
|
||||
session := &chatHistorySession{
|
||||
store: historyStore,
|
||||
entryID: entry.ID,
|
||||
startedAt: time.Now(),
|
||||
lastPersist: time.Now().Add(-time.Second),
|
||||
finalPrompt: "call tool",
|
||||
}
|
||||
rawToolCall := `<tool_calls><invoke name="search"><parameter name="q">golang</parameter></invoke></tool_calls>`
|
||||
|
||||
h := &Handler{}
|
||||
rec := httptest.NewRecorder()
|
||||
resp := makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":`+strconv.Quote(rawToolCall)+`}`, `data: [DONE]`)
|
||||
h.handleNonStream(rec, resp, "cid-tool-history", "deepseek-v4-flash", "prompt", 0, false, false, []string{"search"}, nil, session)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
full, err := historyStore.Get(entry.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get detail failed: %v", err)
|
||||
}
|
||||
if full.Content != rawToolCall {
|
||||
t.Fatalf("expected raw tool markup archived, got %q", full.Content)
|
||||
}
|
||||
if full.FinishReason != "tool_calls" {
|
||||
t.Fatalf("expected tool_calls finish reason, got %#v", full.FinishReason)
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatHistoryStreamArchivesRawToolCallMarkup(t *testing.T) {
|
||||
historyStore := newTestChatHistoryStore(t)
|
||||
entry, err := historyStore.Start(chathistory.StartParams{
|
||||
CallerID: "caller:test",
|
||||
Model: "deepseek-v4-flash",
|
||||
Stream: true,
|
||||
UserInput: "call tool",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("start history failed: %v", err)
|
||||
}
|
||||
session := &chatHistorySession{
|
||||
store: historyStore,
|
||||
entryID: entry.ID,
|
||||
startedAt: time.Now(),
|
||||
lastPersist: time.Now().Add(-time.Second),
|
||||
finalPrompt: "call tool",
|
||||
}
|
||||
rawToolCall := `<tool_calls><invoke name="search"><parameter name="q">golang</parameter></invoke></tool_calls>`
|
||||
|
||||
h := &Handler{}
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
resp := makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":`+strconv.Quote(rawToolCall)+`}`, `data: [DONE]`)
|
||||
h.handleStream(rec, req, resp, "cid-stream-tool-history", "deepseek-v4-flash", "prompt", 0, false, false, []string{"search"}, nil, session)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
full, err := historyStore.Get(entry.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get detail failed: %v", err)
|
||||
}
|
||||
if full.Content != rawToolCall {
|
||||
t.Fatalf("expected raw streamed tool markup archived, got %q", full.Content)
|
||||
}
|
||||
if full.FinishReason != "tool_calls" {
|
||||
t.Fatalf("expected tool_calls finish reason, got %#v", full.FinishReason)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartChatHistoryRecoversFromTransientWriteFailure(t *testing.T) {
|
||||
historyStore := newTestChatHistoryStore(t)
|
||||
restore := blockChatHistoryDetailDir(t, historyStore.DetailDir())
|
||||
@@ -213,7 +294,7 @@ func TestHandleStreamContextCancelledMarksHistoryStopped(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatCompletionsSkipsAdminWebUISource(t *testing.T) {
|
||||
func TestChatCompletionsRecordsAdminWebUISource(t *testing.T) {
|
||||
historyStore := newTestChatHistoryStore(t)
|
||||
h := &Handler{
|
||||
Store: mockOpenAIConfig{},
|
||||
@@ -226,7 +307,7 @@ func TestChatCompletionsSkipsAdminWebUISource(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody))
|
||||
req.Header.Set("Authorization", "Bearer direct-token")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set(adminWebUISourceHeader, adminWebUISourceValue)
|
||||
req.Header.Set("X-Ds2-Source", "admin-webui-api-tester")
|
||||
rec := httptest.NewRecorder()
|
||||
h.ChatCompletions(rec, req)
|
||||
|
||||
@@ -237,8 +318,8 @@ func TestChatCompletionsSkipsAdminWebUISource(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("snapshot failed: %v", err)
|
||||
}
|
||||
if len(snapshot.Items) != 0 {
|
||||
t.Fatalf("expected admin webui source to be skipped, got %#v", snapshot.Items)
|
||||
if len(snapshot.Items) != 1 {
|
||||
t.Fatalf("expected admin webui source to be recorded, got %#v", snapshot.Items)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -127,13 +127,7 @@ func (s *chatStreamRuntime) sendKeepAlive() {
|
||||
return
|
||||
}
|
||||
_, _ = s.w.Write([]byte(": keep-alive\n\n"))
|
||||
s.sendChunk(openaifmt.BuildChatStreamChunk(
|
||||
s.completionID,
|
||||
s.created,
|
||||
s.model,
|
||||
[]map[string]any{},
|
||||
nil,
|
||||
))
|
||||
_ = s.rc.Flush()
|
||||
}
|
||||
|
||||
func (s *chatStreamRuntime) sendChunk(v any) {
|
||||
@@ -195,6 +189,24 @@ func (s *chatStreamRuntime) markContextCancelled() {
|
||||
s.finalFinishReason = string(streamengine.StopReasonContextCancelled)
|
||||
}
|
||||
|
||||
func (s *chatStreamRuntime) historyText() string {
|
||||
if s == nil {
|
||||
return ""
|
||||
}
|
||||
return historyTextForArchive(s.accumulator.RawText.String(), s.finalText)
|
||||
}
|
||||
|
||||
func (s *chatStreamRuntime) historyThinking() string {
|
||||
if s == nil {
|
||||
return ""
|
||||
}
|
||||
return historyThinkingForArchive(
|
||||
s.accumulator.RawThinking.String(),
|
||||
s.accumulator.ToolDetectionThinking.String(),
|
||||
s.finalThinking,
|
||||
)
|
||||
}
|
||||
|
||||
func (s *chatStreamRuntime) resetStreamToolCallState() {
|
||||
s.streamToolCallIDs = map[int]string{}
|
||||
s.streamToolNames = map[int]string{}
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"ds2api/internal/promptcompat"
|
||||
)
|
||||
|
||||
func TestChatStreamKeepAliveEmitsEmptyChoiceDataFrame(t *testing.T) {
|
||||
func TestChatStreamKeepAliveUsesCommentOnly(t *testing.T) {
|
||||
rec := httptest.NewRecorder()
|
||||
runtime := newChatStreamRuntime(
|
||||
rec,
|
||||
@@ -40,18 +40,8 @@ func TestChatStreamKeepAliveEmitsEmptyChoiceDataFrame(t *testing.T) {
|
||||
if done {
|
||||
t.Fatalf("keep-alive must not emit [DONE], body=%q", body)
|
||||
}
|
||||
if len(frames) != 1 {
|
||||
t.Fatalf("expected one data frame, got %d body=%q", len(frames), body)
|
||||
}
|
||||
if got := asString(frames[0]["id"]); got != "chatcmpl-test" {
|
||||
t.Fatalf("expected completion id to be preserved, got %q", got)
|
||||
}
|
||||
if got := asString(frames[0]["object"]); got != "chat.completion.chunk" {
|
||||
t.Fatalf("expected chat chunk object, got %q", got)
|
||||
}
|
||||
choices, _ := frames[0]["choices"].([]any)
|
||||
if len(choices) != 0 {
|
||||
t.Fatalf("expected empty choices heartbeat, got %#v", choices)
|
||||
if len(frames) != 0 {
|
||||
t.Fatalf("keep-alive must not emit JSON data frames, got %#v body=%q", frames, body)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,11 +4,11 @@ import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"ds2api/internal/assistantturn"
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/completionruntime"
|
||||
"ds2api/internal/config"
|
||||
dsprotocol "ds2api/internal/deepseek/protocol"
|
||||
openaifmt "ds2api/internal/format/openai"
|
||||
@@ -17,184 +17,94 @@ import (
|
||||
streamengine "ds2api/internal/stream"
|
||||
)
|
||||
|
||||
type chatNonStreamResult struct {
|
||||
rawThinking string
|
||||
rawText string
|
||||
thinking string
|
||||
toolDetectionThinking string
|
||||
text string
|
||||
contentFilter bool
|
||||
detectedCalls int
|
||||
body map[string]any
|
||||
finishReason string
|
||||
responseMessageID int
|
||||
outputError *assistantturn.OutputError
|
||||
}
|
||||
|
||||
func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Context, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, historySession *chatHistorySession) {
|
||||
attempts := 0
|
||||
currentResp := resp
|
||||
usagePrompt := finalPrompt
|
||||
accumulatedThinking := ""
|
||||
accumulatedRawThinking := ""
|
||||
accumulatedToolDetectionThinking := ""
|
||||
for {
|
||||
result, ok := h.collectChatNonStreamAttempt(w, currentResp, completionID, model, usagePrompt, thinkingEnabled, searchEnabled, toolNames, toolsRaw)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
accumulatedThinking += sse.TrimContinuationOverlap(accumulatedThinking, result.thinking)
|
||||
accumulatedRawThinking += sse.TrimContinuationOverlap(accumulatedRawThinking, result.rawThinking)
|
||||
accumulatedToolDetectionThinking += sse.TrimContinuationOverlap(accumulatedToolDetectionThinking, result.toolDetectionThinking)
|
||||
result.thinking = accumulatedThinking
|
||||
result.rawThinking = accumulatedRawThinking
|
||||
result.toolDetectionThinking = accumulatedToolDetectionThinking
|
||||
detected := detectAssistantToolCalls(result.rawText, result.text, result.rawThinking, result.toolDetectionThinking, toolNames)
|
||||
result.detectedCalls = len(detected.Calls)
|
||||
result.body = openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, result.thinking, result.text, detected.Calls, toolsRaw)
|
||||
addRefFileTokensToUsage(result.body, refFileTokens)
|
||||
result.finishReason = chatFinishReason(result.body)
|
||||
if !shouldRetryChatNonStream(result, attempts) {
|
||||
h.finishChatNonStreamResult(w, result, attempts, usagePrompt, refFileTokens, historySession)
|
||||
return
|
||||
}
|
||||
|
||||
attempts++
|
||||
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "parent_message_id", result.responseMessageID)
|
||||
retryPow, powErr := h.DS.GetPow(ctx, a, 3)
|
||||
if powErr != nil {
|
||||
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", powErr)
|
||||
retryPow = pow
|
||||
}
|
||||
retryPayload := clonePayloadForEmptyOutputRetry(payload, result.responseMessageID)
|
||||
nextResp, err := h.DS.CallCompletion(ctx, a, retryPayload, retryPow, 3)
|
||||
if err != nil {
|
||||
if historySession != nil {
|
||||
historySession.error(http.StatusInternalServerError, "Failed to get completion.", "error", result.thinking, result.text)
|
||||
}
|
||||
writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
|
||||
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", false, "retry_attempt", attempts, "error", err)
|
||||
return
|
||||
}
|
||||
usagePrompt = usagePromptWithEmptyOutputRetry(usagePrompt, attempts)
|
||||
currentResp = nextResp
|
||||
}
|
||||
}
|
||||
|
||||
func (h *Handler) collectChatNonStreamAttempt(w http.ResponseWriter, resp *http.Response, completionID, model, usagePrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any) (chatNonStreamResult, bool) {
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
writeOpenAIError(w, resp.StatusCode, string(body))
|
||||
return chatNonStreamResult{}, false
|
||||
}
|
||||
result := sse.CollectStream(resp, thinkingEnabled, true)
|
||||
turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{
|
||||
Model: model,
|
||||
Prompt: usagePrompt,
|
||||
SearchEnabled: searchEnabled,
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
ToolNames: toolNames,
|
||||
ToolsRaw: toolsRaw,
|
||||
})
|
||||
respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, usagePrompt, turn.Thinking, turn.Text, turn.ToolCalls, toolsRaw)
|
||||
return chatNonStreamResult{
|
||||
rawThinking: result.Thinking,
|
||||
rawText: result.Text,
|
||||
thinking: turn.Thinking,
|
||||
toolDetectionThinking: result.ToolDetectionThinking,
|
||||
text: turn.Text,
|
||||
contentFilter: result.ContentFilter,
|
||||
detectedCalls: len(turn.ToolCalls),
|
||||
body: respBody,
|
||||
finishReason: chatFinishReason(respBody),
|
||||
responseMessageID: result.ResponseMessageID,
|
||||
outputError: turn.Error,
|
||||
}, true
|
||||
}
|
||||
|
||||
func (h *Handler) finishChatNonStreamResult(w http.ResponseWriter, result chatNonStreamResult, attempts int, usagePrompt string, refFileTokens int, historySession *chatHistorySession) {
|
||||
if result.detectedCalls == 0 && strings.TrimSpace(result.text) == "" {
|
||||
status, message, code := upstreamEmptyOutputDetail(result.contentFilter, result.text, result.thinking)
|
||||
if result.outputError != nil {
|
||||
status, message, code = result.outputError.Status, result.outputError.Message, result.outputError.Code
|
||||
}
|
||||
if historySession != nil {
|
||||
historySession.error(status, message, code, result.thinking, result.text)
|
||||
historySession.error(resp.StatusCode, string(body), "error", "", "")
|
||||
}
|
||||
writeOpenAIErrorWithCode(w, status, message, code)
|
||||
config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", false, "retry_attempts", attempts, "success_source", "none", "content_filter", result.contentFilter)
|
||||
writeOpenAIError(w, resp.StatusCode, string(body))
|
||||
return
|
||||
}
|
||||
if historySession != nil {
|
||||
historySession.success(http.StatusOK, result.thinking, result.text, result.finishReason, openaifmt.BuildChatUsageForModel("", usagePrompt, result.thinking, result.text, refFileTokens))
|
||||
stdReq := promptcompat.StandardRequest{
|
||||
Surface: "chat.completions",
|
||||
ResponseModel: model,
|
||||
PromptTokenText: finalPrompt,
|
||||
FinalPrompt: finalPrompt,
|
||||
RefFileTokens: refFileTokens,
|
||||
Thinking: thinkingEnabled,
|
||||
Search: searchEnabled,
|
||||
ToolNames: toolNames,
|
||||
ToolsRaw: toolsRaw,
|
||||
ToolChoice: promptcompat.DefaultToolChoicePolicy(),
|
||||
}
|
||||
writeJSON(w, http.StatusOK, result.body)
|
||||
source := "first_attempt"
|
||||
if attempts > 0 {
|
||||
source = "synthetic_retry"
|
||||
}
|
||||
config.Logger.Info("[openai_empty_retry] completed", "surface", "chat.completions", "stream", false, "retry_attempts", attempts, "success_source", source)
|
||||
}
|
||||
|
||||
func chatFinishReason(respBody map[string]any) string {
|
||||
if choices, ok := respBody["choices"].([]map[string]any); ok && len(choices) > 0 {
|
||||
if fr, _ := choices[0]["finish_reason"].(string); strings.TrimSpace(fr) != "" {
|
||||
return fr
|
||||
retryEnabled := h != nil && h.DS != nil && emptyOutputRetryEnabled()
|
||||
result, outErr := completionruntime.ExecuteNonStreamStartedWithRetry(ctx, h.DS, a, completionruntime.StartResult{
|
||||
SessionID: completionID,
|
||||
Payload: payload,
|
||||
Pow: pow,
|
||||
Response: resp,
|
||||
Request: stdReq,
|
||||
}, completionruntime.Options{
|
||||
RetryEnabled: retryEnabled,
|
||||
RetryMaxAttempts: emptyOutputRetryMaxAttempts(),
|
||||
})
|
||||
if outErr != nil {
|
||||
if historySession != nil {
|
||||
historySession.error(outErr.Status, outErr.Message, outErr.Code, historyThinkingForArchive(result.Turn.RawThinking, result.Turn.DetectionThinking, result.Turn.Thinking), historyTextForArchive(result.Turn.RawText, result.Turn.Text))
|
||||
}
|
||||
writeOpenAIErrorWithCode(w, outErr.Status, outErr.Message, outErr.Code)
|
||||
return
|
||||
}
|
||||
return "stop"
|
||||
respBody := openaifmt.BuildChatCompletionWithToolCalls(result.SessionID, model, result.Turn.Prompt, result.Turn.Thinking, result.Turn.Text, result.Turn.ToolCalls, toolsRaw)
|
||||
respBody["usage"] = assistantturn.OpenAIChatUsage(result.Turn)
|
||||
outcome := assistantturn.FinalizeTurn(result.Turn, assistantturn.FinalizeOptions{})
|
||||
if historySession != nil {
|
||||
historySession.success(http.StatusOK, historyThinkingForArchive(result.Turn.RawThinking, result.Turn.DetectionThinking, result.Turn.Thinking), historyTextForArchive(result.Turn.RawText, result.Turn.Text), outcome.FinishReason, assistantturn.OpenAIChatUsage(result.Turn))
|
||||
}
|
||||
writeJSON(w, http.StatusOK, respBody)
|
||||
}
|
||||
|
||||
func shouldRetryChatNonStream(result chatNonStreamResult, attempts int) bool {
|
||||
return emptyOutputRetryEnabled() &&
|
||||
attempts < emptyOutputRetryMaxAttempts() &&
|
||||
!result.contentFilter &&
|
||||
result.detectedCalls == 0 &&
|
||||
strings.TrimSpace(result.text) == ""
|
||||
}
|
||||
|
||||
func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, historySession *chatHistorySession) {
|
||||
func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID string, sessionIDRef *string, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, historySession *chatHistorySession) {
|
||||
streamRuntime, initialType, ok := h.prepareChatStreamRuntime(w, resp, completionID, model, finalPrompt, refFileTokens, thinkingEnabled, searchEnabled, toolNames, toolsRaw, toolChoice, historySession)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
attempts := 0
|
||||
currentResp := resp
|
||||
for {
|
||||
terminalWritten, retryable := h.consumeChatStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, historySession, attempts < emptyOutputRetryMaxAttempts())
|
||||
if terminalWritten {
|
||||
logChatStreamTerminal(streamRuntime, attempts)
|
||||
return
|
||||
}
|
||||
if !retryable || !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() {
|
||||
completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{
|
||||
Surface: "chat.completions",
|
||||
Stream: true,
|
||||
RetryEnabled: emptyOutputRetryEnabled(),
|
||||
RetryMaxAttempts: emptyOutputRetryMaxAttempts(),
|
||||
MaxAttempts: 3,
|
||||
UsagePrompt: finalPrompt,
|
||||
}, completionruntime.StreamRetryHooks{
|
||||
ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) {
|
||||
return h.consumeChatStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, historySession, allowDeferEmpty)
|
||||
},
|
||||
Finalize: func(attempts int) {
|
||||
streamRuntime.finalize("stop", false)
|
||||
recordChatStreamHistory(streamRuntime, historySession)
|
||||
config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "chat.completions", "stream", true, "retry_attempts", attempts, "success_source", "none")
|
||||
return
|
||||
}
|
||||
attempts++
|
||||
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID)
|
||||
retryPow, powErr := h.DS.GetPow(r.Context(), a, 3)
|
||||
if powErr != nil {
|
||||
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", powErr)
|
||||
retryPow = pow
|
||||
}
|
||||
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3)
|
||||
if err != nil {
|
||||
failChatStreamRetry(streamRuntime, historySession, http.StatusInternalServerError, "Failed to get completion.", "error")
|
||||
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "chat.completions", "stream", true, "retry_attempt", attempts, "error", err)
|
||||
return
|
||||
}
|
||||
if nextResp.StatusCode != http.StatusOK {
|
||||
defer func() { _ = nextResp.Body.Close() }()
|
||||
body, _ := io.ReadAll(nextResp.Body)
|
||||
failChatStreamRetry(streamRuntime, historySession, nextResp.StatusCode, string(body), "error")
|
||||
return
|
||||
}
|
||||
streamRuntime.finalPrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts)
|
||||
currentResp = nextResp
|
||||
}
|
||||
},
|
||||
ParentMessageID: func() int {
|
||||
return streamRuntime.responseMessageID
|
||||
},
|
||||
OnRetryPrompt: func(prompt string) {
|
||||
streamRuntime.finalPrompt = prompt
|
||||
},
|
||||
OnRetryFailure: func(status int, message, code string) {
|
||||
failChatStreamRetry(streamRuntime, historySession, status, message, code)
|
||||
},
|
||||
OnAccountSwitch: func(sessionID string) {
|
||||
if sessionIDRef != nil {
|
||||
*sessionIDRef = sessionID
|
||||
}
|
||||
},
|
||||
OnTerminal: func(attempts int) {
|
||||
logChatStreamTerminal(streamRuntime, attempts)
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func (h *Handler) prepareChatStreamRuntime(w http.ResponseWriter, resp *http.Response, completionID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, historySession *chatHistorySession) (*chatStreamRuntime, string, bool) {
|
||||
@@ -246,7 +156,7 @@ func (h *Handler) consumeChatStreamAttempt(r *http.Request, resp *http.Response,
|
||||
OnParsed: func(parsed sse.LineResult) streamengine.ParsedDecision {
|
||||
decision := streamRuntime.onParsed(parsed)
|
||||
if historySession != nil {
|
||||
historySession.progress(streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String())
|
||||
historySession.progress(streamRuntime.historyThinking(), streamRuntime.historyText())
|
||||
}
|
||||
return decision
|
||||
},
|
||||
@@ -258,7 +168,7 @@ func (h *Handler) consumeChatStreamAttempt(r *http.Request, resp *http.Response,
|
||||
OnContextDone: func() {
|
||||
streamRuntime.markContextCancelled()
|
||||
if historySession != nil {
|
||||
historySession.stopped(streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String(), string(streamengine.StopReasonContextCancelled))
|
||||
historySession.stopped(streamRuntime.historyThinking(), streamRuntime.historyText(), string(streamengine.StopReasonContextCancelled))
|
||||
}
|
||||
},
|
||||
})
|
||||
@@ -278,16 +188,16 @@ func recordChatStreamHistory(streamRuntime *chatStreamRuntime, historySession *c
|
||||
return
|
||||
}
|
||||
if streamRuntime.finalErrorMessage != "" {
|
||||
historySession.error(streamRuntime.finalErrorStatus, streamRuntime.finalErrorMessage, streamRuntime.finalErrorCode, streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String())
|
||||
historySession.error(streamRuntime.finalErrorStatus, streamRuntime.finalErrorMessage, streamRuntime.finalErrorCode, streamRuntime.historyThinking(), streamRuntime.historyText())
|
||||
return
|
||||
}
|
||||
historySession.success(http.StatusOK, streamRuntime.finalThinking, streamRuntime.finalText, streamRuntime.finalFinishReason, streamRuntime.finalUsage)
|
||||
historySession.success(http.StatusOK, streamRuntime.historyThinking(), streamRuntime.historyText(), streamRuntime.finalFinishReason, streamRuntime.finalUsage)
|
||||
}
|
||||
|
||||
func failChatStreamRetry(streamRuntime *chatStreamRuntime, historySession *chatHistorySession, status int, message, code string) {
|
||||
streamRuntime.sendFailedChunk(status, message, code)
|
||||
if historySession != nil {
|
||||
historySession.error(status, message, code, streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String())
|
||||
historySession.error(status, message, code, streamRuntime.historyThinking(), streamRuntime.historyText())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -106,10 +106,6 @@ func cleanVisibleOutput(text string, stripReferenceMarkers bool) string {
|
||||
return shared.CleanVisibleOutput(text, stripReferenceMarkers)
|
||||
}
|
||||
|
||||
// upstreamEmptyOutputDetail is a package-local shim: it forwards contentFilter,
// text and thinking unchanged to shared.UpstreamEmptyOutputDetail and returns its
// three results as-is. Callers in this package bind them as (status, message, code).
func upstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int, string, string) {
|
||||
return shared.UpstreamEmptyOutputDetail(contentFilter, text, thinking)
|
||||
}
|
||||
|
||||
// emptyOutputRetryEnabled is a package-local shim that returns the result of
// shared.EmptyOutputRetryEnabled without modification.
func emptyOutputRetryEnabled() bool {
|
||||
return shared.EmptyOutputRetryEnabled()
|
||||
}
|
||||
@@ -118,14 +114,6 @@ func emptyOutputRetryMaxAttempts() int {
|
||||
return shared.EmptyOutputRetryMaxAttempts()
|
||||
}
|
||||
|
||||
// clonePayloadForEmptyOutputRetry is a package-local shim that forwards payload and
// parentMessageID unchanged to shared.ClonePayloadForEmptyOutputRetry and returns
// the map it produces. Callers pass the previous attempt's response message ID as
// parentMessageID.
func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
|
||||
return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID)
|
||||
}
|
||||
|
||||
// usagePromptWithEmptyOutputRetry is a package-local shim that forwards
// originalPrompt and retryAttempts unchanged to
// shared.UsagePromptWithEmptyOutputRetry and returns the resulting prompt string.
func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {
|
||||
return shared.UsagePromptWithEmptyOutputRetry(originalPrompt, retryAttempts)
|
||||
}
|
||||
|
||||
// formatIncrementalStreamToolCallDeltas is a package-local shim that forwards
// deltas and ids unchanged to shared.FormatIncrementalStreamToolCallDeltas and
// returns the slice of maps it produces.
func formatIncrementalStreamToolCallDeltas(deltas []toolstream.ToolCallDelta, ids map[int]string) []map[string]any {
|
||||
return shared.FormatIncrementalStreamToolCallDeltas(deltas, ids)
|
||||
}
|
||||
@@ -137,7 +125,3 @@ func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta,
|
||||
// formatFinalStreamToolCallsWithStableIDs is a package-local shim that forwards
// calls, ids and toolsRaw unchanged to shared.FormatFinalStreamToolCallsWithStableIDs
// and returns the slice of maps it produces.
func formatFinalStreamToolCallsWithStableIDs(calls []toolcall.ParsedToolCall, ids map[int]string, toolsRaw any) []map[string]any {
|
||||
return shared.FormatFinalStreamToolCallsWithStableIDs(calls, ids, toolsRaw)
|
||||
}
|
||||
|
||||
// detectAssistantToolCalls is a package-local shim that forwards all five arguments
// unchanged to shared.DetectAssistantToolCalls and returns its parse result.
func detectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult {
|
||||
return shared.DetectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking, toolNames)
|
||||
}
|
||||
|
||||
@@ -80,14 +80,13 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
if !stdReq.Stream {
|
||||
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
})
|
||||
sessionID = result.SessionID
|
||||
if outErr != nil {
|
||||
if historySession != nil {
|
||||
historySession.error(outErr.Status, outErr.Message, outErr.Code, result.Turn.Thinking, result.Turn.Text)
|
||||
historySession.error(outErr.Status, outErr.Message, outErr.Code, historyThinkingForArchive(result.Turn.RawThinking, result.Turn.DetectionThinking, result.Turn.Thinking), historyTextForArchive(result.Turn.RawText, result.Turn.Text))
|
||||
}
|
||||
writeOpenAIErrorWithCode(w, outErr.Status, outErr.Message, outErr.Code)
|
||||
return
|
||||
@@ -96,7 +95,7 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
|
||||
respBody["usage"] = assistantturn.OpenAIChatUsage(result.Turn)
|
||||
finishReason := assistantturn.FinalizeTurn(result.Turn, assistantturn.FinalizeOptions{}).FinishReason
|
||||
if historySession != nil {
|
||||
historySession.success(http.StatusOK, result.Turn.Thinking, result.Turn.Text, finishReason, assistantturn.OpenAIChatUsage(result.Turn))
|
||||
historySession.success(http.StatusOK, historyThinkingForArchive(result.Turn.RawThinking, result.Turn.DetectionThinking, result.Turn.Thinking), historyTextForArchive(result.Turn.RawText, result.Turn.Text), finishReason, assistantturn.OpenAIChatUsage(result.Turn))
|
||||
}
|
||||
writeJSON(w, http.StatusOK, respBody)
|
||||
return
|
||||
@@ -115,7 +114,7 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
streamReq := start.Request
|
||||
refFileTokens := streamReq.RefFileTokens
|
||||
h.handleStreamWithRetry(w, r, a, start.Response, start.Payload, start.Pow, sessionID, streamReq.ResponseModel, streamReq.PromptTokenText, refFileTokens, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.ToolChoice, historySession)
|
||||
h.handleStreamWithRetry(w, r, a, start.Response, start.Payload, start.Pow, sessionID, &sessionID, streamReq.ResponseModel, streamReq.PromptTokenText, refFileTokens, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.ToolChoice, historySession)
|
||||
}
|
||||
|
||||
func (h *Handler) autoDeleteRemoteSession(ctx context.Context, a *auth.RequestAuth, sessionID string) {
|
||||
@@ -164,20 +163,19 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co
|
||||
result := sse.CollectStream(resp, thinkingEnabled, true)
|
||||
|
||||
turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{
|
||||
Model: model,
|
||||
Prompt: finalPrompt,
|
||||
RefFileTokens: refFileTokens,
|
||||
SearchEnabled: searchEnabled,
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
ToolNames: toolNames,
|
||||
ToolsRaw: toolsRaw,
|
||||
ToolChoice: promptcompat.DefaultToolChoicePolicy(),
|
||||
Model: model,
|
||||
Prompt: finalPrompt,
|
||||
RefFileTokens: refFileTokens,
|
||||
SearchEnabled: searchEnabled,
|
||||
ToolNames: toolNames,
|
||||
ToolsRaw: toolsRaw,
|
||||
ToolChoice: promptcompat.DefaultToolChoicePolicy(),
|
||||
})
|
||||
outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{})
|
||||
if outcome.ShouldFail {
|
||||
status, message, code := outcome.Error.Status, outcome.Error.Message, outcome.Error.Code
|
||||
if historySession != nil {
|
||||
historySession.error(status, message, code, turn.Thinking, turn.Text)
|
||||
historySession.error(status, message, code, historyThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking), historyTextForArchive(turn.RawText, turn.Text))
|
||||
}
|
||||
writeOpenAIErrorWithCode(w, status, message, code)
|
||||
return
|
||||
@@ -185,7 +183,7 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co
|
||||
respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, finalPrompt, turn.Thinking, turn.Text, turn.ToolCalls, toolsRaw)
|
||||
respBody["usage"] = assistantturn.OpenAIChatUsage(turn)
|
||||
if historySession != nil {
|
||||
historySession.success(http.StatusOK, turn.Thinking, turn.Text, outcome.FinishReason, assistantturn.OpenAIChatUsage(turn))
|
||||
historySession.success(http.StatusOK, historyThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking), historyTextForArchive(turn.RawText, turn.Text), outcome.FinishReason, assistantturn.OpenAIChatUsage(turn))
|
||||
}
|
||||
writeJSON(w, http.StatusOK, respBody)
|
||||
}
|
||||
@@ -253,7 +251,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
|
||||
OnParsed: func(parsed sse.LineResult) streamengine.ParsedDecision {
|
||||
decision := streamRuntime.onParsed(parsed)
|
||||
if historySession != nil {
|
||||
historySession.progress(streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String())
|
||||
historySession.progress(streamRuntime.historyThinking(), streamRuntime.historyText())
|
||||
}
|
||||
return decision
|
||||
},
|
||||
@@ -267,14 +265,15 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
|
||||
return
|
||||
}
|
||||
if streamRuntime.finalErrorMessage != "" {
|
||||
historySession.error(streamRuntime.finalErrorStatus, streamRuntime.finalErrorMessage, streamRuntime.finalErrorCode, streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String())
|
||||
historySession.error(streamRuntime.finalErrorStatus, streamRuntime.finalErrorMessage, streamRuntime.finalErrorCode, streamRuntime.historyThinking(), streamRuntime.historyText())
|
||||
return
|
||||
}
|
||||
historySession.success(http.StatusOK, streamRuntime.finalThinking, streamRuntime.finalText, streamRuntime.finalFinishReason, streamRuntime.finalUsage)
|
||||
historySession.success(http.StatusOK, streamRuntime.historyThinking(), streamRuntime.historyText(), streamRuntime.finalFinishReason, streamRuntime.finalUsage)
|
||||
},
|
||||
OnContextDone: func() {
|
||||
streamRuntime.markContextCancelled()
|
||||
if historySession != nil {
|
||||
historySession.stopped(streamRuntime.accumulator.Thinking.String(), streamRuntime.accumulator.Text.String(), string(streamengine.StopReasonContextCancelled))
|
||||
historySession.stopped(streamRuntime.historyThinking(), streamRuntime.historyText(), string(streamengine.StopReasonContextCancelled))
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
@@ -85,8 +85,7 @@ func streamFinishReason(frames []map[string]any) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Backward-compatible alias for historical test name used in CI logs.
|
||||
func TestHandleNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) {
|
||||
func TestHandleNonStreamSingleAttemptReturns503WhenUpstreamOutputEmpty(t *testing.T) {
|
||||
h := &Handler{}
|
||||
resp := makeSSEHTTPResponse(
|
||||
`data: {"p":"response/content","v":""}`,
|
||||
@@ -95,17 +94,17 @@ func TestHandleNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) {
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
h.handleNonStream(rec, resp, "cid-empty", "deepseek-v4-flash", "prompt", 0, false, false, nil, nil, nil)
|
||||
if rec.Code != http.StatusTooManyRequests {
|
||||
t.Fatalf("expected status 429 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
|
||||
if rec.Code != http.StatusServiceUnavailable {
|
||||
t.Fatalf("expected status 503 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
out := decodeJSONBody(t, rec.Body.String())
|
||||
errObj, _ := out["error"].(map[string]any)
|
||||
if asString(errObj["code"]) != "upstream_empty_output" {
|
||||
t.Fatalf("expected code=upstream_empty_output, got %#v", out)
|
||||
if asString(errObj["code"]) != "upstream_unavailable" {
|
||||
t.Fatalf("expected code=upstream_unavailable, got %#v", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
|
||||
func TestHandleNonStreamSingleAttemptReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
|
||||
h := &Handler{}
|
||||
resp := makeSSEHTTPResponse(
|
||||
`data: {"code":"content_filter"}`,
|
||||
@@ -124,7 +123,7 @@ func TestHandleNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutp
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
|
||||
func TestHandleNonStreamSingleAttemptReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
|
||||
h := &Handler{}
|
||||
resp := makeSSEHTTPResponse(
|
||||
`data: {"p":"response/thinking_content","v":"Only thinking"}`,
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
package chat
|
||||
|
||||
// addRefFileTokensToUsage folds the token estimate for inline-uploaded files into
// the "usage" map of a response object, so that the reported counts also cover file
// content the upstream model processes but that is not part of the prompt text.
//
// The call is a no-op when refFileTokens is not positive, when obj is nil, or when
// obj["usage"] is absent or is not a non-nil map[string]any. Otherwise
// refFileTokens is added in place to each of "input_tokens", "prompt_tokens" and
// "total_tokens" that is present and holds an int; entries of any other type are
// left untouched.
func addRefFileTokensToUsage(obj map[string]any, refFileTokens int) {
	if obj == nil || refFileTokens <= 0 {
		return
	}
	usage, isMap := obj["usage"].(map[string]any)
	if !isMap || usage == nil {
		return
	}
	// bump adds the file tokens to one counter, but only when it is stored as an int.
	bump := func(field string) {
		if count, isInt := usage[field].(int); isInt {
			usage[field] = count + refFileTokens
		}
	}
	bump("input_tokens")
	bump("prompt_tokens")
	bump("total_tokens")
}
|
||||
@@ -84,7 +84,7 @@ func TestBuildOpenAICurrentInputContextTranscriptUsesNumberedHistorySections(t *
|
||||
"latest user turn",
|
||||
"[reasoning_content]",
|
||||
"hidden reasoning",
|
||||
"<|DSML|tool_calls>",
|
||||
"<|DSML|tool_calls>",
|
||||
} {
|
||||
if !strings.Contains(transcript, want) {
|
||||
t.Fatalf("expected transcript to contain %q, got %q", want, transcript)
|
||||
|
||||
@@ -7,58 +7,56 @@ import (
|
||||
"time"
|
||||
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/completionruntime"
|
||||
"ds2api/internal/config"
|
||||
dsprotocol "ds2api/internal/deepseek/protocol"
|
||||
"ds2api/internal/promptcompat"
|
||||
"ds2api/internal/responsehistory"
|
||||
streamengine "ds2api/internal/stream"
|
||||
)
|
||||
|
||||
func (h *Handler) handleResponsesStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, owner, responseID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, traceID string) {
|
||||
streamRuntime, initialType, ok := h.prepareResponsesStreamRuntime(w, resp, owner, responseID, model, finalPrompt, refFileTokens, thinkingEnabled, searchEnabled, toolNames, toolsRaw, toolChoice, traceID)
|
||||
func (h *Handler) handleResponsesStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, owner, responseID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, traceID string, historySession *responsehistory.Session) {
|
||||
streamRuntime, initialType, ok := h.prepareResponsesStreamRuntime(w, resp, owner, responseID, model, finalPrompt, refFileTokens, thinkingEnabled, searchEnabled, toolNames, toolsRaw, toolChoice, traceID, historySession)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
attempts := 0
|
||||
currentResp := resp
|
||||
for {
|
||||
terminalWritten, retryable := h.consumeResponsesStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, attempts < emptyOutputRetryMaxAttempts())
|
||||
if terminalWritten {
|
||||
logResponsesStreamTerminal(streamRuntime, attempts)
|
||||
return
|
||||
}
|
||||
if !retryable || !emptyOutputRetryEnabled() || attempts >= emptyOutputRetryMaxAttempts() {
|
||||
completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{
|
||||
Surface: "responses",
|
||||
Stream: true,
|
||||
RetryEnabled: emptyOutputRetryEnabled(),
|
||||
RetryMaxAttempts: emptyOutputRetryMaxAttempts(),
|
||||
MaxAttempts: 3,
|
||||
UsagePrompt: finalPrompt,
|
||||
}, completionruntime.StreamRetryHooks{
|
||||
ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) {
|
||||
return h.consumeResponsesStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, allowDeferEmpty)
|
||||
},
|
||||
Finalize: func(attempts int) {
|
||||
streamRuntime.finalize("stop", false)
|
||||
config.Logger.Info("[openai_empty_retry] terminal empty output", "surface", "responses", "stream", true, "retry_attempts", attempts, "success_source", "none", "error_code", streamRuntime.finalErrorCode)
|
||||
return
|
||||
}
|
||||
attempts++
|
||||
config.Logger.Info("[openai_empty_retry] attempting synthetic retry", "surface", "responses", "stream", true, "retry_attempt", attempts, "parent_message_id", streamRuntime.responseMessageID)
|
||||
retryPow, powErr := h.DS.GetPow(r.Context(), a, 3)
|
||||
if powErr != nil {
|
||||
config.Logger.Warn("[openai_empty_retry] retry PoW fetch failed, falling back to original PoW", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", powErr)
|
||||
retryPow = pow
|
||||
}
|
||||
nextResp, err := h.DS.CallCompletion(r.Context(), a, clonePayloadForEmptyOutputRetry(payload, streamRuntime.responseMessageID), retryPow, 3)
|
||||
if err != nil {
|
||||
streamRuntime.failResponse(http.StatusInternalServerError, "Failed to get completion.", "error")
|
||||
config.Logger.Warn("[openai_empty_retry] retry request failed", "surface", "responses", "stream", true, "retry_attempt", attempts, "error", err)
|
||||
return
|
||||
}
|
||||
if nextResp.StatusCode != http.StatusOK {
|
||||
defer func() { _ = nextResp.Body.Close() }()
|
||||
body, _ := io.ReadAll(nextResp.Body)
|
||||
streamRuntime.failResponse(nextResp.StatusCode, strings.TrimSpace(string(body)), "error")
|
||||
return
|
||||
}
|
||||
streamRuntime.finalPrompt = usagePromptWithEmptyOutputRetry(finalPrompt, attempts)
|
||||
currentResp = nextResp
|
||||
}
|
||||
},
|
||||
ParentMessageID: func() int {
|
||||
return streamRuntime.responseMessageID
|
||||
},
|
||||
OnRetryPrompt: func(prompt string) {
|
||||
streamRuntime.finalPrompt = prompt
|
||||
},
|
||||
OnRetryFailure: func(status int, message, code string) {
|
||||
streamRuntime.failResponse(status, strings.TrimSpace(message), code)
|
||||
},
|
||||
OnTerminal: func(attempts int) {
|
||||
logResponsesStreamTerminal(streamRuntime, attempts)
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func (h *Handler) prepareResponsesStreamRuntime(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, traceID string) (*responsesStreamRuntime, string, bool) {
|
||||
func (h *Handler) prepareResponsesStreamRuntime(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, traceID string, historySession *responsehistory.Session) (*responsesStreamRuntime, string, bool) {
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
if historySession != nil {
|
||||
historySession.Error(resp.StatusCode, strings.TrimSpace(string(body)), "error", "", "")
|
||||
}
|
||||
writeOpenAIError(w, resp.StatusCode, strings.TrimSpace(string(body)))
|
||||
return nil, "", false
|
||||
}
|
||||
@@ -78,7 +76,7 @@ func (h *Handler) prepareResponsesStreamRuntime(w http.ResponseWriter, resp *htt
|
||||
h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence(),
|
||||
toolChoice, traceID, func(obj map[string]any) {
|
||||
h.getResponseStore().put(owner, responseID, obj)
|
||||
},
|
||||
}, historySession,
|
||||
)
|
||||
streamRuntime.refFileTokens = refFileTokens
|
||||
streamRuntime.sendCreated()
|
||||
|
||||
@@ -47,6 +47,7 @@ func TestConsumeResponsesStreamAttemptMarksContextCancelledState(t *testing.T) {
|
||||
promptcompat.DefaultToolChoicePolicy(),
|
||||
"",
|
||||
nil,
|
||||
nil,
|
||||
)
|
||||
resp := makeResponsesOpenAISSEHTTPResponse(
|
||||
`data: {"p":"response/content","v":"hello"}`,
|
||||
|
||||
@@ -103,14 +103,6 @@ func emptyOutputRetryMaxAttempts() int {
|
||||
return shared.EmptyOutputRetryMaxAttempts()
|
||||
}
|
||||
|
||||
func clonePayloadForEmptyOutputRetry(payload map[string]any, parentMessageID int) map[string]any {
|
||||
return shared.ClonePayloadForEmptyOutputRetry(payload, parentMessageID)
|
||||
}
|
||||
|
||||
func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) string {
|
||||
return shared.UsagePromptWithEmptyOutputRetry(originalPrompt, retryAttempts)
|
||||
}
|
||||
|
||||
func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, seenNames map[int]string) []toolstream.ToolCallDelta {
|
||||
return shared.FilterIncrementalToolCallDeltasByAllowed(deltas, seenNames)
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
dsprotocol "ds2api/internal/deepseek/protocol"
|
||||
openaifmt "ds2api/internal/format/openai"
|
||||
"ds2api/internal/promptcompat"
|
||||
"ds2api/internal/responsehistory"
|
||||
"ds2api/internal/sse"
|
||||
streamengine "ds2api/internal/stream"
|
||||
)
|
||||
@@ -95,16 +96,28 @@ func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
responseID := "resp_" + strings.ReplaceAll(uuid.NewString(), "-", "")
|
||||
historySession := responsehistory.Start(responsehistory.StartParams{
|
||||
Store: h.ChatHistory,
|
||||
Request: r,
|
||||
Auth: a,
|
||||
Surface: "openai.responses",
|
||||
Standard: stdReq,
|
||||
})
|
||||
if !stdReq.Stream {
|
||||
result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
RetryEnabled: true,
|
||||
CurrentInputFile: h.Store,
|
||||
})
|
||||
if outErr != nil {
|
||||
if historySession != nil {
|
||||
historySession.ErrorTurn(outErr.Status, outErr.Message, outErr.Code, result.Turn)
|
||||
}
|
||||
writeOpenAIErrorWithCode(w, outErr.Status, outErr.Message, outErr.Code)
|
||||
return
|
||||
}
|
||||
if historySession != nil {
|
||||
historySession.SuccessTurn(http.StatusOK, result.Turn, assistantturn.OpenAIResponsesUsage(result.Turn))
|
||||
}
|
||||
responseObj := openaifmt.BuildResponseObjectWithToolCalls(responseID, stdReq.ResponseModel, result.Turn.Prompt, result.Turn.Thinking, result.Turn.Text, result.Turn.ToolCalls, stdReq.ToolsRaw)
|
||||
responseObj["usage"] = assistantturn.OpenAIResponsesUsage(result.Turn)
|
||||
h.getResponseStore().put(owner, responseID, responseObj)
|
||||
@@ -116,13 +129,16 @@ func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) {
|
||||
CurrentInputFile: h.Store,
|
||||
})
|
||||
if outErr != nil {
|
||||
if historySession != nil {
|
||||
historySession.Error(outErr.Status, outErr.Message, outErr.Code, "", "")
|
||||
}
|
||||
writeOpenAIErrorWithCode(w, outErr.Status, outErr.Message, outErr.Code)
|
||||
return
|
||||
}
|
||||
|
||||
streamReq := start.Request
|
||||
refFileTokens := streamReq.RefFileTokens
|
||||
h.handleResponsesStreamWithRetry(w, r, a, start.Response, start.Payload, start.Pow, owner, responseID, streamReq.ResponseModel, streamReq.PromptTokenText, refFileTokens, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.ToolChoice, traceID)
|
||||
h.handleResponsesStreamWithRetry(w, r, a, start.Response, start.Payload, start.Pow, owner, responseID, streamReq.ResponseModel, streamReq.PromptTokenText, refFileTokens, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.ToolChoice, traceID, historySession)
|
||||
}
|
||||
|
||||
func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, traceID string) {
|
||||
@@ -135,14 +151,13 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
|
||||
result := sse.CollectStream(resp, thinkingEnabled, true)
|
||||
|
||||
turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{
|
||||
Model: model,
|
||||
Prompt: finalPrompt,
|
||||
RefFileTokens: refFileTokens,
|
||||
SearchEnabled: searchEnabled,
|
||||
StripReferenceMarkers: stripReferenceMarkersEnabled(),
|
||||
ToolNames: toolNames,
|
||||
ToolsRaw: toolsRaw,
|
||||
ToolChoice: toolChoice,
|
||||
Model: model,
|
||||
Prompt: finalPrompt,
|
||||
RefFileTokens: refFileTokens,
|
||||
SearchEnabled: searchEnabled,
|
||||
ToolNames: toolNames,
|
||||
ToolsRaw: toolsRaw,
|
||||
ToolChoice: toolChoice,
|
||||
})
|
||||
logResponsesToolPolicyRejection(traceID, toolChoice, turn.ParsedToolCalls, "text")
|
||||
outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{})
|
||||
@@ -198,6 +213,7 @@ func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request,
|
||||
func(obj map[string]any) {
|
||||
h.getResponseStore().put(owner, responseID, obj)
|
||||
},
|
||||
nil,
|
||||
)
|
||||
streamRuntime.refFileTokens = refFileTokens
|
||||
streamRuntime.sendCreated()
|
||||
|
||||
100
internal/httpapi/openai/responses/responses_history_test.go
Normal file
100
internal/httpapi/openai/responses/responses_history_test.go
Normal file
@@ -0,0 +1,100 @@
|
||||
package responses
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/chathistory"
|
||||
dsclient "ds2api/internal/deepseek/client"
|
||||
)
|
||||
|
||||
type responsesHistoryDS struct {
|
||||
payload map[string]any
|
||||
}
|
||||
|
||||
func (d *responsesHistoryDS) CreateSession(context.Context, *auth.RequestAuth, int) (string, error) {
|
||||
return "session-id", nil
|
||||
}
|
||||
|
||||
func (d *responsesHistoryDS) GetPow(context.Context, *auth.RequestAuth, int) (string, error) {
|
||||
return "pow", nil
|
||||
}
|
||||
|
||||
func (d *responsesHistoryDS) UploadFile(context.Context, *auth.RequestAuth, dsclient.UploadFileRequest, int) (*dsclient.UploadFileResult, error) {
|
||||
return &dsclient.UploadFileResult{ID: "file-id"}, nil
|
||||
}
|
||||
|
||||
func (d *responsesHistoryDS) CallCompletion(_ context.Context, _ *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) {
|
||||
d.payload = payload
|
||||
return &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Header: make(http.Header),
|
||||
Body: io.NopCloser(strings.NewReader("data: {\"p\":\"response/content\",\"v\":\"ok\"}\n")),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *responsesHistoryDS) DeleteSessionForToken(context.Context, string, string) (*dsclient.DeleteSessionResult, error) {
|
||||
return &dsclient.DeleteSessionResult{Success: true}, nil
|
||||
}
|
||||
|
||||
func (d *responsesHistoryDS) DeleteAllSessionsForToken(context.Context, string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func TestResponsesRecordsResponseHistory(t *testing.T) {
|
||||
store, resolver := newDirectTokenResolver(t)
|
||||
historyStore := chathistory.New(filepath.Join(t.TempDir(), "history.json"))
|
||||
ds := &responsesHistoryDS{}
|
||||
h := &Handler{
|
||||
Store: store,
|
||||
Auth: resolver,
|
||||
DS: ds,
|
||||
ChatHistory: historyStore,
|
||||
}
|
||||
r := chi.NewRouter()
|
||||
RegisterRoutes(r, h)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(`{"model":"deepseek-v4-flash","input":"hello responses"}`))
|
||||
req.Header.Set("Authorization", "Bearer direct-token")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
if ds.payload == nil {
|
||||
t.Fatalf("expected upstream payload to be sent")
|
||||
}
|
||||
snapshot, err := historyStore.Snapshot()
|
||||
if err != nil {
|
||||
t.Fatalf("snapshot history: %v", err)
|
||||
}
|
||||
if len(snapshot.Items) != 1 {
|
||||
t.Fatalf("expected one history item, got %d", len(snapshot.Items))
|
||||
}
|
||||
item, err := historyStore.Get(snapshot.Items[0].ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get history item: %v", err)
|
||||
}
|
||||
if item.Surface != "openai.responses" {
|
||||
t.Fatalf("unexpected surface: %q", item.Surface)
|
||||
}
|
||||
if !strings.Contains(item.UserInput, "Continue from the latest state in the attached DS2API_HISTORY.txt context.") {
|
||||
t.Fatalf("unexpected user input: %q", item.UserInput)
|
||||
}
|
||||
if !strings.Contains(item.HistoryText, "hello responses") {
|
||||
t.Fatalf("expected original input in persisted history text, got %q", item.HistoryText)
|
||||
}
|
||||
if item.Content != "ok" {
|
||||
t.Fatalf("expected raw upstream content, got %q", item.Content)
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
openaifmt "ds2api/internal/format/openai"
|
||||
"ds2api/internal/httpapi/openai/shared"
|
||||
"ds2api/internal/promptcompat"
|
||||
"ds2api/internal/responsehistory"
|
||||
"ds2api/internal/sse"
|
||||
streamengine "ds2api/internal/stream"
|
||||
"ds2api/internal/toolstream"
|
||||
@@ -61,6 +62,7 @@ type responsesStreamRuntime struct {
|
||||
finalErrorCode string
|
||||
|
||||
persistResponse func(obj map[string]any)
|
||||
history *responsehistory.Session
|
||||
}
|
||||
|
||||
func newResponsesStreamRuntime(
|
||||
@@ -80,6 +82,7 @@ func newResponsesStreamRuntime(
|
||||
toolChoice promptcompat.ToolChoicePolicy,
|
||||
traceID string,
|
||||
persistResponse func(obj map[string]any),
|
||||
history *responsehistory.Session,
|
||||
) *responsesStreamRuntime {
|
||||
return &responsesStreamRuntime{
|
||||
w: w,
|
||||
@@ -106,6 +109,7 @@ func newResponsesStreamRuntime(
|
||||
toolChoice: toolChoice,
|
||||
traceID: traceID,
|
||||
persistResponse: persistResponse,
|
||||
history: history,
|
||||
accumulator: shared.StreamAccumulator{
|
||||
ThinkingEnabled: thinkingEnabled,
|
||||
SearchEnabled: searchEnabled,
|
||||
@@ -138,6 +142,9 @@ func (s *responsesStreamRuntime) failResponse(status int, message, code string)
|
||||
if s.persistResponse != nil {
|
||||
s.persistResponse(failedResp)
|
||||
}
|
||||
if s.history != nil {
|
||||
s.history.Error(status, message, code, responsehistory.ThinkingForArchive(s.accumulator.RawThinking.String(), s.accumulator.ToolDetectionThinking.String(), s.accumulator.Thinking.String()), responsehistory.TextForArchive(s.accumulator.RawText.String(), s.accumulator.Text.String()))
|
||||
}
|
||||
s.sendEvent("response.failed", openaifmt.BuildResponsesFailedPayload(s.responseID, s.model, status, message, code))
|
||||
s.sendDone()
|
||||
}
|
||||
@@ -214,6 +221,15 @@ func (s *responsesStreamRuntime) finalize(finishReason string, deferEmptyOutput
|
||||
if s.persistResponse != nil {
|
||||
s.persistResponse(obj)
|
||||
}
|
||||
if s.history != nil {
|
||||
s.history.Success(
|
||||
http.StatusOK,
|
||||
responsehistory.ThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking),
|
||||
responsehistory.TextForArchive(turn.RawText, turn.Text),
|
||||
outcome.FinishReason,
|
||||
assistantturn.OpenAIResponsesUsage(turn),
|
||||
)
|
||||
}
|
||||
s.sendEvent("response.completed", openaifmt.BuildResponsesCompletedPayload(obj))
|
||||
s.sendDone()
|
||||
return true
|
||||
@@ -272,5 +288,11 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
|
||||
}
|
||||
|
||||
batch.flush()
|
||||
if s.history != nil {
|
||||
s.history.Progress(
|
||||
responsehistory.ThinkingForArchive(s.accumulator.RawThinking.String(), s.accumulator.ToolDetectionThinking.String(), s.accumulator.Thinking.String()),
|
||||
responsehistory.TextForArchive(s.accumulator.RawText.String(), s.accumulator.Text.String()),
|
||||
)
|
||||
}
|
||||
return streamengine.ParsedDecision{ContentSeen: accumulated.ContentSeen}
|
||||
}
|
||||
|
||||
@@ -81,6 +81,22 @@ func (s *responsesStreamRuntime) buildCompletedResponseObject(finalThinking, fin
|
||||
},
|
||||
},
|
||||
})
|
||||
} else if len(calls) > 0 && strings.TrimSpace(finalThinking) != "" {
|
||||
indexed = append(indexed, indexedItem{
|
||||
index: s.ensureMessageOutputIndex(),
|
||||
item: map[string]any{
|
||||
"id": s.ensureMessageItemID(),
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": "completed",
|
||||
"content": []map[string]any{
|
||||
{
|
||||
"type": "reasoning",
|
||||
"text": finalThinking,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
} else if len(calls) == 0 {
|
||||
content := make([]map[string]any, 0, 2)
|
||||
if finalThinking != "" {
|
||||
|
||||
@@ -397,7 +397,7 @@ func TestHandleResponsesNonStreamRequiredToolChoiceIgnoresThinkingToolPayloadWhe
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleResponsesNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T) {
|
||||
func TestHandleResponsesNonStreamSingleAttemptReturns503WhenUpstreamOutputEmpty(t *testing.T) {
|
||||
h := &Handler{}
|
||||
rec := httptest.NewRecorder()
|
||||
resp := &http.Response{
|
||||
@@ -409,17 +409,17 @@ func TestHandleResponsesNonStreamReturns429WhenUpstreamOutputEmpty(t *testing.T)
|
||||
}
|
||||
|
||||
h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-v4-flash", "prompt", 0, false, false, nil, nil, promptcompat.DefaultToolChoicePolicy(), "")
|
||||
if rec.Code != http.StatusTooManyRequests {
|
||||
t.Fatalf("expected 429 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
|
||||
if rec.Code != http.StatusServiceUnavailable {
|
||||
t.Fatalf("expected 503 for empty upstream output, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
out := decodeJSONBody(t, rec.Body.String())
|
||||
errObj, _ := out["error"].(map[string]any)
|
||||
if asString(errObj["code"]) != "upstream_empty_output" {
|
||||
t.Fatalf("expected code=upstream_empty_output, got %#v", out)
|
||||
if asString(errObj["code"]) != "upstream_unavailable" {
|
||||
t.Fatalf("expected code=upstream_unavailable, got %#v", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleResponsesNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
|
||||
func TestHandleResponsesNonStreamSingleAttemptReturnsContentFilterErrorWhenUpstreamFilteredWithoutOutput(t *testing.T) {
|
||||
h := &Handler{}
|
||||
rec := httptest.NewRecorder()
|
||||
resp := &http.Response{
|
||||
@@ -441,7 +441,7 @@ func TestHandleResponsesNonStreamReturnsContentFilterErrorWhenUpstreamFilteredWi
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleResponsesNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
|
||||
func TestHandleResponsesNonStreamSingleAttemptReturns429WhenUpstreamHasOnlyThinking(t *testing.T) {
|
||||
h := &Handler{}
|
||||
rec := httptest.NewRecorder()
|
||||
resp := &http.Response{
|
||||
|
||||
@@ -89,11 +89,11 @@ func (a *StreamAccumulator) applyTextPart(text string) StreamPartDelta {
|
||||
}
|
||||
a.RawText.WriteString(rawTrimmed)
|
||||
delta := StreamPartDelta{Type: "text", RawText: rawTrimmed}
|
||||
cleanedText := CleanVisibleOutput(rawTrimmed, a.StripReferenceMarkers)
|
||||
if a.SearchEnabled && sse.IsCitation(cleanedText) {
|
||||
if a.SearchEnabled && sse.IsCitation(rawTrimmed) {
|
||||
delta.CitationOnly = true
|
||||
return delta
|
||||
}
|
||||
cleanedText := CleanVisibleOutput(rawTrimmed, a.StripReferenceMarkers)
|
||||
trimmed := sse.TrimContinuationOverlapFromBuilder(&a.Text, cleanedText)
|
||||
if trimmed == "" {
|
||||
return delta
|
||||
|
||||
@@ -95,3 +95,21 @@ func TestStreamAccumulatorSuppressesCitationTextWhenSearchEnabled(t *testing.T)
|
||||
t.Fatalf("visible text = %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamAccumulatorStripsInlineCitationAndReferenceMarkers(t *testing.T) {
|
||||
acc := StreamAccumulator{SearchEnabled: true, StripReferenceMarkers: true}
|
||||
result := acc.Apply(sse.LineResult{
|
||||
Parsed: true,
|
||||
Parts: []sse.ContentPart{{Type: "text", Text: "广州天气[citation:1] 多云[reference:0]"}},
|
||||
})
|
||||
|
||||
if !result.ContentSeen {
|
||||
t.Fatalf("expected marker chunk to mark upstream content")
|
||||
}
|
||||
if got := acc.Text.String(); got != "广州天气 多云" {
|
||||
t.Fatalf("visible text = %q", got)
|
||||
}
|
||||
if len(result.Parts) != 1 || result.Parts[0].VisibleText != "广州天气 多云" {
|
||||
t.Fatalf("unexpected parts: %#v", result.Parts)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ func UpstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int,
|
||||
if thinking != "" {
|
||||
return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned reasoning without visible output.", "upstream_empty_output"
|
||||
}
|
||||
return http.StatusTooManyRequests, "Upstream account hit a rate limit and returned empty output.", "upstream_empty_output"
|
||||
return http.StatusServiceUnavailable, "Upstream service is unavailable and returned no output.", "upstream_unavailable"
|
||||
}
|
||||
|
||||
func WriteUpstreamEmptyOutputError(w http.ResponseWriter, text, thinking string, contentFilter bool) bool {
|
||||
|
||||
@@ -274,12 +274,12 @@ func TestChatCompletionsStreamEmitsFailureFrameWhenUpstreamOutputEmpty(t *testin
|
||||
}
|
||||
last := frames[0]
|
||||
statusCode, ok := last["status_code"].(float64)
|
||||
if !ok || int(statusCode) != http.StatusTooManyRequests {
|
||||
t.Fatalf("expected status_code=429, got %#v body=%s", last["status_code"], rec.Body.String())
|
||||
if !ok || int(statusCode) != http.StatusServiceUnavailable {
|
||||
t.Fatalf("expected status_code=503, got %#v body=%s", last["status_code"], rec.Body.String())
|
||||
}
|
||||
errObj, _ := last["error"].(map[string]any)
|
||||
if asString(errObj["code"]) != "upstream_empty_output" {
|
||||
t.Fatalf("expected code=upstream_empty_output, got %#v", last)
|
||||
if asString(errObj["code"]) != "upstream_unavailable" {
|
||||
t.Fatalf("expected code=upstream_unavailable, got %#v", last)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -345,7 +345,7 @@ func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) {
|
||||
|
||||
func TestChatCompletionsNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
ds := &streamStatusDSSeqStub{resps: []*http.Response{
|
||||
makeOpenAISSEHTTPResponse(`data: {"response_message_id":99}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"response_message_id":99,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
|
||||
}}
|
||||
h := &openAITestSurface{
|
||||
@@ -496,7 +496,7 @@ func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
|
||||
func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
ds := &streamStatusDSSeqStub{resps: []*http.Response{
|
||||
makeOpenAISSEHTTPResponse(`data: {"response_message_id":88}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"response_message_id":88,"p":"response/thinking_content","v":"plan"}`, "data: [DONE]"),
|
||||
makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
|
||||
}}
|
||||
h := &openAITestSurface{
|
||||
@@ -537,8 +537,15 @@ func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
|
||||
if len(content) == 0 {
|
||||
t.Fatalf("expected content entries, got %#v", item)
|
||||
}
|
||||
textEntry, _ := content[0].(map[string]any)
|
||||
if asString(textEntry["type"]) != "output_text" || asString(textEntry["text"]) != "visible" {
|
||||
var textEntry map[string]any
|
||||
for _, entry := range content {
|
||||
obj, _ := entry.(map[string]any)
|
||||
if asString(obj["type"]) == "output_text" {
|
||||
textEntry = obj
|
||||
break
|
||||
}
|
||||
}
|
||||
if asString(textEntry["text"]) != "visible" {
|
||||
t.Fatalf("expected visible text entry, got %#v", content)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,13 +19,15 @@ const BLOCKED_CORS_REQUEST_HEADERS = new Set([
|
||||
function setCorsHeaders(res, req) {
|
||||
const origin = asString(readHeader(req, 'origin'));
|
||||
res.setHeader('Access-Control-Allow-Origin', origin || '*');
|
||||
if (origin) {
|
||||
addVaryHeader(res, 'Origin');
|
||||
}
|
||||
res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS, PUT, DELETE');
|
||||
res.setHeader('Access-Control-Max-Age', '600');
|
||||
res.setHeader(
|
||||
'Access-Control-Allow-Headers',
|
||||
buildCORSAllowHeaders(req),
|
||||
);
|
||||
addVaryHeader(res, 'Origin');
|
||||
addVaryHeader(res, 'Access-Control-Request-Headers');
|
||||
if (asString(readHeader(req, 'access-control-request-private-network')).toLowerCase() === 'true') {
|
||||
res.setHeader('Access-Control-Allow-Private-Network', 'true');
|
||||
|
||||
@@ -88,7 +88,7 @@ function isVercelRuntime() {
|
||||
|
||||
function isNodeStreamSupportedPath(rawURL) {
|
||||
const path = extractPathname(rawURL);
|
||||
return path === '/v1/chat/completions';
|
||||
return path === '/v1/chat/completions' || path === '/chat/completions';
|
||||
}
|
||||
|
||||
function extractPathname(rawURL) {
|
||||
|
||||
@@ -621,7 +621,7 @@ function stripReferenceMarkersText(text) {
|
||||
if (!text) {
|
||||
return text;
|
||||
}
|
||||
return text.replace(/\[reference:\s*\d+\]/gi, '');
|
||||
return text.replace(/\[(?:citation|reference):\s*\d+\]/gi, '');
|
||||
}
|
||||
|
||||
function asString(v) {
|
||||
|
||||
@@ -641,9 +641,9 @@ function upstreamEmptyOutputDetail(contentFilter, _text, thinking) {
|
||||
};
|
||||
}
|
||||
return {
|
||||
status: 429,
|
||||
message: 'Upstream account hit a rate limit and returned empty output.',
|
||||
code: 'upstream_empty_output',
|
||||
status: 503,
|
||||
message: 'Upstream service is unavailable and returned no output.',
|
||||
code: 'upstream_unavailable',
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -113,9 +113,10 @@ function filterToolCallsDetailed(parsed, toolNames) {
|
||||
if (!tc || !tc.name) {
|
||||
continue;
|
||||
}
|
||||
const input = tc.input && typeof tc.input === 'object' && !Array.isArray(tc.input) ? tc.input : {};
|
||||
calls.push({
|
||||
name: tc.name,
|
||||
input: tc.input && typeof tc.input === 'object' && !Array.isArray(tc.input) ? tc.input : {},
|
||||
input,
|
||||
});
|
||||
}
|
||||
return { calls, rejectedToolNames: [] };
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
'use strict';
|
||||
|
||||
const CDATA_PATTERN = /^<!\[CDATA\[([\s\S]*?)]]>$/i;
|
||||
const CDATA_PATTERN = /^(?:<|〈)!\[CDATA\[([\s\S]*?)]](?:>|>|〉)$/i;
|
||||
const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi;
|
||||
const TOOL_MARKUP_NAMES = [
|
||||
{ raw: 'tool_calls', canonical: 'tool_calls' },
|
||||
@@ -102,9 +102,10 @@ function updateCDATAStateLine(inCDATA, line) {
|
||||
let state = inCDATA;
|
||||
while (pos < lower.length) {
|
||||
if (state) {
|
||||
const end = lower.indexOf(']]>', pos);
|
||||
const cdataEnd = findCDATAEnd(lower, pos);
|
||||
const end = cdataEnd.index;
|
||||
if (end < 0) return true;
|
||||
pos = end + ']]>'.length;
|
||||
pos = end + cdataEnd.len;
|
||||
state = false;
|
||||
continue;
|
||||
}
|
||||
@@ -252,8 +253,9 @@ function replaceDSMLToolMarkupOutsideIgnored(text) {
|
||||
const tag = scanToolMarkupTagAt(raw, i);
|
||||
if (tag) {
|
||||
if (tag.dsmlLike) {
|
||||
out += `<${tag.closing ? '/' : ''}${tag.name}${raw.slice(tag.nameEnd, tag.end + 1)}`;
|
||||
if (raw[tag.end] !== '>') {
|
||||
const tail = normalizeToolMarkupTagTailForXML(raw.slice(tag.nameEnd, tag.end + 1));
|
||||
out += `<${tag.closing ? '/' : ''}${tag.name}${tail}`;
|
||||
if (!tail.endsWith('>')) {
|
||||
out += '>';
|
||||
}
|
||||
} else {
|
||||
@@ -409,11 +411,12 @@ function findMatchingXmlEndTagOutsideCDATA(text, tag, from) {
|
||||
|
||||
function skipXmlIgnoredSection(lower, i) {
|
||||
if (lower.startsWith('<![cdata[', i)) {
|
||||
const end = lower.indexOf(']]>', i + '<![cdata['.length);
|
||||
const cdataEnd = findCDATAEnd(lower, i + '<![cdata['.length);
|
||||
const end = cdataEnd.index;
|
||||
if (end < 0) {
|
||||
return { advanced: false, blocked: true, next: i };
|
||||
}
|
||||
return { advanced: true, blocked: false, next: end + ']]>'.length };
|
||||
return { advanced: true, blocked: false, next: end + cdataEnd.len };
|
||||
}
|
||||
if (lower.startsWith('<!--', i)) {
|
||||
const end = lower.indexOf('-->', i + '<!--'.length);
|
||||
@@ -425,14 +428,34 @@ function skipXmlIgnoredSection(lower, i) {
|
||||
return { advanced: false, blocked: false, next: i };
|
||||
}
|
||||
|
||||
function findCDATAEnd(text, from) {
|
||||
const ascii = text.indexOf(']]>', from);
|
||||
const fullwidth = text.indexOf(']]>', from);
|
||||
const cjk = text.indexOf(']]〉', from);
|
||||
if (ascii < 0 && fullwidth < 0 && cjk < 0) {
|
||||
return { index: -1, len: 0 };
|
||||
}
|
||||
let best = { index: -1, len: 0 };
|
||||
for (const candidate of [
|
||||
{ index: ascii, len: ']]>'.length },
|
||||
{ index: fullwidth, len: ']]>'.length },
|
||||
{ index: cjk, len: ']]〉'.length },
|
||||
]) {
|
||||
if (candidate.index >= 0 && (best.index < 0 || candidate.index < best.index)) {
|
||||
best = candidate;
|
||||
}
|
||||
}
|
||||
return best;
|
||||
}
|
||||
|
||||
function scanToolMarkupTagAt(text, start) {
|
||||
const raw = toStringSafe(text);
|
||||
if (!raw || start < 0 || start >= raw.length || raw[start] !== '<') {
|
||||
if (!raw || start < 0 || start >= raw.length || normalizeFullwidthASCIIChar(raw[start]) !== '<') {
|
||||
return null;
|
||||
}
|
||||
const lower = raw.toLowerCase();
|
||||
let i = start + 1;
|
||||
while (i < raw.length && raw[i] === '<') {
|
||||
while (i < raw.length && normalizeFullwidthASCIIChar(raw[i]) === '<') {
|
||||
i += 1;
|
||||
}
|
||||
const closing = raw[i] === '/';
|
||||
@@ -440,11 +463,19 @@ function scanToolMarkupTagAt(text, start) {
|
||||
i += 1;
|
||||
}
|
||||
const prefix = consumeToolMarkupNamePrefix(raw, lower, i);
|
||||
const prefixStart = i;
|
||||
i = prefix.next;
|
||||
const dsmlLike = prefix.dsmlLike;
|
||||
const { name, len } = matchToolMarkupName(lower, i, dsmlLike);
|
||||
let dsmlLike = prefix.dsmlLike;
|
||||
let { name, len } = matchToolMarkupName(raw, i, dsmlLike);
|
||||
if (!name) {
|
||||
return null;
|
||||
const fallback = matchToolMarkupNameAfterArbitraryPrefix(raw, prefixStart);
|
||||
if (!fallback.ok) {
|
||||
return null;
|
||||
}
|
||||
name = fallback.name;
|
||||
i = fallback.start;
|
||||
len = fallback.len;
|
||||
dsmlLike = true;
|
||||
}
|
||||
const originalNameEnd = i + len;
|
||||
let nameEnd = originalNameEnd;
|
||||
@@ -541,7 +572,7 @@ function findPartialToolMarkupStart(text) {
|
||||
}
|
||||
const start = includeDuplicateLeadingLessThan(raw, lastLT);
|
||||
const tail = raw.slice(start);
|
||||
if (tail.includes('>')) {
|
||||
if (tail.includes('>') || tail.includes('>')) {
|
||||
return -1;
|
||||
}
|
||||
return isPartialToolMarkupTagPrefix(tail) ? start : -1;
|
||||
@@ -556,7 +587,7 @@ function includeDuplicateLeadingLessThan(text, idx) {
|
||||
}
|
||||
|
||||
function isToolMarkupPipe(ch) {
|
||||
return ch === '|' || ch === '|';
|
||||
return ch === '|' || ch === '|' || ch === '␂' || ch === '\x02';
|
||||
}
|
||||
|
||||
function isPartialToolMarkupTagPrefix(text) {
|
||||
@@ -579,10 +610,13 @@ function isPartialToolMarkupTagPrefix(text) {
|
||||
if (i === raw.length) {
|
||||
return true;
|
||||
}
|
||||
if (hasToolMarkupNamePrefix(lower.slice(i))) {
|
||||
if (hasToolMarkupNamePrefix(raw, i)) {
|
||||
return true;
|
||||
}
|
||||
if ('dsml'.startsWith(lower.slice(i))) {
|
||||
if (normalizedASCIITailAt(raw, i).startsWith('dsml') || 'dsml'.startsWith(normalizedASCIITailAt(raw, i))) {
|
||||
return true;
|
||||
}
|
||||
if (hasPartialToolMarkupNameAfterArbitraryPrefix(raw, i)) {
|
||||
return true;
|
||||
}
|
||||
const next = consumeToolMarkupNamePrefixOnce(raw, lower, i);
|
||||
@@ -607,6 +641,61 @@ function consumeToolMarkupNamePrefix(raw, lower, idx) {
|
||||
}
|
||||
}
|
||||
|
||||
function matchToolMarkupNameAfterArbitraryPrefix(raw, start) {
|
||||
for (let idx = start; idx < raw.length;) {
|
||||
if (isToolMarkupTagTerminator(raw, idx)) {
|
||||
return { ok: false };
|
||||
}
|
||||
for (const name of TOOL_MARKUP_NAMES) {
|
||||
const matched = matchNormalizedASCII(raw, idx, name.raw);
|
||||
if (!matched.ok) continue;
|
||||
if (!toolMarkupPrefixAllowsLocalName(raw.slice(start, idx))) continue;
|
||||
return { ok: true, name: name.canonical, start: idx, len: matched.len };
|
||||
}
|
||||
idx += 1;
|
||||
}
|
||||
return { ok: false };
|
||||
}
|
||||
|
||||
function hasPartialToolMarkupNameAfterArbitraryPrefix(raw, start) {
|
||||
for (let idx = start; idx < raw.length;) {
|
||||
if (isToolMarkupTagTerminator(raw, idx)) {
|
||||
return false;
|
||||
}
|
||||
if (toolMarkupPrefixAllowsLocalName(raw.slice(start, idx)) && hasToolMarkupNamePrefix(raw, idx)) {
|
||||
return true;
|
||||
}
|
||||
if (toolMarkupPrefixAllowsLocalName(raw.slice(start, idx)) && hasDSMLNamePrefixOrPartial(raw, idx)) {
|
||||
return true;
|
||||
}
|
||||
idx += 1;
|
||||
}
|
||||
return toolMarkupPrefixAllowsLocalName(raw.slice(start));
|
||||
}
|
||||
|
||||
function hasDSMLNamePrefixOrPartial(raw, start) {
|
||||
const tail = normalizedASCIITailAt(raw, start);
|
||||
return tail.startsWith('dsml') || 'dsml'.startsWith(tail);
|
||||
}
|
||||
|
||||
function toolMarkupPrefixAllowsLocalName(prefix) {
|
||||
if (!prefix) {
|
||||
return false;
|
||||
}
|
||||
if (normalizedASCIITailAt(prefix, 0).includes('dsml')) {
|
||||
return true;
|
||||
}
|
||||
if (/[="'"]/.test(prefix)) {
|
||||
return false;
|
||||
}
|
||||
const previous = normalizeFullwidthASCIIChar(prefix[prefix.length - 1] || '');
|
||||
return !/^[A-Za-z0-9]$/.test(previous);
|
||||
}
|
||||
|
||||
function isToolMarkupTagTerminator(raw, idx) {
|
||||
return raw[idx] === '>' || normalizeFullwidthASCIIChar(raw[idx] || '') === '>';
|
||||
}
|
||||
|
||||
function consumeToolMarkupNamePrefixOnce(raw, lower, idx) {
|
||||
if (idx < raw.length && isToolMarkupPipe(raw[idx])) {
|
||||
return { next: idx + 1, ok: true };
|
||||
@@ -614,32 +703,87 @@ function consumeToolMarkupNamePrefixOnce(raw, lower, idx) {
|
||||
if (idx < raw.length && [' ', '\t', '\r', '\n'].includes(raw[idx])) {
|
||||
return { next: idx + 1, ok: true };
|
||||
}
|
||||
if (lower.startsWith('dsml', idx)) {
|
||||
let next = idx + 'dsml'.length;
|
||||
if (next < raw.length && raw[next] === '-') {
|
||||
const dsml = matchNormalizedASCII(raw, idx, 'dsml');
|
||||
if (dsml.ok) {
|
||||
let next = idx + dsml.len;
|
||||
const sep = normalizeFullwidthASCIIChar(raw[next] || '');
|
||||
if (next < raw.length && (sep === '-' || sep === '_')) {
|
||||
next += 1;
|
||||
}
|
||||
return { next, ok: true };
|
||||
}
|
||||
const arbitrary = consumeArbitraryToolMarkupNamePrefix(raw, lower, idx);
|
||||
if (arbitrary.ok) {
|
||||
return arbitrary;
|
||||
}
|
||||
return { next: idx, ok: false };
|
||||
}
|
||||
|
||||
function hasToolMarkupNamePrefix(lowerTail) {
|
||||
function consumeArbitraryToolMarkupNamePrefix(raw, lower, idx) {
|
||||
const first = consumeToolMarkupPrefixSegment(raw, idx);
|
||||
if (!first.ok) {
|
||||
return { next: idx, ok: false };
|
||||
}
|
||||
let j = first.next;
|
||||
while (j < raw.length) {
|
||||
const segment = consumeToolMarkupPrefixSegment(raw, j);
|
||||
if (!segment.ok) break;
|
||||
j = segment.next;
|
||||
}
|
||||
let k = j;
|
||||
while (k < raw.length && [' ', '\t', '\r', '\n'].includes(raw[k])) {
|
||||
k += 1;
|
||||
}
|
||||
let next = k;
|
||||
let ok = false;
|
||||
if (next < raw.length && isToolMarkupPipe(raw[next])) {
|
||||
next += 1;
|
||||
ok = true;
|
||||
} else if (next < raw.length && ['_', '-'].includes(normalizeFullwidthASCIIChar(raw[next]))) {
|
||||
next += 1;
|
||||
ok = true;
|
||||
}
|
||||
if (!ok) {
|
||||
return { next: idx, ok: false };
|
||||
}
|
||||
while (next < raw.length && [' ', '\t', '\r', '\n'].includes(raw[next])) {
|
||||
next += 1;
|
||||
}
|
||||
if (!hasToolMarkupNamePrefix(raw, next)) {
|
||||
return { next: idx, ok: false };
|
||||
}
|
||||
return { next, ok: true };
|
||||
}
|
||||
|
||||
// Consumes a single prefix character at `idx` when, after fullwidth
// normalization, it is an ASCII letter or digit.
// Returns `{ next: idx + 1, ok: true }` on success and
// `{ next: idx, ok: false }` otherwise (including out-of-range indexes).
function consumeToolMarkupPrefixSegment(raw, idx) {
  const inRange = idx >= 0 && idx < raw.length;
  const accepted = inRange && /^[A-Za-z0-9]$/.test(normalizeFullwidthASCIIChar(raw[idx]));
  return accepted ? { next: idx + 1, ok: true } : { next: idx, ok: false };
}
|
||||
|
||||
function hasToolMarkupNamePrefix(raw, start) {
|
||||
const tail = normalizedASCIITailAt(raw, start);
|
||||
for (const name of TOOL_MARKUP_NAMES) {
|
||||
if (lowerTail.startsWith(name.raw) || name.raw.startsWith(lowerTail)) {
|
||||
if (tail.startsWith(name.raw) || name.raw.startsWith(tail)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
function matchToolMarkupName(lower, start, dsmlLike) {
|
||||
function matchToolMarkupName(raw, start, dsmlLike) {
|
||||
for (const name of TOOL_MARKUP_NAMES) {
|
||||
if (name.dsmlOnly && !dsmlLike) {
|
||||
continue;
|
||||
}
|
||||
if (lower.startsWith(name.raw, start)) {
|
||||
return { name: name.canonical, len: name.raw.length };
|
||||
const matched = matchNormalizedASCII(raw, start, name.raw);
|
||||
if (matched.ok) {
|
||||
return { name: name.canonical, len: matched.len };
|
||||
}
|
||||
}
|
||||
return { name: '', len: 0 };
|
||||
@@ -649,17 +793,18 @@ function findXmlTagEnd(text, from) {
|
||||
let quote = '';
|
||||
for (let i = Math.max(0, from || 0); i < text.length; i += 1) {
|
||||
const ch = text[i];
|
||||
const normalized = normalizeFullwidthASCIIChar(ch);
|
||||
if (quote) {
|
||||
if (ch === quote) {
|
||||
if (normalized === quote) {
|
||||
quote = '';
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (ch === '"' || ch === "'") {
|
||||
quote = ch;
|
||||
if (normalized === '"' || normalized === "'") {
|
||||
quote = normalized;
|
||||
continue;
|
||||
}
|
||||
if (ch === '>') {
|
||||
if (normalized === '>') {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
@@ -670,13 +815,90 @@ function hasXmlTagBoundary(text, idx) {
|
||||
if (idx >= text.length) {
|
||||
return true;
|
||||
}
|
||||
return [' ', '\t', '\n', '\r', '>', '/'].includes(text[idx]);
|
||||
return [' ', '\t', '\n', '\r', '>', '/'].includes(text[idx])
|
||||
|| normalizeFullwidthASCIIChar(text[idx]) === '>';
|
||||
}
|
||||
|
||||
// Reports whether the given start-tag text ends with `/` once surrounding
// whitespace is trimmed, i.e. whether it is written as a self-closing tag.
function isSelfClosingXmlTag(startTag) {
  const trimmed = toStringSafe(startTag).trim();
  return trimmed.endsWith('/');
}
|
||||
|
||||
// Maps one character to its ASCII counterpart when it is a fullwidth form
// (code units 0xFF01..0xFF5E) or one of the two wide angle brackets below.
// Any other input, including empty/undefined, is returned unchanged.
function normalizeFullwidthASCIIChar(ch) {
  if (!ch) {
    return ch;
  }
  switch (ch) {
    case '〈':
      return '<';
    case '〉':
      return '>';
    default:
      break;
  }
  const unit = ch.charCodeAt(0);
  const isFullwidthForm = unit >= 0xff01 && unit <= 0xff5e;
  // Fullwidth forms sit at a fixed offset of 0xFEE0 above their ASCII twins.
  return isFullwidthForm ? String.fromCharCode(unit - 0xfee0) : ch;
}
|
||||
|
||||
// Returns the lowercased, fullwidth-normalized ASCII run of `raw` that starts
// at `start` (clamped to 0) and stops before the first character that does not
// normalize to pure ASCII.
//
// Each character is normalized and lowercased individually. Lowercasing can
// expand one code unit into several (U+0130 lowercases to "i" followed by
// U+0307), so every resulting code unit is checked: inspecting only the first
// one would let the trailing non-ASCII unit leak into the returned tail.
function normalizedASCIITailAt(raw, start) {
  let out = '';
  for (let i = Math.max(0, start || 0); i < raw.length; i += 1) {
    const ch = normalizeFullwidthASCIIChar(raw[i]).toLowerCase();
    if (/[^\x00-\x7f]/.test(ch)) {
      break;
    }
    out += ch;
  }
  return out;
}
|
||||
|
||||
// Case-insensitively matches `expected` against `raw` beginning at `start`,
// treating fullwidth forms in `raw` as their ASCII equivalents.
// Returns `{ ok: true, len }` where `len` is the number of code units consumed
// from `raw`, or `{ ok: false, len: 0 }` on mismatch or when `raw` is too short.
function matchNormalizedASCII(raw, start, expected) {
  const miss = { ok: false, len: 0 };
  let cursor = start;
  for (const wanted of Array.from({ length: expected.length }, (_, k) => expected[k])) {
    if (cursor >= raw.length) {
      return miss;
    }
    const actual = normalizeFullwidthASCIIChar(raw[cursor]).toLowerCase();
    if (actual !== wanted.toLowerCase()) {
      return miss;
    }
    cursor += 1;
  }
  return { ok: true, len: cursor - start };
}
|
||||
|
||||
// Rewrites the tail of a tool-markup tag (everything after the tag name) so it
// can be handed to an XML parser:
//   - inside a quoted attribute value every character is fullwidth-normalized;
//   - quote characters are normalized and toggle the "inside quotes" state;
//   - a pipe that is followed (after optional ASCII whitespace) by `>` is
//     dropped, any other pipe is kept in its ASCII form;
//   - `>`, `/` and `=` are emitted in their ASCII form;
//   - everything else is copied through untouched.
// Non-string input is coerced with `String(tail || '')`.
function normalizeToolMarkupTagTailForXML(tail) {
  const raw = typeof tail === 'string' ? tail : String(tail || '');
  const pieces = [];
  let openQuote = '';

  for (let pos = 0; pos < raw.length; pos += 1) {
    const original = raw[pos];
    const ascii = normalizeFullwidthASCIIChar(original);

    if (openQuote) {
      pieces.push(ascii);
      if (ascii === openQuote) {
        openQuote = '';
      }
      continue;
    }

    if (ascii === '"' || ascii === "'") {
      openQuote = ascii;
      pieces.push(ascii);
      continue;
    }

    if (ascii === '|') {
      // Look past whitespace: a pipe directly before the closing `>` is part
      // of the markup delimiter, not of the tag content.
      let ahead = pos + 1;
      while (ahead < raw.length && [' ', '\t', '\r', '\n'].includes(raw[ahead])) {
        ahead += 1;
      }
      if (normalizeFullwidthASCIIChar(raw[ahead] || '') !== '>') {
        pieces.push(ascii);
      }
      continue;
    }

    const structural = ascii === '>' || ascii === '/' || ascii === '=';
    pieces.push(structural ? ascii : original);
  }

  return pieces.join('');
}
|
||||
|
||||
function parseMarkupInput(raw) {
|
||||
const s = toStringSafe(raw).trim();
|
||||
if (!s) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
'use strict';
|
||||
const { parseToolCalls } = require('./parse');
|
||||
const { parseToolCallsDetailed } = require('./parse');
|
||||
const {
|
||||
findToolMarkupTagOutsideIgnored,
|
||||
findMatchingToolMarkupClose,
|
||||
@@ -27,19 +27,30 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
|
||||
const xmlBlock = captured.slice(openTag.start, closeTag.end + 1);
|
||||
const prefixPart = captured.slice(0, openTag.start);
|
||||
const suffixPart = captured.slice(closeTag.end + 1);
|
||||
const parsed = parseToolCalls(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed) && parsed.length > 0) {
|
||||
const parsed = parseToolCallsDetailed(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed.calls) && parsed.calls.length > 0) {
|
||||
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
||||
if (!best || openTag.start < best.start) {
|
||||
best = {
|
||||
start: openTag.start,
|
||||
prefix: trimmedFence.prefix,
|
||||
calls: parsed,
|
||||
calls: parsed.calls,
|
||||
suffix: trimmedFence.suffix,
|
||||
};
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (parsed.sawToolCallSyntax) {
|
||||
if (!rejected || openTag.start < rejected.start) {
|
||||
rejected = {
|
||||
start: openTag.start,
|
||||
prefix: prefixPart + xmlBlock,
|
||||
suffix: suffixPart,
|
||||
};
|
||||
}
|
||||
searchFrom = openTag.end + 1;
|
||||
continue;
|
||||
}
|
||||
if (!rejected || openTag.start < rejected.start) {
|
||||
rejected = {
|
||||
start: openTag.start,
|
||||
@@ -69,16 +80,19 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
|
||||
const xmlBlock = '<tool_calls>' + captured.slice(invokeTag.start, closeTag.end + 1);
|
||||
const prefixPart = captured.slice(0, invokeTag.start);
|
||||
const suffixPart = captured.slice(closeTag.end + 1);
|
||||
const parsed = parseToolCalls(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed) && parsed.length > 0) {
|
||||
const parsed = parseToolCallsDetailed(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed.calls) && parsed.calls.length > 0) {
|
||||
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
||||
return {
|
||||
ready: true,
|
||||
prefix: trimmedFence.prefix,
|
||||
calls: parsed,
|
||||
calls: parsed.calls,
|
||||
suffix: trimmedFence.suffix,
|
||||
};
|
||||
}
|
||||
if (parsed.sawToolCallSyntax) {
|
||||
return { ready: true, prefix: prefixPart + captured.slice(invokeTag.start, closeTag.end + 1), calls: [], suffix: suffixPart };
|
||||
}
|
||||
return { ready: true, prefix: prefixPart + captured.slice(invokeTag.start, closeTag.end + 1), calls: [], suffix: suffixPart };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,15 @@ var promptXMLTextEscaper = strings.NewReplacer(
|
||||
|
||||
var promptXMLNamePattern = regexp.MustCompile(`^[A-Za-z_][A-Za-z0-9_.:-]*$`)
|
||||
|
||||
// Prompt-visible DSML tag fragments shared by the tool-call history renderer.
// The invoke/parameter "Open" fragments intentionally omit the closing '>' so
// callers can append a name attribute before closing the tag.
const (
|
||||
promptDSMLToolCallsOpen = "<|DSML|tool_calls>"
|
||||
promptDSMLToolCallsClose = "</|DSML|tool_calls>"
|
||||
promptDSMLInvokeOpen = "<|DSML|invoke"
|
||||
promptDSMLInvokeClose = "</|DSML|invoke>"
|
||||
promptDSMLParameterOpen = "<|DSML|parameter"
|
||||
promptDSMLParameterClose = "</|DSML|parameter>"
|
||||
)
|
||||
|
||||
// FormatToolCallsForPrompt renders a tool_calls slice into the prompt-visible
|
||||
// invoke/parameter history block used across adapters.
|
||||
func FormatToolCallsForPrompt(raw any) string {
|
||||
@@ -38,7 +47,7 @@ func FormatToolCallsForPrompt(raw any) string {
|
||||
if len(blocks) == 0 {
|
||||
return ""
|
||||
}
|
||||
return "<|DSML|tool_calls>\n" + strings.Join(blocks, "\n") + "\n</|DSML|tool_calls>"
|
||||
return promptDSMLToolCallsOpen + "\n" + strings.Join(blocks, "\n") + "\n" + promptDSMLToolCallsClose
|
||||
}
|
||||
|
||||
// StringifyToolCallArguments normalizes tool arguments into a compact string
|
||||
@@ -94,12 +103,12 @@ func formatToolCallForPrompt(call map[string]any) string {
|
||||
|
||||
parameters := formatToolCallParametersForPrompt(argsRaw)
|
||||
if parameters == "" {
|
||||
return ` <|DSML|invoke name="` + escapeXMLAttribute(name) + `"></|DSML|invoke>`
|
||||
return ` ` + promptDSMLInvokeOpen + ` name="` + escapeXMLAttribute(name) + `">` + promptDSMLInvokeClose
|
||||
}
|
||||
|
||||
return " <|DSML|invoke name=\"" + escapeXMLAttribute(name) + "\">\n" +
|
||||
return " " + promptDSMLInvokeOpen + " name=\"" + escapeXMLAttribute(name) + "\">\n" +
|
||||
parameters + "\n" +
|
||||
" </|DSML|invoke>"
|
||||
" " + promptDSMLInvokeClose
|
||||
}
|
||||
|
||||
func formatToolCallParametersForPrompt(raw any) string {
|
||||
@@ -113,7 +122,7 @@ func formatToolCallParametersForPrompt(raw any) string {
|
||||
if strings.TrimSpace(fallback) == "" {
|
||||
return ""
|
||||
}
|
||||
return " <|DSML|parameter name=\"content\">" + renderPromptXMLText(fallback) + "</|DSML|parameter>"
|
||||
return " " + promptDSMLParameterOpen + " name=\"content\">" + renderPromptXMLText(fallback) + promptDSMLParameterClose
|
||||
}
|
||||
|
||||
func renderPromptToolParameters(value any, indent string) (string, bool) {
|
||||
@@ -149,9 +158,9 @@ func renderPromptToolParameters(value any, indent string) (string, bool) {
|
||||
}
|
||||
return strings.Join(lines, "\n"), true
|
||||
case string:
|
||||
return indent + `<|DSML|parameter name="content">` + renderPromptXMLText(v) + `</|DSML|parameter>`, true
|
||||
return indent + promptDSMLParameterOpen + ` name="content">` + renderPromptXMLText(v) + promptDSMLParameterClose, true
|
||||
default:
|
||||
return indent + `<|DSML|parameter name="value">` + renderPromptXMLText(fmt.Sprint(v)) + `</|DSML|parameter>`, true
|
||||
return indent + promptDSMLParameterOpen + ` name="value">` + renderPromptXMLText(fmt.Sprint(v)) + promptDSMLParameterClose, true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -162,29 +171,29 @@ func renderPromptParameterNode(name string, value any, indent string) (string, b
|
||||
}
|
||||
switch v := value.(type) {
|
||||
case nil:
|
||||
return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `"></|DSML|parameter>`, true
|
||||
return indent + promptDSMLParameterOpen + ` name="` + escapeXMLAttribute(trimmedName) + `">` + promptDSMLParameterClose, true
|
||||
case map[string]any:
|
||||
body, ok := renderPromptToolXMLBody(v, indent+" ")
|
||||
if !ok {
|
||||
return "", false
|
||||
}
|
||||
if strings.TrimSpace(body) == "" {
|
||||
return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `"></|DSML|parameter>`, true
|
||||
return indent + promptDSMLParameterOpen + ` name="` + escapeXMLAttribute(trimmedName) + `">` + promptDSMLParameterClose, true
|
||||
}
|
||||
return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + "\">\n" + body + "\n" + indent + `</|DSML|parameter>`, true
|
||||
return indent + promptDSMLParameterOpen + ` name="` + escapeXMLAttribute(trimmedName) + "\">\n" + body + "\n" + indent + promptDSMLParameterClose, true
|
||||
case []any:
|
||||
body, ok := renderPromptToolXMLArray(v, indent+" ")
|
||||
if !ok {
|
||||
return "", false
|
||||
}
|
||||
if strings.TrimSpace(body) == "" {
|
||||
return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `"></|DSML|parameter>`, true
|
||||
return indent + promptDSMLParameterOpen + ` name="` + escapeXMLAttribute(trimmedName) + `">` + promptDSMLParameterClose, true
|
||||
}
|
||||
return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + "\">\n" + body + "\n" + indent + `</|DSML|parameter>`, true
|
||||
return indent + promptDSMLParameterOpen + ` name="` + escapeXMLAttribute(trimmedName) + "\">\n" + body + "\n" + indent + promptDSMLParameterClose, true
|
||||
case string:
|
||||
return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `">` + renderPromptXMLText(v) + `</|DSML|parameter>`, true
|
||||
return indent + promptDSMLParameterOpen + ` name="` + escapeXMLAttribute(trimmedName) + `">` + renderPromptXMLText(v) + promptDSMLParameterClose, true
|
||||
default:
|
||||
return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `">` + renderPromptXMLText(fmt.Sprint(v)) + `</|DSML|parameter>`, true
|
||||
return indent + promptDSMLParameterOpen + ` name="` + escapeXMLAttribute(trimmedName) + `">` + renderPromptXMLText(fmt.Sprint(v)) + promptDSMLParameterClose, true
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ func TestFormatToolCallsForPromptDSML(t *testing.T) {
|
||||
if got == "" {
|
||||
t.Fatal("expected non-empty formatted tool calls")
|
||||
}
|
||||
if got != "<|DSML|tool_calls>\n <|DSML|invoke name=\"search_web\">\n <|DSML|parameter name=\"query\"><![CDATA[latest]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>" {
|
||||
if got != "<|DSML|tool_calls>\n <|DSML|invoke name=\"search_web\">\n <|DSML|parameter name=\"query\"><![CDATA[latest]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>" {
|
||||
t.Fatalf("unexpected formatted tool call DSML: %q", got)
|
||||
}
|
||||
}
|
||||
@@ -34,7 +34,7 @@ func TestFormatToolCallsForPromptEscapesXMLEntities(t *testing.T) {
|
||||
"arguments": `{"q":"a < b && c > d"}`,
|
||||
},
|
||||
})
|
||||
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"search<&>\">\n <|DSML|parameter name=\"q\"><![CDATA[a < b && c > d]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
|
||||
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"search<&>\">\n <|DSML|parameter name=\"q\"><![CDATA[a < b && c > d]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
|
||||
if got != want {
|
||||
t.Fatalf("unexpected escaped tool call XML: %q", got)
|
||||
}
|
||||
@@ -50,7 +50,7 @@ func TestFormatToolCallsForPromptUsesCDATAForMultilineContent(t *testing.T) {
|
||||
},
|
||||
},
|
||||
})
|
||||
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"write_file\">\n <|DSML|parameter name=\"content\"><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></|DSML|parameter>\n <|DSML|parameter name=\"path\"><![CDATA[script.sh]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
|
||||
want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"write_file\">\n <|DSML|parameter name=\"content\"><![CDATA[#!/bin/bash\nprintf \"hello\"\n]]></|DSML|parameter>\n <|DSML|parameter name=\"path\"><![CDATA[script.sh]]></|DSML|parameter>\n </|DSML|invoke>\n</|DSML|tool_calls>"
|
||||
if got != want {
|
||||
t.Fatalf("unexpected multiline cdata tool call XML: %q", got)
|
||||
}
|
||||
|
||||
@@ -38,10 +38,10 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes
|
||||
t.Fatalf("expected 4 normalized messages with assistant tool history preserved, got %d", len(normalized))
|
||||
}
|
||||
assistantContent, _ := normalized[2]["content"].(string)
|
||||
if !strings.Contains(assistantContent, "<|DSML|tool_calls>") {
|
||||
if !strings.Contains(assistantContent, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("assistant tool history should be preserved in DSML form, got %q", assistantContent)
|
||||
}
|
||||
if !strings.Contains(assistantContent, `<|DSML|invoke name="get_weather">`) {
|
||||
if !strings.Contains(assistantContent, `<|DSML|invoke name="get_weather">`) {
|
||||
t.Fatalf("expected tool name in preserved history, got %q", assistantContent)
|
||||
}
|
||||
if !strings.Contains(normalized[3]["content"].(string), `"temp":18`) {
|
||||
@@ -49,7 +49,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes
|
||||
}
|
||||
|
||||
prompt := util.MessagesPrepare(normalized)
|
||||
if !strings.Contains(prompt, "<|DSML|tool_calls>") {
|
||||
if !strings.Contains(prompt, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("expected preserved assistant tool history in prompt: %q", prompt)
|
||||
}
|
||||
}
|
||||
@@ -177,10 +177,10 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantMultipleToolCallsRemainSepara
|
||||
t.Fatalf("expected assistant tool_call-only message preserved, got %#v", normalized)
|
||||
}
|
||||
content, _ := normalized[0]["content"].(string)
|
||||
if strings.Count(content, "<|DSML|invoke name=") != 2 {
|
||||
if strings.Count(content, "<|DSML|invoke name=") != 2 {
|
||||
t.Fatalf("expected two preserved tool call blocks, got %q", content)
|
||||
}
|
||||
if !strings.Contains(content, `<|DSML|invoke name="search_web">`) || !strings.Contains(content, `<|DSML|invoke name="eval_javascript">`) {
|
||||
if !strings.Contains(content, `<|DSML|invoke name="search_web">`) || !strings.Contains(content, `<|DSML|invoke name="eval_javascript">`) {
|
||||
t.Fatalf("expected both tool names in preserved history, got %q", content)
|
||||
}
|
||||
}
|
||||
@@ -258,7 +258,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantNilContentDoesNotInjectNullLi
|
||||
if strings.Contains(content, "null") {
|
||||
t.Fatalf("expected no null literal injection, got %q", content)
|
||||
}
|
||||
if !strings.Contains(content, "<|DSML|tool_calls>") {
|
||||
if !strings.Contains(content, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("expected assistant tool history in normalized content, got %q", content)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,10 +47,10 @@ func TestBuildOpenAIFinalPrompt_HandlerPathIncludesToolRoundtripSemantics(t *tes
|
||||
if !strings.Contains(finalPrompt, `"condition":"sunny"`) {
|
||||
t.Fatalf("handler finalPrompt should preserve tool output content: %q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, "<|DSML|tool_calls>") {
|
||||
if !strings.Contains(finalPrompt, "<|DSML|tool_calls>") {
|
||||
t.Fatalf("handler finalPrompt should preserve assistant tool history: %q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, `<|DSML|invoke name="get_weather">`) {
|
||||
if !strings.Contains(finalPrompt, `<|DSML|invoke name="get_weather">`) {
|
||||
t.Fatalf("handler finalPrompt should include tool name history: %q", finalPrompt)
|
||||
}
|
||||
}
|
||||
@@ -74,7 +74,7 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
|
||||
}
|
||||
|
||||
finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false)
|
||||
if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.") {
|
||||
if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.") {
|
||||
t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") {
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package promptcompat
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNormalizeResponsesInputItemPreservesAssistantReasoningContent(t *testing.T) {
|
||||
item := map[string]any{
|
||||
@@ -48,3 +51,44 @@ func TestNormalizeResponsesInputItemAssistantMessageWithReasoningBlocks(t *testi
|
||||
t.Fatalf("expected content blocks preserved, got %#v", got["content"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeResponsesInputArrayMergesReasoningMessageIntoFunctionCallHistory(t *testing.T) {
|
||||
input := []any{
|
||||
map[string]any{
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": []any{
|
||||
map[string]any{"type": "reasoning", "text": "need fresh docs before answering"},
|
||||
},
|
||||
},
|
||||
map[string]any{
|
||||
"type": "function_call",
|
||||
"call_id": "call_search",
|
||||
"name": "search_web",
|
||||
"arguments": `{"query":"docs"}`,
|
||||
},
|
||||
}
|
||||
|
||||
got := NormalizeResponsesInputAsMessages(input)
|
||||
if len(got) != 1 {
|
||||
t.Fatalf("expected reasoning and function_call merged into one assistant message, got %#v", got)
|
||||
}
|
||||
msg, _ := got[0].(map[string]any)
|
||||
if msg["role"] != "assistant" {
|
||||
t.Fatalf("expected assistant message, got %#v", msg)
|
||||
}
|
||||
if msg["reasoning_content"] != "need fresh docs before answering" {
|
||||
t.Fatalf("expected reasoning_content on tool-call message, got %#v", msg)
|
||||
}
|
||||
toolCalls, _ := msg["tool_calls"].([]any)
|
||||
if len(toolCalls) != 1 {
|
||||
t.Fatalf("expected one tool call, got %#v", msg["tool_calls"])
|
||||
}
|
||||
history := BuildOpenAIHistoryTranscript(got)
|
||||
if !strings.Contains(history, "[reasoning_content]\nneed fresh docs before answering\n[/reasoning_content]") {
|
||||
t.Fatalf("expected reasoning in history transcript, got %q", history)
|
||||
}
|
||||
if !strings.Contains(history, `<|DSML|invoke name="search_web">`) {
|
||||
t.Fatalf("expected tool call in history transcript, got %q", history)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,19 +61,52 @@ func normalizeResponsesInputArray(items []any) []any {
|
||||
out := make([]any, 0, len(items))
|
||||
callNameByID := map[string]string{}
|
||||
fallbackParts := make([]string, 0, len(items))
|
||||
pendingAssistantReasoning := ""
|
||||
flushFallback := func() {
|
||||
if len(fallbackParts) == 0 {
|
||||
return
|
||||
}
|
||||
if pendingAssistantReasoning != "" {
|
||||
out = append(out, map[string]any{"role": "assistant", "reasoning_content": pendingAssistantReasoning})
|
||||
pendingAssistantReasoning = ""
|
||||
}
|
||||
out = append(out, map[string]any{"role": "user", "content": strings.Join(fallbackParts, "\n")})
|
||||
fallbackParts = fallbackParts[:0]
|
||||
}
|
||||
flushPendingReasoning := func() {
|
||||
if pendingAssistantReasoning == "" {
|
||||
return
|
||||
}
|
||||
out = append(out, map[string]any{"role": "assistant", "reasoning_content": pendingAssistantReasoning})
|
||||
pendingAssistantReasoning = ""
|
||||
}
|
||||
|
||||
for _, item := range items {
|
||||
switch x := item.(type) {
|
||||
case map[string]any:
|
||||
if msg := normalizeResponsesInputItemWithState(x, callNameByID); msg != nil {
|
||||
if reasoning := assistantReasoningOnlyContent(msg); reasoning != "" {
|
||||
if pendingAssistantReasoning == "" {
|
||||
pendingAssistantReasoning = reasoning
|
||||
} else {
|
||||
pendingAssistantReasoning += "\n" + reasoning
|
||||
}
|
||||
continue
|
||||
}
|
||||
if isAssistantToolCallMessage(msg) && pendingAssistantReasoning != "" {
|
||||
if strings.TrimSpace(normalizeOpenAIReasoningContentForPrompt(msg["reasoning_content"])) == "" {
|
||||
msg["reasoning_content"] = pendingAssistantReasoning
|
||||
}
|
||||
pendingAssistantReasoning = ""
|
||||
} else {
|
||||
flushPendingReasoning()
|
||||
}
|
||||
flushFallback()
|
||||
if isAssistantToolCallMessage(msg) && len(out) > 0 {
|
||||
if merged := mergeResponsesAssistantToolCalls(out[len(out)-1], msg); merged {
|
||||
continue
|
||||
}
|
||||
}
|
||||
out = append(out, msg)
|
||||
continue
|
||||
}
|
||||
@@ -86,9 +119,55 @@ func normalizeResponsesInputArray(items []any) []any {
|
||||
}
|
||||
}
|
||||
}
|
||||
flushPendingReasoning()
|
||||
flushFallback()
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func assistantReasoningOnlyContent(msg map[string]any) string {
|
||||
if !isAssistantMessage(msg) || isAssistantToolCallMessage(msg) {
|
||||
return ""
|
||||
}
|
||||
if _, hasContent := msg["content"]; hasContent {
|
||||
normalizedContent := strings.TrimSpace(NormalizeOpenAIContentForPrompt(msg["content"]))
|
||||
reasoningFromContent := strings.TrimSpace(extractOpenAIReasoningContentFromMessage(msg["content"]))
|
||||
if normalizedContent != "" && normalizedContent != reasoningFromContent {
|
||||
return ""
|
||||
}
|
||||
if reasoningFromContent != "" {
|
||||
return reasoningFromContent
|
||||
}
|
||||
}
|
||||
return strings.TrimSpace(normalizeOpenAIReasoningContentForPrompt(msg["reasoning_content"]))
|
||||
}
|
||||
|
||||
func isAssistantMessage(msg map[string]any) bool {
|
||||
return strings.EqualFold(strings.TrimSpace(asString(msg["role"])), "assistant")
|
||||
}
|
||||
|
||||
func isAssistantToolCallMessage(msg map[string]any) bool {
|
||||
if !isAssistantMessage(msg) {
|
||||
return false
|
||||
}
|
||||
toolCalls, ok := msg["tool_calls"].([]any)
|
||||
return ok && len(toolCalls) > 0
|
||||
}
|
||||
|
||||
func mergeResponsesAssistantToolCalls(prev any, next map[string]any) bool {
|
||||
prevMsg, ok := prev.(map[string]any)
|
||||
if !ok || !isAssistantToolCallMessage(prevMsg) || !isAssistantToolCallMessage(next) {
|
||||
return false
|
||||
}
|
||||
prevCalls, _ := prevMsg["tool_calls"].([]any)
|
||||
nextCalls, _ := next["tool_calls"].([]any)
|
||||
prevMsg["tool_calls"] = append(prevCalls, nextCalls...)
|
||||
if strings.TrimSpace(normalizeOpenAIReasoningContentForPrompt(prevMsg["reasoning_content"])) == "" {
|
||||
if reasoning := strings.TrimSpace(normalizeOpenAIReasoningContentForPrompt(next["reasoning_content"])); reasoning != "" {
|
||||
prevMsg["reasoning_content"] = reasoning
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
289
internal/responsehistory/session.go
Normal file
289
internal/responsehistory/session.go
Normal file
@@ -0,0 +1,289 @@
|
||||
package responsehistory
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"ds2api/internal/assistantturn"
|
||||
"ds2api/internal/auth"
|
||||
"ds2api/internal/chathistory"
|
||||
"ds2api/internal/config"
|
||||
"ds2api/internal/prompt"
|
||||
"ds2api/internal/promptcompat"
|
||||
)
|
||||
|
||||
// Session tracks one response-history entry for the lifetime of a request.
// entryID is the ID returned by the store, startedAt feeds the elapsed-time
// field of every update, lastPersist throttles Progress writes, startParams
// is kept so a missing entry can be recreated, and disabled is set once the
// store reports chathistory.ErrDisabled so later updates become no-ops.
type Session struct {
|
||||
store *chathistory.Store
|
||||
entryID string
|
||||
startedAt time.Time
|
||||
lastPersist time.Time
|
||||
startParams chathistory.StartParams
|
||||
disabled bool
|
||||
}
|
||||
|
||||
// StartParams bundles the inputs of Start. Store, Request and Auth are
// required (Start returns nil when any is nil); Surface labels the calling
// API surface and Standard supplies the model, stream flag, messages,
// history text and final prompt that are recorded with the entry.
type StartParams struct {
|
||||
Store *chathistory.Store
|
||||
Request *http.Request
|
||||
Auth *auth.RequestAuth
|
||||
Surface string
|
||||
Standard promptcompat.StandardRequest
|
||||
}
|
||||
|
||||
func Start(params StartParams) *Session {
|
||||
if params.Store == nil || params.Request == nil || params.Auth == nil {
|
||||
return nil
|
||||
}
|
||||
if !params.Store.Enabled() || !shouldCapture(params.Request) {
|
||||
return nil
|
||||
}
|
||||
startParams := chathistory.StartParams{
|
||||
CallerID: strings.TrimSpace(params.Auth.CallerID),
|
||||
AccountID: strings.TrimSpace(params.Auth.AccountID),
|
||||
Surface: strings.TrimSpace(params.Surface),
|
||||
Model: strings.TrimSpace(params.Standard.ResponseModel),
|
||||
Stream: params.Standard.Stream,
|
||||
UserInput: ExtractSingleUserInput(params.Standard.Messages),
|
||||
Messages: ExtractAllMessages(params.Standard.Messages),
|
||||
HistoryText: params.Standard.HistoryText,
|
||||
FinalPrompt: params.Standard.FinalPrompt,
|
||||
}
|
||||
entry, err := params.Store.Start(startParams)
|
||||
session := &Session{
|
||||
store: params.Store,
|
||||
entryID: entry.ID,
|
||||
startedAt: time.Now(),
|
||||
lastPersist: time.Now(),
|
||||
startParams: startParams,
|
||||
}
|
||||
if err != nil {
|
||||
if entry.ID == "" {
|
||||
config.Logger.Warn("[response_history] start failed", "surface", startParams.Surface, "error", err)
|
||||
return nil
|
||||
}
|
||||
config.Logger.Warn("[response_history] start persisted in memory after write failure", "surface", startParams.Surface, "error", err)
|
||||
}
|
||||
return session
|
||||
}
|
||||
|
||||
// shouldCapture reports whether a request should be recorded in the response
// history. Requests without a URL, and requests whose __stream_prepare or
// __stream_release query parameter trims to "1", are not captured.
func shouldCapture(r *http.Request) bool {
	if r == nil || r.URL == nil {
		return false
	}
	query := r.URL.Query()
	for _, flag := range [...]string{"__stream_prepare", "__stream_release"} {
		if strings.TrimSpace(query.Get(flag)) == "1" {
			return false
		}
	}
	return true
}
|
||||
|
||||
func ExtractSingleUserInput(messages []any) string {
|
||||
for i := len(messages) - 1; i >= 0; i-- {
|
||||
msg, ok := messages[i].(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
role := strings.ToLower(strings.TrimSpace(asString(msg["role"])))
|
||||
if role != "user" {
|
||||
continue
|
||||
}
|
||||
if normalized := strings.TrimSpace(prompt.NormalizeContent(msg["content"])); normalized != "" {
|
||||
return normalized
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func ExtractAllMessages(messages []any) []chathistory.Message {
|
||||
out := make([]chathistory.Message, 0, len(messages))
|
||||
for _, raw := range messages {
|
||||
msg, ok := raw.(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
role := strings.ToLower(strings.TrimSpace(asString(msg["role"])))
|
||||
content := strings.TrimSpace(prompt.NormalizeContent(msg["content"]))
|
||||
if role == "" || content == "" {
|
||||
continue
|
||||
}
|
||||
out = append(out, chathistory.Message{
|
||||
Role: role,
|
||||
Content: content,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (s *Session) Progress(thinking, content string) {
|
||||
if s == nil || s.store == nil || s.disabled {
|
||||
return
|
||||
}
|
||||
now := time.Now()
|
||||
if now.Sub(s.lastPersist) < 250*time.Millisecond {
|
||||
return
|
||||
}
|
||||
s.lastPersist = now
|
||||
s.persistUpdate(chathistory.UpdateParams{
|
||||
Status: "streaming",
|
||||
ReasoningContent: thinking,
|
||||
Content: content,
|
||||
StatusCode: http.StatusOK,
|
||||
ElapsedMs: time.Since(s.startedAt).Milliseconds(),
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Session) Success(statusCode int, thinking, content, finishReason string, usage map[string]any) {
|
||||
if s == nil || s.store == nil || s.disabled {
|
||||
return
|
||||
}
|
||||
s.persistUpdate(chathistory.UpdateParams{
|
||||
Status: "success",
|
||||
ReasoningContent: thinking,
|
||||
Content: content,
|
||||
StatusCode: statusCode,
|
||||
ElapsedMs: time.Since(s.startedAt).Milliseconds(),
|
||||
FinishReason: finishReason,
|
||||
Usage: usage,
|
||||
Completed: true,
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Session) Error(statusCode int, message, finishReason, thinking, content string) {
|
||||
if s == nil || s.store == nil || s.disabled {
|
||||
return
|
||||
}
|
||||
s.persistUpdate(chathistory.UpdateParams{
|
||||
Status: "error",
|
||||
ReasoningContent: thinking,
|
||||
Content: content,
|
||||
Error: message,
|
||||
StatusCode: statusCode,
|
||||
ElapsedMs: time.Since(s.startedAt).Milliseconds(),
|
||||
FinishReason: finishReason,
|
||||
Completed: true,
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Session) SuccessTurn(statusCode int, turn assistantturn.Turn, usage map[string]any) {
|
||||
outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{})
|
||||
s.Success(
|
||||
statusCode,
|
||||
ThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking),
|
||||
TextForArchive(turn.RawText, turn.Text),
|
||||
outcome.FinishReason,
|
||||
usage,
|
||||
)
|
||||
}
|
||||
|
||||
func (s *Session) ErrorTurn(statusCode int, message, finishReason string, turn assistantturn.Turn) {
|
||||
s.Error(
|
||||
statusCode,
|
||||
message,
|
||||
finishReason,
|
||||
ThinkingForArchive(turn.RawThinking, turn.DetectionThinking, turn.Thinking),
|
||||
TextForArchive(turn.RawText, turn.Text),
|
||||
)
|
||||
}
|
||||
|
||||
// TextForArchive picks the text to store in history: raw (returned untrimmed)
// when it contains anything other than whitespace, otherwise visible.
func TextForArchive(raw, visible string) string {
	if strings.TrimSpace(raw) == "" {
		return visible
	}
	return raw
}
|
||||
|
||||
// ThinkingForArchive picks the reasoning text to store in history. It returns
// the first of raw and detection that contains non-whitespace (untrimmed), and
// falls back to visible when both are blank.
func ThinkingForArchive(raw, detection, visible string) string {
	for _, candidate := range [...]string{raw, detection} {
		if strings.TrimSpace(candidate) != "" {
			return candidate
		}
	}
	return visible
}
|
||||
|
||||
func GenericUsage(turn assistantturn.Turn) map[string]any {
|
||||
return map[string]any{
|
||||
"input_tokens": turn.Usage.InputTokens,
|
||||
"output_tokens": turn.Usage.OutputTokens,
|
||||
"reasoning_tokens": turn.Usage.ReasoningTokens,
|
||||
"total_tokens": turn.Usage.TotalTokens,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Session) retryMissingEntry() bool {
|
||||
if s == nil || s.store == nil || s.disabled {
|
||||
return false
|
||||
}
|
||||
entry, err := s.store.Start(s.startParams)
|
||||
if errors.Is(err, chathistory.ErrDisabled) {
|
||||
s.disabled = true
|
||||
return false
|
||||
}
|
||||
if entry.ID == "" {
|
||||
if err != nil {
|
||||
config.Logger.Warn("[response_history] recreate missing entry failed", "surface", s.startParams.Surface, "error", err)
|
||||
}
|
||||
return false
|
||||
}
|
||||
s.entryID = entry.ID
|
||||
if err != nil {
|
||||
config.Logger.Warn("[response_history] recreate missing entry persisted in memory after write failure", "surface", s.startParams.Surface, "error", err)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (s *Session) persistUpdate(params chathistory.UpdateParams) {
|
||||
if s == nil || s.store == nil || s.disabled {
|
||||
return
|
||||
}
|
||||
if _, err := s.store.Update(s.entryID, params); err != nil {
|
||||
s.handlePersistError(params, err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Session) handlePersistError(params chathistory.UpdateParams, err error) {
|
||||
if err == nil || s == nil {
|
||||
return
|
||||
}
|
||||
if errors.Is(err, chathistory.ErrDisabled) {
|
||||
s.disabled = true
|
||||
return
|
||||
}
|
||||
if isMissingError(err) {
|
||||
if s.retryMissingEntry() {
|
||||
if _, retryErr := s.store.Update(s.entryID, params); retryErr != nil {
|
||||
if errors.Is(retryErr, chathistory.ErrDisabled) || isMissingError(retryErr) {
|
||||
s.disabled = true
|
||||
return
|
||||
}
|
||||
config.Logger.Warn("[response_history] retry after missing entry failed", "surface", s.startParams.Surface, "error", retryErr)
|
||||
}
|
||||
return
|
||||
}
|
||||
s.disabled = true
|
||||
return
|
||||
}
|
||||
config.Logger.Warn("[response_history] update failed", "surface", s.startParams.Surface, "error", err)
|
||||
}
|
||||
|
||||
// isMissingError reports whether err is non-nil and its message contains
// "not found", compared case-insensitively.
func isMissingError(err error) bool {
	return err != nil && strings.Contains(strings.ToLower(err.Error()), "not found")
}
|
||||
|
||||
func asString(v any) string {
|
||||
switch x := v.(type) {
|
||||
case string:
|
||||
return x
|
||||
case nil:
|
||||
return ""
|
||||
default:
|
||||
return strings.TrimSpace(prompt.NormalizeContent(x))
|
||||
}
|
||||
}
|
||||
@@ -22,6 +22,7 @@ import (
|
||||
"ds2api/internal/httpapi/admin"
|
||||
"ds2api/internal/httpapi/claude"
|
||||
"ds2api/internal/httpapi/gemini"
|
||||
"ds2api/internal/httpapi/ollama"
|
||||
"ds2api/internal/httpapi/openai/chat"
|
||||
"ds2api/internal/httpapi/openai/embeddings"
|
||||
"ds2api/internal/httpapi/openai/files"
|
||||
@@ -65,9 +66,10 @@ func NewApp() (*App, error) {
|
||||
responsesHandler := &responses.Handler{Store: store, Auth: resolver, DS: dsClient, ChatHistory: chatHistoryStore}
|
||||
filesHandler := &files.Handler{Store: store, Auth: resolver, DS: dsClient, ChatHistory: chatHistoryStore}
|
||||
embeddingsHandler := &embeddings.Handler{Store: store, Auth: resolver, DS: dsClient, ChatHistory: chatHistoryStore}
|
||||
claudeHandler := &claude.Handler{Store: store, Auth: resolver, DS: dsClient, OpenAI: chatHandler}
|
||||
geminiHandler := &gemini.Handler{Store: store, Auth: resolver, DS: dsClient, OpenAI: chatHandler}
|
||||
claudeHandler := &claude.Handler{Store: store, Auth: resolver, DS: dsClient, OpenAI: chatHandler, ChatHistory: chatHistoryStore}
|
||||
geminiHandler := &gemini.Handler{Store: store, Auth: resolver, DS: dsClient, OpenAI: chatHandler, ChatHistory: chatHistoryStore}
|
||||
adminHandler := &admin.Handler{Store: store, Pool: pool, DS: dsClient, OpenAI: chatHandler, ChatHistory: chatHistoryStore}
|
||||
ollamaHandler := &ollama.Handler{Store: store}
|
||||
webuiHandler := webui.NewHandler()
|
||||
|
||||
r := chi.NewRouter()
|
||||
@@ -112,6 +114,7 @@ func NewApp() (*App, error) {
|
||||
r.Post("/embeddings", embeddingsHandler.Embeddings)
|
||||
claude.RegisterRoutes(r, claudeHandler)
|
||||
gemini.RegisterRoutes(r, geminiHandler)
|
||||
ollama.RegisterRoutes(r, ollamaHandler)
|
||||
r.Route("/admin", func(ar chi.Router) {
|
||||
admin.RegisterRoutes(ar, adminHandler)
|
||||
})
|
||||
|
||||
@@ -2,19 +2,18 @@ package textclean
|
||||
|
||||
import "regexp"
|
||||
|
||||
var referenceMarkerPattern = regexp.MustCompile(`(?i)\[reference:\s*\d+\]`)
|
||||
var citationReferenceMarkerPattern = regexp.MustCompile(`(?i)\[(citation|reference):\s*\d+\]`)
|
||||
|
||||
func StripReferenceMarkers(text string) string {
|
||||
if text == "" {
|
||||
return text
|
||||
}
|
||||
return referenceMarkerPattern.ReplaceAllString(text, "")
|
||||
return citationReferenceMarkerPattern.ReplaceAllString(text, "")
|
||||
}
|
||||
|
||||
// StripReferenceMarkersEnabled returns true while reference-marker
|
||||
// stripping remains the fixed runtime default. When the behaviour is
|
||||
// eventually removed this function can be deleted and callers can drop
|
||||
// the conditional.
|
||||
// StripReferenceMarkersEnabled returns the default for streaming surfaces,
|
||||
// where partial citation/reference markers are hidden before the final
|
||||
// link metadata is available.
|
||||
func StripReferenceMarkersEnabled() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -11,46 +11,54 @@ import "strings"
|
||||
func BuildToolCallInstructions(toolNames []string) string {
|
||||
return `TOOL CALL FORMAT — FOLLOW EXACTLY:
|
||||
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="TOOL_NAME_HERE">
|
||||
<|DSML|parameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="TOOL_NAME_HERE">
|
||||
<|DSML|parameter name="PARAMETER_NAME"><![CDATA[PARAMETER_VALUE]]></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
|
||||
RULES:
|
||||
1) Use the <|DSML|tool_calls> wrapper format.
|
||||
2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root.
|
||||
3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">.
|
||||
1) Use the <|DSML|tool_calls> wrapper format.
|
||||
2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root.
|
||||
3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">.
|
||||
4) All string values must use <![CDATA[...]]>, even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries.
|
||||
5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...</|DSML|parameter> node.
|
||||
5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">...</|DSML|parameter> node.
|
||||
6) Objects use nested XML elements inside the parameter body. Arrays may repeat <item> children.
|
||||
7) Numbers, booleans, and null stay plain text.
|
||||
8) Use only the parameter names in the tool schema. Do not invent fields.
|
||||
9) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
|
||||
10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
|
||||
11) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with </|DSML|tool_calls>.
|
||||
12) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
|
||||
9) Fill parameters with the actual values required for this call. Do not emit placeholder, blank, or whitespace-only parameters.
|
||||
10) If a required parameter value is unknown, ask the user or answer normally instead of outputting an empty tool call.
|
||||
11) For shell tools such as Bash / execute_command, the command/script must be inside the command parameter. Never call them with an empty command.
|
||||
12) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
|
||||
13) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
|
||||
14) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with </|DSML|tool_calls>.
|
||||
15) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
|
||||
|
||||
PARAMETER SHAPES:
|
||||
- string => <|DSML|parameter name="x"><![CDATA[value]]></|DSML|parameter>
|
||||
- object => <|DSML|parameter name="x"><field>...</field></|DSML|parameter>
|
||||
- array => <|DSML|parameter name="x"><item>...</item><item>...</item></|DSML|parameter>
|
||||
- number/bool/null => <|DSML|parameter name="x">plain_text</|DSML|parameter>
|
||||
- string => <|DSML|parameter name="x"><![CDATA[value]]></|DSML|parameter>
|
||||
- object => <|DSML|parameter name="x"><field>...</field></|DSML|parameter>
|
||||
- array => <|DSML|parameter name="x"><item>...</item><item>...</item></|DSML|parameter>
|
||||
- number/bool/null => <|DSML|parameter name="x">plain_text</|DSML|parameter>
|
||||
|
||||
【WRONG — Do NOT do these】:
|
||||
|
||||
Wrong 1 — mixed text after XML:
|
||||
<|DSML|tool_calls>...</|DSML|tool_calls> I hope this helps.
|
||||
<|DSML|tool_calls>...</|DSML|tool_calls> I hope this helps.
|
||||
Wrong 2 — Markdown code fences:
|
||||
` + "```xml" + `
|
||||
<|DSML|tool_calls>...</|DSML|tool_calls>
|
||||
<|DSML|tool_calls>...</|DSML|tool_calls>
|
||||
` + "```" + `
|
||||
Wrong 3 — missing opening wrapper:
|
||||
<|DSML|invoke name="TOOL_NAME">...</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
|
||||
Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.
|
||||
<|DSML|invoke name="TOOL_NAME">...</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
Wrong 4 — empty parameters:
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="Bash">
|
||||
<|DSML|parameter name="command"></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
|
||||
Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.
|
||||
` + buildCorrectToolExamples(toolNames)
|
||||
}
|
||||
|
||||
@@ -141,21 +149,21 @@ func firstScriptExample(names []string) (promptToolExample, bool) {
|
||||
|
||||
func renderToolExampleBlock(calls []promptToolExample) string {
|
||||
var b strings.Builder
|
||||
b.WriteString("<|DSML|tool_calls>\n")
|
||||
b.WriteString("<|DSML|tool_calls>\n")
|
||||
for _, call := range calls {
|
||||
b.WriteString(` <|DSML|invoke name="`)
|
||||
b.WriteString(` <|DSML|invoke name="`)
|
||||
b.WriteString(call.name)
|
||||
b.WriteString(`">` + "\n")
|
||||
b.WriteString(indentPromptParameters(call.params, " "))
|
||||
b.WriteString("\n </|DSML|invoke>\n")
|
||||
b.WriteString("\n </|DSML|invoke>\n")
|
||||
}
|
||||
b.WriteString("</|DSML|tool_calls>")
|
||||
b.WriteString("</|DSML|tool_calls>")
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func indentPromptParameters(body, indent string) string {
|
||||
if strings.TrimSpace(body) == "" {
|
||||
return indent + `<|DSML|parameter name="content"></|DSML|parameter>`
|
||||
return indent + `<|DSML|parameter name="content"></|DSML|parameter>`
|
||||
}
|
||||
lines := strings.Split(body, "\n")
|
||||
for i, line := range lines {
|
||||
@@ -169,7 +177,7 @@ func indentPromptParameters(body, indent string) string {
|
||||
}
|
||||
|
||||
func wrapParameter(name, inner string) string {
|
||||
return `<|DSML|parameter name="` + name + `">` + inner + `</|DSML|parameter>`
|
||||
return `<|DSML|parameter name="` + name + `">` + inner + `</|DSML|parameter>`
|
||||
}
|
||||
|
||||
func exampleBasicParams(name string) (string, bool) {
|
||||
@@ -195,7 +203,7 @@ func exampleBasicParams(name string) (string, bool) {
|
||||
case "Edit":
|
||||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + wrapParameter("old_string", promptCDATA("foo")) + "\n" + wrapParameter("new_string", promptCDATA("bar")), true
|
||||
case "MultiEdit":
|
||||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
|
||||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
@@ -203,11 +211,11 @@ func exampleBasicParams(name string) (string, bool) {
|
||||
func exampleNestedParams(name string) (string, bool) {
|
||||
switch strings.TrimSpace(name) {
|
||||
case "MultiEdit":
|
||||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
|
||||
return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits"><item><old_string>` + promptCDATA("foo") + `</old_string><new_string>` + promptCDATA("bar") + `</new_string></item></|DSML|parameter>`, true
|
||||
case "Task":
|
||||
return wrapParameter("description", promptCDATA("Investigate flaky tests")) + "\n" + wrapParameter("prompt", promptCDATA("Run targeted tests and summarize failures")), true
|
||||
case "ask_followup_question":
|
||||
return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `<|DSML|parameter name="follow_up"><item><text>` + promptCDATA("Option A") + `</text></item><item><text>` + promptCDATA("Option B") + `</text></item></|DSML|parameter>`, true
|
||||
return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `<|DSML|parameter name="follow_up"><item><text>` + promptCDATA("Option A") + `</text></item><item><text>` + promptCDATA("Option B") + `</text></item></|DSML|parameter>`, true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
@@ -7,20 +7,20 @@ import (
|
||||
|
||||
func TestBuildToolCallInstructions_ExecCommandUsesCmdExample(t *testing.T) {
|
||||
out := BuildToolCallInstructions([]string{"exec_command"})
|
||||
if !strings.Contains(out, `<|DSML|invoke name="exec_command">`) {
|
||||
if !strings.Contains(out, `<|DSML|invoke name="exec_command">`) {
|
||||
t.Fatalf("expected exec_command in examples, got: %s", out)
|
||||
}
|
||||
if !strings.Contains(out, `<|DSML|parameter name="cmd"><![CDATA[pwd]]></|DSML|parameter>`) {
|
||||
if !strings.Contains(out, `<|DSML|parameter name="cmd"><![CDATA[pwd]]></|DSML|parameter>`) {
|
||||
t.Fatalf("expected cmd parameter example for exec_command, got: %s", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildToolCallInstructions_ExecuteCommandUsesCommandExample(t *testing.T) {
|
||||
out := BuildToolCallInstructions([]string{"execute_command"})
|
||||
if !strings.Contains(out, `<|DSML|invoke name="execute_command">`) {
|
||||
if !strings.Contains(out, `<|DSML|invoke name="execute_command">`) {
|
||||
t.Fatalf("expected execute_command in examples, got: %s", out)
|
||||
}
|
||||
if !strings.Contains(out, `<|DSML|parameter name="command"><![CDATA[pwd]]></|DSML|parameter>`) {
|
||||
if !strings.Contains(out, `<|DSML|parameter name="command"><![CDATA[pwd]]></|DSML|parameter>`) {
|
||||
t.Fatalf("expected command parameter example for execute_command, got: %s", out)
|
||||
}
|
||||
}
|
||||
@@ -34,20 +34,20 @@ func TestBuildToolCallInstructions_BashUsesCommandAndDescriptionExamples(t *test
|
||||
|
||||
sawDescription := false
|
||||
for _, block := range blocks {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="command">`) {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="command">`) {
|
||||
t.Fatalf("expected every Bash example to use command parameter, got: %s", block)
|
||||
}
|
||||
if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
t.Fatalf("expected Bash examples not to use file write parameters, got: %s", block)
|
||||
}
|
||||
if strings.Contains(block, `<|DSML|parameter name="description">`) {
|
||||
if strings.Contains(block, `<|DSML|parameter name="description">`) {
|
||||
sawDescription = true
|
||||
}
|
||||
}
|
||||
if !sawDescription {
|
||||
t.Fatalf("expected Bash long-script example to include description, got: %s", out)
|
||||
}
|
||||
if strings.Contains(out, `<|DSML|invoke name="Read">`) {
|
||||
if strings.Contains(out, `<|DSML|invoke name="Read">`) {
|
||||
t.Fatalf("expected examples to avoid unavailable hard-coded Read tool, got: %s", out)
|
||||
}
|
||||
}
|
||||
@@ -60,10 +60,10 @@ func TestBuildToolCallInstructions_ExecuteCommandLongScriptUsesCommand(t *testin
|
||||
}
|
||||
|
||||
for _, block := range blocks {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="command">`) {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="command">`) {
|
||||
t.Fatalf("expected execute_command examples to use command parameter, got: %s", block)
|
||||
}
|
||||
if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
t.Fatalf("expected execute_command examples not to use file write parameters, got: %s", block)
|
||||
}
|
||||
}
|
||||
@@ -80,10 +80,10 @@ func TestBuildToolCallInstructions_ExecCommandLongScriptUsesCmd(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, block := range blocks {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="cmd">`) {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="cmd">`) {
|
||||
t.Fatalf("expected exec_command examples to use cmd parameter, got: %s", block)
|
||||
}
|
||||
if strings.Contains(block, `<|DSML|parameter name="command">`) || strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
if strings.Contains(block, `<|DSML|parameter name="command">`) || strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
t.Fatalf("expected exec_command examples not to use command or file write parameters, got: %s", block)
|
||||
}
|
||||
}
|
||||
@@ -100,10 +100,10 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, block := range blocks {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="file_path">`) || !strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
if !strings.Contains(block, `<|DSML|parameter name="file_path">`) || !strings.Contains(block, `<|DSML|parameter name="content">`) {
|
||||
t.Fatalf("expected Write examples to use file_path and content, got: %s", block)
|
||||
}
|
||||
if strings.Contains(block, `<|DSML|parameter name="path">`) {
|
||||
if strings.Contains(block, `<|DSML|parameter name="path">`) {
|
||||
t.Fatalf("expected Write examples not to use path, got: %s", block)
|
||||
}
|
||||
}
|
||||
@@ -111,7 +111,7 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) {
|
||||
|
||||
func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *testing.T) {
|
||||
out := BuildToolCallInstructions([]string{"read_file"})
|
||||
if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") {
|
||||
if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") {
|
||||
t.Fatalf("expected explicit missing-opening-tag warning, got: %s", out)
|
||||
}
|
||||
if !strings.Contains(out, "Wrong 3 — missing opening wrapper") {
|
||||
@@ -119,8 +119,22 @@ func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *te
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildToolCallInstructions_RejectsEmptyParametersInPrompt(t *testing.T) {
|
||||
out := BuildToolCallInstructions([]string{"Bash"})
|
||||
for _, want := range []string{
|
||||
"Do not emit placeholder, blank, or whitespace-only parameters.",
|
||||
"If a required parameter value is unknown, ask the user or answer normally instead of outputting an empty tool call.",
|
||||
"Never call them with an empty command.",
|
||||
"Wrong 4 — empty parameters",
|
||||
} {
|
||||
if !strings.Contains(out, want) {
|
||||
t.Fatalf("expected empty-parameter instruction %q, got: %s", want, out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func findInvokeBlocks(text, name string) []string {
|
||||
open := `<|DSML|invoke name="` + name + `">`
|
||||
open := `<|DSML|invoke name="` + name + `">`
|
||||
remaining := text
|
||||
blocks := []string{}
|
||||
for {
|
||||
@@ -129,11 +143,11 @@ func findInvokeBlocks(text, name string) []string {
|
||||
return blocks
|
||||
}
|
||||
remaining = remaining[start:]
|
||||
end := strings.Index(remaining, `</|DSML|invoke>`)
|
||||
end := strings.Index(remaining, `</|DSML|invoke>`)
|
||||
if end < 0 {
|
||||
return blocks
|
||||
}
|
||||
end += len(`</|DSML|invoke>`)
|
||||
end += len(`</|DSML|invoke>`)
|
||||
blocks = append(blocks, remaining[:end])
|
||||
remaining = remaining[end:]
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package toolcall
|
||||
|
||||
import "strings"
|
||||
import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func normalizeDSMLToolCallMarkup(text string) (string, bool) {
|
||||
if text == "" {
|
||||
@@ -17,11 +20,10 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
||||
if text == "" {
|
||||
return ""
|
||||
}
|
||||
lower := strings.ToLower(text)
|
||||
var b strings.Builder
|
||||
b.Grow(len(text))
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, lower, i)
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
b.WriteString(text[i:])
|
||||
break
|
||||
@@ -43,8 +45,9 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
||||
b.WriteByte('/')
|
||||
}
|
||||
b.WriteString(tag.Name)
|
||||
b.WriteString(text[tag.NameEnd : tag.End+1])
|
||||
if text[tag.End] != '>' {
|
||||
tail := normalizeToolMarkupTagTailForXML(text[tag.NameEnd : tag.End+1])
|
||||
b.WriteString(tail)
|
||||
if !strings.HasSuffix(tail, ">") {
|
||||
b.WriteByte('>')
|
||||
}
|
||||
i = tag.End + 1
|
||||
@@ -55,3 +58,57 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func normalizeToolMarkupTagTailForXML(tail string) string {
|
||||
if tail == "" {
|
||||
return ""
|
||||
}
|
||||
var b strings.Builder
|
||||
b.Grow(len(tail))
|
||||
quote := rune(0)
|
||||
for i := 0; i < len(tail); {
|
||||
r, size := utf8.DecodeRuneInString(tail[i:])
|
||||
if r == utf8.RuneError && size == 1 {
|
||||
b.WriteByte(tail[i])
|
||||
i++
|
||||
continue
|
||||
}
|
||||
ch := normalizeFullwidthASCII(r)
|
||||
if quote != 0 {
|
||||
b.WriteRune(ch)
|
||||
if ch == quote {
|
||||
quote = 0
|
||||
}
|
||||
i += size
|
||||
continue
|
||||
}
|
||||
switch ch {
|
||||
case '"', '\'':
|
||||
quote = ch
|
||||
b.WriteRune(ch)
|
||||
case '|':
|
||||
j := i + size
|
||||
for j < len(tail) {
|
||||
next, nextSize := utf8.DecodeRuneInString(tail[j:])
|
||||
if nextSize <= 0 {
|
||||
break
|
||||
}
|
||||
if next == ' ' || next == '\t' || next == '\r' || next == '\n' {
|
||||
j += nextSize
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
next, _ := normalizedASCIIAt(tail, j)
|
||||
if next != '>' {
|
||||
b.WriteRune(ch)
|
||||
}
|
||||
case '>', '/', '=':
|
||||
b.WriteRune(ch)
|
||||
default:
|
||||
b.WriteString(tail[i : i+size])
|
||||
}
|
||||
i += size
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
var toolCallMarkupKVPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)>`)
|
||||
|
||||
// cdataPattern matches a standalone CDATA section.
|
||||
var cdataPattern = regexp.MustCompile(`(?is)^<!\[CDATA\[(.*?)]]>$`)
|
||||
var cdataPattern = regexp.MustCompile(`(?is)^(?:<|〈)!\[CDATA\[(.*?)]](?:>|>|〉)$`)
|
||||
|
||||
func parseMarkupKVObject(text string) map[string]any {
|
||||
matches := toolCallMarkupKVPattern.FindAllStringSubmatch(strings.TrimSpace(text), -1)
|
||||
@@ -145,7 +145,6 @@ func SanitizeLooseCDATA(text string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
lower := strings.ToLower(text)
|
||||
const openMarker = "<![cdata["
|
||||
const closeMarker = "]]>"
|
||||
|
||||
@@ -154,17 +153,16 @@ func SanitizeLooseCDATA(text string) string {
|
||||
changed := false
|
||||
pos := 0
|
||||
for pos < len(text) {
|
||||
startRel := strings.Index(lower[pos:], openMarker)
|
||||
if startRel < 0 {
|
||||
start := indexASCIIFold(text, pos, openMarker)
|
||||
if start < 0 {
|
||||
b.WriteString(text[pos:])
|
||||
break
|
||||
}
|
||||
start := pos + startRel
|
||||
contentStart := start + len(openMarker)
|
||||
b.WriteString(text[pos:start])
|
||||
|
||||
if endRel := strings.Index(lower[contentStart:], closeMarker); endRel >= 0 {
|
||||
end := contentStart + endRel + len(closeMarker)
|
||||
if endRel := indexASCIIFold(text, contentStart, closeMarker); endRel >= 0 {
|
||||
end := endRel + len(closeMarker)
|
||||
b.WriteString(text[start:end])
|
||||
pos = end
|
||||
continue
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user