From 3a75b75ae0a8ce5d7f056513ffa7b92507ebfbb9 Mon Sep 17 00:00:00 2001 From: CJACK Date: Wed, 18 Feb 2026 23:06:18 +0800 Subject: [PATCH] feat: Introduce model alias resolution, enhanced configuration options, and improved OpenAI/Claude adapter handling for responses, embeddings, and tool calls. --- API.en.md | 128 +++++- API.md | 128 +++++- README.MD | 47 +- README.en.md | 47 +- api/chat-stream.js | 4 +- api/helpers/stream-tool-sieve.js | 65 ++- api/helpers/stream-tool-sieve.test.js | 4 +- config.example.json | 26 +- internal/adapter/claude/error_shape_test.go | 35 ++ internal/adapter/claude/handler.go | 52 ++- internal/adapter/openai/embeddings_handler.go | 138 ++++++ internal/adapter/openai/error_shape_test.go | 35 ++ internal/adapter/openai/handler.go | 74 +++- .../adapter/openai/handler_toolcall_test.go | 28 +- internal/adapter/openai/models_route_test.go | 46 ++ internal/adapter/openai/response_store.go | 91 ++++ .../openai/responses_embeddings_test.go | 65 +++ internal/adapter/openai/responses_handler.go | 407 ++++++++++++++++++ internal/adapter/openai/tool_sieve.go | 79 ++-- internal/adapter/openai/vercel_stream.go | 12 +- internal/config/config.go | 118 +++++ internal/config/model_alias_test.go | 44 ++ internal/config/models.go | 113 ++++- internal/server/router.go | 2 +- internal/util/toolcalls.go | 33 +- internal/util/toolcalls_test.go | 7 +- internal/util/util_edge_test.go | 12 - opencode.json.example | 8 +- 28 files changed, 1665 insertions(+), 183 deletions(-) create mode 100644 internal/adapter/claude/error_shape_test.go create mode 100644 internal/adapter/openai/embeddings_handler.go create mode 100644 internal/adapter/openai/error_shape_test.go create mode 100644 internal/adapter/openai/models_route_test.go create mode 100644 internal/adapter/openai/response_store.go create mode 100644 internal/adapter/openai/responses_embeddings_test.go create mode 100644 internal/adapter/openai/responses_handler.go create mode 100644 
internal/config/model_alias_test.go diff --git a/API.en.md b/API.en.md index 09149b2..babd1dc 100644 --- a/API.en.md +++ b/API.en.md @@ -28,7 +28,7 @@ This document describes the actual behavior of the current Go codebase. | Base URL | `http://localhost:5001` or your deployment domain | | Default Content-Type | `application/json` | | Health probes | `GET /healthz`, `GET /readyz` | -| CORS | Enabled (`Access-Control-Allow-Origin: *`, allows `Content-Type`, `Authorization`) | +| CORS | Enabled (`Access-Control-Allow-Origin: *`, allows `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Vercel-Protection-Bypass`) | --- @@ -89,7 +89,11 @@ Two header formats accepted: | GET | `/healthz` | None | Liveness probe | | GET | `/readyz` | None | Readiness probe | | GET | `/v1/models` | None | OpenAI model list | +| GET | `/v1/models/{id}` | None | OpenAI single-model query (alias accepted) | | POST | `/v1/chat/completions` | Business | OpenAI chat completions | +| POST | `/v1/responses` | Business | OpenAI Responses API (stream/non-stream) | +| GET | `/v1/responses/{response_id}` | Business | Query stored response (in-memory TTL) | +| POST | `/v1/embeddings` | Business | OpenAI Embeddings API | | GET | `/anthropic/v1/models` | None | Claude model list | | POST | `/anthropic/v1/messages` | Business | Claude messages | | POST | `/anthropic/v1/messages/count_tokens` | Business | Claude token counting | @@ -150,6 +154,15 @@ No auth required. Returns supported models. } ``` +### Model Alias Resolution + +For `chat` / `responses` / `embeddings`, DS2API follows a wide-input/strict-output policy: + +1. Match DeepSeek native model IDs first. +2. Then match exact keys in `model_aliases`. +3. If still unmatched, fall back by known family heuristics (`o*`, `gpt-*`, `claude-*`, etc.). +4. If still unmatched, return `invalid_request_error`. 
+ ### `POST /v1/chat/completions` **Headers**: @@ -163,7 +176,7 @@ Content-Type: application/json | Field | Type | Required | Notes | | --- | --- | --- | --- | -| `model` | string | ✅ | `deepseek-chat` / `deepseek-reasoner` / `deepseek-chat-search` / `deepseek-reasoner-search` | +| `model` | string | ✅ | DeepSeek native models + common aliases (`gpt-4o`, `gpt-5-codex`, `o3`, `claude-sonnet-4-5`, etc.) | | `messages` | array | ✅ | OpenAI-style messages | | `stream` | boolean | ❌ | Default `false` | | `tools` | array | ❌ | Function calling schema | @@ -253,7 +266,63 @@ When `tools` is present, DS2API performs anti-leak handling: } ``` -**Stream**: DS2API buffers text first. If tool call detected → only structured `delta.tool_calls` (each with `index`); otherwise emits buffered text at once. +**Stream**: Once high-confidence toolcall features are matched, DS2API emits `delta.tool_calls` immediately (without waiting for full JSON closure), then keeps sending argument deltas; confirmed raw tool JSON is never forwarded as `delta.content`. + +--- + +### `GET /v1/models/{id}` + +No auth required. Alias values are accepted as path params (for example `gpt-4o`), and the returned object is the mapped DeepSeek model. + +### `POST /v1/responses` + +OpenAI Responses-style endpoint, accepting either `input` or `messages`. + +| Field | Type | Required | Notes | +| --- | --- | --- | --- | +| `model` | string | ✅ | Supports native models + alias mapping | +| `input` | string/array/object | ❌ | One of `input` or `messages` is required | +| `messages` | array | ❌ | One of `input` or `messages` is required | +| `instructions` | string | ❌ | Prepended as a system message | +| `stream` | boolean | ❌ | Default `false` | +| `tools` | array | ❌ | Same tool detection/translation policy as chat | + +**Non-stream**: Returns a standard `response` object with an ID like `resp_xxx`, and stores it in in-memory TTL cache. 
+ +**Stream (SSE)**: minimal event sequence: + +```text +event: response.created +data: {"type":"response.created","id":"resp_xxx","status":"in_progress",...} + +event: response.output_text.delta +data: {"type":"response.output_text.delta","id":"resp_xxx","delta":"..."} + +event: response.output_tool_call.delta +data: {"type":"response.output_tool_call.delta","id":"resp_xxx","tool_calls":[...]} + +event: response.completed +data: {"type":"response.completed","response":{...}} + +data: [DONE] +``` + +### `GET /v1/responses/{response_id}` + +Business auth required. Fetches cached responses created by `POST /v1/responses`. + +> Backed by in-memory TTL store. Default TTL is `900s` (configurable via `responses.store_ttl_seconds`). + +### `POST /v1/embeddings` + +Business auth required. Returns OpenAI-compatible embeddings shape. + +| Field | Type | Required | Notes | +| --- | --- | --- | --- | +| `model` | string | ✅ | Supports native models + alias mapping | +| `input` | string/array | ✅ | Supports string, string array, token array | + +> Requires `embeddings.provider`. Current supported values: `mock` / `deterministic` / `builtin`. If missing/unsupported, returns standard error shape with HTTP 501. --- @@ -272,7 +341,10 @@ No auth required. {"id": "claude-sonnet-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, {"id": "claude-haiku-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"} - ] + ], + "first_id": "claude-opus-4-6", + "last_id": "claude-instant-1.0", + "has_more": false } ``` @@ -288,13 +360,15 @@ Content-Type: application/json anthropic-version: 2023-06-01 ``` +> `anthropic-version` is optional; DS2API auto-fills `2023-06-01` when absent. 
+ **Request body**: | Field | Type | Required | Notes | | --- | --- | --- | --- | | `model` | string | ✅ | For example `claude-sonnet-4-5` / `claude-opus-4-6` / `claude-haiku-4-5` (compatible with `claude-3-5-haiku-latest`), plus historical Claude model IDs | | `messages` | array | ✅ | Claude-style messages | -| `max_tokens` | number | ❌ | Not strictly enforced by upstream bridge | +| `max_tokens` | number | ❌ | Auto-filled to `8192` when omitted; not strictly enforced by upstream bridge | | `stream` | boolean | ❌ | Default `false` | | `system` | string | ❌ | Optional system prompt | | `tools` | array | ❌ | Claude tool schema | @@ -684,13 +758,20 @@ Or manual deploy required: ## Error Payloads -Error formats vary by module: +Compatible routes (`/v1/*`, `/anthropic/*`) use the same error envelope: -| Module | Format | -| --- | --- | -| OpenAI routes | `{"error": {"message": "...", "type": "..."}}` | -| Claude routes | `{"error": {"type": "...", "message": "..."}}` | -| Admin routes | `{"detail": "..."}` | +```json +{ + "error": { + "message": "...", + "type": "invalid_request_error", + "code": "invalid_request", + "param": null + } +} +``` + +Admin routes keep `{"detail":"..."}`. Clients should handle HTTP status code plus `error` / `detail` fields. 
@@ -732,6 +813,31 @@ curl http://localhost:5001/v1/chat/completions \ }' ``` +### OpenAI Responses (Stream) + +```bash +curl http://localhost:5001/v1/responses \ + -H "Authorization: Bearer your-api-key" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-5-codex", + "input": "Write a hello world in golang", + "stream": true + }' +``` + +### OpenAI Embeddings + +```bash +curl http://localhost:5001/v1/embeddings \ + -H "Authorization: Bearer your-api-key" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o", + "input": ["first text", "second text"] + }' +``` + ### OpenAI with Search ```bash diff --git a/API.md b/API.md index 02cbf9b..fa07cfa 100644 --- a/API.md +++ b/API.md @@ -28,7 +28,7 @@ | Base URL | `http://localhost:5001` 或你的部署域名 | | 默认 Content-Type | `application/json` | | 健康检查 | `GET /healthz`、`GET /readyz` | -| CORS | 已启用(`Access-Control-Allow-Origin: *`,允许 `Content-Type`, `Authorization`) | +| CORS | 已启用(`Access-Control-Allow-Origin: *`,允许 `Content-Type`, `Authorization`, `X-API-Key`, `X-Ds2-Target-Account`, `X-Vercel-Protection-Bypass`) | --- @@ -89,7 +89,11 @@ Vercel 一键部署可先只填 `DS2API_ADMIN_KEY`,部署后在 `/admin` 导 | GET | `/healthz` | 无 | 存活探针 | | GET | `/readyz` | 无 | 就绪探针 | | GET | `/v1/models` | 无 | OpenAI 模型列表 | +| GET | `/v1/models/{id}` | 无 | OpenAI 单模型查询(支持 alias 入参) | | POST | `/v1/chat/completions` | 业务 | OpenAI 对话补全 | +| POST | `/v1/responses` | 业务 | OpenAI Responses 接口(流式/非流式) | +| GET | `/v1/responses/{response_id}` | 业务 | 查询已生成 response(内存 TTL) | +| POST | `/v1/embeddings` | 业务 | OpenAI Embeddings 接口 | | GET | `/anthropic/v1/models` | 无 | Claude 模型列表 | | POST | `/anthropic/v1/messages` | 业务 | Claude 消息接口 | | POST | `/anthropic/v1/messages/count_tokens` | 业务 | Claude token 计数 | @@ -150,6 +154,15 @@ Vercel 一键部署可先只填 `DS2API_ADMIN_KEY`,部署后在 `/admin` 导 } ``` +### 模型 alias 解析策略 + +对 `chat` / `responses` / `embeddings` 的 `model` 字段采用“宽进严出”: + +1. 先匹配 DeepSeek 原生模型。 +2. 再匹配 `model_aliases` 精确映射。 +3. 
未命中时按模型家族规则回退(如 `o*`、`gpt-*`、`claude-*`)。 +4. 仍未命中则返回 `invalid_request_error`。 + ### `POST /v1/chat/completions` **请求头**: @@ -163,7 +176,7 @@ Content-Type: application/json | 字段 | 类型 | 必填 | 说明 | | --- | --- | --- | --- | -| `model` | string | ✅ | `deepseek-chat` / `deepseek-reasoner` / `deepseek-chat-search` / `deepseek-reasoner-search` | +| `model` | string | ✅ | 支持 DeepSeek 原生模型 + 常见 alias(如 `gpt-4o`、`gpt-5-codex`、`o3`、`claude-sonnet-4-5`) | | `messages` | array | ✅ | OpenAI 风格消息数组 | | `stream` | boolean | ❌ | 默认 `false` | | `tools` | array | ❌ | Function Calling 定义 | @@ -253,7 +266,63 @@ data: [DONE] } ``` -**流式**:先缓冲正文片段。识别到工具调用 → 仅输出结构化 `delta.tool_calls`(每个 tool call 带 `index`);否则一次性输出普通文本。 +**流式**:命中高置信特征后立即输出 `delta.tool_calls`(不等待完整 JSON 闭合),并持续发送 arguments 增量;已确认的 toolcall 原始 JSON 不会回流到 `delta.content`。 + +--- + +### `GET /v1/models/{id}` + +无需鉴权。入参支持 alias(例如 `gpt-4o`),返回的是映射后的 DeepSeek 模型对象。 + +### `POST /v1/responses` + +OpenAI Responses 风格接口,兼容 `input` 或 `messages`。 + +| 字段 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `model` | string | ✅ | 支持原生模型 + alias 自动映射 | +| `input` | string/array/object | ❌ | 与 `messages` 二选一 | +| `messages` | array | ❌ | 与 `input` 二选一 | +| `instructions` | string | ❌ | 自动前置为 system 消息 | +| `stream` | boolean | ❌ | 默认 `false` | +| `tools` | array | ❌ | 与 chat 同样的工具识别与转译策略 | + +**非流式响应**:返回标准 `response` 对象,`id` 形如 `resp_xxx`,并写入内存 TTL 存储。 + +**流式响应(SSE)**:最小事件序列如下。 + +```text +event: response.created +data: {"type":"response.created","id":"resp_xxx","status":"in_progress",...} + +event: response.output_text.delta +data: {"type":"response.output_text.delta","id":"resp_xxx","delta":"..."} + +event: response.output_tool_call.delta +data: {"type":"response.output_tool_call.delta","id":"resp_xxx","tool_calls":[...]} + +event: response.completed +data: {"type":"response.completed","response":{...}} + +data: [DONE] +``` + +### `GET /v1/responses/{response_id}` + +需要业务鉴权。查询 `POST /v1/responses` 生成并缓存的 response 对象。 + +> 当前为内存 TTL 
存储,默认过期时间 `900s`(可用 `responses.store_ttl_seconds` 调整)。 + +### `POST /v1/embeddings` + +需要业务鉴权。返回 OpenAI Embeddings 兼容结构。 + +| 字段 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `model` | string | ✅ | 支持原生模型 + alias 自动映射 | +| `input` | string/array | ✅ | 支持字符串、字符串数组、token 数组 | + +> 需配置 `embeddings.provider`。当前支持:`mock` / `deterministic` / `builtin`。未配置或不支持时返回标准错误结构(HTTP 501)。 --- @@ -272,7 +341,10 @@ data: [DONE] {"id": "claude-sonnet-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, {"id": "claude-haiku-4-5", "object": "model", "created": 1715635200, "owned_by": "anthropic"}, {"id": "claude-opus-4-6", "object": "model", "created": 1715635200, "owned_by": "anthropic"} - ] + ], + "first_id": "claude-opus-4-6", + "last_id": "claude-instant-1.0", + "has_more": false } ``` @@ -288,13 +360,15 @@ Content-Type: application/json anthropic-version: 2023-06-01 ``` +> `anthropic-version` 可省略,服务端会自动补为 `2023-06-01`。 + **请求体**: | 字段 | 类型 | 必填 | 说明 | | --- | --- | --- | --- | | `model` | string | ✅ | 例如 `claude-sonnet-4-5` / `claude-opus-4-6` / `claude-haiku-4-5`(兼容 `claude-3-5-haiku-latest`),并支持历史 Claude 模型 ID | | `messages` | array | ✅ | Claude 风格消息数组 | -| `max_tokens` | number | ❌ | 当前实现不会硬性截断上游输出 | +| `max_tokens` | number | ❌ | 缺省自动补 `8192`;当前实现不会硬性截断上游输出 | | `stream` | boolean | ❌ | 默认 `false` | | `system` | string | ❌ | 可选系统提示 | | `tools` | array | ❌ | Claude tool 定义 | @@ -684,13 +758,20 @@ data: {"type":"message_stop"} ## 错误响应格式 -不同模块的错误格式略有差异: +兼容路由(`/v1/*`、`/anthropic/*`)统一使用以下结构: -| 模块 | 格式 | -| --- | --- | -| OpenAI 接口 | `{"error": {"message": "...", "type": "..."}}` | -| Claude 接口 | `{"error": {"type": "...", "message": "..."}}` | -| Admin 接口 | `{"detail": "..."}` | +```json +{ + "error": { + "message": "...", + "type": "invalid_request_error", + "code": "invalid_request", + "param": null + } +} +``` + +Admin 接口保持 `{"detail":"..."}`。 建议客户端处理逻辑:检查 HTTP 状态码 + 解析 `error` 或 `detail` 字段。 @@ -732,6 +813,31 @@ curl 
http://localhost:5001/v1/chat/completions \ }' ``` +### OpenAI Responses(流式) + +```bash +curl http://localhost:5001/v1/responses \ + -H "Authorization: Bearer your-api-key" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-5-codex", + "input": "写一个 golang 的 hello world", + "stream": true + }' +``` + +### OpenAI Embeddings + +```bash +curl http://localhost:5001/v1/embeddings \ + -H "Authorization: Bearer your-api-key" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o", + "input": ["第一段文本", "第二段文本"] + }' +``` + ### OpenAI 带搜索 ```bash diff --git a/README.MD b/README.MD index 3517a55..261e34a 100644 --- a/README.MD +++ b/README.MD @@ -54,16 +54,27 @@ flowchart LR | 能力 | 说明 | | --- | --- | -| OpenAI 兼容 | `GET /v1/models`、`POST /v1/chat/completions`(流式/非流式) | +| OpenAI 兼容 | `GET /v1/models`、`GET /v1/models/{id}`、`POST /v1/chat/completions`、`POST /v1/responses`、`GET /v1/responses/{response_id}`、`POST /v1/embeddings` | | Claude 兼容 | `GET /anthropic/v1/models`、`POST /anthropic/v1/messages`、`POST /anthropic/v1/messages/count_tokens` | | 多账号轮询 | 自动 token 刷新、邮箱/手机号双登录方式 | | 并发队列控制 | 每账号 in-flight 上限 + 等待队列,动态计算建议并发值 | | DeepSeek PoW | WASM 计算(`wazero`),无需外部 Node.js 依赖 | -| Tool Calling | 防泄漏处理:自动缓冲、识别、结构化输出 | +| Tool Calling | 防泄漏处理:非代码块高置信特征识别、`delta.tool_calls` 早发、结构化增量输出 | | Admin API | 配置管理、账号测试 / 批量测试、导入导出、Vercel 同步 | | WebUI 管理台 | `/admin` 单页应用(中英文双语、深色模式) | | 运维探针 | `GET /healthz`(存活)、`GET /readyz`(就绪) | +## 平台兼容矩阵 + +| 级别 | 平台 | 当前状态 | +| --- | --- | --- | +| P0 | Codex CLI/SDK(`wire_api=chat` / `wire_api=responses`) | ✅ | +| P0 | OpenAI SDK(JS/Python,chat + responses) | ✅ | +| P0 | Vercel AI SDK(openai-compatible) | ✅ | +| P0 | Anthropic SDK(messages) | ✅ | +| P1 | LangChain / LlamaIndex / OpenWebUI(OpenAI 兼容接入) | ✅ | +| P2 | MCP 独立桥接层 | 规划中 | + ## 模型支持 ### OpenAI 接口 @@ -196,6 +207,7 @@ cp opencode.json.example opencode.json 3. 
在项目目录启动 OpenCode CLI(按你的安装方式运行 `opencode`)。 > 建议优先使用 OpenAI 兼容路径(`/v1/*`),即示例里的 `@ai-sdk/openai-compatible` provider。 +> 若客户端支持 `wire_api`,可分别测试 `responses` 与 `chat`,DS2API 两条链路都兼容。 ## 配置说明 @@ -216,6 +228,24 @@ cp opencode.json.example opencode.json "token": "" } ], + "model_aliases": { + "gpt-4o": "deepseek-chat", + "gpt-5-codex": "deepseek-reasoner", + "o3": "deepseek-reasoner" + }, + "compat": { + "wide_input_strict_output": true + }, + "toolcall": { + "mode": "feature_match", + "early_emit_confidence": "high" + }, + "responses": { + "store_ttl_seconds": 900 + }, + "embeddings": { + "provider": "deterministic" + }, "claude_model_mapping": { "fast": "deepseek-chat", "slow": "deepseek-reasoner" @@ -226,6 +256,11 @@ cp opencode.json.example opencode.json - `keys`:API 访问密钥列表,客户端通过 `Authorization: Bearer ` 鉴权 - `accounts`:DeepSeek 账号列表,支持 `email` 或 `mobile` 登录 - `token`:留空则首次请求时自动登录获取;也可预填已有 token +- `model_aliases`:常见模型名(如 GPT/Codex/Claude)到 DeepSeek 模型的映射 +- `compat.wide_input_strict_output`:建议保持 `true`(当前实现默认宽进严出) +- `toolcall`:固定采用特征匹配 + 高置信早发策略 +- `responses.store_ttl_seconds`:`/v1/responses/{id}` 的内存缓存 TTL +- `embeddings.provider`:embedding 提供方(当前内置 `deterministic/mock/builtin`) - `claude_model_mapping`:字典中 `fast`/`slow` 后缀映射到对应 DeepSeek 模型 ### 环境变量 @@ -281,10 +316,10 @@ cp opencode.json.example opencode.json 当请求中带 `tools` 时,DS2API 会做防泄漏处理: -1. `stream=true` 时先**缓冲**正文片段 -2. 若识别到工具调用 → 仅输出结构化 `tool_calls`,不透传原始 JSON 文本 -3. 若最终不是工具调用 → 一次性输出普通文本 -4. 解析器支持混合文本、fenced JSON、`function.arguments` 字符串等格式 +1. 只在**非代码块上下文**启用 toolcall 特征识别(代码块示例不会触发) +2. 一旦命中高置信特征(`tool_calls` + `name` + `arguments/input` 起始)就立即输出 `delta.tool_calls` +3. 已确认的 toolcall JSON 片段不会泄漏到 `delta.content` +4. 
前文/后文自然语言保持顺序透传,支持混合文本与增量参数输出 ## 项目结构 diff --git a/README.en.md b/README.en.md index d1a91a1..5d2f326 100644 --- a/README.en.md +++ b/README.en.md @@ -54,16 +54,27 @@ flowchart LR | Capability | Details | | --- | --- | -| OpenAI compatible | `GET /v1/models`, `POST /v1/chat/completions` (stream/non-stream) | +| OpenAI compatible | `GET /v1/models`, `GET /v1/models/{id}`, `POST /v1/chat/completions`, `POST /v1/responses`, `GET /v1/responses/{response_id}`, `POST /v1/embeddings` | | Claude compatible | `GET /anthropic/v1/models`, `POST /anthropic/v1/messages`, `POST /anthropic/v1/messages/count_tokens` | | Multi-account rotation | Auto token refresh, email/mobile dual login | | Concurrency control | Per-account in-flight limit + waiting queue, dynamic recommended concurrency | | DeepSeek PoW | WASM solving via `wazero`, no external Node.js dependency | -| Tool Calling | Anti-leak handling: auto buffer, detect, structured output | +| Tool Calling | Anti-leak handling: non-code-block feature match, early `delta.tool_calls`, structured incremental output | | Admin API | Config management, account testing/batch test, import/export, Vercel sync | | WebUI Admin Panel | SPA at `/admin` (bilingual Chinese/English, dark mode) | | Health Probes | `GET /healthz` (liveness), `GET /readyz` (readiness) | +## Platform Compatibility Matrix + +| Tier | Platform | Status | +| --- | --- | --- | +| P0 | Codex CLI/SDK (`wire_api=chat` / `wire_api=responses`) | ✅ | +| P0 | OpenAI SDK (JS/Python, chat + responses) | ✅ | +| P0 | Vercel AI SDK (openai-compatible) | ✅ | +| P0 | Anthropic SDK (messages) | ✅ | +| P1 | LangChain / LlamaIndex / OpenWebUI (OpenAI-compatible integration) | ✅ | +| P2 | MCP standalone bridge | Planned | + ## Model Support ### OpenAI Endpoint @@ -196,6 +207,7 @@ cp opencode.json.example opencode.json 3. Start OpenCode CLI in the project directory (run `opencode` using your installed method). 
> Recommended: use the OpenAI-compatible path (`/v1/*`) via `@ai-sdk/openai-compatible` as shown in the example. +> If your client supports `wire_api`, test both `responses` and `chat`; DS2API supports both paths. ## Configuration @@ -216,6 +228,24 @@ cp opencode.json.example opencode.json "token": "" } ], + "model_aliases": { + "gpt-4o": "deepseek-chat", + "gpt-5-codex": "deepseek-reasoner", + "o3": "deepseek-reasoner" + }, + "compat": { + "wide_input_strict_output": true + }, + "toolcall": { + "mode": "feature_match", + "early_emit_confidence": "high" + }, + "responses": { + "store_ttl_seconds": 900 + }, + "embeddings": { + "provider": "deterministic" + }, "claude_model_mapping": { "fast": "deepseek-chat", "slow": "deepseek-reasoner" @@ -226,6 +256,11 @@ cp opencode.json.example opencode.json - `keys`: API access keys; clients authenticate via `Authorization: Bearer ` - `accounts`: DeepSeek account list, supports `email` or `mobile` login - `token`: Leave empty for auto-login on first request; or pre-fill an existing token +- `model_aliases`: Map common model names (GPT/Codex/Claude) to DeepSeek models +- `compat.wide_input_strict_output`: Keep `true` (current default policy) +- `toolcall`: Fixed to feature matching + high-confidence early emit +- `responses.store_ttl_seconds`: In-memory TTL for `/v1/responses/{id}` +- `embeddings.provider`: Embeddings provider (`deterministic/mock/builtin` built-in) - `claude_model_mapping`: Maps `fast`/`slow` suffixes to corresponding DeepSeek models ### Environment Variables @@ -281,10 +316,10 @@ Queue limit = DS2API_ACCOUNT_MAX_QUEUE (default = recommended concurrency) When `tools` is present in the request, DS2API performs anti-leak handling: -1. With `stream=true`, DS2API **buffers** text deltas first -2. If a tool call is detected → only structured `tool_calls` are emitted, raw JSON is not leaked -3. If no tool call → buffered text is emitted at once -4. 
Parser supports mixed text, fenced JSON, and `function.arguments` payloads +1. Toolcall feature matching is enabled only in **non-code-block context** (fenced examples are ignored) +2. Once high-confidence features are matched (`tool_calls` + `name` + `arguments/input` start), `delta.tool_calls` is emitted immediately +3. Confirmed toolcall JSON fragments are never leaked into `delta.content` +4. Natural language before/after toolcalls keeps original order, with incremental argument output supported ## Project Structure diff --git a/api/chat-stream.js b/api/chat-stream.js index 309c473..680651d 100644 --- a/api/chat-stream.js +++ b/api/chat-stream.js @@ -7,7 +7,7 @@ const { createToolSieveState, processToolSieveChunk, flushToolSieve, - parseStandaloneToolCalls, + parseToolCalls, formatOpenAIStreamToolCalls, } = require('./helpers/stream-tool-sieve'); @@ -199,7 +199,7 @@ module.exports = async function handler(req, res) { await releaseLease(); return; } - const detected = parseStandaloneToolCalls(outputText, toolNames); + const detected = parseToolCalls(outputText, toolNames); if (detected.length > 0 && !toolCallsEmitted) { toolCallsEmitted = true; sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(detected) }); diff --git a/api/helpers/stream-tool-sieve.js b/api/helpers/stream-tool-sieve.js index dc40f3a..44e31cd 100644 --- a/api/helpers/stream-tool-sieve.js +++ b/api/helpers/stream-tool-sieve.js @@ -28,7 +28,6 @@ function createToolSieveState() { pending: '', capture: '', capturing: false, - hasMeaningfulText: false, recentTextTail: '', toolNameSent: false, toolName: '', @@ -192,12 +191,21 @@ function findToolSegmentStart(s) { return -1; } const lower = s.toLowerCase(); - const keyIdx = lower.indexOf('tool_calls'); - if (keyIdx < 0) { - return -1; + let offset = 0; + // eslint-disable-next-line no-constant-condition + while (true) { + const keyRel = lower.indexOf('tool_calls', offset); + if (keyRel < 0) { + return -1; + } + const keyIdx = keyRel; + const 
start = s.slice(0, keyIdx).lastIndexOf('{'); + const candidateStart = start >= 0 ? start : keyIdx; + if (!insideCodeFence(s.slice(0, candidateStart))) { + return candidateStart; + } + offset = keyIdx + 'tool_calls'.length; } - const start = s.slice(0, keyIdx).lastIndexOf('{'); - return start >= 0 ? start : keyIdx; } function consumeToolCapture(state, toolNames) { @@ -220,7 +228,7 @@ function consumeToolCapture(state, toolNames) { } const prefixPart = captured.slice(0, start); const suffixPart = captured.slice(obj.end); - if (!state.toolNameSent && (hasMeaningfulText(prefixPart) || looksLikeToolExampleContext(state.recentTextTail) || looksLikeToolExampleContext(suffixPart))) { + if (insideCodeFence((state.recentTextTail || '') + prefixPart)) { return { ready: true, prefix: captured, @@ -283,7 +291,10 @@ function buildIncrementalToolDeltas(state) { return []; } const start = captured.slice(0, keyIdx).lastIndexOf('{'); - if (start < 0 || hasMeaningfulText(captured.slice(0, start))) { + if (start < 0) { + return []; + } + if (insideCodeFence((state.recentTextTail || '') + captured.slice(0, start))) { return []; } const callStart = findFirstToolCallObjectStart(captured, keyIdx); @@ -621,7 +632,11 @@ function parseToolCalls(text, toolNames) { if (!toStringSafe(text)) { return []; } - const candidates = buildToolCallCandidates(text); + const sanitized = stripFencedCodeBlocks(text); + if (!toStringSafe(sanitized)) { + return []; + } + const candidates = buildToolCallCandidates(sanitized); let parsed = []; for (const c of candidates) { parsed = parseToolCallsPayload(c); @@ -635,11 +650,22 @@ function parseToolCalls(text, toolNames) { return filterToolCalls(parsed, toolNames); } +function stripFencedCodeBlocks(text) { + const t = typeof text === 'string' ? 
text : ''; + if (!t) { + return ''; + } + return t.replace(/```[\s\S]*?```/g, ' '); +} + function parseStandaloneToolCalls(text, toolNames) { const trimmed = toStringSafe(text); if (!trimmed) { return []; } + if ((trimmed.startsWith('```') && trimmed.endsWith('```')) || trimmed.includes('```')) { + return []; + } if (looksLikeToolExampleContext(trimmed)) { return []; } @@ -852,7 +878,6 @@ function noteText(state, text) { if (!state || !hasMeaningfulText(text)) { return; } - state.hasMeaningfulText = true; state.recentTextTail = appendTail(state.recentTextTail, text, TOOL_SIEVE_CONTEXT_TAIL_LIMIT); } @@ -870,22 +895,16 @@ function appendTail(prev, next, max) { } function looksLikeToolExampleContext(text) { - const t = toStringSafe(text).toLowerCase(); + return insideCodeFence(text); +} + +function insideCodeFence(text) { + const t = typeof text === 'string' ? text : ''; if (!t) { return false; } - const cues = [ - '示例', - '例子', - 'for example', - 'example', - 'demo', - '请勿执行', - '不要执行', - 'do not execute', - '```', - ]; - return cues.some((cue) => t.includes(cue)); + const ticks = (t.match(/```/g) || []).length; + return ticks % 2 === 1; } function hasMeaningfulText(text) { diff --git a/api/helpers/stream-tool-sieve.test.js b/api/helpers/stream-tool-sieve.test.js index fea891f..7f532f1 100644 --- a/api/helpers/stream-tool-sieve.test.js +++ b/api/helpers/stream-tool-sieve.test.js @@ -69,9 +69,7 @@ test('parseToolCalls supports fenced json and function.arguments string payload' '```', ].join('\n'); const calls = parseToolCalls(text, ['read_file']); - assert.equal(calls.length, 1); - assert.equal(calls[0].name, 'read_file'); - assert.deepEqual(calls[0].input, { path: 'README.md' }); + assert.equal(calls.length, 0); }); test('parseStandaloneToolCalls only matches standalone payload and ignores mixed prose', () => { diff --git a/config.example.json b/config.example.json index 7614e77..97161f7 100644 --- a/config.example.json +++ b/config.example.json @@ -24,5 +24,27 @@ 
"password": "your-password-3", "token": "" } - ] -} \ No newline at end of file + ], + "model_aliases": { + "gpt-4o": "deepseek-chat", + "gpt-5-codex": "deepseek-reasoner", + "o3": "deepseek-reasoner" + }, + "compat": { + "wide_input_strict_output": true + }, + "toolcall": { + "mode": "feature_match", + "early_emit_confidence": "high" + }, + "responses": { + "store_ttl_seconds": 900 + }, + "embeddings": { + "provider": "deterministic" + }, + "claude_model_mapping": { + "fast": "deepseek-chat", + "slow": "deepseek-reasoner" + } +} diff --git a/internal/adapter/claude/error_shape_test.go b/internal/adapter/claude/error_shape_test.go new file mode 100644 index 0000000..910fce8 --- /dev/null +++ b/internal/adapter/claude/error_shape_test.go @@ -0,0 +1,35 @@ +package claude + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" +) + +func TestWriteClaudeErrorIncludesUnifiedFields(t *testing.T) { + rec := httptest.NewRecorder() + writeClaudeError(rec, http.StatusUnauthorized, "bad token") + if rec.Code != http.StatusUnauthorized { + t.Fatalf("expected 401, got %d", rec.Code) + } + + var body map[string]any + if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil { + t.Fatalf("decode body: %v", err) + } + errObj, _ := body["error"].(map[string]any) + if errObj["message"] != "bad token" { + t.Fatalf("unexpected message: %v", errObj["message"]) + } + if errObj["type"] != "invalid_request_error" { + t.Fatalf("unexpected type: %v", errObj["type"]) + } + if errObj["code"] != "authentication_failed" { + t.Fatalf("unexpected code: %v", errObj["code"]) + } + if _, ok := errObj["param"]; !ok { + t.Fatal("expected param field") + } +} + diff --git a/internal/adapter/claude/handler.go b/internal/adapter/claude/handler.go index b9ecd27..a7d3431 100644 --- a/internal/adapter/claude/handler.go +++ b/internal/adapter/claude/handler.go @@ -43,6 +43,9 @@ func (h *Handler) ListModels(w http.ResponseWriter, _ *http.Request) { } func (h *Handler) Messages(w 
http.ResponseWriter, r *http.Request) { + if strings.TrimSpace(r.Header.Get("anthropic-version")) == "" { + r.Header.Set("anthropic-version", "2023-06-01") + } a, err := h.Auth.Determine(r) if err != nil { status := http.StatusUnauthorized @@ -50,22 +53,25 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) { if err == auth.ErrNoAccount { status = http.StatusTooManyRequests } - writeJSON(w, status, map[string]any{"error": map[string]any{"type": "invalid_request_error", "message": detail}}) + writeClaudeError(w, status, detail) return } defer h.Auth.Release(a) var req map[string]any if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - writeJSON(w, http.StatusBadRequest, map[string]any{"error": map[string]any{"type": "invalid_request_error", "message": "invalid json"}}) + writeClaudeError(w, http.StatusBadRequest, "invalid json") return } model, _ := req["model"].(string) messagesRaw, _ := req["messages"].([]any) if model == "" || len(messagesRaw) == 0 { - writeJSON(w, http.StatusBadRequest, map[string]any{"error": map[string]any{"type": "invalid_request_error", "message": "Request must include 'model' and 'messages'."}}) + writeClaudeError(w, http.StatusBadRequest, "Request must include 'model' and 'messages'.") return } + if _, ok := req["max_tokens"]; !ok { + req["max_tokens"] = 8192 + } normalized := normalizeClaudeMessages(messagesRaw) payload := cloneMap(req) @@ -86,12 +92,12 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) { sessionID, err := h.DS.CreateSession(r.Context(), a, 3) if err != nil { - writeJSON(w, http.StatusUnauthorized, map[string]any{"error": map[string]any{"type": "api_error", "message": "invalid token."}}) + writeClaudeError(w, http.StatusUnauthorized, "invalid token.") return } pow, err := h.DS.GetPow(r.Context(), a, 3) if err != nil { - writeJSON(w, http.StatusUnauthorized, map[string]any{"error": map[string]any{"type": "api_error", "message": "Failed to get PoW"}}) + writeClaudeError(w, 
http.StatusUnauthorized, "Failed to get PoW") return } requestPayload := map[string]any{ @@ -104,13 +110,13 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) { } resp, err := h.DS.CallCompletion(r.Context(), a, requestPayload, pow, 3) if err != nil { - writeJSON(w, http.StatusInternalServerError, map[string]any{"error": map[string]any{"type": "api_error", "message": "Failed to get Claude response."}}) + writeClaudeError(w, http.StatusInternalServerError, "Failed to get Claude response.") return } if resp.StatusCode != http.StatusOK { defer resp.Body.Close() body, _ := io.ReadAll(resp.Body) - writeJSON(w, http.StatusInternalServerError, map[string]any{"error": map[string]any{"type": "api_error", "message": string(body)}}) + writeClaudeError(w, http.StatusInternalServerError, string(body)) return } @@ -162,20 +168,20 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) { func (h *Handler) CountTokens(w http.ResponseWriter, r *http.Request) { a, err := h.Auth.Determine(r) if err != nil { - writeJSON(w, http.StatusUnauthorized, map[string]any{"error": err.Error()}) + writeClaudeError(w, http.StatusUnauthorized, err.Error()) return } defer h.Auth.Release(a) var req map[string]any if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - writeJSON(w, http.StatusBadRequest, map[string]any{"error": "invalid json"}) + writeClaudeError(w, http.StatusBadRequest, "invalid json") return } model, _ := req["model"].(string) messages, _ := req["messages"].([]any) if model == "" || len(messages) == 0 { - writeJSON(w, http.StatusBadRequest, map[string]any{"error": "Request must include 'model' and 'messages'."}) + writeClaudeError(w, http.StatusBadRequest, "Request must include 'model' and 'messages'.") return } inputTokens := 0 @@ -206,7 +212,7 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) - writeJSON(w, 
http.StatusInternalServerError, map[string]any{"error": map[string]any{"type": "api_error", "message": string(body)}}) + writeClaudeError(w, http.StatusInternalServerError, string(body)) return } @@ -241,6 +247,8 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ "error": map[string]any{ "type": "api_error", "message": msg, + "code": "internal_error", + "param": nil, }, }) } @@ -492,6 +500,28 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ } } +func writeClaudeError(w http.ResponseWriter, status int, message string) { + code := "invalid_request" + switch status { + case http.StatusUnauthorized: + code = "authentication_failed" + case http.StatusTooManyRequests: + code = "rate_limit_exceeded" + case http.StatusNotFound: + code = "not_found" + case http.StatusInternalServerError: + code = "internal_error" + } + writeJSON(w, status, map[string]any{ + "error": map[string]any{ + "type": "invalid_request_error", + "message": message, + "code": code, + "param": nil, + }, + }) +} + func normalizeClaudeMessages(messages []any) []any { out := make([]any, 0, len(messages)) for _, m := range messages { diff --git a/internal/adapter/openai/embeddings_handler.go b/internal/adapter/openai/embeddings_handler.go new file mode 100644 index 0000000..ff61be0 --- /dev/null +++ b/internal/adapter/openai/embeddings_handler.go @@ -0,0 +1,138 @@ +package openai + +import ( + "crypto/sha256" + "encoding/binary" + "encoding/json" + "fmt" + "net/http" + "strings" + + "ds2api/internal/auth" + "ds2api/internal/config" + "ds2api/internal/util" +) + +func (h *Handler) Embeddings(w http.ResponseWriter, r *http.Request) { + a, err := h.Auth.Determine(r) + if err != nil { + status := http.StatusUnauthorized + detail := err.Error() + if err == auth.ErrNoAccount { + status = http.StatusTooManyRequests + } + writeOpenAIError(w, status, detail) + return + } + defer h.Auth.Release(a) + + var req map[string]any + if err := 
json.NewDecoder(r.Body).Decode(&req); err != nil { + writeOpenAIError(w, http.StatusBadRequest, "invalid json") + return + } + model, _ := req["model"].(string) + model = strings.TrimSpace(model) + if model == "" { + writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model'.") + return + } + if _, ok := config.ResolveModel(h.Store, model); !ok { + writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model)) + return + } + + inputs := extractEmbeddingInputs(req["input"]) + if len(inputs) == 0 { + writeOpenAIError(w, http.StatusBadRequest, "Request must include non-empty 'input'.") + return + } + + provider := "" + if h.Store != nil { + provider = strings.ToLower(strings.TrimSpace(h.Store.EmbeddingsProvider())) + } + if provider == "" { + writeOpenAIError(w, http.StatusNotImplemented, "Embeddings provider is not configured. Set embeddings.provider in config.") + return + } + switch provider { + case "mock", "deterministic", "builtin": + // supported local deterministic provider + default: + writeOpenAIError(w, http.StatusNotImplemented, fmt.Sprintf("Embeddings provider '%s' is not supported.", provider)) + return + } + + data := make([]map[string]any, 0, len(inputs)) + totalTokens := 0 + for i, input := range inputs { + totalTokens += util.EstimateTokens(input) + data = append(data, map[string]any{ + "object": "embedding", + "index": i, + "embedding": deterministicEmbedding(input), + }) + } + writeJSON(w, http.StatusOK, map[string]any{ + "object": "list", + "data": data, + "model": model, + "usage": map[string]any{ + "prompt_tokens": totalTokens, + "total_tokens": totalTokens, + }, + }) +} + +func extractEmbeddingInputs(raw any) []string { + switch v := raw.(type) { + case string: + s := strings.TrimSpace(v) + if s == "" { + return nil + } + return []string{s} + case []any: + out := make([]string, 0, len(v)) + for _, item := range v { + switch iv := item.(type) { + case string: + s := strings.TrimSpace(iv) + if s != "" 
{ + out = append(out, s) + } + case []any: + // Token array input support: convert to stable string form. + out = append(out, fmt.Sprintf("%v", iv)) + default: + s := strings.TrimSpace(fmt.Sprintf("%v", iv)) + if s != "" { + out = append(out, s) + } + } + } + return out + default: + return nil + } +} + +func deterministicEmbedding(input string) []float64 { + // Keep response shape stable without external dependencies. + const dims = 64 + out := make([]float64, dims) + seed := sha256.Sum256([]byte(input)) + buf := seed[:] + for i := 0; i < dims; i++ { + if len(buf) < 4 { + next := sha256.Sum256(buf) + buf = next[:] + } + v := binary.BigEndian.Uint32(buf[:4]) + buf = buf[4:] + // map [0, 2^32) -> [-1, 1] + out[i] = (float64(v)/2147483647.5 - 1.0) + } + return out +} diff --git a/internal/adapter/openai/error_shape_test.go b/internal/adapter/openai/error_shape_test.go new file mode 100644 index 0000000..c169e04 --- /dev/null +++ b/internal/adapter/openai/error_shape_test.go @@ -0,0 +1,35 @@ +package openai + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" +) + +func TestWriteOpenAIErrorIncludesUnifiedFields(t *testing.T) { + rec := httptest.NewRecorder() + writeOpenAIError(rec, http.StatusBadRequest, "invalid input") + if rec.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rec.Code) + } + + var body map[string]any + if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil { + t.Fatalf("decode body: %v", err) + } + errObj, _ := body["error"].(map[string]any) + if errObj["message"] != "invalid input" { + t.Fatalf("unexpected message: %v", errObj["message"]) + } + if errObj["type"] != "invalid_request_error" { + t.Fatalf("unexpected type: %v", errObj["type"]) + } + if errObj["code"] != "invalid_request" { + t.Fatalf("unexpected code: %v", errObj["code"]) + } + if _, ok := errObj["param"]; !ok { + t.Fatal("expected param field") + } +} + diff --git a/internal/adapter/openai/handler.go b/internal/adapter/openai/handler.go 
index 4de28b7..a2a1c4d 100644 --- a/internal/adapter/openai/handler.go +++ b/internal/adapter/openai/handler.go @@ -31,6 +31,8 @@ type Handler struct { leaseMu sync.Mutex streamLeases map[string]streamLease + responsesMu sync.Mutex + responses *responseStore } type streamLease struct { @@ -40,13 +42,27 @@ type streamLease struct { func RegisterRoutes(r chi.Router, h *Handler) { r.Get("/v1/models", h.ListModels) + r.Get("/v1/models/{model_id}", h.GetModel) r.Post("/v1/chat/completions", h.ChatCompletions) + r.Post("/v1/responses", h.Responses) + r.Get("/v1/responses/{response_id}", h.GetResponseByID) + r.Post("/v1/embeddings", h.Embeddings) } func (h *Handler) ListModels(w http.ResponseWriter, _ *http.Request) { writeJSON(w, http.StatusOK, config.OpenAIModelsResponse()) } +func (h *Handler) GetModel(w http.ResponseWriter, r *http.Request) { + modelID := strings.TrimSpace(chi.URLParam(r, "model_id")) + model, ok := config.OpenAIModelByID(h.Store, modelID) + if !ok { + writeOpenAIError(w, http.StatusNotFound, "Model not found.") + return + } + writeJSON(w, http.StatusOK, model) +} + func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) { if isVercelStreamReleaseRequest(r) { h.handleVercelStreamRelease(w, r) @@ -81,11 +97,16 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) { writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model' and 'messages'.") return } - thinkingEnabled, searchEnabled, ok := config.GetModelConfig(model) + resolvedModel, ok := config.ResolveModel(h.Store, model) if !ok { - writeOpenAIError(w, http.StatusServiceUnavailable, fmt.Sprintf("Model '%s' is not available.", model)) + writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model)) return } + thinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel) + responseModel := strings.TrimSpace(model) + if responseModel == "" { + responseModel = resolvedModel + } finalPrompt, toolNames := 
buildOpenAIFinalPrompt(messagesRaw, req["tools"]) @@ -111,16 +132,17 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) { "thinking_enabled": thinkingEnabled, "search_enabled": searchEnabled, } + applyOpenAIChatPassThrough(req, payload) resp, err := h.DS.CallCompletion(r.Context(), a, payload, pow, 3) if err != nil { writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.") return } if util.ToBool(req["stream"]) { - h.handleStream(w, r, resp, sessionID, model, finalPrompt, thinkingEnabled, searchEnabled, toolNames) + h.handleStream(w, r, resp, sessionID, responseModel, finalPrompt, thinkingEnabled, searchEnabled, toolNames) return } - h.handleNonStream(w, r.Context(), resp, sessionID, model, finalPrompt, thinkingEnabled, toolNames) + h.handleNonStream(w, r.Context(), resp, sessionID, responseModel, finalPrompt, thinkingEnabled, toolNames) } func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, resp *http.Response, completionID, model, finalPrompt string, thinkingEnabled bool, toolNames []string) { @@ -135,7 +157,7 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, re finalThinking := result.Thinking finalText := result.Text - detected := util.ParseStandaloneToolCalls(finalText, toolNames) + detected := util.ParseToolCalls(finalText, toolNames) finishReason := "stop" messageObj := map[string]any{"role": "assistant", "content": finalText} if thinkingEnabled && finalThinking != "" { @@ -222,7 +244,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt finalize := func(finishReason string) { finalThinking := thinking.String() finalText := text.String() - detected := util.ParseStandaloneToolCalls(finalText, toolNames) + detected := util.ParseToolCalls(finalText, toolNames) if len(detected) > 0 && !toolCallsEmitted { finishReason = "tool_calls" delta := map[string]any{ @@ -497,6 +519,8 @@ func writeOpenAIError(w http.ResponseWriter, status 
int, message string) { "error": map[string]any{ "message": message, "type": openAIErrorType(status), + "code": openAIErrorCode(status), + "param": nil, }, }) } @@ -520,3 +544,41 @@ func openAIErrorType(status int) string { return "invalid_request_error" } } + +func openAIErrorCode(status int) string { + switch status { + case http.StatusBadRequest: + return "invalid_request" + case http.StatusUnauthorized: + return "authentication_failed" + case http.StatusForbidden: + return "forbidden" + case http.StatusTooManyRequests: + return "rate_limit_exceeded" + case http.StatusNotFound: + return "not_found" + case http.StatusServiceUnavailable: + return "service_unavailable" + default: + if status >= 500 { + return "internal_error" + } + return "invalid_request" + } +} + +func applyOpenAIChatPassThrough(req map[string]any, payload map[string]any) { + for _, k := range []string{ + "temperature", + "top_p", + "max_tokens", + "max_completion_tokens", + "presence_penalty", + "frequency_penalty", + "stop", + } { + if v, ok := req[k]; ok { + payload[k] = v + } + } +} diff --git a/internal/adapter/openai/handler_toolcall_test.go b/internal/adapter/openai/handler_toolcall_test.go index c987991..dd2bb0f 100644 --- a/internal/adapter/openai/handler_toolcall_test.go +++ b/internal/adapter/openai/handler_toolcall_test.go @@ -210,7 +210,7 @@ func TestHandleNonStreamUnknownToolStillIntercepted(t *testing.T) { } } -func TestHandleNonStreamEmbeddedToolCallExampleNotIntercepted(t *testing.T) { +func TestHandleNonStreamEmbeddedToolCallExampleIntercepted(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( `data: {"p":"response/content","v":"下面是示例:"}`, @@ -228,16 +228,16 @@ func TestHandleNonStreamEmbeddedToolCallExampleNotIntercepted(t *testing.T) { out := decodeJSONBody(t, rec.Body.String()) choices, _ := out["choices"].([]any) choice, _ := choices[0].(map[string]any) - if choice["finish_reason"] != "stop" { - t.Fatalf("expected finish_reason=stop, got %#v", 
choice["finish_reason"]) + if choice["finish_reason"] != "tool_calls" { + t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"]) } msg, _ := choice["message"].(map[string]any) - if _, ok := msg["tool_calls"]; ok { - t.Fatalf("did not expect tool_calls field for embedded example: %#v", msg["tool_calls"]) + toolCalls, _ := msg["tool_calls"].([]any) + if len(toolCalls) == 0 { + t.Fatalf("expected tool_calls field for embedded example: %#v", msg["tool_calls"]) } - content, _ := msg["content"].(string) - if !strings.Contains(content, "示例") || !strings.Contains(content, `"tool_calls"`) { - t.Fatalf("expected embedded example to pass through as text, got %q", content) + if msg["content"] != nil { + t.Fatalf("expected content nil when tool_calls detected, got %#v", msg["content"]) } } @@ -471,8 +471,8 @@ func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) { if !done { t.Fatalf("expected [DONE], body=%s", rec.Body.String()) } - if streamHasToolCallsDelta(frames) { - t.Fatalf("did not expect tool_calls delta in mixed prose stream, body=%s", rec.Body.String()) + if !streamHasToolCallsDelta(frames) { + t.Fatalf("expected tool_calls delta in mixed prose stream, body=%s", rec.Body.String()) } content := strings.Builder{} for _, frame := range frames { @@ -489,11 +489,11 @@ func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) { if !strings.Contains(got, "下面是示例:") || !strings.Contains(got, "请勿执行。") { t.Fatalf("expected pre/post plain text to pass sieve, got=%q", got) } - if !strings.Contains(got, `"tool_calls"`) { - t.Fatalf("expected mixed stream to preserve embedded tool_calls example text, got=%q", got) + if strings.Contains(strings.ToLower(got), `"tool_calls"`) { + t.Fatalf("expected no raw tool_calls json leak in content, got=%q", got) } - if streamFinishReason(frames) != "stop" { - t.Fatalf("expected finish_reason=stop for mixed prose, body=%s", rec.Body.String()) + if streamFinishReason(frames) != "tool_calls" { + 
t.Fatalf("expected finish_reason=tool_calls for mixed prose, body=%s", rec.Body.String()) } } diff --git a/internal/adapter/openai/models_route_test.go b/internal/adapter/openai/models_route_test.go new file mode 100644 index 0000000..1ba3382 --- /dev/null +++ b/internal/adapter/openai/models_route_test.go @@ -0,0 +1,46 @@ +package openai + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/go-chi/chi/v5" +) + +func TestGetModelRouteDirectAndAlias(t *testing.T) { + h := &Handler{} + r := chi.NewRouter() + RegisterRoutes(r, h) + + t.Run("direct", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-chat", nil) + rec := httptest.NewRecorder() + r.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + }) + + t.Run("alias", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/v1/models/gpt-4.1", nil) + rec := httptest.NewRecorder() + r.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("expected 200 for alias, got %d body=%s", rec.Code, rec.Body.String()) + } + }) +} + +func TestGetModelRouteNotFound(t *testing.T) { + h := &Handler{} + r := chi.NewRouter() + RegisterRoutes(r, h) + + req := httptest.NewRequest(http.MethodGet, "/v1/models/not-exists", nil) + rec := httptest.NewRecorder() + r.ServeHTTP(rec, req) + if rec.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d body=%s", rec.Code, rec.Body.String()) + } +} diff --git a/internal/adapter/openai/response_store.go b/internal/adapter/openai/response_store.go new file mode 100644 index 0000000..4f51dfa --- /dev/null +++ b/internal/adapter/openai/response_store.go @@ -0,0 +1,91 @@ +package openai + +import ( + "sync" + "time" +) + +type storedResponse struct { + Value map[string]any + ExpiresAt time.Time +} + +type responseStore struct { + mu sync.Mutex + ttl time.Duration + items map[string]storedResponse +} + +func newResponseStore(ttl 
time.Duration) *responseStore { + if ttl <= 0 { + ttl = 15 * time.Minute + } + return &responseStore{ + ttl: ttl, + items: make(map[string]storedResponse), + } +} + +func (s *responseStore) put(id string, value map[string]any) { + if s == nil || id == "" || value == nil { + return + } + now := time.Now() + s.mu.Lock() + defer s.mu.Unlock() + s.sweepLocked(now) + s.items[id] = storedResponse{ + Value: cloneAnyMap(value), + ExpiresAt: now.Add(s.ttl), + } +} + +func (s *responseStore) get(id string) (map[string]any, bool) { + if s == nil || id == "" { + return nil, false + } + now := time.Now() + s.mu.Lock() + defer s.mu.Unlock() + s.sweepLocked(now) + item, ok := s.items[id] + if !ok { + return nil, false + } + return cloneAnyMap(item.Value), true +} + +func (s *responseStore) sweepLocked(now time.Time) { + for k, v := range s.items { + if now.After(v.ExpiresAt) { + delete(s.items, k) + } + } +} + +func cloneAnyMap(in map[string]any) map[string]any { + if in == nil { + return nil + } + out := make(map[string]any, len(in)) + for k, v := range in { + out[k] = v + } + return out +} + +func (h *Handler) getResponseStore() *responseStore { + if h == nil { + return nil + } + h.responsesMu.Lock() + defer h.responsesMu.Unlock() + if h.responses == nil { + ttl := 15 * time.Minute + if h.Store != nil { + ttl = time.Duration(h.Store.ResponsesStoreTTLSeconds()) * time.Second + } + h.responses = newResponseStore(ttl) + } + return h.responses +} diff --git a/internal/adapter/openai/responses_embeddings_test.go b/internal/adapter/openai/responses_embeddings_test.go new file mode 100644 index 0000000..b23597d --- /dev/null +++ b/internal/adapter/openai/responses_embeddings_test.go @@ -0,0 +1,65 @@ +package openai + +import ( + "testing" + "time" +) + +func TestNormalizeResponsesInputAsMessagesString(t *testing.T) { + msgs := normalizeResponsesInputAsMessages("hello") + if len(msgs) != 1 { + t.Fatalf("expected one message, got %d", len(msgs)) + } + m, _ := msgs[0].(map[string]any) + 
if m["role"] != "user" || m["content"] != "hello" { + t.Fatalf("unexpected message: %#v", m) + } +} + +func TestResponsesMessagesFromRequestWithInstructions(t *testing.T) { + req := map[string]any{ + "model": "gpt-4.1", + "input": "ping", + "instructions": "system text", + } + msgs := responsesMessagesFromRequest(req) + if len(msgs) != 2 { + t.Fatalf("expected two messages, got %d", len(msgs)) + } + sys, _ := msgs[0].(map[string]any) + if sys["role"] != "system" { + t.Fatalf("unexpected first message: %#v", sys) + } +} + +func TestExtractEmbeddingInputs(t *testing.T) { + got := extractEmbeddingInputs([]any{"a", "b"}) + if len(got) != 2 || got[0] != "a" || got[1] != "b" { + t.Fatalf("unexpected inputs: %#v", got) + } +} + +func TestDeterministicEmbeddingStable(t *testing.T) { + a := deterministicEmbedding("hello") + b := deterministicEmbedding("hello") + if len(a) != 64 || len(b) != 64 { + t.Fatalf("expected 64 dims, got %d and %d", len(a), len(b)) + } + for i := range a { + if a[i] != b[i] { + t.Fatalf("expected stable embedding at %d: %v != %v", i, a[i], b[i]) + } + } +} + +func TestResponseStorePutGet(t *testing.T) { + st := newResponseStore(100 * time.Millisecond) + st.put("resp_1", map[string]any{"id": "resp_1"}) + got, ok := st.get("resp_1") + if !ok { + t.Fatal("expected stored response") + } + if got["id"] != "resp_1" { + t.Fatalf("unexpected response payload: %#v", got) + } +} diff --git a/internal/adapter/openai/responses_handler.go b/internal/adapter/openai/responses_handler.go new file mode 100644 index 0000000..8fbb132 --- /dev/null +++ b/internal/adapter/openai/responses_handler.go @@ -0,0 +1,407 @@ +package openai + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" + + "github.com/go-chi/chi/v5" + "github.com/google/uuid" + + "ds2api/internal/auth" + "ds2api/internal/config" + "ds2api/internal/sse" + "ds2api/internal/util" +) + +func (h *Handler) GetResponseByID(w http.ResponseWriter, r *http.Request) { + id := 
strings.TrimSpace(chi.URLParam(r, "response_id")) + if id == "" { + writeOpenAIError(w, http.StatusBadRequest, "response_id is required.") + return + } + st := h.getResponseStore() + item, ok := st.get(id) + if !ok { + writeOpenAIError(w, http.StatusNotFound, "Response not found.") + return + } + writeJSON(w, http.StatusOK, item) +} + +func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) { + a, err := h.Auth.Determine(r) + if err != nil { + status := http.StatusUnauthorized + detail := err.Error() + if err == auth.ErrNoAccount { + status = http.StatusTooManyRequests + } + writeOpenAIError(w, status, detail) + return + } + defer h.Auth.Release(a) + r = r.WithContext(auth.WithAuth(r.Context(), a)) + + var req map[string]any + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeOpenAIError(w, http.StatusBadRequest, "invalid json") + return + } + + model, _ := req["model"].(string) + model = strings.TrimSpace(model) + if model == "" { + writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model'.") + return + } + resolvedModel, ok := config.ResolveModel(h.Store, model) + if !ok { + writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model)) + return + } + thinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel) + + messagesRaw := responsesMessagesFromRequest(req) + if len(messagesRaw) == 0 { + writeOpenAIError(w, http.StatusBadRequest, "Request must include 'input' or 'messages'.") + return + } + finalPrompt, toolNames := buildOpenAIFinalPrompt(messagesRaw, req["tools"]) + + sessionID, err := h.DS.CreateSession(r.Context(), a, 3) + if err != nil { + if a.UseConfigToken { + writeOpenAIError(w, http.StatusUnauthorized, "Account token is invalid. Please re-login the account in admin.") + } else { + writeOpenAIError(w, http.StatusUnauthorized, "Invalid token. 
If this should be a DS2API key, add it to config.keys first.") + } + return + } + pow, err := h.DS.GetPow(r.Context(), a, 3) + if err != nil { + writeOpenAIError(w, http.StatusUnauthorized, "Failed to get PoW (invalid token or unknown error).") + return + } + payload := map[string]any{ + "chat_session_id": sessionID, + "parent_message_id": nil, + "prompt": finalPrompt, + "ref_file_ids": []any{}, + "thinking_enabled": thinkingEnabled, + "search_enabled": searchEnabled, + } + applyOpenAIChatPassThrough(req, payload) + resp, err := h.DS.CallCompletion(r.Context(), a, payload, pow, 3) + if err != nil { + writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.") + return + } + + responseID := "resp_" + strings.ReplaceAll(uuid.NewString(), "-", "") + if util.ToBool(req["stream"]) { + h.handleResponsesStream(w, r, resp, responseID, model, finalPrompt, thinkingEnabled, searchEnabled, toolNames) + return + } + h.handleResponsesNonStream(w, resp, responseID, model, finalPrompt, thinkingEnabled, toolNames) +} + +func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Response, responseID, model, finalPrompt string, thinkingEnabled bool, toolNames []string) { + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + writeOpenAIError(w, resp.StatusCode, strings.TrimSpace(string(body))) + return + } + result := sse.CollectStream(resp, thinkingEnabled, true) + responseObj := buildResponseObject(responseID, model, finalPrompt, result.Thinking, result.Text, toolNames) + h.getResponseStore().put(responseID, responseObj) + writeJSON(w, http.StatusOK, responseObj) +} + +func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request, resp *http.Response, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string) { + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + writeOpenAIError(w, 
resp.StatusCode, strings.TrimSpace(string(body))) + return + } + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache, no-transform") + w.Header().Set("Connection", "keep-alive") + w.Header().Set("X-Accel-Buffering", "no") + rc := http.NewResponseController(w) + canFlush := rc.Flush() == nil + + sendEvent := func(event string, payload map[string]any) { + b, _ := json.Marshal(payload) + _, _ = w.Write([]byte("event: " + event + "\n")) + _, _ = w.Write([]byte("data: ")) + _, _ = w.Write(b) + _, _ = w.Write([]byte("\n\n")) + if canFlush { + _ = rc.Flush() + } + } + + sendEvent("response.created", map[string]any{ + "type": "response.created", + "id": responseID, + "object": "response", + "model": model, + "status": "in_progress", + }) + + initialType := "text" + if thinkingEnabled { + initialType = "thinking" + } + parsedLines, done := sse.StartParsedLinePump(r.Context(), resp.Body, thinkingEnabled, initialType) + bufferToolContent := len(toolNames) > 0 + var sieve toolStreamSieveState + thinking := strings.Builder{} + text := strings.Builder{} + toolCallsEmitted := false + streamToolCallIDs := map[int]string{} + + finalize := func() { + finalThinking := thinking.String() + finalText := text.String() + if bufferToolContent { + for _, evt := range flushToolSieve(&sieve, toolNames) { + if evt.Content != "" { + finalText += evt.Content + sendEvent("response.output_text.delta", map[string]any{ + "type": "response.output_text.delta", + "id": responseID, + "delta": evt.Content, + }) + } + if len(evt.ToolCalls) > 0 { + toolCallsEmitted = true + sendEvent("response.output_tool_call.done", map[string]any{ + "type": "response.output_tool_call.done", + "id": responseID, + "tool_calls": util.FormatOpenAIStreamToolCalls(evt.ToolCalls), + }) + } + } + } + obj := buildResponseObject(responseID, model, finalPrompt, finalThinking, finalText, toolNames) + if toolCallsEmitted { + obj["status"] = "completed" + } + 
h.getResponseStore().put(responseID, obj) + sendEvent("response.completed", map[string]any{ + "type": "response.completed", + "response": obj, + }) + _, _ = w.Write([]byte("data: [DONE]\n\n")) + if canFlush { + _ = rc.Flush() + } + } + + for { + select { + case <-r.Context().Done(): + return + case parsed, ok := <-parsedLines: + if !ok { + _ = <-done + finalize() + return + } + if !parsed.Parsed { + continue + } + if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop { + finalize() + return + } + for _, p := range parsed.Parts { + if p.Text == "" { + continue + } + if p.Type != "thinking" && searchEnabled && sse.IsCitation(p.Text) { + continue + } + if p.Type == "thinking" { + if !thinkingEnabled { + continue + } + thinking.WriteString(p.Text) + sendEvent("response.reasoning.delta", map[string]any{ + "type": "response.reasoning.delta", + "id": responseID, + "delta": p.Text, + }) + continue + } + text.WriteString(p.Text) + if !bufferToolContent { + sendEvent("response.output_text.delta", map[string]any{ + "type": "response.output_text.delta", + "id": responseID, + "delta": p.Text, + }) + continue + } + for _, evt := range processToolSieveChunk(&sieve, p.Text, toolNames) { + if evt.Content != "" { + sendEvent("response.output_text.delta", map[string]any{ + "type": "response.output_text.delta", + "id": responseID, + "delta": evt.Content, + }) + } + if len(evt.ToolCallDeltas) > 0 { + toolCallsEmitted = true + sendEvent("response.output_tool_call.delta", map[string]any{ + "type": "response.output_tool_call.delta", + "id": responseID, + "tool_calls": formatIncrementalStreamToolCallDeltas(evt.ToolCallDeltas, streamToolCallIDs), + }) + } + if len(evt.ToolCalls) > 0 { + toolCallsEmitted = true + sendEvent("response.output_tool_call.done", map[string]any{ + "type": "response.output_tool_call.done", + "id": responseID, + "tool_calls": util.FormatOpenAIStreamToolCalls(evt.ToolCalls), + }) + } + } + } + } + } +} + +func buildResponseObject(responseID, model, 
finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any { + detected := util.ParseToolCalls(finalText, toolNames) + output := make([]any, 0, 2) + if len(detected) > 0 { + toolCalls := make([]any, 0, len(detected)) + for _, tc := range detected { + toolCalls = append(toolCalls, map[string]any{ + "type": "tool_call", + "name": tc.Name, + "arguments": tc.Input, + }) + } + output = append(output, map[string]any{ + "type": "tool_calls", + "tool_calls": toolCalls, + }) + } else { + content := []any{ + map[string]any{ + "type": "output_text", + "text": finalText, + }, + } + if finalThinking != "" { + content = append([]any{map[string]any{ + "type": "reasoning", + "text": finalThinking, + }}, content...) + } + output = append(output, map[string]any{ + "type": "message", + "id": "msg_" + strings.ReplaceAll(uuid.NewString(), "-", ""), + "role": "assistant", + "content": content, + }) + } + promptTokens := util.EstimateTokens(finalPrompt) + reasoningTokens := util.EstimateTokens(finalThinking) + completionTokens := util.EstimateTokens(finalText) + return map[string]any{ + "id": responseID, + "type": "response", + "object": "response", + "created_at": time.Now().Unix(), + "status": "completed", + "model": model, + "output": output, + "output_text": finalText, + "usage": map[string]any{ + "input_tokens": promptTokens, + "output_tokens": reasoningTokens + completionTokens, + "total_tokens": promptTokens + reasoningTokens + completionTokens, + }, + } +} + +func responsesMessagesFromRequest(req map[string]any) []any { + if msgs, ok := req["messages"].([]any); ok && len(msgs) > 0 { + return prependInstructionMessage(msgs, req["instructions"]) + } + if rawInput, ok := req["input"]; ok { + if msgs := normalizeResponsesInputAsMessages(rawInput); len(msgs) > 0 { + return prependInstructionMessage(msgs, req["instructions"]) + } + } + return nil +} + +func prependInstructionMessage(messages []any, instructions any) []any { + sys, _ := instructions.(string) + sys 
= strings.TrimSpace(sys) + if sys == "" { + return messages + } + out := make([]any, 0, len(messages)+1) + out = append(out, map[string]any{"role": "system", "content": sys}) + out = append(out, messages...) + return out +} + +func normalizeResponsesInputAsMessages(input any) []any { + switch v := input.(type) { + case string: + if strings.TrimSpace(v) == "" { + return nil + } + return []any{map[string]any{"role": "user", "content": v}} + case []any: + if len(v) == 0 { + return nil + } + // If caller already provides role-shaped items, keep as-is. + if first, ok := v[0].(map[string]any); ok { + if _, hasRole := first["role"]; hasRole { + return v + } + } + parts := make([]string, 0, len(v)) + for _, item := range v { + if m, ok := item.(map[string]any); ok { + if t, _ := m["type"].(string); strings.EqualFold(strings.TrimSpace(t), "input_text") { + if txt, _ := m["text"].(string); strings.TrimSpace(txt) != "" { + parts = append(parts, txt) + continue + } + } + } + if s := strings.TrimSpace(fmt.Sprintf("%v", item)); s != "" { + parts = append(parts, s) + } + } + if len(parts) == 0 { + return nil + } + return []any{map[string]any{"role": "user", "content": strings.Join(parts, "\n")}} + case map[string]any: + if txt, _ := v["text"].(string); strings.TrimSpace(txt) != "" { + return []any{map[string]any{"role": "user", "content": txt}} + } + if content, ok := v["content"].(string); ok && strings.TrimSpace(content) != "" { + return []any{map[string]any{"role": "user", "content": content}} + } + } + return nil +} diff --git a/internal/adapter/openai/tool_sieve.go b/internal/adapter/openai/tool_sieve.go index b737ff6..fd7222b 100644 --- a/internal/adapter/openai/tool_sieve.go +++ b/internal/adapter/openai/tool_sieve.go @@ -7,17 +7,16 @@ import ( ) type toolStreamSieveState struct { - pending strings.Builder - capture strings.Builder - capturing bool - hasMeaningfulText bool - recentTextTail string - toolNameSent bool - toolName string - toolArgsStart int - toolArgsSent int 
- toolArgsString bool - toolArgsDone bool + pending strings.Builder + capture strings.Builder + capturing bool + recentTextTail string + toolNameSent bool + toolName string + toolArgsStart int + toolArgsSent int + toolArgsString bool + toolArgsDone bool } type toolStreamEvent struct { @@ -197,14 +196,22 @@ func findToolSegmentStart(s string) int { return -1 } lower := strings.ToLower(s) - keyIdx := strings.Index(lower, "tool_calls") - if keyIdx < 0 { - return -1 + offset := 0 + for { + keyRel := strings.Index(lower[offset:], "tool_calls") + if keyRel < 0 { + return -1 + } + keyIdx := offset + keyRel + start := strings.LastIndex(s[:keyIdx], "{") + if start < 0 { + start = keyIdx + } + if !insideCodeFence(s[:start]) { + return start + } + offset = keyIdx + len("tool_calls") } - if start := strings.LastIndex(s[:keyIdx], "{"); start >= 0 { - return start - } - return keyIdx } func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) { @@ -227,7 +234,7 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix } prefixPart := captured[:start] suffixPart := captured[end:] - if !state.toolNameSent && (strings.TrimSpace(prefixPart) != "" || looksLikeToolExampleContext(state.recentTextTail) || looksLikeToolExampleContext(suffixPart)) { + if insideCodeFence(state.recentTextTail + prefixPart) { return captured, nil, "", true } parsed := util.ParseStandaloneToolCalls(obj, toolNames) @@ -293,16 +300,16 @@ func buildIncrementalToolDeltas(state *toolStreamSieveState) []toolCallDelta { if captured == "" { return nil } - if looksLikeToolExampleContext(state.recentTextTail) { - return nil - } lower := strings.ToLower(captured) keyIdx := strings.Index(lower, "tool_calls") if keyIdx < 0 { return nil } start := strings.LastIndex(captured[:keyIdx], "{") - if start < 0 || strings.TrimSpace(captured[:start]) != "" { + if start < 0 { + return nil + } + if 
// looksLikeToolExampleContext reports whether tool-call JSON appearing after
// text should be treated as illustrative rather than executable. With the
// fence-based sieve this reduces to "does the text end inside an open fence?".
func looksLikeToolExampleContext(text string) bool {
	return insideCodeFence(text)
}

// insideCodeFence reports whether text ends inside an unterminated ``` code
// fence, i.e. whether it contains an odd number of fence markers.
func insideCodeFence(text string) bool {
	if text == "" {
		return false
	}
	open := false
	rest := text
	for {
		idx := strings.Index(rest, "```")
		if idx < 0 {
			return open
		}
		open = !open
		rest = rest[idx+3:]
	}
}
*Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque "thinking_enabled": thinkingEnabled, "search_enabled": searchEnabled, } + applyOpenAIChatPassThrough(req, payload) leaseID := h.holdStreamLease(a) if leaseID == "" { writeOpenAIError(w, http.StatusInternalServerError, "failed to create stream lease") @@ -106,7 +112,7 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque writeJSON(w, http.StatusOK, map[string]any{ "session_id": sessionID, "lease_id": leaseID, - "model": model, + "model": responseModel, "final_prompt": finalPrompt, "thinking_enabled": thinkingEnabled, "search_enabled": searchEnabled, diff --git a/internal/config/config.go b/internal/config/config.go index b4058c6..d583159 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -62,11 +62,33 @@ type Config struct { Accounts []Account `json:"accounts,omitempty"` ClaudeMapping map[string]string `json:"claude_mapping,omitempty"` ClaudeModelMap map[string]string `json:"claude_model_mapping,omitempty"` + ModelAliases map[string]string `json:"model_aliases,omitempty"` + Compat CompatConfig `json:"compat,omitempty"` + Toolcall ToolcallConfig `json:"toolcall,omitempty"` + Responses ResponsesConfig `json:"responses,omitempty"` + Embeddings EmbeddingsConfig `json:"embeddings,omitempty"` VercelSyncHash string `json:"_vercel_sync_hash,omitempty"` VercelSyncTime int64 `json:"_vercel_sync_time,omitempty"` AdditionalFields map[string]any `json:"-"` } +type CompatConfig struct { + WideInputStrictOutput bool `json:"wide_input_strict_output,omitempty"` +} + +type ToolcallConfig struct { + Mode string `json:"mode,omitempty"` + EarlyEmitConfidence string `json:"early_emit_confidence,omitempty"` +} + +type ResponsesConfig struct { + StoreTTLSeconds int `json:"store_ttl_seconds,omitempty"` +} + +type EmbeddingsConfig struct { + Provider string `json:"provider,omitempty"` +} + func (c Config) MarshalJSON() ([]byte, error) { m := map[string]any{} for k, v 
:= range c.AdditionalFields { @@ -84,6 +106,21 @@ func (c Config) MarshalJSON() ([]byte, error) { if len(c.ClaudeModelMap) > 0 { m["claude_model_mapping"] = c.ClaudeModelMap } + if len(c.ModelAliases) > 0 { + m["model_aliases"] = c.ModelAliases + } + if c.Compat.WideInputStrictOutput { + m["compat"] = c.Compat + } + if strings.TrimSpace(c.Toolcall.Mode) != "" || strings.TrimSpace(c.Toolcall.EarlyEmitConfidence) != "" { + m["toolcall"] = c.Toolcall + } + if c.Responses.StoreTTLSeconds > 0 { + m["responses"] = c.Responses + } + if strings.TrimSpace(c.Embeddings.Provider) != "" { + m["embeddings"] = c.Embeddings + } if c.VercelSyncHash != "" { m["_vercel_sync_hash"] = c.VercelSyncHash } @@ -117,6 +154,26 @@ func (c *Config) UnmarshalJSON(b []byte) error { if err := json.Unmarshal(v, &c.ClaudeModelMap); err != nil { return fmt.Errorf("invalid field %q: %w", k, err) } + case "model_aliases": + if err := json.Unmarshal(v, &c.ModelAliases); err != nil { + return fmt.Errorf("invalid field %q: %w", k, err) + } + case "compat": + if err := json.Unmarshal(v, &c.Compat); err != nil { + return fmt.Errorf("invalid field %q: %w", k, err) + } + case "toolcall": + if err := json.Unmarshal(v, &c.Toolcall); err != nil { + return fmt.Errorf("invalid field %q: %w", k, err) + } + case "responses": + if err := json.Unmarshal(v, &c.Responses); err != nil { + return fmt.Errorf("invalid field %q: %w", k, err) + } + case "embeddings": + if err := json.Unmarshal(v, &c.Embeddings); err != nil { + return fmt.Errorf("invalid field %q: %w", k, err) + } case "_vercel_sync_hash": if err := json.Unmarshal(v, &c.VercelSyncHash); err != nil { return fmt.Errorf("invalid field %q: %w", k, err) @@ -141,6 +198,11 @@ func (c Config) Clone() Config { Accounts: slices.Clone(c.Accounts), ClaudeMapping: cloneStringMap(c.ClaudeMapping), ClaudeModelMap: cloneStringMap(c.ClaudeModelMap), + ModelAliases: cloneStringMap(c.ModelAliases), + Compat: c.Compat, + Toolcall: c.Toolcall, + Responses: c.Responses, + 
Embeddings: c.Embeddings, VercelSyncHash: c.VercelSyncHash, VercelSyncTime: c.VercelSyncTime, AdditionalFields: map[string]any{}, @@ -490,3 +552,59 @@ func (s *Store) ClaudeMapping() map[string]string { } return map[string]string{"fast": "deepseek-chat", "slow": "deepseek-reasoner"} } + +func (s *Store) ModelAliases() map[string]string { + s.mu.RLock() + defer s.mu.RUnlock() + out := DefaultModelAliases() + for k, v := range s.cfg.ModelAliases { + key := strings.TrimSpace(lower(k)) + val := strings.TrimSpace(lower(v)) + if key == "" || val == "" { + continue + } + out[key] = val + } + return out +} + +func (s *Store) CompatWideInputStrictOutput() bool { + // Current default policy is always wide-input / strict-output. + // Kept as a method so callers do not depend on storage shape. + return true +} + +func (s *Store) ToolcallMode() string { + s.mu.RLock() + defer s.mu.RUnlock() + mode := strings.TrimSpace(strings.ToLower(s.cfg.Toolcall.Mode)) + if mode == "" { + return "feature_match" + } + return mode +} + +func (s *Store) ToolcallEarlyEmitConfidence() string { + s.mu.RLock() + defer s.mu.RUnlock() + level := strings.TrimSpace(strings.ToLower(s.cfg.Toolcall.EarlyEmitConfidence)) + if level == "" { + return "high" + } + return level +} + +func (s *Store) ResponsesStoreTTLSeconds() int { + s.mu.RLock() + defer s.mu.RUnlock() + if s.cfg.Responses.StoreTTLSeconds > 0 { + return s.cfg.Responses.StoreTTLSeconds + } + return 900 +} + +func (s *Store) EmbeddingsProvider() string { + s.mu.RLock() + defer s.mu.RUnlock() + return strings.TrimSpace(s.cfg.Embeddings.Provider) +} diff --git a/internal/config/model_alias_test.go b/internal/config/model_alias_test.go new file mode 100644 index 0000000..89e74b0 --- /dev/null +++ b/internal/config/model_alias_test.go @@ -0,0 +1,44 @@ +package config + +import "testing" + +func TestResolveModelDirectDeepSeek(t *testing.T) { + got, ok := ResolveModel(nil, "deepseek-chat") + if !ok || got != "deepseek-chat" { + t.Fatalf("expected 
deepseek-chat, got ok=%v model=%q", ok, got) + } +} + +func TestResolveModelAlias(t *testing.T) { + got, ok := ResolveModel(nil, "gpt-4.1") + if !ok || got != "deepseek-chat" { + t.Fatalf("expected alias gpt-4.1 -> deepseek-chat, got ok=%v model=%q", ok, got) + } +} + +func TestResolveModelHeuristicReasoner(t *testing.T) { + got, ok := ResolveModel(nil, "o3-super") + if !ok || got != "deepseek-reasoner" { + t.Fatalf("expected heuristic reasoner, got ok=%v model=%q", ok, got) + } +} + +func TestResolveModelUnknown(t *testing.T) { + _, ok := ResolveModel(nil, "totally-custom-model") + if ok { + t.Fatal("expected unknown model to fail resolve") + } +} + +func TestClaudeModelsResponsePaginationFields(t *testing.T) { + resp := ClaudeModelsResponse() + if _, ok := resp["first_id"]; !ok { + t.Fatalf("expected first_id in response: %#v", resp) + } + if _, ok := resp["last_id"]; !ok { + t.Fatalf("expected last_id in response: %#v", resp) + } + if _, ok := resp["has_more"]; !ok { + t.Fatalf("expected has_more in response: %#v", resp) + } +} diff --git a/internal/config/models.go b/internal/config/models.go index 13fa63d..017a2ee 100644 --- a/internal/config/models.go +++ b/internal/config/models.go @@ -1,5 +1,7 @@ package config +import "strings" + type ModelInfo struct { ID string `json:"id"` Object string `json:"object"` @@ -71,6 +73,91 @@ func GetModelConfig(model string) (thinking bool, search bool, ok bool) { } } +func IsSupportedDeepSeekModel(model string) bool { + _, _, ok := GetModelConfig(model) + return ok +} + +func DefaultModelAliases() map[string]string { + return map[string]string{ + "gpt-4o": "deepseek-chat", + "gpt-4.1": "deepseek-chat", + "gpt-4.1-mini": "deepseek-chat", + "gpt-4.1-nano": "deepseek-chat", + "gpt-5": "deepseek-chat", + "gpt-5-mini": "deepseek-chat", + "gpt-5-codex": "deepseek-reasoner", + "o1": "deepseek-reasoner", + "o1-mini": "deepseek-reasoner", + "o3": "deepseek-reasoner", + "o3-mini": "deepseek-reasoner", + "claude-sonnet-4-5": 
"deepseek-chat", + "claude-haiku-4-5": "deepseek-chat", + "claude-opus-4-6": "deepseek-reasoner", + "claude-3-5-sonnet": "deepseek-chat", + "claude-3-5-haiku": "deepseek-chat", + "claude-3-opus": "deepseek-reasoner", + "gemini-2.5-pro": "deepseek-chat", + "gemini-2.5-flash": "deepseek-chat", + "llama-3.1-70b-instruct": "deepseek-chat", + "qwen-max": "deepseek-chat", + } +} + +func ResolveModel(store *Store, requested string) (string, bool) { + model := lower(strings.TrimSpace(requested)) + if model == "" { + return "", false + } + if IsSupportedDeepSeekModel(model) { + return model, true + } + aliases := DefaultModelAliases() + if store != nil { + for k, v := range store.ModelAliases() { + aliases[lower(strings.TrimSpace(k))] = lower(strings.TrimSpace(v)) + } + } + if mapped, ok := aliases[model]; ok && IsSupportedDeepSeekModel(mapped) { + return mapped, true + } + if strings.HasPrefix(model, "deepseek-") { + return "", false + } + + knownFamily := false + for _, prefix := range []string{ + "gpt-", "o1", "o3", "claude-", "gemini-", "llama-", "qwen-", "mistral-", "command-", + } { + if strings.HasPrefix(model, prefix) { + knownFamily = true + break + } + } + if !knownFamily { + return "", false + } + + useReasoner := strings.Contains(model, "reason") || + strings.Contains(model, "reasoner") || + strings.HasPrefix(model, "o1") || + strings.HasPrefix(model, "o3") || + strings.Contains(model, "opus") || + strings.Contains(model, "r1") + useSearch := strings.Contains(model, "search") + + switch { + case useReasoner && useSearch: + return "deepseek-reasoner-search", true + case useReasoner: + return "deepseek-reasoner", true + case useSearch: + return "deepseek-chat-search", true + default: + return "deepseek-chat", true + } +} + func lower(s string) string { b := []byte(s) for i, c := range b { @@ -85,6 +172,28 @@ func OpenAIModelsResponse() map[string]any { return map[string]any{"object": "list", "data": DeepSeekModels} } -func ClaudeModelsResponse() map[string]any { - 
return map[string]any{"object": "list", "data": ClaudeModels} +func OpenAIModelByID(store *Store, id string) (ModelInfo, bool) { + canonical, ok := ResolveModel(store, id) + if !ok { + return ModelInfo{}, false + } + for _, model := range DeepSeekModels { + if model.ID == canonical { + return model, true + } + } + return ModelInfo{}, false +} + +func ClaudeModelsResponse() map[string]any { + resp := map[string]any{"object": "list", "data": ClaudeModels} + if len(ClaudeModels) > 0 { + resp["first_id"] = ClaudeModels[0].ID + resp["last_id"] = ClaudeModels[len(ClaudeModels)-1].ID + } else { + resp["first_id"] = nil + resp["last_id"] = nil + } + resp["has_more"] = false + return resp } diff --git a/internal/server/router.go b/internal/server/router.go index c6339fb..a81f0cb 100644 --- a/internal/server/router.go +++ b/internal/server/router.go @@ -92,7 +92,7 @@ func cors(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Access-Control-Allow-Origin", "*") w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS, PUT, DELETE") - w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-API-Key, X-Ds2-Target-Account, X-Vercel-Protection-Bypass") if r.Method == http.MethodOptions { w.WriteHeader(http.StatusNoContent) return diff --git a/internal/util/toolcalls.go b/internal/util/toolcalls.go index decb96e..9e44b94 100644 --- a/internal/util/toolcalls.go +++ b/internal/util/toolcalls.go @@ -10,6 +10,7 @@ import ( var toolCallPattern = regexp.MustCompile(`\{\s*["']tool_calls["']\s*:\s*\[(.*?)\]\s*\}`) var fencedJSONPattern = regexp.MustCompile("(?s)```(?:json)?\\s*(.*?)\\s*```") +var fencedBlockPattern = regexp.MustCompile("(?s)```.*?```") type ParsedToolCall struct { Name string `json:"name"` @@ -20,6 +21,10 @@ func ParseToolCalls(text string, availableToolNames []string) []ParsedToolCall { if 
strings.TrimSpace(text) == "" { return nil } + text = stripFencedCodeBlocks(text) + if strings.TrimSpace(text) == "" { + return nil + } candidates := buildToolCallCandidates(text) var parsed []ParsedToolCall @@ -45,11 +50,6 @@ func ParseStandaloneToolCalls(text string, availableToolNames []string) []Parsed return nil } candidates := []string{trimmed} - if strings.HasPrefix(trimmed, "```") && strings.HasSuffix(trimmed, "```") { - if m := fencedJSONPattern.FindStringSubmatch(trimmed); len(m) >= 2 { - candidates = append(candidates, strings.TrimSpace(m[1])) - } - } for _, candidate := range candidates { candidate = strings.TrimSpace(candidate) if candidate == "" { @@ -321,23 +321,14 @@ func looksLikeToolExampleContext(text string) bool { if t == "" { return false } - cues := []string{ - "```", - "示例", - "例子", - "for example", - "example", - "demo", - "请勿执行", - "不要执行", - "do not execute", + return strings.Contains(t, "```") +} + +func stripFencedCodeBlocks(text string) string { + if strings.TrimSpace(text) == "" { + return "" } - for _, cue := range cues { - if strings.Contains(t, cue) { - return true - } - } - return false + return fencedBlockPattern.ReplaceAllString(text, " ") } func FormatOpenAIToolCalls(calls []ParsedToolCall) []map[string]any { diff --git a/internal/util/toolcalls_test.go b/internal/util/toolcalls_test.go index 509299c..f7c82d2 100644 --- a/internal/util/toolcalls_test.go +++ b/internal/util/toolcalls_test.go @@ -19,11 +19,8 @@ func TestParseToolCalls(t *testing.T) { func TestParseToolCallsFromFencedJSON(t *testing.T) { text := "I will call tools now\n```json\n{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"news\"}}]}\n```" calls := ParseToolCalls(text, []string{"search"}) - if len(calls) != 1 { - t.Fatalf("expected 1 call, got %d", len(calls)) - } - if calls[0].Input["q"] != "news" { - t.Fatalf("unexpected args: %#v", calls[0].Input) + if len(calls) != 0 { + t.Fatalf("expected fenced tool_call example to be ignored, got %#v", calls) } } 
diff --git a/internal/util/util_edge_test.go b/internal/util/util_edge_test.go index 393aa88..cba0ceb 100644 --- a/internal/util/util_edge_test.go +++ b/internal/util/util_edge_test.go @@ -416,18 +416,6 @@ func TestParseStandaloneToolCallsFencedCodeBlock(t *testing.T) { // ─── looksLikeToolExampleContext ───────────────────────────────────── -func TestLooksLikeToolExampleContextChinese(t *testing.T) { - if !looksLikeToolExampleContext("下面是示例") { - t.Fatal("expected true for Chinese example context") - } -} - -func TestLooksLikeToolExampleContextEnglish(t *testing.T) { - if !looksLikeToolExampleContext("here is an example of") { - t.Fatal("expected true for English example context") - } -} - func TestLooksLikeToolExampleContextNone(t *testing.T) { if looksLikeToolExampleContext("I will call the tool now") { t.Fatal("expected false for non-example context") diff --git a/opencode.json.example b/opencode.json.example index 2933e9f..ed18a63 100644 --- a/opencode.json.example +++ b/opencode.json.example @@ -9,6 +9,12 @@ "apiKey": "your-api-key" }, "models": { + "gpt-4o": { + "name": "GPT-4o (aliased to deepseek-chat)" + }, + "gpt-5-codex": { + "name": "GPT-5 Codex (aliased to deepseek-reasoner)" + }, "deepseek-chat": { "name": "DeepSeek Chat (DS2API)" }, @@ -18,5 +24,5 @@ } } }, - "model": "ds2api/deepseek-chat" + "model": "ds2api/gpt-5-codex" }