From 1286b02247461b030813012fe7906306ed89da67 Mon Sep 17 00:00:00 2001
From: CJACK
Date: Sun, 3 May 2026 04:14:19 +0800
Subject: [PATCH] refactor: remove legacy compatibility configuration and UI
 components

---
 API.en.md                                      |  6 +-
 API.md                                         |  6 +-
 config.example.json                            |  4 -
 docs/ARCHITECTURE.en.md                        |  6 +-
 docs/ARCHITECTURE.md                           |  6 +-
 docs/DEVELOPMENT.md                            |  2 +-
 docs/prompt-compatibility.md                   |  2 +-
 internal/assistantturn/turn.go                 | 56 +++++++++++++
 internal/assistantturn/turn_test.go            | 27 ++++++
 internal/config/codec.go                       | 17 +---
 internal/config/config.go                      |  6 --
 internal/config/config_edge_test.go            | 82 ++-----------------
 internal/config/store_accessors.go             | 18 ----
 .../admin/settings/handler_settings_parse.go   | 40 +++------
 .../admin/settings/handler_settings_read.go    |  1 -
 .../admin/settings/handler_settings_write.go   | 10 +--
 internal/httpapi/admin/shared/deps.go          |  1 -
 internal/httpapi/claude/deps.go                |  1 -
 .../httpapi/claude/deps_injection_test.go      |  1 -
 internal/httpapi/claude/handler_messages.go    |  4 +-
 internal/httpapi/claude/handler_routes.go      |  7 +-
 internal/httpapi/claude/proxy_vercel_test.go   |  5 +-
 .../httpapi/claude/stream_runtime_finalize.go  |  7 +-
 internal/httpapi/claude/stream_status_test.go  |  5 +-
 internal/httpapi/gemini/deps.go                |  1 -
 internal/httpapi/gemini/handler_generate.go    |  4 +-
 internal/httpapi/gemini/handler_routes.go      |  7 +-
 .../httpapi/gemini/handler_stream_runtime.go   |  9 +-
 internal/httpapi/gemini/handler_test.go        |  7 +-
 .../httpapi/openai/chat/chat_history_test.go   |  7 +-
 .../openai/chat/chat_stream_runtime.go         | 32 ++------
 .../openai/chat/empty_retry_runtime.go         |  4 +-
 internal/httpapi/openai/chat/handler.go        | 19 +----
 internal/httpapi/openai/chat/handler_chat.go   | 51 ++++++------
 .../chat/handler_chat_auto_delete_test.go      |  2 -
 .../openai/chat/handler_toolcall_test.go       | 15 ++--
 .../httpapi/openai/chat/test_helpers_test.go   |  7 +-
 .../openai/chat/vercel_prepare_test.go         |  2 -
 internal/httpapi/openai/chat/vercel_stream.go  | 11 +--
 .../httpapi/openai/deps_injection_test.go      | 29 ++-----
 .../httpapi/openai/file_inline_upload_test.go  | 12 +--
 internal/httpapi/openai/files_route_test.go    | 12 +--
 internal/httpapi/openai/history_split_test.go  | 14 ----
 .../openai/responses/empty_retry_runtime.go    |  2 +-
 internal/httpapi/openai/responses/handler.go   | 24 +-----
 .../openai/responses/responses_handler.go      | 41 +++++-----
 .../responses_stream_runtime_core.go           | 22 ++---
 .../openai/responses/responses_stream_test.go  | 22 ++---
 internal/httpapi/openai/shared/deps.go         |  9 --
 internal/httpapi/openai/stream_status_test.go  | 22 ++---
 internal/js/chat-stream/vercel_stream_impl.js  |  3 +-
 internal/promptcompat/request_normalize.go     | 13 +--
 tests/node/chat-stream.test.js                 |  2 -
 .../settings/CompatibilitySection.jsx          | 34 --------
 .../features/settings/SettingsContainer.jsx    |  3 -
 .../src/features/settings/useSettingsForm.js   |  7 --
 webui/src/locales/en.json                      |  5 +-
 webui/src/locales/zh.json                      |  5 +-
 58 files changed, 262 insertions(+), 517 deletions(-)
 delete mode 100644 webui/src/features/settings/CompatibilitySection.jsx

diff --git a/API.en.md b/API.en.md
index becb765..d3ab762 100644
--- a/API.en.md
+++ b/API.en.md
@@ -733,7 +733,6 @@ Reads runtime settings and status, including:
 - `success`
 - `admin` (`has_password_hash`, `jwt_expire_hours`, `jwt_valid_after_unix`, `default_password_warning`)
 - `runtime` (`account_max_inflight`, `account_max_queue`, `global_max_inflight`, `token_refresh_interval_hours`)
-- `compat` (`wide_input_strict_output`, `strip_reference_markers`)
 - `responses` / `embeddings`
 - `auto_delete` (`mode`: `none` / `single` / `all`; legacy `sessions=true` is still treated as `all`)
 - `current_input_file` (`enabled` defaults to `true`, plus `min_chars`)
@@ -747,7 +746,6 @@ Hot-updates runtime settings. Supported fields:
 
 - `admin.jwt_expire_hours`
 - `runtime.account_max_inflight` / `runtime.account_max_queue` / `runtime.global_max_inflight` / `runtime.token_refresh_interval_hours`
-- `compat.wide_input_strict_output` / `compat.strip_reference_markers`
 - `responses.store_ttl_seconds`
 - `embeddings.provider`
 - `auto_delete.mode`
@@ -776,9 +774,9 @@ Imports full config with:
 The request can send config directly, or wrapped as `{"config": {...}, "mode":"merge"}`.
 Query params `?mode=merge` / `?mode=replace` are also supported.
 
-`replace` mode replaces the full config shape while preserving Vercel sync metadata. `merge` mode merges `keys`, `api_keys`, `accounts`, and `model_aliases`, and overwrites non-empty fields under `admin`, `runtime`, `responses`, and `embeddings`. Manage `compat`, `auto_delete`, and `current_input_file` via `/admin/settings` or the config file; legacy `toolcall` fields are ignored.
+`replace` mode replaces the full config shape while preserving Vercel sync metadata. `merge` mode merges `keys`, `api_keys`, `accounts`, and `model_aliases`, and overwrites non-empty fields under `admin`, `runtime`, `responses`, and `embeddings`. Manage `auto_delete` and `current_input_file` via `/admin/settings` or the config file; legacy `compat` and `toolcall` fields are ignored.
 
-> Note: `merge` mode does not update `compat`, `auto_delete`, or `current_input_file`.
+> Note: `merge` mode does not update `auto_delete` or `current_input_file`.
 
 ### `GET /admin/config/export`
 
diff --git a/API.md b/API.md
index 46f55e3..833c0a5 100644
--- a/API.md
+++ b/API.md
@@ -740,7 +740,6 @@ data: {"type":"message_stop"}
 - `success`
 - `admin`(`has_password_hash`、`jwt_expire_hours`、`jwt_valid_after_unix`、`default_password_warning`)
 - `runtime`(`account_max_inflight`、`account_max_queue`、`global_max_inflight`、`token_refresh_interval_hours`)
-- `compat`(`wide_input_strict_output`、`strip_reference_markers`)
 - `responses` / `embeddings`
 - `auto_delete`(`mode`:`none` / `single` / `all`;旧配置 `sessions=true` 仍按 `all` 处理)
 - `current_input_file`(`enabled` 默认返回 `true`、`min_chars`)
@@ -754,7 +753,6 @@
 
 - `admin.jwt_expire_hours`
 - `runtime.account_max_inflight` / `runtime.account_max_queue` / `runtime.global_max_inflight` / `runtime.token_refresh_interval_hours`
-- `compat.wide_input_strict_output` / `compat.strip_reference_markers`
 - `responses.store_ttl_seconds`
 - `embeddings.provider`
 - `auto_delete.mode`
@@ -783,9 +781,9 @@
 请求可直接传配置对象,或使用 `{"config": {...}, "mode":"merge"}` 包裹格式。
 也支持在查询参数里传 `?mode=merge` / `?mode=replace`。
 
-`replace` 模式会按完整配置结构替换(保留 Vercel 同步元信息);`merge` 模式会合并 `keys`、`api_keys`、`accounts`、`model_aliases`,并覆盖 `admin`、`runtime`、`responses`、`embeddings` 中的非空字段。`compat`、`auto_delete`、`current_input_file` 建议通过 `/admin/settings` 或配置文件管理;`toolcall` 相关字段会被忽略。
+`replace` 模式会按完整配置结构替换(保留 Vercel 同步元信息);`merge` 模式会合并 `keys`、`api_keys`、`accounts`、`model_aliases`,并覆盖 `admin`、`runtime`、`responses`、`embeddings` 中的非空字段。`auto_delete`、`current_input_file` 建议通过 `/admin/settings` 或配置文件管理;`compat` 与 `toolcall` 相关字段会被忽略。
 
-> 注意:`merge` 模式不会更新 `compat`、`auto_delete`、`current_input_file`。
+> 注意:`merge` 模式不会更新 `auto_delete`、`current_input_file`。
 
 ### `GET /admin/config/export`
 
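A reading aid for the merge semantics above: list/map sections are unioned, scalar sections overwrite only when the incoming value is non-empty, and the retired `compat`/`toolcall` sections are dropped. The sketch below illustrates that policy with trimmed stand-in types; it is not the actual `internal/config` merge code, and details such as key de-duplication may differ.

```go
package main

import (
	"fmt"
	"strings"
)

// Stand-ins for the real internal/config types (assumption: only the
// fields needed to illustrate the documented merge-mode policy).
type Config struct {
	Keys         []string
	ModelAliases map[string]string
	Responses    struct{ StoreTTLSeconds int }
	Embeddings   struct{ Provider string }
}

// mergeConfig applies the documented merge-mode policy.
func mergeConfig(dst, src *Config) {
	dst.Keys = append(dst.Keys, src.Keys...) // merged, not replaced
	if dst.ModelAliases == nil {
		dst.ModelAliases = map[string]string{}
	}
	for k, v := range src.ModelAliases { // map union, incoming value wins
		dst.ModelAliases[k] = v
	}
	if src.Responses.StoreTTLSeconds > 0 { // non-empty scalars overwrite
		dst.Responses.StoreTTLSeconds = src.Responses.StoreTTLSeconds
	}
	if strings.TrimSpace(src.Embeddings.Provider) != "" {
		dst.Embeddings.Provider = src.Embeddings.Provider
	}
	// auto_delete and current_input_file are managed via /admin/settings,
	// so merge mode never touches them; compat and toolcall are ignored.
}

func main() {
	dst := Config{Keys: []string{"k1"}}
	src := Config{Keys: []string{"k2"}, ModelAliases: map[string]string{"o3": "deepseek-v4-pro"}}
	mergeConfig(&dst, &src)
	fmt.Println(dst.Keys, dst.ModelAliases) // [k1 k2] map[o3:deepseek-v4-pro]
}
```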
diff --git a/config.example.json b/config.example.json
index d8e683b..2737010 100644
--- a/config.example.json
+++ b/config.example.json
@@ -43,10 +43,6 @@
     "gpt-5.3-codex": "deepseek-v4-pro",
     "o3": "deepseek-v4-pro"
   },
-  "compat": {
-    "wide_input_strict_output": true,
-    "strip_reference_markers": true
-  },
   "responses": {
     "store_ttl_seconds": 900
   },
diff --git a/docs/ARCHITECTURE.en.md b/docs/ARCHITECTURE.en.md
index ae06a46..aa93142 100644
--- a/docs/ARCHITECTURE.en.md
+++ b/docs/ARCHITECTURE.en.md
@@ -65,7 +65,7 @@ ds2api/
 │   ├── textclean/           # Text cleanup
 │   ├── toolcall/            # Tool-call parsing and repair
 │   ├── toolstream/          # Go streaming tool-call anti-leak and delta detection
-│   ├── translatorcliproxy/  # Cross-protocol translation bridge
+│   ├── translatorcliproxy/  # Vercel/fallback/test protocol translation bridge
 │   ├── util/                # Shared utility helpers
 │   ├── version/             # Version query/compare
 │   └── webui/               # WebUI static hosting logic
@@ -187,12 +187,12 @@ flowchart LR
 
 - `internal/server`: router tree + middlewares (health, protocol routes, Admin/WebUI).
 - `internal/httpapi/openai/*`: OpenAI HTTP surface split into chat, responses, files, embeddings, history, and shared packages; chat/responses share the promptcompat, stream, and toolcall semantics.
-- `internal/httpapi/{claude,gemini}`: protocol adapters that normalize into the same prompt compatibility semantics; direct paths share DeepSeek session/PoW/completion execution through `completionruntime`, while Vercel/proxy paths can still translate through `translatorcliproxy` into the OpenAI handler.
+- `internal/httpapi/{claude,gemini}`: protocol adapters that normalize into the same prompt compatibility semantics; normal direct paths must share DeepSeek session/PoW/completion execution through `completionruntime`, while `translatorcliproxy` is reserved for Vercel prepare/release, missing-backend fallback, and regression tests.
 - `internal/httpapi/requestbody`: shared HTTP body reading, JSON pre-validation, and UTF-8 error helpers across protocol adapters.
 - `internal/promptcompat`: compatibility core for turning OpenAI/Claude/Gemini requests into DeepSeek web-chat plain-text context.
 - `internal/assistantturn`: Go output-side canonical semantics, converting DeepSeek SSE collection results and stream finalization state into assistant turns and centralizing thinking, tool call, citation, usage, stop/error behavior.
 - `internal/completionruntime`: shared Go completion execution helpers for DeepSeek session/PoW/call startup, non-stream collection, and empty-output retry; streaming paths use it to start upstream requests, continue to use `internal/stream` for real-time consumption, and use `assistantturn` during finalization.
-- `internal/translatorcliproxy`: structure translation between Claude/Gemini and OpenAI.
+- `internal/translatorcliproxy`: bridge compatibility layer for Claude/Gemini and OpenAI shape translation; it is not the main business protocol conversion center.
 - `internal/deepseek/{client,protocol,transport}`: upstream requests, sessions, PoW adaptation, protocol constants, and transport details.
 - `internal/js/chat-stream` + `api/chat-stream.js`: Vercel Node streaming bridge; Go prepare/release owns auth, account lease, and completion payload assembly, while Node relays real-time SSE with Go-aligned finalization and tool sieve semantics.
 - `internal/stream` + `internal/sse`: Go stream parsing and incremental assembly.
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 61557d4..a4da59e 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -65,7 +65,7 @@ ds2api/
 │   ├── textclean/           # 文本清洗
 │   ├── toolcall/            # 工具调用解析与修复
 │   ├── toolstream/          # Go 流式 tool call 防泄漏与增量检测
-│   ├── translatorcliproxy/  # 多协议互转桥
+│   ├── translatorcliproxy/  # Vercel/fallback/测试用协议互转桥
 │   ├── util/                # 通用工具函数
 │   ├── version/             # 版本查询/比较
 │   └── webui/               # WebUI 静态托管相关逻辑
@@ -187,12 +187,12 @@ flowchart LR
 
 - `internal/server`:路由树和中间件挂载(健康检查、协议入口、Admin/WebUI)。
 - `internal/httpapi/openai/*`:OpenAI HTTP surface,按 chat、responses、files、embeddings、history、shared 拆分;chat/responses 共享 promptcompat、stream、toolcall 等核心语义。
-- `internal/httpapi/{claude,gemini}`:协议输入输出适配,归一到同一套 prompt compatibility 语义;直连路径通过 `completionruntime` 共享 DeepSeek session/PoW/completion 调用,Vercel/代理路径仍可经 `translatorcliproxy` 转到 OpenAI handler。
+- `internal/httpapi/{claude,gemini}`:协议输入输出适配,归一到同一套 prompt compatibility 语义;正常直连路径必须通过 `completionruntime` 共享 DeepSeek session/PoW/completion 调用,`translatorcliproxy` 仅保留给 Vercel prepare/release、后端缺失 fallback 和回归测试。
 - `internal/httpapi/requestbody`:跨协议复用的请求体读取、JSON 解码前置校验与 UTF-8 错误处理辅助。
 - `internal/promptcompat`:OpenAI/Claude/Gemini 请求到 DeepSeek 网页纯文本上下文的兼容内核。
 - `internal/assistantturn`:Go 输出侧统一语义层,把 DeepSeek SSE 收集结果和流式收尾状态归一成 assistant turn,集中处理 thinking、tool call、citation、usage、stop/error 语义。
 - `internal/completionruntime`:Go surface 共享的 completion 执行辅助,负责 DeepSeek session/PoW/call 启动、非流式 collect 和 empty-output retry;流式路径复用它启动上游请求,继续用 `internal/stream` 做实时消费,并在最终收尾阶段接入 `assistantturn`。
-- `internal/translatorcliproxy`:Claude/Gemini 与 OpenAI 结构互转。
+- `internal/translatorcliproxy`:Claude/Gemini 与 OpenAI 结构互转的桥接兼容层,不作为主业务协议转换中心。
 - `internal/deepseek/{client,protocol,transport}`:上游请求、会话、PoW 适配、协议常量与传输层。
 - `internal/js/chat-stream` + `api/chat-stream.js`:Vercel Node 流式桥;Go prepare/release 管理鉴权、账号租约和 completion payload,Node 侧负责实时 SSE 转发并保持 Go 对齐的终结态和 tool sieve 语义。
 - `internal/stream` + `internal/sse`:Go 流式解析与增量处理。
diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md
index 4002e13..d3fe7cf 100644
--- a/docs/DEVELOPMENT.md
+++ b/docs/DEVELOPMENT.md
@@ -68,7 +68,7 @@ gofmt -w
 3. 请求归一化:`internal/promptcompat` 或协议转换包。
 4. 上游请求:`internal/deepseek/client`。
 5. 流式输出:`internal/stream`、`internal/sse`、`internal/toolstream`。
-6. 响应格式:`internal/format/*` 或 `internal/translatorcliproxy`。
+6. 响应格式:主路径看 `internal/assistantturn` 与 `internal/format/*`;`internal/translatorcliproxy` 只用于 Vercel/fallback/test 桥接。
 
 对话记录页面问题优先检查:
 
diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md
index 83d7763..8d18762 100644
--- a/docs/prompt-compatibility.md
+++ b/docs/prompt-compatibility.md
@@ -108,7 +108,7 @@ DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools`
 - 对外返回给客户端的 `prompt_tokens` / `input_tokens` / `promptTokenCount` 不再按“最后一条消息”或字符粗估近似返回,而是基于**完整上下文 prompt**做 tokenizer 计数;为了避免上下文实际超限但客户端误以为还能塞下,请求侧上下文 token 会额外保守上浮一点,宁可略大也不低估。
 - 当前 `/v1/chat/completions` 业务路径仍是“每次请求新建一个远端 `chat_session_id`,并默认发送 `parent_message_id: null`”;因此 DS2API 对外默认表现为“新会话 + prompt 拼历史”,而不是复用 DeepSeek 原生会话树。
 - 但 DeepSeek 远端本身支持同一 `chat_session_id` 的跨轮次持续对话。2026-04-27 已用项目内现有 DeepSeek client 做过一次不改业务代码的双轮实测:同一 `chat_session_id` 下,第 1 轮返回 `request_message_id=1` / `response_message_id=2` / 文本 `SESSION_TEST_ONE`;第 2 轮重新获取一次 PoW,并发送 `parent_message_id=2` 后,成功返回 `request_message_id=3` / `response_message_id=4` / 文本 `SESSION_TEST_TWO`。这说明“同远端会话持续聊天”能力存在,且每轮需要携带正确的 parent/message 链接信息,同时重新获取对应轮次可用的 PoW。
-- OpenAI Chat / Responses 原生走统一 OpenAI 标准化与 DeepSeek payload 组装;Claude / Gemini 会尽量复用 OpenAI prompt/tool 语义,其中 Gemini 直接复用 `promptcompat.BuildOpenAIPromptForAdapter`。Go 主服务新增 `completionruntime` 启动层,统一执行 DeepSeek session/PoW/call;输出侧新增 `assistantturn` 语义层:非流式 OpenAI Chat / Responses / Claude / Gemini 会把 DeepSeek SSE 收集结果先归一成同一份 assistant turn,再分别渲染成各协议原生外形;流式 OpenAI Chat / Responses / Claude / Gemini 继续保持各协议实时 SSE framing,但最终收尾的 tool fallback、schema 归一、usage、empty-output / content-filter 错误语义同样由 `assistantturn` 判定。Claude / Gemini 的常规 Go 主路径不再依赖内部 `httptest` 转发到 OpenAI handler;`translatorcliproxy` 仍保留用于 Vercel bridge、兼容工具和回归测试。
+- OpenAI Chat / Responses 原生走统一 OpenAI 标准化与 DeepSeek payload 组装;Claude / Gemini 会尽量复用 OpenAI prompt/tool 语义,其中 Gemini 直接复用 `promptcompat.BuildOpenAIPromptForAdapter`。Go 主服务新增 `completionruntime` 启动层,统一执行 DeepSeek session/PoW/call;输出侧新增 `assistantturn` 语义层:非流式 OpenAI Chat / Responses / Claude / Gemini 会把 DeepSeek SSE 收集结果先归一成同一份 assistant turn,再分别渲染成各协议原生外形;流式 OpenAI Chat / Responses / Claude / Gemini 继续保持各协议实时 SSE framing,但最终收尾的 tool fallback、schema 归一、usage、empty-output / content-filter 错误语义同样由 `assistantturn` 判定。Claude / Gemini 的常规 Go 主路径不再依赖内部 `httptest` 转发到 OpenAI handler;`translatorcliproxy` 仅保留用于 Vercel bridge、后端缺失 fallback 和回归测试,不作为主业务协议转换中心。
 - Vercel Node 流式路径本轮不迁移,仍使用现有 Node bridge / stream-tool-sieve 实现;后续若变更 Node 流式语义,需要按 `assistantturn` 的 Go canonical 输出语义同步对齐。
 - 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关;关闭时即使上游返回 `response/thinking_content`,兼容层也不会把它当作可见正文输出。若最终解析出的模型名带 `-nothinking` 后缀,则会无条件强制关闭 thinking,优先级高于请求体中的 `thinking` / `reasoning` / `reasoning_effort`。未显式关闭时,各 surface 会按解析后的 DeepSeek 模型默认能力开启 thinking,并用各自协议的原生形态暴露:OpenAI Chat 为 `reasoning_content`,OpenAI Responses 为 `response.reasoning.delta` / `reasoning` content,Claude 为 `thinking` block / `thinking_delta`,Gemini 为 `thought: true` part。
 - 对 OpenAI Chat / Responses 的非流式收尾,如果最终可见正文为空,兼容层会优先尝试把思维链中的独立 DSML / XML 工具块当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测,但不会因为思维链内容去中途拦截或改写流式输出;真正的工具识别始终基于原始上游文本,而不是基于“已经做过可见输出清洗”的版本,因此即使最终可见层会剥离完整 leaked DSML / XML `tool_calls` wrapper、并抑制全空参数或无效 wrapper 块,也不会影响真实工具调用转成结构化 `tool_calls` / `function_call`。补发结果会作为本轮 assistant 的结构化 `tool_calls` / `function_call` 输出返回,而不是塞进 `content` 文本;如果客户端没有开启 thinking / reasoning,思维链只用于检测,不会作为 `reasoning_content` 或可见正文暴露。只有正文为空且思维链里也没有可执行工具调用时,才继续按空回复错误处理。
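The thinking-toggle precedence described in prompt-compatibility.md reduces to a small pure function. A minimal sketch under assumed names (the real normalization lives in `internal/promptcompat` and is not shaped exactly like this):

```go
package main

import (
	"fmt"
	"strings"
)

// resolveThinking mirrors the documented precedence: a -nothinking model
// suffix unconditionally disables thinking; otherwise an explicit request
// flag (thinking / reasoning / reasoning_effort, already collapsed to a
// single *bool here) wins; otherwise the resolved model's default applies.
func resolveThinking(model string, requested *bool, modelDefault bool) bool {
	if strings.HasSuffix(model, "-nothinking") {
		return false
	}
	if requested != nil {
		return *requested
	}
	return modelDefault
}

func main() {
	on := true
	fmt.Println(resolveThinking("deepseek-v4-pro-nothinking", &on, true)) // false: suffix wins
	fmt.Println(resolveThinking("deepseek-v4-pro", nil, true))            // true: model default
}
```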
diff --git a/internal/assistantturn/turn.go b/internal/assistantturn/turn.go
index 4f8b36d..a115b17 100644
--- a/internal/assistantturn/turn.go
+++ b/internal/assistantturn/turn.go
@@ -51,6 +51,20 @@ type Turn struct {
 	Error *OutputError
 }
 
+type FinalizeOptions struct {
+	AlreadyEmittedToolCalls bool
+}
+
+type FinalOutcome struct {
+	FinishReason     string
+	Error            *OutputError
+	Usage            Usage
+	HasToolCalls     bool
+	HasVisibleText   bool
+	HasVisibleOutput bool
+	ShouldFail       bool
+}
+
 type BuildOptions struct {
 	Model  string
 	Prompt string
@@ -215,6 +229,48 @@ func ShouldRetryEmptyOutput(turn Turn, attempts, maxAttempts int) bool {
 		strings.TrimSpace(turn.Thinking) == ""
 }
 
+func FinalizeTurn(turn Turn, opts FinalizeOptions) FinalOutcome {
+	hasToolCalls := len(turn.ToolCalls) > 0 || opts.AlreadyEmittedToolCalls
+	hasVisibleText := strings.TrimSpace(turn.Text) != ""
+	hasVisibleThinking := strings.TrimSpace(turn.Thinking) != ""
+	err := turn.Error
+	if hasToolCalls {
+		err = nil
+	}
+	finishReason := FinishReason(turn)
+	if hasToolCalls {
+		finishReason = "tool_calls"
+	}
+	return FinalOutcome{
+		FinishReason:     finishReason,
+		Error:            err,
+		Usage:            turn.Usage,
+		HasToolCalls:     hasToolCalls,
+		HasVisibleText:   hasVisibleText,
+		HasVisibleOutput: hasVisibleText || hasVisibleThinking || hasToolCalls,
+		ShouldFail:       err != nil,
+	}
+}
+
+func OpenAIChatUsage(turn Turn) map[string]any {
+	return map[string]any{
+		"prompt_tokens":     turn.Usage.InputTokens,
+		"completion_tokens": turn.Usage.OutputTokens,
+		"total_tokens":      turn.Usage.TotalTokens,
+		"completion_tokens_details": map[string]any{
+			"reasoning_tokens": turn.Usage.ReasoningTokens,
+		},
+	}
+}
+
+func OpenAIResponsesUsage(turn Turn) map[string]any {
+	return map[string]any{
+		"input_tokens":  turn.Usage.InputTokens,
+		"output_tokens": turn.Usage.OutputTokens,
+		"total_tokens":  turn.Usage.TotalTokens,
+	}
+}
+
 func FinishReason(turn Turn) string {
 	switch turn.StopReason {
 	case StopReasonToolCalls:
diff --git a/internal/assistantturn/turn_test.go b/internal/assistantturn/turn_test.go
index 4aca558..15565ce 100644
--- a/internal/assistantturn/turn_test.go
+++ b/internal/assistantturn/turn_test.go
@@ -98,3 +98,30 @@ func TestBuildTurnFromStreamSnapshotAlreadyEmittedToolAvoidsEmptyError(t *testin
 		t.Fatalf("stop reason mismatch: %q", turn.StopReason)
 	}
 }
+
+func TestFinalizeTurnStopOutcome(t *testing.T) {
+	turn := BuildTurnFromCollected(sse.CollectResult{Text: "hello"}, BuildOptions{})
+	outcome := FinalizeTurn(turn, FinalizeOptions{})
+	if outcome.ShouldFail {
+		t.Fatalf("unexpected failure: %#v", outcome.Error)
+	}
+	if outcome.FinishReason != "stop" || !outcome.HasVisibleText || !outcome.HasVisibleOutput {
+		t.Fatalf("unexpected outcome: %#v", outcome)
+	}
+}
+
+func TestFinalizeTurnToolCallsOutcome(t *testing.T) {
+	turn := BuildTurnFromStreamSnapshot(StreamSnapshot{AlreadyEmittedCalls: true}, BuildOptions{})
+	outcome := FinalizeTurn(turn, FinalizeOptions{AlreadyEmittedToolCalls: true})
+	if outcome.ShouldFail || outcome.FinishReason != "tool_calls" || !outcome.HasToolCalls {
+		t.Fatalf("unexpected tool outcome: %#v", outcome)
+	}
+}
+
+func TestFinalizeTurnContentFilterOutcome(t *testing.T) {
+	turn := BuildTurnFromCollected(sse.CollectResult{ContentFilter: true}, BuildOptions{})
+	outcome := FinalizeTurn(turn, FinalizeOptions{})
+	if !outcome.ShouldFail || outcome.Error == nil || outcome.Error.Code != "content_filter" {
+		t.Fatalf("expected content filter failure, got %#v", outcome)
+	}
+}
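For reviewers, the intended call shape of the new API: FinalizeTurn folds already-streamed tool calls in via FinalizeOptions, suppresses upstream errors when tool calls exist, and forces the finish reason to `tool_calls`. A minimal consumer sketch (renderFinish and its return shape are assumptions; only the assistantturn names come from this patch):

```go
package main

import (
	"fmt"

	"ds2api/internal/assistantturn"
)

// renderFinish shows how the chat/responses/claude/gemini finalizers are
// expected to consume FinalizeTurn: fail fast on ShouldFail (Error is
// non-nil in that case), otherwise emit finish_reason and usage.
func renderFinish(turn assistantturn.Turn, streamedToolCalls bool) (map[string]any, *assistantturn.OutputError) {
	outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{
		AlreadyEmittedToolCalls: streamedToolCalls,
	})
	if outcome.ShouldFail {
		return nil, outcome.Error
	}
	return map[string]any{
		"finish_reason": outcome.FinishReason, // "tool_calls" wins over stop/error
		"usage":         assistantturn.OpenAIChatUsage(turn),
	}, nil
}

func main() {
	body, errOut := renderFinish(assistantturn.Turn{Text: "hello"}, false)
	fmt.Println(body, errOut)
}
```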
diff --git a/internal/config/codec.go b/internal/config/codec.go
index ac9086d..3e918a1 100644
--- a/internal/config/codec.go
+++ b/internal/config/codec.go
@@ -35,9 +35,6 @@ func (c Config) MarshalJSON() ([]byte, error) {
 	if c.Runtime.AccountMaxInflight > 0 || c.Runtime.AccountMaxQueue > 0 || c.Runtime.GlobalMaxInflight > 0 || c.Runtime.TokenRefreshIntervalHours > 0 {
 		m["runtime"] = c.Runtime
 	}
-	if c.Compat.WideInputStrictOutput != nil || c.Compat.StripReferenceMarkers != nil {
-		m["compat"] = c.Compat
-	}
 	if c.Responses.StoreTTLSeconds > 0 {
 		m["responses"] = c.Responses
 	}
@@ -100,9 +97,7 @@ func (c *Config) UnmarshalJSON(b []byte) error {
 				return fmt.Errorf("invalid field %q: %w", k, err)
 			}
 		case "compat":
-			if err := json.Unmarshal(v, &c.Compat); err != nil {
-				return fmt.Errorf("invalid field %q: %w", k, err)
-			}
+			// Removed field ignored instead of persisted.
 		case "toolcall":
 			// Legacy field ignored. Toolcall policy is fixed and no longer configurable.
 		case "responses":
@@ -155,13 +150,9 @@ func (c Config) Clone() Config {
 		ModelAliases: cloneStringMap(c.ModelAliases),
 		Admin:        c.Admin,
 		Runtime:      c.Runtime,
-		Compat: CompatConfig{
-			WideInputStrictOutput: cloneBoolPtr(c.Compat.WideInputStrictOutput),
-			StripReferenceMarkers: cloneBoolPtr(c.Compat.StripReferenceMarkers),
-		},
-		Responses:  c.Responses,
-		Embeddings: c.Embeddings,
-		AutoDelete: c.AutoDelete,
+		Responses:  c.Responses,
+		Embeddings: c.Embeddings,
+		AutoDelete: c.AutoDelete,
 		CurrentInputFile: CurrentInputFileConfig{
 			Enabled:  cloneBoolPtr(c.CurrentInputFile.Enabled),
 			MinChars: c.CurrentInputFile.MinChars,
diff --git a/internal/config/config.go b/internal/config/config.go
index 8754197..b63bd5d 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -15,7 +15,6 @@ type Config struct {
 	ModelAliases map[string]string `json:"model_aliases,omitempty"`
 	Admin        AdminConfig       `json:"admin,omitempty"`
 	Runtime      RuntimeConfig     `json:"runtime,omitempty"`
-	Compat       CompatConfig      `json:"compat,omitempty"`
 	Responses    ResponsesConfig   `json:"responses,omitempty"`
 	Embeddings   EmbeddingsConfig  `json:"embeddings,omitempty"`
 	AutoDelete   AutoDeleteConfig  `json:"auto_delete"`
@@ -141,11 +140,6 @@ func (c *Config) normalizeModelAliases() {
 	}
 }
 
-type CompatConfig struct {
-	WideInputStrictOutput *bool `json:"wide_input_strict_output,omitempty"`
-	StripReferenceMarkers *bool `json:"strip_reference_markers,omitempty"`
-}
-
 type AdminConfig struct {
 	PasswordHash   string `json:"password_hash,omitempty"`
 	JWTExpireHours int    `json:"jwt_expire_hours,omitempty"`
diff --git a/internal/config/config_edge_test.go b/internal/config/config_edge_test.go
index 88cf740..b87154e 100644
--- a/internal/config/config_edge_test.go
+++ b/internal/config/config_edge_test.go
@@ -163,8 +163,6 @@ func TestLowerFunction(t *testing.T) {
 // ─── Config.MarshalJSON / UnmarshalJSON roundtrip ────────────────────
 
 func TestConfigJSONRoundtrip(t *testing.T) {
-	trueVal := true
-	falseVal := false
 	cfg := Config{
 		Keys:     []string{"key1", "key2"},
 		Accounts: []Account{{Email: "user@example.com", Password: "pass", Token: "tok"}},
@@ -175,10 +173,6 @@ func TestConfigJSONRoundtrip(t *testing.T) {
 		Runtime: RuntimeConfig{
 			TokenRefreshIntervalHours: 12,
 		},
-		Compat: CompatConfig{
-			WideInputStrictOutput: &trueVal,
-			StripReferenceMarkers: &falseVal,
-		},
 		VercelSyncHash: "hash123",
 		VercelSyncTime: 1234567890,
 		AdditionalFields: map[string]any{
@@ -211,12 +205,6 @@ func TestConfigJSONRoundtrip(t *testing.T) {
 	if decoded.AutoDelete.Mode != "single" {
 		t.Fatalf("unexpected auto delete mode: %#v", decoded.AutoDelete.Mode)
 	}
-	if decoded.Compat.WideInputStrictOutput == nil || !*decoded.Compat.WideInputStrictOutput {
-		t.Fatalf("unexpected compat wide_input_strict_output: %#v", decoded.Compat.WideInputStrictOutput)
-	}
-	if decoded.Compat.StripReferenceMarkers == nil || *decoded.Compat.StripReferenceMarkers {
-		t.Fatalf("unexpected compat strip_reference_markers: %#v", decoded.Compat.StripReferenceMarkers)
-	}
 	if decoded.VercelSyncHash != "hash123" {
 		t.Fatalf("unexpected vercel sync hash: %q", decoded.VercelSyncHash)
 	}
@@ -301,14 +289,10 @@ func TestConfigUnmarshalJSONIgnoresRemovedHistorySplit(t *testing.T) {
 // ─── Config.Clone ────────────────────────────────────────────────────
 
 func TestConfigCloneIsDeepCopy(t *testing.T) {
-	falseVal := false
 	cfg := Config{
-		Keys:         []string{"key1"},
-		Accounts:     []Account{{Email: "user@test.com", Token: "token"}},
-		ModelAliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"},
-		Compat: CompatConfig{
-			StripReferenceMarkers: &falseVal,
-		},
+		Keys:             []string{"key1"},
+		Accounts:         []Account{{Email: "user@test.com", Token: "token"}},
+		ModelAliases:     map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"},
 		AdditionalFields: map[string]any{"custom": "value"},
 	}
 
@@ -318,9 +302,6 @@ func TestConfigCloneIsDeepCopy(t *testing.T) {
 	cfg.Keys[0] = "modified"
 	cfg.Accounts[0].Email = "modified@test.com"
 	cfg.ModelAliases["claude-sonnet-4-6"] = "modified-model"
-	if cfg.Compat.StripReferenceMarkers != nil {
-		*cfg.Compat.StripReferenceMarkers = true
-	}
 
 	// Cloned should not be affected
 	if cloned.Keys[0] != "key1" {
@@ -332,9 +313,6 @@ func TestConfigCloneIsDeepCopy(t *testing.T) {
 	if cloned.ModelAliases["claude-sonnet-4-6"] != "deepseek-v4-flash" {
 		t.Fatalf("clone model aliases was affected: %#v", cloned.ModelAliases)
 	}
-	if cloned.Compat.StripReferenceMarkers == nil || *cloned.Compat.StripReferenceMarkers {
-		t.Fatalf("clone compat was affected: %#v", cloned.Compat.StripReferenceMarkers)
-	}
 }
 
 func TestConfigCloneNilMaps(t *testing.T) {
@@ -473,53 +451,9 @@ func TestStoreFindAccountNotFound(t *testing.T) {
 	}
 }
 
-func TestStoreCompatWideInputStrictOutputDefaultTrue(t *testing.T) {
-	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[]}`)
-	store := LoadStore()
-	if !store.CompatWideInputStrictOutput() {
-		t.Fatal("expected default wide_input_strict_output=true when unset")
-	}
-}
-
-func TestStoreCompatWideInputStrictOutputCanDisable(t *testing.T) {
-	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[],"compat":{"wide_input_strict_output":false}}`)
-	store := LoadStore()
-	if store.CompatWideInputStrictOutput() {
-		t.Fatal("expected wide_input_strict_output=false when explicitly configured")
-	}
-
-	snap := store.Snapshot()
-	data, err := snap.MarshalJSON()
-	if err != nil {
-		t.Fatalf("marshal failed: %v", err)
-	}
-	var out map[string]any
-	if err := json.Unmarshal(data, &out); err != nil {
-		t.Fatalf("decode failed: %v", err)
-	}
-	rawCompat, ok := out["compat"].(map[string]any)
-	if !ok {
-		t.Fatalf("expected compat in marshaled output, got %#v", out)
-	}
-	if rawCompat["wide_input_strict_output"] != false {
-		t.Fatalf("expected explicit false in compat, got %#v", rawCompat)
-	}
-}
-
-func TestStoreCompatStripReferenceMarkersDefaultTrue(t *testing.T) {
-	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[]}`)
-	store := LoadStore()
-	if !store.CompatStripReferenceMarkers() {
-		t.Fatal("expected default strip_reference_markers=true when unset")
-	}
-}
-
-func TestStoreCompatStripReferenceMarkersCanDisable(t *testing.T) {
+func TestStoreIgnoresRemovedCompatConfig(t *testing.T) {
 	t.Setenv("DS2API_CONFIG_JSON", `{"keys":["k1"],"accounts":[],"compat":{"strip_reference_markers":false}}`)
 	store := LoadStore()
-	if store.CompatStripReferenceMarkers() {
-		t.Fatal("expected strip_reference_markers=false when explicitly configured")
-	}
 
 	snap := store.Snapshot()
 	data, err := snap.MarshalJSON()
@@ -530,12 +464,8 @@ func TestStoreCompatStripReferenceMarkersCanDisable(t *testing.T) {
 	if err := json.Unmarshal(data, &out); err != nil {
 		t.Fatalf("decode failed: %v", err)
 	}
-	rawCompat, ok := out["compat"].(map[string]any)
-	if !ok {
-		t.Fatalf("expected compat in marshaled output, got %#v", out)
-	}
-	if rawCompat["strip_reference_markers"] != false {
-		t.Fatalf("expected explicit false in compat, got %#v", rawCompat)
+	if _, ok := out["compat"]; ok {
+		t.Fatalf("expected removed compat field not to marshal, got %#v", out)
 	}
 }
 
diff --git a/internal/config/store_accessors.go b/internal/config/store_accessors.go
index dc7ae57..41fe8fb 100644
--- a/internal/config/store_accessors.go
+++ b/internal/config/store_accessors.go
@@ -21,24 +21,6 @@ func (s *Store) ModelAliases() map[string]string {
 	return out
 }
 
-func (s *Store) CompatWideInputStrictOutput() bool {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-	if s.cfg.Compat.WideInputStrictOutput == nil {
-		return true
-	}
-	return *s.cfg.Compat.WideInputStrictOutput
-}
-
-func (s *Store) CompatStripReferenceMarkers() bool {
-	s.mu.RLock()
-	defer s.mu.RUnlock()
-	if s.cfg.Compat.StripReferenceMarkers == nil {
-		return true
-	}
-	return *s.cfg.Compat.StripReferenceMarkers
-}
-
 func (s *Store) ToolcallMode() string {
 	return "feature_match"
 }
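The codec change above keeps old config files loading: in the raw-key switch, a retired section is matched and deliberately discarded, so it can neither fail decoding nor survive a re-marshal. A self-contained sketch of that pattern (simplified types, not the real codec):

```go
package main

import (
	"encoding/json"
	"fmt"
)

type cfg struct {
	Keys []string
}

// UnmarshalJSON demonstrates the removed-field pattern used in
// internal/config/codec.go: "compat" and "toolcall" are accepted on
// input, never stored, and therefore vanish on the next marshal.
func (c *cfg) UnmarshalJSON(b []byte) error {
	var raw map[string]json.RawMessage
	if err := json.Unmarshal(b, &raw); err != nil {
		return err
	}
	for k, v := range raw {
		switch k {
		case "keys":
			if err := json.Unmarshal(v, &c.Keys); err != nil {
				return err
			}
		case "compat", "toolcall":
			// Removed/legacy sections: ignored instead of persisted.
		}
	}
	return nil
}

func main() {
	var c cfg
	_ = json.Unmarshal([]byte(`{"keys":["k1"],"compat":{"strip_reference_markers":false}}`), &c)
	out, _ := json.Marshal(map[string]any{"keys": c.Keys})
	fmt.Println(string(out)) // {"keys":["k1"]} — compat dropped
}
```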
raw["account_max_queue"]; exists { n := intFrom(v) if err := config.ValidateIntRange("runtime.account_max_queue", n, 1, 200000, true); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.AccountMaxQueue = n } if v, exists := raw["global_max_inflight"]; exists { n := intFrom(v) if err := config.ValidateIntRange("runtime.global_max_inflight", n, 1, 200000, true); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.GlobalMaxInflight = n } if v, exists := raw["token_refresh_interval_hours"]; exists { n := intFrom(v) if err := config.ValidateIntRange("runtime.token_refresh_interval_hours", n, 1, 720, true); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.TokenRefreshIntervalHours = n } if cfg.AccountMaxInflight > 0 && cfg.GlobalMaxInflight > 0 && cfg.GlobalMaxInflight < cfg.AccountMaxInflight { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight") + return nil, nil, nil, nil, nil, nil, nil, nil, fmt.Errorf("runtime.global_max_inflight must be >= runtime.account_max_inflight") } runtimeCfg = cfg } - if raw, ok := req["compat"].(map[string]any); ok { - cfg := &config.CompatConfig{} - if v, exists := raw["wide_input_strict_output"]; exists { - b := boolFrom(v) - cfg.WideInputStrictOutput = &b - } - if v, exists := raw["strip_reference_markers"]; exists { - b := boolFrom(v) - cfg.StripReferenceMarkers = &b - } - compatCfg = cfg - } - if raw, ok := req["responses"].(map[string]any); ok { cfg := &config.ResponsesConfig{} if v, exists := raw["store_ttl_seconds"]; exists { n := intFrom(v) if err := config.ValidateIntRange("responses.store_ttl_seconds", n, 30, 86400, true); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.StoreTTLSeconds = n } @@ -112,7 +98,7 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi if v, exists := raw["provider"]; exists { p := strings.TrimSpace(fmt.Sprintf("%v", v)) if err := config.ValidateTrimmedString("embeddings.provider", p, false); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.Provider = p } @@ -138,7 +124,7 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi if v, exists := raw["mode"]; exists { mode := strings.ToLower(strings.TrimSpace(fmt.Sprintf("%v", v))) if err := config.ValidateAutoDeleteMode(mode); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } if mode == "" { mode = "none" @@ -160,12 +146,12 @@ func parseSettingsUpdateRequest(req map[string]any) (*config.AdminConfig, *confi if v, exists := raw["min_chars"]; exists { n := intFrom(v) if err := config.ValidateIntRange("current_input_file.min_chars", n, 0, 100000000, true); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } cfg.MinChars = n } if err := config.ValidateCurrentInputFileConfig(*cfg); err != nil { - return nil, nil, nil, nil, nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, nil, nil, err } currentInputCfg = cfg } @@ -182,5 +168,5 @@ func parseSettingsUpdateRequest(req map[string]any) 
(*config.AdminConfig, *confi thinkingInjCfg = cfg } - return adminCfg, runtimeCfg, compatCfg, respCfg, embCfg, autoDeleteCfg, currentInputCfg, thinkingInjCfg, aliasMap, nil + return adminCfg, runtimeCfg, respCfg, embCfg, autoDeleteCfg, currentInputCfg, thinkingInjCfg, aliasMap, nil } diff --git a/internal/httpapi/admin/settings/handler_settings_read.go b/internal/httpapi/admin/settings/handler_settings_read.go index 1997b01..1e5e6b4 100644 --- a/internal/httpapi/admin/settings/handler_settings_read.go +++ b/internal/httpapi/admin/settings/handler_settings_read.go @@ -27,7 +27,6 @@ func (h *Handler) getSettings(w http.ResponseWriter, _ *http.Request) { "global_max_inflight": h.Store.RuntimeGlobalMaxInflight(recommended), "token_refresh_interval_hours": h.Store.RuntimeTokenRefreshIntervalHours(), }, - "compat": snap.Compat, "responses": snap.Responses, "embeddings": snap.Embeddings, "auto_delete": snap.AutoDelete, diff --git a/internal/httpapi/admin/settings/handler_settings_write.go b/internal/httpapi/admin/settings/handler_settings_write.go index bb740f5..3c8d143 100644 --- a/internal/httpapi/admin/settings/handler_settings_write.go +++ b/internal/httpapi/admin/settings/handler_settings_write.go @@ -17,7 +17,7 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) { return } - adminCfg, runtimeCfg, compatCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, currentInputCfg, thinkingInjCfg, aliasMap, err := parseSettingsUpdateRequest(req) + adminCfg, runtimeCfg, responsesCfg, embeddingsCfg, autoDeleteCfg, currentInputCfg, thinkingInjCfg, aliasMap, err := parseSettingsUpdateRequest(req) if err != nil { writeJSON(w, http.StatusBadRequest, map[string]any{"detail": err.Error()}) return @@ -53,14 +53,6 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) { c.Runtime.TokenRefreshIntervalHours = runtimeCfg.TokenRefreshIntervalHours } } - if compatCfg != nil { - if compatCfg.WideInputStrictOutput != nil { - c.Compat.WideInputStrictOutput = compatCfg.WideInputStrictOutput - } - if compatCfg.StripReferenceMarkers != nil { - c.Compat.StripReferenceMarkers = compatCfg.StripReferenceMarkers - } - } if responsesCfg != nil && responsesCfg.StoreTTLSeconds > 0 { c.Responses.StoreTTLSeconds = responsesCfg.StoreTTLSeconds } diff --git a/internal/httpapi/admin/shared/deps.go b/internal/httpapi/admin/shared/deps.go index a2df124..e063ae1 100644 --- a/internal/httpapi/admin/shared/deps.go +++ b/internal/httpapi/admin/shared/deps.go @@ -37,7 +37,6 @@ type ConfigStore interface { CurrentInputFileMinChars() int ThinkingInjectionEnabled() bool ThinkingInjectionPrompt() string - CompatStripReferenceMarkers() bool AutoDeleteSessions() bool } diff --git a/internal/httpapi/claude/deps.go b/internal/httpapi/claude/deps.go index 8ca98b1..70e56c3 100644 --- a/internal/httpapi/claude/deps.go +++ b/internal/httpapi/claude/deps.go @@ -23,7 +23,6 @@ type DeepSeekCaller interface { type ConfigReader interface { ModelAliases() map[string]string - CompatStripReferenceMarkers() bool CurrentInputFileEnabled() bool CurrentInputFileMinChars() int } diff --git a/internal/httpapi/claude/deps_injection_test.go b/internal/httpapi/claude/deps_injection_test.go index e6da543..5fee82f 100644 --- a/internal/httpapi/claude/deps_injection_test.go +++ b/internal/httpapi/claude/deps_injection_test.go @@ -7,7 +7,6 @@ type mockClaudeConfig struct { } func (m mockClaudeConfig) ModelAliases() map[string]string { return m.aliases } -func (mockClaudeConfig) CompatStripReferenceMarkers() bool { return true } func 
(mockClaudeConfig) CurrentInputFileEnabled() bool { return true } func (mockClaudeConfig) CurrentInputFileMinChars() int { return 0 } diff --git a/internal/httpapi/claude/handler_messages.go b/internal/httpapi/claude/handler_messages.go index d0bc8ca..8c6f063 100644 --- a/internal/httpapi/claude/handler_messages.go +++ b/internal/httpapi/claude/handler_messages.go @@ -84,7 +84,7 @@ func (h *Handler) handleClaudeDirect(w http.ResponseWriter, r *http.Request) boo return true } result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, norm.Standard, completionruntime.Options{ - StripReferenceMarkers: h.compatStripReferenceMarkers(), + StripReferenceMarkers: stripReferenceMarkersEnabled(), RetryEnabled: true, CurrentInputFile: h.Store, }) @@ -290,7 +290,7 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ messages, thinkingEnabled, searchEnabled, - h.compatStripReferenceMarkers(), + stripReferenceMarkersEnabled(), toolNames, toolsRaw, buildClaudePromptTokenText(messages, thinkingEnabled), diff --git a/internal/httpapi/claude/handler_routes.go b/internal/httpapi/claude/handler_routes.go index 390b97d..548be2e 100644 --- a/internal/httpapi/claude/handler_routes.go +++ b/internal/httpapi/claude/handler_routes.go @@ -21,11 +21,8 @@ type Handler struct { OpenAI OpenAIChatRunner } -func (h *Handler) compatStripReferenceMarkers() bool { - if h == nil || h.Store == nil { - return true - } - return h.Store.CompatStripReferenceMarkers() +func stripReferenceMarkersEnabled() bool { + return true } var ( diff --git a/internal/httpapi/claude/proxy_vercel_test.go b/internal/httpapi/claude/proxy_vercel_test.go index 4fbbbfe..3a879df 100644 --- a/internal/httpapi/claude/proxy_vercel_test.go +++ b/internal/httpapi/claude/proxy_vercel_test.go @@ -14,9 +14,8 @@ type claudeProxyStoreStub struct { func (s claudeProxyStoreStub) ModelAliases() map[string]string { return s.aliases } -func (claudeProxyStoreStub) CompatStripReferenceMarkers() bool { return true } -func (claudeProxyStoreStub) CurrentInputFileEnabled() bool { return true } -func (claudeProxyStoreStub) CurrentInputFileMinChars() int { return 0 } +func (claudeProxyStoreStub) CurrentInputFileEnabled() bool { return true } +func (claudeProxyStoreStub) CurrentInputFileMinChars() int { return 0 } type openAIProxyStub struct { status int diff --git a/internal/httpapi/claude/stream_runtime_finalize.go b/internal/httpapi/claude/stream_runtime_finalize.go index 89e7c9f..1d4e512 100644 --- a/internal/httpapi/claude/stream_runtime_finalize.go +++ b/internal/httpapi/claude/stream_runtime_finalize.go @@ -133,6 +133,9 @@ func (s *claudeStreamRuntime) finalize(stopReason string) { ToolsRaw: s.toolsRaw, }) finalText := turn.Text + outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{ + AlreadyEmittedToolCalls: s.toolCallsDetected, + }) if s.bufferToolContent && !s.toolCallsDetected { if len(turn.ToolCalls) > 0 { @@ -169,7 +172,7 @@ func (s *claudeStreamRuntime) finalize(stopReason string) { } } - if s.toolCallsDetected { + if outcome.HasToolCalls { stopReason = "tool_use" } @@ -180,7 +183,7 @@ func (s *claudeStreamRuntime) finalize(stopReason string) { "stop_sequence": nil, }, "usage": map[string]any{ - "output_tokens": turn.Usage.OutputTokens, + "output_tokens": outcome.Usage.OutputTokens, }, }) s.send("message_stop", map[string]any{"type": "message_stop"}) diff --git a/internal/httpapi/claude/stream_status_test.go b/internal/httpapi/claude/stream_status_test.go index 96e5858..a2cabe8 100644 
diff --git a/internal/httpapi/claude/stream_status_test.go b/internal/httpapi/claude/stream_status_test.go
index 96e5858..a2cabe8 100644
--- a/internal/httpapi/claude/stream_status_test.go
+++ b/internal/httpapi/claude/stream_status_test.go
@@ -23,9 +23,8 @@ type streamStatusClaudeStoreStub struct{}
 
 func (streamStatusClaudeStoreStub) ModelAliases() map[string]string { return nil }
 
-func (streamStatusClaudeStoreStub) CompatStripReferenceMarkers() bool { return true }
-func (streamStatusClaudeStoreStub) CurrentInputFileEnabled() bool     { return true }
-func (streamStatusClaudeStoreStub) CurrentInputFileMinChars() int     { return 0 }
+func (streamStatusClaudeStoreStub) CurrentInputFileEnabled() bool { return true }
+func (streamStatusClaudeStoreStub) CurrentInputFileMinChars() int { return 0 }
 
 func captureClaudeStatusMiddleware(statuses *[]int) func(http.Handler) http.Handler {
 	return func(next http.Handler) http.Handler {
diff --git a/internal/httpapi/gemini/deps.go b/internal/httpapi/gemini/deps.go
index f99edbc..028c194 100644
--- a/internal/httpapi/gemini/deps.go
+++ b/internal/httpapi/gemini/deps.go
@@ -23,7 +23,6 @@ type DeepSeekCaller interface {
 
 type ConfigReader interface {
 	ModelAliases() map[string]string
-	CompatStripReferenceMarkers() bool
 	CurrentInputFileEnabled() bool
 	CurrentInputFileMinChars() int
 }
diff --git a/internal/httpapi/gemini/handler_generate.go b/internal/httpapi/gemini/handler_generate.go
index 53effe6..4945d6e 100644
--- a/internal/httpapi/gemini/handler_generate.go
+++ b/internal/httpapi/gemini/handler_generate.go
@@ -81,7 +81,7 @@ func (h *Handler) handleGeminiDirect(w http.ResponseWriter, r *http.Request, str
 		return true
 	}
 	result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
-		StripReferenceMarkers: h.compatStripReferenceMarkers(),
+		StripReferenceMarkers: stripReferenceMarkersEnabled(),
 		RetryEnabled:          true,
 		CurrentInputFile:      h.Store,
 	})
@@ -294,7 +294,7 @@ func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *ht
 	}
 
 	result := sse.CollectStream(resp, thinkingEnabled, true)
-	stripReferenceMarkers := h.compatStripReferenceMarkers()
+	stripReferenceMarkers := stripReferenceMarkersEnabled()
 	writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(
 		model,
 		finalPrompt,
diff --git a/internal/httpapi/gemini/handler_routes.go b/internal/httpapi/gemini/handler_routes.go
index fb573da..13a2570 100644
--- a/internal/httpapi/gemini/handler_routes.go
+++ b/internal/httpapi/gemini/handler_routes.go
@@ -18,11 +18,8 @@ type Handler struct {
 }
 
 //nolint:unused // used by native Gemini stream/non-stream runtime helpers.
-func (h *Handler) compatStripReferenceMarkers() bool {
-	if h == nil || h.Store == nil {
-		return true
-	}
-	return h.Store.CompatStripReferenceMarkers()
+func stripReferenceMarkersEnabled() bool {
+	return true
 }
 
 func RegisterRoutes(r chi.Router, h *Handler) {
diff --git a/internal/httpapi/gemini/handler_stream_runtime.go b/internal/httpapi/gemini/handler_stream_runtime.go
index 523c127..c005d92 100644
--- a/internal/httpapi/gemini/handler_stream_runtime.go
+++ b/internal/httpapi/gemini/handler_stream_runtime.go
@@ -29,7 +29,7 @@ func (h *Handler) handleStreamGenerateContent(w http.ResponseWriter, r *http.Req
 	rc := http.NewResponseController(w)
 	_, canFlush := w.(http.Flusher)
 
-	runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, h.compatStripReferenceMarkers(), toolNames, toolsRaw)
+	runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, stripReferenceMarkersEnabled(), toolNames, toolsRaw)
 
 	initialType := "text"
 	if thinkingEnabled {
@@ -192,6 +192,7 @@ func (s *geminiStreamRuntime) finalize() {
 		ToolNames: s.toolNames,
 		ToolsRaw:  s.toolsRaw,
 	})
+	outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{})
 
 	if s.bufferContent {
 		parts := buildGeminiPartsFromTurn(turn)
@@ -224,9 +225,9 @@ func (s *geminiStreamRuntime) finalize() {
 		},
 		"modelVersion": s.model,
 		"usageMetadata": map[string]any{
-			"promptTokenCount":     turn.Usage.InputTokens,
-			"candidatesTokenCount": turn.Usage.OutputTokens,
-			"totalTokenCount":      turn.Usage.TotalTokens,
+			"promptTokenCount":     outcome.Usage.InputTokens,
+			"candidatesTokenCount": outcome.Usage.OutputTokens,
+			"totalTokenCount":      outcome.Usage.TotalTokens,
 		},
 	})
 }
diff --git a/internal/httpapi/gemini/handler_test.go b/internal/httpapi/gemini/handler_test.go
index d674485..a7e974b 100644
--- a/internal/httpapi/gemini/handler_test.go
+++ b/internal/httpapi/gemini/handler_test.go
@@ -18,10 +18,9 @@ import (
 
 type testGeminiConfig struct{}
 
-func (testGeminiConfig) ModelAliases() map[string]string   { return nil }
-func (testGeminiConfig) CompatStripReferenceMarkers() bool { return true }
-func (testGeminiConfig) CurrentInputFileEnabled() bool     { return true }
-func (testGeminiConfig) CurrentInputFileMinChars() int     { return 0 }
+func (testGeminiConfig) ModelAliases() map[string]string { return nil }
+func (testGeminiConfig) CurrentInputFileEnabled() bool   { return true }
+func (testGeminiConfig) CurrentInputFileMinChars() int   { return 0 }
 
 type testGeminiAuth struct {
 	a *auth.RequestAuth
diff --git a/internal/httpapi/openai/chat/chat_history_test.go b/internal/httpapi/openai/chat/chat_history_test.go
index e0c47fc..246abfa 100644
--- a/internal/httpapi/openai/chat/chat_history_test.go
+++ b/internal/httpapi/openai/chat/chat_history_test.go
@@ -57,7 +57,7 @@ func blockChatHistoryDetailDir(t *testing.T, detailDir string) func() {
 func TestChatCompletionsNonStreamPersistsHistory(t *testing.T) {
 	historyStore := newTestChatHistoryStore(t)
 	h := &Handler{
-		Store:       mockOpenAIConfig{wideInput: true},
+		Store:       mockOpenAIConfig{},
 		Auth:        streamStatusAuthStub{},
 		DS:          streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"hello world"}`, `data: [DONE]`)},
 		ChatHistory: historyStore,
@@ -216,7 +216,7 @@ func TestHandleStreamContextCancelledMarksHistoryStopped(t *testing.T) {
 func TestChatCompletionsSkipsAdminWebUISource(t *testing.T) {
 	historyStore := newTestChatHistoryStore(t)
 	h := &Handler{
-		Store:       mockOpenAIConfig{wideInput: true},
+		Store:       mockOpenAIConfig{},
 		Auth:        streamStatusAuthStub{},
 		DS:          streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"hello world"}`, `data: [DONE]`)},
 		ChatHistory: historyStore,
@@ -248,7 +248,7 @@ func TestChatCompletionsSkipsHistoryWhenDisabled(t *testing.T) {
 		t.Fatalf("disable history store failed: %v", err)
 	}
 	h := &Handler{
-		Store:       mockOpenAIConfig{wideInput: true},
+		Store:       mockOpenAIConfig{},
 		Auth:        streamStatusAuthStub{},
 		DS:          streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"hello world"}`, `data: [DONE]`)},
 		ChatHistory: historyStore,
@@ -278,7 +278,6 @@ func TestChatCompletionsCurrentInputFilePersistsNeutralPrompt(t *testing.T) {
 	ds := &inlineUploadDSStub{}
 	h := &Handler{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 		},
 		Auth: streamStatusAuthStub{},
diff --git a/internal/httpapi/openai/chat/chat_stream_runtime.go b/internal/httpapi/openai/chat/chat_stream_runtime.go
index 14183ae..db34bcb 100644
--- a/internal/httpapi/openai/chat/chat_stream_runtime.go
+++ b/internal/httpapi/openai/chat/chat_stream_runtime.go
@@ -230,7 +230,6 @@ func (s *chatStreamRuntime) finalize(finishReason string, deferEmptyOutput bool)
 	s.finalThinking = turn.Thinking
 	s.finalText = turn.Text
 	if len(turn.ToolCalls) > 0 && !s.toolCallsDoneEmitted {
-		finishReason = "tool_calls"
 		s.sendDelta(map[string]any{
 			"tool_calls": formatFinalStreamToolCallsWithStableIDs(turn.ToolCalls, s.streamToolCallIDs, s.toolsRaw),
 		})
@@ -241,7 +240,6 @@ func (s *chatStreamRuntime) finalize(finishReason string, deferEmptyOutput bool)
 	for _, evt := range toolstream.Flush(&s.toolSieve, s.toolNames) {
 		if len(evt.ToolCalls) > 0 {
 			batch.flush()
-			finishReason = "tool_calls"
 			s.toolCallsEmitted = true
 			s.toolCallsDoneEmitted = true
 			s.sendDelta(map[string]any{
@@ -261,14 +259,11 @@ func (s *chatStreamRuntime) finalize(finishReason string, deferEmptyOutput bool)
 		batch.flush()
 	}
 
-	if len(turn.ToolCalls) > 0 || s.toolCallsEmitted {
-		finishReason = "tool_calls"
-	}
-	if len(turn.ToolCalls) == 0 && !s.toolCallsEmitted && strings.TrimSpace(turn.Text) == "" {
-		status, message, code := upstreamEmptyOutputDetail(finishReason == "content_filter", turn.Text, turn.Thinking)
-		if turn.Error != nil {
-			status, message, code = turn.Error.Status, turn.Error.Message, turn.Error.Code
-		}
+	outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{
+		AlreadyEmittedToolCalls: s.toolCallsEmitted || s.toolCallsDoneEmitted,
+	})
+	if outcome.ShouldFail {
+		status, message, code := outcome.Error.Status, outcome.Error.Message, outcome.Error.Code
 		if deferEmptyOutput {
 			s.finalErrorStatus = status
 			s.finalErrorMessage = message
@@ -278,31 +273,20 @@ func (s *chatStreamRuntime) finalize(finishReason string, deferEmptyOutput bool)
 		s.sendFailedChunk(status, message, code)
 		return true
 	}
-	usage := chatUsageFromTurn(turn)
-	s.finalFinishReason = finishReason
+	usage := assistantturn.OpenAIChatUsage(turn)
+	s.finalFinishReason = outcome.FinishReason
 	s.finalUsage = usage
 	s.sendChunk(openaifmt.BuildChatStreamChunk(
 		s.completionID,
 		s.created,
 		s.model,
-		[]map[string]any{openaifmt.BuildChatStreamFinishChoice(0, finishReason)},
+		[]map[string]any{openaifmt.BuildChatStreamFinishChoice(0, outcome.FinishReason)},
 		usage,
 	))
 	s.sendDone()
 	return true
 }
 
-func chatUsageFromTurn(turn assistantturn.Turn) map[string]any {
-	return map[string]any{
-		"prompt_tokens":     turn.Usage.InputTokens,
-		"completion_tokens": turn.Usage.OutputTokens,
-		"total_tokens":      turn.Usage.TotalTokens,
-		"completion_tokens_details": map[string]any{
-			"reasoning_tokens": turn.Usage.ReasoningTokens,
-		},
-	}
-}
-
 func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision {
 	if !parsed.Parsed {
 		return streamengine.ParsedDecision{}
h.compatStripReferenceMarkers(), + StripReferenceMarkers: stripReferenceMarkersEnabled(), RetryEnabled: true, CurrentInputFile: h.Store, }) @@ -92,10 +93,10 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) { return } respBody := openaifmt.BuildChatCompletionWithToolCalls(result.SessionID, stdReq.ResponseModel, result.Turn.Prompt, result.Turn.Thinking, result.Turn.Text, result.Turn.ToolCalls, stdReq.ToolsRaw) - respBody["usage"] = chatUsageFromTurn(result.Turn) - finishReason := chatFinishReason(respBody) + respBody["usage"] = assistantturn.OpenAIChatUsage(result.Turn) + finishReason := assistantturn.FinalizeTurn(result.Turn, assistantturn.FinalizeOptions{}).FinishReason if historySession != nil { - historySession.success(http.StatusOK, result.Turn.Thinking, result.Turn.Text, finishReason, chatUsageFromTurn(result.Turn)) + historySession.success(http.StatusOK, result.Turn.Thinking, result.Turn.Text, finishReason, assistantturn.OpenAIChatUsage(result.Turn)) } writeJSON(w, http.StatusOK, respBody) return @@ -162,33 +163,29 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co } result := sse.CollectStream(resp, thinkingEnabled, true) - stripReferenceMarkers := h.compatStripReferenceMarkers() - finalThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers) - finalText := cleanVisibleOutput(result.Text, stripReferenceMarkers) - if searchEnabled { - finalText = replaceCitationMarkersWithLinks(finalText, result.CitationLinks) - } - detected := detectAssistantToolCalls(result.Text, finalText, result.Thinking, result.ToolDetectionThinking, toolNames) - if shouldWriteUpstreamEmptyOutputError(finalText, finalThinking) && len(detected.Calls) == 0 { - status, message, code := upstreamEmptyOutputDetail(result.ContentFilter, finalText, finalThinking) + turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{ + Model: model, + Prompt: finalPrompt, + RefFileTokens: refFileTokens, + SearchEnabled: searchEnabled, + StripReferenceMarkers: stripReferenceMarkersEnabled(), + ToolNames: toolNames, + ToolsRaw: toolsRaw, + ToolChoice: promptcompat.DefaultToolChoicePolicy(), + }) + outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{}) + if outcome.ShouldFail { + status, message, code := outcome.Error.Status, outcome.Error.Message, outcome.Error.Code if historySession != nil { - historySession.error(status, message, code, finalThinking, finalText) + historySession.error(status, message, code, turn.Thinking, turn.Text) } - writeUpstreamEmptyOutputError(w, finalText, finalThinking, result.ContentFilter) + writeOpenAIErrorWithCode(w, status, message, code) return } - respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, finalPrompt, finalThinking, finalText, detected.Calls, toolsRaw) - if refFileTokens > 0 { - addRefFileTokensToUsage(respBody, refFileTokens) - } - finishReason := "stop" - if choices, ok := respBody["choices"].([]map[string]any); ok && len(choices) > 0 { - if fr, _ := choices[0]["finish_reason"].(string); strings.TrimSpace(fr) != "" { - finishReason = fr - } - } + respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, finalPrompt, turn.Thinking, turn.Text, turn.ToolCalls, toolsRaw) + respBody["usage"] = assistantturn.OpenAIChatUsage(turn) if historySession != nil { - historySession.success(http.StatusOK, finalThinking, finalText, finishReason, openaifmt.BuildChatUsageForModel(model, finalPrompt, finalThinking, finalText, refFileTokens)) + 
historySession.success(http.StatusOK, turn.Thinking, turn.Text, outcome.FinishReason, assistantturn.OpenAIChatUsage(turn)) } writeJSON(w, http.StatusOK, respBody) } @@ -216,7 +213,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt created := time.Now().Unix() bufferToolContent := len(toolNames) > 0 emitEarlyToolDeltas := h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence() - stripReferenceMarkers := h.compatStripReferenceMarkers() + stripReferenceMarkers := stripReferenceMarkersEnabled() initialType := "text" if thinkingEnabled { initialType = "thinking" diff --git a/internal/httpapi/openai/chat/handler_chat_auto_delete_test.go b/internal/httpapi/openai/chat/handler_chat_auto_delete_test.go index 15645aa..243cbc9 100644 --- a/internal/httpapi/openai/chat/handler_chat_auto_delete_test.go +++ b/internal/httpapi/openai/chat/handler_chat_auto_delete_test.go @@ -75,7 +75,6 @@ func TestChatCompletionsAutoDeleteModes(t *testing.T) { } h := &Handler{ Store: mockOpenAIConfig{ - wideInput: true, autoDeleteMode: tc.mode, }, Auth: streamStatusAuthStub{}, @@ -123,7 +122,6 @@ func TestAutoDeleteRemoteSessionIgnoresCanceledParentContext(t *testing.T) { ds := &autoDeleteCtxDSStub{} h := &Handler{ Store: mockOpenAIConfig{ - wideInput: true, autoDeleteMode: "single", }, DS: ds, diff --git a/internal/httpapi/openai/chat/handler_toolcall_test.go b/internal/httpapi/openai/chat/handler_toolcall_test.go index 0d0aba8..446b480 100644 --- a/internal/httpapi/openai/chat/handler_toolcall_test.go +++ b/internal/httpapi/openai/chat/handler_toolcall_test.go @@ -133,18 +133,13 @@ func TestHandleNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testing.T) { rec := httptest.NewRecorder() h.handleNonStream(rec, resp, "cid-thinking-only", "deepseek-v4-pro", "prompt", 0, true, false, nil, nil, nil) - if rec.Code != http.StatusOK { - t.Fatalf("expected status 200 for thinking-only upstream output, got %d body=%s", rec.Code, rec.Body.String()) + if rec.Code != http.StatusTooManyRequests { + t.Fatalf("expected status 429 for thinking-only upstream output, got %d body=%s", rec.Code, rec.Body.String()) } out := decodeJSONBody(t, rec.Body.String()) - choices, _ := out["choices"].([]any) - if len(choices) == 0 { - t.Fatal("expected at least one choice") - } - first, _ := choices[0].(map[string]any) - msg, _ := first["message"].(map[string]any) - if asString(msg["reasoning_content"]) != "Only thinking" { - t.Fatalf("expected reasoning_content='Only thinking', got %#v", msg) + errObj, _ := out["error"].(map[string]any) + if asString(errObj["code"]) != "upstream_empty_output" { + t.Fatalf("expected code=upstream_empty_output, got %#v", out) } } diff --git a/internal/httpapi/openai/chat/test_helpers_test.go b/internal/httpapi/openai/chat/test_helpers_test.go index 3760f21..d8284cd 100644 --- a/internal/httpapi/openai/chat/test_helpers_test.go +++ b/internal/httpapi/openai/chat/test_helpers_test.go @@ -12,7 +12,6 @@ import ( type mockOpenAIConfig struct { aliases map[string]string - wideInput bool autoDeleteMode string toolMode string earlyEmit string @@ -24,11 +23,7 @@ type mockOpenAIConfig struct { thinkingPrompt string } -func (m mockOpenAIConfig) ModelAliases() map[string]string { return m.aliases } -func (m mockOpenAIConfig) CompatWideInputStrictOutput() bool { - return m.wideInput -} -func (m mockOpenAIConfig) CompatStripReferenceMarkers() bool { return true } +func (m mockOpenAIConfig) ModelAliases() map[string]string { return m.aliases } func (m mockOpenAIConfig) 
 func (m mockOpenAIConfig) ToolcallMode() string                { return m.toolMode }
 func (m mockOpenAIConfig) ToolcallEarlyEmitConfidence() string { return m.earlyEmit }
 func (m mockOpenAIConfig) ResponsesStoreTTLSeconds() int       { return m.responsesTTL }
diff --git a/internal/httpapi/openai/chat/vercel_prepare_test.go b/internal/httpapi/openai/chat/vercel_prepare_test.go
index b27be18..38fccc2 100644
--- a/internal/httpapi/openai/chat/vercel_prepare_test.go
+++ b/internal/httpapi/openai/chat/vercel_prepare_test.go
@@ -94,7 +94,6 @@ func TestHandleVercelStreamPrepareAppliesCurrentInputFile(t *testing.T) {
 	ds := &inlineUploadDSStub{}
 	h := &Handler{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 		},
 		Auth: streamStatusAuthStub{},
@@ -151,7 +150,6 @@ func TestHandleVercelStreamPrepareMapsCurrentInputFileManagedAuthFailureTo401(t
 	}
 	h := &Handler{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 		},
 		Auth: streamStatusManagedAuthStub{},
diff --git a/internal/httpapi/openai/chat/vercel_stream.go b/internal/httpapi/openai/chat/vercel_stream.go
index cf74f5f..b52cd9c 100644
--- a/internal/httpapi/openai/chat/vercel_stream.go
+++ b/internal/httpapi/openai/chat/vercel_stream.go
@@ -109,13 +109,10 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque
 		"final_prompt":     stdReq.FinalPrompt,
 		"thinking_enabled": stdReq.Thinking,
 		"search_enabled":   stdReq.Search,
-		"compat": map[string]any{
-			"strip_reference_markers": h.compatStripReferenceMarkers(),
-		},
-		"tool_names":     stdReq.ToolNames,
-		"deepseek_token": a.DeepSeekToken,
-		"pow_header":     powHeader,
-		"payload":        payload,
+		"tool_names":     stdReq.ToolNames,
+		"deepseek_token": a.DeepSeekToken,
+		"pow_header":     powHeader,
+		"payload":        payload,
 	})
 }
diff --git a/internal/httpapi/openai/deps_injection_test.go b/internal/httpapi/openai/deps_injection_test.go
index 2f9f445..3082dab 100644
--- a/internal/httpapi/openai/deps_injection_test.go
+++ b/internal/httpapi/openai/deps_injection_test.go
@@ -1,6 +1,7 @@
 package openai
 
 import (
+	"strings"
 	"testing"
 
 	"ds2api/internal/promptcompat"
 )
@@ -8,7 +9,6 @@
 
 type mockOpenAIConfig struct {
 	aliases             map[string]string
-	wideInput           bool
 	autoDeleteMode      string
 	toolMode            string
 	earlyEmit           string
@@ -20,11 +20,7 @@ type mockOpenAIConfig struct {
 	thinkingPrompt string
 }
 
-func (m mockOpenAIConfig) ModelAliases() map[string]string { return m.aliases }
-func (m mockOpenAIConfig) CompatWideInputStrictOutput() bool {
-	return m.wideInput
-}
-func (m mockOpenAIConfig) CompatStripReferenceMarkers() bool { return true }
+func (m mockOpenAIConfig) ModelAliases() map[string]string { return m.aliases }
 func (m mockOpenAIConfig) ToolcallMode() string                { return m.toolMode }
 func (m mockOpenAIConfig) ToolcallEarlyEmitConfidence() string { return m.earlyEmit }
 func (m mockOpenAIConfig) ResponsesStoreTTLSeconds() int       { return m.responsesTTL }
@@ -53,7 +49,6 @@ func TestNormalizeOpenAIChatRequestWithConfigInterface(t *testing.T) {
 		aliases: map[string]string{
 			"my-model": "deepseek-v4-flash-search",
 		},
-		wideInput: true,
 	}
 	req := map[string]any{
 		"model": "my-model",
@@ -72,7 +67,7 @@ func TestNormalizeOpenAIChatRequestWithConfigInterface(t *testing.T) {
 }
 
 func TestNormalizeOpenAIChatRequestDisablesThinkingForNoThinkingModel(t *testing.T) {
-	cfg := mockOpenAIConfig{wideInput: true}
+	cfg := mockOpenAIConfig{}
 	req := map[string]any{
 		"model":    "deepseek-v4-pro-nothinking",
 		"messages": []any{map[string]any{"role": "user", "content": "hello"}},
@@ -93,28 +88,22 @@ func TestNormalizeOpenAIChatRequestDisablesThinkingForNoThinkingModel(t *testing
 	}
 }
 
-func TestNormalizeOpenAIResponsesRequestWideInputPolicyFromInterface(t *testing.T) {
+func TestNormalizeOpenAIResponsesRequestAlwaysAcceptsWideInput(t *testing.T) {
 	req := map[string]any{
 		"model": "deepseek-v4-flash",
 		"input": "hi",
 	}
-	_, err := promptcompat.NormalizeOpenAIResponsesRequest(mockOpenAIConfig{
-		aliases:   map[string]string{},
-		wideInput: false,
-	}, req, "")
-	if err == nil {
-		t.Fatal("expected error when wide input is disabled and only input is provided")
-	}
-	out, err := promptcompat.NormalizeOpenAIResponsesRequest(mockOpenAIConfig{
-		aliases:   map[string]string{},
-		wideInput: true,
+	out, err := promptcompat.NormalizeOpenAIResponsesRequest(mockOpenAIConfig{
+		aliases: map[string]string{},
 	}, req, "")
 	if err != nil {
-		t.Fatalf("unexpected error when wide input is enabled: %v", err)
+		t.Fatalf("unexpected error for wide input request: %v", err)
 	}
 	if out.Surface != "openai_responses" {
 		t.Fatalf("unexpected surface: %q", out.Surface)
 	}
+	if !strings.Contains(out.FinalPrompt, "<|User|>hi") {
+		t.Fatalf("unexpected final prompt: %q", out.FinalPrompt)
+	}
 }
diff --git a/internal/httpapi/openai/file_inline_upload_test.go b/internal/httpapi/openai/file_inline_upload_test.go
index 8194aeb..abaf704 100644
--- a/internal/httpapi/openai/file_inline_upload_test.go
+++ b/internal/httpapi/openai/file_inline_upload_test.go
@@ -151,7 +151,7 @@ func TestPreprocessInlineFileInputsDeduplicatesIdenticalPayloads(t *testing.T) {
 
 func TestChatCompletionsUploadsInlineFilesBeforeCompletion(t *testing.T) {
 	ds := &inlineUploadDSStub{}
-	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds}
+	h := &openAITestSurface{Store: mockOpenAIConfig{}, Auth: streamStatusAuthStub{}, DS: ds}
 	reqBody := `{"model":"deepseek-v4-vision","messages":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"image_url","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}`
 	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody))
 	req.Header.Set("Authorization", "Bearer direct-token")
@@ -180,7 +180,7 @@ func TestChatCompletionsUploadsInlineFilesBeforeCompletion(t *testing.T) {
 
 func TestResponsesUploadsInlineFilesBeforeCompletion(t *testing.T) {
 	ds := &inlineUploadDSStub{}
-	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds}
+	h := &openAITestSurface{Store: mockOpenAIConfig{}, Auth: streamStatusAuthStub{}, DS: ds}
 	r := chi.NewRouter()
 	registerOpenAITestRoutes(r, h)
 	reqBody := `{"model":"deepseek-v4-pro","input":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"input_image","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}`
@@ -208,7 +208,7 @@ func TestResponsesUploadsInlineFilesBeforeCompletion(t *testing.T) {
 
 func TestChatCompletionsInlineUploadFailureReturnsBadRequest(t *testing.T) {
 	ds := &inlineUploadDSStub{}
-	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds}
+	h := &openAITestSurface{Store: mockOpenAIConfig{}, Auth: streamStatusAuthStub{}, DS: ds}
 	reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,%%%"}}]}],"stream":false}`
 	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody))
 	req.Header.Set("Authorization", "Bearer direct-token")
@@ -227,7 +227,7 @@ func TestChatCompletionsInlineUploadFailureReturnsBadRequest(t *testing.T) {
 
 func TestChatCompletionsInlineUploadLimitReturnsBadRequest(t *testing.T) {
 	ds := &inlineUploadDSStub{}
-	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds}
+	h := &openAITestSurface{Store: mockOpenAIConfig{}, Auth: streamStatusAuthStub{}, DS: ds}
 	content := []any{map[string]any{"type": "input_text", "text": "hi"}}
 	for i := 0; i < 51; i++ {
 		content = append(content, map[string]any{
@@ -266,7 +266,7 @@ func TestChatCompletionsInlineUploadLimitReturnsBadRequest(t *testing.T) {
 
 func TestResponsesInlineUploadFailureReturnsInternalServerError(t *testing.T) {
 	ds := &inlineUploadDSStub{uploadErr: errors.New("boom")}
-	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds}
+	h := &openAITestSurface{Store: mockOpenAIConfig{}, Auth: streamStatusAuthStub{}, DS: ds}
 	r := chi.NewRouter()
 	registerOpenAITestRoutes(r, h)
 	reqBody := `{"model":"deepseek-v4-flash","input":[{"role":"user","content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}`
@@ -289,7 +289,7 @@ func TestVercelPrepareUploadsInlineFilesBeforeLeasePayload(t *testing.T) {
 	t.Setenv("VERCEL", "1")
 	t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret")
 	ds := &inlineUploadDSStub{}
-	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds}
+	h := &openAITestSurface{Store: mockOpenAIConfig{}, Auth: streamStatusAuthStub{}, DS: ds}
 	r := chi.NewRouter()
 	registerOpenAITestRoutes(r, h)
 	reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"image_url","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":true}`
diff --git a/internal/httpapi/openai/files_route_test.go b/internal/httpapi/openai/files_route_test.go
index 680e547..722b795 100644
--- a/internal/httpapi/openai/files_route_test.go
+++ b/internal/httpapi/openai/files_route_test.go
@@ -120,7 +120,7 @@ func newMultipartUploadRequest(t *testing.T, purpose string, filename string, da
 
 func TestFilesRouteUploadSuccess(t *testing.T) {
 	ds := &filesRouteDSStub{}
-	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds}
+	h := &openAITestSurface{Store: mockOpenAIConfig{}, Auth: streamStatusAuthStub{}, DS: ds}
 	r := chi.NewRouter()
 	registerOpenAITestRoutes(r, h)
 
@@ -160,7 +160,7 @@ func TestFilesRouteUploadSuccess(t *testing.T) {
 
 func TestFilesRouteUploadIncludesAccountIDForManagedAccount(t *testing.T) {
 	ds := &filesRouteDSStub{}
-	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: managedFilesAuthStub{}, DS: ds}
+	h := &openAITestSurface{Store: mockOpenAIConfig{}, Auth: managedFilesAuthStub{}, DS: ds}
 	r := chi.NewRouter()
 	registerOpenAITestRoutes(r, h)
 
@@ -188,7 +188,7 @@ func TestFilesRouteRetrieveSuccess(t *testing.T) {
 		Purpose: "assistants",
 		Status:  "processed",
 	}}
-	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: managedFilesAuthStub{}, DS: ds}
+	h := &openAITestSurface{Store: mockOpenAIConfig{}, Auth: managedFilesAuthStub{}, DS: ds}
 	r := chi.NewRouter()
 	registerOpenAITestRoutes(r, h)
 
@@ -214,7 +214,7 @@ func TestFilesRouteRetrieveSuccess(t *testing.T) {
 
 func TestFilesRouteRetrieveNotFound(t *testing.T) {
 	ds := &filesRouteDSStub{err: dsclient.ErrUploadFileNotFound}
-	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds}
+	h := &openAITestSurface{Store: mockOpenAIConfig{}, Auth: streamStatusAuthStub{}, DS: ds}
 	r := chi.NewRouter()
 	registerOpenAITestRoutes(r, h)
@@ -229,7 +229,7 @@ func TestFilesRouteRetrieveNotFound(t *testing.T) {
 }
 
 func TestFilesRouteRejectsNonMultipart(t *testing.T) {
-	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: &filesRouteDSStub{}}
+	h := &openAITestSurface{Store: mockOpenAIConfig{}, Auth: streamStatusAuthStub{}, DS: &filesRouteDSStub{}}
 	r := chi.NewRouter()
 	registerOpenAITestRoutes(r, h)
@@ -245,7 +245,7 @@ func TestFilesRouteRejectsNonMultipart(t *testing.T) {
 }
 
 func TestFilesRouteRequiresFileField(t *testing.T) {
-	h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: &filesRouteDSStub{}}
+	h := &openAITestSurface{Store: mockOpenAIConfig{}, Auth: streamStatusAuthStub{}, DS: &filesRouteDSStub{}}
 	r := chi.NewRouter()
 	registerOpenAITestRoutes(r, h)
diff --git a/internal/httpapi/openai/history_split_test.go b/internal/httpapi/openai/history_split_test.go
index 1be537f..3e69b17 100644
--- a/internal/httpapi/openai/history_split_test.go
+++ b/internal/httpapi/openai/history_split_test.go
@@ -96,7 +96,6 @@ func TestApplyCurrentInputFileSkipsShortInputWhenThresholdNotReached(t *testing.
 	ds := &inlineUploadDSStub{}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 			currentInputMin:     10,
 		},
@@ -129,7 +128,6 @@ func TestApplyThinkingInjectionAppendsLatestUserPrompt(t *testing.T) {
 	ds := &inlineUploadDSStub{}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:         true,
 			thinkingInjection: boolPtr(true),
 		},
 		DS: ds,
@@ -161,7 +159,6 @@ func TestApplyThinkingInjectionUsesCustomPrompt(t *testing.T) {
 	ds := &inlineUploadDSStub{}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:         true,
 			thinkingInjection: boolPtr(true),
 			thinkingPrompt:    "custom thinking format",
 		},
@@ -191,7 +188,6 @@ func TestApplyCurrentInputFileDisabledPassThrough(t *testing.T) {
 	ds := &inlineUploadDSStub{}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: false,
 		},
 		DS: ds,
@@ -224,7 +220,6 @@ func TestApplyCurrentInputFileUploadsFirstTurnWithNumberedHistoryTranscript(t *t
 	ds := &inlineUploadDSStub{}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 			currentInputMin:     10,
 			thinkingInjection:   boolPtr(true),
@@ -294,7 +289,6 @@ func TestApplyCurrentInputFilePreservesFullContextPromptForTokenCounting(t *test
 	ds := &inlineUploadDSStub{}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 			currentInputMin:     0,
 			thinkingInjection:   boolPtr(true),
@@ -340,7 +334,6 @@ func TestApplyCurrentInputFileUploadsFullContextFile(t *testing.T) {
 	ds := &inlineUploadDSStub{}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 			currentInputMin:     0,
 			thinkingInjection:   boolPtr(true),
@@ -391,7 +384,6 @@ func TestApplyCurrentInputFileCarriesHistoryText(t *testing.T) {
 	ds := &inlineUploadDSStub{}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 		},
 		DS: ds,
@@ -424,7 +416,6 @@ func TestChatCompletionsCurrentInputFileUploadsContextAndKeepsNeutralPrompt(t *t
 	ds := &inlineUploadDSStub{}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 		},
 		Auth: streamStatusAuthStub{},
@@ -495,7 +486,6 @@ func TestResponsesCurrentInputFileUploadsContextAndKeepsNeutralPrompt(t *testing
 	ds := &inlineUploadDSStub{}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 		},
 		Auth: streamStatusAuthStub{},
@@ -553,7 +543,6 @@ func TestChatCompletionsCurrentInputFileMapsManagedAuthFailureTo401(t *testing.T
 	}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 		},
 		Auth: streamStatusManagedAuthStub{},
@@ -585,7 +574,6 @@ func TestResponsesCurrentInputFileMapsDirectAuthFailureTo401(t *testing.T) {
 	}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 		},
 		Auth: streamStatusAuthStub{},
@@ -617,7 +605,6 @@ func TestChatCompletionsCurrentInputFileUploadFailureReturnsInternalServerError(
 	ds := &inlineUploadDSStub{uploadErr: errors.New("boom")}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			currentInputEnabled: true,
 		},
 		Auth: streamStatusAuthStub{},
@@ -646,7 +633,6 @@ func TestCurrentInputFileWorksAcrossAutoDeleteModes(t *testing.T) {
 	ds := &inlineUploadDSStub{}
 	h := &openAITestSurface{
 		Store: mockOpenAIConfig{
-			wideInput:           true,
 			autoDeleteMode:      mode,
 			currentInputEnabled: true,
 		},
diff --git a/internal/httpapi/openai/responses/empty_retry_runtime.go b/internal/httpapi/openai/responses/empty_retry_runtime.go
index 74546b7..ccf2f06 100644
--- a/internal/httpapi/openai/responses/empty_retry_runtime.go
+++ b/internal/httpapi/openai/responses/empty_retry_runtime.go
@@ -74,7 +74,7 @@ func (h *Handler) prepareResponsesStreamRuntime(w http.ResponseWriter, resp *htt
 	}
 	streamRuntime := newResponsesStreamRuntime(
 		w, rc, canFlush, responseID, model, finalPrompt, thinkingEnabled, searchEnabled,
-		h.compatStripReferenceMarkers(), toolNames, toolsRaw, len(toolNames) > 0,
+		stripReferenceMarkersEnabled(), toolNames, toolsRaw, len(toolNames) > 0,
 		h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence(), toolChoice, traceID,
 		func(obj map[string]any) {
 			h.getResponseStore().put(owner, responseID, obj)
diff --git a/internal/httpapi/openai/responses/handler.go b/internal/httpapi/openai/responses/handler.go
index ac8cd04..7449f40 100644
--- a/internal/httpapi/openai/responses/handler.go
+++ b/internal/httpapi/openai/responses/handler.go
@@ -11,7 +11,6 @@ import (
 	"ds2api/internal/httpapi/openai/history"
 	"ds2api/internal/httpapi/openai/shared"
 	"ds2api/internal/promptcompat"
-	"ds2api/internal/toolcall"
 	"ds2api/internal/toolstream"
 )
 
@@ -29,11 +28,8 @@ type Handler struct {
 	responses *responseStore
 }
 
-func (h *Handler) compatStripReferenceMarkers() bool {
-	if h == nil {
-		return true
-	}
-	return shared.CompatStripReferenceMarkers(h.Store)
+func stripReferenceMarkersEnabled() bool {
+	return true
 }
 
 func (h *Handler) applyCurrentInputFile(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) {
@@ -98,18 +94,6 @@ func cleanVisibleOutput(text string, stripReferenceMarkers bool) string {
 	return shared.CleanVisibleOutput(text, stripReferenceMarkers)
 }
 
-func replaceCitationMarkersWithLinks(text string, links map[int]string) string {
-	return shared.ReplaceCitationMarkersWithLinks(text, links)
-}
-
-func upstreamEmptyOutputDetail(contentFilter bool, text, thinking string) (int, string, string) {
-	return shared.UpstreamEmptyOutputDetail(contentFilter, text, thinking)
-}
-
-func writeUpstreamEmptyOutputError(w http.ResponseWriter, text, thinking string, contentFilter bool) bool {
-	return shared.WriteUpstreamEmptyOutputError(w, text, thinking, contentFilter)
-}
-
 func emptyOutputRetryEnabled() bool {
 	return shared.EmptyOutputRetryEnabled()
 }
@@ -129,7 +113,3 @@ func usagePromptWithEmptyOutputRetry(originalPrompt string, retryAttempts int) s
 func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, seenNames map[int]string) []toolstream.ToolCallDelta {
 	return shared.FilterIncrementalToolCallDeltasByAllowed(deltas, seenNames)
 }
-
-func detectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult {
-	return shared.DetectAssistantToolCalls(rawText, visibleText, exposedThinking, detectionThinking, toolNames)
-}
diff --git a/internal/httpapi/openai/responses/responses_handler.go b/internal/httpapi/openai/responses/responses_handler.go
index 5a47070..9ec27e3 100644
--- a/internal/httpapi/openai/responses/responses_handler.go
+++ b/internal/httpapi/openai/responses/responses_handler.go
@@ -11,6 +11,7 @@ import (
 	"github.com/go-chi/chi/v5"
 	"github.com/google/uuid"
 
+	"ds2api/internal/assistantturn"
 	"ds2api/internal/auth"
 	"ds2api/internal/completionruntime"
 	"ds2api/internal/config"
@@ -96,7 +97,7 @@ func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) {
 	responseID := "resp_" + strings.ReplaceAll(uuid.NewString(), "-", "")
 	if !stdReq.Stream {
 		result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
-			StripReferenceMarkers: h.compatStripReferenceMarkers(),
+			StripReferenceMarkers: stripReferenceMarkersEnabled(),
 			RetryEnabled:          true,
 			CurrentInputFile:      h.Store,
 		})
@@ -105,7 +106,7 @@ func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) {
 			return
 		}
 		responseObj := openaifmt.BuildResponseObjectWithToolCalls(responseID, stdReq.ResponseModel, result.Turn.Prompt, result.Turn.Thinking, result.Turn.Text, result.Turn.ToolCalls, stdReq.ToolsRaw)
-		responseObj["usage"] = responsesUsageFromTurn(result.Turn)
+		responseObj["usage"] = assistantturn.OpenAIResponsesUsage(result.Turn)
 		h.getResponseStore().put(owner, responseID, responseObj)
 		writeJSON(w, http.StatusOK, responseObj)
 		return
@@ -132,28 +133,26 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
 		return
 	}
 	result := sse.CollectStream(resp, thinkingEnabled, true)
-	stripReferenceMarkers := h.compatStripReferenceMarkers()
-	sanitizedThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers)
-	sanitizedText := cleanVisibleOutput(result.Text, stripReferenceMarkers)
-	if searchEnabled {
-		sanitizedText = replaceCitationMarkersWithLinks(sanitizedText, result.CitationLinks)
-	}
-	textParsed := detectAssistantToolCalls(result.Text, sanitizedText, result.Thinking, result.ToolDetectionThinking, toolNames)
-	if len(textParsed.Calls) == 0 && writeUpstreamEmptyOutputError(w, sanitizedText, sanitizedThinking, result.ContentFilter) {
-		return
-	}
-	logResponsesToolPolicyRejection(traceID, toolChoice, textParsed, "text")
-	callCount := len(textParsed.Calls)
-	if toolChoice.IsRequired() && callCount == 0 {
-		writeOpenAIErrorWithCode(w, http.StatusUnprocessableEntity, "tool_choice requires at least one valid tool call.", "tool_choice_violation")
+	turn := assistantturn.BuildTurnFromCollected(result, assistantturn.BuildOptions{
+		Model:                 model,
+		Prompt:                finalPrompt,
+		RefFileTokens:         refFileTokens,
+		SearchEnabled:         searchEnabled,
+		StripReferenceMarkers: stripReferenceMarkersEnabled(),
+		ToolNames:             toolNames,
+		ToolsRaw:              toolsRaw,
+		ToolChoice:            toolChoice,
+	})
+	logResponsesToolPolicyRejection(traceID, toolChoice, turn.ParsedToolCalls, "text")
+	outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{})
+	if outcome.ShouldFail {
+		writeOpenAIErrorWithCode(w, outcome.Error.Status, outcome.Error.Message, outcome.Error.Code)
 		return
 	}
-	responseObj := openaifmt.BuildResponseObjectWithToolCalls(responseID, model, finalPrompt, sanitizedThinking, sanitizedText, textParsed.Calls, toolsRaw)
-	if refFileTokens > 0 {
-		addRefFileTokensToUsage(responseObj, refFileTokens)
-	}
+	responseObj := openaifmt.BuildResponseObjectWithToolCalls(responseID, model, finalPrompt, turn.Thinking, turn.Text, turn.ToolCalls, toolsRaw)
+	responseObj["usage"] = assistantturn.OpenAIResponsesUsage(turn)
 	h.getResponseStore().put(owner, responseID, responseObj)
 	writeJSON(w, http.StatusOK, responseObj)
 }
@@ -178,7 +177,7 @@ func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request,
 	}
 	bufferToolContent := len(toolNames) > 0
 	emitEarlyToolDeltas := h.toolcallFeatureMatchEnabled() && h.toolcallEarlyEmitHighConfidence()
-	stripReferenceMarkers := h.compatStripReferenceMarkers()
+	stripReferenceMarkers := stripReferenceMarkersEnabled()
 
 	streamRuntime := newResponsesStreamRuntime(
 		w,
diff --git a/internal/httpapi/openai/responses/responses_stream_runtime_core.go b/internal/httpapi/openai/responses/responses_stream_runtime_core.go
index c043dc1..4d3a3ab 100644
--- a/internal/httpapi/openai/responses/responses_stream_runtime_core.go
+++ b/internal/httpapi/openai/responses/responses_stream_runtime_core.go
@@ -194,15 +194,11 @@ func (s *responsesStreamRuntime) finalize(finishReason string, deferEmptyOutput
 
 	s.closeMessageItem()
 
-	if turn.Error != nil && turn.Error.Code == "tool_choice_violation" {
-		s.failResponse(turn.Error.Status, turn.Error.Message, turn.Error.Code)
-		return true
-	}
-	if len(detected) == 0 && strings.TrimSpace(turn.Text) == "" {
-		status, message, code := upstreamEmptyOutputDetail(finishReason == "content_filter", turn.Text, turn.Thinking)
-		if turn.Error != nil {
-			status, message, code = turn.Error.Status, turn.Error.Message, turn.Error.Code
-		}
+	outcome := assistantturn.FinalizeTurn(turn, assistantturn.FinalizeOptions{
+		AlreadyEmittedToolCalls: s.toolCallsEmitted || s.toolCallsDoneEmitted,
+	})
+	if outcome.ShouldFail {
+		status, message, code := outcome.Error.Status, outcome.Error.Message, outcome.Error.Code
 		if deferEmptyOutput {
 			s.finalErrorStatus = status
 			s.finalErrorMessage = message
@@ -223,14 +219,6 @@ func (s *responsesStreamRuntime) finalize(finishReason string, deferEmptyOutput
 	return true
 }
 
-func responsesUsageFromTurn(turn assistantturn.Turn) map[string]any {
-	return map[string]any{
-		"input_tokens":  turn.Usage.InputTokens,
-		"output_tokens": turn.Usage.OutputTokens,
-		"total_tokens":  turn.Usage.TotalTokens,
-	}
-}
-
 func (s *responsesStreamRuntime) logToolPolicyRejections(textParsed toolcall.ToolCallParseResult) {
 	logRejected := func(parsed toolcall.ToolCallParseResult, channel string) {
 		rejected := filteredRejectedToolNamesForLog(parsed.RejectedToolNames)
diff --git a/internal/httpapi/openai/responses/responses_stream_test.go b/internal/httpapi/openai/responses/responses_stream_test.go
index 54495f7..fa06bd5 100644
--- a/internal/httpapi/openai/responses/responses_stream_test.go
+++ b/internal/httpapi/openai/responses/responses_stream_test.go
@@ -453,25 +453,13 @@ func TestHandleResponsesNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testin
 	}
 	h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-v4-pro", "prompt", 0, true, false, nil, nil, promptcompat.DefaultToolChoicePolicy(), "")
 
-	if rec.Code != http.StatusOK {
-		t.Fatalf("expected 200 for thinking-only upstream output, got %d body=%s", rec.Code, rec.Body.String())
+	if rec.Code != http.StatusTooManyRequests {
+		t.Fatalf("expected 429 for thinking-only upstream output, got %d body=%s", rec.Code, rec.Body.String())
 	}
 	out := decodeJSONBody(t, rec.Body.String())
-	output, _ := out["output"].([]any)
-	if len(output) == 0 {
-		t.Fatal("expected at least one output item")
-	}
-	first, _ := output[0].(map[string]any)
-	content, _ := first["content"].([]any)
-	if len(content) == 0 {
-		t.Fatal("expected at least one content item")
-	}
-	firstContent, _ := content[0].(map[string]any)
-	if asString(firstContent["type"]) != "reasoning" {
-		t.Fatalf("expected reasoning type, got %v", firstContent["type"])
-	}
-	if asString(firstContent["text"]) != "Only thinking" {
-		t.Fatalf("expected text='Only thinking', got %v", firstContent["text"])
+	errObj, _ := out["error"].(map[string]any)
+	if asString(errObj["code"]) != "upstream_empty_output" {
+		t.Fatalf("expected code=upstream_empty_output, got %#v", out)
 	}
 }
diff --git a/internal/httpapi/openai/shared/deps.go b/internal/httpapi/openai/shared/deps.go
index 776abfa..eca93a7 100644
--- a/internal/httpapi/openai/shared/deps.go
+++ b/internal/httpapi/openai/shared/deps.go
@@ -35,8 +35,6 @@ type DeepSeekCaller interface {
 
 type ConfigReader interface {
 	ModelAliases() map[string]string
-	CompatWideInputStrictOutput() bool
-	CompatStripReferenceMarkers() bool
 	ToolcallMode() string
 	ToolcallEarlyEmitConfidence() string
 	ResponsesStoreTTLSeconds() int
@@ -56,13 +54,6 @@ type Deps struct {
 	ChatHistory *chathistory.Store
 }
 
-func CompatStripReferenceMarkers(store ConfigReader) bool {
-	if store == nil {
-		return true
-	}
-	return store.CompatStripReferenceMarkers()
-}
-
 var WriteJSON = util.WriteJSON
 
 var _ AuthResolver = (*auth.Resolver)(nil)
diff --git a/internal/httpapi/openai/stream_status_test.go b/internal/httpapi/openai/stream_status_test.go
index c30d555..3c11d57 100644
--- a/internal/httpapi/openai/stream_status_test.go
+++ b/internal/httpapi/openai/stream_status_test.go
@@ -135,7 +135,7 @@ func captureStatusMiddleware(statuses *[]int) func(http.Handler) http.Handler {
 func TestChatCompletionsStreamStatusCapturedAs200(t *testing.T) {
 	statuses := make([]int, 0, 1)
 	h := &openAITestSurface{
-		Store: mockOpenAIConfig{wideInput: true},
+		Store: mockOpenAIConfig{},
 		Auth:  streamStatusAuthStub{},
 		DS:    streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"hello"}`, "data: [DONE]")},
 	}
@@ -164,7 +164,7 @@
 func TestResponsesStreamStatusCapturedAs200(t *testing.T) {
 	statuses := make([]int, 0, 1)
 	h := &openAITestSurface{
-		Store: mockOpenAIConfig{wideInput: true},
+		Store: mockOpenAIConfig{},
 		Auth:  streamStatusAuthStub{},
 		DS:    streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"hello"}`, "data: [DONE]")},
 	}
@@ -193,7 +193,7 @@
 func TestChatCompletionsStreamContentFilterStopsNormallyWithoutLeak(t *testing.T) {
 	statuses := make([]int, 0, 1)
 	h := &openAITestSurface{
-		Store: mockOpenAIConfig{wideInput: true},
+		Store: mockOpenAIConfig{},
 		Auth:  streamStatusAuthStub{},
 		DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(
			`data: {"p":"response/content","v":"合法前缀"}`,
@@ -243,7 +243,7 @@ func TestChatCompletionsStreamContentFilterStopsNormallyWithoutLeak(t *testing.T
 func TestChatCompletionsStreamEmitsFailureFrameWhenUpstreamOutputEmpty(t *testing.T) {
 	statuses := make([]int, 0, 1)
 	h := &openAITestSurface{
-		Store: mockOpenAIConfig{wideInput: true},
+		Store: mockOpenAIConfig{},
 		Auth:  streamStatusAuthStub{},
 		DS:    streamStatusDSStub{resp: makeOpenAISSEHTTPResponse("data: [DONE]")},
 	}
@@ -289,7 +289,7 @@ func TestChatCompletionsStreamRetriesEmptyOutputOnSameSession(t *testing.T) {
 		makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
 	}}
 	h := &openAITestSurface{
-		Store: mockOpenAIConfig{wideInput: true},
+		Store: mockOpenAIConfig{},
 		Auth:  streamStatusAuthStub{},
 		DS:    ds,
 	}
@@ -349,7 +349,7 @@ func TestChatCompletionsNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
 		makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
 	}}
 	h := &openAITestSurface{
-		Store: mockOpenAIConfig{wideInput: true},
+		Store: mockOpenAIConfig{},
 		Auth:  streamStatusAuthStub{},
 		DS:    ds,
 	}
@@ -388,7 +388,7 @@ func TestChatCompletionsContentFilterDoesNotRetry(t *testing.T) {
 		makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
 	}}
 	h := &openAITestSurface{
-		Store: mockOpenAIConfig{wideInput: true},
+		Store: mockOpenAIConfig{},
 		Auth:  streamStatusAuthStub{},
 		DS:    ds,
 	}
@@ -410,7 +410,7 @@
 func TestResponsesStreamUsageIgnoresBatchAccumulatedTokenUsage(t *testing.T) {
 	statuses := make([]int, 0, 1)
 	h := &openAITestSurface{
-		Store: mockOpenAIConfig{wideInput: true},
+		Store: mockOpenAIConfig{},
 		Auth:  streamStatusAuthStub{},
 		DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(
			`data: {"p":"response/content","v":"hello"}`,
@@ -461,7 +461,7 @@ func TestResponsesStreamRetriesThinkingOnlyOutput(t *testing.T) {
 		makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
 	}}
 	h := &openAITestSurface{
-		Store: mockOpenAIConfig{wideInput: true},
+		Store: mockOpenAIConfig{},
 		Auth:  streamStatusAuthStub{},
 		DS:    ds,
 	}
@@ -500,7 +500,7 @@ func TestResponsesNonStreamRetriesThinkingOnlyOutput(t *testing.T) {
 		makeOpenAISSEHTTPResponse(`data: {"p":"response/content","v":"visible"}`, "data: [DONE]"),
 	}}
 	h := &openAITestSurface{
-		Store: mockOpenAIConfig{wideInput: true},
+		Store: mockOpenAIConfig{},
 		Auth:  streamStatusAuthStub{},
 		DS:    ds,
 	}
@@ -546,7 +546,7 @@
 func TestResponsesNonStreamUsageIgnoresPromptAndOutputTokenUsage(t *testing.T) {
 	statuses := make([]int, 0, 1)
 	h := &openAITestSurface{
-		Store: mockOpenAIConfig{wideInput: true},
+		Store: mockOpenAIConfig{},
 		Auth:  streamStatusAuthStub{},
 		DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(
			`data: {"p":"response/content","v":"ok"}`,
diff --git a/internal/js/chat-stream/vercel_stream_impl.js b/internal/js/chat-stream/vercel_stream_impl.js
index 02af872..f28598e 100644
--- a/internal/js/chat-stream/vercel_stream_impl.js
+++ b/internal/js/chat-stream/vercel_stream_impl.js
@@ -17,7 +17,6 @@ const {
   resolveToolcallPolicy,
   formatIncrementalToolCallDeltas,
   filterIncrementalToolCallDeltasByAllowed,
-  boolDefaultTrue,
   resetStreamToolCallState,
 } = require('./toolcall_policy');
 const { createChatCompletionEmitter, createDeltaCoalescer } = require('./stream_emitter');
@@ -58,7 +57,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
   const toolPolicy = resolveToolcallPolicy(prep.body, payload.tools);
   const toolNames = toolPolicy.toolNames;
   const emitEarlyToolDeltas = toolPolicy.emitEarlyToolDeltas;
-  const stripReferenceMarkers = boolDefaultTrue(prep.body.compat && prep.body.compat.strip_reference_markers);
+  const stripReferenceMarkers = true;
 
   if (!model || !leaseID || !deepseekToken || !initialPowHeader || !completionPayload) {
     writeOpenAIError(res, 500, 'invalid vercel prepare response');
diff --git a/internal/promptcompat/request_normalize.go b/internal/promptcompat/request_normalize.go
index 833a54e..ec7b2ff 100644
--- a/internal/promptcompat/request_normalize.go
+++ b/internal/promptcompat/request_normalize.go
@@ -10,7 +10,6 @@ import (
 
 type ConfigReader interface {
 	ModelAliases() map[string]string
-	CompatWideInputStrictOutput() bool
 }
 
 func NormalizeOpenAIChatRequest(store ConfigReader, req map[string]any, traceID string) (StandardRequest, error) {
@@ -74,17 +73,7 @@ func NormalizeOpenAIResponsesRequest(store ConfigReader, req map[string]any, tra
 		thinkingEnabled = false
 	}
 
-	// Keep width-control as an explicit policy hook even if current default is true.
-	allowWideInput := true
-	if store != nil {
-		allowWideInput = store.CompatWideInputStrictOutput()
-	}
-	var messagesRaw []any
-	if allowWideInput {
-		messagesRaw = ResponsesMessagesFromRequest(req)
-	} else if msgs, ok := req["messages"].([]any); ok && len(msgs) > 0 {
-		messagesRaw = msgs
-	}
+	messagesRaw := ResponsesMessagesFromRequest(req)
 	if len(messagesRaw) == 0 {
 		return StandardRequest{}, fmt.Errorf("request must include 'input' or 'messages'")
 	}
diff --git a/tests/node/chat-stream.test.js b/tests/node/chat-stream.test.js
index ba5ef7d..0171de6 100644
--- a/tests/node/chat-stream.test.js
+++ b/tests/node/chat-stream.test.js
@@ -137,7 +137,6 @@ async function runMockVercelStreamSequence(upstreamSequences, prepareOverrides =
     final_prompt: 'hello',
     thinking_enabled: false,
     search_enabled: false,
-    compat: { strip_reference_markers: true },
     tool_names: [],
     deepseek_token: 'deepseek-token',
     pow_header: 'pow-header',
@@ -310,7 +309,6 @@ test('vercel stream reuses prior PoW when refresh fails', async () => {
     final_prompt: 'hello',
     thinking_enabled: false,
     search_enabled: false,
-    compat: { strip_reference_markers: true },
     tool_names: [],
     deepseek_token: 'deepseek-token',
     pow_header: 'pow-header-initial',
diff --git a/webui/src/features/settings/CompatibilitySection.jsx b/webui/src/features/settings/CompatibilitySection.jsx
deleted file mode 100644
index 104a69f..0000000
--- a/webui/src/features/settings/CompatibilitySection.jsx
+++ /dev/null
@@ -1,34 +0,0 @@
-import { ShieldAlert } from 'lucide-react'
-
-export default function CompatibilitySection({ t, form, setForm }) {
-  return (
-    [JSX markup lost in extraction; the deleted section rendered {t('settings.compatibilityTitle')} with a ShieldAlert icon, {t('settings.compatibilityDesc')}, and the strip_reference_markers toggle bound to form/setForm]
-  )
-}
diff --git a/webui/src/features/settings/SettingsContainer.jsx b/webui/src/features/settings/SettingsContainer.jsx
index f8d5e7b..cd855c8 100644
--- a/webui/src/features/settings/SettingsContainer.jsx
+++ b/webui/src/features/settings/SettingsContainer.jsx
@@ -6,7 +6,6 @@ import SecuritySection from './SecuritySection'
 import RuntimeSection from './RuntimeSection'
 import BehaviorSection from './BehaviorSection'
 import CurrentInputFileSection from './CurrentInputFileSection'
-import CompatibilitySection from './CompatibilitySection'
 import AutoDeleteSection from './AutoDeleteSection'
 import ModelSection from './ModelSection'
 import BackupSection from './BackupSection'
@@ -98,8 +97,6 @@ export default function SettingsContainer({ onRefresh, onMessage, authFetch, onF
 [surrounding JSX section elements lost in extraction]
-[two removed lines rendering the CompatibilitySection element, lost in extraction]
diff --git a/webui/src/features/settings/useSettingsForm.js b/webui/src/features/settings/useSettingsForm.js
index 0a9600b..e08c015 100644
--- a/webui/src/features/settings/useSettingsForm.js
+++ b/webui/src/features/settings/useSettingsForm.js
@@ -13,7 +13,6 @@ const MAX_AUTO_FETCH_FAILURES = 3
 const DEFAULT_FORM = {
   admin: { jwt_expire_hours: 24 },
   runtime: { account_max_inflight: 2, account_max_queue: 10, global_max_inflight: 10, token_refresh_interval_hours: 6 },
-  compat: { strip_reference_markers: true },
   responses: { store_ttl_seconds: 900 },
   embeddings: { provider: '' },
   auto_delete: { mode: 'none' },
@@ -60,9 +59,6 @@ function fromServerForm(data) {
       global_max_inflight: Number(data.runtime?.global_max_inflight || 10),
       token_refresh_interval_hours: Number(data.runtime?.token_refresh_interval_hours || 6),
     },
-    compat: {
-      strip_reference_markers: data.compat?.strip_reference_markers ?? true,
-    },
     responses: {
       store_ttl_seconds: Number(data.responses?.store_ttl_seconds || 900),
     },
@@ -95,9 +91,6 @@ function toServerPayload(form) {
       global_max_inflight: Number(form.runtime.global_max_inflight),
       token_refresh_interval_hours: Number(form.runtime.token_refresh_interval_hours),
     },
-    compat: {
-      strip_reference_markers: Boolean(form.compat?.strip_reference_markers ?? true),
-    },
     responses: { store_ttl_seconds: Number(form.responses.store_ttl_seconds) },
     embeddings: { provider: String(form.embeddings.provider || '').trim() },
     auto_delete: { mode: normalizeAutoDeleteMode(form.auto_delete) },
diff --git a/webui/src/locales/en.json b/webui/src/locales/en.json
index 0808398..36abab2 100644
--- a/webui/src/locales/en.json
+++ b/webui/src/locales/en.json
@@ -397,9 +397,6 @@
     "currentInputFileDesc": "Enabled by default. Once the character threshold is reached, upload the full context as a DS2API_HISTORY.txt context file.",
     "currentInputFileMinChars": "Current input threshold (characters)",
     "currentInputFileHelp": "Default is 0, which uses independent split for any non-empty input.",
-    "compatibilityTitle": "Compatibility",
-    "compatibilityDesc": "Compatibility controls that keep stream output closer to the wire format or safer for the web UI.",
-    "stripReferenceMarkers": "Strip [reference:N] markers",
     "modelTitle": "Model mapping",
     "modelAliases": "Global model aliases (JSON)",
     "autoDeleteTitle": "Session Cleanup Policy",
@@ -485,4 +482,4 @@
       "four": "Trigger a redeploy to apply the updated environment variables."
     }
   }
-}
\ No newline at end of file
+}
diff --git a/webui/src/locales/zh.json b/webui/src/locales/zh.json
index b3b2460..8e72487 100644
--- a/webui/src/locales/zh.json
+++ b/webui/src/locales/zh.json
@@ -397,9 +397,6 @@
     "currentInputFileDesc": "默认开启。达到字符阈值后,将完整上下文上传为 DS2API_HISTORY.txt 上下文文件。",
     "currentInputFileMinChars": "当前输入阈值(字符数)",
     "currentInputFileHelp": "默认 0,表示只要有输入就会使用独立拆分。",
-    "compatibilityTitle": "兼容性设置",
-    "compatibilityDesc": "用于控制输出格式兼容性,避免把模型原始流里的标记直接暴露到前端。",
-    "stripReferenceMarkers": "移除 [reference:N] 标记",
     "modelTitle": "模型映射",
     "modelAliases": "全局模型映射(JSON)",
     "autoDeleteTitle": "会话删除策略",
@@ -485,4 +482,4 @@
       "four": "触发重新部署以应用新的环境变量。"
     }
   }
-}
\ No newline at end of file
+}