From 40d5e3ebb58e16e64961af61e7d34f15085e41fd Mon Sep 17 00:00:00 2001 From: CJACK Date: Mon, 27 Apr 2026 00:21:26 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B5=8B=E8=AF=95DSML?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/quality-gates.yml | 103 +++++++++++++++-- .github/workflows/release-artifacts.yml | 64 +++-------- API.en.md | 5 +- API.md | 5 +- Dockerfile | 3 +- README.MD | 6 +- README.en.md | 6 +- docs/ARCHITECTURE.en.md | 2 +- docs/ARCHITECTURE.md | 2 +- docs/DEPLOY.en.md | 4 +- docs/DEPLOY.md | 4 +- docs/TESTING.md | 3 + docs/prompt-compatibility.md | 24 ++-- docs/toolcall-semantics.md | 46 +++++--- internal/format/openai/render_chat.go | 8 +- internal/format/openai/render_responses.go | 8 +- internal/httpapi/claude/handler_util_test.go | 10 +- .../openai/chat/chat_stream_runtime.go | 21 ++-- internal/httpapi/openai/chat/handler.go | 4 + internal/httpapi/openai/chat/handler_chat.go | 6 +- .../openai/chat/handler_toolcall_test.go | 61 ++++++++++ internal/httpapi/openai/history_split_test.go | 2 +- internal/httpapi/openai/responses/handler.go | 5 + .../openai/responses/responses_handler.go | 5 +- .../responses_stream_runtime_core.go | 46 +++++--- .../openai/responses/responses_stream_test.go | 70 ++++++++++++ .../openai/shared/assistant_toolcalls.go | 26 +++++ .../js/helpers/stream-tool-sieve/parse.js | 2 +- .../stream-tool-sieve/parse_payload.js | 104 ++++++++++++++++- .../js/helpers/stream-tool-sieve/sieve-xml.js | 45 ++++++++ .../stream-tool-sieve/tool-keywords.js | 3 + internal/prompt/tool_calls.go | 28 ++--- internal/prompt/tool_calls_test.go | 10 +- .../promptcompat/message_normalize_test.go | 14 +-- internal/promptcompat/prompt_build_test.go | 6 +- internal/sse/consumer.go | 23 ++-- internal/sse/line.go | 25 ++-- internal/sse/parser.go | 31 ++++- internal/toolcall/tool_prompt.go | 61 +++++----- internal/toolcall/tool_prompt_test.go | 36 +++--- internal/toolcall/toolcalls_dsml.go | 108 ++++++++++++++++++ internal/toolcall/toolcalls_parse.go | 6 +- internal/toolcall/toolcalls_test.go | 31 +++++ internal/toolstream/tool_sieve_xml.go | 72 +++++++++--- internal/toolstream/tool_sieve_xml_test.go | 31 +++++ scripts/build-release-archives.sh | 81 +++++++++++++ scripts/build-webui.sh | 2 +- scripts/release-targets.sh | 12 ++ tests/node/stream-tool-sieve.test.js | 39 +++++++ tests/scripts/check-cross-build.sh | 58 ++++++++++ 50 files changed, 1112 insertions(+), 265 deletions(-) create mode 100644 internal/httpapi/openai/shared/assistant_toolcalls.go create mode 100644 internal/toolcall/toolcalls_dsml.go create mode 100755 scripts/build-release-archives.sh create mode 100755 scripts/release-targets.sh create mode 100755 tests/scripts/check-cross-build.sh diff --git a/.github/workflows/quality-gates.yml b/.github/workflows/quality-gates.yml index 0365672..64e70d5 100644 --- a/.github/workflows/quality-gates.yml +++ b/.github/workflows/quality-gates.yml @@ -9,8 +9,18 @@ on: permissions: contents: read +concurrency: + group: quality-gates-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + GO_VERSION: "1.26.x" + NODE_VERSION: "24" + GOLANGCI_LINT_VERSION: "v2.11.4" + jobs: - quality-gates: + lint-and-refactor: + name: Lint and Refactor Gate runs-on: ubuntu-latest steps: - name: Checkout @@ -19,19 +29,13 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: "1.26.x" - - - name: Setup Node - uses: actions/setup-node@v4 - with: - node-version: "24" - cache: "npm" - cache-dependency-path: webui/package-lock.json + go-version: ${{ env.GO_VERSION }} + cache-dependency-path: go.sum - name: Setup golangci-lint uses: golangci/golangci-lint-action@v8 with: - version: v2.11.4 + version: ${{ env.GOLANGCI_LINT_VERSION }} install-mode: binary verify: true @@ -41,10 +45,87 @@ jobs: - name: Refactor Line Gate run: ./tests/scripts/check-refactor-line-gate.sh + go-unit: + name: Go Unit (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - macos-latest + - windows-latest + defaults: + run: + shell: bash + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + cache-dependency-path: go.sum + + - name: Go Unit Gate + run: ./tests/scripts/run-unit-go.sh + + unit-all: + name: Unit Gates (Go + Node) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + cache-dependency-path: go.sum + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: ${{ env.NODE_VERSION }} + cache: npm + cache-dependency-path: webui/package-lock.json + - name: Unit Gates (Go + Node) run: ./tests/scripts/run-unit-all.sh + webui-build: + name: WebUI Build + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: ${{ env.NODE_VERSION }} + cache: npm + cache-dependency-path: webui/package-lock.json + - name: WebUI Build Gate run: | - npm ci --prefix webui + npm ci --prefix webui --prefer-offline --no-audit npm run build --prefix webui + + cross-build: + name: Release Target Cross-Build + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + cache-dependency-path: go.sum + + - name: Cross-Build Release Targets + env: + CROSS_BUILD_JOBS: "3" + run: ./tests/scripts/check-cross-build.sh diff --git a/.github/workflows/release-artifacts.yml b/.github/workflows/release-artifacts.yml index 17b3b74..09ec700 100644 --- a/.github/workflows/release-artifacts.yml +++ b/.github/workflows/release-artifacts.yml @@ -15,6 +15,14 @@ permissions: contents: write packages: write +concurrency: + group: release-artifacts-${{ github.event.release.tag_name || github.event.inputs.release_tag }} + cancel-in-progress: false + +env: + GO_VERSION: "1.26.x" + NODE_VERSION: "24" + jobs: build-and-upload: runs-on: ubuntu-latest @@ -27,12 +35,13 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: "1.26.x" + go-version: ${{ env.GO_VERSION }} + cache-dependency-path: go.sum - name: Setup Node uses: actions/setup-node@v4 with: - node-version: "24" + node-version: ${{ env.NODE_VERSION }} cache: "npm" cache-dependency-path: webui/package-lock.json @@ -44,52 +53,13 @@ jobs: - name: Build WebUI run: | - npm ci --prefix webui + npm ci --prefix webui --prefer-offline --no-audit npm run build --prefix webui - name: Build Multi-Platform Archives - run: | - set -euo pipefail - TAG="${RELEASE_TAG}" - BUILD_VERSION="${TAG}" - if [ -z "${BUILD_VERSION}" ] && [ -f VERSION ]; then - BUILD_VERSION="$(cat VERSION | tr -d '[:space:]')" - fi - mkdir -p dist - - targets=( - "linux/amd64" - "linux/arm64" - "darwin/amd64" - "darwin/arm64" - "windows/amd64" - ) - - for target in "${targets[@]}"; do - GOOS="${target%/*}" - GOARCH="${target#*/}" - PKG="ds2api_${TAG}_${GOOS}_${GOARCH}" - STAGE="dist/${PKG}" - BIN="ds2api" - if [ "${GOOS}" = "windows" ]; then - BIN="ds2api.exe" - fi - - mkdir -p "${STAGE}/static" - CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" \ - go build -trimpath -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION}" -o "${STAGE}/${BIN}" ./cmd/ds2api - - cp config.example.json .env.example LICENSE README.MD README.en.md "${STAGE}/" - cp -R static/admin "${STAGE}/static/admin" - - if [ "${GOOS}" = "windows" ]; then - (cd dist && zip -rq "${PKG}.zip" "${PKG}") - else - tar -C dist -czf "dist/${PKG}.tar.gz" "${PKG}" - fi - - rm -rf "${STAGE}" - done + env: + RELEASE_BUILD_JOBS: "3" + run: ./scripts/build-release-archives.sh - name: Prepare Docker release inputs run: | @@ -153,6 +123,8 @@ jobs: platforms: linux/amd64,linux/arm64 tags: ${{ steps.meta_release.outputs.tags }} labels: ${{ steps.meta_release.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max - name: Export Docker image archives for release assets run: | @@ -162,12 +134,14 @@ jobs: docker buildx build \ --platform linux/amd64 \ --target runtime-from-dist \ + --cache-from type=gha \ --output type=docker,dest="dist/ds2api_${TAG}_docker_linux_amd64.tar" \ . docker buildx build \ --platform linux/arm64 \ --target runtime-from-dist \ + --cache-from type=gha \ --output type=docker,dest="dist/ds2api_${TAG}_docker_linux_arm64.tar" \ . diff --git a/API.en.md b/API.en.md index ca1e7a9..04a26fb 100644 --- a/API.en.md +++ b/API.en.md @@ -37,7 +37,7 @@ Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Depl - OpenAI / Claude / Gemini protocols are now mounted on one shared `chi` router tree assembled in `internal/server/router.go`. - Adapter responsibilities are streamlined to: **request normalization → DeepSeek invocation → protocol-shaped rendering**, reducing legacy split-logic paths. -- Tool-calling semantics are aligned between Go and Node runtime: the only executable model-output syntax is the canonical XML tool block `` → `` → ``, plus stream-time anti-leak filtering. +- Tool-calling semantics are aligned between Go and Node runtime: models should output the DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts legacy canonical XML `` → `` → ``. DSML is normalized back to XML at the parser entry, so internal parsing remains XML-based, with stream-time anti-leak filtering. - `Admin API` separates static config from runtime policy: `/admin/config*` for configuration state, `/admin/settings*` for runtime behavior. --- @@ -334,7 +334,8 @@ When `tools` is present, DS2API performs anti-leak handling: Additional notes: -- The parser currently treats only canonical XML tool blocks (`` / `` / ``) as executable tool calls. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text. +- The parser treats DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`) and legacy canonical XML tool blocks (`` / `` / ``) as executable tool calls. DSML is normalized back to XML at the parser entry; internal parsing remains XML-based. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text. +- If the final visible response text is empty but the reasoning stream contains an executable tool call, Chat / Responses emits a standard OpenAI `tool_calls` / `function_call` output during finalization. If thinking/reasoning was not enabled by the client, that reasoning text is used only for detection and is not exposed as visible text or `reasoning_content`. - `tool_calls` shown inside fenced markdown code blocks (for example, ```json ... ```) are treated as examples, not executable calls. --- diff --git a/API.md b/API.md index 35d97d4..d7d7963 100644 --- a/API.md +++ b/API.md @@ -37,7 +37,7 @@ - OpenAI / Claude / Gemini 三套协议已统一挂在同一 `chi` 路由树上,由 `internal/server/router.go` 负责装配。 - 适配器层职责收敛为:**请求归一化 → DeepSeek 调用 → 协议形态渲染**,减少历史版本中“同能力多处实现”的分叉。 -- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:当前唯一可执行的模型输出语法是 canonical XML 工具块 `` → `` → ``,并在流式场景执行防泄漏筛分。 +- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受旧式 canonical XML `` → `` → ``,内部仍以 XML 解析语义为准,并在流式场景执行防泄漏筛分。 - `Admin API` 将配置与运行时策略分开:`/admin/config*` 管静态配置,`/admin/settings*` 管运行时行为。 --- @@ -335,7 +335,8 @@ data: [DONE] 补充说明: - **非代码块上下文**下,工具负载即使与普通文本混合,也会按特征识别并产出可执行 tool call(前后普通文本仍可透传)。 -- 解析器当前只把 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理。 +- 解析器当前把 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;DSML 会先归一化回 XML,内部仍以 XML 解析语义为准。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理。 +- 当最终可见正文为空但思维链里包含可执行工具调用时,Chat / Responses 会在收尾阶段补发标准 OpenAI `tool_calls` / `function_call` 输出;如果客户端未开启 thinking / reasoning,该思维链只用于检测,不会作为可见正文或 `reasoning_content` 暴露。 - Markdown fenced code block(例如 ```json ... ```)中的 `tool_calls` 仅视为示例文本,不会被执行。 --- diff --git a/Dockerfile b/Dockerfile index be25b95..ac062f7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ RUN set -eux; \ GOARCH="${TARGETARCH:-$(go env GOARCH)}"; \ BUILD_VERSION_RESOLVED="${BUILD_VERSION:-}"; \ if [ -z "${BUILD_VERSION_RESOLVED}" ] && [ -f VERSION ]; then BUILD_VERSION_RESOLVED="$(cat VERSION | tr -d "[:space:]")"; fi; \ - CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" go build -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION_RESOLVED}" -o /out/ds2api ./cmd/ds2api + CGO_ENABLED=0 GOOS="${GOOS}" GOARCH="${GOARCH}" go build -buildvcs=false -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${BUILD_VERSION_RESOLVED}" -o /out/ds2api ./cmd/ds2api FROM busybox:1.36.1-musl AS busybox-tools @@ -54,7 +54,6 @@ RUN set -eux; \ test -n "${PKG_DIR}"; \ mkdir -p /out/static; \ cp "${PKG_DIR}/ds2api" /out/ds2api; \ - cp "${PKG_DIR}/config.example.json" /out/config.example.json; \ cp -R "${PKG_DIR}/static/admin" /out/static/admin diff --git a/README.MD b/README.MD index 329add9..a4fe4e0 100644 --- a/README.MD +++ b/README.MD @@ -149,7 +149,7 @@ flowchart LR - `ANTHROPIC_BASE_URL` 推荐直接指向 DS2API 根地址(例如 `http://127.0.0.1:5001`),Claude Code 会请求 `/v1/messages?beta=true`。 - `ANTHROPIC_API_KEY` 需要与 `config.json` 中 `keys` 一致;建议同时保留常规 key 与 `sk-ant-*` 形态 key,兼容不同客户端校验习惯。 - 若系统设置了代理,建议对 DS2API 地址配置 `NO_PROXY=127.0.0.1,localhost,<你的主机IP>`,避免本地回环请求被代理拦截。 -- 如遇“工具调用输出成文本、未执行”问题,请优先检查模型输出是否为当前唯一受支持的 XML 工具块:`...`,而不是旧式 `` / `` / `` / ``、``、`tool_use` 或纯 JSON `tool_calls` 片段。 +- 如遇“工具调用输出成文本、未执行”问题,请优先检查模型输出是否为推荐的 DSML 工具块:`<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`。兼容层也接受旧式 canonical XML:`...`;旧式 `` / `` / `` / ``、``、`tool_use` 或纯 JSON `tool_calls` 片段不会执行。 ### Gemini 接口 @@ -318,7 +318,7 @@ Gemini 路由还可以使用 `x-goog-api-key`,或在没有认证头时使用 ` 当请求中带 `tools` 时,DS2API 会做防泄漏处理与结构化转译: 1. 只在**非代码块上下文**启用执行型 toolcall 识别(代码块示例默认不触发) -2. 解析层当前只把 canonical XML 工具块视为可执行调用:`` → `` → ``;旧式 `` / `` / `` / ``、``、`tool_use` / antml 变体与纯 JSON `tool_calls` 片段都会按普通文本处理 +2. 解析层当前把 DSML 外壳视为推荐可执行调用:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容旧式 canonical XML `` → `` → ``。DSML 只是外壳别名,内部仍以 XML 解析语义为准;旧式 `` / `` / `` / ``、``、`tool_use` / antml 变体与纯 JSON `tool_calls` 片段都会按普通文本处理 3. `responses` 流式严格使用官方 item 生命周期事件(`response.output_item.*`、`response.content_part.*`、`response.function_call_arguments.*`) 4. `responses` 支持并执行 `tool_choice`(`auto`/`none`/`required`/强制函数);`required` 违规时非流式返回 `422`,流式返回 `response.failed` 5. 客户端请求哪种协议,就按该协议返回工具调用(OpenAI/Claude/Gemini 各自原生结构);模型侧优先约束输出规范 XML,再由兼容层转译 @@ -389,7 +389,7 @@ npm run build --prefix webui 工作流文件:`.github/workflows/release-artifacts.yml` - **触发条件**:仅在 GitHub Release `published` 时触发(普通 push 不会触发) -- **构建产物**:多平台二进制包(`linux/amd64`、`linux/arm64`、`darwin/amd64`、`darwin/arm64`、`windows/amd64`)+ `sha256sums.txt` +- **构建产物**:多平台二进制包(`linux/amd64`、`linux/arm64`、`linux/armv7`、`darwin/amd64`、`darwin/arm64`、`windows/amd64`、`windows/arm64`)+ `sha256sums.txt` - **容器镜像发布**:仅推送到 GHCR(`ghcr.io/cjackhwang/ds2api`) - **每个压缩包包含**:`ds2api` 可执行文件、`static/admin`、WASM 文件(同时支持内置 fallback)、`config.example.json` 配置示例、README、LICENSE diff --git a/README.en.md b/README.en.md index d267b82..13b6982 100644 --- a/README.en.md +++ b/README.en.md @@ -146,7 +146,7 @@ Besides the primary aliases above, `/anthropic/v1/models` also returns Claude 4. - Set `ANTHROPIC_BASE_URL` to the DS2API root URL (for example `http://127.0.0.1:5001`). Claude Code sends requests to `/v1/messages?beta=true`. - `ANTHROPIC_API_KEY` must match an entry in `keys` from `config.json`. Keeping both a regular key and an `sk-ant-*` style key improves client compatibility. - If your environment has proxy variables, set `NO_PROXY=127.0.0.1,localhost,` for DS2API to avoid proxy interception of local traffic. -- If tool calls are rendered as plain text and not executed, first verify the model output uses the only supported XML block: `...`, not legacy `` / `` / `` / ``, ``, `tool_use`, or standalone JSON `tool_calls`. +- If tool calls are rendered as plain text and not executed, first verify the model output uses the recommended DSML block: `<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`. DS2API also accepts legacy canonical XML: `...`; legacy `` / `` / `` / ``, ``, `tool_use`, or standalone JSON `tool_calls` are not executed. ### Gemini Endpoint @@ -312,7 +312,7 @@ Queue limit = DS2API_ACCOUNT_MAX_QUEUE (default = recommended concurrency) When `tools` is present in the request, DS2API performs anti-leak handling: 1. Toolcall feature matching is enabled only in **non-code-block context** (fenced examples are ignored) -2. The parser now treats only the canonical XML wrapper as executable tool-calling syntax: `` → `` → ``; legacy `` / `` / `` / ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text +2. The parser now treats the DSML shell as the recommended executable tool-calling syntax: `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; it also accepts legacy canonical XML `` → `` → ``. DSML is a shell alias and internal parsing remains XML-based; legacy `` / `` / `` / ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text 3. `responses` streaming strictly uses official item lifecycle events (`response.output_item.*`, `response.content_part.*`, `response.function_call_arguments.*`) 4. `responses` supports and enforces `tool_choice` (`auto`/`none`/`required`/forced function); `required` violations return `422` for non-stream and `response.failed` for stream 5. The output protocol follows the client request (OpenAI / Claude / Gemini native shapes); model-side prompting can prefer XML, and the compatibility layer handles the protocol-specific translation @@ -381,7 +381,7 @@ npm run build --prefix webui Workflow: `.github/workflows/release-artifacts.yml` - **Trigger**: only on GitHub Release `published` (normal pushes do not trigger builds) -- **Outputs**: multi-platform archives (`linux/amd64`, `linux/arm64`, `darwin/amd64`, `darwin/arm64`, `windows/amd64`) + `sha256sums.txt` +- **Outputs**: multi-platform archives (`linux/amd64`, `linux/arm64`, `linux/armv7`, `darwin/amd64`, `darwin/arm64`, `windows/amd64`, `windows/arm64`) + `sha256sums.txt` - **Container publishing**: GHCR only (`ghcr.io/cjackhwang/ds2api`) - **Each archive includes**: `ds2api` executable, `static/admin`, WASM file (with embedded fallback support), `config.example.json`-based config template, README, LICENSE diff --git a/docs/ARCHITECTURE.en.md b/docs/ARCHITECTURE.en.md index 1f6b24a..3ba24fa 100644 --- a/docs/ARCHITECTURE.en.md +++ b/docs/ARCHITECTURE.en.md @@ -175,7 +175,7 @@ flowchart LR - `internal/deepseek/{client,protocol,transport}`: upstream requests, sessions, PoW adaptation, protocol constants, and transport details. - `internal/js/chat-stream` + `api/chat-stream.js`: Vercel Node streaming bridge; Go prepare/release owns auth, account lease, and completion payload assembly, while Node relays real-time SSE with Go-aligned finalization and tool sieve semantics. - `internal/stream` + `internal/sse`: Go stream parsing and incremental assembly. -- `internal/toolcall` + `internal/toolstream`: canonical XML tool-call parsing + anti-leak sieve (the only executable format is `` / `` / ``). +- `internal/toolcall` + `internal/toolstream`: DSML shell compatibility plus canonical XML tool-call parsing and anti-leak sieve; DSML is normalized back to XML at the entrypoint, and internal parsing remains XML-based. - `internal/httpapi/admin/*`: Admin API root assembly plus auth/accounts/config/settings/proxies/rawsamples/vercel/history/devcapture/version resource packages. - `internal/chathistory`: server-side conversation history persistence, pagination, detail lookup, and retention policy. - `internal/config`: config loading/validation + runtime settings hot-reload. diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 5094ea4..d5b8baf 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -175,7 +175,7 @@ flowchart LR - `internal/deepseek/{client,protocol,transport}`:上游请求、会话、PoW 适配、协议常量与传输层。 - `internal/js/chat-stream` + `api/chat-stream.js`:Vercel Node 流式桥;Go prepare/release 管理鉴权、账号租约和 completion payload,Node 侧负责实时 SSE 转发并保持 Go 对齐的终结态和 tool sieve 语义。 - `internal/stream` + `internal/sse`:Go 流式解析与增量处理。 -- `internal/toolcall` + `internal/toolstream`:canonical XML 工具调用解析与防泄漏筛分(唯一可执行格式:`` / `` / ``)。 +- `internal/toolcall` + `internal/toolstream`:DSML 外壳兼容与 canonical XML 工具调用解析、防泄漏筛分;DSML 会在入口归一化回 XML,内部仍按 XML 语义解析。 - `internal/httpapi/admin/*`:Admin API 根装配与 auth/accounts/config/settings/proxies/rawsamples/vercel/history/devcapture/version 等资源子包。 - `internal/chathistory`:服务器端对话记录持久化、分页、单条详情和保留策略。 - `internal/config`:配置加载、校验、运行时 settings 热更新。 diff --git a/docs/DEPLOY.en.md b/docs/DEPLOY.en.md index de52b4c..3e06322 100644 --- a/docs/DEPLOY.en.md +++ b/docs/DEPLOY.en.md @@ -70,9 +70,9 @@ Built-in GitHub Actions workflow: `.github/workflows/release-artifacts.yml` | Platform | Architecture | Format | | --- | --- | --- | -| Linux | amd64, arm64 | `.tar.gz` | +| Linux | amd64, arm64, armv7 | `.tar.gz` | | macOS | amd64, arm64 | `.tar.gz` | -| Windows | amd64 | `.zip` | +| Windows | amd64, arm64 | `.zip` | Each archive includes: diff --git a/docs/DEPLOY.md b/docs/DEPLOY.md index 7509cb3..29612f2 100644 --- a/docs/DEPLOY.md +++ b/docs/DEPLOY.md @@ -70,9 +70,9 @@ cp config.example.json config.json | 平台 | 架构 | 文件格式 | | --- | --- | --- | -| Linux | amd64, arm64 | `.tar.gz` | +| Linux | amd64, arm64, armv7 | `.tar.gz` | | macOS | amd64, arm64 | `.tar.gz` | -| Windows | amd64 | `.zip` | +| Windows | amd64, arm64 | `.zip` | 每个压缩包包含: diff --git a/docs/TESTING.md b/docs/TESTING.md index 40c3501..dd16142 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -13,6 +13,7 @@ DS2API 提供两个层级的测试: | 单元测试(Go) | `./tests/scripts/run-unit-go.sh` | 不需要真实账号 | | 单元测试(Node) | `./tests/scripts/run-unit-node.sh` | 不需要真实账号 | | 单元测试(全部) | `./tests/scripts/run-unit-all.sh` | 不需要真实账号 | +| Release 目标交叉编译 | `./tests/scripts/check-cross-build.sh` | 覆盖发布包支持的 GOOS/GOARCH | | 端到端测试 | `./tests/scripts/run-live.sh` | 使用真实账号执行全链路测试 | 端到端测试集会录制完整的请求/响应日志,用于故障排查。 @@ -35,6 +36,7 @@ npm run build --prefix webui - `./scripts/lint.sh` 会运行 Go 格式化检查和 `golangci-lint`;修改 Go 文件后仍建议先执行 `gofmt -w `。 - `run-unit-all.sh` 串行调用 Go 与 Node 单元测试入口。 +- CI 还会额外在 macOS/Windows 跑 Go 单测,并执行 release 目标交叉编译检查。 - `run-live.sh` 是真实账号端到端测试,适合作为发布或高风险改动后的补充验证,不属于每次 PR 的固定本地门禁。 --- @@ -57,6 +59,7 @@ npm run build --prefix webui # 结构与流程门禁 ./tests/scripts/check-refactor-line-gate.sh ./tests/scripts/check-node-split-syntax.sh +./tests/scripts/check-cross-build.sh # 历史阶段门禁:阶段 6 手工烟测签字检查(默认读取 plans/stage6-manual-smoke.md) ./tests/scripts/check-stage6-manual-smoke.sh diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index 8338ac6..ee227df 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -100,7 +100,7 @@ DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools` - `tools` 不会作为“原生工具 schema”直接下发给下游,而是被改写进 `prompt`。 - OpenAI Chat / Responses 原生走统一 OpenAI 标准化与 DeepSeek payload 组装;Claude / Gemini 会尽量复用 OpenAI prompt/tool 语义,其中 Gemini 直接复用 `promptcompat.BuildOpenAIPromptForAdapter`,Claude 消息接口在可代理场景会转换为 OpenAI chat 形态再执行。 - 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关;关闭时即使上游返回 `response/thinking_content`,兼容层也不会把它当作可见正文输出。Claude surface 在流式请求且未显式声明 `thinking` 时,仍按 Anthropic 语义默认关闭;但在非流式代理场景,兼容层会内部开启一次下游 thinking,用于捕获“正文为空、工具调用落在 thinking 里”的情况,随后在回包前剥离用户不可见的 thinking block。 -- 对 OpenAI Chat / Responses 的非流式收尾,如果最终可见正文为空,兼容层会优先尝试把思维链中的独立 `...` 结构当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测,但不会因为思维链内容去中途拦截或改写流式输出;thinking / reasoning 增量仍按原样先发,只有在结束收尾时才可能补发最终工具调用结果。只有正文为空且思维链里也没有可执行工具调用时,才继续按空回复错误处理。 +- 对 OpenAI Chat / Responses 的非流式收尾,如果最终可见正文为空,兼容层会优先尝试把思维链中的独立 DSML / XML 工具块当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测,但不会因为思维链内容去中途拦截或改写流式输出;thinking / reasoning 增量仍按原样先发,只有在结束收尾时才可能补发最终工具调用结果。补发结果会作为本轮 assistant 的结构化 `tool_calls` / `function_call` 输出返回,而不是塞进 `content` 文本;如果客户端没有开启 thinking / reasoning,思维链只用于检测,不会作为 `reasoning_content` 或可见正文暴露。只有正文为空且思维链里也没有可执行工具调用时,才继续按空回复错误处理。 ## 5. prompt 是怎么拼出来的 @@ -155,11 +155,11 @@ OpenAI Chat / Responses 在标准化后、history split / current input file 之 1. 把每个 tool 的名称、描述、参数 schema 序列化成文本。 2. 拼成 `You have access to these tools:` 大段说明。 -3. 再附上统一的 XML tool call 格式约束。 +3. 再附上统一的 DSML tool call 外壳格式约束。 4. 把这整段内容并入 system prompt。 -工具调用正例现在优先示范官方 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="..." string="true|false">`。 -兼容层仍接受旧式纯 `` wrapper,但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。 +工具调用正例现在优先示范官方 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。 +兼容层仍接受旧式纯 `` wrapper,但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意:这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现;DSML 标签会在解析入口归一化回现有 XML 标签后继续走同一套 parser。 正例中的工具名只会来自当前请求实际声明的工具;如果当前请求没有足够的已知工具形态,就省略对应的单工具、多工具或嵌套示例,避免把不可用工具名写进 prompt。 对执行类工具,脚本内容必须进入执行参数本身:`Bash` / `execute_command` 使用 `command`,`exec_command` 使用 `cmd`;不要把脚本示范成 `path` / `content` 文件写入参数。 @@ -193,18 +193,18 @@ assistant 的 reasoning 会变成一个显式标签块: ### 7.2 历史 tool_calls 保留方式 -assistant 历史 `tool_calls` 不会保留成 OpenAI 原生 JSON,而会转成 prompt 可见的 XML: +assistant 历史 `tool_calls` 不会保留成 OpenAI 原生 JSON,而会转成 prompt 可见的 DSML 外壳: ```xml - - - - - +<|DSML|tool_calls> + <|DSML|invoke name="read_file"> + <|DSML|parameter name="path"> + + ``` -这也是当前项目里唯一受支持的 canonical tool-calling 形态;其他形态都会作为普通文本保留,不会作为可执行调用语法。 -例外是 parser 会对一个非常窄的模型失误做修复:如果 assistant 输出了 `` ... ``,但漏掉最前面的 opening ``,解析阶段会补回 wrapper 后再尝试识别。 +解析层同时兼容旧式纯 XML 形态:`` / `` / ``。两者都会先归一到现有 XML 解析语义;其他旧格式都会作为普通文本保留,不会作为可执行调用语法。 +例外是 parser 会对一个非常窄的模型失误做修复:如果 assistant 输出了 `` ... ``(或 DSML 对应标签),但漏掉最前面的 opening wrapper,解析阶段会补回 wrapper 后再尝试识别。 这件事很重要,因为它决定了: diff --git a/docs/toolcall-semantics.md b/docs/toolcall-semantics.md index ea5c456..c15da11 100644 --- a/docs/toolcall-semantics.md +++ b/docs/toolcall-semantics.md @@ -4,9 +4,19 @@ 文档导航:[总览](../README.MD) / [架构说明](./ARCHITECTURE.md) / [测试指南](./TESTING.md) -## 1) 当前唯一可执行格式 +## 1) 当前可执行格式 -当前版本只把下面这类 canonical XML 视为可执行工具调用: +当前版本推荐模型输出 DSML 外壳: + +```xml +<|DSML|tool_calls> + <|DSML|invoke name="read_file"> + <|DSML|parameter name="path"> + + +``` + +兼容层仍接受旧式 canonical XML: ```xml @@ -16,21 +26,24 @@ ``` +这不是原生 DSML 全链路实现。DSML 只作为 prompt 外壳和解析入口别名;进入 parser 前会被归一化成 `` / `` / ``,内部仍以现有 XML 解析语义为准。 + 约束: -- 必须有 `...` wrapper -- 每个调用必须在 `...` 内 +- 必须有 `<|DSML|tool_calls>...` 或 `...` wrapper +- 每个调用必须在 `<|DSML|invoke name="...">...` 或 `...` 内 - 工具名必须放在 `invoke` 的 `name` 属性 -- 参数必须使用 `...` +- 参数必须使用 `<|DSML|parameter name="...">...` 或 `...` +- 同一个工具块内不要混用 DSML 标签和旧 XML 工具标签;混搭会被视为非法工具块 兼容修复: -- 如果模型漏掉 opening ``,但后面仍输出了一个或多个 `` 并以 `` 收尾,Go 解析链路会在解析前补回缺失的 opening wrapper。 -- 这是一个针对常见模型失误的窄修复,不改变推荐输出格式;prompt 仍要求模型直接输出完整 canonical XML。 +- 如果模型漏掉 opening wrapper,但后面仍输出了一个或多个 invoke 并以 closing wrapper 收尾,Go 解析链路会在解析前补回缺失的 opening wrapper。 +- 这是一个针对常见模型失误的窄修复,不改变推荐输出格式;prompt 仍要求模型直接输出完整 DSML 外壳。 -## 2) 非 canonical 内容 +## 2) 非兼容内容 -任何不满足上述 canonical XML 形态的内容,都会保留为普通文本,不会执行。一个例外是上一节提到的“缺失 opening ``、但 closing `` 仍存在”的窄修复场景。 +任何不满足上述 DSML / canonical XML 形态的内容,都会保留为普通文本,不会执行。一个例外是上一节提到的“缺失 opening wrapper、但 closing wrapper 仍存在”的窄修复场景。 当前 parser 不把 allow-list 当作硬安全边界:即使传入了已声明工具名列表,XML 里出现未声明工具名时也会尽量解析并交给上层协议输出;真正的执行侧仍必须自行校验工具名和参数。 @@ -38,8 +51,8 @@ 在流式链路中(Go / Node 一致): -- canonical `` wrapper 会进入结构化捕获 -- 如果流里直接从 `` 开始,但后面补上了 ``,Go 流式筛分也会按缺失 opening wrapper 的修复路径尝试恢复 +- DSML `<|DSML|tool_calls>` wrapper 和 canonical `` wrapper 都会进入结构化捕获 +- 如果流里直接从 invoke 开始,但后面补上了 closing wrapper,Go 流式筛分也会按缺失 opening wrapper 的修复路径尝试恢复 - 已识别成功的工具调用不会再次回流到普通文本 - 不符合新格式的块不会执行,并继续按原样文本透传 - fenced code block 中的 XML 示例始终按普通文本处理 @@ -49,14 +62,14 @@ `ParseToolCallsDetailed` / `parseToolCallsDetailed` 返回: - `calls`:解析出的工具调用列表(`name` + `input`) -- `sawToolCallSyntax`:检测到 canonical wrapper,或命中“缺失 opening wrapper 但可修复”的形态时会为 `true` +- `sawToolCallSyntax`:检测到 DSML / canonical wrapper,或命中“缺失 opening wrapper 但可修复”的形态时会为 `true` - `rejectedByPolicy`:当前固定为 `false` - `rejectedToolNames`:当前固定为空数组 ## 5) 落地建议 -1. Prompt 里只示范 canonical XML 语法。 -2. 上游客户端仍应直接输出 canonical XML;DS2API 只对“closing tag 在、opening tag 漏掉”的常见失误做窄修复,不会泛化接受其他旧格式。 +1. Prompt 里只示范 DSML 外壳语法。 +2. 上游客户端应直接输出完整 DSML 外壳;DS2API 兼容旧式 canonical XML,并只对“closing tag 在、opening tag 漏掉”的常见失误做窄修复,不会泛化接受其他旧格式。 3. 不要依赖 parser 做安全控制;执行器侧仍应做工具名和参数校验。 ## 6) 回归验证 @@ -70,6 +83,7 @@ node --test tests/node/stream-tool-sieve.test.js 重点覆盖: -- canonical `` wrapper 正常解析 -- 非 canonical 内容按普通文本透传 +- DSML `<|DSML|tool_calls>` wrapper 正常解析 +- legacy canonical `` wrapper 正常解析 +- 非兼容内容按普通文本透传 - 代码块示例不执行 diff --git a/internal/format/openai/render_chat.go b/internal/format/openai/render_chat.go index 764f151..24b6fa1 100644 --- a/internal/format/openai/render_chat.go +++ b/internal/format/openai/render_chat.go @@ -8,14 +8,18 @@ import ( func BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any { detected := toolcall.ParseAssistantToolCallsDetailed(finalText, finalThinking, toolNames) + return BuildChatCompletionWithToolCalls(completionID, model, finalPrompt, finalThinking, finalText, detected.Calls) +} + +func BuildChatCompletionWithToolCalls(completionID, model, finalPrompt, finalThinking, finalText string, detected []toolcall.ParsedToolCall) map[string]any { finishReason := "stop" messageObj := map[string]any{"role": "assistant", "content": finalText} if strings.TrimSpace(finalThinking) != "" { messageObj["reasoning_content"] = finalThinking } - if len(detected.Calls) > 0 { + if len(detected) > 0 { finishReason = "tool_calls" - messageObj["tool_calls"] = toolcall.FormatOpenAIToolCalls(detected.Calls) + messageObj["tool_calls"] = toolcall.FormatOpenAIToolCalls(detected) messageObj["content"] = nil } diff --git a/internal/format/openai/render_responses.go b/internal/format/openai/render_responses.go index 2a58bf5..7d8e78b 100644 --- a/internal/format/openai/render_responses.go +++ b/internal/format/openai/render_responses.go @@ -13,11 +13,15 @@ func BuildResponseObject(responseID, model, finalPrompt, finalThinking, finalTex // Strict mode: only standalone, structured tool-call payloads are treated // as executable tool calls. detected := toolcall.ParseAssistantToolCallsDetailed(finalText, finalThinking, toolNames) + return BuildResponseObjectWithToolCalls(responseID, model, finalPrompt, finalThinking, finalText, detected.Calls) +} + +func BuildResponseObjectWithToolCalls(responseID, model, finalPrompt, finalThinking, finalText string, detected []toolcall.ParsedToolCall) map[string]any { exposedOutputText := finalText output := make([]any, 0, 2) - if len(detected.Calls) > 0 { + if len(detected) > 0 { exposedOutputText = "" - output = append(output, toResponsesFunctionCallItems(detected.Calls)...) + output = append(output, toResponsesFunctionCallItems(detected)...) } else { content := make([]any, 0, 2) if finalThinking != "" { diff --git a/internal/httpapi/claude/handler_util_test.go b/internal/httpapi/claude/handler_util_test.go index 68f68ca..7b83c88 100644 --- a/internal/httpapi/claude/handler_util_test.go +++ b/internal/httpapi/claude/handler_util_test.go @@ -93,10 +93,10 @@ func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) { t.Fatalf("expected call id preserved, got %#v", call) } content, _ := m["content"].(string) - if !containsStr(content, "") || !containsStr(content, ``) { - t.Fatalf("expected assistant content to include XML tool call history, got %q", content) + if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) { + t.Fatalf("expected assistant content to include DSML tool call history, got %q", content) } - if !containsStr(content, ``) { + if !containsStr(content, `<|DSML|parameter name="query">`) { t.Fatalf("expected assistant content to include serialized parameters, got %q", content) } } @@ -292,8 +292,8 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) { if !containsStr(prompt, "Search the web") { t.Fatalf("expected description in prompt") } - if !containsStr(prompt, "") { - t.Fatalf("expected XML tool_calls format in prompt") + if !containsStr(prompt, "<|DSML|tool_calls>") { + t.Fatalf("expected DSML tool_calls format in prompt") } if !containsStr(prompt, "TOOL CALL FORMAT") { t.Fatalf("expected tool call format header in prompt") diff --git a/internal/httpapi/openai/chat/chat_stream_runtime.go b/internal/httpapi/openai/chat/chat_stream_runtime.go index 8ea0546..4bfb5d3 100644 --- a/internal/httpapi/openai/chat/chat_stream_runtime.go +++ b/internal/httpapi/openai/chat/chat_stream_runtime.go @@ -1,7 +1,6 @@ package chat import ( - "ds2api/internal/toolcall" "encoding/json" "net/http" "strings" @@ -33,11 +32,12 @@ type chatStreamRuntime struct { toolCallsEmitted bool toolCallsDoneEmitted bool - toolSieve toolstream.State - streamToolCallIDs map[int]string - streamToolNames map[int]string - thinking strings.Builder - text strings.Builder + toolSieve toolstream.State + streamToolCallIDs map[int]string + streamToolNames map[int]string + thinking strings.Builder + toolDetectionThinking strings.Builder + text strings.Builder finalThinking string finalText string @@ -130,10 +130,11 @@ func (s *chatStreamRuntime) resetStreamToolCallState() { func (s *chatStreamRuntime) finalize(finishReason string) { finalThinking := s.thinking.String() + finalToolDetectionThinking := s.toolDetectionThinking.String() finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers) s.finalThinking = finalThinking s.finalText = finalText - detected := toolcall.ParseAssistantToolCallsDetailed(finalText, finalThinking, s.toolNames) + detected := detectAssistantToolCalls(finalText, finalThinking, finalToolDetectionThinking, s.toolNames) if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted { finishReason = "tool_calls" delta := map[string]any{ @@ -238,6 +239,12 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD newChoices := make([]map[string]any, 0, len(parsed.Parts)) contentSeen := false + for _, p := range parsed.ToolDetectionThinkingParts { + trimmed := sse.TrimContinuationOverlap(s.toolDetectionThinking.String(), p.Text) + if trimmed != "" { + s.toolDetectionThinking.WriteString(trimmed) + } + } for _, p := range parsed.Parts { cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers) if s.searchEnabled && sse.IsCitation(cleanedText) { diff --git a/internal/httpapi/openai/chat/handler.go b/internal/httpapi/openai/chat/handler.go index 337d962..a682cc6 100644 --- a/internal/httpapi/openai/chat/handler.go +++ b/internal/httpapi/openai/chat/handler.go @@ -134,3 +134,7 @@ func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, func formatFinalStreamToolCallsWithStableIDs(calls []toolcall.ParsedToolCall, ids map[int]string) []map[string]any { return shared.FormatFinalStreamToolCallsWithStableIDs(calls, ids) } + +func detectAssistantToolCalls(text, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult { + return shared.DetectAssistantToolCalls(text, exposedThinking, detectionThinking, toolNames) +} diff --git a/internal/httpapi/openai/chat/handler_chat.go b/internal/httpapi/openai/chat/handler_chat.go index 3f97a50..da048fa 100644 --- a/internal/httpapi/openai/chat/handler_chat.go +++ b/internal/httpapi/openai/chat/handler_chat.go @@ -15,7 +15,6 @@ import ( "ds2api/internal/promptcompat" "ds2api/internal/sse" streamengine "ds2api/internal/stream" - "ds2api/internal/toolcall" ) func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) { @@ -159,11 +158,12 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co stripReferenceMarkers := h.compatStripReferenceMarkers() finalThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers) + finalToolDetectionThinking := cleanVisibleOutput(result.ToolDetectionThinking, stripReferenceMarkers) finalText := cleanVisibleOutput(result.Text, stripReferenceMarkers) if searchEnabled { finalText = replaceCitationMarkersWithLinks(finalText, result.CitationLinks) } - detected := toolcall.ParseAssistantToolCallsDetailed(finalText, finalThinking, toolNames) + detected := detectAssistantToolCalls(finalText, finalThinking, finalToolDetectionThinking, toolNames) if shouldWriteUpstreamEmptyOutputError(finalText) && len(detected.Calls) == 0 { status, message, code := upstreamEmptyOutputDetail(result.ContentFilter, finalText, finalThinking) if historySession != nil { @@ -172,7 +172,7 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co writeUpstreamEmptyOutputError(w, finalText, finalThinking, result.ContentFilter) return } - respBody := openaifmt.BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText, toolNames) + respBody := openaifmt.BuildChatCompletionWithToolCalls(completionID, model, finalPrompt, finalThinking, finalText, detected.Calls) finishReason := "stop" if choices, ok := respBody["choices"].([]map[string]any); ok && len(choices) > 0 { if fr, _ := choices[0]["finish_reason"].(string); strings.TrimSpace(fr) != "" { diff --git a/internal/httpapi/openai/chat/handler_toolcall_test.go b/internal/httpapi/openai/chat/handler_toolcall_test.go index bfff08a..cf74192 100644 --- a/internal/httpapi/openai/chat/handler_toolcall_test.go +++ b/internal/httpapi/openai/chat/handler_toolcall_test.go @@ -173,6 +173,34 @@ func TestHandleNonStreamPromotesThinkingToolCallsWhenTextEmpty(t *testing.T) { } } +func TestHandleNonStreamPromotesHiddenThinkingDSMLToolCallsWhenTextEmpty(t *testing.T) { + h := &Handler{} + resp := makeSSEHTTPResponse( + `data: {"p":"response/thinking_content","v":"<|DSML|tool_calls><|DSML|invoke name=\"search\"><|DSML|parameter name=\"q\">from-hidden-thinking"}`, + `data: [DONE]`, + ) + rec := httptest.NewRecorder() + + h.handleNonStream(rec, resp, "cid-hidden-thinking-tool", "deepseek-v4-pro", "prompt", false, false, []string{"search"}, nil) + if rec.Code != http.StatusOK { + t.Fatalf("expected 200 for hidden thinking tool calls, got %d body=%s", rec.Code, rec.Body.String()) + } + out := decodeJSONBody(t, rec.Body.String()) + choices, _ := out["choices"].([]any) + choice, _ := choices[0].(map[string]any) + message, _ := choice["message"].(map[string]any) + if _, ok := message["reasoning_content"]; ok { + t.Fatalf("expected hidden thinking not to be exposed, got %#v", message) + } + toolCalls, _ := message["tool_calls"].([]any) + if len(toolCalls) != 1 { + t.Fatalf("expected one hidden-thinking tool call, got %#v", message["tool_calls"]) + } + if got := asString(choice["finish_reason"]); got != "tool_calls" { + t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"]) + } +} + func TestHandleStreamToolsPlainTextStreamsBeforeFinish(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( @@ -282,6 +310,39 @@ func TestHandleStreamPromotesThinkingToolCallsOnFinalizeWithoutMidstreamIntercep } } +func TestHandleStreamPromotesHiddenThinkingDSMLToolCallsOnFinalize(t *testing.T) { + h := &Handler{} + resp := makeSSEHTTPResponse( + `data: {"p":"response/thinking_content","v":"<|DSML|tool_calls><|DSML|invoke name=\"search\"><|DSML|parameter name=\"q\">from-hidden-thinking"}`, + `data: [DONE]`, + ) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) + + h.handleStream(rec, req, resp, "cid-hidden-thinking-stream", "deepseek-v4-pro", "prompt", false, false, []string{"search"}, nil) + + frames, done := parseSSEDataFrames(t, rec.Body.String()) + if !done { + t.Fatalf("expected [DONE], body=%s", rec.Body.String()) + } + if !streamHasToolCallsDelta(frames) { + t.Fatalf("expected tool_calls delta from hidden thinking fallback, body=%s", rec.Body.String()) + } + for _, frame := range frames { + choices, _ := frame["choices"].([]any) + for _, item := range choices { + choice, _ := item.(map[string]any) + delta, _ := choice["delta"].(map[string]any) + if asString(delta["reasoning_content"]) != "" { + t.Fatalf("did not expect hidden reasoning_content delta, body=%s", rec.Body.String()) + } + } + } + if streamFinishReason(frames) != "tool_calls" { + t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String()) + } +} + func TestHandleStreamEmitsDistinctToolCallIDsAcrossSeparateToolBlocks(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( diff --git a/internal/httpapi/openai/history_split_test.go b/internal/httpapi/openai/history_split_test.go index babfd13..2fa6080 100644 --- a/internal/httpapi/openai/history_split_test.go +++ b/internal/httpapi/openai/history_split_test.go @@ -76,7 +76,7 @@ func TestBuildOpenAIHistoryTranscriptUsesInjectedFileWrapper(t *testing.T) { if !strings.Contains(transcript, "[reasoning_content]") || !strings.Contains(transcript, "hidden reasoning") { t.Fatalf("expected reasoning block preserved, got %q", transcript) } - if !strings.Contains(transcript, "") { + if !strings.Contains(transcript, "<|DSML|tool_calls>") { t.Fatalf("expected tool calls preserved, got %q", transcript) } if !strings.HasSuffix(transcript, "\n[file name]: IGNORE\n[file content begin]\n") { diff --git a/internal/httpapi/openai/responses/handler.go b/internal/httpapi/openai/responses/handler.go index 04de3ac..0ad75bf 100644 --- a/internal/httpapi/openai/responses/handler.go +++ b/internal/httpapi/openai/responses/handler.go @@ -11,6 +11,7 @@ import ( "ds2api/internal/httpapi/openai/history" "ds2api/internal/httpapi/openai/shared" "ds2api/internal/promptcompat" + "ds2api/internal/toolcall" "ds2api/internal/toolstream" ) @@ -115,3 +116,7 @@ func writeUpstreamEmptyOutputError(w http.ResponseWriter, text, thinking string, func filterIncrementalToolCallDeltasByAllowed(deltas []toolstream.ToolCallDelta, seenNames map[int]string) []toolstream.ToolCallDelta { return shared.FilterIncrementalToolCallDeltasByAllowed(deltas, seenNames) } + +func detectAssistantToolCalls(text, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult { + return shared.DetectAssistantToolCalls(text, exposedThinking, detectionThinking, toolNames) +} diff --git a/internal/httpapi/openai/responses/responses_handler.go b/internal/httpapi/openai/responses/responses_handler.go index f142388..0054b8e 100644 --- a/internal/httpapi/openai/responses/responses_handler.go +++ b/internal/httpapi/openai/responses/responses_handler.go @@ -131,11 +131,12 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res result := sse.CollectStream(resp, thinkingEnabled, true) stripReferenceMarkers := h.compatStripReferenceMarkers() sanitizedThinking := cleanVisibleOutput(result.Thinking, stripReferenceMarkers) + toolDetectionThinking := cleanVisibleOutput(result.ToolDetectionThinking, stripReferenceMarkers) sanitizedText := cleanVisibleOutput(result.Text, stripReferenceMarkers) if searchEnabled { sanitizedText = replaceCitationMarkersWithLinks(sanitizedText, result.CitationLinks) } - textParsed := toolcall.ParseAssistantToolCallsDetailed(sanitizedText, sanitizedThinking, toolNames) + textParsed := detectAssistantToolCalls(sanitizedText, sanitizedThinking, toolDetectionThinking, toolNames) if len(textParsed.Calls) == 0 && writeUpstreamEmptyOutputError(w, sanitizedText, sanitizedThinking, result.ContentFilter) { return } @@ -147,7 +148,7 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res return } - responseObj := openaifmt.BuildResponseObject(responseID, model, finalPrompt, sanitizedThinking, sanitizedText, toolNames) + responseObj := openaifmt.BuildResponseObjectWithToolCalls(responseID, model, finalPrompt, sanitizedThinking, sanitizedText, textParsed.Calls) h.getResponseStore().put(owner, responseID, responseObj) writeJSON(w, http.StatusOK, responseObj) } diff --git a/internal/httpapi/openai/responses/responses_stream_runtime_core.go b/internal/httpapi/openai/responses/responses_stream_runtime_core.go index f49bcaa..4f65ce2 100644 --- a/internal/httpapi/openai/responses/responses_stream_runtime_core.go +++ b/internal/httpapi/openai/responses/responses_stream_runtime_core.go @@ -34,24 +34,25 @@ type responsesStreamRuntime struct { toolCallsEmitted bool toolCallsDoneEmitted bool - sieve toolstream.State - thinking strings.Builder - text strings.Builder - visibleText strings.Builder - streamToolCallIDs map[int]string - functionItemIDs map[int]string - functionOutputIDs map[int]int - functionArgs map[int]string - functionDone map[int]bool - functionAdded map[int]bool - functionNames map[int]string - messageItemID string - messageOutputID int - nextOutputID int - messageAdded bool - messagePartAdded bool - sequence int - failed bool + sieve toolstream.State + thinking strings.Builder + toolDetectionThinking strings.Builder + text strings.Builder + visibleText strings.Builder + streamToolCallIDs map[int]string + functionItemIDs map[int]string + functionOutputIDs map[int]int + functionArgs map[int]string + functionDone map[int]bool + functionAdded map[int]bool + functionNames map[int]string + messageItemID string + messageOutputID int + nextOutputID int + messageAdded bool + messagePartAdded bool + sequence int + failed bool persistResponse func(obj map[string]any) } @@ -127,13 +128,14 @@ func (s *responsesStreamRuntime) failResponse(status int, message, code string) func (s *responsesStreamRuntime) finalize() { finalThinking := s.thinking.String() + finalToolDetectionThinking := s.toolDetectionThinking.String() finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers) if s.bufferToolContent { s.processToolStreamEvents(toolstream.Flush(&s.sieve, s.toolNames), true, true) } - textParsed := toolcall.ParseAssistantToolCallsDetailed(finalText, finalThinking, s.toolNames) + textParsed := detectAssistantToolCalls(finalText, finalThinking, finalToolDetectionThinking, s.toolNames) detected := textParsed.Calls s.logToolPolicyRejections(textParsed) @@ -191,6 +193,12 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa } contentSeen := false + for _, p := range parsed.ToolDetectionThinkingParts { + trimmed := sse.TrimContinuationOverlap(s.toolDetectionThinking.String(), p.Text) + if trimmed != "" { + s.toolDetectionThinking.WriteString(trimmed) + } + } for _, p := range parsed.Parts { cleanedText := cleanVisibleOutput(p.Text, s.stripReferenceMarkers) if cleanedText == "" { diff --git a/internal/httpapi/openai/responses/responses_stream_test.go b/internal/httpapi/openai/responses/responses_stream_test.go index 4562951..c9316e4 100644 --- a/internal/httpapi/openai/responses/responses_stream_test.go +++ b/internal/httpapi/openai/responses/responses_stream_test.go @@ -265,6 +265,43 @@ func TestHandleResponsesStreamPromotesThinkingToolCallsOnFinalizeWithoutMidstrea } } +func TestHandleResponsesStreamPromotesHiddenThinkingDSMLToolCallsOnFinalize(t *testing.T) { + h := &Handler{} + req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil) + rec := httptest.NewRecorder() + + sseLine := func(path, value string) string { + b, _ := json.Marshal(map[string]any{ + "p": path, + "v": value, + }) + return "data: " + string(b) + "\n" + } + + streamBody := sseLine("response/thinking_content", `<|DSML|tool_calls><|DSML|invoke name="read_file"><|DSML|parameter name="path">README.MD`) + "data: [DONE]\n" + resp := &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(strings.NewReader(streamBody)), + } + + policy := promptcompat.ToolChoicePolicy{ + Mode: promptcompat.ToolChoiceRequired, + Allowed: map[string]struct{}{"read_file": {}}, + } + h.handleResponsesStream(rec, req, resp, "owner-a", "resp_hidden", "deepseek-v4-pro", "prompt", false, false, []string{"read_file"}, policy, "") + + body := rec.Body.String() + if strings.Contains(body, "event: response.reasoning.delta") { + t.Fatalf("did not expect hidden reasoning delta in stream body, got %s", body) + } + if !strings.Contains(body, "event: response.function_call_arguments.done") { + t.Fatalf("expected hidden-thinking fallback function call event, got %s", body) + } + if strings.Contains(body, "event: response.failed") { + t.Fatalf("did not expect response.failed, body=%s", body) + } +} + func TestHandleResponsesNonStreamRequiredToolChoiceViolation(t *testing.T) { h := &Handler{} rec := httptest.NewRecorder() @@ -410,6 +447,39 @@ func TestHandleResponsesNonStreamPromotesThinkingToolCallsWhenTextEmpty(t *testi } } +func TestHandleResponsesNonStreamPromotesHiddenThinkingDSMLToolCallsWhenTextEmpty(t *testing.T) { + h := &Handler{} + rec := httptest.NewRecorder() + resp := &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(strings.NewReader( + `data: {"p":"response/thinking_content","v":"<|DSML|tool_calls><|DSML|invoke name=\"read_file\"><|DSML|parameter name=\"path\">README.MD"}` + "\n" + + `data: [DONE]` + "\n", + )), + } + + policy := promptcompat.ToolChoicePolicy{ + Mode: promptcompat.ToolChoiceRequired, + Allowed: map[string]struct{}{"read_file": {}}, + } + h.handleResponsesNonStream(rec, resp, "owner-a", "resp_hidden", "deepseek-v4-pro", "prompt", false, false, []string{"read_file"}, policy, "") + if rec.Code != http.StatusOK { + t.Fatalf("expected 200 for hidden thinking tool calls, got %d body=%s", rec.Code, rec.Body.String()) + } + out := decodeJSONBody(t, rec.Body.String()) + output, _ := out["output"].([]any) + if len(output) != 1 { + t.Fatalf("expected one output item, got %#v", out["output"]) + } + first, _ := output[0].(map[string]any) + if got := asString(first["type"]); got != "function_call" { + t.Fatalf("expected function_call output, got %#v", first["type"]) + } + if strings.Contains(rec.Body.String(), "reasoning") { + t.Fatalf("did not expect hidden reasoning in response body, got %s", rec.Body.String()) + } +} + func extractSSEEventPayload(body, targetEvent string) (map[string]any, bool) { scanner := bufio.NewScanner(strings.NewReader(body)) matched := false diff --git a/internal/httpapi/openai/shared/assistant_toolcalls.go b/internal/httpapi/openai/shared/assistant_toolcalls.go new file mode 100644 index 0000000..25f930b --- /dev/null +++ b/internal/httpapi/openai/shared/assistant_toolcalls.go @@ -0,0 +1,26 @@ +package shared + +import ( + "strings" + + "ds2api/internal/toolcall" +) + +func DetectAssistantToolCalls(text, exposedThinking, detectionThinking string, toolNames []string) toolcall.ToolCallParseResult { + textParsed := toolcall.ParseStandaloneToolCallsDetailed(text, toolNames) + if len(textParsed.Calls) > 0 { + return textParsed + } + if strings.TrimSpace(text) != "" { + return textParsed + } + thinking := detectionThinking + if strings.TrimSpace(thinking) == "" { + thinking = exposedThinking + } + thinkingParsed := toolcall.ParseStandaloneToolCallsDetailed(thinking, toolNames) + if len(thinkingParsed.Calls) > 0 { + return thinkingParsed + } + return textParsed +} diff --git a/internal/js/helpers/stream-tool-sieve/parse.js b/internal/js/helpers/stream-tool-sieve/parse.js index 0e7d552..fdae967 100644 --- a/internal/js/helpers/stream-tool-sieve/parse.js +++ b/internal/js/helpers/stream-tool-sieve/parse.js @@ -8,7 +8,7 @@ const { stripFencedCodeBlocks, } = require('./parse_payload'); -const TOOL_MARKUP_PREFIXES = ['', to: '' }, + { from: '<|dsml|invoke', to: '', to: '' }, + { from: '<|dsml|parameter', to: '', to: '' }, +]; + +const CANONICAL_TOOL_MARKUP_PREFIXES = [ + '', + '', + '', +]; + +function toolMarkupStylesOutsideIgnored(text) { + const lower = toStringSafe(text).toLowerCase(); + const styles = { dsml: false, canonical: false }; + for (let i = 0; i < lower.length;) { + const skipped = skipXmlIgnoredSection(lower, i); + if (skipped.blocked) { + return styles; + } + if (skipped.advanced) { + i = skipped.next; + continue; + } + if (CANONICAL_TOOL_MARKUP_PREFIXES.some(prefix => lower.startsWith(prefix, i))) { + styles.canonical = true; + } + if (DSML_TOOL_MARKUP_ALIASES.some(alias => lower.startsWith(alias.from, i))) { + styles.dsml = true; + } + if (styles.dsml && styles.canonical) { + return styles; + } + i += 1; + } + return styles; +} + +function replaceDSMLToolMarkupOutsideIgnored(text) { + const raw = toStringSafe(text); + const lower = raw.toLowerCase(); + let out = ''; + for (let i = 0; i < raw.length;) { + const skipped = skipXmlIgnoredSection(lower, i); + if (skipped.blocked) { + out += raw.slice(i); + break; + } + if (skipped.advanced) { + out += raw.slice(i, skipped.next); + i = skipped.next; + continue; + } + const alias = DSML_TOOL_MARKUP_ALIASES.find(item => lower.startsWith(item.from, i)); + if (alias) { + out += alias.to; + i += alias.from.length; + continue; + } + out += raw[i]; + i += 1; + } + return out; +} + function parseMarkupSingleToolCall(block) { const attrs = parseTagAttributes(block.attrs); const name = toStringSafe(attrs.name).trim(); @@ -403,4 +504,5 @@ function isOnlyRawValue(obj) { module.exports = { stripFencedCodeBlocks, parseMarkupToolCalls, + normalizeDSMLToolCallMarkup, }; diff --git a/internal/js/helpers/stream-tool-sieve/sieve-xml.js b/internal/js/helpers/stream-tool-sieve/sieve-xml.js index 90ea280..cff8fe7 100644 --- a/internal/js/helpers/stream-tool-sieve/sieve-xml.js +++ b/internal/js/helpers/stream-tool-sieve/sieve-xml.js @@ -3,6 +3,7 @@ const { parseToolCalls } = require('./parse'); // XML wrapper tag pair used by the streaming sieve. const XML_TOOL_TAG_PAIRS = [ + { open: '<|dsml|tool_calls', close: '' }, { open: '' }, ]; @@ -41,6 +42,31 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) { // If this block failed to become a tool call, pass it through as text. return { ready: true, prefix: prefixPart + xmlBlock, calls: [], suffix: suffixPart }; } + if (!containsAnyToolCallWrapper(lower)) { + const found = firstInvokeIndex(lower); + if (found.index >= 0) { + const closeTag = found.dsml ? '' : ''; + const openWrapper = found.dsml ? '<|DSML|tool_calls>' : ''; + const closeIdx = findXMLCloseOutsideCDATA(captured, closeTag, found.index); + if (closeIdx > found.index) { + const closeEnd = closeIdx + closeTag.length; + const xmlBlock = openWrapper + captured.slice(found.index, closeIdx) + closeTag; + let prefixPart = captured.slice(0, found.index); + let suffixPart = captured.slice(closeEnd); + const parsed = parseToolCalls(xmlBlock, toolNames); + if (Array.isArray(parsed) && parsed.length > 0) { + const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart); + return { + ready: true, + prefix: trimmedFence.prefix, + calls: parsed, + suffix: trimmedFence.suffix, + }; + } + return { ready: true, prefix: prefixPart + captured.slice(found.index, closeEnd), calls: [], suffix: suffixPart }; + } + } + } return { ready: false, prefix: '', calls: [], suffix: '' }; } @@ -57,6 +83,25 @@ function hasOpenXMLToolTag(captured) { return false; } +function containsAnyToolCallWrapper(lower) { + return lower.includes('= 0 }; + } + if (dsmlIdx < 0) { + return { index: xmlIdx, dsml: false }; + } + if (dsmlIdx < xmlIdx) { + return { index: dsmlIdx, dsml: true }; + } + return { index: xmlIdx, dsml: false }; +} + function findPartialXMLToolTagStart(s) { const lastLT = s.lastIndexOf('<'); if (lastLT < 0) { diff --git a/internal/js/helpers/stream-tool-sieve/tool-keywords.js b/internal/js/helpers/stream-tool-sieve/tool-keywords.js index 93efd5d..5191c68 100644 --- a/internal/js/helpers/stream-tool-sieve/tool-keywords.js +++ b/internal/js/helpers/stream-tool-sieve/tool-keywords.js @@ -1,14 +1,17 @@ 'use strict'; const XML_TOOL_SEGMENT_TAGS = [ + '<|dsml|tool_calls>', '<|dsml|tool_calls\n', '<|dsml|tool_calls ', '', '', '', ]; diff --git a/internal/prompt/tool_calls.go b/internal/prompt/tool_calls.go index d38e9fa..da52920 100644 --- a/internal/prompt/tool_calls.go +++ b/internal/prompt/tool_calls.go @@ -38,7 +38,7 @@ func FormatToolCallsForPrompt(raw any) string { if len(blocks) == 0 { return "" } - return "\n" + strings.Join(blocks, "\n") + "\n" + return "<|DSML|tool_calls>\n" + strings.Join(blocks, "\n") + "\n" } // StringifyToolCallArguments normalizes tool arguments into a compact string @@ -94,12 +94,12 @@ func formatToolCallForPrompt(call map[string]any) string { parameters := formatToolCallParametersForPrompt(argsRaw) if parameters == "" { - return ` ` + return ` <|DSML|invoke name="` + escapeXMLAttribute(name) + `">` } - return " \n" + + return " <|DSML|invoke name=\"" + escapeXMLAttribute(name) + "\">\n" + parameters + "\n" + - " " + " " } func formatToolCallParametersForPrompt(raw any) string { @@ -113,7 +113,7 @@ func formatToolCallParametersForPrompt(raw any) string { if strings.TrimSpace(fallback) == "" { return "" } - return " " + renderPromptXMLText(fallback) + "" + return " <|DSML|parameter name=\"content\">" + renderPromptXMLText(fallback) + "" } func renderPromptToolParameters(value any, indent string) (string, bool) { @@ -149,9 +149,9 @@ func renderPromptToolParameters(value any, indent string) (string, bool) { } return strings.Join(lines, "\n"), true case string: - return indent + `` + renderPromptXMLText(v) + ``, true + return indent + `<|DSML|parameter name="content">` + renderPromptXMLText(v) + ``, true default: - return indent + `` + renderPromptXMLText(fmt.Sprint(v)) + ``, true + return indent + `<|DSML|parameter name="value">` + renderPromptXMLText(fmt.Sprint(v)) + ``, true } } @@ -162,29 +162,29 @@ func renderPromptParameterNode(name string, value any, indent string) (string, b } switch v := value.(type) { case nil: - return indent + ``, true + return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `">`, true case map[string]any: body, ok := renderPromptToolXMLBody(v, indent+" ") if !ok { return "", false } if strings.TrimSpace(body) == "" { - return indent + ``, true + return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `">`, true } - return indent + `\n" + body + "\n" + indent + ``, true + return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + "\">\n" + body + "\n" + indent + ``, true case []any: body, ok := renderPromptToolXMLArray(v, indent+" ") if !ok { return "", false } if strings.TrimSpace(body) == "" { - return indent + ``, true + return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `">`, true } - return indent + `\n" + body + "\n" + indent + ``, true + return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + "\">\n" + body + "\n" + indent + ``, true case string: - return indent + `` + renderPromptXMLText(v) + ``, true + return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `">` + renderPromptXMLText(v) + ``, true default: - return indent + `` + renderPromptXMLText(fmt.Sprint(v)) + ``, true + return indent + `<|DSML|parameter name="` + escapeXMLAttribute(trimmedName) + `">` + renderPromptXMLText(fmt.Sprint(v)) + ``, true } } diff --git a/internal/prompt/tool_calls_test.go b/internal/prompt/tool_calls_test.go index b26658c..8a5a369 100644 --- a/internal/prompt/tool_calls_test.go +++ b/internal/prompt/tool_calls_test.go @@ -9,7 +9,7 @@ func TestStringifyToolCallArgumentsPreservesConcatenatedJSON(t *testing.T) { } } -func TestFormatToolCallsForPromptXML(t *testing.T) { +func TestFormatToolCallsForPromptDSML(t *testing.T) { got := FormatToolCallsForPrompt([]any{ map[string]any{ "id": "call_1", @@ -22,8 +22,8 @@ func TestFormatToolCallsForPromptXML(t *testing.T) { if got == "" { t.Fatal("expected non-empty formatted tool calls") } - if got != "\n \n \n \n" { - t.Fatalf("unexpected formatted tool call XML: %q", got) + if got != "<|DSML|tool_calls>\n <|DSML|invoke name=\"search_web\">\n <|DSML|parameter name=\"query\">\n \n" { + t.Fatalf("unexpected formatted tool call DSML: %q", got) } } @@ -34,7 +34,7 @@ func TestFormatToolCallsForPromptEscapesXMLEntities(t *testing.T) { "arguments": `{"q":"a < b && c > d"}`, }, }) - want := "\n \n d]]>\n \n" + want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"search<&>\">\n <|DSML|parameter name=\"q\"> d]]>\n \n" if got != want { t.Fatalf("unexpected escaped tool call XML: %q", got) } @@ -50,7 +50,7 @@ func TestFormatToolCallsForPromptUsesCDATAForMultilineContent(t *testing.T) { }, }, }) - want := "\n \n \n \n \n" + want := "<|DSML|tool_calls>\n <|DSML|invoke name=\"write_file\">\n <|DSML|parameter name=\"content\">\n <|DSML|parameter name=\"path\">\n \n" if got != want { t.Fatalf("unexpected multiline cdata tool call XML: %q", got) } diff --git a/internal/promptcompat/message_normalize_test.go b/internal/promptcompat/message_normalize_test.go index 36079d0..df41997 100644 --- a/internal/promptcompat/message_normalize_test.go +++ b/internal/promptcompat/message_normalize_test.go @@ -38,10 +38,10 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes t.Fatalf("expected 4 normalized messages with assistant tool history preserved, got %d", len(normalized)) } assistantContent, _ := normalized[2]["content"].(string) - if !strings.Contains(assistantContent, "") { - t.Fatalf("assistant tool history should be preserved in XML form, got %q", assistantContent) + if !strings.Contains(assistantContent, "<|DSML|tool_calls>") { + t.Fatalf("assistant tool history should be preserved in DSML form, got %q", assistantContent) } - if !strings.Contains(assistantContent, ``) { + if !strings.Contains(assistantContent, `<|DSML|invoke name="get_weather">`) { t.Fatalf("expected tool name in preserved history, got %q", assistantContent) } if !strings.Contains(normalized[3]["content"].(string), `"temp":18`) { @@ -49,7 +49,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes } prompt := util.MessagesPrepare(normalized) - if !strings.Contains(prompt, "") { + if !strings.Contains(prompt, "<|DSML|tool_calls>") { t.Fatalf("expected preserved assistant tool history in prompt: %q", prompt) } } @@ -177,10 +177,10 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantMultipleToolCallsRemainSepara t.Fatalf("expected assistant tool_call-only message preserved, got %#v", normalized) } content, _ := normalized[0]["content"].(string) - if strings.Count(content, "`) || !strings.Contains(content, ``) { + if !strings.Contains(content, `<|DSML|invoke name="search_web">`) || !strings.Contains(content, `<|DSML|invoke name="eval_javascript">`) { t.Fatalf("expected both tool names in preserved history, got %q", content) } } @@ -258,7 +258,7 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantNilContentDoesNotInjectNullLi if strings.Contains(content, "null") { t.Fatalf("expected no null literal injection, got %q", content) } - if !strings.Contains(content, "") { + if !strings.Contains(content, "<|DSML|tool_calls>") { t.Fatalf("expected assistant tool history in normalized content, got %q", content) } } diff --git a/internal/promptcompat/prompt_build_test.go b/internal/promptcompat/prompt_build_test.go index 82101d3..225cae8 100644 --- a/internal/promptcompat/prompt_build_test.go +++ b/internal/promptcompat/prompt_build_test.go @@ -47,10 +47,10 @@ func TestBuildOpenAIFinalPrompt_HandlerPathIncludesToolRoundtripSemantics(t *tes if !strings.Contains(finalPrompt, `"condition":"sunny"`) { t.Fatalf("handler finalPrompt should preserve tool output content: %q", finalPrompt) } - if !strings.Contains(finalPrompt, "") { + if !strings.Contains(finalPrompt, "<|DSML|tool_calls>") { t.Fatalf("handler finalPrompt should preserve assistant tool history: %q", finalPrompt) } - if !strings.Contains(finalPrompt, ``) { + if !strings.Contains(finalPrompt, `<|DSML|invoke name="get_weather">`) { t.Fatalf("handler finalPrompt should include tool name history: %q", finalPrompt) } } @@ -74,7 +74,7 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t * } finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false) - if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the ... XML block at the end of your response.") { + if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>... block at the end of your response.") { t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt) } if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") { diff --git a/internal/sse/consumer.go b/internal/sse/consumer.go index 1a9adf8..d81d818 100644 --- a/internal/sse/consumer.go +++ b/internal/sse/consumer.go @@ -10,10 +10,11 @@ import ( // CollectResult holds the aggregated text and thinking content from a // DeepSeek SSE stream, consumed to completion (non-streaming use case). type CollectResult struct { - Text string - Thinking string - ContentFilter bool - CitationLinks map[int]string + Text string + Thinking string + ToolDetectionThinking string + ContentFilter bool + CitationLinks map[int]string } // CollectStream fully consumes a DeepSeek SSE response and separates @@ -28,6 +29,7 @@ func CollectStream(resp *http.Response, thinkingEnabled bool, closeBody bool) Co } text := strings.Builder{} thinking := strings.Builder{} + toolDetectionThinking := strings.Builder{} contentFilter := false stopped := false collector := newCitationLinkCollector() @@ -70,12 +72,17 @@ func CollectStream(resp *http.Response, thinkingEnabled bool, closeBody bool) Co text.WriteString(trimmed) } } + for _, p := range result.ToolDetectionThinkingParts { + trimmed := TrimContinuationOverlap(toolDetectionThinking.String(), p.Text) + toolDetectionThinking.WriteString(trimmed) + } return true }) return CollectResult{ - Text: text.String(), - Thinking: thinking.String(), - ContentFilter: contentFilter, - CitationLinks: collector.build(), + Text: text.String(), + Thinking: thinking.String(), + ToolDetectionThinking: toolDetectionThinking.String(), + ContentFilter: contentFilter, + CitationLinks: collector.build(), } } diff --git a/internal/sse/line.go b/internal/sse/line.go index fbd2939..311a91f 100644 --- a/internal/sse/line.go +++ b/internal/sse/line.go @@ -4,12 +4,13 @@ import "fmt" // LineResult is the normalized parse result for one DeepSeek SSE line. type LineResult struct { - Parsed bool - Stop bool - ContentFilter bool - ErrorMessage string - Parts []ContentPart - NextType string + Parsed bool + Stop bool + ContentFilter bool + ErrorMessage string + Parts []ContentPart + ToolDetectionThinkingParts []ContentPart + NextType string } // ParseDeepSeekContentLine centralizes one-line DeepSeek SSE parsing for both @@ -46,12 +47,14 @@ func ParseDeepSeekContentLine(raw []byte, thinkingEnabled bool, currentType stri NextType: currentType, } } - parts, finished, nextType := ParseSSEChunkForContent(chunk, thinkingEnabled, currentType) + parts, detectionThinkingParts, finished, nextType := ParseSSEChunkForContentDetailed(chunk, thinkingEnabled, currentType) parts = filterLeakedContentFilterParts(parts) + detectionThinkingParts = filterLeakedContentFilterParts(detectionThinkingParts) return LineResult{ - Parsed: true, - Stop: finished, - Parts: parts, - NextType: nextType, + Parsed: true, + Stop: finished, + Parts: parts, + ToolDetectionThinkingParts: detectionThinkingParts, + NextType: nextType, } } diff --git a/internal/sse/parser.go b/internal/sse/parser.go index 3057eda..abb2eb6 100644 --- a/internal/sse/parser.go +++ b/internal/sse/parser.go @@ -69,20 +69,25 @@ func isFragmentStatusPath(path string) bool { } func ParseSSEChunkForContent(chunk map[string]any, thinkingEnabled bool, currentFragmentType string) ([]ContentPart, bool, string) { + parts, _, finished, nextType := ParseSSEChunkForContentDetailed(chunk, thinkingEnabled, currentFragmentType) + return parts, finished, nextType +} + +func ParseSSEChunkForContentDetailed(chunk map[string]any, thinkingEnabled bool, currentFragmentType string) ([]ContentPart, []ContentPart, bool, string) { v, ok := chunk["v"] if !ok { - return nil, false, currentFragmentType + return nil, nil, false, currentFragmentType } path, _ := chunk["p"].(string) if shouldSkipPath(path) { - return nil, false, currentFragmentType + return nil, nil, false, currentFragmentType } if isStatusPath(path) { if s, ok := v.(string); ok { if strings.EqualFold(strings.TrimSpace(s), "FINISHED") { - return nil, true, currentFragmentType + return nil, nil, true, currentFragmentType } - return nil, false, currentFragmentType + return nil, nil, false, currentFragmentType } } newType := currentFragmentType @@ -92,18 +97,32 @@ func ParseSSEChunkForContent(chunk map[string]any, thinkingEnabled bool, current partType := resolvePartType(path, thinkingEnabled, newType) finished := appendChunkValueContent(v, partType, &newType, &parts, path) if finished { - return nil, true, newType + return nil, nil, true, newType } var transitioned bool parts, transitioned = splitThinkingParts(parts) if transitioned { newType = "text" } + detectionThinkingParts := selectThinkingParts(parts) if !thinkingEnabled { parts = dropThinkingParts(parts) newType = "text" } - return parts, false, newType + return parts, detectionThinkingParts, false, newType +} + +func selectThinkingParts(parts []ContentPart) []ContentPart { + if len(parts) == 0 { + return nil + } + out := make([]ContentPart, 0, len(parts)) + for _, p := range parts { + if p.Type == "thinking" { + out = append(out, p) + } + } + return out } func collectDirectFragments(path string, chunk map[string]any, v any, newType *string, parts *[]ContentPart) { diff --git a/internal/toolcall/tool_prompt.go b/internal/toolcall/tool_prompt.go index aa556e8..6844eb4 100644 --- a/internal/toolcall/tool_prompt.go +++ b/internal/toolcall/tool_prompt.go @@ -11,44 +11,45 @@ import "strings" func BuildToolCallInstructions(toolNames []string) string { return `TOOL CALL FORMAT — FOLLOW EXACTLY: - - - - - +<|DSML|tool_calls> + <|DSML|invoke name="TOOL_NAME_HERE"> + <|DSML|parameter name="PARAMETER_NAME"> + + RULES: -1) Use the XML wrapper format only. -2) Put one or more entries under a single root. -3) Put the tool name in the invoke name attribute: . +1) Use the <|DSML|tool_calls> wrapper format. +2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root. +3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">. 4) All string values must use , even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries. -5) Every top-level argument must be a ... node. +5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">... node. 6) Objects use nested XML elements inside the parameter body. Arrays may repeat children. 7) Numbers, booleans, and null stay plain text. 8) Use only the parameter names in the tool schema. Do not invent fields. 9) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue. -10) If you call a tool, the first non-whitespace characters of that tool block must be exactly . -11) Never omit the opening tag, even if you already plan to close with . +10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>. +11) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with . +12) Compatibility note: the runtime also accepts the legacy XML tags / / , but prefer the DSML-prefixed form above. PARAMETER SHAPES: -- string => -- object => ... -- array => ...... -- number/bool/null => plain_text +- string => <|DSML|parameter name="x"> +- object => <|DSML|parameter name="x">... +- array => <|DSML|parameter name="x">...... +- number/bool/null => <|DSML|parameter name="x">plain_text 【WRONG — Do NOT do these】: Wrong 1 — mixed text after XML: - ... I hope this helps. + <|DSML|tool_calls>... I hope this helps. Wrong 2 — Markdown code fences: ` + "```xml" + ` - ... + <|DSML|tool_calls>... ` + "```" + ` Wrong 3 — missing opening wrapper: - ... - + <|DSML|invoke name="TOOL_NAME">... + -Remember: The ONLY valid way to use tools is the ... XML block at the end of your response. +Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>... block at the end of your response. ` + buildCorrectToolExamples(toolNames) } @@ -140,21 +141,21 @@ func firstScriptExample(names []string) (promptToolExample, bool) { func renderToolExampleBlock(calls []promptToolExample) string { var b strings.Builder - b.WriteString("\n") + b.WriteString("<|DSML|tool_calls>\n") for _, call := range calls { - b.WriteString(` \n") + b.WriteString(`">` + "\n") b.WriteString(indentPromptParameters(call.params, " ")) - b.WriteString("\n \n") + b.WriteString("\n \n") } - b.WriteString("") + b.WriteString("") return b.String() } func indentPromptParameters(body, indent string) string { if strings.TrimSpace(body) == "" { - return indent + `` + return indent + `<|DSML|parameter name="content">` } lines := strings.Split(body, "\n") for i, line := range lines { @@ -168,7 +169,7 @@ func indentPromptParameters(body, indent string) string { } func wrapParameter(name, inner string) string { - return `` + inner + `` + return `<|DSML|parameter name="` + name + `">` + inner + `` } func exampleBasicParams(name string) (string, bool) { @@ -194,7 +195,7 @@ func exampleBasicParams(name string) (string, bool) { case "Edit": return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + wrapParameter("old_string", promptCDATA("foo")) + "\n" + wrapParameter("new_string", promptCDATA("bar")), true case "MultiEdit": - return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `` + promptCDATA("foo") + `` + promptCDATA("bar") + ``, true + return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits">` + promptCDATA("foo") + `` + promptCDATA("bar") + ``, true } return "", false } @@ -202,11 +203,11 @@ func exampleBasicParams(name string) (string, bool) { func exampleNestedParams(name string) (string, bool) { switch strings.TrimSpace(name) { case "MultiEdit": - return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `` + promptCDATA("foo") + `` + promptCDATA("bar") + ``, true + return wrapParameter("file_path", promptCDATA("README.md")) + "\n" + `<|DSML|parameter name="edits">` + promptCDATA("foo") + `` + promptCDATA("bar") + ``, true case "Task": return wrapParameter("description", promptCDATA("Investigate flaky tests")) + "\n" + wrapParameter("prompt", promptCDATA("Run targeted tests and summarize failures")), true case "ask_followup_question": - return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `` + promptCDATA("Option A") + `` + promptCDATA("Option B") + ``, true + return wrapParameter("question", promptCDATA("Which approach do you prefer?")) + "\n" + `<|DSML|parameter name="follow_up">` + promptCDATA("Option A") + `` + promptCDATA("Option B") + ``, true } return "", false } diff --git a/internal/toolcall/tool_prompt_test.go b/internal/toolcall/tool_prompt_test.go index d482d52..f153e43 100644 --- a/internal/toolcall/tool_prompt_test.go +++ b/internal/toolcall/tool_prompt_test.go @@ -7,20 +7,20 @@ import ( func TestBuildToolCallInstructions_ExecCommandUsesCmdExample(t *testing.T) { out := BuildToolCallInstructions([]string{"exec_command"}) - if !strings.Contains(out, ``) { + if !strings.Contains(out, `<|DSML|invoke name="exec_command">`) { t.Fatalf("expected exec_command in examples, got: %s", out) } - if !strings.Contains(out, ``) { + if !strings.Contains(out, `<|DSML|parameter name="cmd">`) { t.Fatalf("expected cmd parameter example for exec_command, got: %s", out) } } func TestBuildToolCallInstructions_ExecuteCommandUsesCommandExample(t *testing.T) { out := BuildToolCallInstructions([]string{"execute_command"}) - if !strings.Contains(out, ``) { + if !strings.Contains(out, `<|DSML|invoke name="execute_command">`) { t.Fatalf("expected execute_command in examples, got: %s", out) } - if !strings.Contains(out, ``) { + if !strings.Contains(out, `<|DSML|parameter name="command">`) { t.Fatalf("expected command parameter example for execute_command, got: %s", out) } } @@ -34,20 +34,20 @@ func TestBuildToolCallInstructions_BashUsesCommandAndDescriptionExamples(t *test sawDescription := false for _, block := range blocks { - if !strings.Contains(block, ``) { + if !strings.Contains(block, `<|DSML|parameter name="command">`) { t.Fatalf("expected every Bash example to use command parameter, got: %s", block) } - if strings.Contains(block, ``) || strings.Contains(block, ``) { + if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) { t.Fatalf("expected Bash examples not to use file write parameters, got: %s", block) } - if strings.Contains(block, ``) { + if strings.Contains(block, `<|DSML|parameter name="description">`) { sawDescription = true } } if !sawDescription { t.Fatalf("expected Bash long-script example to include description, got: %s", out) } - if strings.Contains(out, ``) { + if strings.Contains(out, `<|DSML|invoke name="Read">`) { t.Fatalf("expected examples to avoid unavailable hard-coded Read tool, got: %s", out) } } @@ -60,10 +60,10 @@ func TestBuildToolCallInstructions_ExecuteCommandLongScriptUsesCommand(t *testin } for _, block := range blocks { - if !strings.Contains(block, ``) { + if !strings.Contains(block, `<|DSML|parameter name="command">`) { t.Fatalf("expected execute_command examples to use command parameter, got: %s", block) } - if strings.Contains(block, ``) || strings.Contains(block, ``) { + if strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) { t.Fatalf("expected execute_command examples not to use file write parameters, got: %s", block) } } @@ -80,10 +80,10 @@ func TestBuildToolCallInstructions_ExecCommandLongScriptUsesCmd(t *testing.T) { } for _, block := range blocks { - if !strings.Contains(block, ``) { + if !strings.Contains(block, `<|DSML|parameter name="cmd">`) { t.Fatalf("expected exec_command examples to use cmd parameter, got: %s", block) } - if strings.Contains(block, ``) || strings.Contains(block, ``) || strings.Contains(block, ``) { + if strings.Contains(block, `<|DSML|parameter name="command">`) || strings.Contains(block, `<|DSML|parameter name="path">`) || strings.Contains(block, `<|DSML|parameter name="content">`) { t.Fatalf("expected exec_command examples not to use command or file write parameters, got: %s", block) } } @@ -100,10 +100,10 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) { } for _, block := range blocks { - if !strings.Contains(block, ``) || !strings.Contains(block, ``) { + if !strings.Contains(block, `<|DSML|parameter name="file_path">`) || !strings.Contains(block, `<|DSML|parameter name="content">`) { t.Fatalf("expected Write examples to use file_path and content, got: %s", block) } - if strings.Contains(block, ``) { + if strings.Contains(block, `<|DSML|parameter name="path">`) { t.Fatalf("expected Write examples not to use path, got: %s", block) } } @@ -111,7 +111,7 @@ func TestBuildToolCallInstructions_WriteUsesFilePathAndContent(t *testing.T) { func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *testing.T) { out := BuildToolCallInstructions([]string{"read_file"}) - if !strings.Contains(out, "Never omit the opening tag") { + if !strings.Contains(out, "Never omit the opening <|DSML|tool_calls> tag") { t.Fatalf("expected explicit missing-opening-tag warning, got: %s", out) } if !strings.Contains(out, "Wrong 3 — missing opening wrapper") { @@ -120,7 +120,7 @@ func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *te } func findInvokeBlocks(text, name string) []string { - open := `` + open := `<|DSML|invoke name="` + name + `">` remaining := text blocks := []string{} for { @@ -129,11 +129,11 @@ func findInvokeBlocks(text, name string) []string { return blocks } remaining = remaining[start:] - end := strings.Index(remaining, ``) + end := strings.Index(remaining, ``) if end < 0 { return blocks } - end += len(``) + end += len(``) blocks = append(blocks, remaining[:end]) remaining = remaining[end:] } diff --git a/internal/toolcall/toolcalls_dsml.go b/internal/toolcall/toolcalls_dsml.go new file mode 100644 index 0000000..e694a00 --- /dev/null +++ b/internal/toolcall/toolcalls_dsml.go @@ -0,0 +1,108 @@ +package toolcall + +import "strings" + +func normalizeDSMLToolCallMarkup(text string) (string, bool) { + if text == "" { + return "", true + } + hasDSML, hasCanonical := toolMarkupStylesOutsideIgnored(text) + if hasDSML && hasCanonical { + return text, false + } + if !hasDSML { + return text, true + } + return replaceDSMLToolMarkupOutsideIgnored(text), true +} + +var dsmlToolMarkupAliases = []struct { + from string + to string +}{ + {"<|dsml|tool_calls", "", ""}, + {"<|dsml|invoke", "", ""}, + {"<|dsml|parameter", "", ""}, +} + +var canonicalToolMarkupPrefixes = []string{ + "", + "", + "", +} + +func toolMarkupStylesOutsideIgnored(text string) (hasDSML, hasCanonical bool) { + lower := strings.ToLower(text) + for i := 0; i < len(text); { + next, advanced, blocked := skipXMLIgnoredSection(lower, i) + if blocked { + return hasDSML, hasCanonical + } + if advanced { + i = next + continue + } + if hasPrefixAt(lower, i, canonicalToolMarkupPrefixes) { + hasCanonical = true + } + for _, alias := range dsmlToolMarkupAliases { + if strings.HasPrefix(lower[i:], alias.from) { + hasDSML = true + break + } + } + if hasDSML && hasCanonical { + return true, true + } + i++ + } + return hasDSML, hasCanonical +} + +func replaceDSMLToolMarkupOutsideIgnored(text string) string { + lower := strings.ToLower(text) + var b strings.Builder + b.Grow(len(text)) + for i := 0; i < len(text); { + next, advanced, blocked := skipXMLIgnoredSection(lower, i) + if blocked { + b.WriteString(text[i:]) + break + } + if advanced { + b.WriteString(text[i:next]) + i = next + continue + } + replaced := false + for _, alias := range dsmlToolMarkupAliases { + if strings.HasPrefix(lower[i:], alias.from) { + b.WriteString(alias.to) + i += len(alias.from) + replaced = true + break + } + } + if replaced { + continue + } + b.WriteByte(text[i]) + i++ + } + return b.String() +} + +func hasPrefixAt(text string, idx int, prefixes []string) bool { + for _, prefix := range prefixes { + if strings.HasPrefix(text[idx:], prefix) { + return true + } + } + return false +} diff --git a/internal/toolcall/toolcalls_parse.go b/internal/toolcall/toolcalls_parse.go index 3dc8c25..272127b 100644 --- a/internal/toolcall/toolcalls_parse.go +++ b/internal/toolcall/toolcalls_parse.go @@ -60,7 +60,11 @@ func parseToolCallsDetailedXMLOnly(text string) ToolCallParseResult { return result } - parsed := parseXMLToolCalls(trimmed) + normalized, ok := normalizeDSMLToolCallMarkup(trimmed) + if !ok { + return result + } + parsed := parseXMLToolCalls(normalized) if len(parsed) == 0 { return result } diff --git a/internal/toolcall/toolcalls_test.go b/internal/toolcall/toolcalls_test.go index c4bfe51..ab6c8cd 100644 --- a/internal/toolcall/toolcalls_test.go +++ b/internal/toolcall/toolcalls_test.go @@ -30,6 +30,37 @@ func TestParseToolCallsSupportsToolCallsWrapper(t *testing.T) { } } +func TestParseToolCallsSupportsDSMLShell(t *testing.T) { + text := `<|DSML|tool_calls><|DSML|invoke name="Bash"><|DSML|parameter name="command">` + calls := ParseToolCalls(text, []string{"Bash"}) + if len(calls) != 1 { + t.Fatalf("expected 1 DSML call, got %#v", calls) + } + if calls[0].Name != "Bash" || calls[0].Input["command"] != "pwd" { + t.Fatalf("unexpected DSML parse result: %#v", calls[0]) + } +} + +func TestParseToolCallsSupportsDSMLShellWithCanonicalExampleInCDATA(t *testing.T) { + content := `x` + text := `<|DSML|tool_calls><|DSML|invoke name="Write"><|DSML|parameter name="file_path">notes.md<|DSML|parameter name="content">` + calls := ParseToolCalls(text, []string{"Write"}) + if len(calls) != 1 { + t.Fatalf("expected 1 DSML call with XML-looking CDATA, got %#v", calls) + } + if calls[0].Name != "Write" || calls[0].Input["content"] != content { + t.Fatalf("unexpected DSML CDATA parse result: %#v", calls[0]) + } +} + +func TestParseToolCallsRejectsMixedDSMLAndCanonicalToolTags(t *testing.T) { + text := `<|DSML|tool_calls><|DSML|parameter name="command">pwd` + calls := ParseToolCalls(text, []string{"Bash"}) + if len(calls) != 0 { + t.Fatalf("expected mixed DSML/XML tool tags to be rejected, got %#v", calls) + } +} + func TestParseToolCallsSupportsStandaloneToolWithMultilineCDATAAndRepeatedXMLTags(t *testing.T) { text := `script.sh"} -var xmlToolCallOpeningTags = []string{"", ""} +var xmlToolCallOpeningTags = []string{""}, {""}, } // xmlToolCallBlockPattern matches a complete canonical XML tool call block. // //nolint:unused // reserved for future fast-path XML block detection. -var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(]*>\s*(?:.*?)\s*)`) +var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)((?:]*>\s*(?:.*?)\s*(?:|))`) // xmlToolTagsToDetect is the set of XML tag prefixes used by findToolSegmentStart. -var xmlToolTagsToDetect = []string{"", "", "<|dsml|tool_calls\n", "<|dsml|tool_calls ", + "<|dsml|invoke ", "<|dsml|invoke\n", "<|dsml|invoke\t", "<|dsml|invoke\r", + "", "", invokeIdx) + if !containsAnyToolCallWrapper(lower) { + invokeIdx, dsml := firstInvokeIndex(lower) + closeTag := "" + openWrapper := "" + if dsml { + closeTag = "" + openWrapper = "<|DSML|tool_calls>" + } + closeIdx := findXMLCloseOutsideCDATA(captured, closeTag, invokeIdx) if invokeIdx >= 0 && closeIdx > invokeIdx { - closeEnd := closeIdx + len("") - xmlBlock := "" + captured[invokeIdx:closeIdx] + "" + closeEnd := closeIdx + len(closeTag) + xmlBlock := openWrapper + captured[invokeIdx:closeIdx] + closeTag prefixPart := captured[:invokeIdx] suffixPart := captured[closeEnd:] parsed := toolcall.ParseToolCalls(xmlBlock, toolNames) @@ -92,15 +103,25 @@ func hasOpenXMLToolTag(captured string) bool { func shouldKeepBareInvokeCapture(captured string) bool { lower := strings.ToLower(captured) - invokeIdx := strings.Index(lower, "", invokeIdx) > invokeIdx { + wrapperClose := "" + invokeOpenLen := len(" invokeIdx { return true } - startEnd := findXMLTagEnd(captured, invokeIdx+len("", startEnd+1) + invokeCloseIdx := findXMLCloseOutsideCDATA(captured, invokeClose, startEnd+1) if invokeCloseIdx >= 0 { - afterClose := captured[invokeCloseIdx+len(""):] + afterClose := captured[invokeCloseIdx+len(invokeClose):] return strings.TrimSpace(afterClose) == "" } trimmedLower := strings.ToLower(trimmedBody) - return strings.HasPrefix(trimmedLower, "= 0 + case dsmlIdx < 0: + return xmlIdx, false + case dsmlIdx < xmlIdx: + return dsmlIdx, true + default: + return xmlIdx, false + } +} + func findXMLCloseOutsideCDATA(s, closeTag string, start int) int { if s == "" || closeTag == "" { return -1 diff --git a/internal/toolstream/tool_sieve_xml_test.go b/internal/toolstream/tool_sieve_xml_test.go index 55e0549..e35035f 100644 --- a/internal/toolstream/tool_sieve_xml_test.go +++ b/internal/toolstream/tool_sieve_xml_test.go @@ -41,6 +41,37 @@ func TestProcessToolSieveInterceptsXMLToolCallWithoutLeak(t *testing.T) { } } +func TestProcessToolSieveInterceptsDSMLToolCallWithoutLeak(t *testing.T) { + var state State + chunks := []string{ + "<|DSML|tool", + "_calls>\n", + ` <|DSML|invoke name="read_file">` + "\n", + ` <|DSML|parameter name="path">README.MD` + "\n", + " \n", + "", + } + var events []Event + for _, c := range chunks { + events = append(events, ProcessChunk(&state, c, []string{"read_file"})...) + } + events = append(events, Flush(&state, []string{"read_file"})...) + + var textContent string + var toolCalls int + for _, evt := range events { + textContent += evt.Content + toolCalls += len(evt.ToolCalls) + } + + if strings.Contains(strings.ToLower(textContent), "dsml") || strings.Contains(textContent, "read_file") { + t.Fatalf("DSML tool call content leaked to text: %q", textContent) + } + if toolCalls != 1 { + t.Fatalf("expected one DSML tool call, got %d events=%#v", toolCalls, events) + } +} + func TestProcessToolSieveHandlesLongXMLToolCall(t *testing.T) { var state State const toolName = "write_to_file" diff --git a/scripts/build-release-archives.sh b/scripts/build-release-archives.sh new file mode 100755 index 0000000..415aab8 --- /dev/null +++ b/scripts/build-release-archives.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)" +cd "$ROOT_DIR" + +source "${ROOT_DIR}/scripts/release-targets.sh" + +build_one() { + local tag="$1" build_version="$2" goos="$3" goarch="$4" goarm="$5" label="$6" + local pkg stage bin + + pkg="ds2api_${tag}_${label}" + stage="dist/${pkg}" + bin="ds2api" + if [[ "$goos" == "windows" ]]; then + bin="ds2api.exe" + fi + + echo "[release-archives] building ${label}" + rm -rf "$stage" + mkdir -p "${stage}/static" + + if [[ "$goarm" == "-" ]]; then + CGO_ENABLED=0 GOOS="$goos" GOARCH="$goarch" \ + go build -buildvcs=false -trimpath -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${build_version}" -o "${stage}/${bin}" ./cmd/ds2api + else + CGO_ENABLED=0 GOOS="$goos" GOARCH="$goarch" GOARM="$goarm" \ + go build -buildvcs=false -trimpath -ldflags="-s -w -X ds2api/internal/version.BuildVersion=${build_version}" -o "${stage}/${bin}" ./cmd/ds2api + fi + + cp config.example.json .env.example LICENSE README.MD README.en.md "${stage}/" + cp -R static/admin "${stage}/static/admin" + + if [[ "$goos" == "windows" ]]; then + (cd dist && zip -rq "${pkg}.zip" "${pkg}") + else + tar -C dist -czf "dist/${pkg}.tar.gz" "${pkg}" + fi + + rm -rf "$stage" +} + +if [[ "${1:-}" == "--build-one" ]]; then + shift + build_one "$@" + exit 0 +fi + +tag="${RELEASE_TAG:-}" +if [[ -z "$tag" && -f VERSION ]]; then + tag="$(tr -d '[:space:]' < VERSION)" +fi +if [[ -z "$tag" ]]; then + echo "release tag is empty; set RELEASE_TAG or provide VERSION." >&2 + exit 1 +fi + +build_version="${BUILD_VERSION:-$tag}" +jobs="${RELEASE_BUILD_JOBS:-}" +if [[ -z "$jobs" ]]; then + if command -v nproc >/dev/null 2>&1; then + jobs="$(nproc)" + elif command -v sysctl >/dev/null 2>&1; then + jobs="$(sysctl -n hw.ncpu)" + else + jobs="2" + fi +fi + +mkdir -p dist + +if [[ "$jobs" -le 1 ]]; then + for target in "${DS2API_RELEASE_TARGETS[@]}"; do + read -r goos goarch goarm label <<< "$target" + build_one "$tag" "$build_version" "$goos" "$goarch" "$goarm" "$label" + done +else + printf '%s\n' "${DS2API_RELEASE_TARGETS[@]}" \ + | xargs -L 1 -P "$jobs" bash "${ROOT_DIR}/scripts/build-release-archives.sh" --build-one "$tag" "$build_version" +fi diff --git a/scripts/build-webui.sh b/scripts/build-webui.sh index 485f4cb..bde077e 100755 --- a/scripts/build-webui.sh +++ b/scripts/build-webui.sh @@ -11,7 +11,7 @@ cd "$(dirname "$0")/../webui" # 检查 node_modules if [ ! -d "node_modules" ]; then echo "📦 Installing dependencies..." - npm install + npm ci --prefer-offline --no-audit fi # 构建 diff --git a/scripts/release-targets.sh b/scripts/release-targets.sh new file mode 100755 index 0000000..63a5a7e --- /dev/null +++ b/scripts/release-targets.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +# goos goarch goarm package-label +DS2API_RELEASE_TARGETS=( + "linux amd64 - linux_amd64" + "linux arm64 - linux_arm64" + "linux arm 7 linux_armv7" + "darwin amd64 - darwin_amd64" + "darwin arm64 - darwin_arm64" + "windows amd64 - windows_amd64" + "windows arm64 - windows_arm64" +) diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index cc6ae93..40a6e42 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -49,6 +49,29 @@ test('parseToolCalls parses XML markup tool call', () => { assert.deepEqual(calls[0].input, { path: 'README.MD' }); }); +test('parseToolCalls parses DSML shell as XML-compatible tool call', () => { + const payload = '<|DSML|tool_calls><|DSML|invoke name="read_file"><|DSML|parameter name="path">README.MD'; + const calls = parseToolCalls(payload, ['read_file']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'read_file'); + assert.deepEqual(calls[0].input, { path: 'README.MD' }); +}); + +test('parseToolCalls keeps canonical XML examples inside DSML CDATA', () => { + const content = 'x'; + const payload = `<|DSML|tool_calls><|DSML|invoke name="write_file"><|DSML|parameter name="path">notes.md<|DSML|parameter name="content">`; + const calls = parseToolCalls(payload, ['write_file']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'write_file'); + assert.deepEqual(calls[0].input, { path: 'notes.md', content }); +}); + +test('parseToolCalls rejects mixed DSML and XML tool tags', () => { + const payload = '<|DSML|tool_calls><|DSML|parameter name="path">README.MD'; + const calls = parseToolCalls(payload, ['read_file']); + assert.equal(calls.length, 0); +}); + test('parseToolCalls ignores JSON tool_calls payload (XML-only)', () => { const payload = JSON.stringify({ tool_calls: [{ name: 'read_file', input: { path: 'README.MD' } }], @@ -98,6 +121,22 @@ test('sieve emits tool_calls when XML tag spans multiple chunks', () => { assert.equal(finalCalls[0].name, 'read_file'); }); +test('sieve emits tool_calls when DSML tag spans multiple chunks', () => { + const events = runSieve( + [ + '<|DSML|tool', + '_calls><|DSML|invoke name="read_file">', + '<|DSML|parameter name="path">README.MD', + ], + ['read_file'], + ); + const leakedText = collectText(events); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(leakedText, ''); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].name, 'read_file'); +}); + test('sieve keeps long XML tool calls buffered until the closing tag arrives', () => { const longContent = 'x'.repeat(4096); const splitAt = longContent.length / 2; diff --git a/tests/scripts/check-cross-build.sh b/tests/scripts/check-cross-build.sh new file mode 100755 index 0000000..22741ec --- /dev/null +++ b/tests/scripts/check-cross-build.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "$0")/../.." && pwd)" +cd "$ROOT_DIR" + +source "${ROOT_DIR}/scripts/release-targets.sh" + +OUT_DIR="${ROOT_DIR}/.tmp/cross-build" + +build_one() { + local goos="$1" goarch="$2" goarm="$3" label="$4" + local out + out="${OUT_DIR}/${label}/ds2api" + if [[ "$goos" == "windows" ]]; then + out="${out}.exe" + fi + + echo "[cross-build] ${label}" + mkdir -p "$(dirname "$out")" + if [[ "$goarm" == "-" ]]; then + CGO_ENABLED=0 GOOS="$goos" GOARCH="$goarch" \ + go build -buildvcs=false -trimpath -o "$out" ./cmd/ds2api + else + CGO_ENABLED=0 GOOS="$goos" GOARCH="$goarch" GOARM="$goarm" \ + go build -buildvcs=false -trimpath -o "$out" ./cmd/ds2api + fi +} + +if [[ "${1:-}" == "--build-one" ]]; then + shift + build_one "$@" + exit 0 +fi + +jobs="${CROSS_BUILD_JOBS:-}" +if [[ -z "$jobs" ]]; then + if command -v nproc >/dev/null 2>&1; then + jobs="$(nproc)" + elif command -v sysctl >/dev/null 2>&1; then + jobs="$(sysctl -n hw.ncpu)" + else + jobs="2" + fi +fi + +rm -rf "$OUT_DIR" +mkdir -p "$OUT_DIR" + +if [[ "$jobs" -le 1 ]]; then + for target in "${DS2API_RELEASE_TARGETS[@]}"; do + read -r goos goarch goarm label <<< "$target" + build_one "$goos" "$goarch" "$goarm" "$label" + done +else + printf '%s\n' "${DS2API_RELEASE_TARGETS[@]}" \ + | xargs -L 1 -P "$jobs" bash "${ROOT_DIR}/tests/scripts/check-cross-build.sh" --build-one +fi