From 0d3d535c08da45ab2cdf810975b328d26a5a9e80 Mon Sep 17 00:00:00 2001 From: CJACK Date: Mon, 23 Feb 2026 00:27:46 +0800 Subject: [PATCH] feat: prevent raw tool call JSON leakage for unknown or rejected tool calls and consolidate container publishing to GHCR. --- .github/workflows/release-artifacts.yml | 11 ------- DEPLOY.en.md | 11 +++++++ DEPLOY.md | 11 +++++++ README.MD | 1 + README.en.md | 1 + .../adapter/openai/handler_toolcall_test.go | 32 +++++++++++++++++-- internal/adapter/openai/tool_sieve_core.go | 11 +++++-- .../js/helpers/stream-tool-sieve/sieve.js | 9 ++++++ tests/node/stream-tool-sieve.test.js | 14 ++++++++ 9 files changed, 84 insertions(+), 17 deletions(-) diff --git a/.github/workflows/release-artifacts.yml b/.github/workflows/release-artifacts.yml index 051117d..3e33aa1 100644 --- a/.github/workflows/release-artifacts.yml +++ b/.github/workflows/release-artifacts.yml @@ -12,9 +12,6 @@ permissions: jobs: build-and-upload: runs-on: ubuntu-latest - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} steps: - name: Checkout uses: actions/checkout@v4 @@ -95,20 +92,12 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Log in to Docker Hub - if: "${{ env.DOCKERHUB_USERNAME != '' }}" - uses: docker/login-action@v3 - with: - username: ${{ env.DOCKERHUB_USERNAME }} - password: ${{ env.DOCKERHUB_TOKEN }} - - name: Extract Docker metadata id: meta_release uses: docker/metadata-action@v5 with: images: | ghcr.io/${{ github.repository }} - ${{ env.DOCKERHUB_USERNAME || 'cjackhwang' }}/ds2api tags: | type=raw,value=${{ github.event.release.tag_name }} type=raw,value=latest diff --git a/DEPLOY.en.md b/DEPLOY.en.md index 18fa438..19c3999 100644 --- a/DEPLOY.en.md +++ b/DEPLOY.en.md @@ -341,6 +341,7 @@ Built-in GitHub Actions workflow: `.github/workflows/release-artifacts.yml` - **Trigger**: only on Release `published` (no build on normal push) - **Outputs**: multi-platform binary archives + `sha256sums.txt` +- **Container publishing**: GHCR only (`ghcr.io/cjackhwang/ds2api`) | Platform | Architecture | Format | | --- | --- | --- | @@ -378,6 +379,16 @@ cp config.example.json config.json 2. Wait for the `Release Artifacts` workflow to complete 3. Download the matching archive from Release Assets +### Pull from GHCR (Optional) + +```bash +# latest +docker pull ghcr.io/cjackhwang/ds2api:latest + +# specific version (example) +docker pull ghcr.io/cjackhwang/ds2api:v2.1.2 +``` + --- ## 5. Reverse Proxy (Nginx) diff --git a/DEPLOY.md b/DEPLOY.md index 49583bf..e2e484a 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -341,6 +341,7 @@ No Output Directory named "public" found after the Build completed. - **触发条件**:仅在 Release `published` 时触发(普通 push 不会构建) - **构建产物**:多平台二进制压缩包 + `sha256sums.txt` +- **容器镜像发布**:仅发布到 GHCR(`ghcr.io/cjackhwang/ds2api`) | 平台 | 架构 | 文件格式 | | --- | --- | --- | @@ -378,6 +379,16 @@ cp config.example.json config.json 2. 等待 Actions 工作流 `Release Artifacts` 完成 3. 在 Release 的 Assets 下载对应平台压缩包 +### 拉取 GHCR 镜像(可选) + +```bash +# latest +docker pull ghcr.io/cjackhwang/ds2api:latest + +# 指定版本(示例) +docker pull ghcr.io/cjackhwang/ds2api:v2.1.2 +``` + --- ## 五、反向代理(Nginx) diff --git a/README.MD b/README.MD index 0c1ecd1..b8c3be0 100644 --- a/README.MD +++ b/README.MD @@ -462,6 +462,7 @@ npm ci --prefix webui && npm run build --prefix webui - **触发条件**:仅在 GitHub Release `published` 时触发(普通 push 不会触发) - **构建产物**:多平台二进制包(`linux/amd64`、`linux/arm64`、`darwin/amd64`、`darwin/arm64`、`windows/amd64`)+ `sha256sums.txt` +- **容器镜像发布**:仅推送到 GHCR(`ghcr.io/cjackhwang/ds2api`) - **每个压缩包包含**:`ds2api` 可执行文件、`static/admin`、WASM 文件、配置示例、README、LICENSE ## 免责声明 diff --git a/README.en.md b/README.en.md index 2cc9389..4e872ad 100644 --- a/README.en.md +++ b/README.en.md @@ -462,6 +462,7 @@ Workflow: `.github/workflows/release-artifacts.yml` - **Trigger**: only on GitHub Release `published` (normal pushes do not trigger builds) - **Outputs**: multi-platform archives (`linux/amd64`, `linux/arm64`, `darwin/amd64`, `darwin/arm64`, `windows/amd64`) + `sha256sums.txt` +- **Container publishing**: GHCR only (`ghcr.io/cjackhwang/ds2api`) - **Each archive includes**: `ds2api` executable, `static/admin`, WASM file, config template, README, LICENSE ## Disclaimer diff --git a/internal/adapter/openai/handler_toolcall_test.go b/internal/adapter/openai/handler_toolcall_test.go index 9236b8b..895605f 100644 --- a/internal/adapter/openai/handler_toolcall_test.go +++ b/internal/adapter/openai/handler_toolcall_test.go @@ -375,7 +375,7 @@ func TestHandleStreamReasonerToolCallInterceptsWithoutRawContentLeak(t *testing. } } -func TestHandleStreamUnknownToolNotIntercepted(t *testing.T) { +func TestHandleStreamUnknownToolDoesNotLeakRawPayload(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( `data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"not_in_schema\",\"input\":{\"q\":\"go\"}}]}"}`, @@ -393,8 +393,34 @@ func TestHandleStreamUnknownToolNotIntercepted(t *testing.T) { if streamHasToolCallsDelta(frames) { t.Fatalf("did not expect tool_calls delta for unknown schema name, body=%s", rec.Body.String()) } - if !streamHasRawToolJSONContent(frames) { - t.Fatalf("expected raw tool_calls json to remain in content for unknown schema name: %s", rec.Body.String()) + if streamHasRawToolJSONContent(frames) { + t.Fatalf("did not expect raw tool_calls json leak for unknown schema name: %s", rec.Body.String()) + } + if streamFinishReason(frames) != "stop" { + t.Fatalf("expected finish_reason=stop, body=%s", rec.Body.String()) + } +} + +func TestHandleStreamUnknownToolNoArgsDoesNotLeakRawPayload(t *testing.T) { + h := &Handler{} + resp := makeSSEHTTPResponse( + `data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"not_in_schema\"}]}"}`, + `data: [DONE]`, + ) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) + + h.handleStream(rec, req, resp, "cid5b", "deepseek-chat", "prompt", false, false, []string{"search"}) + + frames, done := parseSSEDataFrames(t, rec.Body.String()) + if !done { + t.Fatalf("expected [DONE], body=%s", rec.Body.String()) + } + if streamHasToolCallsDelta(frames) { + t.Fatalf("did not expect tool_calls delta for unknown schema name (no args), body=%s", rec.Body.String()) + } + if streamHasRawToolJSONContent(frames) { + t.Fatalf("did not expect raw tool_calls json leak for unknown schema name (no args): %s", rec.Body.String()) } if streamFinishReason(frames) != "stop" { t.Fatalf("expected finish_reason=stop, body=%s", rec.Body.String()) diff --git a/internal/adapter/openai/tool_sieve_core.go b/internal/adapter/openai/tool_sieve_core.go index 1bcf102..5ed9b90 100644 --- a/internal/adapter/openai/tool_sieve_core.go +++ b/internal/adapter/openai/tool_sieve_core.go @@ -200,9 +200,14 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix if insideCodeFence(state.recentTextTail + prefixPart) { return captured, nil, "", true } - parsed := util.ParseStandaloneToolCalls(obj, toolNames) - if len(parsed) == 0 { + parsed := util.ParseStandaloneToolCallsDetailed(obj, toolNames) + if len(parsed.Calls) == 0 { + if parsed.SawToolCallSyntax && parsed.RejectedByPolicy { + // Parsed as tool-call payload but rejected by schema/policy: + // consume it to avoid leaking raw tool_calls JSON to user content. + return prefixPart, nil, suffixPart, true + } return captured, nil, "", true } - return prefixPart, parsed, suffixPart, true + return prefixPart, parsed.Calls, suffixPart, true } diff --git a/internal/js/helpers/stream-tool-sieve/sieve.js b/internal/js/helpers/stream-tool-sieve/sieve.js index c10e636..699c3a8 100644 --- a/internal/js/helpers/stream-tool-sieve/sieve.js +++ b/internal/js/helpers/stream-tool-sieve/sieve.js @@ -205,8 +205,17 @@ function consumeToolCapture(state, toolNames) { suffix: '', }; } + const rawParsed = parseStandaloneToolCalls(captured.slice(start, obj.end), []); const parsed = parseStandaloneToolCalls(captured.slice(start, obj.end), toolNames); if (parsed.length === 0) { + if (rawParsed.length > 0 && Array.isArray(toolNames) && toolNames.length > 0) { + return { + ready: true, + prefix: prefixPart, + calls: [], + suffix: suffixPart, + }; + } if (state.toolNameSent) { return { ready: true, diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index 2fc2ecc..f20cb11 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -152,6 +152,20 @@ test('sieve keeps plain text intact in tool mode when no tool call appears', () assert.equal(leakedText, '你好,这是普通文本回复。请继续。'); }); +test('sieve intercepts rejected unknown tool payload (no args) without raw leak', () => { + const events = runSieve( + ['{"tool_calls":[{"name":"not_in_schema"}]}', '后置正文G。'], + ['read_file'], + ); + const leakedText = collectText(events); + const hasToolCall = events.some((evt) => evt.type === 'tool_calls' && Array.isArray(evt.calls) && evt.calls.length > 0); + const hasToolDelta = events.some((evt) => evt.type === 'tool_call_deltas' && Array.isArray(evt.deltas) && evt.deltas.length > 0); + assert.equal(hasToolCall, false); + assert.equal(hasToolDelta, false); + assert.equal(leakedText.toLowerCase().includes('tool_calls'), false); + assert.equal(leakedText.includes('后置正文G。'), true); +}); + test('sieve emits incremental tool_call_deltas for split arguments payload', () => { const state = createToolSieveState(); const first = processToolSieveChunk(