From deec72416e8eb3c28b41ecc8ba6ac4f07cc35907 Mon Sep 17 00:00:00 2001 From: CJACK Date: Wed, 18 Feb 2026 16:51:30 +0800 Subject: [PATCH] test: Introduce comprehensive edge case tests across multiple modules and refine tool call and OpenAI handler logic. --- api/helpers/stream-tool-sieve.js | 84 +++++++++++++------ api/helpers/stream-tool-sieve.test.js | 18 ++++ .../adapter/openai/handler_toolcall_test.go | 77 ++++++++++++++++- internal/adapter/openai/tool_sieve.go | 84 +++++++++++++------ internal/util/toolcalls.go | 27 ++++++ internal/util/toolcalls_test.go | 7 ++ 6 files changed, 242 insertions(+), 55 deletions(-) diff --git a/api/helpers/stream-tool-sieve.js b/api/helpers/stream-tool-sieve.js index 8b586aa..4a713e5 100644 --- a/api/helpers/stream-tool-sieve.js +++ b/api/helpers/stream-tool-sieve.js @@ -3,6 +3,7 @@ const crypto = require('crypto'); const TOOL_CALL_PATTERN = /\{\s*["']tool_calls["']\s*:\s*\[(.*?)\]\s*\}/s; const TOOL_SIEVE_CAPTURE_LIMIT = 8 * 1024; +const TOOL_SIEVE_CONTEXT_TAIL_LIMIT = 256; function extractToolNames(tools) { if (!Array.isArray(tools) || tools.length === 0) { @@ -28,6 +29,7 @@ function createToolSieveState() { capture: '', capturing: false, hasMeaningfulText: false, + recentTextTail: '', toolNameSent: false, toolName: '', toolArgsStart: -1, @@ -68,9 +70,7 @@ function processToolSieveChunk(state, chunk, toolNames) { const consumed = consumeToolCapture(state, toolNames); if (!consumed.ready) { if (state.capture.length > TOOL_SIEVE_CAPTURE_LIMIT) { - if (hasMeaningfulText(state.capture)) { - state.hasMeaningfulText = true; - } + noteText(state, state.capture); events.push({ type: 'text', text: state.capture }); state.capture = ''; state.capturing = false; @@ -83,9 +83,7 @@ function processToolSieveChunk(state, chunk, toolNames) { state.capturing = false; resetIncrementalToolState(state); if (consumed.prefix) { - if (hasMeaningfulText(consumed.prefix)) { - state.hasMeaningfulText = true; - } + noteText(state, consumed.prefix); events.push({ type: 'text', text: consumed.prefix }); } if (Array.isArray(consumed.calls) && consumed.calls.length > 0) { @@ -105,9 +103,7 @@ function processToolSieveChunk(state, chunk, toolNames) { if (start >= 0) { const prefix = state.pending.slice(0, start); if (prefix) { - if (hasMeaningfulText(prefix)) { - state.hasMeaningfulText = true; - } + noteText(state, prefix); events.push({ type: 'text', text: prefix }); } state.capture = state.pending.slice(start); @@ -122,9 +118,7 @@ function processToolSieveChunk(state, chunk, toolNames) { break; } state.pending = hold; - if (hasMeaningfulText(safe)) { - state.hasMeaningfulText = true; - } + noteText(state, safe); events.push({ type: 'text', text: safe }); } return events; @@ -139,24 +133,18 @@ function flushToolSieve(state, toolNames) { const consumed = consumeToolCapture(state, toolNames); if (consumed.ready) { if (consumed.prefix) { - if (hasMeaningfulText(consumed.prefix)) { - state.hasMeaningfulText = true; - } + noteText(state, consumed.prefix); events.push({ type: 'text', text: consumed.prefix }); } if (Array.isArray(consumed.calls) && consumed.calls.length > 0) { events.push({ type: 'tool_calls', calls: consumed.calls }); } if (consumed.suffix) { - if (hasMeaningfulText(consumed.suffix)) { - state.hasMeaningfulText = true; - } + noteText(state, consumed.suffix); events.push({ type: 'text', text: consumed.suffix }); } } else if (state.capture) { - if (hasMeaningfulText(state.capture)) { - state.hasMeaningfulText = true; - } + noteText(state, state.capture); events.push({ type: 'text', text: state.capture }); } state.capture = ''; @@ -164,9 +152,7 @@ function flushToolSieve(state, toolNames) { resetIncrementalToolState(state); } if (state.pending) { - if (hasMeaningfulText(state.pending)) { - state.hasMeaningfulText = true; - } + noteText(state, state.pending); events.push({ type: 'text', text: state.pending }); state.pending = ''; } @@ -234,7 +220,7 @@ function consumeToolCapture(state, toolNames) { } const prefixPart = captured.slice(0, start); const suffixPart = captured.slice(obj.end); - if (!state.toolNameSent && (state.hasMeaningfulText || hasMeaningfulText(prefixPart) || hasMeaningfulText(suffixPart))) { + if (!state.toolNameSent && (hasMeaningfulText(prefixPart) || hasMeaningfulText(suffixPart) || looksLikeToolExampleContext(state.recentTextTail))) { return { ready: true, prefix: captured, @@ -285,7 +271,10 @@ function consumeToolCapture(state, toolNames) { function buildIncrementalToolDeltas(state) { const captured = state.capture || ''; - if (!captured || state.hasMeaningfulText) { + if (!captured) { + return []; + } + if (looksLikeToolExampleContext(state.recentTextTail)) { return []; } const lower = captured.toLowerCase(); @@ -651,6 +640,9 @@ function parseStandaloneToolCalls(text, toolNames) { if (!trimmed) { return []; } + if (looksLikeToolExampleContext(trimmed)) { + return []; + } const candidates = [trimmed]; if (trimmed.startsWith('```') && trimmed.endsWith('```')) { const m = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/i); @@ -856,6 +848,46 @@ function filterToolCalls(parsed, toolNames) { return out; } +function noteText(state, text) { + if (!state || !hasMeaningfulText(text)) { + return; + } + state.hasMeaningfulText = true; + state.recentTextTail = appendTail(state.recentTextTail, text, TOOL_SIEVE_CONTEXT_TAIL_LIMIT); +} + +function appendTail(prev, next, max) { + const left = typeof prev === 'string' ? prev : ''; + const right = typeof next === 'string' ? next : ''; + if (!Number.isFinite(max) || max <= 0) { + return ''; + } + const combined = left + right; + if (combined.length <= max) { + return combined; + } + return combined.slice(combined.length - max); +} + +function looksLikeToolExampleContext(text) { + const t = toStringSafe(text).toLowerCase(); + if (!t) { + return false; + } + const cues = [ + '示例', + '例子', + 'for example', + 'example', + 'demo', + '请勿执行', + '不要执行', + 'do not execute', + '```', + ]; + return cues.some((cue) => t.includes(cue)); +} + function hasMeaningfulText(text) { return toStringSafe(text) !== ''; } diff --git a/api/helpers/stream-tool-sieve.test.js b/api/helpers/stream-tool-sieve.test.js index ad1dc0b..c085436 100644 --- a/api/helpers/stream-tool-sieve.test.js +++ b/api/helpers/stream-tool-sieve.test.js @@ -83,6 +83,12 @@ test('parseStandaloneToolCalls only matches standalone payload and ignores mixed assert.equal(standaloneCalls.length, 1); }); +test('parseStandaloneToolCalls ignores fenced code block tool_call examples', () => { + const fenced = ['```json', '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}', '```'].join('\n'); + const calls = parseStandaloneToolCalls(fenced, ['read_file']); + assert.equal(calls.length, 0); +}); + test('sieve emits tool_calls and does not leak suspicious prefix on late key convergence', () => { const events = runSieve( [ @@ -165,3 +171,15 @@ test('sieve emits incremental tool_call_deltas for split arguments payload', () assert.equal(argsJoined.includes('"path":"README.MD"'), true); assert.equal(argsJoined.includes('"mode":"head"'), true); }); + +test('sieve still intercepts tool call after leading plain text without suffix', () => { + const events = runSieve( + ['我将调用工具。', '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}'], + ['read_file'], + ); + const hasTool = events.some((evt) => (evt.type === 'tool_calls' && evt.calls?.length > 0) || (evt.type === 'tool_call_deltas' && evt.deltas?.length > 0)); + const leakedText = collectText(events); + assert.equal(hasTool, true); + assert.equal(leakedText.includes('我将调用工具。'), true); + assert.equal(leakedText.toLowerCase().includes('tool_calls'), false); +}); diff --git a/internal/adapter/openai/handler_toolcall_test.go b/internal/adapter/openai/handler_toolcall_test.go index 30197d7..8c1435d 100644 --- a/internal/adapter/openai/handler_toolcall_test.go +++ b/internal/adapter/openai/handler_toolcall_test.go @@ -241,6 +241,35 @@ func TestHandleNonStreamEmbeddedToolCallExampleNotIntercepted(t *testing.T) { } } +func TestHandleNonStreamFencedToolCallExampleNotIntercepted(t *testing.T) { + h := &Handler{} + resp := makeSSEHTTPResponse( + "data: {\"p\":\"response/content\",\"v\":\"```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"search\\\",\\\"input\\\":{\\\"q\\\":\\\"go\\\"}}]}\\n```\"}", + `data: [DONE]`, + ) + rec := httptest.NewRecorder() + + h.handleNonStream(rec, context.Background(), resp, "cid2d", "deepseek-chat", "prompt", false, false, []string{"search"}) + if rec.Code != http.StatusOK { + t.Fatalf("unexpected status: %d", rec.Code) + } + + out := decodeJSONBody(t, rec.Body.String()) + choices, _ := out["choices"].([]any) + choice, _ := choices[0].(map[string]any) + if choice["finish_reason"] != "stop" { + t.Fatalf("expected finish_reason=stop, got %#v", choice["finish_reason"]) + } + msg, _ := choice["message"].(map[string]any) + if _, ok := msg["tool_calls"]; ok { + t.Fatalf("did not expect tool_calls field for fenced example: %#v", msg["tool_calls"]) + } + content, _ := msg["content"].(string) + if !strings.Contains(content, "```json") || !strings.Contains(content, `"tool_calls"`) { + t.Fatalf("expected fenced tool example to pass through as text, got %q", content) + } +} + func TestHandleStreamToolCallInterceptsWithoutRawContentLeak(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( @@ -428,9 +457,9 @@ func TestHandleStreamToolsPlainTextStreamsBeforeFinish(t *testing.T) { func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( - `data: {"p":"response/content","v":"前置正文A。"}`, + `data: {"p":"response/content","v":"下面是示例:"}`, `data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"}`, - `data: {"p":"response/content","v":"后置正文B。"}`, + `data: {"p":"response/content","v":"请勿执行。"}`, `data: [DONE]`, ) rec := httptest.NewRecorder() @@ -457,7 +486,7 @@ func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) { } } got := content.String() - if !strings.Contains(got, "前置正文A。") || !strings.Contains(got, "后置正文B。") { + if !strings.Contains(got, "下面是示例:") || !strings.Contains(got, "请勿执行。") { t.Fatalf("expected pre/post plain text to pass sieve, got=%q", got) } if !strings.Contains(got, `"tool_calls"`) { @@ -468,6 +497,48 @@ func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) { } } +func TestHandleStreamToolCallAfterLeadingTextStillIntercepted(t *testing.T) { + h := &Handler{} + resp := makeSSEHTTPResponse( + `data: {"p":"response/content","v":"我将调用工具。"}`, + `data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"}`, + `data: [DONE]`, + ) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) + + h.handleStream(rec, req, resp, "cid7b", "deepseek-chat", "prompt", false, false, []string{"search"}) + + frames, done := parseSSEDataFrames(t, rec.Body.String()) + if !done { + t.Fatalf("expected [DONE], body=%s", rec.Body.String()) + } + if !streamHasToolCallsDelta(frames) { + t.Fatalf("expected tool_calls delta, body=%s", rec.Body.String()) + } + content := strings.Builder{} + for _, frame := range frames { + choices, _ := frame["choices"].([]any) + for _, item := range choices { + choice, _ := item.(map[string]any) + delta, _ := choice["delta"].(map[string]any) + if c, ok := delta["content"].(string); ok { + content.WriteString(c) + } + } + } + got := content.String() + if !strings.Contains(got, "我将调用工具。") { + t.Fatalf("expected leading text to keep streaming, got=%q", got) + } + if strings.Contains(strings.ToLower(got), "tool_calls") { + t.Fatalf("unexpected raw tool json leak, got=%q", got) + } + if streamFinishReason(frames) != "tool_calls" { + t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String()) + } +} + func TestHandleStreamToolCallKeyAppearsLateStillNoPrefixLeak(t *testing.T) { h := &Handler{} spaces := strings.Repeat(" ", 200) diff --git a/internal/adapter/openai/tool_sieve.go b/internal/adapter/openai/tool_sieve.go index d890314..e5d6b77 100644 --- a/internal/adapter/openai/tool_sieve.go +++ b/internal/adapter/openai/tool_sieve.go @@ -11,6 +11,7 @@ type toolStreamSieveState struct { capture strings.Builder capturing bool hasMeaningfulText bool + recentTextTail string toolNameSent bool toolName string toolArgsStart int @@ -32,6 +33,7 @@ type toolCallDelta struct { } const toolSieveCaptureLimit = 8 * 1024 +const toolSieveContextTailLimit = 256 func (s *toolStreamSieveState) resetIncrementalToolState() { s.toolNameSent = false @@ -67,9 +69,7 @@ func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames state.capture.Reset() state.capturing = false state.resetIncrementalToolState() - if strings.TrimSpace(content) != "" { - state.hasMeaningfulText = true - } + state.noteText(content) events = append(events, toolStreamEvent{Content: content}) continue } @@ -79,9 +79,7 @@ func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames state.capturing = false state.resetIncrementalToolState() if prefix != "" { - if strings.TrimSpace(prefix) != "" { - state.hasMeaningfulText = true - } + state.noteText(prefix) events = append(events, toolStreamEvent{Content: prefix}) } if len(calls) > 0 { @@ -101,9 +99,7 @@ func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames if start >= 0 { prefix := pending[:start] if prefix != "" { - if strings.TrimSpace(prefix) != "" { - state.hasMeaningfulText = true - } + state.noteText(prefix) events = append(events, toolStreamEvent{Content: prefix}) } state.pending.Reset() @@ -119,9 +115,7 @@ func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames } state.pending.Reset() state.pending.WriteString(hold) - if strings.TrimSpace(safe) != "" { - state.hasMeaningfulText = true - } + state.noteText(safe) events = append(events, toolStreamEvent{Content: safe}) } @@ -137,26 +131,20 @@ func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStrea consumedPrefix, consumedCalls, consumedSuffix, ready := consumeToolCapture(state, toolNames) if ready { if consumedPrefix != "" { - if strings.TrimSpace(consumedPrefix) != "" { - state.hasMeaningfulText = true - } + state.noteText(consumedPrefix) events = append(events, toolStreamEvent{Content: consumedPrefix}) } if len(consumedCalls) > 0 { events = append(events, toolStreamEvent{ToolCalls: consumedCalls}) } if consumedSuffix != "" { - if strings.TrimSpace(consumedSuffix) != "" { - state.hasMeaningfulText = true - } + state.noteText(consumedSuffix) events = append(events, toolStreamEvent{Content: consumedSuffix}) } } else { content := state.capture.String() if content != "" { - if strings.TrimSpace(content) != "" { - state.hasMeaningfulText = true - } + state.noteText(content) events = append(events, toolStreamEvent{Content: content}) } } @@ -166,9 +154,7 @@ func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStrea } if state.pending.Len() > 0 { content := state.pending.String() - if strings.TrimSpace(content) != "" { - state.hasMeaningfulText = true - } + state.noteText(content) events = append(events, toolStreamEvent{Content: content}) state.pending.Reset() } @@ -241,7 +227,7 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix } prefixPart := captured[:start] suffixPart := captured[end:] - if !state.toolNameSent && (state.hasMeaningfulText || strings.TrimSpace(prefixPart) != "" || strings.TrimSpace(suffixPart) != "") { + if !state.toolNameSent && (strings.TrimSpace(prefixPart) != "" || strings.TrimSpace(suffixPart) != "" || looksLikeToolExampleContext(state.recentTextTail)) { return captured, nil, "", true } parsed := util.ParseStandaloneToolCalls(obj, toolNames) @@ -304,7 +290,10 @@ func extractJSONObjectFrom(text string, start int) (string, int, bool) { func buildIncrementalToolDeltas(state *toolStreamSieveState) []toolCallDelta { captured := state.capture.String() - if captured == "" || state.hasMeaningfulText { + if captured == "" { + return nil + } + if looksLikeToolExampleContext(state.recentTextTail) { return nil } lower := strings.ToLower(captured) @@ -618,3 +607,46 @@ func skipSpaces(text string, i int) int { } return i } + +func (s *toolStreamSieveState) noteText(content string) { + if strings.TrimSpace(content) == "" { + return + } + s.hasMeaningfulText = true + s.recentTextTail = appendTail(s.recentTextTail, content, toolSieveContextTailLimit) +} + +func appendTail(prev, next string, max int) string { + if max <= 0 { + return "" + } + combined := prev + next + if len(combined) <= max { + return combined + } + return combined[len(combined)-max:] +} + +func looksLikeToolExampleContext(text string) bool { + t := strings.ToLower(strings.TrimSpace(text)) + if t == "" { + return false + } + cues := []string{ + "示例", + "例子", + "for example", + "example", + "demo", + "请勿执行", + "不要执行", + "do not execute", + "```", + } + for _, cue := range cues { + if strings.Contains(t, cue) { + return true + } + } + return false +} diff --git a/internal/util/toolcalls.go b/internal/util/toolcalls.go index 4760546..decb96e 100644 --- a/internal/util/toolcalls.go +++ b/internal/util/toolcalls.go @@ -41,6 +41,9 @@ func ParseStandaloneToolCalls(text string, availableToolNames []string) []Parsed if trimmed == "" { return nil } + if looksLikeToolExampleContext(trimmed) { + return nil + } candidates := []string{trimmed} if strings.HasPrefix(trimmed, "```") && strings.HasSuffix(trimmed, "```") { if m := fencedJSONPattern.FindStringSubmatch(trimmed); len(m) >= 2 { @@ -313,6 +316,30 @@ func extractJSONObject(text string, start int) (string, int, bool) { return "", 0, false } +func looksLikeToolExampleContext(text string) bool { + t := strings.ToLower(strings.TrimSpace(text)) + if t == "" { + return false + } + cues := []string{ + "```", + "示例", + "例子", + "for example", + "example", + "demo", + "请勿执行", + "不要执行", + "do not execute", + } + for _, cue := range cues { + if strings.Contains(t, cue) { + return true + } + } + return false +} + func FormatOpenAIToolCalls(calls []ParsedToolCall) []map[string]any { out := make([]map[string]any, 0, len(calls)) for _, c := range calls { diff --git a/internal/util/toolcalls_test.go b/internal/util/toolcalls_test.go index 8a29a18..509299c 100644 --- a/internal/util/toolcalls_test.go +++ b/internal/util/toolcalls_test.go @@ -75,3 +75,10 @@ func TestParseStandaloneToolCallsOnlyMatchesStandalonePayload(t *testing.T) { t.Fatalf("expected standalone parser to match, got %#v", calls) } } + +func TestParseStandaloneToolCallsIgnoresFencedCodeBlock(t *testing.T) { + fenced := "```json\n{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}\n```" + if calls := ParseStandaloneToolCalls(fenced, []string{"search"}); len(calls) != 0 { + t.Fatalf("expected fenced tool_call example to be ignored, got %#v", calls) + } +}