From fb5fc0e8855cc91ba212186e48ac8537b38a97bc Mon Sep 17 00:00:00 2001 From: "CJACK." Date: Fri, 20 Mar 2026 02:03:46 +0800 Subject: [PATCH] Default to aggressive tool-call interception in mixed/fenced text --- .../adapter/claude/handler_stream_test.go | 29 +++++++----- .../adapter/openai/handler_toolcall_test.go | 29 ++++++------ internal/adapter/openai/tool_sieve_core.go | 44 ++++++------------- .../adapter/openai/tool_sieve_incremental.go | 3 -- internal/format/openai/render_test.go | 12 ++--- internal/util/toolcalls_parse.go | 6 +-- internal/util/toolcalls_test.go | 10 ++--- internal/util/util_edge_test.go | 4 +- .../expected/toolcalls_fenced_json.json | 13 ++++-- .../toolcalls_standalone_fenced_example.json | 13 ++++-- 10 files changed, 79 insertions(+), 84 deletions(-) diff --git a/internal/adapter/claude/handler_stream_test.go b/internal/adapter/claude/handler_stream_test.go index dda425a..77e62c8 100644 --- a/internal/adapter/claude/handler_stream_test.go +++ b/internal/adapter/claude/handler_stream_test.go @@ -358,7 +358,7 @@ func TestHandleClaudeStreamRealtimeToolSafetyAcrossStructuredFormats(t *testing. } } -func TestHandleClaudeStreamRealtimeDoesNotStopOnUnclosedFencedToolExample(t *testing.T) { +func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) { h := &Handler{} resp := makeClaudeSSEHTTPResponse( "data: {\"p\":\"response/content\",\"v\":\"Here is an example:\\n```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"Bash\\\",\\\"input\\\":{\\\"command\\\":\\\"pwd\\\"}}]}\"}", @@ -371,22 +371,27 @@ func TestHandleClaudeStreamRealtimeDoesNotStopOnUnclosedFencedToolExample(t *tes h.handleClaudeStreamRealtime(rec, req, resp, "claude-sonnet-4-5", []any{map[string]any{"role": "user", "content": "show example only"}}, false, false, []string{"Bash"}) frames := parseClaudeFrames(t, rec.Body.String()) + foundToolUse := false for _, f := range findClaudeFrames(frames, "content_block_start") { contentBlock, _ := f.Payload["content_block"].(map[string]any) if contentBlock["type"] == "tool_use" { - t.Fatalf("unexpected tool_use for fenced example, body=%s", rec.Body.String()) - } - } - - foundEndTurn := false - for _, f := range findClaudeFrames(frames, "message_delta") { - delta, _ := f.Payload["delta"].(map[string]any) - if delta["stop_reason"] == "end_turn" { - foundEndTurn = true + foundToolUse = true break } } - if !foundEndTurn { - t.Fatalf("expected stop_reason=end_turn, body=%s", rec.Body.String()) + if !foundToolUse { + t.Fatalf("expected tool_use for fenced example, body=%s", rec.Body.String()) + } + + foundToolStop := false + for _, f := range findClaudeFrames(frames, "message_delta") { + delta, _ := f.Payload["delta"].(map[string]any) + if delta["stop_reason"] == "tool_use" { + foundToolStop = true + break + } + } + if !foundToolStop { + t.Fatalf("expected stop_reason=tool_use, body=%s", rec.Body.String()) } } diff --git a/internal/adapter/openai/handler_toolcall_test.go b/internal/adapter/openai/handler_toolcall_test.go index 00a0e8d..ef22803 100644 --- a/internal/adapter/openai/handler_toolcall_test.go +++ b/internal/adapter/openai/handler_toolcall_test.go @@ -243,7 +243,7 @@ func TestHandleNonStreamEmbeddedToolCallExamplePromotesToolCall(t *testing.T) { } } -func TestHandleNonStreamFencedToolCallExampleNotIntercepted(t *testing.T) { +func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( "data: {\"p\":\"response/content\",\"v\":\"```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"search\\\",\\\"input\\\":{\\\"q\\\":\\\"go\\\"}}]}\\n```\"}", @@ -259,16 +259,17 @@ func TestHandleNonStreamFencedToolCallExampleNotIntercepted(t *testing.T) { out := decodeJSONBody(t, rec.Body.String()) choices, _ := out["choices"].([]any) choice, _ := choices[0].(map[string]any) - if choice["finish_reason"] != "stop" { - t.Fatalf("expected finish_reason=stop, got %#v", choice["finish_reason"]) + if choice["finish_reason"] != "tool_calls" { + t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"]) } msg, _ := choice["message"].(map[string]any) - if _, ok := msg["tool_calls"]; ok { - t.Fatalf("did not expect tool_calls field for fenced example: %#v", msg["tool_calls"]) + toolCalls, _ := msg["tool_calls"].([]any) + if len(toolCalls) != 1 { + t.Fatalf("expected one tool_call field for fenced example: %#v", msg["tool_calls"]) } content, _ := msg["content"].(string) - if !strings.Contains(content, "```json") || !strings.Contains(content, `"tool_calls"`) { - t.Fatalf("expected fenced tool example to pass through as text, got %q", content) + if strings.Contains(content, `"tool_calls"`) { + t.Fatalf("expected raw tool_calls json stripped from content, got %q", content) } } @@ -616,7 +617,7 @@ func TestHandleStreamToolCallWithSameChunkTrailingTextRemainsText(t *testing.T) } } -func TestHandleStreamFencedToolCallSnippetRemainsText(t *testing.T) { +func TestHandleStreamFencedToolCallSnippetPromotesToolCall(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( fmt.Sprintf(`data: {"p":"response/content","v":%q}`, "下面是调用示例:\n```json\n"), @@ -632,8 +633,8 @@ func TestHandleStreamFencedToolCallSnippetRemainsText(t *testing.T) { if !done { t.Fatalf("expected [DONE], body=%s", rec.Body.String()) } - if streamHasToolCallsDelta(frames) { - t.Fatalf("did not expect tool_calls delta for fenced snippet, body=%s", rec.Body.String()) + if !streamHasToolCallsDelta(frames) { + t.Fatalf("expected tool_calls delta for fenced snippet, body=%s", rec.Body.String()) } content := strings.Builder{} for _, frame := range frames { @@ -647,11 +648,11 @@ func TestHandleStreamFencedToolCallSnippetRemainsText(t *testing.T) { } } got := content.String() - if !strings.Contains(got, "```json") || !strings.Contains(strings.ToLower(got), "tool_calls") { - t.Fatalf("expected fenced tool snippet in content, got=%q", got) + if strings.Contains(strings.ToLower(got), "tool_calls") { + t.Fatalf("expected raw fenced tool_calls snippet stripped from content, got=%q", got) } - if streamFinishReason(frames) != "stop" { - t.Fatalf("expected finish_reason=stop, body=%s", rec.Body.String()) + if streamFinishReason(frames) != "tool_calls" { + t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String()) } } diff --git a/internal/adapter/openai/tool_sieve_core.go b/internal/adapter/openai/tool_sieve_core.go index 72628e9..ca2223a 100644 --- a/internal/adapter/openai/tool_sieve_core.go +++ b/internal/adapter/openai/tool_sieve_core.go @@ -168,36 +168,21 @@ func findToolSegmentStart(s string) int { } lower := strings.ToLower(s) keywords := []string{"tool_calls", "function.name:", "[tool_call_history]"} - offset := 0 - for { - bestKeyIdx := -1 - matchedKeyword := "" - - for _, kw := range keywords { - idx := strings.Index(lower[offset:], kw) - if idx >= 0 { - absIdx := offset + idx - if bestKeyIdx < 0 || absIdx < bestKeyIdx { - bestKeyIdx = absIdx - matchedKeyword = kw - } - } + bestKeyIdx := -1 + for _, kw := range keywords { + idx := strings.Index(lower, kw) + if idx >= 0 && (bestKeyIdx < 0 || idx < bestKeyIdx) { + bestKeyIdx = idx } - - if bestKeyIdx < 0 { - return -1 - } - - keyIdx := bestKeyIdx - start := strings.LastIndex(s[:keyIdx], "{") - if start < 0 { - start = keyIdx - } - if !insideCodeFence(s[:start]) { - return start - } - offset = keyIdx + len(matchedKeyword) } + if bestKeyIdx < 0 { + return -1 + } + start := strings.LastIndex(s[:bestKeyIdx], "{") + if start < 0 { + start = bestKeyIdx + } + return start } func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) { @@ -229,9 +214,6 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix } prefixPart := captured[:start] suffixPart := captured[end:] - if insideCodeFence(state.recentTextTail + prefixPart) { - return captured, nil, "", true - } parsed := util.ParseStandaloneToolCallsDetailed(obj, toolNames) if len(parsed.Calls) == 0 { if parsed.SawToolCallSyntax && parsed.RejectedByPolicy { diff --git a/internal/adapter/openai/tool_sieve_incremental.go b/internal/adapter/openai/tool_sieve_incremental.go index ad0f901..d0d7842 100644 --- a/internal/adapter/openai/tool_sieve_incremental.go +++ b/internal/adapter/openai/tool_sieve_incremental.go @@ -19,9 +19,6 @@ func buildIncrementalToolDeltas(state *toolStreamSieveState) []toolCallDelta { if start < 0 { return nil } - if insideCodeFence(state.recentTextTail + captured[:start]) { - return nil - } certainSingle, hasMultiple := classifyToolCallsIncrementalSafety(captured, keyIdx) if hasMultiple { state.disableDeltas = true diff --git a/internal/format/openai/render_test.go b/internal/format/openai/render_test.go index 2ec05c6..952d0ef 100644 --- a/internal/format/openai/render_test.go +++ b/internal/format/openai/render_test.go @@ -69,7 +69,7 @@ func TestBuildResponseObjectPromotesMixedProseToolPayloadToFunctionCall(t *testi } } -func TestBuildResponseObjectFencedToolPayloadRemainsText(t *testing.T) { +func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T) { obj := BuildResponseObject( "resp_test", "gpt-4o", @@ -80,16 +80,16 @@ func TestBuildResponseObjectFencedToolPayloadRemainsText(t *testing.T) { ) outputText, _ := obj["output_text"].(string) - if outputText == "" { - t.Fatalf("expected output_text preserved for fenced example") + if outputText != "" { + t.Fatalf("expected output_text hidden for fenced tool payload, got %q", outputText) } output, _ := obj["output"].([]any) if len(output) != 1 { - t.Fatalf("expected one message output item, got %#v", obj["output"]) + t.Fatalf("expected one function_call output item, got %#v", obj["output"]) } first, _ := output[0].(map[string]any) - if first["type"] != "message" { - t.Fatalf("expected message output type, got %#v", first["type"]) + if first["type"] != "function_call" { + t.Fatalf("expected function_call output type, got %#v", first["type"]) } } diff --git a/internal/util/toolcalls_parse.go b/internal/util/toolcalls_parse.go index e55861b..bcffd03 100644 --- a/internal/util/toolcalls_parse.go +++ b/internal/util/toolcalls_parse.go @@ -26,10 +26,6 @@ func ParseToolCallsDetailed(text string, availableToolNames []string) ToolCallPa if strings.TrimSpace(text) == "" { return result } - text = stripFencedCodeBlocks(text) - if strings.TrimSpace(text) == "" { - return result - } result.SawToolCallSyntax = looksLikeToolCallSyntax(text) candidates := buildToolCallCandidates(text) @@ -75,7 +71,7 @@ func ParseStandaloneToolCalls(text string, availableToolNames []string) []Parsed func ParseStandaloneToolCallsDetailed(text string, availableToolNames []string) ToolCallParseResult { result := ToolCallParseResult{} - trimmed := strings.TrimSpace(stripFencedCodeBlocks(text)) + trimmed := strings.TrimSpace(text) if trimmed == "" { return result } diff --git a/internal/util/toolcalls_test.go b/internal/util/toolcalls_test.go index da6e59a..2d29c1a 100644 --- a/internal/util/toolcalls_test.go +++ b/internal/util/toolcalls_test.go @@ -22,8 +22,8 @@ func TestParseToolCalls(t *testing.T) { func TestParseToolCallsFromFencedJSON(t *testing.T) { text := "I will call tools now\n```json\n{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"news\"}}]}\n```" calls := ParseToolCalls(text, []string{"search"}) - if len(calls) != 0 { - t.Fatalf("expected fenced tool_call example to be ignored, got %#v", calls) + if len(calls) != 1 { + t.Fatalf("expected fenced tool_call payload to be parsed, got %#v", calls) } } @@ -112,10 +112,10 @@ func TestParseStandaloneToolCallsSupportsMixedProsePayload(t *testing.T) { } } -func TestParseStandaloneToolCallsIgnoresFencedCodeBlock(t *testing.T) { +func TestParseStandaloneToolCallsParsesFencedCodeBlock(t *testing.T) { fenced := "```json\n{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}\n```" - if calls := ParseStandaloneToolCalls(fenced, []string{"search"}); len(calls) != 0 { - t.Fatalf("expected fenced tool_call example to be ignored, got %#v", calls) + if calls := ParseStandaloneToolCalls(fenced, []string{"search"}); len(calls) != 1 { + t.Fatalf("expected fenced tool_call payload to be parsed, got %#v", calls) } } diff --git a/internal/util/util_edge_test.go b/internal/util/util_edge_test.go index 876cd04..81d607e 100644 --- a/internal/util/util_edge_test.go +++ b/internal/util/util_edge_test.go @@ -409,8 +409,8 @@ func TestParseToolCallsWithFunctionWrapper(t *testing.T) { func TestParseStandaloneToolCallsFencedCodeBlock(t *testing.T) { fenced := "Here's an example:\n```json\n{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}\n```\nDon't execute this." calls := ParseStandaloneToolCalls(fenced, []string{"search"}) - if len(calls) != 0 { - t.Fatalf("expected fenced code block ignored, got %d calls", len(calls)) + if len(calls) != 1 { + t.Fatalf("expected fenced code block to be parsed, got %d calls", len(calls)) } } diff --git a/tests/compat/expected/toolcalls_fenced_json.json b/tests/compat/expected/toolcalls_fenced_json.json index d740e67..124de59 100644 --- a/tests/compat/expected/toolcalls_fenced_json.json +++ b/tests/compat/expected/toolcalls_fenced_json.json @@ -1,6 +1,13 @@ { - "calls": [], - "sawToolCallSyntax": false, + "calls": [ + { + "name": "read_file", + "input": { + "path": "README.MD" + } + } + ], + "sawToolCallSyntax": true, "rejectedByPolicy": false, "rejectedToolNames": [] -} \ No newline at end of file +} diff --git a/tests/compat/expected/toolcalls_standalone_fenced_example.json b/tests/compat/expected/toolcalls_standalone_fenced_example.json index d740e67..124de59 100644 --- a/tests/compat/expected/toolcalls_standalone_fenced_example.json +++ b/tests/compat/expected/toolcalls_standalone_fenced_example.json @@ -1,6 +1,13 @@ { - "calls": [], - "sawToolCallSyntax": false, + "calls": [ + { + "name": "read_file", + "input": { + "path": "README.MD" + } + } + ], + "sawToolCallSyntax": true, "rejectedByPolicy": false, "rejectedToolNames": [] -} \ No newline at end of file +}