diff --git a/internal/adapter/openai/tool_history_sanitize.go b/internal/adapter/openai/tool_history_sanitize.go index 18b05e9..b2c740d 100644 --- a/internal/adapter/openai/tool_history_sanitize.go +++ b/internal/adapter/openai/tool_history_sanitize.go @@ -8,7 +8,15 @@ var leakedToolHistoryPattern = regexp.MustCompile(`(?is)\[TOOL_CALL_HISTORY\][\s var emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```") var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\s*\{[\s\S]*?\}\s*,\s*"id"\s*:\s*"call[^"]*"\s*,\s*"type"\s*:\s*"function"\s*}\]`) var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`) -var leakedMetaMarkerPattern = regexp.MustCompile(`(?is)<\s*\|\s*(?:assistant|tool|end_of_sentence|end_of_thinking)\s*\|\s*>`) + +// leakedMetaMarkerPattern matches DeepSeek special tokens in BOTH forms: +// - ASCII underscore: <|end_of_sentence|> +// - U+2581 variant: <|end▁of▁sentence|> (used in some DeepSeek outputs) +var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking)\s*[|\|]>`) + +// leakedAgentXMLPattern catches agent-style XML tags that leak through when +// the sieve fails to capture them (e.g. incomplete blocks at stream end). +var leakedAgentXMLPattern = regexp.MustCompile(`(?is)`) func sanitizeLeakedToolHistory(text string) string { if text == "" { @@ -19,5 +27,6 @@ func sanitizeLeakedToolHistory(text string) string { out = leakedToolCallArrayPattern.ReplaceAllString(out, "") out = leakedToolResultBlobPattern.ReplaceAllString(out, "") out = leakedMetaMarkerPattern.ReplaceAllString(out, "") + out = leakedAgentXMLPattern.ReplaceAllString(out, "") return out } diff --git a/internal/adapter/openai/tool_history_sanitize_test.go b/internal/adapter/openai/tool_history_sanitize_test.go index 3eb434a..5c12fb0 100644 --- a/internal/adapter/openai/tool_history_sanitize_test.go +++ b/internal/adapter/openai/tool_history_sanitize_test.go @@ -86,13 +86,21 @@ func TestSanitizeLeakedToolHistoryRemovesLeakedWireToolCallAndResult(t *testing. } func TestSanitizeLeakedToolHistoryRemovesStandaloneMetaMarkers(t *testing.T) { - raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C" + raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<|end▁of▁thinking|>D<|end▁of▁sentence|>E" got := sanitizeLeakedToolHistory(raw) - if got != "ABC" { + if got != "ABCDE" { t.Fatalf("unexpected sanitize result for meta markers: %q", got) } } +func TestSanitizeLeakedToolHistoryRemovesAgentXMLLeaks(t *testing.T) { + raw := "Done.Some final answer" + got := sanitizeLeakedToolHistory(raw) + if got != "Done.Some final answer" { + t.Fatalf("unexpected sanitize result for agent XML leak: %q", got) + } +} + func TestProcessToolSieveChunkSplitsResultHistoryBoundary(t *testing.T) { var state toolStreamSieveState parts := []string{ diff --git a/internal/adapter/openai/tool_sieve_xml.go b/internal/adapter/openai/tool_sieve_xml.go index aa97b5e..37cb8b3 100644 --- a/internal/adapter/openai/tool_sieve_xml.go +++ b/internal/adapter/openai/tool_sieve_xml.go @@ -9,8 +9,12 @@ import ( // --- XML tool call support for the streaming sieve --- -var xmlToolCallClosingTags = []string{"", "", "", "", "", ""} -var xmlToolCallOpeningTags = []string{"", "", "", "", "", "", + // Agent-style XML tags (Roo Code, Cline, etc.) + "", "", "", ""} +var xmlToolCallOpeningTags = []string{""}, {""}, {""}, + // Agent-style: these are XML "tool call" patterns from coding agents. + // They get captured → parsed. If parsing fails, the block is consumed + // (swallowed) to prevent raw XML from leaking to the client. + {""}, + {""}, + {""}, } // xmlToolCallBlockPattern matches a complete XML tool call block (wrapper or standalone). -var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(\s*(?:.*?)\s*|\s*(?:.*?)\s*|]*>(?:.*?)|]*>(?:.*?)|(?:.*?))`) +var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(\s*(?:.*?)\s*|\s*(?:.*?)\s*|]*>(?:.*?)|]*>(?:.*?)|(?:.*?)|(?:.*?)|(?:.*?)|(?:.*?))`) // xmlToolTagsToDetect is the set of XML tag prefixes used by findToolSegmentStart. var xmlToolTagsToDetect = []string{"", "", "", ""} + "", "", + // Agent-style tags + "", "", ""} // consumeXMLToolCapture tries to extract complete XML tool call blocks from captured text. func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) { @@ -120,6 +132,11 @@ func looksLikeXMLToolTagFragment(s string) bool { "function_calls>", "function_call>", "/function_calls>", "/function_call>", "invoke>", "/invoke>", "tool_use>", "/tool_use>", "tool_name>", "/tool_name>", "parameters>", "/parameters>", + // Agent-style tag fragments + "attempt_completion>", "/attempt_completion>", + "ask_followup_question>", "/ask_followup_question>", + "new_task>", "/new_task>", + "result>", "/result>", } for _, f := range fragments { if strings.Contains(lower, f) { diff --git a/internal/adapter/openai/tool_sieve_xml_test.go b/internal/adapter/openai/tool_sieve_xml_test.go index b678345..9201189 100644 --- a/internal/adapter/openai/tool_sieve_xml_test.go +++ b/internal/adapter/openai/tool_sieve_xml_test.go @@ -285,3 +285,35 @@ func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) { t.Fatal("expected tool calls to be extracted") } } + +func TestProcessToolSieveInterceptsAttemptCompletionLeak(t *testing.T) { + var state toolStreamSieveState + // Simulate an agent outputting attempt_completion XML tag + // which shouldn't leak to text output, even if it fails to parse as a valid tool. + chunks := []string{ + "Done with task.\n", + "\n", + " Here is the answer\n", + "", + } + var events []toolStreamEvent + for _, c := range chunks { + events = append(events, processToolSieveChunk(&state, c, []string{"attempt_completion"})...) + } + events = append(events, flushToolSieve(&state, []string{"attempt_completion"})...) + + var textContent string + for _, evt := range events { + if evt.Content != "" { + textContent += evt.Content + } + } + + if !strings.Contains(textContent, "Done with task.\n") { + t.Fatalf("expected leading text to be emitted, got %q", textContent) + } + + if strings.Contains(textContent, "") || strings.Contains(textContent, "result>") { + t.Fatalf("agent XML tag content leaked to text: %q", textContent) + } +}