diff --git a/internal/adapter/openai/tool_history_sanitize.go b/internal/adapter/openai/tool_history_sanitize.go
index 18b05e9..b2c740d 100644
--- a/internal/adapter/openai/tool_history_sanitize.go
+++ b/internal/adapter/openai/tool_history_sanitize.go
@@ -8,7 +8,15 @@ var leakedToolHistoryPattern = regexp.MustCompile(`(?is)\[TOOL_CALL_HISTORY\][\s
var emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```")
var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\s*\{[\s\S]*?\}\s*,\s*"id"\s*:\s*"call[^"]*"\s*,\s*"type"\s*:\s*"function"\s*}\]`)
var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`)
-var leakedMetaMarkerPattern = regexp.MustCompile(`(?is)<\s*\|\s*(?:assistant|tool|end_of_sentence|end_of_thinking)\s*\|\s*>`)
+
+// leakedMetaMarkerPattern matches DeepSeek special tokens that leak into text.
+// It accepts the following spellings, case-insensitively:
+//   - bar: ASCII "|" or fullwidth "｜" (U+FF5C), e.g. <|tool|> and <｜tool｜>
+//   - word separator: "_" or "▁" (U+2581), e.g. <|end_of_sentence|> and <|end▁of▁sentence|>
+//   - optional whitespace around the bars, e.g. < | Assistant | >, which the
+//     previous pattern also tolerated
+var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<\s*[|｜]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking)\s*[|｜]\s*>`)
+
+// leakedAgentXMLPattern matches the opening and closing tag markers of
+// agent-style XML blocks (attempt_completion, ask_followup_question, new_task,
+// result) that leak through when the sieve fails to capture them (e.g.
+// incomplete blocks at stream end). Only the tags themselves match, so
+// replacing matches with "" keeps the text between the tags.
+var leakedAgentXMLPattern = regexp.MustCompile(`(?is)</?(?:attempt_completion|ask_followup_question|new_task|result)>`)
func sanitizeLeakedToolHistory(text string) string {
if text == "" {
@@ -19,5 +27,6 @@ func sanitizeLeakedToolHistory(text string) string {
out = leakedToolCallArrayPattern.ReplaceAllString(out, "")
out = leakedToolResultBlobPattern.ReplaceAllString(out, "")
out = leakedMetaMarkerPattern.ReplaceAllString(out, "")
+ out = leakedAgentXMLPattern.ReplaceAllString(out, "")
return out
}
diff --git a/internal/adapter/openai/tool_history_sanitize_test.go b/internal/adapter/openai/tool_history_sanitize_test.go
index 3eb434a..5c12fb0 100644
--- a/internal/adapter/openai/tool_history_sanitize_test.go
+++ b/internal/adapter/openai/tool_history_sanitize_test.go
@@ -86,13 +86,21 @@ func TestSanitizeLeakedToolHistoryRemovesLeakedWireToolCallAndResult(t *testing.
}
+// TestSanitizeLeakedToolHistoryRemovesStandaloneMetaMarkers checks that meta
+// markers written with "_" or "▁" (U+2581) separators are stripped, leaving
+// only the surrounding text.
func TestSanitizeLeakedToolHistoryRemovesStandaloneMetaMarkers(t *testing.T) {
- raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C"
+ raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<|end▁of▁thinking|>D<|end▁of▁sentence|>E"
got := sanitizeLeakedToolHistory(raw)
- if got != "ABC" {
+ if got != "ABCDE" {
t.Fatalf("unexpected sanitize result for meta markers: %q", got)
}
}
+// TestSanitizeLeakedToolHistoryRemovesAgentXMLLeaks checks that leaked
+// agent-style XML tags are stripped while the text they wrap is preserved.
+// The input must actually contain the tags; otherwise the test would pass
+// even if the sanitizer did nothing.
+func TestSanitizeLeakedToolHistoryRemovesAgentXMLLeaks(t *testing.T) {
+	const raw = "Done.<attempt_completion><result>Some final answer</result></attempt_completion>"
+	const want = "Done.Some final answer"
+	if got := sanitizeLeakedToolHistory(raw); got != want {
+		t.Fatalf("unexpected sanitize result for agent XML leak: %q", got)
+	}
+}
+
func TestProcessToolSieveChunkSplitsResultHistoryBoundary(t *testing.T) {
var state toolStreamSieveState
parts := []string{
diff --git a/internal/adapter/openai/tool_sieve_xml.go b/internal/adapter/openai/tool_sieve_xml.go
index aa97b5e..37cb8b3 100644
--- a/internal/adapter/openai/tool_sieve_xml.go
+++ b/internal/adapter/openai/tool_sieve_xml.go
@@ -9,8 +9,12 @@ import (
// --- XML tool call support for the streaming sieve ---
-var xmlToolCallClosingTags = []string{"", "", "", "", "", ""}
-var xmlToolCallOpeningTags = []string{"", "", "", "", "", "",
+ // Agent-style XML tags (Roo Code, Cline, etc.)
+ "", "", "", ""}
+var xmlToolCallOpeningTags = []string{""},
{""},
{""},
+ // Agent-style: these are XML "tool call" patterns from coding agents.
+ // They get captured → parsed. If parsing fails, the block is consumed
+ // (swallowed) to prevent raw XML from leaking to the client.
+ {""},
+ {""},
+ {""},
}
// xmlToolCallBlockPattern matches a complete XML tool call block (wrapper or standalone).
-var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(\s*(?:.*?)\s*|\s*(?:.*?)\s*|]*>(?:.*?)|]*>(?:.*?)|(?:.*?))`)
+var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(\s*(?:.*?)\s*|\s*(?:.*?)\s*|]*>(?:.*?)|]*>(?:.*?)|(?:.*?)|(?:.*?)|(?:.*?)|(?:.*?))`)
// xmlToolTagsToDetect is the set of XML tag prefixes used by findToolSegmentStart.
var xmlToolTagsToDetect = []string{"", "", "", ""}
+ "", "",
+ // Agent-style tags
+ "", "", ""}
// consumeXMLToolCapture tries to extract complete XML tool call blocks from captured text.
func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) {
@@ -120,6 +132,11 @@ func looksLikeXMLToolTagFragment(s string) bool {
"function_calls>", "function_call>", "/function_calls>", "/function_call>",
"invoke>", "/invoke>", "tool_use>", "/tool_use>",
"tool_name>", "/tool_name>", "parameters>", "/parameters>",
+ // Agent-style tag fragments
+ "attempt_completion>", "/attempt_completion>",
+ "ask_followup_question>", "/ask_followup_question>",
+ "new_task>", "/new_task>",
+ "result>", "/result>",
}
for _, f := range fragments {
if strings.Contains(lower, f) {
diff --git a/internal/adapter/openai/tool_sieve_xml_test.go b/internal/adapter/openai/tool_sieve_xml_test.go
index b678345..9201189 100644
--- a/internal/adapter/openai/tool_sieve_xml_test.go
+++ b/internal/adapter/openai/tool_sieve_xml_test.go
@@ -285,3 +285,35 @@ func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) {
t.Fatal("expected tool calls to be extracted")
}
}
+
+// TestProcessToolSieveInterceptsAttemptCompletionLeak streams an
+// attempt_completion block through the sieve in several chunks and checks that
+// the leading plain text is emitted while none of the agent XML tags reach the
+// text output, even if the block fails to parse as a valid tool call.
+func TestProcessToolSieveInterceptsAttemptCompletionLeak(t *testing.T) {
+	var state toolStreamSieveState
+	toolNames := []string{"attempt_completion"}
+	// The tags are split across chunks to mimic how a stream delivers them.
+	chunks := []string{
+		"Done with task.\n",
+		"<attempt_completion>\n",
+		"  <result>Here is the answer</result>\n",
+		"</attempt_completion>",
+	}
+
+	var events []toolStreamEvent
+	for _, c := range chunks {
+		events = append(events, processToolSieveChunk(&state, c, toolNames)...)
+	}
+	events = append(events, flushToolSieve(&state, toolNames)...)
+
+	// Concatenate every text fragment the sieve emitted.
+	var text strings.Builder
+	for _, evt := range events {
+		text.WriteString(evt.Content)
+	}
+	textContent := text.String()
+
+	if !strings.Contains(textContent, "Done with task.\n") {
+		t.Fatalf("expected leading text to be emitted, got %q", textContent)
+	}
+
+	// Searching for "" would always succeed, so look for the concrete tag names.
+	if strings.Contains(textContent, "<attempt_completion>") || strings.Contains(textContent, "result>") {
+		t.Fatalf("agent XML tag content leaked to text: %q", textContent)
+	}
+}