diff --git a/internal/adapter/openai/tool_sieve_core.go b/internal/adapter/openai/tool_sieve_core.go
index e651445..bc81882 100644
--- a/internal/adapter/openai/tool_sieve_core.go
+++ b/internal/adapter/openai/tool_sieve_core.go
@@ -114,8 +114,14 @@ func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStrea
} else {
content := state.capture.String()
if content != "" {
- state.noteText(content)
- events = append(events, toolStreamEvent{Content: content})
+ // If the captured text looks like an incomplete XML tool call block,
+ // swallow it to prevent leaking raw XML tags to the client.
+ if hasOpenXMLToolTag(content) {
+ // Drop it silently — incomplete tool call.
+ } else {
+ state.noteText(content)
+ events = append(events, toolStreamEvent{Content: content})
+ }
}
}
state.capture.Reset()
@@ -200,6 +206,11 @@ func findToolSegmentStart(s string) int {
if start < 0 {
start = bestKeyIdx
}
+ // If the keyword matched inside an XML tag (e.g. "tool_calls" in "<tool_calls>"),
+ // back up past the '<' to capture the full tag.
+ if start > 0 && s[start-1] == '<' {
+ start--
+ }
if fenceStart, ok := openFenceStartBefore(s, start); ok {
return fenceStart
}
diff --git a/internal/adapter/openai/tool_sieve_xml.go b/internal/adapter/openai/tool_sieve_xml.go
index 885f50a..c4474af 100644
--- a/internal/adapter/openai/tool_sieve_xml.go
+++ b/internal/adapter/openai/tool_sieve_xml.go
@@ -12,6 +12,17 @@ import (
var xmlToolCallClosingTags = []string{"", "", "", "", "", ""}
var xmlToolCallOpeningTags = []string{""},
+ {""},
+ {""},
+ {""},
+ {""},
+ {""},
+}
+
// xmlToolCallBlockPattern matches a complete XML tool call block (wrapper or standalone).
var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(\s*(?:.*?)\s*|\s*(?:.*?)\s*|]*>(?:.*?)|]*>(?:.*?)|(?:.*?))`)
@@ -22,59 +33,45 @@ var xmlToolTagsToDetect = []string{"", "
// consumeXMLToolCapture tries to extract complete XML tool call blocks from captured text.
func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) {
lower := strings.ToLower(captured)
- // Find the earliest XML tool opening tag.
- openIdx := -1
- for _, tag := range xmlToolCallOpeningTags {
- idx := strings.Index(lower, tag)
- if idx >= 0 && (openIdx < 0 || idx < openIdx) {
- openIdx = idx
+ // Find the FIRST matching open/close pair, preferring wrapper tags.
+ // Tag pairs are ordered longest-first (e.g. = 0 {
- absEnd := openIdx + idx + len(tag)
- if closeIdx < 0 || absEnd > closeIdx {
- closeIdx = absEnd
- }
+ // Find the LAST occurrence of the specific closing tag to get the outermost block.
+ closeIdx := strings.LastIndex(lower, pair.close)
+ if closeIdx < openIdx {
+ // Opening tag is present but its specific closing tag hasn't arrived.
+ // Return not-ready so we keep buffering — do NOT fall through to
+ // try inner pairs (e.g. 0 {
- prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
- return prefixPart, parsed, suffixPart, true
+ xmlBlock := captured[openIdx:closeEnd]
+ prefixPart := captured[:openIdx]
+ suffixPart := captured[closeEnd:]
+ parsed := util.ParseToolCalls(xmlBlock, toolNames)
+ if len(parsed) > 0 {
+ prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
+ return prefixPart, parsed, suffixPart, true
+ }
+ // Looks like XML tool syntax but failed to parse — consume it to avoid leak.
+ return prefixPart, nil, suffixPart, true
}
- // Looks like XML tool syntax but failed to parse — consume it to avoid leak.
- return prefixPart, nil, suffixPart, true
+ return "", nil, "", false
}
// hasOpenXMLToolTag returns true if captured text contains an XML tool opening tag
-// but no corresponding closing tag yet.
+// whose SPECIFIC closing tag has not appeared yet.
func hasOpenXMLToolTag(captured string) bool {
lower := strings.ToLower(captured)
- for _, tag := range xmlToolCallOpeningTags {
- if strings.Contains(lower, tag) {
- hasClosed := false
- for _, ct := range xmlToolCallClosingTags {
- if strings.Contains(lower, ct) {
- hasClosed = true
- break
- }
- }
- if !hasClosed {
+ for _, pair := range xmlToolCallTagPairs {
+ if strings.Contains(lower, pair.open) {
+ if !strings.Contains(lower, pair.close) {
return true
}
}
diff --git a/internal/adapter/openai/tool_sieve_xml_test.go b/internal/adapter/openai/tool_sieve_xml_test.go
index b04f87b..b678345 100644
--- a/internal/adapter/openai/tool_sieve_xml_test.go
+++ b/internal/adapter/openai/tool_sieve_xml_test.go
@@ -153,3 +153,135 @@ func TestHasOpenXMLToolTag(t *testing.T) {
t.Fatal("should return false for plain text")
}
}
+
+// Test the EXACT scenario the user reports: token-by-token streaming where
+// every XML tag (opening and closing) arrives in small pieces.
+func TestProcessToolSieveTokenByTokenXMLNoLeak(t *testing.T) {
+ var state toolStreamSieveState
+ // Simulate DeepSeek model generating tokens one at a time.
+ chunks := []string{
+ "<",
+ "tool",
+ "_calls",
+ ">\n",
+ " <",
+ "tool",
+ "_call",
+ ">\n",
+ " <",
+ "tool",
+ "_name",
+ ">",
+ "read",
+ "_file",
+ "",
+ "tool",
+ "_name",
+ ">\n",
+ " <",
+ "parameters",
+ ">",
+ `{"path"`,
+ `: "README.MD"`,
+ `}`,
+ "",
+ "parameters",
+ ">\n",
+ " ",
+ "tool",
+ "_call",
+ ">\n",
+ "",
+ "tool",
+ "_calls",
+ ">",
+ }
+ var events []toolStreamEvent
+ for _, c := range chunks {
+ events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...)
+ }
+ events = append(events, flushToolSieve(&state, []string{"read_file"})...)
+
+ var textContent string
+ var toolCalls int
+ for _, evt := range events {
+ if evt.Content != "" {
+ textContent += evt.Content
+ }
+ toolCalls += len(evt.ToolCalls)
+ }
+
+ if strings.Contains(textContent, "") {
+ t.Fatalf("closing tag fragment leaked to text: %q", textContent)
+ }
+ if strings.Contains(textContent, "read_file") {
+ t.Fatalf("tool name leaked to text: %q", textContent)
+ }
+ if toolCalls == 0 {
+ t.Fatal("expected tool calls to be extracted, got none")
+ }
+}
+
+// Test that flushToolSieve on incomplete XML does NOT leak the raw XML content.
+func TestFlushToolSieveIncompleteXMLDoesNotLeak(t *testing.T) {
+ var state toolStreamSieveState
+ // XML block starts but stream ends before completion.
+ chunks := []string{
+ "\n",
+ " \n",
+ " read_file\n",
+ }
+ var events []toolStreamEvent
+ for _, c := range chunks {
+ events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...)
+ }
+ // Stream ends abruptly - flush should NOT dump raw XML.
+ events = append(events, flushToolSieve(&state, []string{"read_file"})...)
+
+ var textContent string
+ for _, evt := range events {
+ if evt.Content != "" {
+ textContent += evt.Content
+ }
+ }
+
+ if strings.Contains(textContent, "\n " is NOT emitted as text content.
+func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) {
+ var state toolStreamSieveState
+ // First chunk is the opening tag - should be held, not emitted.
+ evts1 := processToolSieveChunk(&state, "\n ", []string{"read_file"})
+ for _, evt := range evts1 {
+ if strings.Contains(evt.Content, "") {
+ t.Fatalf("opening tag leaked on first chunk: %q", evt.Content)
+ }
+ }
+
+ // Remaining content arrives.
+ evts2 := processToolSieveChunk(&state, "\n read_file\n {\"path\":\"README.MD\"}\n \n", []string{"read_file"})
+ evts2 = append(evts2, flushToolSieve(&state, []string{"read_file"})...)
+
+ var textContent string
+ var toolCalls int
+ allEvents := append(evts1, evts2...)
+ for _, evt := range allEvents {
+ if evt.Content != "" {
+ textContent += evt.Content
+ }
+ toolCalls += len(evt.ToolCalls)
+ }
+
+ if strings.Contains(textContent, "' },
+ { open: '' },
+ { open: '' },
+ { open: '' },
+ { open: '' },
+ { open: '' },
+];
+
+const XML_TOOL_OPENING_TAGS = XML_TOOL_TAG_PAIRS.map(p => p.open);
function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
const lower = captured.toLowerCase();
- let openIdx = -1;
- for (const tag of XML_TOOL_OPENING_TAGS) {
- const idx = lower.indexOf(tag);
- if (idx >= 0 && (openIdx < 0 || idx < openIdx)) {
- openIdx = idx;
+ // Find the FIRST matching open/close pair, preferring wrapper tags.
+ for (const pair of XML_TOOL_TAG_PAIRS) {
+ const openIdx = lower.indexOf(pair.open);
+ if (openIdx < 0) {
+ continue;
}
- }
- if (openIdx < 0) {
- return { ready: false, prefix: '', calls: [], suffix: '' };
- }
- let closeIdx = -1;
- for (const tag of XML_TOOL_CLOSING_TAGS) {
- const idx = lower.indexOf(tag, openIdx);
- if (idx >= 0) {
- const absEnd = idx + tag.length;
- if (closeIdx < 0 || absEnd > closeIdx) {
- closeIdx = absEnd;
- }
+ // Find the LAST occurrence of the specific closing tag.
+ const closeIdx = lower.lastIndexOf(pair.close);
+ if (closeIdx < openIdx) {
+ // Opening tag present but specific closing tag hasn't arrived.
+ // Return not-ready — do NOT fall through to inner pairs.
+ return { ready: false, prefix: '', calls: [], suffix: '' };
}
+ const closeEnd = closeIdx + pair.close.length;
+ const xmlBlock = captured.slice(openIdx, closeEnd);
+ let prefixPart = captured.slice(0, openIdx);
+ let suffixPart = captured.slice(closeEnd);
+ const parsed = parseToolCalls(xmlBlock, toolNames);
+ if (Array.isArray(parsed) && parsed.length > 0) {
+ const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
+ return {
+ ready: true,
+ prefix: trimmedFence.prefix,
+ calls: parsed,
+ suffix: trimmedFence.suffix,
+ };
+ }
+ // XML tool syntax but failed to parse — consume to avoid leak.
+ return { ready: true, prefix: prefixPart, calls: [], suffix: suffixPart };
}
- if (closeIdx <= 0) {
- return { ready: false, prefix: '', calls: [], suffix: '' };
- }
- const xmlBlock = captured.slice(openIdx, closeIdx);
- let prefixPart = captured.slice(0, openIdx);
- let suffixPart = captured.slice(closeIdx);
- const parsed = parseToolCalls(xmlBlock, toolNames);
- if (Array.isArray(parsed) && parsed.length > 0) {
- const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
- return {
- ready: true,
- prefix: trimmedFence.prefix,
- calls: parsed,
- suffix: trimmedFence.suffix,
- };
- }
- return { ready: true, prefix: prefixPart, calls: [], suffix: suffixPart };
+ return { ready: false, prefix: '', calls: [], suffix: '' };
}
function hasOpenXMLToolTag(captured) {
const lower = captured.toLowerCase();
- for (const tag of XML_TOOL_OPENING_TAGS) {
- if (lower.includes(tag)) {
- let hasClosed = false;
- for (const ct of XML_TOOL_CLOSING_TAGS) {
- if (lower.includes(ct)) {
- hasClosed = true;
- break;
- }
- }
- if (!hasClosed) {
+ for (const pair of XML_TOOL_TAG_PAIRS) {
+ if (lower.includes(pair.open)) {
+ if (!lower.includes(pair.close)) {
return true;
}
}
diff --git a/internal/js/helpers/stream-tool-sieve/sieve.js b/internal/js/helpers/stream-tool-sieve/sieve.js
index bd7e7cc..43c9224 100644
--- a/internal/js/helpers/stream-tool-sieve/sieve.js
+++ b/internal/js/helpers/stream-tool-sieve/sieve.js
@@ -191,7 +191,12 @@ function findToolSegmentStart(state, s) {
}
const keyIdx = bestKeyIdx;
const start = s.slice(0, keyIdx).lastIndexOf('{');
- const candidateStart = start >= 0 ? start : keyIdx;
+ let candidateStart = start >= 0 ? start : keyIdx;
+ // If the keyword matched inside an XML tag (e.g. "tool_calls" in "<tool_calls>"),
+ // back up past the '<' to capture the full tag.
+ if (candidateStart > 0 && s[candidateStart - 1] === '<') {
+ candidateStart--;
+ }
if (!insideCodeFenceWithState(state, s.slice(0, candidateStart))) {
return candidateStart;
}