// NOTE(review): this test file looks like a lossy copy and should probably be
// restored from version control before anything in it is relied on. It
// exercises processToolSieveChunk, flushToolSieve, findToolSegmentStart,
// findPartialXMLToolTagStart and hasOpenXMLToolTag, checking that tool-call
// markup stays out of the emitted event Content and that ToolCalls are
// reported. Observations that suggest the text is damaged:
//
//   - Whole functions share a physical line with "//" comments. A Go line
//     comment runs to the end of the line, so as laid out everything after
//     the first "//" on each line is comment text, not code.
//   - Some statements cannot parse, e.g. a strings.Contains call whose
//     argument list runs straight into the tail of a []string literal, and a
//     table of cases that begins part-way through its first entry with no
//     enclosing func header.
//   - hasOpenXMLToolTag("\nfoo") is required to be true by one check and
//     false by the next, with the same argument text.
//   - strings.Contains(textContent, "") and strings.Contains(evt.Content, "")
//     guard t.Fatalf calls; an empty substring is contained in every string,
//     so those failures fire whenever they are reached.
//
// Presumably angle-bracket tags (the token-by-token chunks spell out
// "<", "tool", "_calls", ">\n") were stripped from the string literals along
// with the original line breaks — TODO confirm against the original file.

package openai import ( "strings" "testing" ) func TestProcessToolSieveInterceptsXMLToolCallWithoutLeak(t *testing.T) { var state toolStreamSieveState // Simulate a model producing XML tool call output chunk by chunk. chunks := []string{ "\n", " \n", " read_file\n", ` {"path":"README.MD"}` + "\n", " \n", "", } var events []toolStreamEvent for _, c := range chunks { events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...) } events = append(events, flushToolSieve(&state, []string{"read_file"})...) var textContent string var toolCalls int for _, evt := range events { if evt.Content != "" { textContent += evt.Content } toolCalls += len(evt.ToolCalls) } if strings.Contains(textContent, "\n \n read_file\n", ` {"path":"go.mod"}` + "\n \n", } var events []toolStreamEvent for _, c := range chunks { events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...) } events = append(events, flushToolSieve(&state, []string{"read_file"})...) var textContent string var toolCalls int for _, evt := range events { if evt.Content != "" { textContent += evt.Content } toolCalls += len(evt.ToolCalls) } // Leading text should be emitted. if !strings.Contains(textContent, "Let me check the file.") { t.Fatalf("expected leading text to be emitted, got %q", textContent) } // The XML itself should NOT leak. 
if strings.Contains(textContent, "\n", 10}, {"tool_call_tag", "prefix \n", 7}, {"invoke_tag", "text body", 5}, {"function_call_tag", "body", 0}, {"no_xml", "just plain text", -1}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { got := findToolSegmentStart(tc.input) if got != tc.want { t.Fatalf("findToolSegmentStart(%q) = %d, want %d", tc.input, got, tc.want) } }) } } func TestFindPartialXMLToolTagStart(t *testing.T) { cases := []struct { name string input string want int }{ {"partial_tool_call", "Hello done", -1}, {"no_lt", "plain text", -1}, {"closed_lt", "a < b > c", -1}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { got := findPartialXMLToolTagStart(tc.input) if got != tc.want { t.Fatalf("findPartialXMLToolTagStart(%q) = %d, want %d", tc.input, got, tc.want) } }) } } func TestHasOpenXMLToolTag(t *testing.T) { if !hasOpenXMLToolTag("\nfoo") { t.Fatal("should detect open XML tool tag without closing tag") } if hasOpenXMLToolTag("\nfoo") { t.Fatal("should return false when closing tag is present") } if hasOpenXMLToolTag("plain text without any XML") { t.Fatal("should return false for plain text") } } // Test the EXACT scenario the user reports: token-by-token streaming where // tag arrives in small pieces. func TestProcessToolSieveTokenByTokenXMLNoLeak(t *testing.T) { var state toolStreamSieveState // Simulate DeepSeek model generating tokens one at a time. chunks := []string{ "<", "tool", "_calls", ">\n", " <", "tool", "_call", ">\n", " <", "tool", "_name", ">", "read", "_file", "\n", " <", "parameters", ">", `{"path"`, `: "README.MD"`, `}`, "\n", " \n", "", } var events []toolStreamEvent for _, c := range chunks { events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...) } events = append(events, flushToolSieve(&state, []string{"read_file"})...) 
var textContent string var toolCalls int for _, evt := range events { if evt.Content != "" { textContent += evt.Content } toolCalls += len(evt.ToolCalls) } if strings.Contains(textContent, "") { t.Fatalf("closing tag fragment leaked to text: %q", textContent) } if strings.Contains(textContent, "read_file") { t.Fatalf("tool name leaked to text: %q", textContent) } if toolCalls == 0 { t.Fatal("expected tool calls to be extracted, got none") } } // Test that flushToolSieve on incomplete XML does NOT leak the raw XML content. func TestFlushToolSieveIncompleteXMLDoesNotLeak(t *testing.T) { var state toolStreamSieveState // XML block starts but stream ends before completion. chunks := []string{ "\n", " \n", " read_file\n", } var events []toolStreamEvent for _, c := range chunks { events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...) } // Stream ends abruptly - flush should NOT dump raw XML. events = append(events, flushToolSieve(&state, []string{"read_file"})...) var textContent string for _, evt := range events { if evt.Content != "" { textContent += evt.Content } } if strings.Contains(textContent, "\n " is NOT emitted as text content. func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) { var state toolStreamSieveState // First chunk is the opening tag - should be held, not emitted. evts1 := processToolSieveChunk(&state, "\n ", []string{"read_file"}) for _, evt := range evts1 { if strings.Contains(evt.Content, "") { t.Fatalf("opening tag leaked on first chunk: %q", evt.Content) } } // Remaining content arrives. evts2 := processToolSieveChunk(&state, "\n read_file\n {\"path\":\"README.MD\"}\n \n", []string{"read_file"}) evts2 = append(evts2, flushToolSieve(&state, []string{"read_file"})...) var textContent string var toolCalls int allEvents := append(evts1, evts2...) 
for _, evt := range allEvents { if evt.Content != "" { textContent += evt.Content } toolCalls += len(evt.ToolCalls) } if strings.Contains(textContent, "\n", " Here is the answer\n", "", } var events []toolStreamEvent for _, c := range chunks { events = append(events, processToolSieveChunk(&state, c, []string{"attempt_completion"})...) } events = append(events, flushToolSieve(&state, []string{"attempt_completion"})...) var textContent string for _, evt := range events { if evt.Content != "" { textContent += evt.Content } } if !strings.Contains(textContent, "Done with task.\n") { t.Fatalf("expected leading text to be emitted, got %q", textContent) } if strings.Contains(textContent, "") || strings.Contains(textContent, "result>") { t.Fatalf("agent XML tag content leaked to text: %q", textContent) } }