package toolstream
import (
"strings"
"testing"
)
// TestProcessToolSieveInterceptsXMLToolCallWithoutLeak feeds a chunked tool
// call through ProcessChunk and Flush with "read_file" as the only allowed
// tool name, then accumulates the emitted text and the number of tool calls.
//
// NOTE(review): in this copy the chunk literals look stripped of their markup,
// and the strings.Contains assertion runs straight into a second chunk list
// followed by DSML-specific checks — presumably a separate DSML test was fused
// into this one. Confirm against the upstream file before relying on it.
func TestProcessToolSieveInterceptsXMLToolCallWithoutLeak(t *testing.T) {
var state State
// Simulate a model producing XML tool call output chunk by chunk.
chunks := []string{
"\n",
` ` + "\n",
` README.MD` + "\n",
" \n",
"",
}
var events []Event
for _, c := range chunks {
events = append(events, ProcessChunk(&state, c, []string{"read_file"})...)
}
events = append(events, Flush(&state, []string{"read_file"})...)
var textContent string
var toolCalls int
for _, evt := range events {
if evt.Content != "" {
textContent += evt.Content
}
toolCalls += len(evt.ToolCalls)
}
if strings.Contains(textContent, "\n",
` <|DSML|invoke name="read_file">` + "\n",
` <|DSML|parameter name="path">README.MD|DSML|parameter>` + "\n",
" |DSML|invoke>\n",
"|DSML|tool_calls>",
}
var events []Event
for _, c := range chunks {
events = append(events, ProcessChunk(&state, c, []string{"read_file"})...)
}
events = append(events, Flush(&state, []string{"read_file"})...)
var textContent string
var toolCalls int
for _, evt := range events {
textContent += evt.Content
toolCalls += len(evt.ToolCalls)
}
if strings.Contains(strings.ToLower(textContent), "dsml") || strings.Contains(textContent, "read_file") {
t.Fatalf("DSML tool call content leaked to text: %q", textContent)
}
if toolCalls != 1 {
t.Fatalf("expected one DSML tool call, got %d events=%#v", toolCalls, events)
}
}
// TestProcessToolSieveInterceptsDSMLTrailingPipeToolCallWithoutLeak streams a
// DSML block whose opening wrapper ends in a trailing pipe and requires that
// exactly one tool call is reported while neither "dsml" (case-insensitive)
// nor the tool name "terminal" appears in the emitted text.
func TestProcessToolSieveInterceptsDSMLTrailingPipeToolCallWithoutLeak(t *testing.T) {
	allowed := []string{"terminal"}
	stream := []string{
		"<|DSML|tool_calls| \n",
		` <|DSML|invoke name="terminal">` + "\n",
		` <|DSML|parameter name="command">|DSML|parameter>` + "\n",
		` <|DSML|parameter name="timeout">|DSML|parameter>` + "\n",
		" |DSML|invoke>\n",
		"|DSML|tool_calls>",
	}

	var (
		sieve  State
		events []Event
	)
	for i := range stream {
		events = append(events, ProcessChunk(&sieve, stream[i], allowed)...)
	}
	events = append(events, Flush(&sieve, allowed)...)

	// Fold every event into one text buffer and one call counter.
	var emitted strings.Builder
	callCount := 0
	for i := range events {
		emitted.WriteString(events[i].Content)
		callCount += len(events[i].ToolCalls)
	}

	text := emitted.String()
	if strings.Contains(strings.ToLower(text), "dsml") || strings.Contains(text, "terminal") {
		t.Fatalf("trailing-pipe DSML tool call leaked to text: %q events=%#v", text, events)
	}
	if callCount != 1 {
		t.Fatalf("expected one trailing-pipe DSML tool call, got %d events=%#v", callCount, events)
	}
}
// TestProcessToolSieveInterceptsExtraLeadingLessThanDSMLToolCallWithoutLeak
// streams a DSML block whose opening tags start with a doubled "<". It fails
// if any "<" or the tool name "Bash" reaches the text output, or if the number
// of tool calls is not exactly one.
func TestProcessToolSieveInterceptsExtraLeadingLessThanDSMLToolCallWithoutLeak(t *testing.T) {
	toolNames := []string{"Bash"}

	var (
		sieve  State
		events []Event
	)
	for _, piece := range []string{
		"<<|DSML|tool_calls>\n",
		` <<|DSML|invoke name="Bash">` + "\n",
		` <<|DSML|parameter name="command">|DSML|parameter>` + "\n",
		" |DSML|invoke>\n",
		"|DSML|tool_calls>",
	} {
		events = append(events, ProcessChunk(&sieve, piece, toolNames)...)
	}
	events = append(events, Flush(&sieve, toolNames)...)

	var out strings.Builder
	var seen int
	for _, ev := range events {
		seen += len(ev.ToolCalls)
		out.WriteString(ev.Content)
	}

	text := out.String()
	leaked := strings.Contains(text, "<") || strings.Contains(text, "Bash")
	if leaked {
		t.Fatalf("extra-leading-less-than DSML tool call leaked to text: %q events=%#v", text, events)
	}
	if seen != 1 {
		t.Fatalf("expected one extra-leading-less-than DSML tool call, got %d events=%#v", seen, events)
	}
}
// TestProcessToolSieveInterceptsRepeatedDSMLPrefixNoiseWithoutLeak pushes the
// chunk sequence below through the sieve with "Bash" allowed and expects one
// tool call with no "dsml" (case-insensitive) or "Bash" in the emitted text.
func TestProcessToolSieveInterceptsRepeatedDSMLPrefixNoiseWithoutLeak(t *testing.T) {
	var sieve State
	var events []Event

	// feed pushes one chunk through the sieve and records whatever it emits.
	feed := func(chunk string) {
		events = append(events, ProcessChunk(&sieve, chunk, []string{"Bash"})...)
	}
	feed("<\n")
	feed(` <` + "\n")
	feed(` <` + "\n")
	feed(" \n")
	feed("")
	events = append(events, Flush(&sieve, []string{"Bash"})...)

	var collected strings.Builder
	calls := 0
	for idx := range events {
		collected.WriteString(events[idx].Content)
		calls += len(events[idx].ToolCalls)
	}

	text := collected.String()
	if strings.Contains(strings.ToLower(text), "dsml") || strings.Contains(text, "Bash") {
		t.Fatalf("repeated-prefix DSML tool call leaked to text: %q events=%#v", text, events)
	}
	if calls != 1 {
		t.Fatalf("expected one repeated-prefix DSML tool call, got %d events=%#v", calls, events)
	}
}
// TestProcessToolSieveHandlesLongXMLToolCall expects a tool call carrying a
// 4096-byte "content" argument to yield exactly one tool call, no text output,
// and a payload identical to the one generated here.
//
// NOTE(review): splitAt is computed but never read, and the chunk list holds a
// single whitespace-only string in this copy, so the payload is never sent to
// the sieve. The fixture looks truncated — confirm against upstream.
func TestProcessToolSieveHandlesLongXMLToolCall(t *testing.T) {
var state State
const toolName = "write_to_file"
// 4096 repetitions of "x" stand in for a large argument body.
payload := strings.Repeat("x", 4096)
splitAt := len(payload) / 2
chunks := []string{
"\n \n \n \n",
}
var events []Event
for _, c := range chunks {
events = append(events, ProcessChunk(&state, c, []string{toolName})...)
}
events = append(events, Flush(&state, []string{toolName})...)
var textContent strings.Builder
toolCalls := 0
var gotPayload any
for _, evt := range events {
if evt.Content != "" {
textContent.WriteString(evt.Content)
}
// Capture the "content" argument of the first tool call only.
if len(evt.ToolCalls) > 0 && gotPayload == nil {
gotPayload = evt.ToolCalls[0].Input["content"]
}
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 1 {
t.Fatalf("expected one long XML tool call, got %d events=%#v", toolCalls, events)
}
if textContent.Len() != 0 {
t.Fatalf("expected no leaked text for long XML tool call, got %q", textContent.String())
}
// A non-string argument leaves got empty, which then fails the comparison.
got, _ := gotPayload.(string)
if got != payload {
t.Fatalf("expected long XML payload to survive intact, got len=%d want=%d", len(got), len(payload))
}
}
// TestProcessToolSieveKeepsCDATAEmbeddedToolClosingBuffered builds a large
// markdown-like payload containing a fenced xml section, streams chunks with
// "Write" as the allowed tool, and requires that the chunks at index 0 and 1
// emit neither text nor tool calls. After Flush it expects exactly one tool
// call whose "content" argument equals payload, with no text emitted.
//
// NOTE(review): innerClose is computed from an empty search string and is
// never read, and the chunk list no longer contains the payload in this copy.
// The fixture looks truncated — confirm against upstream.
func TestProcessToolSieveKeepsCDATAEmbeddedToolClosingBuffered(t *testing.T) {
var state State
payload := strings.Join([]string{
"# DS2API 4.0 更新内容",
"",
strings.Repeat("x", 4096),
"```xml",
"",
" ",
" x",
" ",
"",
"```",
"tail",
}, "\n")
innerClose := strings.Index(payload, "") + len("")
chunks := []string{
"\n \n \n DS2API-4.0-Release-Notes.md\n \n",
}
var events []Event
for i, c := range chunks {
next := ProcessChunk(&state, c, []string{"Write"})
// The first two chunks must stay fully buffered: no text, no tool calls.
if i <= 1 {
for _, evt := range next {
if evt.Content != "" || len(evt.ToolCalls) > 0 {
t.Fatalf("expected no events before outer closing tag, chunk=%d events=%#v", i, next)
}
}
}
events = append(events, next...)
}
events = append(events, Flush(&state, []string{"Write"})...)
var textContent strings.Builder
var gotPayload string
toolCalls := 0
for _, evt := range events {
textContent.WriteString(evt.Content)
if len(evt.ToolCalls) > 0 {
toolCalls += len(evt.ToolCalls)
// Overwritten on every event that carries tool calls; the last one wins.
gotPayload, _ = evt.ToolCalls[0].Input["content"].(string)
}
}
if toolCalls != 1 {
t.Fatalf("expected one parsed tool call, got %d events=%#v", toolCalls, events)
}
if textContent.Len() != 0 {
t.Fatalf("expected no leaked text, got %q", textContent.String())
}
if gotPayload != payload {
t.Fatalf("expected full CDATA payload to survive intact, got len=%d want=%d", len(gotPayload), len(payload))
}
}
// TestProcessToolSieveKeepsExtremeHereDocCDATAUntilOuterClose builds a shell
// heredoc command whose body itself contains a fenced DSML tool-call sample,
// streams a DSML block with "Bash" allowed, and requires that chunks 0 through
// 2 emit neither text nor tool calls. After Flush it expects one tool call
// whose "command" argument equals command, with no text emitted.
//
// NOTE(review): innerClose is computed but never read, and the parameter
// chunks no longer embed command in this copy. The fixture looks truncated —
// confirm against upstream.
func TestProcessToolSieveKeepsExtremeHereDocCDATAUntilOuterClose(t *testing.T) {
var state State
command := strings.Join([]string{
"cat > docs/project-value.md << 'ENDOFFILE'",
"# DS2API project value",
"",
"```xml",
`<|DSML|tool_calls>`,
` <|DSML|invoke name="Bash">`,
` <|DSML|parameter name="command">&1]]>|DSML|parameter>`,
` |DSML|invoke>`,
`|DSML|tool_calls>`,
"```",
"",
"Only the literal `]]>` needs special handling.",
"",
"ENDOFFILE",
`echo "Done. Lines: $(wc -l < docs/project-value.md)"`,
}, "\n")
// Byte offset just past the first closing wrapper found inside command.
innerClose := strings.Index(command, `|DSML|tool_calls>`) + len(`|DSML|tool_calls>`)
chunks := []string{
`<|DSML|tool_calls>` + "\n",
`<|DSML|invoke name="Bash">` + "\n",
`<|DSML|parameter name="command">|DSML|parameter>` + "\n",
`<|DSML|parameter name="description">|DSML|parameter>` + "\n",
`|DSML|invoke>` + "\n",
`|DSML|tool_calls>`,
}
var events []Event
for i, c := range chunks {
next := ProcessChunk(&state, c, []string{"Bash"})
// The first three chunks must stay fully buffered: no text, no tool calls.
if i <= 2 {
for _, evt := range next {
if evt.Content != "" || len(evt.ToolCalls) > 0 {
t.Fatalf("expected no events before outer close, chunk=%d events=%#v", i, next)
}
}
}
events = append(events, next...)
}
events = append(events, Flush(&state, []string{"Bash"})...)
var textContent strings.Builder
var gotCommand string
toolCalls := 0
for _, evt := range events {
textContent.WriteString(evt.Content)
if len(evt.ToolCalls) > 0 {
toolCalls += len(evt.ToolCalls)
// Overwritten on every event that carries tool calls; the last one wins.
gotCommand, _ = evt.ToolCalls[0].Input["command"].(string)
}
}
if toolCalls != 1 {
t.Fatalf("expected one parsed tool call, got %d events=%#v", toolCalls, events)
}
if textContent.Len() != 0 {
t.Fatalf("expected no leaked text, got %q", textContent.String())
}
if gotCommand != command {
t.Fatalf("expected full heredoc command to survive, got len=%d want=%d", len(gotCommand), len(command))
}
}
// TestProcessToolSieveKeepsCompactCDATAWithImmediateFencedDSML sends the chunk
// list below to the sieve with "Write" as the allowed tool and requires one
// tool call whose "content" argument equals the fenced DSML sample built here,
// with nothing emitted as text.
func TestProcessToolSieveKeepsCompactCDATAWithImmediateFencedDSML(t *testing.T) {
	sampleLines := []string{
		"```xml",
		`<|DSML|tool_calls>`,
		` <|DSML|invoke name="Bash">`,
		` <|DSML|parameter name="command">|DSML|parameter>`,
		` |DSML|invoke>`,
		`|DSML|tool_calls>`,
		"```",
		"tail",
	}
	want := strings.Join(sampleLines, "\n")
	allowed := []string{"Write"}

	var (
		sieve  State
		events []Event
	)
	for _, piece := range []string{
		``,
	} {
		events = append(events, ProcessChunk(&sieve, piece, allowed)...)
	}
	events = append(events, Flush(&sieve, allowed)...)

	var (
		emitted strings.Builder
		got     string
		calls   int
	)
	for _, ev := range events {
		emitted.WriteString(ev.Content)
		if len(ev.ToolCalls) == 0 {
			continue
		}
		calls += len(ev.ToolCalls)
		// The last event that carries tool calls determines the compared value.
		got, _ = ev.ToolCalls[0].Input["content"].(string)
	}

	if calls != 1 {
		t.Fatalf("expected one compact CDATA tool call, got %d events=%#v", calls, events)
	}
	if emitted.Len() != 0 {
		t.Fatalf("expected no leaked text, got %q", emitted.String())
	}
	if got != want {
		t.Fatalf("expected compact CDATA content to survive, got len=%d want=%d", len(got), len(want))
	}
}
// TestProcessToolSieveFallsBackWhenCDATANeverCloses expects the sieve to still
// produce exactly one "Write" tool call, with a "content" argument of
// "hello world" and no text output, for the chunk list below.
func TestProcessToolSieveFallsBackWhenCDATANeverCloses(t *testing.T) {
	allowed := []string{"Write"}
	stream := []string{
		"\n \n \n \n",
	}

	var sieve State
	var events []Event
	for i := range stream {
		events = append(events, ProcessChunk(&sieve, stream[i], allowed)...)
	}
	events = append(events, Flush(&sieve, allowed)...)

	var emitted strings.Builder
	calls := 0
	for i := range events {
		ev := events[i]
		if ev.Content != "" {
			emitted.WriteString(ev.Content)
		}
		calls += len(ev.ToolCalls)
		if len(ev.ToolCalls) == 0 {
			continue
		}
		// Each event that carries tool calls has its first call's payload checked.
		got, _ := ev.ToolCalls[0].Input["content"].(string)
		if got != "hello world" {
			t.Fatalf("expected recovered CDATA payload, got %q", got)
		}
	}

	if calls != 1 {
		t.Fatalf("expected unclosed CDATA payload to still parse, got %d tool calls events=%#v", calls, events)
	}
	if emitted.Len() != 0 {
		t.Fatalf("expected no leaked text, got %q", emitted.String())
	}
}
// TestProcessToolSieveXMLWithLeadingText streams a prose sentence followed by
// tool-call chunks with "read_file" allowed and asserts that the prose
// "Let me check the file." is present in the emitted text.
//
// NOTE(review): the assertion that follows the "should NOT leak" comment is an
// unterminated string that runs into what looks like a different test (plain
// XML payload passing through unchanged). This copy appears to have lost text
// there — confirm against upstream.
func TestProcessToolSieveXMLWithLeadingText(t *testing.T) {
var state State
// Model outputs some prose then an XML tool call.
chunks := []string{
"Let me check the file.\n",
"\n \n",
` go.mod` + "\n \n",
}
var events []Event
for _, c := range chunks {
events = append(events, ProcessChunk(&state, c, []string{"read_file"})...)
}
events = append(events, Flush(&state, []string{"read_file"})...)
var textContent string
var toolCalls int
for _, evt := range events {
if evt.Content != "" {
textContent += evt.Content
}
toolCalls += len(evt.ToolCalls)
}
// Leading text should be emitted.
if !strings.Contains(textContent, "Let me check the file.") {
t.Fatalf("expected leading text to be emitted, got %q", textContent)
}
// The XML itself should NOT leak.
if strings.Contains(textContent, "示例 XMLplain text xml payload`
events := ProcessChunk(&state, chunk, []string{"read_file"})
events = append(events, Flush(&state, []string{"read_file"})...)
var textContent strings.Builder
toolCalls := 0
for _, evt := range events {
textContent.WriteString(evt.Content)
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 0 {
t.Fatalf("expected no tool calls for plain XML payload, got %d events=%#v", toolCalls, events)
}
if textContent.String() != chunk {
t.Fatalf("expected XML payload to pass through unchanged, got %q", textContent.String())
}
}
// TestProcessToolSieveNonToolXMLKeepsSuffixForToolParsing passes one chunk
// through ProcessChunk and Flush with "read_file" allowed and asserts that the
// text "plain xml" is preserved in the emitted output.
//
// NOTE(review): the second assertion is cut off mid-expression and continues
// into what looks like a different test (malformed executable-looking XML
// being suppressed). This copy appears to have lost text there — confirm
// against upstream.
func TestProcessToolSieveNonToolXMLKeepsSuffixForToolParsing(t *testing.T) {
var state State
chunk := `plain xmlREADME.MD`
events := ProcessChunk(&state, chunk, []string{"read_file"})
events = append(events, Flush(&state, []string{"read_file"})...)
var textContent strings.Builder
toolCalls := 0
for _, evt := range events {
textContent.WriteString(evt.Content)
toolCalls += len(evt.ToolCalls)
}
if !strings.Contains(textContent.String(), `plain xml`) {
t.Fatalf("expected leading non-tool XML to be preserved, got %q", textContent.String())
}
if strings.Contains(textContent.String(), `{"path":"README.md"}`
events := ProcessChunk(&state, chunk, []string{"read_file"})
events = append(events, Flush(&state, []string{"read_file"})...)
var textContent strings.Builder
toolCalls := 0
for _, evt := range events {
textContent.WriteString(evt.Content)
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 0 {
t.Fatalf("expected malformed executable-looking XML not to become a tool call, got %d events=%#v", toolCalls, events)
}
if textContent.Len() != 0 {
t.Fatalf("expected malformed executable-looking XML to be suppressed, got %q", textContent.String())
}
}
// TestProcessToolSieveSuppressesAllEmptyDSMLToolBlock feeds a complete DSML
// "Bash" invocation whose three parameters hold nothing beyond whitespace, and
// expects the sieve to emit neither a tool call nor any text for it.
func TestProcessToolSieveSuppressesAllEmptyDSMLToolBlock(t *testing.T) {
	lines := []string{
		`<|DSML|tool_calls>`,
		`<|DSML|invoke name="Bash">`,
		`<|DSML|parameter name="command">|DSML|parameter>`,
		`<|DSML|parameter name="description"> |DSML|parameter>`,
		`<|DSML|parameter name="timeout">|DSML|parameter>`,
		`|DSML|invoke>`,
		`|DSML|tool_calls>`,
	}
	block := strings.Join(lines, "\n")

	var sieve State
	events := ProcessChunk(&sieve, block, []string{"Bash"})
	events = append(events, Flush(&sieve, []string{"Bash"})...)

	var emitted strings.Builder
	var calls int
	for i := range events {
		calls += len(events[i].ToolCalls)
		emitted.WriteString(events[i].Content)
	}

	if calls != 0 {
		t.Fatalf("expected all-empty DSML block not to produce tool calls, got %d events=%#v", calls, events)
	}
	if emitted.Len() != 0 {
		t.Fatalf("expected all-empty DSML block not to leak as text, got %q", emitted.String())
	}
}
// TestProcessToolSievePassesThroughFencedXMLToolCallExamples streams prose
// containing two fenced code examples, split across chunk boundaries, with
// "read_file" and "search" allowed. It expects no tool calls and expects the
// emitted text to equal the concatenation of all chunks.
func TestProcessToolSievePassesThroughFencedXMLToolCallExamples(t *testing.T) {
	pieces := []string{
		"Before first example.\n```",
		"xml\nREADME.md\n```\n",
		"Between examples.\n```xml\n",
		"golang\n",
		"```\nAfter examples.",
	}
	// The expected output is simply every piece back to back.
	want := strings.Join(pieces, "")
	allowed := []string{"read_file", "search"}

	var (
		sieve  State
		events []Event
	)
	for _, piece := range pieces {
		events = append(events, ProcessChunk(&sieve, piece, allowed)...)
	}
	events = append(events, Flush(&sieve, allowed)...)

	var emitted strings.Builder
	calls := 0
	for _, ev := range events {
		if ev.Content != "" {
			emitted.WriteString(ev.Content)
		}
		calls += len(ev.ToolCalls)
	}

	if calls != 0 {
		t.Fatalf("expected fenced XML examples to stay text, got %d tool calls events=%#v", calls, events)
	}
	if got := emitted.String(); got != want {
		t.Fatalf("expected fenced XML examples to pass through unchanged, got %q", got)
	}
}
// TestProcessToolSieveKeepsPartialXMLTagInsideFencedExample streams a fenced
// xml example followed by "Done." with "read_file" allowed, and expects zero
// tool calls plus emitted text equal to the two chunks concatenated.
func TestProcessToolSieveKeepsPartialXMLTagInsideFencedExample(t *testing.T) {
	pieces := []string{
		"Example:\n```xml\nREADME.md\n```\n",
		"Done.",
	}
	want := strings.Join(pieces, "")

	var sieve State
	var events []Event
	for i := range pieces {
		events = append(events, ProcessChunk(&sieve, pieces[i], []string{"read_file"})...)
	}
	events = append(events, Flush(&sieve, []string{"read_file"})...)

	var emitted strings.Builder
	var calls int
	for i := range events {
		if text := events[i].Content; text != "" {
			emitted.WriteString(text)
		}
		calls += len(events[i].ToolCalls)
	}

	if calls != 0 {
		t.Fatalf("expected partial fenced XML to stay text, got %d tool calls events=%#v", calls, events)
	}
	if emitted.String() != want {
		t.Fatalf("expected partial fenced XML to pass through unchanged, got %q", emitted.String())
	}
}
// TestProcessToolSievePartialXMLTagHeldBack starts by sending a chunk that
// begins with "Hello " to ProcessChunk.
//
// NOTE(review): the ProcessChunk call is cut off mid-argument and continues
// into a table of {name, input, want} cases that are run against
// findToolSegmentStart(nil, input). Presumably two tests were fused here and
// the table's leading entries were lost — confirm against upstream.
func TestProcessToolSievePartialXMLTagHeldBack(t *testing.T) {
var state State
// Chunk ends with a partial XML tool tag.
events := ProcessChunk(&state, "Hello \n", 10},
{"dsml_trailing_pipe_tag", "some text <|DSML|tool_calls| \n", 10},
{"dsml_extra_leading_less_than", "some text <<|DSML|tool_calls>\n", 10},
{"invoke_tag_missing_wrapper", "some text \n", 10},
{"bare_tool_call_text", "prefix \n", -1},
{"xml_inside_code_fence", "```xml\n\n```", -1},
{"no_xml", "just plain text", -1},
{"gemini_json_no_detect", `some text {"functionCall":{"name":"search"}}`, -1},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
got := findToolSegmentStart(nil, tc.input)
if got != tc.want {
t.Fatalf("findToolSegmentStart(%q) = %d, want %d", tc.input, got, tc.want)
}
})
}
}
// TestFindPartialXMLToolTagStart runs findPartialXMLToolTagStart over a small
// table of inputs, one subtest per row, and compares the returned index with
// the expected one. Every row present here expects -1.
func TestFindPartialXMLToolTagStart(t *testing.T) {
	type scenario struct {
		name  string
		input string
		want  int
	}
	table := []scenario{
		{name: "partial_tool_calls", input: "Hello done", want: -1},
		{name: "no_lt", input: "plain text", want: -1},
		{name: "closed_lt", input: "a < b > c", want: -1},
	}

	for i := range table {
		sc := table[i]
		t.Run(sc.name, func(t *testing.T) {
			if got := findPartialXMLToolTagStart(sc.input); got != sc.want {
				t.Fatalf("findPartialXMLToolTagStart(%q) = %d, want %d", sc.input, got, sc.want)
			}
		})
	}
}
// TestHasOpenXMLToolTag checks hasOpenXMLToolTag against three inputs in
// order and aborts on the first one whose result differs from the expected
// boolean, reporting that input's message.
func TestHasOpenXMLToolTag(t *testing.T) {
	checks := []struct {
		input   string
		want    bool
		failure string
	}{
		{input: "\n", want: true, failure: "should detect open XML tool tag without closing tag"},
		{input: "\n\n", want: false, failure: "should return false when closing tag is present"},
		{input: "plain text without any XML", want: false, failure: "should return false for plain text"},
	}

	for _, check := range checks {
		if hasOpenXMLToolTag(check.input) != check.want {
			t.Fatal(check.failure)
		}
	}
}
// TestProcessToolSieveTokenByTokenXMLNoLeak covers the user-reported scenario
// of token-by-token streaming, where a tag arrives split across several small
// chunks ("<", "tool", "_ca", "lls", ">\n", ...). With "read_file" allowed it
// requires that the tool name does not appear in the emitted text and that at
// least one tool call is extracted.
//
// NOTE(review): two chunk literals in the middle of the list are malformed in
// this copy (mismatched quote and backtick), and the closing-tag assertion
// searches for an empty string. The fixture looks stripped of its markup —
// confirm against upstream.
func TestProcessToolSieveTokenByTokenXMLNoLeak(t *testing.T) {
var state State
// Simulate DeepSeek model generating tokens one at a time.
chunks := []string{
"<",
"tool",
"_ca",
"lls",
">\n",
" ` + "\n",
" `,
"README.MD",
"\n",
" \n",
"",
"tool_calls",
">",
}
var events []Event
for _, c := range chunks {
events = append(events, ProcessChunk(&state, c, []string{"read_file"})...)
}
events = append(events, Flush(&state, []string{"read_file"})...)
var textContent string
var toolCalls int
for _, evt := range events {
if evt.Content != "" {
textContent += evt.Content
}
toolCalls += len(evt.ToolCalls)
}
if strings.Contains(textContent, "") {
t.Fatalf("closing tag fragment leaked to text: %q", textContent)
}
if strings.Contains(textContent, "read_file") {
t.Fatalf("tool name leaked to text: %q", textContent)
}
if toolCalls == 0 {
t.Fatal("expected tool calls to be extracted, got none")
}
}
// TestFlushToolSieveIncompleteXMLFallsBackToText covers a stream that ends
// before the tool-call block is complete. The text emitted by ProcessChunk and
// Flush together must equal the raw chunks concatenated, i.e. the unfinished
// block is handed back as plain text.
func TestFlushToolSieveIncompleteXMLFallsBackToText(t *testing.T) {
	allowed := []string{"read_file"}
	// The block is opened here but never closed.
	pieces := []string{
		"\n",
		" \n",
	}

	var (
		sieve  State
		events []Event
	)
	for i := range pieces {
		events = append(events, ProcessChunk(&sieve, pieces[i], allowed)...)
	}
	// No further chunks arrive; Flush is the only remaining source of output.
	events = append(events, Flush(&sieve, allowed)...)

	var emitted strings.Builder
	for i := range events {
		if events[i].Content != "" {
			emitted.WriteString(events[i].Content)
		}
	}

	got := emitted.String()
	if got != strings.Join(pieces, "") {
		t.Fatalf("expected incomplete XML to fall back to raw text, got %q", got)
	}
}
// TestOpeningXMLTagNotLeakedAsContent sends the start of a tool-call block as
// a first chunk, inspects the events from that chunk alone, then sends the
// remainder and flushes, accumulating text and tool-call counts over all
// events.
//
// NOTE(review): the first-chunk check searches for an empty string, and the
// later strings.Contains assertion is cut off and runs into a chunk list and
// assertions for an "attempt_completion" fallback-to-text scenario.
// Presumably two tests were fused here — confirm against upstream.
func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) {
var state State
// First chunk is the opening tag - should be held, not emitted.
evts1 := ProcessChunk(&state, "\n ", []string{"read_file"})
for _, evt := range evts1 {
if strings.Contains(evt.Content, "") {
t.Fatalf("opening tag leaked on first chunk: %q", evt.Content)
}
}
// Remaining content arrives.
evts2 := ProcessChunk(&state, "\n README.MD\n \n", []string{"read_file"})
evts2 = append(evts2, Flush(&state, []string{"read_file"})...)
var textContent string
var toolCalls int
allEvents := append(evts1, evts2...)
for _, evt := range allEvents {
if evt.Content != "" {
textContent += evt.Content
}
toolCalls += len(evt.ToolCalls)
}
if strings.Contains(textContent, "\n",
" Here is the answer\n",
"",
}
var events []Event
for _, c := range chunks {
events = append(events, ProcessChunk(&state, c, []string{"attempt_completion"})...)
}
events = append(events, Flush(&state, []string{"attempt_completion"})...)
var textContent string
for _, evt := range events {
if evt.Content != "" {
textContent += evt.Content
}
}
if !strings.Contains(textContent, "Done with task.\n") {
t.Fatalf("expected leading text to be emitted, got %q", textContent)
}
if textContent != strings.Join(chunks, "") {
t.Fatalf("expected agent XML to fall back to raw text, got %q", textContent)
}
}
// TestProcessToolSievePassesThroughBareToolCallAsText sends a single chunk
// with "read_file" allowed and expects it to come back verbatim as text with
// no tool calls.
func TestProcessToolSievePassesThroughBareToolCallAsText(t *testing.T) {
	const input = `README.md`

	var sieve State
	events := ProcessChunk(&sieve, input, []string{"read_file"})
	events = append(events, Flush(&sieve, []string{"read_file"})...)

	var emitted strings.Builder
	var calls int
	for i := range events {
		calls += len(events[i].ToolCalls)
		emitted.WriteString(events[i].Content)
	}

	if calls != 0 {
		t.Fatalf("expected bare invoke to remain text, got %d events=%#v", calls, events)
	}
	if emitted.String() != input {
		t.Fatalf("expected bare invoke to pass through unchanged, got %q", emitted.String())
	}
}
// TestProcessToolSieveBareInvokeInlineProseDoesNotStall sends one prose chunk
// containing an inline backtick span and, without calling Flush, expects the
// whole chunk to be emitted as text, no tool calls, and state.capturing to be
// false afterwards.
func TestProcessToolSieveBareInvokeInlineProseDoesNotStall(t *testing.T) {
	const prose = "Use `` as plain documentation text."

	var state State
	// Flush is deliberately not called: the text must stream out right away.
	events := ProcessChunk(&state, prose, []string{"read_file"})

	var emitted strings.Builder
	var calls int
	for i := range events {
		calls += len(events[i].ToolCalls)
		emitted.WriteString(events[i].Content)
	}

	if calls != 0 {
		t.Fatalf("expected inline invoke prose to remain text, got %d events=%#v", calls, events)
	}
	if got := emitted.String(); got != prose {
		t.Fatalf("expected inline invoke prose to stream immediately, got %q", got)
	}
	if state.capturing {
		t.Fatal("expected inline invoke prose not to leave stream capture open")
	}
}
// TestProcessToolSieveBareInvokeExampleReleasesWhenNotRepairable streams two
// prose chunks and, without calling Flush, expects both to be emitted as text
// in full, no tool calls, and state.capturing to be false afterwards.
func TestProcessToolSieveBareInvokeExampleReleasesWhenNotRepairable(t *testing.T) {
	pieces := []string{
		`Example: README.md`,
		" then continue.",
	}
	allowed := []string{"read_file"}

	var (
		state  State
		events []Event
	)
	for i := range pieces {
		events = append(events, ProcessChunk(&state, pieces[i], allowed)...)
	}

	var emitted strings.Builder
	calls := 0
	for _, ev := range events {
		emitted.WriteString(ev.Content)
		calls += len(ev.ToolCalls)
	}

	if calls != 0 {
		t.Fatalf("expected non-repairable bare invoke to remain text, got %d events=%#v", calls, events)
	}
	if got := emitted.String(); got != strings.Join(pieces, "") {
		t.Fatalf("expected non-repairable bare invoke to pass through, got %q", got)
	}
	if state.capturing {
		t.Fatal("expected non-repairable bare invoke not to leave stream capture open")
	}
}
// TestProcessToolSieveRepairsMissingOpeningWrapperWithoutLeakingInvokeText
// streams the chunk list below with "read_file" allowed and expects exactly
// one tool call, followed by a substring check on the emitted text.
func TestProcessToolSieveRepairsMissingOpeningWrapperWithoutLeakingInvokeText(t *testing.T) {
	allowed := []string{"read_file"}

	var sieve State
	var events []Event
	for _, piece := range []string{
		"\n",
		" README.md\n",
		"\n",
		"",
	} {
		events = append(events, ProcessChunk(&sieve, piece, allowed)...)
	}
	events = append(events, Flush(&sieve, allowed)...)

	var emitted strings.Builder
	var calls int
	for i := range events {
		calls += len(events[i].ToolCalls)
		emitted.WriteString(events[i].Content)
	}

	if calls != 1 {
		t.Fatalf("expected repaired missing-wrapper stream to emit one tool call, got %d events=%#v", calls, events)
	}
	if text := emitted.String(); strings.Contains(text, "") {
		t.Fatalf("expected repaired missing-wrapper stream not to leak xml text, got %q", text)
	}
}
// TestProcessToolSieveFullwidthPipeVariantDoesNotLeak streams a wrapper written
// with the fullwidth vertical bar (U+FF5C) and "execute_command" allowed. It
// expects exactly one tool call and neither "invoke" nor "execute_command" in
// the emitted text.
func TestProcessToolSieveFullwidthPipeVariantDoesNotLeak(t *testing.T) {
	allowed := []string{"execute_command"}
	pieces := []string{
		"<\uff5ctool_calls>\n",
		"\n",
		"git status\n",
		"\n",
		"\uff5ctool_calls>",
	}

	var (
		sieve  State
		events []Event
	)
	for i := range pieces {
		events = append(events, ProcessChunk(&sieve, pieces[i], allowed)...)
	}
	events = append(events, Flush(&sieve, allowed)...)

	var emitted strings.Builder
	calls := 0
	for _, ev := range events {
		emitted.WriteString(ev.Content)
		calls += len(ev.ToolCalls)
	}

	text := emitted.String()
	if strings.Contains(text, "invoke") || strings.Contains(text, "execute_command") {
		t.Fatalf("fullwidth pipe variant leaked to text: %q", text)
	}
	if calls != 1 {
		t.Fatalf("expected one tool call from fullwidth pipe variant, got %d events=%#v", calls, events)
	}
}
// TestProcessToolSieveFullwidthDSMLPrefixVariantDoesNotLeak streams a DSML
// wrapper, split mid-tag across the first two chunks, that contains two "Bash"
// invocations. It expects two tool calls, both named "Bash", and an empty text
// stream.
func TestProcessToolSieveFullwidthDSMLPrefixVariantDoesNotLeak(t *testing.T) {
	allowed := []string{"Bash"}
	pieces := []string{
		"<|DSML|tool",
		"_calls>\n",
		"<|DSML|invoke name=\"Bash\">\n",
		"<|DSML|parameter name=\"command\">|DSML|parameter>\n",
		"<|DSML|parameter name=\"description\">|DSML|parameter>\n",
		"|DSML|invoke>\n",
		"<|DSML|invoke name=\"Bash\">\n",
		"<|DSML|parameter name=\"command\">/dev/null || echo \"No package.json found\"]]>|DSML|parameter>\n",
		"<|DSML|parameter name=\"description\">|DSML|parameter>\n",
		"|DSML|invoke>\n",
		"|DSML|tool_calls>",
	}

	var (
		sieve  State
		events []Event
	)
	for i := range pieces {
		events = append(events, ProcessChunk(&sieve, pieces[i], allowed)...)
	}
	events = append(events, Flush(&sieve, allowed)...)

	var (
		emitted strings.Builder
		calls   int
		names   []string
	)
	for _, ev := range events {
		emitted.WriteString(ev.Content)
		calls += len(ev.ToolCalls)
		// Record the call names in emission order for the check below.
		for i := range ev.ToolCalls {
			names = append(names, ev.ToolCalls[i].Name)
		}
	}

	if calls != 2 {
		t.Fatalf("expected two tool calls from fullwidth DSML prefix variant, got %d events=%#v", calls, events)
	}
	if len(names) != 2 || names[0] != "Bash" || names[1] != "Bash" {
		t.Fatalf("expected two Bash tool calls, got %v", names)
	}
	if emitted.Len() != 0 {
		t.Fatalf("expected fullwidth DSML prefix variant not to leak text, got %q", emitted.String())
	}
}
// TestProcessToolSieveDSMLPrefixVariantDoesNotLeak streams a block whose
// invoke and parameter tags use the DSML prefix, with "execute_command"
// allowed. It expects one tool call and no "dsml" (case-insensitive) or
// "execute_command" in the emitted text.
func TestProcessToolSieveDSMLPrefixVariantDoesNotLeak(t *testing.T) {
	allowed := []string{"execute_command"}

	var sieve State
	var events []Event
	for _, piece := range []string{
		"\n",
		" <|DSML|invoke name=\"execute_command\">\n",
		" <|DSML|parameter name=\"command\">|DSML|parameter>\n",
		" |DSML|invoke>\n",
		"",
	} {
		events = append(events, ProcessChunk(&sieve, piece, allowed)...)
	}
	events = append(events, Flush(&sieve, allowed)...)

	var emitted strings.Builder
	var calls int
	for i := range events {
		emitted.WriteString(events[i].Content)
		calls += len(events[i].ToolCalls)
	}

	text := emitted.String()
	if strings.Contains(strings.ToLower(text), "dsml") || strings.Contains(text, "execute_command") {
		t.Fatalf("DSML prefix variant leaked to text: %q", text)
	}
	if calls != 1 {
		t.Fatalf("expected one tool call from DSML prefix variant, got %d events=%#v", calls, events)
	}
}
// TestProcessToolSieveDSMLBarePrefixVariantDoesNotLeak streams the chunk list
// below with "execute_command" allowed and expects one tool call and no "dsml"
// (case-insensitive) or "execute_command" in the emitted text.
func TestProcessToolSieveDSMLBarePrefixVariantDoesNotLeak(t *testing.T) {
	var sieve State
	var events []Event

	// feed pushes one chunk through the sieve and records whatever it emits.
	feed := func(chunk string) {
		events = append(events, ProcessChunk(&sieve, chunk, []string{"execute_command"})...)
	}
	feed("\n")
	feed("\n")
	feed("\n")
	feed("\n")
	feed("")
	events = append(events, Flush(&sieve, []string{"execute_command"})...)

	var emitted strings.Builder
	calls := 0
	for _, ev := range events {
		emitted.WriteString(ev.Content)
		calls += len(ev.ToolCalls)
	}

	text := emitted.String()
	if strings.Contains(strings.ToLower(text), "dsml") || strings.Contains(text, "execute_command") {
		t.Fatalf("DSML bare prefix variant leaked to text: %q", text)
	}
	if calls != 1 {
		t.Fatalf("expected one tool call from DSML bare prefix variant, got %d events=%#v", calls, events)
	}
}