refactor: allow and preserve empty tool parameter values while updating sieve to release malformed XML as text

This commit is contained in:
CJACK
2026-05-10 01:05:18 +08:00
parent ddd42e532e
commit 740a78ad5a
13 changed files with 185 additions and 88 deletions

View File

@@ -54,7 +54,7 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
}
if parsed.SawToolCallSyntax {
if rejected == nil || tag.Start < rejected.start {
rejected = &rejectedBlock{start: tag.Start, prefix: prefixPart, suffix: suffixPart}
rejected = &rejectedBlock{start: tag.Start, prefix: prefixPart + xmlBlock, suffix: suffixPart}
}
searchFrom = tag.End + 1
continue
@@ -88,7 +88,7 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
return prefixPart, parsed.Calls, suffixPart, true
}
if parsed.SawToolCallSyntax {
return prefixPart, nil, suffixPart, true
return prefixPart + captured[invokeTag.Start:closeTag.End+1], nil, suffixPart, true
}
return prefixPart + captured[invokeTag.Start:closeTag.End+1], nil, suffixPart, true
}

View File

@@ -1,6 +1,7 @@
package toolstream
import (
"ds2api/internal/toolcall"
"strings"
"testing"
)
@@ -179,8 +180,7 @@ func TestProcessToolSieveInterceptsArbitraryPrefixedToolTagsWithoutLeak(t *testi
}
}
func TestProcessToolSieveSuppressesEmptyDSMLControlSeparatorBlockWithoutLeak(t *testing.T) {
var state State
func TestProcessToolSieveEmitsEmptyDSMLControlSeparatorBlockWithoutLeak(t *testing.T) {
sep := "␂"
chunks := []string{
"<DSML" + sep + "tool_calls>\n",
@@ -189,23 +189,12 @@ func TestProcessToolSieveSuppressesEmptyDSMLControlSeparatorBlockWithoutLeak(t *
" </DSML" + sep + "invoke>\n",
"</DSML" + sep + "tool_calls>",
}
var events []Event
for _, c := range chunks {
events = append(events, ProcessChunk(&state, c, []string{"Read"})...)
calls := collectToolCallsForChunks(t, chunks, []string{"Read"})
if len(calls) != 1 {
t.Fatalf("expected empty control-separator block to produce one call, got %#v", calls)
}
events = append(events, Flush(&state, []string{"Read"})...)
var textContent strings.Builder
toolCalls := 0
for _, evt := range events {
textContent.WriteString(evt.Content)
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 0 {
t.Fatalf("expected empty control-separator block not to produce calls, got %d events=%#v", toolCalls, events)
}
if text := textContent.String(); strings.Contains(strings.ToLower(text), "dsml") || strings.Contains(text, "Read") || strings.Contains(text, sep) {
t.Fatalf("expected empty control-separator block not to leak as text, got %q", text)
if calls[0].Name != "Read" || calls[0].Input["file_path"] != "" {
t.Fatalf("expected empty file_path parameter to be preserved, got %#v", calls)
}
}
@@ -595,7 +584,7 @@ func TestProcessToolSieveNonToolXMLKeepsSuffixForToolParsing(t *testing.T) {
}
}
func TestProcessToolSieveSuppressesMalformedExecutableXMLBlock(t *testing.T) {
func TestProcessToolSieveReleasesMalformedExecutableXMLBlock(t *testing.T) {
var state State
chunk := `<tool_calls><invoke name="read_file"><param>{"path":"README.md"}</param></invoke></tool_calls>`
events := ProcessChunk(&state, chunk, []string{"read_file"})
@@ -611,13 +600,12 @@ func TestProcessToolSieveSuppressesMalformedExecutableXMLBlock(t *testing.T) {
if toolCalls != 0 {
t.Fatalf("expected malformed executable-looking XML not to become a tool call, got %d events=%#v", toolCalls, events)
}
if textContent.Len() != 0 {
t.Fatalf("expected malformed executable-looking XML to be suppressed, got %q", textContent.String())
if textContent.String() != chunk {
t.Fatalf("expected malformed executable-looking XML to be released as text, got %q", textContent.String())
}
}
func TestProcessToolSieveSuppressesAllEmptyDSMLToolBlock(t *testing.T) {
var state State
func TestProcessToolSieveEmitsAllEmptyDSMLToolBlock(t *testing.T) {
chunk := strings.Join([]string{
`<|DSML|tool_calls>`,
`<|DSML|invoke name="Bash">`,
@@ -627,22 +615,69 @@ func TestProcessToolSieveSuppressesAllEmptyDSMLToolBlock(t *testing.T) {
`</|DSML|invoke>`,
`</|DSML|tool_calls>`,
}, "\n")
events := ProcessChunk(&state, chunk, []string{"Bash"})
events = append(events, Flush(&state, []string{"Bash"})...)
calls := collectToolCallsForChunks(t, []string{chunk}, []string{"Bash"})
if len(calls) != 1 {
t.Fatalf("expected all-empty DSML block to produce one tool call, got %#v", calls)
}
if calls[0].Input["command"] != "" || calls[0].Input["description"] != "" || calls[0].Input["timeout"] != "" {
t.Fatalf("expected empty parameters to be preserved, got %#v", calls[0].Input)
}
}
func TestProcessToolSieveEmitsChunkedAllEmptyArbitraryPrefixedToolBlock(t *testing.T) {
chunk := strings.Join([]string{
`<TDSMLtool_calls>`,
` <TDSMLinvoke name="TaskOutput">`,
` <TDSMLparameter name="task_id"></TDSMLparameter>`,
` <TDSMLparameter name="block"></TDSMLparameter>`,
` <TDSMLparameter name="timeout"></TDSMLparameter>`,
` </TDSMLinvoke>`,
` </TDSMLtool_calls>`,
}, "\n")
calls := collectToolCallsForChunks(t, splitEveryNRBytes(chunk, 8), []string{"TaskOutput"})
if len(calls) != 1 {
t.Fatalf("expected chunked all-empty arbitrary-prefixed block to produce one tool call, got %#v", calls)
}
if calls[0].Name != "TaskOutput" || calls[0].Input["task_id"] != "" || calls[0].Input["block"] != "" || calls[0].Input["timeout"] != "" {
t.Fatalf("expected empty TaskOutput parameters to be preserved, got %#v", calls)
}
}
func collectToolCallsForChunks(t *testing.T, chunks []string, toolNames []string) []toolcall.ParsedToolCall {
t.Helper()
var state State
var events []Event
for _, chunk := range chunks {
events = append(events, ProcessChunk(&state, chunk, toolNames)...)
}
events = append(events, Flush(&state, toolNames)...)
var textContent strings.Builder
toolCalls := 0
var calls []toolcall.ParsedToolCall
for _, evt := range events {
textContent.WriteString(evt.Content)
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 0 {
t.Fatalf("expected all-empty DSML block not to produce tool calls, got %d events=%#v", toolCalls, events)
calls = append(calls, evt.ToolCalls...)
}
if textContent.Len() != 0 {
t.Fatalf("expected all-empty DSML block not to leak as text, got %q", textContent.String())
t.Fatalf("expected tool block not to leak as text, got %q", textContent.String())
}
return calls
}
func splitEveryNRBytes(s string, n int) []string {
if n <= 0 {
return []string{s}
}
out := make([]string, 0, len(s)/n+1)
for len(s) > 0 {
if len(s) <= n {
out = append(out, s)
break
}
out = append(out, s[:n])
s = s[n:]
}
return out
}
func TestProcessToolSievePassesThroughFencedXMLToolCallExamples(t *testing.T) {
@@ -776,6 +811,8 @@ func TestFindPartialXMLToolTagStart(t *testing.T) {
{"partial_tool_calls", "Hello <tool_ca", 6},
{"partial_dsml_trailing_pipe", "Hello <|DSML|tool_calls|", 6},
{"partial_dsml_extra_leading_less_than", "Hello <<|DSML|tool_calls", 6},
{"partial_arbitrary_prefix_before_dsml", "Hello <TDS", 6},
{"partial_arbitrary_prefix_after_dsml_pipe", "Hello <TDSML", 6},
{"partial_invoke", "Hello <inv", 6},
{"bare_tool_call_not_held", "Hello <tool_name", -1},
{"partial_lt_only", "Text <", 5},