refactor: allow and preserve empty tool parameter values while updating sieve to release malformed XML as text

This commit is contained in:
CJACK
2026-05-10 01:05:18 +08:00
parent ddd42e532e
commit 740a78ad5a
13 changed files with 185 additions and 88 deletions

View File

@@ -113,9 +113,10 @@ function filterToolCallsDetailed(parsed, toolNames) {
if (!tc || !tc.name) {
continue;
}
const input = tc.input && typeof tc.input === 'object' && !Array.isArray(tc.input) ? tc.input : {};
calls.push({
name: tc.name,
input: tc.input && typeof tc.input === 'object' && !Array.isArray(tc.input) ? tc.input : {},
input,
});
}
return { calls, rejectedToolNames: [] };

View File

@@ -660,9 +660,17 @@ function hasPartialToolMarkupNameAfterArbitraryPrefix(raw, start) {
if (toolMarkupPrefixAllowsLocalName(raw.slice(start, idx)) && hasToolMarkupNamePrefix(raw, idx)) {
return true;
}
if (toolMarkupPrefixAllowsLocalName(raw.slice(start, idx)) && hasDSMLNamePrefixOrPartial(raw, idx)) {
return true;
}
idx += 1;
}
return false;
return toolMarkupPrefixAllowsLocalName(raw.slice(start));
}
/**
 * Reports whether the text at `start` begins with the "dsml" marker, or is
 * itself a (possibly empty) leading fragment of that marker.
 *
 * @param {string} raw - Text being scanned.
 * @param {number} start - Offset in `raw` where the candidate name begins.
 * @returns {boolean} True on a full or partial "dsml" match.
 */
function hasDSMLNamePrefixOrPartial(raw, start) {
  const marker = 'dsml';
  // NOTE(review): normalizedASCIITailAt is defined elsewhere; presumably it
  // returns a case-folded tail of `raw` from `start` — confirm.
  const normalizedTail = normalizedASCIITailAt(raw, start);
  if (normalizedTail.startsWith(marker)) {
    return true;
  }
  // A tail shorter than the marker (including an empty tail) still counts,
  // because more characters may arrive later.
  return marker.startsWith(normalizedTail);
}
function toolMarkupPrefixAllowsLocalName(prefix) {

View File

@@ -1,5 +1,5 @@
'use strict';
const { parseToolCalls } = require('./parse');
const { parseToolCallsDetailed } = require('./parse');
const {
findToolMarkupTagOutsideIgnored,
findMatchingToolMarkupClose,
@@ -27,19 +27,30 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
const xmlBlock = captured.slice(openTag.start, closeTag.end + 1);
const prefixPart = captured.slice(0, openTag.start);
const suffixPart = captured.slice(closeTag.end + 1);
const parsed = parseToolCalls(xmlBlock, toolNames);
if (Array.isArray(parsed) && parsed.length > 0) {
const parsed = parseToolCallsDetailed(xmlBlock, toolNames);
if (Array.isArray(parsed.calls) && parsed.calls.length > 0) {
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
if (!best || openTag.start < best.start) {
best = {
start: openTag.start,
prefix: trimmedFence.prefix,
calls: parsed,
calls: parsed.calls,
suffix: trimmedFence.suffix,
};
}
break;
}
if (parsed.sawToolCallSyntax) {
if (!rejected || openTag.start < rejected.start) {
rejected = {
start: openTag.start,
prefix: prefixPart + xmlBlock,
suffix: suffixPart,
};
}
searchFrom = openTag.end + 1;
continue;
}
if (!rejected || openTag.start < rejected.start) {
rejected = {
start: openTag.start,
@@ -69,16 +80,19 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
const xmlBlock = '<tool_calls>' + captured.slice(invokeTag.start, closeTag.end + 1);
const prefixPart = captured.slice(0, invokeTag.start);
const suffixPart = captured.slice(closeTag.end + 1);
const parsed = parseToolCalls(xmlBlock, toolNames);
if (Array.isArray(parsed) && parsed.length > 0) {
const parsed = parseToolCallsDetailed(xmlBlock, toolNames);
if (Array.isArray(parsed.calls) && parsed.calls.length > 0) {
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
return {
ready: true,
prefix: trimmedFence.prefix,
calls: parsed,
calls: parsed.calls,
suffix: trimmedFence.suffix,
};
}
if (parsed.sawToolCallSyntax) {
return { ready: true, prefix: prefixPart + captured.slice(invokeTag.start, closeTag.end + 1), calls: [], suffix: suffixPart };
}
return { ready: true, prefix: prefixPart + captured.slice(invokeTag.start, closeTag.end + 1), calls: [], suffix: suffixPart };
}
}

View File

@@ -26,10 +26,13 @@ RULES:
6) Objects use nested XML elements inside the parameter body. Arrays may repeat <item> children.
7) Numbers, booleans, and null stay plain text.
8) Use only the parameter names in the tool schema. Do not invent fields.
9) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <DSMLtool_calls>.
11) Never omit the opening <DSMLtool_calls> tag, even if you already plan to close with </DSMLtool_calls>.
12) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
9) Fill parameters with the actual values required for this call. Do not emit placeholder, blank, or whitespace-only parameters.
10) If a required parameter value is unknown, ask the user or answer normally instead of outputting an empty tool call.
11) For shell tools such as Bash / execute_command, the command/script must be inside the command parameter. Never call them with an empty command.
12) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
13) If you call a tool, the first non-whitespace characters of that tool block must be exactly <DSMLtool_calls>.
14) Never omit the opening <DSMLtool_calls> tag, even if you already plan to close with </DSMLtool_calls>.
15) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
PARAMETER SHAPES:
- string => <DSMLparameter name="x"><![CDATA[value]]></DSMLparameter>
@@ -48,6 +51,12 @@ Wrong 2 — Markdown code fences:
Wrong 3 — missing opening wrapper:
<DSMLinvoke name="TOOL_NAME">...</DSMLinvoke>
</DSMLtool_calls>
Wrong 4 — empty parameters:
<DSMLtool_calls>
<DSMLinvoke name="Bash">
<DSMLparameter name="command"></DSMLparameter>
</DSMLinvoke>
</DSMLtool_calls>
Remember: The ONLY valid way to use tools is the <DSMLtool_calls>...</DSMLtool_calls> block at the end of your response.
` + buildCorrectToolExamples(toolNames)

View File

@@ -119,6 +119,20 @@ func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *te
}
}
// TestBuildToolCallInstructions_RejectsEmptyParametersInPrompt checks that the
// instructions built for the "Bash" tool contain every empty-parameter rule
// and the matching "Wrong 4" counter-example heading.
func TestBuildToolCallInstructions_RejectsEmptyParametersInPrompt(t *testing.T) {
	instructions := BuildToolCallInstructions([]string{"Bash"})
	required := []string{
		"Do not emit placeholder, blank, or whitespace-only parameters.",
		"If a required parameter value is unknown, ask the user or answer normally instead of outputting an empty tool call.",
		"Never call them with an empty command.",
		"Wrong 4 — empty parameters",
	}
	for i := range required {
		if strings.Contains(instructions, required[i]) {
			continue
		}
		// Fail on the first missing fragment and dump the full prompt.
		t.Fatalf("expected empty-parameter instruction %q, got: %s", required[i], instructions)
	}
}
func findInvokeBlocks(text, name string) []string {
open := `<DSMLinvoke name="` + name + `">`
remaining := text

View File

@@ -92,45 +92,11 @@ func filterToolCallsDetailed(parsed []ParsedToolCall) ([]ParsedToolCall, []strin
if tc.Input == nil {
tc.Input = map[string]any{}
}
if len(tc.Input) > 0 && !toolCallInputHasMeaningfulValue(tc.Input) {
continue
}
out = append(out, tc)
}
return out, nil
}
// toolCallInputHasMeaningfulValue reports whether v carries any non-blank
// content. nil and whitespace-only strings are not meaningful; a
// map[string]any or []any is meaningful only when at least one of its
// elements is (checked recursively); every other value is meaningful.
func toolCallInputHasMeaningfulValue(v any) bool {
	var children []any
	switch val := v.(type) {
	case nil:
		return false
	case string:
		return strings.TrimSpace(val) != ""
	case map[string]any:
		// Flatten the map values so both containers share one scan below.
		children = make([]any, 0, len(val))
		for _, child := range val {
			children = append(children, child)
		}
	case []any:
		children = val
	default:
		return true
	}
	// An empty container falls through the loop and yields false.
	for _, child := range children {
		if toolCallInputHasMeaningfulValue(child) {
			return true
		}
	}
	return false
}
func looksLikeToolCallSyntax(text string) bool {
hasDSML, hasCanonical := ContainsToolCallWrapperSyntaxOutsideIgnored(text)
return hasDSML || hasCanonical

View File

@@ -383,13 +383,20 @@ func hasPartialToolMarkupNameAfterArbitraryPrefix(text string, start int) bool {
if toolMarkupPrefixAllowsLocalName(text[start:idx]) && hasToolMarkupNamePrefix(text, idx) {
return true
}
if toolMarkupPrefixAllowsLocalName(text[start:idx]) && hasDSMLNamePrefixOrPartial(text, idx) {
return true
}
_, size := utf8.DecodeRuneInString(text[idx:])
if size <= 0 {
size = 1
}
idx += size
}
return false
return toolMarkupPrefixAllowsLocalName(text[start:])
}
// hasDSMLNamePrefixOrPartial reports whether either prefix helper matches the
// "dsml" marker in text at byte offset start: the full-prefix check first,
// then the partial-prefix check.
// NOTE(review): both helpers are defined elsewhere; their names suggest
// ASCII case-insensitive matching — confirm.
func hasDSMLNamePrefixOrPartial(text string, start int) bool {
	const marker = "dsml"
	if hasASCIIPrefixFoldAt(text, start, marker) {
		return true
	}
	return hasASCIIPartialPrefixFoldAt(text, start, marker)
}
func toolMarkupPrefixAllowsLocalName(prefix string) bool {

View File

@@ -576,14 +576,17 @@ func TestParseToolCallsDetailedMarksToolCallsSyntax(t *testing.T) {
}
}
func TestParseToolCallsRejectsAllEmptyParameterPayload(t *testing.T) {
func TestParseToolCallsAllowsAllEmptyParameterPayload(t *testing.T) {
text := `<tool_calls><invoke name="Bash"><parameter name="command"></parameter><parameter name="description"> </parameter><parameter name="timeout"></parameter></invoke></tool_calls>`
res := ParseToolCallsDetailed(text, []string{"Bash"})
if !res.SawToolCallSyntax {
t.Fatalf("expected tool syntax to be detected, got %#v", res)
}
if len(res.Calls) != 0 {
t.Fatalf("expected all-empty payload to be rejected, got %#v", res.Calls)
if len(res.Calls) != 1 {
t.Fatalf("expected all-empty payload to be parsed, got %#v", res.Calls)
}
if res.Calls[0].Input["command"] != "" || res.Calls[0].Input["description"] != "" || res.Calls[0].Input["timeout"] != "" {
t.Fatalf("expected empty parameters to be preserved, got %#v", res.Calls[0].Input)
}
}

View File

@@ -54,7 +54,7 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
}
if parsed.SawToolCallSyntax {
if rejected == nil || tag.Start < rejected.start {
rejected = &rejectedBlock{start: tag.Start, prefix: prefixPart, suffix: suffixPart}
rejected = &rejectedBlock{start: tag.Start, prefix: prefixPart + xmlBlock, suffix: suffixPart}
}
searchFrom = tag.End + 1
continue
@@ -88,7 +88,7 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
return prefixPart, parsed.Calls, suffixPart, true
}
if parsed.SawToolCallSyntax {
return prefixPart, nil, suffixPart, true
return prefixPart + captured[invokeTag.Start:closeTag.End+1], nil, suffixPart, true
}
return prefixPart + captured[invokeTag.Start:closeTag.End+1], nil, suffixPart, true
}

View File

@@ -1,6 +1,7 @@
package toolstream
import (
"ds2api/internal/toolcall"
"strings"
"testing"
)
@@ -179,8 +180,7 @@ func TestProcessToolSieveInterceptsArbitraryPrefixedToolTagsWithoutLeak(t *testi
}
}
func TestProcessToolSieveSuppressesEmptyDSMLControlSeparatorBlockWithoutLeak(t *testing.T) {
var state State
func TestProcessToolSieveEmitsEmptyDSMLControlSeparatorBlockWithoutLeak(t *testing.T) {
sep := "␂"
chunks := []string{
"<DSML" + sep + "tool_calls>\n",
@@ -189,23 +189,12 @@ func TestProcessToolSieveSuppressesEmptyDSMLControlSeparatorBlockWithoutLeak(t *
" </DSML" + sep + "invoke>\n",
"</DSML" + sep + "tool_calls>",
}
var events []Event
for _, c := range chunks {
events = append(events, ProcessChunk(&state, c, []string{"Read"})...)
calls := collectToolCallsForChunks(t, chunks, []string{"Read"})
if len(calls) != 1 {
t.Fatalf("expected empty control-separator block to produce one call, got %#v", calls)
}
events = append(events, Flush(&state, []string{"Read"})...)
var textContent strings.Builder
toolCalls := 0
for _, evt := range events {
textContent.WriteString(evt.Content)
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 0 {
t.Fatalf("expected empty control-separator block not to produce calls, got %d events=%#v", toolCalls, events)
}
if text := textContent.String(); strings.Contains(strings.ToLower(text), "dsml") || strings.Contains(text, "Read") || strings.Contains(text, sep) {
t.Fatalf("expected empty control-separator block not to leak as text, got %q", text)
if calls[0].Name != "Read" || calls[0].Input["file_path"] != "" {
t.Fatalf("expected empty file_path parameter to be preserved, got %#v", calls)
}
}
@@ -595,7 +584,7 @@ func TestProcessToolSieveNonToolXMLKeepsSuffixForToolParsing(t *testing.T) {
}
}
func TestProcessToolSieveSuppressesMalformedExecutableXMLBlock(t *testing.T) {
func TestProcessToolSieveReleasesMalformedExecutableXMLBlock(t *testing.T) {
var state State
chunk := `<tool_calls><invoke name="read_file"><param>{"path":"README.md"}</param></invoke></tool_calls>`
events := ProcessChunk(&state, chunk, []string{"read_file"})
@@ -611,13 +600,12 @@ func TestProcessToolSieveSuppressesMalformedExecutableXMLBlock(t *testing.T) {
if toolCalls != 0 {
t.Fatalf("expected malformed executable-looking XML not to become a tool call, got %d events=%#v", toolCalls, events)
}
if textContent.Len() != 0 {
t.Fatalf("expected malformed executable-looking XML to be suppressed, got %q", textContent.String())
if textContent.String() != chunk {
t.Fatalf("expected malformed executable-looking XML to be released as text, got %q", textContent.String())
}
}
func TestProcessToolSieveSuppressesAllEmptyDSMLToolBlock(t *testing.T) {
var state State
func TestProcessToolSieveEmitsAllEmptyDSMLToolBlock(t *testing.T) {
chunk := strings.Join([]string{
`<|DSML|tool_calls>`,
`<|DSML|invoke name="Bash">`,
@@ -627,22 +615,69 @@ func TestProcessToolSieveSuppressesAllEmptyDSMLToolBlock(t *testing.T) {
`</|DSML|invoke>`,
`</|DSML|tool_calls>`,
}, "\n")
events := ProcessChunk(&state, chunk, []string{"Bash"})
events = append(events, Flush(&state, []string{"Bash"})...)
calls := collectToolCallsForChunks(t, []string{chunk}, []string{"Bash"})
if len(calls) != 1 {
t.Fatalf("expected all-empty DSML block to produce one tool call, got %#v", calls)
}
if calls[0].Input["command"] != "" || calls[0].Input["description"] != "" || calls[0].Input["timeout"] != "" {
t.Fatalf("expected empty parameters to be preserved, got %#v", calls[0].Input)
}
}
func TestProcessToolSieveEmitsChunkedAllEmptyArbitraryPrefixedToolBlock(t *testing.T) {
chunk := strings.Join([]string{
`<TDSMLtool_calls>`,
` <TDSMLinvoke name="TaskOutput">`,
` <TDSMLparameter name="task_id"></TDSMLparameter>`,
` <TDSMLparameter name="block"></TDSMLparameter>`,
` <TDSMLparameter name="timeout"></TDSMLparameter>`,
` </TDSMLinvoke>`,
` </TDSMLtool_calls>`,
}, "\n")
calls := collectToolCallsForChunks(t, splitEveryNRBytes(chunk, 8), []string{"TaskOutput"})
if len(calls) != 1 {
t.Fatalf("expected chunked all-empty arbitrary-prefixed block to produce one tool call, got %#v", calls)
}
if calls[0].Name != "TaskOutput" || calls[0].Input["task_id"] != "" || calls[0].Input["block"] != "" || calls[0].Input["timeout"] != "" {
t.Fatalf("expected empty TaskOutput parameters to be preserved, got %#v", calls)
}
}
func collectToolCallsForChunks(t *testing.T, chunks []string, toolNames []string) []toolcall.ParsedToolCall {
t.Helper()
var state State
var events []Event
for _, chunk := range chunks {
events = append(events, ProcessChunk(&state, chunk, toolNames)...)
}
events = append(events, Flush(&state, toolNames)...)
var textContent strings.Builder
toolCalls := 0
var calls []toolcall.ParsedToolCall
for _, evt := range events {
textContent.WriteString(evt.Content)
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 0 {
t.Fatalf("expected all-empty DSML block not to produce tool calls, got %d events=%#v", toolCalls, events)
calls = append(calls, evt.ToolCalls...)
}
if textContent.Len() != 0 {
t.Fatalf("expected all-empty DSML block not to leak as text, got %q", textContent.String())
t.Fatalf("expected tool block not to leak as text, got %q", textContent.String())
}
return calls
}
// splitEveryNRBytes cuts s into consecutive pieces of n bytes each; the last
// piece holds whatever remains and may be shorter. A non-positive n returns s
// as a single piece, and an empty s with positive n returns an empty slice.
// Cuts are made on byte offsets, so a multi-byte rune may be split.
func splitEveryNRBytes(s string, n int) []string {
	if n <= 0 {
		return []string{s}
	}
	pieces := make([]string, 0, len(s)/n+1)
	for start := 0; start < len(s); start += n {
		end := start + n
		if end > len(s) {
			end = len(s)
		}
		pieces = append(pieces, s[start:end])
	}
	return pieces
}
func TestProcessToolSievePassesThroughFencedXMLToolCallExamples(t *testing.T) {
@@ -776,6 +811,8 @@ func TestFindPartialXMLToolTagStart(t *testing.T) {
{"partial_tool_calls", "Hello <tool_ca", 6},
{"partial_dsml_trailing_pipe", "Hello <|DSML|tool_calls|", 6},
{"partial_dsml_extra_leading_less_than", "Hello <<|DSML|tool_calls", 6},
{"partial_arbitrary_prefix_before_dsml", "Hello <TDS", 6},
{"partial_arbitrary_prefix_after_dsml_pipe", "Hello <TDSML", 6},
{"partial_invoke", "Hello <inv", 6},
{"bare_tool_call_not_held", "Hello <tool_name", -1},
{"partial_lt_only", "Text <", 5},