mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-22 17:07:46 +08:00
refactor: allow and preserve empty tool parameter values while updating sieve to release malformed XML as text
This commit is contained in:
@@ -113,9 +113,10 @@ function filterToolCallsDetailed(parsed, toolNames) {
|
||||
if (!tc || !tc.name) {
|
||||
continue;
|
||||
}
|
||||
const input = tc.input && typeof tc.input === 'object' && !Array.isArray(tc.input) ? tc.input : {};
|
||||
calls.push({
|
||||
name: tc.name,
|
||||
input: tc.input && typeof tc.input === 'object' && !Array.isArray(tc.input) ? tc.input : {},
|
||||
input,
|
||||
});
|
||||
}
|
||||
return { calls, rejectedToolNames: [] };
|
||||
|
||||
@@ -660,9 +660,17 @@ function hasPartialToolMarkupNameAfterArbitraryPrefix(raw, start) {
|
||||
if (toolMarkupPrefixAllowsLocalName(raw.slice(start, idx)) && hasToolMarkupNamePrefix(raw, idx)) {
|
||||
return true;
|
||||
}
|
||||
if (toolMarkupPrefixAllowsLocalName(raw.slice(start, idx)) && hasDSMLNamePrefixOrPartial(raw, idx)) {
|
||||
return true;
|
||||
}
|
||||
idx += 1;
|
||||
}
|
||||
return false;
|
||||
return toolMarkupPrefixAllowsLocalName(raw.slice(start));
|
||||
}
|
||||
|
||||
/**
 * Checks whether the text at `start` begins with the marker `dsml`, or could
 * still grow into it.
 *
 * The comparison runs on the value returned by `normalizedASCIITailAt(raw, start)`
 * (assumed to be a string — confirm against that helper). The result is true
 * when that value starts with `dsml`, or when `dsml` starts with that value,
 * i.e. the value is a leading fragment of the marker. An empty value is a
 * leading fragment of every string, so it also yields true.
 *
 * @param {string} raw - Text being scanned.
 * @param {number} start - Position in `raw` handed to `normalizedASCIITailAt`.
 * @returns {boolean} True for a full or partial `dsml` match at `start`.
 */
function hasDSMLNamePrefixOrPartial(raw, start) {
  const rest = normalizedASCIITailAt(raw, start);
  if (rest.startsWith('dsml')) {
    return true;
  }
  // Not a full match: accept a fragment that the marker itself begins with.
  return 'dsml'.startsWith(rest);
}
|
||||
|
||||
function toolMarkupPrefixAllowsLocalName(prefix) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
'use strict';
|
||||
const { parseToolCalls } = require('./parse');
|
||||
const { parseToolCallsDetailed } = require('./parse');
|
||||
const {
|
||||
findToolMarkupTagOutsideIgnored,
|
||||
findMatchingToolMarkupClose,
|
||||
@@ -27,19 +27,30 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
|
||||
const xmlBlock = captured.slice(openTag.start, closeTag.end + 1);
|
||||
const prefixPart = captured.slice(0, openTag.start);
|
||||
const suffixPart = captured.slice(closeTag.end + 1);
|
||||
const parsed = parseToolCalls(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed) && parsed.length > 0) {
|
||||
const parsed = parseToolCallsDetailed(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed.calls) && parsed.calls.length > 0) {
|
||||
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
||||
if (!best || openTag.start < best.start) {
|
||||
best = {
|
||||
start: openTag.start,
|
||||
prefix: trimmedFence.prefix,
|
||||
calls: parsed,
|
||||
calls: parsed.calls,
|
||||
suffix: trimmedFence.suffix,
|
||||
};
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (parsed.sawToolCallSyntax) {
|
||||
if (!rejected || openTag.start < rejected.start) {
|
||||
rejected = {
|
||||
start: openTag.start,
|
||||
prefix: prefixPart + xmlBlock,
|
||||
suffix: suffixPart,
|
||||
};
|
||||
}
|
||||
searchFrom = openTag.end + 1;
|
||||
continue;
|
||||
}
|
||||
if (!rejected || openTag.start < rejected.start) {
|
||||
rejected = {
|
||||
start: openTag.start,
|
||||
@@ -69,16 +80,19 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
|
||||
const xmlBlock = '<tool_calls>' + captured.slice(invokeTag.start, closeTag.end + 1);
|
||||
const prefixPart = captured.slice(0, invokeTag.start);
|
||||
const suffixPart = captured.slice(closeTag.end + 1);
|
||||
const parsed = parseToolCalls(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed) && parsed.length > 0) {
|
||||
const parsed = parseToolCallsDetailed(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed.calls) && parsed.calls.length > 0) {
|
||||
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
||||
return {
|
||||
ready: true,
|
||||
prefix: trimmedFence.prefix,
|
||||
calls: parsed,
|
||||
calls: parsed.calls,
|
||||
suffix: trimmedFence.suffix,
|
||||
};
|
||||
}
|
||||
if (parsed.sawToolCallSyntax) {
|
||||
return { ready: true, prefix: prefixPart + captured.slice(invokeTag.start, closeTag.end + 1), calls: [], suffix: suffixPart };
|
||||
}
|
||||
return { ready: true, prefix: prefixPart + captured.slice(invokeTag.start, closeTag.end + 1), calls: [], suffix: suffixPart };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,10 +26,13 @@ RULES:
|
||||
6) Objects use nested XML elements inside the parameter body. Arrays may repeat <item> children.
|
||||
7) Numbers, booleans, and null stay plain text.
|
||||
8) Use only the parameter names in the tool schema. Do not invent fields.
|
||||
9) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
|
||||
10) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
|
||||
11) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with </|DSML|tool_calls>.
|
||||
12) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
|
||||
9) Fill parameters with the actual values required for this call. Do not emit placeholder, blank, or whitespace-only parameters.
|
||||
10) If a required parameter value is unknown, ask the user or answer normally instead of outputting an empty tool call.
|
||||
11) For shell tools such as Bash / execute_command, the command/script must be inside the command parameter. Never call them with an empty command.
|
||||
12) Do NOT wrap XML in markdown fences. Do NOT output explanations, role markers, or internal monologue.
|
||||
13) If you call a tool, the first non-whitespace characters of that tool block must be exactly <|DSML|tool_calls>.
|
||||
14) Never omit the opening <|DSML|tool_calls> tag, even if you already plan to close with </|DSML|tool_calls>.
|
||||
15) Compatibility note: the runtime also accepts the legacy XML tags <tool_calls> / <invoke> / <parameter>, but prefer the DSML-prefixed form above.
|
||||
|
||||
PARAMETER SHAPES:
|
||||
- string => <|DSML|parameter name="x"><![CDATA[value]]></|DSML|parameter>
|
||||
@@ -48,6 +51,12 @@ Wrong 2 — Markdown code fences:
|
||||
Wrong 3 — missing opening wrapper:
|
||||
<|DSML|invoke name="TOOL_NAME">...</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
Wrong 4 — empty parameters:
|
||||
<|DSML|tool_calls>
|
||||
<|DSML|invoke name="Bash">
|
||||
<|DSML|parameter name="command"></|DSML|parameter>
|
||||
</|DSML|invoke>
|
||||
</|DSML|tool_calls>
|
||||
|
||||
Remember: The ONLY valid way to use tools is the <|DSML|tool_calls>...</|DSML|tool_calls> block at the end of your response.
|
||||
` + buildCorrectToolExamples(toolNames)
|
||||
|
||||
@@ -119,6 +119,20 @@ func TestBuildToolCallInstructions_AnchorsMissingOpeningWrapperFailureMode(t *te
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildToolCallInstructions_RejectsEmptyParametersInPrompt(t *testing.T) {
|
||||
out := BuildToolCallInstructions([]string{"Bash"})
|
||||
for _, want := range []string{
|
||||
"Do not emit placeholder, blank, or whitespace-only parameters.",
|
||||
"If a required parameter value is unknown, ask the user or answer normally instead of outputting an empty tool call.",
|
||||
"Never call them with an empty command.",
|
||||
"Wrong 4 — empty parameters",
|
||||
} {
|
||||
if !strings.Contains(out, want) {
|
||||
t.Fatalf("expected empty-parameter instruction %q, got: %s", want, out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func findInvokeBlocks(text, name string) []string {
|
||||
open := `<|DSML|invoke name="` + name + `">`
|
||||
remaining := text
|
||||
|
||||
@@ -92,45 +92,11 @@ func filterToolCallsDetailed(parsed []ParsedToolCall) ([]ParsedToolCall, []strin
|
||||
if tc.Input == nil {
|
||||
tc.Input = map[string]any{}
|
||||
}
|
||||
if len(tc.Input) > 0 && !toolCallInputHasMeaningfulValue(tc.Input) {
|
||||
continue
|
||||
}
|
||||
out = append(out, tc)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// toolCallInputHasMeaningfulValue reports whether v carries any non-blank
// content.
//
//   - nil is never meaningful.
//   - A string is meaningful when it is non-empty after strings.TrimSpace.
//   - A map[string]any or []any is meaningful when at least one element is,
//     checked recursively; an empty container therefore is not.
//   - Any other dynamic type (numbers, booleans, ...) counts as meaningful.
func toolCallInputHasMeaningfulValue(v any) bool {
	switch val := v.(type) {
	case nil:
		return false
	case string:
		return len(strings.TrimSpace(val)) > 0
	case map[string]any:
		// Ranging over an empty map runs zero times, so it falls through to false.
		for _, item := range val {
			if toolCallInputHasMeaningfulValue(item) {
				return true
			}
		}
		return false
	case []any:
		for i := range val {
			if toolCallInputHasMeaningfulValue(val[i]) {
				return true
			}
		}
		return false
	}
	return true
}
|
||||
|
||||
func looksLikeToolCallSyntax(text string) bool {
|
||||
hasDSML, hasCanonical := ContainsToolCallWrapperSyntaxOutsideIgnored(text)
|
||||
return hasDSML || hasCanonical
|
||||
|
||||
@@ -383,13 +383,20 @@ func hasPartialToolMarkupNameAfterArbitraryPrefix(text string, start int) bool {
|
||||
if toolMarkupPrefixAllowsLocalName(text[start:idx]) && hasToolMarkupNamePrefix(text, idx) {
|
||||
return true
|
||||
}
|
||||
if toolMarkupPrefixAllowsLocalName(text[start:idx]) && hasDSMLNamePrefixOrPartial(text, idx) {
|
||||
return true
|
||||
}
|
||||
_, size := utf8.DecodeRuneInString(text[idx:])
|
||||
if size <= 0 {
|
||||
size = 1
|
||||
}
|
||||
idx += size
|
||||
}
|
||||
return false
|
||||
return toolMarkupPrefixAllowsLocalName(text[start:])
|
||||
}
|
||||
|
||||
func hasDSMLNamePrefixOrPartial(text string, start int) bool {
|
||||
return hasASCIIPrefixFoldAt(text, start, "dsml") || hasASCIIPartialPrefixFoldAt(text, start, "dsml")
|
||||
}
|
||||
|
||||
func toolMarkupPrefixAllowsLocalName(prefix string) bool {
|
||||
|
||||
@@ -576,14 +576,17 @@ func TestParseToolCallsDetailedMarksToolCallsSyntax(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsRejectsAllEmptyParameterPayload(t *testing.T) {
|
||||
func TestParseToolCallsAllowsAllEmptyParameterPayload(t *testing.T) {
|
||||
text := `<tool_calls><invoke name="Bash"><parameter name="command"></parameter><parameter name="description"> </parameter><parameter name="timeout"></parameter></invoke></tool_calls>`
|
||||
res := ParseToolCallsDetailed(text, []string{"Bash"})
|
||||
if !res.SawToolCallSyntax {
|
||||
t.Fatalf("expected tool syntax to be detected, got %#v", res)
|
||||
}
|
||||
if len(res.Calls) != 0 {
|
||||
t.Fatalf("expected all-empty payload to be rejected, got %#v", res.Calls)
|
||||
if len(res.Calls) != 1 {
|
||||
t.Fatalf("expected all-empty payload to be parsed, got %#v", res.Calls)
|
||||
}
|
||||
if res.Calls[0].Input["command"] != "" || res.Calls[0].Input["description"] != "" || res.Calls[0].Input["timeout"] != "" {
|
||||
t.Fatalf("expected empty parameters to be preserved, got %#v", res.Calls[0].Input)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
|
||||
}
|
||||
if parsed.SawToolCallSyntax {
|
||||
if rejected == nil || tag.Start < rejected.start {
|
||||
rejected = &rejectedBlock{start: tag.Start, prefix: prefixPart, suffix: suffixPart}
|
||||
rejected = &rejectedBlock{start: tag.Start, prefix: prefixPart + xmlBlock, suffix: suffixPart}
|
||||
}
|
||||
searchFrom = tag.End + 1
|
||||
continue
|
||||
@@ -88,7 +88,7 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
|
||||
return prefixPart, parsed.Calls, suffixPart, true
|
||||
}
|
||||
if parsed.SawToolCallSyntax {
|
||||
return prefixPart, nil, suffixPart, true
|
||||
return prefixPart + captured[invokeTag.Start:closeTag.End+1], nil, suffixPart, true
|
||||
}
|
||||
return prefixPart + captured[invokeTag.Start:closeTag.End+1], nil, suffixPart, true
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package toolstream
|
||||
|
||||
import (
|
||||
"ds2api/internal/toolcall"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
@@ -179,8 +180,7 @@ func TestProcessToolSieveInterceptsArbitraryPrefixedToolTagsWithoutLeak(t *testi
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveSuppressesEmptyDSMLControlSeparatorBlockWithoutLeak(t *testing.T) {
|
||||
var state State
|
||||
func TestProcessToolSieveEmitsEmptyDSMLControlSeparatorBlockWithoutLeak(t *testing.T) {
|
||||
sep := "␂"
|
||||
chunks := []string{
|
||||
"<DSML" + sep + "tool_calls>\n",
|
||||
@@ -189,23 +189,12 @@ func TestProcessToolSieveSuppressesEmptyDSMLControlSeparatorBlockWithoutLeak(t *
|
||||
" </DSML" + sep + "invoke>\n",
|
||||
"</DSML" + sep + "tool_calls>",
|
||||
}
|
||||
var events []Event
|
||||
for _, c := range chunks {
|
||||
events = append(events, ProcessChunk(&state, c, []string{"Read"})...)
|
||||
calls := collectToolCallsForChunks(t, chunks, []string{"Read"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected empty control-separator block to produce one call, got %#v", calls)
|
||||
}
|
||||
events = append(events, Flush(&state, []string{"Read"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 0 {
|
||||
t.Fatalf("expected empty control-separator block not to produce calls, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if text := textContent.String(); strings.Contains(strings.ToLower(text), "dsml") || strings.Contains(text, "Read") || strings.Contains(text, sep) {
|
||||
t.Fatalf("expected empty control-separator block not to leak as text, got %q", text)
|
||||
if calls[0].Name != "Read" || calls[0].Input["file_path"] != "" {
|
||||
t.Fatalf("expected empty file_path parameter to be preserved, got %#v", calls)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -595,7 +584,7 @@ func TestProcessToolSieveNonToolXMLKeepsSuffixForToolParsing(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveSuppressesMalformedExecutableXMLBlock(t *testing.T) {
|
||||
func TestProcessToolSieveReleasesMalformedExecutableXMLBlock(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<tool_calls><invoke name="read_file"><param>{"path":"README.md"}</param></invoke></tool_calls>`
|
||||
events := ProcessChunk(&state, chunk, []string{"read_file"})
|
||||
@@ -611,13 +600,12 @@ func TestProcessToolSieveSuppressesMalformedExecutableXMLBlock(t *testing.T) {
|
||||
if toolCalls != 0 {
|
||||
t.Fatalf("expected malformed executable-looking XML not to become a tool call, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if textContent.Len() != 0 {
|
||||
t.Fatalf("expected malformed executable-looking XML to be suppressed, got %q", textContent.String())
|
||||
if textContent.String() != chunk {
|
||||
t.Fatalf("expected malformed executable-looking XML to be released as text, got %q", textContent.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveSuppressesAllEmptyDSMLToolBlock(t *testing.T) {
|
||||
var state State
|
||||
func TestProcessToolSieveEmitsAllEmptyDSMLToolBlock(t *testing.T) {
|
||||
chunk := strings.Join([]string{
|
||||
`<|DSML|tool_calls>`,
|
||||
`<|DSML|invoke name="Bash">`,
|
||||
@@ -627,22 +615,69 @@ func TestProcessToolSieveSuppressesAllEmptyDSMLToolBlock(t *testing.T) {
|
||||
`</|DSML|invoke>`,
|
||||
`</|DSML|tool_calls>`,
|
||||
}, "\n")
|
||||
events := ProcessChunk(&state, chunk, []string{"Bash"})
|
||||
events = append(events, Flush(&state, []string{"Bash"})...)
|
||||
calls := collectToolCallsForChunks(t, []string{chunk}, []string{"Bash"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected all-empty DSML block to produce one tool call, got %#v", calls)
|
||||
}
|
||||
if calls[0].Input["command"] != "" || calls[0].Input["description"] != "" || calls[0].Input["timeout"] != "" {
|
||||
t.Fatalf("expected empty parameters to be preserved, got %#v", calls[0].Input)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveEmitsChunkedAllEmptyArbitraryPrefixedToolBlock(t *testing.T) {
|
||||
chunk := strings.Join([]string{
|
||||
`<T|DSML|tool_calls>`,
|
||||
` <T|DSML|invoke name="TaskOutput">`,
|
||||
` <T|DSML|parameter name="task_id"></T|DSML|parameter>`,
|
||||
` <T|DSML|parameter name="block"></T|DSML|parameter>`,
|
||||
` <T|DSML|parameter name="timeout"></T|DSML|parameter>`,
|
||||
` </T|DSML|invoke>`,
|
||||
` </T|DSML|tool_calls>`,
|
||||
}, "\n")
|
||||
calls := collectToolCallsForChunks(t, splitEveryNRBytes(chunk, 8), []string{"TaskOutput"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected chunked all-empty arbitrary-prefixed block to produce one tool call, got %#v", calls)
|
||||
}
|
||||
if calls[0].Name != "TaskOutput" || calls[0].Input["task_id"] != "" || calls[0].Input["block"] != "" || calls[0].Input["timeout"] != "" {
|
||||
t.Fatalf("expected empty TaskOutput parameters to be preserved, got %#v", calls)
|
||||
}
|
||||
}
|
||||
|
||||
func collectToolCallsForChunks(t *testing.T, chunks []string, toolNames []string) []toolcall.ParsedToolCall {
|
||||
t.Helper()
|
||||
var state State
|
||||
var events []Event
|
||||
for _, chunk := range chunks {
|
||||
events = append(events, ProcessChunk(&state, chunk, toolNames)...)
|
||||
}
|
||||
events = append(events, Flush(&state, toolNames)...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
var calls []toolcall.ParsedToolCall
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
|
||||
if toolCalls != 0 {
|
||||
t.Fatalf("expected all-empty DSML block not to produce tool calls, got %d events=%#v", toolCalls, events)
|
||||
calls = append(calls, evt.ToolCalls...)
|
||||
}
|
||||
if textContent.Len() != 0 {
|
||||
t.Fatalf("expected all-empty DSML block not to leak as text, got %q", textContent.String())
|
||||
t.Fatalf("expected tool block not to leak as text, got %q", textContent.String())
|
||||
}
|
||||
return calls
|
||||
}
|
||||
|
||||
// splitEveryNRBytes cuts s into consecutive pieces of n bytes each; the final
// piece holds whatever remains and may be shorter. A non-positive n returns s
// as a single piece, and an empty s with positive n returns no pieces.
// Cuts are made on byte offsets, so a multi-byte UTF-8 sequence can end up
// split across two pieces.
func splitEveryNRBytes(s string, n int) []string {
	if n <= 0 {
		return []string{s}
	}
	pieces := make([]string, 0, len(s)/n+1)
	for offset := 0; offset < len(s); offset += n {
		stop := len(s)
		if len(s)-offset > n {
			stop = offset + n
		}
		pieces = append(pieces, s[offset:stop])
	}
	return pieces
}
|
||||
|
||||
func TestProcessToolSievePassesThroughFencedXMLToolCallExamples(t *testing.T) {
|
||||
@@ -776,6 +811,8 @@ func TestFindPartialXMLToolTagStart(t *testing.T) {
|
||||
{"partial_tool_calls", "Hello <tool_ca", 6},
|
||||
{"partial_dsml_trailing_pipe", "Hello <|DSML|tool_calls|", 6},
|
||||
{"partial_dsml_extra_leading_less_than", "Hello <<|DSML|tool_calls", 6},
|
||||
{"partial_arbitrary_prefix_before_dsml", "Hello <T|DS", 6},
|
||||
{"partial_arbitrary_prefix_after_dsml_pipe", "Hello <T|DSML|", 6},
|
||||
{"partial_invoke", "Hello <inv", 6},
|
||||
{"bare_tool_call_not_held", "Hello <tool_name", -1},
|
||||
{"partial_lt_only", "Text <", 5},
|
||||
|
||||
Reference in New Issue
Block a user