mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-11 03:37:40 +08:00
revert: replace fullwidth pipe | (U+FF5C) with halfwidth | (U+007C) in DSML tool markup
PR #460 introduced fullwidth pipe characters (|, U+FF5C) in DSML tool call formatting to improve parsing robustness, but models exposed to these fullwidth pipes in system prompts exhibit significantly higher rates of tool output hallucinations. Reverting to halfwidth pipes (|, U+007C) drastically reduces tokenizer/perplexity-driven hallucinations while retaining the existing confusable-hardening in the parser. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -316,11 +316,11 @@ func TestSieve_CharByCharToolCall(t *testing.T) {
|
||||
func TestSieve_FullwidthPipeWrapperDSMLInvoke(t *testing.T) {
|
||||
var state State
|
||||
chunks := []string{
|
||||
"<|tool_calls>\n",
|
||||
"<|tool_calls>\n",
|
||||
"<|DSML|invoke name=\"read_file\">\n",
|
||||
"<|DSML|parameter name=\"path\">README.md</|DSML|parameter>\n",
|
||||
"</|DSML|invoke>\n",
|
||||
"</|tool_calls>",
|
||||
"</|tool_calls>",
|
||||
}
|
||||
var events []Event
|
||||
for _, c := range chunks {
|
||||
@@ -382,7 +382,7 @@ func TestSieve_TagMentionInTextThenRealToolCall(t *testing.T) {
|
||||
chunks := []string{
|
||||
"建议的 commit message:\n\nfeat: expand DSML alias support\n\n",
|
||||
"Add support for <dsml|tool_calls>, ",
|
||||
"<|tool_calls> (fullwidth pipe),\n",
|
||||
"<|tool_calls> (pipe alias),\n",
|
||||
"and <|tool_calls> wrapper variants.\n\n",
|
||||
"<|DSML|tool_calls>\n",
|
||||
"<|DSML|invoke name=\"Bash\">\n",
|
||||
@@ -466,14 +466,14 @@ func TestSieve_ReviewSampleWithAliasMentionsPreservesBodyAndToolCalls(t *testing
|
||||
chunks := []string{
|
||||
"Done reviewing the diff. Here's my analysis before we commit:\n\n",
|
||||
"Summary of Changes\n",
|
||||
"DSML wrapper variant support — recognize aliases (<dsml|tool_calls>, <|tool_calls>, <|tool_calls>) alongside canonical <tool_calls> and <|DSML|tool_calls> wrappers.\n\n",
|
||||
"DSML wrapper variant support — recognize aliases (<dsml|tool_calls>, <|tool_calls>) alongside canonical <tool_calls> and <|DSML|tool_calls> wrappers.\n\n",
|
||||
"<|DSML|tool_calls>\n",
|
||||
"<|DSML|invoke name=\"Bash\">\n",
|
||||
"<|DSML|parameter name=\"command\"><![CDATA[git add docs/toolcall-semantics.md internal/toolstream/tool_sieve_xml.go]]></|DSML|parameter>\n",
|
||||
"<|DSML|parameter name=\"description\"><![CDATA[Stage all relevant changed files]]></|DSML|parameter>\n",
|
||||
"</|DSML|invoke>\n",
|
||||
"<|DSML|invoke name=\"Bash\">\n",
|
||||
"<|DSML|parameter name=\"command\"><![CDATA[git commit -m \"$(cat <<'EOF'\nfeat(toolstream): expand DSML wrapper detection\n\nSupport DSML wrapper aliases: <dsml|tool_calls>, <|tool_calls>, <|tool_calls> alongside existing canonical wrappers.\nEOF\n)\"]]></|DSML|parameter>\n",
|
||||
"<|DSML|parameter name=\"command\"><![CDATA[git commit -m \"$(cat <<'EOF'\nfeat(toolstream): expand DSML wrapper detection\n\nSupport DSML wrapper aliases: <dsml|tool_calls> and <|tool_calls> alongside existing canonical wrappers.\nEOF\n)\"]]></|DSML|parameter>\n",
|
||||
"<|DSML|parameter name=\"description\"><![CDATA[Create commit with all staged changes]]></|DSML|parameter>\n",
|
||||
"</|DSML|invoke>\n",
|
||||
"</|DSML|tool_calls>",
|
||||
|
||||
@@ -626,13 +626,13 @@ func TestProcessToolSieveEmitsAllEmptyDSMLToolBlock(t *testing.T) {
|
||||
|
||||
func TestProcessToolSieveEmitsChunkedAllEmptyArbitraryPrefixedToolBlock(t *testing.T) {
|
||||
chunk := strings.Join([]string{
|
||||
`<T|DSML|tool_calls>`,
|
||||
` <T|DSML|invoke name="TaskOutput">`,
|
||||
` <T|DSML|parameter name="task_id"></T|DSML|parameter>`,
|
||||
` <T|DSML|parameter name="block"></T|DSML|parameter>`,
|
||||
` <T|DSML|parameter name="timeout"></T|DSML|parameter>`,
|
||||
` </T|DSML|invoke>`,
|
||||
` </T|DSML|tool_calls>`,
|
||||
`<T|DSML|tool_calls>`,
|
||||
` <T|DSML|invoke name="TaskOutput">`,
|
||||
` <T|DSML|parameter name="task_id"></T|DSML|parameter>`,
|
||||
` <T|DSML|parameter name="block"></T|DSML|parameter>`,
|
||||
` <T|DSML|parameter name="timeout"></T|DSML|parameter>`,
|
||||
` </T|DSML|invoke>`,
|
||||
` </T|DSML|tool_calls>`,
|
||||
}, "\n")
|
||||
calls := collectToolCallsForChunks(t, splitEveryNRBytes(chunk, 8), []string{"TaskOutput"})
|
||||
if len(calls) != 1 {
|
||||
@@ -811,8 +811,8 @@ func TestFindPartialXMLToolTagStart(t *testing.T) {
|
||||
{"partial_tool_calls", "Hello <tool_ca", 6},
|
||||
{"partial_dsml_trailing_pipe", "Hello <|DSML|tool_calls|", 6},
|
||||
{"partial_dsml_extra_leading_less_than", "Hello <<|DSML|tool_calls", 6},
|
||||
{"partial_arbitrary_prefix_before_dsml", "Hello <T|DS", 6},
|
||||
{"partial_arbitrary_prefix_after_dsml_pipe", "Hello <T|DSML|", 6},
|
||||
{"partial_arbitrary_prefix_before_dsml", "Hello <T|DS", 6},
|
||||
{"partial_arbitrary_prefix_after_dsml_pipe", "Hello <T|DSML|", 6},
|
||||
{"partial_invoke", "Hello <inv", 6},
|
||||
{"bare_tool_call_not_held", "Hello <tool_name", -1},
|
||||
{"partial_lt_only", "Text <", 5},
|
||||
@@ -1091,7 +1091,7 @@ func TestProcessToolSieveRepairsMissingOpeningWrapperWithoutLeakingInvokeText(t
|
||||
}
|
||||
}
|
||||
|
||||
// Test fullwidth pipe variant: <|tool_calls> (U+FF5C) should be buffered and parsed.
|
||||
// Test escaped U+FF5C pipe variant: <\uff5ctool_calls> should be buffered and parsed.
|
||||
func TestProcessToolSieveFullwidthPipeVariantDoesNotLeak(t *testing.T) {
|
||||
var state State
|
||||
chunks := []string{
|
||||
@@ -1115,19 +1115,19 @@ func TestProcessToolSieveFullwidthPipeVariantDoesNotLeak(t *testing.T) {
|
||||
}
|
||||
|
||||
if strings.Contains(textContent, "invoke") || strings.Contains(textContent, "execute_command") {
|
||||
t.Fatalf("fullwidth pipe variant leaked to text: %q", textContent)
|
||||
t.Fatalf("escaped U+FF5C pipe variant leaked to text: %q", textContent)
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected one tool call from fullwidth pipe variant, got %d events=%#v", toolCalls, events)
|
||||
t.Fatalf("expected one tool call from escaped U+FF5C pipe variant, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
}
|
||||
|
||||
// Test <|DSML|tool_calls> with DSML invoke/parameter tags should buffer the
|
||||
// Test <|DSML|tool_calls> with DSML invoke/parameter tags should buffer the
|
||||
// wrapper instead of leaking it before the block is complete.
|
||||
func TestProcessToolSieveFullwidthDSMLPrefixVariantDoesNotLeak(t *testing.T) {
|
||||
var state State
|
||||
chunks := []string{
|
||||
"<|DSML|tool",
|
||||
"<|DSML|tool",
|
||||
"_calls>\n",
|
||||
"<|DSML|invoke name=\"Bash\">\n",
|
||||
"<|DSML|parameter name=\"command\"><![CDATA[ls -la /Users/aq/Desktop/myproject/ds2api/]]></|DSML|parameter>\n",
|
||||
@@ -1232,12 +1232,12 @@ func TestProcessToolSieveDSMLBarePrefixVariantDoesNotLeak(t *testing.T) {
|
||||
func TestProcessToolSieveCJKAngleDSMDriftDoesNotLeak(t *testing.T) {
|
||||
var state State
|
||||
chunks := []string{
|
||||
"<DSM|tool_calls>\n",
|
||||
"<DSM|invoke name=\"Bash\">\n",
|
||||
"<DSM|parameter name=\"description\"|>〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉\n",
|
||||
"<DSM|parameter name=\"command\"|>〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉\n",
|
||||
"〈/DSM|invoke〉\n",
|
||||
"〈/DSM|tool_calls〉",
|
||||
"<DSM|tool_calls>\n",
|
||||
"<DSM|invoke name=\"Bash\">\n",
|
||||
"<DSM|parameter name=\"description\"|>〈![CDATA[Check tracking branch status]]〉〈/DSM|parameter〉\n",
|
||||
"<DSM|parameter name=\"command\"|>〈![CDATA[git status -b --short]]〉〈/DSM|parameter〉\n",
|
||||
"〈/DSM|invoke〉\n",
|
||||
"〈/DSM|tool_calls〉",
|
||||
}
|
||||
var events []Event
|
||||
for _, c := range chunks {
|
||||
@@ -1338,7 +1338,7 @@ func TestProcessToolSieveIdeographicCommaDSMLDriftDoesNotLeak(t *testing.T) {
|
||||
|
||||
func TestProcessToolSieveParsesFullwidthClosingSlashAndKeepsSuffixText(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke></DSML|tool_calls> sao cụm này lại đc trả là 1 message`
|
||||
chunk := `<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke></DSML|tool_calls> sao cụm này lại đc trả là 1 message`
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
@@ -1365,7 +1365,7 @@ func TestProcessToolSieveParsesFullwidthClosingSlashAndKeepsSuffixText(t *testin
|
||||
|
||||
func TestProcessToolSieveParsesSentencePieceSeparatorAndFullwidthTerminator(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls> suffix`
|
||||
chunk := `<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls> suffix`
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
@@ -1392,7 +1392,7 @@ func TestProcessToolSieveParsesSentencePieceSeparatorAndFullwidthTerminator(t *t
|
||||
|
||||
func TestProcessToolSieveParsesFullwidthOpeningDelimiterAndUnicodeAttributes(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<|DSML tool_calls><|DSML invoke name=“execute_code”><|DSML parameter name=“code”><![CDATA[print("hi")]]></DSML|parameter></DSML|invoke></DSML|tool_calls> suffix`
|
||||
chunk := `<|DSML tool_calls><|DSML invoke name=“execute_code”><|DSML parameter name=“code”><![CDATA[print("hi")]]></DSML|parameter></DSML|invoke></DSML|tool_calls> suffix`
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user