feat(toolcall): harden confusable candidate spans

This commit is contained in:
Your Name
2026-05-10 09:27:30 +07:00
parent 6a8edf96c3
commit 196e3c46f6
20 changed files with 2257 additions and 363 deletions

View File

@@ -141,6 +141,9 @@ func shouldKeepBareInvokeCapture(captured string) bool {
if invokeCloseTag, ok := findFirstToolMarkupTagByNameFrom(captured, startEnd+1, "invoke", true); ok {
return strings.TrimSpace(captured[invokeCloseTag.End+1:]) == ""
}
if paramTag, ok := findFirstToolMarkupTagByName(body, 0, "parameter"); ok && strings.TrimSpace(body[:paramTag.Start]) == "" {
return true
}
trimmedLower := strings.ToLower(trimmedBody)
return strings.HasPrefix(trimmedLower, "<parameter") ||
@@ -149,14 +152,14 @@ func shouldKeepBareInvokeCapture(captured string) bool {
}
func findPartialXMLToolTagStart(s string) int {
lastLT := strings.LastIndex(s, "<")
lastLT := lastToolMarkupStartDelimiterIndex(s)
if lastLT < 0 {
return -1
}
start := includeDuplicateLeadingLessThan(s, lastLT)
tail := s[start:]
// If there's a '>' in the tail, the tag is closed — not partial.
if strings.Contains(tail, ">") {
// If there's a tag terminator in the tail, the tag is closed — not partial.
if strings.Contains(tail, ">") || strings.Contains(tail, "") {
return -1
}
if toolcall.IsPartialToolMarkupTagPrefix(tail) {
@@ -164,3 +167,12 @@ func findPartialXMLToolTagStart(s string) int {
}
return -1
}
func lastToolMarkupStartDelimiterIndex(s string) int {
asciiIdx := strings.LastIndex(s, "<")
fullwidthIdx := strings.LastIndex(s, "")
if asciiIdx > fullwidthIdx {
return asciiIdx
}
return fullwidthIdx
}

View File

@@ -1335,3 +1335,166 @@ func TestProcessToolSieveIdeographicCommaDSMLDriftDoesNotLeak(t *testing.T) {
t.Fatalf("unexpected ideographic-comma DSML drift call: %#v", calls[0])
}
}
func TestProcessToolSieveParsesFullwidthClosingSlashAndKeepsSuffixText(t *testing.T) {
var state State
chunk := `<DSMLtool_calls><DSMLinvoke name="execute_code"><DSMLparameter name="code"><![CDATA[print("hi")]]></DSMLparameter></DSMLinvoke><DSMLtool_calls> sao cụm này lại đc trả là 1 message`
events := ProcessChunk(&state, chunk, []string{"execute_code"})
events = append(events, Flush(&state, []string{"execute_code"})...)
var textContent strings.Builder
toolCalls := 0
var parsed Event
for _, evt := range events {
textContent.WriteString(evt.Content)
if len(evt.ToolCalls) > 0 {
parsed = evt
}
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 1 {
t.Fatalf("expected exactly one parsed tool call from fullwidth closing slash block, got %d events=%#v", toolCalls, events)
}
if parsed.ToolCalls[0].Name != "execute_code" || parsed.ToolCalls[0].Input["code"] != `print("hi")` {
t.Fatalf("unexpected parsed call from fullwidth closing slash block: %#v", parsed.ToolCalls[0])
}
if got := textContent.String(); got != " sao cụm này lại đc trả là 1 message" {
t.Fatalf("expected suffix text to be preserved, got %q", got)
}
}
func TestProcessToolSieveParsesSentencePieceSeparatorAndFullwidthTerminator(t *testing.T) {
var state State
chunk := `<DSML▁tool_calls><DSML▁invoke▁name="execute_code"><DSML▁parameter▁name="code"><![CDATA[print("hi")]]></DSML▁parameter></DSML▁invoke></DSML▁tool_calls suffix`
events := ProcessChunk(&state, chunk, []string{"execute_code"})
events = append(events, Flush(&state, []string{"execute_code"})...)
var textContent strings.Builder
toolCalls := 0
var parsed Event
for _, evt := range events {
textContent.WriteString(evt.Content)
if len(evt.ToolCalls) > 0 {
parsed = evt
}
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 1 {
t.Fatalf("expected exactly one parsed tool call from sentencepiece/fullwidth-terminator block, got %d events=%#v", toolCalls, events)
}
if parsed.ToolCalls[0].Name != "execute_code" || parsed.ToolCalls[0].Input["code"] != `print("hi")` {
t.Fatalf("unexpected parsed call from sentencepiece/fullwidth-terminator block: %#v", parsed.ToolCalls[0])
}
if got := textContent.String(); got != " suffix" {
t.Fatalf("expected suffix text to be preserved, got %q", got)
}
}
func TestProcessToolSieveParsesFullwidthOpeningDelimiterAndUnicodeAttributes(t *testing.T) {
var state State
chunk := `DSML tool_callsDSML invoke name“execute_code”DSML parameter name“code”<![CDATA[print("hi")]]>DSMLparameterDSMLinvokeDSMLtool_calls suffix`
events := ProcessChunk(&state, chunk, []string{"execute_code"})
events = append(events, Flush(&state, []string{"execute_code"})...)
var textContent strings.Builder
toolCalls := 0
var parsed Event
for _, evt := range events {
textContent.WriteString(evt.Content)
if len(evt.ToolCalls) > 0 {
parsed = evt
}
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 1 {
t.Fatalf("expected exactly one parsed tool call from fullwidth-opening/Unicode-attr block, got %d events=%#v", toolCalls, events)
}
if parsed.ToolCalls[0].Name != "execute_code" || parsed.ToolCalls[0].Input["code"] != `print("hi")` {
t.Fatalf("unexpected parsed call from fullwidth-opening/Unicode-attr block: %#v", parsed.ToolCalls[0])
}
if got := textContent.String(); got != " suffix" {
t.Fatalf("expected suffix text to be preserved, got %q", got)
}
}
func TestProcessToolSieveParsesConfusableCandidateShellAndKeepsSuffixText(t *testing.T) {
var state State
chunk := "<|\u200b\uff24\u0405\u039cL|to\u03bfl\uff3fcalls><|\ufeffDSML|inv\u03bfk\u0435 n\u0430me\uff1d\u201cexecute_code\u201d><|\u200bDSML|par\u0430meter n\u0430me\uff1d\u201ccode\u201d><![\ufeff\u0421D\u0410T\u0410[print(\"hi\")]]></|\u200bDSML|par\u0430meter></|\u200bDSML|inv\u03bfk\u0435></|\u200b\uff24\u0405\u039cL|to\u03bfl\uff3fcalls> suffix"
events := ProcessChunk(&state, chunk, []string{"execute_code"})
events = append(events, Flush(&state, []string{"execute_code"})...)
var textContent strings.Builder
toolCalls := 0
var parsed Event
for _, evt := range events {
textContent.WriteString(evt.Content)
if len(evt.ToolCalls) > 0 {
parsed = evt
}
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 1 {
t.Fatalf("expected exactly one parsed tool call from confusable-shell block, got %d events=%#v", toolCalls, events)
}
if parsed.ToolCalls[0].Name != "execute_code" || parsed.ToolCalls[0].Input["code"] != `print("hi")` {
t.Fatalf("unexpected parsed call from confusable-shell block: %#v", parsed.ToolCalls[0])
}
if got := textContent.String(); got != " suffix" {
t.Fatalf("expected suffix text to be preserved, got %q", got)
}
}
func TestProcessToolSieveRepairsConfusableMissingWrapperAndKeepsSuffixText(t *testing.T) {
var state State
chunks := []string{
"<inv\u03bfk\u0435 n\u0430me=\"read_file\">\n",
" <par\u0430meter n\u0430me=\"path\"><![\u200b\u0421D\u0410T\u0410[README.md]]></par\u0430meter>\n",
"</inv\u03bfk\u0435>\n",
"</to\u03bfl_calls> trailing prose",
}
var events []Event
for _, c := range chunks {
events = append(events, ProcessChunk(&state, c, []string{"read_file"})...)
}
events = append(events, Flush(&state, []string{"read_file"})...)
var textContent strings.Builder
toolCalls := 0
var parsed Event
for _, evt := range events {
textContent.WriteString(evt.Content)
if len(evt.ToolCalls) > 0 {
parsed = evt
}
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 1 {
t.Fatalf("expected repaired confusable missing-wrapper stream to emit one tool call, got %d events=%#v", toolCalls, events)
}
if parsed.ToolCalls[0].Name != "read_file" || parsed.ToolCalls[0].Input["path"] != "README.md" {
t.Fatalf("unexpected parsed call from repaired confusable missing-wrapper block: %#v", parsed.ToolCalls[0])
}
if got := textContent.String(); got != " trailing prose" {
t.Fatalf("expected suffix prose to be preserved, got %q", got)
}
}
func TestProcessToolSieveKeepsConfusableNearMissWrapperAsText(t *testing.T) {
var state State
chunk := "<to\u03bfl_callz><inv\u03bfke name=\"read_file\"><parameter name=\"path\">README.md</parameter></inv\u03bfke></to\u03bfl_callz>"
events := ProcessChunk(&state, chunk, []string{"read_file"})
events = append(events, Flush(&state, []string{"read_file"})...)
var textContent strings.Builder
toolCalls := 0
for _, evt := range events {
textContent.WriteString(evt.Content)
toolCalls += len(evt.ToolCalls)
}
if toolCalls != 0 {
t.Fatalf("expected confusable near-miss wrapper to remain text, got %d events=%#v", toolCalls, events)
}
if got := textContent.String(); got != chunk {
t.Fatalf("expected confusable near-miss wrapper to pass through unchanged, got %q", got)
}
}