mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-17 06:35:14 +08:00
feat(toolcall): harden confusable candidate spans
This commit is contained in:
@@ -141,6 +141,9 @@ func shouldKeepBareInvokeCapture(captured string) bool {
|
||||
if invokeCloseTag, ok := findFirstToolMarkupTagByNameFrom(captured, startEnd+1, "invoke", true); ok {
|
||||
return strings.TrimSpace(captured[invokeCloseTag.End+1:]) == ""
|
||||
}
|
||||
if paramTag, ok := findFirstToolMarkupTagByName(body, 0, "parameter"); ok && strings.TrimSpace(body[:paramTag.Start]) == "" {
|
||||
return true
|
||||
}
|
||||
|
||||
trimmedLower := strings.ToLower(trimmedBody)
|
||||
return strings.HasPrefix(trimmedLower, "<parameter") ||
|
||||
@@ -149,14 +152,14 @@ func shouldKeepBareInvokeCapture(captured string) bool {
|
||||
}
|
||||
|
||||
func findPartialXMLToolTagStart(s string) int {
|
||||
lastLT := strings.LastIndex(s, "<")
|
||||
lastLT := lastToolMarkupStartDelimiterIndex(s)
|
||||
if lastLT < 0 {
|
||||
return -1
|
||||
}
|
||||
start := includeDuplicateLeadingLessThan(s, lastLT)
|
||||
tail := s[start:]
|
||||
// If there's a '>' in the tail, the tag is closed — not partial.
|
||||
if strings.Contains(tail, ">") {
|
||||
// If there's a tag terminator in the tail, the tag is closed — not partial.
|
||||
if strings.Contains(tail, ">") || strings.Contains(tail, ">") {
|
||||
return -1
|
||||
}
|
||||
if toolcall.IsPartialToolMarkupTagPrefix(tail) {
|
||||
@@ -164,3 +167,12 @@ func findPartialXMLToolTagStart(s string) int {
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func lastToolMarkupStartDelimiterIndex(s string) int {
|
||||
asciiIdx := strings.LastIndex(s, "<")
|
||||
fullwidthIdx := strings.LastIndex(s, "<")
|
||||
if asciiIdx > fullwidthIdx {
|
||||
return asciiIdx
|
||||
}
|
||||
return fullwidthIdx
|
||||
}
|
||||
|
||||
@@ -1335,3 +1335,166 @@ func TestProcessToolSieveIdeographicCommaDSMLDriftDoesNotLeak(t *testing.T) {
|
||||
t.Fatalf("unexpected ideographic-comma DSML drift call: %#v", calls[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveParsesFullwidthClosingSlashAndKeepsSuffixText(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke></DSML|tool_calls> sao cụm này lại đc trả là 1 message`
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
var parsed Event
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
parsed = evt
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected exactly one parsed tool call from fullwidth closing slash block, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if parsed.ToolCalls[0].Name != "execute_code" || parsed.ToolCalls[0].Input["code"] != `print("hi")` {
|
||||
t.Fatalf("unexpected parsed call from fullwidth closing slash block: %#v", parsed.ToolCalls[0])
|
||||
}
|
||||
if got := textContent.String(); got != " sao cụm này lại đc trả là 1 message" {
|
||||
t.Fatalf("expected suffix text to be preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveParsesSentencePieceSeparatorAndFullwidthTerminator(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls> suffix`
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
var parsed Event
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
parsed = evt
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected exactly one parsed tool call from sentencepiece/fullwidth-terminator block, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if parsed.ToolCalls[0].Name != "execute_code" || parsed.ToolCalls[0].Input["code"] != `print("hi")` {
|
||||
t.Fatalf("unexpected parsed call from sentencepiece/fullwidth-terminator block: %#v", parsed.ToolCalls[0])
|
||||
}
|
||||
if got := textContent.String(); got != " suffix" {
|
||||
t.Fatalf("expected suffix text to be preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveParsesFullwidthOpeningDelimiterAndUnicodeAttributes(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<|DSML tool_calls><|DSML invoke name=“execute_code”><|DSML parameter name=“code”><![CDATA[print("hi")]]></DSML|parameter></DSML|invoke></DSML|tool_calls> suffix`
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
var parsed Event
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
parsed = evt
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected exactly one parsed tool call from fullwidth-opening/Unicode-attr block, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if parsed.ToolCalls[0].Name != "execute_code" || parsed.ToolCalls[0].Input["code"] != `print("hi")` {
|
||||
t.Fatalf("unexpected parsed call from fullwidth-opening/Unicode-attr block: %#v", parsed.ToolCalls[0])
|
||||
}
|
||||
if got := textContent.String(); got != " suffix" {
|
||||
t.Fatalf("expected suffix text to be preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveParsesConfusableCandidateShellAndKeepsSuffixText(t *testing.T) {
|
||||
var state State
|
||||
chunk := "<|\u200b\uff24\u0405\u039cL|to\u03bfl\uff3fcalls><|\ufeffDSML|inv\u03bfk\u0435 n\u0430me\uff1d\u201cexecute_code\u201d><|\u200bDSML|par\u0430meter n\u0430me\uff1d\u201ccode\u201d><![\ufeff\u0421D\u0410T\u0410[print(\"hi\")]]></|\u200bDSML|par\u0430meter></|\u200bDSML|inv\u03bfk\u0435></|\u200b\uff24\u0405\u039cL|to\u03bfl\uff3fcalls> suffix"
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
var parsed Event
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
parsed = evt
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected exactly one parsed tool call from confusable-shell block, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if parsed.ToolCalls[0].Name != "execute_code" || parsed.ToolCalls[0].Input["code"] != `print("hi")` {
|
||||
t.Fatalf("unexpected parsed call from confusable-shell block: %#v", parsed.ToolCalls[0])
|
||||
}
|
||||
if got := textContent.String(); got != " suffix" {
|
||||
t.Fatalf("expected suffix text to be preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveRepairsConfusableMissingWrapperAndKeepsSuffixText(t *testing.T) {
|
||||
var state State
|
||||
chunks := []string{
|
||||
"<inv\u03bfk\u0435 n\u0430me=\"read_file\">\n",
|
||||
" <par\u0430meter n\u0430me=\"path\"><![\u200b\u0421D\u0410T\u0410[README.md]]></par\u0430meter>\n",
|
||||
"</inv\u03bfk\u0435>\n",
|
||||
"</to\u03bfl_calls> trailing prose",
|
||||
}
|
||||
var events []Event
|
||||
for _, c := range chunks {
|
||||
events = append(events, ProcessChunk(&state, c, []string{"read_file"})...)
|
||||
}
|
||||
events = append(events, Flush(&state, []string{"read_file"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
var parsed Event
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
parsed = evt
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected repaired confusable missing-wrapper stream to emit one tool call, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if parsed.ToolCalls[0].Name != "read_file" || parsed.ToolCalls[0].Input["path"] != "README.md" {
|
||||
t.Fatalf("unexpected parsed call from repaired confusable missing-wrapper block: %#v", parsed.ToolCalls[0])
|
||||
}
|
||||
if got := textContent.String(); got != " trailing prose" {
|
||||
t.Fatalf("expected suffix prose to be preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveKeepsConfusableNearMissWrapperAsText(t *testing.T) {
|
||||
var state State
|
||||
chunk := "<to\u03bfl_callz><inv\u03bfke name=\"read_file\"><parameter name=\"path\">README.md</parameter></inv\u03bfke></to\u03bfl_callz>"
|
||||
events := ProcessChunk(&state, chunk, []string{"read_file"})
|
||||
events = append(events, Flush(&state, []string{"read_file"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 0 {
|
||||
t.Fatalf("expected confusable near-miss wrapper to remain text, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if got := textContent.String(); got != chunk {
|
||||
t.Fatalf("expected confusable near-miss wrapper to pass through unchanged, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user