feat(toolcall): harden confusable candidate spans

This commit is contained in:
Your Name
2026-05-10 09:27:30 +07:00
parent 6a8edf96c3
commit 196e3c46f6
20 changed files with 2257 additions and 363 deletions

View File

@@ -18,10 +18,16 @@ var leakedThinkTagPattern = regexp.MustCompile(`(?is)</?\s*think\s*>`)
// - U+2581 variant: <begin▁of▁sentence>
var leakedBOSMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*begin[_▁]of[_▁]sentence\s*[\|]>`)
// leakedThoughtMarkerPattern matches leaked thought control markers in both
// explicit and compact forms:
// - ASCII underscore: <| of_thought |>, <| begin_of_thought |>
// - U+2581 variant: <▁of▁thought>, <begin▁of▁thought>
var leakedThoughtMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[\|]>`)
// leakedMetaMarkerPattern matches the remaining DeepSeek special tokens in BOTH forms:
// - ASCII underscore: <end_of_sentence>, <end_of_toolresults>, <end_of_instructions>
// - U+2581 variant: <end▁of▁sentence>, <end▁of▁toolresults>, <end▁of▁instructions>
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[\|]>`)
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[\|]>`)
// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
// when the sieve fails to capture them. These are applied only to complete
@@ -48,6 +54,7 @@ func sanitizeLeakedOutput(text string) string {
out = stripDanglingThinkSuffix(out)
out = leakedThinkTagPattern.ReplaceAllString(out, "")
out = leakedBOSMarkerPattern.ReplaceAllString(out, "")
out = leakedThoughtMarkerPattern.ReplaceAllString(out, "")
out = leakedMetaMarkerPattern.ReplaceAllString(out, "")
out = stripLeakedToolCallWrapperBlocks(out)
out = sanitizeLeakedAgentXMLBlocks(out)