mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-11 03:37:40 +08:00
feat(toolcall): harden confusable candidate spans
This commit is contained in:
@@ -34,6 +34,14 @@ func TestSanitizeLeakedOutputRemovesThinkAndBosMarkers(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeLeakedOutputRemovesThoughtMarkers(t *testing.T) {
|
||||
raw := "A<|▁of▁thought|>B<| of_thought |>C<| begin_of_thought |>D<| end_of_thought |>E"
|
||||
got := sanitizeLeakedOutput(raw)
|
||||
if got != "ABCDE" {
|
||||
t.Fatalf("unexpected sanitize result for leaked thought markers: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeLeakedOutputRemovesDanglingThinkBlock(t *testing.T) {
|
||||
raw := "Answer prefix<think>internal reasoning that never closes"
|
||||
got := sanitizeLeakedOutput(raw)
|
||||
|
||||
@@ -18,10 +18,16 @@ var leakedThinkTagPattern = regexp.MustCompile(`(?is)</?\s*think\s*>`)
|
||||
// - U+2581 variant: <|begin▁of▁sentence|>
|
||||
var leakedBOSMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*begin[_▁]of[_▁]sentence\s*[|\|]>`)
|
||||
|
||||
// leakedThoughtMarkerPattern matches leaked thought control markers in both
|
||||
// explicit and compact forms:
|
||||
// - ASCII underscore: <| of_thought |>, <| begin_of_thought |>
|
||||
// - U+2581 variant: <|▁of▁thought|>, <|begin▁of▁thought|>
|
||||
var leakedThoughtMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[|\|]>`)
|
||||
|
||||
// leakedMetaMarkerPattern matches the remaining DeepSeek special tokens in BOTH forms:
|
||||
// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|>
|
||||
// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|>
|
||||
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|\|]>`)
|
||||
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|\|]>`)
|
||||
|
||||
// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
|
||||
// when the sieve fails to capture them. These are applied only to complete
|
||||
@@ -48,6 +54,7 @@ func sanitizeLeakedOutput(text string) string {
|
||||
out = stripDanglingThinkSuffix(out)
|
||||
out = leakedThinkTagPattern.ReplaceAllString(out, "")
|
||||
out = leakedBOSMarkerPattern.ReplaceAllString(out, "")
|
||||
out = leakedThoughtMarkerPattern.ReplaceAllString(out, "")
|
||||
out = leakedMetaMarkerPattern.ReplaceAllString(out, "")
|
||||
out = stripLeakedToolCallWrapperBlocks(out)
|
||||
out = sanitizeLeakedAgentXMLBlocks(out)
|
||||
|
||||
Reference in New Issue
Block a user