mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-21 08:27:42 +08:00
71 lines
3.5 KiB
Go
71 lines
3.5 KiB
Go
package openai
|
||
|
||
import (
|
||
"regexp"
|
||
)
|
||
|
||
// Patterns for tool-call plumbing that can leak into user-visible output.
var (
	// emptyJSONFencePattern matches a Markdown ```json fence that contains
	// nothing but optional whitespace. An interpreted string literal is used
	// because a raw literal cannot contain backticks.
	emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```")

	// leakedToolCallArrayPattern matches a serialized tool-call array of the
	// form [{"function":{...},"id":"call…","type":"function"}], with the keys
	// in exactly that order. The function body is matched lazily.
	leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\s*\{[\s\S]*?\}\s*,\s*"id"\s*:\s*"call[^"]*"\s*,\s*"type"\s*:\s*"function"\s*}\]`)

	// leakedToolResultBlobPattern matches a <|tool|> marker immediately
	// followed by a JSON object that ends with a "tool_call_id":"call…" field.
	leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`)
)
|
||
|
||
// leakedMetaMarkerPattern matches leaked DeepSeek special tokens,
// case-insensitively.
//
// Recognised token names: assistant, tool, end_of_sentence, end_of_thinking,
// end_of_toolresults and end_of_instructions. Each name is accepted with:
//   - either word separator: ASCII underscore (<|end_of_sentence|>) or
//     U+2581 (<|end▁of▁sentence|>);
//   - either delimiter: ASCII "|" or fullwidth "｜" (U+FF5C), so tokens
//     written as <｜end▁of▁sentence｜> are stripped as well;
//   - optional whitespace between the delimiters and the name.
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|｜]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|｜]>`)
|
||
|
||
// leakedAgentXMLBlockPatterns match complete agent-style XML wrapper blocks
// (<attempt_completion>, <ask_followup_question>, <new_task>) that leak
// through when the sieve fails to capture them. Each pattern is
// case-insensitive, lets "." span newlines, and exposes the text between the
// opening and closing tag as capture group 1 (matched lazily, so adjacent
// blocks are handled separately).
//
// Only complete wrapper blocks are matched, so a standalone "<result>"
// example in normal output remains untouched by these patterns.
var leakedAgentXMLBlockPatterns = []*regexp.Regexp{
	regexp.MustCompile(`(?is)<attempt_completion\b[^>]*>(.*?)</attempt_completion>`),
	regexp.MustCompile(`(?is)<ask_followup_question\b[^>]*>(.*?)</ask_followup_question>`),
	regexp.MustCompile(`(?is)<new_task\b[^>]*>(.*?)</new_task>`),
}
|
||
|
||
// Patterns for individual agent wrapper / result tags. All are
// case-insensitive.
var (
	// leakedAgentWrapperTagPattern matches a single opening or closing
	// attempt_completion / ask_followup_question / new_task tag, including
	// any attributes.
	leakedAgentWrapperTagPattern = regexp.MustCompile(`(?is)</?(?:attempt_completion|ask_followup_question|new_task)\b[^>]*>`)

	// leakedAgentWrapperPlusResultOpenPattern matches an opening wrapper tag
	// followed (after optional whitespace) by an opening <result> tag.
	leakedAgentWrapperPlusResultOpenPattern = regexp.MustCompile(`(?is)<(?:attempt_completion|ask_followup_question|new_task)\b[^>]*>\s*<result>`)

	// leakedAgentResultPlusWrapperClosePattern matches a closing </result>
	// tag followed (after optional whitespace) by a closing wrapper tag.
	leakedAgentResultPlusWrapperClosePattern = regexp.MustCompile(`(?is)</result>\s*</(?:attempt_completion|ask_followup_question|new_task)\b[^>]*>`)

	// leakedAgentResultTagPattern matches a bare <result> or </result> tag.
	leakedAgentResultTagPattern = regexp.MustCompile(`(?is)</?result>`)
)
|
||
|
||
func sanitizeLeakedOutput(text string) string {
|
||
if text == "" {
|
||
return text
|
||
}
|
||
out := emptyJSONFencePattern.ReplaceAllString(text, "")
|
||
out = leakedToolCallArrayPattern.ReplaceAllString(out, "")
|
||
out = leakedToolResultBlobPattern.ReplaceAllString(out, "")
|
||
out = leakedMetaMarkerPattern.ReplaceAllString(out, "")
|
||
out = sanitizeLeakedAgentXMLBlocks(out)
|
||
return out
|
||
}
|
||
|
||
func sanitizeLeakedAgentXMLBlocks(text string) string {
|
||
out := text
|
||
for _, pattern := range leakedAgentXMLBlockPatterns {
|
||
out = pattern.ReplaceAllStringFunc(out, func(match string) string {
|
||
submatches := pattern.FindStringSubmatch(match)
|
||
if len(submatches) < 2 {
|
||
return match
|
||
}
|
||
// Preserve the inner text so leaked agent instructions do not erase
|
||
// the actual answer, but strip the wrapper/result markup itself.
|
||
return leakedAgentResultTagPattern.ReplaceAllString(submatches[1], "")
|
||
})
|
||
}
|
||
// Fallback for truncated output streams: strip any dangling wrapper tags
|
||
// that were not part of a complete block replacement. If we detect leaked
|
||
// wrapper tags, strip only adjacent <result> tags to avoid exposing agent
|
||
// markup without altering unrelated user-visible <result> examples.
|
||
if leakedAgentWrapperTagPattern.MatchString(out) {
|
||
out = leakedAgentWrapperPlusResultOpenPattern.ReplaceAllStringFunc(out, func(match string) string {
|
||
return leakedAgentResultTagPattern.ReplaceAllString(match, "")
|
||
})
|
||
out = leakedAgentResultPlusWrapperClosePattern.ReplaceAllStringFunc(out, func(match string) string {
|
||
return leakedAgentResultTagPattern.ReplaceAllString(match, "")
|
||
})
|
||
out = leakedAgentWrapperTagPattern.ReplaceAllString(out, "")
|
||
}
|
||
return out
|
||
}
|