mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-21 08:27:42 +08:00
fix(openai): strip leaked tool result markers
This commit is contained in:
@@ -35,6 +35,24 @@ func TestSanitizeLeakedOutputRemovesFullwidthDelimitedMetaMarkers(t *testing.T)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSanitizeLeakedOutputRemovesAssistantEndOfToolCallsMarkers(t *testing.T) {
|
||||||
|
fw := "\uff5c"
|
||||||
|
raw := "A<|Assistant_END_OF_TOOL_CALLS|>B<" + fw + "Assistant▁END▁OF▁TOOL_CALLS" + fw + ">C<|end_of_tool_calls|>D"
|
||||||
|
got := sanitizeLeakedOutput(raw)
|
||||||
|
if got != "ABCD" {
|
||||||
|
t.Fatalf("unexpected sanitize result for assistant end-of-tool-calls markers: %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSanitizeLeakedOutputRemovesFullToolResultSection(t *testing.T) {
|
||||||
|
fw := "\uff5c"
|
||||||
|
raw := "开始<" + fw + "Tool" + fw + ">[{\"content\":\"openjdk version 21\"}]<" + fw + "end▁of▁toolresults" + fw + ">结束"
|
||||||
|
got := sanitizeLeakedOutput(raw)
|
||||||
|
if got != "开始结束" {
|
||||||
|
t.Fatalf("unexpected sanitize result for leaked tool result section: %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestSanitizeLeakedOutputRemovesThinkAndBosMarkers(t *testing.T) {
|
func TestSanitizeLeakedOutputRemovesThinkAndBosMarkers(t *testing.T) {
|
||||||
raw := "A<think>B</think>C<|begin▁of▁sentence|>D<| begin_of_sentence |>E<|begin_of_sentence|>F"
|
raw := "A<think>B</think>C<|begin▁of▁sentence|>D<| begin_of_sentence |>E<|begin_of_sentence|>F"
|
||||||
got := sanitizeLeakedOutput(raw)
|
got := sanitizeLeakedOutput(raw)
|
||||||
|
|||||||
@@ -10,6 +10,9 @@ import (
|
|||||||
var emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```")
|
var emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```")
|
||||||
var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\s*\{[\s\S]*?\}\s*,\s*"id"\s*:\s*"call[^"]*"\s*,\s*"type"\s*:\s*"function"\s*}\]`)
|
var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\s*\{[\s\S]*?\}\s*,\s*"id"\s*:\s*"call[^"]*"\s*,\s*"type"\s*:\s*"function"\s*}\]`)
|
||||||
var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`)
|
var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`)
|
||||||
|
var leakedToolResultOpenMarkerPattern = regexp.MustCompile(`(?is)<[\|\x{ff5c}]\s*tool\s*[\|\x{ff5c}]>`)
|
||||||
|
var leakedToolResultCloseMarkerPattern = regexp.MustCompile(`(?is)<[\|\x{ff5c}]\s*end[_▁]of[_▁]tool[_▁]?results\s*[\|\x{ff5c}]>`)
|
||||||
|
var leakedToolResultSectionPattern = regexp.MustCompile(`(?is)<[\|\x{ff5c}]\s*tool\s*[\|\x{ff5c}]>[\s\S]*?<[\|\x{ff5c}]\s*end[_▁]of[_▁]tool[_▁]?results\s*[\|\x{ff5c}]>`)
|
||||||
|
|
||||||
var leakedThinkTagPattern = regexp.MustCompile(`(?is)</?\s*think\s*>`)
|
var leakedThinkTagPattern = regexp.MustCompile(`(?is)</?\s*think\s*>`)
|
||||||
|
|
||||||
@@ -29,7 +32,8 @@ var leakedThoughtMarkerPattern = regexp.MustCompile(`(?i)<[\|\x{ff5c}]\s*(?:begi
|
|||||||
// halfwidth or legacy U+FF5C fullwidth delimiters:
|
// halfwidth or legacy U+FF5C fullwidth delimiters:
|
||||||
// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|>
|
// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|>
|
||||||
// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|>
|
// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|>
|
||||||
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|\x{ff5c}]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[\|\x{ff5c}]>`)
|
// - compound assistant markers: <|Assistant_END_OF_TOOL_CALLS|>
|
||||||
|
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|\x{ff5c}]\s*(?:assistant(?:[_▁]end[_▁]of[_▁]tool[_▁]?calls)?|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]tool[_▁]?results|end[_▁]of[_▁]tool[_▁]?calls|end[_▁]of[_▁]instructions)\s*[\|\x{ff5c}]>`)
|
||||||
|
|
||||||
// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
|
// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
|
||||||
// when the sieve fails to capture them. These are applied only to complete
|
// when the sieve fails to capture them. These are applied only to complete
|
||||||
@@ -52,6 +56,7 @@ func sanitizeLeakedOutput(text string) string {
|
|||||||
}
|
}
|
||||||
out := emptyJSONFencePattern.ReplaceAllString(text, "")
|
out := emptyJSONFencePattern.ReplaceAllString(text, "")
|
||||||
out = leakedToolCallArrayPattern.ReplaceAllString(out, "")
|
out = leakedToolCallArrayPattern.ReplaceAllString(out, "")
|
||||||
|
out = leakedToolResultSectionPattern.ReplaceAllString(out, "")
|
||||||
out = leakedToolResultBlobPattern.ReplaceAllString(out, "")
|
out = leakedToolResultBlobPattern.ReplaceAllString(out, "")
|
||||||
out = stripDanglingThinkSuffix(out)
|
out = stripDanglingThinkSuffix(out)
|
||||||
out = leakedThinkTagPattern.ReplaceAllString(out, "")
|
out = leakedThinkTagPattern.ReplaceAllString(out, "")
|
||||||
@@ -63,6 +68,40 @@ func sanitizeLeakedOutput(text string) string {
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func stripLeakedToolResultSectionsDelta(text string, inside *bool) string {
|
||||||
|
if text == "" || inside == nil {
|
||||||
|
return text
|
||||||
|
}
|
||||||
|
var b strings.Builder
|
||||||
|
pos := 0
|
||||||
|
for pos < len(text) {
|
||||||
|
if *inside {
|
||||||
|
loc := leakedToolResultCloseMarkerPattern.FindStringIndex(text[pos:])
|
||||||
|
if loc == nil {
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
*inside = false
|
||||||
|
pos += loc[1]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
loc := leakedToolResultOpenMarkerPattern.FindStringIndex(text[pos:])
|
||||||
|
if loc == nil {
|
||||||
|
b.WriteString(text[pos:])
|
||||||
|
break
|
||||||
|
}
|
||||||
|
start := pos + loc[0]
|
||||||
|
openEnd := pos + loc[1]
|
||||||
|
b.WriteString(text[pos:start])
|
||||||
|
closeLoc := leakedToolResultCloseMarkerPattern.FindStringIndex(text[openEnd:])
|
||||||
|
if closeLoc == nil {
|
||||||
|
*inside = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
pos = openEnd + closeLoc[1]
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
func stripLeakedToolCallWrapperBlocks(text string) string {
|
func stripLeakedToolCallWrapperBlocks(text string) string {
|
||||||
if text == "" {
|
if text == "" {
|
||||||
return text
|
return text
|
||||||
|
|||||||
@@ -16,6 +16,9 @@ type StreamAccumulator struct {
|
|||||||
ToolDetectionThinking strings.Builder
|
ToolDetectionThinking strings.Builder
|
||||||
RawText strings.Builder
|
RawText strings.Builder
|
||||||
Text strings.Builder
|
Text strings.Builder
|
||||||
|
|
||||||
|
thinkingToolResultSectionOpen bool
|
||||||
|
textToolResultSectionOpen bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type StreamPartDelta struct {
|
type StreamPartDelta struct {
|
||||||
@@ -69,7 +72,8 @@ func (a *StreamAccumulator) applyThinkingPart(text string) StreamPartDelta {
|
|||||||
if !a.ThinkingEnabled || rawTrimmed == "" {
|
if !a.ThinkingEnabled || rawTrimmed == "" {
|
||||||
return delta
|
return delta
|
||||||
}
|
}
|
||||||
cleanedText := CleanVisibleOutput(rawTrimmed, a.StripReferenceMarkers)
|
visibleCandidate := stripLeakedToolResultSectionsDelta(rawTrimmed, &a.thinkingToolResultSectionOpen)
|
||||||
|
cleanedText := CleanVisibleOutput(visibleCandidate, a.StripReferenceMarkers)
|
||||||
if cleanedText == "" {
|
if cleanedText == "" {
|
||||||
return delta
|
return delta
|
||||||
}
|
}
|
||||||
@@ -89,11 +93,15 @@ func (a *StreamAccumulator) applyTextPart(text string) StreamPartDelta {
|
|||||||
}
|
}
|
||||||
a.RawText.WriteString(rawTrimmed)
|
a.RawText.WriteString(rawTrimmed)
|
||||||
delta := StreamPartDelta{Type: "text", RawText: rawTrimmed}
|
delta := StreamPartDelta{Type: "text", RawText: rawTrimmed}
|
||||||
if a.SearchEnabled && sse.IsCitation(rawTrimmed) {
|
visibleCandidate := stripLeakedToolResultSectionsDelta(rawTrimmed, &a.textToolResultSectionOpen)
|
||||||
|
if visibleCandidate == "" {
|
||||||
|
return delta
|
||||||
|
}
|
||||||
|
if a.SearchEnabled && sse.IsCitation(visibleCandidate) {
|
||||||
delta.CitationOnly = true
|
delta.CitationOnly = true
|
||||||
return delta
|
return delta
|
||||||
}
|
}
|
||||||
cleanedText := CleanVisibleOutput(rawTrimmed, a.StripReferenceMarkers)
|
cleanedText := CleanVisibleOutput(visibleCandidate, a.StripReferenceMarkers)
|
||||||
trimmed := sse.TrimContinuationOverlapFromBuilder(&a.Text, cleanedText)
|
trimmed := sse.TrimContinuationOverlapFromBuilder(&a.Text, cleanedText)
|
||||||
if trimmed == "" {
|
if trimmed == "" {
|
||||||
return delta
|
return delta
|
||||||
|
|||||||
@@ -96,6 +96,87 @@ func TestStreamAccumulatorSuppressesCitationTextWhenSearchEnabled(t *testing.T)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestStreamAccumulatorStripsToolResultSectionAcrossTextChunks(t *testing.T) {
|
||||||
|
acc := StreamAccumulator{StripReferenceMarkers: true}
|
||||||
|
first := acc.Apply(sse.LineResult{
|
||||||
|
Parsed: true,
|
||||||
|
Parts: []sse.ContentPart{{Type: "text", Text: "visible:<|Tool|>"}},
|
||||||
|
})
|
||||||
|
second := acc.Apply(sse.LineResult{
|
||||||
|
Parsed: true,
|
||||||
|
Parts: []sse.ContentPart{{Type: "text", Text: `[{"content":"secret","tool_call_id":"call_123"}]`}},
|
||||||
|
})
|
||||||
|
third := acc.Apply(sse.LineResult{
|
||||||
|
Parsed: true,
|
||||||
|
Parts: []sse.ContentPart{{Type: "text", Text: "<|end_of_toolresults|> after"}},
|
||||||
|
})
|
||||||
|
|
||||||
|
if got := acc.RawText.String(); got != `visible:<|Tool|>[{"content":"secret","tool_call_id":"call_123"}]<|end_of_toolresults|> after` {
|
||||||
|
t.Fatalf("raw text = %q", got)
|
||||||
|
}
|
||||||
|
if got := acc.Text.String(); got != "visible: after" {
|
||||||
|
t.Fatalf("visible text = %q", got)
|
||||||
|
}
|
||||||
|
if !first.ContentSeen || !second.ContentSeen || !third.ContentSeen {
|
||||||
|
t.Fatalf("expected all chunks to mark upstream content")
|
||||||
|
}
|
||||||
|
if got := first.Parts[0].VisibleText; got != "visible:" {
|
||||||
|
t.Fatalf("first visible delta = %q", got)
|
||||||
|
}
|
||||||
|
if got := second.Parts[0].VisibleText; got != "" {
|
||||||
|
t.Fatalf("payload visible delta = %q", got)
|
||||||
|
}
|
||||||
|
if got := third.Parts[0].VisibleText; got != " after" {
|
||||||
|
t.Fatalf("closing visible delta = %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStreamAccumulatorStripsFullwidthToolResultSectionAcrossTextChunks(t *testing.T) {
|
||||||
|
acc := StreamAccumulator{StripReferenceMarkers: true}
|
||||||
|
acc.Apply(sse.LineResult{
|
||||||
|
Parsed: true,
|
||||||
|
Parts: []sse.ContentPart{{Type: "text", Text: "x<|Tool|>"}},
|
||||||
|
})
|
||||||
|
acc.Apply(sse.LineResult{
|
||||||
|
Parsed: true,
|
||||||
|
Parts: []sse.ContentPart{{Type: "text", Text: `{"content":"secret"}`}},
|
||||||
|
})
|
||||||
|
acc.Apply(sse.LineResult{
|
||||||
|
Parsed: true,
|
||||||
|
Parts: []sse.ContentPart{{Type: "text", Text: "<|end▁of▁toolresults|>y"}},
|
||||||
|
})
|
||||||
|
|
||||||
|
if got := acc.Text.String(); got != "xy" {
|
||||||
|
t.Fatalf("visible text = %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStreamAccumulatorStripsToolResultSectionAcrossThinkingChunks(t *testing.T) {
|
||||||
|
acc := StreamAccumulator{ThinkingEnabled: true, StripReferenceMarkers: true}
|
||||||
|
acc.Apply(sse.LineResult{
|
||||||
|
Parsed: true,
|
||||||
|
Parts: []sse.ContentPart{{Type: "thinking", Text: "thought <|Tool|>"}},
|
||||||
|
})
|
||||||
|
payload := acc.Apply(sse.LineResult{
|
||||||
|
Parsed: true,
|
||||||
|
Parts: []sse.ContentPart{{Type: "thinking", Text: `[{"content":"secret"}]`}},
|
||||||
|
})
|
||||||
|
acc.Apply(sse.LineResult{
|
||||||
|
Parsed: true,
|
||||||
|
Parts: []sse.ContentPart{{Type: "thinking", Text: "<|end_of_toolresults|>resumes"}},
|
||||||
|
})
|
||||||
|
|
||||||
|
if got := acc.RawThinking.String(); got != `thought <|Tool|>[{"content":"secret"}]<|end_of_toolresults|>resumes` {
|
||||||
|
t.Fatalf("raw thinking = %q", got)
|
||||||
|
}
|
||||||
|
if got := acc.Thinking.String(); got != "thought resumes" {
|
||||||
|
t.Fatalf("visible thinking = %q", got)
|
||||||
|
}
|
||||||
|
if got := payload.Parts[0].VisibleText; got != "" {
|
||||||
|
t.Fatalf("payload visible delta = %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestStreamAccumulatorStripsInlineCitationAndReferenceMarkers(t *testing.T) {
|
func TestStreamAccumulatorStripsInlineCitationAndReferenceMarkers(t *testing.T) {
|
||||||
acc := StreamAccumulator{SearchEnabled: true, StripReferenceMarkers: true}
|
acc := StreamAccumulator{SearchEnabled: true, StripReferenceMarkers: true}
|
||||||
result := acc.Apply(sse.LineResult{
|
result := acc.Apply(sse.LineResult{
|
||||||
|
|||||||
Reference in New Issue
Block a user