mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-11 11:47:43 +08:00
fix(openai): strip leaked tool result markers
This commit is contained in:
@@ -10,6 +10,9 @@ import (
|
||||
// Patterns used to scrub leaked tool-call and tool-result artifacts from text.
// All of them are case-insensitive and let "." match newlines ((?is)).
var (
	// emptyJSONFencePattern matches a ```json fence whose body is empty or
	// whitespace only.
	emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```")

	// leakedToolCallArrayPattern matches a JSON array holding one object with
	// a "function" object, an "id" that starts with "call", and
	// "type":"function", in that key order.
	leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\s*\{[\s\S]*?\}\s*,\s*"id"\s*:\s*"call[^"]*"\s*,\s*"type"\s*:\s*"function"\s*}\]`)

	// leakedToolResultBlobPattern matches an ASCII-bar <|tool|> marker
	// (whitespace tolerated around the bars) followed by a JSON object that
	// ends with a "tool_call_id" whose value starts with "call".
	leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`)

	// leakedToolResultOpenMarkerPattern matches the <|tool|> marker written
	// with either ASCII "|" or fullwidth U+FF5C bars.
	leakedToolResultOpenMarkerPattern = regexp.MustCompile(`(?is)<[\|\x{ff5c}]\s*tool\s*[\|\x{ff5c}]>`)

	// leakedToolResultCloseMarkerPattern matches the end-of-tool-results
	// marker; words are joined by "_" or U+2581, and the separator between
	// "tool" and "results" is optional.
	leakedToolResultCloseMarkerPattern = regexp.MustCompile(`(?is)<[\|\x{ff5c}]\s*end[_▁]of[_▁]tool[_▁]?results\s*[\|\x{ff5c}]>`)

	// leakedToolResultSectionPattern matches an open marker, the shortest
	// possible payload, and the following close marker as one span.
	leakedToolResultSectionPattern = regexp.MustCompile(`(?is)<[\|\x{ff5c}]\s*tool\s*[\|\x{ff5c}]>[\s\S]*?<[\|\x{ff5c}]\s*end[_▁]of[_▁]tool[_▁]?results\s*[\|\x{ff5c}]>`)

	// leakedThinkTagPattern matches an opening or closing <think> tag with
	// optional whitespace inside the angle brackets.
	leakedThinkTagPattern = regexp.MustCompile(`(?is)</?\s*think\s*>`)
)
|
||||
|
||||
@@ -29,7 +32,8 @@ var leakedThoughtMarkerPattern = regexp.MustCompile(`(?i)<[\|\x{ff5c}]\s*(?:begi
|
||||
// halfwidth or legacy U+FF5C fullwidth delimiters:
|
||||
// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|>
|
||||
// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|>
|
||||
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|\x{ff5c}]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[\|\x{ff5c}]>`)
|
||||
// - compound assistant markers: <|Assistant_END_OF_TOOL_CALLS|>
|
||||
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|\x{ff5c}]\s*(?:assistant(?:[_▁]end[_▁]of[_▁]tool[_▁]?calls)?|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]tool[_▁]?results|end[_▁]of[_▁]tool[_▁]?calls|end[_▁]of[_▁]instructions)\s*[\|\x{ff5c}]>`)
|
||||
|
||||
// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
|
||||
// when the sieve fails to capture them. These are applied only to complete
|
||||
@@ -52,6 +56,7 @@ func sanitizeLeakedOutput(text string) string {
|
||||
}
|
||||
out := emptyJSONFencePattern.ReplaceAllString(text, "")
|
||||
out = leakedToolCallArrayPattern.ReplaceAllString(out, "")
|
||||
out = leakedToolResultSectionPattern.ReplaceAllString(out, "")
|
||||
out = leakedToolResultBlobPattern.ReplaceAllString(out, "")
|
||||
out = stripDanglingThinkSuffix(out)
|
||||
out = leakedThinkTagPattern.ReplaceAllString(out, "")
|
||||
@@ -63,6 +68,40 @@ func sanitizeLeakedOutput(text string) string {
|
||||
return out
|
||||
}
|
||||
|
||||
func stripLeakedToolResultSectionsDelta(text string, inside *bool) string {
|
||||
if text == "" || inside == nil {
|
||||
return text
|
||||
}
|
||||
var b strings.Builder
|
||||
pos := 0
|
||||
for pos < len(text) {
|
||||
if *inside {
|
||||
loc := leakedToolResultCloseMarkerPattern.FindStringIndex(text[pos:])
|
||||
if loc == nil {
|
||||
return b.String()
|
||||
}
|
||||
*inside = false
|
||||
pos += loc[1]
|
||||
continue
|
||||
}
|
||||
loc := leakedToolResultOpenMarkerPattern.FindStringIndex(text[pos:])
|
||||
if loc == nil {
|
||||
b.WriteString(text[pos:])
|
||||
break
|
||||
}
|
||||
start := pos + loc[0]
|
||||
openEnd := pos + loc[1]
|
||||
b.WriteString(text[pos:start])
|
||||
closeLoc := leakedToolResultCloseMarkerPattern.FindStringIndex(text[openEnd:])
|
||||
if closeLoc == nil {
|
||||
*inside = true
|
||||
break
|
||||
}
|
||||
pos = openEnd + closeLoc[1]
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func stripLeakedToolCallWrapperBlocks(text string) string {
|
||||
if text == "" {
|
||||
return text
|
||||
|
||||
@@ -16,6 +16,9 @@ type StreamAccumulator struct {
|
||||
ToolDetectionThinking strings.Builder
|
||||
RawText strings.Builder
|
||||
Text strings.Builder
|
||||
|
||||
thinkingToolResultSectionOpen bool
|
||||
textToolResultSectionOpen bool
|
||||
}
|
||||
|
||||
type StreamPartDelta struct {
|
||||
@@ -69,7 +72,8 @@ func (a *StreamAccumulator) applyThinkingPart(text string) StreamPartDelta {
|
||||
if !a.ThinkingEnabled || rawTrimmed == "" {
|
||||
return delta
|
||||
}
|
||||
cleanedText := CleanVisibleOutput(rawTrimmed, a.StripReferenceMarkers)
|
||||
visibleCandidate := stripLeakedToolResultSectionsDelta(rawTrimmed, &a.thinkingToolResultSectionOpen)
|
||||
cleanedText := CleanVisibleOutput(visibleCandidate, a.StripReferenceMarkers)
|
||||
if cleanedText == "" {
|
||||
return delta
|
||||
}
|
||||
@@ -89,11 +93,15 @@ func (a *StreamAccumulator) applyTextPart(text string) StreamPartDelta {
|
||||
}
|
||||
a.RawText.WriteString(rawTrimmed)
|
||||
delta := StreamPartDelta{Type: "text", RawText: rawTrimmed}
|
||||
if a.SearchEnabled && sse.IsCitation(rawTrimmed) {
|
||||
visibleCandidate := stripLeakedToolResultSectionsDelta(rawTrimmed, &a.textToolResultSectionOpen)
|
||||
if visibleCandidate == "" {
|
||||
return delta
|
||||
}
|
||||
if a.SearchEnabled && sse.IsCitation(visibleCandidate) {
|
||||
delta.CitationOnly = true
|
||||
return delta
|
||||
}
|
||||
cleanedText := CleanVisibleOutput(rawTrimmed, a.StripReferenceMarkers)
|
||||
cleanedText := CleanVisibleOutput(visibleCandidate, a.StripReferenceMarkers)
|
||||
trimmed := sse.TrimContinuationOverlapFromBuilder(&a.Text, cleanedText)
|
||||
if trimmed == "" {
|
||||
return delta
|
||||
|
||||
@@ -96,6 +96,87 @@ func TestStreamAccumulatorSuppressesCitationTextWhenSearchEnabled(t *testing.T)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamAccumulatorStripsToolResultSectionAcrossTextChunks(t *testing.T) {
|
||||
acc := StreamAccumulator{StripReferenceMarkers: true}
|
||||
first := acc.Apply(sse.LineResult{
|
||||
Parsed: true,
|
||||
Parts: []sse.ContentPart{{Type: "text", Text: "visible:<|Tool|>"}},
|
||||
})
|
||||
second := acc.Apply(sse.LineResult{
|
||||
Parsed: true,
|
||||
Parts: []sse.ContentPart{{Type: "text", Text: `[{"content":"secret","tool_call_id":"call_123"}]`}},
|
||||
})
|
||||
third := acc.Apply(sse.LineResult{
|
||||
Parsed: true,
|
||||
Parts: []sse.ContentPart{{Type: "text", Text: "<|end_of_toolresults|> after"}},
|
||||
})
|
||||
|
||||
if got := acc.RawText.String(); got != `visible:<|Tool|>[{"content":"secret","tool_call_id":"call_123"}]<|end_of_toolresults|> after` {
|
||||
t.Fatalf("raw text = %q", got)
|
||||
}
|
||||
if got := acc.Text.String(); got != "visible: after" {
|
||||
t.Fatalf("visible text = %q", got)
|
||||
}
|
||||
if !first.ContentSeen || !second.ContentSeen || !third.ContentSeen {
|
||||
t.Fatalf("expected all chunks to mark upstream content")
|
||||
}
|
||||
if got := first.Parts[0].VisibleText; got != "visible:" {
|
||||
t.Fatalf("first visible delta = %q", got)
|
||||
}
|
||||
if got := second.Parts[0].VisibleText; got != "" {
|
||||
t.Fatalf("payload visible delta = %q", got)
|
||||
}
|
||||
if got := third.Parts[0].VisibleText; got != " after" {
|
||||
t.Fatalf("closing visible delta = %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamAccumulatorStripsFullwidthToolResultSectionAcrossTextChunks(t *testing.T) {
|
||||
acc := StreamAccumulator{StripReferenceMarkers: true}
|
||||
acc.Apply(sse.LineResult{
|
||||
Parsed: true,
|
||||
Parts: []sse.ContentPart{{Type: "text", Text: "x<|Tool|>"}},
|
||||
})
|
||||
acc.Apply(sse.LineResult{
|
||||
Parsed: true,
|
||||
Parts: []sse.ContentPart{{Type: "text", Text: `{"content":"secret"}`}},
|
||||
})
|
||||
acc.Apply(sse.LineResult{
|
||||
Parsed: true,
|
||||
Parts: []sse.ContentPart{{Type: "text", Text: "<|end▁of▁toolresults|>y"}},
|
||||
})
|
||||
|
||||
if got := acc.Text.String(); got != "xy" {
|
||||
t.Fatalf("visible text = %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamAccumulatorStripsToolResultSectionAcrossThinkingChunks(t *testing.T) {
|
||||
acc := StreamAccumulator{ThinkingEnabled: true, StripReferenceMarkers: true}
|
||||
acc.Apply(sse.LineResult{
|
||||
Parsed: true,
|
||||
Parts: []sse.ContentPart{{Type: "thinking", Text: "thought <|Tool|>"}},
|
||||
})
|
||||
payload := acc.Apply(sse.LineResult{
|
||||
Parsed: true,
|
||||
Parts: []sse.ContentPart{{Type: "thinking", Text: `[{"content":"secret"}]`}},
|
||||
})
|
||||
acc.Apply(sse.LineResult{
|
||||
Parsed: true,
|
||||
Parts: []sse.ContentPart{{Type: "thinking", Text: "<|end_of_toolresults|>resumes"}},
|
||||
})
|
||||
|
||||
if got := acc.RawThinking.String(); got != `thought <|Tool|>[{"content":"secret"}]<|end_of_toolresults|>resumes` {
|
||||
t.Fatalf("raw thinking = %q", got)
|
||||
}
|
||||
if got := acc.Thinking.String(); got != "thought resumes" {
|
||||
t.Fatalf("visible thinking = %q", got)
|
||||
}
|
||||
if got := payload.Parts[0].VisibleText; got != "" {
|
||||
t.Fatalf("payload visible delta = %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStreamAccumulatorStripsInlineCitationAndReferenceMarkers(t *testing.T) {
|
||||
acc := StreamAccumulator{SearchEnabled: true, StripReferenceMarkers: true}
|
||||
result := acc.Apply(sse.LineResult{
|
||||
|
||||
Reference in New Issue
Block a user