Sanitize leaked tool-call wire format in assistant text

This commit is contained in:
CJACK.
2026-03-22 22:17:40 +08:00
parent 5887821a9d
commit 87c231e736
2 changed files with 22 additions and 0 deletions

View File

@@ -6,6 +6,9 @@ import (
var leakedToolHistoryPattern = regexp.MustCompile(`(?is)\[TOOL_CALL_HISTORY\][\s\S]*?\[/TOOL_CALL_HISTORY\]|\[TOOL_RESULT_HISTORY\][\s\S]*?\[/TOOL_RESULT_HISTORY\]`)
var emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```")
var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\s*\{[\s\S]*?\}\s*,\s*"id"\s*:\s*"call[^"]*"\s*,\s*"type"\s*:\s*"function"\s*}\]`)
var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`)
var leakedMetaMarkerPattern = regexp.MustCompile(`(?is)<\s*\|\s*(?:assistant|tool|end_of_sentence|end_of_thinking)\s*\|\s*>`)
func sanitizeLeakedToolHistory(text string) string {
if text == "" {
@@ -13,5 +16,8 @@ func sanitizeLeakedToolHistory(text string) string {
}
out := leakedToolHistoryPattern.ReplaceAllString(text, "")
out = emptyJSONFencePattern.ReplaceAllString(out, "")
out = leakedToolCallArrayPattern.ReplaceAllString(out, "")
out = leakedToolResultBlobPattern.ReplaceAllString(out, "")
out = leakedMetaMarkerPattern.ReplaceAllString(out, "")
return out
}

View File

@@ -77,6 +77,22 @@ func TestFlushToolSieveDropsToolResultHistoryLeak(t *testing.T) {
}
}
func TestSanitizeLeakedToolHistoryRemovesLeakedWireToolCallAndResult(t *testing.T) {
raw := "开始\n[{\"function\":{\"arguments\":\"{\\\"command\\\":\\\"java -version\\\"}\",\"name\":\"exec\"},\"id\":\"callb9a321\",\"type\":\"function\"}]< | Tool | >{\"content\":\"openjdk version 21\",\"tool_call_id\":\"callb9a321\"}\n结束"
got := sanitizeLeakedToolHistory(raw)
if got != "开始\n\n结束" {
t.Fatalf("unexpected sanitize result for leaked wire format: %q", got)
}
}
func TestSanitizeLeakedToolHistoryRemovesStandaloneMetaMarkers(t *testing.T) {
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C"
got := sanitizeLeakedToolHistory(raw)
if got != "ABC" {
t.Fatalf("unexpected sanitize result for meta markers: %q", got)
}
}
func TestProcessToolSieveChunkSplitsResultHistoryBoundary(t *testing.T) {
var state toolStreamSieveState
parts := []string{