From 87c231e7363b5d04cd7856b6e226fed3905e3694 Mon Sep 17 00:00:00 2001 From: "CJACK." Date: Sun, 22 Mar 2026 22:17:40 +0800 Subject: [PATCH] Sanitize leaked tool-call wire format in assistant text --- internal/adapter/openai/tool_history_sanitize.go | 6 ++++++ .../adapter/openai/tool_history_sanitize_test.go | 16 ++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/internal/adapter/openai/tool_history_sanitize.go b/internal/adapter/openai/tool_history_sanitize.go index 126414a..18b05e9 100644 --- a/internal/adapter/openai/tool_history_sanitize.go +++ b/internal/adapter/openai/tool_history_sanitize.go @@ -6,6 +6,9 @@ import ( var leakedToolHistoryPattern = regexp.MustCompile(`(?is)\[TOOL_CALL_HISTORY\][\s\S]*?\[/TOOL_CALL_HISTORY\]|\[TOOL_RESULT_HISTORY\][\s\S]*?\[/TOOL_RESULT_HISTORY\]`) var emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```") +var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\s*\{[\s\S]*?\}\s*,\s*"id"\s*:\s*"call[^"]*"\s*,\s*"type"\s*:\s*"function"\s*}\]`) +var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`) +var leakedMetaMarkerPattern = regexp.MustCompile(`(?is)<\s*\|\s*(?:assistant|tool|end_of_sentence|end_of_thinking)\s*\|\s*>`) func sanitizeLeakedToolHistory(text string) string { if text == "" { @@ -13,5 +16,8 @@ func sanitizeLeakedToolHistory(text string) string { } out := leakedToolHistoryPattern.ReplaceAllString(text, "") out = emptyJSONFencePattern.ReplaceAllString(out, "") + out = leakedToolCallArrayPattern.ReplaceAllString(out, "") + out = leakedToolResultBlobPattern.ReplaceAllString(out, "") + out = leakedMetaMarkerPattern.ReplaceAllString(out, "") return out } diff --git a/internal/adapter/openai/tool_history_sanitize_test.go b/internal/adapter/openai/tool_history_sanitize_test.go index 7c10ad2..3eb434a 100644 --- a/internal/adapter/openai/tool_history_sanitize_test.go +++ b/internal/adapter/openai/tool_history_sanitize_test.go @@ -77,6 +77,22 @@ func TestFlushToolSieveDropsToolResultHistoryLeak(t *testing.T) { } } +func TestSanitizeLeakedToolHistoryRemovesLeakedWireToolCallAndResult(t *testing.T) { + raw := "开始\n[{\"function\":{\"arguments\":\"{\\\"command\\\":\\\"java -version\\\"}\",\"name\":\"exec\"},\"id\":\"callb9a321\",\"type\":\"function\"}]< | Tool | >{\"content\":\"openjdk version 21\",\"tool_call_id\":\"callb9a321\"}\n结束" + got := sanitizeLeakedToolHistory(raw) + if got != "开始\n\n结束" { + t.Fatalf("unexpected sanitize result for leaked wire format: %q", got) + } +} + +func TestSanitizeLeakedToolHistoryRemovesStandaloneMetaMarkers(t *testing.T) { + raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C" + got := sanitizeLeakedToolHistory(raw) + if got != "ABC" { + t.Fatalf("unexpected sanitize result for meta markers: %q", got) + } +} + func TestProcessToolSieveChunkSplitsResultHistoryBoundary(t *testing.T) { var state toolStreamSieveState parts := []string{