Merge pull request #157 from CJackHwang/codex/analyze-toolcall-output-formatting-issue

Sanitize leaked tool-call wire format in assistant text
This commit is contained in:
CJACK.
2026-03-22 22:46:07 +08:00
committed by GitHub
4 changed files with 42 additions and 69 deletions

View File

@@ -2,7 +2,6 @@ package openai
import (
"encoding/json"
"fmt"
"strings"
"ds2api/internal/prompt"
@@ -56,45 +55,13 @@ func normalizeOpenAIMessagesForPrompt(raw []any, traceID string) []map[string]an
}
func buildAssistantContentForPrompt(msg map[string]any) string {
content := normalizeOpenAIContentForPrompt(msg["content"])
toolCalls := normalizeAssistantToolCallsForPrompt(msg["tool_calls"])
if toolCalls == "" {
return strings.TrimSpace(content)
}
if strings.TrimSpace(content) == "" {
return toolCalls
}
return strings.TrimSpace(content + "\n" + toolCalls)
}
func normalizeAssistantToolCallsForPrompt(v any) string {
calls, ok := v.([]any)
if !ok || len(calls) == 0 {
return ""
}
b, err := json.Marshal(calls)
if err != nil {
return strings.TrimSpace(fmt.Sprintf("%v", calls))
}
return strings.TrimSpace(string(b))
return strings.TrimSpace(normalizeOpenAIContentForPrompt(msg["content"]))
}
func buildToolContentForPrompt(msg map[string]any) string {
payload := map[string]any{
"content": msg["content"],
}
if id := strings.TrimSpace(asString(msg["tool_call_id"])); id != "" {
payload["tool_call_id"] = id
}
if id := strings.TrimSpace(asString(msg["id"])); id != "" {
payload["id"] = id
}
if name := strings.TrimSpace(asString(msg["name"])); name != "" {
payload["name"] = name
}
content := normalizeOpenAIContentForPrompt(payload)
content := normalizeOpenAIContentForPrompt(msg["content"])
if strings.TrimSpace(content) == "" {
return `{"content":"null"}`
return "null"
}
return content
}

View File

@@ -34,11 +34,11 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes
}
normalized := normalizeOpenAIMessagesForPrompt(raw, "")
if len(normalized) != 4 {
t.Fatalf("expected 4 normalized messages with assistant tool_call history preserved, got %d", len(normalized))
if len(normalized) != 3 {
t.Fatalf("expected 3 normalized messages with tool-call-only assistant turn omitted, got %d", len(normalized))
}
toolContent, _ := normalized[3]["content"].(string)
if !strings.Contains(toolContent, `\"temp\":18`) {
toolContent, _ := normalized[2]["content"].(string)
if !strings.Contains(toolContent, `"temp":18`) {
t.Fatalf("tool result should be transparently forwarded, got %q", toolContent)
}
if strings.Contains(toolContent, "[TOOL_RESULT_HISTORY]") {
@@ -87,8 +87,8 @@ func TestNormalizeOpenAIMessagesForPrompt_ToolArrayBlocksJoined(t *testing.T) {
normalized := normalizeOpenAIMessagesForPrompt(raw, "")
got, _ := normalized[0]["content"].(string)
if !strings.Contains(got, `"line-1"`) || !strings.Contains(got, `"line-2"`) || !strings.Contains(got, `"name":"read_file"`) {
t.Fatalf("expected tool envelope to preserve content blocks and metadata, got %q", got)
if !strings.Contains(got, `line-1`) || !strings.Contains(got, `line-2`) {
t.Fatalf("expected tool content blocks preserved, got %q", got)
}
}
@@ -112,7 +112,7 @@ func TestNormalizeOpenAIMessagesForPrompt_FunctionRoleCompatible(t *testing.T) {
t.Fatalf("expected function role normalized as tool, got %#v", normalized[0]["role"])
}
got, _ := normalized[0]["content"].(string)
if !strings.Contains(got, `"name":"legacy_tool"`) || !strings.Contains(got, `"ok":true`) {
if !strings.Contains(got, `"ok":true`) || strings.Contains(got, `"name":"legacy_tool"`) {
t.Fatalf("unexpected normalized function-role content: %q", got)
}
}
@@ -139,8 +139,8 @@ func TestNormalizeOpenAIMessagesForPrompt_EmptyToolContentPreservedAsNull(t *tes
t.Fatalf("expected tool role preserved, got %#v", normalized[0]["role"])
}
got, _ := normalized[0]["content"].(string)
if !strings.Contains(got, `"content":""`) || !strings.Contains(got, `"name":"noop_tool"`) || !strings.Contains(got, `"tool_call_id":"call_5"`) {
t.Fatalf("expected tool metadata preserved in content envelope, got %q", got)
if got != "null" {
t.Fatalf("expected empty tool content normalized as null string, got %q", got)
}
}
@@ -170,12 +170,8 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantMultipleToolCallsRemainSepara
}
normalized := normalizeOpenAIMessagesForPrompt(raw, "")
if len(normalized) != 1 {
t.Fatalf("expected assistant tool_call-only message to be preserved, got %#v", normalized)
}
got, _ := normalized[0]["content"].(string)
if !strings.Contains(got, `"name":"search_web"`) || !strings.Contains(got, `"name":"eval_javascript"`) {
t.Fatalf("expected tool_calls payload preserved in assistant content, got %q", got)
if len(normalized) != 0 {
t.Fatalf("expected assistant tool_call-only message omitted, got %#v", normalized)
}
}
@@ -196,12 +192,8 @@ func TestNormalizeOpenAIMessagesForPrompt_PreservesConcatenatedToolArguments(t *
}
normalized := normalizeOpenAIMessagesForPrompt(raw, "")
if len(normalized) != 1 {
t.Fatalf("expected assistant tool_call-only content to be preserved, got %#v", normalized)
}
got, _ := normalized[0]["content"].(string)
if !strings.Contains(got, `{}{\"query\":\"测试工具调用\"}`) {
t.Fatalf("expected concatenated arguments preserved verbatim, got %q", got)
if len(normalized) != 0 {
t.Fatalf("expected assistant tool_call-only content omitted, got %#v", normalized)
}
}
@@ -222,12 +214,8 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsMissingNameAreDroppe
}
normalized := normalizeOpenAIMessagesForPrompt(raw, "")
if len(normalized) != 1 {
t.Fatalf("expected assistant tool_calls history to be preserved even when name missing, got %#v", normalized)
}
got, _ := normalized[0]["content"].(string)
if !strings.Contains(got, "call_missing_name") {
t.Fatalf("expected raw tool_call payload preserved, got %q", got)
if len(normalized) != 0 {
t.Fatalf("expected assistant tool_calls without text omitted, got %#v", normalized)
}
}
@@ -249,12 +237,8 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantNilContentDoesNotInjectNullLi
}
normalized := normalizeOpenAIMessagesForPrompt(raw, "")
if len(normalized) != 1 {
t.Fatalf("expected nil-content assistant tool_call-only message to be preserved, got %#v", normalized)
}
got, _ := normalized[0]["content"].(string)
if !strings.Contains(got, "send_file_to_user") {
t.Fatalf("expected tool call payload preserved, got %q", got)
if len(normalized) != 0 {
t.Fatalf("expected nil-content assistant tool_call-only message omitted, got %#v", normalized)
}
}

View File

@@ -6,6 +6,9 @@ import (
var leakedToolHistoryPattern = regexp.MustCompile(`(?is)\[TOOL_CALL_HISTORY\][\s\S]*?\[/TOOL_CALL_HISTORY\]|\[TOOL_RESULT_HISTORY\][\s\S]*?\[/TOOL_RESULT_HISTORY\]`)
var emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```")
var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\s*\{[\s\S]*?\}\s*,\s*"id"\s*:\s*"call[^"]*"\s*,\s*"type"\s*:\s*"function"\s*}\]`)
var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`)
var leakedMetaMarkerPattern = regexp.MustCompile(`(?is)<\s*\|\s*(?:assistant|tool|end_of_sentence|end_of_thinking)\s*\|\s*>`)
func sanitizeLeakedToolHistory(text string) string {
if text == "" {
@@ -13,5 +16,8 @@ func sanitizeLeakedToolHistory(text string) string {
}
out := leakedToolHistoryPattern.ReplaceAllString(text, "")
out = emptyJSONFencePattern.ReplaceAllString(out, "")
out = leakedToolCallArrayPattern.ReplaceAllString(out, "")
out = leakedToolResultBlobPattern.ReplaceAllString(out, "")
out = leakedMetaMarkerPattern.ReplaceAllString(out, "")
return out
}

View File

@@ -77,6 +77,22 @@ func TestFlushToolSieveDropsToolResultHistoryLeak(t *testing.T) {
}
}
func TestSanitizeLeakedToolHistoryRemovesLeakedWireToolCallAndResult(t *testing.T) {
raw := "开始\n[{\"function\":{\"arguments\":\"{\\\"command\\\":\\\"java -version\\\"}\",\"name\":\"exec\"},\"id\":\"callb9a321\",\"type\":\"function\"}]< | Tool | >{\"content\":\"openjdk version 21\",\"tool_call_id\":\"callb9a321\"}\n结束"
got := sanitizeLeakedToolHistory(raw)
if got != "开始\n\n结束" {
t.Fatalf("unexpected sanitize result for leaked wire format: %q", got)
}
}
func TestSanitizeLeakedToolHistoryRemovesStandaloneMetaMarkers(t *testing.T) {
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C"
got := sanitizeLeakedToolHistory(raw)
if got != "ABC" {
t.Fatalf("unexpected sanitize result for meta markers: %q", got)
}
}
func TestProcessToolSieveChunkSplitsResultHistoryBoundary(t *testing.T) {
var state toolStreamSieveState
parts := []string{