mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-04 00:15:28 +08:00
Merge pull request #157 from CJackHwang/codex/analyze-toolcall-output-formatting-issue
Sanitize leaked tool-call wire format in assistant text
This commit is contained in:
@@ -2,7 +2,6 @@ package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"ds2api/internal/prompt"
|
||||
@@ -56,45 +55,13 @@ func normalizeOpenAIMessagesForPrompt(raw []any, traceID string) []map[string]an
|
||||
}
|
||||
|
||||
func buildAssistantContentForPrompt(msg map[string]any) string {
|
||||
content := normalizeOpenAIContentForPrompt(msg["content"])
|
||||
toolCalls := normalizeAssistantToolCallsForPrompt(msg["tool_calls"])
|
||||
if toolCalls == "" {
|
||||
return strings.TrimSpace(content)
|
||||
}
|
||||
if strings.TrimSpace(content) == "" {
|
||||
return toolCalls
|
||||
}
|
||||
return strings.TrimSpace(content + "\n" + toolCalls)
|
||||
}
|
||||
|
||||
func normalizeAssistantToolCallsForPrompt(v any) string {
|
||||
calls, ok := v.([]any)
|
||||
if !ok || len(calls) == 0 {
|
||||
return ""
|
||||
}
|
||||
b, err := json.Marshal(calls)
|
||||
if err != nil {
|
||||
return strings.TrimSpace(fmt.Sprintf("%v", calls))
|
||||
}
|
||||
return strings.TrimSpace(string(b))
|
||||
return strings.TrimSpace(normalizeOpenAIContentForPrompt(msg["content"]))
|
||||
}
|
||||
|
||||
func buildToolContentForPrompt(msg map[string]any) string {
|
||||
payload := map[string]any{
|
||||
"content": msg["content"],
|
||||
}
|
||||
if id := strings.TrimSpace(asString(msg["tool_call_id"])); id != "" {
|
||||
payload["tool_call_id"] = id
|
||||
}
|
||||
if id := strings.TrimSpace(asString(msg["id"])); id != "" {
|
||||
payload["id"] = id
|
||||
}
|
||||
if name := strings.TrimSpace(asString(msg["name"])); name != "" {
|
||||
payload["name"] = name
|
||||
}
|
||||
content := normalizeOpenAIContentForPrompt(payload)
|
||||
content := normalizeOpenAIContentForPrompt(msg["content"])
|
||||
if strings.TrimSpace(content) == "" {
|
||||
return `{"content":"null"}`
|
||||
return "null"
|
||||
}
|
||||
return content
|
||||
}
|
||||
|
||||
@@ -34,11 +34,11 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes
|
||||
}
|
||||
|
||||
normalized := normalizeOpenAIMessagesForPrompt(raw, "")
|
||||
if len(normalized) != 4 {
|
||||
t.Fatalf("expected 4 normalized messages with assistant tool_call history preserved, got %d", len(normalized))
|
||||
if len(normalized) != 3 {
|
||||
t.Fatalf("expected 3 normalized messages with tool-call-only assistant turn omitted, got %d", len(normalized))
|
||||
}
|
||||
toolContent, _ := normalized[3]["content"].(string)
|
||||
if !strings.Contains(toolContent, `\"temp\":18`) {
|
||||
toolContent, _ := normalized[2]["content"].(string)
|
||||
if !strings.Contains(toolContent, `"temp":18`) {
|
||||
t.Fatalf("tool result should be transparently forwarded, got %q", toolContent)
|
||||
}
|
||||
if strings.Contains(toolContent, "[TOOL_RESULT_HISTORY]") {
|
||||
@@ -87,8 +87,8 @@ func TestNormalizeOpenAIMessagesForPrompt_ToolArrayBlocksJoined(t *testing.T) {
|
||||
|
||||
normalized := normalizeOpenAIMessagesForPrompt(raw, "")
|
||||
got, _ := normalized[0]["content"].(string)
|
||||
if !strings.Contains(got, `"line-1"`) || !strings.Contains(got, `"line-2"`) || !strings.Contains(got, `"name":"read_file"`) {
|
||||
t.Fatalf("expected tool envelope to preserve content blocks and metadata, got %q", got)
|
||||
if !strings.Contains(got, `line-1`) || !strings.Contains(got, `line-2`) {
|
||||
t.Fatalf("expected tool content blocks preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,7 +112,7 @@ func TestNormalizeOpenAIMessagesForPrompt_FunctionRoleCompatible(t *testing.T) {
|
||||
t.Fatalf("expected function role normalized as tool, got %#v", normalized[0]["role"])
|
||||
}
|
||||
got, _ := normalized[0]["content"].(string)
|
||||
if !strings.Contains(got, `"name":"legacy_tool"`) || !strings.Contains(got, `"ok":true`) {
|
||||
if !strings.Contains(got, `"ok":true`) || strings.Contains(got, `"name":"legacy_tool"`) {
|
||||
t.Fatalf("unexpected normalized function-role content: %q", got)
|
||||
}
|
||||
}
|
||||
@@ -139,8 +139,8 @@ func TestNormalizeOpenAIMessagesForPrompt_EmptyToolContentPreservedAsNull(t *tes
|
||||
t.Fatalf("expected tool role preserved, got %#v", normalized[0]["role"])
|
||||
}
|
||||
got, _ := normalized[0]["content"].(string)
|
||||
if !strings.Contains(got, `"content":""`) || !strings.Contains(got, `"name":"noop_tool"`) || !strings.Contains(got, `"tool_call_id":"call_5"`) {
|
||||
t.Fatalf("expected tool metadata preserved in content envelope, got %q", got)
|
||||
if got != "null" {
|
||||
t.Fatalf("expected empty tool content normalized as null string, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -170,12 +170,8 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantMultipleToolCallsRemainSepara
|
||||
}
|
||||
|
||||
normalized := normalizeOpenAIMessagesForPrompt(raw, "")
|
||||
if len(normalized) != 1 {
|
||||
t.Fatalf("expected assistant tool_call-only message to be preserved, got %#v", normalized)
|
||||
}
|
||||
got, _ := normalized[0]["content"].(string)
|
||||
if !strings.Contains(got, `"name":"search_web"`) || !strings.Contains(got, `"name":"eval_javascript"`) {
|
||||
t.Fatalf("expected tool_calls payload preserved in assistant content, got %q", got)
|
||||
if len(normalized) != 0 {
|
||||
t.Fatalf("expected assistant tool_call-only message omitted, got %#v", normalized)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -196,12 +192,8 @@ func TestNormalizeOpenAIMessagesForPrompt_PreservesConcatenatedToolArguments(t *
|
||||
}
|
||||
|
||||
normalized := normalizeOpenAIMessagesForPrompt(raw, "")
|
||||
if len(normalized) != 1 {
|
||||
t.Fatalf("expected assistant tool_call-only content to be preserved, got %#v", normalized)
|
||||
}
|
||||
got, _ := normalized[0]["content"].(string)
|
||||
if !strings.Contains(got, `{}{\"query\":\"测试工具调用\"}`) {
|
||||
t.Fatalf("expected concatenated arguments preserved verbatim, got %q", got)
|
||||
if len(normalized) != 0 {
|
||||
t.Fatalf("expected assistant tool_call-only content omitted, got %#v", normalized)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -222,12 +214,8 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsMissingNameAreDroppe
|
||||
}
|
||||
|
||||
normalized := normalizeOpenAIMessagesForPrompt(raw, "")
|
||||
if len(normalized) != 1 {
|
||||
t.Fatalf("expected assistant tool_calls history to be preserved even when name missing, got %#v", normalized)
|
||||
}
|
||||
got, _ := normalized[0]["content"].(string)
|
||||
if !strings.Contains(got, "call_missing_name") {
|
||||
t.Fatalf("expected raw tool_call payload preserved, got %q", got)
|
||||
if len(normalized) != 0 {
|
||||
t.Fatalf("expected assistant tool_calls without text omitted, got %#v", normalized)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -249,12 +237,8 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantNilContentDoesNotInjectNullLi
|
||||
}
|
||||
|
||||
normalized := normalizeOpenAIMessagesForPrompt(raw, "")
|
||||
if len(normalized) != 1 {
|
||||
t.Fatalf("expected nil-content assistant tool_call-only message to be preserved, got %#v", normalized)
|
||||
}
|
||||
got, _ := normalized[0]["content"].(string)
|
||||
if !strings.Contains(got, "send_file_to_user") {
|
||||
t.Fatalf("expected tool call payload preserved, got %q", got)
|
||||
if len(normalized) != 0 {
|
||||
t.Fatalf("expected nil-content assistant tool_call-only message omitted, got %#v", normalized)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,9 @@ import (
|
||||
|
||||
// leakedToolHistoryPattern matches a whole [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY]
// or [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] span. The match is
// case-insensitive, spans newlines, and is non-greedy so that separate blocks
// are matched individually rather than as one span.
var leakedToolHistoryPattern = regexp.MustCompile(`(?is)\[TOOL_CALL_HISTORY\][\s\S]*?\[/TOOL_CALL_HISTORY\]|\[TOOL_RESULT_HISTORY\][\s\S]*?\[/TOOL_RESULT_HISTORY\]`)
|
||||
// emptyJSONFencePattern matches a json-tagged Markdown code fence whose body
// is empty or whitespace only.
var emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```")
|
||||
// leakedToolCallArrayPattern matches a JSON array that opens with an object
// whose first key is function (holding a nested object) and that closes with
// an id value starting with call followed by type set to function. The key
// order function, id, type is fixed by the pattern; a payload with the keys
// in any other order is not matched.
var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\s*\{[\s\S]*?\}\s*,\s*"id"\s*:\s*"call[^"]*"\s*,\s*"type"\s*:\s*"function"\s*}\]`)
|
||||
// leakedToolResultBlobPattern matches a tool role marker (angle bracket, pipe,
// the word tool, pipe, angle bracket, with optional whitespace between the
// pieces) followed by a JSON object that ends with a tool_call_id value
// starting with call.
var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`)
|
||||
// leakedMetaMarkerPattern matches a standalone role or control marker of the
// same angle-bracket-and-pipe form whose name is assistant, tool,
// end_of_sentence, or end_of_thinking. Matching is case-insensitive and
// tolerates whitespace around the name and the pipes.
var leakedMetaMarkerPattern = regexp.MustCompile(`(?is)<\s*\|\s*(?:assistant|tool|end_of_sentence|end_of_thinking)\s*\|\s*>`)
|
||||
|
||||
func sanitizeLeakedToolHistory(text string) string {
|
||||
if text == "" {
|
||||
@@ -13,5 +16,8 @@ func sanitizeLeakedToolHistory(text string) string {
|
||||
}
|
||||
out := leakedToolHistoryPattern.ReplaceAllString(text, "")
|
||||
out = emptyJSONFencePattern.ReplaceAllString(out, "")
|
||||
out = leakedToolCallArrayPattern.ReplaceAllString(out, "")
|
||||
out = leakedToolResultBlobPattern.ReplaceAllString(out, "")
|
||||
out = leakedMetaMarkerPattern.ReplaceAllString(out, "")
|
||||
return out
|
||||
}
|
||||
|
||||
@@ -77,6 +77,22 @@ func TestFlushToolSieveDropsToolResultHistoryLeak(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestSanitizeLeakedToolHistoryRemovesLeakedWireToolCallAndResult checks that
// sanitizeLeakedToolHistory removes a raw tool_calls JSON array and the tool
// result blob that directly follows it, while keeping the surrounding text.
func TestSanitizeLeakedToolHistoryRemovesLeakedWireToolCallAndResult(t *testing.T) {
|
||||
// Input layout: leading text, newline, a tool-call array, a spaced tool
// marker with its result object, newline, trailing text.
raw := "开始\n[{\"function\":{\"arguments\":\"{\\\"command\\\":\\\"java -version\\\"}\",\"name\":\"exec\"},\"id\":\"callb9a321\",\"type\":\"function\"}]< | Tool | >{\"content\":\"openjdk version 21\",\"tool_call_id\":\"callb9a321\"}\n结束"
|
||||
got := sanitizeLeakedToolHistory(raw)
|
||||
// Only the two newlines that bracketed the leaked payload should remain
// between the leading and trailing text.
if got != "开始\n\n结束" {
|
||||
t.Fatalf("unexpected sanitize result for leaked wire format: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSanitizeLeakedToolHistoryRemovesStandaloneMetaMarkers checks that
// sanitizeLeakedToolHistory strips bare control markers that appear in text
// without any accompanying JSON payload.
func TestSanitizeLeakedToolHistoryRemovesStandaloneMetaMarkers(t *testing.T) {
|
||||
// Three markers with inner spaces, one of them with a capitalized name,
// are placed between the letters A, B and C.
raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C"
|
||||
got := sanitizeLeakedToolHistory(raw)
|
||||
// Only the plain letters should remain once every marker is removed.
if got != "ABC" {
|
||||
t.Fatalf("unexpected sanitize result for meta markers: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveChunkSplitsResultHistoryBoundary(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
parts := []string{
|
||||
|
||||
Reference in New Issue
Block a user