feat: Hide raw tool-call JSON from output_text in OpenAI-style responses when structured tool calls are present.

2026-05-05 00:45:29 +08:00 · 2026-02-19 00:28:44 +08:00
parent df9aea194c
commit d21aedac83
3 changed files with 82 additions and 4 deletions
--- a/internal/adapter/openai/responses_stream_test.go
+++ b/internal/adapter/openai/responses_stream_test.go
@@ -10,7 +10,59 @@ import (
 	"testing"
 )

-func TestHandleResponsesStreamNoDuplicateTailInCompletedOutputText(t *testing.T) {
+func TestHandleResponsesStreamToolCallsHideRawOutputTextInCompleted(t *testing.T) {
+	h := &Handler{}
+	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
+	rec := httptest.NewRecorder()
+
+	sseLine := func(v string) string { // wraps v in a "response/content" JSON frame and formats it as one SSE data line
+		b, _ := json.Marshal(map[string]any{
+			"p": "response/content",
+			"v": v,
+		})
+		return "data: " + string(b) + "\n"
+	}
+
+	rawToolJSON := `{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}`
+	streamBody := sseLine(rawToolJSON) + "data: [DONE]\n" // a single content frame followed by the [DONE] line
+	resp := &http.Response{
+		StatusCode: http.StatusOK,
+		Body:       io.NopCloser(strings.NewReader(streamBody)),
+	}
+
+	h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"read_file"}) // NOTE(review): trailing []string is presumably the tool-name list — confirm against the handler signature
+
+	completed, ok := extractSSEEventPayload(rec.Body.String(), "response.completed")
+	if !ok {
+		t.Fatalf("expected response.completed event, body=%s", rec.Body.String())
+	}
+	responseObj, _ := completed["response"].(map[string]any)
+	outputText, _ := responseObj["output_text"].(string)
+	if outputText != "" { // core assertion: output_text must be empty when the stream carried tool-call JSON
+		t.Fatalf("expected empty output_text for tool_calls response, got output_text=%q", outputText)
+	}
+	output, _ := responseObj["output"].([]any)
+	if len(output) == 0 {
+		t.Fatalf("expected structured output entries, got %#v", responseObj["output"])
+	}
+	first, _ := output[0].(map[string]any)
+	if first["type"] != "tool_calls" { // the structured entry is expected to come first in output
+		t.Fatalf("expected first output type tool_calls, got %#v", first["type"])
+	}
+	toolCalls, _ := first["tool_calls"].([]any)
+	if len(toolCalls) == 0 {
+		t.Fatalf("expected at least one tool_call in output, got %#v", first["tool_calls"])
+	}
+	call0, _ := toolCalls[0].(map[string]any)
+	if call0["name"] != "read_file" {
+		t.Fatalf("unexpected tool call name: %#v", call0["name"])
+	}
+	if strings.Contains(outputText, `"tool_calls"`) { // NOTE(review): cannot fire — outputText was already required to be "" above
+		t.Fatalf("raw tool_calls JSON leaked in output_text: %q", outputText)
+	}
+}
+
+func TestHandleResponsesStreamIncompleteTailNotDuplicatedInCompletedOutputText(t *testing.T) {
 	h := &Handler{}
 	req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
 	rec := httptest.NewRecorder()
@@ -38,8 +90,8 @@ func TestHandleResponsesStreamNoDuplicateTailInCompletedOutputText(t *testing.T)
 	}
 	responseObj, _ := completed["response"].(map[string]any)
 	outputText, _ := responseObj["output_text"].(string)
-	if strings.Count(outputText, tail) != 1 {
-		t.Fatalf("expected tail to appear once in output_text, got output_text=%q", outputText)
+	if strings.Count(outputText, tail) > 1 {
+		t.Fatalf("expected incomplete tail not to be duplicated, got output_text=%q", outputText)
 	}
 }

--- a/internal/util/render.go
+++ b/internal/util/render.go
@@ -43,8 +43,12 @@ func BuildOpenAIChatCompletion(completionID, model, finalPrompt, finalThinking,

 func BuildOpenAIResponseObject(responseID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
 	detected := ParseToolCalls(finalText, toolNames)
+	exposedOutputText := finalText
 	output := make([]any, 0, 2)
 	if len(detected) > 0 {
+		// Keep structured tool output only; avoid leaking raw tool-call JSON
+		// into response.output_text for clients reading completed responses.
+		exposedOutputText = ""
 		toolCalls := make([]any, 0, len(detected))
 		for _, tc := range detected {
 			toolCalls = append(toolCalls, map[string]any{
@@ -88,7 +92,7 @@ func BuildOpenAIResponseObject(responseID, model, finalPrompt, finalThinking, fi
 		"status":      "completed",
 		"model":       model,
 		"output":      output,
-		"output_text": finalText,
+		"output_text": exposedOutputText,
 		"usage": map[string]any{
 			"input_tokens":  promptTokens,
 			"output_tokens": reasoningTokens + completionTokens,
--- a/internal/util/render_test.go
+++ b/internal/util/render_test.go
@@ -54,6 +54,28 @@ func TestBuildOpenAIResponseObjectWithText(t *testing.T) {
 	}
 }

+func TestBuildOpenAIResponseObjectToolCallsHidesRawOutputText(t *testing.T) {
+	out := BuildOpenAIResponseObject(
+		"resp_2",
+		"gpt-4o",
+		"prompt",
+		"",
+		`{"tool_calls":[{"name":"search","input":{"q":"go"}}]}`, // finalText: nothing but raw tool-call JSON
+		[]string{"search"},
+	)
+	if out["output_text"] != "" { // core assertion: the raw JSON must not be echoed in output_text
+		t.Fatalf("expected empty output_text for tool_calls, got %#v", out["output_text"])
+	}
+	output, _ := out["output"].([]any)
+	if len(output) == 0 {
+		t.Fatalf("expected output entries")
+	}
+	first, _ := output[0].(map[string]any)
+	if first["type"] != "tool_calls" { // the structured entry is expected to come first in output
+		t.Fatalf("expected first output type tool_calls, got %#v", first["type"])
+	}
+}
+
 func TestBuildClaudeMessageResponseToolUse(t *testing.T) {
 	out := BuildClaudeMessageResponse(
 		"msg_1",