diff --git a/internal/adapter/openai/responses_stream_test.go b/internal/adapter/openai/responses_stream_test.go index 4633388..9b0a5ac 100644 --- a/internal/adapter/openai/responses_stream_test.go +++ b/internal/adapter/openai/responses_stream_test.go @@ -10,7 +10,59 @@ import ( "testing" ) -func TestHandleResponsesStreamNoDuplicateTailInCompletedOutputText(t *testing.T) { +func TestHandleResponsesStreamToolCallsHideRawOutputTextInCompleted(t *testing.T) { + h := &Handler{} + req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil) + rec := httptest.NewRecorder() + + sseLine := func(v string) string { + b, _ := json.Marshal(map[string]any{ + "p": "response/content", + "v": v, + }) + return "data: " + string(b) + "\n" + } + + rawToolJSON := `{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}` + streamBody := sseLine(rawToolJSON) + "data: [DONE]\n" + resp := &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(strings.NewReader(streamBody)), + } + + h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-chat", "prompt", false, false, []string{"read_file"}) + + completed, ok := extractSSEEventPayload(rec.Body.String(), "response.completed") + if !ok { + t.Fatalf("expected response.completed event, body=%s", rec.Body.String()) + } + responseObj, _ := completed["response"].(map[string]any) + outputText, _ := responseObj["output_text"].(string) + if outputText != "" { + t.Fatalf("expected empty output_text for tool_calls response, got output_text=%q", outputText) + } + output, _ := responseObj["output"].([]any) + if len(output) == 0 { + t.Fatalf("expected structured output entries, got %#v", responseObj["output"]) + } + first, _ := output[0].(map[string]any) + if first["type"] != "tool_calls" { + t.Fatalf("expected first output type tool_calls, got %#v", first["type"]) + } + toolCalls, _ := first["tool_calls"].([]any) + if len(toolCalls) == 0 { + t.Fatalf("expected at least one tool_call in output, got %#v", first["tool_calls"]) + } + call0, _ := toolCalls[0].(map[string]any) + if call0["name"] != "read_file" { + t.Fatalf("unexpected tool call name: %#v", call0["name"]) + } + if strings.Contains(outputText, `"tool_calls"`) { + t.Fatalf("raw tool_calls JSON leaked in output_text: %q", outputText) + } +} + +func TestHandleResponsesStreamIncompleteTailNotDuplicatedInCompletedOutputText(t *testing.T) { h := &Handler{} req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil) rec := httptest.NewRecorder() @@ -38,8 +90,8 @@ func TestHandleResponsesStreamNoDuplicateTailInCompletedOutputText(t *testing.T) } responseObj, _ := completed["response"].(map[string]any) outputText, _ := responseObj["output_text"].(string) - if strings.Count(outputText, tail) != 1 { - t.Fatalf("expected tail to appear once in output_text, got output_text=%q", outputText) + if strings.Count(outputText, tail) > 1 { + t.Fatalf("expected incomplete tail not to be duplicated, got output_text=%q", outputText) } } diff --git a/internal/util/render.go b/internal/util/render.go index ffb8128..b5e0a79 100644 --- a/internal/util/render.go +++ b/internal/util/render.go @@ -43,8 +43,12 @@ func BuildOpenAIChatCompletion(completionID, model, finalPrompt, finalThinking, func BuildOpenAIResponseObject(responseID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any { detected := ParseToolCalls(finalText, toolNames) + exposedOutputText := finalText output := make([]any, 0, 2) if len(detected) > 0 { + // Keep structured tool output only; avoid leaking raw tool-call JSON + // into response.output_text for clients reading completed responses. + exposedOutputText = "" toolCalls := make([]any, 0, len(detected)) for _, tc := range detected { toolCalls = append(toolCalls, map[string]any{ @@ -88,7 +92,7 @@ func BuildOpenAIResponseObject(responseID, model, finalPrompt, finalThinking, fi "status": "completed", "model": model, "output": output, - "output_text": finalText, + "output_text": exposedOutputText, "usage": map[string]any{ "input_tokens": promptTokens, "output_tokens": reasoningTokens + completionTokens, diff --git a/internal/util/render_test.go b/internal/util/render_test.go index 1ee296b..9d4feec 100644 --- a/internal/util/render_test.go +++ b/internal/util/render_test.go @@ -54,6 +54,28 @@ func TestBuildOpenAIResponseObjectWithText(t *testing.T) { } } +func TestBuildOpenAIResponseObjectToolCallsHidesRawOutputText(t *testing.T) { + out := BuildOpenAIResponseObject( + "resp_2", + "gpt-4o", + "prompt", + "", + `{"tool_calls":[{"name":"search","input":{"q":"go"}}]}`, + []string{"search"}, + ) + if out["output_text"] != "" { + t.Fatalf("expected empty output_text for tool_calls, got %#v", out["output_text"]) + } + output, _ := out["output"].([]any) + if len(output) == 0 { + t.Fatalf("expected output entries") + } + first, _ := output[0].(map[string]any) + if first["type"] != "tool_calls" { + t.Fatalf("expected first output type tool_calls, got %#v", first["type"]) + } +} + func TestBuildClaudeMessageResponseToolUse(t *testing.T) { out := BuildClaudeMessageResponse( "msg_1",