mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-04 16:35:27 +08:00
- Stream: strip both [citation:N] and [reference:N] markers to prevent leaking partial link metadata during incremental output - Non-stream: convert citation/reference markers to Markdown links for Claude Messages, Gemini generateContent, and OpenAI Chat/Responses - Remove StripReferenceMarkers option from call sites; behavior is now determined automatically by stream vs non-stream context - Extend JS runtime stripReferenceMarkersText() to also match [citation:N] - Add tests for streaming marker stripping and non-stream link conversion Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
464 lines
14 KiB
Go
464 lines
14 KiB
Go
package gemini
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
|
|
"ds2api/internal/assistantturn"
|
|
"ds2api/internal/auth"
|
|
"ds2api/internal/completionruntime"
|
|
"ds2api/internal/httpapi/openai/history"
|
|
"ds2api/internal/httpapi/requestbody"
|
|
"ds2api/internal/promptcompat"
|
|
"ds2api/internal/responsehistory"
|
|
"ds2api/internal/sse"
|
|
"ds2api/internal/toolcall"
|
|
"ds2api/internal/translatorcliproxy"
|
|
"ds2api/internal/util"
|
|
|
|
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
|
)
|
|
|
|
func (h *Handler) handleGenerateContent(w http.ResponseWriter, r *http.Request, stream bool) {
|
|
if isGeminiVercelProxyRequest(r) && h.proxyViaOpenAI(w, r, stream) {
|
|
return
|
|
}
|
|
if h.Auth == nil || h.DS == nil {
|
|
if h.OpenAI != nil && h.proxyViaOpenAI(w, r, stream) {
|
|
return
|
|
}
|
|
writeGeminiError(w, http.StatusInternalServerError, "Gemini runtime backend unavailable.")
|
|
return
|
|
}
|
|
if h.handleGeminiDirect(w, r, stream) {
|
|
return
|
|
}
|
|
writeGeminiError(w, http.StatusBadGateway, "Failed to handle Gemini request.")
|
|
}
|
|
|
|
// isGeminiVercelProxyRequest reports whether the request carries one of the
// Vercel streaming proxy phase markers (__stream_prepare=1 or
// __stream_release=1) in its query string. A nil request or URL is never a
// proxy request.
func isGeminiVercelProxyRequest(r *http.Request) bool {
	if r == nil || r.URL == nil {
		return false
	}
	// Parse the query string once; r.URL.Query() re-parses RawQuery per call.
	q := r.URL.Query()
	return strings.TrimSpace(q.Get("__stream_prepare")) == "1" ||
		strings.TrimSpace(q.Get("__stream_release")) == "1"
}
|
|
|
|
// handleGeminiDirect serves a generateContent request through the native
// DeepSeek-backed runtime. It reports whether the request was handled; as
// written, every path returns true, so callers' fallback branches are
// effectively defensive.
func (h *Handler) handleGeminiDirect(w http.ResponseWriter, r *http.Request, stream bool) bool {
	raw, err := io.ReadAll(r.Body)
	if err != nil {
		// ErrInvalidUTF8Body is presumably surfaced by a requestbody wrapper
		// around r.Body (TODO confirm against middleware); it is reported as
		// malformed JSON, anything else as a generic body read failure.
		if errors.Is(err, requestbody.ErrInvalidUTF8Body) {
			writeGeminiError(w, http.StatusBadRequest, "invalid json")
		} else {
			writeGeminiError(w, http.StatusBadRequest, "invalid body")
		}
		return true
	}
	// The model comes from the URL route parameter, not the JSON payload.
	routeModel := strings.TrimSpace(chi.URLParam(r, "model"))
	var req map[string]any
	if err := json.Unmarshal(raw, &req); err != nil {
		writeGeminiError(w, http.StatusBadRequest, "invalid json")
		return true
	}
	// Normalize the Gemini-shaped request into the internal standard request.
	stdReq, err := normalizeGeminiRequest(h.Store, routeModel, req, stream)
	if err != nil {
		writeGeminiError(w, http.StatusBadRequest, err.Error())
		return true
	}
	// Resolve per-request auth; the slot is released when handling finishes.
	a, err := h.Auth.Determine(r)
	if err != nil {
		writeGeminiError(w, http.StatusUnauthorized, err.Error())
		return true
	}
	defer h.Auth.Release(a)
	// Fold any tracked "current input file" state into the request.
	stdReq, err = h.applyCurrentInputFile(r.Context(), a, stdReq)
	if err != nil {
		status, message := mapCurrentInputFileError(err)
		writeGeminiError(w, status, message)
		return true
	}
	// Open a chat-history session so the turn (or its failure) is recorded.
	historySession := responsehistory.Start(responsehistory.StartParams{
		Store:    h.ChatHistory,
		Request:  r,
		Auth:     a,
		Surface:  "gemini.generate_content",
		Standard: stdReq,
	})
	if stream {
		h.handleGeminiDirectStream(w, r, a, stdReq, historySession)
		return true
	}
	// Non-stream: run the completion to the end (with retry enabled) and
	// render the final turn as a single JSON response.
	result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
		RetryEnabled:     true,
		CurrentInputFile: h.Store,
	})
	if outErr != nil {
		if historySession != nil {
			// Record the failure together with whatever partial turn exists.
			historySession.ErrorTurn(outErr.Status, outErr.Message, outErr.Code, result.Turn)
		}
		writeGeminiError(w, outErr.Status, outErr.Message)
		return true
	}
	if historySession != nil {
		historySession.SuccessTurn(http.StatusOK, result.Turn, responsehistory.GenericUsage(result.Turn))
	}
	writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponseFromTurn(result.Turn))
	return true
}
|
|
|
|
func (h *Handler) applyCurrentInputFile(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) {
|
|
if h == nil {
|
|
return stdReq, nil
|
|
}
|
|
return (history.Service{Store: h.Store, DS: h.DS}).ApplyCurrentInputFile(ctx, a, stdReq)
|
|
}
|
|
|
|
func mapCurrentInputFileError(err error) (int, string) {
|
|
return history.MapError(err)
|
|
}
|
|
|
|
func (h *Handler) handleGeminiDirectStream(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, stdReq promptcompat.StandardRequest, historySession *responsehistory.Session) {
|
|
start, outErr := completionruntime.StartCompletion(r.Context(), h.DS, a, stdReq, completionruntime.Options{
|
|
CurrentInputFile: h.Store,
|
|
})
|
|
if outErr != nil {
|
|
if historySession != nil {
|
|
historySession.Error(outErr.Status, outErr.Message, outErr.Code, "", "")
|
|
}
|
|
writeGeminiError(w, outErr.Status, outErr.Message)
|
|
return
|
|
}
|
|
streamReq := start.Request
|
|
h.handleStreamGenerateContent(w, r, start.Response, streamReq.ResponseModel, streamReq.PromptTokenText, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, historySession)
|
|
}
|
|
|
|
func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream bool) bool {
|
|
raw, err := io.ReadAll(r.Body)
|
|
if err != nil {
|
|
if errors.Is(err, requestbody.ErrInvalidUTF8Body) {
|
|
writeGeminiError(w, http.StatusBadRequest, "invalid json")
|
|
} else {
|
|
writeGeminiError(w, http.StatusBadRequest, "invalid body")
|
|
}
|
|
return true
|
|
}
|
|
routeModel := strings.TrimSpace(chi.URLParam(r, "model"))
|
|
var req map[string]any
|
|
if err := json.Unmarshal(raw, &req); err != nil {
|
|
writeGeminiError(w, http.StatusBadRequest, "invalid json")
|
|
return true
|
|
}
|
|
translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatGemini, routeModel, raw, stream)
|
|
if !strings.Contains(string(translatedReq), `"stream"`) {
|
|
var reqMap map[string]any
|
|
if json.Unmarshal(translatedReq, &reqMap) == nil {
|
|
reqMap["stream"] = stream
|
|
if b, e := json.Marshal(reqMap); e == nil {
|
|
translatedReq = b
|
|
}
|
|
}
|
|
}
|
|
translatedReq = applyGeminiThinkingPolicyToOpenAIRequest(translatedReq, req)
|
|
|
|
isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1"
|
|
isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
|
|
|
|
if isVercelRelease {
|
|
proxyReq := r.Clone(r.Context())
|
|
proxyReq.URL.Path = "/v1/chat/completions"
|
|
proxyReq.Body = io.NopCloser(bytes.NewReader(raw))
|
|
proxyReq.ContentLength = int64(len(raw))
|
|
rec := httptest.NewRecorder()
|
|
h.OpenAI.ChatCompletions(rec, proxyReq)
|
|
res := rec.Result()
|
|
defer func() { _ = res.Body.Close() }()
|
|
body, _ := io.ReadAll(res.Body)
|
|
for k, vv := range res.Header {
|
|
for _, v := range vv {
|
|
w.Header().Add(k, v)
|
|
}
|
|
}
|
|
w.WriteHeader(res.StatusCode)
|
|
_, _ = w.Write(body)
|
|
return true
|
|
}
|
|
|
|
proxyReq := r.Clone(r.Context())
|
|
proxyReq.URL.Path = "/v1/chat/completions"
|
|
proxyReq.Body = io.NopCloser(bytes.NewReader(translatedReq))
|
|
proxyReq.ContentLength = int64(len(translatedReq))
|
|
|
|
if stream && !isVercelPrepare {
|
|
w.Header().Set("Content-Type", "text/event-stream")
|
|
w.Header().Set("Cache-Control", "no-cache, no-transform")
|
|
w.Header().Set("Connection", "keep-alive")
|
|
w.Header().Set("X-Accel-Buffering", "no")
|
|
streamWriter := translatorcliproxy.NewOpenAIStreamTranslatorWriter(w, sdktranslator.FormatGemini, routeModel, raw, translatedReq)
|
|
h.OpenAI.ChatCompletions(streamWriter, proxyReq)
|
|
return true
|
|
}
|
|
|
|
rec := httptest.NewRecorder()
|
|
h.OpenAI.ChatCompletions(rec, proxyReq)
|
|
res := rec.Result()
|
|
defer func() { _ = res.Body.Close() }()
|
|
body, _ := io.ReadAll(res.Body)
|
|
if res.StatusCode < 200 || res.StatusCode >= 300 {
|
|
for k, vv := range res.Header {
|
|
for _, v := range vv {
|
|
w.Header().Add(k, v)
|
|
}
|
|
}
|
|
writeGeminiErrorFromOpenAI(w, res.StatusCode, body)
|
|
return true
|
|
}
|
|
if isVercelPrepare {
|
|
for k, vv := range res.Header {
|
|
for _, v := range vv {
|
|
w.Header().Add(k, v)
|
|
}
|
|
}
|
|
w.WriteHeader(res.StatusCode)
|
|
_, _ = w.Write(body)
|
|
return true
|
|
}
|
|
converted := translatorcliproxy.FromOpenAINonStream(sdktranslator.FormatGemini, routeModel, raw, translatedReq, body)
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write(converted)
|
|
return true
|
|
}
|
|
|
|
func applyGeminiThinkingPolicyToOpenAIRequest(translated []byte, original map[string]any) []byte {
|
|
req := map[string]any{}
|
|
if err := json.Unmarshal(translated, &req); err != nil {
|
|
return translated
|
|
}
|
|
enabled, ok := resolveGeminiThinkingOverride(original)
|
|
if !ok {
|
|
return translated
|
|
}
|
|
typ := "disabled"
|
|
if enabled {
|
|
typ = "enabled"
|
|
}
|
|
req["thinking"] = map[string]any{"type": typ}
|
|
out, err := json.Marshal(req)
|
|
if err != nil {
|
|
return translated
|
|
}
|
|
return out
|
|
}
|
|
|
|
func resolveGeminiThinkingOverride(req map[string]any) (bool, bool) {
|
|
generationConfig, ok := req["generationConfig"].(map[string]any)
|
|
if !ok {
|
|
generationConfig, ok = req["generation_config"].(map[string]any)
|
|
}
|
|
if !ok {
|
|
return false, false
|
|
}
|
|
thinkingConfig, ok := generationConfig["thinkingConfig"].(map[string]any)
|
|
if !ok {
|
|
thinkingConfig, ok = generationConfig["thinking_config"].(map[string]any)
|
|
}
|
|
if !ok {
|
|
return false, false
|
|
}
|
|
budget, ok := numericAny(thinkingConfig["thinkingBudget"])
|
|
if !ok {
|
|
budget, ok = numericAny(thinkingConfig["thinking_budget"])
|
|
}
|
|
if !ok {
|
|
return false, false
|
|
}
|
|
return budget > 0, true
|
|
}
|
|
|
|
// numericAny coerces a decoded-JSON numeric value of any common Go numeric
// kind into a float64. It reports false for non-numeric input, including a
// json.Number that fails to parse; in that case the returned float64 is 0
// rather than a partially-parsed value.
func numericAny(raw any) (float64, bool) {
	switch v := raw.(type) {
	case float64:
		return v, true
	case float32:
		return float64(v), true
	case int:
		return float64(v), true
	case int64:
		return float64(v), true
	case int32:
		return float64(v), true
	case int16:
		return float64(v), true
	case int8:
		return float64(v), true
	case uint:
		return float64(v), true
	case uint64:
		return float64(v), true
	case uint32:
		return float64(v), true
	case uint16:
		return float64(v), true
	case uint8:
		return float64(v), true
	case json.Number:
		f, err := v.Float64()
		if err != nil {
			return 0, false
		}
		return f, true
	default:
		return 0, false
	}
}
|
|
|
|
func writeGeminiErrorFromOpenAI(w http.ResponseWriter, status int, raw []byte) {
|
|
message := strings.TrimSpace(string(raw))
|
|
var parsed map[string]any
|
|
if err := json.Unmarshal(raw, &parsed); err == nil {
|
|
if errObj, ok := parsed["error"].(map[string]any); ok {
|
|
if msg, ok := errObj["message"].(string); ok && strings.TrimSpace(msg) != "" {
|
|
message = strings.TrimSpace(msg)
|
|
}
|
|
}
|
|
}
|
|
if message == "" {
|
|
message = http.StatusText(status)
|
|
}
|
|
writeGeminiError(w, status, message)
|
|
}
|
|
|
|
//nolint:unused // retained for native Gemini non-stream handling path.
|
|
func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *http.Response, model, finalPrompt string, thinkingEnabled bool, toolNames []string) {
|
|
defer func() { _ = resp.Body.Close() }()
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(resp.Body)
|
|
writeGeminiError(w, resp.StatusCode, strings.TrimSpace(string(body)))
|
|
return
|
|
}
|
|
|
|
result := sse.CollectStream(resp, thinkingEnabled, true)
|
|
writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(
|
|
model,
|
|
finalPrompt,
|
|
cleanVisibleOutput(result.Thinking, false),
|
|
cleanVisibleOutput(result.Text, false),
|
|
toolNames,
|
|
))
|
|
}
|
|
|
|
//nolint:unused // retained for native Gemini non-stream handling path.
|
|
func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
|
|
parts := buildGeminiPartsFromFinal(finalText, finalThinking, toolNames)
|
|
usage := buildGeminiUsage(model, finalPrompt, finalThinking, finalText)
|
|
return map[string]any{
|
|
"candidates": []map[string]any{
|
|
{
|
|
"index": 0,
|
|
"content": map[string]any{
|
|
"role": "model",
|
|
"parts": parts,
|
|
},
|
|
"finishReason": "STOP",
|
|
},
|
|
},
|
|
"modelVersion": model,
|
|
"usageMetadata": usage,
|
|
}
|
|
}
|
|
|
|
func buildGeminiGenerateContentResponseFromTurn(turn assistantturn.Turn) map[string]any {
|
|
parts := buildGeminiPartsFromTurn(turn)
|
|
return map[string]any{
|
|
"candidates": []map[string]any{
|
|
{
|
|
"index": 0,
|
|
"content": map[string]any{
|
|
"role": "model",
|
|
"parts": parts,
|
|
},
|
|
"finishReason": "STOP",
|
|
},
|
|
},
|
|
"modelVersion": turn.Model,
|
|
"usageMetadata": map[string]any{
|
|
"promptTokenCount": turn.Usage.InputTokens,
|
|
"candidatesTokenCount": turn.Usage.OutputTokens,
|
|
"totalTokenCount": turn.Usage.TotalTokens,
|
|
},
|
|
}
|
|
}
|
|
|
|
func buildGeminiPartsFromTurn(turn assistantturn.Turn) []map[string]any {
|
|
thinkingPart := func() []map[string]any {
|
|
if turn.Thinking == "" {
|
|
return nil
|
|
}
|
|
return []map[string]any{{"text": turn.Thinking, "thought": true}}
|
|
}
|
|
if len(turn.ToolCalls) > 0 {
|
|
parts := thinkingPart()
|
|
if parts == nil {
|
|
parts = make([]map[string]any, 0, len(turn.ToolCalls))
|
|
}
|
|
for _, tc := range turn.ToolCalls {
|
|
parts = append(parts, map[string]any{
|
|
"functionCall": map[string]any{
|
|
"name": tc.Name,
|
|
"args": tc.Input,
|
|
},
|
|
})
|
|
}
|
|
return parts
|
|
}
|
|
parts := thinkingPart()
|
|
if turn.Text != "" {
|
|
parts = append(parts, map[string]any{"text": turn.Text})
|
|
}
|
|
if len(parts) == 0 {
|
|
parts = append(parts, map[string]any{"text": ""})
|
|
}
|
|
return parts
|
|
}
|
|
|
|
//nolint:unused // retained for native Gemini non-stream handling path.
|
|
func buildGeminiUsage(model, finalPrompt, finalThinking, finalText string) map[string]any {
|
|
promptTokens := util.CountPromptTokens(finalPrompt, model)
|
|
reasoningTokens := util.CountOutputTokens(finalThinking, model)
|
|
completionTokens := util.CountOutputTokens(finalText, model)
|
|
return map[string]any{
|
|
"promptTokenCount": promptTokens,
|
|
"candidatesTokenCount": reasoningTokens + completionTokens,
|
|
"totalTokenCount": promptTokens + reasoningTokens + completionTokens,
|
|
}
|
|
}
|
|
|
|
//nolint:unused // retained for native Gemini non-stream handling path.
|
|
func buildGeminiPartsFromFinal(finalText, finalThinking string, toolNames []string) []map[string]any {
|
|
detected := toolcall.ParseToolCalls(finalText, toolNames)
|
|
if len(detected) == 0 && finalThinking != "" {
|
|
detected = toolcall.ParseToolCalls(finalThinking, toolNames)
|
|
}
|
|
thinkingPart := func() []map[string]any {
|
|
if finalThinking == "" {
|
|
return nil
|
|
}
|
|
return []map[string]any{{"text": finalThinking, "thought": true}}
|
|
}
|
|
if len(detected) > 0 {
|
|
parts := thinkingPart()
|
|
if parts == nil {
|
|
parts = make([]map[string]any, 0, len(detected))
|
|
}
|
|
for _, tc := range detected {
|
|
parts = append(parts, map[string]any{
|
|
"functionCall": map[string]any{
|
|
"name": tc.Name,
|
|
"args": tc.Input,
|
|
},
|
|
})
|
|
}
|
|
return parts
|
|
}
|
|
|
|
parts := thinkingPart()
|
|
if finalText != "" {
|
|
parts = append(parts, map[string]any{"text": finalText})
|
|
}
|
|
if len(parts) == 0 {
|
|
parts = append(parts, map[string]any{"text": ""})
|
|
}
|
|
return parts
|
|
}
|