mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-04 16:35:27 +08:00
- Stream: strip both [citation:N] and [reference:N] markers to prevent leaking partial link metadata during incremental output - Non-stream: convert citation/reference markers to Markdown links for Claude Messages, Gemini generateContent, and OpenAI Chat/Responses - Remove StripReferenceMarkers option from call sites; behavior is now determined automatically by stream vs non-stream context - Extend JS runtime stripReferenceMarkersText() to also match [citation:N] - Add tests for streaming marker stripping and non-stream link conversion Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
464 lines
14 KiB
Go
464 lines
14 KiB
Go
package gemini
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
|
|
"ds2api/internal/assistantturn"
|
|
"ds2api/internal/auth"
|
|
"ds2api/internal/completionruntime"
|
|
"ds2api/internal/httpapi/openai/history"
|
|
"ds2api/internal/httpapi/requestbody"
|
|
"ds2api/internal/promptcompat"
|
|
"ds2api/internal/responsehistory"
|
|
"ds2api/internal/sse"
|
|
"ds2api/internal/toolcall"
|
|
"ds2api/internal/translatorcliproxy"
|
|
"ds2api/internal/util"
|
|
|
|
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
|
)
|
|
|
|
func (h *Handler) handleGenerateContent(w http.ResponseWriter, r *http.Request, stream bool) {
|
|
if isGeminiVercelProxyRequest(r) && h.proxyViaOpenAI(w, r, stream) {
|
|
return
|
|
}
|
|
if h.Auth == nil || h.DS == nil {
|
|
if h.OpenAI != nil && h.proxyViaOpenAI(w, r, stream) {
|
|
return
|
|
}
|
|
writeGeminiError(w, http.StatusInternalServerError, "Gemini runtime backend unavailable.")
|
|
return
|
|
}
|
|
if h.handleGeminiDirect(w, r, stream) {
|
|
return
|
|
}
|
|
writeGeminiError(w, http.StatusBadGateway, "Failed to handle Gemini request.")
|
|
}
|
|
|
|
// isGeminiVercelProxyRequest reports whether the request carries one of the
// Vercel streaming proxy phase markers (__stream_prepare=1 or
// __stream_release=1) in its query string. A nil request or URL is never a
// proxy request.
func isGeminiVercelProxyRequest(r *http.Request) bool {
	if r == nil || r.URL == nil {
		return false
	}
	// Parse the query string once; r.URL.Query() re-parses RawQuery per call.
	q := r.URL.Query()
	return strings.TrimSpace(q.Get("__stream_prepare")) == "1" ||
		strings.TrimSpace(q.Get("__stream_release")) == "1"
}
|
|
|
|
// handleGeminiDirect serves a generateContent request through the native
// DeepSeek-backed runtime. It reports whether the request was handled; as
// written, every path returns true, so callers' fallback branches are
// effectively defensive.
func (h *Handler) handleGeminiDirect(w http.ResponseWriter, r *http.Request, stream bool) bool {
	raw, err := io.ReadAll(r.Body)
	if err != nil {
		// ErrInvalidUTF8Body is presumably surfaced by a requestbody wrapper
		// around r.Body (TODO confirm against middleware); it is reported as
		// malformed JSON, anything else as a generic body read failure.
		if errors.Is(err, requestbody.ErrInvalidUTF8Body) {
			writeGeminiError(w, http.StatusBadRequest, "invalid json")
		} else {
			writeGeminiError(w, http.StatusBadRequest, "invalid body")
		}
		return true
	}
	// The model comes from the URL route parameter, not the JSON payload.
	routeModel := strings.TrimSpace(chi.URLParam(r, "model"))
	var req map[string]any
	if err := json.Unmarshal(raw, &req); err != nil {
		writeGeminiError(w, http.StatusBadRequest, "invalid json")
		return true
	}
	// Normalize the Gemini-shaped request into the internal standard request.
	stdReq, err := normalizeGeminiRequest(h.Store, routeModel, req, stream)
	if err != nil {
		writeGeminiError(w, http.StatusBadRequest, err.Error())
		return true
	}
	// Resolve per-request auth; the slot is released when handling finishes.
	a, err := h.Auth.Determine(r)
	if err != nil {
		writeGeminiError(w, http.StatusUnauthorized, err.Error())
		return true
	}
	defer h.Auth.Release(a)
	// Fold any tracked "current input file" state into the request.
	stdReq, err = h.applyCurrentInputFile(r.Context(), a, stdReq)
	if err != nil {
		status, message := mapCurrentInputFileError(err)
		writeGeminiError(w, status, message)
		return true
	}
	// Open a chat-history session so the turn (or its failure) is recorded.
	historySession := responsehistory.Start(responsehistory.StartParams{
		Store:    h.ChatHistory,
		Request:  r,
		Auth:     a,
		Surface:  "gemini.generate_content",
		Standard: stdReq,
	})
	if stream {
		h.handleGeminiDirectStream(w, r, a, stdReq, historySession)
		return true
	}
	// Non-stream: run the completion to the end (with retry enabled) and
	// render the final turn as a single JSON response.
	result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{
		RetryEnabled:     true,
		CurrentInputFile: h.Store,
	})
	if outErr != nil {
		if historySession != nil {
			// Record the failure together with whatever partial turn exists.
			historySession.ErrorTurn(outErr.Status, outErr.Message, outErr.Code, result.Turn)
		}
		writeGeminiError(w, outErr.Status, outErr.Message)
		return true
	}
	if historySession != nil {
		historySession.SuccessTurn(http.StatusOK, result.Turn, responsehistory.GenericUsage(result.Turn))
	}
	writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponseFromTurn(result.Turn))
	return true
}
|
|
|
|
func (h *Handler) applyCurrentInputFile(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) {
|
|
if h == nil {
|
|
return stdReq, nil
|
|
}
|
|
return (history.Service{Store: h.Store, DS: h.DS}).ApplyCurrentInputFile(ctx, a, stdReq)
|
|
}
|
|
|
|
func mapCurrentInputFileError(err error) (int, string) {
|
|
return history.MapError(err)
|
|
}
|
|
|
|
func (h *Handler) handleGeminiDirectStream(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, stdReq promptcompat.StandardRequest, historySession *responsehistory.Session) {
|
|
start, outErr := completionruntime.StartCompletion(r.Context(), h.DS, a, stdReq, completionruntime.Options{
|
|
CurrentInputFile: h.Store,
|
|
})
|
|
if outErr != nil {
|
|
if historySession != nil {
|
|
historySession.Error(outErr.Status, outErr.Message, outErr.Code, "", "")
|
|
}
|
|
writeGeminiError(w, outErr.Status, outErr.Message)
|
|
return
|
|
}
|
|
streamReq := start.Request
|
|
h.handleStreamGenerateContent(w, r, start.Response, streamReq.ResponseModel, streamReq.PromptTokenText, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, historySession)
|
|
}
|
|
|
|
func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream bool) bool {
|
|
raw, err := io.ReadAll(r.Body)
|
|
if err != nil {
|
|
if errors.Is(err, requestbody.ErrInvalidUTF8Body) {
|
|
writeGeminiError(w, http.StatusBadRequest, "invalid json")
|
|
} else {
|
|
writeGeminiError(w, http.StatusBadRequest, "invalid body")
|
|
}
|
|
return true
|
|
}
|
|
routeModel := strings.TrimSpace(chi.URLParam(r, "model"))
|
|
var req map[string]any
|
|
if err := json.Unmarshal(raw, &req); err != nil {
|
|
writeGeminiError(w, http.StatusBadRequest, "invalid json")
|
|
return true
|
|
}
|
|
translatedReq := translatorcliproxy.ToOpenAI(sdktranslator.FormatGemini, routeModel, raw, stream)
|
|
if !strings.Contains(string(translatedReq), `"stream"`) {
|
|
var reqMap map[string]any
|
|
if json.Unmarshal(translatedReq, &reqMap) == nil {
|
|
reqMap["stream"] = stream
|
|
if b, e := json.Marshal(reqMap); e == nil {
|
|
translatedReq = b
|
|
}
|
|
}
|
|
}
|
|
translatedReq = applyGeminiThinkingPolicyToOpenAIRequest(translatedReq, req)
|
|
|
|
isVercelPrepare := strings.TrimSpace(r.URL.Query().Get("__stream_prepare")) == "1"
|
|
isVercelRelease := strings.TrimSpace(r.URL.Query().Get("__stream_release")) == "1"
|
|
|
|
if isVercelRelease {
|
|
proxyReq := r.Clone(r.Context())
|
|
proxyReq.URL.Path = "/v1/chat/completions"
|
|
proxyReq.Body = io.NopCloser(bytes.NewReader(raw))
|
|
proxyReq.ContentLength = int64(len(raw))
|
|
rec := httptest.NewRecorder()
|
|
h.OpenAI.ChatCompletions(rec, proxyReq)
|
|
res := rec.Result()
|
|
defer func() { _ = res.Body.Close() }()
|
|
body, _ := io.ReadAll(res.Body)
|
|
for k, vv := range res.Header {
|
|
for _, v := range vv {
|
|
w.Header().Add(k, v)
|
|
}
|
|
}
|
|
w.WriteHeader(res.StatusCode)
|
|
_, _ = w.Write(body)
|
|
return true
|
|
}
|
|
|
|
proxyReq := r.Clone(r.Context())
|
|
proxyReq.URL.Path = "/v1/chat/completions"
|
|
proxyReq.Body = io.NopCloser(bytes.NewReader(translatedReq))
|
|
proxyReq.ContentLength = int64(len(translatedReq))
|
|
|
|
if stream && !isVercelPrepare {
|
|
w.Header().Set("Content-Type", "text/event-stream")
|
|
w.Header().Set("Cache-Control", "no-cache, no-transform")
|
|
w.Header().Set("Connection", "keep-alive")
|
|
w.Header().Set("X-Accel-Buffering", "no")
|
|
streamWriter := translatorcliproxy.NewOpenAIStreamTranslatorWriter(w, sdktranslator.FormatGemini, routeModel, raw, translatedReq)
|
|
h.OpenAI.ChatCompletions(streamWriter, proxyReq)
|
|
return true
|
|
}
|
|
|
|
rec := httptest.NewRecorder()
|
|
h.OpenAI.ChatCompletions(rec, proxyReq)
|
|
res := rec.Result()
|
|
defer func() { _ = res.Body.Close() }()
|
|
body, _ := io.ReadAll(res.Body)
|
|
if res.StatusCode < 200 || res.StatusCode >= 300 {
|
|
for k, vv := range res.Header {
|
|
for _, v := range vv {
|
|
w.Header().Add(k, v)
|
|
}
|
|
}
|
|
writeGeminiErrorFromOpenAI(w, res.StatusCode, body)
|
|
return true
|
|
}
|
|
if isVercelPrepare {
|
|
for k, vv := range res.Header {
|
|
for _, v := range vv {
|
|
w.Header().Add(k, v)
|
|
}
|
|
}
|
|
w.WriteHeader(res.StatusCode)
|
|
_, _ = w.Write(body)
|
|
return true
|
|
}
|
|
converted := translatorcliproxy.FromOpenAINonStream(sdktranslator.FormatGemini, routeModel, raw, translatedReq, body)
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write(converted)
|
|
return true
|
|
}
|
|
|
|
func applyGeminiThinkingPolicyToOpenAIRequest(translated []byte, original map[string]any) []byte {
|
|
req := map[string]any{}
|
|
if err := json.Unmarshal(translated, &req); err != nil {
|
|
return translated
|
|
}
|
|
enabled, ok := resolveGeminiThinkingOverride(original)
|
|
if !ok {
|
|
return translated
|
|
}
|
|
typ := "disabled"
|
|
if enabled {
|
|
typ = "enabled"
|
|
}
|
|
req["thinking"] = map[string]any{"type": typ}
|
|
out, err := json.Marshal(req)
|
|
if err != nil {
|
|
return translated
|
|
}
|
|
return out
|
|
}
|
|
|
|
func resolveGeminiThinkingOverride(req map[string]any) (bool, bool) {
|
|
generationConfig, ok := req["generationConfig"].(map[string]any)
|
|
if !ok {
|
|
generationConfig, ok = req["generation_config"].(map[string]any)
|
|
}
|
|
if !ok {
|
|
return false, false
|
|
}
|
|
thinkingConfig, ok := generationConfig["thinkingConfig"].(map[string]any)
|
|
if !ok {
|
|
thinkingConfig, ok = generationConfig["thinking_config"].(map[string]any)
|
|
}
|
|
if !ok {
|
|
return false, false
|
|
}
|
|
budget, ok := numericAny(thinkingConfig["thinkingBudget"])
|
|
if !ok {
|
|
budget, ok = numericAny(thinkingConfig["thinking_budget"])
|
|
}
|
|
if !ok {
|
|
return false, false
|
|
}
|
|
return budget > 0, true
|
|
}
|
|
|
|
// numericAny coerces a decoded-JSON numeric value of any common Go numeric
// kind into a float64. It reports false for non-numeric input, including a
// json.Number that fails to parse; in that case the returned float64 is 0
// rather than a partially-parsed value.
func numericAny(raw any) (float64, bool) {
	switch v := raw.(type) {
	case float64:
		return v, true
	case float32:
		return float64(v), true
	case int:
		return float64(v), true
	case int64:
		return float64(v), true
	case int32:
		return float64(v), true
	case int16:
		return float64(v), true
	case int8:
		return float64(v), true
	case uint:
		return float64(v), true
	case uint64:
		return float64(v), true
	case uint32:
		return float64(v), true
	case uint16:
		return float64(v), true
	case uint8:
		return float64(v), true
	case json.Number:
		f, err := v.Float64()
		if err != nil {
			return 0, false
		}
		return f, true
	default:
		return 0, false
	}
}
|
|
|
|
func writeGeminiErrorFromOpenAI(w http.ResponseWriter, status int, raw []byte) {
|
|
message := strings.TrimSpace(string(raw))
|
|
var parsed map[string]any
|
|
if err := json.Unmarshal(raw, &parsed); err == nil {
|
|
if errObj, ok := parsed["error"].(map[string]any); ok {
|
|
if msg, ok := errObj["message"].(string); ok && strings.TrimSpace(msg) != "" {
|
|
message = strings.TrimSpace(msg)
|
|
}
|
|
}
|
|
}
|
|
if message == "" {
|
|
message = http.StatusText(status)
|
|
}
|
|
writeGeminiError(w, status, message)
|
|
}
|
|
|
|
//nolint:unused // retained for native Gemini non-stream handling path.
|
|
func (h *Handler) handleNonStreamGenerateContent(w http.ResponseWriter, resp *http.Response, model, finalPrompt string, thinkingEnabled bool, toolNames []string) {
|
|
defer func() { _ = resp.Body.Close() }()
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(resp.Body)
|
|
writeGeminiError(w, resp.StatusCode, strings.TrimSpace(string(body)))
|
|
return
|
|
}
|
|
|
|
result := sse.CollectStream(resp, thinkingEnabled, true)
|
|
writeJSON(w, http.StatusOK, buildGeminiGenerateContentResponse(
|
|
model,
|
|
finalPrompt,
|
|
cleanVisibleOutput(result.Thinking, false),
|
|
cleanVisibleOutput(result.Text, false),
|
|
toolNames,
|
|
))
|
|
}
|
|
|
|
//nolint:unused // retained for native Gemini non-stream handling path.
|
|
func buildGeminiGenerateContentResponse(model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
|
|
parts := buildGeminiPartsFromFinal(finalText, finalThinking, toolNames)
|
|
usage := buildGeminiUsage(model, finalPrompt, finalThinking, finalText)
|
|
return map[string]any{
|
|
"candidates": []map[string]any{
|
|
{
|
|
"index": 0,
|
|
"content": map[string]any{
|
|
"role": "model",
|
|
"parts": parts,
|
|
},
|
|
"finishReason": "STOP",
|
|
},
|
|
},
|
|
"modelVersion": model,
|
|
"usageMetadata": usage,
|
|
}
|
|
}
|
|
|
|
func buildGeminiGenerateContentResponseFromTurn(turn assistantturn.Turn) map[string]any {
|
|
parts := buildGeminiPartsFromTurn(turn)
|
|
return map[string]any{
|
|
"candidates": []map[string]any{
|
|
{
|
|
"index": 0,
|
|
"content": map[string]any{
|
|
"role": "model",
|
|
"parts": parts,
|
|
},
|
|
"finishReason": "STOP",
|
|
},
|
|
},
|
|
"modelVersion": turn.Model,
|
|
"usageMetadata": map[string]any{
|
|
"promptTokenCount": turn.Usage.InputTokens,
|
|
"candidatesTokenCount": turn.Usage.OutputTokens,
|
|
"totalTokenCount": turn.Usage.TotalTokens,
|
|
},
|
|
}
|
|
}
|
|
|
|
func buildGeminiPartsFromTurn(turn assistantturn.Turn) []map[string]any {
|
|
thinkingPart := func() []map[string]any {
|
|
if turn.Thinking == "" {
|
|
return nil
|
|
}
|
|
return []map[string]any{{"text": turn.Thinking, "thought": true}}
|
|
}
|
|
if len(turn.ToolCalls) > 0 {
|
|
parts := thinkingPart()
|
|
if parts == nil {
|
|
parts = make([]map[string]any, 0, len(turn.ToolCalls))
|
|
}
|
|
for _, tc := range turn.ToolCalls {
|
|
parts = append(parts, map[string]any{
|
|
"functionCall": map[string]any{
|
|
"name": tc.Name,
|
|
"args": tc.Input,
|
|
},
|
|
})
|
|
}
|
|
return parts
|
|
}
|
|
parts := thinkingPart()
|
|
if turn.Text != "" {
|
|
parts = append(parts, map[string]any{"text": turn.Text})
|
|
}
|
|
if len(parts) == 0 {
|
|
parts = append(parts, map[string]any{"text": ""})
|
|
}
|
|
return parts
|
|
}
|
|
|
|
//nolint:unused // retained for native Gemini non-stream handling path.
|
|
func buildGeminiUsage(model, finalPrompt, finalThinking, finalText string) map[string]any {
|
|
promptTokens := util.CountPromptTokens(finalPrompt, model)
|
|
reasoningTokens := util.CountOutputTokens(finalThinking, model)
|
|
completionTokens := util.CountOutputTokens(finalText, model)
|
|
return map[string]any{
|
|
"promptTokenCount": promptTokens,
|
|
"candidatesTokenCount": reasoningTokens + completionTokens,
|
|
"totalTokenCount": promptTokens + reasoningTokens + completionTokens,
|
|
}
|
|
}
|
|
|
|
//nolint:unused // retained for native Gemini non-stream handling path.
|
|
func buildGeminiPartsFromFinal(finalText, finalThinking string, toolNames []string) []map[string]any {
|
|
detected := toolcall.ParseToolCalls(finalText, toolNames)
|
|
if len(detected) == 0 && finalThinking != "" {
|
|
detected = toolcall.ParseToolCalls(finalThinking, toolNames)
|
|
}
|
|
thinkingPart := func() []map[string]any {
|
|
if finalThinking == "" {
|
|
return nil
|
|
}
|
|
return []map[string]any{{"text": finalThinking, "thought": true}}
|
|
}
|
|
if len(detected) > 0 {
|
|
parts := thinkingPart()
|
|
if parts == nil {
|
|
parts = make([]map[string]any, 0, len(detected))
|
|
}
|
|
for _, tc := range detected {
|
|
parts = append(parts, map[string]any{
|
|
"functionCall": map[string]any{
|
|
"name": tc.Name,
|
|
"args": tc.Input,
|
|
},
|
|
})
|
|
}
|
|
return parts
|
|
}
|
|
|
|
parts := thinkingPart()
|
|
if finalText != "" {
|
|
parts = append(parts, map[string]any{"text": finalText})
|
|
}
|
|
if len(parts) == 0 {
|
|
parts = append(parts, map[string]any{"text": ""})
|
|
}
|
|
return parts
|
|
}
|