feat: implement support for thinking blocks in Gemini API and enable thinking by default for supported models

This commit is contained in:
CJACK
2026-05-03 01:00:06 +08:00
parent a901250de7
commit 7c0bc9ec0f
10 changed files with 177 additions and 33 deletions

View File

@@ -343,8 +343,17 @@ func buildGeminiGenerateContentResponseFromTurn(turn assistantturn.Turn) map[str
}
func buildGeminiPartsFromTurn(turn assistantturn.Turn) []map[string]any {
thinkingPart := func() []map[string]any {
if turn.Thinking == "" {
return nil
}
return []map[string]any{{"text": turn.Thinking, "thought": true}}
}
if len(turn.ToolCalls) > 0 {
parts := make([]map[string]any, 0, len(turn.ToolCalls))
parts := thinkingPart()
if parts == nil {
parts = make([]map[string]any, 0, len(turn.ToolCalls))
}
for _, tc := range turn.ToolCalls {
parts = append(parts, map[string]any{
"functionCall": map[string]any{
@@ -355,11 +364,14 @@ func buildGeminiPartsFromTurn(turn assistantturn.Turn) []map[string]any {
}
return parts
}
text := turn.Text
if text == "" {
text = turn.Thinking
parts := thinkingPart()
if turn.Text != "" {
parts = append(parts, map[string]any{"text": turn.Text})
}
return []map[string]any{{"text": text}}
if len(parts) == 0 {
parts = append(parts, map[string]any{"text": ""})
}
return parts
}
//nolint:unused // retained for native Gemini non-stream handling path.
@@ -380,8 +392,17 @@ func buildGeminiPartsFromFinal(finalText, finalThinking string, toolNames []stri
if len(detected) == 0 && finalThinking != "" {
detected = toolcall.ParseToolCalls(finalThinking, toolNames)
}
thinkingPart := func() []map[string]any {
if finalThinking == "" {
return nil
}
return []map[string]any{{"text": finalThinking, "thought": true}}
}
if len(detected) > 0 {
parts := make([]map[string]any, 0, len(detected))
parts := thinkingPart()
if parts == nil {
parts = make([]map[string]any, 0, len(detected))
}
for _, tc := range detected {
parts = append(parts, map[string]any{
"functionCall": map[string]any{
@@ -393,9 +414,12 @@ func buildGeminiPartsFromFinal(finalText, finalThinking string, toolNames []stri
return parts
}
text := finalText
if text == "" {
text = finalThinking
parts := thinkingPart()
if finalText != "" {
parts = append(parts, map[string]any{"text": finalText})
}
return []map[string]any{{"text": text}}
if len(parts) == 0 {
parts = append(parts, map[string]any{"text": ""})
}
return parts
}

View File

@@ -134,6 +134,21 @@ func (s *geminiStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Parse
accumulated := s.accumulator.Apply(parsed)
for _, p := range accumulated.Parts {
if p.Type == "thinking" {
if p.VisibleText == "" || s.bufferContent {
continue
}
s.sendChunk(map[string]any{
"candidates": []map[string]any{
{
"index": 0,
"content": map[string]any{
"role": "model",
"parts": []map[string]any{{"text": p.VisibleText, "thought": true}},
},
},
},
"modelVersion": s.model,
})
continue
}
if p.RawText == "" || p.CitationOnly || p.VisibleText == "" {

View File

@@ -257,6 +257,56 @@ func TestStreamGenerateContentEmitsSSE(t *testing.T) {
}
}
func TestNativeStreamGenerateContentEmitsThoughtParts(t *testing.T) {
h := &Handler{}
resp := makeGeminiUpstreamResponse(
`data: {"p":"response/thinking_content","v":"think"}`,
`data: {"p":"response/content","v":"answer"}`,
`data: [DONE]`,
)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-pro:streamGenerateContent", nil)
h.handleStreamGenerateContent(rec, req, resp, "gemini-2.5-pro", "prompt", true, false, nil, nil)
frames := extractGeminiSSEFrames(t, rec.Body.String())
if len(frames) < 2 {
t.Fatalf("expected thought and text stream frames, body=%s", rec.Body.String())
}
var gotThought, gotText string
for _, frame := range frames {
for _, part := range geminiPartsFromFrame(frame) {
if part["thought"] == true {
gotThought += asString(part["text"])
} else {
gotText += asString(part["text"])
}
}
}
if gotThought != "think" {
t.Fatalf("expected thought part, got %q body=%s", gotThought, rec.Body.String())
}
if !strings.Contains(gotText, "answer") {
t.Fatalf("expected text part answer, got %q body=%s", gotText, rec.Body.String())
}
}
// TestBuildGeminiPartsFromFinalIncludesThoughtPart verifies that, given both a
// final text and a final thinking string (and no tool names), the builder
// returns exactly two parts: the thinking text flagged "thought": true first,
// followed by the visible answer text with no "thought" key.
func TestBuildGeminiPartsFromFinalIncludesThoughtPart(t *testing.T) {
	got := buildGeminiPartsFromFinal("answer", "think", nil)
	if len(got) != 2 {
		t.Fatalf("expected thought + answer parts, got %#v", got)
	}

	thoughtPart, answerPart := got[0], got[1]

	if !(thoughtPart["thought"] == true && thoughtPart["text"] == "think") {
		t.Fatalf("expected first part to be thought, got %#v", thoughtPart)
	}
	// The visible part must not carry the "thought" key at all.
	if _, flagged := answerPart["thought"]; flagged {
		t.Fatalf("expected second part to be visible text, got %#v", answerPart)
	}
	if answerPart["text"] != "answer" {
		t.Fatalf("expected answer text, got %#v", answerPart)
	}
}
func TestGeminiProxyTranslatesInlineImageToOpenAIDataURL(t *testing.T) {
openAI := &geminiOpenAISuccessStub{}
h := &Handler{Store: testGeminiConfig{}, OpenAI: openAI}
@@ -396,3 +446,21 @@ func extractGeminiSSEFrames(t *testing.T, body string) []map[string]any {
}
return out
}
// geminiPartsFromFrame returns the map-typed entries found under
// candidates[0].content.parts of a decoded response frame.
//
// It returns nil when "candidates" is missing, is not a []any, or is empty.
// Otherwise it returns a non-nil slice (possibly empty); entries of "parts"
// that are not map[string]any values are skipped.
func geminiPartsFromFrame(frame map[string]any) []map[string]any {
	candidateList, ok := frame["candidates"].([]any)
	if !ok || len(candidateList) == 0 {
		return nil
	}

	// Walk down to the parts list; any shape mismatch along the way simply
	// leaves partList nil, which yields an empty result below.
	var partList []any
	if first, ok := candidateList[0].(map[string]any); ok {
		if body, ok := first["content"].(map[string]any); ok {
			partList, _ = body["parts"].([]any)
		}
	}

	out := make([]map[string]any, 0, len(partList))
	for i := range partList {
		if entry, _ := partList[i].(map[string]any); entry != nil {
			out = append(out, entry)
		}
	}
	return out
}