feat: Introduce model alias resolution, enhanced configuration options, and improved OpenAI/Claude adapter handling for responses, embeddings, and tool calls.

This commit is contained in:
CJACK
2026-02-18 23:06:18 +08:00
parent 27ecb4b69b
commit 3a75b75ae0
28 changed files with 1665 additions and 183 deletions

View File

@@ -0,0 +1,35 @@
package claude
import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
)
func TestWriteClaudeErrorIncludesUnifiedFields(t *testing.T) {
rec := httptest.NewRecorder()
writeClaudeError(rec, http.StatusUnauthorized, "bad token")
if rec.Code != http.StatusUnauthorized {
t.Fatalf("expected 401, got %d", rec.Code)
}
var body map[string]any
if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
t.Fatalf("decode body: %v", err)
}
errObj, _ := body["error"].(map[string]any)
if errObj["message"] != "bad token" {
t.Fatalf("unexpected message: %v", errObj["message"])
}
if errObj["type"] != "invalid_request_error" {
t.Fatalf("unexpected type: %v", errObj["type"])
}
if errObj["code"] != "authentication_failed" {
t.Fatalf("unexpected code: %v", errObj["code"])
}
if _, ok := errObj["param"]; !ok {
t.Fatal("expected param field")
}
}

View File

@@ -43,6 +43,9 @@ func (h *Handler) ListModels(w http.ResponseWriter, _ *http.Request) {
}
func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
if strings.TrimSpace(r.Header.Get("anthropic-version")) == "" {
r.Header.Set("anthropic-version", "2023-06-01")
}
a, err := h.Auth.Determine(r)
if err != nil {
status := http.StatusUnauthorized
@@ -50,22 +53,25 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
if err == auth.ErrNoAccount {
status = http.StatusTooManyRequests
}
writeJSON(w, status, map[string]any{"error": map[string]any{"type": "invalid_request_error", "message": detail}})
writeClaudeError(w, status, detail)
return
}
defer h.Auth.Release(a)
var req map[string]any
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeJSON(w, http.StatusBadRequest, map[string]any{"error": map[string]any{"type": "invalid_request_error", "message": "invalid json"}})
writeClaudeError(w, http.StatusBadRequest, "invalid json")
return
}
model, _ := req["model"].(string)
messagesRaw, _ := req["messages"].([]any)
if model == "" || len(messagesRaw) == 0 {
writeJSON(w, http.StatusBadRequest, map[string]any{"error": map[string]any{"type": "invalid_request_error", "message": "Request must include 'model' and 'messages'."}})
writeClaudeError(w, http.StatusBadRequest, "Request must include 'model' and 'messages'.")
return
}
if _, ok := req["max_tokens"]; !ok {
req["max_tokens"] = 8192
}
normalized := normalizeClaudeMessages(messagesRaw)
payload := cloneMap(req)
@@ -86,12 +92,12 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
sessionID, err := h.DS.CreateSession(r.Context(), a, 3)
if err != nil {
writeJSON(w, http.StatusUnauthorized, map[string]any{"error": map[string]any{"type": "api_error", "message": "invalid token."}})
writeClaudeError(w, http.StatusUnauthorized, "invalid token.")
return
}
pow, err := h.DS.GetPow(r.Context(), a, 3)
if err != nil {
writeJSON(w, http.StatusUnauthorized, map[string]any{"error": map[string]any{"type": "api_error", "message": "Failed to get PoW"}})
writeClaudeError(w, http.StatusUnauthorized, "Failed to get PoW")
return
}
requestPayload := map[string]any{
@@ -104,13 +110,13 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
}
resp, err := h.DS.CallCompletion(r.Context(), a, requestPayload, pow, 3)
if err != nil {
writeJSON(w, http.StatusInternalServerError, map[string]any{"error": map[string]any{"type": "api_error", "message": "Failed to get Claude response."}})
writeClaudeError(w, http.StatusInternalServerError, "Failed to get Claude response.")
return
}
if resp.StatusCode != http.StatusOK {
defer resp.Body.Close()
body, _ := io.ReadAll(resp.Body)
writeJSON(w, http.StatusInternalServerError, map[string]any{"error": map[string]any{"type": "api_error", "message": string(body)}})
writeClaudeError(w, http.StatusInternalServerError, string(body))
return
}
@@ -162,20 +168,20 @@ func (h *Handler) Messages(w http.ResponseWriter, r *http.Request) {
func (h *Handler) CountTokens(w http.ResponseWriter, r *http.Request) {
a, err := h.Auth.Determine(r)
if err != nil {
writeJSON(w, http.StatusUnauthorized, map[string]any{"error": err.Error()})
writeClaudeError(w, http.StatusUnauthorized, err.Error())
return
}
defer h.Auth.Release(a)
var req map[string]any
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeJSON(w, http.StatusBadRequest, map[string]any{"error": "invalid json"})
writeClaudeError(w, http.StatusBadRequest, "invalid json")
return
}
model, _ := req["model"].(string)
messages, _ := req["messages"].([]any)
if model == "" || len(messages) == 0 {
writeJSON(w, http.StatusBadRequest, map[string]any{"error": "Request must include 'model' and 'messages'."})
writeClaudeError(w, http.StatusBadRequest, "Request must include 'model' and 'messages'.")
return
}
inputTokens := 0
@@ -206,7 +212,7 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
writeJSON(w, http.StatusInternalServerError, map[string]any{"error": map[string]any{"type": "api_error", "message": string(body)}})
writeClaudeError(w, http.StatusInternalServerError, string(body))
return
}
@@ -241,6 +247,8 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ
"error": map[string]any{
"type": "api_error",
"message": msg,
"code": "internal_error",
"param": nil,
},
})
}
@@ -492,6 +500,28 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ
}
}
func writeClaudeError(w http.ResponseWriter, status int, message string) {
code := "invalid_request"
switch status {
case http.StatusUnauthorized:
code = "authentication_failed"
case http.StatusTooManyRequests:
code = "rate_limit_exceeded"
case http.StatusNotFound:
code = "not_found"
case http.StatusInternalServerError:
code = "internal_error"
}
writeJSON(w, status, map[string]any{
"error": map[string]any{
"type": "invalid_request_error",
"message": message,
"code": code,
"param": nil,
},
})
}
func normalizeClaudeMessages(messages []any) []any {
out := make([]any, 0, len(messages))
for _, m := range messages {

View File

@@ -0,0 +1,138 @@
package openai
import (
"crypto/sha256"
"encoding/binary"
"encoding/json"
"fmt"
"net/http"
"strings"
"ds2api/internal/auth"
"ds2api/internal/config"
"ds2api/internal/util"
)
func (h *Handler) Embeddings(w http.ResponseWriter, r *http.Request) {
a, err := h.Auth.Determine(r)
if err != nil {
status := http.StatusUnauthorized
detail := err.Error()
if err == auth.ErrNoAccount {
status = http.StatusTooManyRequests
}
writeOpenAIError(w, status, detail)
return
}
defer h.Auth.Release(a)
var req map[string]any
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeOpenAIError(w, http.StatusBadRequest, "invalid json")
return
}
model, _ := req["model"].(string)
model = strings.TrimSpace(model)
if model == "" {
writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model'.")
return
}
if _, ok := config.ResolveModel(h.Store, model); !ok {
writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model))
return
}
inputs := extractEmbeddingInputs(req["input"])
if len(inputs) == 0 {
writeOpenAIError(w, http.StatusBadRequest, "Request must include non-empty 'input'.")
return
}
provider := ""
if h.Store != nil {
provider = strings.ToLower(strings.TrimSpace(h.Store.EmbeddingsProvider()))
}
if provider == "" {
writeOpenAIError(w, http.StatusNotImplemented, "Embeddings provider is not configured. Set embeddings.provider in config.")
return
}
switch provider {
case "mock", "deterministic", "builtin":
// supported local deterministic provider
default:
writeOpenAIError(w, http.StatusNotImplemented, fmt.Sprintf("Embeddings provider '%s' is not supported.", provider))
return
}
data := make([]map[string]any, 0, len(inputs))
totalTokens := 0
for i, input := range inputs {
totalTokens += util.EstimateTokens(input)
data = append(data, map[string]any{
"object": "embedding",
"index": i,
"embedding": deterministicEmbedding(input),
})
}
writeJSON(w, http.StatusOK, map[string]any{
"object": "list",
"data": data,
"model": model,
"usage": map[string]any{
"prompt_tokens": totalTokens,
"total_tokens": totalTokens,
},
})
}
func extractEmbeddingInputs(raw any) []string {
switch v := raw.(type) {
case string:
s := strings.TrimSpace(v)
if s == "" {
return nil
}
return []string{s}
case []any:
out := make([]string, 0, len(v))
for _, item := range v {
switch iv := item.(type) {
case string:
s := strings.TrimSpace(iv)
if s != "" {
out = append(out, s)
}
case []any:
// Token array input support: convert to stable string form.
out = append(out, fmt.Sprintf("%v", iv))
default:
s := strings.TrimSpace(fmt.Sprintf("%v", iv))
if s != "" {
out = append(out, s)
}
}
}
return out
default:
return nil
}
}
func deterministicEmbedding(input string) []float64 {
// Keep response shape stable without external dependencies.
const dims = 64
out := make([]float64, dims)
seed := sha256.Sum256([]byte(input))
buf := seed[:]
for i := 0; i < dims; i++ {
if len(buf) < 4 {
next := sha256.Sum256(buf)
buf = next[:]
}
v := binary.BigEndian.Uint32(buf[:4])
buf = buf[4:]
// map [0, 2^32) -> [-1, 1]
out[i] = (float64(v)/2147483647.5 - 1.0)
}
return out
}

View File

@@ -0,0 +1,35 @@
package openai
import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
)
func TestWriteOpenAIErrorIncludesUnifiedFields(t *testing.T) {
rec := httptest.NewRecorder()
writeOpenAIError(rec, http.StatusBadRequest, "invalid input")
if rec.Code != http.StatusBadRequest {
t.Fatalf("expected 400, got %d", rec.Code)
}
var body map[string]any
if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
t.Fatalf("decode body: %v", err)
}
errObj, _ := body["error"].(map[string]any)
if errObj["message"] != "invalid input" {
t.Fatalf("unexpected message: %v", errObj["message"])
}
if errObj["type"] != "invalid_request_error" {
t.Fatalf("unexpected type: %v", errObj["type"])
}
if errObj["code"] != "invalid_request" {
t.Fatalf("unexpected code: %v", errObj["code"])
}
if _, ok := errObj["param"]; !ok {
t.Fatal("expected param field")
}
}

View File

@@ -31,6 +31,8 @@ type Handler struct {
leaseMu sync.Mutex
streamLeases map[string]streamLease
responsesMu sync.Mutex
responses *responseStore
}
type streamLease struct {
@@ -40,13 +42,27 @@ type streamLease struct {
func RegisterRoutes(r chi.Router, h *Handler) {
r.Get("/v1/models", h.ListModels)
r.Get("/v1/models/{model_id}", h.GetModel)
r.Post("/v1/chat/completions", h.ChatCompletions)
r.Post("/v1/responses", h.Responses)
r.Get("/v1/responses/{response_id}", h.GetResponseByID)
r.Post("/v1/embeddings", h.Embeddings)
}
func (h *Handler) ListModels(w http.ResponseWriter, _ *http.Request) {
writeJSON(w, http.StatusOK, config.OpenAIModelsResponse())
}
func (h *Handler) GetModel(w http.ResponseWriter, r *http.Request) {
modelID := strings.TrimSpace(chi.URLParam(r, "model_id"))
model, ok := config.OpenAIModelByID(h.Store, modelID)
if !ok {
writeOpenAIError(w, http.StatusNotFound, "Model not found.")
return
}
writeJSON(w, http.StatusOK, model)
}
func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
if isVercelStreamReleaseRequest(r) {
h.handleVercelStreamRelease(w, r)
@@ -81,11 +97,16 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model' and 'messages'.")
return
}
thinkingEnabled, searchEnabled, ok := config.GetModelConfig(model)
resolvedModel, ok := config.ResolveModel(h.Store, model)
if !ok {
writeOpenAIError(w, http.StatusServiceUnavailable, fmt.Sprintf("Model '%s' is not available.", model))
writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model))
return
}
thinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)
responseModel := strings.TrimSpace(model)
if responseModel == "" {
responseModel = resolvedModel
}
finalPrompt, toolNames := buildOpenAIFinalPrompt(messagesRaw, req["tools"])
@@ -111,16 +132,17 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) {
"thinking_enabled": thinkingEnabled,
"search_enabled": searchEnabled,
}
applyOpenAIChatPassThrough(req, payload)
resp, err := h.DS.CallCompletion(r.Context(), a, payload, pow, 3)
if err != nil {
writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
return
}
if util.ToBool(req["stream"]) {
h.handleStream(w, r, resp, sessionID, model, finalPrompt, thinkingEnabled, searchEnabled, toolNames)
h.handleStream(w, r, resp, sessionID, responseModel, finalPrompt, thinkingEnabled, searchEnabled, toolNames)
return
}
h.handleNonStream(w, r.Context(), resp, sessionID, model, finalPrompt, thinkingEnabled, toolNames)
h.handleNonStream(w, r.Context(), resp, sessionID, responseModel, finalPrompt, thinkingEnabled, toolNames)
}
func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, resp *http.Response, completionID, model, finalPrompt string, thinkingEnabled bool, toolNames []string) {
@@ -135,7 +157,7 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, ctx context.Context, re
finalThinking := result.Thinking
finalText := result.Text
detected := util.ParseStandaloneToolCalls(finalText, toolNames)
detected := util.ParseToolCalls(finalText, toolNames)
finishReason := "stop"
messageObj := map[string]any{"role": "assistant", "content": finalText}
if thinkingEnabled && finalThinking != "" {
@@ -222,7 +244,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
finalize := func(finishReason string) {
finalThinking := thinking.String()
finalText := text.String()
detected := util.ParseStandaloneToolCalls(finalText, toolNames)
detected := util.ParseToolCalls(finalText, toolNames)
if len(detected) > 0 && !toolCallsEmitted {
finishReason = "tool_calls"
delta := map[string]any{
@@ -497,6 +519,8 @@ func writeOpenAIError(w http.ResponseWriter, status int, message string) {
"error": map[string]any{
"message": message,
"type": openAIErrorType(status),
"code": openAIErrorCode(status),
"param": nil,
},
})
}
@@ -520,3 +544,41 @@ func openAIErrorType(status int) string {
return "invalid_request_error"
}
}
func openAIErrorCode(status int) string {
switch status {
case http.StatusBadRequest:
return "invalid_request"
case http.StatusUnauthorized:
return "authentication_failed"
case http.StatusForbidden:
return "forbidden"
case http.StatusTooManyRequests:
return "rate_limit_exceeded"
case http.StatusNotFound:
return "not_found"
case http.StatusServiceUnavailable:
return "service_unavailable"
default:
if status >= 500 {
return "internal_error"
}
return "invalid_request"
}
}
func applyOpenAIChatPassThrough(req map[string]any, payload map[string]any) {
for _, k := range []string{
"temperature",
"top_p",
"max_tokens",
"max_completion_tokens",
"presence_penalty",
"frequency_penalty",
"stop",
} {
if v, ok := req[k]; ok {
payload[k] = v
}
}
}

View File

@@ -210,7 +210,7 @@ func TestHandleNonStreamUnknownToolStillIntercepted(t *testing.T) {
}
}
func TestHandleNonStreamEmbeddedToolCallExampleNotIntercepted(t *testing.T) {
func TestHandleNonStreamEmbeddedToolCallExampleIntercepted(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"p":"response/content","v":"下面是示例:"}`,
@@ -228,16 +228,16 @@ func TestHandleNonStreamEmbeddedToolCallExampleNotIntercepted(t *testing.T) {
out := decodeJSONBody(t, rec.Body.String())
choices, _ := out["choices"].([]any)
choice, _ := choices[0].(map[string]any)
if choice["finish_reason"] != "stop" {
t.Fatalf("expected finish_reason=stop, got %#v", choice["finish_reason"])
if choice["finish_reason"] != "tool_calls" {
t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
}
msg, _ := choice["message"].(map[string]any)
if _, ok := msg["tool_calls"]; ok {
t.Fatalf("did not expect tool_calls field for embedded example: %#v", msg["tool_calls"])
toolCalls, _ := msg["tool_calls"].([]any)
if len(toolCalls) == 0 {
t.Fatalf("expected tool_calls field for embedded example: %#v", msg["tool_calls"])
}
content, _ := msg["content"].(string)
if !strings.Contains(content, "示例") || !strings.Contains(content, `"tool_calls"`) {
t.Fatalf("expected embedded example to pass through as text, got %q", content)
if msg["content"] != nil {
t.Fatalf("expected content nil when tool_calls detected, got %#v", msg["content"])
}
}
@@ -471,8 +471,8 @@ func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) {
if !done {
t.Fatalf("expected [DONE], body=%s", rec.Body.String())
}
if streamHasToolCallsDelta(frames) {
t.Fatalf("did not expect tool_calls delta in mixed prose stream, body=%s", rec.Body.String())
if !streamHasToolCallsDelta(frames) {
t.Fatalf("expected tool_calls delta in mixed prose stream, body=%s", rec.Body.String())
}
content := strings.Builder{}
for _, frame := range frames {
@@ -489,11 +489,11 @@ func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) {
if !strings.Contains(got, "下面是示例:") || !strings.Contains(got, "请勿执行。") {
t.Fatalf("expected pre/post plain text to pass sieve, got=%q", got)
}
if !strings.Contains(got, `"tool_calls"`) {
t.Fatalf("expected mixed stream to preserve embedded tool_calls example text, got=%q", got)
if strings.Contains(strings.ToLower(got), `"tool_calls"`) {
t.Fatalf("expected no raw tool_calls json leak in content, got=%q", got)
}
if streamFinishReason(frames) != "stop" {
t.Fatalf("expected finish_reason=stop for mixed prose, body=%s", rec.Body.String())
if streamFinishReason(frames) != "tool_calls" {
t.Fatalf("expected finish_reason=tool_calls for mixed prose, body=%s", rec.Body.String())
}
}

View File

@@ -0,0 +1,46 @@
package openai
import (
"net/http"
"net/http/httptest"
"testing"
"github.com/go-chi/chi/v5"
)
func TestGetModelRouteDirectAndAlias(t *testing.T) {
h := &Handler{}
r := chi.NewRouter()
RegisterRoutes(r, h)
t.Run("direct", func(t *testing.T) {
req := httptest.NewRequest(http.MethodGet, "/v1/models/deepseek-chat", nil)
rec := httptest.NewRecorder()
r.ServeHTTP(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
}
})
t.Run("alias", func(t *testing.T) {
req := httptest.NewRequest(http.MethodGet, "/v1/models/gpt-4.1", nil)
rec := httptest.NewRecorder()
r.ServeHTTP(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("expected 200 for alias, got %d body=%s", rec.Code, rec.Body.String())
}
})
}
func TestGetModelRouteNotFound(t *testing.T) {
h := &Handler{}
r := chi.NewRouter()
RegisterRoutes(r, h)
req := httptest.NewRequest(http.MethodGet, "/v1/models/not-exists", nil)
rec := httptest.NewRecorder()
r.ServeHTTP(rec, req)
if rec.Code != http.StatusNotFound {
t.Fatalf("expected 404, got %d body=%s", rec.Code, rec.Body.String())
}
}

View File

@@ -0,0 +1,91 @@
package openai
import (
"sync"
"time"
)
type storedResponse struct {
Value map[string]any
ExpiresAt time.Time
}
type responseStore struct {
mu sync.Mutex
ttl time.Duration
items map[string]storedResponse
}
func newResponseStore(ttl time.Duration) *responseStore {
if ttl <= 0 {
ttl = 15 * time.Minute
}
return &responseStore{
ttl: ttl,
items: make(map[string]storedResponse),
}
}
func (s *responseStore) put(id string, value map[string]any) {
if s == nil || id == "" || value == nil {
return
}
now := time.Now()
s.mu.Lock()
defer s.mu.Unlock()
s.sweepLocked(now)
s.items[id] = storedResponse{
Value: cloneAnyMap(value),
ExpiresAt: now.Add(s.ttl),
}
}
func (s *responseStore) get(id string) (map[string]any, bool) {
if s == nil || id == "" {
return nil, false
}
now := time.Now()
s.mu.Lock()
defer s.mu.Unlock()
s.sweepLocked(now)
item, ok := s.items[id]
if !ok {
return nil, false
}
return cloneAnyMap(item.Value), true
}
func (s *responseStore) sweepLocked(now time.Time) {
for k, v := range s.items {
if now.After(v.ExpiresAt) {
delete(s.items, k)
}
}
}
func cloneAnyMap(in map[string]any) map[string]any {
if in == nil {
return nil
}
out := make(map[string]any, len(in))
for k, v := range in {
out[k] = v
}
return out
}
func (h *Handler) getResponseStore() *responseStore {
if h == nil {
return nil
}
h.responsesMu.Lock()
defer h.responsesMu.Unlock()
if h.responses == nil {
ttl := 15 * time.Minute
if h.Store != nil {
ttl = time.Duration(h.Store.ResponsesStoreTTLSeconds()) * time.Second
}
h.responses = newResponseStore(ttl)
}
return h.responses
}

View File

@@ -0,0 +1,65 @@
package openai
import (
"testing"
"time"
)
func TestNormalizeResponsesInputAsMessagesString(t *testing.T) {
msgs := normalizeResponsesInputAsMessages("hello")
if len(msgs) != 1 {
t.Fatalf("expected one message, got %d", len(msgs))
}
m, _ := msgs[0].(map[string]any)
if m["role"] != "user" || m["content"] != "hello" {
t.Fatalf("unexpected message: %#v", m)
}
}
func TestResponsesMessagesFromRequestWithInstructions(t *testing.T) {
req := map[string]any{
"model": "gpt-4.1",
"input": "ping",
"instructions": "system text",
}
msgs := responsesMessagesFromRequest(req)
if len(msgs) != 2 {
t.Fatalf("expected two messages, got %d", len(msgs))
}
sys, _ := msgs[0].(map[string]any)
if sys["role"] != "system" {
t.Fatalf("unexpected first message: %#v", sys)
}
}
func TestExtractEmbeddingInputs(t *testing.T) {
got := extractEmbeddingInputs([]any{"a", "b"})
if len(got) != 2 || got[0] != "a" || got[1] != "b" {
t.Fatalf("unexpected inputs: %#v", got)
}
}
func TestDeterministicEmbeddingStable(t *testing.T) {
a := deterministicEmbedding("hello")
b := deterministicEmbedding("hello")
if len(a) != 64 || len(b) != 64 {
t.Fatalf("expected 64 dims, got %d and %d", len(a), len(b))
}
for i := range a {
if a[i] != b[i] {
t.Fatalf("expected stable embedding at %d: %v != %v", i, a[i], b[i])
}
}
}
func TestResponseStorePutGet(t *testing.T) {
st := newResponseStore(100 * time.Millisecond)
st.put("resp_1", map[string]any{"id": "resp_1"})
got, ok := st.get("resp_1")
if !ok {
t.Fatal("expected stored response")
}
if got["id"] != "resp_1" {
t.Fatalf("unexpected response payload: %#v", got)
}
}

View File

@@ -0,0 +1,407 @@
package openai
import (
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/go-chi/chi/v5"
"github.com/google/uuid"
"ds2api/internal/auth"
"ds2api/internal/config"
"ds2api/internal/sse"
"ds2api/internal/util"
)
func (h *Handler) GetResponseByID(w http.ResponseWriter, r *http.Request) {
id := strings.TrimSpace(chi.URLParam(r, "response_id"))
if id == "" {
writeOpenAIError(w, http.StatusBadRequest, "response_id is required.")
return
}
st := h.getResponseStore()
item, ok := st.get(id)
if !ok {
writeOpenAIError(w, http.StatusNotFound, "Response not found.")
return
}
writeJSON(w, http.StatusOK, item)
}
func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) {
a, err := h.Auth.Determine(r)
if err != nil {
status := http.StatusUnauthorized
detail := err.Error()
if err == auth.ErrNoAccount {
status = http.StatusTooManyRequests
}
writeOpenAIError(w, status, detail)
return
}
defer h.Auth.Release(a)
r = r.WithContext(auth.WithAuth(r.Context(), a))
var req map[string]any
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeOpenAIError(w, http.StatusBadRequest, "invalid json")
return
}
model, _ := req["model"].(string)
model = strings.TrimSpace(model)
if model == "" {
writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model'.")
return
}
resolvedModel, ok := config.ResolveModel(h.Store, model)
if !ok {
writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model))
return
}
thinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)
messagesRaw := responsesMessagesFromRequest(req)
if len(messagesRaw) == 0 {
writeOpenAIError(w, http.StatusBadRequest, "Request must include 'input' or 'messages'.")
return
}
finalPrompt, toolNames := buildOpenAIFinalPrompt(messagesRaw, req["tools"])
sessionID, err := h.DS.CreateSession(r.Context(), a, 3)
if err != nil {
if a.UseConfigToken {
writeOpenAIError(w, http.StatusUnauthorized, "Account token is invalid. Please re-login the account in admin.")
} else {
writeOpenAIError(w, http.StatusUnauthorized, "Invalid token. If this should be a DS2API key, add it to config.keys first.")
}
return
}
pow, err := h.DS.GetPow(r.Context(), a, 3)
if err != nil {
writeOpenAIError(w, http.StatusUnauthorized, "Failed to get PoW (invalid token or unknown error).")
return
}
payload := map[string]any{
"chat_session_id": sessionID,
"parent_message_id": nil,
"prompt": finalPrompt,
"ref_file_ids": []any{},
"thinking_enabled": thinkingEnabled,
"search_enabled": searchEnabled,
}
applyOpenAIChatPassThrough(req, payload)
resp, err := h.DS.CallCompletion(r.Context(), a, payload, pow, 3)
if err != nil {
writeOpenAIError(w, http.StatusInternalServerError, "Failed to get completion.")
return
}
responseID := "resp_" + strings.ReplaceAll(uuid.NewString(), "-", "")
if util.ToBool(req["stream"]) {
h.handleResponsesStream(w, r, resp, responseID, model, finalPrompt, thinkingEnabled, searchEnabled, toolNames)
return
}
h.handleResponsesNonStream(w, resp, responseID, model, finalPrompt, thinkingEnabled, toolNames)
}
func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Response, responseID, model, finalPrompt string, thinkingEnabled bool, toolNames []string) {
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
writeOpenAIError(w, resp.StatusCode, strings.TrimSpace(string(body)))
return
}
result := sse.CollectStream(resp, thinkingEnabled, true)
responseObj := buildResponseObject(responseID, model, finalPrompt, result.Thinking, result.Text, toolNames)
h.getResponseStore().put(responseID, responseObj)
writeJSON(w, http.StatusOK, responseObj)
}
func (h *Handler) handleResponsesStream(w http.ResponseWriter, r *http.Request, resp *http.Response, responseID, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string) {
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
writeOpenAIError(w, resp.StatusCode, strings.TrimSpace(string(body)))
return
}
w.Header().Set("Content-Type", "text/event-stream")
w.Header().Set("Cache-Control", "no-cache, no-transform")
w.Header().Set("Connection", "keep-alive")
w.Header().Set("X-Accel-Buffering", "no")
rc := http.NewResponseController(w)
canFlush := rc.Flush() == nil
sendEvent := func(event string, payload map[string]any) {
b, _ := json.Marshal(payload)
_, _ = w.Write([]byte("event: " + event + "\n"))
_, _ = w.Write([]byte("data: "))
_, _ = w.Write(b)
_, _ = w.Write([]byte("\n\n"))
if canFlush {
_ = rc.Flush()
}
}
sendEvent("response.created", map[string]any{
"type": "response.created",
"id": responseID,
"object": "response",
"model": model,
"status": "in_progress",
})
initialType := "text"
if thinkingEnabled {
initialType = "thinking"
}
parsedLines, done := sse.StartParsedLinePump(r.Context(), resp.Body, thinkingEnabled, initialType)
bufferToolContent := len(toolNames) > 0
var sieve toolStreamSieveState
thinking := strings.Builder{}
text := strings.Builder{}
toolCallsEmitted := false
streamToolCallIDs := map[int]string{}
finalize := func() {
finalThinking := thinking.String()
finalText := text.String()
if bufferToolContent {
for _, evt := range flushToolSieve(&sieve, toolNames) {
if evt.Content != "" {
finalText += evt.Content
sendEvent("response.output_text.delta", map[string]any{
"type": "response.output_text.delta",
"id": responseID,
"delta": evt.Content,
})
}
if len(evt.ToolCalls) > 0 {
toolCallsEmitted = true
sendEvent("response.output_tool_call.done", map[string]any{
"type": "response.output_tool_call.done",
"id": responseID,
"tool_calls": util.FormatOpenAIStreamToolCalls(evt.ToolCalls),
})
}
}
}
obj := buildResponseObject(responseID, model, finalPrompt, finalThinking, finalText, toolNames)
if toolCallsEmitted {
obj["status"] = "completed"
}
h.getResponseStore().put(responseID, obj)
sendEvent("response.completed", map[string]any{
"type": "response.completed",
"response": obj,
})
_, _ = w.Write([]byte("data: [DONE]\n\n"))
if canFlush {
_ = rc.Flush()
}
}
for {
select {
case <-r.Context().Done():
return
case parsed, ok := <-parsedLines:
if !ok {
_ = <-done
finalize()
return
}
if !parsed.Parsed {
continue
}
if parsed.ContentFilter || parsed.ErrorMessage != "" || parsed.Stop {
finalize()
return
}
for _, p := range parsed.Parts {
if p.Text == "" {
continue
}
if p.Type != "thinking" && searchEnabled && sse.IsCitation(p.Text) {
continue
}
if p.Type == "thinking" {
if !thinkingEnabled {
continue
}
thinking.WriteString(p.Text)
sendEvent("response.reasoning.delta", map[string]any{
"type": "response.reasoning.delta",
"id": responseID,
"delta": p.Text,
})
continue
}
text.WriteString(p.Text)
if !bufferToolContent {
sendEvent("response.output_text.delta", map[string]any{
"type": "response.output_text.delta",
"id": responseID,
"delta": p.Text,
})
continue
}
for _, evt := range processToolSieveChunk(&sieve, p.Text, toolNames) {
if evt.Content != "" {
sendEvent("response.output_text.delta", map[string]any{
"type": "response.output_text.delta",
"id": responseID,
"delta": evt.Content,
})
}
if len(evt.ToolCallDeltas) > 0 {
toolCallsEmitted = true
sendEvent("response.output_tool_call.delta", map[string]any{
"type": "response.output_tool_call.delta",
"id": responseID,
"tool_calls": formatIncrementalStreamToolCallDeltas(evt.ToolCallDeltas, streamToolCallIDs),
})
}
if len(evt.ToolCalls) > 0 {
toolCallsEmitted = true
sendEvent("response.output_tool_call.done", map[string]any{
"type": "response.output_tool_call.done",
"id": responseID,
"tool_calls": util.FormatOpenAIStreamToolCalls(evt.ToolCalls),
})
}
}
}
}
}
}
func buildResponseObject(responseID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
detected := util.ParseToolCalls(finalText, toolNames)
output := make([]any, 0, 2)
if len(detected) > 0 {
toolCalls := make([]any, 0, len(detected))
for _, tc := range detected {
toolCalls = append(toolCalls, map[string]any{
"type": "tool_call",
"name": tc.Name,
"arguments": tc.Input,
})
}
output = append(output, map[string]any{
"type": "tool_calls",
"tool_calls": toolCalls,
})
} else {
content := []any{
map[string]any{
"type": "output_text",
"text": finalText,
},
}
if finalThinking != "" {
content = append([]any{map[string]any{
"type": "reasoning",
"text": finalThinking,
}}, content...)
}
output = append(output, map[string]any{
"type": "message",
"id": "msg_" + strings.ReplaceAll(uuid.NewString(), "-", ""),
"role": "assistant",
"content": content,
})
}
promptTokens := util.EstimateTokens(finalPrompt)
reasoningTokens := util.EstimateTokens(finalThinking)
completionTokens := util.EstimateTokens(finalText)
return map[string]any{
"id": responseID,
"type": "response",
"object": "response",
"created_at": time.Now().Unix(),
"status": "completed",
"model": model,
"output": output,
"output_text": finalText,
"usage": map[string]any{
"input_tokens": promptTokens,
"output_tokens": reasoningTokens + completionTokens,
"total_tokens": promptTokens + reasoningTokens + completionTokens,
},
}
}
func responsesMessagesFromRequest(req map[string]any) []any {
if msgs, ok := req["messages"].([]any); ok && len(msgs) > 0 {
return prependInstructionMessage(msgs, req["instructions"])
}
if rawInput, ok := req["input"]; ok {
if msgs := normalizeResponsesInputAsMessages(rawInput); len(msgs) > 0 {
return prependInstructionMessage(msgs, req["instructions"])
}
}
return nil
}
func prependInstructionMessage(messages []any, instructions any) []any {
sys, _ := instructions.(string)
sys = strings.TrimSpace(sys)
if sys == "" {
return messages
}
out := make([]any, 0, len(messages)+1)
out = append(out, map[string]any{"role": "system", "content": sys})
out = append(out, messages...)
return out
}
func normalizeResponsesInputAsMessages(input any) []any {
switch v := input.(type) {
case string:
if strings.TrimSpace(v) == "" {
return nil
}
return []any{map[string]any{"role": "user", "content": v}}
case []any:
if len(v) == 0 {
return nil
}
// If caller already provides role-shaped items, keep as-is.
if first, ok := v[0].(map[string]any); ok {
if _, hasRole := first["role"]; hasRole {
return v
}
}
parts := make([]string, 0, len(v))
for _, item := range v {
if m, ok := item.(map[string]any); ok {
if t, _ := m["type"].(string); strings.EqualFold(strings.TrimSpace(t), "input_text") {
if txt, _ := m["text"].(string); strings.TrimSpace(txt) != "" {
parts = append(parts, txt)
continue
}
}
}
if s := strings.TrimSpace(fmt.Sprintf("%v", item)); s != "" {
parts = append(parts, s)
}
}
if len(parts) == 0 {
return nil
}
return []any{map[string]any{"role": "user", "content": strings.Join(parts, "\n")}}
case map[string]any:
if txt, _ := v["text"].(string); strings.TrimSpace(txt) != "" {
return []any{map[string]any{"role": "user", "content": txt}}
}
if content, ok := v["content"].(string); ok && strings.TrimSpace(content) != "" {
return []any{map[string]any{"role": "user", "content": content}}
}
}
return nil
}

View File

@@ -7,17 +7,16 @@ import (
)
type toolStreamSieveState struct {
pending strings.Builder
capture strings.Builder
capturing bool
hasMeaningfulText bool
recentTextTail string
toolNameSent bool
toolName string
toolArgsStart int
toolArgsSent int
toolArgsString bool
toolArgsDone bool
pending strings.Builder
capture strings.Builder
capturing bool
recentTextTail string
toolNameSent bool
toolName string
toolArgsStart int
toolArgsSent int
toolArgsString bool
toolArgsDone bool
}
type toolStreamEvent struct {
@@ -197,14 +196,22 @@ func findToolSegmentStart(s string) int {
return -1
}
lower := strings.ToLower(s)
keyIdx := strings.Index(lower, "tool_calls")
if keyIdx < 0 {
return -1
offset := 0
for {
keyRel := strings.Index(lower[offset:], "tool_calls")
if keyRel < 0 {
return -1
}
keyIdx := offset + keyRel
start := strings.LastIndex(s[:keyIdx], "{")
if start < 0 {
start = keyIdx
}
if !insideCodeFence(s[:start]) {
return start
}
offset = keyIdx + len("tool_calls")
}
if start := strings.LastIndex(s[:keyIdx], "{"); start >= 0 {
return start
}
return keyIdx
}
func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) {
@@ -227,7 +234,7 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
}
prefixPart := captured[:start]
suffixPart := captured[end:]
if !state.toolNameSent && (strings.TrimSpace(prefixPart) != "" || looksLikeToolExampleContext(state.recentTextTail) || looksLikeToolExampleContext(suffixPart)) {
if insideCodeFence(state.recentTextTail + prefixPart) {
return captured, nil, "", true
}
parsed := util.ParseStandaloneToolCalls(obj, toolNames)
@@ -293,16 +300,16 @@ func buildIncrementalToolDeltas(state *toolStreamSieveState) []toolCallDelta {
if captured == "" {
return nil
}
if looksLikeToolExampleContext(state.recentTextTail) {
return nil
}
lower := strings.ToLower(captured)
keyIdx := strings.Index(lower, "tool_calls")
if keyIdx < 0 {
return nil
}
start := strings.LastIndex(captured[:keyIdx], "{")
if start < 0 || strings.TrimSpace(captured[:start]) != "" {
if start < 0 {
return nil
}
if insideCodeFence(state.recentTextTail + captured[:start]) {
return nil
}
callStart, ok := findFirstToolCallObjectStart(captured, keyIdx)
@@ -612,7 +619,6 @@ func (s *toolStreamSieveState) noteText(content string) {
if strings.TrimSpace(content) == "" {
return
}
s.hasMeaningfulText = true
s.recentTextTail = appendTail(s.recentTextTail, content, toolSieveContextTailLimit)
}
@@ -628,25 +634,12 @@ func appendTail(prev, next string, max int) string {
}
func looksLikeToolExampleContext(text string) bool {
t := strings.ToLower(strings.TrimSpace(text))
if t == "" {
return insideCodeFence(text)
}
func insideCodeFence(text string) bool {
if text == "" {
return false
}
cues := []string{
"示例",
"例子",
"for example",
"example",
"demo",
"请勿执行",
"不要执行",
"do not execute",
"```",
}
for _, cue := range cues {
if strings.Contains(t, cue) {
return true
}
}
return false
return strings.Count(text, "```")%2 == 1
}

View File

@@ -62,11 +62,16 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque
writeOpenAIError(w, http.StatusBadRequest, "Request must include 'model' and 'messages'.")
return
}
thinkingEnabled, searchEnabled, ok := config.GetModelConfig(model)
resolvedModel, ok := config.ResolveModel(h.Store, model)
if !ok {
writeOpenAIError(w, http.StatusServiceUnavailable, fmt.Sprintf("Model '%s' is not available.", model))
writeOpenAIError(w, http.StatusBadRequest, fmt.Sprintf("Model '%s' is not available.", model))
return
}
thinkingEnabled, searchEnabled, _ := config.GetModelConfig(resolvedModel)
responseModel := strings.TrimSpace(model)
if responseModel == "" {
responseModel = resolvedModel
}
finalPrompt, _ := buildOpenAIFinalPrompt(messagesRaw, req["tools"])
@@ -97,6 +102,7 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque
"thinking_enabled": thinkingEnabled,
"search_enabled": searchEnabled,
}
applyOpenAIChatPassThrough(req, payload)
leaseID := h.holdStreamLease(a)
if leaseID == "" {
writeOpenAIError(w, http.StatusInternalServerError, "failed to create stream lease")
@@ -106,7 +112,7 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque
writeJSON(w, http.StatusOK, map[string]any{
"session_id": sessionID,
"lease_id": leaseID,
"model": model,
"model": responseModel,
"final_prompt": finalPrompt,
"thinking_enabled": thinkingEnabled,
"search_enabled": searchEnabled,