From 55abf647179ee5d57f621ad942ebfb9d976cdd98 Mon Sep 17 00:00:00 2001 From: CJACK Date: Sat, 2 May 2026 00:55:17 +0800 Subject: [PATCH] feat: add model type support for file uploads with automatic resolution and header propagation --- docs/prompt-compatibility.md | 8 +++++ internal/deepseek/client/client_upload.go | 8 +++++ .../deepseek/client/client_upload_test.go | 6 ++++ .../httpapi/openai/file_inline_upload_test.go | 13 +++++++-- .../openai/files/file_inline_upload.go | 12 ++++++++ .../httpapi/openai/files/handler_files.go | 29 +++++++++++++++++++ internal/httpapi/openai/files_route_test.go | 14 +++++++-- .../openai/history/current_input_file.go | 6 ++++ internal/httpapi/openai/history_split_test.go | 9 ++++-- .../features/apiTester/ApiTesterContainer.jsx | 1 + webui/src/features/apiTester/ChatPanel.jsx | 8 ++++- 11 files changed, 105 insertions(+), 9 deletions(-) diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index 58c2c6c..fcc70a5 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -238,6 +238,14 @@ OpenAI 文件相关实现: - 文件 ID 收集: [internal/promptcompat/file_refs.go](../internal/promptcompat/file_refs.go) +OpenAI 的文件上传现在不再是“只传文件本体”的通用路径,而是会先根据请求里的 `model` 解析出 DeepSeek 的上传类型,并把它透传到上传接口的 `x-model-type`。当前可见的上传类型就是 `default` / `expert` / `vision`,其中 vision 请求上传图片时必须带上 `vision`,否则下游容易退回到仅文本或 OCR 语义。这个模型类型会同时用于: + +- `/v1/files` 这类独立文件上传入口 +- Chat / Responses 的 inline 图片、附件上传 +- current input file 触发时生成的 `DS2API_HISTORY.txt` 上下文文件 + +也就是说,文件上传和完成请求的 `model_type` 现在是一致的:完成 payload 里仍然是 `model_type`,上传文件则会在 DeepSeek 上传阶段携带同样的模型类型信息。 + 结论: - “systemprompt 文字”在 prompt 里 diff --git a/internal/deepseek/client/client_upload.go b/internal/deepseek/client/client_upload.go index 9e95a23..c3334c3 100644 --- a/internal/deepseek/client/client_upload.go +++ b/internal/deepseek/client/client_upload.go @@ -23,6 +23,7 @@ type UploadFileRequest struct { Filename string ContentType string Purpose string + ModelType string Data []byte } @@ -54,6 +55,7 @@ func (c *Client) UploadFile(ctx context.Context, a *auth.RequestAuth, req Upload contentType = "application/octet-stream" } purpose := strings.TrimSpace(req.Purpose) + modelType := strings.ToLower(strings.TrimSpace(req.ModelType)) body, contentTypeHeader, err := buildUploadMultipartBody(filename, contentType, req.Data) if err != nil { return nil, err @@ -64,6 +66,9 @@ func (c *Client) UploadFile(ctx context.Context, a *auth.RequestAuth, req Upload "purpose": purpose, "bytes": len(req.Data), } + if modelType != "" { + capturePayload["model_type"] = modelType + } captureSession := c.capture.Start("deepseek_upload_file", dsprotocol.DeepSeekUploadFileURL, a.AccountID, capturePayload) attempts := 0 refreshed := false @@ -81,6 +86,9 @@ func (c *Client) UploadFile(ctx context.Context, a *auth.RequestAuth, req Upload } headers := c.authHeaders(a.DeepSeekToken) headers["Content-Type"] = contentTypeHeader + if modelType != "" { + headers["x-model-type"] = modelType + } headers["x-ds-pow-response"] = powHeader headers["x-file-size"] = strconv.Itoa(len(req.Data)) headers["x-thinking-enabled"] = "1" diff --git a/internal/deepseek/client/client_upload_test.go b/internal/deepseek/client/client_upload_test.go index 90e11cd..e7d1cc0 100644 --- a/internal/deepseek/client/client_upload_test.go +++ b/internal/deepseek/client/client_upload_test.go @@ -82,6 +82,7 @@ func TestUploadFileUsesUploadTargetPowAndMultipartHeaders(t *testing.T) { var seenTargetPath string var seenContentType string var seenFileSize string + var seenModelType string var seenBody string call := 0 client := &Client{ @@ -96,6 +97,7 @@ func TestUploadFileUsesUploadTargetPowAndMultipartHeaders(t *testing.T) { seenPow = req.Header.Get("x-ds-pow-response") seenContentType = req.Header.Get("Content-Type") seenFileSize = req.Header.Get("x-file-size") + seenModelType = req.Header.Get("x-model-type") seenBody = string(bodyBytes) return &http.Response{StatusCode: http.StatusOK, Header: make(http.Header), Body: io.NopCloser(strings.NewReader(uploadResponse)), Request: req}, nil default: @@ -112,6 +114,7 @@ func TestUploadFileUsesUploadTargetPowAndMultipartHeaders(t *testing.T) { Filename: "demo.txt", ContentType: "text/plain", Purpose: "assistants", + ModelType: "vision", Data: []byte("hello"), }, 1) if err != nil { @@ -140,6 +143,9 @@ func TestUploadFileUsesUploadTargetPowAndMultipartHeaders(t *testing.T) { if seenFileSize != "5" { t.Fatalf("expected x-file-size=5, got %q", seenFileSize) } + if seenModelType != "vision" { + t.Fatalf("expected x-model-type=vision, got %q", seenModelType) + } if !strings.HasPrefix(seenContentType, "multipart/form-data; boundary=") { t.Fatalf("expected multipart content type, got %q", seenContentType) } diff --git a/internal/httpapi/openai/file_inline_upload_test.go b/internal/httpapi/openai/file_inline_upload_test.go index fa399b8..8194aeb 100644 --- a/internal/httpapi/openai/file_inline_upload_test.go +++ b/internal/httpapi/openai/file_inline_upload_test.go @@ -94,6 +94,9 @@ func TestPreprocessInlineFileInputsReplacesDataURLAndCollectsRefFileIDs(t *testi if len(ds.uploadCalls) != 1 { t.Fatalf("expected 1 upload, got %d", len(ds.uploadCalls)) } + if ds.uploadCalls[0].ModelType != "default" { + t.Fatalf("expected default model type when request omits model, got %q", ds.uploadCalls[0].ModelType) + } if ds.lastCtx != ctx { t.Fatalf("expected upload to use request context") } @@ -149,7 +152,7 @@ func TestPreprocessInlineFileInputsDeduplicatesIdenticalPayloads(t *testing.T) { func TestChatCompletionsUploadsInlineFilesBeforeCompletion(t *testing.T) { ds := &inlineUploadDSStub{} h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} - reqBody := `{"model":"deepseek-v4-flash","messages":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"image_url","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}` + reqBody := `{"model":"deepseek-v4-vision","messages":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"image_url","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}` req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -163,6 +166,9 @@ func TestChatCompletionsUploadsInlineFilesBeforeCompletion(t *testing.T) { if len(ds.uploadCalls) != 1 { t.Fatalf("expected 1 upload call, got %d", len(ds.uploadCalls)) } + if ds.uploadCalls[0].ModelType != "vision" { + t.Fatalf("expected vision model type for vision request, got %q", ds.uploadCalls[0].ModelType) + } if ds.completionReq == nil { t.Fatal("expected completion payload to be captured") } @@ -177,7 +183,7 @@ func TestResponsesUploadsInlineFilesBeforeCompletion(t *testing.T) { h := &openAITestSurface{Store: mockOpenAIConfig{wideInput: true}, Auth: streamStatusAuthStub{}, DS: ds} r := chi.NewRouter() registerOpenAITestRoutes(r, h) - reqBody := `{"model":"deepseek-v4-flash","input":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"input_image","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}` + reqBody := `{"model":"deepseek-v4-pro","input":[{"role":"user","content":[{"type":"input_text","text":"hi"},{"type":"input_image","image_url":{"url":"data:image/png;base64,QUJDRA=="}}]}],"stream":false}` req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody)) req.Header.Set("Authorization", "Bearer direct-token") req.Header.Set("Content-Type", "application/json") @@ -191,6 +197,9 @@ func TestResponsesUploadsInlineFilesBeforeCompletion(t *testing.T) { if len(ds.uploadCalls) != 1 { t.Fatalf("expected 1 upload call, got %d", len(ds.uploadCalls)) } + if ds.uploadCalls[0].ModelType != "expert" { + t.Fatalf("expected expert model type for pro request, got %q", ds.uploadCalls[0].ModelType) + } refIDs, _ := ds.completionReq["ref_file_ids"].([]any) if len(refIDs) != 1 || refIDs[0] != "file-inline-1" { t.Fatalf("unexpected completion ref_file_ids: %#v", ds.completionReq["ref_file_ids"]) diff --git a/internal/httpapi/openai/files/file_inline_upload.go b/internal/httpapi/openai/files/file_inline_upload.go index a16fe52..bb3ddce 100644 --- a/internal/httpapi/openai/files/file_inline_upload.go +++ b/internal/httpapi/openai/files/file_inline_upload.go @@ -12,6 +12,7 @@ import ( "strings" "ds2api/internal/auth" + "ds2api/internal/config" dsclient "ds2api/internal/deepseek/client" "ds2api/internal/httpapi/openai/shared" "ds2api/internal/promptcompat" @@ -42,6 +43,7 @@ type inlineUploadState struct { ctx context.Context handler *Handler auth *auth.RequestAuth + modelType string uploadedByID map[string]string uploadCount int inlineFileBytes int @@ -58,10 +60,19 @@ func (h *Handler) PreprocessInlineFileInputs(ctx context.Context, a *auth.Reques if h == nil || h.DS == nil || len(req) == 0 { return nil } + modelType := "default" + if requestedModel, ok := req["model"].(string); ok { + if resolvedModel, ok := config.ResolveModel(h.Store, requestedModel); ok { + if resolvedType, ok := config.GetModelType(resolvedModel); ok { + modelType = resolvedType + } + } + } state := &inlineUploadState{ ctx: ctx, handler: h, auth: a, + modelType: modelType, uploadedByID: map[string]string{}, } for _, key := range []string{"messages", "input", "attachments"} { @@ -174,6 +185,7 @@ func (s *inlineUploadState) uploadInlineFile(file inlineDecodedFile) (string, er result, err := s.handler.DS.UploadFile(s.ctx, s.auth, dsclient.UploadFileRequest{ Filename: file.Filename, ContentType: contentType, + ModelType: s.modelType, Data: file.Data, }, 3) if err != nil { diff --git a/internal/httpapi/openai/files/handler_files.go b/internal/httpapi/openai/files/handler_files.go index edfb653..5365409 100644 --- a/internal/httpapi/openai/files/handler_files.go +++ b/internal/httpapi/openai/files/handler_files.go @@ -8,6 +8,7 @@ import ( "ds2api/internal/auth" "ds2api/internal/chathistory" + "ds2api/internal/config" dsclient "ds2api/internal/deepseek/client" "ds2api/internal/httpapi/openai/shared" ) @@ -66,10 +67,12 @@ func (h *Handler) UploadFile(w http.ResponseWriter, r *http.Request) { if contentType == "" && len(data) > 0 { contentType = http.DetectContentType(data) } + modelType := resolveUploadModelType(h.Store, r) result, err := h.DS.UploadFile(r.Context(), a, dsclient.UploadFileRequest{ Filename: header.Filename, ContentType: contentType, Purpose: strings.TrimSpace(r.FormValue("purpose")), + ModelType: modelType, Data: data, }, 3) if err != nil { @@ -82,6 +85,32 @@ func (h *Handler) UploadFile(w http.ResponseWriter, r *http.Request) { shared.WriteJSON(w, http.StatusOK, buildOpenAIFileObject(result)) } +func resolveUploadModelType(store shared.ConfigReader, r *http.Request) string { + for _, candidate := range []string{r.FormValue("model_type"), r.Header.Get("X-Model-Type")} { + if modelType := normalizeUploadModelType(candidate); modelType != "" { + return modelType + } + } + requestedModel := strings.TrimSpace(r.FormValue("model")) + if requestedModel != "" { + if resolvedModel, ok := config.ResolveModel(store, requestedModel); ok { + if modelType, ok := config.GetModelType(resolvedModel); ok { + return modelType + } + } + } + return "default" +} + +func normalizeUploadModelType(raw string) string { + switch strings.ToLower(strings.TrimSpace(raw)) { + case "default", "expert", "vision": + return strings.ToLower(strings.TrimSpace(raw)) + default: + return "" + } +} + func buildOpenAIFileObject(result *dsclient.UploadFileResult) map[string]any { if result == nil { obj := map[string]any{ diff --git a/internal/httpapi/openai/files_route_test.go b/internal/httpapi/openai/files_route_test.go index 2b9c205..f365dc3 100644 --- a/internal/httpapi/openai/files_route_test.go +++ b/internal/httpapi/openai/files_route_test.go @@ -77,7 +77,7 @@ func (m *filesRouteDSStub) DeleteAllSessionsForToken(_ context.Context, _ string return nil } -func newMultipartUploadRequest(t *testing.T, purpose string, filename string, data []byte) *http.Request { +func newMultipartUploadRequest(t *testing.T, purpose string, filename string, data []byte, model string) *http.Request { t.Helper() var body bytes.Buffer writer := multipart.NewWriter(&body) @@ -86,6 +86,11 @@ func newMultipartUploadRequest(t *testing.T, purpose string, filename string, da t.Fatalf("write purpose failed: %v", err) } } + if model != "" { + if err := writer.WriteField("model", model); err != nil { + t.Fatalf("write model failed: %v", err) + } + } part, err := writer.CreateFormFile("file", filename) if err != nil { t.Fatalf("create form file failed: %v", err) @@ -108,7 +113,7 @@ func TestFilesRouteUploadSuccess(t *testing.T) { r := chi.NewRouter() registerOpenAITestRoutes(r, h) - req := newMultipartUploadRequest(t, "assistants", "notes.txt", []byte("hello world")) + req := newMultipartUploadRequest(t, "assistants", "notes.txt", []byte("hello world"), "deepseek-v4-vision") rec := httptest.NewRecorder() r.ServeHTTP(rec, req) @@ -121,6 +126,9 @@ func TestFilesRouteUploadSuccess(t *testing.T) { if ds.lastReq.Purpose != "assistants" { t.Fatalf("expected purpose assistants, got %q", ds.lastReq.Purpose) } + if ds.lastReq.ModelType != "vision" { + t.Fatalf("expected vision model type, got %q", ds.lastReq.ModelType) + } if string(ds.lastReq.Data) != "hello world" { t.Fatalf("unexpected uploaded data: %q", string(ds.lastReq.Data)) } @@ -145,7 +153,7 @@ func TestFilesRouteUploadIncludesAccountIDForManagedAccount(t *testing.T) { r := chi.NewRouter() registerOpenAITestRoutes(r, h) - req := newMultipartUploadRequest(t, "assistants", "notes.txt", []byte("hello world")) + req := newMultipartUploadRequest(t, "assistants", "notes.txt", []byte("hello world"), "deepseek-v4-vision") rec := httptest.NewRecorder() r.ServeHTTP(rec, req) diff --git a/internal/httpapi/openai/history/current_input_file.go b/internal/httpapi/openai/history/current_input_file.go index 648331c..1763276 100644 --- a/internal/httpapi/openai/history/current_input_file.go +++ b/internal/httpapi/openai/history/current_input_file.go @@ -7,6 +7,7 @@ import ( "strings" "ds2api/internal/auth" + "ds2api/internal/config" dsclient "ds2api/internal/deepseek/client" "ds2api/internal/httpapi/openai/shared" "ds2api/internal/promptcompat" @@ -35,10 +36,15 @@ func (s Service) ApplyCurrentInputFile(ctx context.Context, a *auth.RequestAuth, if strings.TrimSpace(fileText) == "" { return stdReq, errors.New("current user input file produced empty transcript") } + modelType := "default" + if resolvedType, ok := config.GetModelType(stdReq.ResolvedModel); ok { + modelType = resolvedType + } result, err := s.DS.UploadFile(ctx, a, dsclient.UploadFileRequest{ Filename: currentInputFilename, ContentType: currentInputContentType, Purpose: currentInputPurpose, + ModelType: modelType, Data: []byte(fileText), }, 3) if err != nil { diff --git a/internal/httpapi/openai/history_split_test.go b/internal/httpapi/openai/history_split_test.go index d223689..9e5bdd9 100644 --- a/internal/httpapi/openai/history_split_test.go +++ b/internal/httpapi/openai/history_split_test.go @@ -227,7 +227,7 @@ func TestApplyCurrentInputFileDisabledPassThrough(t *testing.T) { DS: ds, } req := map[string]any{ - "model": "deepseek-v4-flash", + "model": "deepseek-v4-vision", "messages": historySplitTestMessages(), } stdReq, err := promptcompat.NormalizeOpenAIChatRequest(h.Store, req, "") @@ -332,7 +332,7 @@ func TestApplyCurrentInputFilePreservesFullContextPromptForTokenCounting(t *test DS: ds, } req := map[string]any{ - "model": "deepseek-v4-flash", + "model": "deepseek-v4-vision", "messages": historySplitTestMessages(), } stdReq, err := promptcompat.NormalizeOpenAIChatRequest(h.Store, req, "") @@ -378,7 +378,7 @@ func TestApplyCurrentInputFileUploadsFullContextFile(t *testing.T) { DS: ds, } req := map[string]any{ - "model": "deepseek-v4-flash", + "model": "deepseek-v4-vision", "messages": historySplitTestMessages(), } stdReq, err := promptcompat.NormalizeOpenAIChatRequest(h.Store, req, "") @@ -400,6 +400,9 @@ func TestApplyCurrentInputFileUploadsFullContextFile(t *testing.T) { if upload.Filename != "DS2API_HISTORY.txt" { t.Fatalf("expected DS2API_HISTORY.txt upload, got %q", upload.Filename) } + if upload.ModelType != "vision" { + t.Fatalf("expected vision model type for vision request, got %q", upload.ModelType) + } uploadedText := string(upload.Data) for _, want := range []string{"# DS2API_HISTORY.txt", "=== 1. SYSTEM ===", "=== 2. USER ===", "=== 3. ASSISTANT ===", "=== 4. TOOL ===", "=== 5. USER ===", "system instructions", "first user turn", "hidden reasoning", "tool result", "latest user turn", promptcompat.ThinkingInjectionMarker} { if !strings.Contains(uploadedText, want) { diff --git a/webui/src/features/apiTester/ApiTesterContainer.jsx b/webui/src/features/apiTester/ApiTesterContainer.jsx index dabd049..dce018e 100644 --- a/webui/src/features/apiTester/ApiTesterContainer.jsx +++ b/webui/src/features/apiTester/ApiTesterContainer.jsx @@ -217,6 +217,7 @@ export default function ApiTesterContainer({ config, onMessage, authFetch }) { setSelectedAccount={setSelectedAccount} effectiveKey={effectiveKey} selectedAccount={selectedAccount} + model={model} onMessage={onMessage} response={response} isStreaming={isStreaming} diff --git a/webui/src/features/apiTester/ChatPanel.jsx b/webui/src/features/apiTester/ChatPanel.jsx index 32b160e..e4d1428 100644 --- a/webui/src/features/apiTester/ChatPanel.jsx +++ b/webui/src/features/apiTester/ChatPanel.jsx @@ -13,6 +13,7 @@ export default function ChatPanel({ setSelectedAccount, effectiveKey, selectedAccount, + model, onMessage, response, isStreaming, @@ -37,11 +38,15 @@ export default function ChatPanel({ setUploadingFiles(true) const initialSelectedAccount = String(selectedAccount || '').trim() + const selectedModel = String(model || '').trim() let boundAccount = initialSelectedAccount for (const file of files) { const formData = new FormData() formData.append('file', file) formData.append('purpose', 'assistants') + if (selectedModel) { + formData.append('model', selectedModel) + } const headers = { 'Authorization': `Bearer ${effectiveKey}`, @@ -181,8 +186,9 @@ export default function ChatPanel({ />