mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-05 08:55:28 +08:00
feat: add full-context token regression coverage and docs
Lock in the current_input_file regression with API-level tests and document that returned context token counts now track full prompt semantics with conservative sizing.
This commit is contained in:
@@ -98,6 +98,7 @@ DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools`
- `prompt` 才是对话上下文主载体。
- `ref_file_ids` 只承载文件引用,不承载普通文本消息。
- `tools` 不会作为“原生工具 schema”直接下发给下游,而是被改写进 `prompt`。
- 对外返回给客户端的 `prompt_tokens` / `input_tokens` / `promptTokenCount` 不再按“最后一条消息”或字符粗估近似返回,而是基于**完整上下文 prompt**做 tokenizer 计数;为了避免上下文实际超限但客户端误以为还能塞下,请求侧上下文 token 会额外保守上浮一点,宁可略大也不低估。
- 当前 `/v1/chat/completions` 业务路径仍是“每次请求新建一个远端 `chat_session_id`,并默认发送 `parent_message_id: null`”;因此 DS2API 对外默认表现为“新会话 + prompt 拼历史”,而不是复用 DeepSeek 原生会话树。
- 但 DeepSeek 远端本身支持同一 `chat_session_id` 的跨轮次持续对话。2026-04-27 已用项目内现有 DeepSeek client 做过一次不改业务代码的双轮实测:同一 `chat_session_id` 下,第 1 轮返回 `request_message_id=1` / `response_message_id=2` / 文本 `SESSION_TEST_ONE`;第 2 轮重新获取一次 PoW,并发送 `parent_message_id=2` 后,成功返回 `request_message_id=3` / `response_message_id=4` / 文本 `SESSION_TEST_TWO`。这说明“同远端会话持续聊天”能力存在,且每轮需要携带正确的 parent/message 链接信息,同时重新获取对应轮次可用的 PoW。
- OpenAI Chat / Responses 原生走统一 OpenAI 标准化与 DeepSeek payload 组装;Claude / Gemini 会尽量复用 OpenAI prompt/tool 语义,其中 Gemini 直接复用 `promptcompat.BuildOpenAIPromptForAdapter`,Claude 消息接口在可代理场景会转换为 OpenAI chat 形态再执行。
@@ -247,6 +248,7 @@ OpenAI 文件相关实现:
- `current_input_file` 默认开启;它用于把“完整上下文”合并进隐藏上下文文件。当最新 user turn 的纯文本长度达到 `current_input_file.min_chars`(默认 `0`)时,兼容层会上传一个文件名为 `IGNORE.txt` 的上下文文件,并在 live prompt 中只保留一个中性的 user 消息要求模型直接回答最新请求,不再暴露文件名或要求模型读取本地文件。
- 如果 `current_input_file.enabled=false`,请求会直接透传,不上传任何拆分上下文文件。
- 旧的 `history_split.enabled` / `history_split.trigger_after_turns` 会被读取进配置对象以保持兼容,但不会触发拆分上传,也不会影响 `current_input_file` 的默认开启。
- 即使触发 `current_input_file` 后 live prompt 被缩短,对客户端回包里的上下文 token 统计,仍会沿用**拆分前的完整 prompt 语义**做计数,而不是按缩短后的占位 prompt 计算;否则会把真实上下文显著算小。

相关实现:
@@ -14,6 +14,7 @@ import (
	"ds2api/internal/auth"
	dsclient "ds2api/internal/deepseek/client"
	"ds2api/internal/promptcompat"
	"ds2api/internal/util"
)

func historySplitTestMessages() []any {
@@ -302,6 +303,48 @@ func TestApplyCurrentInputFileUploadsFirstTurnWithInjectedWrapper(t *testing.T)
	if len(out.RefFileIDs) != 1 || out.RefFileIDs[0] != "file-inline-1" {
		t.Fatalf("expected current input file id in ref_file_ids, got %#v", out.RefFileIDs)
	}
	if !strings.Contains(out.PromptTokenText, "first turn content that is long enough") {
		t.Fatalf("expected prompt token text to preserve original full context, got %q", out.PromptTokenText)
	}
}
func TestApplyCurrentInputFilePreservesFullContextPromptForTokenCounting(t *testing.T) {
|
||||
ds := &inlineUploadDSStub{}
|
||||
h := &openAITestSurface{
|
||||
Store: mockOpenAIConfig{
|
||||
wideInput: true,
|
||||
currentInputEnabled: true,
|
||||
currentInputMin: 0,
|
||||
thinkingInjection: boolPtr(true),
|
||||
},
|
||||
DS: ds,
|
||||
}
|
||||
req := map[string]any{
|
||||
"model": "deepseek-v4-flash",
|
||||
"messages": historySplitTestMessages(),
|
||||
}
|
||||
stdReq, err := promptcompat.NormalizeOpenAIChatRequest(h.Store, req, "")
|
||||
if err != nil {
|
||||
t.Fatalf("normalize failed: %v", err)
|
||||
}
|
||||
originalPrompt := stdReq.FinalPrompt
|
||||
|
||||
out, err := h.applyCurrentInputFile(context.Background(), &auth.RequestAuth{DeepSeekToken: "token"}, stdReq)
|
||||
if err != nil {
|
||||
t.Fatalf("apply current input file failed: %v", err)
|
||||
}
|
||||
if out.PromptTokenText != originalPrompt {
|
||||
t.Fatalf("expected prompt token text to preserve original prompt, got %q want %q", out.PromptTokenText, originalPrompt)
|
||||
}
|
||||
if out.FinalPrompt == originalPrompt {
|
||||
t.Fatalf("expected live prompt to be rewritten after current input file")
|
||||
}
|
||||
if !strings.Contains(out.PromptTokenText, "first user turn") || !strings.Contains(out.PromptTokenText, "latest user turn") {
|
||||
t.Fatalf("expected prompt token text to retain full context, got %q", out.PromptTokenText)
|
||||
}
|
||||
if strings.Contains(out.FinalPrompt, "first user turn") || strings.Contains(out.FinalPrompt, "latest user turn") {
|
||||
t.Fatalf("expected live prompt to hide original turns, got %q", out.FinalPrompt)
|
||||
}
|
||||
}

func TestApplyCurrentInputFileUploadsFullContextFile(t *testing.T) {
@@ -438,6 +481,22 @@ func TestChatCompletionsCurrentInputFileUploadsContextAndKeepsNeutralPrompt(t *t
	if len(refIDs) == 0 || refIDs[0] != "file-inline-1" {
		t.Fatalf("expected uploaded current input file to be first ref_file_id, got %#v", ds.completionReq["ref_file_ids"])
	}
	var body map[string]any
	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
		t.Fatalf("decode response failed: %v", err)
	}
	usage, _ := body["usage"].(map[string]any)
	promptTokens := int(usage["prompt_tokens"].(float64))
	originalPrompt, _ := promptcompat.BuildOpenAIPrompt(historySplitTestMessages(), nil, "", defaultToolChoicePolicy(), true)
	expectedMin := util.CountPromptTokens(originalPrompt, "deepseek-v4-flash")
	neutralPrompt := promptText
	neutralCount := util.CountPromptTokens(neutralPrompt, "deepseek-v4-flash")
	if promptTokens != expectedMin {
		t.Fatalf("expected prompt_tokens from full original context, got=%d want=%d", promptTokens, expectedMin)
	}
	if promptTokens <= neutralCount {
		t.Fatalf("expected prompt_tokens to exceed neutral live prompt count, got=%d neutral=%d", promptTokens, neutralCount)
	}
}

func TestResponsesCurrentInputFileUploadsContextAndKeepsNeutralPrompt(t *testing.T) {
@@ -480,6 +539,21 @@ func TestResponsesCurrentInputFileUploadsContextAndKeepsNeutralPrompt(t *testing
	if strings.Contains(promptText, "first user turn") || strings.Contains(promptText, "latest user turn") {
		t.Fatalf("expected prompt to hide original turns, got %s", promptText)
	}
	var body map[string]any
	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
		t.Fatalf("decode response failed: %v", err)
	}
	usage, _ := body["usage"].(map[string]any)
	inputTokens := int(usage["input_tokens"].(float64))
	originalPrompt, _ := promptcompat.BuildOpenAIPrompt(historySplitTestMessages(), nil, "", defaultToolChoicePolicy(), true)
	expectedMin := util.CountPromptTokens(originalPrompt, "deepseek-v4-flash")
	neutralCount := util.CountPromptTokens(promptText, "deepseek-v4-flash")
	if inputTokens != expectedMin {
		t.Fatalf("expected input_tokens from full original context, got=%d want=%d", inputTokens, expectedMin)
	}
	if inputTokens <= neutralCount {
		t.Fatalf("expected input_tokens to exceed neutral live prompt count, got=%d neutral=%d", inputTokens, neutralCount)
	}
}

func TestChatCompletionsCurrentInputFileMapsManagedAuthFailureTo401(t *testing.T) {
@@ -23,9 +23,9 @@ func BuildOpenAIChatCompletion(completionID, model, finalPrompt, finalThinking,
 		messageObj["tool_calls"] = toolcall.FormatOpenAIToolCalls(detected, nil)
 		messageObj["content"] = nil
 	}
-	promptTokens := EstimateTokens(finalPrompt)
-	reasoningTokens := EstimateTokens(finalThinking)
-	completionTokens := EstimateTokens(finalText)
+	promptTokens := CountPromptTokens(finalPrompt, model)
+	reasoningTokens := CountOutputTokens(finalThinking, model)
+	completionTokens := CountOutputTokens(finalText, model)

 	return map[string]any{
 		"id": completionID,
@@ -86,9 +86,9 @@ func BuildOpenAIResponseObject(responseID, model, finalPrompt, finalThinking, fi
 			"content": content,
 		})
 	}
-	promptTokens := EstimateTokens(finalPrompt)
-	reasoningTokens := EstimateTokens(finalThinking)
-	completionTokens := EstimateTokens(finalText)
+	promptTokens := CountPromptTokens(finalPrompt, model)
+	reasoningTokens := CountOutputTokens(finalThinking, model)
+	completionTokens := CountOutputTokens(finalText, model)
 	return map[string]any{
 		"id":   responseID,
 		"type": "response",
@@ -140,8 +140,8 @@ func BuildClaudeMessageResponse(messageID, model string, normalizedMessages []an
 		"stop_reason":   stopReason,
 		"stop_sequence": nil,
 		"usage": map[string]any{
-			"input_tokens":  EstimateTokens(fmt.Sprintf("%v", normalizedMessages)),
-			"output_tokens": EstimateTokens(finalThinking) + EstimateTokens(finalText),
+			"input_tokens":  CountPromptTokens(fmt.Sprintf("%v", normalizedMessages), model),
+			"output_tokens": CountOutputTokens(finalThinking, model) + CountOutputTokens(finalText, model),
 		},
 	}
 }
Reference in New Issue
Block a user