mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-05 00:45:29 +08:00
Merge pull request #234 from CJackHwang/codex/fix-documentation-and-accumulated_token_usage
Propagate DeepSeek SSE token usage to /v1/responses and remove stale POW env docs
This commit is contained in:
@@ -267,6 +267,7 @@ data: [DONE]
|
||||
- `deepseek-reasoner` / `deepseek-reasoner-search` models emit `delta.reasoning_content`
|
||||
- Text emits `delta.content`
|
||||
- Last chunk includes `finish_reason` and `usage`
|
||||
- Token counting prefers pass-through from upstream DeepSeek SSE (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent
|
||||
|
||||
#### Tool Calls
|
||||
|
||||
@@ -383,6 +384,7 @@ Business auth required. Returns OpenAI-compatible embeddings shape.
|
||||
## Claude-Compatible API
|
||||
|
||||
Besides `/anthropic/v1/*`, DS2API also supports shortcut paths: `/v1/messages`, `/messages`, `/v1/messages/count_tokens`, `/messages/count_tokens`.
|
||||
Implementation-wise this path is unified on the OpenAI Chat Completions parse-and-translate pipeline to avoid maintaining divergent parsing chains.
|
||||
|
||||
### `GET /anthropic/v1/models`
|
||||
|
||||
@@ -517,6 +519,7 @@ Supported paths:
|
||||
- `/v1/models/{model}:streamGenerateContent` (compat path)
|
||||
|
||||
Authentication is the same as other business routes (`Authorization: Bearer <token>` or `x-api-key`).
|
||||
Implementation-wise this path is unified on the OpenAI Chat Completions parse-and-translate pipeline to avoid maintaining divergent parsing chains.
|
||||
|
||||
### `POST /v1beta/models/{model}:generateContent`
|
||||
|
||||
@@ -535,6 +538,7 @@ Returns SSE (`text/event-stream`), each chunk as `data: <json>`:
|
||||
- regular text: incremental text chunks
|
||||
- `tools` mode: buffered and emitted as `functionCall` at finalize phase
|
||||
- final chunk: includes `finishReason: "STOP"` and `usageMetadata`
|
||||
- Token counting prefers pass-through from upstream DeepSeek SSE (`accumulated_token_usage` / `token_usage`), and only falls back to local estimation when upstream usage is absent
|
||||
|
||||
---
|
||||
|
||||
|
||||
4
API.md
4
API.md
@@ -267,6 +267,7 @@ data: [DONE]
|
||||
- `deepseek-reasoner` / `deepseek-reasoner-search` 模型输出 `delta.reasoning_content`
|
||||
- 普通文本输出 `delta.content`
|
||||
- 最后一段包含 `finish_reason` 和 `usage`
|
||||
- token 计数优先透传上游 DeepSeek SSE(如 `accumulated_token_usage` / `token_usage`);仅在上游缺失时回退本地估算
|
||||
|
||||
#### Tool Calls
|
||||
|
||||
@@ -389,6 +390,7 @@ data: [DONE]
|
||||
## Claude 兼容接口
|
||||
|
||||
除标准路径 `/anthropic/v1/*` 外,还支持快捷路径 `/v1/messages`、`/messages`、`/v1/messages/count_tokens`、`/messages/count_tokens`。
|
||||
实现上统一走 OpenAI Chat Completions 解析与回译链路,避免多套解析逻辑分叉维护。
|
||||
|
||||
### `GET /anthropic/v1/models`
|
||||
|
||||
@@ -523,6 +525,7 @@ data: {"type":"message_stop"}
|
||||
- `/v1/models/{model}:streamGenerateContent`(兼容路径)
|
||||
|
||||
鉴权方式同业务接口(`Authorization: Bearer <token>` 或 `x-api-key`)。
|
||||
实现上统一走 OpenAI Chat Completions 解析与回译链路,避免多套解析逻辑分叉维护。
|
||||
|
||||
### `POST /v1beta/models/{model}:generateContent`
|
||||
|
||||
@@ -541,6 +544,7 @@ data: {"type":"message_stop"}
|
||||
- 常规文本:持续返回增量文本 chunk
|
||||
- `tools` 场景:会缓冲并在结束时输出 `functionCall` 结构
|
||||
- 结束 chunk:包含 `finishReason: "STOP"` 与 `usageMetadata`
|
||||
- token 计数优先透传上游 DeepSeek SSE(如 `accumulated_token_usage` / `token_usage`);仅在上游缺失时回退本地估算
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -344,7 +344,6 @@ cp opencode.json.example opencode.json
|
||||
| `DS2API_CONFIG_PATH` | 配置文件路径 | `config.json` |
|
||||
| `DS2API_CONFIG_JSON` | 直接注入配置(JSON 或 Base64) | — |
|
||||
| `DS2API_ENV_WRITEBACK` | 环境变量模式下自动写回配置文件并切换文件模式(`1/true/yes/on`) | 关闭 |
|
||||
| `DS2API_POW_CONCURRENCY` | PoW 并行计算协程数(可选) | 默认 CPU 核心数 |
|
||||
| `DS2API_STATIC_ADMIN_DIR` | 管理台静态文件目录 | `static/admin` |
|
||||
| `DS2API_AUTO_BUILD_WEBUI` | 启动时自动构建 WebUI | 本地开启,Vercel 关闭 |
|
||||
| `DS2API_DEV_PACKET_CAPTURE` | 本地开发抓包开关(记录最近会话请求/响应体) | 本地非 Vercel 默认开启 |
|
||||
|
||||
@@ -344,7 +344,6 @@ cp opencode.json.example opencode.json
|
||||
| `DS2API_CONFIG_PATH` | Config file path | `config.json` |
|
||||
| `DS2API_CONFIG_JSON` | Inline config (JSON or Base64) | — |
|
||||
| `DS2API_ENV_WRITEBACK` | Auto-write env-backed config to file and transition to file mode (`1/true/yes/on`) | Disabled |
|
||||
| `DS2API_POW_CONCURRENCY` | PoW parallel solver goroutine count (optional) | Default CPU core count |
|
||||
| `DS2API_STATIC_ADMIN_DIR` | Admin static assets dir | `static/admin` |
|
||||
| `DS2API_AUTO_BUILD_WEBUI` | Auto-build WebUI on startup | Enabled locally, disabled on Vercel |
|
||||
| `DS2API_ACCOUNT_MAX_INFLIGHT` | Max in-flight requests per account | `2` |
|
||||
|
||||
@@ -74,16 +74,13 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
|
||||
}
|
||||
|
||||
finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "")
|
||||
if !strings.Contains(finalPrompt, "After receiving a tool result, use it directly.") {
|
||||
t.Fatalf("vercel prepare finalPrompt missing final-answer instruction: %q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, "Only call another tool if the result is insufficient.") {
|
||||
t.Fatalf("vercel prepare finalPrompt missing retry guard instruction: %q", finalPrompt)
|
||||
if !strings.Contains(finalPrompt, "Remember: Output ONLY the <tool_calls>...</tool_calls> XML block when calling tools.") {
|
||||
t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") {
|
||||
t.Fatalf("vercel prepare finalPrompt missing xml format instruction: %q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, "Do NOT wrap the XML in markdown code fences") {
|
||||
if !strings.Contains(finalPrompt, "Do NOT wrap XML in markdown fences") {
|
||||
t.Fatalf("vercel prepare finalPrompt missing no-fence xml instruction: %q", finalPrompt)
|
||||
}
|
||||
if strings.Contains(finalPrompt, "```json") {
|
||||
|
||||
@@ -130,12 +130,17 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res
|
||||
}
|
||||
|
||||
responseObj := openaifmt.BuildResponseObject(responseID, model, finalPrompt, sanitizedThinking, sanitizedText, toolNames)
|
||||
if result.OutputTokens > 0 {
|
||||
if result.PromptTokens > 0 || result.OutputTokens > 0 {
|
||||
if usage, ok := responseObj["usage"].(map[string]any); ok {
|
||||
usage["output_tokens"] = result.OutputTokens
|
||||
if input, ok := usage["input_tokens"].(int); ok {
|
||||
usage["total_tokens"] = input + result.OutputTokens
|
||||
if result.PromptTokens > 0 {
|
||||
usage["input_tokens"] = result.PromptTokens
|
||||
}
|
||||
if result.OutputTokens > 0 {
|
||||
usage["output_tokens"] = result.OutputTokens
|
||||
}
|
||||
input, _ := usage["input_tokens"].(int)
|
||||
output, _ := usage["output_tokens"].(int)
|
||||
usage["total_tokens"] = input + output
|
||||
}
|
||||
}
|
||||
h.getResponseStore().put(owner, responseID, responseObj)
|
||||
|
||||
@@ -51,6 +51,7 @@ type responsesStreamRuntime struct {
|
||||
messagePartAdded bool
|
||||
sequence int
|
||||
failed bool
|
||||
promptTokens int
|
||||
outputTokens int
|
||||
|
||||
persistResponse func(obj map[string]any)
|
||||
@@ -152,9 +153,19 @@ func (s *responsesStreamRuntime) finalize() {
|
||||
if s.outputTokens > 0 {
|
||||
if usage, ok := obj["usage"].(map[string]any); ok {
|
||||
usage["output_tokens"] = s.outputTokens
|
||||
if input, ok := usage["input_tokens"].(int); ok {
|
||||
usage["total_tokens"] = input + s.outputTokens
|
||||
}
|
||||
}
|
||||
if s.promptTokens > 0 || s.outputTokens > 0 {
|
||||
if usage, ok := obj["usage"].(map[string]any); ok {
|
||||
if s.promptTokens > 0 {
|
||||
usage["input_tokens"] = s.promptTokens
|
||||
}
|
||||
if s.outputTokens > 0 {
|
||||
usage["output_tokens"] = s.outputTokens
|
||||
}
|
||||
input, _ := usage["input_tokens"].(int)
|
||||
output, _ := usage["output_tokens"].(int)
|
||||
usage["total_tokens"] = input + output
|
||||
}
|
||||
}
|
||||
if s.persistResponse != nil {
|
||||
@@ -185,6 +196,9 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
|
||||
if !parsed.Parsed {
|
||||
return streamengine.ParsedDecision{}
|
||||
}
|
||||
if parsed.PromptTokens > 0 {
|
||||
s.promptTokens = parsed.PromptTokens
|
||||
}
|
||||
if parsed.OutputTokens > 0 {
|
||||
s.outputTokens = parsed.OutputTokens
|
||||
}
|
||||
|
||||
@@ -238,3 +238,97 @@ func TestChatCompletionsStreamContentFilterStopsNormallyWithoutLeak(t *testing.T
|
||||
t.Fatalf("expected finish_reason=stop for content-filter upstream stop, got %#v", choice["finish_reason"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestResponsesStreamUsageOverridesFromBatchAccumulatedTokenUsage(t *testing.T) {
|
||||
statuses := make([]int, 0, 1)
|
||||
h := &Handler{
|
||||
Store: mockOpenAIConfig{wideInput: true},
|
||||
Auth: streamStatusAuthStub{},
|
||||
DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(
|
||||
`data: {"p":"response/content","v":"hello"}`,
|
||||
`data: {"p":"response","o":"BATCH","v":[{"p":"accumulated_token_usage","v":190},{"p":"quasi_status","v":"FINISHED"}]}`,
|
||||
)},
|
||||
}
|
||||
r := chi.NewRouter()
|
||||
r.Use(captureStatusMiddleware(&statuses))
|
||||
RegisterRoutes(r, h)
|
||||
|
||||
reqBody := `{"model":"deepseek-chat","input":"hi","stream":true}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody))
|
||||
req.Header.Set("Authorization", "Bearer direct-token")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
if len(statuses) != 1 || statuses[0] != http.StatusOK {
|
||||
t.Fatalf("expected captured status 200, got %#v", statuses)
|
||||
}
|
||||
frames, done := parseSSEDataFrames(t, rec.Body.String())
|
||||
if !done {
|
||||
t.Fatalf("expected [DONE], body=%s", rec.Body.String())
|
||||
}
|
||||
if len(frames) == 0 {
|
||||
t.Fatalf("expected at least one json frame, body=%s", rec.Body.String())
|
||||
}
|
||||
last := frames[len(frames)-1]
|
||||
resp, _ := last["response"].(map[string]any)
|
||||
if resp == nil {
|
||||
t.Fatalf("expected response payload in final frame, got %#v", last)
|
||||
}
|
||||
usage, _ := resp["usage"].(map[string]any)
|
||||
if usage == nil {
|
||||
t.Fatalf("expected usage in response payload, got %#v", resp)
|
||||
}
|
||||
if got, _ := usage["output_tokens"].(float64); int(got) != 190 {
|
||||
t.Fatalf("expected output_tokens=190, got %#v", usage["output_tokens"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestResponsesNonStreamUsageOverridesPromptAndOutputTokenUsage(t *testing.T) {
|
||||
statuses := make([]int, 0, 1)
|
||||
h := &Handler{
|
||||
Store: mockOpenAIConfig{wideInput: true},
|
||||
Auth: streamStatusAuthStub{},
|
||||
DS: streamStatusDSStub{resp: makeOpenAISSEHTTPResponse(
|
||||
`data: {"p":"response/content","v":"ok"}`,
|
||||
`data: {"p":"response","o":"BATCH","v":[{"p":"token_usage","v":{"prompt_tokens":11,"completion_tokens":29}},{"p":"quasi_status","v":"FINISHED"}]}`,
|
||||
)},
|
||||
}
|
||||
r := chi.NewRouter()
|
||||
r.Use(captureStatusMiddleware(&statuses))
|
||||
RegisterRoutes(r, h)
|
||||
|
||||
reqBody := `{"model":"deepseek-chat","input":"hi","stream":false}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(reqBody))
|
||||
req.Header.Set("Authorization", "Bearer direct-token")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
if len(statuses) != 1 || statuses[0] != http.StatusOK {
|
||||
t.Fatalf("expected captured status 200, got %#v", statuses)
|
||||
}
|
||||
var out map[string]any
|
||||
if err := json.Unmarshal(rec.Body.Bytes(), &out); err != nil {
|
||||
t.Fatalf("decode response failed: %v body=%s", err, rec.Body.String())
|
||||
}
|
||||
usage, _ := out["usage"].(map[string]any)
|
||||
if usage == nil {
|
||||
t.Fatalf("expected usage object, got %#v", out)
|
||||
}
|
||||
if got, _ := usage["input_tokens"].(float64); int(got) != 11 {
|
||||
t.Fatalf("expected input_tokens=11, got %#v", usage["input_tokens"])
|
||||
}
|
||||
if got, _ := usage["output_tokens"].(float64); int(got) != 29 {
|
||||
t.Fatalf("expected output_tokens=29, got %#v", usage["output_tokens"])
|
||||
}
|
||||
if got, _ := usage["total_tokens"].(float64); int(got) != 40 {
|
||||
t.Fatalf("expected total_tokens=40, got %#v", usage["total_tokens"])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,6 +26,26 @@ func TestFromOpenAINonStreamClaude(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFromOpenAINonStreamClaudePreservesUsageFromOpenAI(t *testing.T) {
|
||||
original := []byte(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":false}`)
|
||||
translatedReq := []byte(`{"model":"claude-sonnet-4-5","messages":[{"role":"user","content":"hi"}],"stream":false}`)
|
||||
openaibody := []byte(`{"id":"chatcmpl_1","object":"chat.completion","created":1,"model":"claude-sonnet-4-5","choices":[{"index":0,"message":{"role":"assistant","content":"hello"},"finish_reason":"stop"}],"usage":{"prompt_tokens":11,"completion_tokens":29,"total_tokens":40}}`)
|
||||
got := string(FromOpenAINonStream(sdktranslator.FormatClaude, "claude-sonnet-4-5", original, translatedReq, openaibody))
|
||||
if !strings.Contains(got, `"input_tokens":11`) || !strings.Contains(got, `"output_tokens":29`) {
|
||||
t.Fatalf("expected claude usage to preserve prompt/completion tokens, got: %s", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFromOpenAINonStreamGeminiPreservesUsageFromOpenAI(t *testing.T) {
|
||||
original := []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`)
|
||||
translatedReq := []byte(`{"model":"gemini-2.5-pro","messages":[{"role":"user","content":"hi"}],"stream":false}`)
|
||||
openaibody := []byte(`{"id":"chatcmpl_1","object":"chat.completion","created":1,"model":"gemini-2.5-pro","choices":[{"index":0,"message":{"role":"assistant","content":"hello"},"finish_reason":"stop"}],"usage":{"prompt_tokens":11,"completion_tokens":29,"total_tokens":40}}`)
|
||||
got := string(FromOpenAINonStream(sdktranslator.FormatGemini, "gemini-2.5-pro", original, translatedReq, openaibody))
|
||||
if !strings.Contains(got, `"promptTokenCount":11`) || !strings.Contains(got, `"candidatesTokenCount":29`) || !strings.Contains(got, `"totalTokenCount":40`) {
|
||||
t.Fatalf("expected gemini usageMetadata to preserve prompt/completion tokens, got: %s", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFormatAliases(t *testing.T) {
|
||||
cases := map[string]sdktranslator.Format{
|
||||
"responses": sdktranslator.FormatOpenAIResponse,
|
||||
|
||||
@@ -3,7 +3,9 @@ package translatorcliproxy
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||
)
|
||||
@@ -77,7 +79,13 @@ func (w *OpenAIStreamTranslatorWriter) Write(p []byte) (int, error) {
|
||||
if !bytes.HasPrefix(trimmed, []byte("data:")) {
|
||||
continue
|
||||
}
|
||||
usage, hasUsage := extractOpenAIUsage(trimmed)
|
||||
chunks := sdktranslator.TranslateStream(context.Background(), sdktranslator.FormatOpenAI, w.target, w.model, w.originalReq, w.translatedReq, trimmed, &w.param)
|
||||
if hasUsage {
|
||||
for i := range chunks {
|
||||
chunks[i] = injectStreamUsageMetadata(chunks[i], w.target, usage)
|
||||
}
|
||||
}
|
||||
for i := range chunks {
|
||||
if len(chunks[i]) == 0 {
|
||||
continue
|
||||
@@ -118,3 +126,102 @@ func (w *OpenAIStreamTranslatorWriter) readOneLine() ([]byte, bool) {
|
||||
w.lineBuf.Next(idx + 1)
|
||||
return line, true
|
||||
}
|
||||
|
||||
type openAIUsage struct {
|
||||
PromptTokens int
|
||||
CompletionTokens int
|
||||
TotalTokens int
|
||||
}
|
||||
|
||||
func extractOpenAIUsage(line []byte) (openAIUsage, bool) {
|
||||
raw := strings.TrimSpace(strings.TrimPrefix(string(line), "data:"))
|
||||
if raw == "" || raw == "[DONE]" {
|
||||
return openAIUsage{}, false
|
||||
}
|
||||
var payload map[string]any
|
||||
if err := json.Unmarshal([]byte(raw), &payload); err != nil {
|
||||
return openAIUsage{}, false
|
||||
}
|
||||
usageObj, _ := payload["usage"].(map[string]any)
|
||||
if usageObj == nil {
|
||||
return openAIUsage{}, false
|
||||
}
|
||||
p := toInt(usageObj["prompt_tokens"])
|
||||
c := toInt(usageObj["completion_tokens"])
|
||||
t := toInt(usageObj["total_tokens"])
|
||||
if p <= 0 && c <= 0 && t <= 0 {
|
||||
return openAIUsage{}, false
|
||||
}
|
||||
if t <= 0 {
|
||||
t = p + c
|
||||
}
|
||||
return openAIUsage{PromptTokens: p, CompletionTokens: c, TotalTokens: t}, true
|
||||
}
|
||||
|
||||
func injectStreamUsageMetadata(chunk []byte, target sdktranslator.Format, usage openAIUsage) []byte {
|
||||
if target != sdktranslator.FormatGemini {
|
||||
return chunk
|
||||
}
|
||||
suffix := ""
|
||||
switch {
|
||||
case bytes.HasSuffix(chunk, []byte("\n\n")):
|
||||
suffix = "\n\n"
|
||||
case bytes.HasSuffix(chunk, []byte("\n")):
|
||||
suffix = "\n"
|
||||
}
|
||||
text := strings.TrimSpace(string(chunk))
|
||||
if text == "" {
|
||||
return chunk
|
||||
}
|
||||
var (
|
||||
hasDataPrefix bool
|
||||
jsonText = text
|
||||
)
|
||||
if strings.HasPrefix(jsonText, "data:") {
|
||||
hasDataPrefix = true
|
||||
jsonText = strings.TrimSpace(strings.TrimPrefix(jsonText, "data:"))
|
||||
}
|
||||
if jsonText == "" || jsonText == "[DONE]" {
|
||||
return chunk
|
||||
}
|
||||
obj := map[string]any{}
|
||||
if err := json.Unmarshal([]byte(jsonText), &obj); err != nil {
|
||||
return chunk
|
||||
}
|
||||
if _, ok := obj["candidates"]; !ok {
|
||||
return chunk
|
||||
}
|
||||
obj["usageMetadata"] = map[string]any{
|
||||
"promptTokenCount": usage.PromptTokens,
|
||||
"candidatesTokenCount": usage.CompletionTokens,
|
||||
"totalTokenCount": usage.TotalTokens,
|
||||
}
|
||||
b, err := json.Marshal(obj)
|
||||
if err != nil {
|
||||
return chunk
|
||||
}
|
||||
if hasDataPrefix {
|
||||
return []byte("data: " + string(b) + suffix)
|
||||
}
|
||||
if suffix != "" {
|
||||
return append(b, []byte(suffix)...)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func toInt(v any) int {
|
||||
switch x := v.(type) {
|
||||
case int:
|
||||
return x
|
||||
case int32:
|
||||
return int(x)
|
||||
case int64:
|
||||
return int(x)
|
||||
case float64:
|
||||
return int(x)
|
||||
case float32:
|
||||
return int(x)
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,12 +18,16 @@ func TestOpenAIStreamTranslatorWriterClaude(t *testing.T) {
|
||||
w.WriteHeader(200)
|
||||
_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl_1\",\"object\":\"chat.completion.chunk\",\"created\":1,\"model\":\"claude-sonnet-4-5\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\"},\"finish_reason\":null}]}\n\n"))
|
||||
_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl_1\",\"object\":\"chat.completion.chunk\",\"created\":1,\"model\":\"claude-sonnet-4-5\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"hi\"},\"finish_reason\":null}]}\n\n"))
|
||||
_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl_1\",\"object\":\"chat.completion.chunk\",\"created\":1,\"model\":\"claude-sonnet-4-5\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":11,\"completion_tokens\":29,\"total_tokens\":40}}\n\n"))
|
||||
_, _ = w.Write([]byte("data: [DONE]\n\n"))
|
||||
|
||||
body := rec.Body.String()
|
||||
if !strings.Contains(body, "event: message_start") {
|
||||
t.Fatalf("expected claude message_start event, got: %s", body)
|
||||
}
|
||||
if !strings.Contains(body, `"output_tokens":29`) {
|
||||
t.Fatalf("expected claude stream usage to preserve output tokens, got: %s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenAIStreamTranslatorWriterGemini(t *testing.T) {
|
||||
@@ -35,12 +39,16 @@ func TestOpenAIStreamTranslatorWriterGemini(t *testing.T) {
|
||||
w.Header().Set("Content-Type", "text/event-stream")
|
||||
w.WriteHeader(200)
|
||||
_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl_1\",\"object\":\"chat.completion.chunk\",\"created\":1,\"model\":\"gemini-2.5-pro\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"hi\"},\"finish_reason\":null}]}\n\n"))
|
||||
_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl_1\",\"object\":\"chat.completion.chunk\",\"created\":1,\"model\":\"gemini-2.5-pro\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":11,\"completion_tokens\":29,\"total_tokens\":40}}\n\n"))
|
||||
_, _ = w.Write([]byte("data: [DONE]\n\n"))
|
||||
|
||||
body := rec.Body.String()
|
||||
if !strings.Contains(body, "candidates") {
|
||||
t.Fatalf("expected gemini stream payload, got: %s", body)
|
||||
}
|
||||
if !strings.Contains(body, `"promptTokenCount":11`) || !strings.Contains(body, `"candidatesTokenCount":29`) {
|
||||
t.Fatalf("expected gemini stream usageMetadata to preserve usage, got: %s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenAIStreamTranslatorWriterPreservesKeepAliveComment(t *testing.T) {
|
||||
@@ -55,3 +63,15 @@ func TestOpenAIStreamTranslatorWriterPreservesKeepAliveComment(t *testing.T) {
|
||||
t.Fatalf("expected keep-alive comment passthrough, got %q", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInjectStreamUsageMetadataPreservesSSEFrameTerminator(t *testing.T) {
|
||||
chunk := []byte("data: {\"candidates\":[{\"index\":0}],\"model\":\"gemini-2.5-pro\"}\n\n")
|
||||
usage := openAIUsage{PromptTokens: 11, CompletionTokens: 29, TotalTokens: 40}
|
||||
got := injectStreamUsageMetadata(chunk, sdktranslator.FormatGemini, usage)
|
||||
if !strings.HasSuffix(string(got), "\n\n") {
|
||||
t.Fatalf("expected injected chunk to preserve \\n\\n frame terminator, got %q", string(got))
|
||||
}
|
||||
if !strings.Contains(string(got), `"usageMetadata"`) {
|
||||
t.Fatalf("expected usageMetadata injected, got %q", string(got))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -275,7 +275,7 @@ test('parseChunkForContent keeps error branches distinct from content_filter sta
|
||||
assert.equal(parsed.finished, true);
|
||||
assert.equal(parsed.contentFilter, false);
|
||||
assert.equal(parsed.errorMessage.length > 0, true);
|
||||
assert.equal(parsed.outputTokens, 0);
|
||||
assert.equal(parsed.outputTokens, 88);
|
||||
assert.deepEqual(parsed.parts, []);
|
||||
});
|
||||
|
||||
@@ -292,6 +292,26 @@ test('parseChunkForContent preserves output tokens on FINISHED lines', () => {
|
||||
assert.deepEqual(parsed.parts, []);
|
||||
});
|
||||
|
||||
test('parseChunkForContent captures output tokens from response BATCH status snapshots', () => {
|
||||
const parsed = parseChunkForContent(
|
||||
{
|
||||
p: 'response',
|
||||
o: 'BATCH',
|
||||
v: [
|
||||
{ p: 'accumulated_token_usage', v: 190 },
|
||||
{ p: 'quasi_status', v: 'FINISHED' },
|
||||
],
|
||||
},
|
||||
false,
|
||||
'text',
|
||||
);
|
||||
assert.equal(parsed.parsed, true);
|
||||
assert.equal(parsed.finished, false);
|
||||
assert.equal(parsed.contentFilter, false);
|
||||
assert.equal(parsed.outputTokens, 190);
|
||||
assert.deepEqual(parsed.parts, []);
|
||||
});
|
||||
|
||||
test('parseChunkForContent matches FINISHED case-insensitively on status paths', () => {
|
||||
const parsed = parseChunkForContent(
|
||||
{ p: 'response/status', v: ' finished ', accumulated_token_usage: 190 },
|
||||
|
||||
Reference in New Issue
Block a user