feat: support explicit prompt token tracking in SSE parsing and stream handlers

2026-05-18 07:05:08 +08:00 · 2026-04-07 01:39:27 +08:00
parent da778a18fb
commit b79a13efd5
13 changed files with 136 additions and 63 deletions
--- a/internal/js/chat-stream/sse_parse_impl.js
+++ b/internal/js/chat-stream/sse_parse_impl.js
@@ -20,7 +20,9 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
    };
  }

-  const outputTokens = extractAccumulatedTokenUsage(chunk);
+  const usage = extractAccumulatedTokenUsage(chunk);
+  const promptTokens = usage.prompt;
+  const outputTokens = usage.output;

  if (Object.prototype.hasOwnProperty.call(chunk, 'error')) {
    return {
@@ -29,7 +31,8 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
      finished: true,
      contentFilter: false,
      errorMessage: formatErrorMessage(chunk.error),
-      outputTokens: 0,
+      promptTokens,
+      outputTokens,
      newType: currentType,
    };
  }
@@ -43,6 +46,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
      finished: true,
      contentFilter: true,
      errorMessage: '',
+      promptTokens,
      outputTokens,
      newType: currentType,
    };
@@ -55,6 +59,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
      finished: false,
      contentFilter: false,
      errorMessage: '',
+      promptTokens,
      outputTokens,
      newType: currentType,
    };
@@ -67,6 +72,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
        finished: true,
        contentFilter: false,
        errorMessage: '',
+        promptTokens,
        outputTokens,
        newType: currentType,
      };
@@ -77,6 +83,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
      finished: false,
      contentFilter: false,
      errorMessage: '',
+      promptTokens,
      outputTokens,
      newType: currentType,
    };
@@ -89,6 +96,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
      finished: false,
      contentFilter: false,
      errorMessage: '',
+      promptTokens,
      outputTokens,
      newType: currentType,
    };
@@ -157,6 +165,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
        finished: true,
        contentFilter: false,
        errorMessage: '',
+        promptTokens,
        outputTokens,
        newType,
      };
@@ -168,6 +177,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
        finished: false,
        contentFilter: false,
        errorMessage: '',
+        promptTokens,
        outputTokens,
        newType,
      };
@@ -182,6 +192,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
      finished: false,
      contentFilter: false,
      errorMessage: '',
+      promptTokens,
      outputTokens,
      newType,
    };
@@ -196,6 +207,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
        finished: true,
        contentFilter: false,
        errorMessage: '',
+        promptTokens,
        outputTokens,
        newType,
      };
@@ -207,6 +219,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
      finished: false,
      contentFilter: false,
      errorMessage: '',
+      promptTokens,
      outputTokens,
      newType,
    };
@@ -242,6 +255,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
    finished: false,
    contentFilter: false,
    errorMessage: '',
+    promptTokens,
    outputTokens,
    newType,
  };
@@ -429,40 +443,54 @@ function hasContentFilterStatusValue(v) {
 }

 function extractAccumulatedTokenUsage(chunk) {
-  return findAccumulatedTokenUsage(chunk);
+  const usage = findAccumulatedTokenUsage(chunk);
+  return usage || { prompt: 0, output: 0 };
 }

 function findAccumulatedTokenUsage(v) {
  if (Array.isArray(v)) {
    for (const item of v) {
-      const n = findAccumulatedTokenUsage(item);
-      if (n > 0) {
-        return n;
-      }
+      const u = findAccumulatedTokenUsage(item);
+      if (u) return u;
    }
-    return 0;
+    return null;
  }
  if (!v || typeof v !== 'object') {
-    return 0;
+    return null;
  }
  const pathValue = asString(v.p);
  if (pathValue && pathValue.toLowerCase().includes('accumulated_token_usage')) {
    const n = toInt(v.v);
    if (n > 0) {
-      return n;
+      return { prompt: 0, output: n };
+    }
+  }
+  if (pathValue && pathValue.toLowerCase().includes('token_usage')) {
+    const u = v.v;
+    if (u && typeof u === 'object') {
+      const p = toInt(u.prompt_tokens);
+      const c = toInt(u.completion_tokens);
+      if (p > 0 || c > 0) {
+        return { prompt: p, output: c };
+      }
    }
  }
  const direct = toInt(v.accumulated_token_usage);
  if (direct > 0) {
-    return direct;
+    return { prompt: 0, output: direct };
  }
-  for (const value of Object.values(v)) {
-    const n = findAccumulatedTokenUsage(value);
-    if (n > 0) {
-      return n;
+  if (v.token_usage && typeof v.token_usage === 'object') {
+    const p = toInt(v.token_usage.prompt_tokens);
+    const c = toInt(v.token_usage.completion_tokens);
+    if (p > 0 || c > 0) {
+      return { prompt: p, output: c };
    }
  }
-  return 0;
+  for (const value of Object.values(v)) {
+    const u = findAccumulatedTokenUsage(value);
+    if (u) return u;
+  }
+  return null;
 }

 function toInt(v) {
--- a/internal/js/chat-stream/token_usage.js
+++ b/internal/js/chat-stream/token_usage.js
@@ -1,15 +1,17 @@
 'use strict';

-function buildUsage(prompt, thinking, output, outputTokens = 0) {
-  const promptTokens = estimateTokens(prompt);
+function buildUsage(prompt, thinking, output, outputTokens = 0, providedPromptTokens = 0) {
  const reasoningTokens = estimateTokens(thinking);
  const completionTokens = estimateTokens(output);
+
+  const finalPromptTokens = Number.isFinite(providedPromptTokens) && providedPromptTokens > 0 ? Math.trunc(providedPromptTokens) : estimateTokens(prompt);
+
  const overriddenCompletionTokens = Number.isFinite(outputTokens) && outputTokens > 0 ? Math.trunc(outputTokens) : 0;
  const finalCompletionTokens = overriddenCompletionTokens > 0 ? overriddenCompletionTokens : reasoningTokens + completionTokens;
  return {
-    prompt_tokens: promptTokens,
+    prompt_tokens: finalPromptTokens,
    completion_tokens: finalCompletionTokens,
-    total_tokens: promptTokens + finalCompletionTokens,
+    total_tokens: finalPromptTokens + finalCompletionTokens,
    completion_tokens_details: {
      reasoning_tokens: reasoningTokens,
    },
--- a/internal/js/chat-stream/vercel_stream_impl.js
+++ b/internal/js/chat-stream/vercel_stream_impl.js
@@ -125,6 +125,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
    let currentType = thinkingEnabled ? 'thinking' : 'text';
    let thinkingText = '';
    let outputText = '';
+    let promptTokens = 0;
    let outputTokens = 0;
    const toolSieveEnabled = toolPolicy.toolSieveEnabled;
    const toolSieveState = createToolSieveState();
@@ -178,7 +179,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
        created,
        model,
        choices: [{ delta: {}, index: 0, finish_reason: reason }],
-        usage: buildUsage(finalPrompt, thinkingText, outputText, outputTokens),
+        usage: buildUsage(finalPrompt, thinkingText, outputText, outputTokens, promptTokens),
      });
      if (!res.writableEnded && !res.destroyed) {
        res.write('data: [DONE]\n\n');
@@ -227,6 +228,9 @@ async function handleVercelStream(req, res, rawBody, payload) {
          if (!parsed.parsed) {
            continue;
          }
+          if (parsed.promptTokens > 0) {
+            promptTokens = parsed.promptTokens;
+          }
          if (parsed.outputTokens > 0) {
            outputTokens = parsed.outputTokens;
          }