Mirror of https://github.com/CJackHwang/ds2api.git (synced 2026-05-18 07:05:08 +08:00)
feat: support explicit prompt token tracking in SSE parsing and stream handlers
This commit is contained in:
@@ -20,7 +20,9 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
};
|
||||
}
|
||||
|
||||
-const outputTokens = extractAccumulatedTokenUsage(chunk);
|
||||
+const usage = extractAccumulatedTokenUsage(chunk);
|
||||
+const promptTokens = usage.prompt;
|
||||
+const outputTokens = usage.output;
|
||||
|
||||
if (Object.prototype.hasOwnProperty.call(chunk, 'error')) {
|
||||
return {
|
||||
@@ -29,7 +31,8 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
finished: true,
|
||||
contentFilter: false,
|
||||
errorMessage: formatErrorMessage(chunk.error),
|
||||
-outputTokens: 0,
|
||||
+promptTokens,
|
||||
+outputTokens,
|
||||
newType: currentType,
|
||||
};
|
||||
}
|
||||
@@ -43,6 +46,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
finished: true,
|
||||
contentFilter: true,
|
||||
errorMessage: '',
|
||||
+promptTokens,
|
||||
outputTokens,
|
||||
newType: currentType,
|
||||
};
|
||||
@@ -55,6 +59,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
finished: false,
|
||||
contentFilter: false,
|
||||
errorMessage: '',
|
||||
+promptTokens,
|
||||
outputTokens,
|
||||
newType: currentType,
|
||||
};
|
||||
@@ -67,6 +72,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
finished: true,
|
||||
contentFilter: false,
|
||||
errorMessage: '',
|
||||
+promptTokens,
|
||||
outputTokens,
|
||||
newType: currentType,
|
||||
};
|
||||
@@ -77,6 +83,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
finished: false,
|
||||
contentFilter: false,
|
||||
errorMessage: '',
|
||||
+promptTokens,
|
||||
outputTokens,
|
||||
newType: currentType,
|
||||
};
|
||||
@@ -89,6 +96,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
finished: false,
|
||||
contentFilter: false,
|
||||
errorMessage: '',
|
||||
+promptTokens,
|
||||
outputTokens,
|
||||
newType: currentType,
|
||||
};
|
||||
@@ -157,6 +165,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
finished: true,
|
||||
contentFilter: false,
|
||||
errorMessage: '',
|
||||
+promptTokens,
|
||||
outputTokens,
|
||||
newType,
|
||||
};
|
||||
@@ -168,6 +177,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
finished: false,
|
||||
contentFilter: false,
|
||||
errorMessage: '',
|
||||
+promptTokens,
|
||||
outputTokens,
|
||||
newType,
|
||||
};
|
||||
@@ -182,6 +192,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
finished: false,
|
||||
contentFilter: false,
|
||||
errorMessage: '',
|
||||
+promptTokens,
|
||||
outputTokens,
|
||||
newType,
|
||||
};
|
||||
@@ -196,6 +207,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
finished: true,
|
||||
contentFilter: false,
|
||||
errorMessage: '',
|
||||
+promptTokens,
|
||||
outputTokens,
|
||||
newType,
|
||||
};
|
||||
@@ -207,6 +219,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
finished: false,
|
||||
contentFilter: false,
|
||||
errorMessage: '',
|
||||
+promptTokens,
|
||||
outputTokens,
|
||||
newType,
|
||||
};
|
||||
@@ -242,6 +255,7 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc
|
||||
finished: false,
|
||||
contentFilter: false,
|
||||
errorMessage: '',
|
||||
+promptTokens,
|
||||
outputTokens,
|
||||
newType,
|
||||
};
|
||||
@@ -429,40 +443,54 @@ function hasContentFilterStatusValue(v) {
|
||||
}
|
||||
|
||||
function extractAccumulatedTokenUsage(chunk) {
|
||||
-return findAccumulatedTokenUsage(chunk);
|
||||
+const usage = findAccumulatedTokenUsage(chunk);
|
||||
+return usage || { prompt: 0, output: 0 };
|
||||
}
|
||||
|
||||
function findAccumulatedTokenUsage(v) {
|
||||
if (Array.isArray(v)) {
|
||||
for (const item of v) {
|
||||
-const n = findAccumulatedTokenUsage(item);
|
||||
-if (n > 0) {
|
||||
-return n;
|
||||
-}
|
||||
+const u = findAccumulatedTokenUsage(item);
|
||||
+if (u) return u;
|
||||
}
|
||||
-return 0;
|
||||
+return null;
|
||||
}
|
||||
if (!v || typeof v !== 'object') {
|
||||
-return 0;
|
||||
+return null;
|
||||
}
|
||||
const pathValue = asString(v.p);
|
||||
if (pathValue && pathValue.toLowerCase().includes('accumulated_token_usage')) {
|
||||
const n = toInt(v.v);
|
||||
if (n > 0) {
|
||||
-return n;
|
||||
+return { prompt: 0, output: n };
|
||||
}
|
||||
}
|
||||
+if (pathValue && pathValue.toLowerCase().includes('token_usage')) {
|
||||
+const u = v.v;
|
||||
+if (u && typeof u === 'object') {
|
||||
+const p = toInt(u.prompt_tokens);
|
||||
+const c = toInt(u.completion_tokens);
|
||||
+if (p > 0 || c > 0) {
|
||||
+return { prompt: p, output: c };
|
||||
+}
|
||||
+}
|
||||
+}
|
||||
const direct = toInt(v.accumulated_token_usage);
|
||||
if (direct > 0) {
|
||||
-return direct;
|
||||
+return { prompt: 0, output: direct };
|
||||
}
|
||||
-for (const value of Object.values(v)) {
|
||||
-const n = findAccumulatedTokenUsage(value);
|
||||
-if (n > 0) {
|
||||
-return n;
|
||||
+if (v.token_usage && typeof v.token_usage === 'object') {
|
||||
+const p = toInt(v.token_usage.prompt_tokens);
|
||||
+const c = toInt(v.token_usage.completion_tokens);
|
||||
+if (p > 0 || c > 0) {
|
||||
+return { prompt: p, output: c };
|
||||
}
|
||||
}
|
||||
-return 0;
|
||||
+for (const value of Object.values(v)) {
|
||||
+const u = findAccumulatedTokenUsage(value);
|
||||
+if (u) return u;
|
||||
+}
|
||||
+return null;
|
||||
}
|
||||
|
||||
function toInt(v) {
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
'use strict';
|
||||
|
||||
-function buildUsage(prompt, thinking, output, outputTokens = 0) {
|
||||
-const promptTokens = estimateTokens(prompt);
|
||||
+function buildUsage(prompt, thinking, output, outputTokens = 0, providedPromptTokens = 0) {
|
||||
const reasoningTokens = estimateTokens(thinking);
|
||||
const completionTokens = estimateTokens(output);
|
||||
|
||||
+const finalPromptTokens = Number.isFinite(providedPromptTokens) && providedPromptTokens > 0 ? Math.trunc(providedPromptTokens) : estimateTokens(prompt);
|
||||
|
||||
const overriddenCompletionTokens = Number.isFinite(outputTokens) && outputTokens > 0 ? Math.trunc(outputTokens) : 0;
|
||||
const finalCompletionTokens = overriddenCompletionTokens > 0 ? overriddenCompletionTokens : reasoningTokens + completionTokens;
|
||||
return {
|
||||
-prompt_tokens: promptTokens,
|
||||
+prompt_tokens: finalPromptTokens,
|
||||
completion_tokens: finalCompletionTokens,
|
||||
-total_tokens: promptTokens + finalCompletionTokens,
|
||||
+total_tokens: finalPromptTokens + finalCompletionTokens,
|
||||
completion_tokens_details: {
|
||||
reasoning_tokens: reasoningTokens,
|
||||
},
|
||||
|
||||
@@ -125,6 +125,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
|
||||
let currentType = thinkingEnabled ? 'thinking' : 'text';
|
||||
let thinkingText = '';
|
||||
let outputText = '';
|
||||
+let promptTokens = 0;
|
||||
let outputTokens = 0;
|
||||
const toolSieveEnabled = toolPolicy.toolSieveEnabled;
|
||||
const toolSieveState = createToolSieveState();
|
||||
@@ -178,7 +179,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
|
||||
created,
|
||||
model,
|
||||
choices: [{ delta: {}, index: 0, finish_reason: reason }],
|
||||
-usage: buildUsage(finalPrompt, thinkingText, outputText, outputTokens),
|
||||
+usage: buildUsage(finalPrompt, thinkingText, outputText, outputTokens, promptTokens),
|
||||
});
|
||||
if (!res.writableEnded && !res.destroyed) {
|
||||
res.write('data: [DONE]\n\n');
|
||||
@@ -227,6 +228,9 @@ async function handleVercelStream(req, res, rawBody, payload) {
|
||||
if (!parsed.parsed) {
|
||||
continue;
|
||||
}
|
||||
+if (parsed.promptTokens > 0) {
|
||||
+promptTokens = parsed.promptTokens;
|
||||
+}
|
||||
if (parsed.outputTokens > 0) {
|
||||
outputTokens = parsed.outputTokens;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user