mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-05 00:45:29 +08:00
feat: Implement tool call sieving and formatting for streaming responses in Node.js, add Go fallback for non-Vercel environments, and update Vercel configuration.
This commit is contained in:
@@ -71,6 +71,7 @@ Notes:
|
||||
- `api/chat-stream.js` automatically falls back to the Go entry for non-stream requests or requests with `tools` (internal `__go=1`)
|
||||
- `api/chat-stream.js` is data-path only (stream relay + SSE conversion); auth/account/session/PoW preparation still comes from an internal Go prepare endpoint (enabled on Vercel only)
|
||||
- Go prepare creates a stream lease and Node releases it when streaming ends, keeping account occupancy semantics aligned with native Go streaming
|
||||
- `vercel.json` sets `maxDuration: 300` for both `api/chat-stream.js` and `api/index.go` (subject to your Vercel plan limits)
|
||||
|
||||
Minimum environment variables:
|
||||
|
||||
|
||||
@@ -71,6 +71,7 @@ docker-compose up -d --build
|
||||
- `api/chat-stream.js` 对非流式请求或 `tools` 请求会自动回退到 Go 入口(内部 `__go=1`)
|
||||
- `api/chat-stream.js` 仅负责流式数据转发与 SSE 转换;鉴权、账号选择、会话创建、PoW 计算仍由 Go 内部 prepare 接口完成(仅 Vercel 启用)
|
||||
- Go prepare 会创建流式 lease,Node 在流结束后回调 release;账号占用语义与 Go 原生流式保持一致
|
||||
- `vercel.json` 已将 `api/chat-stream.js` 与 `api/index.go` 的 `maxDuration` 设为 `300`(受套餐上限约束)
|
||||
|
||||
至少配置环境变量:
|
||||
|
||||
|
||||
@@ -90,6 +90,7 @@ docker-compose logs -f
|
||||
- `/v1/chat/completions` 在 Vercel 上默认走 `api/chat-stream.js`(Node Runtime)以保证实时 SSE
|
||||
- `api/chat-stream.js` 仅负责流式数据转发;鉴权、账号选择、会话/PoW 准备仍由 Go 内部 prepare 接口处理
|
||||
- Go prepare 会下发 `lease_id`,Node 在流结束后调用 release,确保账号占用时长与 Go 原生流式一致
|
||||
- WebUI 的“非流式测试”会直接请求 `?__go=1`,避免 Vercel 上 Node 中转导致长请求更易超时
|
||||
- 至少配置:
|
||||
- `DS2API_ADMIN_KEY`
|
||||
- `DS2API_CONFIG_JSON`(JSON 字符串或 Base64)
|
||||
|
||||
@@ -90,6 +90,7 @@ docker-compose logs -f
|
||||
- `/v1/chat/completions` is routed to `api/chat-stream.js` (Node Runtime) on Vercel to preserve real-time SSE
|
||||
- `api/chat-stream.js` is data-path only; auth/account/session/PoW preparation still comes from an internal Go prepare endpoint
|
||||
- Go prepare returns a `lease_id`; Node releases it at stream end so account occupancy duration stays aligned with native Go streaming behavior
|
||||
- WebUI non-stream test calls `?__go=1` directly to avoid extra Node hop timeout risk on long Vercel requests
|
||||
- Minimum env vars:
|
||||
- `DS2API_ADMIN_KEY`
|
||||
- `DS2API_CONFIG_JSON` (raw JSON or Base64)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
'use strict';
|
||||
|
||||
const crypto = require('crypto');
|
||||
|
||||
const DEEPSEEK_COMPLETION_URL = 'https://chat.deepseek.com/api/v0/chat/completion';
|
||||
|
||||
const BASE_HEADERS = {
|
||||
@@ -37,6 +39,14 @@ module.exports = async function handler(req, res) {
|
||||
}
|
||||
|
||||
const rawBody = await readRawBody(req);
|
||||
|
||||
// Hard guard: only use Node data path for streaming on Vercel runtime.
|
||||
// Any non-Vercel runtime always falls back to Go for full behavior parity.
|
||||
if (!isVercelRuntime()) {
|
||||
await proxyToGo(req, res, rawBody);
|
||||
return;
|
||||
}
|
||||
|
||||
let payload;
|
||||
try {
|
||||
payload = JSON.parse(rawBody.toString('utf8') || '{}');
|
||||
@@ -66,6 +76,7 @@ module.exports = async function handler(req, res) {
|
||||
const finalPrompt = asString(prep.body.final_prompt);
|
||||
const thinkingEnabled = toBool(prep.body.thinking_enabled);
|
||||
const searchEnabled = toBool(prep.body.search_enabled);
|
||||
const toolNames = extractToolNames(payload.tools);
|
||||
|
||||
if (!model || !leaseID || !deepseekToken || !powHeader || !completionPayload) {
|
||||
writeOpenAIError(res, 500, 'invalid vercel prepare response');
|
||||
@@ -103,6 +114,9 @@ module.exports = async function handler(req, res) {
|
||||
let currentType = thinkingEnabled ? 'thinking' : 'text';
|
||||
let thinkingText = '';
|
||||
let outputText = '';
|
||||
const toolSieveEnabled = toolNames.length > 0;
|
||||
const toolSieveState = createToolSieveState();
|
||||
let toolCallsEmitted = false;
|
||||
const decoder = new TextDecoder();
|
||||
const reader = completionRes.body.getReader();
|
||||
let buffered = '';
|
||||
@@ -115,11 +129,42 @@ module.exports = async function handler(req, res) {
|
||||
}
|
||||
};
|
||||
|
||||
const sendDeltaFrame = (delta) => {
|
||||
const payloadDelta = { ...delta };
|
||||
if (!firstChunkSent) {
|
||||
payloadDelta.role = 'assistant';
|
||||
firstChunkSent = true;
|
||||
}
|
||||
sendFrame({
|
||||
id: sessionID,
|
||||
object: 'chat.completion.chunk',
|
||||
created,
|
||||
model,
|
||||
choices: [{ delta: payloadDelta, index: 0 }],
|
||||
});
|
||||
};
|
||||
|
||||
const finish = async (reason) => {
|
||||
if (ended) {
|
||||
return;
|
||||
}
|
||||
ended = true;
|
||||
if (toolSieveEnabled) {
|
||||
const tailEvents = flushToolSieve(toolSieveState, toolNames);
|
||||
for (const evt of tailEvents) {
|
||||
if (evt.type === 'tool_calls') {
|
||||
toolCallsEmitted = true;
|
||||
sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls) });
|
||||
continue;
|
||||
}
|
||||
if (evt.text) {
|
||||
sendDeltaFrame({ content: evt.text });
|
||||
}
|
||||
}
|
||||
}
|
||||
if (toolCallsEmitted) {
|
||||
reason = 'tool_calls';
|
||||
}
|
||||
sendFrame({
|
||||
id: sessionID,
|
||||
object: 'chat.completion.chunk',
|
||||
@@ -181,25 +226,27 @@ module.exports = async function handler(req, res) {
|
||||
if (searchEnabled && isCitation(p.text)) {
|
||||
continue;
|
||||
}
|
||||
const delta = {};
|
||||
if (!firstChunkSent) {
|
||||
delta.role = 'assistant';
|
||||
firstChunkSent = true;
|
||||
}
|
||||
if (p.type === 'thinking') {
|
||||
thinkingText += p.text;
|
||||
delta.reasoning_content = p.text;
|
||||
sendDeltaFrame({ reasoning_content: p.text });
|
||||
} else {
|
||||
outputText += p.text;
|
||||
delta.content = p.text;
|
||||
if (!toolSieveEnabled) {
|
||||
sendDeltaFrame({ content: p.text });
|
||||
continue;
|
||||
}
|
||||
const events = processToolSieveChunk(toolSieveState, p.text, toolNames);
|
||||
for (const evt of events) {
|
||||
if (evt.type === 'tool_calls') {
|
||||
toolCallsEmitted = true;
|
||||
sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls) });
|
||||
continue;
|
||||
}
|
||||
if (evt.text) {
|
||||
sendDeltaFrame({ content: evt.text });
|
||||
}
|
||||
}
|
||||
}
|
||||
sendFrame({
|
||||
id: sessionID,
|
||||
object: 'chat.completion.chunk',
|
||||
created,
|
||||
model,
|
||||
choices: [{ delta, index: 0 }],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -612,6 +659,10 @@ function toBool(v) {
|
||||
return v === true;
|
||||
}
|
||||
|
||||
function isVercelRuntime() {
|
||||
return asString(process.env.VERCEL) !== '' || asString(process.env.NOW_REGION) !== '';
|
||||
}
|
||||
|
||||
function asString(v) {
|
||||
if (typeof v === 'string') {
|
||||
return v.trim();
|
||||
@@ -624,3 +675,412 @@ function asString(v) {
|
||||
}
|
||||
return String(v).trim();
|
||||
}
|
||||
|
||||
function extractToolNames(tools) {
|
||||
if (!Array.isArray(tools) || tools.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const out = [];
|
||||
for (const t of tools) {
|
||||
if (!t || typeof t !== 'object') {
|
||||
continue;
|
||||
}
|
||||
const fn = t.function && typeof t.function === 'object' ? t.function : t;
|
||||
const name = asString(fn.name);
|
||||
if (name) {
|
||||
out.push(name);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
function createToolSieveState() {
|
||||
return {
|
||||
pending: '',
|
||||
capture: '',
|
||||
capturing: false,
|
||||
};
|
||||
}
|
||||
|
||||
function processToolSieveChunk(state, chunk, toolNames) {
|
||||
if (!state) {
|
||||
return [];
|
||||
}
|
||||
if (chunk) {
|
||||
state.pending += chunk;
|
||||
}
|
||||
const events = [];
|
||||
// eslint-disable-next-line no-constant-condition
|
||||
while (true) {
|
||||
if (state.capturing) {
|
||||
if (state.pending) {
|
||||
state.capture += state.pending;
|
||||
state.pending = '';
|
||||
}
|
||||
const consumed = consumeToolCapture(state.capture, toolNames);
|
||||
if (!consumed.ready) {
|
||||
break;
|
||||
}
|
||||
state.capture = '';
|
||||
state.capturing = false;
|
||||
if (consumed.prefix) {
|
||||
events.push({ type: 'text', text: consumed.prefix });
|
||||
}
|
||||
if (Array.isArray(consumed.calls) && consumed.calls.length > 0) {
|
||||
events.push({ type: 'tool_calls', calls: consumed.calls });
|
||||
}
|
||||
if (consumed.suffix) {
|
||||
state.pending += consumed.suffix;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!state.pending) {
|
||||
break;
|
||||
}
|
||||
|
||||
const start = findToolSegmentStart(state.pending);
|
||||
if (start >= 0) {
|
||||
const prefix = state.pending.slice(0, start);
|
||||
if (prefix) {
|
||||
events.push({ type: 'text', text: prefix });
|
||||
}
|
||||
state.capture = state.pending.slice(start);
|
||||
state.pending = '';
|
||||
state.capturing = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
const [safe, hold] = splitSafeContent(state.pending, 64);
|
||||
if (!safe) {
|
||||
break;
|
||||
}
|
||||
state.pending = hold;
|
||||
events.push({ type: 'text', text: safe });
|
||||
}
|
||||
return events;
|
||||
}
|
||||
|
||||
function flushToolSieve(state, toolNames) {
|
||||
if (!state) {
|
||||
return [];
|
||||
}
|
||||
const events = processToolSieveChunk(state, '', toolNames);
|
||||
if (state.capturing) {
|
||||
const consumed = consumeToolCapture(state.capture, toolNames);
|
||||
if (consumed.ready) {
|
||||
if (consumed.prefix) {
|
||||
events.push({ type: 'text', text: consumed.prefix });
|
||||
}
|
||||
if (Array.isArray(consumed.calls) && consumed.calls.length > 0) {
|
||||
events.push({ type: 'tool_calls', calls: consumed.calls });
|
||||
}
|
||||
if (consumed.suffix) {
|
||||
events.push({ type: 'text', text: consumed.suffix });
|
||||
}
|
||||
} else if (state.capture) {
|
||||
events.push({ type: 'text', text: state.capture });
|
||||
}
|
||||
state.capture = '';
|
||||
state.capturing = false;
|
||||
}
|
||||
if (state.pending) {
|
||||
events.push({ type: 'text', text: state.pending });
|
||||
state.pending = '';
|
||||
}
|
||||
return events;
|
||||
}
|
||||
|
||||
function splitSafeContent(s, holdChars) {
|
||||
const chars = Array.from(s || '');
|
||||
if (chars.length <= holdChars) {
|
||||
return ['', s];
|
||||
}
|
||||
return [chars.slice(0, chars.length - holdChars).join(''), chars.slice(chars.length - holdChars).join('')];
|
||||
}
|
||||
|
||||
function findToolSegmentStart(s) {
|
||||
if (!s) {
|
||||
return -1;
|
||||
}
|
||||
const lower = s.toLowerCase();
|
||||
const keyIdx = lower.indexOf('tool_calls');
|
||||
if (keyIdx < 0) {
|
||||
return -1;
|
||||
}
|
||||
const start = s.slice(0, keyIdx).lastIndexOf('{');
|
||||
return start >= 0 ? start : keyIdx;
|
||||
}
|
||||
|
||||
function consumeToolCapture(captured, toolNames) {
|
||||
if (!captured) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
const lower = captured.toLowerCase();
|
||||
const keyIdx = lower.indexOf('tool_calls');
|
||||
if (keyIdx < 0) {
|
||||
if (Array.from(captured).length >= 256) {
|
||||
return { ready: true, prefix: captured, calls: [], suffix: '' };
|
||||
}
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
const start = captured.slice(0, keyIdx).lastIndexOf('{');
|
||||
if (start < 0) {
|
||||
if (Array.from(captured).length >= 512) {
|
||||
return { ready: true, prefix: captured, calls: [], suffix: '' };
|
||||
}
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
const obj = extractJSONObjectFrom(captured, start);
|
||||
if (!obj.ok) {
|
||||
if (Array.from(captured).length >= 4096) {
|
||||
return { ready: true, prefix: captured, calls: [], suffix: '' };
|
||||
}
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
const parsed = parseToolCalls(captured.slice(start, obj.end), toolNames);
|
||||
if (parsed.length === 0) {
|
||||
return {
|
||||
ready: true,
|
||||
prefix: captured.slice(0, obj.end),
|
||||
calls: [],
|
||||
suffix: captured.slice(obj.end),
|
||||
};
|
||||
}
|
||||
return {
|
||||
ready: true,
|
||||
prefix: captured.slice(0, start),
|
||||
calls: parsed,
|
||||
suffix: captured.slice(obj.end),
|
||||
};
|
||||
}
|
||||
|
||||
function extractJSONObjectFrom(text, start) {
|
||||
if (!text || start < 0 || start >= text.length || text[start] !== '{') {
|
||||
return { ok: false, end: 0 };
|
||||
}
|
||||
let depth = 0;
|
||||
let quote = '';
|
||||
let escaped = false;
|
||||
for (let i = start; i < text.length; i += 1) {
|
||||
const ch = text[i];
|
||||
if (quote) {
|
||||
if (escaped) {
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
if (ch === '\\') {
|
||||
escaped = true;
|
||||
continue;
|
||||
}
|
||||
if (ch === quote) {
|
||||
quote = '';
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (ch === '"' || ch === "'") {
|
||||
quote = ch;
|
||||
continue;
|
||||
}
|
||||
if (ch === '{') {
|
||||
depth += 1;
|
||||
continue;
|
||||
}
|
||||
if (ch === '}') {
|
||||
depth -= 1;
|
||||
if (depth === 0) {
|
||||
return { ok: true, end: i + 1 };
|
||||
}
|
||||
}
|
||||
}
|
||||
return { ok: false, end: 0 };
|
||||
}
|
||||
|
||||
function parseToolCalls(text, toolNames) {
|
||||
if (!asString(text)) {
|
||||
return [];
|
||||
}
|
||||
const candidates = buildToolCallCandidates(text);
|
||||
let parsed = [];
|
||||
for (const c of candidates) {
|
||||
parsed = parseToolCallsPayload(c);
|
||||
if (parsed.length > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (parsed.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const allowed = new Set((toolNames || []).filter(Boolean));
|
||||
const out = [];
|
||||
for (const tc of parsed) {
|
||||
if (!tc || !tc.name) {
|
||||
continue;
|
||||
}
|
||||
if (allowed.size > 0 && !allowed.has(tc.name)) {
|
||||
continue;
|
||||
}
|
||||
out.push({ name: tc.name, input: tc.input || {} });
|
||||
}
|
||||
if (out.length === 0 && parsed.length > 0) {
|
||||
for (const tc of parsed) {
|
||||
if (!tc || !tc.name) {
|
||||
continue;
|
||||
}
|
||||
out.push({ name: tc.name, input: tc.input || {} });
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
function buildToolCallCandidates(text) {
|
||||
const trimmed = asString(text);
|
||||
const candidates = [trimmed];
|
||||
const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/gi) || [];
|
||||
for (const block of fenced) {
|
||||
const m = block.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
|
||||
if (m && m[1]) {
|
||||
candidates.push(asString(m[1]));
|
||||
}
|
||||
}
|
||||
const keyIdx = trimmed.toLowerCase().indexOf('tool_calls');
|
||||
if (keyIdx >= 0) {
|
||||
const start = trimmed.slice(0, keyIdx).lastIndexOf('{');
|
||||
if (start >= 0) {
|
||||
const obj = extractJSONObjectFrom(trimmed, start);
|
||||
if (obj.ok) {
|
||||
candidates.push(asString(trimmed.slice(start, obj.end)));
|
||||
}
|
||||
}
|
||||
}
|
||||
const first = trimmed.indexOf('{');
|
||||
const last = trimmed.lastIndexOf('}');
|
||||
if (first >= 0 && last > first) {
|
||||
candidates.push(asString(trimmed.slice(first, last + 1)));
|
||||
}
|
||||
return [...new Set(candidates.filter(Boolean))];
|
||||
}
|
||||
|
||||
function parseToolCallsPayload(payload) {
|
||||
let decoded;
|
||||
try {
|
||||
decoded = JSON.parse(payload);
|
||||
} catch (_err) {
|
||||
return [];
|
||||
}
|
||||
if (Array.isArray(decoded)) {
|
||||
return parseToolCallList(decoded);
|
||||
}
|
||||
if (!decoded || typeof decoded !== 'object') {
|
||||
return [];
|
||||
}
|
||||
if (decoded.tool_calls) {
|
||||
return parseToolCallList(decoded.tool_calls);
|
||||
}
|
||||
const one = parseToolCallItem(decoded);
|
||||
return one ? [one] : [];
|
||||
}
|
||||
|
||||
function parseToolCallList(v) {
|
||||
if (!Array.isArray(v)) {
|
||||
return [];
|
||||
}
|
||||
const out = [];
|
||||
for (const item of v) {
|
||||
if (!item || typeof item !== 'object') {
|
||||
continue;
|
||||
}
|
||||
const one = parseToolCallItem(item);
|
||||
if (one) {
|
||||
out.push(one);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
function parseToolCallItem(m) {
|
||||
let name = asString(m.name);
|
||||
let inputRaw = m.input;
|
||||
let hasInput = Object.prototype.hasOwnProperty.call(m, 'input');
|
||||
const fn = m.function && typeof m.function === 'object' ? m.function : null;
|
||||
if (fn) {
|
||||
if (!name) {
|
||||
name = asString(fn.name);
|
||||
}
|
||||
if (!hasInput && Object.prototype.hasOwnProperty.call(fn, 'arguments')) {
|
||||
inputRaw = fn.arguments;
|
||||
hasInput = true;
|
||||
}
|
||||
}
|
||||
if (!hasInput) {
|
||||
for (const k of ['arguments', 'args', 'parameters', 'params']) {
|
||||
if (Object.prototype.hasOwnProperty.call(m, k)) {
|
||||
inputRaw = m[k];
|
||||
hasInput = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!name) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
name,
|
||||
input: parseToolCallInput(inputRaw),
|
||||
};
|
||||
}
|
||||
|
||||
function parseToolCallInput(v) {
|
||||
if (v == null) {
|
||||
return {};
|
||||
}
|
||||
if (typeof v === 'string') {
|
||||
const raw = asString(v);
|
||||
if (!raw) {
|
||||
return {};
|
||||
}
|
||||
try {
|
||||
const parsed = JSON.parse(raw);
|
||||
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
||||
return parsed;
|
||||
}
|
||||
} catch (_err) {
|
||||
return { _raw: raw };
|
||||
}
|
||||
return {};
|
||||
}
|
||||
if (typeof v === 'object' && !Array.isArray(v)) {
|
||||
return v;
|
||||
}
|
||||
try {
|
||||
const parsed = JSON.parse(JSON.stringify(v));
|
||||
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
||||
return parsed;
|
||||
}
|
||||
} catch (_err) {
|
||||
return {};
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
function formatOpenAIStreamToolCalls(calls) {
|
||||
if (!Array.isArray(calls) || calls.length === 0) {
|
||||
return [];
|
||||
}
|
||||
return calls.map((c, idx) => ({
|
||||
index: idx,
|
||||
id: `call_${newCallID()}`,
|
||||
type: 'function',
|
||||
function: {
|
||||
name: c.name,
|
||||
arguments: JSON.stringify(c.input || {}),
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
function newCallID() {
|
||||
if (typeof crypto.randomUUID === 'function') {
|
||||
return crypto.randomUUID().replace(/-/g, '');
|
||||
}
|
||||
return `${Date.now()}${Math.floor(Math.random() * 1e9)}`;
|
||||
}
|
||||
|
||||
@@ -39,6 +39,17 @@ type streamLease struct {
|
||||
ExpiresAt time.Time
|
||||
}
|
||||
|
||||
type toolStreamSieveState struct {
|
||||
pending strings.Builder
|
||||
capture strings.Builder
|
||||
capturing bool
|
||||
}
|
||||
|
||||
type toolStreamEvent struct {
|
||||
Content string
|
||||
ToolCalls []util.ParsedToolCall
|
||||
}
|
||||
|
||||
func RegisterRoutes(r chi.Router, h *Handler) {
|
||||
r.Get("/v1/models", h.ListModels)
|
||||
r.Post("/v1/chat/completions", h.ChatCompletions)
|
||||
@@ -376,6 +387,8 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
|
||||
created := time.Now().Unix()
|
||||
firstChunkSent := false
|
||||
bufferToolContent := len(toolNames) > 0
|
||||
var toolSieve toolStreamSieveState
|
||||
toolCallsEmitted := false
|
||||
currentType := "text"
|
||||
if thinkingEnabled {
|
||||
currentType = "thinking"
|
||||
@@ -408,7 +421,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
|
||||
finalThinking := thinking.String()
|
||||
finalText := text.String()
|
||||
detected := util.ParseToolCalls(finalText, toolNames)
|
||||
if len(detected) > 0 {
|
||||
if len(detected) > 0 && !toolCallsEmitted {
|
||||
finishReason = "tool_calls"
|
||||
delta := map[string]any{
|
||||
"tool_calls": util.FormatOpenAIStreamToolCalls(detected),
|
||||
@@ -424,21 +437,29 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
|
||||
"model": model,
|
||||
"choices": []map[string]any{{"delta": delta, "index": 0}},
|
||||
})
|
||||
} else if bufferToolContent && strings.TrimSpace(finalText) != "" {
|
||||
delta := map[string]any{
|
||||
"content": finalText,
|
||||
} else if bufferToolContent {
|
||||
for _, evt := range flushToolSieve(&toolSieve, toolNames) {
|
||||
if evt.Content == "" {
|
||||
continue
|
||||
}
|
||||
delta := map[string]any{
|
||||
"content": evt.Content,
|
||||
}
|
||||
if !firstChunkSent {
|
||||
delta["role"] = "assistant"
|
||||
firstChunkSent = true
|
||||
}
|
||||
sendChunk(map[string]any{
|
||||
"id": completionID,
|
||||
"object": "chat.completion.chunk",
|
||||
"created": created,
|
||||
"model": model,
|
||||
"choices": []map[string]any{{"delta": delta, "index": 0}},
|
||||
})
|
||||
}
|
||||
if !firstChunkSent {
|
||||
delta["role"] = "assistant"
|
||||
firstChunkSent = true
|
||||
}
|
||||
sendChunk(map[string]any{
|
||||
"id": completionID,
|
||||
"object": "chat.completion.chunk",
|
||||
"created": created,
|
||||
"model": model,
|
||||
"choices": []map[string]any{{"delta": delta, "index": 0}},
|
||||
})
|
||||
}
|
||||
if len(detected) > 0 || toolCallsEmitted {
|
||||
finishReason = "tool_calls"
|
||||
}
|
||||
promptTokens := util.EstimateTokens(finalPrompt)
|
||||
reasoningTokens := util.EstimateTokens(finalThinking)
|
||||
@@ -532,6 +553,41 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
|
||||
text.WriteString(p.Text)
|
||||
if !bufferToolContent {
|
||||
delta["content"] = p.Text
|
||||
} else {
|
||||
events := processToolSieveChunk(&toolSieve, p.Text, toolNames)
|
||||
if len(events) == 0 {
|
||||
// Keep thinking delta only frame.
|
||||
}
|
||||
for _, evt := range events {
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
toolCallsEmitted = true
|
||||
tcDelta := map[string]any{
|
||||
"tool_calls": util.FormatOpenAIStreamToolCalls(evt.ToolCalls),
|
||||
}
|
||||
if !firstChunkSent {
|
||||
tcDelta["role"] = "assistant"
|
||||
firstChunkSent = true
|
||||
}
|
||||
newChoices = append(newChoices, map[string]any{
|
||||
"delta": tcDelta,
|
||||
"index": 0,
|
||||
})
|
||||
continue
|
||||
}
|
||||
if evt.Content != "" {
|
||||
contentDelta := map[string]any{
|
||||
"content": evt.Content,
|
||||
}
|
||||
if !firstChunkSent {
|
||||
contentDelta["role"] = "assistant"
|
||||
firstChunkSent = true
|
||||
}
|
||||
newChoices = append(newChoices, map[string]any{
|
||||
"delta": contentDelta,
|
||||
"index": 0,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(delta) > 0 {
|
||||
@@ -669,6 +725,224 @@ func vercelInternalSecret() string {
|
||||
return "admin"
|
||||
}
|
||||
|
||||
func shouldEmitBufferedToolProbeContent(buffered string) bool {
|
||||
trimmed := strings.TrimSpace(buffered)
|
||||
if trimmed == "" {
|
||||
return false
|
||||
}
|
||||
normalized := normalizeToolProbePrefix(trimmed)
|
||||
if normalized == "" {
|
||||
return false
|
||||
}
|
||||
first := normalized[0]
|
||||
switch first {
|
||||
case '{', '[', '`':
|
||||
lower := strings.ToLower(normalized)
|
||||
if strings.Contains(lower, "tool_calls") {
|
||||
return false
|
||||
}
|
||||
// Keep a short hold window for JSON-ish starts to avoid leaking tool JSON.
|
||||
if len([]rune(normalized)) < 20 {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
default:
|
||||
// Natural language starts can be streamed immediately.
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func normalizeToolProbePrefix(s string) string {
|
||||
t := strings.TrimSpace(s)
|
||||
if strings.HasPrefix(t, "```") {
|
||||
t = strings.TrimPrefix(t, "```")
|
||||
t = strings.TrimSpace(t)
|
||||
t = strings.TrimPrefix(strings.ToLower(t), "json")
|
||||
t = strings.TrimSpace(t)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames []string) []toolStreamEvent {
|
||||
if state == nil || chunk == "" {
|
||||
return nil
|
||||
}
|
||||
state.pending.WriteString(chunk)
|
||||
events := make([]toolStreamEvent, 0, 2)
|
||||
|
||||
for {
|
||||
if state.capturing {
|
||||
if state.pending.Len() > 0 {
|
||||
state.capture.WriteString(state.pending.String())
|
||||
state.pending.Reset()
|
||||
}
|
||||
prefix, calls, suffix, ready := consumeToolCapture(state.capture.String(), toolNames)
|
||||
if !ready {
|
||||
break
|
||||
}
|
||||
state.capture.Reset()
|
||||
state.capturing = false
|
||||
if prefix != "" {
|
||||
events = append(events, toolStreamEvent{Content: prefix})
|
||||
}
|
||||
if len(calls) > 0 {
|
||||
events = append(events, toolStreamEvent{ToolCalls: calls})
|
||||
}
|
||||
if suffix != "" {
|
||||
state.pending.WriteString(suffix)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
pending := state.pending.String()
|
||||
if pending == "" {
|
||||
break
|
||||
}
|
||||
start := findToolSegmentStart(pending)
|
||||
if start >= 0 {
|
||||
prefix := pending[:start]
|
||||
if prefix != "" {
|
||||
events = append(events, toolStreamEvent{Content: prefix})
|
||||
}
|
||||
state.pending.Reset()
|
||||
state.capture.WriteString(pending[start:])
|
||||
state.capturing = true
|
||||
continue
|
||||
}
|
||||
|
||||
safe, hold := splitSafeContent(pending, 64)
|
||||
if safe == "" {
|
||||
break
|
||||
}
|
||||
state.pending.Reset()
|
||||
state.pending.WriteString(hold)
|
||||
events = append(events, toolStreamEvent{Content: safe})
|
||||
}
|
||||
|
||||
return events
|
||||
}
|
||||
|
||||
func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStreamEvent {
|
||||
if state == nil {
|
||||
return nil
|
||||
}
|
||||
events := processToolSieveChunk(state, "", toolNames)
|
||||
if state.capturing {
|
||||
raw := state.capture.String()
|
||||
state.capture.Reset()
|
||||
state.capturing = false
|
||||
if raw != "" {
|
||||
events = append(events, toolStreamEvent{Content: raw})
|
||||
}
|
||||
}
|
||||
if state.pending.Len() > 0 {
|
||||
events = append(events, toolStreamEvent{Content: state.pending.String()})
|
||||
state.pending.Reset()
|
||||
}
|
||||
return events
|
||||
}
|
||||
|
||||
func splitSafeContent(s string, holdRunes int) (safe, hold string) {
|
||||
if s == "" || holdRunes <= 0 {
|
||||
return s, ""
|
||||
}
|
||||
runes := []rune(s)
|
||||
if len(runes) <= holdRunes {
|
||||
return "", s
|
||||
}
|
||||
return string(runes[:len(runes)-holdRunes]), string(runes[len(runes)-holdRunes:])
|
||||
}
|
||||
|
||||
func findToolSegmentStart(s string) int {
|
||||
if s == "" {
|
||||
return -1
|
||||
}
|
||||
lower := strings.ToLower(s)
|
||||
keyIdx := strings.Index(lower, "tool_calls")
|
||||
if keyIdx < 0 {
|
||||
return -1
|
||||
}
|
||||
if start := strings.LastIndex(s[:keyIdx], "{"); start >= 0 {
|
||||
return start
|
||||
}
|
||||
return keyIdx
|
||||
}
|
||||
|
||||
func consumeToolCapture(captured string, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) {
|
||||
if captured == "" {
|
||||
return "", nil, "", false
|
||||
}
|
||||
lower := strings.ToLower(captured)
|
||||
keyIdx := strings.Index(lower, "tool_calls")
|
||||
if keyIdx < 0 {
|
||||
if len([]rune(captured)) >= 256 {
|
||||
return captured, nil, "", true
|
||||
}
|
||||
return "", nil, "", false
|
||||
}
|
||||
start := strings.LastIndex(captured[:keyIdx], "{")
|
||||
if start < 0 {
|
||||
if len([]rune(captured)) >= 512 {
|
||||
return captured, nil, "", true
|
||||
}
|
||||
return "", nil, "", false
|
||||
}
|
||||
obj, end, ok := extractJSONObjectFrom(captured, start)
|
||||
if !ok {
|
||||
if len([]rune(captured)) >= 4096 {
|
||||
return captured, nil, "", true
|
||||
}
|
||||
return "", nil, "", false
|
||||
}
|
||||
parsed := util.ParseToolCalls(obj, toolNames)
|
||||
if len(parsed) == 0 {
|
||||
return captured[:end], nil, captured[end:], true
|
||||
}
|
||||
return captured[:start], parsed, captured[end:], true
|
||||
}
|
||||
|
||||
func extractJSONObjectFrom(text string, start int) (string, int, bool) {
|
||||
if start < 0 || start >= len(text) || text[start] != '{' {
|
||||
return "", 0, false
|
||||
}
|
||||
depth := 0
|
||||
quote := byte(0)
|
||||
escaped := false
|
||||
for i := start; i < len(text); i++ {
|
||||
ch := text[i]
|
||||
if quote != 0 {
|
||||
if escaped {
|
||||
escaped = false
|
||||
continue
|
||||
}
|
||||
if ch == '\\' {
|
||||
escaped = true
|
||||
continue
|
||||
}
|
||||
if ch == quote {
|
||||
quote = 0
|
||||
}
|
||||
continue
|
||||
}
|
||||
if ch == '"' || ch == '\'' {
|
||||
quote = ch
|
||||
continue
|
||||
}
|
||||
if ch == '{' {
|
||||
depth++
|
||||
continue
|
||||
}
|
||||
if ch == '}' {
|
||||
depth--
|
||||
if depth == 0 {
|
||||
end := i + 1
|
||||
return text[start:end], end, true
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", 0, false
|
||||
}
|
||||
|
||||
func (h *Handler) holdStreamLease(a *auth.RequestAuth) string {
|
||||
if a == nil {
|
||||
return ""
|
||||
|
||||
@@ -335,3 +335,84 @@ func TestHandleStreamUnknownToolStillIntercepted(t *testing.T) {
|
||||
t.Fatalf("raw tool_calls JSON leaked in content delta: %s", rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleStreamToolsPlainTextStreamsBeforeFinish(t *testing.T) {
|
||||
h := &Handler{}
|
||||
resp := makeSSEHTTPResponse(
|
||||
`data: {"p":"response/content","v":"你好,"}`,
|
||||
`data: {"p":"response/content","v":"这是普通文本回复。"}`,
|
||||
`data: [DONE]`,
|
||||
)
|
||||
rec := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
|
||||
|
||||
h.handleStream(rec, req, resp, "cid6", "deepseek-chat", "prompt", false, false, []string{"search"})
|
||||
|
||||
frames, done := parseSSEDataFrames(t, rec.Body.String())
|
||||
if !done {
|
||||
t.Fatalf("expected [DONE], body=%s", rec.Body.String())
|
||||
}
|
||||
if streamHasToolCallsDelta(frames) {
|
||||
t.Fatalf("did not expect tool_calls delta for plain text: %s", rec.Body.String())
|
||||
}
|
||||
content := strings.Builder{}
|
||||
for _, frame := range frames {
|
||||
choices, _ := frame["choices"].([]any)
|
||||
for _, item := range choices {
|
||||
choice, _ := item.(map[string]any)
|
||||
delta, _ := choice["delta"].(map[string]any)
|
||||
if c, ok := delta["content"].(string); ok {
|
||||
content.WriteString(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
if got := content.String(); got == "" {
|
||||
t.Fatalf("expected streamed content in tool mode plain text, body=%s", rec.Body.String())
|
||||
}
|
||||
if streamFinishReason(frames) != "stop" {
|
||||
t.Fatalf("expected finish_reason=stop, body=%s", rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) {
|
||||
h := &Handler{}
|
||||
resp := makeSSEHTTPResponse(
|
||||
`data: {"p":"response/content","v":"前置正文A。"}`,
|
||||
`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"}`,
|
||||
`data: {"p":"response/content","v":"后置正文B。"}`,
|
||||
`data: [DONE]`,
|
||||
)
|
||||
rec := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
|
||||
|
||||
h.handleStream(rec, req, resp, "cid7", "deepseek-chat", "prompt", false, false, []string{"search"})
|
||||
|
||||
frames, done := parseSSEDataFrames(t, rec.Body.String())
|
||||
if !done {
|
||||
t.Fatalf("expected [DONE], body=%s", rec.Body.String())
|
||||
}
|
||||
if !streamHasToolCallsDelta(frames) {
|
||||
t.Fatalf("expected tool_calls delta in mixed stream, body=%s", rec.Body.String())
|
||||
}
|
||||
if streamHasRawToolJSONContent(frames) {
|
||||
t.Fatalf("raw tool_calls JSON leaked in mixed stream: %s", rec.Body.String())
|
||||
}
|
||||
content := strings.Builder{}
|
||||
for _, frame := range frames {
|
||||
choices, _ := frame["choices"].([]any)
|
||||
for _, item := range choices {
|
||||
choice, _ := item.(map[string]any)
|
||||
delta, _ := choice["delta"].(map[string]any)
|
||||
if c, ok := delta["content"].(string); ok {
|
||||
content.WriteString(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
got := content.String()
|
||||
if !strings.Contains(got, "前置正文A。") || !strings.Contains(got, "后置正文B。") {
|
||||
t.Fatalf("expected pre/post plain text to pass sieve, got=%q", got)
|
||||
}
|
||||
if streamFinishReason(frames) != "tool_calls" {
|
||||
t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,11 @@
|
||||
"outputDirectory": "static",
|
||||
"functions": {
|
||||
"api/chat-stream.js": {
|
||||
"includeFiles": "**/sha3_wasm_bg.7b9ca65ddd.wasm"
|
||||
"includeFiles": "**/sha3_wasm_bg.7b9ca65ddd.wasm",
|
||||
"maxDuration": 300
|
||||
},
|
||||
"api/index.go": {
|
||||
"maxDuration": 300
|
||||
}
|
||||
},
|
||||
"rewrites": [
|
||||
|
||||
@@ -115,7 +115,8 @@ export default function ApiTester({ config, onMessage, authFetch }) {
|
||||
headers['X-Ds2-Target-Account'] = selectedAccount
|
||||
}
|
||||
|
||||
const res = await fetch('/v1/chat/completions', {
|
||||
const endpoint = streamingMode ? '/v1/chat/completions' : '/v1/chat/completions?__go=1'
|
||||
const res = await fetch(endpoint, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify({
|
||||
|
||||
Reference in New Issue
Block a user