diff --git a/DEPLOY.en.md b/DEPLOY.en.md index 523c098..2d88c27 100644 --- a/DEPLOY.en.md +++ b/DEPLOY.en.md @@ -71,6 +71,7 @@ Notes: - `api/chat-stream.js` automatically falls back to the Go entry for non-stream requests or requests with `tools` (internal `__go=1`) - `api/chat-stream.js` is data-path only (stream relay + SSE conversion); auth/account/session/PoW preparation still comes from an internal Go prepare endpoint (enabled on Vercel only) - Go prepare creates a stream lease and Node releases it when streaming ends, keeping account occupancy semantics aligned with native Go streaming +- `vercel.json` sets `maxDuration: 300` for both `api/chat-stream.js` and `api/index.go` (subject to your Vercel plan limits) Minimum environment variables: diff --git a/DEPLOY.md b/DEPLOY.md index 48e44b0..cb2c7b5 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -71,6 +71,7 @@ docker-compose up -d --build - `api/chat-stream.js` 对非流式请求或 `tools` 请求会自动回退到 Go 入口(内部 `__go=1`) - `api/chat-stream.js` 仅负责流式数据转发与 SSE 转换;鉴权、账号选择、会话创建、PoW 计算仍由 Go 内部 prepare 接口完成(仅 Vercel 启用) - Go prepare 会创建流式 lease,Node 在流结束后回调 release;账号占用语义与 Go 原生流式保持一致 +- `vercel.json` 已将 `api/chat-stream.js` 与 `api/index.go` 的 `maxDuration` 设为 `300`(受套餐上限约束) 至少配置环境变量: diff --git a/README.MD b/README.MD index cc6e0b7..d21854a 100644 --- a/README.MD +++ b/README.MD @@ -90,6 +90,7 @@ docker-compose logs -f - `/v1/chat/completions` 在 Vercel 上默认走 `api/chat-stream.js`(Node Runtime)以保证实时 SSE - `api/chat-stream.js` 仅负责流式数据转发;鉴权、账号选择、会话/PoW 准备仍由 Go 内部 prepare 接口处理 - Go prepare 会下发 `lease_id`,Node 在流结束后调用 release,确保账号占用时长与 Go 原生流式一致 +- WebUI 的“非流式测试”会直接请求 `?__go=1`,避免 Vercel 上 Node 中转导致长请求更易超时 - 至少配置: - `DS2API_ADMIN_KEY` - `DS2API_CONFIG_JSON`(JSON 字符串或 Base64) diff --git a/README.en.md b/README.en.md index b84cbfc..26e82fb 100644 --- a/README.en.md +++ b/README.en.md @@ -90,6 +90,7 @@ docker-compose logs -f - `/v1/chat/completions` is routed to `api/chat-stream.js` (Node Runtime) on Vercel to preserve real-time SSE - `api/chat-stream.js` is data-path only; auth/account/session/PoW preparation still comes from an internal Go prepare endpoint - Go prepare returns a `lease_id`; Node releases it at stream end so account occupancy duration stays aligned with native Go streaming behavior +- WebUI non-stream test calls `?__go=1` directly to avoid extra Node hop timeout risk on long Vercel requests - Minimum env vars: - `DS2API_ADMIN_KEY` - `DS2API_CONFIG_JSON` (raw JSON or Base64) diff --git a/api/chat-stream.js b/api/chat-stream.js index 8ea21fe..8879a47 100644 --- a/api/chat-stream.js +++ b/api/chat-stream.js @@ -1,5 +1,7 @@ 'use strict'; +const crypto = require('crypto'); + const DEEPSEEK_COMPLETION_URL = 'https://chat.deepseek.com/api/v0/chat/completion'; const BASE_HEADERS = { @@ -37,6 +39,14 @@ module.exports = async function handler(req, res) { } const rawBody = await readRawBody(req); + + // Hard guard: only use Node data path for streaming on Vercel runtime. + // Any non-Vercel runtime always falls back to Go for full behavior parity. + if (!isVercelRuntime()) { + await proxyToGo(req, res, rawBody); + return; + } + let payload; try { payload = JSON.parse(rawBody.toString('utf8') || '{}'); @@ -66,6 +76,7 @@ module.exports = async function handler(req, res) { const finalPrompt = asString(prep.body.final_prompt); const thinkingEnabled = toBool(prep.body.thinking_enabled); const searchEnabled = toBool(prep.body.search_enabled); + const toolNames = extractToolNames(payload.tools); if (!model || !leaseID || !deepseekToken || !powHeader || !completionPayload) { writeOpenAIError(res, 500, 'invalid vercel prepare response'); @@ -103,6 +114,9 @@ module.exports = async function handler(req, res) { let currentType = thinkingEnabled ? 'thinking' : 'text'; let thinkingText = ''; let outputText = ''; + const toolSieveEnabled = toolNames.length > 0; + const toolSieveState = createToolSieveState(); + let toolCallsEmitted = false; const decoder = new TextDecoder(); const reader = completionRes.body.getReader(); let buffered = ''; @@ -115,11 +129,42 @@ module.exports = async function handler(req, res) { } }; + const sendDeltaFrame = (delta) => { + const payloadDelta = { ...delta }; + if (!firstChunkSent) { + payloadDelta.role = 'assistant'; + firstChunkSent = true; + } + sendFrame({ + id: sessionID, + object: 'chat.completion.chunk', + created, + model, + choices: [{ delta: payloadDelta, index: 0 }], + }); + }; + const finish = async (reason) => { if (ended) { return; } ended = true; + if (toolSieveEnabled) { + const tailEvents = flushToolSieve(toolSieveState, toolNames); + for (const evt of tailEvents) { + if (evt.type === 'tool_calls') { + toolCallsEmitted = true; + sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls) }); + continue; + } + if (evt.text) { + sendDeltaFrame({ content: evt.text }); + } + } + } + if (toolCallsEmitted) { + reason = 'tool_calls'; + } sendFrame({ id: sessionID, object: 'chat.completion.chunk', @@ -181,25 +226,27 @@ module.exports = async function handler(req, res) { if (searchEnabled && isCitation(p.text)) { continue; } - const delta = {}; - if (!firstChunkSent) { - delta.role = 'assistant'; - firstChunkSent = true; - } if (p.type === 'thinking') { thinkingText += p.text; - delta.reasoning_content = p.text; + sendDeltaFrame({ reasoning_content: p.text }); } else { outputText += p.text; - delta.content = p.text; + if (!toolSieveEnabled) { + sendDeltaFrame({ content: p.text }); + continue; + } + const events = processToolSieveChunk(toolSieveState, p.text, toolNames); + for (const evt of events) { + if (evt.type === 'tool_calls') { + toolCallsEmitted = true; + sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls) }); + continue; + } + if (evt.text) { + sendDeltaFrame({ content: evt.text }); + } + } } - sendFrame({ - id: sessionID, - object: 'chat.completion.chunk', - created, - model, - choices: [{ delta, index: 0 }], - }); } } } @@ -612,6 +659,10 @@ function toBool(v) { return v === true; } +function isVercelRuntime() { + return asString(process.env.VERCEL) !== '' || asString(process.env.NOW_REGION) !== ''; +} + function asString(v) { if (typeof v === 'string') { return v.trim(); @@ -624,3 +675,412 @@ function asString(v) { } return String(v).trim(); } + +function extractToolNames(tools) { + if (!Array.isArray(tools) || tools.length === 0) { + return []; + } + const out = []; + for (const t of tools) { + if (!t || typeof t !== 'object') { + continue; + } + const fn = t.function && typeof t.function === 'object' ? t.function : t; + const name = asString(fn.name); + if (name) { + out.push(name); + } + } + return out; +} + +function createToolSieveState() { + return { + pending: '', + capture: '', + capturing: false, + }; +} + +function processToolSieveChunk(state, chunk, toolNames) { + if (!state) { + return []; + } + if (chunk) { + state.pending += chunk; + } + const events = []; + // eslint-disable-next-line no-constant-condition + while (true) { + if (state.capturing) { + if (state.pending) { + state.capture += state.pending; + state.pending = ''; + } + const consumed = consumeToolCapture(state.capture, toolNames); + if (!consumed.ready) { + break; + } + state.capture = ''; + state.capturing = false; + if (consumed.prefix) { + events.push({ type: 'text', text: consumed.prefix }); + } + if (Array.isArray(consumed.calls) && consumed.calls.length > 0) { + events.push({ type: 'tool_calls', calls: consumed.calls }); + } + if (consumed.suffix) { + state.pending += consumed.suffix; + } + continue; + } + + if (!state.pending) { + break; + } + + const start = findToolSegmentStart(state.pending); + if (start >= 0) { + const prefix = state.pending.slice(0, start); + if (prefix) { + events.push({ type: 'text', text: prefix }); + } + state.capture = state.pending.slice(start); + state.pending = ''; + state.capturing = true; + continue; + } + + const [safe, hold] = splitSafeContent(state.pending, 64); + if (!safe) { + break; + } + state.pending = hold; + events.push({ type: 'text', text: safe }); + } + return events; +} + +function flushToolSieve(state, toolNames) { + if (!state) { + return []; + } + const events = processToolSieveChunk(state, '', toolNames); + if (state.capturing) { + const consumed = consumeToolCapture(state.capture, toolNames); + if (consumed.ready) { + if (consumed.prefix) { + events.push({ type: 'text', text: consumed.prefix }); + } + if (Array.isArray(consumed.calls) && consumed.calls.length > 0) { + events.push({ type: 'tool_calls', calls: consumed.calls }); + } + if (consumed.suffix) { + events.push({ type: 'text', text: consumed.suffix }); + } + } else if (state.capture) { + events.push({ type: 'text', text: state.capture }); + } + state.capture = ''; + state.capturing = false; + } + if (state.pending) { + events.push({ type: 'text', text: state.pending }); + state.pending = ''; + } + return events; +} + +function splitSafeContent(s, holdChars) { + const chars = Array.from(s || ''); + if (chars.length <= holdChars) { + return ['', s]; + } + return [chars.slice(0, chars.length - holdChars).join(''), chars.slice(chars.length - holdChars).join('')]; +} + +function findToolSegmentStart(s) { + if (!s) { + return -1; + } + const lower = s.toLowerCase(); + const keyIdx = lower.indexOf('tool_calls'); + if (keyIdx < 0) { + return -1; + } + const start = s.slice(0, keyIdx).lastIndexOf('{'); + return start >= 0 ? start : keyIdx; +} + +function consumeToolCapture(captured, toolNames) { + if (!captured) { + return { ready: false, prefix: '', calls: [], suffix: '' }; + } + const lower = captured.toLowerCase(); + const keyIdx = lower.indexOf('tool_calls'); + if (keyIdx < 0) { + if (Array.from(captured).length >= 256) { + return { ready: true, prefix: captured, calls: [], suffix: '' }; + } + return { ready: false, prefix: '', calls: [], suffix: '' }; + } + const start = captured.slice(0, keyIdx).lastIndexOf('{'); + if (start < 0) { + if (Array.from(captured).length >= 512) { + return { ready: true, prefix: captured, calls: [], suffix: '' }; + } + return { ready: false, prefix: '', calls: [], suffix: '' }; + } + const obj = extractJSONObjectFrom(captured, start); + if (!obj.ok) { + if (Array.from(captured).length >= 4096) { + return { ready: true, prefix: captured, calls: [], suffix: '' }; + } + return { ready: false, prefix: '', calls: [], suffix: '' }; + } + const parsed = parseToolCalls(captured.slice(start, obj.end), toolNames); + if (parsed.length === 0) { + return { + ready: true, + prefix: captured.slice(0, obj.end), + calls: [], + suffix: captured.slice(obj.end), + }; + } + return { + ready: true, + prefix: captured.slice(0, start), + calls: parsed, + suffix: captured.slice(obj.end), + }; +} + +function extractJSONObjectFrom(text, start) { + if (!text || start < 0 || start >= text.length || text[start] !== '{') { + return { ok: false, end: 0 }; + } + let depth = 0; + let quote = ''; + let escaped = false; + for (let i = start; i < text.length; i += 1) { + const ch = text[i]; + if (quote) { + if (escaped) { + escaped = false; + continue; + } + if (ch === '\\') { + escaped = true; + continue; + } + if (ch === quote) { + quote = ''; + } + continue; + } + if (ch === '"' || ch === "'") { + quote = ch; + continue; + } + if (ch === '{') { + depth += 1; + continue; + } + if (ch === '}') { + depth -= 1; + if (depth === 0) { + return { ok: true, end: i + 1 }; + } + } + } + return { ok: false, end: 0 }; +} + +function parseToolCalls(text, toolNames) { + if (!asString(text)) { + return []; + } + const candidates = buildToolCallCandidates(text); + let parsed = []; + for (const c of candidates) { + parsed = parseToolCallsPayload(c); + if (parsed.length > 0) { + break; + } + } + if (parsed.length === 0) { + return []; + } + const allowed = new Set((toolNames || []).filter(Boolean)); + const out = []; + for (const tc of parsed) { + if (!tc || !tc.name) { + continue; + } + if (allowed.size > 0 && !allowed.has(tc.name)) { + continue; + } + out.push({ name: tc.name, input: tc.input || {} }); + } + if (out.length === 0 && parsed.length > 0) { + for (const tc of parsed) { + if (!tc || !tc.name) { + continue; + } + out.push({ name: tc.name, input: tc.input || {} }); + } + } + return out; +} + +function buildToolCallCandidates(text) { + const trimmed = asString(text); + const candidates = [trimmed]; + const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/gi) || []; + for (const block of fenced) { + const m = block.match(/```(?:json)?\s*([\s\S]*?)\s*```/i); + if (m && m[1]) { + candidates.push(asString(m[1])); + } + } + const keyIdx = trimmed.toLowerCase().indexOf('tool_calls'); + if (keyIdx >= 0) { + const start = trimmed.slice(0, keyIdx).lastIndexOf('{'); + if (start >= 0) { + const obj = extractJSONObjectFrom(trimmed, start); + if (obj.ok) { + candidates.push(asString(trimmed.slice(start, obj.end))); + } + } + } + const first = trimmed.indexOf('{'); + const last = trimmed.lastIndexOf('}'); + if (first >= 0 && last > first) { + candidates.push(asString(trimmed.slice(first, last + 1))); + } + return [...new Set(candidates.filter(Boolean))]; +} + +function parseToolCallsPayload(payload) { + let decoded; + try { + decoded = JSON.parse(payload); + } catch (_err) { + return []; + } + if (Array.isArray(decoded)) { + return parseToolCallList(decoded); + } + if (!decoded || typeof decoded !== 'object') { + return []; + } + if (decoded.tool_calls) { + return parseToolCallList(decoded.tool_calls); + } + const one = parseToolCallItem(decoded); + return one ? [one] : []; +} + +function parseToolCallList(v) { + if (!Array.isArray(v)) { + return []; + } + const out = []; + for (const item of v) { + if (!item || typeof item !== 'object') { + continue; + } + const one = parseToolCallItem(item); + if (one) { + out.push(one); + } + } + return out; +} + +function parseToolCallItem(m) { + let name = asString(m.name); + let inputRaw = m.input; + let hasInput = Object.prototype.hasOwnProperty.call(m, 'input'); + const fn = m.function && typeof m.function === 'object' ? m.function : null; + if (fn) { + if (!name) { + name = asString(fn.name); + } + if (!hasInput && Object.prototype.hasOwnProperty.call(fn, 'arguments')) { + inputRaw = fn.arguments; + hasInput = true; + } + } + if (!hasInput) { + for (const k of ['arguments', 'args', 'parameters', 'params']) { + if (Object.prototype.hasOwnProperty.call(m, k)) { + inputRaw = m[k]; + hasInput = true; + break; + } + } + } + if (!name) { + return null; + } + return { + name, + input: parseToolCallInput(inputRaw), + }; +} + +function parseToolCallInput(v) { + if (v == null) { + return {}; + } + if (typeof v === 'string') { + const raw = asString(v); + if (!raw) { + return {}; + } + try { + const parsed = JSON.parse(raw); + if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { + return parsed; + } + } catch (_err) { + return { _raw: raw }; + } + return {}; + } + if (typeof v === 'object' && !Array.isArray(v)) { + return v; + } + try { + const parsed = JSON.parse(JSON.stringify(v)); + if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { + return parsed; + } + } catch (_err) { + return {}; + } + return {}; +} + +function formatOpenAIStreamToolCalls(calls) { + if (!Array.isArray(calls) || calls.length === 0) { + return []; + } + return calls.map((c, idx) => ({ + index: idx, + id: `call_${newCallID()}`, + type: 'function', + function: { + name: c.name, + arguments: JSON.stringify(c.input || {}), + }, + })); +} + +function newCallID() { + if (typeof crypto.randomUUID === 'function') { + return crypto.randomUUID().replace(/-/g, ''); + } + return `${Date.now()}${Math.floor(Math.random() * 1e9)}`; +} diff --git a/internal/adapter/openai/handler.go b/internal/adapter/openai/handler.go index 99793b7..78e9164 100644 --- a/internal/adapter/openai/handler.go +++ b/internal/adapter/openai/handler.go @@ -39,6 +39,17 @@ type streamLease struct { ExpiresAt time.Time } +type toolStreamSieveState struct { + pending strings.Builder + capture strings.Builder + capturing bool +} + +type toolStreamEvent struct { + Content string + ToolCalls []util.ParsedToolCall +} + func RegisterRoutes(r chi.Router, h *Handler) { r.Get("/v1/models", h.ListModels) r.Post("/v1/chat/completions", h.ChatCompletions) @@ -376,6 +387,8 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt created := time.Now().Unix() firstChunkSent := false bufferToolContent := len(toolNames) > 0 + var toolSieve toolStreamSieveState + toolCallsEmitted := false currentType := "text" if thinkingEnabled { currentType = "thinking" @@ -408,7 +421,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt finalThinking := thinking.String() finalText := text.String() detected := util.ParseToolCalls(finalText, toolNames) - if len(detected) > 0 { + if len(detected) > 0 && !toolCallsEmitted { finishReason = "tool_calls" delta := map[string]any{ "tool_calls": util.FormatOpenAIStreamToolCalls(detected), @@ -424,21 +437,29 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt "model": model, "choices": []map[string]any{{"delta": delta, "index": 0}}, }) - } else if bufferToolContent && strings.TrimSpace(finalText) != "" { - delta := map[string]any{ - "content": finalText, + } else if bufferToolContent { + for _, evt := range flushToolSieve(&toolSieve, toolNames) { + if evt.Content == "" { + continue + } + delta := map[string]any{ + "content": evt.Content, + } + if !firstChunkSent { + delta["role"] = "assistant" + firstChunkSent = true + } + sendChunk(map[string]any{ + "id": completionID, + "object": "chat.completion.chunk", + "created": created, + "model": model, + "choices": []map[string]any{{"delta": delta, "index": 0}}, + }) } - if !firstChunkSent { - delta["role"] = "assistant" - firstChunkSent = true - } - sendChunk(map[string]any{ - "id": completionID, - "object": "chat.completion.chunk", - "created": created, - "model": model, - "choices": []map[string]any{{"delta": delta, "index": 0}}, - }) + } + if len(detected) > 0 || toolCallsEmitted { + finishReason = "tool_calls" } promptTokens := util.EstimateTokens(finalPrompt) reasoningTokens := util.EstimateTokens(finalThinking) @@ -532,6 +553,41 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt text.WriteString(p.Text) if !bufferToolContent { delta["content"] = p.Text + } else { + events := processToolSieveChunk(&toolSieve, p.Text, toolNames) + if len(events) == 0 { + // Keep thinking delta only frame. + } + for _, evt := range events { + if len(evt.ToolCalls) > 0 { + toolCallsEmitted = true + tcDelta := map[string]any{ + "tool_calls": util.FormatOpenAIStreamToolCalls(evt.ToolCalls), + } + if !firstChunkSent { + tcDelta["role"] = "assistant" + firstChunkSent = true + } + newChoices = append(newChoices, map[string]any{ + "delta": tcDelta, + "index": 0, + }) + continue + } + if evt.Content != "" { + contentDelta := map[string]any{ + "content": evt.Content, + } + if !firstChunkSent { + contentDelta["role"] = "assistant" + firstChunkSent = true + } + newChoices = append(newChoices, map[string]any{ + "delta": contentDelta, + "index": 0, + }) + } + } } } if len(delta) > 0 { @@ -669,6 +725,224 @@ func vercelInternalSecret() string { return "admin" } +func shouldEmitBufferedToolProbeContent(buffered string) bool { + trimmed := strings.TrimSpace(buffered) + if trimmed == "" { + return false + } + normalized := normalizeToolProbePrefix(trimmed) + if normalized == "" { + return false + } + first := normalized[0] + switch first { + case '{', '[', '`': + lower := strings.ToLower(normalized) + if strings.Contains(lower, "tool_calls") { + return false + } + // Keep a short hold window for JSON-ish starts to avoid leaking tool JSON. + if len([]rune(normalized)) < 20 { + return false + } + return true + default: + // Natural language starts can be streamed immediately. + return true + } +} + +func normalizeToolProbePrefix(s string) string { + t := strings.TrimSpace(s) + if strings.HasPrefix(t, "```") { + t = strings.TrimPrefix(t, "```") + t = strings.TrimSpace(t) + t = strings.TrimPrefix(strings.ToLower(t), "json") + t = strings.TrimSpace(t) + } + return t +} + +func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames []string) []toolStreamEvent { + if state == nil || chunk == "" { + return nil + } + state.pending.WriteString(chunk) + events := make([]toolStreamEvent, 0, 2) + + for { + if state.capturing { + if state.pending.Len() > 0 { + state.capture.WriteString(state.pending.String()) + state.pending.Reset() + } + prefix, calls, suffix, ready := consumeToolCapture(state.capture.String(), toolNames) + if !ready { + break + } + state.capture.Reset() + state.capturing = false + if prefix != "" { + events = append(events, toolStreamEvent{Content: prefix}) + } + if len(calls) > 0 { + events = append(events, toolStreamEvent{ToolCalls: calls}) + } + if suffix != "" { + state.pending.WriteString(suffix) + } + continue + } + + pending := state.pending.String() + if pending == "" { + break + } + start := findToolSegmentStart(pending) + if start >= 0 { + prefix := pending[:start] + if prefix != "" { + events = append(events, toolStreamEvent{Content: prefix}) + } + state.pending.Reset() + state.capture.WriteString(pending[start:]) + state.capturing = true + continue + } + + safe, hold := splitSafeContent(pending, 64) + if safe == "" { + break + } + state.pending.Reset() + state.pending.WriteString(hold) + events = append(events, toolStreamEvent{Content: safe}) + } + + return events +} + +func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStreamEvent { + if state == nil { + return nil + } + events := processToolSieveChunk(state, "", toolNames) + if state.capturing { + raw := state.capture.String() + state.capture.Reset() + state.capturing = false + if raw != "" { + events = append(events, toolStreamEvent{Content: raw}) + } + } + if state.pending.Len() > 0 { + events = append(events, toolStreamEvent{Content: state.pending.String()}) + state.pending.Reset() + } + return events +} + +func splitSafeContent(s string, holdRunes int) (safe, hold string) { + if s == "" || holdRunes <= 0 { + return s, "" + } + runes := []rune(s) + if len(runes) <= holdRunes { + return "", s + } + return string(runes[:len(runes)-holdRunes]), string(runes[len(runes)-holdRunes:]) +} + +func findToolSegmentStart(s string) int { + if s == "" { + return -1 + } + lower := strings.ToLower(s) + keyIdx := strings.Index(lower, "tool_calls") + if keyIdx < 0 { + return -1 + } + if start := strings.LastIndex(s[:keyIdx], "{"); start >= 0 { + return start + } + return keyIdx +} + +func consumeToolCapture(captured string, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) { + if captured == "" { + return "", nil, "", false + } + lower := strings.ToLower(captured) + keyIdx := strings.Index(lower, "tool_calls") + if keyIdx < 0 { + if len([]rune(captured)) >= 256 { + return captured, nil, "", true + } + return "", nil, "", false + } + start := strings.LastIndex(captured[:keyIdx], "{") + if start < 0 { + if len([]rune(captured)) >= 512 { + return captured, nil, "", true + } + return "", nil, "", false + } + obj, end, ok := extractJSONObjectFrom(captured, start) + if !ok { + if len([]rune(captured)) >= 4096 { + return captured, nil, "", true + } + return "", nil, "", false + } + parsed := util.ParseToolCalls(obj, toolNames) + if len(parsed) == 0 { + return captured[:end], nil, captured[end:], true + } + return captured[:start], parsed, captured[end:], true +} + +func extractJSONObjectFrom(text string, start int) (string, int, bool) { + if start < 0 || start >= len(text) || text[start] != '{' { + return "", 0, false + } + depth := 0 + quote := byte(0) + escaped := false + for i := start; i < len(text); i++ { + ch := text[i] + if quote != 0 { + if escaped { + escaped = false + continue + } + if ch == '\\' { + escaped = true + continue + } + if ch == quote { + quote = 0 + } + continue + } + if ch == '"' || ch == '\'' { + quote = ch + continue + } + if ch == '{' { + depth++ + continue + } + if ch == '}' { + depth-- + if depth == 0 { + end := i + 1 + return text[start:end], end, true + } + } + } + return "", 0, false +} + func (h *Handler) holdStreamLease(a *auth.RequestAuth) string { if a == nil { return "" diff --git a/internal/adapter/openai/handler_toolcall_test.go b/internal/adapter/openai/handler_toolcall_test.go index 6bc8d30..e3cfc7d 100644 --- a/internal/adapter/openai/handler_toolcall_test.go +++ b/internal/adapter/openai/handler_toolcall_test.go @@ -335,3 +335,84 @@ func TestHandleStreamUnknownToolStillIntercepted(t *testing.T) { t.Fatalf("raw tool_calls JSON leaked in content delta: %s", rec.Body.String()) } } + +func TestHandleStreamToolsPlainTextStreamsBeforeFinish(t *testing.T) { + h := &Handler{} + resp := makeSSEHTTPResponse( + `data: {"p":"response/content","v":"你好,"}`, + `data: {"p":"response/content","v":"这是普通文本回复。"}`, + `data: [DONE]`, + ) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) + + h.handleStream(rec, req, resp, "cid6", "deepseek-chat", "prompt", false, false, []string{"search"}) + + frames, done := parseSSEDataFrames(t, rec.Body.String()) + if !done { + t.Fatalf("expected [DONE], body=%s", rec.Body.String()) + } + if streamHasToolCallsDelta(frames) { + t.Fatalf("did not expect tool_calls delta for plain text: %s", rec.Body.String()) + } + content := strings.Builder{} + for _, frame := range frames { + choices, _ := frame["choices"].([]any) + for _, item := range choices { + choice, _ := item.(map[string]any) + delta, _ := choice["delta"].(map[string]any) + if c, ok := delta["content"].(string); ok { + content.WriteString(c) + } + } + } + if got := content.String(); got == "" { + t.Fatalf("expected streamed content in tool mode plain text, body=%s", rec.Body.String()) + } + if streamFinishReason(frames) != "stop" { + t.Fatalf("expected finish_reason=stop, body=%s", rec.Body.String()) + } +} + +func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) { + h := &Handler{} + resp := makeSSEHTTPResponse( + `data: {"p":"response/content","v":"前置正文A。"}`, + `data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"}`, + `data: {"p":"response/content","v":"后置正文B。"}`, + `data: [DONE]`, + ) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) + + h.handleStream(rec, req, resp, "cid7", "deepseek-chat", "prompt", false, false, []string{"search"}) + + frames, done := parseSSEDataFrames(t, rec.Body.String()) + if !done { + t.Fatalf("expected [DONE], body=%s", rec.Body.String()) + } + if !streamHasToolCallsDelta(frames) { + t.Fatalf("expected tool_calls delta in mixed stream, body=%s", rec.Body.String()) + } + if streamHasRawToolJSONContent(frames) { + t.Fatalf("raw tool_calls JSON leaked in mixed stream: %s", rec.Body.String()) + } + content := strings.Builder{} + for _, frame := range frames { + choices, _ := frame["choices"].([]any) + for _, item := range choices { + choice, _ := item.(map[string]any) + delta, _ := choice["delta"].(map[string]any) + if c, ok := delta["content"].(string); ok { + content.WriteString(c) + } + } + } + got := content.String() + if !strings.Contains(got, "前置正文A。") || !strings.Contains(got, "后置正文B。") { + t.Fatalf("expected pre/post plain text to pass sieve, got=%q", got) + } + if streamFinishReason(frames) != "tool_calls" { + t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String()) + } +} diff --git a/vercel.json b/vercel.json index 0645290..2e68a94 100644 --- a/vercel.json +++ b/vercel.json @@ -4,7 +4,11 @@ "outputDirectory": "static", "functions": { "api/chat-stream.js": { - "includeFiles": "**/sha3_wasm_bg.7b9ca65ddd.wasm" + "includeFiles": "**/sha3_wasm_bg.7b9ca65ddd.wasm", + "maxDuration": 300 + }, + "api/index.go": { + "maxDuration": 300 } }, "rewrites": [ diff --git a/webui/src/components/ApiTester.jsx b/webui/src/components/ApiTester.jsx index c2c228a..7d49982 100644 --- a/webui/src/components/ApiTester.jsx +++ b/webui/src/components/ApiTester.jsx @@ -115,7 +115,8 @@ export default function ApiTester({ config, onMessage, authFetch }) { headers['X-Ds2-Target-Account'] = selectedAccount } - const res = await fetch('/v1/chat/completions', { + const endpoint = streamingMode ? '/v1/chat/completions' : '/v1/chat/completions?__go=1' + const res = await fetch(endpoint, { method: 'POST', headers, body: JSON.stringify({