refactor: extract SSE parsing and Vercel stream logic into dedicated implementation modules

2026-05-13 20:57:41 +08:00 · 2026-04-05 16:32:13 +08:00
parent 1d80f644d4
commit 298a6f27cc
4 changed files with 847 additions and 837 deletions
--- a/internal/js/chat-stream/sse_parse_impl.js
+++ b/internal/js/chat-stream/sse_parse_impl.js
@@ -0,0 +1,535 @@
+'use strict';
+
+// Implementation moved here to keep the line-gate wrapper tiny.
+
+const {
+  SKIP_PATTERNS,
+  SKIP_EXACT_PATHS,
+} = require('../shared/deepseek-constants');
+
+/**
+ * Interprets one decoded stream chunk and reports the text parts it carries.
+ *
+ * @param {*} chunk - Decoded chunk; anything that is not a non-null object yields `parsed: false`.
+ * @param {boolean} thinkingEnabled - When truthy, chunks with an empty path take the running part type.
+ * @param {string} currentType - Part type carried over from the previous chunk.
+ * @param {boolean} [stripReferenceMarkers=true] - Forwarded to asContentString.
+ * @returns {{parsed: boolean, parts: Array<{text: string, type: string}>, finished: boolean,
+ *   contentFilter: boolean, errorMessage: string, outputTokens: number, newType: string}}
+ */
+function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenceMarkers = true) {
+  // Every exit returns the same record shape; overrides are layered on these defaults.
+  const buildResult = (overrides) => ({
+    parsed: true,
+    parts: [],
+    finished: false,
+    contentFilter: false,
+    errorMessage: '',
+    outputTokens: 0,
+    newType: currentType,
+    ...overrides,
+  });
+
+  if (!chunk || typeof chunk !== 'object') {
+    return buildResult({ parsed: false });
+  }
+  if (Object.prototype.hasOwnProperty.call(chunk, 'error')) {
+    return buildResult({ finished: true, errorMessage: formatErrorMessage(chunk.error) });
+  }
+
+  const pathValue = asString(chunk.p);
+  const outputTokens = extractAccumulatedTokenUsage(chunk);
+
+  if (hasContentFilterStatus(chunk)) {
+    return buildResult({ finished: true, contentFilter: true, outputTokens });
+  }
+  if (shouldSkipPath(pathValue)) {
+    return buildResult({ outputTokens });
+  }
+  if (pathValue === 'response/status' && asString(chunk.v) === 'FINISHED') {
+    return buildResult({ finished: true, outputTokens });
+  }
+  if (!Object.prototype.hasOwnProperty.call(chunk, 'v')) {
+    return buildResult({ outputTokens });
+  }
+
+  let newType = currentType;
+  const parts = [];
+
+  // Maps a fragment's declared type to a part type; '' means no recognised type.
+  const declaredKind = (rawType) => {
+    const label = asString(rawType).toUpperCase();
+    if (label === 'THINK' || label === 'THINKING') {
+      return 'thinking';
+    }
+    if (label === 'RESPONSE') {
+      return 'text';
+    }
+    return '';
+  };
+
+  // Emits one fragment's content; a recognised type also becomes the running type.
+  const collectFragment = (frag, fallbackType) => {
+    if (!frag || typeof frag !== 'object') {
+      return;
+    }
+    const text = asContentString(frag.content, stripReferenceMarkers);
+    if (!text) {
+      return;
+    }
+    const kind = declaredKind(frag.type);
+    if (kind) {
+      newType = kind;
+    }
+    parts.push({ text, type: kind || fallbackType });
+  };
+
+  const isFragmentAppend =
+    pathValue === 'response/fragments' &&
+    asString(chunk.o).toUpperCase() === 'APPEND' &&
+    Array.isArray(chunk.v);
+  if (isFragmentAppend) {
+    chunk.v.forEach((frag) => collectFragment(frag, 'text'));
+  }
+
+  // A batched 'response' update may append fragments; only the running type is tracked here.
+  if (pathValue === 'response' && Array.isArray(chunk.v)) {
+    for (const item of chunk.v) {
+      const appendsFragments =
+        item &&
+        typeof item === 'object' &&
+        item.p === 'fragments' &&
+        item.o === 'APPEND' &&
+        Array.isArray(item.v);
+      if (!appendsFragments) {
+        continue;
+      }
+      for (const frag of item.v) {
+        const kind = declaredKind(frag && frag.type);
+        if (kind) {
+          newType = kind;
+        }
+      }
+    }
+  }
+
+  const isFragmentContentPath =
+    pathValue.includes('response/fragments') && pathValue.includes('/content');
+  let partType = 'text';
+  if (pathValue === 'response/thinking_content') {
+    partType = 'thinking';
+  } else if (pathValue === 'response/content') {
+    partType = 'text';
+  } else if (isFragmentContentPath || (!pathValue && thinkingEnabled)) {
+    partType = newType;
+  }
+
+  const val = chunk.v;
+
+  if (typeof val === 'string') {
+    const isTerminalStatus = val === 'FINISHED' && (!pathValue || pathValue === 'status');
+    if (isTerminalStatus) {
+      return buildResult({ finished: true, outputTokens, newType });
+    }
+    const text = asContentString(val, stripReferenceMarkers);
+    if (text) {
+      parts.push({ text, type: partType });
+    }
+    return buildResult({ parts: filterLeakedContentFilterParts(parts), outputTokens, newType });
+  }
+
+  if (Array.isArray(val)) {
+    const extracted = extractContentRecursive(val, partType, stripReferenceMarkers);
+    if (extracted.finished) {
+      return buildResult({ finished: true, outputTokens, newType });
+    }
+    parts.push(...extracted.parts);
+    return buildResult({ parts: filterLeakedContentFilterParts(parts), outputTokens, newType });
+  }
+
+  if (val && typeof val === 'object') {
+    // Fragments may sit directly on the value or under its `response` object.
+    const holder = val.response && typeof val.response === 'object' ? val.response : val;
+    if (Array.isArray(holder.fragments)) {
+      holder.fragments.forEach((frag) => collectFragment(frag, partType));
+    }
+  }
+
+  return buildResult({ parts: filterLeakedContentFilterParts(parts), outputTokens, newType });
+}
+
+/**
+ * Collects text parts from an array of nested update items.
+ *
+ * Items that are not objects, or that lack an own `v` property, are ignored.
+ * A `status` item whose value is `FINISHED` ends the scan and discards
+ * whatever was collected so far.
+ *
+ * @param {Array<*>} items - Items to scan.
+ * @param {string} defaultType - Part type used when neither the item type nor its path decides.
+ * @param {boolean} [stripReferenceMarkers=true] - Forwarded to asContentString.
+ * @returns {{parts: Array<{text: string, type: string}>, finished: boolean}}
+ */
+function extractContentRecursive(items, defaultType, stripReferenceMarkers = true) {
+  // Declared THINK/THINKING/RESPONSE types win; anything else uses the fallback.
+  const resolveType = (rawType, fallback) => {
+    const label = asString(rawType).toUpperCase();
+    if (label === 'THINK' || label === 'THINKING') {
+      return 'thinking';
+    }
+    return label === 'RESPONSE' ? 'text' : fallback;
+  };
+
+  const collected = [];
+  for (const entry of items) {
+    const usable =
+      entry && typeof entry === 'object' && Object.prototype.hasOwnProperty.call(entry, 'v');
+    if (!usable) {
+      continue;
+    }
+
+    const entryPath = asString(entry.p);
+    const payload = entry.v;
+    if (entryPath === 'status' && asString(payload) === 'FINISHED') {
+      return { parts: [], finished: true };
+    }
+    if (shouldSkipPath(entryPath)) {
+      continue;
+    }
+
+    // An item carrying its own `content` is emitted directly.
+    const direct = asContentString(entry.content, stripReferenceMarkers);
+    if (direct) {
+      collected.push({ text: direct, type: resolveType(entry.type, defaultType) });
+      continue;
+    }
+
+    let pathType = defaultType;
+    if (entryPath.includes('thinking')) {
+      pathType = 'thinking';
+    } else if (entryPath.includes('content') || entryPath === 'response' || entryPath === 'fragments') {
+      pathType = 'text';
+    }
+
+    if (typeof payload === 'string') {
+      const text = payload && payload !== 'FINISHED' ? asContentString(payload, stripReferenceMarkers) : '';
+      if (text) {
+        collected.push({ text, type: pathType });
+      }
+      continue;
+    }
+    if (!Array.isArray(payload)) {
+      continue;
+    }
+
+    for (const inner of payload) {
+      if (typeof inner === 'string') {
+        const text = inner ? asContentString(inner, stripReferenceMarkers) : '';
+        if (text) {
+          collected.push({ text, type: pathType });
+        }
+        continue;
+      }
+      if (!inner || typeof inner !== 'object') {
+        continue;
+      }
+      const text = asContentString(inner.content, stripReferenceMarkers);
+      if (text) {
+        collected.push({ text, type: resolveType(inner.type, pathType) });
+      }
+    }
+  }
+  return { parts: collected, finished: false };
+}
+
+/**
+ * Removes leaked `CONTENT_FILTER` markers from a list of parts.
+ *
+ * Non-array or empty input is returned as-is. Otherwise a new array is built:
+ * non-object entries are dropped, untouched parts are passed through by
+ * reference, and stripped parts are copied with the cleaned text unless the
+ * cleaned text is judged droppable.
+ *
+ * @param {*} parts - Candidate list of `{text, type}` parts.
+ * @returns {*} The filtered list, or the input itself when it is not a non-empty array.
+ */
+function filterLeakedContentFilterParts(parts) {
+  if (!Array.isArray(parts) || parts.length === 0) {
+    return parts;
+  }
+  return parts.reduce((kept, part) => {
+    if (!part || typeof part !== 'object') {
+      return kept;
+    }
+    const cleaned = stripLeakedContentFilterSuffix(part.text);
+    if (!cleaned.stripped) {
+      kept.push(part);
+    } else if (!shouldDropCleanedLeakedChunk(cleaned.text)) {
+      kept.push({ ...part, text: cleaned.text });
+    }
+    return kept;
+  }, []);
+}
+
+/**
+ * Cuts `text` at the first case-insensitive occurrence of `CONTENT_FILTER`
+ * and trims trailing spaces, tabs and carriage returns from what remains.
+ *
+ * The marker is located in the original string rather than in an upper-cased
+ * copy: upper-casing can change a string's length (for example `ß` becomes
+ * `SS`), which would make the index point at the wrong character.
+ *
+ * @param {*} text - Candidate text; non-strings and '' are returned untouched.
+ * @returns {{text: *, stripped: boolean}} The (possibly shortened) text and whether a marker was found.
+ */
+function stripLeakedContentFilterSuffix(text) {
+  if (typeof text !== 'string' || text === '') {
+    return { text, stripped: false };
+  }
+  const idx = text.search(/CONTENT_FILTER/i);
+  if (idx < 0) {
+    return { text, stripped: false };
+  }
+  return {
+    // Newlines are deliberately left in place; only horizontal whitespace is trimmed.
+    text: text.slice(0, idx).replace(/[ \t\r]+$/g, ''),
+    stripped: true,
+  };
+}
+
+/**
+ * Decides whether a chunk that had a leaked marker stripped should be discarded.
+ *
+ * An empty string is dropped; a string containing a newline is always kept;
+ * anything else is dropped only when it is blank after trimming.
+ *
+ * @param {*} cleaned - Text left over after the marker was removed.
+ * @returns {boolean} True when the chunk should be dropped.
+ */
+function shouldDropCleanedLeakedChunk(cleaned) {
+  if (cleaned === '') {
+    return true;
+  }
+  const spansLines = typeof cleaned === 'string' && cleaned.includes('\n');
+  return !spansLines && asString(cleaned).trim() === '';
+}
+
+/**
+ * Reports whether a chunk signals a content-filter stop, either through its
+ * own `code` or anywhere in its nested values.
+ *
+ * @param {*} chunk - Decoded chunk; non-objects never match.
+ * @returns {boolean}
+ */
+function hasContentFilterStatus(chunk) {
+  if (!chunk || typeof chunk !== 'object') {
+    return false;
+  }
+  const topLevelCode = asString(chunk.code).toLowerCase();
+  return topLevelCode === 'content_filter' || hasContentFilterStatusValue(chunk);
+}
+
+/**
+ * Recursively searches a value for a content-filter marker.
+ *
+ * An object matches when its `p` contains "status" and its `v` equals
+ * `content_filter`, or when its `code` equals `content_filter` (both compared
+ * case-insensitively); otherwise every property value is searched in turn.
+ *
+ * @param {*} v - Value to inspect; arrays are searched element by element.
+ * @returns {boolean}
+ */
+function hasContentFilterStatusValue(v) {
+  const isFilterMarker = (raw) => asString(raw).toLowerCase() === 'content_filter';
+
+  if (Array.isArray(v)) {
+    return v.some((item) => hasContentFilterStatusValue(item));
+  }
+  if (!v || typeof v !== 'object') {
+    return false;
+  }
+
+  const mentionsStatus = asString(v.p).toLowerCase().includes('status');
+  if ((mentionsStatus && isFilterMarker(v.v)) || isFilterMarker(v.code)) {
+    return true;
+  }
+  return Object.values(v).some((value) => hasContentFilterStatusValue(value));
+}
+
+/**
+ * Returns the accumulated token usage found inside `chunk`, or 0 when none is
+ * present. Exported entry point that delegates to findAccumulatedTokenUsage.
+ */
+function extractAccumulatedTokenUsage(chunk) {
+  return findAccumulatedTokenUsage(chunk);
+}
+
+/**
+ * Depth-first search for the first positive accumulated token count.
+ *
+ * For an object the lookup order is: a `p` mentioning
+ * `accumulated_token_usage` paired with a numeric `v`, then a direct
+ * `accumulated_token_usage` property, then each property value in turn.
+ *
+ * @param {*} v - Value to search.
+ * @returns {number} The first positive integer found, or 0.
+ */
+function findAccumulatedTokenUsage(v) {
+  // Returns the first positive result among the given candidates, or 0.
+  const firstPositive = (candidates) => {
+    for (const candidate of candidates) {
+      const found = findAccumulatedTokenUsage(candidate);
+      if (found > 0) {
+        return found;
+      }
+    }
+    return 0;
+  };
+
+  if (Array.isArray(v)) {
+    return firstPositive(v);
+  }
+  if (!v || typeof v !== 'object') {
+    return 0;
+  }
+
+  const namesUsage = asString(v.p).toLowerCase().includes('accumulated_token_usage');
+  const fromPath = namesUsage ? toInt(v.v) : 0;
+  if (fromPath > 0) {
+    return fromPath;
+  }
+  const direct = toInt(v.accumulated_token_usage);
+  return direct > 0 ? direct : firstPositive(Object.values(v));
+}
+
+/**
+ * Truncates a finite number toward zero; every other input (including
+ * numeric strings, NaN and infinities) yields 0.
+ *
+ * @param {*} v - Candidate number.
+ * @returns {number}
+ */
+function toInt(v) {
+  const isUsableNumber = typeof v === 'number' && Number.isFinite(v);
+  return isUsableNumber ? Math.trunc(v) : 0;
+}
+
+/**
+ * Renders an error payload as a string.
+ *
+ * Strings pass through unchanged, null/undefined become "null"/"undefined",
+ * and everything else is JSON-encoded. `JSON.stringify` can throw (cycles,
+ * BigInt) and can also return `undefined` without throwing (functions,
+ * symbols, a `toJSON` that yields undefined); both cases fall back to
+ * `String(v)` so the result is always a string.
+ *
+ * @param {*} v - Error payload taken from a chunk.
+ * @returns {string}
+ */
+function formatErrorMessage(v) {
+  if (typeof v === 'string') {
+    return v;
+  }
+  if (v == null) {
+    return String(v);
+  }
+  try {
+    const encoded = JSON.stringify(v);
+    if (typeof encoded === 'string') {
+      return encoded;
+    }
+  } catch (_err) {
+    // Not serialisable; use the generic string conversion below.
+  }
+  return String(v);
+}
+
+/**
+ * Tells whether a chunk path carries no user-visible content and should be ignored.
+ *
+ * A path is skipped when it is a per-fragment status path, is listed in
+ * SKIP_EXACT_PATHS, or contains any entry of SKIP_PATTERNS as a substring.
+ *
+ * @param {string} pathValue - Path taken from the chunk.
+ * @returns {boolean}
+ */
+function shouldSkipPath(pathValue) {
+  if (isFragmentStatusPath(pathValue) || SKIP_EXACT_PATHS.has(pathValue)) {
+    return true;
+  }
+  return [...SKIP_PATTERNS].some((pattern) => pathValue.includes(pattern));
+}
+
+/**
+ * Matches per-fragment status paths of the form
+ * `response/fragments/<integer>/status` (case-insensitive, index may be negative).
+ * The top-level `response/status` path and empty values never match.
+ *
+ * @param {string} pathValue - Path taken from the chunk.
+ * @returns {boolean}
+ */
+function isFragmentStatusPath(pathValue) {
+  const isCandidate = Boolean(pathValue) && pathValue !== 'response/status';
+  return isCandidate && /^response\/fragments\/-?\d+\/status$/i.test(pathValue);
+}
+
+/**
+ * Reports whether the text, ignoring surrounding whitespace, begins with `[citation:`.
+ *
+ * @param {*} text - Candidate text.
+ * @returns {boolean}
+ */
+function isCitation(text) {
+  const normalized = asString(text).trim();
+  return normalized.startsWith('[citation:');
+}
+
+/**
+ * Flattens a content value into a single string.
+ *
+ * Strings are used as-is, arrays are concatenated element by element, and
+ * objects contribute their own `content` property or, failing that, their own
+ * `v` property; objects with neither yield ''. null/undefined yield '' and any
+ * other primitive is converted with `String`.
+ *
+ * @param {*} v - Content value.
+ * @param {boolean} [stripReferenceMarkers=true] - When truthy, `[reference:N]` markers are removed.
+ * @returns {string}
+ */
+function asContentString(v, stripReferenceMarkers = true) {
+  const finalize = (text) => (stripReferenceMarkers ? stripReferenceMarkersText(text) : text);
+
+  if (typeof v === 'string') {
+    return finalize(v);
+  }
+  if (v == null) {
+    return '';
+  }
+  if (Array.isArray(v)) {
+    return v.reduce((joined, item) => joined + asContentString(item, stripReferenceMarkers), '');
+  }
+  if (typeof v === 'object') {
+    // `content` takes precedence over `v` when both are present.
+    for (const key of ['content', 'v']) {
+      if (Object.prototype.hasOwnProperty.call(v, key)) {
+        return asContentString(v[key], stripReferenceMarkers);
+      }
+    }
+    return '';
+  }
+  return finalize(String(v));
+}
+
+/**
+ * Removes every `[reference:N]` marker (case-insensitive, optional whitespace
+ * after the colon) from the text. Falsy input is returned unchanged.
+ *
+ * @param {string} text - Text to clean.
+ * @returns {string} The text without reference markers.
+ */
+function stripReferenceMarkersText(text) {
+  return text ? text.replace(/\[reference:\s*\d+\]/gi, '') : text;
+}
+
+/**
+ * Coerces a value to a trimmed string.
+ *
+ * Arrays are represented by their first element (recursively), and
+ * null/undefined become ''. Everything else goes through `String`.
+ *
+ * @param {*} v - Value to coerce.
+ * @returns {string}
+ */
+function asString(v) {
+  if (Array.isArray(v)) {
+    return asString(v[0]);
+  }
+  if (v == null) {
+    return '';
+  }
+  const text = typeof v === 'string' ? v : String(v);
+  return text.trim();
+}
+
+// Public surface of this module. `stripReferenceMarkers` is the exported name
+// of the internal stripReferenceMarkersText helper.
+module.exports = {
+  parseChunkForContent,
+  extractContentRecursive,
+  filterLeakedContentFilterParts,
+  hasContentFilterStatus,
+  extractAccumulatedTokenUsage,
+  shouldSkipPath,
+  isFragmentStatusPath,
+  isCitation,
+  stripReferenceMarkers: stripReferenceMarkersText,
+};