refactor: Consolidate SSE and stream parsing logic into sse_parser.py, remove stream_parser.py, and update WebUI panel text.

This commit is contained in:
CJACK
2026-02-01 15:37:01 +08:00
parent e990517970
commit 1cf52502bb
5 changed files with 227 additions and 203 deletions

View File

@@ -1,15 +1,55 @@
# -*- coding: utf-8 -*-
"""DeepSeek SSE 流解析模块
这个模块包含解析 DeepSeek SSE 响应的公共逻辑,供 openai.py 和 accounts.py 共用。
这个模块包含解析 DeepSeek SSE 响应的公共逻辑,供 openai.py、claude.py 和 accounts.py 共用。
合并了原 sse_parser.py 和 stream_parser.py 的功能。
"""
import json
import re
from typing import List, Tuple, Optional, Dict, Any, Generator
from typing import List, Tuple, Optional, Dict, Any
from .config import logger
from .constants import SKIP_PATTERNS
# 预编译正则表达式
_TOOL_CALL_PATTERN = re.compile(r'\{\s*["\']tool_calls["\']\s*:\s*\[(.*?)\]\s*\}', re.DOTALL)
_CITATION_PATTERN = re.compile(r"^\[citation:")
# ----------------------------------------------------------------------
# 基础解析函数
# ----------------------------------------------------------------------
def parse_deepseek_sse_line(raw_line: bytes) -> Optional[Dict[str, Any]]:
    """Parse a single DeepSeek SSE line.

    Args:
        raw_line: raw byte line from the stream.

    Returns:
        The decoded chunk dict, ``{"type": "done"}`` for the ``[DONE]``
        sentinel, or None when the line is empty, is not a ``data:`` line,
        or cannot be decoded/parsed.
    """
    try:
        decoded = raw_line.decode("utf-8")
    except Exception as e:
        logger.warning(f"[parse_deepseek_sse_line] 解码失败: {e}")
        return None
    # Only SSE data lines carry payload; everything else is ignored.
    if not (decoded and decoded.startswith("data:")):
        return None
    payload = decoded[len("data:"):].strip()
    if payload == "[DONE]":
        return {"type": "done"}
    try:
        return json.loads(payload)
    except json.JSONDecodeError as e:
        logger.warning(f"[parse_deepseek_sse_line] JSON解析失败: {e}")
        return None
def should_skip_chunk(chunk_path: str) -> bool:
"""判断是否应该跳过这个 chunk状态相关不是内容"""
@@ -33,6 +73,10 @@ def is_search_result(item: dict) -> bool:
return "url" in item and "title" in item
# ----------------------------------------------------------------------
# 内容提取函数
# ----------------------------------------------------------------------
def extract_content_from_item(item: dict, default_type: str = "text") -> Optional[Tuple[str, str]]:
"""从包含 content 和 type 的项中提取内容
@@ -57,7 +101,7 @@ def extract_content_recursive(items: List[Dict], default_type: str = "text") ->
返回 [(content, content_type), ...] 列表,
如果遇到 FINISHED 信号返回 None
"""
extracted = []
extracted: List[Tuple[str, str]] = []
for item in items:
if not isinstance(item, dict):
continue
@@ -117,8 +161,15 @@ def extract_content_recursive(items: List[Dict], default_type: str = "text") ->
return extracted
def parse_sse_chunk_for_content(chunk: dict, thinking_enabled: bool = False,
current_fragment_type: str = "thinking") -> Tuple[List[Tuple[str, str]], bool, str]:
# ----------------------------------------------------------------------
# 高级解析函数
# ----------------------------------------------------------------------
def parse_sse_chunk_for_content(
chunk: Dict[str, Any],
thinking_enabled: bool = False,
current_fragment_type: str = "thinking"
) -> Tuple[List[Tuple[str, str]], bool, str]:
"""解析单个 SSE chunk 并提取内容
Args:
@@ -138,7 +189,7 @@ def parse_sse_chunk_for_content(chunk: dict, thinking_enabled: bool = False,
v_value = chunk["v"]
chunk_path = chunk.get("p", "")
contents = []
contents: List[Tuple[str, str]] = []
new_fragment_type = current_fragment_type
# 跳过状态相关 chunk
@@ -206,3 +257,160 @@ def parse_sse_chunk_for_content(chunk: dict, thinking_enabled: bool = False,
return (contents, False, new_fragment_type)
def extract_content_from_chunk(chunk: Dict[str, Any]) -> Tuple[str, str, bool]:
    """Extract content from a DeepSeek chunk (simplified, legacy-compatible API).

    Args:
        chunk: parsed chunk dict.

    Returns:
        ``(content, content_type, is_finished)`` where ``content_type`` is
        ``"thinking"`` or ``"text"`` and ``is_finished`` is True once the
        response has ended.
    """
    if chunk.get("type") == "done":
        return "", "text", True
    # Content-moderation / sensitive-word blocking ends the response.
    if "error" in chunk or chunk.get("code") == "content_filter":
        logger.warning(f"[extract_content_from_chunk] 检测到内容过滤: {chunk}")
        return "", "text", True
    if "v" not in chunk:
        return "", "text", False

    path = chunk.get("p", "")
    if path == "response/search_status":
        # Search-status updates carry no user-visible content.
        return "", "text", False
    ptype = "thinking" if path == "response/thinking_content" else "text"

    v_value = chunk["v"]
    if isinstance(v_value, str):
        finished = v_value == "FINISHED"
        return ("" if finished else v_value), ptype, finished
    if isinstance(v_value, list):
        finished = any(
            isinstance(item, dict)
            and item.get("p") == "status"
            and item.get("v") == "FINISHED"
            for item in v_value
        )
        return "", ptype, finished
    return "", ptype, False
# ----------------------------------------------------------------------
# 响应收集函数
# ----------------------------------------------------------------------
def collect_deepseek_response(response: Any) -> Tuple[str, str]:
    """Drain a DeepSeek streaming response and return its full content.

    Args:
        response: streaming response object exposing ``iter_lines()`` and
            ``close()``.

    Returns:
        ``(reasoning_content, text_content)`` tuple.
    """
    reasoning: List[str] = []
    answer: List[str] = []
    try:
        for raw_line in response.iter_lines():
            parsed = parse_deepseek_sse_line(raw_line)
            if not parsed:
                continue
            piece, piece_type, finished = extract_content_from_chunk(parsed)
            if finished:
                break
            if not piece:
                continue
            # Route the fragment into the matching buffer.
            (reasoning if piece_type == "thinking" else answer).append(piece)
    except Exception as e:
        logger.error(f"[collect_deepseek_response] 收集响应失败: {e}")
    finally:
        # Best-effort close; the stream may already be closed.
        try:
            response.close()
        except Exception:
            pass
    return "".join(reasoning), "".join(answer)
# ----------------------------------------------------------------------
# 工具调用解析
# ----------------------------------------------------------------------
def parse_tool_calls(text: str, tools_requested: List[Dict]) -> List[Dict[str, Any]]:
    """Parse tool calls out of a response text.

    Args:
        text: response text.
        tools_requested: tool definitions from the request.

    Returns:
        Detected tool calls, each a dict with ``name`` and ``input``.
    """
    found: List[Dict[str, Any]] = []

    def _collect(payload: Dict[str, Any]) -> None:
        # Keep only calls whose name matches a requested tool.
        for call in payload.get("tool_calls", []):
            name = call.get("name")
            if any(tool.get("name") == name for tool in tools_requested):
                found.append({"name": name, "input": call.get("input", {})})

    stripped = text.strip()
    # Fast path: the entire text is one tool_calls JSON object.
    if stripped.startswith('{"tool_calls":') and stripped.endswith("]}"):
        try:
            _collect(json.loads(stripped))
            if found:
                return found
        except json.JSONDecodeError:
            pass
    # Fallback: regex-scan for embedded tool_calls objects.
    pattern = re.compile(r'\{\s*["\']tool_calls["\']\s*:\s*\[(.*?)\]\s*\}', re.DOTALL)
    for fragment in pattern.findall(stripped):
        try:
            _collect(json.loads(f'{{"tool_calls": [{fragment}]}}'))
        except json.JSONDecodeError:
            continue
    return found
# ----------------------------------------------------------------------
# 引用过滤
# ----------------------------------------------------------------------
def should_filter_citation(text: str, search_enabled: bool) -> bool:
    """Decide whether a citation fragment should be filtered out.

    Args:
        text: content text.
        search_enabled: whether web search is enabled.

    Returns:
        True when search is enabled and the text is a citation marker.
    """
    # Citations only appear (and only need filtering) when search is on.
    return search_enabled and text.startswith("[citation:")

View File

@@ -1,186 +0,0 @@
# -*- coding: utf-8 -*-
"""流解析模块 - 处理 DeepSeek SSE 流响应"""
import json
import re
from .config import logger
# 预编译正则表达式
_TOOL_CALL_PATTERN = re.compile(r'\{\s*["\']tool_calls["\']\s*:\s*\[(.*?)\]\s*\}', re.DOTALL)
_CITATION_PATTERN = re.compile(r"^\[citation:")
def parse_deepseek_sse_line(raw_line: bytes) -> dict | None:
"""解析 DeepSeek SSE 行
Args:
raw_line: 原始字节行
Returns:
解析后的 chunk 字典,如果解析失败或应跳过则返回 None
"""
try:
line = raw_line.decode("utf-8")
except Exception as e:
logger.warning(f"[parse_deepseek_sse_line] 解码失败: {e}")
return None
if not line or not line.startswith("data:"):
return None
data_str = line[5:].strip()
if data_str == "[DONE]":
return {"type": "done"}
try:
chunk = json.loads(data_str)
return chunk
except json.JSONDecodeError as e:
logger.warning(f"[parse_deepseek_sse_line] JSON解析失败: {e}")
return None
def extract_content_from_chunk(chunk: dict) -> tuple[str, str, bool]:
    """Extract content from a DeepSeek chunk.

    Args:
        chunk: parsed chunk dict.

    Returns:
        ``(content, content_type, is_finished)`` tuple where ``content_type``
        is ``"thinking"`` or ``"text"``, and ``is_finished`` is True when the
        response has ended.
    """
    if chunk.get("type") == "done":
        return "", "text", True
    # Detect content moderation / sensitive-word blocking.
    if "error" in chunk or chunk.get("code") == "content_filter":
        logger.warning(f"[extract_content_from_chunk] 检测到内容过滤: {chunk}")
        return "", "text", True
    if "v" not in chunk:
        return "", "text", False
    v_value = chunk["v"]
    ptype = "text"
    # Determine the content type from the chunk path.
    path = chunk.get("p", "")
    if path == "response/search_status":
        return "", "text", False  # skip search-status updates
    elif path == "response/thinking_content":
        ptype = "thinking"
    elif path == "response/content":
        ptype = "text"
    if isinstance(v_value, str):
        if v_value == "FINISHED":
            return "", ptype, True
        return v_value, ptype, False
    elif isinstance(v_value, list):
        for item in v_value:
            # Bug fix: guard against non-dict list items (e.g. strings),
            # which previously raised AttributeError on item.get().
            if isinstance(item, dict) and item.get("p") == "status" and item.get("v") == "FINISHED":
                return "", ptype, True
        return "", ptype, False
    return "", ptype, False
def collect_deepseek_response(response) -> tuple[str, str]:
    """Collect the complete content of a DeepSeek streaming response.

    Args:
        response: the DeepSeek streaming response object.

    Returns:
        ``(reasoning_content, text_content)`` tuple.
    """
    thinking_buf = []
    text_buf = []
    try:
        for raw in response.iter_lines():
            parsed = parse_deepseek_sse_line(raw)
            if not parsed:
                continue
            piece, piece_type, finished = extract_content_from_chunk(parsed)
            if finished:
                break
            if not piece:
                continue
            # Route the fragment into the matching buffer.
            target = thinking_buf if piece_type == "thinking" else text_buf
            target.append(piece)
    except Exception as e:
        logger.error(f"[collect_deepseek_response] 收集响应失败: {e}")
    finally:
        # Best-effort close; the stream may already be closed.
        try:
            response.close()
        except Exception:
            pass
    return "".join(thinking_buf), "".join(text_buf)
def parse_tool_calls(text: str, tools_requested: list) -> list[dict]:
    """Parse tool calls from response text.

    Args:
        text: response text.
        tools_requested: tool definitions from the request.

    Returns:
        Detected tool calls, each a dict with ``name`` and ``input``.
    """
    results: list[dict] = []

    def _accept(payload: dict) -> None:
        # Keep only calls whose name matches a requested tool.
        for call in payload.get("tool_calls", []):
            name = call.get("name")
            if any(tool.get("name") == name for tool in tools_requested):
                results.append({"name": name, "input": call.get("input", {})})

    body = text.strip()
    # Fast path: the entire text is one tool_calls JSON object.
    if body.startswith('{"tool_calls":') and body.endswith("]}"):
        try:
            _accept(json.loads(body))
            if results:
                return results
        except json.JSONDecodeError:
            pass
    # Fallback: regex-scan for embedded tool_calls objects.
    for inner in re.findall(r'\{\s*["\']tool_calls["\']\s*:\s*\[(.*?)\]\s*\}', body, re.DOTALL):
        try:
            _accept(json.loads(f'{{"tool_calls": [{inner}]}}'))
        except json.JSONDecodeError:
            continue
    return results
def should_filter_citation(text: str, search_enabled: bool) -> bool:
    """Check whether citation content should be filtered.

    Args:
        text: the content text.
        search_enabled: whether search is enabled.

    Returns:
        True if the content should be filtered out.
    """
    if not search_enabled:
        # Citations can only appear when search is active.
        return False
    return text.startswith("[citation:")