Mirror of https://github.com/CJackHwang/ds2api.git (synced 2026-05-05 00:45:29 +08:00)
refactor: Consolidate SSE and stream parsing logic into sse_parser.py, remove stream_parser.py, and update WebUI panel text.
sse_parser.py
@@ -1,15 +1,55 @@
 # -*- coding: utf-8 -*-
 """DeepSeek SSE stream parsing module

-This module holds the shared logic for parsing DeepSeek SSE responses, used by openai.py and accounts.py.
+This module holds the shared logic for parsing DeepSeek SSE responses, used by openai.py, claude.py, and accounts.py.
+It merges the functionality of the original sse_parser.py and stream_parser.py.
 """
 import json
-from typing import List, Tuple, Optional, Dict, Any, Generator
+import re
+from typing import List, Tuple, Optional, Dict, Any

 from .config import logger
 from .constants import SKIP_PATTERNS
+
+# Precompiled regular expressions
+_TOOL_CALL_PATTERN = re.compile(r'\{\s*["\']tool_calls["\']\s*:\s*\[(.*?)\]\s*\}', re.DOTALL)
+_CITATION_PATTERN = re.compile(r"^\[citation:")
+
+
+# ----------------------------------------------------------------------
+# Basic parsing functions
+# ----------------------------------------------------------------------
+
+def parse_deepseek_sse_line(raw_line: bytes) -> Optional[Dict[str, Any]]:
+    """Parse a DeepSeek SSE line
+
+    Args:
+        raw_line: the raw byte line
+
+    Returns:
+        The parsed chunk dict, or None if parsing fails or the line should be skipped
+    """
+    try:
+        line = raw_line.decode("utf-8")
+    except Exception as e:
+        logger.warning(f"[parse_deepseek_sse_line] Decoding failed: {e}")
+        return None
+
+    if not line or not line.startswith("data:"):
+        return None
+
+    data_str = line[5:].strip()
+
+    if data_str == "[DONE]":
+        return {"type": "done"}
+
+    try:
+        chunk = json.loads(data_str)
+        return chunk
+    except json.JSONDecodeError as e:
+        logger.warning(f"[parse_deepseek_sse_line] JSON parsing failed: {e}")
+        return None
+
+
 def should_skip_chunk(chunk_path: str) -> bool:
     """Decide whether this chunk should be skipped (status-related, not content)"""
@@ -33,6 +73,10 @@ def is_search_result(item: dict) -> bool:
     return "url" in item and "title" in item


+# ----------------------------------------------------------------------
+# Content extraction functions
+# ----------------------------------------------------------------------
+
 def extract_content_from_item(item: dict, default_type: str = "text") -> Optional[Tuple[str, str]]:
     """Extract content from an item that contains content and type

@@ -57,7 +101,7 @@ def extract_content_recursive(items: List[Dict], default_type: str = "text") ->
     Returns a list of [(content, content_type), ...];
     returns None when a FINISHED signal is encountered
     """
-    extracted = []
+    extracted: List[Tuple[str, str]] = []
     for item in items:
         if not isinstance(item, dict):
             continue
@@ -117,8 +161,15 @@ def extract_content_recursive(items: List[Dict], default_type: str = "text") ->
     return extracted


-def parse_sse_chunk_for_content(chunk: dict, thinking_enabled: bool = False,
-                                current_fragment_type: str = "thinking") -> Tuple[List[Tuple[str, str]], bool, str]:
+# ----------------------------------------------------------------------
+# High-level parsing functions
+# ----------------------------------------------------------------------
+
+def parse_sse_chunk_for_content(
+    chunk: Dict[str, Any],
+    thinking_enabled: bool = False,
+    current_fragment_type: str = "thinking"
+) -> Tuple[List[Tuple[str, str]], bool, str]:
     """Parse a single SSE chunk and extract its content

     Args:
@@ -138,7 +189,7 @@ def parse_sse_chunk_for_content(chunk: dict, thinking_enabled: bool = False,

     v_value = chunk["v"]
     chunk_path = chunk.get("p", "")
-    contents = []
+    contents: List[Tuple[str, str]] = []
     new_fragment_type = current_fragment_type

     # Skip status-related chunks
@@ -206,3 +257,160 @@ def parse_sse_chunk_for_content(chunk: dict, thinking_enabled: bool = False,

     return (contents, False, new_fragment_type)

+
+def extract_content_from_chunk(chunk: Dict[str, Any]) -> Tuple[str, str, bool]:
+    """Extract content from a DeepSeek chunk (simplified version, kept for compatibility with the old interface)
+
+    Args:
+        chunk: the parsed chunk dict
+
+    Returns:
+        A (content, content_type, is_finished) tuple
+        content_type is "thinking" or "text"
+        is_finished is True when the response has ended
+    """
+    if chunk.get("type") == "done":
+        return "", "text", True
+
+    # Detect content moderation / sensitive-word blocking
+    if "error" in chunk or chunk.get("code") == "content_filter":
+        logger.warning(f"[extract_content_from_chunk] Content filter detected: {chunk}")
+        return "", "text", True
+
+    if "v" not in chunk:
+        return "", "text", False
+
+    v_value = chunk["v"]
+    ptype = "text"
+
+    # Check the path to determine the content type
+    path = chunk.get("p", "")
+    if path == "response/search_status":
+        return "", "text", False  # skip search status
+    elif path == "response/thinking_content":
+        ptype = "thinking"
+    elif path == "response/content":
+        ptype = "text"
+
+    if isinstance(v_value, str):
+        if v_value == "FINISHED":
+            return "", ptype, True
+        return v_value, ptype, False
+    elif isinstance(v_value, list):
+        for item in v_value:
+            if isinstance(item, dict):
+                if item.get("p") == "status" and item.get("v") == "FINISHED":
+                    return "", ptype, True
+        return "", ptype, False
+
+    return "", ptype, False
+
+
+# ----------------------------------------------------------------------
+# Response collection functions
+# ----------------------------------------------------------------------
+
+def collect_deepseek_response(response: Any) -> Tuple[str, str]:
+    """Collect the full content of a DeepSeek streaming response
+
+    Args:
+        response: the DeepSeek streaming response object
+
+    Returns:
+        A (reasoning_content, text_content) tuple
+    """
+    thinking_parts: List[str] = []
+    text_parts: List[str] = []
+
+    try:
+        for raw_line in response.iter_lines():
+            chunk = parse_deepseek_sse_line(raw_line)
+            if not chunk:
+                continue
+
+            content, content_type, is_finished = extract_content_from_chunk(chunk)
+
+            if is_finished:
+                break
+
+            if content:
+                if content_type == "thinking":
+                    thinking_parts.append(content)
+                else:
+                    text_parts.append(content)
+    except Exception as e:
+        logger.error(f"[collect_deepseek_response] Failed to collect response: {e}")
+    finally:
+        try:
+            response.close()
+        except Exception:
+            pass
+
+    return "".join(thinking_parts), "".join(text_parts)
+
+
+# ----------------------------------------------------------------------
+# Tool call parsing
+# ----------------------------------------------------------------------
+
+def parse_tool_calls(text: str, tools_requested: List[Dict]) -> List[Dict[str, Any]]:
+    """Parse tool calls from the response text
+
+    Args:
+        text: the response text
+        tools_requested: the list of tools defined in the request
+
+    Returns:
+        The list of detected tool calls, each containing name and input
+    """
+    detected_tools: List[Dict[str, Any]] = []
+    cleaned_text = text.strip()
+
+    # Try to parse the text as one complete JSON object first
+    if cleaned_text.startswith('{"tool_calls":') and cleaned_text.endswith("]}"):
+        try:
+            tool_data = json.loads(cleaned_text)
+            for tool_call in tool_data.get("tool_calls", []):
+                tool_name = tool_call.get("name")
+                tool_input = tool_call.get("input", {})
+                if any(tool.get("name") == tool_name for tool in tools_requested):
+                    detected_tools.append({"name": tool_name, "input": tool_input})
+            if detected_tools:
+                return detected_tools
+        except json.JSONDecodeError:
+            pass
+
+    # Fall back to regex matching
+    matches = _TOOL_CALL_PATTERN.findall(cleaned_text)
+    for match in matches:
+        try:
+            tool_calls_json = f'{{"tool_calls": [{match}]}}'
+            tool_data = json.loads(tool_calls_json)
+            for tool_call in tool_data.get("tool_calls", []):
+                tool_name = tool_call.get("name")
+                tool_input = tool_call.get("input", {})
+                if any(tool.get("name") == tool_name for tool in tools_requested):
+                    detected_tools.append({"name": tool_name, "input": tool_input})
+        except json.JSONDecodeError:
+            continue
+
+    return detected_tools
+
+
+# ----------------------------------------------------------------------
+# Citation filtering
+# ----------------------------------------------------------------------
+
+def should_filter_citation(text: str, search_enabled: bool) -> bool:
+    """Check whether citation content should be filtered out
+
+    Args:
+        text: the content text
+        search_enabled: whether search is enabled
+
+    Returns:
+        Whether the text should be filtered
+    """
+    if not search_enabled:
+        return False
+    return _CITATION_PATTERN.match(text) is not None
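
For orientation, here is a minimal sketch of how the consolidated helpers are driven. Only the wire format is taken from the code above (the "data:" prefix, the "p"/"v" fields, and the [DONE]/FINISHED sentinels); the import path and the sample payloads are illustrative assumptions, not part of the commit.

# A minimal usage sketch. The import path below is an assumption; adjust it
# to wherever sse_parser.py lives in the package.
from ds2api.sse_parser import parse_deepseek_sse_line, extract_content_from_chunk

# Synthetic SSE lines in the format the parser expects: a "data:" prefix,
# a JSON body with "p" (path) and "v" (delta value), and a [DONE] terminator.
sample_lines = [
    b'data: {"p": "response/thinking_content", "v": "Let me think..."}',
    b'data: {"p": "response/content", "v": "Hello"}',
    b'data: [DONE]',
]

for raw in sample_lines:
    chunk = parse_deepseek_sse_line(raw)
    if chunk is None:
        continue
    content, content_type, finished = extract_content_from_chunk(chunk)
    if finished:
        break
    print(content_type, repr(content))
# -> thinking 'Let me think...'
# -> text 'Hello'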
stream_parser.py (deleted)
@@ -1,186 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Stream parsing module - handles DeepSeek SSE stream responses"""
-import json
-import re
-
-from .config import logger
-
-# Precompiled regular expressions
-_TOOL_CALL_PATTERN = re.compile(r'\{\s*["\']tool_calls["\']\s*:\s*\[(.*?)\]\s*\}', re.DOTALL)
-_CITATION_PATTERN = re.compile(r"^\[citation:")
-
-
-def parse_deepseek_sse_line(raw_line: bytes) -> dict | None:
-    """Parse a DeepSeek SSE line
-
-    Args:
-        raw_line: the raw byte line
-
-    Returns:
-        The parsed chunk dict, or None if parsing fails or the line should be skipped
-    """
-    try:
-        line = raw_line.decode("utf-8")
-    except Exception as e:
-        logger.warning(f"[parse_deepseek_sse_line] Decoding failed: {e}")
-        return None
-
-    if not line or not line.startswith("data:"):
-        return None
-
-    data_str = line[5:].strip()
-
-    if data_str == "[DONE]":
-        return {"type": "done"}
-
-    try:
-        chunk = json.loads(data_str)
-        return chunk
-    except json.JSONDecodeError as e:
-        logger.warning(f"[parse_deepseek_sse_line] JSON parsing failed: {e}")
-        return None
-
-
-def extract_content_from_chunk(chunk: dict) -> tuple[str, str, bool]:
-    """Extract content from a DeepSeek chunk
-
-    Args:
-        chunk: the parsed chunk dict
-
-    Returns:
-        A (content, content_type, is_finished) tuple
-        content_type is "thinking" or "text"
-        is_finished is True when the response has ended
-    """
-    if chunk.get("type") == "done":
-        return "", "text", True
-
-    # Detect content moderation / sensitive-word blocking
-    if "error" in chunk or chunk.get("code") == "content_filter":
-        logger.warning(f"[extract_content_from_chunk] Content filter detected: {chunk}")
-        return "", "text", True
-
-    if "v" not in chunk:
-        return "", "text", False
-
-    v_value = chunk["v"]
-    ptype = "text"
-
-    # Check the path to determine the content type
-    path = chunk.get("p", "")
-    if path == "response/search_status":
-        return "", "text", False  # skip search status
-    elif path == "response/thinking_content":
-        ptype = "thinking"
-    elif path == "response/content":
-        ptype = "text"
-
-    if isinstance(v_value, str):
-        if v_value == "FINISHED":
-            return "", ptype, True
-        return v_value, ptype, False
-    elif isinstance(v_value, list):
-        for item in v_value:
-            if item.get("p") == "status" and item.get("v") == "FINISHED":
-                return "", ptype, True
-        return "", ptype, False
-
-    return "", ptype, False
-
-
-def collect_deepseek_response(response) -> tuple[str, str]:
-    """Collect the full content of a DeepSeek streaming response
-
-    Args:
-        response: the DeepSeek streaming response object
-
-    Returns:
-        A (reasoning_content, text_content) tuple
-    """
-    thinking_parts = []
-    text_parts = []
-
-    try:
-        for raw_line in response.iter_lines():
-            chunk = parse_deepseek_sse_line(raw_line)
-            if not chunk:
-                continue
-
-            content, content_type, is_finished = extract_content_from_chunk(chunk)
-
-            if is_finished:
-                break
-
-            if content:
-                if content_type == "thinking":
-                    thinking_parts.append(content)
-                else:
-                    text_parts.append(content)
-    except Exception as e:
-        logger.error(f"[collect_deepseek_response] Failed to collect response: {e}")
-    finally:
-        try:
-            response.close()
-        except Exception:
-            pass
-
-    return "".join(thinking_parts), "".join(text_parts)
-
-
-def parse_tool_calls(text: str, tools_requested: list) -> list[dict]:
-    """Parse tool calls from the response text
-
-    Args:
-        text: the response text
-        tools_requested: the list of tools defined in the request
-
-    Returns:
-        The list of detected tool calls, each containing name and input
-    """
-    detected_tools = []
-    cleaned_text = text.strip()
-
-    # Try to parse the text as one complete JSON object first
-    if cleaned_text.startswith('{"tool_calls":') and cleaned_text.endswith("]}"):
-        try:
-            tool_data = json.loads(cleaned_text)
-            for tool_call in tool_data.get("tool_calls", []):
-                tool_name = tool_call.get("name")
-                tool_input = tool_call.get("input", {})
-                if any(tool.get("name") == tool_name for tool in tools_requested):
-                    detected_tools.append({"name": tool_name, "input": tool_input})
-            if detected_tools:
-                return detected_tools
-        except json.JSONDecodeError:
-            pass
-
-    # Fall back to regex matching
-    matches = _TOOL_CALL_PATTERN.findall(cleaned_text)
-    for match in matches:
-        try:
-            tool_calls_json = f'{{"tool_calls": [{match}]}}'
-            tool_data = json.loads(tool_calls_json)
-            for tool_call in tool_data.get("tool_calls", []):
-                tool_name = tool_call.get("name")
-                tool_input = tool_call.get("input", {})
-                if any(tool.get("name") == tool_name for tool in tools_requested):
-                    detected_tools.append({"name": tool_name, "input": tool_input})
-        except json.JSONDecodeError:
-            continue
-
-    return detected_tools
-
-
-def should_filter_citation(text: str, search_enabled: bool) -> bool:
-    """Check whether citation content should be filtered out
-
-    Args:
-        text: the content text
-        search_enabled: whether search is enabled
-
-    Returns:
-        Whether the text should be filtered
-    """
-    if not search_enabled:
-        return False
-    return _CITATION_PATTERN.match(text) is not None
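
With stream_parser.py gone, callers that previously imported collect_deepseek_response from it now import the same name from sse_parser. A small sketch of the contract the collector relies on follows; FakeResponse is a hypothetical stand-in for a requests-style response object (only iter_lines() and close() are used), and the import path is again an assumption.

# Illustrative only, not part of the commit: the real caller passes the
# live HTTP streaming response instead of FakeResponse.
from ds2api.sse_parser import collect_deepseek_response  # assumed path

class FakeResponse:
    def __init__(self, lines):
        self._lines = lines

    def iter_lines(self):  # the only iteration hook the collector uses
        yield from self._lines

    def close(self):  # always invoked in the collector's finally block
        pass

resp = FakeResponse([
    b'data: {"p": "response/thinking_content", "v": "step 1"}',
    b'data: {"p": "response/content", "v": "answer"}',
    b'data: [DONE]',
])
reasoning, text = collect_deepseek_response(resp)
assert (reasoning, text) == ("step 1", "answer")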