# Mirror of https://github.com/CJackHwang/ds2api.git
# Synced 2026-05-05 08:55:28 +08:00 (187 lines, 5.5 KiB, Python)
# -*- coding: utf-8 -*-
|
|
"""流解析模块 - 处理 DeepSeek SSE 流响应"""
|
|
import json
|
|
import re
|
|
|
|
from .config import logger
|
|
|
|
# Pre-compiled regular expressions (hoisted to import time so the hot
# streaming path never recompiles them).
# Matches an embedded {"tool_calls": [ ... ]} JSON object; DOTALL lets the
# call array span multiple lines. Group 1 captures the array contents.
_TOOL_CALL_PATTERN = re.compile(r'\{\s*["\']tool_calls["\']\s*:\s*\[(.*?)\]\s*\}', re.DOTALL)
# Matches content fragments that begin with a "[citation:" marker.
_CITATION_PATTERN = re.compile(r"^\[citation:")
|
|
|
|
|
|
def parse_deepseek_sse_line(raw_line: bytes) -> dict | None:
|
|
"""解析 DeepSeek SSE 行
|
|
|
|
Args:
|
|
raw_line: 原始字节行
|
|
|
|
Returns:
|
|
解析后的 chunk 字典,如果解析失败或应跳过则返回 None
|
|
"""
|
|
try:
|
|
line = raw_line.decode("utf-8")
|
|
except Exception as e:
|
|
logger.warning(f"[parse_deepseek_sse_line] 解码失败: {e}")
|
|
return None
|
|
|
|
if not line or not line.startswith("data:"):
|
|
return None
|
|
|
|
data_str = line[5:].strip()
|
|
|
|
if data_str == "[DONE]":
|
|
return {"type": "done"}
|
|
|
|
try:
|
|
chunk = json.loads(data_str)
|
|
return chunk
|
|
except json.JSONDecodeError as e:
|
|
logger.warning(f"[parse_deepseek_sse_line] JSON解析失败: {e}")
|
|
return None
|
|
|
|
|
|
def extract_content_from_chunk(chunk: dict) -> tuple[str, str, bool]:
    """Extract content from a parsed DeepSeek chunk.

    Args:
        chunk: Parsed chunk dict (as produced by ``parse_deepseek_sse_line``).

    Returns:
        A ``(content, content_type, is_finished)`` tuple where
        ``content_type`` is ``"thinking"`` or ``"text"`` and
        ``is_finished`` is True once the response has ended.
    """
    if chunk.get("type") == "done":
        return "", "text", True

    # Detect content moderation / sensitive-content blocking.
    if "error" in chunk or chunk.get("code") == "content_filter":
        logger.warning(f"[extract_content_from_chunk] 检测到内容过滤: {chunk}")
        return "", "text", True

    if "v" not in chunk:
        return "", "text", False

    v_value = chunk["v"]
    ptype = "text"

    # The "p" path field determines the payload channel.
    path = chunk.get("p", "")
    if path == "response/search_status":
        return "", "text", False  # Skip search-status updates
    elif path == "response/thinking_content":
        ptype = "thinking"
    elif path == "response/content":
        ptype = "text"

    if isinstance(v_value, str):
        if v_value == "FINISHED":
            return "", ptype, True
        return v_value, ptype, False
    elif isinstance(v_value, list):
        for item in v_value:
            # Guard against non-dict items: the upstream schema is not
            # guaranteed, and item.get(...) would raise AttributeError on
            # a plain string element.
            if (
                isinstance(item, dict)
                and item.get("p") == "status"
                and item.get("v") == "FINISHED"
            ):
                return "", ptype, True
        return "", ptype, False

    return "", ptype, False
|
|
|
|
|
|
def collect_deepseek_response(response) -> tuple[str, str]:
    """Drain a DeepSeek streaming response and gather its full content.

    Args:
        response: DeepSeek streaming response object (supports
            ``iter_lines()`` and ``close()``).

    Returns:
        A ``(reasoning_content, text_content)`` tuple containing the
        concatenated thinking fragments and text fragments.
    """
    reasoning_buf: list[str] = []
    answer_buf: list[str] = []

    try:
        for raw_line in response.iter_lines():
            chunk = parse_deepseek_sse_line(raw_line)
            if not chunk:
                continue

            piece, channel, finished = extract_content_from_chunk(chunk)
            if finished:
                break
            if not piece:
                continue

            target = reasoning_buf if channel == "thinking" else answer_buf
            target.append(piece)
    except Exception as e:
        logger.error(f"[collect_deepseek_response] 收集响应失败: {e}")
    finally:
        # Best-effort close: the response may already be closed or may not
        # support close() at all.
        try:
            response.close()
        except Exception:
            pass

    return "".join(reasoning_buf), "".join(answer_buf)
|
|
|
|
|
|
def _match_requested_tools(tool_data: dict, tools_requested: list) -> list[dict]:
    """Return {"name", "input"} entries from tool_data whose name matches a requested tool."""
    requested_names = {tool.get("name") for tool in tools_requested}
    matched = []
    for tool_call in tool_data.get("tool_calls", []):
        tool_name = tool_call.get("name")
        if tool_name in requested_names:
            matched.append({"name": tool_name, "input": tool_call.get("input", {})})
    return matched


def parse_tool_calls(text: str, tools_requested: list) -> list[dict]:
    """Parse tool calls out of a response text.

    Args:
        text: Response text that may embed tool-call JSON.
        tools_requested: Tool definitions from the request; only calls
            whose name matches one of these are kept.

    Returns:
        Detected tool calls, each a dict with ``name`` and ``input`` keys.
    """
    detected_tools: list[dict] = []
    cleaned_text = text.strip()

    # Fast path: the whole response is a single {"tool_calls": [...]} object.
    if cleaned_text.startswith('{"tool_calls":') and cleaned_text.endswith("]}"):
        try:
            detected = _match_requested_tools(json.loads(cleaned_text), tools_requested)
            if detected:
                return detected
        except json.JSONDecodeError:
            pass

    # Fallback: regex-scan for tool_calls objects embedded in mixed text.
    for match in _TOOL_CALL_PATTERN.findall(cleaned_text):
        try:
            tool_data = json.loads(f'{{"tool_calls": [{match}]}}')
        except json.JSONDecodeError:
            continue
        detected_tools.extend(_match_requested_tools(tool_data, tools_requested))

    return detected_tools
|
|
|
|
|
|
def should_filter_citation(text: str, search_enabled: bool) -> bool:
    """Decide whether a content fragment is a citation marker to filter out.

    Args:
        text: Content text fragment.
        search_enabled: Whether web search is enabled for this request.

    Returns:
        True when search is enabled and the text begins with a
        ``[citation:`` marker.
    """
    if not search_enabled:
        return False
    # str.startswith is exactly equivalent to the anchored regex
    # ^\[citation: and keeps regex machinery off the hot streaming path.
    return text.startswith("[citation:")
|