feat: add stream parsing for DeepSeek SSE responses, centralize model management, and refactor tool call handling.

2026-05-14 05:05:09 +08:00 · 2026-02-01 03:53:01 +08:00
parent 4193336dd8
commit b90901d5a0
10 changed files with 359 additions and 192 deletions
--- a/core/auth.py
+++ b/core/auth.py
@@ -5,6 +5,7 @@ from fastapi import HTTPException, Request

 from .config import CONFIG, logger
 from .deepseek import login_deepseek_via_account, BASE_HEADERS
+from .utils import get_account_identifier

 # -------------------------- 全局账号队列 --------------------------
 # 使用列表实现轮询队列，配合线程锁保证并发安全
@@ -37,12 +38,7 @@ init_account_queue()
 init_claude_api_key_queue()


-# ----------------------------------------------------------------------
-# 辅助函数：获取账号唯一标识（优先 email，否则 mobile）
-# ----------------------------------------------------------------------
-def get_account_identifier(account: dict) -> str:
-    """返回账号的唯一标识，优先使用 email，否则使用 mobile"""
-    return account.get("email", "").strip() or account.get("mobile", "").strip()
+# get_account_identifier 已移至 core.utils


 def get_queue_status() -> dict:
@@ -176,12 +172,7 @@ def get_auth_headers(request: Request) -> dict:
    return {**BASE_HEADERS, "authorization": f"Bearer {request.state.deepseek_token}"}


-# ----------------------------------------------------------------------
-# Claude 认证相关函数
-# ----------------------------------------------------------------------
-def determine_claude_mode_and_token(request: Request):
-    """Claude认证：沿用现有的OpenAI接口认证逻辑"""
-    determine_mode_and_token(request)
+# determine_claude_mode_and_token 已移除（直接使用 determine_mode_and_token）


 # ----------------------------------------------------------------------
--- a/core/deepseek.py
+++ b/core/deepseek.py
@@ -5,6 +5,7 @@ from curl_cffi import requests
 from fastapi import HTTPException

 from .config import CONFIG, save_config, logger
+from .utils import get_account_identifier

 # ----------------------------------------------------------------------
 # DeepSeek 相关常量
@@ -28,9 +29,7 @@ BASE_HEADERS = {
 }


-def get_account_identifier(account: dict) -> str:
-    """返回账号的唯一标识，优先使用 email，否则使用 mobile"""
-    return account.get("email", "").strip() or account.get("mobile", "").strip()
+# get_account_identifier 已移至 core.utils


 # ----------------------------------------------------------------------
--- a/core/models.py
+++ b/core/models.py
@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+"""模型定义模块 - 集中管理所有支持的模型"""
+
+# DeepSeek 模型列表（官方模型名称）
+DEEPSEEK_MODELS = [
+    {
+        "id": "deepseek-chat",
+        "object": "model",
+        "created": 1677610602,
+        "owned_by": "deepseek",
+        "permission": [],
+    },
+    {
+        "id": "deepseek-reasoner",
+        "object": "model",
+        "created": 1677610602,
+        "owned_by": "deepseek",
+        "permission": [],
+    },
+    {
+        "id": "deepseek-chat-search",
+        "object": "model",
+        "created": 1677610602,
+        "owned_by": "deepseek",
+        "permission": [],
+    },
+    {
+        "id": "deepseek-reasoner-search",
+        "object": "model",
+        "created": 1677610602,
+        "owned_by": "deepseek",
+        "permission": [],
+    },
+]
+
+# Claude 模型映射列表
+CLAUDE_MODELS = [
+    {
+        "id": "claude-sonnet-4-20250514",
+        "object": "model",
+        "created": 1715635200,
+        "owned_by": "anthropic",
+    },
+    {
+        "id": "claude-sonnet-4-20250514-fast",
+        "object": "model",
+        "created": 1715635200,
+        "owned_by": "anthropic",
+    },
+    {
+        "id": "claude-sonnet-4-20250514-slow",
+        "object": "model",
+        "created": 1715635200,
+        "owned_by": "anthropic",
+    },
+]
+
+
+def get_model_config(model: str) -> tuple[bool, bool]:
+    """根据模型名称获取配置
+    
+    Args:
+        model: 模型名称
+        
+    Returns:
+        (thinking_enabled, search_enabled) 元组
+    """
+    model_lower = model.lower()
+    
+    if model_lower == "deepseek-chat":
+        return False, False
+    elif model_lower == "deepseek-reasoner":
+        return True, False
+    elif model_lower == "deepseek-chat-search":
+        return False, True
+    elif model_lower == "deepseek-reasoner-search":
+        return True, True
+    else:
+        return None, None  # 不支持的模型
+
+
+def get_openai_models_response() -> dict:
+    """获取 OpenAI 格式的模型列表响应"""
+    return {"object": "list", "data": DEEPSEEK_MODELS}
+
+
+def get_claude_models_response() -> dict:
+    """获取 Claude 格式的模型列表响应"""
+    return {"object": "list", "data": CLAUDE_MODELS}
+
--- a/core/pow.py
+++ b/core/pow.py
@@ -11,6 +11,7 @@ from curl_cffi import requests
 from wasmtime import Engine, Linker, Module, Store

 from .config import CONFIG, WASM_PATH, logger
+from .utils import get_account_identifier

 # ----------------------------------------------------------------------
 # WASM 模块缓存 - 避免每次请求都重新加载
@@ -51,10 +52,7 @@ try:
 except Exception as e:
    logger.warning(f"[WASM] 启动时预加载失败（将在首次使用时重试）: {e}")

-
-def get_account_identifier(account: dict) -> str:
-    """返回账号的唯一标识"""
-    return account.get("email", "").strip() or account.get("mobile", "").strip()
+# get_account_identifier 已移至 core.utils


 # ----------------------------------------------------------------------
@@ -152,17 +150,24 @@ def compute_pow_answer(
    return int(value)


-# ----------------------------------------------------------------------
-# 获取 PoW 响应，融合计算 answer 逻辑
-# ----------------------------------------------------------------------
-def get_pow_response(request, get_auth_headers_func, choose_new_account_func, 
-                     login_func, pow_url: str, max_attempts: int = 3):
-    """获取 PoW 响应"""
-    from .deepseek import BASE_HEADERS
+def get_pow_response(request, max_attempts: int = 3):
+    """获取 PoW 响应
+    
+    Args:
+        request: FastAPI 请求对象
+        max_attempts: 最大重试次数
+        
+    Returns:
+        Base64 编码的 PoW 响应，如果失败返回 None
+    """
+    from .auth import get_auth_headers, choose_new_account
+    from .deepseek import BASE_HEADERS, login_deepseek_via_account, DEEPSEEK_CREATE_POW_URL
+    
+    pow_url = DEEPSEEK_CREATE_POW_URL
    
    attempts = 0
    while attempts < max_attempts:
-        headers = get_auth_headers_func(request)
+        headers = get_auth_headers(request)
        try:
            resp = requests.post(
                pow_url,
@@ -227,11 +232,11 @@ def get_pow_response(request, get_auth_headers_func, choose_new_account_func,
                    request.state.tried_accounts = []
                if current_id not in request.state.tried_accounts:
                    request.state.tried_accounts.append(current_id)
-                new_account = choose_new_account_func(request.state.tried_accounts)
+                new_account = choose_new_account(request.state.tried_accounts)
                if new_account is None:
                    break
                try:
-                    login_func(new_account)
+                    login_deepseek_via_account(new_account)
                except Exception as e:
                    logger.error(
                        f"[get_pow_response] 账号 {get_account_identifier(new_account)} 登录失败：{e}"
@@ -245,3 +250,4 @@ def get_pow_response(request, get_auth_headers_func, choose_new_account_func,
                continue
            attempts += 1
    return None
+
--- a/core/session_manager.py
+++ b/core/session_manager.py
@@ -4,10 +4,11 @@ from curl_cffi import requests as cffi_requests
 from fastapi import HTTPException, Request

 from .config import logger
+from .utils import get_account_identifier
+from .models import get_model_config
 from .auth import (
    get_auth_headers,
    choose_new_account,
-    get_account_identifier,
    release_account,
    refresh_account_token,
 )
@@ -114,14 +115,7 @@ def get_pow(request: Request, max_attempts: int = 3) -> str | None:
    Returns:
        Base64 编码的 PoW 响应，如果失败返回 None
    """
-    return get_pow_response(
-        request,
-        get_auth_headers,
-        choose_new_account,
-        login_deepseek_via_account,
-        DEEPSEEK_CREATE_POW_URL,
-        max_attempts,
-    )
+    return get_pow_response(request, max_attempts)


 def prepare_completion_request(
@@ -162,27 +156,7 @@ def prepare_completion_request(
    return call_completion_endpoint(payload, headers, max_attempts)


-def get_model_config(model: str) -> tuple[bool, bool]:
-    """根据模型名称获取配置
-    
-    Args:
-        model: 模型名称
-        
-    Returns:
-        (thinking_enabled, search_enabled) 元组
-    """
-    model_lower = model.lower()
-    
-    if model_lower in ["deepseek-v3", "deepseek-chat"]:
-        return False, False
-    elif model_lower in ["deepseek-r1", "deepseek-reasoner"]:
-        return True, False
-    elif model_lower in ["deepseek-v3-search", "deepseek-chat-search"]:
-        return False, True
-    elif model_lower in ["deepseek-r1-search", "deepseek-reasoner-search"]:
-        return True, True
-    else:
-        return None, None  # 不支持的模型
+# get_model_config 已移至 core.models


 def cleanup_account(request: Request):
--- a/core/stream_parser.py
+++ b/core/stream_parser.py
@@ -0,0 +1,186 @@
+# -*- coding: utf-8 -*-
+"""流解析模块 - 处理 DeepSeek SSE 流响应"""
+import json
+import re
+
+from .config import logger
+
+# 预编译正则表达式
+_TOOL_CALL_PATTERN = re.compile(r'\{\s*["\']tool_calls["\']\s*:\s*\[(.*?)\]\s*\}', re.DOTALL)
+_CITATION_PATTERN = re.compile(r"^\[citation:")
+
+
+def parse_deepseek_sse_line(raw_line: bytes) -> dict | None:
+    """解析 DeepSeek SSE 行
+    
+    Args:
+        raw_line: 原始字节行
+        
+    Returns:
+        解析后的 chunk 字典，如果解析失败或应跳过则返回 None
+    """
+    try:
+        line = raw_line.decode("utf-8")
+    except Exception as e:
+        logger.warning(f"[parse_deepseek_sse_line] 解码失败: {e}")
+        return None
+    
+    if not line or not line.startswith("data:"):
+        return None
+    
+    data_str = line[5:].strip()
+    
+    if data_str == "[DONE]":
+        return {"type": "done"}
+    
+    try:
+        chunk = json.loads(data_str)
+        return chunk
+    except json.JSONDecodeError as e:
+        logger.warning(f"[parse_deepseek_sse_line] JSON解析失败: {e}")
+        return None
+
+
+def extract_content_from_chunk(chunk: dict) -> tuple[str, str, bool]:
+    """从 DeepSeek chunk 中提取内容
+    
+    Args:
+        chunk: 解析后的 chunk 字典
+        
+    Returns:
+        (content, content_type, is_finished) 元组
+        content_type 为 "thinking" 或 "text"
+        is_finished 为 True 表示响应结束
+    """
+    if chunk.get("type") == "done":
+        return "", "text", True
+    
+    # 检测内容审核/敏感词阻止
+    if "error" in chunk or chunk.get("code") == "content_filter":
+        logger.warning(f"[extract_content_from_chunk] 检测到内容过滤: {chunk}")
+        return "", "text", True
+    
+    if "v" not in chunk:
+        return "", "text", False
+    
+    v_value = chunk["v"]
+    ptype = "text"
+    
+    # 检查路径确定类型
+    path = chunk.get("p", "")
+    if path == "response/search_status":
+        return "", "text", False  # 跳过搜索状态
+    elif path == "response/thinking_content":
+        ptype = "thinking"
+    elif path == "response/content":
+        ptype = "text"
+    
+    if isinstance(v_value, str):
+        if v_value == "FINISHED":
+            return "", ptype, True
+        return v_value, ptype, False
+    elif isinstance(v_value, list):
+        for item in v_value:
+            if item.get("p") == "status" and item.get("v") == "FINISHED":
+                return "", ptype, True
+        return "", ptype, False
+    
+    return "", ptype, False
+
+
+def collect_deepseek_response(response) -> tuple[str, str]:
+    """收集 DeepSeek 流响应的完整内容
+    
+    Args:
+        response: DeepSeek 流响应对象
+        
+    Returns:
+        (reasoning_content, text_content) 元组
+    """
+    thinking_parts = []
+    text_parts = []
+    
+    try:
+        for raw_line in response.iter_lines():
+            chunk = parse_deepseek_sse_line(raw_line)
+            if not chunk:
+                continue
+            
+            content, content_type, is_finished = extract_content_from_chunk(chunk)
+            
+            if is_finished:
+                break
+            
+            if content:
+                if content_type == "thinking":
+                    thinking_parts.append(content)
+                else:
+                    text_parts.append(content)
+    except Exception as e:
+        logger.error(f"[collect_deepseek_response] 收集响应失败: {e}")
+    finally:
+        try:
+            response.close()
+        except Exception:
+            pass
+    
+    return "".join(thinking_parts), "".join(text_parts)
+
+
+def parse_tool_calls(text: str, tools_requested: list) -> list[dict]:
+    """从响应文本中解析工具调用
+    
+    Args:
+        text: 响应文本
+        tools_requested: 请求中定义的工具列表
+        
+    Returns:
+        检测到的工具调用列表，每项包含 name 和 input
+    """
+    detected_tools = []
+    cleaned_text = text.strip()
+    
+    # 尝试直接解析完整 JSON
+    if cleaned_text.startswith('{"tool_calls":') and cleaned_text.endswith("]}"):
+        try:
+            tool_data = json.loads(cleaned_text)
+            for tool_call in tool_data.get("tool_calls", []):
+                tool_name = tool_call.get("name")
+                tool_input = tool_call.get("input", {})
+                if any(tool.get("name") == tool_name for tool in tools_requested):
+                    detected_tools.append({"name": tool_name, "input": tool_input})
+            if detected_tools:
+                return detected_tools
+        except json.JSONDecodeError:
+            pass
+    
+    # 使用正则匹配
+    matches = _TOOL_CALL_PATTERN.findall(cleaned_text)
+    for match in matches:
+        try:
+            tool_calls_json = f'{{"tool_calls": [{match}]}}'
+            tool_data = json.loads(tool_calls_json)
+            for tool_call in tool_data.get("tool_calls", []):
+                tool_name = tool_call.get("name")
+                tool_input = tool_call.get("input", {})
+                if any(tool.get("name") == tool_name for tool in tools_requested):
+                    detected_tools.append({"name": tool_name, "input": tool_input})
+        except json.JSONDecodeError:
+            continue
+    
+    return detected_tools
+
+
+def should_filter_citation(text: str, search_enabled: bool) -> bool:
+    """检查是否应该过滤引用内容
+    
+    Args:
+        text: 内容文本
+        search_enabled: 是否启用搜索
+        
+    Returns:
+        是否应该过滤
+    """
+    if not search_enabled:
+        return False
+    return _CITATION_PATTERN.match(text) is not None
--- a/core/utils.py
+++ b/core/utils.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+"""公共工具函数模块"""
+
+
+def get_account_identifier(account: dict) -> str:
+    """返回账号的唯一标识，优先使用 email，否则使用 mobile"""
+    return account.get("email", "").strip() or account.get("mobile", "").strip()
+
+
+def estimate_tokens(text) -> int:
+    """估算文本的 token 数量（简单估算：字符数/4）
+    
+    Args:
+        text: 字符串或其他类型
+        
+    Returns:
+        估算的 token 数量，最小为 1
+    """
+    if isinstance(text, str):
+        return max(1, len(text) // 4)
+    elif isinstance(text, list):
+        return sum(
+            estimate_tokens(item.get("text", ""))
+            if isinstance(item, dict)
+            else estimate_tokens(str(item))
+            for item in text
+        )
+    else:
+        return max(1, len(str(text)) // 4)