Merge pull request #16 from CJackHwang/codex/analyze-non-streaming-model-output

Unify non-stream parsing with SSE parser
2026-05-16 22:25:15 +08:00 · 2026-02-06 02:47:28 +08:00
parent 9626d6ccbd ba96554cc1
commit baca42280f
1 changed file with 69 additions and 92 deletions
--- a/routes/openai.py
+++ b/routes/openai.py
@@ -453,107 +453,84 @@ IMPORTANT: If calling tools, output ONLY the JSON. The response must start with
            def collect_data():
                nonlocal result
-                ptype = "text"
+                current_fragment_type = "thinking" if thinking_enabled else "text"
                try:
                    for raw_line in deepseek_resp.iter_lines():
-                        try:
+                        chunk = parse_deepseek_sse_line(raw_line)
-                            line = raw_line.decode("utf-8")
+                        if not chunk:
-                        except Exception as e:
+                            continue
-                            logger.warning(f"[chat_completions] 解码失败: {e}")
+                        if chunk.get("type") == "done":
                            if ptype == "thinking":
                                think_list.append("解码失败，请稍候再试")
                            else:
                                text_list.append("解码失败，请稍候再试")
                            data_queue.put(None)
                            break
-                        if not line:
+                        try:
-                            continue
+                            contents, is_finished, new_fragment_type = parse_sse_chunk_for_content(
-                        if line.startswith("data:"):
+                                chunk, thinking_enabled, current_fragment_type
-                            data_str = line[5:].strip()
+                            )
-                            if data_str == "[DONE]":
+                            current_fragment_type = new_fragment_type
-                                data_queue.put(None)
+                            if is_finished:
-                                break
+                                final_reasoning = "".join(think_list)
-                            try:
+                                final_content = "".join(text_list)
-                                chunk = json.loads(data_str)
+                                prompt_tokens = len(final_prompt) // 4
-                                if "v" in chunk:
+                                reasoning_tokens = len(final_reasoning) // 4
-                                    v_value = chunk["v"]
+                                completion_tokens = len(final_content) // 4
                                    if "p" in chunk and chunk.get("p") == "response/search_status":
                                        continue
                                    if "p" in chunk and chunk.get("p") == "response/thinking_content":
                                        ptype = "thinking"
                                    elif "p" in chunk and chunk.get("p") == "response/content":
                                        ptype = "text"
                                    if isinstance(v_value, str):
                                        if search_enabled and v_value.startswith("[citation:"):
                                            continue
                                        if ptype == "thinking":
                                            think_list.append(v_value)
                                        else:
                                            text_list.append(v_value)
                                    elif isinstance(v_value, list):
                                        for item in v_value:
                                            if item.get("p") == "status" and item.get("v") == "FINISHED":
                                                final_reasoning = "".join(think_list)
                                                final_content = "".join(text_list)
                                                prompt_tokens = len(final_prompt) // 4
                                                reasoning_tokens = len(final_reasoning) // 4
                                                completion_tokens = len(final_content) // 4
-                                                # 检测工具调用
+                                # 检测工具调用
-                                                detected_tools = []
+                                detected_tools = []
-                                                finish_reason = "stop"
+                                finish_reason = "stop"
-                                                if has_tools:
+                                if has_tools:
-                                                    detected_tools = parse_tool_calls(final_content, [{"name": t.get("function", t).get("name")} for t in tools_requested])
+                                    detected_tools = parse_tool_calls(final_content, [{"name": t.get("function", t).get("name")} for t in tools_requested])
-                                                    if detected_tools:
+                                    if detected_tools:
-                                                        finish_reason = "tool_calls"
+                                        finish_reason = "tool_calls"
-                                                # 构建 message 对象
+                                # 构建 message 对象
-                                                message_obj = {
+                                message_obj = {
-                                                    "role": "assistant",
+                                    "role": "assistant",
-                                                    "content": final_content if not detected_tools else None,
+                                    "content": final_content if not detected_tools else None,
-                                                }
+                                }
-                                                # 只有启用思考模式时才包含 reasoning_content
+                                # 只有启用思考模式时才包含 reasoning_content
-                                                if thinking_enabled and final_reasoning:
+                                if thinking_enabled and final_reasoning:
-                                                    message_obj["reasoning_content"] = final_reasoning
+                                    message_obj["reasoning_content"] = final_reasoning
-                                                # 添加工具调用
+                                # 添加工具调用
-                                                if detected_tools:
+                                if detected_tools:
-                                                    tool_calls_data = format_openai_tool_calls(detected_tools)
+                                    tool_calls_data = format_openai_tool_calls(detected_tools)
-                                                    message_obj["tool_calls"] = tool_calls_data
+                                    message_obj["tool_calls"] = tool_calls_data
-                                                    message_obj["content"] = None
+                                    message_obj["content"] = None
-                                                result = {
+                                result = {
-                                                    "id": completion_id,
+                                    "id": completion_id,
-                                                    "object": "chat.completion",
+                                    "object": "chat.completion",
-                                                    "created": created_time,
+                                    "created": created_time,
-                                                    "model": model,
+                                    "model": model,
-                                                    "choices": [{
+                                    "choices": [{
-                                                        "index": 0,
+                                        "index": 0,
-                                                        "message": message_obj,
+                                        "message": message_obj,
-                                                        "finish_reason": finish_reason,
+                                        "finish_reason": finish_reason,
-                                                    }],
+                                    }],
-                                                    "usage": {
+                                    "usage": {
-                                                        "prompt_tokens": prompt_tokens,
+                                        "prompt_tokens": prompt_tokens,
-                                                        "completion_tokens": reasoning_tokens + completion_tokens,
+                                        "completion_tokens": reasoning_tokens + completion_tokens,
-                                                        "total_tokens": prompt_tokens + reasoning_tokens + completion_tokens,
+                                        "total_tokens": prompt_tokens + reasoning_tokens + completion_tokens,
-                                                        "completion_tokens_details": {"reasoning_tokens": reasoning_tokens},
+                                        "completion_tokens_details": {"reasoning_tokens": reasoning_tokens},
-                                                    },
+                                    },
-                                                }
+                                }
-                                                data_queue.put("DONE")
+                                data_queue.put("DONE")
-                                                return
+                                return
-                            except Exception as e:
+
-                                logger.warning(f"[collect_data] 无法解析: {data_str}, 错误: {e}")
+                            for content_text, content_type in contents:
-                                if ptype == "thinking":
+                                if should_filter_citation(content_text, search_enabled):
-                                    think_list.append("解析失败，请稍候再试")
+                                    continue
                                if content_type == "thinking":
                                    think_list.append(content_text)
                                else:
-                                    text_list.append("解析失败，请稍候再试")
+                                    text_list.append(content_text)
                            except Exception as e:
                                logger.warning(f"[collect_data] 无法解析: {chunk}, 错误: {e}")
                                text_list.append("解析失败，请稍候再试")
                                data_queue.put(None)
                                break
                except Exception as e:
                    logger.warning(f"[collect_data] 错误: {e}")
-                    if ptype == "thinking":
+                    text_list.append("处理失败，请稍候再试")
                        think_list.append("处理失败，请稍候再试")
                    else:
                        text_list.append("处理失败，请稍候再试")
                    data_queue.put(None)
                finally:
                    deepseek_resp.close()