Merge pull request #16 from CJackHwang/codex/analyze-non-streaming-model-output

Unify non-stream parsing with SSE parser
This commit is contained in:
CJACK.
2026-02-06 02:47:28 +08:00
committed by GitHub

View File

@@ -453,107 +453,84 @@ IMPORTANT: If calling tools, output ONLY the JSON. The response must start with
def collect_data(): def collect_data():
nonlocal result nonlocal result
ptype = "text" current_fragment_type = "thinking" if thinking_enabled else "text"
try: try:
for raw_line in deepseek_resp.iter_lines(): for raw_line in deepseek_resp.iter_lines():
try: chunk = parse_deepseek_sse_line(raw_line)
line = raw_line.decode("utf-8") if not chunk:
except Exception as e: continue
logger.warning(f"[chat_completions] 解码失败: {e}") if chunk.get("type") == "done":
if ptype == "thinking":
think_list.append("解码失败,请稍候再试")
else:
text_list.append("解码失败,请稍候再试")
data_queue.put(None) data_queue.put(None)
break break
if not line: try:
continue contents, is_finished, new_fragment_type = parse_sse_chunk_for_content(
if line.startswith("data:"): chunk, thinking_enabled, current_fragment_type
data_str = line[5:].strip() )
if data_str == "[DONE]": current_fragment_type = new_fragment_type
data_queue.put(None) if is_finished:
break final_reasoning = "".join(think_list)
try: final_content = "".join(text_list)
chunk = json.loads(data_str) prompt_tokens = len(final_prompt) // 4
if "v" in chunk: reasoning_tokens = len(final_reasoning) // 4
v_value = chunk["v"] completion_tokens = len(final_content) // 4
if "p" in chunk and chunk.get("p") == "response/search_status":
continue
if "p" in chunk and chunk.get("p") == "response/thinking_content":
ptype = "thinking"
elif "p" in chunk and chunk.get("p") == "response/content":
ptype = "text"
if isinstance(v_value, str):
if search_enabled and v_value.startswith("[citation:"):
continue
if ptype == "thinking":
think_list.append(v_value)
else:
text_list.append(v_value)
elif isinstance(v_value, list):
for item in v_value:
if item.get("p") == "status" and item.get("v") == "FINISHED":
final_reasoning = "".join(think_list)
final_content = "".join(text_list)
prompt_tokens = len(final_prompt) // 4
reasoning_tokens = len(final_reasoning) // 4
completion_tokens = len(final_content) // 4
# 检测工具调用 # 检测工具调用
detected_tools = [] detected_tools = []
finish_reason = "stop" finish_reason = "stop"
if has_tools: if has_tools:
detected_tools = parse_tool_calls(final_content, [{"name": t.get("function", t).get("name")} for t in tools_requested]) detected_tools = parse_tool_calls(final_content, [{"name": t.get("function", t).get("name")} for t in tools_requested])
if detected_tools: if detected_tools:
finish_reason = "tool_calls" finish_reason = "tool_calls"
# 构建 message 对象 # 构建 message 对象
message_obj = { message_obj = {
"role": "assistant", "role": "assistant",
"content": final_content if not detected_tools else None, "content": final_content if not detected_tools else None,
} }
# 只有启用思考模式时才包含 reasoning_content # 只有启用思考模式时才包含 reasoning_content
if thinking_enabled and final_reasoning: if thinking_enabled and final_reasoning:
message_obj["reasoning_content"] = final_reasoning message_obj["reasoning_content"] = final_reasoning
# 添加工具调用 # 添加工具调用
if detected_tools: if detected_tools:
tool_calls_data = format_openai_tool_calls(detected_tools) tool_calls_data = format_openai_tool_calls(detected_tools)
message_obj["tool_calls"] = tool_calls_data message_obj["tool_calls"] = tool_calls_data
message_obj["content"] = None message_obj["content"] = None
result = { result = {
"id": completion_id, "id": completion_id,
"object": "chat.completion", "object": "chat.completion",
"created": created_time, "created": created_time,
"model": model, "model": model,
"choices": [{ "choices": [{
"index": 0, "index": 0,
"message": message_obj, "message": message_obj,
"finish_reason": finish_reason, "finish_reason": finish_reason,
}], }],
"usage": { "usage": {
"prompt_tokens": prompt_tokens, "prompt_tokens": prompt_tokens,
"completion_tokens": reasoning_tokens + completion_tokens, "completion_tokens": reasoning_tokens + completion_tokens,
"total_tokens": prompt_tokens + reasoning_tokens + completion_tokens, "total_tokens": prompt_tokens + reasoning_tokens + completion_tokens,
"completion_tokens_details": {"reasoning_tokens": reasoning_tokens}, "completion_tokens_details": {"reasoning_tokens": reasoning_tokens},
}, },
} }
data_queue.put("DONE") data_queue.put("DONE")
return return
except Exception as e:
logger.warning(f"[collect_data] 无法解析: {data_str}, 错误: {e}") for content_text, content_type in contents:
if ptype == "thinking": if should_filter_citation(content_text, search_enabled):
think_list.append("解析失败,请稍候再试") continue
if content_type == "thinking":
think_list.append(content_text)
else: else:
text_list.append("解析失败,请稍候再试") text_list.append(content_text)
except Exception as e:
logger.warning(f"[collect_data] 无法解析: {chunk}, 错误: {e}")
text_list.append("解析失败,请稍候再试")
data_queue.put(None) data_queue.put(None)
break break
except Exception as e: except Exception as e:
logger.warning(f"[collect_data] 错误: {e}") logger.warning(f"[collect_data] 错误: {e}")
if ptype == "thinking": text_list.append("处理失败,请稍候再试")
think_list.append("处理失败,请稍候再试")
else:
text_list.append("处理失败,请稍候再试")
data_queue.put(None) data_queue.put(None)
finally: finally:
deepseek_resp.close() deepseek_resp.close()