# -*- coding: utf-8 -*-
"""
DeepSeek Web API, single-threaded version.

- Class layout follows kimistart.py.
- Logging goes through loguru.
- Tasks are fetched and processed one at a time in a single loop.
"""
import base64
import json
import os
import pathlib
import random
import re
import string
import time
from datetime import datetime
from json import JSONDecodeError
from typing import Dict, List, Optional, Tuple

import requests
from glom import glom
from loguru import logger

from deep.ai_seo import AiSearchResult
from deep.ds_test import calc_pow_with_node
from utlit.retry import retry

# Logging configuration: the log file lives one directory above this file's directory.
cwd = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
logger.add(f"{cwd}/deepseek.log", level="DEBUG", rotation="00:00", retention="3 days", compression="zip",
           backtrace=True)

# Constants
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"
ORIGIN = "https://chat.deepseek.com"
REFERER = "https://chat.deepseek.com/"
BASE = "https://chat.deepseek.com"
SESSION_CREATE = f"{BASE}/api/v0/chat_session/create"
POW_CHALLENGE = f"{BASE}/api/v0/chat/create_pow_challenge"
CHAT_COMPLETION = f"{BASE}/api/v0/chat/completion"
TASK_URL = 'https://api.granking.com'
TASK_HOST = 'api.granking.com'

# Base URL of the service that hands out and updates crawler sessions (cookies).
SESSION_API = "http://granking-api.neicela.com"

# Credentials for the task/session APIs. The defaults are the values that were
# previously hard-coded at every call site; they can be overridden through the
# environment so the secret does not have to live in the source.
# NOTE(review): consider removing the in-source defaults once deployment sets the env vars.
APP_ID = os.environ.get("GRANKING_APP_ID", "aa65700299848d6f21b969dbc9f6cf7c")
APP_SECRET = os.environ.get("GRANKING_SECRET", "5588071d36f0bc61af849c311a03f2c4")
_AUTH_QUERY = f"app_id={APP_ID}&secret={APP_SECRET}"

# (connect, read) timeout in seconds for calls that previously had no timeout;
# without one, a stalled connection would block the single-threaded loop forever.
DEFAULT_TIMEOUT = (5, 30)

# SSE "p" (path) values this client understands.
_PATH_SEARCH_RESULTS = 'response/search_results'
_PATH_THINKING = 'response/thinking_content'
_PATH_THINKING_DONE = 'response/thinking_elapsed_secs'
_PATH_CONTENT = 'response/content'


class ToolsLoad:
    """Helper for the task/session backend APIs and the DeepSeek "leim" endpoint.

    Most methods are wrapped with the project's ``retry`` decorator; its exact
    retry semantics are defined in ``utlit.retry``.
    """

    @retry('获取deepseek cookie', 0, time_sleep=30)
    def get_cookie(self, platform_id="1"):
        """Fetch one crawler session for ``platform_id``.

        Returns the ``data`` field of the JSON response, or ``False`` when the
        response carries no data.
        """
        url = f"{SESSION_API}/api/third/getOneSpiderSession?platform_id={platform_id}&{_AUTH_QUERY}"
        headers = {
            'Authorization': 'Bearer ',
            'User-Agent': 'Apifox/1.0.0 (http://apifox.com)'
        }
        response = requests.get(url, headers=headers, timeout=DEFAULT_TIMEOUT).json()
        data = response.get("data")
        if not data:
            logger.warning(f'没有获取到cookie: {response}')
            return False
        logger.info(f'成功获取到cookie: {data}')
        return data

    @retry('上传cookie状态', 5)
    def update_session(self, id, reload_time, status="4"):
        """Report the status of session ``id`` to the backend and return the response text."""
        url = f"{SESSION_API}/api/third/updateSpiderSession?{_AUTH_QUERY}"
        payload = json.dumps({
            "id": id,
            "status": status,
            "reload_time": reload_time
        })
        headers = {
            # NOTE(review): '{{lang}}' looks like an unresolved API-client template
            # placeholder; it is sent literally, as before.
            'lang': '{{lang}}',
            'Authorization': 'Bearer ',
            'User-Agent': 'Apifox/1.0.0 (http://apifox.com)',
            'Content-Type': 'application/json'
        }
        response = requests.post(url, headers=headers, data=payload, timeout=DEFAULT_TIMEOUT)
        logger.info(f'更新session状态: {response.text}')
        return response.text

    @retry('提交结果', 5)
    def post_task(self, data):
        """Submit a finished task result; returns the decoded JSON response.

        Raises ``requests.HTTPError`` on a non-2xx status.
        """
        url = f"{TASK_URL}/api/third/submitProjectTask"
        headers = {
            'User-Agent': 'Apifox/1.0.0 (http://apifox.com)',
            'Content-Type': 'application/json',
            'Accept': '*/*',
            'Host': TASK_HOST,
            'Connection': 'keep-alive',
            'Cookie': 'lang=zh-cn'
        }
        resp = requests.post(url, headers=headers, json=data, timeout=(5, 300))
        resp.raise_for_status()
        return resp.json()

    @retry('获取task消息', 5)
    def get_task(self):
        """Fetch the next task; returns the decoded JSON response.

        Raises ``requests.HTTPError`` on a non-2xx status.
        """
        url = f"{TASK_URL}/api/third/getTask?{_AUTH_QUERY}&platform_ids=1"
        resp = requests.get(url, timeout=(5, 20))
        resp.raise_for_status()
        return resp.json()

    @retry('更新任务状态', 5)
    def update_task_status(self, task_id, status):
        """Set the status of ``task_id`` on the backend; returns the decoded JSON response."""
        url = f"{TASK_URL}/api/third/updateTask?{_AUTH_QUERY}"
        payload = json.dumps({
            "task_id": task_id,
            "status": status,
        })
        headers = {
            'User-Agent': 'Apifox/1.0.0 (http://apifox.com)',
            'Content-Type': 'application/json',
            'Accept': '*/*',
            # Must match the host in TASK_URL (same as post_task); the previous
            # value named a different domain than the one being requested.
            'Host': TASK_HOST,
            'Connection': 'keep-alive',
            'Cookie': 'lang=zh-cn'
        }
        response = requests.post(url, headers=headers, data=payload, timeout=DEFAULT_TIMEOUT)
        return response.json()

    def get_leim(self):
        """Return ``data.biz_data.value`` from the hif-leim query endpoint."""
        url = 'https://hif-leim.deepseek.com/query'
        resp = requests.get(url, timeout=DEFAULT_TIMEOUT)
        return resp.json().get("data").get("biz_data").get("value")


class DeepSeekChatClient:
    """Client for the DeepSeek web chat endpoints (session, PoW challenge, SSE completion)."""

    def __init__(self):
        self.base_path = pathlib.Path(__file__).resolve().parent
        # Directory holding js_runner.js and sha3_wasm_bg.wasm used by the PoW solver.
        self.js_data_path = self.base_path / "js_data"
        self.tools = ToolsLoad()

    def default_headers(self, extra: Optional[Dict[str, str]] = None) -> Dict[str, str]:
        """Return the base request headers, with ``extra`` merged on top when given."""
        h = {
            "Authorization": '',
            "Content-Type": "application/json",
            "Origin": ORIGIN,
            "Referer": REFERER,
            "User-Agent": USER_AGENT,
            "Accept": "*/*",
        }
        if extra:
            h.update(extra)
        return h

    def create_chat_session(self, cookie) -> dict:
        """Create a chat session and return a dict that contains its ``id``.

        Several response layouts are accepted: ``id``, ``data.id``,
        ``data.data.id`` and ``data.biz_data.id``. For the ``biz_data`` layout
        the whole ``biz_data`` dict is returned; otherwise ``{"id": ...}``.

        Raises:
            requests.HTTPError: on a non-2xx status.
            ValueError: when no string id can be found. The message embeds the
                full response so callers can inspect server error text.
        """
        r = requests.post(SESSION_CREATE, headers=self.default_headers({"Authorization": cookie}), json={},
                          timeout=30)
        r.raise_for_status()
        data = r.json()

        if isinstance(data, dict):
            if isinstance(data.get("id"), str):
                return {"id": data["id"]}
            inner = data.get("data")
            if isinstance(inner, dict):
                if isinstance(inner.get("id"), str):
                    return {"id": inner["id"]}
                nested = inner.get("data")
                if isinstance(nested, dict) and isinstance(nested.get("id"), str):
                    return {"id": nested["id"]}
                biz_data = inner.get("biz_data")
                if isinstance(biz_data, dict) and isinstance(biz_data.get("id"), str):
                    return biz_data
        # Nothing matched: fail loudly instead of handing None / an id-less dict
        # to the completion request.
        raise ValueError(f"无法解析 chat_session_id:{data}")

    def fetch_pow_challenge(self, cookie, target_path="/api/v0/chat/completion") -> dict:
        """Request a proof-of-work challenge for ``target_path``.

        Raises:
            requests.HTTPError: on a non-2xx status.
            RuntimeError: when the response has no ``data.biz_data.challenge``.
        """
        r = requests.post(
            POW_CHALLENGE,
            headers=self.default_headers({"Authorization": cookie}),
            json={"target_path": target_path},
            timeout=30,
        )
        r.raise_for_status()
        data = r.json()
        # glom with a default tolerates null/missing intermediate objects, so an
        # error payload ends up in the RuntimeError below rather than an AttributeError.
        ch = glom(data, 'data.biz_data.challenge', default=None)
        if not ch:
            raise RuntimeError(f"挑战返回结构异常:{data}")
        return ch

    def solve_answer_fixed_sig(self, challenge_obj: dict) -> int:
        """Solve the PoW challenge via the Node runner and return its ``answer`` field."""
        logger.debug(f"开始求解PoW: {challenge_obj}")
        # Runner script and wasm module both live in the js_data folder.
        res = calc_pow_with_node(
            node_runner_path=str(self.js_data_path / "js_runner.js"),
            wasm_path=str(self.js_data_path / "sha3_wasm_bg.wasm"),
            algorithm="DeepSeekHashV1",
            challenge=challenge_obj["challenge"],
            salt=challenge_obj["salt"],
            difficulty=int(challenge_obj.get("difficulty", 200000)),
            expire_at=challenge_obj["expire_at"],
        )
        logger.info(f"PoW求解结果: {res}")
        return res.get("answer")

    def pow_to_header_value(self, algorithm: str, challenge: str, salt: str,
                            answer: int, signature: str, target_path: str) -> str:
        """Build the ``x-ds-pow-response`` header value: base64 of the JSON payload."""
        payload = {
            'algorithm': algorithm,
            'challenge': challenge,
            'salt': salt,
            'answer': answer,
            'signature': signature,
            'target_path': target_path
        }
        raw = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        return base64.b64encode(raw).decode("ascii")

    def ensure_valid_challenge(self, cookie) -> dict:
        """Fetch challenges until one has more than 5 seconds left before ``expire_at``."""
        while True:
            ch = self.fetch_pow_challenge(cookie, "/api/v0/chat/completion")
            now_ms = int(time.time() * 1000)
            # A challenge without expire_at is treated as almost expired and refetched.
            expire_at = int(ch.get("expire_at", now_ms + 1))
            if expire_at - now_ms > 5000:
                return ch
            time.sleep(0.2)

    def _parse_sse_events(self, lines) -> Tuple[List, str, str]:
        """Fold an iterable of SSE lines into (raw search results, answer text, thinking text).

        A chunk that carries no ``p`` key is appended to whichever text stream
        (thinking or answer) was selected most recently. Chunks explicitly
        addressed to any other path are not treated as text.
        """
        search_hits: List = []
        answer_parts: List[str] = []
        thinking_parts: List[str] = []
        target = None  # None, 'thinking' or 'content'

        for raw in lines:
            if not raw:
                continue
            if isinstance(raw, bytes):
                raw = raw.decode("utf-8", errors="replace")
            line = raw.strip()
            data_str = line[6:] if line.startswith("data: ") else line
            if data_str == "[DONE]":
                logger.info("SSE流结束")
                break
            try:
                data = json.loads(data_str)
            except JSONDecodeError:
                # Non-JSON lines (e.g. "event: ..." fields) are ignored.
                continue
            if not isinstance(data, dict):
                continue
            if glom(data, 'v.0.v', default='') == 'TIMEOUT':
                logger.warning("DeepSeek服务器繁忙")

            path = data.get('p', '')
            value = data.get('v', None)

            # Web-search results
            if path == _PATH_SEARCH_RESULTS and isinstance(value, list):
                logger.info("获取到联网搜索结果")
                search_hits.extend(value)

            # Track which text stream subsequent path-less chunks belong to.
            if path == _PATH_THINKING:
                target = 'thinking'
            elif path == _PATH_THINKING_DONE:
                if target == 'thinking':
                    target = None
            elif path == _PATH_CONTENT:
                target = 'content'

            # A chunk addressed to some other path (status, usage, search results,
            # elapsed seconds, ...) is metadata, not text.
            if path and path not in (_PATH_THINKING, _PATH_CONTENT):
                continue
            if target is None or isinstance(value, (dict, list)):
                continue
            if value is None:
                value = glom(data, 'choices.0.delta.content', default="")

            if target == 'thinking':
                thinking_parts.append(str(value))
            else:
                answer_parts.append(str(value))

        return search_hits, "".join(answer_parts), "".join(thinking_parts)

    @staticmethod
    def _build_search_results(search_hits: List, answer_text: str) -> List[Dict]:
        """Convert raw search hits into result dicts, flagging the ones cited in the answer.

        A hit is marked referenced when its 1-based position appears as
        ``citation:<n>`` in ``answer_text``. Hits lacking a title or url are dropped.
        """
        cited = set(re.findall(r'citation:(\d+)', answer_text))
        results = []
        for position, hit in enumerate(search_hits, start=1):
            if not isinstance(hit, dict):
                continue
            item = {
                "url": hit.get('url', ''),
                "title": hit.get('title', ''),
                "body": hit.get('snippet', ''),
                "publish_time": hit.get('published_at', ''),
                "host_name": hit.get('site_name', '未知'),
                "is_referenced": "1" if str(position) in cited else "0"
            }
            if item.get("title") and item.get("url"):
                results.append(item)
        return results

    def sse_chat_completion(
            self,
            chat_session_id: dict,
            prompt: str,
            cookie: str,
            thinking_enabled: bool = False,
            search_enabled: bool = True,
            parent_message_id: Optional[str] = None,
            client_stream_id: Optional[str] = None,
    ) -> Tuple[List[Dict], str, str]:
        """Send one prompt over the SSE completion endpoint.

        Args:
            chat_session_id: dict returned by ``create_chat_session``; its ``id`` is sent.
            prompt: the user prompt.
            cookie: value for the ``Authorization`` header.
            thinking_enabled: forwarded as ``thinking_enabled``.
            search_enabled: forwarded as ``search_enabled``.
            parent_message_id: forwarded as ``parent_message_id``.
            client_stream_id: prefix of the stream id; defaults to today's date (YYYYMMDD).

        Returns:
            ``(search results, answer text, thinking text)``.

        Raises:
            requests.HTTPError: on a non-2xx status from the completion endpoint.
        """
        # 1) Obtain a challenge that is not about to expire.
        ch = self.ensure_valid_challenge(cookie)

        # 2) Solve it.
        answer = self.solve_answer_fixed_sig(ch)
        logger.info(f"PoW answer: {answer}")

        # 3) Assemble the x-ds-pow-response header.
        xpow = self.pow_to_header_value(
            algorithm=ch.get("algorithm", "DeepSeekHashV1"),
            challenge=ch["challenge"],
            salt=ch["salt"],
            answer=answer,
            signature=ch["signature"],
            target_path=ch.get("target_path", "/api/v0/chat/completion"),
        )

        # 4) Build headers and payload.
        h = self.default_headers({"x-ds-pow-response": xpow, "Authorization": cookie})
        h["x-hif-leim"] = self.tools.get_leim()

        if not client_stream_id:
            client_stream_id = time.strftime('%Y%m%d')
        payload = {
            "chat_session_id": chat_session_id.get("id"),
            "parent_message_id": parent_message_id,
            "prompt": prompt,
            "ref_file_ids": [],
            "thinking_enabled": thinking_enabled,
            "search_enabled": search_enabled,
            "client_stream_id": client_stream_id + "-3e910d848b6140d5",
        }

        # 5) Stream the response.
        with requests.post(
                CHAT_COMPLETION, headers=h, json=payload, stream=True, timeout=300
        ) as resp:
            resp.raise_for_status()
            # Event streams are UTF-8; set it explicitly so iter_lines neither
            # yields bytes nor decodes with a header-derived fallback charset.
            resp.encoding = "utf-8"
            search_hits, response_text, thinking_text = self._parse_sse_events(
                resp.iter_lines(decode_unicode=True)
            )

        ai_search_result_list = self._build_search_results(search_hits, response_text)
        return ai_search_result_list, response_text, thinking_text


class Start:
    """Entry-point class: pulls tasks and runs them through the chat client."""

    def __init__(self):
        self.tools = ToolsLoad()
        self.client = DeepSeekChatClient()

    @retry('处理消息任务', for_work=10)
    def process_task(self, task):
        """Process a single task dict.

        Returns the submitted result dict on success, or ``False`` when no
        usable cookie is available or the model returned an empty answer.
        Any exception raised while chatting/submitting is logged, the task is
        marked failed (status "4"), and the exception is re-raised.
        """
        task_id = task.get("id", "")
        keyword = task.get("keyword", "")
        platform_id = task.get("platform_id", "")
        brand = task.get("brand", "")

        logger.info(f"开始处理任务: {keyword} | task_id: {task_id}")

        # Obtain a cookie
        response = self.tools.get_cookie(platform_id="1")
        if not response:
            logger.warning('cookie获取失败')
            return False

        cookie = response.get("cookie")
        cookie_id = response.get("id")

        if not cookie:
            logger.warning('cookie为空')
            return False

        try:
            # Captured before the work starts so start_time/end_time differ.
            start_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            # Create the chat session
            logger.info("创建chat_session...")
            session_id = self.client.create_chat_session(cookie)
            logger.info(f"新建chat_session_id: {session_id}")

            # Send the prompt
            logger.info(f"发送prompt: {keyword}")
            ai_search_result_list, answer, thinking = self.client.sse_chat_completion(
                chat_session_id=session_id,
                prompt=keyword,
                cookie=cookie,
                thinking_enabled=True,
                search_enabled=True,
            )
            # Compare the variable, not the literal 'answer': an empty reply must
            # not be submitted as a successful result.
            if not answer:
                logger.warning(f"异常结果 {ai_search_result_list} {answer} {thinking}")
                return False

            # Build the result
            end_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            result = {
                'app_id': APP_ID,
                'secret': APP_SECRET,
                'platform_id': platform_id,
                'platform_name': 'deepseek',
                'prompt': keyword,
                'keyword': brand,
                'answer': answer,
                'search_result': ai_search_result_list,
                'screenshot_file': '',
                'run_status': True,
                'task_id': task_id,
                'rank': 0,
                'start_time': start_dt,
                'end_time': end_dt,
                'screenshot_url': '',
                'words': []
            }

            # Submit the result
            post_resp = self.tools.post_task(result)
            logger.debug(f"{result}")
            logger.info(f"任务 {task_id} 提交返回: {post_resp}")

            return result

        except Exception as e:
            error_msg = str(e)
            logger.error(f"任务 {task_id} 处理异常: {error_msg}")

            # Invalid token: flag the session on the backend.
            # NOTE(review): "full_datetime" is passed verbatim as reload_time;
            # confirm the backend expects this literal rather than a timestamp.
            if "Authorization Failed (invalid token)" in error_msg:
                self.tools.update_session(cookie_id, "full_datetime", "2")

            # Mark the task as failed
            if task_id:
                self.tools.update_task_status(task_id, "4")

            raise

    @retry('主运行窗口', for_work=1)
    def start_task_msg(self):
        """Fetch one task and process it; always returns ``True``.

        Sleeps 5 s when ``get_task`` returns nothing and 30 s when the response
        has no ``data``. Of the task's fields, ``process_task`` reads ``id``,
        ``keyword``, ``brand`` and ``platform_id``.
        """
        task_resp = self.tools.get_task()
        logger.info(f'获取任务响应: {task_resp}')

        if not task_resp:
            logger.info("get_task 未返回有效数据,等待后重试")
            time.sleep(5)
            return True

        task_data = task_resp.get("data", False)
        if not task_data:
            logger.info("没有任务数据,等待下一轮")
            time.sleep(30)
            return True

        self.process_task(task_data)
        return True

    def run(self):
        """Main loop: run ``start_task_msg`` forever, pausing 10 s after an unexpected error."""
        logger.info("DeepSeek单线程爬虫启动")
        while True:
            try:
                self.start_task_msg()
            except Exception as e:
                logger.error(f"主循环异常: {e}")
                time.sleep(10)


if __name__ == "__main__":
    Start().run()