You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
497 lines
18 KiB
497 lines
18 KiB
# -*- coding: utf-8 -*-
|
|
"""
|
|
DeepSeek Web API 单线程版本
|
|
- 参考 kimistart.py 的类结构
|
|
- 使用 loguru 日志系统
|
|
- 单线程循环处理任务
|
|
"""
|
|
|
|
import base64
|
|
import json
|
|
import os
|
|
import pathlib
|
|
import random
|
|
import re
|
|
import string
|
|
import time
|
|
from datetime import datetime
|
|
from json import JSONDecodeError
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
import requests
|
|
from glom import glom
|
|
from loguru import logger
|
|
|
|
from deep.ai_seo import AiSearchResult
|
|
from deep.ds_test import calc_pow_with_node
|
|
from utlit.retry import retry
|
|
|
|
# Logging configuration: add a file sink named "deepseek.log" located in the
# parent of the directory that contains this module. Records at DEBUG level
# and above are written; the rotation / retention / compression options are
# passed straight through to loguru.
_module_dir = os.path.dirname(os.path.abspath(__file__))
cwd = os.path.dirname(_module_dir)
logger.add(
    f"{cwd}/deepseek.log",
    level="DEBUG",
    rotation="00:00",
    retention="3 days",
    compression="zip",
    backtrace=True,
)
|
|
|
|
# Constants
# Base URL of the DeepSeek web chat; the browser-like header values below are
# derived from it.
BASE = "https://chat.deepseek.com"
ORIGIN = BASE
REFERER = f"{BASE}/"
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/141.0.0.0 Safari/537.36"
)

# DeepSeek web API endpoints (all under /api/v0).
_API_V0 = f"{BASE}/api/v0"
SESSION_CREATE = f"{_API_V0}/chat_session/create"
POW_CHALLENGE = f"{_API_V0}/chat/create_pow_challenge"
CHAT_COMPLETION = f"{_API_V0}/chat/completion"

# Task backend: host name and the URL built from it.
TASK_HOST = 'api.granking.com'
TASK_URL = f'https://{TASK_HOST}'
|
|
|
|
|
|
class ToolsLoad:
    """HTTP helpers for the session (cookie) pool, the task backend and the leim endpoint.

    Most methods are wrapped by ``@retry`` (imported from ``utlit.retry``);
    that decorator's retry / return-value semantics are defined outside this
    file and are not documented here.
    """

    # NOTE(review): these credentials are hard-coded in source; consider
    # loading them from configuration or the environment instead.
    _APP_ID = 'aa65700299848d6f21b969dbc9f6cf7c'
    _SECRET = '5588071d36f0bc61af849c311a03f2c4'
    # Query-string fragment shared by every authenticated endpoint.
    _AUTH_QUERY = f"app_id={_APP_ID}&secret={_SECRET}"
    _SESSION_API = "http://granking-api.neicela.com"
    _USER_AGENT = 'Apifox/1.0.0 (http://apifox.com)'
    # (connect, read) timeout in seconds for calls that previously had none;
    # without a timeout a stalled connection would block the whole
    # single-threaded loop indefinitely.
    _TIMEOUT = (5, 30)

    @retry('获取deepseek cookie', 0, time_sleep=30)
    def get_cookie(self, platform_id="1"):
        """Fetch one spider session from the session pool.

        Args:
            platform_id: platform identifier placed in the query string
                (any value convertible with ``str`` is accepted).

        Returns:
            The ``data`` member of the JSON response, or ``False`` when it is
            missing or empty.
        """
        url = (
            f"{self._SESSION_API}/api/third/getOneSpiderSession"
            f"?platform_id={platform_id}&{self._AUTH_QUERY}"
        )
        headers = {
            'Authorization': 'Bearer ',
            'User-Agent': self._USER_AGENT,
        }
        body = requests.get(url, headers=headers, timeout=self._TIMEOUT).json()
        data = body.get("data")
        # Treat every empty payload (None, [], {}, "") as "no session".
        if not data:
            logger.warning(f'没有获取到cookie: {body}')
            return False
        # Log only the session id: the payload carries the auth cookie, which
        # should not end up in retained log files.
        session_id = data.get("id") if isinstance(data, dict) else None
        logger.info(f'成功获取到cookie: id={session_id}')
        return data

    @retry('上传cookie状态', 5)
    def update_session(self, id, reload_time, status="4"):
        """Report a session's status back to the session pool.

        Args:
            id: session identifier.
            reload_time: value sent as ``reload_time``.
            status: status code sent as ``status``.

        Returns:
            The raw response text.
        """
        url = f"{self._SESSION_API}/api/third/updateSpiderSession?{self._AUTH_QUERY}"
        payload = json.dumps({
            "id": id,
            "status": status,
            "reload_time": reload_time,
        })
        headers = {
            'lang': '{{lang}}',
            'Authorization': 'Bearer ',
            'User-Agent': self._USER_AGENT,
            'Content-Type': 'application/json',
        }
        response = requests.post(url, headers=headers, data=payload, timeout=self._TIMEOUT)
        logger.info(f'更新session状态: {response.text}')
        return response.text

    @retry('提交结果', 5)
    def post_task(self, data):
        """Submit a task result (sent as JSON) and return the decoded JSON reply.

        Raises:
            requests.HTTPError: on a non-2xx response.
        """
        url = f"{TASK_URL}/api/third/submitProjectTask"
        headers = {
            'User-Agent': self._USER_AGENT,
            'Content-Type': 'application/json',
            'Accept': '*/*',
            'Host': TASK_HOST,
            'Connection': 'keep-alive',
            'Cookie': 'lang=zh-cn',
        }
        resp = requests.post(url, headers=headers, json=data, timeout=(5, 300))
        resp.raise_for_status()
        return resp.json()

    @retry('获取task消息', 5)
    def get_task(self):
        """Fetch the next task (``platform_ids=1``) and return the decoded JSON reply.

        Raises:
            requests.HTTPError: on a non-2xx response.
        """
        url = f"{TASK_URL}/api/third/getTask?{self._AUTH_QUERY}&platform_ids=1"
        resp = requests.get(url, timeout=(5, 20))
        resp.raise_for_status()
        return resp.json()

    @retry('更新任务状态', 1)
    def update_task_status(self, task_id, status):
        """Update a task's status and return the decoded JSON reply."""
        url = f"{TASK_URL}/api/third/updateTask?{self._AUTH_QUERY}"
        payload = json.dumps({
            "task_id": task_id,
            "status": status,
        })
        headers = {
            'User-Agent': self._USER_AGENT,
            'Content-Type': 'application/json',
            'Accept': '*/*',
            # Must match the host in TASK_URL (same as post_task); an
            # unrelated Host header can be mis-routed or rejected.
            'Host': TASK_HOST,
            'Connection': 'keep-alive',
            'Cookie': 'lang=zh-cn',
        }
        response = requests.post(url, headers=headers, data=payload, timeout=self._TIMEOUT)
        return response.json()

    def get_leim(self):
        """Return ``data.biz_data.value`` from the hif-leim query endpoint.

        Raises:
            ValueError: if the response does not have the expected nesting.
        """
        resp = requests.get('https://hif-leim.deepseek.com/query', timeout=self._TIMEOUT)
        body = resp.json()
        try:
            return body["data"]["biz_data"].get("value")
        except (KeyError, TypeError, AttributeError) as err:
            # Include the payload so the failure is diagnosable.
            raise ValueError(f"hif-leim 返回结构异常:{body}") from err
|
|
|
|
|
|
class DeepSeekChatClient:
    """Client for the DeepSeek web chat API: session creation, PoW challenge and SSE completion."""

    def __init__(self):
        # Directory of this module; the PoW runner assets live in "js_data".
        self.base_path = pathlib.Path(__file__).resolve().parent
        self.js_data_path = self.base_path / "js_data"
        self.tools = ToolsLoad()

    def default_headers(self, extra: Optional[Dict[str, str]] = None) -> Dict[str, str]:
        """Return the base request headers, optionally overridden/extended by ``extra``."""
        headers = {
            "Authorization": '',
            "Content-Type": "application/json",
            "Origin": ORIGIN,
            "Referer": REFERER,
            "User-Agent": USER_AGENT,
            "Accept": "*/*",
        }
        if extra:
            headers.update(extra)
        return headers

    @staticmethod
    def _extract_session(data) -> dict:
        """Pull the chat session out of a create-session response.

        Accepts an ``id`` at the top level, under ``data``, under
        ``data.data`` (each returned as ``{"id": ...}``) or under
        ``data.biz_data`` (the ``biz_data`` dict is returned as-is).

        Raises:
            ValueError: when no string ``id`` is found; the message contains
                the full payload so callers can inspect the server message.
        """
        if isinstance(data, dict):
            if isinstance(data.get("id"), str):
                return {"id": data["id"]}
            inner = data.get("data")
            if isinstance(inner, dict):
                if isinstance(inner.get("id"), str):
                    return {"id": inner["id"]}
                nested = inner.get("data")
                if isinstance(nested, dict) and isinstance(nested.get("id"), str):
                    return {"id": nested["id"]}
                biz_data = inner.get("biz_data")
                if isinstance(biz_data, dict) and isinstance(biz_data.get("id"), str):
                    return biz_data
        raise ValueError(f"无法解析 chat_session_id:{data}")

    def create_chat_session(self, cookie) -> dict:
        """Create a chat session and return a dict that contains its ``id``.

        Args:
            cookie: value sent in the ``Authorization`` header.

        Raises:
            requests.HTTPError: on a non-2xx response.
            ValueError: when the response carries no session id.
        """
        r = requests.post(
            SESSION_CREATE,
            headers=self.default_headers({"Authorization": cookie}),
            json={},
            timeout=30,
        )
        r.raise_for_status()
        return self._extract_session(r.json())

    def fetch_pow_challenge(self, cookie, target_path="/api/v0/chat/completion") -> dict:
        """Request a PoW challenge for ``target_path`` and return ``data.biz_data.challenge``.

        Raises:
            requests.HTTPError: on a non-2xx response.
            RuntimeError: when the challenge is missing; the message contains
                the full payload.
        """
        r = requests.post(
            POW_CHALLENGE,
            headers=self.default_headers({"Authorization": cookie}),
            json={"target_path": target_path},
            timeout=30,
        )
        r.raise_for_status()
        data = r.json()
        # Walk the envelope defensively: a null "data"/"biz_data" must end in
        # the RuntimeError below (which carries the payload), not in an
        # AttributeError that hides the server's message.
        envelope = data.get("data") if isinstance(data, dict) else None
        biz_data = envelope.get("biz_data") if isinstance(envelope, dict) else None
        ch = biz_data.get("challenge") if isinstance(biz_data, dict) else None
        if not ch:
            raise RuntimeError(f"挑战返回结构异常:{data}")
        return ch

    def solve_answer_fixed_sig(self, challenge_obj: dict) -> int:
        """Solve the PoW challenge via ``calc_pow_with_node`` and return its ``answer``.

        Reads ``challenge``, ``salt``, ``expire_at`` (required) and
        ``difficulty`` (default 200000) from ``challenge_obj``.
        """
        logger.debug(f"开始求解PoW: {challenge_obj}")
        ch = challenge_obj["challenge"]
        salt = challenge_obj["salt"]
        lim = int(challenge_obj.get("difficulty", 200000))
        expire_at = challenge_obj["expire_at"]

        # Runner script and WASM module shipped in the js_data folder.
        res = calc_pow_with_node(
            node_runner_path=str(self.js_data_path / "js_runner.js"),
            wasm_path=str(self.js_data_path / "sha3_wasm_bg.wasm"),
            algorithm="DeepSeekHashV1",
            challenge=ch,
            salt=salt,
            difficulty=lim,
            expire_at=expire_at,
        )
        logger.info(f"PoW求解结果: {res}")
        return res.get("answer")

    def pow_to_header_value(self, algorithm: str, challenge: str, salt: str, answer: int, signature: str,
                            target_path: str) -> str:
        """Build the ``x-ds-pow-response`` header: base64 of the JSON-encoded PoW fields."""
        payload = {
            'algorithm': algorithm,
            'challenge': challenge,
            'salt': salt,
            'answer': answer,
            'signature': signature,
            'target_path': target_path,
        }
        raw = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        return base64.b64encode(raw).decode("ascii")

    def ensure_valid_challenge(self, cookie, max_attempts: int = 20) -> dict:
        """Fetch a challenge that still has more than 5 seconds before ``expire_at``.

        Args:
            cookie: value sent in the ``Authorization`` header.
            max_attempts: maximum number of challenges to request.

        Raises:
            RuntimeError: when no acceptable challenge was obtained within
                ``max_attempts`` tries (prevents an endless request loop,
                e.g. when ``expire_at`` is absent from the response).
        """
        for _ in range(max_attempts):
            ch = self.fetch_pow_challenge(cookie, "/api/v0/chat/completion")
            now_ms = int(time.time() * 1000)
            expire_at = int(ch.get("expire_at", now_ms + 1))
            if expire_at - now_ms > 5000:
                return ch
            time.sleep(0.2)
        raise RuntimeError(f"连续 {max_attempts} 次未获取到有效的PoW挑战")

    @staticmethod
    def _build_completion_payload(chat_session_id, prompt, thinking_enabled, search_enabled,
                                  parent_message_id, client_stream_id) -> dict:
        """Assemble the JSON body of the completion request from the caller's options."""
        payload = {
            "chat_session_id": chat_session_id.get("id"),
            "parent_message_id": parent_message_id,
            "model_type": "default",
            "prompt": prompt,
            "ref_file_ids": [],
            "thinking_enabled": thinking_enabled,
            "search_enabled": search_enabled,
            "preempt": False,
        }
        # Only sent when the caller supplies one.
        if client_stream_id:
            payload["client_stream_id"] = client_stream_id
        return payload

    @staticmethod
    def _delta_text(data: dict) -> Optional[str]:
        """Return the text carried by one event, or ``None`` for structured values.

        Uses ``v``; when ``v`` is absent falls back to
        ``choices.0.delta.content`` (empty string if that is missing too).
        """
        value = data.get('v', None)
        # Dict/list values are structured updates, never text to append.
        if isinstance(value, (dict, list)):
            return None
        if value is None:
            value = glom(data, 'choices.0.delta.content', default="")
        return str(value)

    @staticmethod
    def _consume_sse(lines) -> Tuple[List[Dict], str, str]:
        """Parse SSE lines into ``(search_results, response_text, thinking_text)``.

        Stops at ``[DONE]``. Lines that are not JSON objects are skipped.
        """
        answer_parts = []
        thinking_parts = []
        search_hits = []
        in_content = False
        in_thinking = False

        for raw in lines:
            if not raw:
                continue
            line = raw.strip()
            logger.debug(line)
            data_str = line[6:] if line.startswith("data: ") else line

            if data_str == "[DONE]":
                logger.info("SSE流结束")
                break

            try:
                data = json.loads(data_str)
            except JSONDecodeError:
                continue
            # Scalars/arrays are valid JSON but not events we understand.
            if not isinstance(data, dict):
                continue
            if glom(data, 'v.0.v', default='') == 'TIMEOUT':
                logger.warning("DeepSeek服务器繁忙")

            path = data.get('p', '')

            # Web-search results
            if path == 'response/search_results' and isinstance(data.get('v', ''), list):
                logger.info("获取到联网搜索结果")
                search_hits.extend(data.get('v', []))

            # Thinking text: starts at thinking_content, ends at thinking_elapsed_secs.
            if path == 'response/thinking_content':
                in_thinking = True
            if path == 'response/thinking_elapsed_secs':
                in_thinking = False
            if in_thinking:
                delta = DeepSeekChatClient._delta_text(data)
                if delta is None:
                    continue
                thinking_parts.append(delta)

            # Answer text: every event from the first response/content onwards.
            # NOTE(review): events with other scalar-valued paths arriving after
            # content started are appended too — confirm against the protocol.
            if path == 'response/content':
                in_content = True
            if in_content:
                delta = DeepSeekChatClient._delta_text(data)
                if delta is None:
                    continue
                answer_parts.append(delta)

        return search_hits, "".join(answer_parts), "".join(thinking_parts)

    @staticmethod
    def _build_search_results(search_hits, response_text) -> List[Dict]:
        """Convert raw search hits into result dicts, marking the cited ones.

        A hit at 1-based position ``k`` is marked referenced when
        ``citation:k`` occurs in ``response_text``. Hits lacking a title or
        url are dropped.
        """
        cited = set(re.findall(r'citation:(\d+)', response_text))
        results = []
        for position, hit in enumerate(search_hits, start=1):
            if not isinstance(hit, dict):
                continue
            entry = {
                "url": hit.get('url', ''),
                "title": hit.get('title', ''),
                "body": hit.get('snippet', ''),
                "publish_time": hit.get('published_at', ''),
                "host_name": hit.get('site_name', '未知'),
                "is_referenced": "1" if str(position) in cited else "0",
            }
            if entry["title"] and entry["url"]:
                results.append(entry)
        return results

    def sse_chat_completion(
            self,
            chat_session_id: dict,
            prompt: str,
            cookie: str,
            thinking_enabled: bool = False,
            search_enabled: bool = True,
            parent_message_id: Optional[str] = None,
            client_stream_id: Optional[str] = None,
    ) -> Tuple[List[Dict], str, str]:
        """Send one prompt over SSE and collect the streamed reply.

        Args:
            chat_session_id: dict containing the session ``id``.
            prompt: text to send.
            cookie: value sent in the ``Authorization`` header.
            thinking_enabled: sent as ``thinking_enabled`` in the request body.
            search_enabled: sent as ``search_enabled`` in the request body.
            parent_message_id: sent as ``parent_message_id``.
            client_stream_id: sent as ``client_stream_id`` when provided.

        Returns:
            ``(search_results, answer_text, thinking_text)``.

        Raises:
            requests.HTTPError: on a non-2xx response.
        """
        # 1) Obtain a challenge
        ch = self.ensure_valid_challenge(cookie)
        algorithm = ch.get("algorithm", "DeepSeekHashV1")
        challenge = ch["challenge"]
        salt = ch["salt"]
        signature = ch["signature"]
        target_path = ch.get("target_path", "/api/v0/chat/completion")

        # 2) Solve it
        answer = self.solve_answer_fixed_sig(ch)
        logger.info(f"PoW answer: {answer}")

        # 3) Assemble x-ds-pow-response
        xpow = self.pow_to_header_value(
            algorithm=algorithm,
            challenge=challenge,
            salt=salt,
            answer=answer,
            signature=signature,
            target_path=target_path,
        )

        # 4) Request headers and body
        headers = self.default_headers({"x-ds-pow-response": xpow, "Authorization": cookie})
        headers["x-hif-leim"] = self.tools.get_leim()
        payload = self._build_completion_payload(
            chat_session_id, prompt, thinking_enabled, search_enabled,
            parent_message_id, client_stream_id,
        )

        # 5) Stream the SSE response
        with requests.post(
                CHAT_COMPLETION, headers=headers, json=payload, stream=True, timeout=300
        ) as resp:
            resp.raise_for_status()
            # Event streams are UTF-8; set it explicitly so decode_unicode
            # neither yields bytes nor falls back to a header-derived charset.
            resp.encoding = "utf-8"
            search_hits, response_text, thinking_text = self._consume_sse(
                resp.iter_lines(decode_unicode=True)
            )

        ai_search_result_list = self._build_search_results(search_hits, response_text)
        return ai_search_result_list, response_text, thinking_text
|
|
|
|
|
|
class Start:
    """Entry point: polls the task API and processes tasks one at a time."""

    # Timestamp format used for start_time / end_time in submitted results.
    _TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self):
        self.tools = ToolsLoad()
        self.client = DeepSeekChatClient()

    @retry('处理消息任务', for_work=10)
    def process_task(self, task):
        """Run one task: get a cookie, query DeepSeek, submit the result.

        Args:
            task: task dict; the keys ``id``, ``keyword``, ``platform_id``
                and ``brand`` are read (each defaults to ``""``).

        Returns:
            The submitted result dict, or ``False`` when no usable cookie
            was obtained or the answer came back empty.

        Raises:
            Exception: any error from the chat or submit steps is logged,
                the session/task status is updated, and the error is
                re-raised.
        """
        task_id = task.get("id", "")
        keyword = task.get("keyword", "")
        platform_id = task.get("platform_id", "")
        brand = task.get("brand", "")

        logger.info(f"开始处理任务: {keyword} | task_id: {task_id}")

        # Obtain a session cookie
        session = self.tools.get_cookie(platform_id="1")
        if not session:
            logger.warning('cookie获取失败')
            return False

        cookie = session.get("cookie")
        cookie_id = session.get("id")
        if not cookie:
            logger.warning('cookie为空')
            return False

        try:
            # Record the start before any chat work so that start_time and
            # end_time bracket the request instead of being identical.
            started_at = datetime.now().strftime(self._TIME_FORMAT)

            # Create the chat session
            logger.info("创建chat_session...")
            session_id = self.client.create_chat_session(cookie)
            logger.info(f"新建chat_session_id: {session_id}")

            # Send the prompt
            logger.info(f"发送prompt: {keyword}")
            ai_search_result_list, answer, thinking = self.client.sse_chat_completion(
                chat_session_id=session_id,
                prompt=keyword,
                cookie=cookie,
                thinking_enabled=True,
                search_enabled=True,
            )
            if not answer:
                logger.warning(f"异常结果 {ai_search_result_list} {answer} {thinking}")
                return False

            finished_at = datetime.now().strftime(self._TIME_FORMAT)

            # Build the result
            result = {
                'app_id': 'aa65700299848d6f21b969dbc9f6cf7c',
                'secret': '5588071d36f0bc61af849c311a03f2c4',
                'platform_id': platform_id,
                'platform_name': 'deepseek',
                'prompt': keyword,
                'keyword': brand,
                'answer': answer,
                'search_result': ai_search_result_list,
                'screenshot_file': '',
                'run_status': True,
                'task_id': task_id,
                'rank': 0,
                'start_time': started_at,
                'end_time': finished_at,
                'screenshot_url': '',
                'words': [],
            }

            # Submit the result
            post_resp = self.tools.post_task(result)
            # Log a summary only; the result dict carries the credentials.
            logger.debug(
                f"任务 {task_id} 结果摘要: answer长度={len(answer)}, "
                f"搜索结果数={len(ai_search_result_list)}"
            )
            logger.info(f"任务 {task_id} 提交返回: {post_resp}")
            return result

        except Exception as e:
            error_msg = str(e)
            logger.error(f"任务 {task_id} 处理异常: {error_msg}")

            # Invalid token: report the session with status "2"
            if "Authorization Failed (invalid token)" in error_msg:
                self.tools.update_session(cookie_id, "", "2")

            # Mark the task with status "4"
            if task_id:
                self.tools.update_task_status(task_id, "4")
            raise

    @retry('主运行窗口', for_work=1)
    def start_task_msg(self):
        """Fetch one task and process it.

        Sleeps 5 seconds when the task API returned nothing usable and
        30 seconds when the response has no ``data``. Always returns ``True``
        unless ``process_task`` raises.
        """
        task_resp = self.tools.get_task()
        logger.info(f'获取任务响应: {task_resp}')

        if not task_resp:
            logger.info("get_task 未返回有效数据,等待后重试")
            time.sleep(5)
            return True

        task_data = task_resp.get("data", False)
        if not task_data:
            logger.info("没有任务数据,等待下一轮")
            time.sleep(30)
            return True

        self.process_task(task_data)
        return True

    def run(self):
        """Main loop: process tasks forever, pausing 10 seconds after an error."""
        logger.info("DeepSeek单线程爬虫启动")
        while True:
            try:
                self.start_task_msg()
            except Exception as e:
                logger.error(f"主循环异常: {e}")
                time.sleep(10)
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the crawler loop when this file is executed as a script.
    crawler = Start()
    crawler.run()
|