# ai项目
# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

# 494 lines
# 18 KiB

# -*- coding: utf-8 -*-
"""
DeepSeek Web API 单线程版本
- 参考 kimistart.py 的类结构
- 使用 loguru 日志系统
- 单线程循环处理任务
"""
import base64
import json
import os
import pathlib
import random
import re
import string
import time
from datetime import datetime
from json import JSONDecodeError
from typing import Dict, List, Optional, Tuple
import requests
from glom import glom
from loguru import logger
from deep.ai_seo import AiSearchResult
from deep.ds_test import calc_pow_with_node
from utlit.retry import retry
# Logging setup: add a file sink located in the parent of this module's directory.
cwd = os.path.dirname(
    os.path.dirname(os.path.abspath(__file__))
)
logger.add(
    f"{cwd}/deepseek.log",
    level="DEBUG",
    rotation="00:00",
    retention="3 days",
    compression="zip",
    backtrace=True,
)

# Constants: browser identity sent with DeepSeek requests.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"
)

# DeepSeek web endpoints; the three API URLs are all derived from BASE.
BASE = "https://chat.deepseek.com"
ORIGIN = "https://chat.deepseek.com"
REFERER = "https://chat.deepseek.com/"
SESSION_CREATE = BASE + "/api/v0/chat_session/create"
POW_CHALLENGE = BASE + "/api/v0/chat/create_pow_challenge"
CHAT_COMPLETION = BASE + "/api/v0/chat/completion"

# Task backend: base URL and the matching Host header value.
TASK_URL = 'https://api.granking.com'
TASK_HOST = 'api.granking.com'
class ToolsLoad:
    """Utility class wrapping the HTTP calls to the task/session backend.

    Every method is a thin wrapper around ``requests``.  Most of them are
    wrapped by the project's ``retry`` decorator; its exact retry semantics are
    defined outside this file (NOTE(review): confirm how it treats a ``False``
    return value versus an exception).
    """

    # Base URL of the spider-session endpoints used for cookies.
    _SESSION_API = "http://granking-api.neicela.com/api/third"
    # Credential fragment appended to backend query strings.
    _CREDENTIALS = "app_id=aa65700299848d6f21b969dbc9f6cf7c&secret=5588071d36f0bc61af849c311a03f2c4"
    # User-Agent sent to the backend endpoints.
    _CLIENT_UA = 'Apifox/1.0.0 (http://apifox.com)'
    # (connect, read) timeout for calls that previously had none; without one a
    # stalled connection would block the single-threaded main loop forever.
    _TIMEOUT = (5, 30)

    @retry('获取deepseek cookie', 0, time_sleep=30)
    def get_cookie(self, platform_id="1"):
        """Fetch one spider session for ``platform_id``.

        Returns:
            The ``data`` member of the JSON response (the caller reads its
            ``cookie`` and ``id`` keys), or ``False`` when ``data`` is missing,
            null or empty.
        """
        url = f"{self._SESSION_API}/getOneSpiderSession?platform_id={platform_id}&{self._CREDENTIALS}"
        headers = {
            'Authorization': 'Bearer ',
            'User-Agent': self._CLIENT_UA,
        }
        response = requests.get(url, headers=headers, timeout=self._TIMEOUT).json()
        data = response.get("data")
        # Treat null/missing data like the empty list: there is no session to use.
        if not data:
            logger.warning(f'没有获取到cookie: {response}')
            return False
        logger.info(f'成功获取到cookie: {data}')
        return data

    @retry('上传cookie状态', 5)
    def update_session(self, id, reload_time, status="4"):
        """Report a new status for the spider session ``id``.

        Args:
            id: Session identifier as returned by :meth:`get_cookie`.
            reload_time: Value forwarded unchanged as ``reload_time``.
            status: Status code string forwarded unchanged (meaning is defined
                by the backend).

        Returns:
            The raw response body text.
        """
        url = f"{self._SESSION_API}/updateSpiderSession?{self._CREDENTIALS}"
        payload = json.dumps({
            "id": id,
            "status": status,
            "reload_time": reload_time,
        })
        headers = {
            # NOTE(review): '{{lang}}' looks like an unresolved API-client
            # template placeholder; kept as-is, confirm whether it is needed.
            'lang': '{{lang}}',
            'Authorization': 'Bearer ',
            'User-Agent': self._CLIENT_UA,
            'Content-Type': 'application/json',
        }
        response = requests.post(url, headers=headers, data=payload, timeout=self._TIMEOUT)
        logger.info(f'更新session状态: {response.text}')
        return response.text

    @retry('提交结果', 5)
    def post_task(self, data):
        """Submit a finished task result and return the decoded JSON response.

        Raises:
            requests.HTTPError: If the backend answers with an error status.
        """
        url = f"{TASK_URL}/api/third/submitProjectTask"
        headers = {
            'User-Agent': self._CLIENT_UA,
            'Content-Type': 'application/json',
            'Accept': '*/*',
            'Host': TASK_HOST,
            'Connection': 'keep-alive',
            'Cookie': 'lang=zh-cn',
        }
        resp = requests.post(url, headers=headers, json=data, timeout=(5, 300))
        resp.raise_for_status()
        return resp.json()

    @retry('获取task消息', 5)
    def get_task(self):
        """Fetch the next task for platform ``1`` and return the decoded JSON.

        Raises:
            requests.HTTPError: If the backend answers with an error status.
        """
        url = f"{TASK_URL}/api/third/getTask?{self._CREDENTIALS}&platform_ids=1"
        resp = requests.get(url, timeout=(5, 20))
        resp.raise_for_status()
        return resp.json()

    @retry('更新任务状态', 1)
    def update_task_status(self, task_id, status):
        """Set the status of ``task_id`` and return the decoded JSON response."""
        url = f"{TASK_URL}/api/third/updateTask?{self._CREDENTIALS}"
        payload = json.dumps({
            "task_id": task_id,
            "status": status,
        })
        headers = {
            'User-Agent': self._CLIENT_UA,
            'Content-Type': 'application/json',
            'Accept': '*/*',
            # The Host header must match the server behind TASK_URL, exactly as
            # post_task() does; a different host name was sent here before.
            'Host': TASK_HOST,
            'Connection': 'keep-alive',
            'Cookie': 'lang=zh-cn',
        }
        response = requests.post(url, headers=headers, data=payload, timeout=self._TIMEOUT)
        return response.json()

    def get_leim(self):
        """Return ``data.biz_data.value`` from the hif-leim query endpoint.

        The value is sent by ``DeepSeekChatClient.sse_chat_completion`` as the
        ``x-hif-leim`` request header.

        Raises:
            RuntimeError: If the response does not contain that nested value
                structure (the payload is included in the message).
        """
        url = 'https://hif-leim.deepseek.com/query'
        payload = requests.get(url, timeout=self._TIMEOUT).json()
        node = payload
        for key in ("data", "biz_data"):
            node = node.get(key) if isinstance(node, dict) else None
        if not isinstance(node, dict):
            raise RuntimeError(f"hif-leim 返回结构异常:{payload}")
        return node.get("value")
class DeepSeekChatClient:
    """Client for the DeepSeek web chat endpoints.

    Covers the full request flow: create a chat session, fetch and solve a
    proof-of-work (PoW) challenge, then stream a completion over SSE and collect
    the search results, the answer text and the thinking text.
    """

    # Fixed suffix appended to every ``client_stream_id`` sent with a completion.
    _STREAM_ID_SUFFIX = "-3e910d848b6140d5"
    # Upper bound on challenge fetches in ensure_valid_challenge().
    _MAX_CHALLENGE_ATTEMPTS = 50

    def __init__(self):
        # The PoW runner script and wasm file are looked up in ./js_data next
        # to this module.
        self.base_path = pathlib.Path(__file__).resolve().parent
        self.js_data_path = self.base_path / "js_data"
        self.tools = ToolsLoad()

    def default_headers(self, extra: Optional[Dict[str, str]] = None) -> Dict[str, str]:
        """Return the base request headers, overlaid with ``extra`` if given."""
        h = {
            "Authorization": '',
            "Content-Type": "application/json",
            "Origin": ORIGIN,
            "Referer": REFERER,
            "User-Agent": USER_AGENT,
            "Accept": "*/*",
        }
        if extra:
            h.update(extra)
        return h

    def create_chat_session(self, cookie) -> dict:
        """Create a chat session and return a dict holding its string ``id``.

        Args:
            cookie: Value sent as the ``Authorization`` header.

        Raises:
            requests.HTTPError: On an HTTP error status.
            ValueError: If no session id can be found in the response; the
                message includes the full payload, which the caller inspects
                for the invalid-token marker.
        """
        r = requests.post(
            SESSION_CREATE,
            headers=self.default_headers({"Authorization": cookie}),
            json={},
            timeout=30,
        )
        r.raise_for_status()
        return self._extract_session(r.json())

    @staticmethod
    def _extract_session(data) -> dict:
        """Locate the session id in any of the supported response layouts."""
        if isinstance(data, dict):
            if isinstance(data.get("id"), str):
                return {"id": data["id"]}
            inner = data.get("data")
            if isinstance(inner, dict):
                if isinstance(inner.get("id"), str):
                    return {"id": inner["id"]}
                nested = inner.get("data")
                if isinstance(nested, dict) and isinstance(nested.get("id"), str):
                    return {"id": nested["id"]}
                biz_data = inner.get("biz_data")
                if isinstance(biz_data, dict) and isinstance(biz_data.get("id"), str):
                    # This layout returns the whole biz_data dict, not just the id.
                    return biz_data
        # Previously a missing/invalid biz_data was returned as-is (possibly
        # None), which only failed later with an unrelated AttributeError.
        raise ValueError(f"无法解析 chat_session_id:{data}")

    def fetch_pow_challenge(self, cookie, target_path="/api/v0/chat/completion") -> dict:
        """Request a PoW challenge for ``target_path`` and return the challenge dict.

        Raises:
            requests.HTTPError: On an HTTP error status.
            RuntimeError: If the response carries no ``data.biz_data.challenge``.
        """
        r = requests.post(
            POW_CHALLENGE,
            headers=self.default_headers({"Authorization": cookie}),
            json={"target_path": target_path},
            timeout=30,
        )
        r.raise_for_status()
        return self._extract_challenge(r.json())

    @staticmethod
    def _extract_challenge(data) -> dict:
        """Return ``data.biz_data.challenge``; tolerate null intermediate levels."""
        node = data
        for key in ("data", "biz_data", "challenge"):
            node = node.get(key) if isinstance(node, dict) else None
        if not node:
            raise RuntimeError(f"挑战返回结构异常:{data}")
        return node

    def solve_answer_fixed_sig(self, challenge_obj: dict) -> int:
        """Solve the PoW challenge through the Node runner and return the answer.

        Reads ``challenge``, ``salt``, ``expire_at`` and the optional
        ``difficulty`` (default 200000) from ``challenge_obj``.

        Raises:
            KeyError: If a required challenge field is missing.
        """
        logger.debug(f"开始求解PoW: {challenge_obj}")
        ch = challenge_obj["challenge"]
        salt = challenge_obj["salt"]
        lim = int(challenge_obj.get("difficulty", 200000))
        expire_at = challenge_obj["expire_at"]
        # Both files live in the js_data folder next to this module.
        node_runner = str(self.js_data_path / "js_runner.js")
        wasm_file = str(self.js_data_path / "sha3_wasm_bg.wasm")
        res = calc_pow_with_node(
            node_runner_path=node_runner,
            wasm_path=wasm_file,
            algorithm="DeepSeekHashV1",
            challenge=ch,
            salt=salt,
            difficulty=lim,
            expire_at=expire_at,
        )
        logger.info(f"PoW求解结果: {res}")
        return res.get("answer")

    def pow_to_header_value(self, algorithm: str, challenge: str, salt: str, answer: int, signature: str,
                            target_path: str) -> str:
        """Build the ``x-ds-pow-response`` header: base64 of the UTF-8 JSON payload."""
        payload = {
            'algorithm': algorithm,
            'challenge': challenge,
            'salt': salt,
            'answer': answer,
            'signature': signature,
            'target_path': target_path,
        }
        raw = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        return base64.b64encode(raw).decode("ascii")

    def ensure_valid_challenge(self, cookie) -> dict:
        """Return a challenge that stays valid for more than 5000 ms.

        Challenges are re-fetched (0.2 s apart) until one qualifies.

        Raises:
            RuntimeError: If no suitable challenge is obtained within
                ``_MAX_CHALLENGE_ATTEMPTS`` fetches.  The loop used to be
                unbounded and would spin forever, e.g. when ``expire_at`` is
                absent from the challenge.
        """
        for _ in range(self._MAX_CHALLENGE_ATTEMPTS):
            ch = self.fetch_pow_challenge(cookie, "/api/v0/chat/completion")
            now_ms = int(time.time() * 1000)
            expire_at = int(ch.get("expire_at", now_ms + 1))
            if expire_at - now_ms > 5000:
                return ch
            time.sleep(0.2)
        raise RuntimeError(f"连续 {self._MAX_CHALLENGE_ATTEMPTS} 次未获取到有效的PoW挑战")

    @staticmethod
    def _chunk_text(data: dict) -> Optional[str]:
        """Return the text carried by one SSE event, or ``None`` for a dict ``v``.

        Falls back to ``choices.0.delta.content`` when the event has no ``v``.
        """
        value = data.get('v', None)
        if isinstance(value, dict):
            return None
        if value is None:
            value = glom(data, 'choices.0.delta.content', default="")
        return str(value)

    def _parse_sse_stream(self, lines) -> Tuple[List, str, str]:
        """Consume decoded SSE lines; return (search hits, answer, thinking).

        NOTE(review): once ``response/content`` has been seen, the non-dict
        ``v`` of every later event is appended to the answer regardless of its
        ``p`` path (the thinking phase behaves the same until
        ``response/thinking_elapsed_secs``).  This mirrors the existing
        behaviour; confirm it is intended for non-text events.
        """
        answer_parts: List[str] = []
        thinking_parts: List[str] = []
        search_hits: List = []
        in_content = False
        in_thinking = False

        for raw in lines:
            if not raw:
                continue
            line = raw.strip()
            data_str = line[6:] if line.startswith("data: ") else line
            if data_str == "[DONE]":
                logger.info("SSE流结束")
                break
            try:
                data = json.loads(data_str)
            except JSONDecodeError:
                continue
            # Valid JSON that is not an object cannot carry an event; skip it
            # instead of failing on ``.get``.
            if not isinstance(data, dict):
                continue
            if glom(data, 'v.0.v', default='') == 'TIMEOUT':
                logger.warning("DeepSeek服务器繁忙")

            path = data.get('p', '')

            # Web search results.
            if path == 'response/search_results' and isinstance(data.get('v', ''), list):
                logger.info("获取到联网搜索结果")
                search_hits.extend(data.get('v', []))

            # Thinking text.
            if path == 'response/thinking_content':
                in_thinking = True
            if path == 'response/thinking_elapsed_secs':
                in_thinking = False
            if in_thinking:
                text = self._chunk_text(data)
                if text is None:
                    continue
                thinking_parts.append(text)

            # Answer text.
            if path == 'response/content':
                in_content = True
            if in_content:
                text = self._chunk_text(data)
                if text is None:
                    continue
                answer_parts.append(text)

        return search_hits, "".join(answer_parts), "".join(thinking_parts)

    @staticmethod
    def _build_search_results(search_result_lists, response_text) -> List[Dict]:
        """Convert raw search hits into result dicts and flag the cited ones.

        A hit is flagged when its 1-based position appears as ``citation:N`` in
        ``response_text``.  Hits without both a title and a URL are dropped.
        """
        cited = set(re.findall(r'citation:(\d+)', response_text))
        results = []
        for index, search_result in enumerate(search_result_lists):
            if not isinstance(search_result, dict):
                continue
            dic = {
                "url": search_result.get('url', ''),
                "title": search_result.get('title', ''),
                "body": search_result.get('snippet', ''),
                "publish_time": search_result.get('published_at', ''),
                "host_name": search_result.get('site_name', '未知'),
                "is_referenced": "1" if str(index + 1) in cited else "0",
            }
            if dic["title"] and dic["url"]:
                results.append(dic)
        return results

    def sse_chat_completion(
            self,
            chat_session_id: dict,
            prompt: str,
            cookie: str,
            thinking_enabled: bool = False,
            search_enabled: bool = True,
            parent_message_id: Optional[str] = None,
            client_stream_id: Optional[str] = None,
    ) -> Tuple[List[Dict], str, str]:
        """Send one prompt over SSE and return (search results, answer, thinking).

        Args:
            chat_session_id: Dict whose ``id`` is the chat session id.
            prompt: Prompt text.
            cookie: Value sent as the ``Authorization`` header.
            thinking_enabled: Forwarded as ``thinking_enabled``.
            search_enabled: Forwarded as ``search_enabled``.
            parent_message_id: Forwarded as ``parent_message_id``.
            client_stream_id: Stream id prefix; defaults to today's ``YYYYMMDD``.

        Raises:
            requests.HTTPError: On an HTTP error status.
            KeyError: If the challenge lacks ``challenge``, ``salt`` or ``signature``.
        """
        # 1) Obtain a challenge and read its fields up front (fail fast).
        ch = self.ensure_valid_challenge(cookie)
        algorithm = ch.get("algorithm", "DeepSeekHashV1")
        challenge = ch["challenge"]
        salt = ch["salt"]
        signature = ch["signature"]
        target_path = ch.get("target_path", "/api/v0/chat/completion")

        # 2) Solve it.
        answer = self.solve_answer_fixed_sig(ch)
        logger.info(f"PoW answer: {answer}")

        # 3) Assemble the x-ds-pow-response header value.
        xpow = self.pow_to_header_value(
            algorithm=algorithm,
            challenge=challenge,
            salt=salt,
            answer=answer,
            signature=signature,
            target_path=target_path,
        )

        # 4) Build headers and payload.
        h = self.default_headers({"x-ds-pow-response": xpow, "Authorization": cookie})
        h["x-hif-leim"] = self.tools.get_leim()
        if not client_stream_id:
            client_stream_id = f"{time.strftime('%Y%m%d')}"
        payload = {
            "chat_session_id": chat_session_id.get("id"),
            "parent_message_id": parent_message_id,
            "prompt": prompt,
            "ref_file_ids": [],
            "thinking_enabled": thinking_enabled,
            "search_enabled": search_enabled,
            "client_stream_id": client_stream_id + self._STREAM_ID_SUFFIX,
        }

        # 5) Stream the completion.
        with requests.post(
                CHAT_COMPLETION, headers=h, json=payload, stream=True, timeout=300
        ) as resp:
            resp.raise_for_status()
            # Event streams are UTF-8; without an explicit charset requests would
            # fall back to ISO-8859-1 for text/* (or yield bytes when no
            # encoding is known), breaking the line parsing below.
            resp.encoding = "utf-8"
            search_hits, response_text, thinking_text = self._parse_sse_stream(
                resp.iter_lines(decode_unicode=True)
            )

        ai_search_result_list = self._build_search_results(search_hits, response_text)
        return ai_search_result_list, response_text, thinking_text
class Start:
    """Entry-point class: polls the backend for tasks and processes them one by one."""

    def __init__(self):
        self.tools = ToolsLoad()
        self.client = DeepSeekChatClient()

    @retry('处理消息任务', for_work=10)
    def process_task(self, task):
        """Run one task against DeepSeek and submit the result.

        Args:
            task: Task dict; the keys ``id``, ``keyword``, ``platform_id`` and
                ``brand`` are read (each defaults to an empty string).

        Returns:
            The submitted result dict on success, or ``False`` when no cookie is
            available or the completion produced no thinking text.

        Raises:
            Exception: Any error raised while talking to DeepSeek or submitting
                is re-raised after the task has been marked as failed.
        """
        # Taken before any work so start_time and end_time bracket the run;
        # previously both fields carried the same completion timestamp.
        started_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        task_id = task.get("id", "")
        keyword = task.get("keyword", "")
        platform_id = task.get("platform_id", "")
        brand = task.get("brand", "")
        logger.info(f"开始处理任务: {keyword} | task_id: {task_id}")

        # Obtain a cookie (always requested for platform "1").
        response = self.tools.get_cookie(platform_id="1")
        if not response:
            logger.warning('cookie获取失败')
            return False
        cookie = response.get("cookie")
        cookie_id = response.get("id")
        if not cookie:
            logger.warning('cookie为空')
            return False

        try:
            # Create the chat session.
            logger.info("创建chat_session...")
            session_id = self.client.create_chat_session(cookie)
            logger.info(f"新建chat_session_id: {session_id}")

            # Send the prompt.
            logger.info(f"发送prompt: {keyword}")
            ai_search_result_list, answer, thinking = self.client.sse_chat_completion(
                chat_session_id=session_id,
                prompt=keyword,
                cookie=cookie,
                thinking_enabled=True,
                search_enabled=True,
            )
            # Thinking is requested above, so an empty thinking text is treated
            # as an abnormal result.
            if thinking == '':
                logger.warning(f"异常结果 {ai_search_result_list} {answer} {thinking}")
                return False

            # Build the result payload.
            finished_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            result = {
                'app_id': 'aa65700299848d6f21b969dbc9f6cf7c',
                'secret': '5588071d36f0bc61af849c311a03f2c4',
                'platform_id': platform_id,
                'platform_name': 'deepseek',
                'prompt': keyword,
                'keyword': brand,
                'answer': answer,
                'search_result': ai_search_result_list,
                'screenshot_file': '',
                'run_status': True,
                'task_id': task_id,
                'rank': 0,
                'start_time': started_at,
                'end_time': finished_at,
                'screenshot_url': '',
                'words': [],
            }

            # Submit the result.
            post_resp = self.tools.post_task(result)
            # Log the payload without the credentials instead of printing it whole.
            loggable = {k: v for k, v in result.items() if k not in ('app_id', 'secret')}
            logger.debug(f"任务 {task_id} 提交数据: {loggable}")
            logger.info(f"任务 {task_id} 提交返回: {post_resp}")
            return result
        except Exception as e:
            error_msg = str(e)
            logger.exception(f"任务 {task_id} 处理异常: {error_msg}")
            # Invalid token: report the session with status "2".
            if "Authorization Failed (invalid token)" in error_msg:
                self.tools.update_session(cookie_id, "", "2")
            # Mark the task as failed (status "4").
            if task_id:
                self.tools.update_task_status(task_id, "4")
            raise

    @retry('主运行窗口', for_work=1)
    def start_task_msg(self):
        """Fetch one task from the backend and process it.

        The task is read from the ``data`` member of the backend response (a
        response is expected to look like ``{'code': 0, 'msg': 'success',
        'data': {...task fields...}}``).  Sleeps 5 s when the backend returned
        nothing usable and 30 s when there is no task.

        Returns:
            Always ``True``.
        """
        task_resp = self.tools.get_task()
        logger.info(f'获取任务响应: {task_resp}')
        if not task_resp:
            logger.info("get_task 未返回有效数据,等待后重试")
            time.sleep(5)
            return True
        task_data = task_resp.get("data", False)
        if not task_data:
            logger.info("没有任务数据,等待下一轮")
            time.sleep(30)
            return True
        self.process_task(task_data)
        return True

    def run(self):
        """Main loop: poll forever, logging any error and pausing 10 s after it."""
        logger.info("DeepSeek单线程爬虫启动")
        while True:
            try:
                self.start_task_msg()
            except Exception as e:
                logger.error(f"主循环异常: {e}")
                time.sleep(10)
# Script entry point: build the crawler and enter its endless polling loop.
if __name__ == "__main__":
    crawler = Start()
    crawler.run()