You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

89 lines
3.1 KiB

# _*_ coding: utf-8 _*_
import execjs
import requests
from loguru import logger
import os
from datetime import datetime
import config
import json
from utils import CookieUtils
# sign.js holds the restored (de-obfuscated) signing algorithm mentioned above; save it next to this script yourself.
def load_js(file_path="sign.js"):
    """Read a JavaScript source file and compile it with execjs.

    Args:
        file_path: Path of the JavaScript file to load. Defaults to
            ``"sign.js"``, resolved against the current working directory.

    Returns:
        The object returned by ``execjs.compile`` for the file's contents.
    """
    # The file is decoded explicitly as UTF-8 rather than the platform default.
    with open(file_path, mode="r", encoding="utf-8") as source_file:
        script_source = source_file.read()
    compiled_context = execjs.compile(script_source)
    return compiled_context
def generate_signature(keyword, page, js_context):
    """Ask the compiled JavaScript context for the signature of one request.

    Args:
        keyword: Search keyword, forwarded unchanged to the JavaScript side.
        page: Page number, forwarded unchanged to the JavaScript side.
        js_context: Object exposing ``call(function_name, *args)``.

    Returns:
        Whatever the JavaScript ``get_signature`` function returns. The
        caller in this file indexes the result with ``'ts'``, ``'rs'`` and
        ``'signature'``.
    """
    js_function_name = 'get_signature'
    signature = js_context.call(js_function_name, keyword, page)
    return signature
def perform_search(cookies, keyword, page, js_context, timeout=10):
    """Fetch one page of search results from the Sina complaint search API.

    Args:
        cookies: Cookie mapping sent with the request.
        keyword: Search keyword.
        page: Page number to fetch.
        js_context: Compiled JavaScript context used to sign the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If the HTTP status is not 200 or the body is not JSON.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    # Original note (translated): obtain these header values yourself,
    # e.g. by copying them from a real browser session.
    headers = {
        "accept": "*/*",
        "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "priority": "u=1, i",
        "referer": "https://cq.tousu.sina.com.cn/index/search/?keywords=%E5%92%B8%E9%B1%BC%E4%B9%8B%E7%8E%8B&t=1",
        "sec-ch-ua": "\"Chromium\";v=\"128\", \"Not;A=Brand\";v=\"24\", \"Google Chrome\";v=\"128\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\"",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
        "x-requested-with": "XMLHttpRequest",
    }

    # The API expects a fresh ts/rs/signature triple for every request.
    signature_data = generate_signature(keyword, page, js_context)
    logger.info(f'抓取第 {page} 页.')
    params = {
        'ts': signature_data['ts'],
        'rs': signature_data['rs'],
        'signature': signature_data['signature'],
        'keywords': keyword,
        'page_size': '10',
        'page': str(page),
    }

    response = requests.get(
        'https://tousu.sina.com.cn/api/index/s',
        cookies=cookies,
        params=params,
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise RuntimeError(f"响应异常 状态码: {response.status_code}")

    try:
        return response.json()
    except ValueError as err:
        # JSON decode errors raised by requests are ValueError subclasses;
        # chain the cause so the original parser error stays visible.
        raise RuntimeError(f"Json解析异常 响应体: {response.text}") from err
def process_search_results(cookies, keyword, max_page):
    """Fetch pages 1 through ``max_page`` (inclusive) for one keyword.

    A failure on one page is logged and skipped, so a single bad page does
    not abort the whole keyword.

    Args:
        cookies: Cookie mapping forwarded to ``perform_search``.
        keyword: Search keyword.
        max_page: Number of the last page to fetch.

    Returns:
        A list of the per-page results that were fetched successfully, in
        page order.
    """
    datas = []
    js_context = load_js()
    # range() excludes its stop value, so use max_page + 1 to include the
    # last page; the progress log below reports "[page/max_page]".
    for page in range(1, max_page + 1):
        try:
            result = perform_search(cookies, keyword, page, js_context)
            datas.append(result)
            logger.info(f'搜索结果({keyword}[{page}/{max_page}]): {result}')
        except Exception as e:
            # Deliberate best-effort: record the failure and continue.
            logger.error(f"出现异常: 关键词: {keyword} 页码: {page}")
            logger.error(f"异常信息: {str(e)}")
    return datas
# Script entry point: run a search for every configured keyword and save each
# keyword's results to its own timestamped JSON file under ./data.
if __name__ == '__main__':
    cookies = CookieUtils.cookie_str_to_dict(config.COOKIE)
    # exist_ok=True replaces the separate exists() check before creating.
    os.makedirs('./data', exist_ok=True)
    for k in config.KEYWORDS:
        all_result = process_search_results(cookies, k, config.MAX_PAGE)
        json_string = json.dumps(all_result, indent=4, ensure_ascii=False)
        timestamp = datetime.now().strftime('%Y_%m_%d_%H%M%S')
        with open(f"./data/{k}_{timestamp}.json", 'w', encoding='utf-8') as file:
            # json_string already holds every page; write it exactly once so
            # the file is a single valid JSON document.
            file.write(f"{json_string}\n")
        logger.success(f"{k} 查询结果已保存")