# _*_ coding: utf-8 _*_
"""Query the tousu.sina.com.cn search API for each configured keyword.

For every keyword in ``config.KEYWORDS`` the script requests pages
``1..config.MAX_PAGE`` of search results, signing each request with the
``get_signature`` function from ``sign.js``, and saves the collected JSON
responses to a timestamped file under ``./data``.
"""
import json
import os
import random
import time
from datetime import datetime

import execjs
import requests
from loguru import logger

import config
from utils import CookieUtils

# Seconds to wait for the server before giving up on a single request.
DEFAULT_TIMEOUT = 30


# sign.js holds the restored signing algorithm; save it next to this script.
def load_js(file_path="sign.js"):
    """Read the JavaScript file at *file_path* and return it compiled by execjs."""
    with open(file_path, "r", encoding="utf-8") as js_file:
        js_code = js_file.read()
    return execjs.compile(js_code)


def generate_signature(keyword, page, js_context):
    """Call the JS function ``get_signature(keyword, page)`` and return its result.

    ``perform_search`` reads the keys ``ts``, ``rs`` and ``signature`` from
    the returned value.
    """
    return js_context.call('get_signature', keyword, page)


def perform_search(cookies, keyword, page, js_context, timeout=DEFAULT_TIMEOUT):
    """Fetch one page of search results and return the decoded JSON body.

    Args:
        cookies: Cookies sent with the request.
        keyword: Search keyword.
        page: Page number to fetch.
        js_context: Compiled JS context used to sign the request.
        timeout: Seconds to wait for the server; passed to ``requests.get``.

    Raises:
        RuntimeError: If the HTTP status is not 200 or the body is not JSON.
        requests.RequestException: On network failures, including timeouts.
    """
    headers = {
        "accept": "*/*",
        "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "priority": "u=1, i",
        "referer": "https://cq.tousu.sina.com.cn/index/search/?keywords=%E5%92%B8%E9%B1%BC%E4%B9%8B%E7%8E%8B&t=1",
        "sec-ch-ua": "\"Chromium\";v=\"128\", \"Not;A=Brand\";v=\"24\", \"Google Chrome\";v=\"128\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\"",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        # Replace with the user agent of your own browser.
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
        "x-requested-with": "XMLHttpRequest"
    }

    signature_data = generate_signature(keyword, page, js_context)
    logger.info(f'抓取第 {page} 页.')
    params = {
        'ts': signature_data['ts'],
        'rs': signature_data['rs'],
        'signature': signature_data['signature'],
        'keywords': keyword,
        'page_size': '10',
        'page': str(page),
    }

    # Without a timeout a stalled connection would block the scraper forever.
    response = requests.get(
        'https://tousu.sina.com.cn/api/index/s',
        cookies=cookies,
        params=params,
        headers=headers,
        proxies=config.PROXY,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise RuntimeError(f"响应异常 状态码: {response.status_code}")

    try:
        return response.json()
    except ValueError as err:
        # Keep the original decoding error attached as the cause.
        raise RuntimeError(f"Json解析异常 响应体: {response.text}") from err


def process_search_results(cookies, keyword, max_page):
    """Fetch pages ``1..max_page`` for *keyword* and return the responses.

    A page that fails is logged and skipped, so the returned list may hold
    fewer than *max_page* entries.
    """
    datas = []
    js_context = load_js()
    for page in range(1, max_page + 1):
        try:
            result = perform_search(cookies, keyword, page, js_context)
            datas.append(result)
            logger.info(f'搜索结果({keyword}[{page}/{max_page}]): {result}')
        except Exception as err:
            # Best effort: one bad page must not abort the remaining pages.
            logger.error(f"出现异常: 关键词: {keyword} 页码: {page}")
            logger.error(f"异常信息: {err}")
        # Random 2-5 second pause between page requests.
        time.sleep(random.randint(2, 5))
    return datas


def main():
    """Run the search for every configured keyword and save each result set."""
    cookies = CookieUtils.cookie_str_to_dict(config.COOKIE)
    os.makedirs('./data', exist_ok=True)

    for keyword in config.KEYWORDS:
        all_result = process_search_results(cookies, keyword, config.MAX_PAGE)
        timestamp = datetime.now().strftime('%Y_%m_%d_%H%M%S')
        with open(f"./data/{keyword}_{timestamp}.json", 'w', encoding='utf-8') as file:
            # Write the document exactly once so the file is a single valid
            # JSON array, even when no page was fetched successfully.
            json.dump(all_result, file, indent=4, ensure_ascii=False)
            file.write("\n")
        logger.success(f"{keyword} 查询结果已保存")


if __name__ == '__main__':
    main()