You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							94 lines
						
					
					
						
							3.3 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							94 lines
						
					
					
						
							3.3 KiB
						
					
					
				| # _*_ coding: utf-8 _*_ | |
| 
 | |
| import json | |
| import os | |
| import random | |
| import time | |
| from datetime import datetime | |
| 
 | |
| import execjs | |
| import requests | |
| from loguru import logger | |
| 
 | |
| import config | |
| from utils import CookieUtils | |
| 
 | |
| 
 | |
# sign.js holds the restored signing algorithm shown above; save it locally yourself.
def load_js(file_path="sign.js"):
    """Read a JavaScript source file and compile it with execjs.

    Args:
        file_path: Path of the JavaScript file to load; defaults to
            ``sign.js`` relative to the current working directory.

    Returns:
        The object returned by ``execjs.compile`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    with open(file_path, mode="r", encoding="utf-8") as source_file:
        script_text = source_file.read()
    # The text is fully read, so the handle can be closed before compiling.
    return execjs.compile(script_text)
| 
 | |
| 
 | |
def generate_signature(keyword, page, js_context):
    """Invoke the JS ``get_signature`` function for one keyword/page pair.

    Args:
        keyword: Search keyword forwarded to the JS function.
        page: Page number forwarded to the JS function.
        js_context: Object exposing ``call(name, *args)``, such as the value
            returned by ``load_js``.

    Returns:
        Whatever ``js_context.call`` returns for ``get_signature``.
    """
    signature = js_context.call('get_signature', keyword, page)
    return signature
| 
 | |
| 
 | |
def perform_search(cookies, keyword, page, js_context, timeout=30):
    """Fetch one page of results from the tousu.sina.com.cn search endpoint.

    Args:
        cookies: Cookie mapping passed straight to ``requests.get``.
        keyword: Search keyword, sent as the ``keywords`` query parameter.
        page: Page number to request; sent as a string.
        js_context: Compiled JS context handed to ``generate_signature``.
        timeout: Seconds to wait for the server, passed to ``requests.get``.
            Without it a stalled connection would block the caller forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If the status code is not 200, or the body is not
            valid JSON (the decoding error is chained as the cause).

    Errors raised by ``requests.get`` itself (connection failures, timeouts)
    propagate unchanged.
    """
    headers = {
        "accept": "*/*",
        "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "priority": "u=1, i",
        "referer": "https://cq.tousu.sina.com.cn/index/search/?keywords=%E5%92%B8%E9%B1%BC%E4%B9%8B%E7%8E%8B&t=1",
        "sec-ch-ua": "\"Chromium\";v=\"128\", \"Not;A=Brand\";v=\"24\", \"Google Chrome\";v=\"128\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\"",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
        # Obtain this yourself.
        "x-requested-with": "XMLHttpRequest"
    }

    # The signature covers the keyword and page, so it is regenerated for
    # every request.
    signature_data = generate_signature(keyword, page, js_context)
    logger.info(f'抓取第 {page} 页.')
    params = {
        'ts': signature_data['ts'],
        'rs': signature_data['rs'],
        'signature': signature_data['signature'],
        'keywords': keyword,
        'page_size': '10',
        'page': str(page),
    }

    response = requests.get('https://tousu.sina.com.cn/api/index/s',
                            cookies=cookies, params=params,
                            headers=headers, proxies=config.PROXY,
                            timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"响应异常 状态码: {response.status_code}")
    try:
        return response.json()
    except ValueError as exc:
        # JSON decoding errors raised by ``Response.json`` derive from
        # ValueError; keep the original error as the cause for debugging.
        raise RuntimeError(f"Json解析异常 响应体: {response.text}") from exc
| 
 | |
| 
 | |
def process_search_results(cookies, keyword, max_page):
    """Collect search results for *keyword* from page 1 through *max_page*.

    Each page is fetched with ``perform_search``. A failing page is logged
    and skipped rather than aborting the run, so the returned list may hold
    fewer than ``max_page`` entries.

    Args:
        cookies: Cookie mapping forwarded to ``perform_search``.
        keyword: Search keyword to query.
        max_page: Last page number to fetch (inclusive).

    Returns:
        A list with the result of every page that was fetched successfully,
        in page order.
    """
    js_context = load_js()
    collected = []
    page_numbers = range(1, max_page + 1)
    for page in page_numbers:
        try:
            payload = perform_search(cookies, keyword, page, js_context)
            collected.append(payload)
            logger.info(f'搜索结果({keyword}[{page}/{max_page}]): {payload}')
        except Exception as exc:
            # Best effort: record the failure and move on to the next page.
            logger.error(f"出现异常: 关键词: {keyword} 页码: {page}")
            logger.error(f"异常信息: {exc!s}")
        # Random 2-5 second pause between requests, on success or failure.
        pause_seconds = random.randint(2, 5)
        time.sleep(pause_seconds)
    return collected
| 
 | |
| 
 | |
if __name__ == '__main__':
    # Script entry point: for every configured keyword, fetch up to
    # config.MAX_PAGE pages of results and dump them to a timestamped JSON
    # file under ./data.
    cookies = CookieUtils.cookie_str_to_dict(config.COOKIE)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('./data', exist_ok=True)
    for k in config.KEYWORDS:
        all_result = process_search_results(cookies, k, config.MAX_PAGE)
        json_string = json.dumps(all_result, indent=4, ensure_ascii=False)
        timestamp = datetime.now().strftime('%Y_%m_%d_%H%M%S')
        with open(f"./data/{k}_{timestamp}.json", 'w', encoding='utf-8') as file:
            # Write the dump exactly once. Writing it once per result item
            # duplicated the whole document and produced invalid JSON, and
            # left the file empty when there were no results.
            file.write(f"{json_string}\n")
        logger.success(f"{k} 查询结果已保存")
 |