From fceab79e0ae17951957a21e4dc1bccf4e2897c08 Mon Sep 17 00:00:00 2001 From: zzx Date: Mon, 24 Feb 2025 11:00:31 +0800 Subject: [PATCH] =?UTF-8?q?:sparkles:=20=E5=8A=A0=E4=BB=A3=E7=90=86?= =?UTF-8?q?=E5=92=8C=E9=9A=8F=E6=9C=BA=E5=81=9C=E9=A1=BF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + config.py | 6 +++++- main.py | 19 ++++++++++++------- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 7633447..374942c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ __pycache__/ data .idea .vscode +config.py # Distribution / packaging .Python diff --git a/config.py b/config.py index ac1a63d..c098209 100644 --- a/config.py +++ b/config.py @@ -1,5 +1,9 @@ # coding=utf-8 KEYWORDS = ['咸鱼之王'] -MAX_PAGE = 10 +MAX_PAGE = 5 COOKIE = 'UOR=www.baidu.com,tousu.sina.com.cn,; SINAGLOBAL=180.109.135.223_1734009009.184774; Apache=180.109.135.223_1734009009.184775; ULV=1737698014297:2:1:1:180.109.135.223_1734009009.184775:1734009008920; ALF=02_1742823137; SCF=AnbhzHKrnUQl7Hr1ketFkwfNrrNrnZoluPHwHKF6Cd5jepxd4jnBttaKovu1rtniHOjeKih3dtFzpJfX3fnoexE.; SUB=_2A25KvaGxDeRhGeVO6VQW9S7FzD2IHXVpsrt5rDV_PUJbkNAbLRGkkW9NTWl_zXi3K6i7_10g-b280K9gc5zMpYjq; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WFTxn..WU0jomaP4CuWPAC85NHD95Q0ehzcS0-71KMpWs4DqcjMi--NiK.Xi-2Ri--ciKnRi-zNMcxo1g-NIPLNd7tt; U_TRS1=000000d8.6b0c8bd59.67b9d1e3.a609672c; U_TRS2=000000d8.6b148bd59.67b9d1e3.70e955ed; HM-AMT=%7B%22amt%22%3A24089921%2C%22amt24h%22%3A15814%2C%22v%22%3A%222.3.168%22%2C%22vPcJs%22%3A%221.6.79%22%2C%22vPcCss%22%3A%221.2.393%22%7D' +PROXY = { + "http:": "http://127.0.0.1:10809", + "https:": "http://127.0.0.1:10809", +} diff --git a/main.py b/main.py index 9c708e6..0d549eb 100644 --- a/main.py +++ b/main.py @@ -1,13 +1,16 @@ # _*_ coding: utf-8 _*_ +import json +import os +import random +import time +from datetime import datetime + import execjs import requests from loguru import logger -import os -from datetime import datetime -import config -import json +import config from utils import CookieUtils @@ -52,7 +55,9 @@ def perform_search(cookies, keyword, page, js_context): 'page': str(page), } - response = requests.get('https://tousu.sina.com.cn/api/index/s', cookies=cookies, params=params, headers=headers) + response = requests.get('https://tousu.sina.com.cn/api/index/s', + cookies=cookies, params=params, + headers=headers, proxies=config.PROXY) if not response.status_code == 200: raise RuntimeError(f"响应异常 状态码: {response.status_code}") try: @@ -64,7 +69,7 @@ def perform_search(cookies, keyword, page, js_context): def process_search_results(cookies, keyword, max_page): datas = [] js_context = load_js() - for page in range(1, max_page): + for page in range(1, max_page + 1): try: result = perform_search(cookies, keyword, page, js_context) datas.append(result) @@ -72,6 +77,7 @@ def process_search_results(cookies, keyword, max_page): except Exception as e: logger.error(f"出现异常: 关键词: {keyword} 页码: {page}") logger.error(f"异常信息: {str(e)}") + time.sleep(random.randint(2, 5)) return datas @@ -86,4 +92,3 @@ if __name__ == '__main__': for item in all_result: file.write(f"{json_string}\n") logger.success(f"{k} 查询结果已保存") -