From a2dd40d1d9d6675feede2b0790a3b31b7e98c094 Mon Sep 17 00:00:00 2001
From: zzx
Date: Fri, 1 Aug 2025 16:25:20 +0800
Subject: [PATCH 1/5] feat: add deepseek session check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 abs_spider.py              | 22 ++++++++++++++++++++--
 main.py                    | 17 ++++++++++++++---
 spiders/ai_seo/deepseek.py | 12 ++++++++++++
 utils/ai_seo_api_utils.py  | 26 +++++++++++++++++++++++++-
 utils/session_utils.py     |  4 ++--
 5 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/abs_spider.py b/abs_spider.py
index 45c5026..432f1d9 100644
--- a/abs_spider.py
+++ b/abs_spider.py
@@ -9,6 +9,7 @@ from playwright.async_api import Browser, BrowserContext, Page
 import config
 from domain.ai_seo import AiAnswer
 from utils import create_logger
+from utils.ai_seo_api_utils import AiSeoApis
 from utils.session_utils import get_spider_session
 
 logger = create_logger("abs_spider")
@@ -67,9 +68,9 @@ class AbstractAiSeoSpider(ABC):
         screenshot_path = f'{config.SCREENSHOT_BASE_PATH}/{self.platform_name}_{unique_id}.png'
         return screenshot_path
 
-    async def __init_page(self):
+    async def __init_page(self, id=''):
         if self.load_session:
-            self.session_info = await get_spider_session(self.platform_id)
+            self.session_info = await get_spider_session(self.platform_id, id)
             if self.platform_id != 8:
                 self.browser_content = await self.browser.new_context(storage_state=self.session_info['session_path'])
             else:
@@ -118,6 +119,20 @@ class AbstractAiSeoSpider(ABC):
         finally:
             await self._close()
 
+    async def check_session(self, session_id) -> bool:
+        await self.__init_page(session_id)
+        result = await self.do_check_session()
+        await self._close()
+        if result:
+            logger.success(f"[{self.get_platform_name()}]session状态有效! ✅ id: {session_id}")
+        else:
+            logger.error(f"[{self.get_platform_name()}]session状态无效! ❌ id: {session_id}")
+
+        # 更新session状态
+        await AiSeoApis.update_spider_session(session_id, 1 if result else 2)
+        return result
+
+
     @abstractmethod
     async def _do_spider(self) -> AiAnswer:
         """
@@ -137,3 +152,6 @@ class AbstractAiSeoSpider(ABC):
     @abstractmethod
     def get_home_url(self) -> str:
         pass
+    @abstractmethod
+    async def do_check_session(self) -> bool:
+        pass
diff --git a/main.py b/main.py
index 7189fd6..73acdfa 100644
--- a/main.py
+++ b/main.py
@@ -12,7 +12,9 @@ import config
 from abs_spider import AbstractAiSeoSpider
 from domain.ai_seo import AiAnswer
 from spiders.ai_seo import *
+from spiders.ai_seo.wenxiaoyan import WenxiaoyanSpider
 from utils.logger_utils import create_logger
+from utils.ai_seo_api_utils import AiSeoApis
 
 logger = create_logger("app")
 
@@ -24,7 +26,8 @@ SPIDER_CLS = {
     5: DouBaoSpider,
     6: YiYanSpider,
     7: NanometerSpider,
-    13: MetasoSpider
+    13: MetasoSpider,
+    8: WenxiaoyanSpider
 }
 
 
@@ -47,7 +50,7 @@ def get_spider(platform_id, prompt, brand, browser) -> AbstractAiSeoSpider:
     cls = SPIDER_CLS.get(int(platform_id), None)
     if not cls:
         raise ValueError(f"未找到对应的爬虫类,platform_id={platform_id}")
-    return cls(browser, prompt, brand, True)
+    return cls(browser, prompt, brand)
 
 
 def save_local(ai_answer: AiAnswer):
@@ -80,6 +83,14 @@ async def test():
         index = index + 1
         await asyncio.sleep(config.TEST_INTERVAL * 6)
 
+async def test_check_session():
+    sessions = await AiSeoApis.list_spider_session(1)
+    playwright, browser = await init_browser()
+    for session in sessions:
+        spider = get_spider(session['platform_id'], '你好', '品牌词', browser)
+        result = await spider.check_session(session['id'])
+
+
 if __name__ == '__main__':
-    asyncio.get_event_loop().run_until_complete(test())
+    asyncio.get_event_loop().run_until_complete(test_check_session())
diff --git a/spiders/ai_seo/deepseek.py b/spiders/ai_seo/deepseek.py
index e9ec9ac..348cb84 100644
--- a/spiders/ai_seo/deepseek.py
+++ b/spiders/ai_seo/deepseek.py
@@ -87,6 +87,18 @@ class DeepseekSpider(AbstractAiSeoSpider):
             self.ai_answer.screenshot_file = screenshot_path
         return self.ai_answer
 
+    async def do_check_session(self) -> bool:
+        try:
+            await self.browser_page.goto(self.get_home_url(), timeout=600000)
+            await asyncio.sleep(3)
+            chat_input_element = self.browser_page.locator("//textarea[@id='chat-input']")
+            await chat_input_element.click()
+            # 输入提问词
+            await self.browser_page.keyboard.type(self.prompt)
+            return True
+        except Exception:
+            return False
+
     def handle_listen_response_error(self, func):
         """
         装饰器 用于处理请求回调中的异常
diff --git a/utils/ai_seo_api_utils.py b/utils/ai_seo_api_utils.py
index 79aaf9c..80c134b 100644
--- a/utils/ai_seo_api_utils.py
+++ b/utils/ai_seo_api_utils.py
@@ -170,15 +170,39 @@ class AiSeoApis:
             return json_result['data']
 
     @staticmethod
-    async def get_spider_session(platform_id):
+    async def get_spider_session(platform_id, id=''):
         """
         获取爬虫会话
         :param platform_id:
+        :param id:
         :return:
         """
         uri = '/api/third/getOneSpiderSession'
         url = AiSeoApis.build_full_url(uri)
         json_data = {**config.AI_SEO_API_AUTH, 'platform_id': platform_id}
+        if not id:
+            json_data['id'] = id
+        async with httpx.AsyncClient() as client:
+            response = await client.get(url, params=json_data, timeout=60)
+            json_result = response.json()
+            if not json_result['code'] == 0:
+                logger.error(f"获取爬虫session失败")
+                return None
+            return json_result['data']
+
+    @staticmethod
+    async def list_spider_session(platform_id='', status=''):
+        """
+        获取爬虫session列表
+        :param platform_id:
+        :param status:
+        :return:
+        """
+        uri = '/api/third/getSpiderSessionList'
+        url = AiSeoApis.build_full_url(uri)
+        json_data = {**config.AI_SEO_API_AUTH, 'platform_id': platform_id, 'status': status}
+        if not id:
+            json_data['id'] = id
         async with httpx.AsyncClient() as client:
             response = await client.get(url, params=json_data, timeout=60)
             json_result = response.json()
diff --git a/utils/session_utils.py b/utils/session_utils.py
index fbc71dc..4a648ff 100644
--- a/utils/session_utils.py
+++ b/utils/session_utils.py
@@ -5,7 +5,7 @@ import config
 from utils.ai_seo_api_utils import AiSeoApis
 
 
-async def get_spider_session(platform_id):
+async def get_spider_session(platform_id, id=''):
     """
     获取可用的爬虫session
     :param platform_id:
@@ -13,7 +13,7 @@ async def get_spider_session(platform_id):
     """
     base_path = f'{config.ROOT_PATH}/data/session_data'
     # 爬虫信息
-    session_info = await AiSeoApis.get_spider_session(platform_id)
+    session_info = await AiSeoApis.get_spider_session(platform_id, id)
     if not session_info:
         raise Exception(f"平台id: {platform_id} 没有可用的爬虫session")
     if platform_id == 8:
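
Note on patch 1/5: check_session() is only exercised by test_check_session() in main.py, which hard-codes a single platform id and discards the result. The sketch below shows one way the pieces introduced here could be driven together. It only reuses names that already appear in this series (init_browser and get_spider from main.py, AiSeoApis.list_spider_session, and the new AbstractAiSeoSpider.check_session); the platform-id list and the per-session exception handling are assumptions, not part of the patch.

# session_check_driver.py -- illustrative sketch only, not part of this series.
import asyncio

from main import init_browser, get_spider        # helpers used in main.py's tests
from utils.ai_seo_api_utils import AiSeoApis     # list_spider_session / update_spider_session

# 6 = YiYan and 13 = Metaso come from SPIDER_CLS in main.py;
# 1 for deepseek is an assumption based on the test_check_session(1) call above.
PLATFORM_IDS = [1, 6, 13]


async def check_all_sessions():
    playwright, browser = await init_browser()
    try:
        for platform_id in PLATFORM_IDS:
            sessions = await AiSeoApis.list_spider_session(platform_id) or []
            for session in sessions:
                # The prompt is only typed into the input box by do_check_session,
                # never submitted, so a throwaway value is enough.
                spider = get_spider(session['platform_id'], '你好', '品牌词', browser)
                try:
                    await spider.check_session(session['id'])
                except Exception as e:
                    # check_session itself logs and updates the session status on a
                    # normal run; this only guards against unexpected errors.
                    print(f"platform {platform_id}, session {session['id']}: check failed: {e}")
    finally:
        await browser.close()


if __name__ == '__main__':
    asyncio.get_event_loop().run_until_complete(check_all_sessions())
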
From e7c780c32230814d770c96c3e8e0a622ad2828de Mon Sep 17 00:00:00 2001
From: zzx
Date: Fri, 1 Aug 2025 16:36:18 +0800
Subject: [PATCH 2/5] feat: add YiYan (文心一言) session check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 spiders/ai_seo/yiyan.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/spiders/ai_seo/yiyan.py b/spiders/ai_seo/yiyan.py
index f58e86b..1d3efb7 100644
--- a/spiders/ai_seo/yiyan.py
+++ b/spiders/ai_seo/yiyan.py
@@ -168,7 +168,6 @@ class YiYanSpider(AbstractAiSeoSpider):
             ))
         self.ai_answer.search_result = ai_search_result_list
 
-
     async def check_login(self):
         # 找登录后才会出现的侧边栏
         try:
@@ -182,6 +181,22 @@ class YiYanSpider(AbstractAiSeoSpider):
             await AiSeoApis.update_spider_session(self.session_info['id'], 2)
             raise Exception(f"{self.get_platform_name()}登录失败 session_id: {self.session_info['id']}")
 
+    async def do_check_session(self) -> bool:
+        try:
+            await self.browser_page.goto(self.get_home_url(), timeout=200000)
+            await asyncio.sleep(2)
+            # 检查登录状态
+            await self.check_login()
+            # 开始操作
+            chat_input_element = self.browser_page.locator("//div[@class='yc-editor']")
+            await chat_input_element.click()
+            await asyncio.sleep(2)
+            # 输入提问词
+            await self.browser_page.keyboard.insert_text(self.prompt)
+            return True
+        except Exception:
+            return False
+
     def get_platform_id(self) -> int:
         return 6

From b5a04c3751cbfc2ae22a85ea91173312ea3398f0 Mon Sep 17 00:00:00 2001
From: zzx
Date: Fri, 1 Aug 2025 18:10:37 +0800
Subject: [PATCH 3/5] feat: add Metaso AI Search session check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 abs_spider.py             |  8 +++++++-
 main.py                   |  2 +-
 spiders/ai_seo/metaso.py  | 25 +++++++++++++++++++++----
 utils/ai_seo_api_utils.py |  2 +-
 4 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/abs_spider.py b/abs_spider.py
index 432f1d9..2ed7303 100644
--- a/abs_spider.py
+++ b/abs_spider.py
@@ -72,6 +72,7 @@ class AbstractAiSeoSpider(ABC):
         if self.load_session:
             self.session_info = await get_spider_session(self.platform_id, id)
             if self.platform_id != 8:
+                print(self.session_info['session_path'])
                 self.browser_content = await self.browser.new_context(storage_state=self.session_info['session_path'])
             else:
                 self.browser_content = await self.browser.new_context()
@@ -129,7 +130,12 @@ class AbstractAiSeoSpider(ABC):
             logger.error(f"[{self.get_platform_name()}]session状态无效! ❌ id: {session_id}")
 
         # 更新session状态
-        await AiSeoApis.update_spider_session(session_id, 1 if result else 2)
+        status = None
+        if self.get_platform_id() != 13:
+            status = 1 if result else 2
+        else:
+            status = 1 if result else 3
+        await AiSeoApis.update_spider_session(session_id, status)
         return result
 
 
diff --git a/main.py b/main.py
index 73acdfa..f55a4c7 100644
--- a/main.py
+++ b/main.py
@@ -84,7 +84,7 @@ async def test():
         await asyncio.sleep(config.TEST_INTERVAL * 6)
 
 async def test_check_session():
-    sessions = await AiSeoApis.list_spider_session(1)
+    sessions = await AiSeoApis.list_spider_session(13)
     playwright, browser = await init_browser()
     for session in sessions:
         spider = get_spider(session['platform_id'], '你好', '品牌词', browser)
diff --git a/spiders/ai_seo/metaso.py b/spiders/ai_seo/metaso.py
index cae559a..999c659 100644
--- a/spiders/ai_seo/metaso.py
+++ b/spiders/ai_seo/metaso.py
@@ -112,10 +112,6 @@ class MetasoSpider(AbstractAiSeoSpider):
     def get_platform_name(self) -> str:
         return 'Metaso'
 
-
-
-
-
     async def __listen_response(self, response):
         url = response.url
         if response.status == 200:
@@ -194,6 +190,7 @@ class MetasoSpider(AbstractAiSeoSpider):
         else:
             pass
 
+
     def handle_listen_response_error(self, func):
         """
         装饰器 用于处理请求回调中的异常
@@ -214,6 +211,26 @@ class MetasoSpider(AbstractAiSeoSpider):
 
         return wrapper
 
+    async def do_check_session(self) -> bool:
+        try:
+            await self.browser_page.goto(self.get_home_url(), timeout=200000)
+            await asyncio.sleep(2)
+            info = await self.browser_page.wait_for_selector(
+                '#left-menu > div > div.LeftMenu_footer__qsJdJ > div > div > div > button', timeout=600000)
+            await info.click()
+            edu = self.browser_page.locator(
+                '//div[@aria-label="每天有100搜索额度"]/following-sibling::div[1]//span[contains(@class, "MuiTypography-root")]')
+            edu_txt = await edu.text_content()
+            if edu_txt == '0':
+                return False
+            # 开始操作
+            chat_input_element = self.browser_page.locator("//textarea[contains(@class, 'search-consult-textarea')]")
+            # 输入提问词
+            await chat_input_element.fill(self.prompt)
+            logger.info(f"[{self.get_platform_name()}]查询还剩{edu_txt} 次")
+            return True
+        except Exception:
+            return False
 
 
 async def run():
diff --git a/utils/ai_seo_api_utils.py b/utils/ai_seo_api_utils.py
index 80c134b..cd8755e 100644
--- a/utils/ai_seo_api_utils.py
+++ b/utils/ai_seo_api_utils.py
@@ -180,7 +180,7 @@ class AiSeoApis:
         uri = '/api/third/getOneSpiderSession'
         url = AiSeoApis.build_full_url(uri)
         json_data = {**config.AI_SEO_API_AUTH, 'platform_id': platform_id}
-        if not id:
+        if id:
             json_data['id'] = id
         async with httpx.AsyncClient() as client:
             response = await client.get(url, params=json_data, timeout=60)

From 0f0f50101e72755d03a4e534c1b3a1c9de3f45d8 Mon Sep 17 00:00:00 2001
From: zzx
Date: Fri, 1 Aug 2025 18:29:47 +0800
Subject: [PATCH 4/5] feat: update gitignore
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index ba7a126..e674f9a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,6 @@ config.py
 screenshot
 Pipfile
 Pipfile.lock
-logs
\ No newline at end of file
+logs
+__pycache__/
+**/__pycache__/
\ No newline at end of file
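
Note on patch 3/5: a failed check now maps to session status 2 for most platforms but to status 3 for Metaso (platform id 13), while a passed check is always status 1; the branch lives directly in AbstractAiSeoSpider.check_session. The snippet below is only a self-contained restatement of that mapping for clarity -- the constant names and the session_status() helper are hypothetical and do not exist in the series.

# Hypothetical restatement of the status mapping from patch 3/5.
VALID = 1            # check passed
INVALID = 2          # check failed, non-Metaso platforms
METASO_INVALID = 3   # check failed, Metaso (platform id 13)


def session_status(platform_id: int, check_passed: bool) -> int:
    # Mirrors the if/else added to check_session() in abs_spider.py.
    if check_passed:
        return VALID
    return METASO_INVALID if platform_id == 13 else INVALID


assert session_status(13, False) == 3
assert session_status(6, False) == 2
assert session_status(13, True) == 1
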
From 72a5b48977762cee7eb81e56726023434982923f Mon Sep 17 00:00:00 2001
From: zzx
Date: Mon, 4 Aug 2025 09:53:57 +0800
Subject: [PATCH 5/5] refactor: implement do_check_session in all spider
 scripts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main.py                      | 2 +-
 spiders/ai_seo/doubao.py     | 3 +++
 spiders/ai_seo/kimi.py       | 3 +++
 spiders/ai_seo/nanometer.py  | 3 +++
 spiders/ai_seo/tongyi.py     | 3 +++
 spiders/ai_seo/wenxiaoyan.py | 3 +++
 spiders/ai_seo/yuanbao.py    | 3 +++
 7 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/main.py b/main.py
index f55a4c7..c24968b 100644
--- a/main.py
+++ b/main.py
@@ -84,7 +84,7 @@ async def test():
         await asyncio.sleep(config.TEST_INTERVAL * 6)
 
 async def test_check_session():
-    sessions = await AiSeoApis.list_spider_session(13)
+    sessions = await AiSeoApis.list_spider_session(6)
     playwright, browser = await init_browser()
     for session in sessions:
         spider = get_spider(session['platform_id'], '你好', '品牌词', browser)
diff --git a/spiders/ai_seo/doubao.py b/spiders/ai_seo/doubao.py
index 8c72e48..72dbf97 100644
--- a/spiders/ai_seo/doubao.py
+++ b/spiders/ai_seo/doubao.py
@@ -16,6 +16,9 @@ logger = create_logger(__name__)
 
 
 class DouBaoSpider(AbstractAiSeoSpider):
+    async def do_check_session(self) -> bool:
+        pass
+
     def __init__(self, browser: Browser, prompt: str, keyword: str, think: bool = False):
         super().__init__(browser, prompt, keyword, think)
         self.__listen_response = self.handle_listen_response_error(self.__listen_response)
diff --git a/spiders/ai_seo/kimi.py b/spiders/ai_seo/kimi.py
index 7547fef..eab9d04 100644
--- a/spiders/ai_seo/kimi.py
+++ b/spiders/ai_seo/kimi.py
@@ -16,6 +16,9 @@ logger = create_logger(__name__)
 
 
 class KimiSpider(AbstractAiSeoSpider):
+    async def do_check_session(self) -> bool:
+        pass
+
     def __init__(self, browser: Browser, prompt: str, keyword: str, think: bool = False):
         super().__init__(browser, prompt, keyword, think)
         self.__listen_response = self.handle_listen_response_error(self.__listen_response)
diff --git a/spiders/ai_seo/nanometer.py b/spiders/ai_seo/nanometer.py
index 670aeaa..349a8af 100644
--- a/spiders/ai_seo/nanometer.py
+++ b/spiders/ai_seo/nanometer.py
@@ -18,6 +18,9 @@ logger = create_logger(__name__)
 
 
 class NanometerSpider(AbstractAiSeoSpider):
+    async def do_check_session(self) -> bool:
+        pass
+
     def __init__(self, browser: Browser, prompt: str, keyword: str):
         super().__init__(browser, prompt, keyword)
         self.load_session = False
diff --git a/spiders/ai_seo/tongyi.py b/spiders/ai_seo/tongyi.py
index a9a6f1e..8f83ab0 100644
--- a/spiders/ai_seo/tongyi.py
+++ b/spiders/ai_seo/tongyi.py
@@ -16,6 +16,9 @@ logger = create_logger(__name__)
 
 
 class TongyiSpider(AbstractAiSeoSpider):
+    async def do_check_session(self) -> bool:
+        pass
+
     def __init__(self, browser: Browser, prompt: str, keyword: str, think: bool = False):
         super().__init__(browser, prompt, keyword, think)
         self.__listen_response = self.handle_listen_response_error(self.__listen_response)
diff --git a/spiders/ai_seo/wenxiaoyan.py b/spiders/ai_seo/wenxiaoyan.py
index b0dda86..41eadcf 100644
--- a/spiders/ai_seo/wenxiaoyan.py
+++ b/spiders/ai_seo/wenxiaoyan.py
@@ -18,6 +18,9 @@ logger = create_logger(__name__)
 
 
 class WenxiaoyanSpider(AbstractAiSeoSpider):
+    async def do_check_session(self) -> bool:
+        pass
+
     def __init__(self, browser: Browser, prompt: str, keyword: str):
         super().__init__(browser, prompt, keyword)
     def get_home_url(self) -> str:
diff --git a/spiders/ai_seo/yuanbao.py b/spiders/ai_seo/yuanbao.py
index 75d23f6..b508123 100644
--- a/spiders/ai_seo/yuanbao.py
+++ b/spiders/ai_seo/yuanbao.py
@@ -17,6 +17,9 @@ logger = create_logger(__name__)
 
 
 class YuanBaoSpider(AbstractAiSeoSpider):
+    async def do_check_session(self) -> bool:
+        pass
+
     def __init__(self, browser: Browser, prompt: str, keyword: str, think: bool = False):
         super().__init__(browser, prompt, keyword, think)
         self.__listen_response = self.handle_listen_response_error(self.__listen_response)
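
Note on patch 5/5: the six new do_check_session() overrides are placeholder stubs; `pass` returns None, which check_session() treats as a failed check (the session is logged as invalid and its status updated accordingly). The earlier deepseek/YiYan/Metaso implementations suggest the shape a real override takes: open the home page, wait, locate the logged-in chat input, and type the prompt without submitting it. A sketch for DouBaoSpider is below, shown as a minimal subclass purely for illustration (in the series the method lives directly on the existing class); the input-box selector and timeouts are hypothetical placeholders, not taken from the real Doubao page, and the attributes it relies on (browser_page, prompt, get_home_url) come from AbstractAiSeoSpider.

# Hypothetical sketch only -- the selector is a placeholder, not verified
# against the Doubao site.
import asyncio

from abs_spider import AbstractAiSeoSpider


class DouBaoSpider(AbstractAiSeoSpider):
    async def do_check_session(self) -> bool:
        try:
            await self.browser_page.goto(self.get_home_url(), timeout=200000)
            await asyncio.sleep(2)
            # Placeholder XPath: whatever uniquely identifies the logged-in chat box.
            chat_input_element = self.browser_page.locator("//textarea[@id='REPLACE-with-real-chat-input-id']")
            await chat_input_element.click()
            # Type the prompt without submitting, mirroring the deepseek check.
            await self.browser_page.keyboard.type(self.prompt)
            return True
        except Exception:
            return False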