From a2dd40d1d9d6675feede2b0790a3b31b7e98c094 Mon Sep 17 00:00:00 2001 From: zzx Date: Fri, 1 Aug 2025 16:25:20 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9Edeepseek=20session?= =?UTF-8?q?=E6=A3=80=E6=9F=A5=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- abs_spider.py | 22 ++++++++++++++++++++-- main.py | 17 ++++++++++++++--- spiders/ai_seo/deepseek.py | 12 ++++++++++++ utils/ai_seo_api_utils.py | 26 +++++++++++++++++++++++++- utils/session_utils.py | 4 ++-- 5 files changed, 73 insertions(+), 8 deletions(-) diff --git a/abs_spider.py b/abs_spider.py index 45c5026..432f1d9 100644 --- a/abs_spider.py +++ b/abs_spider.py @@ -9,6 +9,7 @@ from playwright.async_api import Browser, BrowserContext, Page import config from domain.ai_seo import AiAnswer from utils import create_logger +from utils.ai_seo_api_utils import AiSeoApis from utils.session_utils import get_spider_session logger = create_logger("abs_spider") @@ -67,9 +68,9 @@ class AbstractAiSeoSpider(ABC): screenshot_path = f'{config.SCREENSHOT_BASE_PATH}/{self.platform_name}_{unique_id}.png' return screenshot_path - async def __init_page(self): + async def __init_page(self, id=''): if self.load_session: - self.session_info = await get_spider_session(self.platform_id) + self.session_info = await get_spider_session(self.platform_id, id) if self.platform_id != 8: self.browser_content = await self.browser.new_context(storage_state=self.session_info['session_path']) else: @@ -118,6 +119,20 @@ class AbstractAiSeoSpider(ABC): finally: await self._close() + async def check_session(self, session_id) -> bool: + await self.__init_page(session_id) + result = await self.do_check_session() + await self._close() + if result: + logger.success(f"[{self.get_platform_name()}]session状态有效! ✅ id: {session_id}") + else: + logger.error(f"[{self.get_platform_name()}]session状态无效! 
❌ id: {session_id}") + + # 更新session状态 + await AiSeoApis.update_spider_session(session_id, 1 if result else 2) + return result + + @abstractmethod async def _do_spider(self) -> AiAnswer: """ @@ -137,3 +152,6 @@ class AbstractAiSeoSpider(ABC): @abstractmethod def get_home_url(self) -> str: pass + @abstractmethod + async def do_check_session(self) -> bool: + pass diff --git a/main.py b/main.py index 7189fd6..73acdfa 100644 --- a/main.py +++ b/main.py @@ -12,7 +12,9 @@ import config from abs_spider import AbstractAiSeoSpider from domain.ai_seo import AiAnswer from spiders.ai_seo import * +from spiders.ai_seo.wenxiaoyan import WenxiaoyanSpider from utils.logger_utils import create_logger +from utils.ai import AiSeoApis logger = create_logger("app") @@ -24,7 +26,8 @@ SPIDER_CLS = { 5: DouBaoSpider, 6: YiYanSpider, 7: NanometerSpider, - 13: MetasoSpider + 13: MetasoSpider, + 8: WenxiaoyanSpider } @@ -47,7 +50,7 @@ def get_spider(platform_id, prompt, brand, browser) -> AbstractAiSeoSpider: cls = SPIDER_CLS.get(int(platform_id), None) if not cls: raise ValueError(f"未找到对应的爬虫类,platform_id={platform_id}") - return cls(browser, prompt, brand, True) + return cls(browser, prompt, brand) def save_local(ai_answer: AiAnswer): @@ -80,6 +83,14 @@ async def test(): index = index + 1 await asyncio.sleep(config.TEST_INTERVAL * 6) +async def test_check_session(): + sessions = await AiSeoApis.list_spider_session(1) + playwright, browser = await init_browser() + for session in sessions: + spider = get_spider(session['platform_id'], '你好', '品牌词', browser) + result = await spider.check_session(session['id']) + + if __name__ == '__main__': - asyncio.get_event_loop().run_until_complete(test()) + asyncio.get_event_loop().run_until_complete(test_check_session()) diff --git a/spiders/ai_seo/deepseek.py b/spiders/ai_seo/deepseek.py index e9ec9ac..348cb84 100644 --- a/spiders/ai_seo/deepseek.py +++ b/spiders/ai_seo/deepseek.py @@ -87,6 +87,18 @@ class DeepseekSpider(AbstractAiSeoSpider): 
self.ai_answer.screenshot_file = screenshot_path return self.ai_answer + async def do_check_session(self) -> bool: + try: + await self.browser_page.goto(self.get_home_url(), timeout=600000) + await asyncio.sleep(3) + chat_input_element = self.browser_page.locator("//textarea[@id='chat-input']") + await chat_input_element.click() + # 输入提问词 + await self.browser_page.keyboard.type(self.prompt) + return True + except Exception: + return False + def handle_listen_response_error(self, func): """ 装饰器 用于处理请求回调中的异常 diff --git a/utils/ai_seo_api_utils.py b/utils/ai_seo_api_utils.py index 79aaf9c..80c134b 100644 --- a/utils/ai_seo_api_utils.py +++ b/utils/ai_seo_api_utils.py @@ -170,15 +170,37 @@ class AiSeoApis: return json_result['data'] @staticmethod - async def get_spider_session(platform_id): + async def get_spider_session(platform_id, id=''): """ 获取爬虫会话 :param platform_id: + :param id: :return: """ uri = '/api/third/getOneSpiderSession' url = AiSeoApis.build_full_url(uri) json_data = {**config.AI_SEO_API_AUTH, 'platform_id': platform_id} + if id: + json_data['id'] = id async with httpx.AsyncClient() as client: response = await client.get(url, params=json_data, timeout=60) json_result = response.json() if not json_result['code'] == 0: logger.error(f"获取爬虫session失败") return None return json_result['data'] + + @staticmethod + async def list_spider_session(platform_id='', status=''): + """ + 获取爬虫session列表 + :param platform_id: + :param status: + :return: + """ + uri = '/api/third/getSpiderSessionList' + url = AiSeoApis.build_full_url(uri) + json_data = {**config.AI_SEO_API_AUTH, 'platform_id': platform_id, 'status': status} async with httpx.AsyncClient() as client: response = await client.get(url, params=json_data, timeout=60) json_result = response.json() diff --git a/utils/session_utils.py b/utils/session_utils.py index fbc71dc..4a648ff 100644 --- a/utils/session_utils.py +++ b/utils/session_utils.py @@ -5,7 +5,7 @@ import 
config from utils.ai_seo_api_utils import AiSeoApis -async def get_spider_session(platform_id): +async def get_spider_session(platform_id, id=''): """ 获取可用的爬虫session :param platform_id: @@ -13,7 +13,7 @@ async def get_spider_session(platform_id): """ base_path = f'{config.ROOT_PATH}/data/session_data' # 爬虫信息 - session_info = await AiSeoApis.get_spider_session(platform_id) + session_info = await AiSeoApis.get_spider_session(platform_id, id) if not session_info: raise Exception(f"平台id: {platform_id} 没有可用的爬虫session") if platform_id == 8: