diff --git a/.gitignore b/.gitignore index ba7a126..e674f9a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,6 @@ config.py screenshot Pipfile Pipfile.lock -logs \ No newline at end of file +logs +__pycache__/ +**/__pycache__/ \ No newline at end of file diff --git a/abs_spider.py b/abs_spider.py index 45c5026..2ed7303 100644 --- a/abs_spider.py +++ b/abs_spider.py @@ -9,6 +9,7 @@ from playwright.async_api import Browser, BrowserContext, Page import config from domain.ai_seo import AiAnswer from utils import create_logger +from utils.ai_seo_api_utils import AiSeoApis from utils.session_utils import get_spider_session logger = create_logger("abs_spider") @@ -67,10 +68,11 @@ class AbstractAiSeoSpider(ABC): screenshot_path = f'{config.SCREENSHOT_BASE_PATH}/{self.platform_name}_{unique_id}.png' return screenshot_path - async def __init_page(self): + async def __init_page(self, id=''): if self.load_session: - self.session_info = await get_spider_session(self.platform_id) + self.session_info = await get_spider_session(self.platform_id, id) if self.platform_id != 8: + print(self.session_info['session_path']) self.browser_content = await self.browser.new_context(storage_state=self.session_info['session_path']) else: self.browser_content = await self.browser.new_context() @@ -118,6 +120,25 @@ class AbstractAiSeoSpider(ABC): finally: await self._close() + async def check_session(self, session_id) -> bool: + await self.__init_page(session_id) + result = await self.do_check_session() + await self._close() + if result: + logger.success(f"[{self.get_platform_name()}]session状态有效! ✅ id: {session_id}") + else: + logger.error(f"[{self.get_platform_name()}]session状态无效! 
❌ id: {session_id}") + + # 更新session状态 + status = None + if self.get_platform_id() != 13: + status = 1 if result else 2 + else: + status = 1 if result else 3 + await AiSeoApis.update_spider_session(session_id, status) + return result + + @abstractmethod async def _do_spider(self) -> AiAnswer: """ @@ -137,3 +158,6 @@ class AbstractAiSeoSpider(ABC): @abstractmethod def get_home_url(self) -> str: pass + @abstractmethod + async def do_check_session(self) -> bool: + pass diff --git a/main.py b/main.py index 7189fd6..c24968b 100644 --- a/main.py +++ b/main.py @@ -12,7 +12,9 @@ import config from abs_spider import AbstractAiSeoSpider from domain.ai_seo import AiAnswer from spiders.ai_seo import * +from spiders.ai_seo.wenxiaoyan import WenxiaoyanSpider from utils.logger_utils import create_logger +from utils.ai_seo_api_utils import AiSeoApis logger = create_logger("app") @@ -24,7 +26,8 @@ SPIDER_CLS = { 5: DouBaoSpider, 6: YiYanSpider, 7: NanometerSpider, - 13: MetasoSpider + 13: MetasoSpider, + 8: WenxiaoyanSpider } @@ -47,7 +50,7 @@ def get_spider(platform_id, prompt, brand, browser) -> AbstractAiSeoSpider: cls = SPIDER_CLS.get(int(platform_id), None) if not cls: raise ValueError(f"未找到对应的爬虫类,platform_id={platform_id}") - return cls(browser, prompt, brand, True) + return cls(browser, prompt, brand) def save_local(ai_answer: AiAnswer): @@ -80,6 +83,14 @@ async def test(): index = index + 1 await asyncio.sleep(config.TEST_INTERVAL * 6) +async def test_check_session(): + sessions = await AiSeoApis.list_spider_session(6) + playwright, browser = await init_browser() + for session in sessions: + spider = get_spider(session['platform_id'], '你好', '品牌词', browser) + result = await spider.check_session(session['id']) + + if __name__ == '__main__': - asyncio.get_event_loop().run_until_complete(test()) + asyncio.get_event_loop().run_until_complete(test_check_session()) diff --git a/spiders/ai_seo/deepseek.py b/spiders/ai_seo/deepseek.py index 20beb3a..f4c1b05 100644 --- a/spiders/ai_seo/deepseek.py 
+++ b/spiders/ai_seo/deepseek.py @@ -87,6 +87,18 @@ class DeepseekSpider(AbstractAiSeoSpider): self.ai_answer.screenshot_file = screenshot_path return self.ai_answer + async def do_check_session(self) -> bool: + try: + await self.browser_page.goto(self.get_home_url(), timeout=600000) + await asyncio.sleep(3) + chat_input_element = self.browser_page.locator("//textarea[@id='chat-input']") + await chat_input_element.click() + # 输入提问词 + await self.browser_page.keyboard.type(self.prompt) + return True + except Exception: + return False + def handle_listen_response_error(self, func): """ 装饰器 用于处理请求回调中的异常 diff --git a/spiders/ai_seo/doubao.py b/spiders/ai_seo/doubao.py index 8c72e48..72dbf97 100644 --- a/spiders/ai_seo/doubao.py +++ b/spiders/ai_seo/doubao.py @@ -16,6 +16,9 @@ logger = create_logger(__name__) class DouBaoSpider(AbstractAiSeoSpider): + async def do_check_session(self) -> bool: + pass + def __init__(self, browser: Browser, prompt: str, keyword: str, think: bool = False): super().__init__(browser, prompt, keyword, think) self.__listen_response = self.handle_listen_response_error(self.__listen_response) diff --git a/spiders/ai_seo/kimi.py b/spiders/ai_seo/kimi.py index 7547fef..eab9d04 100644 --- a/spiders/ai_seo/kimi.py +++ b/spiders/ai_seo/kimi.py @@ -16,6 +16,9 @@ logger = create_logger(__name__) class KimiSpider(AbstractAiSeoSpider): + async def do_check_session(self) -> bool: + pass + def __init__(self, browser: Browser, prompt: str, keyword: str, think: bool = False): super().__init__(browser, prompt, keyword, think) self.__listen_response = self.handle_listen_response_error(self.__listen_response) diff --git a/spiders/ai_seo/metaso.py b/spiders/ai_seo/metaso.py index cae559a..999c659 100644 --- a/spiders/ai_seo/metaso.py +++ b/spiders/ai_seo/metaso.py @@ -112,10 +112,6 @@ class MetasoSpider(AbstractAiSeoSpider): def get_platform_name(self) -> str: return 'Metaso' - - - - async def __listen_response(self, response): url = response.url if 
response.status == 200: @@ -194,6 +190,7 @@ class MetasoSpider(AbstractAiSeoSpider): else: pass + def handle_listen_response_error(self, func): """ 装饰器 用于处理请求回调中的异常 @@ -214,6 +211,26 @@ class MetasoSpider(AbstractAiSeoSpider): return wrapper + async def do_check_session(self) -> bool: + try: + await self.browser_page.goto(self.get_home_url(), timeout=200000) + await asyncio.sleep(2) + info = await self.browser_page.wait_for_selector( + '#left-menu > div > div.LeftMenu_footer__qsJdJ > div > div > div > button', timeout=600000) + await info.click() + edu = self.browser_page.locator( + '//div[@aria-label="每天有100搜索额度"]/following-sibling::div[1]//span[contains(@class, "MuiTypography-root")]') + edu_txt = await edu.text_content() + if edu_txt == '0': + return False + # 开始操作 + chat_input_element = self.browser_page.locator("//textarea[contains(@class, 'search-consult-textarea')]") + # 输入提问词 + await chat_input_element.fill(self.prompt) + logger.info(f"[{self.get_platform_name()}]查询还剩{edu_txt} 次") + return True + except Exception: + return False async def run(): diff --git a/spiders/ai_seo/nanometer.py b/spiders/ai_seo/nanometer.py index 670aeaa..349a8af 100644 --- a/spiders/ai_seo/nanometer.py +++ b/spiders/ai_seo/nanometer.py @@ -18,6 +18,9 @@ logger = create_logger(__name__) class NanometerSpider(AbstractAiSeoSpider): + async def do_check_session(self) -> bool: + pass + def __init__(self, browser: Browser, prompt: str, keyword: str): super().__init__(browser, prompt, keyword) self.load_session = False diff --git a/spiders/ai_seo/tongyi.py b/spiders/ai_seo/tongyi.py index a9a6f1e..8f83ab0 100644 --- a/spiders/ai_seo/tongyi.py +++ b/spiders/ai_seo/tongyi.py @@ -16,6 +16,9 @@ logger = create_logger(__name__) class TongyiSpider(AbstractAiSeoSpider): + async def do_check_session(self) -> bool: + pass + def __init__(self, browser: Browser, prompt: str, keyword: str, think: bool = False): super().__init__(browser, prompt, keyword, think) self.__listen_response = 
self.handle_listen_response_error(self.__listen_response) diff --git a/spiders/ai_seo/wenxiaoyan.py b/spiders/ai_seo/wenxiaoyan.py index b0dda86..41eadcf 100644 --- a/spiders/ai_seo/wenxiaoyan.py +++ b/spiders/ai_seo/wenxiaoyan.py @@ -18,6 +18,9 @@ logger = create_logger(__name__) class WenxiaoyanSpider(AbstractAiSeoSpider): + async def do_check_session(self) -> bool: + pass + def __init__(self, browser: Browser, prompt: str, keyword: str): super().__init__(browser, prompt, keyword) def get_home_url(self) -> str: diff --git a/spiders/ai_seo/yiyan.py b/spiders/ai_seo/yiyan.py index f58e86b..1d3efb7 100644 --- a/spiders/ai_seo/yiyan.py +++ b/spiders/ai_seo/yiyan.py @@ -168,7 +168,6 @@ class YiYanSpider(AbstractAiSeoSpider): )) self.ai_answer.search_result = ai_search_result_list - async def check_login(self): # 找登录后才会出现的侧边栏 try: @@ -182,6 +181,22 @@ class YiYanSpider(AbstractAiSeoSpider): await AiSeoApis.update_spider_session(self.session_info['id'], 2) raise Exception(f"{self.get_platform_name()}登录失败 session_id: {self.session_info['id']}") + async def do_check_session(self) -> bool: + try: + await self.browser_page.goto(self.get_home_url(), timeout=200000) + await asyncio.sleep(2) + # 检查登录状态 + await self.check_login() + # 开始操作 + chat_input_element = self.browser_page.locator("//div[@class='yc-editor']") + await chat_input_element.click() + await asyncio.sleep(2) + # 输入提问词 + await self.browser_page.keyboard.insert_text(self.prompt) + return True + except Exception: + return False + def get_platform_id(self) -> int: return 6 diff --git a/spiders/ai_seo/yuanbao.py b/spiders/ai_seo/yuanbao.py index 75d23f6..b508123 100644 --- a/spiders/ai_seo/yuanbao.py +++ b/spiders/ai_seo/yuanbao.py @@ -17,6 +17,9 @@ logger = create_logger(__name__) class YuanBaoSpider(AbstractAiSeoSpider): + async def do_check_session(self) -> bool: + pass + def __init__(self, browser: Browser, prompt: str, keyword: str, think: bool = False): super().__init__(browser, prompt, keyword, think) 
self.__listen_response = self.handle_listen_response_error(self.__listen_response) diff --git a/utils/ai_seo_api_utils.py b/utils/ai_seo_api_utils.py index 79aaf9c..cd8755e 100644 --- a/utils/ai_seo_api_utils.py +++ b/utils/ai_seo_api_utils.py @@ -170,15 +170,39 @@ class AiSeoApis: return json_result['data'] @staticmethod - async def get_spider_session(platform_id): + async def get_spider_session(platform_id, id=''): """ 获取爬虫会话 :param platform_id: + :param id: :return: """ uri = '/api/third/getOneSpiderSession' url = AiSeoApis.build_full_url(uri) json_data = {**config.AI_SEO_API_AUTH, 'platform_id': platform_id} + if id: + json_data['id'] = id async with httpx.AsyncClient() as client: response = await client.get(url, params=json_data, timeout=60) json_result = response.json() if not json_result['code'] == 0: logger.error(f"获取爬虫session失败") return None return json_result['data'] + + @staticmethod + async def list_spider_session(platform_id='', status=''): + """ + 获取爬虫session列表 + :param platform_id: + :param status: + :return: + """ + uri = '/api/third/getSpiderSessionList' + url = AiSeoApis.build_full_url(uri) + json_data = {**config.AI_SEO_API_AUTH, 'platform_id': platform_id, 'status': status} + if not status: + del json_data['status'] async with httpx.AsyncClient() as client: response = await client.get(url, params=json_data, timeout=60) json_result = response.json() diff --git a/utils/session_utils.py b/utils/session_utils.py index fbc71dc..4a648ff 100644 --- a/utils/session_utils.py +++ b/utils/session_utils.py @@ -5,7 +5,7 @@ import config from utils.ai_seo_api_utils import AiSeoApis -async def get_spider_session(platform_id): +async def get_spider_session(platform_id, id=''): """ 获取可用的爬虫session :param platform_id: @@ -13,7 +13,7 @@ async def get_spider_session(platform_id): """ base_path = f'{config.ROOT_PATH}/data/session_data' # 爬虫信息 - session_info = await AiSeoApis.get_spider_session(platform_id) + session_info = await 
AiSeoApis.get_spider_session(platform_id, id) if not session_info: raise Exception(f"平台id: {platform_id} 没有可用的爬虫session") if platform_id == 8: