|  | @ -13,6 +13,7 @@ import config | 
		
	
		
			
				|  |  | from abs_spider import AbstractAiSeoSpider |  |  | from abs_spider import AbstractAiSeoSpider | 
		
	
		
			
				|  |  | from domain.ai_seo import AiAnswer, AiSearchResult |  |  | from domain.ai_seo import AiAnswer, AiSearchResult | 
		
	
		
			
				|  |  | from utils import create_logger |  |  | from utils import create_logger | 
		
	
		
			
				|  |  |  |  |  | from utils.captcha import get_slide_offset_from_base64 | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
		
			
				|  |  | logger = create_logger(__name__) |  |  | logger = create_logger(__name__) | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
	
		
			
				|  | @ -29,6 +30,7 @@ class MetasoSpider(AbstractAiSeoSpider): | 
		
	
		
			
				|  |  |     async def _do_spider(self) -> AiAnswer: |  |  |     async def _do_spider(self) -> AiAnswer: | 
		
	
		
			
				|  |  |         # 初始化信息 |  |  |         # 初始化信息 | 
		
	
		
			
				|  |  |         self._init_data() |  |  |         self._init_data() | 
		
	
		
			
				|  |  |  |  |  |         self.browser_page.on('response', partial(self.__listen_response)) | 
		
	
		
			
				|  |  |         await self.browser_page.goto(self.get_home_url(), timeout=600000) |  |  |         await self.browser_page.goto(self.get_home_url(), timeout=600000) | 
		
	
		
			
				|  |  |         await asyncio.sleep(2) |  |  |         await asyncio.sleep(2) | 
		
	
		
			
				|  |  |         info = await self.browser_page.wait_for_selector('#left-menu > div > div.LeftMenu_footer__qsJdJ > div > div > div > button', timeout=600000) |  |  |         info = await self.browser_page.wait_for_selector('#left-menu > div > div.LeftMenu_footer__qsJdJ > div > div > div > button', timeout=600000) | 
		
	
	
		
			
				|  | @ -47,7 +49,7 @@ class MetasoSpider(AbstractAiSeoSpider): | 
		
	
		
			
				|  |  |         await self.browser_page.keyboard.press('Enter') |  |  |         await self.browser_page.keyboard.press('Enter') | 
		
	
		
			
				|  |  |         # 监听请求 |  |  |         # 监听请求 | 
		
	
		
			
				|  |  |         await asyncio.sleep(2) |  |  |         await asyncio.sleep(2) | 
		
	
		
			
				|  |  |         # self.browser_page.on('response', partial(self.__listen_response)) |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
 | 
		
	
		
			
				|  |  |         await self.browser_page.reload() |  |  |         await self.browser_page.reload() | 
		
	
		
			
				|  |  |         # await self.completed_event.wait() |  |  |         # await self.completed_event.wait() | 
		
	
		
			
				|  |  |         # 等待指定元素 |  |  |         # 等待指定元素 | 
		
	
	
		
			
				|  | @ -109,61 +111,87 @@ class MetasoSpider(AbstractAiSeoSpider): | 
		
	
		
			
				|  |  |     def get_platform_name(self) -> str: |  |  |     def get_platform_name(self) -> str: | 
		
	
		
			
				|  |  |         return 'Metaso' |  |  |         return 'Metaso' | 
		
	
		
			
				|  |  | 
 |  |  | 
 | 
		
	
		
			
				|  |  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |  | 
 | 
		
	
		
			
				|  |  |     async def __listen_response(self, response): |  |  |     async def __listen_response(self, response): | 
		
	
		
			
				|  |  |         url = response.url |  |  |         url = response.url | 
		
	
		
			
				|  |  |         logger.debug(f'url: {url}') |  |  |  | 
		
	
		
			
				|  |  |         if 'searchV2' in url: |  |  |  | 
		
	
		
			
				|  |  |             answer = '' |  |  |  | 
		
	
		
			
				|  |  |             results = [] |  |  |  | 
		
	
		
			
				|  |  |             search_results = list() |  |  |  | 
		
	
		
			
				|  |  |             response_text = ftfy.fix_text(await response.text()) |  |  |  | 
		
	
		
			
				|  |  |             event_lines = response_text.split('\n\n') |  |  |  | 
		
	
		
			
				|  |  |             self.completed_event.set() |  |  |  | 
		
	
		
			
				|  |  |             for line in event_lines: |  |  |  | 
		
	
		
			
				|  |  |                 if line.startswith('data:'): |  |  |  | 
		
	
		
			
				|  |  |                     line = line[5:] |  |  |  | 
		
	
		
			
				|  |  |                 try: |  |  |  | 
		
	
		
			
				|  |  |                     event_json = json.loads(line) |  |  |  | 
		
	
		
			
				|  |  |                 except JSONDecodeError: |  |  |  | 
		
	
		
			
				|  |  |                     continue |  |  |  | 
		
	
		
			
				|  |  |                 # 开始event_json |  |  |  | 
		
	
		
			
				|  |  |                 type = event_json.get('type') |  |  |  | 
		
	
		
			
				|  |  |                 # 获取到搜索结果 |  |  |  | 
		
	
		
			
				|  |  |                 if type == 'set-reference': |  |  |  | 
		
	
		
			
				|  |  |                     search_results = event_json.get('list', []) |  |  |  | 
		
	
		
			
				|  |  |                     # for search_result in search_results: |  |  |  | 
		
	
		
			
				|  |  |                     #     result = AiSearchResult(title=search_result.get('title', ''), |  |  |  | 
		
	
		
			
				|  |  |                     #                    url=search_result.get('url', ''), |  |  |  | 
		
	
		
			
				|  |  |                     #                    host_name=search_result.get('author', ''), |  |  |  | 
		
	
		
			
				|  |  |                     #                    body=search_result.get('displaySource'), |  |  |  | 
		
	
		
			
				|  |  |                     #                    publish_time=search_result.get('publish_time', '')) |  |  |  | 
		
	
		
			
				|  |  |                     #     results.append(result) |  |  |  | 
		
	
		
			
				|  |  |                     # self.ai_answer.search_result = results |  |  |  | 
		
	
		
			
				|  |  |                 # 获取到回答内容 |  |  |  | 
		
	
		
			
				|  |  |                 if type == 'append-text': |  |  |  | 
		
	
		
			
				|  |  |                     answer = answer + event_json.get('text', '') |  |  |  | 
		
	
		
			
				|  |  |             pattern = r'\[(\d+)\]' |  |  |  | 
		
	
		
			
				|  |  |             index_data = list(set(re.findall(pattern, answer))) |  |  |  | 
		
	
		
			
				|  |  |             for index,search_result in enumerate(search_results): |  |  |  | 
		
	
		
			
				|  |  |                 if str(index+1) in index_data: |  |  |  | 
		
	
		
			
				|  |  |                     result = AiSearchResult(title=search_result.get('title', ''), |  |  |  | 
		
	
		
			
				|  |  |                                             url=search_result.get('url', ''), |  |  |  | 
		
	
		
			
				|  |  |                                             host_name=search_result.get('author', ''), |  |  |  | 
		
	
		
			
				|  |  |                                             body=search_result.get('displaySource'), |  |  |  | 
		
	
		
			
				|  |  |                                             publish_time=search_result.get('publish_time', ''), |  |  |  | 
		
	
		
			
				|  |  |                                             is_referenced="1") |  |  |  | 
		
	
		
			
				|  |  |                 else: |  |  |  | 
		
	
		
			
				|  |  |                     result = AiSearchResult(title=search_result.get('title', ''), |  |  |  | 
		
	
		
			
				|  |  |                                             url=search_result.get('url', ''), |  |  |  | 
		
	
		
			
				|  |  |                                             host_name=search_result.get('author', ''), |  |  |  | 
		
	
		
			
				|  |  |                                             body=search_result.get('displaySource'), |  |  |  | 
		
	
		
			
				|  |  |                                             publish_time=search_result.get('publish_time', ''), |  |  |  | 
		
	
		
			
				|  |  |                                             is_referenced="0") |  |  |  | 
		
	
		
			
				|  |  |                 results.append(result) |  |  |  | 
		
	
		
			
				|  |  |             self.ai_answer.search_result = results |  |  |  | 
		
	
		
			
				|  |  |             self.ai_answer.answer = answer |  |  |  | 
		
	
		
			
				|  |  |             self.completed_event.set() |  |  |  | 
		
	
		
			
				|  |  | 
 |  |  |  | 
		
	
		
			
				|  |  |  |  |  |         if response.status == 200: | 
		
	
		
			
				|  |  |  |  |  |             # if 'searchV2' in url: | 
		
	
		
			
				|  |  |  |  |  |             #     answer = '' | 
		
	
		
			
				|  |  |  |  |  |             #     results = [] | 
		
	
		
			
				|  |  |  |  |  |             #     search_results = list() | 
		
	
		
			
				|  |  |  |  |  |             #     response_text = ftfy.fix_text(await response.text()) | 
		
	
		
			
				|  |  |  |  |  |             #     event_lines = response_text.split('\n\n') | 
		
	
		
			
				|  |  |  |  |  |             #     self.completed_event.set() | 
		
	
		
			
				|  |  |  |  |  |             #     for line in event_lines: | 
		
	
		
			
				|  |  |  |  |  |             #         if line.startswith('data:'): | 
		
	
		
			
				|  |  |  |  |  |             #             line = line[5:] | 
		
	
		
			
				|  |  |  |  |  |             #         try: | 
		
	
		
			
				|  |  |  |  |  |             #             event_json = json.loads(line) | 
		
	
		
			
				|  |  |  |  |  |             #         except JSONDecodeError: | 
		
	
		
			
				|  |  |  |  |  |             #             continue | 
		
	
		
			
				|  |  |  |  |  |             #         # 开始event_json | 
		
	
		
			
				|  |  |  |  |  |             #         type = event_json.get('type') | 
		
	
		
			
				|  |  |  |  |  |             #         # 获取到搜索结果 | 
		
	
		
			
				|  |  |  |  |  |             #         if type == 'set-reference': | 
		
	
		
			
				|  |  |  |  |  |             #             search_results = event_json.get('list', []) | 
		
	
		
			
				|  |  |  |  |  |             #             # for search_result in search_results: | 
		
	
		
			
				|  |  |  |  |  |             #             #     result = AiSearchResult(title=search_result.get('title', ''), | 
		
	
		
			
				|  |  |  |  |  |             #             #                    url=search_result.get('url', ''), | 
		
	
		
			
				|  |  |  |  |  |             #             #                    host_name=search_result.get('author', ''), | 
		
	
		
			
				|  |  |  |  |  |             #             #                    body=search_result.get('displaySource'), | 
		
	
		
			
				|  |  |  |  |  |             #             #                    publish_time=search_result.get('publish_time', '')) | 
		
	
		
			
				|  |  |  |  |  |             #             #     results.append(result) | 
		
	
		
			
				|  |  |  |  |  |             #             # self.ai_answer.search_result = results | 
		
	
		
			
				|  |  |  |  |  |             #         # 获取到回答内容 | 
		
	
		
			
				|  |  |  |  |  |             #         if type == 'append-text': | 
		
	
		
			
				|  |  |  |  |  |             #             answer = answer + event_json.get('text', '') | 
		
	
		
			
				|  |  |  |  |  |             #     pattern = r'\[(\d+)\]' | 
		
	
		
			
				|  |  |  |  |  |             #     index_data = list(set(re.findall(pattern, answer))) | 
		
	
		
			
				|  |  |  |  |  |             #     for index,search_result in enumerate(search_results): | 
		
	
		
			
				|  |  |  |  |  |             #         if str(index+1) in index_data: | 
		
	
		
			
				|  |  |  |  |  |             #             result = AiSearchResult(title=search_result.get('title', ''), | 
		
	
		
			
				|  |  |  |  |  |             #                                     url=search_result.get('url', ''), | 
		
	
		
			
				|  |  |  |  |  |             #                                     host_name=search_result.get('author', ''), | 
		
	
		
			
				|  |  |  |  |  |             #                                     body=search_result.get('displaySource'), | 
		
	
		
			
				|  |  |  |  |  |             #                                     publish_time=search_result.get('publish_time', ''), | 
		
	
		
			
				|  |  |  |  |  |             #                                     is_referenced="1") | 
		
	
		
			
				|  |  |  |  |  |             #         else: | 
		
	
		
			
				|  |  |  |  |  |             #             result = AiSearchResult(title=search_result.get('title', ''), | 
		
	
		
			
				|  |  |  |  |  |             #                                     url=search_result.get('url', ''), | 
		
	
		
			
				|  |  |  |  |  |             #                                     host_name=search_result.get('author', ''), | 
		
	
		
			
				|  |  |  |  |  |             #                                     body=search_result.get('displaySource'), | 
		
	
		
			
				|  |  |  |  |  |             #                                     publish_time=search_result.get('publish_time', ''), | 
		
	
		
			
				|  |  |  |  |  |             #                                     is_referenced="0") | 
		
	
		
			
				|  |  |  |  |  |             #         results.append(result) | 
		
	
		
			
				|  |  |  |  |  |             #     self.ai_answer.search_result = results | 
		
	
		
			
				|  |  |  |  |  |             #     self.ai_answer.answer = answer | 
		
	
		
			
				|  |  |  |  |  |             #     self.completed_event.set() | 
		
	
		
			
				|  |  |  |  |  |             if 'api/captcha/get' in url: | 
		
	
		
			
				|  |  |  |  |  |                     logger.info(await response.json()) | 
		
	
		
			
				|  |  |  |  |  |                     captcha_data = await response.json() | 
		
	
		
			
				|  |  |  |  |  |                     bg = captcha_data.get("repData").get('originalImageBase64') | 
		
	
		
			
				|  |  |  |  |  |                     slider = captcha_data.get("repData").get('jigsawImageBase64') | 
		
	
		
			
				|  |  |  |  |  |                     x_box = get_slide_offset_from_base64(bg,slider) | 
		
	
		
			
				|  |  |  |  |  |                     slider = await self.browser_page.query_selector('.verify-move-block')  # 替换为实际滑块的 CSS 选择器 | 
		
	
		
			
				|  |  |  |  |  |                     if slider: | 
		
	
		
			
				|  |  |  |  |  |                         box = await slider.bounding_box() | 
		
	
		
			
				|  |  |  |  |  |                         start_x = box['x'] + box['width'] / 2  # 获取滑块的初始位置 X | 
		
	
		
			
				|  |  |  |  |  |                         start_y = box['y'] + box['height'] / 2  # 获取滑块的初始位置 Y | 
		
	
		
			
				|  |  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |  |                         # 目标位置 (替换为你想要的目标位置) | 
		
	
		
			
				|  |  |  |  |  |                         target_x = start_x + int(x_box)  # 向右移动 100px | 
		
	
		
			
				|  |  |  |  |  |                         target_y = start_y | 
		
	
		
			
				|  |  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |  |                         await self.browser_page.mouse.move(start_x, start_y) | 
		
	
		
			
				|  |  |  |  |  |                         await self.browser_page.mouse.down() | 
		
	
		
			
				|  |  |  |  |  |                         await self.browser_page.mouse.move(target_x, target_y, steps=20)  # 分步模拟滑动 | 
		
	
		
			
				|  |  |  |  |  |                         await self.browser_page.mouse.up() | 
		
	
		
			
				|  |  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |  |                     else: | 
		
	
		
			
				|  |  |  |  |  |                         pass | 
		
	
		
			
				|  |  |     def handle_listen_response_error(self, func): |  |  |     def handle_listen_response_error(self, func): | 
		
	
		
			
				|  |  |         """ |  |  |         """ | 
		
	
		
			
				|  |  |         装饰器 用于处理请求回调中的异常 |  |  |         装饰器 用于处理请求回调中的异常 | 
		
	
	
		
			
				|  | 
 |