From 6dfda1086ed6f26bb55ccd7c49b76b7f2fdc9b01 Mon Sep 17 00:00:00 2001
From: zzx
Date: Sat, 20 Sep 2025 22:16:30 +0800
Subject: [PATCH] =?UTF-8?q?feat(spider):=20=E6=9B=B4=E6=96=B0ai=5Fseo?=
 =?UTF-8?q?=E7=88=AC=E8=99=AB=E9=80=89=E6=8B=A9=E5=99=A8=E4=BB=A5=E9=80=82?=
 =?UTF-8?q?=E9=85=8D=E6=96=B0=E9=A1=B5=E9=9D=A2=E7=BB=93=E6=9E=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 修改复制按钮的XPath选择器,使用更稳定的定位方式
- 调整来源数据获取逻辑,点击展示按钮后延时加载
- 更新来源列表项的选择器路径,并修复发布时间提取逻辑
- 修改回答容器元素的选择器,确保正确获取截图区域
- 添加遮罩层按钮点击操作,优化截图前的页面状态
---
 spiders/ai_seo/metaso.py | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/spiders/ai_seo/metaso.py b/spiders/ai_seo/metaso.py
index 3cfb552..b153bc6 100644
--- a/spiders/ai_seo/metaso.py
+++ b/spiders/ai_seo/metaso.py
@@ -55,7 +55,7 @@ class MetasoSpider(AbstractAiSeoSpider):
         await self.browser_page.reload()
         # await self.completed_event.wait()
         # 等待指定元素
-        copy_button = await self.browser_page.wait_for_selector('//*[starts-with(@id, "search-content-container-")]/div[2]/div[3]/button', timeout=600000)
+        copy_button = await self.browser_page.wait_for_selector("//div[@class='relative']/following-sibling::div[1]//button[1]", timeout=600000)
         # 点击复制按钮
         await copy_button.click()
         # 读取剪贴板
@@ -63,26 +63,31 @@ class MetasoSpider(AbstractAiSeoSpider):
         logger.debug(f'ai回复内容: {self.ai_answer}')
         # 获取来源数据
         try:
-            await self.browser_page.wait_for_selector("//div[contains(@class, 'meta-ordered-list_list-item')]/span", timeout=60000)
-            search_items = self.browser_page.locator("//div[contains(@class, 'meta-ordered-list_list-item')]/span")
+            await self.browser_page.wait_for_selector("//div[contains(@aria-label, '来源')]", timeout=60000)
+            show_search_item_btn = self.browser_page.locator("//div[contains(@aria-label, '来源')]")
+            await show_search_item_btn.click()
+            await asyncio.sleep(2)
+            # logger.debug(f'来源数据: {search_item_count}')
+            # 获取来源数据
+            search_items = self.browser_page.locator("//ul[contains(@class, 'meta-ordered-list_meta-list')]/li")
             search_item_count = await search_items.count()
-            logger.debug(f'来源数据: {search_item_count}')
-            await asyncio.sleep(5)
             search_results = []
             for i in range(search_item_count):
                 search_result = AiSearchResult()
                 search_item = search_items.nth(i)
                 # 抽取链接和标题
-                a = search_item.locator("xpath=./a")
+                a = search_item.locator("xpath=./div[1]/a")
                 # 抽取时间
-                publish_date_element = search_item.locator("xpath=./span")
+                publish_date_element = search_item.locator("xpath=./div[2]/div")
+                publish_str = await publish_date_element.text_content()
+                search_result.publish_time = publish_str.replace('[', '').replace(']', '')
                 if await a.is_visible():
                     search_result.title = await a.text_content()
                     search_result.url = await a.get_attribute('href')
-                if await publish_date_element.count() > 0:
-                    publish_date_element = search_item.locator("xpath=./span").nth(-1)
-                    publish_str = await publish_date_element.text_content()
-                    search_result.publish_time = publish_str.replace('[', '').replace(']', '')
+                # if await publish_date_element.count() > 0:
+                #     publish_date_element = search_item.locator("xpath=./span").nth(-1)
+                #     publish_str = await publish_date_element.text_content()
+                #     search_result.publish_time = publish_str.replace('[', '').replace(']', '')
                 search_results.append(search_result)
             self.ai_answer.search_result = search_results
         except TimeoutError:
@@ -91,7 +96,7 @@ class MetasoSpider(AbstractAiSeoSpider):
         if self.fail_status:
             raise self.fail_exception
         # 获取回答元素
-        answer_element = self.browser_page.locator("//div[contains(@class, 'Search_search-result-container')]")
+        answer_element = self.browser_page.locator("//div[contains(@class, 'result-responsive-layer')]")
         box = await answer_element.bounding_box()
         logger.debug(f'answer_element: {box}')
         view_port_height = box['height'] + 300
@@ -100,6 +105,7 @@ class MetasoSpider(AbstractAiSeoSpider):
             'width': 1920,
             'height': int(view_port_height)
         })
+        await self.browser_page.locator("//div[contains(@class, 'MetaDialog_meta-dialog-mask')]//button").click()
         # 截图
         screenshot_path = self._get_screenshot_path()
         await self.browser_page.screenshot(path=screenshot_path)