|
|
|
@ -55,7 +55,7 @@ class MetasoSpider(AbstractAiSeoSpider): |
|
|
|
await self.browser_page.reload() |
|
|
|
# await self.completed_event.wait() |
|
|
|
# 等待指定元素 |
|
|
|
copy_button = await self.browser_page.wait_for_selector('//*[starts-with(@id, "search-content-container-")]/div[2]/div[3]/button', timeout=600000) |
|
|
|
copy_button = await self.browser_page.wait_for_selector("//div[@class='relative']/following-sibling::div[1]//button[1]", timeout=600000) |
|
|
|
# 点击复制按钮 |
|
|
|
await copy_button.click() |
|
|
|
# 读取剪贴板 |
|
|
|
@ -63,26 +63,31 @@ class MetasoSpider(AbstractAiSeoSpider): |
|
|
|
logger.debug(f'ai回复内容: {self.ai_answer}') |
|
|
|
# 获取来源数据 |
|
|
|
try: |
|
|
|
await self.browser_page.wait_for_selector("//div[contains(@class, 'meta-ordered-list_list-item')]/span", timeout=60000) |
|
|
|
search_items = self.browser_page.locator("//div[contains(@class, 'meta-ordered-list_list-item')]/span") |
|
|
|
await self.browser_page.wait_for_selector("//div[contains(@aria-label, '来源')]", timeout=60000) |
|
|
|
show_search_item_btn = self.browser_page.locator("//div[contains(@aria-label, '来源')]") |
|
|
|
await show_search_item_btn.click() |
|
|
|
await asyncio.sleep(2) |
|
|
|
# logger.debug(f'来源数据: {search_item_count}') |
|
|
|
# 获取来源数据 |
|
|
|
search_items = self.browser_page.locator("//ul[contains(@class, 'meta-ordered-list_meta-list')]/li") |
|
|
|
search_item_count = await search_items.count() |
|
|
|
logger.debug(f'来源数据: {search_item_count}') |
|
|
|
await asyncio.sleep(5) |
|
|
|
search_results = [] |
|
|
|
for i in range(search_item_count): |
|
|
|
search_result = AiSearchResult() |
|
|
|
search_item = search_items.nth(i) |
|
|
|
# 抽取链接和标题 |
|
|
|
a = search_item.locator("xpath=./a") |
|
|
|
a = search_item.locator("xpath=./div[1]/a") |
|
|
|
# 抽取时间 |
|
|
|
publish_date_element = search_item.locator("xpath=./span") |
|
|
|
publish_date_element = search_item.locator("xpath=./div[2]/div") |
|
|
|
publish_str = await publish_date_element.text_content() |
|
|
|
search_result.publish_time = publish_str.replace('[', '').replace(']', '') |
|
|
|
if await a.is_visible(): |
|
|
|
search_result.title = await a.text_content() |
|
|
|
search_result.url = await a.get_attribute('href') |
|
|
|
if await publish_date_element.count() > 0: |
|
|
|
publish_date_element = search_item.locator("xpath=./span").nth(-1) |
|
|
|
publish_str = await publish_date_element.text_content() |
|
|
|
search_result.publish_time = publish_str.replace('[', '').replace(']', '') |
|
|
|
# if await publish_date_element.count() > 0: |
|
|
|
# publish_date_element = search_item.locator("xpath=./span").nth(-1) |
|
|
|
# publish_str = await publish_date_element.text_content() |
|
|
|
# search_result.publish_time = publish_str.replace('[', '').replace(']', '') |
|
|
|
search_results.append(search_result) |
|
|
|
self.ai_answer.search_result = search_results |
|
|
|
except TimeoutError: |
|
|
|
@ -91,7 +96,7 @@ class MetasoSpider(AbstractAiSeoSpider): |
|
|
|
if self.fail_status: |
|
|
|
raise self.fail_exception |
|
|
|
# 获取回答元素 |
|
|
|
answer_element = self.browser_page.locator("//div[contains(@class, 'Search_search-result-container')]") |
|
|
|
answer_element = self.browser_page.locator("//div[contains(@class, 'result-responsive-layer')]") |
|
|
|
box = await answer_element.bounding_box() |
|
|
|
logger.debug(f'answer_element: {box}') |
|
|
|
view_port_height = box['height'] + 300 |
|
|
|
@ -100,6 +105,7 @@ class MetasoSpider(AbstractAiSeoSpider): |
|
|
|
'width': 1920, |
|
|
|
'height': int(view_port_height) |
|
|
|
}) |
|
|
|
await self.browser_page.locator("//div[contains(@class, 'MetaDialog_meta-dialog-mask')]//button").click() |
|
|
|
# 截图 |
|
|
|
screenshot_path = self._get_screenshot_path() |
|
|
|
await self.browser_page.screenshot(path=screenshot_path) |
|
|
|
|