Browse Source

fix(spider):为发布时间提取添加异常处理

当无法正确提取发布时间时,设置默认空值以避免程序中断
master
zzx 1 month ago
parent
commit
6e9e4e34e1
  1. 9
      spiders/ai_seo/metaso.py

9
spiders/ai_seo/metaso.py

@ -78,9 +78,12 @@ class MetasoSpider(AbstractAiSeoSpider):
# 抽取链接和标题
a = search_item.locator("xpath=./div[1]/a")
# 抽取时间
publish_date_element = search_item.locator("xpath=./div[2]/div")
publish_str = await publish_date_element.text_content()
search_result.publish_time = publish_str.replace('[', '').replace(']', '')
try:
publish_date_element = search_item.locator("xpath=./div[2]/div")
publish_str = await publish_date_element.text_content()
search_result.publish_time = publish_str.replace('[', '').replace(']', '')
except Exception as e:
search_result.publish_time = ''
if await a.is_visible():
search_result.title = await a.text_content()
search_result.url = await a.get_attribute('href')

Loading…
Cancel
Save