Browse Source

提交检测 DeepSeek 链接是否被引用的代码

master
zhurunlin 4 months ago
parent
commit
23ed8ecb56
  1. 37
      spiders/ai_seo/deepseek.py

37
spiders/ai_seo/deepseek.py

@ -1,7 +1,6 @@
# coding=utf-8 # coding=utf-8
import asyncio import asyncio
import json import json
import re
from functools import partial, wraps from functools import partial, wraps
from json import JSONDecodeError from json import JSONDecodeError
from glom import glom from glom import glom
@ -11,7 +10,7 @@ from abs_spider import AbstractAiSeoSpider
from domain.ai_seo import AiAnswer, AiSearchResult from domain.ai_seo import AiAnswer, AiSearchResult
from utils import create_logger, css_to_dict from utils import create_logger, css_to_dict
from utils.image_utils import crop_image_left from utils.image_utils import crop_image_left
import re
logger = create_logger(__name__) logger = create_logger(__name__)
class DeepseekSpider(AbstractAiSeoSpider): class DeepseekSpider(AbstractAiSeoSpider):
@ -39,7 +38,7 @@ class DeepseekSpider(AbstractAiSeoSpider):
if await search_btn.is_visible(): if await search_btn.is_visible():
await search_btn.click() await search_btn.click()
if self.think: if self.think:
# 开启深度思考
# 开启深度思考
think_btn = self.browser_page.locator("span:text('深度思考 (R1)')").locator('..') think_btn = self.browser_page.locator("span:text('深度思考 (R1)')").locator('..')
if await think_btn.is_visible(): if await think_btn.is_visible():
styles = css_to_dict(await think_btn.get_attribute('style')) styles = css_to_dict(await think_btn.get_attribute('style'))
@ -131,22 +130,22 @@ class DeepseekSpider(AbstractAiSeoSpider):
if data.get('p', '') == 'response/search_results' or isinstance(data.get('v', ''), list): if data.get('p', '') == 'response/search_results' or isinstance(data.get('v', ''), list):
logger.debug(f"获取到联网搜索结果") logger.debug(f"获取到联网搜索结果")
search_result_list = data.get('v', []) search_result_list = data.get('v', [])
# 保存搜索结果
ai_search_result_list = []
for search_result in search_result_list:
url = search_result.get('url', '')
title = search_result.get('title', '')
body = search_result.get('snippet', '')
publish_time = search_result.get('published_at', '')
host_name = search_result.get('site_name', '未知')
ai_result = AiSearchResult(url=url, title=title, body=body, publish_time=publish_time, host_name=host_name)
if ai_result.title and ai_result.url:
ai_search_result_list.append(ai_result)
logger.debug(f"ai参考资料: [{host_name}]{title}({url})")
if ai_search_result_list:
self.ai_answer.search_result = ai_search_result_list
self.search_result_count = len(self.ai_answer.search_result)
search_result_lists.extend(search_result_list)
# # 保存搜索结果
# ai_search_result_list = []
# for search_result in search_result_list:
# url = search_result.get('url', '')
# title = search_result.get('title', '')
# body = search_result.get('snippet', '')
# publish_time = search_result.get('published_at', '')
# host_name = search_result.get('site_name', '未知')
# ai_result = AiSearchResult(url=url, title=title, body=body, publish_time=publish_time, host_name=host_name)
# if ai_result.title and ai_result.url:
# ai_search_result_list.append(ai_result)
# logger.debug(f"ai参考资料: [{host_name}]{title}({url})")
# if ai_search_result_list:
# self.ai_answer.search_result = ai_search_result_list
# self.search_result_count = len(self.ai_answer.search_result)
continue continue
# 是否开始返回回复数据 # 是否开始返回回复数据
if data.get('p', '') == 'response/content': if data.get('p', '') == 'response/content':

Loading…
Cancel
Save