From 23ed8ecb568f9f531064ec4c951df1228a82006d Mon Sep 17 00:00:00 2001
From: zhurunlin
Date: Tue, 1 Jul 2025 20:39:20 +0800
Subject: [PATCH] =?UTF-8?q?=E6=8F=90=E4=BA=A4deepseek=E9=93=BE=E6=8E=A5?=
 =?UTF-8?q?=E6=98=AF=E5=90=A6=E8=A2=AB=E5=BC=95=E7=94=A8=E4=BB=A3=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 spiders/ai_seo/deepseek.py | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/spiders/ai_seo/deepseek.py b/spiders/ai_seo/deepseek.py
index f8aae2e..e9ec9ac 100644
--- a/spiders/ai_seo/deepseek.py
+++ b/spiders/ai_seo/deepseek.py
@@ -1,7 +1,6 @@
 # coding=utf-8
 import asyncio
 import json
-import re
 from functools import partial, wraps
 from json import JSONDecodeError
 from glom import glom
@@ -11,7 +10,7 @@ from abs_spider import AbstractAiSeoSpider
 from domain.ai_seo import AiAnswer, AiSearchResult
 from utils import create_logger, css_to_dict
 from utils.image_utils import crop_image_left
-
+import re
 logger = create_logger(__name__)
 
 class DeepseekSpider(AbstractAiSeoSpider):
@@ -39,7 +38,7 @@ class DeepseekSpider(AbstractAiSeoSpider):
         if await search_btn.is_visible():
             await search_btn.click()
         if self.think:
-            # 开启深度思考 
+            # 开启深度思考
             think_btn = self.browser_page.locator("span:text('深度思考 (R1)')").locator('..')
             if await think_btn.is_visible():
                 styles = css_to_dict(await think_btn.get_attribute('style'))
@@ -131,22 +130,22 @@ class DeepseekSpider(AbstractAiSeoSpider):
             if data.get('p', '') == 'response/search_results' or isinstance(data.get('v', ''), list):
                 logger.debug(f"获取到联网搜索结果")
                 search_result_list = data.get('v', [])
-                # 保存搜索结果
-                ai_search_result_list = []
-
-                for search_result in search_result_list:
-                    url = search_result.get('url', '')
-                    title = search_result.get('title', '')
-                    body = search_result.get('snippet', '')
-                    publish_time = search_result.get('published_at', '')
-                    host_name = search_result.get('site_name', '未知')
-                    ai_result = AiSearchResult(url=url, title=title, body=body, publish_time=publish_time, host_name=host_name)
-                    if ai_result.title and ai_result.url:
-                        ai_search_result_list.append(ai_result)
-                        logger.debug(f"ai参考资料: [{host_name}]{title}({url})")
-                if ai_search_result_list:
-                    self.ai_answer.search_result = ai_search_result_list
-                    self.search_result_count = len(self.ai_answer.search_result)
+                search_result_lists.extend(search_result_list)
+                # # 保存搜索结果
+                # ai_search_result_list = []
+                # for search_result in search_result_list:
+                #     url = search_result.get('url', '')
+                #     title = search_result.get('title', '')
+                #     body = search_result.get('snippet', '')
+                #     publish_time = search_result.get('published_at', '')
+                #     host_name = search_result.get('site_name', '未知')
+                #     ai_result = AiSearchResult(url=url, title=title, body=body, publish_time=publish_time, host_name=host_name)
+                #     if ai_result.title and ai_result.url:
+                #         ai_search_result_list.append(ai_result)
+                #         logger.debug(f"ai参考资料: [{host_name}]{title}({url})")
+                # if ai_search_result_list:
+                #     self.ai_answer.search_result = ai_search_result_list
+                #     self.search_result_count = len(self.ai_answer.search_result)
                 continue
             # 是否开始返回回复数据
             if data.get('p', '') == 'response/content':