|
|
@ -1,7 +1,6 @@ |
|
|
# coding=utf-8 |
|
|
# coding=utf-8 |
|
|
import asyncio |
|
|
import asyncio |
|
|
import json |
|
|
import json |
|
|
import re |
|
|
|
|
|
from functools import partial, wraps |
|
|
from functools import partial, wraps |
|
|
from json import JSONDecodeError |
|
|
from json import JSONDecodeError |
|
|
from glom import glom |
|
|
from glom import glom |
|
|
@ -11,7 +10,7 @@ from abs_spider import AbstractAiSeoSpider |
|
|
from domain.ai_seo import AiAnswer, AiSearchResult |
|
|
from domain.ai_seo import AiAnswer, AiSearchResult |
|
|
from utils import create_logger, css_to_dict |
|
|
from utils import create_logger, css_to_dict |
|
|
from utils.image_utils import crop_image_left |
|
|
from utils.image_utils import crop_image_left |
|
|
|
|
|
|
|
|
|
|
|
import re |
|
|
logger = create_logger(__name__) |
|
|
logger = create_logger(__name__) |
|
|
|
|
|
|
|
|
class DeepseekSpider(AbstractAiSeoSpider): |
|
|
class DeepseekSpider(AbstractAiSeoSpider): |
|
|
@ -131,22 +130,22 @@ class DeepseekSpider(AbstractAiSeoSpider): |
|
|
if data.get('p', '') == 'response/search_results' or isinstance(data.get('v', ''), list): |
|
|
if data.get('p', '') == 'response/search_results' or isinstance(data.get('v', ''), list): |
|
|
logger.debug(f"获取到联网搜索结果") |
|
|
logger.debug(f"获取到联网搜索结果") |
|
|
search_result_list = data.get('v', []) |
|
|
search_result_list = data.get('v', []) |
|
|
# 保存搜索结果 |
|
|
|
|
|
ai_search_result_list = [] |
|
|
|
|
|
|
|
|
|
|
|
for search_result in search_result_list: |
|
|
|
|
|
url = search_result.get('url', '') |
|
|
|
|
|
title = search_result.get('title', '') |
|
|
|
|
|
body = search_result.get('snippet', '') |
|
|
|
|
|
publish_time = search_result.get('published_at', '') |
|
|
|
|
|
host_name = search_result.get('site_name', '未知') |
|
|
|
|
|
ai_result = AiSearchResult(url=url, title=title, body=body, publish_time=publish_time, host_name=host_name) |
|
|
|
|
|
if ai_result.title and ai_result.url: |
|
|
|
|
|
ai_search_result_list.append(ai_result) |
|
|
|
|
|
logger.debug(f"ai参考资料: [{host_name}]{title}({url})") |
|
|
|
|
|
if ai_search_result_list: |
|
|
|
|
|
self.ai_answer.search_result = ai_search_result_list |
|
|
|
|
|
self.search_result_count = len(self.ai_answer.search_result) |
|
|
|
|
|
|
|
|
search_result_lists.extend(search_result_list) |
|
|
|
|
|
# # 保存搜索结果 |
|
|
|
|
|
# ai_search_result_list = [] |
|
|
|
|
|
# for search_result in search_result_list: |
|
|
|
|
|
# url = search_result.get('url', '') |
|
|
|
|
|
# title = search_result.get('title', '') |
|
|
|
|
|
# body = search_result.get('snippet', '') |
|
|
|
|
|
# publish_time = search_result.get('published_at', '') |
|
|
|
|
|
# host_name = search_result.get('site_name', '未知') |
|
|
|
|
|
# ai_result = AiSearchResult(url=url, title=title, body=body, publish_time=publish_time, host_name=host_name) |
|
|
|
|
|
# if ai_result.title and ai_result.url: |
|
|
|
|
|
# ai_search_result_list.append(ai_result) |
|
|
|
|
|
# logger.debug(f"ai参考资料: [{host_name}]{title}({url})") |
|
|
|
|
|
# if ai_search_result_list: |
|
|
|
|
|
# self.ai_answer.search_result = ai_search_result_list |
|
|
|
|
|
# self.search_result_count = len(self.ai_answer.search_result) |
|
|
continue |
|
|
continue |
|
|
# 是否开始返回回复数据 |
|
|
# 是否开始返回回复数据 |
|
|
if data.get('p', '') == 'response/content': |
|
|
if data.get('p', '') == 'response/content': |
|
|
|