|
|
|
@ -30,6 +30,10 @@ class TongyiSpider(AbstractAiSeoSpider): |
|
|
|
# 初始化信息 |
|
|
|
self._init_data() |
|
|
|
await self.browser_page.goto(self.get_home_url(), timeout=600000) |
|
|
|
# 点掉提示框 |
|
|
|
confirm_btn = self.browser_page.locator('//button[.//span[text()="我知道了"]]') |
|
|
|
if await confirm_btn.is_visible(): |
|
|
|
await confirm_btn.click() |
|
|
|
if self.think: |
|
|
|
search_btn = self.browser_page.locator("div:text('深度思考')") |
|
|
|
if await search_btn.is_visible(): |
|
|
|
@ -94,7 +98,6 @@ class TongyiSpider(AbstractAiSeoSpider): |
|
|
|
stream = await response.body() |
|
|
|
response_text = stream.decode('utf-8') |
|
|
|
datas = response_text.split("\n") |
|
|
|
# print("datas:",datas) |
|
|
|
# 合规数据转成字典 |
|
|
|
for data_str in datas: |
|
|
|
if not data_str or data_str == 'data: [DONE]': |
|
|
|
@ -109,46 +112,29 @@ class TongyiSpider(AbstractAiSeoSpider): |
|
|
|
contents = data.get('contents', []) |
|
|
|
# 保存搜索内容 |
|
|
|
ai_search_result_list = [] |
|
|
|
search_result_list = list() |
|
|
|
for content in contents: |
|
|
|
content_type = content.get('contentType', '') |
|
|
|
if content_type == 'plugin': |
|
|
|
if content_type == 'referenceLink': |
|
|
|
logger.debug(f"获取到联网搜索结果") |
|
|
|
if self.think: |
|
|
|
search_result_list = glom(content, 'content.pluginResult.links', default=[]) |
|
|
|
else: |
|
|
|
search_result_list = glom(content, 'content.pluginResult.links.-1.search_results', default=[]) |
|
|
|
# for search_result in search_result_list: |
|
|
|
# url = search_result.get('url', '') |
|
|
|
# title = search_result.get('title', '') |
|
|
|
# body = search_result.get('body', '') |
|
|
|
# host_name = search_result.get('host_name', '未知') |
|
|
|
# publish_time = search_result.get('time', 0) |
|
|
|
# logger.debug(f"ai参考资料: [{host_name}]{title}({url})") |
|
|
|
# ai_search_result_list.append( |
|
|
|
# AiSearchResult(title=title, url=url, body=body, host_name=host_name, publish_time=publish_time) |
|
|
|
# ) |
|
|
|
if content_type == 'think': |
|
|
|
search_result_list = glom(content, 'content.links', default=[]) |
|
|
|
for search_result in search_result_list: |
|
|
|
url = search_result.get('url', '') |
|
|
|
title = search_result.get('title', '') |
|
|
|
body = search_result.get('body', '') |
|
|
|
host_name =title.rsplit('-', 1)[1] if '-' in title else '未知' |
|
|
|
publish_time = search_result.get('time', 0) |
|
|
|
logger.debug(f"ai参考资料: [{host_name}]{title}({url})") |
|
|
|
ai_search_result_list.append( |
|
|
|
AiSearchResult(title=title, url=url, body=body, host_name=host_name, publish_time=publish_time, is_referenced='1') |
|
|
|
) |
|
|
|
if content_type == 'text': |
|
|
|
logger.debug(f'获取到ai回复结果') |
|
|
|
answer = content.get('content', '').get('content', '') |
|
|
|
answer = content.get('content', '') |
|
|
|
logger.debug(f"ai回复: {answer}") |
|
|
|
self.ai_answer.answer = answer |
|
|
|
pattern = r'ty-reference]\((\d+)\)' |
|
|
|
index_data = list(set(re.findall(pattern, self.ai_answer.answer))) |
|
|
|
for index, search_result in enumerate(search_result_list): |
|
|
|
url = search_result.get('url', '') |
|
|
|
title = search_result.get('title', '') |
|
|
|
body = search_result.get('body', '') |
|
|
|
host_name = search_result.get('host_name', '未知') |
|
|
|
publish_time = search_result.get('time', 0) |
|
|
|
if str(index+1) in index_data: |
|
|
|
is_referenced = "1" |
|
|
|
else: |
|
|
|
is_referenced = "0" |
|
|
|
logger.debug(f"ai参考资料: [{host_name}]{title}({url})") |
|
|
|
ai_search_result_list.append( |
|
|
|
AiSearchResult(title=title, url=url, body=body, host_name=host_name, publish_time=publish_time , is_referenced=is_referenced) |
|
|
|
) |
|
|
|
if ai_search_result_list: |
|
|
|
self.ai_answer.search_result = ai_search_result_list |
|
|
|
self.completed_event.set() |
|
|
|
|