From 184c7ad851d72ffd296c2b0530d084649f90388f Mon Sep 17 00:00:00 2001 From: zzx Date: Mon, 15 Sep 2025 22:15:59 +0800 Subject: [PATCH] =?UTF-8?q?refactor(ai=5Fseo):=20=E9=87=8D=E6=9E=84?= =?UTF-8?q?=E9=80=9A=E4=B9=89=E7=81=B5=E7=A0=81=E7=88=AC=E8=99=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 点击提示框确认按钮- 优化搜索结果处理逻辑- 调整 AI 回答处理方式 - 移除不必要的注释和代码 --- spiders/ai_seo/tongyi.py | 50 +++++++++++++++++------------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/spiders/ai_seo/tongyi.py b/spiders/ai_seo/tongyi.py index 8f83ab0..e72632d 100644 --- a/spiders/ai_seo/tongyi.py +++ b/spiders/ai_seo/tongyi.py @@ -30,6 +30,10 @@ class TongyiSpider(AbstractAiSeoSpider): # 初始化信息 self._init_data() await self.browser_page.goto(self.get_home_url(), timeout=600000) + # 点掉提示框 + confirm_btn = self.browser_page.locator('//button[.//span[text()="我知道了"]]') + if await confirm_btn.is_visible(): + await confirm_btn.click() if self.think: search_btn = self.browser_page.locator("div:text('深度思考')") if await search_btn.is_visible(): @@ -94,7 +98,6 @@ class TongyiSpider(AbstractAiSeoSpider): stream = await response.body() response_text = stream.decode('utf-8') datas = response_text.split("\n") - # print("datas:",datas) # 合规数据转成字典 for data_str in datas: if not data_str or data_str == 'data: [DONE]': @@ -109,46 +112,29 @@ class TongyiSpider(AbstractAiSeoSpider): contents = data.get('contents', []) # 保存搜索内容 ai_search_result_list = [] - search_result_list = list() for content in contents: content_type = content.get('contentType', '') - if content_type == 'plugin': + if content_type == 'referenceLink': logger.debug(f"获取到联网搜索结果") if self.think: search_result_list = glom(content, 'content.pluginResult.links', default=[]) else: - search_result_list = glom(content, 'content.pluginResult.links.-1.search_results', default=[]) - # for search_result in search_result_list: - # url = search_result.get('url', '') - # title = search_result.get('title', '') - # body = search_result.get('body', '') - # host_name = search_result.get('host_name', '未知') - # publish_time = search_result.get('time', 0) - # logger.debug(f"ai参考资料: [{host_name}]{title}({url})") - # ai_search_result_list.append( - # AiSearchResult(title=title, url=url, body=body, host_name=host_name, publish_time=publish_time) - # ) - if content_type == 'think': + search_result_list = glom(content, 'content.links', default=[]) + for search_result in search_result_list: + url = search_result.get('url', '') + title = search_result.get('title', '') + body = search_result.get('body', '') + host_name =title.rsplit('-', 1)[1] if '-' in title else '未知' + publish_time = search_result.get('time', 0) + logger.debug(f"ai参考资料: [{host_name}]{title}({url})") + ai_search_result_list.append( + AiSearchResult(title=title, url=url, body=body, host_name=host_name, publish_time=publish_time, is_referenced='1') + ) + if content_type == 'text': logger.debug(f'获取到ai回复结果') - answer = content.get('content', '').get('content', '') + answer = content.get('content', '') logger.debug(f"ai回复: {answer}") self.ai_answer.answer = answer - pattern = r'ty-reference]\((\d+)\)' - index_data = list(set(re.findall(pattern, self.ai_answer.answer))) - for index, search_result in enumerate(search_result_list): - url = search_result.get('url', '') - title = search_result.get('title', '') - body = search_result.get('body', '') - host_name = search_result.get('host_name', '未知') - publish_time = search_result.get('time', 0) - if str(index+1) in index_data: - is_referenced = "1" - else: - is_referenced = "0" - logger.debug(f"ai参考资料: [{host_name}]{title}({url})") - ai_search_result_list.append( - AiSearchResult(title=title, url=url, body=body, host_name=host_name, publish_time=publish_time , is_referenced=is_referenced) - ) if ai_search_result_list: self.ai_answer.search_result = ai_search_result_list self.completed_event.set()