From 184c7ad851d72ffd296c2b0530d084649f90388f Mon Sep 17 00:00:00 2001
From: zzx <zzx@suq.cn>
Date: Mon, 15 Sep 2025 22:15:59 +0800
Subject: [PATCH] =?UTF-8?q?refactor(ai=5Fseo):=20=E9=87=8D=E6=9E=84?=
 =?UTF-8?q?=E9=80=9A=E4=B9=89=E7=81=B5=E7=A0=81=E7=88=AC=E8=99=AB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 点击提示框确认按钮
- 优化搜索结果处理逻辑
- 调整 AI 回答处理方式
- 移除不必要的注释和代码
---
 spiders/ai_seo/tongyi.py | 50 +++++++++++++++++-------------------------------
 1 file changed, 18 insertions(+), 32 deletions(-)

diff --git a/spiders/ai_seo/tongyi.py b/spiders/ai_seo/tongyi.py
index 8f83ab0..e72632d 100644
--- a/spiders/ai_seo/tongyi.py
+++ b/spiders/ai_seo/tongyi.py
@@ -30,6 +30,10 @@ class TongyiSpider(AbstractAiSeoSpider):
         # 初始化信息
         self._init_data()
         await self.browser_page.goto(self.get_home_url(), timeout=600000)
+        # 点掉提示框
+        confirm_btn = self.browser_page.locator('//button[.//span[text()="我知道了"]]')
+        if await confirm_btn.is_visible():
+            await confirm_btn.click()
         if self.think:
             search_btn = self.browser_page.locator("div:text('深度思考')")
             if await search_btn.is_visible():
@@ -94,7 +98,6 @@ class TongyiSpider(AbstractAiSeoSpider):
         stream = await response.body()
         response_text = stream.decode('utf-8')
         datas = response_text.split("\n")
-        # print("datas:",datas)
         # 合规数据转成字典
         for data_str in datas:
             if not data_str or data_str == 'data: [DONE]':
@@ -109,46 +112,29 @@ class TongyiSpider(AbstractAiSeoSpider):
         contents = data.get('contents', [])
         # 保存搜索内容
         ai_search_result_list = []
-        search_result_list = list()
         for content in contents:
             content_type = content.get('contentType', '')
-            if content_type == 'plugin':
+            if content_type == 'referenceLink':
                 logger.debug(f"获取到联网搜索结果")
                 if self.think:
                     search_result_list = glom(content, 'content.pluginResult.links', default=[])
                 else:
-                    search_result_list = glom(content, 'content.pluginResult.links.-1.search_results', default=[])
-                # for search_result in search_result_list:
-                #     url = search_result.get('url', '')
-                #     title = search_result.get('title', '')
-                #     body = search_result.get('body', '')
-                #     host_name = search_result.get('host_name', '未知')
-                #     publish_time = search_result.get('time', 0)
-                #     logger.debug(f"ai参考资料: [{host_name}]{title}({url})")
-                #     ai_search_result_list.append(
-                #         AiSearchResult(title=title, url=url, body=body, host_name=host_name, publish_time=publish_time)
-                #     )
-            if content_type == 'think':
+                    search_result_list = glom(content, 'content.links', default=[])
+                for search_result in search_result_list:
+                    url = search_result.get('url', '')
+                    title = search_result.get('title', '')
+                    body = search_result.get('body', '')
+                    host_name =title.rsplit('-', 1)[1] if '-' in title else '未知'
+                    publish_time = search_result.get('time', 0)
+                    logger.debug(f"ai参考资料: [{host_name}]{title}({url})")
+                    ai_search_result_list.append(
+                        AiSearchResult(title=title, url=url, body=body, host_name=host_name, publish_time=publish_time, is_referenced='1')
+                    )
+            if content_type == 'text':
                 logger.debug(f'获取到ai回复结果')
-                answer = content.get('content', '').get('content', '')
+                answer = content.get('content', '')
                 logger.debug(f"ai回复: {answer}")
                 self.ai_answer.answer = answer
-        pattern = r'ty-reference]\((\d+)\)'
-        index_data = list(set(re.findall(pattern, self.ai_answer.answer)))
-        for index, search_result in enumerate(search_result_list):
-            url = search_result.get('url', '')
-            title = search_result.get('title', '')
-            body = search_result.get('body', '')
-            host_name = search_result.get('host_name', '未知')
-            publish_time = search_result.get('time', 0)
-            if str(index+1) in index_data:
-                is_referenced = "1"
-            else:
-                is_referenced = "0"
-            logger.debug(f"ai参考资料: [{host_name}]{title}({url})")
-            ai_search_result_list.append(
-                AiSearchResult(title=title, url=url, body=body, host_name=host_name, publish_time=publish_time , is_referenced=is_referenced)
-            )
         if ai_search_result_list:
             self.ai_answer.search_result = ai_search_result_list
         self.completed_event.set()