diff --git a/spiders/ai_seo/deepseek.py b/spiders/ai_seo/deepseek.py index f4c1b05..ea2ce4e 100644 --- a/spiders/ai_seo/deepseek.py +++ b/spiders/ai_seo/deepseek.py @@ -37,15 +37,16 @@ class DeepseekSpider(AbstractAiSeoSpider): search_btn = self.browser_page.locator("span:text('联网搜索')").locator('..') if await search_btn.is_visible(): await search_btn.click() + self.think = True if self.think: - # 开启深度思考 - think_btn = self.browser_page.locator("span:text('深度思考 (R1)')").locator('..') + # 开启深度思考 + think_btn = self.browser_page.locator("span:text('深度思考')").locator('..') if await think_btn.is_visible(): - styles = css_to_dict(await think_btn.get_attribute('style')) - if styles.get('--ds-button-color') == '#fff': - await think_btn.click() - await asyncio.sleep(1) - chat_input_element = self.browser_page.locator("//textarea[@id='chat-input']") + # styles = css_to_dict(await think_btn.get_attribute('style')) + # if styles.get('--ds-button-color') == '#fff': + await think_btn.click() + await asyncio.sleep(1) + chat_input_element = self.browser_page.locator("//textarea[@placeholder='给 DeepSeek 发送消息 ']") await chat_input_element.click() # 输入提问词 await self.browser_page.keyboard.type(self.prompt) @@ -72,7 +73,8 @@ class DeepseekSpider(AbstractAiSeoSpider): await think_element.nth(-1).click() await asyncio.sleep(2) # 获取回答元素 - answer = self.browser_page.locator("//div[@class='ds-markdown ds-markdown--block']").nth(-1) + # answer = self.browser_page.locator("//div[@class='ds-markdown ds-markdown--block']").nth(-1) + answer = self.browser_page.locator("//div[contains(@class, 'ds-message')]").nth(-1) box = await answer.bounding_box() # 设置视口大小 await self.browser_page.set_viewport_size({ @@ -130,9 +132,11 @@ class DeepseekSpider(AbstractAiSeoSpider): body = stream.decode('utf-8') datas = body.split("\n\n") for data_str in datas: + # 返回数据为空 跳过 if not data_str: continue data_str = data_str.replace('data: ', '') + # 服务器繁忙 跳过 try: data = json.loads(data_str) if glom(data, 'v.0.v', default='') == 'TIMEOUT': @@ -145,27 +149,27 @@ class DeepseekSpider(AbstractAiSeoSpider): logger.debug(f"获取到联网搜索结果") search_result_list = data.get('v', []) search_result_lists.extend(search_result_list) - # # 保存搜索结果 - # ai_search_result_list = [] - # for search_result in search_result_list: - # url = search_result.get('url', '') - # title = search_result.get('title', '') - # body = search_result.get('snippet', '') - # publish_time = search_result.get('published_at', '') - # host_name = search_result.get('site_name', '未知') - # ai_result = AiSearchResult(url=url, title=title, body=body, publish_time=publish_time, host_name=host_name) - # if ai_result.title and ai_result.url: - # ai_search_result_list.append(ai_result) - # logger.debug(f"ai参考资料: [{host_name}]{title}({url})") - # if ai_search_result_list: - # self.ai_answer.search_result = ai_search_result_list - # self.search_result_count = len(self.ai_answer.search_result) + # 保存搜索结果 + ai_search_result_list = [] + for search_result in search_result_list: + url = search_result.get('url', '') + title = search_result.get('title', '') + body = search_result.get('snippet', '') + publish_time = search_result.get('published_at', '') + host_name = search_result.get('site_name', '未知') + ai_result = AiSearchResult(url=url, title=title, body=body, publish_time=publish_time, host_name=host_name) + if ai_result.title and ai_result.url: + ai_search_result_list.append(ai_result) + logger.debug(f"ai参考资料: [{host_name}]{title}({url})") + if ai_search_result_list: + self.ai_answer.search_result = ai_search_result_list + self.search_result_count = len(self.ai_answer.search_result) continue # 是否开始返回深度思考数据 - if data.get('p', '') == 'response/thinking_content': + if data.get('p', '') == 'response/fragments/1/content': start_thinking = True - if data.get('p', '') == 'response/thinking_elapsed_secs': + if data.get('p', '') == 'response/fragments/1/elapsed_secs': start_thinking = False if start_thinking: # 获取深度思考回复 @@ -177,7 +181,7 @@ class DeepseekSpider(AbstractAiSeoSpider): value = glom(data, target, default="") thinking_text = thinking_text + str(value) # 是否开始返回回复数据 - if data.get('p', '') == 'response/content': + if data.get('p', '') == 'response/fragments/2/content': start_content = True if start_content: # 获取ai回复