Browse Source

修改 doubao、yuanbao 中判断链接是否被引用的代码

master
zhurunlin 4 months ago
parent
commit
351b043dea
  1. 16
      spiders/ai_seo/doubao.py
  2. 5
      spiders/ai_seo/yuanbao.py

16
spiders/ai_seo/doubao.py

@ -139,19 +139,35 @@ class DouBaoSpider(AbstractAiSeoSpider):
answer = '' answer = ''
datas = [] datas = []
index_data = list() index_data = list()
logger.debug(f"await data: {await response.text()}")
response_text = ftfy.fix_text(await response.text()) response_text = ftfy.fix_text(await response.text())
logger.debug(f"response_text: {response_text}")
lines = response_text.split("\n\n") lines = response_text.split("\n\n")
for line in lines: for line in lines:
if line.startswith('data: '): if line.startswith('data: '):
line = line[6:] line = line[6:]
logger.debug(f"line_1: {line}")
import re
pattern = r'"[\u4e00-\u9fa5]{1,10}"'
result = re.findall(pattern, line)
for i in result:
line = line.replace(i,i[1:-1])
try: try:
data = parse_nested_json(line) data = parse_nested_json(line)
logger.debug(f"data: {data}")
datas.append(data) datas.append(data)
event_data = data.get('event_data', {}) event_data = data.get('event_data', {})
target_key = 'message.content.text' target_key = 'message.content.text'
text = glom(event_data, target_key, default=None) text = glom(event_data, target_key, default=None)
if not text is None: if not text is None:
answer = answer + str(text) answer = answer + str(text)
index_key = 'message.content.meta_infos'
index = glom(event_data, index_key, default=None)
if index:
if str(index[0].get("info").get("insert_text")).isdigit():
# logger.debug(f"index: {index}")
logger.debug(f"index: {index[0].get("info").get("insert_text")}")
index_data.append(index[0].get("info").get("insert_text"))
except JSONDecodeError: except JSONDecodeError:
continue continue
logger.debug(f"ai回复: {answer}") logger.debug(f"ai回复: {answer}")

5
spiders/ai_seo/yuanbao.py

@ -39,7 +39,7 @@ class YuanBaoSpider(AbstractAiSeoSpider):
await think_button.click() await think_button.click()
await asyncio.sleep(2) await asyncio.sleep(2)
# 开启联网搜索 # 开启联网搜索
search_button = self.browser_page.locator("//button[@dt-button-id='online_search']")
search_button = self.browser_page.locator("//div[@dt-button-id='online_search']")
if await search_button.is_visible(): if await search_button.is_visible():
class_str = await search_button.get_attribute('class') class_str = await search_button.get_attribute('class')
clazz = class_str.split(' ') clazz = class_str.split(' ')
@ -115,7 +115,8 @@ class YuanBaoSpider(AbstractAiSeoSpider):
ai_search_result_list = [] ai_search_result_list = []
self.ai_answer.answer = text self.ai_answer.answer = text
if search_list: if search_list:
pattern = r'\[\^(\d+)\]'
# pattern = r'\[\^(\d+)\]'
pattern = r'citation:(\d+)'
index_data = list(set(re.findall(pattern, self.ai_answer.answer))) index_data = list(set(re.findall(pattern, self.ai_answer.answer)))
for index,search_result in enumerate(search_list): for index,search_result in enumerate(search_list):
if str(index+1) in index_data: if str(index+1) in index_data:

Loading…
Cancel
Save