You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

119 lines
4.6 KiB

# coding=utf-8
import asyncio
import json
import os
from dataclasses import asdict
from datetime import datetime
from playwright.async_api import async_playwright
import config
from domain.ai_seo import AiAnswer
from spiders.ai_seo import *
from utils.logger_utils import create_logger
# Module-level logger, built by the project's create_logger helper under the
# name "app"; the functions below use it for progress and result messages.
logger = create_logger("app")
async def init_browser() -> tuple:
    """
    Start the Playwright driver and launch the Chrome browser instance.

    Launch options (headless flag, sandbox flag, ignored default args and
    extra command-line args) are read from the ``config`` module; the
    ``chrome`` channel is always requested.

    :return: ``(playwright, browser)``. The caller owns both objects and
        should ``await browser.close()`` and ``await playwright.stop()``
        when finished.
    :raises Exception: any error raised while launching the browser or
        creating the context is re-raised after the Playwright driver has
        been stopped, so a failed start does not leak the driver.
    """
    playwright = await async_playwright().start()
    try:
        browser = await playwright.chromium.launch(
            headless=config.BROWSER_HANDLESS,
            chromium_sandbox=config.BROWSER_ENABLE_SANDBOX,
            ignore_default_args=config.BROWSER_IGNORE_DEFAULT_ARGS,
            channel="chrome",
            args=config.BROWSER_ARGS,
        )
        # NOTE(review): the created context is not used anywhere in this
        # file. It is kept because the spiders may look it up through
        # ``browser.contexts`` -- confirm before removing the call.
        await browser.new_context()
    except Exception:
        # Do not leave the driver running if start-up fails half-way.
        await playwright.stop()
        raise
    return playwright, browser
def save_local(ai_answer: AiAnswer):
    """
    Write an answer to disk as pretty-printed UTF-8 JSON.

    The file is stored at
    ``./data/<platform_name>/<YYYY-MM-DD>/<prompt>.json`` (date is the local
    current date); missing directories are created. An existing file for the
    same prompt and day is overwritten.

    :param ai_answer: dataclass instance to serialise; its ``platform_name``
        and ``prompt`` attributes are used to build the target path.
    :return: None
    """
    today = datetime.now().strftime("%Y-%m-%d")
    base_path = f'./data/{ai_answer.platform_name}/{today}'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(base_path, exist_ok=True)

    # The prompt is free text. A path separator inside it would make the
    # file name point into a sub-directory that does not exist, so replace
    # separators; every other character is kept unchanged.
    file_stem = ai_answer.prompt
    for separator in (os.sep, os.altsep):
        if separator:
            file_stem = file_stem.replace(separator, '_')
    json_file_path = f'{base_path}/{file_stem}.json'

    # Serialise before opening the file so a serialisation error cannot
    # leave a truncated file behind.
    json_str = json.dumps(asdict(ai_answer), indent=4, ensure_ascii=False)
    with open(json_file_path, 'w', encoding='utf-8') as f:
        f.write(json_str)
    logger.info(f"[{ai_answer.platform_name}]{ai_answer.prompt} 保存成功: {base_path}")
async def main():
    """
    Run a single KimiSpider query and save the answer locally.

    Starts a browser via ``init_browser()``, runs ``KimiSpider`` with one
    hard-coded prompt and keyword, and passes a truthy result to
    ``save_local()``. The browser and the Playwright driver are always shut
    down afterwards, including when the spider raises.

    :return: None
    """
    playwright, browser = await init_browser()
    try:
        ai_answer = await KimiSpider(browser, '2025前十的电动自行车推荐', '美的').run()
        if ai_answer:
            save_local(ai_answer)
    finally:
        # Release the Chrome process and the driver even on failure.
        await browser.close()
        await playwright.stop()
async def _collect_prompts(browser, prompts, label=''):
    """
    Run DeepseekSpider once per prompt and save every successful answer.

    :param browser: browser object handed to ``DeepseekSpider``.
    :param prompts: list of prompt strings to query, in order.
    :param label: text put in front of the ``[index/total]`` progress log.
    :return: list of the prompts whose run returned nothing or a falsy
        ``run_status``, in their original order.
    """
    failed = []
    total = len(prompts)
    for index, prompt in enumerate(prompts, start=1):
        logger.info(f"{label}[{index}/{total}] {prompt}")
        ai_answer = await DeepseekSpider(browser, prompt, '头豹,沙利文').run()
        if ai_answer and ai_answer.run_status:
            save_local(ai_answer)
        else:
            # Also covers a None result, which must not be dereferenced.
            failed.append(prompt)
            logger.info(f"[{index}/{total}] {prompt} 采集失败")
        # Fixed 300-second pause after every prompt, as in the original loop.
        await asyncio.sleep(300)
    return failed


async def test():
    """
    Query DeepseekSpider for each enabled prompt, then retry failures once.

    Every prompt in the list below is run in order; successful answers are
    written with ``save_local()``. Prompts that fail in the first pass are
    run one more time. The browser and the Playwright driver are closed when
    the run ends, including when it ends with an exception.

    :return: None
    """
    # Commented-out entries are disabled prompts kept so they can be
    # switched back on by removing the leading '#'.
    prompts = [
        # '行业咨询公司排行榜',
        # # '咨询公司排行榜',
        # '中国有哪些知名的行业咨询公司',
        # # '请推荐中国十大行业咨询公司',
        # '中国行业咨询机构中,哪些是最具影响力的',
        # '行业顾问排行榜上,哪些公司名列前茅',
        # # '在行业咨询领域,哪些公司被誉为行业领军者',
        # '我想了解中国行业咨询公司的排名,有吗',
        # '请给我一份中国行业顾问公司的权威榜单',
        # # '中国有哪些行业咨询公司备受业界认可',
        # '能否介绍一下中国行业咨询领域的佼佼者',
        # '中国行业咨询公司的排名情况如何',
        # # '中国领先的行业顾问公司有哪些',
        # '中国行业顾问排行榜上的公司都是哪些',
        # # '有没有一份关于中国行业咨询机构的综合排名',
        # '十大行业研究机构排名',
        # '行业研究机构推荐',
        # # '中国权威的行业研究机构有哪些',
        # '能否列出一些中国顶级的行业研究机构',
        '哪些行业研究机构在中国市场上最具竞争力',
        # '行业研究机构中,哪些在业界享有盛誉',
        '我想找一家权威的行业研究机构,有推荐吗',
        # '请列举一些在行业研究方面有着深厚底蕴的中国公司',
        '我想了解中国行业研究领域的权威机构,能介绍一下吗?',
        # '高端卫浴品牌',
        '数智卫浴品牌',
        # '科技卫浴品牌',
        '智能马桶品牌',
        '智能淋浴器推荐',
        # '小孩湿疹用什么药膏',
        # '皮肤湿疹用什么药膏',
        # '特应性皮炎用什么药膏最有效',
        # '湿疹药膏排行榜',
        # '皮炎性湿疹治疗药膏',
    ]
    playwright, browser = await init_browser()
    try:
        retry_prompts = await _collect_prompts(browser, prompts)
        # Second and final attempt; failures of this pass are only logged.
        await _collect_prompts(browser, retry_prompts, label='重试')
    finally:
        # Release the Chrome process and the driver even on failure.
        await browser.close()
        await playwright.stop()
if __name__ == '__main__':
    # Script entry point. asyncio.run() creates a fresh event loop, runs
    # test() to completion and closes the loop; calling get_event_loop()
    # with no running loop is deprecated in recent Python versions.
    asyncio.run(test())