# coding=utf-8
import asyncio
import json
import os
import re
from dataclasses import asdict
from datetime import datetime

from playwright.async_api import async_playwright

import config
from domain.ai_seo import AiAnswer
from spiders.ai_seo import *
from utils.logger_utils import create_logger

logger = create_logger("app")

# Seconds to wait after every prompt before starting the next one.
PROMPT_INTERVAL_SECONDS = 300

# Path separators, characters Windows rejects in file names, and control
# characters. Prompts are free text, so they may contain any of these.
_INVALID_FILE_NAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


async def init_browser() -> tuple:
    """Start Playwright and launch the browser.

    Launch options (headless mode, sandbox, ignored default args, extra
    args) are read from ``config``; the ``chrome`` channel is always used.

    :return: ``(playwright, browser)``. The caller is responsible for
        closing the browser and stopping Playwright when done.
    """
    playwright = await async_playwright().start()
    browser = await playwright.chromium.launch(
        headless=config.BROWSER_HANDLESS,
        chromium_sandbox=config.BROWSER_ENABLE_SANDBOX,
        ignore_default_args=config.BROWSER_IGNORE_DEFAULT_ARGS,
        channel="chrome",
        args=config.BROWSER_ARGS,
    )
    # NOTE(review): the context created here is never used in this file.
    # It is kept because the spiders receive `browser` and may look the
    # context up through it -- confirm, and drop this call if they do not.
    await browser.new_context()
    return playwright, browser


async def _close_browser(playwright, browser) -> None:
    """Close the browser, then stop the Playwright driver."""
    try:
        await browser.close()
    finally:
        # Stop the driver even if closing the browser raised.
        await playwright.stop()


def _safe_file_name(name: str) -> str:
    """Return *name* with characters that are unsafe in file names replaced by ``_``."""
    return _INVALID_FILE_NAME_CHARS.sub("_", name)


def save_local(ai_answer: AiAnswer) -> None:
    """Write *ai_answer* as JSON to ``./data/<platform_name>/<YYYY-MM-DD>/<prompt>.json``.

    The file name is derived from the prompt; characters that cannot appear
    in a file name are replaced with ``_``. An existing file for the same
    prompt and day is overwritten.

    :param ai_answer: dataclass instance to serialise with ``asdict``.
    """
    now = datetime.now().strftime("%Y-%m-%d")
    base_path = f'./data/{ai_answer.platform_name}/{now}'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(base_path, exist_ok=True)
    json_file_path = f'{base_path}/{_safe_file_name(ai_answer.prompt)}.json'
    json_str = json.dumps(asdict(ai_answer), indent=4, ensure_ascii=False)
    with open(json_file_path, 'w', encoding='utf-8') as f:
        f.write(json_str)
    logger.info(f"[{ai_answer.platform_name}]{ai_answer.prompt} 保存成功: {base_path}")


async def main():
    """Run a single prompt through ``KimiSpider`` and save the answer, if any."""
    playwright, browser = await init_browser()
    try:
        ai_answer = await KimiSpider(browser, '2025前十的电动自行车推荐', '美的').run()
        if ai_answer:
            save_local(ai_answer)
    finally:
        await _close_browser(playwright, browser)


async def _collect_with_deepseek(browser, prompt: str) -> bool:
    """Run *prompt* through ``DeepseekSpider`` and save a successful answer.

    :return: ``True`` if an answer came back with a truthy ``run_status`` and
        was saved; ``False`` if the spider returned nothing or reported
        failure.
    """
    ai_answer = await DeepseekSpider(browser, prompt, '头豹,沙利文').run()
    # Guard against a missing answer before touching run_status.
    if ai_answer and ai_answer.run_status:
        save_local(ai_answer)
        return True
    return False


async def test():
    """Collect answers for every active prompt, then retry the failures once.

    Each prompt is followed by a ``PROMPT_INTERVAL_SECONDS`` pause. The
    browser is closed when the run finishes or fails.
    """
    # Disabled prompts are kept verbatim as comments so they can be
    # re-enabled; only the uncommented entries are collected.
    prompts = [
        # '行业咨询公司排行榜',
        #
        # '咨询公司排行榜',
        # '中国有哪些知名的行业咨询公司',
        #
        # '请推荐中国十大行业咨询公司',
        # '中国行业咨询机构中,哪些是最具影响力的',
        # '行业顾问排行榜上,哪些公司名列前茅',
        #
        # '在行业咨询领域,哪些公司被誉为行业领军者',
        # '我想了解中国行业咨询公司的排名,有吗',
        # '请给我一份中国行业顾问公司的权威榜单',
        #
        # '中国有哪些行业咨询公司备受业界认可',
        # '能否介绍一下中国行业咨询领域的佼佼者',
        # '中国行业咨询公司的排名情况如何',
        #
        # '中国领先的行业顾问公司有哪些',
        # '中国行业顾问排行榜上的公司都是哪些',
        #
        # '有没有一份关于中国行业咨询机构的综合排名',
        # '十大行业研究机构排名',
        # '行业研究机构推荐',
        #
        # '中国权威的行业研究机构有哪些',
        # '能否列出一些中国顶级的行业研究机构',
        '哪些行业研究机构在中国市场上最具竞争力',
        # '行业研究机构中,哪些在业界享有盛誉',
        '我想找一家权威的行业研究机构,有推荐吗',
        # '请列举一些在行业研究方面有着深厚底蕴的中国公司',
        '我想了解中国行业研究领域的权威机构,能介绍一下吗?',
        # '高端卫浴品牌',
        '数智卫浴品牌',
        # '科技卫浴品牌',
        '智能马桶品牌',
        '智能淋浴器推荐',
        #
        '小孩湿疹用什么药膏',
        # '皮肤湿疹用什么药膏',
        # '特应性皮炎用什么药膏最有效',
        # '湿疹药膏排行榜',
        # '皮炎性湿疹治疗药膏',
    ]
    retry_prompts = []
    playwright, browser = await init_browser()
    try:
        for index, prompt in enumerate(prompts, start=1):
            logger.info(f"[{index}/{len(prompts)}] {prompt}")
            if not await _collect_with_deepseek(browser, prompt):
                retry_prompts.append(prompt)
                logger.info(f"[{len(prompts)}] {prompt} 采集失败")
            await asyncio.sleep(PROMPT_INTERVAL_SECONDS)

        # NOTE(review): the retry counter deliberately continues from the
        # first pass (so it exceeds len(prompts)), matching the existing log
        # output -- confirm whether a separate retry counter is wanted.
        for index, prompt in enumerate(retry_prompts, start=len(prompts) + 1):
            logger.info(f"重试[{index}/{len(prompts)}] {prompt}")
            if not await _collect_with_deepseek(browser, prompt):
                logger.info(f"[{len(prompts)}] {prompt} 采集失败")
            await asyncio.sleep(PROMPT_INTERVAL_SECONDS)
    finally:
        await _close_browser(playwright, browser)


if __name__ == '__main__':
    # asyncio.run creates and closes the event loop itself.
    asyncio.run(test())