# NOTE(review): removed web-page scrape residue (repository "topics" banner and
# "85 lines / 2.6 KiB" file stats) accidentally pasted above the module header —
# it was not Python and broke the file.
# coding=utf-8
import asyncio
import json
import os
from dataclasses import asdict
from datetime import datetime
from playwright.async_api import async_playwright
import config
from abs_spider import AbstractAiSeoSpider
from domain.ai_seo import AiAnswer
from spiders.ai_seo import *
from utils.logger_utils import create_logger
logger = create_logger("app")
# Registry mapping a numeric platform id to its spider implementation.
# Ids are not contiguous (8-12 are unassigned here); lookups go through
# get_spider(), which raises ValueError for unknown ids.
SPIDER_CLS = {
1: DeepseekSpider,
2: TongyiSpider,
3: YuanBaoSpider,
4: KimiSpider,
5: DouBaoSpider,
6: YiYanSpider,
7: NanometerSpider,
13: MetasoSpider
}
async def init_browser() -> tuple:
    """
    Start Playwright and launch a Chrome browser with the configured options.

    :return: ``(playwright, browser)`` — the caller owns both and is
             responsible for ``browser.close()`` / ``playwright.stop()``.
    """
    playwright = await async_playwright().start()
    browser = await playwright.chromium.launch(
        headless=config.BROWSER_HANDLESS,
        chromium_sandbox=config.BROWSER_ENABLE_SANDBOX,
        ignore_default_args=config.BROWSER_IGNORE_DEFAULT_ARGS,
        channel="chrome",
        args=config.BROWSER_ARGS,
    )
    # BUG fix: the original created an extra BrowserContext here (bound to a
    # misspelled variable `content`) that was never returned or closed — a
    # resource leak. Contexts are presumably created by the spiders from the
    # returned browser — TODO confirm against AbstractAiSeoSpider.
    return playwright, browser
def get_spider(platform_id, prompt, brand, browser) -> AbstractAiSeoSpider:
    """
    Resolve the spider class registered for *platform_id* and instantiate it.

    :param platform_id: numeric platform id (anything ``int()`` accepts).
    :param prompt: the question to ask the AI platform.
    :param brand: brand keyword passed through to the spider.
    :param browser: shared Playwright browser instance.
    :raises ValueError: when no spider is registered for the platform id.
    """
    spider_cls = SPIDER_CLS.get(int(platform_id))
    if spider_cls is None:
        raise ValueError(f"未找到对应的爬虫类,platform_id={platform_id}")
    return spider_cls(browser, prompt, brand, True)
def save_local(ai_answer: AiAnswer):
    """
    Persist one AI answer as pretty-printed JSON under
    ``./data/<platform_name>/<YYYY-MM-DD>/<prompt>.json``.

    :param ai_answer: dataclass instance; serialized via ``dataclasses.asdict``.
    """
    now = datetime.now().strftime("%Y-%m-%d")
    base_path = f'./data/{ai_answer.platform_name}/{now}'
    # BUG fix: exist_ok=True avoids the check-then-create race when several
    # runs (or platforms) write to the same day directory concurrently.
    os.makedirs(base_path, exist_ok=True)
    # NOTE(review): the raw prompt is used as the file name; prompts containing
    # path separators or other illegal characters would fail here — consider
    # sanitizing. TODO confirm prompts are plain keywords.
    json_file_path = f'{base_path}/{ai_answer.prompt}.json'
    json_str = json.dumps(asdict(ai_answer), indent=4, ensure_ascii=False)
    with open(json_file_path, 'w', encoding='utf-8') as f:
        f.write(json_str)
    logger.info(f"[{ai_answer.platform_name}]{ai_answer.prompt} 保存成功: {base_path}")
async def test():
    """
    Run every configured test platform against every test keyword and save
    each answer locally, pacing requests with the configured interval.
    """
    playwright, browser = await init_browser()
    try:
        prompts = config.TEST_KEYWORDS
        for index, prompt in enumerate(prompts, start=1):
            logger.info(f"[{index}/{len(prompts)}] {prompt}")
            for platform in config.TEST_PLATFORM:
                spider = get_spider(platform, prompt, '品牌词', browser)
                ai_answer = await spider.run()
                if ai_answer:
                    save_local(ai_answer)
                # throttle between platforms to avoid hammering the sites
                await asyncio.sleep(config.TEST_INTERVAL)
            # longer pause between keywords
            await asyncio.sleep(config.TEST_INTERVAL * 6)
    finally:
        # BUG fix: the original never closed the browser or stopped the
        # Playwright driver, leaking the browser process — even more so when
        # a spider raised mid-run.
        await browser.close()
        await playwright.stop()
if __name__ == '__main__':
    # asyncio.get_event_loop().run_until_complete(...) is deprecated when no
    # loop is running (since Python 3.10); asyncio.run creates a fresh event
    # loop and tears it down cleanly.
    asyncio.run(test())