# coding=utf-8
import asyncio
import os
import time

from playwright.async_api import async_playwright

from abs_spider import AbstractAiSeoSpider
from spiders.ai_seo import *
import config
from utils import make_sha256_hash
from utils.ai_seo_api_utils import AiSeoApis

SPIDER_CLS = {
    1: DeepseekSpider,
    2: TongyiSpider,
    3: YuanBaoSpider,
    4: KimiSpider,
    5: DouBaoSpider,
    6: YiYanSpider,
    7: NanometerSpider,
    13: MetasoSpider
}


async def init_browser() -> tuple:
    """
    Initialize the browser instance.
    :return: (playwright, browser)
    """
    playwright = await async_playwright().start()
    browser = await playwright.chromium.launch(
        headless=False,
        chromium_sandbox=config.BROWSER_ENABLE_SANDBOX,
        ignore_default_args=config.BROWSER_IGNORE_DEFAULT_ARGS,
        channel="chrome",
        args=config.BROWSER_ARGS,
    )
    return playwright, browser


async def main():
    playwright, browser = await init_browser()
    main_info = """
    Select the platform to log in to:
    1. Deepseek
    2. Tongyi Qianwen
    3. Tencent Yuanbao
    4. Kimi
    5. Doubao
    6. Wenxin Yiyan
    7. Nanometer AI Search
    13. Metaso AI Search
    """
    print(main_info)
    platform_id = input()
    cls = SPIDER_CLS.get(int(platform_id), None)
    # If no matching spider class is found, report the error and exit
    if not cls:
        print('The entered platform id does not exist')
        await browser.close()
        await playwright.stop()
        return

    # Ask the user for the login account
    account = input('Enter the login account: ')

    # Create the spider instance
    spider = cls(browser, '', '')
    # Get the home page url
    home_url = spider.get_home_url()

    # Open the page
    browser_context = await browser.new_context()
    browser_page = await browser_context.new_page()
    await browser_page.set_viewport_size(config.PAGE_INIT_VIEWPORT_SIZE)
    print('Browser created successfully')

    # Load the stealth (anti-detection) scripts
    await browser_page.add_init_script("""
        Object.defineProperties(navigator, {webdriver: {get: () => false}});
    """)
    # add_init_script takes a file via the `path` keyword; passing the path as
    # the positional `script` argument would inject the literal string instead
    await browser_page.add_init_script(path='static/stealth.min.js')
    print('Stealth scripts loaded successfully')

    await browser_page.goto(home_url, timeout=6000000)  # generous page-load timeout, in milliseconds
    print('Home page loaded successfully')
    input('Log in with a phone number; after sending the verification code, press Enter to continue...')

    # Save the logged-in context (storage state)
    session_path = f"{config.ROOT_PATH}/data/tmp/session_{time.time()}.json"
    # Ensure the target directory exists
    dir_path = os.path.dirname(session_path)
    os.makedirs(dir_path, exist_ok=True)
    await browser_context.storage_state(path=session_path)
    await browser_page.close()
    await browser_context.close()
    await browser.close()
    await playwright.stop()
    print(f"Login succeeded, saved to {session_path}")

    # Upload the saved session file
    upload_data = await AiSeoApis.upload_session_file(session_path)
    session_url = upload_data['url']
    print(f"Session file uploaded successfully, url: {session_url}")

    # Compute the file hash
    file_hash = make_sha256_hash(session_path)
    result = await AiSeoApis.save_spider_session(platform_id, session_url, file_hash, account)
    print("Session file saved successfully")
    print(result)


if __name__ == '__main__':
    asyncio.run(main())
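
# Example (a minimal sketch, not part of this tool): a storage-state file saved
# above can later be restored into a fresh context via Playwright's
# `storage_state` parameter, which is presumably how the downstream spiders
# reuse the captured login:
#
#     context = await browser.new_context(storage_state=session_path)
#     page = await context.new_page()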