# coding=utf-8
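"""
Interactive login helper: launches a visible Chrome window via Playwright,
lets the operator log in to the chosen AI platform manually, then saves the
resulting storage state as a session file and uploads it through AiSeoApis.
"""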
import asyncio
import os
import time
from playwright.async_api import async_playwright
from abs_spider import AbstractAiSeoSpider
from spiders.ai_seo import *
import config
from utils import make_sha256_hash
from utils.ai_seo_api_utils import AiSeoApis
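# Map platform id (as entered at the prompt) to its spider class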
SPIDER_CLS = {
    1: DeepseekSpider,
    2: TongyiSpider,
    3: YuanBaoSpider,
    4: KimiSpider,
    5: DouBaoSpider,
    6: YiYanSpider,
    7: NanometerSpider,
    13: MetasoSpider
}
async def init_browser() -> tuple:
    """
    Initialize Playwright and launch a visible Chrome browser.
    :return: (playwright, browser)
    """
    playwright = await async_playwright().start()
    browser = await playwright.chromium.launch(headless=False,
                                               chromium_sandbox=config.BROWSER_ENABLE_SANDBOX,
                                               ignore_default_args=config.BROWSER_IGNORE_DEFAULT_ARGS,
                                               channel="chrome",
                                               args=config.BROWSER_ARGS)
    return playwright, browser
async def main():
    playwright, browser = await init_browser()
    main_info = """
    Select the platform to log in to:
    1. Deepseek
    2. Tongyi Qianwen
    3. Tencent Yuanbao
    4. Kimi
    5. Doubao
    6. Wenxin Yiyan
    13. Metaso AI Search
    """
    print(main_info)
    platform_id = input()
    cls = SPIDER_CLS.get(int(platform_id), None)
    # If no matching spider class is found, raise an exception
    if not cls:
        raise ValueError('The entered platform id does not exist')
    # Ask the user for the login account
    account = input('Enter the login account: ')
    # Create the spider instance
    spider = cls(browser, '', '')
    # Get the platform's home page url
    home_url = spider.get_home_url()
    # Open the page in a fresh browser context
    browser_context = await browser.new_context()
    browser_page = await browser_context.new_page()
    await browser_page.set_viewport_size(config.PAGE_INIT_VIEWPORT_SIZE)
    print('Browser created successfully')
    # Load the stealth scripts that mask automation fingerprints
    await browser_page.add_init_script("""
        Object.defineProperties(navigator, {webdriver:{get:()=>false}});
    """)
    await browser_page.add_init_script(path='static/stealth.min.js')
    print('Stealth scripts loaded successfully')
    await browser_page.goto(home_url, timeout=6000000)
    print('Home page loaded successfully')
    input('Log in with your phone number and verification code, then press Enter to continue...')
    # Save the logged-in context (storage state) to a session file
    session_path = f"{config.ROOT_PATH}/data/tmp/session_{time.time()}.json"
    # Ensure the target directory exists
    dir_path = os.path.dirname(session_path)
    os.makedirs(dir_path, exist_ok=True)
    await browser_context.storage_state(path=session_path)
    await browser_page.close()
    await browser_context.close()
    await browser.close()
    await playwright.stop()
    print(f"Login succeeded, session saved to {session_path}")
    # Upload the saved session file
    upload_data = await AiSeoApis.upload_session_file(session_path)
    session_url = upload_data['url']
    print(f"Session file uploaded successfully, url: {session_url}")
    # Compute the file hash and register the session with the backend
    file_hash = make_sha256_hash(session_path)
    result = await AiSeoApis.save_spider_session(platform_id, session_url, file_hash, account)
    print("Session record saved successfully")
    print(result)
if __name__ == '__main__':
    asyncio.run(main())