You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
100 lines
3.2 KiB
100 lines
3.2 KiB
# coding=utf-8
|
|
import asyncio
|
|
import os
|
|
import time
|
|
|
|
from playwright.async_api import async_playwright
|
|
|
|
from abs_spider import AbstractAiSeoSpider
|
|
from spiders.ai_seo import *
|
|
import config
|
|
from utils import make_sha256_hash
|
|
from utils.ai_seo_api_utils import AiSeoApis
|
|
|
|
# Maps the numeric platform id typed by the user to the spider class that
# handles that platform.
SPIDER_CLS = {
    1: DeepseekSpider,    # Deepseek
    2: TongyiSpider,      # Tongyi Qianwen
    3: YuanBaoSpider,     # Tencent Yuanbao
    4: KimiSpider,        # Kimi
    5: DouBaoSpider,      # Doubao
    6: YiYanSpider,       # Wenxin Yiyan
    # NOTE(review): id 7 is accepted here but is not listed in the menu
    # printed by main() -- confirm whether that omission is intentional.
    7: NanometerSpider,
    13: MetasoSpider,     # Metaso AI search
}
|
|
|
|
|
|
async def init_browser() -> tuple:
    """
    Start Playwright and launch a browser instance.

    The browser is launched through the ``chrome`` channel with
    ``headless=False``; the sandbox flag, the ignored default arguments and
    the extra command-line arguments are read from ``config``.

    :return: a ``(playwright, browser)`` tuple. The caller is responsible
        for closing the browser and stopping Playwright.
    """
    playwright = await async_playwright().start()
    try:
        browser = await playwright.chromium.launch(
            headless=False,
            chromium_sandbox=config.BROWSER_ENABLE_SANDBOX,
            ignore_default_args=config.BROWSER_IGNORE_DEFAULT_ARGS,
            channel="chrome",
            args=config.BROWSER_ARGS,
        )
    except Exception:
        # Do not leave Playwright started when the browser cannot be launched.
        await playwright.stop()
        raise
    # No browser context is created here: callers create (and close) their own.
    return playwright, browser
|
|
|
|
|
|
async def main():
    """
    Interactively log in to one platform and store the resulting session.

    Flow:
      1. Launch the browser and ask the user for a platform id and account.
      2. Open the platform's home page and wait until the user has logged in
         manually and pressed Enter.
      3. Save the browser context's storage state to a JSON file under
         ``{config.ROOT_PATH}/data/tmp``.
      4. Upload that file and register it (with its SHA-256 hash and the
         account name) through ``AiSeoApis``.

    If the entered platform id is not a number or is not present in
    ``SPIDER_CLS``, a message is printed and the function returns without
    doing anything else. The browser and Playwright are always released,
    including on errors.
    """
    # NOTE(review): SPIDER_CLS also maps id 7, which this menu does not
    # offer -- confirm whether that is intentional.
    main_info = """
请选择要登录的平台:
1.Deepseek
2.通义千问
3.腾讯元宝
4.Kimi
5.豆包
6.文心一言
13.秘塔AI搜索
"""
    playwright, browser = await init_browser()
    try:
        print(main_info)
        platform_id = input()
        try:
            cls = SPIDER_CLS.get(int(platform_id))
        except ValueError:
            # Non-numeric input is treated like an unknown platform id.
            cls = None
        # Stop here when no spider class matches the given id.
        if not cls:
            print('输入的平台id不存在')
            return
        # Ask the user for the account that is being logged in.
        account = input('请输入登录账号:')
        # The spider instance is only used to obtain the home page URL.
        spider = cls(browser, '', '')
        home_url = spider.get_home_url()
        # Open a fresh context and page for the login.
        browser_content = await browser.new_context()
        browser_page = await browser_content.new_page()
        await browser_page.set_viewport_size(config.PAGE_INIT_VIEWPORT_SIZE)
        print('创建浏览器成功')
        # Install the disguise scripts before any page script runs.
        await browser_page.add_init_script("""
Object.defineProperties(navigator, {webdriver:{get:()=>false}});
""")
        # The stealth script is a file, so it must be passed via ``path``;
        # a positional argument would be evaluated as JavaScript source.
        await browser_page.add_init_script(path='static/stealth.min.js')
        print('伪装脚本加载成功')
        await browser_page.goto(home_url, timeout=6000000)
        print('加载首页成功')
        # Blocks until the user confirms that the manual login is done.
        input('使用手机号登录 并发送验证码后按回车键继续...')
        # Persist the logged-in context to disk.
        session_path = f"{config.ROOT_PATH}/data/tmp/session_{time.time()}.json"
        # Make sure the target directory exists.
        dir_path = os.path.dirname(session_path)
        os.makedirs(dir_path, exist_ok=True)
        await browser_content.storage_state(path=session_path)
        await browser_page.close()
        await browser_content.close()
    finally:
        # Release the browser and Playwright on success, early return and
        # errors alike.
        try:
            await browser.close()
        finally:
            await playwright.stop()
    print(f"登录成功 保存到{session_path}")
    # Upload the saved session file.
    upload_data = await AiSeoApis.upload_session_file(session_path)
    session_url = upload_data['url']
    print(f"session文件上传成功 url:{session_url}")
    # Hash the session file and register the session.
    file_hash = make_sha256_hash(session_path)
    result = await AiSeoApis.save_spider_session(platform_id, session_url, file_hash, account)
    print("session文件保存成功")
    print(result)
|
|
|
|
if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards; it replaces the deprecated
    # get_event_loop().run_until_complete() start-up pattern.
    asyncio.run(main())
|