Browse Source

feat: 新增deepseek session检查功能

master
zzx 3 months ago
parent
commit
a2dd40d1d9
  1. 22
      abs_spider.py
  2. 17
      main.py
  3. 12
      spiders/ai_seo/deepseek.py
  4. 26
      utils/ai_seo_api_utils.py
  5. 4
      utils/session_utils.py

22
abs_spider.py

@ -9,6 +9,7 @@ from playwright.async_api import Browser, BrowserContext, Page
import config
from domain.ai_seo import AiAnswer
from utils import create_logger
from utils.ai_seo_api_utils import AiSeoApis
from utils.session_utils import get_spider_session
logger = create_logger("abs_spider")
@ -67,9 +68,9 @@ class AbstractAiSeoSpider(ABC):
screenshot_path = f'{config.SCREENSHOT_BASE_PATH}/{self.platform_name}_{unique_id}.png'
return screenshot_path
async def __init_page(self):
async def __init_page(self, id=''):
if self.load_session:
self.session_info = await get_spider_session(self.platform_id)
self.session_info = await get_spider_session(self.platform_id, id)
if self.platform_id != 8:
self.browser_content = await self.browser.new_context(storage_state=self.session_info['session_path'])
else:
@ -118,6 +119,20 @@ class AbstractAiSeoSpider(ABC):
finally:
await self._close()
async def check_session(self, session_id) -> bool:
    """Validate a stored spider session by driving a real browser probe.

    Initializes the page with the given session id, delegates the
    platform-specific check to ``do_check_session``, persists the outcome
    through the central API, and always releases browser resources.

    :param session_id: id of the session record to validate
    :return: True when the session is still usable, False otherwise
    """
    try:
        await self.__init_page(session_id)
        result = await self.do_check_session()
    finally:
        # Fix: previously a failure inside the check leaked the browser
        # context because _close() was only reached on the happy path.
        await self._close()
    if result:
        logger.success(f"[{self.get_platform_name()}]session状态有效! ✅ id: {session_id}")
    else:
        logger.error(f"[{self.get_platform_name()}]session状态无效! ❌ id: {session_id}")
    # 更新session状态 — persist the new status (1 = valid, 2 = invalid)
    await AiSeoApis.update_spider_session(session_id, 1 if result else 2)
    return result
@abstractmethod
async def _do_spider(self) -> AiAnswer:
"""
@ -137,3 +152,6 @@ class AbstractAiSeoSpider(ABC):
@abstractmethod
def get_home_url(self) -> str:
pass
@abstractmethod
async def do_check_session(self) -> bool:
pass

17
main.py

@ -12,7 +12,9 @@ import config
from abs_spider import AbstractAiSeoSpider
from domain.ai_seo import AiAnswer
from spiders.ai_seo import *
from spiders.ai_seo.wenxiaoyan import WenxiaoyanSpider
from utils.logger_utils import create_logger
from utils.ai import AiSeoApis
logger = create_logger("app")
@ -24,7 +26,8 @@ SPIDER_CLS = {
5: DouBaoSpider,
6: YiYanSpider,
7: NanometerSpider,
13: MetasoSpider
13: MetasoSpider,
8: WenxiaoyanSpider
}
@ -47,7 +50,7 @@ def get_spider(platform_id, prompt, brand, browser) -> AbstractAiSeoSpider:
cls = SPIDER_CLS.get(int(platform_id), None)
if not cls:
raise ValueError(f"未找到对应的爬虫类,platform_id={platform_id}")
return cls(browser, prompt, brand, True)
return cls(browser, prompt, brand)
def save_local(ai_answer: AiAnswer):
@ -80,6 +83,14 @@ async def test():
index = index + 1
await asyncio.sleep(config.TEST_INTERVAL * 6)
async def test_check_session():
    """Fetch sessions from the API and validate each one in a real browser.

    :return: mapping of session id -> check result (True/False)
    """
    # NOTE(review): the positional `1` binds to platform_id in
    # list_spider_session(platform_id='', status='') — confirm a status
    # filter was not intended here.
    sessions = await AiSeoApis.list_spider_session(1)
    playwright, browser = await init_browser()
    try:
        results = {}
        for session in sessions:
            spider = get_spider(session['platform_id'], '你好', '品牌词', browser)
            # check_session logs and persists the outcome itself; collect
            # results instead of discarding them (was an unused local).
            results[session['id']] = await spider.check_session(session['id'])
        return results
    finally:
        # Fix: the original never released the browser/playwright handles.
        await browser.close()
        await playwright.stop()
if __name__ == '__main__':
asyncio.get_event_loop().run_until_complete(test())
asyncio.get_event_loop().run_until_complete(test_check_session())

12
spiders/ai_seo/deepseek.py

@ -87,6 +87,18 @@ class DeepseekSpider(AbstractAiSeoSpider):
self.ai_answer.screenshot_file = screenshot_path
return self.ai_answer
async def do_check_session(self) -> bool:
    """Probe whether the current Deepseek session is still usable.

    Opens the home page, focuses the chat textarea and types the prompt;
    any failure along the way is treated as an unusable session.

    :return: True if the chat input is reachable, False otherwise
    """
    try:
        await self.browser_page.goto(self.get_home_url(), timeout=600000)
        await asyncio.sleep(3)
        # Click the chat box, then type the prompt to confirm the page
        # is actually interactive for this session.
        await self.browser_page.locator("//textarea[@id='chat-input']").click()
        await self.browser_page.keyboard.type(self.prompt)
    except Exception:
        return False
    return True
def handle_listen_response_error(self, func):
"""

26
utils/ai_seo_api_utils.py

@ -170,15 +170,39 @@ class AiSeoApis:
return json_result['data']
@staticmethod
async def get_spider_session(platform_id, id=''):
    """Fetch one usable spider session from the central API.

    :param platform_id: platform the session belongs to
    :param id: optional session id; when given, that exact session is
        requested (name kept for caller compatibility despite shadowing
        the builtin)
    :return: session payload dict, or None when the API reports an error
    """
    uri = '/api/third/getOneSpiderSession'
    url = AiSeoApis.build_full_url(uri)
    json_data = {**config.AI_SEO_API_AUTH, 'platform_id': platform_id}
    # Fix: condition was inverted (`if not id:`), which sent an empty id
    # to the API and silently dropped a caller-supplied one.
    if id:
        json_data['id'] = id
    async with httpx.AsyncClient() as client:
        response = await client.get(url, params=json_data, timeout=60)
        json_result = response.json()
    if not json_result['code'] == 0:
        logger.error(f"获取爬虫session失败")
        return None
    return json_result['data']
@staticmethod
async def list_spider_session(platform_id='', status=''):
    """List spider sessions, optionally filtered by platform and status.

    :param platform_id: optional platform filter
    :param status: optional status filter — presumably 1 = valid,
        2 = invalid (mirrors update_spider_session); confirm with the API
    :return: list of session dicts, or None when the API reports an error
    """
    uri = '/api/third/getSpiderSessionList'
    url = AiSeoApis.build_full_url(uri)
    json_data = {**config.AI_SEO_API_AUTH, 'platform_id': platform_id, 'status': status}
    # Fix: removed the copy-pasted `if not id:` block — `id` is not a
    # parameter of this method and referencing it raised NameError on
    # every call.
    async with httpx.AsyncClient() as client:
        response = await client.get(url, params=json_data, timeout=60)
        json_result = response.json()
    # Response handling mirrors get_spider_session above.
    if not json_result['code'] == 0:
        logger.error(f"获取爬虫session列表失败")
        return None
    return json_result['data']

4
utils/session_utils.py

@ -5,7 +5,7 @@ import config
from utils.ai_seo_api_utils import AiSeoApis
async def get_spider_session(platform_id):
async def get_spider_session(platform_id, id=''):
"""
session
:param platform_id:
@ -13,7 +13,7 @@ async def get_spider_session(platform_id):
"""
base_path = f'{config.ROOT_PATH}/data/session_data'
# 爬虫信息
session_info = await AiSeoApis.get_spider_session(platform_id)
session_info = await AiSeoApis.get_spider_session(platform_id, id)
if not session_info:
raise Exception(f"平台id: {platform_id} 没有可用的爬虫session")
if platform_id == 8:

Loading…
Cancel
Save