feat(ai_seo): support datetime-typed publish_time field and optimize workflow spider integration
- Extend the `publish_time` field on the `AiAnswer` dataclass to also accept `datetime` instances, auto-formatting them to strings (see the sketch after this list)
- Add a `WorkFlowApiSpider` class that fetches AI answers from the workflow platform via its API
- Update `main.py` and `run.py` to register and invoke the workflow-spider logic
- Add scheduled-task logic to run workflow-platform tasks
- Choose between the traditional spider and the workflow API spider dynamically by platform ID
- Add error handling for short or empty results, with task-status rollback
- Optimize browser initialization so a browser instance starts only when actually needed
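The `domain/ai_seo.py` hunk is not shown on this page. As a rough illustration only: the commit title names `AiAnswer`, while the new spider below passes the `datetime` to `AiSearchResult.publish_time`, so this sketch uses the latter's fields. The dataclass layout and the format string are assumptions, not the committed code.

# Hypothetical sketch (the actual domain/ai_seo.py diff is not shown above).
# Illustrates a publish_time field that accepts str, datetime, or None and
# normalizes datetime values to a string on construction.
from dataclasses import dataclass
from datetime import datetime


@dataclass
class AiSearchResult:
    title: str
    url: str
    host_name: str
    body: str
    publish_time: str | datetime | None = None  # extended to accept datetime
    is_referenced: bool = False

    def __post_init__(self):
        # Auto-format datetime instances to a string; the format is an assumption
        if isinstance(self.publish_time, datetime):
            self.publish_time = self.publish_time.strftime('%Y-%m-%d %H:%M:%S')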
4 changed files with 215 additions and 5 deletions
- domain/ai_seo.py (6)
- main.py (7)
- run.py (100)
- spiders/ai_seo/workflow.py (105)
@@ -0,0 +1,105 @@
# coding=utf-8
from datetime import datetime

import httpx

import config
from domain.ai_seo import AiAnswer, AiSearchResult
from utils import create_logger

# Platform info: maps each platform ID to its display name and Dify app API key
PLATFORMS = {
    1: {'id': 1, 'name': 'Deepseek', 'api_key': ''},
    2: {'id': 2, 'name': '通义千问', 'api_key': 'app-mQE0lOxB0G49r4tQSv2LgIOV'},
    3: {'id': 3, 'name': '腾讯元宝', 'api_key': ''},
    4: {'id': 4, 'name': 'Kimi', 'api_key': ''},
    5: {'id': 5, 'name': '豆包', 'api_key': 'app-lD5HbD03EW7pamzIV2VEIyR6'},
    6: {'id': 6, 'name': '文心一言', 'api_key': ''},
}


logger = create_logger(__name__)


class WorkFlowApiSpider:
    platform_id: int
    platform_name: str
    prompt: str
    keyword: str
    ai_answer: AiAnswer | None = None
    fail_status: bool = False
    fail_exception: Exception | None = None
    load_session: bool = True
    task_id: int = 0
    think: bool = False
    api_key: str = ''

    def __init__(self, prompt: str, keyword: str, platform_id: int):
        self.platform_id = platform_id
        self.prompt = prompt
        self.keyword = keyword

        platform = PLATFORMS.get(int(platform_id), {})
        if not platform:
            raise Exception('Platform does not exist')
        self.platform_name = platform.get('name', '')
        self.api_key = platform.get('api_key', '')
        if not self.api_key:
            raise Exception('No api_key configured for this platform')

    async def run(self):
        logger.info(f"{self.platform_name} API fetching data, prompt: {self.prompt}")
        # Build request parameters
        params = {
            "response_mode": "blocking",
            "user": config.DIFY_USER,
            "inputs": {
                "prompt": self.prompt
            }
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}"
        }
        # Send the request
        async with httpx.AsyncClient() as client:
            response = await client.post(f"{config.DIFY_BASE_URL}/workflows/run", json=params, headers=headers, timeout=300)
            json_result = response.json()
        result = json_result.get('data', {})
        if not result or result.get('status') != 'succeeded':
            logger.error(f"{self.platform_name} API fetch failed: {json_result}")
            raise Exception(f"{self.platform_name} API fetch failed")
        # Extract the data returned by the workflow
        workflow_result = result['outputs']['work_flow_result']
        # Usage data
        usage = workflow_result['usage']
        # AI answer, with tags
        answer = workflow_result['answer']
        # AI answer, without tags
        pure_answer = workflow_result['pure_answer']
        logger.debug(f"AI answer: {pure_answer}")
        # Web search results
        web_searches = workflow_result.get('web_search', [])
        # Converted results
        search_items = []
        for item in web_searches:
            # Extract publish_time (may be absent or empty)
            if not item.get('datePublished'):
                publish_time = None
            else:
                publish_time = datetime.strptime(item['datePublished'], "%Y-%m-%dT%H:%M:%S%z")
            search_item = AiSearchResult(
                title=item['name'],
                url=item['url'],
                host_name=item['siteName'],
                body=item['summary'],
                publish_time=publish_time,
                is_referenced=item['is_ref'],
            )
            logger.debug(f"AI reference: [{search_item.host_name}]{search_item.title}({search_item.url})")
            search_items.append(search_item)

        # Assemble the result
        self.ai_answer = AiAnswer(self.platform_id, self.platform_name, self.prompt, self.keyword, answer, search_items, '', True)
        logger.info(f"Usage this run:\nTotal tokens: {usage['total_tokens']}\nTotal cost: {round(float(usage['total_price']), 3)}")
        return self.ai_answer
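The `run.py` and `main.py` hunks are likewise omitted on this page. A minimal sketch of the platform-ID dispatch the commit message describes, under the assumption that workflow platforms are exactly those with a configured Dify app key; the `fetch_answer` helper and the browser-spider fallback are illustrative, not the committed code.

# Hypothetical dispatch sketch (run.py diff not shown above): platforms with a
# configured Dify app key go through the workflow API; the rest fall back to
# the traditional browser-based spider.
from spiders.ai_seo.workflow import PLATFORMS, WorkFlowApiSpider


async def fetch_answer(prompt: str, keyword: str, platform_id: int):
    platform = PLATFORMS.get(int(platform_id), {})
    if platform.get('api_key'):
        # Workflow platform: one blocking API call, no browser instance needed
        return await WorkFlowApiSpider(prompt, keyword, platform_id).run()
    # Traditional platform: only here would a browser instance be started,
    # matching "start a browser only when needed" (spider class name assumed)
    raise NotImplementedError('fall back to the browser-based spider')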