"""HTTP API that queues crawl jobs and serves their results from SQLite.

Tasks are submitted over HTTP, pushed onto an in-process queue, executed by a
single background thread, and their outcome is stored in the ``tasks`` table
of ``spider.db``.
"""
import hashlib
import json
import logging
import queue
import sqlite3
import uuid
from contextlib import closing
from threading import Thread

from fastapi import FastAPI
from pydantic import BaseModel

from main import Start

logger = logging.getLogger(__name__)

app = FastAPI()
task_queue = queue.Queue()


def get_conn():
    """Open a new connection to ``spider.db``.

    ``check_same_thread=False`` lifts sqlite3's same-thread restriction on the
    returned connection. Callers are responsible for closing it.
    """
    return sqlite3.connect("spider.db", check_same_thread=False)


class Spider(BaseModel):
    """Request body for submitting a crawl task."""

    # URL to crawl.
    url: str


# 🔁 worker
def _process_task(task_id, url):
    """Run one crawl and record its outcome in the ``tasks`` table.

    On success the row is written with status ``0`` and the JSON-encoded
    result; on any failure the row is marked with status ``"error"`` and the
    exception text. Exceptions raised while recording the failure itself
    propagate to the caller.
    """
    # ``closing`` guarantees the connection is released even when a statement
    # raises; sqlite3's own ``with conn:`` only manages transactions.
    with closing(get_conn()) as conn:
        try:
            data = Start(url).run()
            # REPLACE deletes the existing row before inserting, so ``url``
            # must be supplied again or the value stored at submission time
            # is lost. Assumes task_id is the table's unique key - TODO
            # confirm against the schema.
            conn.execute(
                """
                INSERT OR REPLACE INTO tasks (task_id, url, status, result)
                VALUES (?, ?, ?, ?)
                """,
                (task_id, url, 0, json.dumps(data, ensure_ascii=False)),
            )
        except Exception as e:
            logger.exception("crawl failed for task %s (%s)", task_id, url)
            conn.execute(
                """
                UPDATE tasks
                SET status=?,
                    error=?
                WHERE task_id = ?
                """,
                ("error", str(e), task_id),
            )
        conn.commit()


def worker():
    """Consume ``(task_id, url)`` pairs from ``task_queue`` forever.

    Runs in a daemon thread. Each task is handled by ``_process_task``; any
    exception escaping it is logged rather than raised, because this is the
    only consumer and letting the thread die would leave every later task
    stuck in the "processing" state.
    """
    while True:
        task_id, url = task_queue.get()
        try:
            _process_task(task_id, url)
        except Exception:
            logger.exception(
                "could not record outcome of task %s (%s)", task_id, url
            )
        finally:
            # Always balance the get() so queue.join() cannot hang.
            task_queue.task_done()


Thread(target=worker, daemon=True).start()


# ✅ Submit a task
@app.post("/crawler/put", summary='提交爬虫')
def put_task(req: Spider):
    """Register a crawl task and queue it for the background worker.

    url: the URL to crawl

    The task_id is the MD5 hex digest of the URL, so submitting the same URL
    again overwrites the stored row (status reset to -2) and re-queues it.
    Returns the task_id to poll with.
    """
    task_id = hashlib.md5(req.url.encode('utf-8')).hexdigest()
    logger.debug("submitting task %s for %s", task_id, req.url)

    with closing(get_conn()) as conn:
        conn.execute(
            """
            INSERT OR REPLACE INTO tasks (task_id, url, status)
            VALUES (?, ?, ?)
            """,
            (task_id, req.url, -2),
        )
        conn.commit()

    # Enqueue only after the row is committed so the worker's later write
    # targets an existing row.
    task_queue.put((task_id, req.url))

    return {'code': 0, 'data': {"task_id": task_id}, 'msg': '操作成功'}


# ✅ Fetch a result
@app.get("/crawler/get/{task_id}", summary='获取数据')
def get_result(task_id: str):
    """Return the stored state of a task.

    task_id: the task_id returned on submission

    code:
        failed or unknown task: -1
        still processing: -2
        success: 0
    """
    with closing(get_conn()) as conn:
        row = conn.execute(
            "SELECT status, result, error FROM tasks WHERE task_id=?",
            (task_id,),
        ).fetchone()

    if not row:
        return {"code": -1, 'data': task_id, 'msg': '无此task_id'}

    status, result, _ = row

    # The worker stores the literal string "error" for failed tasks; every
    # other status is expected to be numeric.
    if status == "error":
        return {"code": -1, 'data': task_id, 'msg': '任务处理失败'}

    code = int(status)
    msg = '操作成功' if code == 0 else '任务正在处理'

    return {
        "code": code,
        "data": json.loads(result) if result else None,
        "msg": msg
    }

# uvicorn api_main:app --host 0.0.0.0 --port 8000
# uvicorn api_main:app --host 0.0.0.0 --port 32000 --log-level debug