You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
103 lines
2.5 KiB
103 lines
2.5 KiB
from main import Start
|
|
from fastapi import FastAPI
|
|
from pydantic import BaseModel
|
|
from threading import Thread
|
|
import queue
|
|
import uuid
|
|
import sqlite3
|
|
import json
|
|
import hashlib
|
|
|
|
# FastAPI application instance; the @app.post / @app.get handlers in this
# module are registered on it.
app = FastAPI()
|
|
|
|
|
|
def get_conn():
    """Open and return a new SQLite connection to ``spider.db``.

    ``check_same_thread=False`` lifts sqlite3's check that a connection is
    only used by the thread that created it. Each caller is expected to
    close the connection it receives.
    """
    connection = sqlite3.connect("spider.db", check_same_thread=False)
    return connection
|
|
|
|
|
|
class Spider(BaseModel):
    """Request body accepted by the crawl-submission endpoint."""

    # Address of the page to crawl.
    url: str
|
|
|
|
|
|
# 🔁 worker
|
|
def worker(task_id, url):
    """Crawl *url* and store the outcome for *task_id* in the ``tasks`` table.

    Meant to run in a background thread. On success the task row is written
    with status ``0`` and the JSON-encoded crawl result. If crawling,
    serialising or storing fails, the row is updated with status ``"error"``
    and the exception text instead.

    Args:
        task_id: Identifier of the task row to write (presumably the unique
            key of ``tasks`` -- the schema is not visible here).
        url: Address handed to ``Start(url).run()``.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            data = Start(url).run()
            # INSERT OR REPLACE removes the conflicting row before inserting,
            # so url has to be written again here or the stored url is lost.
            cur.execute(
                """
                INSERT OR REPLACE INTO tasks (task_id, url, status, result)
                VALUES (?, ?, ?, ?)
                """,
                (task_id, url, 0, json.dumps(data, ensure_ascii=False)),
            )
        except Exception as e:
            # Thread boundary: record the failure on the task row so the
            # polling endpoint can report it.
            cur.execute(
                """
                UPDATE tasks
                SET status=?,
                    error=?
                WHERE task_id = ?
                """,
                ("error", str(e), task_id),
            )
        conn.commit()
    finally:
        # Close even when the commit (or the error UPDATE) itself fails.
        conn.close()
|
|
|
|
|
|
# ✅ 提交任务
|
|
@app.post("/crawler/put", summary='提交爬虫')
def put_task(req: Spider):
    """
    Submit a crawl job and start it in a background thread.

    url: the URL to crawl

    The task_id is the MD5 hex digest of the URL, so submitting the same URL
    again yields the same task_id and resets its stored row to "processing"
    (status -2). Poll the result endpoint with the returned task_id.
    """
    task_id = hashlib.md5(req.url.encode('utf-8')).hexdigest()
    print(task_id)

    conn = get_conn()
    try:
        cur = conn.cursor()
        cur.execute(
            """
            INSERT OR REPLACE INTO tasks (task_id, url, status)
            VALUES (?, ?, ?)
            """,
            (task_id, req.url, -2),
        )
        conn.commit()
    finally:
        # Release the connection even if the insert or commit raises.
        conn.close()

    # The row is committed before the worker starts, so the worker's writes
    # always find it.
    Thread(target=worker, args=(task_id, req.url)).start()

    return {'code': 0, 'data': {"task_id": task_id}, 'msg': '操作成功'}
|
|
|
|
|
|
# ✅ 获取结果
|
|
@app.get("/crawler/get/{task_id}", summary='获取数据')
def get_result(task_id: str):
    """
    Look up the state and result of a submitted crawl job.

    task_id: the task_id returned by the submit endpoint

    code: -1 = unknown task_id or failed task, -2 = still processing,
    0 = success (data then holds the decoded crawl result)
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        cur.execute("SELECT status, result, error FROM tasks WHERE task_id=?", (task_id,))
        row = cur.fetchone()
    finally:
        # Release the connection even if the query raises.
        conn.close()

    if not row:
        return {"code": -1, 'data': task_id, 'msg': '无此task_id'}

    # The stored error text is selected but not exposed in the response.
    status, result, _error = row
    if status == "error":
        return {"code": -1, 'data': task_id, 'msg': '任务处理失败'}

    code = int(status)
    msg = '操作成功' if code == 0 else '任务正在处理'

    return {
        "code": code,
        "data": json.loads(result) if result else None,
        "msg": msg
    }
|
|
|
|
# uvicorn api_main:app --host 0.0.0.0 --port 8000
|
|
# uvicorn api_main:app --host 0.0.0.0 --port 32000 --log-level debug
|