爬虫相关
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

66 lines
1.8 KiB

import requests
from DrissionPage._configs.chromium_options import ChromiumOptions
from tools.retry import retry
from DrissionPage import SessionPage, ChromiumPage, Chromium
def co_int():
    """Build a ChromiumOptions object tuned for scraping.

    Returns a DrissionPage ChromiumOptions configured to run headless,
    incognito, muted, without images, without a sandbox, and on an
    automatically chosen free port.
    """
    options = ChromiumOptions()
    options.no_imgs(True)          # skip images to save bandwidth
    options.mute(True)             # no audio
    options.incognito()            # private/incognito mode
    options.headless()             # run without a visible window
    options.set_argument('--no-sandbox')  # required in many container environments
    options.auto_port(on_off=True)        # pick a free debugging port automatically
    return options
class RequestsInt(object):
    """HTTP fetch helper: a shared requests.Session plus a headless-browser fallback.

    Plain GET/POST go through one persistent ``requests.Session`` (connection
    reuse, shared cookies); JavaScript-rendered pages can be fetched with
    ``get_page`` via DrissionPage. All fetch methods retry up to 3 times via
    the project's ``retry`` decorator.
    """

    def __init__(self, url):
        # `url` is sent as the Referer header on every request.
        self.ref = url
        self.session = requests.Session()

    def get_headers(self):
        """Return the default request headers: referer + desktop-Chrome UA."""
        return {
            'referer': self.ref,
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36'
        }

    @retry('get请求', 3)
    def get(self, url, params=None, headers=None, timeout=5, **kwargs):
        """GET ``url``; the response is decoded as UTF-8.

        ``headers`` defaults to :meth:`get_headers`; extra keyword arguments
        are passed straight to ``Session.get``.
        """
        if headers is None:
            headers = self.get_headers()
        r = self.session.get(url=url, params=params, headers=headers, timeout=timeout, **kwargs)
        r.encoding = 'utf-8'
        return r

    @retry('post请求', 3)
    def post(self, url, params=None, headers=None, timeout=5, **kwargs):
        """POST to ``url``; the response is decoded as UTF-8.

        NOTE: ``params`` is appended to the query string (requests semantics);
        pass a request body via ``data=`` or ``json=`` in ``**kwargs``.
        """
        if headers is None:
            headers = self.get_headers()
        r = self.session.post(url=url, params=params, headers=headers, timeout=timeout, **kwargs)
        r.encoding = 'utf-8'
        return r

    @retry('get_page请求', 3)
    def get_page(self, url):
        """Render ``url`` in a headless Chromium and return its HTML ('' on error).

        Fix: the original created a new ``Chromium`` instance on every call
        and never shut it down (the cleanup was commented out), leaking a
        browser process per request. The browser is now always quit in
        ``finally``.
        """
        browser = None
        try:
            co = co_int()
            browser = Chromium(addr_or_opts=co)
            page = browser.new_tab()
            page.get(url)
            return page.html
        except Exception as e:
            # Best-effort fetch: report and return an empty page on failure.
            print('e', e)
            return ''
        finally:
            if browser is not None:
                try:
                    browser.quit()
                except Exception:
                    # Shutdown failure must not mask the real result/error.
                    pass