爬虫相关
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

628 lines
23 KiB

import hashlib
import os
import random
import time
import cv2
from PIL import Image
from io import BytesIO
import requests
import json
import uuid
from moviepy.video.io.VideoFileClip import VideoFileClip
class Auth:
    """Session helper for the WeChat Channels assistant web API.

    Holds the login cookies and the browser-like request headers, and exposes
    the two calls needed before uploading: resolving the finder username
    (kept in ``self.v2``) and fetching the upload ``authKey``.
    """

    # Seconds to wait on each HTTP call; same value the publish calls use.
    REQUEST_TIMEOUT = 30

    def __init__(self, cookies):
        """Store the cookies and build the headers sent with every API call.

        :param cookies: cookie mapping handed to ``requests`` on each call.
        """
        self.cookies = cookies
        # Finder username; filled in by get_auth(). Defined here so reading it
        # before get_auth() yields None instead of raising AttributeError.
        self.v2 = None
        self.headers = {
            "Host": "channels.weixin.qq.com",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "sec-ch-ua-platform": "\"Windows\"",
            "X-WECHAT-UIN": "0000000000",
            "sec-ch-ua": "\"Not:A-Brand\";v=\"99\", \"Google Chrome\";v=\"145\", \"Chromium\";v=\"145\"",
            "sec-ch-ua-mobile": "?0",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36",
            "Accept": "application/json, text/plain, */*",
            "Content-Type": "application/json",
            "Origin": "https://channels.weixin.qq.com",
            "Sec-Fetch-Site": "same-origin",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Dest": "empty",
            "Referer": "https://channels.weixin.qq.com/platform/",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
        }

    def get_rid(self):
        """Return a request id: ``<hex unix seconds>-<8 random hex digits>``."""
        timestamp_hex = format(int(time.time()), "x")
        random_hex = "".join(format(random.randint(0, 15), "x") for _ in range(8))
        return f"{timestamp_hex}-{random_hex}"

    def get_aid(self):
        """Return a fresh random UUID4 string."""
        return str(uuid.uuid4())

    def _post_json(self, url, page_url, finder_id):
        """POST the common assistant payload to *url* and return the parsed JSON.

        :param url: endpoint to call.
        :param page_url: value sent as the ``_pageUrl`` query parameter.
        :param finder_id: value sent as ``_log_finder_id`` in the body.
        """
        params = {
            "_aid": self.get_aid(),
            "_rid": self.get_rid(),
            "_pageUrl": page_url
        }
        payload = {
            "timestamp": str(int(time.time() * 1000)),
            "_log_finder_uin": "",
            "_log_finder_id": finder_id,
            "rawKeyBuff": None,
            "pluginSessionId": None,
            "scene": 7,
            "reqScene": 7
        }
        body = json.dumps(payload, separators=(',', ':'))
        response = requests.post(
            url,
            headers=self.headers,
            cookies=self.cookies,
            params=params,
            data=body,
            timeout=self.REQUEST_TIMEOUT,  # never block forever on a dead connection
        )
        return response.json()

    def get_v2(self):
        """Fetch the account data and return the ``finderUsername`` field.

        :raises KeyError: if the response lacks ``data.finderUser.finderUsername``.
        """
        response = self._post_json(
            "https://channels.weixin.qq.com/cgi-bin/mmfinderassistant-bin/auth/auth_data",
            "https://channels.weixin.qq.com/platform/login-for-iframe",
            "",
        )
        print(response)
        return response['data']['finderUser']['finderUsername']

    def get_auth(self, ):
        """Resolve the finder username, then fetch and return the upload ``authKey``.

        Side effect: stores the finder username in ``self.v2``.

        :raises KeyError: if the response lacks ``data.authKey``.
        """
        self.v2 = self.get_v2()
        response = self._post_json(
            "https://channels.weixin.qq.com/cgi-bin/mmfinderassistant-bin/helper/helper_upload_params",
            "https://channels.weixin.qq.com/platform/",
            self.v2,
        )
        authKey = response['data']['authKey']
        print('authkey--', authKey)
        return authKey
class WxSphImage(object):
    """Upload images/videos to WeChat Channels and publish posts with them.

    Images and videos go through the same three-step upload (apply for an
    upload id, PUT each part, complete the upload), so the ``image_*``
    attributes are reused for video files as well.
    """

    # Seconds to wait on the JSON control calls (same value the publish calls use).
    REQUEST_TIMEOUT = 30
    # Largest part size used when splitting a video for upload (8 MiB).
    VIDEO_CHUNK_SIZE = 1024 * 1024 * 8

    def __init__(self, cookies):
        """Authenticate with *cookies* and reset the per-file upload state.

        :param cookies: cookie mapping handed to ``requests`` on each call.
        """
        self.auto = Auth(cookies)
        self.authorization = self.auto.get_auth()  # upload authorization key
        self.cookies = cookies
        self.image_file = ''  # path of the file being uploaded
        self.image_data = b''  # raw bytes of that file
        self.image_size = 0  # length of image_data in bytes
        self.headers = {}
        self.width, self.height, self.duration = 0, 0, 0

    # ------------------------------------------------------------------ helpers

    def md5(self, data):
        """Return the hex MD5 digest of *data* (``str`` is UTF-8 encoded first)."""
        if isinstance(data, str):
            data = data.encode('utf-8')
        return hashlib.md5(data).hexdigest()

    def get_image_data(self):
        """Read and return the bytes of ``self.image_file``."""
        with open(self.image_file, 'rb') as f:
            return f.read()

    def get_ts(self):
        """Return the current time in milliseconds as a string."""
        return str(int(time.time() * 1000))

    def get_headers(self):
        """Build a fresh header dict for the upload endpoints (image variant)."""
        headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Authorization": self.authorization,
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-MD5": "null",
            "Content-Type": "application/json",
            "Origin": "https://channels.weixin.qq.com",
            "Pragma": "no-cache",
            "Referer": "https://channels.weixin.qq.com/",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36",
            "X-Arguments": f"apptype=251&filetype=20304&weixinnum=2841261318&filekey=finder_video_img.jpeg&filesize={self.image_size}&taskid={self.auto.get_aid()}&scene=2",
            "sec-ch-ua": "\"Not:A-Brand\";v=\"99\", \"Google Chrome\";v=\"145\", \"Chromium\";v=\"145\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\""
        }
        return headers

    def _video_x_arguments(self):
        """Return the ``X-Arguments`` header value used for video uploads."""
        return f"apptype=251&filetype=20302&weixinnum=2841261318&filekey=aa.mp4&filesize={self.image_size}&taskid={self.auto.get_aid()}&scene=2"

    def _query_params(self, page_url):
        """Return the ``_aid`` / ``_rid`` / ``_pageUrl`` query parameters."""
        return {
            '_aid': self.auto.get_aid(),
            '_rid': self.auto.get_rid(),
            '_pageUrl': page_url
        }

    def _log_fields(self):
        """Return the common trailing fields sent in every assistant payload."""
        return {
            "timestamp": self.get_ts(),
            "_log_finder_uin": "",
            "_log_finder_id": self.auto.v2,
            "rawKeyBuff": None,
            "pluginSessionId": None,
            "scene": 7,
            "reqScene": 7
        }

    @staticmethod
    def _trace_info(traceKey):
        """Return the ``traceInfo`` dict with start/end set to the current second."""
        return {
            "traceKey": traceKey,
            "uploadCdnStart": int(time.time()),
            "uploadCdnEnd": int(time.time())
        }

    @staticmethod
    def _topic_info(connect):
        """Return the ``finderTopicInfo`` XML string wrapping *connect* in CDATA."""
        return "<finder><version>1</version><valuecount>1</valuecount><style><at></at></style><value0><![CDATA[" + connect + "]]></value0></finder>"

    @staticmethod
    def _rich_text_json(title, connect):
        """Serialize the rich-text body (title as header, then the text).

        Built with ``json.dumps`` so quotes, backslashes and newlines inside
        *title* / *connect* are escaped instead of corrupting the JSON.
        """
        ops = [
            {"insert": title},
            {"attributes": {"header": 1}, "insert": "\n"},
            {"insert": connect},
            {"insert": "\n\n"},
        ]
        return json.dumps(ops, separators=(',', ':'), ensure_ascii=False)

    @staticmethod
    def _chunk_offsets(total_size, chunk_size):
        """Return the cumulative end offset of each part of a *total_size* file.

        A file no larger than *chunk_size* (including an empty one) yields a
        single entry equal to *total_size*.
        """
        if total_size > chunk_size:
            return [
                min(begin + chunk_size, total_size)
                for begin in range(0, total_size, chunk_size)
            ]
        return [total_size]

    # ------------------------------------------------------------------- upload

    def get_uid(self, BlockPartLength):
        """Apply for an upload and return its ``UploadID``.

        :param BlockPartLength: list of part end offsets; its length is sent
            as ``BlockSum``.
        """
        url = "https://finderassistancea.video.qq.com/applyuploaddfs"
        payload = {
            "BlockSum": len(BlockPartLength),
            "BlockPartLength": BlockPartLength
        }
        body = json.dumps(payload, separators=(',', ':'))
        response = requests.put(
            url, headers=self.headers, data=body, timeout=self.REQUEST_TIMEOUT
        ).json()
        print(response)
        return response['UploadID']

    def put_image(self, uid, image_data=None, PartNumber=1):
        """Upload one part (a whole image, or one chunk of a video).

        :param uid: upload id returned by :meth:`get_uid`.
        :param image_data: bytes of the part; ``None`` uploads
            ``self.image_data`` with ``self.headers`` (image path).
        :param PartNumber: 1-based index of the part (used for videos).
        :return: ``(ETag, TransFlag)`` from the response.
        """
        # An alternate host "finderassistancec.video.qq.com" was used previously.
        url = 'https://finderassistancee.video.qq.com/uploadpartdfs?'
        params = {
            'PartNumber': PartNumber,
            'UploadID': uid,
            'QuickUpload': 2
        }
        if image_data is None:
            image_data = self.image_data
            # NOTE: this is the shared dict, so the content-md5 set below
            # stays on self.headers for later calls (original behavior).
            headers = self.headers
        else:
            headers = self.get_headers()
            headers["X-Arguments"] = self._video_x_arguments()
        headers['content-md5'] = self.md5(image_data)
        # No timeout here: sending a large part may legitimately take long.
        response = requests.put(url, data=image_data, headers=headers, params=params).json()
        print(response)
        return response['ETag'], response['TransFlag']

    def get_image_url(self, uid, etag='', trans_flag='0_0', part_info=None):
        """Complete the upload and return the ``DownloadURL`` of the file.

        :param uid: upload id returned by :meth:`get_uid`.
        :param etag: ETag of the single part; used only when *part_info* is empty.
        :param trans_flag: ``TransFlag`` returned by :meth:`put_image`.
        :param part_info: list of ``{"PartNumber", "ETag"}`` dicts for
            multi-part uploads; ``None``/empty means one part with *etag*.
        """
        url = "https://finderassistancea.video.qq.com/completepartuploaddfs"
        params = {
            "UploadID": uid
        }
        if not part_info:
            part_info = [
                {
                    "PartNumber": 1,
                    "ETag": etag
                }
            ]
        payload = {
            "TransFlag": trans_flag,
            "PartInfo": part_info
        }
        body = json.dumps(payload, separators=(',', ':'))
        print(body)
        response = requests.post(
            url, headers=self.headers, params=params, data=body,
            timeout=self.REQUEST_TIMEOUT,
        ).json()
        print(response)
        return response['DownloadURL']

    # ------------------------------------------------------------------ publish

    def get_traceKey(self):
        """Request and return the ``traceKey`` needed to publish a post."""
        url = "https://channels.weixin.qq.com/micro/content/cgi-bin/mmfinderassistant-bin/post/get-finder-post-trace-key"
        params = self._query_params(
            "https://channels.weixin.qq.com/micro/content/post/finderNewLifeCreate"
        )
        payload = {
            "objectId": "",
            **self._log_fields()
        }
        body = json.dumps(payload, separators=(',', ':'))
        # Use the instance cookies: the module-level ``cookies`` name only
        # exists when this file is executed as a script.
        response = requests.post(
            url, headers=self.headers, cookies=self.cookies, params=params,
            data=body, timeout=self.REQUEST_TIMEOUT,
        ).json()
        print(response)
        return response['data']['traceKey']

    def get_clip_key(self, video_url, traceKey, width, height, duration):
        """Register the uploaded video for clipping and return its ``clipKey``.

        :param video_url: download URL returned for the uploaded video.
        :param traceKey: value from :meth:`get_traceKey`.
        :param width: video width in pixels.
        :param height: video height in pixels.
        :param duration: video duration as computed by :meth:`video_cover`.
        """
        url = "https://channels.weixin.qq.com/micro/content/cgi-bin/mmfinderassistant-bin/post/post_clip_video"
        params = self._query_params(
            "https://channels.weixin.qq.com/micro/content/post/create"
        )
        payload = {
            "url": video_url,
            "timeStart": 0,
            "cropDuration": 0,
            # NOTE(review): fixed at 1280 while width follows the argument;
            # confirm whether this should be ``height``.
            "height": 1280,
            "width": width,
            "x": 0,
            "y": 0,
            "clipOriginVideoInfo": {
                "width": width,
                "height": height,
                "duration": duration,
                "fileSize": self.image_size
            },
            "traceInfo": self._trace_info(traceKey),
            "targetWidth": width,
            "targetHeight": height,
            "type": 4,
            "useAstraThumbCover": 1,
            **self._log_fields()
        }
        body = json.dumps(payload, separators=(',', ':'))
        response = requests.post(
            url, headers=self.headers, cookies=self.cookies, params=params,
            data=body, timeout=self.REQUEST_TIMEOUT,
        ).json()
        print(response)
        return response['data']['clipKey']

    def release(self, title, connect, media):
        """Publish an image post.

        :param title: post title.
        :param connect: post text.
        :param media: list of media dicts as returned by :meth:`run_images`.
        :return: the parsed JSON response.
        """
        url = 'https://channels.weixin.qq.com/micro/content/cgi-bin/mmfinderassistant-bin/post/post_create'
        params = self._query_params(
            'https://channels.weixin.qq.com/micro/content/post/finderNewLifeCreate'
        )
        traceKey = self.get_traceKey()
        payload = {
            "objectType": 0,
            "longitude": 0,
            "latitude": 0,
            "feedLongitude": 0,
            "feedLatitude": 0,
            "originalFlag": 0,
            "topics": [],
            "isFullPost": 1,
            "handleFlag": 2,
            "videoClipTaskId": "",
            "traceInfo": self._trace_info(traceKey),
            "objectDesc": {
                "mpTitle": "",
                "description": connect,
                "extReading": {},
                "mediaType": 2,
                "location": {
                    "latitude": 31.992259979248047,
                    "longitude": 118.77870178222656,
                    "city": "南京市",
                    "poiClassifyId": ""
                },
                "topic": {
                    "finderTopicInfo": self._topic_info(connect)
                },
                "event": {},
                "mentionedUser": [],
                "media": media,
                "finderNewlifeDesc": {
                    "richTextTitle": title,
                    "richTextJson": self._rich_text_json(title, connect),
                    "fromRichPublisher": 1
                },
                "member": {}
            },
            "postFlag": 0,
            "mode": 1,
            "clientid": self.auto.get_aid(),
            **self._log_fields()
        }
        response = requests.post(
            url=url,
            params=params,
            headers=self.headers,
            cookies=self.cookies,  # instance cookies, not the script-only global
            json=payload,
            timeout=self.REQUEST_TIMEOUT
        )
        result = response.json()
        print(result)
        return result

    def release_video(self, title, connect, video_url, im_url):
        """Publish a video post.

        Uses ``self.width`` / ``self.height`` / ``self.duration`` and
        ``self.image_size`` as left by :meth:`video_cover` and
        :meth:`run_video`.

        :param title: sent as the post description.
        :param connect: sent as the topic text and the short title.
        :param video_url: download URL of the uploaded video.
        :param im_url: download URL of the uploaded cover image.
        :return: the parsed JSON response.
        """
        url = 'https://channels.weixin.qq.com/micro/content/cgi-bin/mmfinderassistant-bin/post/post_create'
        params = self._query_params(
            'https://channels.weixin.qq.com/micro/content/post/finderNewLifeCreate'
        )
        traceKey = self.get_traceKey()
        width = self.width
        height = self.height
        duration = self.duration
        clip = self.get_clip_key(video_url=video_url, traceKey=traceKey, width=width, height=height,
                                 duration=duration)
        print(clip)
        payload = {
            "objectType": 0,
            "longitude": 0,
            "latitude": 0,
            "feedLongitude": 0,
            "feedLatitude": 0,
            "originalFlag": 0,
            "topics": [],
            "isFullPost": 1,
            "handleFlag": 2,
            "videoClipTaskId": clip,
            "traceInfo": self._trace_info(traceKey),
            "objectDesc": {
                "mpTitle": "",
                "description": title,
                "extReading": {},
                "mediaType": 4,
                "location": {
                    "latitude": 31.992259979248047,
                    "longitude": 118.77870178222656,
                    "city": "南京市",
                    "poiClassifyId": ""
                },
                "topic": {
                    "finderTopicInfo": self._topic_info(connect)
                },
                "event": {},
                "mentionedUser": [],
                "media": [
                    {
                        "url": video_url,
                        "fileSize": self.image_size,
                        "thumbUrl": im_url,
                        "fullThumbUrl": im_url,
                        "mediaType": 4,
                        "videoPlayLen": int(duration),
                        "width": width,
                        "height": height,
                        "md5sum": self.auto.get_aid(),
                        "coverUrl": im_url,
                        "fullCoverUrl": im_url,
                        "urlCdnTaskId": clip
                    }
                ],
                "shortTitle": [{"shortTitle": connect}],
                "member": {}
            },
            "report": {
                "clipKey": clip,
                "draftId": clip,
                **self._log_fields(),
                "height": height,
                "width": width,
                "duration": duration,
                "fileSize": self.image_size,
                "uploadCost": 268
            },
            "postFlag": 0,
            "mode": 1,
            "clientid": self.auto.get_aid(),
            **self._log_fields()
        }
        response = requests.post(
            url=url,
            params=params,
            headers=self.headers,
            cookies=self.cookies,  # instance cookies, not the script-only global
            json=payload,
            timeout=self.REQUEST_TIMEOUT
        ).json()
        print(response)
        return response

    # ---------------------------------------------------------------- workflows

    def run_images(self, image_file):
        """Upload one image and return the media dict describing it.

        :param image_file: path of the image to upload.
        :return: dict suitable as one element of ``release(media=...)``.
        """
        self.image_file = image_file
        self.image_data = self.get_image_data()
        self.image_size = len(self.image_data)
        self.headers = self.get_headers()
        img = Image.open(BytesIO(self.image_data))
        self.width, self.height = img.size
        uid = self.get_uid([self.image_size])
        etag, trans_flag = self.put_image(uid)
        # The web client uploads three sizes; reusing one URL for all three
        # fields was tested by the original author and also works.
        image_url = self.get_image_url(uid=uid, etag=etag, trans_flag=trans_flag)
        return {
            "url": image_url,
            "fileSize": self.image_size,
            "thumbUrl": image_url,
            "fullThumbUrl": image_url,
            "mediaType": 2,
            "videoPlayLen": 0,
            "width": self.width,
            "height": self.height,
            "md5sum": self.auto.get_aid(),
            "urlCdnTaskId": ""
        }

    def video_cover(self, video_path):
        """Read the video's size/duration and save a cover frame as ``cover.png``.

        Stores width, height and duration on the instance. The frame at the
        1000 ms position is written to ``cover.png`` in the working directory;
        nothing is written when no frame could be read.

        :param video_path: path of the video file.
        """
        cap = cv2.VideoCapture(video_path)
        try:
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
            # A reported fps of 0 would otherwise raise ZeroDivisionError.
            duration = frame_count / fps if fps else 0.0
            print("宽高:", width, height)
            print("时长:", duration)
            self.width, self.height, self.duration = width, height, duration
            # Seek to the 1-second mark and grab that frame.
            cap.set(cv2.CAP_PROP_POS_MSEC, 1000)
            ret, frame = cap.read()
        finally:
            # Always free the capture handle, even if a call above raised.
            cap.release()
        if ret:
            # OpenCV delivers BGR; PIL expects RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            Image.fromarray(frame).save("cover.png")

    def run_video(self, video_path):
        """Upload a video in parts and return its download URL.

        Shares the image upload flow; the file is split into parts of at most
        ``VIDEO_CHUNK_SIZE`` bytes.

        :param video_path: path of the video to upload.
        """
        self.image_file = video_path
        self.image_data = self.get_image_data()
        self.image_size = len(self.image_data)
        self.headers = self.get_headers()
        self.headers["X-Arguments"] = self._video_x_arguments()
        if self.width == 0:
            self.video_cover(video_path)
        else:
            print('已有视频数据,不再次获取')
        # Cumulative end offset of every part; also used to slice the data below.
        BlockPartLength = self._chunk_offsets(self.image_size, self.VIDEO_CHUNK_SIZE)
        uid = self.get_uid(BlockPartLength=BlockPartLength)
        start = 0
        part_info = []
        trans_flag = '0_0'
        for part_number, end in enumerate(BlockPartLength, start=1):
            chunk = self.image_data[start:end]
            print(part_number - 1, start, end, len(chunk))
            etag, trans_flag = self.put_image(uid, image_data=chunk, PartNumber=part_number)
            start = end
            part_info.append({
                "PartNumber": part_number,
                "ETag": etag
            })
        return self.get_image_url(uid=uid, part_info=part_info, trans_flag=trans_flag)
def start(cookie):
    """Upload every file in the ``data`` directory as an image.

    :param cookie: cookie mapping used to authenticate.
    :return: list of media dicts, one per uploaded file, in ``os.listdir``
        order; pass it as ``media`` to ``WxSphImage.release`` to publish.
    """
    wxsph_image = WxSphImage(cookie)
    # os.path.join instead of a hand-written backslash: portable, and avoids
    # the invalid "\{" escape sequence inside the f-string.
    media = [
        wxsph_image.run_images(os.path.join('data', name))
        for name in os.listdir('data')
    ]
    # Example publish call:
    # wxsph_image.release(title='下午好', connect='我是大鹅,大白鹅', media=media)
    return media
def start2(cookie, path='bb.mp4'):
    """Upload a video with a generated cover and publish it.

    Extracts a cover frame to ``cover.png``, uploads that image, uploads the
    video itself, then publishes the video post.

    :param cookie: cookie mapping used to authenticate.
    :param path: path of the video file to publish.
    """
    uploader = WxSphImage(cookie)
    # Writes cover.png and records the video's width/height/duration.
    uploader.video_cover(path)
    cover_media = uploader.run_images("cover.png")
    cover_url = cover_media['url']
    uploaded_video_url = uploader.run_video(path)
    uploader.release_video(
        title='护肤小妙招',
        connect='零零零零',
        video_url=uploaded_video_url,
        im_url=cover_url,
    )
if __name__ == '__main__':
    # Credentials must not live in source control: read the session cookie
    # values from the environment instead of hard-coding them.
    _session_id = os.environ.get("WX_SPH_SESSIONID")
    _wxuin = os.environ.get("WX_SPH_WXUIN")
    if not _session_id or not _wxuin:
        raise SystemExit(
            "Set WX_SPH_SESSIONID and WX_SPH_WXUIN to the 'sessionid' and "
            "'wxuin' cookie values of a logged-in channels.weixin.qq.com session."
        )
    cookies = {
        "sessionid": _session_id,
        "wxuin": _wxuin
    }
    # start(cookies)
    start2(cookies)
# https://channels.weixin.qq.com/platform/post/finderNewLifeCreate
"""
原图 1440*1920
810*1080
1440*1920
1438 *
http://wxapp.tc.qq.com/251/20302/stodownload?bizid=1023&dotrans=0&encfilekey=Cvvj5Ix3eewK0tHtibORqcsqchXNh0Gf3sJcaYqC2rQAj7FSjPM3xflXxKHA63ZOrTibicJ9v7u7J4fYdgdOOMAySBURcTZ7sSROXeYhcJkA0NMVcbicDq8iaPQnqVGj2zIxb&findertoken=0886eae8ca0a10a4dea4cd061800223c66696e64657275706c6f616475726c5f323834313236313331385f313737323639353333323636305f333531383737313532323839353535323834312a2039393936353864633535653261366137643662393866303232316264363533613801400348005000580260ce9e01&hy=SH&idx=1&m=&scene=2&token=AxricY7RBHdW6dqda02zm3HiaicQibGV1ib8zfD9icC3T9A8c6t4zN9VWAM7nZtvAtt4oAqMqEvxj9KoFTiaYFBtQP5UC5BmmDWK37LTFiabH3m6rhq8sp8XWWaibicg&uzid=7a15c
http://wxapp.tc.qq.com/251/20304/stodownload?bizid=1023&dotrans=0&encfilekey=Cvvj5Ix3eewK0tHtibORqcsqchXNh0Gf3sJcaYqC2rQAgkKWLMfzK1J50xmbjOvs8wYzYKbvibicABqEW4zyA0bsuHYIdDdRYMuGz1dU2OyuO6WXBrYKveY2gDRZkv84t2A&hy=SH&idx=1&m=&scene=2&token=x5Y29zUxcibBpSHJLicjXJ6R7YDibFkMzwCNXu1DAa7W7jvkodt2kjXmsYYrnWNuBc35NgDetCXmeTxBexibwoDiaITKay4FZwicy6ich6DvGhZQMHTGCo1RANhxQ&uzid=1
http://wxapp.tc.qq.com/251/20304/stodownload?bizid=1023&dotrans=0&encfilekey=Cvvj5Ix3eewK0tHtibORqcsqchXNh0Gf3sJcaYqC2rQAgkKWLMfzK1J50xmbjOvs8wYzYKbvibicADoD7ptve2Vn5jFhCMOJRfXkekcy1lTcR3oOYKHZGibZOf2jicTMCEWM5&hy=SH&idx=1&m=&scene=2&token=x5Y29zUxcibBpSHJLicjXJ6QtAnlI3LxaeKdyJpZocIvkRYdXaYYp1iaNnm3H3rMb4cTeAaCc7EUmmx9Mqic7rMRYicuza3e6yibBjyQYRgGtE9FnMicSibFrNZScA&uzid=1
"""