Building on the image-scraping code from the earlier post, changing the JSON filtering conditions is all it takes to grab videos instead:
https://blog.csdn.net/weixin_43596589/article/details/122215981
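For reference, the part of the response that the modified filter walks looks roughly like this. The sketch only keeps the keys the code below actually reads; real cards carry many more fields, and the URL values here are placeholders:

# Rough shape of one entry in _json["data"]["cards"], abridged to the
# keys the code below reads; actual payloads vary between posts.
card = {
    'mblog': {
        'page_info': {
            'urls': {
                'mp4_720p_mp4': 'https://...720p.mp4',  # placeholder URL
                'mp4_hd_mp4': 'https://...hd.mp4',      # placeholder URL
            }
        }
    }
}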
As before, threaded downloads fail on individual files now and then, so I export the generated links and feed them to 迅雷 (Xunlei) instead.
The get_pics_url function is as follows:
def get_pics_url(self):
    # Relies on module-level names from the full script: requests, json, os,
    # tkinter's END, the t1 Text widget, r1_var, disk, user_name_selected,
    # and the get_ua() helper.
    i = 1
    url_list = []
    while True:
        url = self.start_url + '&page={}'.format(i)
        headers = {'User-Agent': get_ua()}
        r = requests.get(url, headers=headers)
        _json = json.loads(r.text)
        items = _json["data"]["cards"]
        flag = _json['ok']
        if flag == 1:  # data-present flag from the API, doubles as a manual stop flag
            for v in items:
                picslist = v.get('mblog')
                if picslist is not None:
                    pageInfoList = picslist.get('page_info')
                    if pageInfoList is not None:
                        urlsList = pageInfoList.get('urls')
                        if urlsList is not None:
                            # prefer the 720p stream, fall back to the HD one
                            img_url = urlsList.get('mp4_720p_mp4')
                            if img_url is None:
                                img_url = urlsList.get('mp4_hd_mp4')
                            if img_url is not None:
                                url_list.append(img_url)
                            else:
                                print(urlsList)  # neither key present; dump for inspection
        else:
            # 1.06: the page number display had an issue here
            t1.insert(END, f'*** stopped at page {i} ***\n')
            t1.see(END)
            t1.update()
            if r1_var.get() == 1:
                big_dir = disk + ':/WeiBo_Pics'
                os.startfile(big_dir)
            break
        i += 1
    print('total number of urls:')
    print(len(url_list))
    # return url_list
    # write the whole list to a txt file, one link per line (the snippet I found
    # online replaced ':' with ',', which breaks 'https://', so write links as-is)
    with open(user_name_selected + 'mp4.txt', 'w') as file:
        for link in url_list:
            file.write(link + '\n')
    return ""
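If you would rather not go through 迅雷, a minimal sequential downloader over the generated txt could look like the sketch below. It stays single-threaded on purpose, since the threaded version failed on individual files; the output directory name WeiBo_Videos is my own choice, not from the original script.

import os
import requests

def download_from_txt(txt_path, out_dir):
    # Download every link in the txt written by get_pics_url, one at a time.
    os.makedirs(out_dir, exist_ok=True)
    with open(txt_path) as f:
        links = [line.strip() for line in f if line.strip()]
    for n, link in enumerate(links, 1):
        r = requests.get(link, stream=True, timeout=30)
        r.raise_for_status()
        path = os.path.join(out_dir, '{}.mp4'.format(n))
        with open(path, 'wb') as out:
            for chunk in r.iter_content(chunk_size=1 << 20):
                out.write(chunk)
        print('{}/{} saved to {}'.format(n, len(links), path))

# e.g. download_from_txt(user_name_selected + 'mp4.txt', disk + ':/WeiBo_Videos')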