超级福利!各位爱看直播的朋友,斗鱼主播照片奉上,准备好大饱眼福吧。
整个爬取过程难度不大,记得找个空间够大的位置存储图片啊。
开发环境:Windows 10
开发语言:Python 3.6
开发工具:PyCharm
抓包工具:Charles
import requests
import json
# Listing endpoint for "vertical" rooms; the page offset is appended to it.
url = (
    'http://capi.douyucdn.cn/api/v1/getVerticalRoom'
    '?limit=20&offset='
)

# Offset of the next listing page to request; douyuspider() advances it by 20.
offset = 0

# Browser-like User-Agent header sent with every HTTP request in this script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/66.0.3359.117 Safari/537.36'
    ),
}
def douyuspider():
    """Download the cover image of every room in the vertical-room listing.

    Requests the listing page by page, starting at the module-level
    ``offset`` and advancing it by 20 after each page, and passes every
    record to ``write_img``.  Stops at the first page whose ``data`` field
    is missing, empty, or not a list.

    Pages are walked with a loop instead of one recursive call per page, so
    a long listing cannot exhaust the interpreter's recursion limit.

    Raises:
        requests.RequestException: if a listing request fails, times out,
            or returns an HTTP error status.
        KeyError: if a record lacks one of the expected fields.
    """
    global offset
    while True:
        response = requests.get(url=url + str(offset), headers=headers, timeout=10)
        # Fail with a clear HTTP error instead of a confusing JSON decode error.
        response.raise_for_status()
        payload = json.loads(response.text)
        data_list = payload.get('data') if isinstance(payload, dict) else None
        # NOTE(review): presumably an error response can carry a non-list
        # ``data``; anything that is not a non-empty list ends the crawl.
        if not data_list or not isinstance(data_list, list):
            return
        for data in data_list:
            # Build a fresh dict per record so nothing leaks between rooms.
            item = {
                'vertical_src': data['vertical_src'],
                'nickname': data['nickname'],
                'room_id': data['room_id'],
                'anchor_city': data['anchor_city'],
            }
            write_img(item)
        offset += 20
def write_img(item):
    """Download one room's cover image into the ``douyu_spiderimg`` directory.

    Prints a progress line, fetches ``item['vertical_src']`` and writes the
    response body to ``douyu_spiderimg/<nickname>.jpg``.  The directory is
    created if it does not exist.  Characters that are not allowed in file
    names are replaced by ``_`` so a nickname cannot break the write or
    point outside the output directory; if nothing usable remains, the room
    id is used as the file name instead.

    Args:
        item: mapping with the keys ``vertical_src``, ``nickname``,
            ``room_id`` and ``anchor_city``.

    Raises:
        requests.RequestException: if the image request fails or times out.
        OSError: if the directory or file cannot be written.
    """
    import os
    import re

    print('正在存储:%s-%s,它的链接地址是:www.douyu.com/%s' % (item['anchor_city'], item['nickname'], item['room_id']))
    content = requests.get(url=item['vertical_src'], headers=headers, timeout=10).content

    # The nickname comes from the remote API: strip path separators, characters
    # Windows rejects in file names, and control characters.
    safe_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', str(item['nickname'])).strip(' .')
    if not safe_name:
        safe_name = str(item['room_id'])

    out_dir = 'douyu_spiderimg'
    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, safe_name + '.jpg'), 'wb') as f:
        f.write(content)
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    douyuspider()