参考文章:http://cuiqingcai.com/3179.html
以下代码可以封装成函数,此处仅作学习演示。
第一种:
# coding: utf-8
# Download a numbered series of images and save them into a fixed local
# folder. The coding line above is required when this snippet is saved as its
# own Python 2 file, because the string literals below are non-ASCII.
import requests

# Browser-like request headers sent with every image request.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    # Host may be omitted, but if it is sent it must be correct.
    'Host': 'i.meizitu.net',
    # Referring page. Per the original author's note, this site's
    # anti-crawler check is based on this header.
    'Referer': 'http://www.mzitu.com/100761/3',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
}

for i in range(1, 66):
    # Zero-pad the page number so that 1..9 become 01..09.
    yema = '%02d' % i
    response = requests.get('http://i.meizitu.net/2017/08/20b' + yema + '.jpg', headers=headers)
    print('正在爬取第' + yema + '张......')

    # Only a successful response carries image data; skip anything else so
    # an error page is never written to disk as a .jpg file.
    if response.status_code != 200:
        print('跳过第' + yema + '张: HTTP ' + str(response.status_code))
        continue

    # .content is the raw (binary) response body.
    img = response.content
    # NOTE: the target folder must already exist; open() does not create it.
    name = u'C:/Users/Test/Desktop/妹子/' + yema + '.jpg'
    # 'wb' (not 'ab'): overwrite, so re-running the script does not append a
    # second copy of the image to an existing file.
    with open(name, 'wb') as f:
        f.write(img)
    print(name)  # check that the file name displays correctly
    print(type(name))  # check the type of name after the u prefix
第二种:
自动获取桌面路径,并将图片保存到桌面上的 meizitu 文件夹:
# coding:utf-8
# Download a numbered series of images into a "meizitu" folder on the current
# user's Desktop.
import os

import requests


def Get_Desktop_Path():
    """Return the path of the Desktop folder inside the user's home directory."""
    return os.path.join(os.path.expanduser("~"), 'Desktop')


# Folder that receives the downloaded images. os.path.join avoids the
# hard-coded backslash and the invalid '\m' escape sequence.
wjj = os.path.join(Get_Desktop_Path(), 'meizitu')
try:
    os.mkdir(wjj)
except OSError as e:
    # os.mkdir raises OSError (for example when the folder already exists).
    # Report it and carry on, without also swallowing KeyboardInterrupt or
    # SystemExit the way a BaseException handler would.
    print(e)

# Show the folder path.
print(wjj)

# Browser-like request headers sent with every image request.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    # Host may be omitted, but if it is sent it must be correct.
    'Host': 'i.meizitu.net',
    # Referring page. Per the original author's note, this site's
    # anti-crawler check is based on this header.
    'Referer': 'http://www.mzitu.com/100761/3',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
}

for i in range(1, 10):
    # Zero-pad the page number so that 1..9 become 01..09.
    yema = '%02d' % i
    response = requests.get('http://i.meizitu.net/2017/08/20b' + yema + '.jpg', headers=headers)
    print('正在爬取第' + yema + '张......')

    # Only a successful response carries image data; skip anything else so
    # an error page is never written to disk as a .jpg file.
    if response.status_code != 200:
        print('跳过第' + yema + '张: HTTP ' + str(response.status_code))
        continue

    # .content is the raw (binary) response body.
    img = response.content
    # Full-width question marks are used in the file name because the ASCII
    # '?' character is not allowed in Windows file names.
    name = os.path.join(wjj, u'惊不惊喜?意不意外?刺不刺激?' + yema + '.jpg')
    print(name)
    # 'wb' (not 'ab'): overwrite, so re-running the script does not append a
    # second copy of the image to an existing file.
    with open(name, 'wb') as f:
        f.write(img)