Python 按关键字爬取京东图片

import re
import urllib.request
import os
import numpy as np
def craw(url, page, savedir, container_pattern=None):
    """Download every product image referenced on one search-result page.

    Args:
        url: The page to fetch; anything ``urllib.request.urlopen`` accepts
            (a URL string or a prepared ``urllib.request.Request``).
        page: Page label, used only in file names and progress messages.
        savedir: Path prefix (including the trailing separator) that the
            image files are written under.
        container_pattern: Optional regex; when given, only the first region
            of the page it matches is scanned for images.

    Raises:
        urllib.error.URLError: If the result page itself cannot be fetched.
            Failures of individual image downloads are reported and skipped.
    """
    # NOTE(review): the original first-stage pattern (``pat1``) was lost when
    # this file was extracted -- its string literal was empty and unterminated.
    # Rather than guess it, narrowing is opt-in via ``container_pattern``; the
    # image attribute matched below is searched in the whole page by default.
    with urllib.request.urlopen(url) as response:
        # Decode instead of str(bytes): str() would produce the b'...' repr.
        html1 = response.read().decode('utf-8', errors='replace')

    if container_pattern is not None:
        # DOTALL so that '.' can span the line breaks of the decoded page.
        region = re.search(container_pattern, html1, re.DOTALL)
        if region is None:
            print('no product list found on page', page)
            return
        html1 = region.group(0)

    pat2 = 'source-data-lazy-img="(//.*?jpg)'
    for x, imagurl in enumerate(re.findall(pat2, html1), start=1):
        imagname = '{}第{}页第{}个.jpg'.format(savedir, page, x)
        try:
            # The attribute holds a protocol-relative URL ("//host/path").
            urllib.request.urlretrieve('https:' + imagurl, filename=imagname)
        except urllib.error.URLError as e:
            # Report and move on; x stays tied to the image's position on the
            # page instead of being bumped extra times on failure.
            print('failed to download', imagurl, '-', getattr(e, 'reason', e))
            continue
        print('已输出第', page, '页,第', x, '个')


def _search_request(keyword, page_index):
    """Build a browser-like request for one JD search-result page."""
    url2 = ('https://search.jd.com/Search?keyword='
            + urllib.request.quote(keyword)
            + '&enc=utf-8&page=' + str(page_index))
    req = urllib.request.Request(url2)
    req.add_header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8")
    # No Accept-Encoding header: urllib does not decompress gzip/br bodies,
    # so asking for them would make the page unreadable to the regex.
    req.add_header("Accept-Language", "zh-CN,zh;q=0.9")
    req.add_header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36")
    return req


def main():
    """Crawl the first result pages of each keyword into ./img/<keyword>/."""
    page_ = 45
    key = ['衬衫', '马甲衬衫', '马甲', '女生职业装', '女士西服']
    for name in key:
        os.makedirs('./img/' + name, exist_ok=True)
        savedir = './img/' + name + '/'
        for i in range(1, 2 * page_ + 1):
            # Two consecutive ``page`` values share one label step (1.0, 1.5,
            # 2.0, ...); the original even/odd branches both reduce to this.
            # The half step keeps file names of the two requests distinct.
            key2 = (i + 1) / 2
            try:
                # Pass the request that carries the browser headers.
                craw(_search_request(name, i), key2, savedir)
            except urllib.error.URLError as e:
                # One unreachable page should not abort the whole crawl.
                print('failed to fetch page', i, 'for', name, '-',
                      getattr(e, 'reason', e))


if __name__ == "__main__":
    main()

本代码源自网络,原始出处已无法确认。如原作者看到,请告知。

你可能感兴趣的:(python编程)