import requests,os,time,re
import multiprocessing as mp
# Request headers passed as `headers=headers` to the requests.get calls in
# this file: a desktop Chrome User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3377.1 Safari/537.36'
    ),
}

# Output directory, created up front at import time.
folder = 'PICTURE1'
# exist_ok=True replaces the separate os.path.exists() check: it is a no-op
# when the directory is already there and cannot fail if the directory
# appears between a check and the creation.
os.makedirs(folder, exist_ok=True)
class spider(object):
    """Three-level crawler for the win4000.com ``xingkong`` listing pages.

    ``get_pageone`` fetches the numbered listing pages and hands every link
    matched there to ``get_pagetwo``; ``get_pagetwo`` fetches that link and
    hands every link matched there to ``Downloading``; ``Downloading``
    fetches that page and saves the first URL matched on it under
    ``PICTURE1/``.

    NOTE(review): the three regular expressions below are empty in this copy
    of the file -- presumably the original patterns were lost; restore them
    before running. With an empty pattern ``re.findall`` yields only empty
    strings, so no usable URL is ever extracted.
    """

    # Applied to a listing page; every match is passed to get_pagetwo.
    listing_link_pattern = ''
    # Applied to a page fetched by get_pagetwo; every match is passed to
    # Downloading.
    detail_link_pattern = ''
    # Applied to a page fetched by Downloading; the first match is downloaded.
    source_url_pattern = ''
    # Seconds passed as `timeout=` to every request so a stalled server
    # raises instead of blocking the crawl forever.
    request_timeout = 10

    def __init__(self):
        # The class previously only defined `init`, which Python does not
        # call on construction, so `self.n` was missing when get_pageone ran.
        self.init()

    def init(self):
        """Reset the listing-page counter ``n`` to 1 (kept for compatibility)."""
        self.n = 1

    def get_pageone(self, last_page=5):
        """Crawl listing pages ``self.n`` through ``last_page`` inclusive.

        Args:
            last_page: Number of the last listing page to fetch. The default
                of 5 matches the previously hard-coded stop at ``n == 6``.

        ``self.n`` is advanced after each page. If it is already greater than
        ``last_page`` nothing is fetched (the old ``n == 6`` equality test
        would never have stopped in that case).
        """
        while self.n <= last_page:
            url = 'http://www.win4000.com/zt/xingkong_' + str(self.n) + '.html'
            res = requests.get(url, headers=headers,
                               timeout=self.request_timeout)
            for link in re.findall(self.listing_link_pattern, res.text):
                self.get_pagetwo(link)
            self.n += 1

    def get_pagetwo(self, link):
        """Fetch ``link`` and call ``Downloading`` for every match on it.

        Args:
            link: URL of a page found on a listing page.
        """
        res = requests.get(link, headers=headers,
                           timeout=self.request_timeout)
        for detail_link in re.findall(self.detail_link_pattern, res.text):
            self.Downloading(detail_link)

    def Downloading(self, b):
        """Fetch page ``b`` and save the first matched URL's content to disk.

        Args:
            b: URL of the page to search with ``source_url_pattern``.

        Pages with no match are skipped instead of raising ``IndexError``.
        """
        page = requests.get(b, headers=headers,
                            timeout=self.request_timeout).text
        matches = re.findall(self.source_url_pattern, page)
        if not matches:
            return
        src_url = matches[0]
        response = requests.get(src_url, headers=headers,
                                timeout=self.request_timeout)
        target = self._target_path(src_url)
        # Report the file-name tail of the URL being saved; the old message
        # printed a slice of the whole match list instead.
        print('正在下载…' + src_url[-8:])
        with open(target, 'wb') as f:
            f.write(response.content)

    @staticmethod
    def _target_path(src_url):
        """Return the local path for ``src_url``: ``PICTURE1/`` + its last 8 characters."""
        return 'PICTURE1/' + src_url[-8:]
# Script entry: build the crawler and run the listing-page crawl. The
# instance stays bound to the module-level name `pro`.
pro = spider()
pro.get_pageone()