爬取mm131图片

爬取mm131图片

import urllib.request
import os

# Download image galleries 4200..4460 from img1.mm131.me.
# Each gallery is saved under tupian/<gallery id>/<image index>.jpg.
# Images are requested in order (0, 1, 2, ... up to 59); the first failure
# inside a gallery stops that gallery and the script moves on to the next one.

# The request headers never change, so build them once instead of per image.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36',
    'Referer': 'https://www.sogou.com/link?url=DSOYnZeCC_o7btUgpK402wmc9YOcsOr4cOOT57O29F8'
}

for i in range(4200, 4461):
    gallery_dir = 'tupian/' + str(i)
    # makedirs also creates the parent 'tupian' directory when it is missing
    # and does not fail when the gallery directory is left over from a rerun.
    os.makedirs(gallery_dir, exist_ok=True)
    for j in range(60):
        try:
            url = 'http://img1.mm131.me/pic/' + str(i) + '/' + str(j) + '.jpg'
            print(url)
            request = urllib.request.Request(url=url, headers=headers)
            # Read the whole body before touching the disk so that a failed
            # download does not leave an empty file behind; the with-block
            # closes the HTTP response even when read() raises.
            with urllib.request.urlopen(request) as response:
                data = response.read()
            with open(gallery_dir + '/' + str(j) + '.jpg', 'wb') as fp:
                fp.write(data)
        except Exception as e:
            # Best effort: any failure (request or file write) ends this
            # gallery. The exception is printed so the cause is visible.
            print('下载失败,下载下一条', e)
            break

爬取妹子图

import urllib.request
import re
import time
import os

# Proxy mapping (URL scheme -> host:port) handed to urllib's ProxyHandler.
proxy = {
    'http': '120.92.74.189:3128',
}

# Headers attached to every request made by this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36',
    'Referer': 'http://mzitu.com',
}

# Directory created at start-up.
# NOTE(review): presumably the root folder for downloaded images - confirm
# against the rest of the script.
dirname = 'mzt'

# exist_ok=True replaces the exists()/mkdir() pair, which could fail if the
# directory appeared between the check and the creation.
os.makedirs(dirname, exist_ok=True)

# Fetch numbered pages of www.mzitu.com; range(2, 3) yields only page 2.
# NOTE(review): the remainder of this loop body is not readable in this copy
# of the script, so only the fetch-and-decode step is documented here.
for page in range(2,3):

    # Build the URL for this page number, e.g. http://www.mzitu.com/page/2/
    url='http://www.mzitu.com/page/%s/' % page

    # Send the request with the shared headers through the configured proxy.
    # NOTE(review): the ProxyHandler and opener are rebuilt on every iteration
    # and the response is never closed explicitly.
    request=urllib.request.Request(url=url,headers=headers)
    handler = urllib.request.ProxyHandler(proxies=proxy)
    response=urllib.request.build_opener(handler).open(request)

    # Read the full response body and decode it as UTF-8 text.
    content=response.read().decode('utf8')

    pattern=re.compile(r'
  • .*?.*?(\d+)') ret1 = pattern1.findall(content1)[0] ret2=int(pattern2.findall(content1)[0]) print(ret1) print(ret2) time.sleep(5) for i in range(1, ret2 + 1): i = "%02d" % i url = ret1[0] + str(i) + ret1[1] print(i) request = urllib.request.Request(url, headers=headers) response = urllib.request.build_opener(handler).open(request) filename = '%s.jpg' % i filepath = os.path.join(name, filename) with open(filepath, 'wb') as fb: fb.write(response.read()) time.sleep(0.3)
  • 你可能感兴趣的:(python基础)