Python爬虫:用requests爬取百思不得其姐网站的图片

代码不多,就不加注释了,有不懂的地方可以在下方评论~

import requests
import re
import os
import urllib.request
 
def get_response(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the crawler
            indefinitely.

    Returns:
        The decoded body of the response (``Response.text``).
    """
    return requests.get(url, timeout=timeout).text
 
def get_content(html):
    """Return every chunk of ``html`` matched by the post pattern.

    NOTE(review): the pattern below is reproduced literally from the article
    (an opening line break, two lazy wildcards separated by a line break, and
    a closing line break). It looks like HTML tags were stripped from it when
    the article was extracted -- presumably the opening/closing tags of each
    post container. Confirm against the live page markup.
    """
    # re.S lets '.' cross line breaks so one match can span several lines.
    reg = re.compile(r'(\n.*?\n.*?\n)', re.S)
    return reg.findall(html)


def get_img_url(response):
    """Return the values of all ``data-original="..."`` attributes found."""
    reg = r'data-original="(.*?)"'
    return re.findall(reg, response)


def get_img_name(response):
    """Return the matches of the name pattern in ``response``.

    NOTE(review): as written the pattern is only a lazy wildcard group, so it
    yields nothing but empty strings; the surrounding markup was probably lost
    together with the tags in ``get_content``. Restore it before relying on
    this function.
    """
    reg = re.compile(r'(.*?)')
    return re.findall(reg, response)


def download_img(img_url, save_dir='E:\\xx'):
    """Download ``img_url`` into ``save_dir`` unless the file already exists.

    The file is named after the last path segment of the URL. ``.jpg`` is
    appended only when that segment has no extension, so ``a.png`` is stored
    as ``a.png`` rather than ``a.png.jpg``.

    Args:
        img_url: Address of the image to fetch.
        save_dir: Target directory; created if it does not exist yet.

    Raises:
        ValueError: If the URL ends with ``/`` and therefore has no file name.
        OSError: If the directory cannot be created or the download fails.
    """
    name = img_url.split('/')[-1]
    if not name:
        raise ValueError('no file name in URL: {}'.format(img_url))
    if not os.path.splitext(name)[1]:
        name += '.jpg'

    os.makedirs(save_dir, exist_ok=True)
    path = os.path.join(save_dir, name)
    if os.path.exists(path):
        print('no!!!')
        return
    urllib.request.urlretrieve(img_url, path)
    print('ok!!!')


def get_url_name(start_url):
    """Download the first non-GIF image of every chunk found on a page.

    The page is fetched with ``get_response`` and split with ``get_content``.
    A failed download is reported and skipped so that the remaining images
    are still processed.
    """
    for chunk in get_content(get_response(start_url)):
        img_urls = get_img_url(chunk)
        if not img_urls:
            continue
        first_url = img_urls[0]
        if os.path.splitext(first_url)[1] == '.gif':
            continue
        try:
            download_img(first_url)
        except Exception as exc:
            # Best-effort crawl: report and move on. Unlike a bare ``except``
            # this still lets Ctrl-C (KeyboardInterrupt) stop the script.
            print('failed to download {}: {}'.format(first_url, exc))


def main(start_urls):
    """Crawl every page listed in ``start_urls`` in order."""
    for start_url in start_urls:
        get_url_name(start_url)


if __name__ == '__main__':
    start_urls = ['http://www.budejie.com/pic/{}'.format(i) for i in range(1, 10)]
    main(start_urls)

转载自:http://www.axuelt.cn/thread/detail/7/

你可能感兴趣的:(Python爬虫:用request爬取百思不得其姐网站的图片)