BeautifulSoup4简单爬取图片并存放

爬取图片并保存到本地

import requests
from bs4 import BeautifulSoup
import urllib
def img_parse(img_url, title):
    """Download the image embedded in a GitHub file page and save it locally.

    Fetches ``img_url``, takes the first element matching the CSS selector
    ``.border-wrap img`` in the returned HTML, downloads the file its ``src``
    attribute points to and writes the raw bytes to a local file named
    ``title``.

    Args:
        img_url: URL of the page that embeds the image.
        title: Name (or path) of the local file the image bytes are written to.

    Returns:
        None. If the page contains no matching ``<img>`` element a message is
        printed and nothing is written.

    Raises:
        requests.RequestException: if either HTTP request fails, times out or
            answers with an error status code.
    """
    # Imported locally: the file-level ``import urllib`` alone does not
    # guarantee that the ``urllib.parse`` submodule is available.
    from urllib.parse import urljoin

    print(img_url)
    # Send a browser-like User-Agent with every request.
    req_header = {
        'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }
    # A timeout keeps a stalled connection from blocking the crawl forever.
    response = requests.get(img_url, headers=req_header, timeout=30)
    response.raise_for_status()

    result = BeautifulSoup(response.text, 'lxml')
    img = result.select_one('.border-wrap img')
    if img is None or not img.get('src'):
        # E.g. the link pointed at something that is not an image page.
        print('No image found on {}; skipping'.format(img_url))
        return

    # urljoin keeps an already-absolute ``src`` intact and prefixes the GitHub
    # host to a site-relative one (plain concatenation broke the former).
    image_url = urljoin('https://github.com', img['src'])
    image_response = requests.get(image_url, headers=req_header, timeout=30)
    # Check the status before opening the file so an error page is never
    # stored as if it were the image.
    image_response.raise_for_status()

    # Write the image bytes to the local file.
    with open('{}'.format(title), 'wb') as file:
        file.write(image_response.content)


def img_cover():
    """Crawl the image directory listing on GitHub and download every entry.

    Fetches the hard-coded repository directory page, collects the anchors
    matching ``tbody tr .content span a`` and, for each one, calls
    ``img_parse`` with the absolute page URL and the anchor text as the
    local file name.

    Raises:
        requests.RequestException: if the directory listing itself cannot be
            fetched. Request failures for individual entries are reported
            with ``print`` and the crawl continues with the next entry.
    """
    # Imported locally: the file-level ``import urllib`` alone does not
    # guarantee that the ``urllib.parse`` submodule is available.
    from urllib.parse import urljoin

    url = 'https://github.com/phoenixshow/FrontCode/tree/master/buickmall/img'
    req_header = {
        'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }
    # A timeout keeps a stalled connection from hanging the script.
    response = requests.get(url, headers=req_header, timeout=30)
    response.raise_for_status()
    result = BeautifulSoup(response.text, 'lxml')
    links = result.select('tbody tr .content span a')

    for link in links:
        href = link.get('href')
        # The anchor text becomes the file name, so drop surrounding
        # whitespace/newlines that the HTML layout may have introduced.
        title = link.get_text().strip()
        if not href or not title:
            # Nothing to fetch or no usable file name for this anchor.
            continue

        # Build the absolute URL of the entry's page.
        img_url = urljoin('https://github.com', href)
        try:
            # Hand the entry to the per-image download function.
            img_parse(img_url, title)
        except requests.RequestException as err:
            # One unreachable entry should not abort the remaining downloads.
            print('Failed to download {}: {}'.format(img_url, err))

# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if '__main__' == __name__:
    img_cover()
    
    
    

你可能感兴趣的:(BeautifulSoup4简单爬取图片并存放)