Week1_Practice4

GetImage.py

import urllib.request
import time
path='Image/'
def get_image(url):
    """Download the image at ``url`` into the directory named by ``path``.

    The file is stored under the last segment of the URL's path component
    (for example ``908643315_1282632295.220x220.jpg``).  Using only the last
    five characters of the URL, as before, mapped most images to the same
    short name such as ``0.jpg`` so that they overwrote each other.

    Args:
        url: Absolute URL of the image to fetch.

    Raises:
        ValueError: If ``url`` has no final path segment to use as a name.

    Errors raised by ``urllib.request.urlretrieve`` are not caught here and
    propagate to the caller.
    """
    import os
    import posixpath
    from urllib.parse import urlsplit

    # Look only at the path component so that a query string or fragment
    # never leaks into the file name.
    filename = posixpath.basename(urlsplit(url).path)
    if not filename:
        raise ValueError("cannot derive a file name from URL: {!r}".format(url))

    # urlretrieve does not create missing directories.
    os.makedirs(path, exist_ok=True)

    urllib.request.urlretrieve(url, os.path.join(path, filename))
    print("Done one")


# Manual smoke test: download one sample image. Guarded so that importing
# get_image from another module does not trigger a download.
if __name__ == "__main__":
    get_image('https://cbu01.alicdn.com/img/ibank/2013/513/346/908643315_1282632295.220x220.jpg')

GetImagePage.py

import requests
from bs4 import BeautifulSoup
import time

def get_image_href(url):
    """Return the ``src`` links of the thumbnail images found on a page.

    Args:
        url: Address of the page to fetch and parse.

    Returns:
        A list with the ``src`` attribute of every ``<img>`` element matched
        by the thumbnail selector, in the order ``select`` returned them.
        Elements without a ``src`` attribute are skipped.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched,
            including when the request times out.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')

    # Collect the image links.
    image_tags = soup.select('div.grid-thumb.grid-responsive > div > div > div > a > img')
    hrefs = []
    for tag in image_tags:
        src = tag.get('src')
        if not src:
            # A missing src would otherwise put None into the result list.
            continue
        hrefs.append(src)
        print("Get One")

    # Pause once per fetched page. Sleeping per element only delayed work on
    # data that was already in memory.
    time.sleep(2)
    return hrefs

# Manual smoke test: scrape a single page. Guarded so that importing
# get_image_href from another module does not fetch anything.
if __name__ == "__main__":
    get_image_href('http://weheartit.com/inspirations/taylorswift?page=4')

week1_4.py

from GetImagePage import get_image_href
from GetImage import get_image

def main():
    """Walk listing pages 1 through 20 and download every image they link.

    Each page URL is passed to ``get_image_href``; every link it returns is
    handed to ``get_image``.
    """
    page_template = "http://weheartit.com/inspirations/taylorswift?page={}"

    for page_number in range(1, 21):
        page_url = page_template.format(page_number)
        for image_url in get_image_href(page_url):
            get_image(image_url)

# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

你可能感兴趣的:(Week1_Practice4)