Python学习笔记(4):爬取动态数据

我的代码

from bs4 import BeautifulSoup
import requests
import time
import urllib

# Base address of the listing pages; get_more_pages appends the page number.
url = 'https://knewone.com/discover?page='

def get_page(url,data=None):
    """Download every cover image found on one listing page.

    Fetches ``url``, selects the ``<img>`` elements matched by the CSS
    selector ``a.cover-inner > img`` and saves the file behind each
    element's ``src`` attribute into a local folder.

    Args:
        url: Address of the page to scrape.
        data: Unused; kept so existing callers keep working.

    Returns:
        None. Prints ``done`` after each image has been saved.
    """
    # Imported here because a bare ``import urllib`` does not guarantee that
    # the ``urllib.request`` submodule has been loaded.
    import os
    import urllib.request

    folder_path = '/Users/zhoujiangfeng/Pictures/jandan/'

    # A timeout keeps a stalled server from hanging the whole crawl.
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.text,'lxml')
    imgs = soup.select('a.cover-inner > img')

    # Skip <img> tags without a usable src: a missing attribute yields None,
    # which would crash both the slice and the download below.
    candidate_links = (img.get('src') for img in imgs)
    download_links = [link for link in candidate_links if link]
    if not download_links:
        return

    # urlretrieve does not create missing directories.
    os.makedirs(folder_path, exist_ok=True)

    for item in download_links:
        # Download the image to the local folder with urllib.
        # NOTE(review): the file name is a fixed 5-character slice of the link,
        # which presumably targets the tail of the name plus its extension on
        # this site; it breaks if the link layout changes and can make
        # different images overwrite each other - confirm against real links.
        urllib.request.urlretrieve(item,folder_path + item[-21:-16])
        print('done')

def get_more_pages(start,end):
    """Scrape a run of consecutive listing pages.

    Calls ``get_page`` for every page number from ``start`` up to, but not
    including, ``end``, pausing two seconds after each page.

    Args:
        start: First page number to fetch.
        end: Page number at which to stop (exclusive).
    """
    page_numbers = range(start, end)
    for number in page_numbers:
        page_url = url + str(number)
        get_page(page_url)
        # Wait between requests before moving on to the next page.
        time.sleep(2)


# Guard the entry point so importing this module does not start a crawl.
if __name__ == '__main__':
    # Fetch listing pages 1 through 9 (the end bound is exclusive).
    get_more_pages(1,10)

总结

  • 注意观察图片链接的格式:保存到本地时,文件名需带上图片格式的后缀名(从链接中截取相应位置的字符作为文件名)
  • 使用 urllib 库的 urllib.request.urlretrieve 方法将图片下载到本地

你可能感兴趣的:(Python学习笔记(4):爬取动态数据)