王一博豆瓣电影海报抓取

代码基本通用,只要换个名字就可以下载到你喜欢的明星电影海报。
直接上代码和效果图。注意要先把 chromedriver 下载下来,并在代码中(webdriver.Chrome 的 executable_path 参数)填写它的正确路径才行。

# -*- coding: utf-8 -*-
import requests
from lxml import etree
from selenium import webdriver
import os

# Search keyword (a performer's name); it is appended to the Douban search
# URLs built in the script's entry point.
name = '王一博'

def download(src, id):
    """Download one image and save it as ``<id>.webp``.

    The file is written into the "Xpath的翻页图片包" directory, which is
    created on demand. Download failures are reported with a printed message
    instead of being raised, so one bad image does not abort the whole run.

    Args:
        src: URL of the image to fetch.
        id: Label used to build the file name (the caller passes the movie
            title). The parameter name shadows the builtin but is kept so
            existing callers keep working.
    """
    folder = "Xpath的翻页图片包"
    os.makedirs(folder, exist_ok=True)

    # The label comes from scraped page text and may contain characters that
    # are not allowed in file names (path separators, ':' on Windows, control
    # characters). Replace them so open() does not fail on such titles.
    forbidden = '\\/:*?"<>|'
    safe_name = "".join(
        "_" if ch in forbidden or ord(ch) < 32 else ch for ch in str(id)
    )
    path = os.path.join(folder, safe_name + '.webp')

    try:
        pic = requests.get(src, timeout=10)
        # Treat 4xx/5xx answers as failures rather than saving an error page
        # body under an image file name.
        pic.raise_for_status()
    except requests.exceptions.RequestException:
        # RequestException also covers read timeouts and HTTP errors, which
        # the narrower ConnectionError does not.
        print("图片无法下载")
        return

    with open(path, 'wb') as d:
        d.write(pic.content)

def down_load(request_url):
    """Open one search-result page and download every poster found on it.

    The page is loaded through the module-level ``driver``; image URLs and
    title links are then extracted from the browser's page source with XPath
    and each pair is handed to ``download``.

    Args:
        request_url: URL of the result page to open.

    Returns:
        True if at least one poster image URL was found on the page (counted
        before the leading entry is dropped), otherwise False.
    """
    driver.get(request_url)
    html = etree.HTML(driver.page_source)
    src_xpath = "//div[@class='item-root']/a[@class='cover-link']/img[@class='cover']/@src"
    title_xpath = "//div[@class='item-root']/div[@class='detail']/div[@class='title']/a[@class='title-text']"
    srcs = html.xpath(src_xpath)
    titles = html.xpath(title_xpath)

    num = len(srcs)
    if num > 15:
        # NOTE(review): presumably a page holds 15 movie results and an extra
        # leading entry is not a movie - confirm against the live page.
        srcs = srcs[1:]
        titles = titles[1:]

    for src, title in zip(srcs, titles):
        # XPath element results are never None; what can be missing is the
        # link's text. Skip those entries instead of saving them all under
        # the same "None" file name.
        label = title.text
        if label is None or not label.strip():
            continue
        print(src)
        download(src, label)

    print('OK')
    print(num)
    return num >= 1
if __name__ == '__main__':
    # Script entry point: start Chrome, open the search page for `name`, then
    # walk the result pages and download the posters found on each of them.
    requests_url = "https://movie.douban.com/subject_search?search_text=" + name
    # `driver` must stay a module-level name because down_load() reads it.
    # NOTE(review): newer Selenium releases replace `executable_path` with a
    # Service object - confirm against the installed Selenium version.
    driver = webdriver.Chrome(executable_path=r'C:\Users\×××\AppData\Local\Google\Chrome\Application\chromedriver.exe')
    try:
        driver.get(requests_url)
        html = etree.HTML(driver.page_source)
        print(html)

        base_url = 'https://movie.douban.com/subject_search?search_text=' + name + '&cat=1002&start='
        # Results are paged in steps of 15; stop at the first page that
        # yields no posters, and never go past offset 60.
        for start in range(0, 70, 15):
            if not down_load(base_url + str(start)):
                break
        print("结束")
    finally:
        # Always shut the browser down, even when a page load or a download
        # raises, so no Chrome/chromedriver process is left running.
        driver.quit()

代码有较强的可移植性,换个名字基本就可以下载。主要方法是利用 XPath 路径提取图片地址,并逐页翻页查询下载,亲测个别明星可能不适用。另外,下载的图片是 webp 格式,这也是一个缺陷,后期慢慢改进!先看效果图吧:
王一博豆瓣电影海报抓取_第1张图片
王一博豆瓣电影海报抓取_第2张图片

你可能感兴趣的:(小随笔,python)