Scraping Douban Movies

The script below fetches Douban's "now playing" page for a chosen city, then follows each film's link to its detail page and extracts the metadata with XPath.

# -*- coding: utf-8 -*-
import requests
from lxml import etree
from bs4 import BeautifulSoup  # required libraries: requests, lxml, beautifulsoup4

# Request headers: a browser User-Agent so Douban serves the normal page
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:58.0) Gecko/20100101 Firefox/58.0'}

def get_top():
    # The city slug in the URL ('zhanjiang' here) selects which city's
    # "now playing" page to scrape; change it to the city you want
    url = 'https://movie.douban.com/cinema/nowplaying/zhanjiang/'
    response = requests.get(url, headers=headers)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'lxml')

    # One <li> per film; find_all('li') avoids the whitespace text nodes
    # that iterating over .children would also yield
    li_list = soup.find('ul', {'class': 'lists'}).find_all('li')

    for li in li_list:
        try:
            movie_id = li['id']               # Douban subject id
            score = li['data-score']          # rating
            region = li['data-region']        # production region

            # Detail page for this film
            detail_url = 'https://movie.douban.com/subject/{}/?from=playing_poster'.format(movie_id)
            detail_html = requests.get(detail_url, headers=headers).text
            s = etree.HTML(detail_html)

            film_name = s.xpath('//*[@id="content"]/h1/span[1]/text()')                 # title
            director = s.xpath('//*[@id="info"]/span[1]/span[2]/a/text()')              # director
            writers = s.xpath('//*[@id="info"]/span[2]/span[2]/a/text()')               # screenwriters
            actors = s.xpath('//*[@id="info"]/span[3]/span[2]/a/text()')                # cast
            genres = s.xpath('//*[@id="info"]/span[5]/text()')                          # genres
            release_date = s.xpath('//*[@id="info"]/span[9]/text()')                    # release date
            runtime = s.xpath('//*[@id="info"]/span[11]/text()')                        # runtime
            summary = s.xpath('normalize-space(//*[@id="link-report"]/span[1]/text())') # synopsis
            poster = s.xpath('//*[@id="mainpic"]/a/img/@src')                           # poster URL
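            # Note: the span[...] indexes above are positional and break if
            # Douban reorders the #info block.  The page also tags several
            # fields with RDFa property attributes, which are stabler, e.g.
            # (attribute names assumed from observed markup; verify against
            # the live HTML):
            #   genres = s.xpath('//span[@property="v:genre"]/text()')
            #   runtime = s.xpath('//span[@property="v:runtime"]/text()')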

            print(movie_id)
            print(score)
            print(region)
            print(director)
            print(writers)
            print(actors)
            print(film_name)
            print(genres)
            print(release_date)
            print(runtime)
            print(summary)
            print(poster)
            print('\n')  # blank lines between films

        except KeyError:
            # an <li> without the expected data attributes; skip it
            continue
        except requests.RequestException:
            # network failure on the detail page; skip this film
            continue

if __name__ == '__main__':
    get_top()
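
If you want to keep the results instead of printing them, the loop body can collect each film's fields into a dict and write everything to a CSV at the end. A minimal sketch, assuming the variable names used above; save_to_csv and its rows argument are hypothetical helpers, not part of the original script:

import csv

FIELDS = ['movie_id', 'score', 'region', 'film_name', 'director',
          'writers', 'actors', 'genres', 'release_date', 'runtime',
          'summary', 'poster']

def save_to_csv(rows, path='douban_nowplaying.csv'):
    # rows is a list of dicts keyed by FIELDS; XPath results are lists,
    # so join them into one string per cell
    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=FIELDS)
        writer.writeheader()
        for row in rows:
            writer.writerow({k: '/'.join(v) if isinstance(v, list) else v
                             for k, v in row.items()})

Writing with the utf-8-sig encoding keeps Excel from mangling the Chinese titles when the file is opened directly.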
