数据爬取——电影天堂

import requests
from lxml import etree


# Scrape one dy2018.com listing page and print "<title> <rating>" for every
# movie table found on it.
url = "https://www.dy2018.com/14/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36"
}

# `headers` must be passed by keyword: the second positional parameter of
# requests.get() is `params`, so a positional dict ends up in the query
# string and the User-Agent header is never sent.
# The timeout keeps a stalled connection from blocking the script forever.
data_html = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page and
# silently printing nothing.
data_html.raise_for_status()
# Decode the body as gb2312 rather than whatever requests would guess.
data_html.encoding = "gb2312"

tree = etree.HTML(data_html.text)
movie_list = tree.xpath('//table[@class="tbspan"]')
for movie in movie_list:
    names = movie.xpath('./tr//a[@class="ulink"]/@title')          # movie title
    scores = movie.xpath('./tr//font[@color="#F98E6A"]/text()')    # movie rating
    # xpath() returns an empty list when nothing matches; skip incomplete
    # entries rather than letting [0] raise IndexError and abort the run.
    if not names or not scores:
        continue
    name, score = names[0], scores[0]
    print(name, score)

 

你可能感兴趣的:(数据分析)