超简单爬虫获取猫眼电影热映榜

# -*- coding:utf-8 -*-
import requests,time
from selenium import webdriver
from bs4 import BeautifulSoup
from collections import namedtuple
import xlwt


def _after_colon(text):
    """Return the second colon-separated field of *text*.

    Both the ASCII colon and the full-width colon ("：") are treated as
    separators. If *text* contains no colon at all it is returned unchanged
    instead of raising ``IndexError``.
    """
    # NOTE(review): Chinese-language pages often use the full-width colon in
    # labels; which one the target page uses has not been confirmed here, so
    # both are accepted.
    parts = text.replace('：', ':').split(':')
    return parts[1] if len(parts) > 1 else text


def main():
    """Fetch the Maoyan board page and yield one dict per ``<dd>`` entry.

    Each yielded dict has the keys ``index``, ``title``, ``actors``, ``date``
    and ``score`` (all strings). ``score`` is ``'N/A'`` when the score markup
    is missing from the entry and ``'None'`` when it is present but empty.

    This is a generator: the HTTP request is only sent once iteration starts.

    Raises:
        requests.RequestException: if the request times out, fails, or the
            server answers with an HTTP error status.
    """
    url = "https://maoyan.com/board/7/"
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36"}
    # Without a timeout requests may block forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page into no rows.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features="lxml")
    for d in soup.find_all('dd'):
        data = {
            "index": d.find('i').get_text(),
            "title": d.find('p').find('a').get_text(),
            # The label before the colon is dropped; only the value is kept.
            "actors": _after_colon(d.find('p', {"class": "star"}).get_text().strip()),
            "date": _after_colon(d.find('p', {"class": "releasetime"}).get_text()),
        }
        try:
            # The score is rendered as two <i> tags: integer part + fraction.
            score = (d.find('i', {"class": "integer"}).get_text()
                     + d.find('i', {"class": "fraction"}).get_text())
        except AttributeError:
            # find() returned None: this entry has no score markup.
            data["score"] = 'N/A'
        else:
            data["score"] = score if score else 'None'
        yield data

def save_data(DATA, path='E:\\猫眼电影.xls'):
    """Write the scraped records to an ``.xls`` workbook.

    Args:
        DATA: iterable of dicts, each providing the keys ``index``, ``title``,
            ``actors``, ``date`` and ``score``. Any iterable works (list,
            generator, ...); it is consumed once.
        path: destination file passed to ``Workbook.save``. Defaults to the
            original hard-coded location so existing callers are unaffected.

    Raises:
        KeyError: if a record lacks one of the expected keys.
    """
    # Column order of the sheet; also the dict keys read from each record.
    columns = ('index', 'title', 'actors', 'date', 'score')

    f = xlwt.Workbook(encoding='utf-8')
    sheet01 = f.add_sheet(u'sheet1', cell_overwrite_ok=True)

    # Header row (row 0).
    for col, name in enumerate(columns):
        sheet01.write(0, col, name)

    # Data rows start at row 1, directly below the header.
    for row, record in enumerate(DATA, start=1):
        for col, name in enumerate(columns):
            sheet01.write(row, col, record[name])

    f.save(path)


if __name__ == '__main__':
    # Drain the generator returned by main() into a list, then export it.
    DATA = list(main())
    save_data(DATA)

你可能感兴趣的:(爬虫)