# Python 实战：豆瓣电影关键字爬虫

import requests
from bs4 import BeautifulSoup
def get_moves(pages=10):
    """Scrape movie titles from the Douban Top 250 listing pages.

    Requests ``https://movie.douban.com/top250?start=<offset>`` once per
    page, with the offset advancing by 25 each time, and collects the text
    of the first ``<span>`` inside the first ``<a>`` of every
    ``<div class="hd">`` element.

    Args:
        pages: Number of listing pages to fetch, starting from offset 0.
            Defaults to 10, which matches the previously hard-coded range.

    Returns:
        A list of stripped title strings in the order they appear.

    Raises:
        requests.HTTPError: If a page responds with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    header = {
        'user-agent':
            'Mozilla/5.0 (Windows NT 10.0;Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/80.0.3987.132 Safari/537.36',
        'host': 'movie.douban.com',
    }
    link = 'https://movie.douban.com/top250?start='
    page_size = 25  # offset step between consecutive listing pages
    movie_list = []
    # A single session reuses the underlying connection across requests.
    with requests.Session() as session:
        for i in range(pages):
            r = session.get(link + str(i * page_size), headers=header, timeout=20)
            # Fail loudly on an error response instead of parsing an error
            # page and silently returning an empty or partial list.
            r.raise_for_status()
            r.encoding = 'utf-8'
            soup = BeautifulSoup(r.text, "lxml")
            for each in soup.find_all('div', class_='hd'):
                anchor = each.a
                span = anchor.span if anchor is not None else None
                if span is None:
                    # Skip heading blocks without the expected markup
                    # rather than crashing with AttributeError.
                    continue
                movie_list.append(span.text.strip())
    return movie_list
# Run the scrape when the module loads and print the collected titles.
movies = get_moves()
print(movies)

 

# 你可能感兴趣的：(python)