抓取豆瓣网电影的例子

from bs4 import BeautifulSoup
import requests

def main(base_url, *, timeout=10):
    """Fetch one Douban Top 250 listing page, print its movies, return the next page URL.

    For every ``<li>`` inside ``<ol class="grid_view">`` the function prints the
    title, poster image URL, the text of the first ``<p>`` in ``div.bd``, and
    the second and third ``<span>`` of ``div.star``, followed by a separator
    line of 50 dashes.

    Args:
        base_url: URL of the listing page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The absolute URL of the next page, or ``None`` when the page has no
        "next" link (i.e. this was the last page).

    Raises:
        requests.HTTPError: The server answered with an error status.
        RuntimeError: The response does not contain the expected movie list.
    """
    req = requests.get(base_url, timeout=timeout)
    # Fail loudly on an error response instead of parsing an error page.
    req.raise_for_status()
    soup = BeautifulSoup(req.text, 'lxml')

    ol = soup.find("ol", class_="grid_view")
    if ol is None:
        # find() returns None when nothing matches; report that clearly rather
        # than crashing later with an AttributeError on None.
        raise RuntimeError(
            'no <ol class="grid_view"> found in response from {}'.format(base_url)
        )

    for li in ol.find_all('li'):
        img_src = li.find('img')['src']
        title = li.find('span', class_="title").text.strip()
        # Text of the first <p> under div.bd (the original code calls it "actor").
        actor = li.find('div', class_="bd").p.get_text().strip()
        star_info_all = li.find('div', class_='star').find_all('span')
        # Index 1 / 2 are treated as score / comment count, following the
        # original variable names -- NOTE(review): confirm against page markup.
        mv_score = star_info_all[1].text.strip()
        comment_num = star_info_all[2].text.strip()

        print(title)
        print(img_src)
        print(actor)
        print(mv_score)
        print(comment_num)
        print('-' * 50)

    # The "next" span carries an <a> only when a following page exists.
    next_span = soup.find('span', class_='next')
    next_a = next_span.find('a') if next_span is not None else None
    if next_a is None:
        # No further page: return None so callers can stop paginating
        # (previously this path raised TypeError on str + None).
        return None
    return 'https://movie.douban.com/top250' + next_a['href']

if __name__=='__main__':
    # Start at the first page; main() prints that page and returns the URL of
    # the following one.
    n_url=main('https://movie.douban.com/top250?start=0')
    # Page down: feed each returned URL back into main(). The result must be
    # reassigned, otherwise the loop would refetch the same page forever.
    while n_url:
        n_url=main(n_url)

你可能感兴趣的:(抓取豆瓣网电影的例子)