# Week1_Practice2

from bs4 import BeautifulSoup

# Scrape the locally saved index.html: for every listed item collect its
# image URL, title, price, review text and star count, then print the
# resulting list of dicts.
# NOTE(review): open() is called without an explicit encoding, so the
# platform default is used to decode the page -- confirm it matches the
# file's real encoding.
with open('D:\\Python\\crawler\\网易云课堂作业\\第一周作业\\1_2\\1_2_homework_required\\index.html','r') as wb_data:
    soup = BeautifulSoup(wb_data, 'lxml')

    # One result list per field; all are matched in document order, so the
    # n-th entry of each list is expected to describe the same item.
    images = soup.select('body > div > div > div.col-md-9 > div > div > div[class="thumbnail"] > img')
    titles = soup.select('body > div > div > div.col-md-9 > div > div > div > div.caption > h4 > a[href="#"]')
    prices = soup.select('body > div > div > div.col-md-9 > div > div > div > div.caption > h4.pull-right')
    reviews = soup.select('body > div > div > div.col-md-9 > div > div > div > div.ratings > p.pull-right')

    # Count the stars inside the same `div.ratings` container that holds each
    # review (the review selector guarantees that is the review's parent).
    # Scanning every class-less <p> in the whole document instead would let
    # an unrelated paragraph with a child tag insert a bogus 0 and shift all
    # later ratings onto the wrong item.
    allStars = []
    for review in reviews:
        ratings_box = review.parent
        # Exact class-list comparison: only elements whose class attribute is
        # precisely "glyphicon glyphicon-star" are counted.
        star_count = sum(
            1
            for tag in ratings_box.find_all()
            if tag.get('class') == ['glyphicon', 'glyphicon-star']
        )
        allStars.append(star_count)

    all_data = []
    for image, title, price, review, allStar in zip(images, titles, prices, reviews, allStars):
        data = {
            # <img> elements carry their URL in `src`; `href` is never set
            # on them, so reading it always produced None.
            "image": image.get('src'),
            "title": title.get_text(),
            'price': price.get_text(),
            'review': review.get_text(),
            'allStar': allStar,
        }
        all_data.append(data)
    print(all_data)

# 你可能感兴趣的:(Week1_Practice2)