1.2

from bs4 import BeautifulSoup as bs

# Parse the locally saved sample page with the lxml parser. The document is
# fully parsed inside the `with` block, so the file can be closed before the
# selectors below are evaluated.
with open('E:/four weeks/Plan-for-combating-master/week1/1_2/1_2answer_of_homework/1_2_homework_required/index.html','r') as wb_data:
    Soup = bs(wb_data, 'lxml')

# Every select() call returns a list of the tags matching the CSS selector.
images = Soup.select(' div > div > div > div > div > div > img')
prices = Soup.select('div > div > div > div > div > div > div > h4.pull-right')
# CSS selectors may also be written as relative paths (no need to start at body).
names = Soup.select('div > div > div > div > div > div > h4 > a')
reviews = Soup.select(' body > div > div > div.col-md-9 > div > div > div > div > p.pull-right')
# :nth-of-type(2) is kept so the match is the parent <p> that holds the stars
# rather than an individual star; comparing the selectors of several stars in
# the page makes the pattern obvious.
rates = Soup.select(' body > div > div > div > div > div > div > div.ratings > p:nth-of-type(2) ')

# Walk the five lists in lockstep, one product per iteration. zip() stops at
# the shortest list, so a selector that matches fewer tags silently truncates
# the output.
for image, price, name, review, rate in zip(images, prices, names, reviews, rates):
    data = {
        'tupian': image.get('src'),      # image URL taken from the src attribute
        'jiage': price.get_text(),       # price text
        'name': name.get_text(),
        'review': review.get_text(),
        # Rating = number of <span> tags carrying the filled-star classes.
        'rate': str(len(rate.find_all('span', class_="glyphicon glyphicon-star"))) + '星',
    }
    # Printed inside the loop so every product is shown, not just the last one.
    print(data)

这里还学到了一个新的用法:stripped_strings(用于一个标签内包含多段文本内容的情况,可以逐段取出并去掉首尾空白)

你可能感兴趣的:(1.2)