from bs4 import BeautifulSoup
# Scrape product entries from a locally saved HTML page and print them
# ordered by star rating (highest first).
path = r'E:\index.html'
data_list = []

# NOTE(review): no encoding is passed, so open() falls back to the platform
# default; confirm this matches the encoding of the saved page.
with open(path, 'r') as file:
    # BeautifulSoup consumes the file handle while parsing, so everything
    # below can run after the file has been closed.
    Soup = BeautifulSoup(file, 'lxml')

# Each selector returns one element per product, in document order.
brices = Soup.select('body > div > div > div.col-md-9 > div > div > div > div.caption > h4.pull-right')
titles = Soup.select('body > div > div > div.col-md-9 > div > div > div > div.caption > h4 > a')
levels = Soup.select('body > div > div > div.col-md-9 > div > div > div > div.ratings > p.pull-right')
counts = Soup.select('body > div > div > div.col-md-9 > div > div > div > div.ratings > p:nth-of-type(2)')
imgs = Soup.select('body > div > div > div.col-md-9 > div > div > div > img')

# zip() pairs the parallel result lists positionally and stops at the
# shortest one, so a product missing any field would shift or drop entries.
for brice, title, level, count, img in zip(brices, titles, levels, counts, imgs):
    data = {
        'brice': brice.get_text(),
        'title': title.get_text(),
        # Text of the right-aligned paragraph inside div.ratings.
        'level': level.get_text(),
        # Star rating: find_all(attrs={attribute: value}) collects the
        # star elements inside the second ratings paragraph; their number
        # is the rating.
        'count': len(count.find_all(attrs={'class': 'glyphicon glyphicon-star'})),
        'img': img.get('src')
    }
    data_list.append(data)

# sorted() is stable, so products with equal star counts keep page order.
for i in sorted(data_list, key=lambda x: x['count'], reverse=True):
    print('title {} --count is {} -- brice is {}'.format(i['title'], i['count'], i['brice']))
输出结果:
title EarPod --count is 5 -- brice is $24.99
title New Pocket --count is 4 -- brice is $64.99
title New sunglasses --count is 4 -- brice is $74.99
title iphone gamepad --count is 4 -- brice is $94.99
title Best Bed --count is 4 -- brice is $214.5
title iWatch --count is 4 -- brice is $500
title Park tickets --count is 4 -- brice is $15.5
title Art Cup --count is 3 -- brice is $84.99
小结:
BeautifulSoup参考文档链接:
https://www.crummy.com/software/BeautifulSoup/bs4/doc.zh/#find-all-tag