Python实战计划第一周，第二个项目

爬取商品信息

import requests
from bs4 import BeautifulSoup
import os

# Parse a locally saved product-listing page and print one dict per product
# with its image URL, price, title, review count and star rating.

# NOTE(review): machine-specific absolute path; the script only runs where
# this directory exists.
os.chdir('/Users/baidu/Desktop/爬虫/Plan-for-combating-master/week1/1_2/1_2answer_of_homework/1_2_homework_required/')

# Open in binary mode so BeautifulSoup detects the document's encoding itself
# instead of relying on the platform's locale default used by text mode.
with open('index.html', 'rb') as wb_data:
    soup = BeautifulSoup(wb_data, 'lxml')

images = soup.find_all('div', {'class': 'thumbnail'})
prices = soup.find_all('h4', {'class': 'pull-right'})
titles = soup.find_all('div', {'class': 'caption'})
# The 'review' and 'star' fields are both read from the same div.ratings
# elements, so a single query serves both.
ratings = soup.find_all('div', {'class': 'ratings'})

# The lists are paired purely by position, and zip() stops at the shortest one.
# NOTE(review): this assumes every product has exactly one of each element, in
# the same document order -- confirm against the page markup.
for image, price, title, rating in zip(images, prices, titles, ratings):
    paragraphs = rating.find_all('p')
    # The first <p> holds the review text; the second holds the star icons.
    # Filled stars are the spans whose class attribute is exactly
    # 'glyphicon glyphicon-star'.
    star_num = len(paragraphs[1].find_all('span', {'class': 'glyphicon glyphicon-star'}))
    data = {
        'image': image.img['src'],
        'price': price.get_text(),
        'title': title.a.get_text(),
        # Keep only the first space-separated token of the review text.
        'review': paragraphs[0].get_text().split(' ')[0],
        'star': str(star_num) + '星',
    }
    print(data)

你可能感兴趣的：（Python实战计划第一周，第二个项目）