Python实战计划学习笔记2.2:爬取58二手市场(转转)

爬取58上海二手市场(转转)

流程

流程图:1.png

from bs4 import BeautifulSoup
import requests
import time
import pymongo

# Set up the database: connect to the MongoDB server on localhost:27017 and
# bind the database and collections that the two spiders below write to.

client = pymongo.MongoClient('localhost', 27017)
ceshi = client['ceshi']
# Item links stored by get_links_from().
url_list = ceshi['url_list']
# Item details stored by get_item_info().
# NOTE(review): the variable is named item_info but the collection is
# 'url_info' -- confirm that this collection name is intended.
item_info = ceshi['url_info']


def get_links_from(channel, pages, who_sells=0):
    """Spider 1: collect the item links found on one listing page.

    Builds the listing URL as ``{channel}{who_sells}/pn{pages}/``, downloads
    it, and inserts every item link (with its query string removed) into the
    ``url_list`` collection, printing each link as it is stored.

    Args:
        channel: Base URL of the channel. The seller segment and the page
            number are appended directly, so it presumably has to end with
            ``/`` -- TODO confirm against callers.
        pages: Page number to fetch.
        who_sells: Seller-type path segment placed right after the channel
            URL (default ``0``).

    Returns:
        None. Results are written to ``url_list`` and echoed to stdout.
    """
    list_view = '{}{}/pn{}/'.format(channel, who_sells, pages)
    wb_data = requests.get(list_view)
    # Pause one second after every request (presumably to throttle the crawl).
    time.sleep(1)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    if not soup.find('td', 't'):
        # No listing cell on this page: nothing to collect.
        return

    for link in soup.select('td.t > a.t'):
        href = link.get('href')
        if not href:
            # An anchor without an href would make .split() fail; skip it.
            continue
        item_link = href.split('?')[0]  # drop the query string
        url_list.insert_one({'url': item_link})
        print(item_link)


def get_item_info(url):
    """Fetch one item page and store its title, price and area.

    Downloads ``url`` and parses it. If the page contains a
    ``span.soldout_btn`` element, a notice is printed and nothing is stored.
    Otherwise the page title, price and area are inserted into the
    ``item_info`` collection and printed.

    A field whose element is absent from the page is stored as ``None``
    instead of raising, so one incomplete page does not abort a crawl.

    Args:
        url: Address of the item detail page.

    Returns:
        None. Results are written to ``item_info`` and echoed to stdout.
    """
    wb_data = requests.get(url)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    if soup.select('span.soldout_btn'):
        print('商品不存在')
        return

    def first_text(selector):
        # Text of the first element matching the selector, or None if absent.
        matches = soup.select(selector)
        return matches[0].text if matches else None

    title = soup.title.text if soup.title else None
    price = first_text('div.price_li > span > i')
    # NOTE(review): 'palce_li' is kept exactly as in the original selector;
    # presumably it mirrors the class name used by the page markup -- verify
    # against the live page before "correcting" the spelling.
    area = first_text('div.palce_li > span > i')

    item_info.insert_one({'title': title, 'price': price, 'area': area})
    print(title, price, area)




你可能感兴趣的:(Python实战计划学习笔记2.2:爬取58二手市场(转转))