1211完成小猪入mongo并筛选

总结:小猪(xiaozhu.com)网站正在调整,页面变得很难爬取,我前后测试了好几次。

请大家务必先配置好所有环境:Python 3、requests、beautifulsoup4、lxml、pymongo,以及在本机 27017 端口运行的 MongoDB。

成功展示:

1211完成小猪入mongo并筛选_第1张图片
smallpigintomongo.PNG

代码:

#!C:\Python35\python.exe
# coding=utf-8

import requests
from bs4 import BeautifulSoup
import urllib.request
import time
import pymongo
from pymongo import MongoClient
import lxml

# Connection settings for the MongoDB server on this machine.
host = 'localhost'
port = 27017

# One shared client for the whole script. Scraped listings are stored in
# the 'fangzi' collection of the 'xiaozhu' database.
client = MongoClient(host=host, port=port)
db = client.xiaozhu
fangzi = db.fangzi

def insert_fangzi_info(url, timeout=10):
    """Scrape one search-result page and store its listings in MongoDB.

    Downloads ``url``, extracts listing titles and prices from the HTML
    with CSS selectors, and inserts one ``{'title': str, 'price': int}``
    document per listing into the module-level ``fangzi`` collection.
    Progress is reported on stdout.

    Args:
        url: Address of the search-result page to scrape.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Raises:
        requests.exceptions.RequestException: If the page cannot be
            downloaded, including when the timeout expires.
    """
    web_data = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(web_data.text, 'lxml')

    # Selectors were tested and working against the site on 2016-12-11.
    titles = soup.select('#page_list > ul > li > div.result_btm_con > div > a > span')
    prices = soup.select('span.result_price > i')
    print(titles)
    print(prices)

    # zip() stops at the shorter list, so a selector that no longer matches
    # would otherwise drop or mispair listings without any visible sign.
    if len(titles) != len(prices):
        print("warning: {} titles but {} prices on {}".format(
            len(titles), len(prices), url))

    for title, price in zip(titles, prices):
        price_text = price.get_text().strip()
        try:
            price_value = int(price_text)
        except ValueError:
            # Skip only this listing rather than aborting the rest of the page.
            print("skip listing with non-numeric price: {!r}".format(price_text))
            continue

        info = {
            'title': title.get_text(),
            'price': price_value,
        }
        # Print before inserting: insert_one() adds an '_id' key to the dict.
        print(info)
        fangzi.insert_one(info)
        print("insert to fangzi biao")

def find_fangzi(min_price=405):
    """Print every stored listing whose price is at least ``min_price``.

    The comparison is done by MongoDB through a ``$gte`` query, so only
    matching documents are returned instead of fetching the whole
    collection and filtering it in Python. Documents without a numeric
    ``price`` field simply do not match.

    Args:
        min_price: Lowest price to include. Defaults to 405, the threshold
            this function previously hard-coded.
    """
    for info in fangzi.find({'price': {'$gte': min_price}}):
        print(info)
    # Report completion once, after all matches have been printed; it used
    # to be printed again after every single match.
    print("finish!")

# Search-result pages 1 through 4 of the Beijing short-term rental listing.
urls = list(map('http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format, range(1, 5)))

# Scrape and store every page first, then print the filtered listings.
for one_url in urls:
    insert_fangzi_info(one_url)

find_fangzi()

# Disabled manual check against a single page (page 8):
#   url22 = 'http://bj.xiaozhu.com/search-duanzufang-p8-0/'
#   insert_fangzi_info(url22)
#   find_fangzi()

你可能感兴趣的:(1211完成小猪入mongo并筛选)