Python3项目:抓取10页租房信息

目标页面:http://bj.xiaozhu.com/search-duanzufang-p2-0/

# coding:utf-8

from bs4 import BeautifulSoup
import requests

# 获取列表页面上的链接

def get_url(url):
    """Fetch one listing page and scrape every page it links to.

    Downloads ``url``, selects the anchors matched by
    ``#page_list > ul > li > a`` and passes each anchor's ``href`` to
    ``get_contents``.

    Args:
        url: Address of a listing (search result) page.

    Returns:
        None. The scraped data is printed by ``get_contents``.
    """
    # Without a timeout requests waits indefinitely, so one stalled
    # connection would hang the whole crawl.
    html = requests.get(url, timeout=10)
    soup = BeautifulSoup(html.text, 'lxml')
    for link in soup.select('#page_list > ul > li > a'):
        href = link.get('href')
        # Tag.get returns None when the anchor has no href attribute;
        # requesting None would fail as an invalid URL, so skip it.
        if href:
            get_contents(href)

# 获取租房信息

def get_contents(url):
    """Fetch one detail page and print the fields scraped from it.

    Each field is located with its own CSS selector. The per-field
    result lists are combined positionally and one dict is printed per
    combined row, with the keys ``title``, ``adr``, ``money``, ``pic``,
    ``touxiang``, ``name`` and ``gender``.

    Args:
        url: Address of a detail page (an ``href`` taken from a listing
            page).

    Returns:
        None. Results are written to standard output with ``print``.
    """
    # Without a timeout requests waits indefinitely, so one stalled
    # connection would hang the whole crawl.
    html = requests.get(url, timeout=10)
    soup = BeautifulSoup(html.text, 'lxml')

    titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
    adrs = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span')
    moneys = soup.select('div.day_l')
    pics = soup.select('#curBigImage')
    # NOTE(review): "touxiang" presumably means the member's avatar image
    # (the selector targets div.member_pic) -- confirm against the page.
    touxiangs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
    names = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a')
    genders = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > span')

    # zip stops at the shortest list, so if any selector matches nothing
    # on this page no record is printed at all.
    rows = zip(titles, adrs, moneys, pics, touxiangs, names, genders)
    for title, adr, money, pic, touxiang, name, gender in rows:
        data = {
            'title': title.get_text(),
            'adr': adr.get_text(),
            'money': money.get_text(),
            'pic': pic.get('src'),
            'touxiang': touxiang.get('src'),
            'name': name.get_text(),
            # The value is the span's class attribute, not its text.
            'gender': gender.get('class'),
        }
        print(data)

# Build the addresses of listing pages 1 through 10 (range(1, 11)) and
# crawl them one after another.
urls = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(i) for i in range(1, 11)]
for url in urls:
    get_url(url)

你可能感兴趣的:(Python3项目:抓取10页租房信息)