# Assignment 1_3 (作业1_3)

importrequests

frombs4importBeautifulSoup

importtime

url='http://bj.xiaozhu.com/fangzi/597754001.html'

def house_info(url, data=None):
    """Scrape one xiaozhu.com listing page and print its details as a dict.

    Args:
        url: Absolute URL of a single listing ("fangzi") page.
        data: Unused; kept for backward compatibility with existing callers.
              It is rebound inside the loop below.
    """
    wb_data = requests.get(url)
    time.sleep(1)  # throttle requests to be polite to the server
    soup = BeautifulSoup(wb_data.text, 'lxml')

    titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
    addresses = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span.pr5')
    prices = soup.select(' div.day_l > span')
    house_images = soup.find_all(id='curBigImage')
    owner_images = soup.select(' div.js_box.clearfix > div.member_pic > a > img')
    sexs = soup.select('div.js_box.clearfix > div.member_pic > div')

    # Decode the host's gender from the icon's CSS class:
    # 'member_ico1' -> female, 'member_ico' -> male, neither -> unknown.
    sex1 = []
    for sex in sexs:
        sex = str(sex)
        if 'member_ico1' in sex:
            sex1.append('女')
        elif 'member_ico' in sex:
            # The extra "'member_ico1' not in sex" check from the original was
            # redundant: this branch is only reached when it is already false.
            sex1.append('男')
        else:
            sex1.append('空')

    owner_names = soup.select(' div.js_box.clearfix > div.w_240 > h6 > a')

    # Each selector normally yields one element per page; zip pairs them up
    # (and silently yields nothing if any selector found no match).
    for title, address, price, house_image, owner_image, owner_name, sex in zip(
            titles, addresses, prices, house_images, owner_images, owner_names, sex1):
        data = {
            'title': title.get_text(),
            'address': address.get_text().rstrip(),
            'price': price.get_text(),
            'house_image': house_image.get('src'),
            'owner_image': owner_image.get('src'),
            'owner_name': owner_name.get_text(),
            'sex': sex
        }
        print(data)

# Search-result pages 1..11 for Beijing short-term rentals (duanzufang).
url1 = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(i) for i in range(1, 12)]

def multi_info(url):
    """Fetch one search-results page and scrape every listing it links to.

    Args:
        url: URL of a xiaozhu.com search-results page.
    """
    wb_data = requests.get(url)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    # Each result card's anchor carries the listing's absolute href.
    page_url = soup.select('a.resule_img_a')
    for page_url1 in page_url:
        house_info(str(page_url1['href']))

# Crawl every search-results page and scrape each listing found on it.
for url2 in url1:
    multi_info(url2)

# (blog footer residue) 你可能感兴趣的:(作业1_3)