爬取小猪短租内容

练手项目,没有下载图片,只提取了图片链接,难度不大。
构建 urls 列表,抓取前 6 页。
select找到各个关键点数据
for循环zip()写入字典。

import requests
from bs4 import BeautifulSoup
import time
# Search-result pages 1-6 for short-term rentals in Liuzhou.
# str.format accepts ints directly, so the str(i) conversion was redundant.
urls = ['http://liuzhou.xiaozhu.com/search-duanzufang-p{}-0/'.format(i) for i in range(1, 7)]
def get_onepage(url):
    """Scrape one Xiaozhu search-result page and print every listing.

    For each listing on the page, extracts the title, the district part of
    the introduce text, the nightly price and the lazy-loaded image URL,
    prints each record as a dict, and returns all records as a list
    (callers that ignore the return value are unaffected).
    """
    # timeout keeps the crawler from hanging forever on an unresponsive server
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')
    titles = soup.select('#page_list   div.result_btm_con.lodgeunitname > div > a > span ')
    introduces = soup.select('#page_list  div.result_btm_con.lodgeunitname > div > em')
    prices = soup.select('#page_list   div.result_btm_con.lodgeunitname > span.result_price > i')
    imgs = soup.select('#page_list ul img.lodgeunitpic')

    results = []
    for title, introduce, price, img in zip(titles, introduces, prices, imgs):
        data = {
            "title": title.get_text(),
            # keep only the district before the first '-'; key typo "intorduces" fixed
            "introduces": introduce.get_text().split('-')[0].strip(),
            "price": price.get_text(),
            # the real image URL lives in the lazy_src attribute, not src
            "img": img.get('lazy_src'),
        }
        print(data)
        results.append(data)
    return results

# Crawl every search page, pausing 2 seconds between requests
# to avoid hammering the server.
for page_url in urls:
    get_onepage(page_url)
    time.sleep(2)

你可能感兴趣的:(爬取小猪短租内容)