爱彼迎房源爬取价格和名称

爱彼迎的页面是用 JavaScript 动态渲染的,无法通过静态网页抓取的方式获取内容,因此需要调用 Selenium 库来加载渲染后的页面。

from selenium import webdriver
from bs4 import BeautifulSoup
import bs4
import time 
from selenium.webdriver.firefox.options import Options

def getHTMLText(url):
    """Fetch the fully rendered HTML of a page using headless Firefox.

    The target pages are rendered by JavaScript, so the page is loaded in a
    real browser through Selenium instead of being downloaded statically.

    Args:
        url: Address of the page to load.

    Returns:
        The rendered page source as a string, or None if loading failed
        (a failure message is printed in that case).
    """
    driver = None
    try:
        options = Options()
        # Headless mode: do not pop up a visible browser window.
        options.add_argument('-headless')
        driver = webdriver.Firefox(options=options)
        driver.get(url)
        # Fixed wait that gives the page's scripts time to finish rendering.
        time.sleep(10)
        # page_source is the DOM after rendering, not the raw HTTP response.
        text = driver.page_source
        print("解析成功")
        return text
    except Exception:
        # Best-effort: report the failure and let the caller skip this page.
        print("解析失败")
        return None
    finally:
        # Always shut the browser down; otherwise each call leaks a Firefox
        # process.
        if driver is not None:
            driver.quit()

def parsePage(html):
    """Extract listing names and prices from rendered page HTML.

    Args:
        html: Rendered page source as a string. None (e.g. from a failed page
            load) is treated as a parse failure.

    Returns:
        A list of [name, price] pairs, or None if extraction failed (a failure
        message is printed in that case).
    """
    if html is None:
        print("信息存储失败")
        return None

    soup = BeautifulSoup(html, 'html.parser')
    # Short pause kept from the original flow; presumably it throttles
    # successive page loads -- TODO confirm it is still needed.
    time.sleep(2)

    try:
        # Listing names: text of every <div> carrying the name CSS class.
        # NOTE(review): these class names look auto-generated and may change
        # whenever the site is redeployed.
        names = [tag.text for tag in soup.find_all("div", class_="_qrfr9x5")]

        # Listing prices: the second <span> inside each price <div>.
        prices = []
        for price_block in soup.find_all("div", class_="_1ixtnfc"):
            spans = price_block.find_all('span')
            prices.append(spans[1].text)
    except (IndexError, AttributeError):
        # A price block without a second <span> means the markup is not what
        # this parser expects; give up on this page.
        print("信息存储失败")
        return None

    # Pair names with prices by position. zip() stops at the shorter list, so
    # a count mismatch keeps the pairs that exist instead of raising and
    # discarding the whole page.
    result = []
    for name, price in zip(names, prices):
        result.append([name, price])
        print("信息存储成功")
    return result

def save_to_txt(result):
    """Append scraped rows to ``price_village5.txt`` in the working directory.

    Each row is written as ``<name><7 spaces><price>`` followed by a newline.
    The file is opened in append mode so earlier results are not overwritten.

    Args:
        result: Iterable of two-item rows (name, price), or None. None is
            ignored so that a failed parse does not raise here.
    """
    if result is None:
        return
    # The context manager closes the file even if a row is malformed and the
    # write raises.
    with open('price_village5.txt', 'a+', encoding='utf-8') as f:
        for row in result:
            f.write(str(row[0]) + "       " + str(row[1]) + "\n")

def main():
    """Scrape 15 result pages of Shenzhen listings and append them to disk.

    For each page the rendered HTML is fetched, names and prices are
    extracted, and the rows are appended to the output text file. A page that
    fails at any step is reported and skipped so the remaining pages are
    still processed.
    """
    # The '&current_tab_id' and '&section_offset' parameters had been mangled
    # into '¤t_tab_id' and '§ion_offset' by HTML-entity decoding
    # ('&curren' -> '¤', '&sect' -> '§'); they are restored here.
    start_url = 'https://www.airbnb.cn/s/深圳/homes?refinement_paths[]=%2Fhomes&current_tab_id=home_tab&selected_tab_id=home_tab&screen_size=large&hide_dates_and_guests_filters=false&place_id=ChIJkVLh0Aj0AzQRyYCStw1V7v0&s_tag=6kIFXvRQ&section_offset=4&items_offset='
    depth = 15
    for i in range(depth):
        # items_offset advances by 20 per page (presumably the page size).
        url = start_url + str(20 * i)
        try:
            html = getHTMLText(url)
            if html is None:
                # Page load failed and was already reported; skip this page.
                continue
            infoList = parsePage(html)
            if infoList is None:
                # Extraction failed and was already reported; skip this page.
                continue
            save_to_txt(infoList)
        except Exception as exc:
            # Best-effort crawl: report the problem instead of hiding it, then
            # move on to the next page.
            print("第" + str(i + 1) + "页处理失败: " + repr(exc))
            continue


# Run the crawl only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

图1:部分民宿信息截图(爱彼迎房源爬取价格和名称)

你可能感兴趣的:(#,python实践,python,selenium)