# The site has anti-scraping measures, so for now only one page can be crawled.
import requests
import re
import pandas as pd
import time
def get_data(page, timeout=10):
    """Fetch one page of hotel search results and extract three field lists.

    Posts a fixed search form to the eLong async-search endpoint, reads the
    HTML fragment stored at ``value.hotelListHtml`` in the JSON reply, and
    pulls hotel names, prices and addresses out of it with regular
    expressions.

    Args:
        page: Value sent as ``listRequest.pageIndex``.
        timeout: Seconds handed to ``requests.post`` so a stalled connection
            raises instead of blocking forever. Defaults to 10.

    Returns:
        A 3-tuple ``(hotel_name, hotel_price, hotel_address)``; each element
        is the list produced by ``re.findall`` for that field.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON lacks ``value`` or ``hotelListHtml``.
    """
    url = "http://hotel.elong.com/ajax/list/asyncsearch"

    # Search form. Everything except the page index is hard-coded.
    # NOTE(review): "code", "cardNo" and the two tokens look like values
    # captured from a browser session and may expire — confirm.
    data = {
        "code": "7809114",
        "listRequest.areaID": "",
        "listRequest.bookingChannel": 5,
        "listRequest.cardNo": "192928",
        "listRequest.checkInDate": "2018-03-19 00:00:00",
        "listRequest.checkOutDate": "2018-03-20 00:00:00",
        "listRequest.cityID": "0401",
        "listRequest.cityName": "重庆市",
        "listRequest.customLevel": "11",
        "listRequest.distance": "20",
        "listRequest.endLat": 0,
        "listRequest.endLng": 0,
        "listRequest.facilityIds": "",
        "listRequest.highPrice": 0,
        "listRequest.hotelBrandIDs": "",
        "listRequest.isAdvanceSave": "false",
        "listRequest.isAfterCouponPrice": "true",
        "listRequest.isCoupon": "false",
        "listRequest.isDebug": "false",
        "listRequest.isLimitTime": "false",
        "listRequest.isLogin": "false",
        "listRequest.isMobileOnly": "true",
        "listRequest.isNeed5Discount": "true",
        "listRequest.isNeedNotContractedHotel": "false",
        "listRequest.isNeedSimilarPrice": "false",
        "listRequest.isReturnNoRoomHotel": "true",
        "listRequest.isStaySave": "false",
        "listRequest.isTrace": "false",
        "listRequest.isUnionSite": "false",
        "listRequest.keywords": "",
        "listRequest.keywordsType": 0,
        "listRequest.language": "cn",
        "listRequest.listType": 0,
        "listRequest.lowPrice": 0,
        "listRequest.orderFromID": "50793",
        "listRequest.pageIndex": page,
        "listRequest.pageSize": 20,
        "listRequest.payMethod": 0,
        "listRequest.personOfRoom": 0,
        "listRequest.poiId": 0,
        "listRequest.promotionChannelCode": "0000",
        "listRequest.proxyID": "ZD",
        "listRequest.rankType": 0,
        "listRequest.returnFilterItem": "true",
        "listRequest.sellChannel": 1,
        "listRequest.seoHotelStar": 0,
        "listRequest.sortDirection": 1,
        "listRequest.sortMethod": 1,
        "listRequest.starLevels": "",
        "listRequest.startLat": 0,
        "listRequest.startLng": 0,
        "listRequest.taRecommend": "false",
        "listRequest.themeIds": "",
        "listRequest.ctripToken": "815b07a9-3f97-4ae0-965c-e8d9d3b9a57f",
        "listRequest.elongToken": "jeww06u3-7967-4a47-9e59-91d212f31e82",
    }

    # Browser-like headers. Content-Length is deliberately not set here:
    # requests derives it from the encoded form body, so a hard-coded value
    # would only be a stale number that gets overridden.
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Host": "hotel.elong.com",
        "Origin": "http://hotel.elong.com",
        "Pragma": "no-cache",
        "Referer": "http://hotel.elong.com/chongqing/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }

    response = requests.post(url, data=data, headers=headers, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON/key error when
    # the server rejects the request.
    response.raise_for_status()

    # The hotel list arrives as an HTML fragment embedded in the JSON reply.
    fragment = response.json()['value']['hotelListHtml']

    hotel_name = re.findall(r'target="_blank" title="(.*?)">', fragment)
    # NOTE(review): this pattern has no literal anchors, so it matches the
    # empty string at every position and yields only empty strings. The
    # markup that should surround the price appears to be missing from the
    # pattern — restore it from the live HTML before relying on this column.
    hotel_price = re.findall(r'(.*?)', fragment)
    hotel_address = re.findall(r'data-hoteladdress="(.*?)" >', fragment)

    # Return hotel names, hotel prices and hotel addresses.
    return hotel_name, hotel_price, hotel_address
if __name__ == '__main__':
    # Script entry point: request pages 0-9 one after another, merge the
    # per-page columns, and write the result to hotel.csv.
    names, prices, addresses = [], [], []

    for page_no in range(10):
        page_names, page_prices, page_addresses = get_data(page_no)
        names += page_names
        prices += page_prices
        addresses += page_addresses
        # Wait one second after each request before continuing.
        time.sleep(1)
        print("已完成第" + str(page_no) + "页爬取")

    # Column order in the CSV follows the insertion order of this dict.
    columns = {'酒店名称': names, '酒店价格': prices, '酒店地址': addresses}
    table = pd.DataFrame(columns)
    # The utf_8_sig codec prefixes the file with a UTF-8 byte-order mark.
    table.to_csv("hotel.csv", index=False, sep=',', encoding="utf_8_sig")