在 POST 请求中,有一类接口通过表单数据(form data)来实现翻页和搜索,下面以艺龙为例。/20180112
此外,还可以按酒店品牌筛选(例如如家、汉庭等):对应的字段同样在 data 中,做法完全相同。/20180115
# coding: utf-8
__author__ ='姜枫渔火'
import requests, re, time, random, pandas
from fake_useragent import UserAgent
def getOnePage(url, timeout=10):
    """POST the search form to ``url`` and return the decoded JSON reply.

    The form payload and the request headers are not parameters: they are
    read from the module-level names ``data`` and ``headers``, which the
    script's main section assigns before calling this function.

    Args:
        url: Endpoint that receives the POST request.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection would block the
            scraper indefinitely.

    Returns:
        The response body parsed by ``Response.json()``.
    """
    res = requests.post(url, data=data, headers=headers, timeout=timeout)
    return res.json()
def prasePage(html, price_pattern=r'class="h_pri_num\s*">\s*(\d+)'):
    """Extract (hotel name, price) pairs from one page of search results.

    Args:
        html: Decoded JSON reply; the listing markup is read from
            ``html['value']['hotelListHtml']``.
        price_pattern: Regular expression with exactly one capture group
            that yields a price from the listing markup.
            NOTE(review): the price markup is not visible in this file, so
            the default pattern is an assumption about the site's HTML --
            confirm it against a live response or pass the right pattern.

    Returns:
        A list of ``(name, price)`` tuples of strings, in document order.
    """
    listing = html['value']['hotelListHtml']
    # The earlier pattern ended in a lazy group that could only match the
    # empty string, so each name came back as a ('name', '') tuple; a single
    # capture group returns the name as a plain string.
    hptel_name = re.findall(r'title="(.+?)">', listing)
    # The price list was referenced but never assigned (NameError on every
    # call); it is rebuilt here from ``price_pattern``.
    hptel_prince = re.findall(price_pattern, listing)
    # zip() pairs names and prices positionally and stops at the shorter
    # list instead of raising IndexError when the counts differ.
    data = list(zip(hptel_name, hptel_prince))
    print(data)
    return data
def writeToFile(data):
    """Append ``data`` as rows to ``艺龙.csv`` in the working directory.

    The rows are wrapped in a pandas DataFrame and written without a header
    line and without the index column. The file is opened in append mode,
    so successive calls add rows to the same file.

    Args:
        data: Row data accepted by ``pandas.DataFrame``; the caller in this
            script passes a list of tuples.
    """
    frame = pandas.DataFrame(data)
    print('writing')
    frame.to_csv(
        '艺龙.csv',
        mode='a+',
        index=False,
        header=False,
    )
    print("done")
# Script entry point: ask for a city and a date range, then fetch result
# pages 1-20 from the search endpoint, parse each page and append the rows
# to the CSV file.
#
# Everything stays at module level on purpose: getOnePage() reads the
# module-level names ``data`` and ``headers`` instead of taking them as
# arguments, so they must not become locals of a function.
if __name__ == '__main__':
    city = input("请输入待查询城市:")
    InData = input("请输入入住时间(xxxx-xx-xx):")
    OutData = input("请输入离开时间(xxxx-xx-xx):")

    url = 'http://hotel.elong.com/ajax/list/asyncsearch'

    # The form payload is built once; only the page index changes per page.
    # NOTE(review): "code", "cardNo" and the two token fields are fixed
    # values, presumably captured from a browser session -- confirm that
    # they are still accepted by the endpoint.
    data = {
        "code": "7140144",
        "listRequest.areaID": "",
        "listRequest.bookingChannel": "1",
        "listRequest.cardNo": "192928",
        "listRequest.checkInDate": InData + " 00:00:00",  # check-in date
        "listRequest.checkOutDate": OutData + " 00:00:00",  # check-out date
        # NOTE(review): cityID is fixed while cityName comes from user
        # input -- verify which of the two the endpoint actually uses.
        "listRequest.cityID": "0101",
        "listRequest.cityName": city,  # city or region name, e.g. Beijing
        "listRequest.customLevel": "11",
        "listRequest.distance": "20",
        "listRequest.endLat": "0",
        "listRequest.endLng": "0",
        "listRequest.facilityIds": "",
        "listRequest.highPrice": "0",
        "listRequest.hotelBrandIDs": "",
        "listRequest.isAdvanceSave": "false",
        "listRequest.isAfterCouponPrice": "true",
        "listRequest.isCoupon": "false",
        "listRequest.isDebug": "false",
        "listRequest.isLimitTime": "false",
        "listRequest.isLogin": "false",
        "listRequest.isMobileOnly": "true",
        "listRequest.isNeed5Discount": "true",
        "listRequest.isNeedNotContractedHotel": "false",
        "listRequest.isNeedSimilarPrice": "false",
        "listRequest.isReturnNoRoomHotel": "true",
        "listRequest.isStaySave": "false",
        "listRequest.isTrace": "false",
        "listRequest.isUnionSite": "false",
        "listRequest.keywords": "",
        "listRequest.keywordsType": "0",
        "listRequest.language": "cn",
        "listRequest.listType": "0",
        "listRequest.lowPrice": "0",
        "listRequest.orderFromID": "50",
        "listRequest.pageIndex": "1",  # page number; overwritten per page
        "listRequest.pageSize": "20",
        "listRequest.payMethod": "0",
        "listRequest.personOfRoom": "0",
        "listRequest.poiId": "0",
        "listRequest.promotionChannelCode": "0000",
        "listRequest.proxyID": "ZD",
        "listRequest.rankType": "0",
        "listRequest.returnFilterItem": "true",
        "listRequest.sellChannel": "1",
        "listRequest.seoHotelStar": "0",
        "listRequest.sortDirection": "1",
        "listRequest.sortMethod": "1",
        "listRequest.starLevels": "",
        "listRequest.startLat": "0",
        "listRequest.startLng": "0",
        "listRequest.taRecommend": "false",
        "listRequest.themeIds": "",
        "listRequest.ctripToken": "1c06a555-04ce-4884-aa05-e6f92ad0e84e",
        "listRequest.elongToken": "jc94shhj-d5a1-4092-8060-828b168dbb61",
    }

    # One UserAgent object is enough; ``.random`` yields a fresh user-agent
    # string on every access, so each page can still get its own.
    ua = UserAgent()

    # No hard-coded 'Content-Length' here: requests computes that header
    # from the encoded form body, and a fixed value would only be stale.
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        # 'Cookie':'……61b8-48a1-b398-8b9ec1903f05……',
        'Host': 'hotel.elong.com',
        'Origin': 'http://hotel.elong.com',
        'Pragma': 'no-cache',
        'Proxy-Connection': 'keep-alive',
        'Referer': 'http://hotel.elong.com/beijing/',
        'User-Agent': ua.random,
        'X-Requested-With': 'XMLHttpRequest',
    }

    for page in range(1, 21):
        n = str(page)
        print("第" + n + "页")

        data["listRequest.pageIndex"] = n
        headers['User-Agent'] = ua.random

        html = getOnePage(url)
        # Parsed rows get their own name so the form payload in ``data``
        # (read by getOnePage) is never overwritten.
        rows = prasePage(html)
        writeToFile(rows)

        # Random 1-4 second pause between pages.
        time.sleep(random.randint(1, 4))