# Python爬虫爬取携程国际机票航班信息,返回JSON串

# -*- coding: utf-8 -*-
import requests, json
import hashlib
import re

def get_index(paramter, timeout=30):
    """Load the Ctrip one-way search page and extract its embedded search criteria.

    Args:
        paramter: A mapping, or a JSON string encoding one, with the keys
            ``dep_code`` and ``arr_code`` (departure / arrival three-letter
            codes), ``date`` (departure date, e.g. ``"2019-10-25"``) and
            ``adult``, ``child``, ``infant`` (passenger counts).
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up and raises.

    Returns:
        A ``(post_dict, date)`` tuple: the JSON value assigned to
        ``GlobalSearchCriteria`` in the page source, and the ``date`` value
        taken from ``paramter`` unchanged.

    Raises:
        KeyError: If ``paramter`` lacks one of the required keys.
        IndexError: If the page has no ``GlobalSearchCriteria =`` assignment.
        ValueError: If the text after that assignment is not valid JSON.
    """
    # A service passes the query as a JSON string, a script passes a dict;
    # accept both instead of toggling a commented-out line.
    if isinstance(paramter, (str, bytes, bytearray)):
        paramter = json.loads(paramter)

    # The three-letter codes must be lowercase in the URL, so normalise them.
    dep_code = str(paramter["dep_code"]).lower()
    arr_code = str(paramter["arr_code"]).lower()
    date = paramter["date"]
    adult = paramter["adult"]
    child = paramter["child"]
    infant = paramter["infant"]

    # Example of the resulting URL:
    # https://flights.ctrip.com/international/search/oneway-bjs-sel?depdate=2019-05-21&cabin=y_s&adult=1&child=0&infant=0
    urls = (
        'https://flights.ctrip.com/international/search/oneway-{}-{}'
        '?depdate={}&cabin=y_s&adult={}&child={}&infant={}'
    ).format(dep_code, arr_code, date, adult, child, infant)

    # The header name has to be the dict key; the previous
    # {'headers': 'user-agent: ...'} sent a header literally called "headers".
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
    }
    response = requests.get(urls, headers=headers, timeout=timeout)

    found = re.search(r'GlobalSearchCriteria =(.+);', response.text)
    if found is None:
        # IndexError is what the former ``re.findall(...)[0]`` raised.
        raise IndexError('GlobalSearchCriteria assignment not found in the search page')

    # The capture is greedy and may run past the first ';' on the line, so
    # decode only the first JSON value and ignore whatever follows it.
    post_dict, _ = json.JSONDecoder().raw_decode(found.group(1).lstrip())
    return post_dict, date


def get_data(post_dicts, timeout=30):
    """POST the search criteria to Ctrip's batchSearch API and return the raw reply.

    Args:
        post_dicts: The ``(post_dict, date)`` pair returned by ``get_index``.
            ``post_dict`` must provide ``transactionID``, ``cabin`` and a
            non-empty ``flightSegments`` list; it is not modified.
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up and raises.

    Returns:
        The response body as text.

    Raises:
        KeyError: If a required key is missing from the criteria.
        IndexError: If ``flightSegments`` is empty.
    """
    # Work on a shallow copy so the caller's criteria dict is left untouched.
    criteria = dict(post_dicts[0])
    date = post_dicts[1]
    trans_id = criteria['transactionID']
    first_segment = criteria['flightSegments'][0]
    url = "https://flights.ctrip.com/international/search/api/search/batchSearch"

    # The "sign" header is the MD5 hex digest of
    # transaction id + departure city code + arrival city code + date.
    sign_value = (
        trans_id
        + first_segment['departureCityCode']
        + first_segment['arrivalCityCode']
        + date
    )
    sign = hashlib.md5(sign_value.encode('utf-8')).hexdigest()

    # The request body is the scraped criteria plus these derived top-level
    # fields, most of them copied up from the first flight segment.
    criteria.update({
        "flightWayEnum": "OW",  # presumably "one way" -- TODO confirm
        "arrivalProvinceId": first_segment["arrivalProvinceId"],
        "arrivalCountryName": first_segment["arrivalCountryName"],
        "cabinEnum": criteria["cabin"],
        "departCountryName": first_segment["departureCountryName"],
        "segmentNo": len(criteria["flightSegments"]),
        "departureCityId": first_segment["departureCityId"],
        "isMultiplePassengerType": 0,
    })

    # No content-length entry: requests derives it from the actual body,
    # whereas a hard-coded value is wrong for any payload of another size.
    headers = {
        'origin': "https://flights.ctrip.com",
        'sign': sign,
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
        'content-type': "application/json;charset=UTF-8",
        'accept': "application/json",
        'transactionid': trans_id,
        'Host': "flights.ctrip.com",
        'Connection': "keep-alive",
        'cache-control': "no-cache"
    }

    response = requests.post(url, data=json.dumps(criteria), headers=headers, timeout=timeout)
    return response.text


if __name__ == '__main__':
    # Sample run: one-way "bbk" -> "bjs" on 2019-10-25 for 1 adult,
    # 2 children and 1 infant; prints the raw API response text.
    sample_query = {
        "dep_code": "bbk",
        "arr_code": "bjs",
        "date": "2019-10-25",
        "adult": 1,
        "child": 2,
        "infant": 1,
    }
    criteria_and_date = get_index(sample_query)
    result_text = get_data(criteria_and_date)
    print(result_text)

# 你可能感兴趣的:(机票,Python,爬虫)