Python利用Requests库写爬虫(二)

学会了Request库的基本用法,接下来我想利用Requests来抓取火车票数据。

基本用法:
Python利用Requests库写爬虫(一)

根据观察,数据接口如下:
https://kyfw.12306.cn/otn/lcxxcx/query?purpose_codes=ADULT&queryDate=2015-05-23&from_station=NCG&to_station=CZQ
返回的是2015-5-23南昌到郴州的火车票信息,格式为json。

返回的数据的如下(只截取了一部分):
{"validateMessagesShowId":"_validatorMessage","status":true,"httpstatus":200,"data":{"datas":[{"train_no":"5u000G140101","station_train_code":"G1401","start_station_telecode":"NXG","start_station_name":"南昌西","end_station_telecode":"IZQ","end_station_name":"广州南","from_station_telecode":"NXG","from_station_name":"南昌西","to_station_telecode":"ICQ","to_station_name":"郴州西","start_time":"07:29","arrive_time":"10:42","day_difference":"0","train_class_name":"","lishi":"03:13","canWebBuy":"Y","lishiValue":"193","yp_info":"O030850182M0507000009097450000","control_train_day":"20991231","start_train_date":"20150523","seat_feature":"O3M393","yp_ex":"O0M090","train_seat_feature":"3","seat_types":"OM9","location_code":"G2","from_station_no":"01","to_station_no":"11","control_day":59,"sale_time":"0930","is_support_card":"1","note":"","gg_num":"--","gr_num":"--","qt_num":"--","rw_num":"--","rz_num":"--","tz_num":"--","wz_num":"--","yb_num":"--","yw_num":"--","yz_num":"--","ze_num":"182","zy_num":"无","swz_num":"无"}}

看着很乱,我们稍加整理:

{
    "validateMessagesShowId":"_validatorMessage",
    "status":true,"httpstatus":200,
    "data":{
            "datas":[
                        {
                             "train_no":"5u000G140101",
                             "station_train_code":"G1401",
                             "start_station_telecode":"NXG",
                             "start_station_name":"南昌西",
                             "end_station_telecode":"IZQ",
                             "end_station_name":"广州南",
                             "from_station_telecode":"NXG",
                             "from_station_name":"南昌西",
                             "to_station_telecode":"ICQ",
                             "to_station_name":"郴州西",
                             "start_time":"07:29",
                             "arrive_time":"10:42",
                             "day_difference":"0",
                             ...
                             "swz_num":"无"
                        },
                        {
                              ...
                        }
                    ]
}

这样就比较清晰了,代码如下,提取自己需要的信息。

#-*- coding:utf-8 -*-
import requests
import json

class trainTicketsSprider:

    def getTicketsInfo(self,purpose_codes,queryDate,from_station,to_station):
        self.url = 'https://kyfw.12306.cn/otn/lcxxcx/query?purpose_codes=%s&queryDate=%s&from_station=%s&to_station=%s' %(purpose_codes,queryDate,from_station,to_station)
        self.headers = { 
                    "Accept":"text/html,application/xhtml+xml,application/xml;",
                    "Accept-Encoding":"gzip",
                    "Accept-Language":"zh-CN,zh;q=0.8",
                    "User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36"
                  }
        self.TicketSession = requests.Session()
        self.TicketSession.verify = False #关闭https验证   
        self.TicketSession.headers = self.headers
        try:
            self.resp_json = self.TicketSession.get(self.url)
            self.ticketsDatas = json.loads(self.resp_json.text)["data"]["datas"]
            return self.ticketsDatas
        except Exception,e:
            print e

def isZero(num):
    if num == '--' or '无':
        return '0'
    else:
        return num
    
def main():
    purpose_codes = 'ADULT'
    queryDate = '2015-05-23'
    from_station = 'NCG'
    to_station = 'CZQ'
    TicketSprider = trainTicketsSprider()
    res= TicketSprider.getTicketsInfo(purpose_codes,queryDate,from_station,to_station)
    for i,ticketInfo in enumerate(res):        
                print u"车次:%s" %ticketInfo["station_train_code"]
                print u"起始站:%s" %ticketInfo["start_station_name"]
                print u"目的地:%s" %ticketInfo["to_station_name"]
                print u"开车时间:%s" %ticketInfo["start_time"]
                print u"到达时间:%s" %ticketInfo["arrive_time"]
                print u"二等座还剩:%s张票" %isZero(ticketInfo["ze_num"])
                print u"硬座还剩:%s张票" %isZero(ticketInfo["yz_num"])
                print u"硬卧还剩:%s张票" %isZero(ticketInfo["yw_num"])
                print u"无座还剩:%s张票" %isZero(ticketInfo["wz_num"])
                print u"是否有票:%s" %ticketInfo["canWebBuy"]
                print "**********************************"
                
                
if __name__ == '__main__':
    main()

Github地址

你可能感兴趣的:(Python利用Requests库写爬虫(二))