php提取12306火车信息,使用Python抓取12306火车信息,存入csv文件,仅介绍实现思路...

马上就要到年底过年了,提前去网站上抢回家的票。神奇地抢到了硬座票,心里很是欣慰,今年终于不用站着回家了。

但是开心的时间总是那么短暂,一周过后我回来继续抢返程的火车票。真的是查了好几遍,都没有返程的车,多方验证之后发现没有返程的列车,所以……

网站抓取思路

打开12306的官方网站,按F12打开你的开发人员工具,找到Network标签,F5刷新一下,获取车站信息。

部分数据:var station_names ='@bjb|北京北|VAP|beijingbei|bjb|0@bjd|北京东|BOP|beijingdong|bjd|1@bji|北京|BJP|beijing|bj|2@bjn|北京南|VNP|beijingnan|bjn|3@bjx|北京西|BXP|beijingxi|bjx|4@gzn|广州南|IZQ|guangzhounan|gzn|…………

获取所有车次信息(朋友提供)

车次详细信息查询

按自己需求实现部分功能,部分功能不对外开放,仅介绍抓取思路。

# -*- coding:utf-8 -*-

'''

车次信息查询页面

https://kyfw.12306.cn/otn/queryTrainInfo/init

车次信息查询接口

https://kyfw.12306.cn/otn/queryTrainInfo/query?leftTicketDTO.train_no=5i0000G12600&leftTicketDTO.train_date=2020-01-24&rand_code=

车站信息

https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9138

车次信息

https://kyfw.12306.cn/otn/resources/js/query/train_list.js

'''

import csv
import json

import requests

class MainApp(object):
    """Download the 12306 station list and train list and save them as CSV.

    Both resources are JavaScript files that assign one literal to a
    variable (``var station_names ='...'`` and ``var train_list ={...}``).
    The text is fetched over HTTP, the literal is extracted and parsed, and
    the result is written to ``station_name.csv`` / ``train_list.csv`` in the
    current working directory.
    """

    STATION_NAME_URL = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js'
    TRAIN_LIST_URL = 'https://kyfw.12306.cn/otn/resources/js/query/train_list.js'

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30

    # Schedule date used when get_train_list() is called without a date.
    DEFAULT_DATE = '2019-10-10'

    # (key in the per-day train data, label written to the CSV), in the
    # order the categories are written to the file.
    TRAIN_CATEGORIES = (
        ('G', '高铁'),
        ('C', '城际高铁'),
        ('D', '动车'),
        ('Z', '直达'),
        ('T', '特快'),
        ('K', '快速'),
        ('O', '其他'),
    )

    def __init__(self):
        # One session is shared by all requests made by this instance.
        self.session = requests.session()

    def start(self):
        """Fetch and save the station list, then the train list."""
        self.get_station_name()
        self.get_train_list()

    def _fetch(self, url):
        """Return the body of ``url`` as text.

        Raises whatever ``requests`` raises on a timeout or connection error,
        and ``requests.HTTPError`` on a 4xx/5xx response, so an error page is
        never parsed as data.
        """
        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.text

    @staticmethod
    def _split_train_code(station_train_code):
        """Split ``'G1(A-B)'`` into ``('G1', 'A', 'B')``.

        Missing parts come back as empty strings: a value without ``(`` yields
        ``(value, '', '')`` and a route without ``-`` yields an empty end
        station.
        """
        code, paren, route = str(station_train_code).partition('(')
        if not paren:
            return code, '', ''
        if route.endswith(')'):
            route = route[:-1]
        start_station, _, end_station = route.partition('-')
        return code, start_station, end_station

    @staticmethod
    def _parse_train_list(text):
        """Return the object literal assigned in ``train_list.js`` as a dict.

        Raises:
            ValueError: if ``text`` contains no ``{`` or the literal is not
                valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
        """
        brace = text.find('{')
        if brace == -1:
            raise ValueError('train list payload contains no JSON object')
        # Tolerate trailing whitespace and a statement-terminating semicolon.
        return json.loads(text[brace:].strip().rstrip(';'))

    @classmethod
    def _train_rows(cls, day_data, date):
        """Yield one CSV row (a list of 7 strings) per train in ``day_data``.

        ``day_data`` maps a category key (see ``TRAIN_CATEGORIES``) to a list
        of dicts with ``train_no`` and ``station_train_code`` entries.
        Categories absent from ``day_data`` are skipped.
        """
        for letter, label in cls.TRAIN_CATEGORIES:
            for train in day_data.get(letter, []):
                code, start_station, end_station = cls._split_train_code(
                    train['station_train_code'])
                yield [train['train_no'], label, letter, code,
                       start_station, end_station, date]

    # Fetch the train list.
    def get_train_list(self, date=None):
        """Write every train scheduled on ``date`` to ``train_list.csv``.

        Args:
            date: key of the day to export, as it appears in the downloaded
                data (e.g. ``'2019-10-10'``). Defaults to ``DEFAULT_DATE``.

        Raises:
            KeyError: if the downloaded data has no entry for ``date``.
            ValueError: if the downloaded text cannot be parsed.
        """
        if date is None:
            date = self.DEFAULT_DATE
        schedule = self._parse_train_list(self._fetch(self.TRAIN_LIST_URL))
        if date not in schedule:
            raise KeyError('no train list available for date %s' % date)
        # Build all rows before opening the file so a parse error does not
        # leave a truncated CSV behind.
        rows = list(self._train_rows(schedule[date], date))
        header = ['字段1', '字段2', '字段3', '字段4', '字段5', '字段6', '字段7']
        with open('train_list.csv', 'w', encoding='utf-8', newline='') as train_list_csv:
            writer = csv.writer(train_list_csv, lineterminator='\n')
            writer.writerow(header)
            writer.writerows(rows)

    @staticmethod
    def _parse_station_names(text):
        """Return the stations in ``station_name.js`` as lists of fields.

        The payload is a single-quoted string in which every record starts
        with ``@`` and its fields are separated by ``|``. Records are split
        on ``@`` first so the last field of one station is not merged with
        the first field of the next.

        Raises:
            ValueError: if ``text`` contains no single-quoted string.
        """
        first_quote = text.find("'")
        last_quote = text.rfind("'")
        if first_quote == -1 or last_quote <= first_quote:
            raise ValueError('station payload contains no quoted string')
        payload = text[first_quote + 1:last_quote]
        return [record.split('|') for record in payload.split('@') if record]

    # Fetch the station list.
    def get_station_name(self):
        """Write every station to ``station_name.csv``, one station per row.

        Raises:
            ValueError: if the downloaded text cannot be parsed.
        """
        rows = self._parse_station_names(self._fetch(self.STATION_NAME_URL))
        header = ['字段1', '字段2', '字段3', '字段4', '字段5', '字段6']
        with open('station_name.csv', 'w', encoding='utf-8', newline='') as station_name_csv:
            writer = csv.writer(station_name_csv, lineterminator='\n')
            writer.writerow(header)
            writer.writerows(rows)

if __name__ == '__main__':
    # Script entry point: build the scraper and run both downloads.
    MainApp().start()

抓取数据存入csv,后续会入库,提供个性化的查询,也会给出快捷跳转至12306官网查询页面,直接进行购票。

模拟正常人行为进行查询获取公共数据,防止对网站造成压力(我相信我做不到)

不做商业使用,仅在个人网站展示,展示功能不保证高可用,仅作学习交流分享。

你可能感兴趣的:(php提取12306火车信息)