马上就要年底过年了,提前去网站上抢回家的票。神奇地抢到了硬座票,心里很是欣慰,今年终于不用站着回家了。
但是开心的时间总是那么短暂,一周过后我回来继续抢返程的火车票。真的是查了好几遍,都没有返程的车,多方验证之后发现没有返程的列车,所以……
网站抓取思路
打开12306的官方网站,按F12打开你的开发人员工具,找到Network标签,F5刷新一下,获取车站信息。
部分数据:var station_names ='@bjb|北京北|VAP|beijingbei|bjb|0@bjd|北京东|BOP|beijingdong|bjd|1@bji|北京|BJP|beijing|bj|2@bjn|北京南|VNP|beijingnan|bjn|3@bjx|北京西|BXP|beijingxi|bjx|4@gzn|广州南|IZQ|guangzhounan|gzn|…………
获取所有车次信息(朋友提供)
车次详细信息查询
按自己需求实现部分功能,部分功能不对外开放,仅介绍抓取思路。
# -*- coding:utf-8 -*-
'''
车次信息查询页面
https://kyfw.12306.cn/otn/queryTrainInfo/init
车次信息查询接口
https://kyfw.12306.cn/otn/queryTrainInfo/query?leftTicketDTO.train_no=5i0000G12600&leftTicketDTO.train_date=2020-01-24&rand_code=
车站信息
https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9138
车次信息
https://kyfw.12306.cn/otn/resources/js/query/train_list.js
'''
import json
import requests
class MainApp(object):
    """Download 12306 station and train-list data and export it to CSV files.

    Two JavaScript resources are fetched through a shared ``requests``
    session, parsed, and written as UTF-8 CSV files in the working directory.
    """

    STATION_NAME_URL = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js'
    TRAIN_LIST_URL = 'https://kyfw.12306.cn/otn/resources/js/query/train_list.js'

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30

    # (key in the train-list JSON, label written to the CSV), in output order.
    TRAIN_TYPES = (
        ('G', '高铁'),
        ('C', '城际高铁'),
        ('D', '动车'),
        ('Z', '直达'),
        ('T', '特快'),
        ('K', '快速'),
        ('O', '其他'),
    )

    def __init__(self):
        self.session = requests.session()

    def start(self):
        """Export the station list first, then the train list."""
        self.get_station_name()
        self.get_train_list()

    def get_train_list(self, date='2019-10-10', path='train_list.csv'):
        """Fetch the train list and write the trains of one day to a CSV file.

        Args:
            date: Top-level key of the train-list JSON to export.
            path: Destination CSV file; overwritten if it exists.

        Raises:
            KeyError: If ``date`` is not present in the downloaded data.
            ValueError: If the downloaded payload is not valid JSON.
        """
        text = self._fetch_text(self.TRAIN_LIST_URL)
        # The resource is a JS assignment ("var train_list ={...}"); keep only
        # the object literal so it can be parsed as JSON.
        brace = text.find('{')
        payload = text[brace:] if brace >= 0 else text
        trains_by_type = json.loads(payload.strip().rstrip(';'))[date]
        rows = self._train_rows(trains_by_type, date)
        self._write_csv(path, '字段1,字段2,字段3,字段4,字段5,字段6,字段7', rows)

    def get_station_name(self, path='station_name.csv'):
        """Fetch the station list and write one station per CSV row.

        Args:
            path: Destination CSV file; overwritten if it exists.
        """
        text = self._fetch_text(self.STATION_NAME_URL)
        rows = self._parse_station_names(text)
        self._write_csv(path, '字段1,字段2,字段3,字段4,字段5,字段6', rows)

    def _fetch_text(self, url):
        """Return the body of ``url`` as text, raising on HTTP error statuses."""
        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.text

    @staticmethod
    def _parse_station_names(text):
        """Split the ``station_names`` JS string into per-station field lists.

        The payload looks like ``'@bjb|北京北|VAP|beijingbei|bjb|0@bjd|...'``:
        every record starts with ``@`` and its fields are separated by ``|``.
        Records must be split on ``@`` before fields are split on ``|``;
        otherwise the last field of one record stays glued to the first field
        of the next one.
        """
        first_quote = text.find("'")
        last_quote = text.rfind("'")
        if 0 <= first_quote < last_quote:
            payload = text[first_quote + 1:last_quote]
        else:
            # No surrounding JS string literal: treat the input as the payload.
            payload = text
        return [record.split('|') for record in payload.split('@') if record.strip()]

    @staticmethod
    def _split_train_code(code):
        """Split ``'G1(北京南-上海虹桥)'`` into ``('G1', '北京南', '上海虹桥')``.

        Missing parts come back as empty strings instead of raising, so a
        single malformed entry cannot abort the whole export.
        """
        code = code.strip()
        number, paren, rest = code.partition('(')
        if not paren:
            return code, '', ''
        route = rest[:-1] if rest.endswith(')') else rest
        origin, _, destination = route.partition('-')
        return number, origin, destination

    @classmethod
    def _train_rows(cls, trains_by_type, date):
        """Build CSV rows for every known train type, in ``TRAIN_TYPES`` order.

        Types absent from ``trains_by_type`` are skipped.
        """
        rows = []
        for type_code, type_name in cls.TRAIN_TYPES:
            for train in trains_by_type.get(type_code, []):
                number, origin, destination = cls._split_train_code(
                    str(train['station_train_code']))
                rows.append([train['train_no'], type_name, type_code,
                             number, origin, destination, date])
        return rows

    @staticmethod
    def _write_csv(path, header, rows):
        """Write ``header`` and ``rows`` to ``path`` as comma-separated lines."""
        lines = [header]
        lines.extend(','.join(str(value) for value in row) for row in rows)
        # Explicit UTF-8: the data is Chinese text and must not depend on the
        # platform's default locale encoding.
        with open(path, 'w', encoding='utf-8') as csv_file:
            csv_file.write('\n'.join(lines))
if __name__ == '__main__':
    # Script entry point: build the application and run both exports.
    MainApp().start()
抓取数据存入csv,后续会入库,提供个性化的查询,也会给出快捷跳转至12306官网查询页面,直接进行购票。
模拟正常人行为进行查询获取公共数据,防止对网站造成压力(我相信我做不到)
不做商业使用,仅在个人网站展示,展示功能不保证高可用,仅作学习交流分享。