之前写过一个12306的爬虫,当时可以运行,最近发现不行了:返回的数据不对,可能是cookie的问题(也可能还需要其他参数,我不确定,但不带cookie就请求不到数据)。修改之后,运行成功。
对于返回的数据进行了提取,并再次发送请求,得到车票的其他数据。
1,获得火车的经过的站及相关数据。
2,获得火车的票价。
本来觉得应该写窗体的,自己实力不行,有待学习,以后再来完成。
该写的都写上,cookie,headers之类的。直接给出代码
# a: travel date, formatted like 2023-01-10
# b: code of the departure station
# c: code of the destination station
url = f'https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date={a}&leftTicketDTO.from_station={b}&leftTicketDTO.to_station={c}&purpose_codes=ADULT'
# The author reports that the query returns no usable data unless a Cookie header is sent.
self.headers = {
'Cookie': f'_jc_save_toStation={b}',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.76',
}
r = requests.get(url=url, headers=self.headers)
results = r.json()['data']['result']
# results holds the raw records returned by the query;
# they are analysed below
需要提供的参数有
1,出发时间
2,出发地代号
3,目的地代号
"""
城市代码
url=https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9002
怎么获得
1,打开开发者工具(中文)
2,源代码
3,在js中
"""
获取到我保存到csv文件中,也可以放到数据库中。
用pandas提取csv中的数据,即参数
返回的数据是以"|"进行分割的,如下
data="vdeVLhSURm4Q1RP66du87ohMRCQ3MLwq0kfa3TJCnHalBU63NmdSl3%2Bj0rKw2GIMFpDmXzicLXbe%0Arj7VwfiSslzB2E330A4AhBXW%2FSiLQW29lNCn2ZoamKWMVLFSt9f7vxwjtgMqvoqMpleoviO0aZOG%0AX75YLCGxZv%2Bj8obzGYdP%2FjDdI1CxxnTonzzXG7GI0pM0YEwsrGSYPgTnlXRAjzA6WrqZn%2BVHxaQF%0AgL0vdZ2sxItfSR2yGSAYacvObVlZXkCJoHAJjrCXT25HnBhsUCrO%2FRQ%2F97qKbBcN5F3JnPxWZZzw%0AfwVu%2F0DAqHBn69up|预订|76000G219505|G2195|ICW|AOH|ICW|CWQ|06:10|13:22|07:12|Y|dHywp8bnO4BIQNw1cH%2FaVsWHeAoP7nmv6%2FF4lR7Tvkkl2UtE|20230114|3|W2|01|14|1|0|||||||||||有|5|2||90M0O0|9MO|1|0||9178550002M095200005O057850021|0|||||1|0#0#0#0#z||"
用split进行分开,得到一个列表,里面有49个元素
data.split('|')
配合enumerate 函数找到有用的数据,经过多次分析,其中有20条数据是有用的
给出数据和对应的含义
"""
#t就是data
train_no = t[2] #火车参数
che = t[3] # 车次
startcode = t[4] #出发地代号
endcode = t[5] # 目的地代号
from_station_no = t[16] # 发车地代号
to_station_no = t[17] # 终点代号
seat_types = t[35] # 座位类型
starttime = t[8] # 出发时间
endtime = t[9] # 到站时间
duration_time = t[10] # 持续时间
special_shop_seat = t[32] or t[25] or '--' # 商务座/特等座,二者数据所处位置不一样
first_seat = t[31] or '--' # 一等座
second_seat = t[30] or '--' # 二等座
high_sleep = t[21] or '--' # 高级软卧
soft_sleep = t[23] or '--' # 软卧
dong_sleep = t[33] or '--' # 动卧
hard_sleep = t[28] or '--' # 硬卧
sort_seat = t[24] or '--' # 软座
hart_seat = t[29] or '--' # 硬座
no_seat = t[26] or '--' # 站票
"""
把数据用 prettytable 进行展示,并且把这些数据存到一个新的列表中,为后面的操作提供需要的参数。
# import prettytable as pt
# tb = pt.PrettyTable()
# Column names, one per value of a row and in the same order as the row.
# They may differ slightly from the comments on the fields; the data is the same.
# A leading sequence-number column is added so a train can be picked by number.
tb.field_names = ['序号', '火车参数', '车次', '出发地代号', '终点代号', '出发地代码', '终点代码', '座位信息', '开始时', '结束时', '持续时间', '商务座/特等座', '一等座', '二等座', '高级软卧', '软卧', '动卧', '硬卧', '软座', '硬座', '站票']
# 如图
数据太多,没对齐。。。
# a: train parameter (sent as train_no)
# b: code of the starting station
# c: code of the terminal station
# d: travel date
url_1 = f'https://kyfw.12306.cn/otn/czxx/queryByTrainNo?train_no={a}&from_station_telecode={b}&to_station_telecode={c}&depart_date={d}'
resp = requests.get(url=url_1, headers=self.headers)
data = resp.json()['data']['data']
返回了数据,进行提取,用 prettytable 来展示
# Columns: arrival time, station, stop number, stopover time, departure time
tb.field_names = (['到站时间', '站点', '站数', '停留时间', '出站时间'])
没有多说的。唯一要说的就是代号,是火车发车的起点代号。
展示一下
# a: train parameter (sent as train_no)
# b: departure code (sent as from_station_no)
# c: destination code (sent as to_station_no)
# d: seat types
# e: travel date
# NOTE: b and c are a different kind of code from the ones used by the other requests
url = f'https://kyfw.12306.cn/otn/leftTicket/queryTicketPrice?train_no={a}&from_station_no={b}&to_station_no={c}&seat_types={d}&train_date={e}'
r = requests.get(url=url, headers=self.headers)
因为座位的不同,票价不一样,而导致座位类型不同,返回的数据也不同。
而座位的类型,d
if d == '1341':
tb.field_names = (['软卧价格', '硬卧价格', '硬座价格', '无座价格'])
soft_sleep_price = r.json()['data']['A4']
hard_sleep_price = r.json()['data']['A3']
hart_seat_price = r.json()['data']['A1']
no_seat_price = r.json()['data']['WZ']
tb.add_row([soft_sleep_price, hard_sleep_price, hart_seat_price, no_seat_price])
return tb
if d == '1346':
tb.field_names = (['高级软卧软卧价格', '硬卧价格', '硬座价格', '无座价格'])
high_soft_sleep_price = r.json()['data']['A6']
soft_sleep_price = r.json()['data']['A4']
hard_sleep_price = r.json()['data']['A3']
hard_seat_price = r.json()['data']['A1']
tb.add_row([high_soft_sleep_price, soft_sleep_price, hard_sleep_price, hard_seat_price])
return tb
if d == '134':
tb.field_names = (['软卧价格', '硬卧价格', '硬座价格'])
soft_sleep_price = r.json()['data']['A4']
hard_sleep_price = r.json()['data']['A3']
hart_seat_price = r.json()['data']['A1']
tb.add_row([soft_sleep_price, hard_sleep_price, hart_seat_price])
return tb
if d == '9MO' or 'OM9':
tb.field_names = (['商务座价格', '一等座', '二等座'])
special_price = r.json()['data']['A9']
first_seat_price = r.json()['data']['M']
second_seat = r.json()['data']['O']
tb.add_row([special_price, first_seat_price, second_seat])
return tb
if d == 'MOO':
tb.field_names = (['一等座', '二等座'])
first_seat_price = r.json()['data']['M']
second_seat = r.json()['data']['O']
tb.add_row([first_seat_price, second_seat])
return tb
if d=='FOO':
tb.field_names = (['动卧', '二等座','无座'])
dong_sleep = r.json()['data']['F']
second_seat = r.json()['data']['O']
no_seat = r.json()['data']['O']
tb.add_row([dong_sleep, second_seat,no_seat])
return tb
if d=='FO':
tb.field_names = (['动卧', '二等座'])
dong_sleep = r.json()['data']['F']
second_seat = r.json()['data']['O']
tb.add_row([dong_sleep, second_seat])
return tb
if d=='F':
tb.field_names = (['动卧'])
dong_sleep = r.json()['data']['F']
tb.add_row([dong_sleep])
return tb
if d=='MOP':
tb.field_names = (['特等座', '二等座','一等座'])
special_seat = r.json()['data']['P']
second_seat = r.json()['data']['O']
first_seat = r.json()['data']['M']
tb.add_row([special_seat, second_seat,first_seat])
return tb
if d=='IJO':
tb.field_names = (['二等座', '软卧','硬卧'])
soft_sleep = r.json()['data']['AI']
second_seat = r.json()['data']['O']
hard_sleep = r.json()['data']['AJ']
tb.add_row([second_seat, soft_sleep,hard_sleep])
return tb
d 有很多种取值,会因实际情况而变化;我感觉还没有全部遇到,应该还有其他情况。。。
而且这样写,重复的代码实在太多,有待修改,有些情况应该还没遇到,还有其他座位类型。
展示一下,有待修改。
不知道怎么把视频传到csdn上,传到b站上了,顺便开始当up主,哈哈哈哈哈哈
视频
有待修改,还要结合pyqt5,不然终究感觉少了什么
import requests
import prettytable as pt
import pandas as pd
import datetime
# Module-level PrettyTable shared by the ottzs methods: they set its columns,
# add rows to it, print it and clear it between views.
tb = pt.PrettyTable()
def today(a: int) -> datetime.date:
    """Return the date *a* days after the current date.

    Args:
        a: Day offset; 0 gives the current date, negative values go back.

    Returns:
        The shifted ``datetime.date``.
    """
    # Computed in one expression so no local shadows the function's own name.
    return datetime.date.today() + datetime.timedelta(days=a)
class ottzs:
    """Interactive console client for 12306 train queries.

    ``main`` drives the workflow: pick a travel date, enter the departure and
    arrival station names, list the matching trains, then optionally show the
    ticket prices or the stop list of one of the listed trains.

    Every table is rendered through the module-level ``tb`` PrettyTable.
    Station names are resolved by ``get_code`` from a CSV file whose location
    is read from the name ``path`` (not defined in the visible part of this
    file; it must exist at module level).
    """

    # Seconds to wait for the server before a request is abandoned.
    _TIMEOUT = 10

    # Column headers of the train list, in the exact order of the values
    # produced by _parse_train (one header per value).
    _TRAIN_FIELDS = (
        '序号 ', '火车参数 ', '车次 ', '出发地代号 ', '终点代号 ', '出发地代码 ',
        '终点代码 ', '座位信息 ', '开始时 ', '结束时 ', ' 持续时间 ',
        '商务座/特等座 ', '一等座 ', '二等座 ', '高级软卧 ', '软卧 ', '动卧 ',
        '硬卧 ', '软座 ', '硬座 ', '站票 ',
    )

    # Seat-type code -> (column headers, keys read from the price payload).
    # Headers and keys are index-aligned.
    _PRICE_LAYOUTS = {
        '1341': (('软卧价格', '硬卧价格', '硬座价格', '无座价格'), ('A4', 'A3', 'A1', 'WZ')),
        '1346': (('高级软卧价格', '软卧价格', '硬卧价格', '硬座价格'), ('A6', 'A4', 'A3', 'A1')),
        '134': (('软卧价格', '硬卧价格', '硬座价格'), ('A4', 'A3', 'A1')),
        '9MO': (('商务座价格', '一等座', '二等座'), ('A9', 'M', 'O')),
        'OM9': (('商务座价格', '一等座', '二等座'), ('A9', 'M', 'O')),
        'MOO': (('一等座', '二等座'), ('M', 'O')),
        # The no-seat column shows the 'O' price, as in the original code.
        'FOO': (('动卧', '二等座', '无座'), ('F', 'O', 'O')),
        'FO': (('动卧', '二等座'), ('F', 'O')),
        'F': (('动卧',), ('F',)),
        'MOP': (('特等座', '二等座', '一等座'), ('P', 'O', 'M')),
        'IJO': (('二等座', '软卧', '硬卧'), ('O', 'AI', 'AJ')),
    }

    def __init__(self):
        # Request headers; filled in by get_data and reused by price/road.
        self.headers = None
        # Travel date chosen in main (a date, or '-1' when the user quits).
        self.times = None

    def get_time(self):
        """Return the selectable dates keyed by menu number.

        Keys 0-14 map to the current date plus that many days; key 15 maps
        to the string ``'-1'``, which ``main`` treats as "quit".
        """
        a = {offset: today(offset) for offset in range(15)}
        a[15] = '-1'
        return a

    def choose_time(self):
        """Prompt for a menu number and return the matching entry of get_time.

        Returns:
            A date for 0-14, or ``'-1'`` for 15.

        Raises:
            ValueError: The input is not an integer.
            KeyError: The number is outside 0-15.
        """
        t = self.get_time()
        lines = ['需要输入需要查询的车票的时间,序号如下.', '请选择序号', '序号----时间']
        lines.extend(f'{offset}----{t[offset]}' for offset in range(15))
        # The trailing empty item makes the prompt end with a newline.
        lines.extend(['15-----退出', '请输入序号:', ''])
        choose = int(input('\n'.join(lines)))
        return t[choose]

    def get_code(self, a):
        """Look up the station code for station name *a* in the CSV at ``path``.

        The CSV is indexed by its ``地点`` column and the value of the ``代码``
        column is returned. The codes come from
        https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9002

        Raises:
            KeyError: *a* is not present in the ``地点`` column.
        """
        s = pd.read_csv(path, index_col='地点')
        return s.loc[f'{a}']['代码']

    @staticmethod
    def _parse_train(num, raw):
        """Turn one '|'-separated result record into a table row.

        Args:
            num: Sequence number shown in the first column.
            raw: One record string from the query result.

        Returns:
            A list of 21 values aligned with ``_TRAIN_FIELDS``. Empty seat
            fields are shown as ``'--'``.

        Raises:
            IndexError: The record has fewer fields than are read here.
        """
        t = raw.split('|')
        return [
            num,
            t[2],    # train parameter (train_no)
            t[3],    # train number
            t[4],    # start code
            t[5],    # end code
            t[16],   # from_station_no
            t[17],   # to_station_no
            t[35],   # seat types
            t[8],    # departure time
            t[9],    # arrival time
            t[10],   # duration
            t[32] or t[25] or '--',  # business / premier seat (two positions)
            t[31] or '--',  # first class
            t[30] or '--',  # second class
            t[21] or '--',  # deluxe soft sleeper
            t[23] or '--',  # soft sleeper
            t[33] or '--',  # EMU sleeper
            t[28] or '--',  # hard sleeper
            t[24] or '--',  # soft seat
            t[29] or '--',  # hard seat
            t[26] or '--',  # standing
        ]

    @classmethod
    def _price_table(cls, d, data):
        """Return ``(headers, row)`` for seat-type code *d*, or None if unknown.

        Prices missing from *data* are shown as ``'--'``.
        """
        layout = cls._PRICE_LAYOUTS.get(d)
        if layout is None:
            return None
        names, keys = layout
        return list(names), [data.get(key, '--') for key in keys]

    @staticmethod
    def _select_train(e, text):
        """Return the row of *e* for the 1-based number in *text*, else None.

        Rejects non-numeric text and numbers outside ``1..len(e)``, so an
        input of 0 can no longer wrap around to the last row via index -1.
        """
        try:
            index = int(text) - 1
        except ValueError:
            return None
        if 0 <= index < len(e):
            return e[index]
        return None

    def get_data(self, a, b, c):
        """Query the trains for date *a* from station code *b* to *c*.

        Prints the train list through ``tb`` and stores the request headers
        in ``self.headers`` for the later price/stop-list requests.

        Returns:
            A list of rows (see ``_parse_train``) on success. On any failure
            the caught exception object is returned instead of being raised;
            ``main`` relies on this to tell success from failure.
        """
        try:
            tb.field_names = list(self._TRAIN_FIELDS)
            url = f'https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date={a}&leftTicketDTO.from_station={b}&leftTicketDTO.to_station={c}&purpose_codes=ADULT'
            # NOTE(review): the cookie is named "toStation" but carries the
            # departure code b; kept unchanged because the author reports the
            # query fails without this header - confirm before altering.
            self.headers = {
                'Cookie': f'_jc_save_toStation={b}',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.76',
            }
            r = requests.get(url=url, headers=self.headers, timeout=self._TIMEOUT)
            results = r.json()['data']['result']
            acc = []
            for num, raw in enumerate(results, start=1):
                row = self._parse_train(num, raw)
                tb.add_row(list(row))
                acc.append(row)
            print(tb)
            return acc
        except Exception as e:
            return e

    def price(self, a, b, c, d, e):
        """Fetch the ticket prices of one train and put them into ``tb``.

        Args:
            a: Train parameter (train_no).
            b: from_station_no of the train row.
            c: to_station_no of the train row.
            d: Seat-type code of the train row; selects the table layout.
            e: Travel date.

        Returns:
            ``tb`` with one price row, or None when *d* has no known layout
            (no request is sent in that case).
        """
        if d not in self._PRICE_LAYOUTS:
            return None
        url = f'https://kyfw.12306.cn/otn/leftTicket/queryTicketPrice?train_no={a}&from_station_no={b}&to_station_no={c}&seat_types={d}&train_date={e}'
        r = requests.get(url=url, headers=self.headers, timeout=self._TIMEOUT)
        # Parse the payload once instead of once per column.
        names, row = self._price_table(d, r.json()['data'])
        tb.field_names = names
        tb.add_row(row)
        return tb

    def road(self, a, b, c, d):
        """Fetch the stop list of one train and put it into ``tb``.

        Args:
            a: Train parameter (train_no).
            b: Start code of the train row.
            c: End code of the train row.
            d: Travel date.

        Returns:
            ``tb`` with one row per stop.
        """
        tb.field_names = (['到站时间', '站点', '站数', '停留时间', '出站时间'])
        url_1 = f'https://kyfw.12306.cn/otn/czxx/queryByTrainNo?train_no={a}&from_station_telecode={b}&to_station_telecode={c}&depart_date={d}'
        resp = requests.get(url=url_1, headers=self.headers, timeout=self._TIMEOUT)
        for stop in resp.json()['data']['data']:
            tb.add_row([
                stop['arrive_time'],
                stop['station_name'],
                stop['station_no'],
                stop['stopover_time'],
                stop['start_time'],
            ])
        return tb

    def success(self, e):
        """Menu loop over the train rows *e* returned by get_data.

        '1' shows prices and '2' shows the stop list of a train picked by its
        sequence number; '-1' leaves the loop; anything else (including '0')
        shows the menu again.
        """
        while True:
            print(
                '0,重新选择\n'
                '1,查看价格\n'
                '2,火车路程\n'
            )
            f = input('请输入(-1退出):')
            if f == '-1':
                break
            if f not in ('1', '2'):
                continue
            xunhao = self._select_train(e, input('请输入火车的序号:'))
            if xunhao is None:
                print('序号无效,请重新输入')
                continue
            if f == '2':
                print(self.road(xunhao[1], xunhao[3], xunhao[4], self.times))
            else:
                table = self.price(xunhao[1], xunhao[5], xunhao[6], xunhao[7], self.times)
                if table is None:
                    # price() has no layout for this seat-type code.
                    print(f'暂不支持的座位类型: {xunhao[7]}')
                else:
                    print(table)
            # Reset the shared table so the next view starts empty.
            tb.clear()

    def main(self):
        """Run the interactive session until the user quits."""
        while True:
            self.times = self.choose_time()
            if self.times == '-1':
                break
            a = input('输入出发地:')
            b = input('输入终点:')
            c = self.get_code(a)
            d = self.get_code(b)
            e = self.get_data(self.times, c, d)
            tb.clear()
            if isinstance(e, list):
                self.success(e)
            else:
                # NOTE(review): the file's indentation was lost; the retry
                # prompt is assumed to belong to this failure branch - confirm.
                print('没有返回数据!!!')
                choose = input('是否再次尝试\n1,是\n2,算了吧\n')
                if choose != '1':
                    break