# 之前写过一版12306爬虫,但那一版不够灵活,这次做了完善。这里不详细讲解,直接上代码(如有不明白的地方,可以加QQ:727733027 或者留言)。
from bs4 import BeautifulSoup
import requests
import json
import time
# Download the station table script and keep the text found between its first
# pair of single quotes in `st`; the __main__ section passes `st` to setCity().
sta_url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9002'
# Without a timeout requests waits indefinitely on an unresponsive server.
sta_wb_data = requests.get(sta_url, timeout=10)
sta_html = sta_wb_data.content
sta_soup = BeautifulSoup(sta_html, 'lxml')
sta_data = sta_soup.find_all('p')
# Pre-bind `st` so it exists (empty) even when the response holds no <p> text
# or no quoted payload; previously that case left `st` undefined.
st = ''
for letter in sta_data:
    v = letter.text
    quoted = v.split("'")
    if len(quoted) > 1:
        st = quoted[1]
# Lookup tables filled in by setCity():
#   City  - field 1 of each record (the name passed to getStation) -> field 2
#   City2 - field 2 of each record (the code passed to getStation2) -> field 1
City = {}
City2 = {}


def setCity(city):
    """Populate the City and City2 tables from a raw station string.

    Args:
        city: Records separated by '@'; each record is a '|'-separated list
            whose fields at index 1 and 2 are stored as a name/code pair.

    Returns:
        None. The module-level City and City2 dicts are updated in place.
        Empty chunks and records with fewer than three fields are skipped.
    """
    for record in city.split('@'):
        fields = record.split('|')  # split once per record
        if len(fields) < 3:
            # Covers the empty chunk before a leading '@' as well as
            # truncated records that cannot supply both fields.
            continue
        name, code = fields[1], fields[2]
        City[name] = code
        City2[code] = name
def getStation(Station):
    """Look up *Station* in the City table.

    Args:
        Station: Key to look up in the module-level City dict.

    Returns:
        The value stored for the key, or None when the key is missing
        (in which case 'City Error' is printed).
    """
    try:
        return City[Station]
    except KeyError:
        # Only a missing key is expected here; anything else should surface.
        print('City Error')
        return None
def getStation2(Station2):
    """Look up *Station2* in the City2 table.

    Args:
        Station2: Key to look up in the module-level City2 dict.

    Returns:
        The value stored for the key, or None when the key is missing.
        Unlike getStation, nothing is printed on a miss.
    """
    try:
        return City2[Station2]
    except KeyError:
        # Only a missing key is expected here; anything else should surface.
        return None
def setStation(from_station, to_station, queryDate, purpose_codes):
    """Build the URL used for the left-ticket query request.

    Args:
        from_station: Value placed in leftTicketDTO.from_station.
        to_station: Value placed in leftTicketDTO.to_station.
        queryDate: Value placed in leftTicketDTO.train_date.
        purpose_codes: Value placed in purpose_codes.

    Returns:
        The query URL as a string. Values are interpolated with %s as-is;
        no URL-escaping is applied.
    """
    base = 'https://kyfw.12306.cn/otn/leftTicket/query'
    # Parameter order is kept exactly as the original URL template had it.
    query = (
        'leftTicketDTO.train_date=%s'
        '&leftTicketDTO.from_station=%s'
        '&leftTicketDTO.to_station=%s'
        '&purpose_codes=%s'
    ) % (queryDate, from_station, to_station, purpose_codes)
    return base + '?' + query
def getList(url):
    """Fetch *url* and return the text of the last <p> element in the response.

    The response bytes are parsed with BeautifulSoup's lxml parser and the
    text of each <p> is read; the caller feeds the result to json.loads().

    Args:
        url: Address to request with requests.get.

    Returns:
        The text of the last <p> found, or '' when the response contains
        none (previously that case raised UnboundLocalError).
    """
    # Without a timeout requests waits indefinitely on an unresponsive server.
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.content, 'lxml')
    t = ''
    for paragraph in soup.find_all('p'):
        t = paragraph.text
    return t
def sendToPhone(text):
    """Placeholder that currently does nothing with *text* and returns None."""
    pass
def _seat_or_none(value):
    """Return *value* unchanged, or '无' when the seat field is empty."""
    return '无' if value == '' else value


def _format_train_row(row):
    """Build the one-line report entry for a single '|'-separated result row."""
    fields = row.split('|')  # split once; every column below indexes into it

    station = fields[3]        # train number
    departTime = fields[8]     # departure time
    arriverTime = fields[9]    # arrival time
    userTime = fields[10]      # travel duration

    way_23 = _seat_or_none(fields[23])  # soft sleeper
    way_26 = _seat_or_none(fields[26])  # standing (no seat)
    way_28 = _seat_or_none(fields[28])  # hard sleeper
    way_29 = _seat_or_none(fields[29])  # hard seat
    way_30 = _seat_or_none(fields[30])  # second class
    way_31 = _seat_or_none(fields[31])  # first class
    way_32 = _seat_or_none(fields[32])  # business / premier class

    # Fields 6 and 7 hold the departure/arrival station values; they are
    # translated through City2 (None is shown when a value is unknown).
    departStation_C = getStation2(fields[6])
    arriverStation_C = getStation2(fields[7])

    return '车次: %s,出发站:%s,到达站:%s,出发时间:%s,到达时间:%s,历时:%s,商务座|特等座:%s,一等座:%s,二等座:%s,软卧:%s,无座:%s,硬卧:%s,硬座:%s \n' % (station, departStation_C, arriverStation_C, departTime, arriverTime, userTime, way_32, way_31, way_30, way_23, way_26, way_28, way_29)


if __name__ == '__main__':
    # Departure city
    from_station_f = '上海'
    # Destination city
    to_station_f = '无锡'
    # Travel date
    queryDate = '2018-01-03'
    # Ticket type
    purpose_codes = 'ADULT'

    # Build the station lookup tables
    setCity(st)
    # Translate both city names into the values used in the query URL
    from_station = getStation(from_station_f)
    to_station = getStation(to_station_f)

    Data = []
    bHaveTicket = False
    if from_station is None or to_station is None:
        # getStation already printed 'City Error'; a query built from a
        # missing station cannot return trains, so skip the request.
        print('没有查询到车辆信息')
    else:
        url = setStation(from_station, to_station, queryDate, purpose_codes)
        # Any failure along the way (request error, non-JSON body, missing
        # 'data'/'result' keys) is reported as "no train information".
        try:
            aa = getList(url)
            text = json.loads(aa)
            Data = text['data']['result']
            bHaveTicket = True
        except Exception:
            print('没有查询到车辆信息')

    # Render the report for the returned rows
    if bHaveTicket:
        count = len(Data)
        info = ''.join(_format_train_row(row) for row in Data)
        header = '出发地: %s,目的地: %s,出发时间: %s,共计 %s 个车次 \n' % (from_station_f, to_station_f, queryDate, count)
        print('header:', header)
        print('info:')
        print(info)
    # NOTE(review): the original indentation was lost; the timestamp is printed
    # here regardless of whether trains were found - confirm intended nesting.
    print('检测时间: %s' % time.strftime('%Y-%m-%d %H:%M:%S'))