最近修复了几个问题:验证码截图位置不对、上传的验证码不对,以及余票查询模块返回的数据不是 JSON 格式。
下面是新的测试结果:
需要注意的一点是:谷歌浏览器自动定位元素坐标的功能有 bug,所以这里改为手动确定验证码的位置。整张验证码图片的具体位置需要根据自己电脑屏幕的分辨率自行调整。调整方法:先截取整个网页的截图,用 Win10 自带的画图软件打开这张截图,确定验证码左上角和右下角的像素坐标,然后在验证码处理模块中把验证码的像素坐标改成这两个坐标即可。具体见程序。
完整程序,拿去可用
整个程序分了三个模块:购票模块(主体)、验证码处理模块、余票查询模块
购票模块:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import wait
from selenium.webdriver import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException, ElementNotVisibleException
import time
import requests
from urllib.parse import urlencode
from pyquery import PyQuery as pq
from check_ticket import Check
from verify_captcha import Code
import json
class Buy_Ticket():
    """Log in to 12306, look up a train with tickets left and book it.

    All methods drive the module-level ``browser`` WebDriver that the
    ``__main__`` section creates; it is not stored on the instance.
    """

    def __init__(self, start_station, end_station, date, username, password, purpose, names):
        """Store the trip, account and passenger details.

        Args:
            start_station: Departure station name.
            end_station: Arrival station name.
            date: Travel date formatted as ``YYYY-MM-DD``.
            username: Account name typed into the login form.
            password: Password typed into the login form.
            purpose: Ticket type; ``'学生'`` triggers the student query,
                any other value is handed to ``Check`` unchanged.
            names: Passenger names exactly as they appear in the passenger
                list on the order page; student entries contain ``'学生'``.
        """
        # 1-based index of the train to book; replaced by check().
        self.num = 1
        self.start = start_station
        self.end = end_station
        self.date = date
        self.username = username
        self.password = password
        self.purpose = purpose
        self.all_names = names
        self.login_url = 'https://kyfw.12306.cn/otn/login/init'
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init'

    def login(self):
        """Fill in the credentials, solve the captcha and log in.

        On success this continues straight into :meth:`check`. If any of the
        looked-up elements is missing the whole login is retried by calling
        this method again; there is no retry limit.
        """
        browser.get(self.login_url)
        try:
            input_name = browser.find_element_by_id('username')
            input_pd = browser.find_element_by_id('password')
            button = browser.find_element_by_id('loginSub')
            input_name.send_keys(self.username)
            input_pd.send_keys(self.password)
            # Hand the captcha over to the recognition module.
            c = Code(browser)
            c.main()
            button.click()
            time.sleep(1)
            # Still on the login URL: the page did not navigate away
            # (treated as a wrong captcha), so solve a new one.
            while browser.current_url == self.login_url + '#':
                c = Code(browser)
                c.main()
                button.click()
                time.sleep(2)
            # Dismiss the dialog that may pop up right after logging in.
            try:
                btn = browser.find_element_by_class_name('btn.btn-primary.ok')
                btn.click()
            except NoSuchElementException:
                pass
            self.check()
        except NoSuchElementException:
            self.login()

    def check(self):
        """Open the ticket query page and record which train to book.

        Stores in ``self.num`` whatever ``Check.get_info()`` returns.
        """
        check = Check(self.date, self.start, self.end, self.purpose)
        start_end = check.look_up_station()
        # The cookies carry the station name as %uXXXX escapes followed by
        # ',' (%2C) and the station code; json.dumps produces the \uXXXX form.
        browser.add_cookie({'name': '_jc_save_fromStation', 'value': json.dumps(self.start).strip('"').replace('\\', '%') + '%2C' + start_end[0]})
        browser.add_cookie({'name': '_jc_save_toStation', 'value': json.dumps(self.end).strip('"').replace('\\', '%') + '%2C' + start_end[1]})
        browser.add_cookie({'name': '_jc_save_fromDate', 'value': self.date})
        browser.get(self.ticket_url)
        button = browser.find_element_by_id('query_ticket')
        button.click()
        self.num = check.get_info()
        if self.purpose == '学生':
            # Switch the page to the student option and query again.
            btn = browser.find_element_by_id('sf2')
            btn.click()
            button.click()

    def check_date(self):
        """Tell whether ``self.date`` falls inside a student-ticket period.

        The periods are June 1 - September 30 (summer) and December 1 -
        March 31 (winter). Only month and day matter, so the year is ignored.
        Month and day are not checked for being a real calendar date.

        Returns:
            1 when a student ticket may be bought for that date, else 0.

        Raises:
            ValueError: If ``self.date`` is not three ``-``-separated integers.
        """
        _, month, day = (int(part) for part in self.date.split('-'))
        month_day = month * 100 + day
        in_summer = 601 <= month_day <= 930
        # The winter period wraps around the new year: Dec 1 .. Mar 31.
        in_winter = month_day >= 1201 or month_day <= 331
        return 1 if in_summer or in_winter else 0

    def book_ticket(self):
        """Click through the booking of train number ``self.num``.

        Raises:
            ValueError: If a name in ``self.all_names`` is not in the
                passenger list shown on the page.
        """
        if self.num is None:
            # The ticket query found nothing to book.
            print('未查询到可预订的车次,无法预订!')
            return
        print('开始预订车票...')
        # One booking button per listed train; self.num is 1-based.
        button = browser.find_elements_by_class_name('btn72')
        button[self.num - 1].click()
        time.sleep(2)
        # The passenger checkboxes are numbered in the order the names are listed.
        passengers = browser.find_element_by_id('normal_passenger_id')
        names = passengers.text.split('\n')
        for name in self.all_names:
            index = names.index(name)
            browser.find_element_by_id('normalPassenger_' + str(index)).click()
            if '学生' in name:
                # Selecting a student opens a confirmation dialog.
                if self.check_date():
                    browser.find_element_by_id('dialog_xsertcj_ok').click()
                else:
                    print('当前日期不在学生票可购买时间区间!')
                    print('学生票乘车时间为暑假6月1日至9月30日、寒假12月1日至3月31日!')
                    browser.find_element_by_id('dialog_xsertcj_cancel').click()
        browser.find_element_by_id('submitOrder_id').click()
        wait.WebDriverWait(browser, 3).until(EC.element_to_be_clickable((By.ID, 'qr_submit_id'))).click()
        print('车票预定成功!请在30分钟内完成付款!')

    def main(self):
        """Run the whole flow: log in (which also queries), then book."""
        self.login()
        self.book_ticket()
if __name__ == '__main__':
    started_at = time.time()
    # Buy_Ticket's methods use this module-level name directly, so it must
    # stay called ``browser``.
    browser = webdriver.Chrome()
    browser.maximize_window()
    # Arguments: departure, destination, date, account, password, ticket
    # type and the list of passengers. Append '(学生)' to the name of every
    # student passenger.
    passenger_names = ['乘客1姓名', '乘客2姓名(学生)']
    buyer = Buy_Ticket(
        '上海', '重庆', '2020-06-20', '账号', '密码', 'ADULT',
        passenger_names,
    )
    buyer.main()
    elapsed_seconds = int(time.time() - started_at)
    print('总耗时:%d秒' % elapsed_seconds)
    # The browser is not closed here; presumably it is left open so the
    # order can still be paid for in that window.
    #browser.close()
验证码处理模块:
import requests
from PIL import Image
from selenium.webdriver import ActionChains
import time
from io import BytesIO
from selenium import webdriver
class Code():
    """Solve the click captcha on the login page.

    The captcha is cut out of a full-page screenshot, uploaded to a remote
    recognition service, and the picture numbers it answers with are clicked.
    """

    def __init__(self, browser):
        """Remember the WebDriver to operate on.

        Args:
            browser: WebDriver instance showing the login page.
        """
        self.browser = browser
        # Recognition service; the answer is embedded in the returned page.
        self.verify_url = 'http://littlebigluo.qicp.net:47720/'

    def get_position(self):
        """Return the captcha's crop box and its on-page size.

        Returns:
            ``(position, size)``: ``position`` is the hard-coded
            ``(left, top, right, bottom)`` pixel box inside the screenshot,
            ``size`` is the element's ``size`` mapping.
        """
        element = self.browser.find_element_by_class_name('touclick-image')
        size = element.size
        # Screen dependent: top-left corner first, bottom-right corner second.
        # Adjust these numbers to the resolution of the machine in use.
        position = (706, 411, 1165, 691)
        return position, size

    def get_screenshot(self):
        """Return a screenshot of the current page as a PIL image."""
        screenshot = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(screenshot))

    def get_touclick_img(self, name='captcha.png'):
        """Crop the captcha out of a screenshot and save it to ``name``.

        Args:
            name: Target file name. ``parse_img`` reads ``'captcha.png'``,
                so keep the default when the picture is to be recognised.
        """
        position, size = self.get_position()
        print('验证码的位置:', position)
        screenshot = self.get_screenshot()
        captcha = screenshot.crop(position)
        # Scale the crop to the element's own size; the click offsets used
        # in move() are relative to the element.
        captcha = captcha.resize((size['width'], size['height']))
        captcha.save(name)

    def parse_img(self):
        """Upload ``captcha.png`` and return the recognised picture numbers.

        Returns:
            A list of ints between 1 and 8, or ``None`` when the service
            gave no usable answer.
        """
        pic_name = 'captcha.png'
        # Keep the file open only for the duration of the upload.
        with open(pic_name, 'rb') as pic:
            files = {'pic_xxfile': (pic_name, pic, 'image/png')}
            response = requests.post(self.verify_url, files=files)
        # NOTE(review): the separator was an empty string in the source,
        # which always raises ValueError; '<B>' assumes the answer is the
        # text of the first bold tag of the page - confirm against a live
        # response.
        try:
            num = response.text.split('<B>')[1].split('<')[0]
            positions = [int(part) for part in num.split()]
        except (IndexError, ValueError):
            print('验证码未能识别')
            return None
        # move() only knows two rows of four pictures, i.e. numbers 1..8.
        if not positions or any(not 1 <= p <= 8 for p in positions):
            print('验证码未能识别')
            return None
        print('验证码识别成功!图片位置:%s' % num)
        return positions

    def move(self):
        """Click the recognised pictures, or reload the captcha and retry."""
        num = self.parse_img()
        if num:
            try:
                element = self.browser.find_element_by_class_name('touclick-image')
                for i in num:
                    # Pictures 1-4 form the first row, 5-8 the second one.
                    row, column = divmod(i - 1, 4)
                    x_offset = 40 + 72 * column
                    y_offset = 73 + 72 * row
                    # NOTE(review): these offsets look measured from the
                    # element's top-left corner - confirm for the Selenium
                    # version in use.
                    ActionChains(self.browser).move_to_element_with_offset(element, x_offset, y_offset).click().perform()
            except Exception:
                print('元素不可选!')
        else:
            reload = self.browser.find_element_by_class_name('touclick-bgimg.touclick-reload.touclick-reload-normal')
            reload.click()
            self.main()

    def main(self):
        """Capture the captcha picture, then recognise and click it."""
        self.get_touclick_img()
        self.move()
余票查询模块:
import requests
from urllib.parse import urlencode
class Check():
    """Query 12306 for trains with tickets left between two stations."""

    def __init__(self, date, start, end, purpose):
        """Store the query parameters.

        Args:
            date: Travel date, sent as ``leftTicketDTO.train_date``.
            start: Departure station name.
            end: Arrival station name.
            purpose: Ticket type; ``'学生'`` is translated to ``'0X00'``,
                any other value is sent as given.
        """
        self.base_url = 'https://kyfw.12306.cn/otn/leftTicket/query?'
        self.url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9018'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
            'Cookie': 'JSESSIONID=B709F9775E72BDED99B2EEBB8CA7FBB9; BIGipServerotn=1910046986.24610.0000; RAIL_EXPIRATION=1579188884851; RAIL_DEVIC'
        }
        self.date = date
        self.start_station = start
        self.end_station = end
        if purpose == '学生':
            self.purpose = '0X00'
        else:
            self.purpose = purpose
        # Set once the station table has been consulted, so it is
        # downloaded only once per instance.
        self._stations_resolved = False

    def look_up_station(self):
        """Translate both station names into their station codes.

        The station table is downloaded on the first call only. A name that
        is not found in the table is left as it is.

        Returns:
            ``[start, end]``; each item is the station code when the name
            was found, otherwise the unchanged original value. The same
            values are stored in ``self.start_station`` / ``self.end_station``.
        """
        if not getattr(self, '_stations_resolved', False):
            response1 = requests.get(self.url)
            start_code = None
            end_code = None
            # Entries are '@'-separated (the text before the first '@' is
            # skipped); inside an entry, '|'-field 1 is compared with the
            # name and field 2 is used as the code.
            for each in response1.text.split('@')[1:]:
                fields = each.split('|')
                if len(fields) < 3:
                    continue
                if start_code is None and fields[1] == self.start_station:
                    start_code = fields[2]
                if end_code is None and fields[1] == self.end_station:
                    end_code = fields[2]
                if start_code is not None and end_code is not None:
                    break
            if start_code is not None:
                self.start_station = start_code
            if end_code is not None:
                self.end_station = end_code
            self._stations_resolved = True
        return [self.start_station, self.end_station]

    def get_info(self):
        """Find the first listed train that still has second-class seats.

        Prints the matching train's details.

        Returns:
            The 1-based position of that train in the result list, or
            ``None`` when no train qualifies.
        """
        start_end = self.look_up_station()
        # Query-string parameters of the left-ticket request.
        data = {
            'leftTicketDTO.train_date': self.date,
            'leftTicketDTO.from_station': start_end[0],
            'leftTicketDTO.to_station': start_end[1],
            'purpose_codes': self.purpose
        }
        url = self.base_url + urlencode(data)
        response = requests.get(url, headers=self.headers)
        payload = response.json()
        # Used below to turn station codes into displayable names.
        maps = payload['data']['map']
        for count, each in enumerate(payload['data']['result'], start=1):
            s = each.split('|')[3:]
            # NOTE(review): the seat columns may be swapped - in the commonly
            # cited 12306 field order, column 30 (s[27]) is second class and
            # column 31 (s[28]) is first class. Verify against a live
            # response before relying on the labels below.
            info = {
                'train': s[0],
                'start_end': maps[s[3]] + '-' + maps[s[4]],
                'time': s[5] + '-' + s[6],
                '历时': s[7],
                '一等座': s[27],
                '二等座': s[28]
            }
            # The seat field is '有', a number such as '18', or something
            # non-numeric such as '无'; only the first two count as available.
            seats = info['二等座']
            try:
                available = seats == '有' or int(seats) != 0
            except ValueError:
                continue
            if available:
                print('[%d]' % count, info)
                return count
        return None