HPU 图书馆占座 python

HPU 图书馆占座

  • 引言
  • 基于推荐博客的不同
    • main.py
    • config.ini
    • 其他配置
  • 对于程序运行时间的个人看法
        • 个人开发工具:

引言

俗话说吃水不忘挖井人,在这给一位大佬博主引流一波,我的代码思想都是基于这篇文章,所有的配置和环境都是以这篇博客为基础。
点这里速看

重点:跟着上面这个博客配置好环境,进行下面工作才有意义

本章博客代码实现的功能
多个用户预约同一个位置的不同时间段

基于推荐博客的不同

main.py

"""自动占位置"""
from __future__ import absolute_import

import ast
import configparser
import logging
import datetime
import time
import re
import requests

import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr

from bs4 import BeautifulSoup
from retrying import retry
# Module-level state shared by the Bot methods through ``global`` statements
seat = 0        # seat id to reserve; 0 means it has not been looked up yet
msg = ''        # report text accumulated during the run and sent by e-mail at the end

# Fixed endpoints; changing them is not recommended
URL_MAP = {
    'HOST': 'https://uia.hpu.edu.cn/sso/user',
    'SEAT-login':'https://uia.hpu.edu.cn/cas/login?service=http://seatlib.hpu.edu.cn/cas',
    'SEAT': 'http://seatlib.hpu.edu.cn/selfRes',

    'Librarymap':'http://seatlib.hpu.edu.cn/map',
    'Library':'http://seatlib.hpu.edu.cn',

    'GetFloor':'http://seatlib.hpu.edu.cn/mapBook/ajaxGetFloor',
    'GetRoom':'http://seatlib.hpu.edu.cn/mapBook/ajaxGetRooms',
    'GetSeat':'http://seatlib.hpu.edu.cn/mapBook/getSeatsByRoom',
    'LOGIN': 'https://uia.hpu.edu.cn/cas/login',

    # A millisecond timestamp is appended to this URL when the captcha is requested
    'CAPTCHA': 'https://uia.hpu.edu.cn/sso/apis/v2/open/captcha?date=',

    # Baidu OCR endpoints; [ak] / [sk] are placeholders substituted with the configured keys
    'API': 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=[ak]&client_secret=[sk]',
    'REQUEST_URL': 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic'
}

# Request headers sent with every seat-system / login request; adjust as needed
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/78.0.3904.108 Safari/537.36 QIHU 360EE'
}


class ReadConfig:
    """Thin accessor around ``configparser`` for the bot's INI file."""

    def __init__(self, filename):
        """Parse *filename* (decoded as UTF-8) into ``self.config``."""
        parser = configparser.ConfigParser()
        parser.read(filename, encoding='utf-8')
        self.config = parser

    def get_basic(self, param):
        """Return option *param* of the ``[Basic]`` section."""
        return self.config.get('Basic', param)

    def get_api(self, param):
        """Return option *param* of the ``[API]`` section."""
        return self.config.get('API', param)

    def get_user(self):
        """Return all ``(name, value)`` pairs of the ``[User]`` section."""
        return self.config.items('User')

    def get_postemail(self, param):
        """Return option *param* of the ``[ToEmail]`` section."""
        return self.config.get('ToEmail', param)
class Bot:
    """Reserve tomorrow's library seat for every configured user and mail a report.

    Constructing an instance runs the whole job: read the configuration, then for
    each ``[User]`` entry log in and submit a reservation, and finally send the
    accumulated report (module-level ``msg``) by e-mail.
    """

    def __init__(self, config_file):
        """Run the reservation job described by *config_file*.

        Args:
            config_file: Path of the INI file, read through ``ReadConfig``.

        Raises:
            Exception: If the report e-mail cannot be sent, or if ``login`` /
                ``post`` still fail with an exception after their retries.
        """
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        self.logger = logging.getLogger('LibraryOfseat')
        self.logger.info('程序启动中...')

        # Load the configuration.
        self.data = ReadConfig(config_file)
        self.library = self.data.get_basic('LIBRARY_ADDR')
        self.libfloor = self.data.get_basic('LIBRARY_FLOOR')
        self.libroom = self.data.get_basic('LIBRARY_ROOM')
        self.libseat = self.data.get_basic('LIBRARY_SEAT')

        self.api_key = self.data.get_api('API_KEY')
        self.secret_key = self.data.get_api('SECRET_KEY')

        self.sendemail = self.data.get_postemail('SENDER_EMAIL')
        self.emailpass = self.data.get_postemail('SENDER_PASS')
        self.receivemail = self.data.get_postemail('RECEIVE_EMAIL')

        # Reservations are always made for tomorrow (ISO date string).
        tomorrow = (datetime.date.today() + datetime.timedelta(days=1)).isoformat()

        # Log in and reserve, one new HTTP session per user.
        for user in self.data.get_user():
            self.sessions = requests.session()
            try:
                if self.login(user) != 0:
                    continue
                judge = self.post(user, tomorrow)
            finally:
                # Close the session on every path, including a failed login
                # or an exception that escapes the retries.
                self.sessions.close()
            # -2 means the configured seat could not be resolved; no point
            # trying the remaining users.
            if judge == -2:
                break

        # Send the report.
        self.postmail(tomorrow)

    @retry(stop_max_attempt_number=3)
    def login(self, user):
        """Log *user* in through the login form, solving the addition captcha via Baidu OCR.

        Args:
            user: ``(nickname, value)`` pair from the ``[User]`` section, where
                ``value`` is the literal text of a list whose first two items
                are the username and the password.

        Returns:
            0 on success; -1 when the login page reports an error or the
            post-login check fails (an explanation is appended to ``msg``).

        Raises:
            Exception: Any error raised while talking to the servers, once the
                ``retry`` decorator has used up its three attempts.
        """
        global msg
        self.logger.info('%s开始登录中...', user[0])
        # Fetch the login page to collect the inputs the form already carries.
        req = self.sessions.get(URL_MAP['LOGIN'], headers=HEADERS, timeout=5)
        soup = BeautifulSoup(req.content, features="html.parser")
        form = soup.find_all('input', class_='for-form')

        # Request a captcha; the appended value is the current time in milliseconds.
        captcha = URL_MAP['CAPTCHA'] + str(int(round(time.time() * 1000)))
        req = self.sessions.get(captcha, headers=HEADERS, timeout=5)
        # Parse the payload once: it holds both the image and the form token.
        captcha_data = ast.literal_eval(req.content.decode('utf-8'))
        img = captcha_data['img']
        self.logger.info('验证码获取成功!')

        # Recognise the captcha text with Baidu OCR.
        api = URL_MAP['API'].replace('[ak]', self.api_key).replace('[sk]', self.secret_key)
        # A timeout keeps a stalled token request from hanging the whole run.
        response = requests.get(api, timeout=5)
        access_token = response.json()['access_token']
        request_url = URL_MAP['REQUEST_URL'] + "?access_token=" + access_token
        api_headers = {'content-type': 'application/x-www-form-urlencoded'}
        response = requests.post(request_url, data={'image': img}, headers=api_headers, timeout=5)
        captcha = response.json()['words_result'][0]['words']
        # The recognised text looks like "a+b=?"; the answer is the sum of the operands.
        captcha = str(captcha.replace('=?', '')).split('+')
        captcha = sum(map(int, captcha))
        self.logger.info('验证码识别成功!')

        # Build the login form.
        token = captcha_data['token']
        account = ast.literal_eval(user[1])
        formdata = {
            'username': account[0],
            'password': account[1],
            '_eventId': 'submit',
            'token': token
        }
        formdata.update({f['name']: f['value'] for f in form})
        formdata.update({'captcha': captcha})

        # Submit the login.
        req = self.sessions.post(URL_MAP['LOGIN'], headers=HEADERS, data=formdata, timeout=5)
        soup = BeautifulSoup(req.content, features='html.parser')
        errormes = soup.find('span', id='errormes')     # error element shown by the login page
        if errormes:
            self.logger.error(errormes['value'])
            msg = msg + '{user}登录失败:{reaction}\n'.format(user = user[0], reaction = errormes['value'])
            return -1

        # Verify the login: the user page must load without being redirected.
        url = self.sessions.get(URL_MAP['HOST'], headers=HEADERS, timeout=5).url
        if url != URL_MAP['HOST']:
            self.logger.error('登录失败, 未知错误!')
            msg = msg + '{user}{reaction}\n'.format(user = user[0], reaction = '登录失败, 未知错误!')
            return -1

        self.logger.info('使用账号密码登录成功!')
        return 0

    @retry(stop_max_attempt_number=3)
    def seat(self, tomorrow):
        """Resolve the configured library / floor / room / seat names to a seat id.

        Args:
            tomorrow: ISO date string of the day to reserve.

        Returns:
            The seat id as an int, or -1 when any lookup step finds no match
            (an explanation is appended to ``msg``).
        """
        global msg
        req = self.sessions.get(URL_MAP['SEAT-login'], headers=HEADERS, timeout=5)
        soup = BeautifulSoup(req.content, features='html.parser')

        # Is tomorrow open for reservation yet?  find() returns None on a miss
        # instead of raising, so the result has to be tested explicitly.
        # NOTE(review): assumes the page shows the date as a text node equal to
        # the ISO string - confirm against the live page.
        if soup.find(text = tomorrow) is None:
            msg = msg + '该时间暂时没有开放{senconddate}的预约\n'.format(senconddate = tomorrow)
            return -1

        # Library (building) id.
        Pbuild = soup.find(text = self.library)
        if Pbuild is None:
            msg = msg + '没有找到对应的图书馆,请核实----图书馆格式:{yourlib}\n'.format(yourlib = self.library)
            return -1
        building = Pbuild.parent.attrs['value']

        # Floor id: first link whose text equals the configured floor.
        payload = {'id':building}
        req = self.sessions.get(URL_MAP['GetFloor'], headers=HEADERS, timeout=5,params=payload)
        soup = BeautifulSoup(req.content, features='html.parser')
        floor = next((a['value'] for a in soup.find_all('a') if a.string == self.libfloor), None)
        if floor is None:
            self.logger.info('没有找到对应的楼层位置')
            msg = msg + '没有找到对应的楼层位置,请核实----楼层格式:{yourfloor}\n'.format(yourfloor = self.libfloor)
            return -1

        # Room id on that floor.
        payload = {'building':building,'floor':floor,'onDate':tomorrow}
        req = self.sessions.get(URL_MAP['GetRoom'], headers=HEADERS, timeout=5,params=payload)
        soup = BeautifulSoup(req.content, features='html.parser')
        # literal_eval accepts the same literal data eval() did, but cannot
        # execute code that a server response might contain.
        rooms = ast.literal_eval(soup.text)['rooms']
        room = next((r['id'] for r in rooms if r['name'] == self.libroom), None)
        if room is None:
            self.logger.info('没有找到对应的自习房间')
            msg = msg + '没有找到对应楼层的自习房间,请核实----自习房间格式:{yourroom}\n'.format(yourroom = self.libroom)
            return -1

        # Seat inside that room: the id attribute of the matching link's parent.
        payload = {'room':room,'date':tomorrow}
        req = self.sessions.get(URL_MAP['GetSeat'], headers=HEADERS, timeout=5,params=payload)
        soup = BeautifulSoup(req.content, features='html.parser')
        seat_attr = next((a.parent.attrs['id'] for a in soup.find_all('a') if a.string == self.libseat), None)
        # Keep only the digits of the attribute; they form the numeric seat id.
        seatnum = re.sub(r"\D", "", seat_attr) if seat_attr is not None else ''
        if not seatnum:
            self.logger.info('没有找到对应的自习位置')
            msg = msg + '没有找到对应自习房间的自习位置,请核实----自习位置格式:{yourseat}\n'.format(yourseat = self.libseat)
            return -1
        return int(seatnum)

    @retry(stop_max_attempt_number=3)
    def post(self, user, tomorrow):
        """Submit the seat reservation for *user*.

        Args:
            user: ``(nickname, value)`` pair from the ``[User]`` section; items
                2 and 3 of the list in ``value`` are the start and end time
                as ``'H:MM'``.
            tomorrow: ISO date string of the day to reserve.

        Returns:
            0 when the result page was parsed and added to ``msg``; -1 on a
            failed reservation; -2 when the configured seat cannot be resolved.
        """
        global msg
        # The seat id is cached in the module-level variable so it is only
        # looked up for the first user.
        global seat
        self.logger.info('%s开始占座中...', user[0])
        # The response body is not used.
        # NOTE(review): presumably this visit signs the session in to the seat
        # system through the CAS service URL - confirm.
        self.sessions.get(URL_MAP['SEAT-login'], headers=HEADERS, timeout=5)

        try:
            req = self.sessions.get(URL_MAP['Librarymap'], headers=HEADERS, timeout=5)
            soup = BeautifulSoup(req.content, features='html.parser')
        except Exception:
            self.logger.info('%s需要更改密码', user[0])
            msg = msg + '{user}需要登录到{web}更改密码,占座失败\n'.format(user = user[0], web = URL_MAP['LOGIN'])
            return -1

        # Hidden synchronizer fields required by the reservation form.
        token = soup.find('input',id='SYNCHRONIZER_TOKEN')['value']
        uri = soup.find('input',id='SYNCHRONIZER_URI')['value']

        # Start / end time expressed as minutes since midnight.
        account = ast.literal_eval(user[1])
        starttime = account[2].split(':')
        start = int(starttime[0])*60+int(starttime[1])
        endtime = account[3].split(':')
        end = int(endtime[0])*60+int(endtime[1])

        if seat == 0:
            seat = self.seat(tomorrow)
            if seat == -1:
                return -2

        formdata = {
            'SYNCHRONIZER_TOKEN': token,
            'SYNCHRONIZER_URI': uri,
            'date': tomorrow,
            'seat': seat,
            'start': start,
            'end': end,
            'authid': -1
        }

        req = self.sessions.post(URL_MAP['SEAT'], headers=HEADERS, data=formdata, timeout=5)
        soup = BeautifulSoup(req.content, features='html.parser')
        layout = soup.find('div', class_='layoutSeat')
        result = layout.find('dl') if layout is not None else None
        if result is None:
            msg = msg + '发生未知错误,{user}占座失败\n'.format(user = user[0])
            return -1
        # NOTE(review): the original trimmed this markup with
        # str(...).split(<separator>)[0], but the separator literal was lost
        # (the line no longer parsed). The whole <dl> is reported until the
        # intended separator is confirmed.
        msg = msg + '{user}的预约结果:\n'.format(user = user[0]) + str(result)
        return 0

    def postmail(self, tomorrow):
        """E-mail the accumulated report through QQ mail's SMTP-over-SSL server.

        Args:
            tomorrow: ISO date string, used in the subject line.

        Raises:
            Exception: If building or sending the mail fails; the underlying
                error is chained as ``__cause__``.
        """
        try:
            # Build the mail in a local so the global report string is not
            # replaced by a MIMEText object.
            mail = MIMEText(msg, 'html', 'utf-8')
            mail['From'] = formataddr(["sir", self.sendemail])          # sender display name, sender address
            mail['To'] = formataddr(["receiver", self.receivemail])     # recipient display name, recipient address
            mail['Subject'] = 'HPU自动抢座----{ordertime}'.format(ordertime = tomorrow)

            # The context manager closes the connection even when login or
            # sending fails.
            with smtplib.SMTP_SSL("smtp.qq.com", 465) as server:
                server.login(self.sendemail, self.emailpass)            # address + mailbox authorization code
                server.sendmail(self.sendemail, [self.receivemail, ], mail.as_string())
        except Exception as err:
            raise Exception('发送邮箱异常!') from err

# Script entry point: constructing Bot runs the whole job, using config.ini
# resolved relative to the current working directory.
if __name__ == '__main__':
    BOT = Bot('config.ini')

config.ini

config.ini 里面的账号有两个条件需要满足:

  1. 能登录这个链接.
  2. 能登录seat软件(主要是用于手动签到)
[Basic]
;图书馆:eg(南校区第二图书馆)
LIBRARY_ADDR = library
;楼层:eg(南校区第二图书馆6层)
LIBRARY_FLOOR = floor
;房间 eg(6层社会科学类借阅区()) 
LIBRARY_ROOM = room
;位置 eg(002) 注意座位号一定要三位
LIBRARY_SEAT = seat

[API]
;百度智能云 文字识别OCR AK
API_KEY = your_api_key
;百度智能云 文字识别OCR SK
SECRET_KEY = your_secret_key

[User]
;昵称: ['账号', '密码', '使用起始时间', '结束时间']
sample_user1: ['username1', 'password', '8:00', '12:00']
sample_user2: ['username2', 'password', '15:00', '19:00']

[ToEmail]
;发信人的邮箱账号
SENDER_EMAIL = sender email
;发送邮箱的授权码
SENDER_PASS = your send email of authorization code
;收件人的邮箱账号
RECEIVE_EMAIL = your recipient email

关于邮箱那段配置可以自行百度解决的,我在这不赘述了(eg:百度 QQ邮箱授权码)

其他配置

自动启动时间设置为每日晚上七点半,其余步骤还有代码都是一样的,如果用以上代码抢占一些不热门的座位到这就可以结束了,毕竟比手动抢座位快点。

对于程序运行时间的个人看法

本章博客的代码有点冗余,需要改进。当你放到 Coding 运行时,你会发现运行时间稍长,可能程序刚开始运行,座位就已经被占了(本人遇到过;为了图书馆占座和谐,本章代码只是第一版的雏形代码)。在缩短运行时间方面,抛砖引玉,给出几个思考的方向:

  1. python中可以多进程运行代码,具体实现可以百度学习一下
  2. 卡时间,让程序提前一分钟运行,把所有的东西(eg:data)都准备好,等到七点半立刻把表单数据提交出去
  3. 理解session和cookie的作用,这里是改善代码的关键,和上面两个思路结合(待完善)

个人开发工具:

  1. VS code;
  2. VS(主要用于 python 并行测试,VS code 应该也可以,个人能力水平有限,没找到相关文章)
  3. postman(主要用于测试网页post、get请求)
  4. 浏览器自带的F12(查看URL和表单传递数据等)

你可能感兴趣的:(python,python,爬虫)