import requests
import time
from PIL import Image
import json
import re
# Version 1: log in to the SWJTU academic-affairs site with a manually typed
# captcha, download the score page and dump the table to score.txt.
#
# NOTE(review): this listing had lost its indentation and the HTML tags inside
# the three regex literals (the first literal was even split across two
# lines, which is a SyntaxError). The block structure below and the
# <tr>/<th>/<td> patterns are a reconstruction -- confirm them against the
# live page markup before relying on the output.

print("正在获取验证码......")
session = requests.session()
# The response is discarded; presumably this request only exists so the
# session receives the site's cookies before the captcha is requested.
session.get("http://jwc.swjtu.edu.cn/service/login.html")
# The current timestamp is appended as the `test` query parameter.
r = session.get("http://jwc.swjtu.edu.cn/vatuu/GetRandomNumberToJPEG?test=" + str(int(time.time())))
with open("yzm.jpg", "wb") as f:
    f.write(r.content)
# Show the captcha to the user; the context manager closes the image file
# handle once the viewer has been launched.
with Image.open(r"yzm.jpg") as image:
    image.show()

yzm = input("请输入验证码:")
username = input("请输入学号:")
password = input("请输入密码:")
sendmsg = {
    'username': username,
    'password': password,
    'ranstring': yzm,
}
login_header = {
    'Referer': 'http://jwc.swjtu.edu.cn/service/login.html',
    'Origin': 'http://jwc.swjtu.edu.cn',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36',
}
r = session.post("http://jwc.swjtu.edu.cn/vatuu/UserLoginAction", data=sendmsg, headers=login_header)
# Keep the parsed reply under its own name: the original rebound `json`
# itself, which hid the json module for the rest of the script.
login_result = json.loads(r.text)
# print(login_result['loginMsg'])

# Second step of the login: post the login message back to the loading action.
sendmsg = {
    'url': 'http://jwc.swjtu.edu.cn/vatuu/UserExitAction&returnUrl',
    'returnUrl': '',
    'loginMsg': login_result['loginMsg']
}
login_header = {
    'Referer': 'http://jwc.swjtu.edu.cn/vatuu/StudentScoreInfoAction?setAction=studentMarkUseProgram',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
}
r = session.post("http://jwc.swjtu.edu.cn/vatuu/UserLoadingAction", data=sendmsg, headers=login_header)
r1 = session.get(
    "http://jwc.swjtu.edu.cn/vatuu/StudentScoreInfoAction?setAction=studentScoreQuery&viewType=studentScore&orderType=submitDate&orderValue=desc",
    headers=login_header)

# Drop line breaks and tabs so each non-greedy pattern can match across what
# were originally several lines of HTML.
html = r1.content.decode('utf-8').replace('\n', '').replace('\t', '').replace('\r', '')
# NOTE(review): reconstructed patterns (table row / header cell / data cell).
key1 = r'<tr>(.*?)</tr>'
key2 = r'<th>(.*?)</th>'
key3 = r'<td>(.*?)</td>'
base_re = re.compile(key1)   # one match per table row
title_re = re.compile(key2)  # header cells inside a row
data_re = re.compile(key3)   # data cells inside a row
title = []
data = []
for row_html in base_re.findall(html):
    title.extend(title_re.findall(row_html))
    data.extend(data_re.findall(row_html))

# Explicit UTF-8 so the Chinese text is written the same way on every
# platform instead of depending on the locale's default encoding.
with open('score.txt', 'w+', encoding='utf-8') as f:
    for heading in title:
        f.write(heading.ljust(20))
    f.write('\n')
    # The score table is written 15 cells per output line.
    for count, cell in enumerate(data, start=1):
        f.write(str(cell).strip().ljust(20))
        if count % 15 == 0:
            f.write('\n')
PS:中英文字符宽度不同,所以各列对不齐,先将就着用;暂时还没学怎么操作 Excel(懒)。
貌似可以自己写代码判断中英文字符来对齐(也是懒)。
date:2021.9.12
修复了一个小 bug:
之前没注意到课程名称是两个数字。
顺带更新一个 Excel 版本:加了一些错误提示,写完后自动打开 Excel,并删除验证码图片等。(我的 pytesseract 不知道为什么识别不出验证码,想试试百度的 API,不过懒;手工输入也挺好。)
import requests
import time
from PIL import Image
import json
import re
import xlwt
import os
def set_style(name, height, bold=0):
    """Build an xlwt cell style that only customises the font.

    Args:
        name: Font family name, assigned to ``Font.name``.
        height: Font height, assigned to ``Font.height`` unchanged
            (xlwt presumably measures this in 1/20 pt -- confirm).
        bold: Truthy for bold text; assigned to ``Font.bold``.

    Returns:
        A new ``xlwt.XFStyle`` whose ``font`` is the configured font.
    """
    cell_font = xlwt.Font()
    font_settings = (
        ("name", name),
        ("colour_index", 0),  # colour is fixed at palette index 0
        ("height", height),
        ("bold", bold),
    )
    for attribute, value in font_settings:
        setattr(cell_font, attribute, value)

    cell_style = xlwt.XFStyle()
    cell_style.font = cell_font
    return cell_style
# Version 2: same login and scrape flow, but the score table is written to
# score.xls, the workbook is opened automatically, the captcha image is
# deleted, and a failed login is reported.
#
# NOTE(review): this listing had lost its indentation and the HTML tags inside
# the three regex literals (the first literal was even split across two
# lines, which is a SyntaxError). The block structure below and the
# <tr>/<th>/<td> patterns are a reconstruction -- confirm them against the
# live page markup before relying on the output.

print("正在获取验证码......")
session = requests.session()
# The response is discarded; presumably this request only exists so the
# session receives the site's cookies before the captcha is requested.
session.get("http://jwc.swjtu.edu.cn/service/login.html")
# The current timestamp is appended as the `test` query parameter.
r = session.get("http://jwc.swjtu.edu.cn/vatuu/GetRandomNumberToJPEG?test=" + str(int(time.time())))
with open("yzm.jpg", "wb") as f:
    f.write(r.content)
# Show the captcha to the user; closing the image handle here also makes
# sure nothing in this script still holds yzm.jpg open when it is removed.
with Image.open(r"yzm.jpg") as image:
    image.show()

yzm = input("请输入验证码:")
username = input("请输入学号:")
password = input("请输入密码:")
sendmsg = {
    'username': username,
    'password': password,
    'ranstring': yzm,
}
login_header = {
    'Referer': 'http://jwc.swjtu.edu.cn/service/login.html',
    'Origin': 'http://jwc.swjtu.edu.cn',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36',
}
r = session.post("http://jwc.swjtu.edu.cn/vatuu/UserLoginAction", data=sendmsg, headers=login_header)
# Keep the parsed reply under its own name: the original rebound `json`
# itself, which hid the json module for the rest of the script.
login_result = json.loads(r.text)
print(login_result['loginMsg'])

# The script treats a message starting with "登录成功" as a successful login.
if login_result['loginMsg'].startswith("登录成功"):
    # Second step of the login: post the login message back to the loading action.
    sendmsg = {
        'url': 'http://jwc.swjtu.edu.cn/vatuu/UserExitAction&returnUrl',
        'returnUrl': '',
        'loginMsg': login_result['loginMsg']
    }
    login_header = {
        'Referer': 'http://jwc.swjtu.edu.cn/vatuu/StudentScoreInfoAction?setAction=studentMarkUseProgram',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
    }
    r = session.post("http://jwc.swjtu.edu.cn/vatuu/UserLoadingAction", data=sendmsg, headers=login_header)
    r1 = session.get(
        "http://jwc.swjtu.edu.cn/vatuu/StudentScoreInfoAction?setAction=studentScoreQuery&viewType=studentScore&orderType=submitDate&orderValue=desc",
        headers=login_header)

    # Drop line breaks and tabs so each non-greedy pattern can match across
    # what were originally several lines of HTML.
    html = r1.content.decode('utf-8').replace('\n', '').replace('\t', '').replace('\r', '')
    # NOTE(review): reconstructed patterns (table row / header cell / data cell).
    key1 = r'<tr>(.*?)</tr>'
    key2 = r'<th>(.*?)</th>'
    key3 = r'<td>(.*?)</td>'
    base_re = re.compile(key1)   # one match per table row
    title_re = re.compile(key2)  # header cells inside a row
    data_re = re.compile(key3)   # data cells inside a row
    title = []
    data = []
    for row_html in base_re.findall(html):
        title.extend(title_re.findall(row_html))
        data.extend(data_re.findall(row_html))

    # Create the workbook with a single sheet named '成绩'.
    print('开始写入excel')
    f = xlwt.Workbook()
    sheet1 = f.add_sheet('成绩', cell_overwrite_ok=True)
    # Header and data cells use the same font settings, so build the style
    # once instead of constructing a new style object for every cell.
    cell_style = set_style('Times New Roman', 220, True)

    # Row 0: column headings.
    for col, heading in enumerate(title):
        sheet1.write(0, col, heading.strip(), cell_style)
    # Rows 1..: data cells, 15 per spreadsheet row.
    for n, cell in enumerate(data):
        row_offset, col = divmod(n, 15)
        sheet1.write(row_offset + 1, col, cell.strip(), cell_style)

    f.save('score.xls')
    # os.startfile only exists on Windows.
    os.startfile('score.xls')
    os.remove("yzm.jpg")
    print('写入完毕,3s后自动关闭')
    time.sleep(3)
else:
    os.remove("yzm.jpg")
    print('登录失败,请重新尝试,3s后自动关闭')
    time.sleep(3)
再顺带写个打包教程(怕自己忘了):
在 .py 文件所在目录下打开 cmd,输入:
pyinstaller -F 文件名.py -i (路径)图片名.ico -n 打包后的名字
图标当然是直接写完整路径更好;图片必须是 .ico 格式,可以用在线转换网站转换(原文此处的链接已丢失)。
如果程序不使用控制台输入输出、不需要控制台小黑窗,可以加 -w(本脚本用到了 input(),所以不能加)。
提示:如果没有填写课程评价,成绩是爬不出来的。
date:2021.9.13
author:_zs_dawn
鸽子永不迟到