研究生教务需要将课程在HUSTOJ上所有学生提交的练习题代码打包存档。
前提:已有HUSTOJ管理员账号(登录后用于获取Cookie)。
第一步:拉取所有学生提交的代码,
按"student_id_<学号>_submit_id_<提交编号>"的格式命名保存。
import requests
import re
# 定义目标网页的URL
def saveOJ(number, cookies=None, timeout=30):
    """Download one HUSTOJ submission page and save it as a local HTML file.

    The page ``showsource.php?id=<number>`` is requested with a session
    cookie. When the page text contains ``User: <digits>``, the page is
    written to ``student_id_<digits>_submit_id_<number>.html``; otherwise it
    is written to ``not_found_<number>.html`` so it can be inspected by hand.

    Args:
        number: Submission id to fetch.
        cookies: Cookies sent with the request. When omitted, a placeholder
            ``PHPSESSID`` is used; replace it with the value copied from the
            browser developer tools (F12) after logging in.
        timeout: Seconds to wait for the server before giving up, so a
            stalled request cannot hang the whole batch.

    Returns:
        The name of the file that was written, or ``None`` when the server
        did not answer with HTTP 200.

    Raises:
        requests.RequestException: If the request itself fails (connection
            error, timeout, ...).
    """
    # Address of the submission's source page.
    url = "http://10.91.145.2/showsource.php?id={}".format(number)
    print(url)

    if cookies is None:
        # Session cookie taken from the browser (F12) after logging in.
        cookies = {"PHPSESSID": "xxxxxxxxx"}

    response = requests.get(url, cookies=cookies, timeout=timeout)
    if response.status_code != 200:
        print(f"无法获取页面,状态码:{response.status_code}")
        return None

    page_content = response.text

    # Pull the digits that follow "User: " out of the page.
    match = re.search(r'User: (\d+)', page_content)
    if match:
        user_id = match.group(1)
        # The name already carries the ".html" suffix; do not append it again.
        html_filename = f"student_id_{user_id}_submit_id_{number}.html"
    else:
        print("未找到匹配的用户ID。")
        # One file per submission id, so unmatched pages do not overwrite
        # each other.
        html_filename = f"not_found_{number}.html"

    with open(html_filename, "w", encoding="utf-8") as file:
        file.write(page_content)
    print(f"页面内容已保存到{html_filename}文件中。")
    return html_filename
# Archive every submission id from 155951 up to and including 156575
# (the upper bound of the range is exclusive).
submit_ids = range(155951, 156576)
for number in submit_ids:
    saveOJ(number)
import os
from openpyxl import load_workbook
# Report every roster entry (column G of the worksheet) that does not appear
# in any file name of the archive directory.

# Directory that holds the archived files: ./期末 under the working directory.
current_directory = os.getcwd()
subdirectory = "期末"
current_directory = os.path.join(current_directory, subdirectory)

# Roster workbook; replace with the path to your own Excel file.
excel_file_path = 'score.xlsx'
wb = load_workbook(filename=excel_file_path, read_only=True)
try:
    # Worksheet to read; replace with your own sheet name.
    sheet = wb['总评']

    # The directory listing does not change during the check, so read it
    # once instead of once per roster row.
    file_names = os.listdir(current_directory)

    # Column 7 (G), rows 2-59: the column with the students' names.
    for row in sheet.iter_rows(min_row=2, max_row=59, min_col=7, max_col=7):
        cell = row[0].value
        if cell is None:
            # Empty cell inside the fixed row range: nothing to look for.
            continue
        # Non-text cells (e.g. numbers) cannot be used with "in" on a str.
        target_value = str(cell)
        if not any(target_value in file_name for file_name in file_names):
            print(f'"{target_value}" 未出现在当前目录的文件名中。')
finally:
    # Release the workbook even if the sheet or directory is missing.
    wb.close()
第一步拉取时,结果中可能混入其他班学生的提交,因此需要将这些文件删除。
import os
import re
from openpyxl import load_workbook
# Delete archived files whose student id is not found in the roster
# (column F of the worksheet).

# Directory that holds the archived files: ./作业#3 under the working directory.
current_directory = os.getcwd()
subdirectory = "作业#3"
current_directory = os.path.join(current_directory, subdirectory)

# Roster workbook; replace with the path to your own Excel file.
excel_file_path = 'score.xlsx'
wb = load_workbook(filename=excel_file_path, read_only=True)
try:
    # Worksheet to read; replace with your own sheet name.
    sheet = wb['总评']
    stu_ids = []
    # Column 6 (F), rows 2-59.
    for row in sheet.iter_rows(min_row=2, max_row=59, min_col=6, max_col=6):
        target_value = row[0].value
        if target_value is None:
            # Empty cell inside the fixed row range.
            continue
        # Ids stored as numbers in Excel come back as int/float; the
        # comparison below needs text.
        stu_ids.append(str(target_value))
finally:
    # Release the workbook even if the sheet is missing.
    wb.close()

if not stu_ids:
    # With an empty roster every matching file would be deleted; treat that
    # as a configuration error and leave the directory untouched.
    print("no student ids found in the roster, nothing removed")
else:
    file_names = os.listdir(current_directory)
    for file_name in file_names:
        # Extract the student id from the file name.
        match = re.search(r'student_id_(\d+)', file_name)
        if not match:
            continue
        user_id_current = match.group(1)
        # NOTE(review): this is a substring test (the file's id contained in
        # a roster value), kept from the original; confirm whether an exact
        # comparison is wanted before tightening it, since a stricter match
        # deletes more files.
        if not any(user_id_current in uid for uid in stu_ids):
            file_path = os.path.join(current_directory, file_name)
            os.remove(file_path)
            print(file_name + " should not appear, removed")