1.爬虫获取数据
import requests
import pandas as pd
import xlwt
import openpyxl
from time import sleep
from tqdm import tqdm
def get_page(n):
    """Fetch the 2022 adjustment list from api.kaoyan.cn and save it as Excel.

    The response rows are reduced to a fixed set of columns, missing values
    are replaced with a single space, and the result is written (overwriting
    any previous file) to ``F:\\考研数据\\调剂数据.xlsx``.

    Args:
        n: Currently unused. The request body always asks for page 1 with a
            limit of 9000 rows, so every call downloads the same data.
            NOTE(review): if per-page fetching was intended, ``n`` should be
            sent as ``"page"`` and the pages merged before writing.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the JSON payload or its rows lack the expected keys.
    """
    url = 'https://api.kaoyan.cn/pc/adjust/adjustList'
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36"
    }
    body = {
        "degree_type": "",
        "keyword": "",
        "level1": "",
        "level2": "",
        "limit": 9000,
        "page": 1,
        "year": 2022,
    }
    # A timeout keeps the script from hanging forever on a stalled connection,
    # and raise_for_status surfaces HTTP errors before JSON parsing.
    response = requests.post(url, headers=header, data=body, timeout=60)
    response.raise_for_status()
    content = response.json()["data"]['data']

    # Columns to keep, in the order they appear in the spreadsheet (A..K).
    order = ['tj_id', 'spe_id', 'school_id', 'year', 'recruit_number', 'school_name', 'special_code', 'special_name', 'recruit_type_name', 'depart_name', 'province_name']
    pf = pd.DataFrame(list(content))
    pf = pf[order].fillna(' ')

    # ExcelWriter.save() and to_excel(encoding=...) were removed in pandas 2.0;
    # the context manager saves and closes the workbook on every pandas version.
    with pd.ExcelWriter('F:\\考研数据\\调剂数据.xlsx') as writer:
        pf.to_excel(writer, index=False)
if __name__ == '__main__':
    # Call get_page once for each index 0..10; tqdm draws the progress bar.
    page_indices = range(11)
    for page_index in tqdm(page_indices):
        get_page(page_index)
2.将拿到的数据进行数据分析
import openpyxl,pprint
# Aggregate the downloaded spreadsheet into two summaries and save each one as
# an importable Python module that defines a single variable named ``con``.


def _to_int(value):
    """Return *value* as an int, or 0 when it is blank, None or not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return 0


# Zero-based positions of the columns read from each row (E, F and K).
_RECRUIT_COL = 4
_SCHOOL_COL = 5
_PROVINCE_COL = 10

print("Opening workbook...... ")
file = "F:\\考研数据\\调剂数据.xlsx"
file2 = "F:\\考研数据\\result1.py"
file3 = "F:\\考研数据\\result2.py"
wb = openpyxl.load_workbook(file)
sheet = wb.active

# Data1: school name -> province -> {'tract': row count, 'Recruit_number': total}
Data1 = {}
# Data2: '' -> province -> {'Recruit_number': total, 'NUM': row count}
Data2 = {}

# Row 1 is the header, so data starts at row 2. Both summaries are built in a
# single pass over the sheet.
for row in sheet.iter_rows(min_row=2, max_col=_PROVINCE_COL + 1, values_only=True):
    Province = row[_PROVINCE_COL]
    School_name = row[_SCHOOL_COL]
    # Missing recruit numbers are stored as ' ' by the download step, so they
    # are counted as 0 instead of aborting the whole run.
    Recruit_number = _to_int(row[_RECRUIT_COL])

    by_school = Data1.setdefault(School_name, {})
    school_entry = by_school.setdefault(Province, {'tract': 0, 'Recruit_number': 0})
    school_entry['tract'] += 1
    school_entry['Recruit_number'] += Recruit_number

    overall = Data2.setdefault('', {})
    province_entry = overall.setdefault(Province, {'Recruit_number': 0, 'NUM': 0})
    province_entry['NUM'] += 1
    province_entry['Recruit_number'] += Recruit_number

print("Writing workbook......")
# The charting script reads the data as ``result2.con``, so each file must
# assign the dict to ``con``. UTF-8 is stated explicitly because Python 3
# decodes source files as UTF-8 unless they declare another encoding.
with open(file2, 'w', encoding='utf-8') as resultFile1:
    resultFile1.write('con = ' + pprint.pformat(Data1))
with open(file3, 'w', encoding='utf-8') as resultFile2:
    resultFile2.write('con = ' + pprint.pformat(Data2))
3.可视化处理并生成HTML页面
#!/usr/bin/python
# -*- coding: GBK -*-
import sys
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.charts import Page, Grid
# Put the generated data file in the same package (数据包) so it can be imported below.
from 数据包 import result2
# Build a ring-shaped pie chart from the aggregated data module and render it
# to pie_rich_label.html.
content = result2
# ``con['']`` maps each key to a record holding 'NUM' and 'Recruit_number'.
# NOTE(review): despite the variable names below, the keys are presumably
# province names (the chart title is per-province) - confirm against the
# script that generates result2.
python_data = content.con['']

university_name = list(python_data)
university_num = [record['NUM'] for record in python_data.values()]
student_num = [record['Recruit_number'] for record in python_data.values()]

# (label, value) pairs in the form Pie.add() expects.
pie_data = list(zip(university_name, student_num))

# render() is the last call in the chain, so ``c`` holds its return value.
c = (
    Pie()
    .add(
        "",
        pie_data,
        radius=["40%", "55%"],
        label_opts=opts.LabelOpts(
            position="outside",
            formatter="{a|{a}}{abg|}\n{hr|}\n {b|{b}: }{c} {per|{d}%} ",
            background_color="#eee",
            border_color="#aaa",
            border_width=1,
            border_radius=4,
            # Rich-text styles referenced by name in the formatter above.
            rich={
                "a": {"color": "#999", "lineHeight": 22, "align": "center"},
                "abg": {
                    "backgroundColor": "#e3e3e3",
                    "width": "100%",
                    "align": "right",
                    "height": 20,
                    "borderRadius": [4, 4, 0, 0],
                },
                "hr": {
                    "borderColor": "#aaa",
                    "width": "100%",
                    "borderWidth": 5,
                    "height": 0,
                },
                "b": {"fontSize": 20, "lineHeight": 35},
                "per": {
                    "color": "#eee",
                    "backgroundColor": "#334455",
                    "padding": [2, 4],
                    "borderRadius": 5,
                },
            },
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="大学在各省补录人数"),
        legend_opts=opts.LegendOpts(padding=20, pos_left=200),
    )
    .render("pie_rich_label.html")
)
4.socket同步修改网页数据
'''
作者 : 一盆萝卜丁
时间 : 2022/12
'''
# -*-coding:utf-8 -*-
import socket
# Minimal single-threaded HTTP server: every incoming connection on
# 127.0.0.1:8080 is answered with the current contents of pie_rich_label.html.
server_html = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    server_html.bind(("127.0.0.1", 8080))
    server_html.listen(10)
    while True:
        conn, addr = server_html.accept()
        # The connection is closed even if reading or sending fails.
        with conn:
            # Read (and ignore) the request; the same page is served whatever
            # the client asked for.
            msg = conn.recv(1024 * 12)
            print(conn)
            # Re-read the file on every request so edits to it show up on the
            # next page load; the handle is closed straight away.
            with open("pie_rich_label.html", "rb") as file_html:
                data = file_html.read()
            # An HTTP response starts with a status line and headers, then a
            # blank line. 200 is the success status for a page fetch (201
            # means "Created"); Content-Length tells the client where the body
            # ends.
            response_head = (
                "HTTP/1.1 200 OK\r\n"
                "Content-Type: text/html\r\n"
                "Content-Length: {}\r\n"
                "Connection: close\r\n"
                "\r\n"
            ).format(len(data))
            conn.sendall(response_head.encode("utf-8"))
            # Send the page body.
            conn.sendall(data)
finally:
    # Release the listening port when the loop ends (e.g. on Ctrl+C).
    server_html.close()