这里使用 requests 库爬取数据,通过 GET/POST 接口请求即可完成数据的获取。
数据获取一般有静态页面数据获取、动态数据获取、伪装请求数据获取等方式,获取数据的具体代码如下:
import json
import requests
class GetData:
    """Download draw notices from the cwl.gov.cn API and extract ball numbers.

    The request URL asks for ``name=ssq`` with ``issueCount=100``; each entry
    of the response's ``"result"`` list is read for its ``"code"``, ``"red"``
    (comma-separated numbers) and ``"blue"`` (a single number) fields.
    """

    def __init__(self, timeout=10):
        """Set up the endpoint, the request headers and the network timeout.

        Args:
            timeout: Seconds to wait for the server before the request is
                abandoned. Without a timeout ``requests`` may block forever.
        """
        self.web_url = "http://www.cwl.gov.cn/cwl_admin/front/cwlkj/search/kjxx/findDrawNotice?name=ssq&issueCount=100"
        # Browser-like headers sent with every request.
        self.header = {
            "Referer": "http://www.cwl.gov.cn/ygkj/wqkjgg/ssq/",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
        }
        self.timeout = timeout

    def get_data(self, clolr_type):
        """Return every drawn number of the requested colour as a flat list.

        Args:
            clolr_type: ``"red"`` or ``"blue"``.

        Returns:
            A list of ints. For ``"red"`` every number of every draw is
            appended; for ``"blue"`` there is one number per draw. The list
            is empty when the colour is unknown, the request fails, or the
            response is not valid JSON.

        Raises:
            KeyError, ValueError: If a draw entry lacks an expected field or
                holds a non-numeric ball value.
        """
        # Reject an unknown colour up front: no point hitting the network
        # and reporting the same error once per draw.
        if clolr_type not in ("red", "blue"):
            print("color type error")
            return []

        try:
            reponse = requests.get(
                self.web_url, headers=self.header, timeout=self.timeout
            )
            # Turn HTTP error statuses into an exception instead of trying
            # to parse an error page as JSON.
            reponse.raise_for_status()
            # "result" may be missing or null; treat that as "no draws".
            draws = json.loads(reponse.text).get("result") or []
        except (IOError, ValueError):
            # requests' exceptions derive from IOError; malformed JSON
            # raises a ValueError subclass.
            print("get data error")
            return []

        ball_list = []
        for draw in draws:
            if clolr_type == "red":
                ball_list.extend(int(num) for num in draw["red"].split(","))
            else:
                ball_list.append(int(draw["blue"]))
            print("{} : {} : {}".format(draw["code"], draw["red"], draw["blue"]))
        return ball_list
将获取的数据做各种解析和分析,一般会用到各种工具库,比如 json 等;解析过程中会用到列表、字典、字符串等各种数据类型的处理,具体可以参考 Python 的语法教程:https://www.runoob.com/python/python-tutorial.html 。我解析数据的代码如下(主要就是统计红色球和蓝色球各个号码出现的次数):
class ParseData:
    """Tally how often each number from 1 to ``max_range`` occurs."""

    def __init__(self, max_range):
        """Store the largest number (inclusive) that gets a count."""
        self.max_range = max_range

    def parse_data(self, source_data):
        """Return the occurrence counts for 1..max_range in ascending order.

        Args:
            source_data: Object providing ``count(value)``, e.g. a list of
                drawn numbers.

        Returns:
            A list of ``max_range`` ints; index ``i`` holds how many times
            the number ``i + 1`` appears in ``source_data``.
        """
        upper_bound = self.max_range + 1
        return [source_data.count(number) for number in range(1, upper_bound)]
这里我只是将数据做了展示。可用于数据展示的类库有很多,比如 matplotlib、pygal 等,各种展示方式各有优劣,我展示数据的具体代码如下:
import pygal
class ShowData:
    """Render a labelled pygal bar chart and save it to a file."""

    def __init__(self, title, x_title, y_title, tips):
        """Store the chart captions.

        Args:
            title: Chart title.
            x_title: Caption of the x axis.
            y_title: Caption of the y axis.
            tips: Name of the single data series added to the chart.
        """
        self.title = title
        self.x_title = x_title
        self.y_title = y_title
        self.tips = tips

    def show_data(self, x_data, y_data, file_name):
        """Draw ``y_data`` as bars labelled by ``x_data`` and write the chart.

        Args:
            x_data: Labels for the x axis.
            y_data: Values of the series.
            file_name: Destination path. Its parent directory is created
                when it does not exist yet.
        """
        # Local import keeps this fix self-contained within the class.
        import os

        # Writing into a missing directory would fail only after the whole
        # chart has been built, so make sure the target directory exists.
        out_dir = os.path.dirname(file_name)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        hist = pygal.Bar()
        hist.title = self.title
        hist.x_labels = x_data
        hist.x_title = self.x_title
        hist.y_title = self.y_title
        hist.add(self.tips, y_data)
        hist.render_to_file(file_name)
from show_data import ShowData
from get_data import GetData
from parse_data import ParseData
class Main:
    """Run the fetch -> count -> chart pipeline for red and blue balls."""

    def __init__(self):
        """Set the highest number counted for each colour."""
        self.red_max_num, self.blue_max_num = 33, 16

    def run(self):
        """Fetch both colours, count each number and write two SVG charts.

        The charts are written to ``result/red_ball_result.svg`` and
        ``result/blue_ball_result.svg``, relative to the working directory.
        """
        # Fetch the data
        red_numbers = GetData().get_data("red")
        blue_numbers = GetData().get_data("blue")

        # Analyse the data
        red_counts = ParseData(self.red_max_num).parse_data(red_numbers)
        blue_counts = ParseData(self.blue_max_num).parse_data(blue_numbers)

        # Present the analysis results
        red_chart = ShowData(
            "Results of Red Ball",
            "Result Of Red",
            "Red Ball Frequency of Result",
            "Red Count",
        )
        blue_chart = ShowData(
            "Results of Blue Ball",
            "Result Of Blue",
            "Blue Ball Frequency of Result",
            "Blue Count",
        )
        red_labels = range(1, self.red_max_num + 1)
        red_chart.show_data(red_labels, red_counts, "result/red_ball_result.svg")
        blue_labels = range(1, self.blue_max_num + 1)
        blue_chart.show_data(blue_labels, blue_counts, "result/blue_ball_result.svg")
if __name__ == "__main__":
    # Run the whole pipeline only when executed as a script, not on import.
    Main().run()