链接:https://pan.baidu.com/s/18oE308_NVNPaCOACw_H5Hw?pwd=abc1
利用爬虫抓取双色球开奖号码,获取完整数据,简洁45行代码实现,更新时间2023-06-28
这是网上公开的开奖数据,怎样把它爬取下来?
它将只爬取最近30期的双色球开奖号码,并将结果写入到名为 "双色球开奖结果.csv" 的文件中。
import requests
import os
from bs4 import BeautifulSoup
def download(url, page):
    """Fetch one draw-result page and append its numbers to the CSV file.

    Args:
        url: Address of the result page for a single draw.
        page: Issue label of that draw; used in the CSV row and the log line.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps the scraper from hanging forever on a stalled server,
    # and raise_for_status() stops an error page from being parsed as data.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # get_text() still yields the number when an <li> holds more than one
    # child node, a case in which .string is None.
    ball = [li.get_text(strip=True)
            for li in soup.select('div.ball_box01 ul li')]
    write_to_excel(page, ball)
    print(f"第{page}期开奖结果录入完成")
def write_to_excel(page, ball):
    """Append one draw as a row to '双色球开奖结果.csv'.

    Args:
        page: Issue label, written as ``第{page}期`` in the first column.
        ball: Sequence whose first seven items are written to the next
            seven columns, in order.

    Raises:
        IndexError: If ``ball`` has fewer than seven items.
    """
    # Build the row first so that a short ``ball`` fails before the file is
    # opened (no empty file is created, no handle is left open).
    fields = [f'第{page}期'] + [f'{ball[i]}' for i in range(7)]
    # The context manager closes the file even if the write raises.
    with open('双色球开奖结果.csv', 'a', encoding='utf_8_sig') as f:
        f.write(','.join(fields) + '\n')
def turn_page(limit=30):
    """Walk the issue list on the index page and download each listed draw.

    Args:
        limit: Maximum number of entries taken from the top of the issue
            list. Defaults to 30, the previously hard-coded value. The list
            is presumably ordered newest first; verify against the page.

    Raises:
        requests.RequestException: If the index page cannot be fetched.
    """
    url = "http://kaijiang.500.com/ssq.shtml"
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    page_links = soup.select("div.iSelectList a")
    for link in page_links[:limit]:
        href = link.get('href')
        if not href:
            # An anchor without a target cannot be downloaded; skip it
            # instead of aborting the whole run with a KeyError.
            continue
        download(href, link.get_text(strip=True))
def main():
    """Start from a clean CSV file, then scrape the recent draws."""
    # write_to_excel() appends, so a file left over from an earlier run must
    # go first. Removing it directly avoids the check-then-delete race.
    try:
        os.remove('双色球开奖结果.csv')
    except FileNotFoundError:
        pass  # nothing to clean up on a first run
    turn_page()


if __name__ == '__main__':
    main()
生成的表格打开验证数据正确性
非常好,下面先用随机森林回归模型进行预测
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
# Load the scraped draws. The CSV has no header row, so header=None keeps the
# first draw from being consumed as column names; utf-8-sig matches the
# encoding the file was written with.
data = pd.read_csv('双色球开奖结果.csv', header=None, encoding='utf-8-sig')
# Column 0 is the issue label. Sorting on it is meant to put the draws in
# chronological order (assumes fixed-width issue numbers - TODO confirm).
data = data.sort_values(by=0).reset_index(drop=True)
# Columns 1-6 are the six red balls.
red = data.iloc[:, 1:7].to_numpy(dtype=float)
if len(red) < 2:
    raise ValueError("need at least two draws to build a training pair")
# Each draw is the input for the draw that follows it. Using the same rows as
# both features and labels would only teach the model the identity mapping.
features = red[:-1]
labels = red[1:]
# Random-forest regressor with a fixed seed for reproducible output.
model = RandomForestRegressor(n_estimators=100, random_state=1)
model.fit(features, labels)
# Predict the next draw from the last row after sorting.
next_features = model.predict(red[-1].reshape(1, -1))
next_features = np.round(next_features).astype(int)
# Print the predicted red-ball numbers.
print("预测的红色球号码:", next_features)
这个预测方式好像有点简单了,弄一个多层感知机进行预测
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
# Load the scraped draws. The CSV has no header row, so header=None keeps the
# first draw from being consumed as column names; utf-8-sig matches the
# encoding the file was written with.
data = pd.read_csv('双色球开奖结果.csv', header=None, encoding='utf-8-sig')
# Column 0 is the issue label. Sorting on it is meant to put the draws in
# chronological order (assumes fixed-width issue numbers - TODO confirm).
data = data.sort_values(by=0).reset_index(drop=True)
# Columns 1-6 are the six red balls.
red = data.iloc[:, 1:7].to_numpy(dtype=float)
if len(red) < 2:
    raise ValueError("need at least two draws to build a training pair")
# Each draw is the input for the draw that follows it. Using the same rows as
# both features and labels would only teach the model the identity mapping.
features = red[:-1]
labels = red[1:]
# Multi-layer perceptron with one hidden layer of 100 units, fixed seed.
model = MLPRegressor(hidden_layer_sizes=(100,), random_state=1)
model.fit(features, labels)
# Predict the next draw from the last row after sorting.
next_features = model.predict(red[-1].reshape(1, -1))
# The network output is unbounded, so round and clip it to the 1-33 range
# of red-ball numbers.
next_features = np.clip(np.round(next_features), 1, 33).astype(int)
# Print the predicted red-ball numbers.
print("预测的红色球号码:", next_features)
这两段代码预测的号码不一样,是因为使用的模型不同。开奖号码本身是随机的,预测结果仅供娱乐,随便玩玩吧。
再做一个模拟双色球游戏,如果选择的号码中了,会显示绿色。
import tkinter as tk
import random
class DoubleColorBallGame:
    """Tkinter mock-up of the double-colour-ball lottery.

    The window shows 33 red-ball buttons and 16 blue-ball buttons. Clicking a
    button toggles its selection; the draw button generates a random result
    and paints every selected number that was drawn green.
    """

    def __init__(self, master):
        """Build all widgets inside ``master`` (the Tk root window)."""
        self.master = master
        self.master.title("双色球游戏")
        self.master.geometry("600x400")
        self.red_balls = []            # red buttons, index = number - 1
        self.blue_balls = []           # blue buttons, index = number - 1
        self.selected_red_balls = []   # currently selected red numbers
        self.selected_blue_ball = None  # selected blue number, or None
        self.create_red_ball_buttons()
        self.create_blue_ball_buttons()
        # Remember the button colour this platform actually uses. The name
        # "SystemButtonFace" only exists on Windows and raises TclError
        # elsewhere.
        self._default_bg = self.red_balls[0].cget("bg")
        self.create_draw_button()
        self.create_clear_button()
        self.create_result_label()

    def create_red_ball_buttons(self):
        """Lay out buttons 1-33 in a grid of 11 columns."""
        red_frame = tk.Frame(self.master)
        red_frame.pack(pady=10)
        for i in range(1, 34):
            # i=i binds the current number; a plain closure would see 33.
            button = tk.Button(red_frame, text=str(i), width=2,
                               command=lambda i=i: self.select_red_ball(i))
            row, column = divmod(i - 1, 11)
            button.grid(row=row, column=column, padx=2, pady=2)
            self.red_balls.append(button)

    def create_blue_ball_buttons(self):
        """Lay out buttons 1-16 in a single row."""
        blue_frame = tk.Frame(self.master)
        blue_frame.pack(pady=10)
        for i in range(1, 17):
            button = tk.Button(blue_frame, text=str(i), width=2,
                               command=lambda i=i: self.select_blue_ball(i))
            button.grid(row=0, column=i - 1, padx=2, pady=2)
            self.blue_balls.append(button)

    def create_draw_button(self):
        """Add the button that triggers a draw."""
        draw_frame = tk.Frame(self.master)
        draw_frame.pack(pady=10)
        draw_button = tk.Button(draw_frame, text="开奖",
                                command=self.generate_draw_result)
        draw_button.pack()

    def create_clear_button(self):
        """Add the button that clears the selection and the result."""
        clear_frame = tk.Frame(self.master)
        clear_frame.pack(pady=10)
        clear_button = tk.Button(clear_frame, text="清除",
                                 command=self.clear_selection)
        clear_button.pack()

    def create_result_label(self):
        """Add the label that displays the drawn numbers."""
        self.result_label = tk.Label(self.master, text="")
        self.result_label.pack(pady=20)

    def _reset_button_colors(self):
        """Give every ball button its default background again."""
        for button in self.red_balls + self.blue_balls:
            button.config(bg=self._default_bg)

    def select_red_ball(self, number):
        """Toggle the selection of red ball ``number`` (1-33)."""
        button = self.red_balls[number - 1]
        if number in self.selected_red_balls:
            self.selected_red_balls.remove(number)
            button.config(bg=self._default_bg)
        else:
            self.selected_red_balls.append(number)
            button.config(bg="red")

    def select_blue_ball(self, number):
        """Select blue ball ``number`` (1-16), or deselect it if active."""
        previous = self.selected_blue_ball
        # Only one blue ball can be selected, so the previous one must lose
        # its highlight whenever the selection changes.
        if previous is not None:
            self.blue_balls[previous - 1].config(bg=self._default_bg)
        if previous == number:
            self.selected_blue_ball = None
        else:
            self.selected_blue_ball = number
            self.blue_balls[number - 1].config(bg="blue")

    def generate_draw_result(self):
        """Draw six distinct red balls and one blue ball and show the result.

        Selected numbers that were drawn turn green; the other selected
        numbers keep their red/blue highlight so the display stays in sync
        with the stored selection.
        """
        red_draw = sorted(random.sample(range(1, 34), 6))
        blue_draw = random.randint(1, 16)
        self.result_label.config(
            text="红球:" + " ".join(str(ball) for ball in red_draw)
                 + "\n蓝球:" + str(blue_draw))
        # Wipe the marks left by an earlier draw before repainting.
        self._reset_button_colors()
        for number in self.selected_red_balls:
            colour = "green" if number in red_draw else "red"
            self.red_balls[number - 1].config(bg=colour)
        if self.selected_blue_ball is not None:
            colour = "green" if self.selected_blue_ball == blue_draw else "blue"
            self.blue_balls[self.selected_blue_ball - 1].config(bg=colour)

    def clear_selection(self):
        """Drop the selection, reset all colours and clear the result text."""
        self.selected_red_balls = []
        self.selected_blue_ball = None
        self._reset_button_colors()
        self.result_label.config(text="")


if __name__ == "__main__":
    root = tk.Tk()
    game = DoubleColorBallGame(root)
    root.mainloop()
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
# Load the scraped draws. The CSV has no header row, so header=None keeps the
# first draw from being consumed as column names; utf-8-sig matches the
# encoding the file was written with.
data = pd.read_csv('双色球开奖结果.csv', header=None, encoding='utf-8-sig')
# Column 0 is the issue label. Sorting on it is meant to put the draws in
# chronological order (assumes fixed-width issue numbers - TODO confirm).
data = data.sort_values(by=0).reset_index(drop=True)
# Columns 1-7 hold the seven numbers of each draw; convert them to a tensor.
numbers = data.iloc[:, 1:8].values.astype(float)
numbers = torch.tensor(numbers, dtype=torch.float32)
# Scale everything by 33, the largest red-ball number. The last column is
# divided by 33 as well, so predictions are scaled back with the same factor.
numbers /= 33.0
# Build input/target pairs from consecutive rows: the six red balls of one
# draw are the input, the numbers of the following draw are the targets.
train_data = numbers[:-1, :6]       # red balls of every draw but the last
train_red_balls = numbers[1:, :6]   # red balls of the following draw
train_blue_balls = numbers[1:, 6]   # last column of the following draw
# Prediction network
class PredictNeuronNet(nn.Module):
    """Two independent linear heads applied to the same input vector.

    One head maps ``input_dim`` features to six outputs (red balls), the
    other maps them to a single output (blue ball).
    """

    def __init__(self, input_dim):
        super().__init__()
        # Layers are created red first, then blue, matching the attribute
        # order callers see in state_dict().
        self.predict_red_balls = nn.Linear(in_features=input_dim, out_features=6)
        self.predict_blue_ball = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Return the tuple ``(red_balls, blue_ball)`` for input ``x``."""
        return self.predict_red_balls(x), self.predict_blue_ball(x)
# Training loop
def train_predict_neuron_model(model, criterion, optimizer, num_epochs,
                               inputs=None, red_targets=None,
                               blue_targets=None):
    """Train ``model`` with full-batch gradient steps.

    Args:
        model: Module whose forward pass returns ``(red_balls, blue_ball)``.
        criterion: Loss function applied separately to both outputs.
        optimizer: Optimizer already bound to the model's parameters.
        num_epochs: Number of optimisation steps to run.
        inputs: Input tensor; defaults to the module-level ``train_data``.
        red_targets: Targets for the first output; defaults to the
            module-level ``train_red_balls``.
        blue_targets: 1-D targets for the second output; defaults to the
            module-level ``train_blue_balls``.

    Returns:
        List with the total loss of every epoch, in order.
    """
    # Fall back to the script's global tensors so existing calls keep working.
    if inputs is None:
        inputs = train_data
    if red_targets is None:
        red_targets = train_red_balls
    if blue_targets is None:
        blue_targets = train_blue_balls

    losses = []
    for epoch in range(num_epochs):
        red_balls, blue_ball = model(inputs)
        loss_red = criterion(red_balls, red_targets)
        # The second output is flattened to match the 1-D target tensor.
        loss_blue = criterion(blue_ball.view(-1), blue_targets)
        loss = loss_red + loss_blue
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    return losses
# Script entry point
if __name__ == '__main__':
    input_dim = 6  # one input feature per red ball of a draw
    model = PredictNeuronNet(input_dim)
    # Loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    # Train the model
    num_epochs = 1000
    train_predict_neuron_model(model, criterion, optimizer, num_epochs)
    # Use the six red balls of the last row as a single (1, 6) input.
    test_data = numbers[-1, :6].reshape(1, -1)
    with torch.no_grad():
        red_balls, blue_ball = model(test_data)
    # Undo the /33 scaling, then round and clamp so the output consists of
    # valid ball numbers (red 1-33, blue 1-16) rather than raw floats.
    red_balls = (red_balls * 33.0).round().clamp(1, 33).to(torch.int64)
    blue_ball = (blue_ball * 33.0).round().clamp(1, 16).to(torch.int64)
    print("Predicted numbers for the next period (Red Balls):")
    print(red_balls)
    print("Predicted number for the next period (Blue Ball):")
    print(blue_ball)