转自:https://blog.csdn.net/weixin_42062762/article/details/87658044
若侵权请联系作者删除,谢谢~
原文有些许小错误,我修改了一下。
from bs4 import BeautifulSoup as bs
import requests
import os
import pandas as pd
import numpy as np
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
def get_url():
    """Scrape historical lottery draw rows and append them to data_recent.csv.

    Walks pages 1..96 of the lottery.gov.cn 'dlt' history listing, extracts
    the text of every table row, collapses whitespace, and writes one draw
    per line.  Appends, so re-running duplicates rows.
    """
    # Open the output once instead of re-opening it for every single row
    # (the original also called f.close() inside the `with`, which is a no-op).
    with open('data_recent.csv', 'a', encoding='utf-8') as f:
        for page in range(1, 97):
            url = ('http://www.lottery.gov.cn/historykj/history_'
                   + str(page) + '.jspx?_ltype=dlt')
            # NOTE(review): no timeout or error handling on the request —
            # consider requests.get(url, timeout=10) for unattended runs.
            html = requests.get(url).text
            # The original sliced html[html.find(''):], which is always the
            # whole string; dropped as dead code.
            rows = bs(html, "html.parser").find_all('tr')
            for row in rows:
                number = (row.get_text().strip()
                          .replace('\r', '').replace('\t', '')
                          .replace('\n', ' '))
                f.write(number + '\n')
def huatu():
    """Plot the first 100 draws of each of the 7 ball positions as 3D lines.

    Reads the scraped CSV (hard-coded Windows path), drops the draw-id
    column, and draws one polyline per ball position, offset along the
    y axis so the seven series don't overlap.
    """
    # NOTE(review): `error_bad_lines` was removed in pandas 2.0; switch to
    # on_bad_lines='skip' when upgrading pandas.
    data = pd.read_csv(r'D:\PycharmProjects\pyQT_sports_lottery\data_recent.csv',
                       sep=' ', header=None, error_bad_lines=False).values
    data = data[:, 1:]  # drop the leading draw-id column; keep the 7 ball values
    fig = plt.figure(figsize=(10, 10))
    # fig.gca(projection='3d') was removed in matplotlib 3.6.
    ax = fig.add_subplot(projection='3d')
    for i in range(1, 8):
        z = data[:100, i - 1]   # i-th ball over the first 100 draws
        y = np.full(100, i)     # constant y lane per ball (was full_like of a random array)
        x = range(100)
        # Label each line so ax.legend() has artists to show (unlabeled
        # lines made the original legend call warn and render empty).
        ax.plot(x, y, z, label='ball %d' % i)
    ax.legend()
    # ax.set_xlim=[0,8]
    plt.tight_layout()
    # plt.savefig('img_3d.png')
    plt.show()
class preddd(object):
    """Frequency-based next-draw "predictor" for the 5+2 lottery numbers.

    For every (ball position, value) pair it collects the historical deltas
    to the following draw, converts them into increase/decrease probabilities
    and probability-weighted expected steps, then extrapolates the next draw
    from the current one.  (A lottery is random, so this is a toy.)
    """

    def __init__(self):
        # Rows are draws; column 0 is the draw id, columns 1..7 the ball values.
        self.data = pd.read_csv(r'D:\PycharmProjects\pyQT_sports_lottery\simple_data.csv', sep=' ', header=None,
                                error_bad_lines=False).values
        # self.data = pd.read_csv(r"D:\PycharmProjects\pyQT_sports_lottery\data_recent.csv", sep=' ', header=None)
        self.data = self.data[np.argsort(self.data[:, 0])]  # sort by draw id so the series runs oldest-first
        # self.data = self.data.sort_index(ascending=False).values  # (old way of reversing the data)
        self.data = self.data[:, 1:]  # drop the draw-id column; keep the 7 ball values

    def fengbu(self, i):
        """Build per-(position, value) step statistics from draws 0..i-1.

        Returns three (7, 35) arrays indexed by [position][value-1]:
          da1    -- probability-weighted expected upward step,
          jian1  -- probability-weighted expected downward step (negative),
          dict21 -- empirical probability that the value increases next draw.
        """
        abb = {}
        for l in range(7):
            for n in range(1, 36):
                abb[l, n] = []
                for qiu in range(i - 1):
                    if self.data[qiu][l] == n:
                        a = self.data[qiu + 1][l] - self.data[qiu][l]
                        abb[l, n].append(a)  # one big dict of {(l, n): [observed deltas]}
        dict1 = {}
        dict2 = {}   # probability each (position, value) increases next draw
        add1 = {}    # number of times it increased
        reduce = {}  # number of times it decreased
        da = {}      # weighted sum of upward steps
        jian = {}    # weighted sum of downward steps
        da1 = []
        jian1 = []
        dict21 = []
        for n, l in abb.items():
            add1[n] = 0
            reduce[n] = 0
            da[n] = 0
            jian[n] = 0
            for m in l:
                if m > 0:
                    add1[n] += 1  # count how often the next draw was larger
                elif m < 0:
                    reduce[n] += 1  # ... and how often it was smaller
            # +1 in the denominator guards against division by zero when the
            # pair was never observed (and slightly damps the ratio).
            dict2[n] = round(add1[n] / (reduce[n] + add1[n] + 1), 4)
            # Probability of increasing; the decrease probability is its complement.
            for m in set(l):
                if m > 0:
                    dict1[n, m] = (round(l.count(m) / add1[n], 4)) * m
                    da[n] += dict1[n, m]
                    '''
                    First decide which is more likely for the current value:
                    increase or decrease.  For the likelier direction, weight
                    each observed step by its empirical probability and sum
                    the products; that sum is the predicted step for the
                    next draw.
                    '''
                elif m < 0:
                    dict1[n, m] = (round(l.count(m) / reduce[n], 4)) * m
                    jian[n] += dict1[n, m]
                elif m == 0:
                    dict1[n, m] = 0  # value unchanged between the two draws
        # Flatten the dicts into (7, 35) matrices in (position, value) order.
        for s1 in range(7):
            for q1 in range(1, 36):
                n = da[(s1, q1)]
                m = jian[(s1, q1)]
                l = dict2[(s1, q1)]
                da1.append(n)
                jian1.append(m)
                dict21.append(l)
        # for n, m, l in zip(da.values(), jian.values(), dict2.values()):
        #     da1.append(n)
        #     jian1.append(m)
        #     dict21.append(l)
        da1 = np.array(da1).reshape(7, 35)
        jian1 = np.array(jian1).reshape(7, 35)
        dict21 = np.array(dict21).reshape(7, 35)
        return da1, jian1, dict21

    def predict(self, i):
        """Predict draw i+1 from draw i and print it next to the real draw i+1."""
        for red in range(7):
            # Per-position mean and standard deviation over all draws.
            print(round(self.data[:, red].mean(), 4), round(self.data[:, red].std(), 4))
        da1, jian1, dict21 = self.fengbu(i)
        predict = np.zeros(7)
        for l in range(7):
            for m in range(1, 36):
                if self.data[i][l] == m:
                    if dict21[l][m - 1] > 0.5:
                        # More likely to increase: add the expected upward step.
                        print(dict21[l][m - 1], da1[l][m - 1], self.data[i][l])
                        predict[l] = self.data[i][l] + da1[l][m - 1]
                    elif dict21[l][m - 1] < 0.5:
                        # More likely to decrease: add the (negative) downward step.
                        print(dict21[l][m - 1], jian1[l][m - 1], self.data[i][l])
                        predict[l] = self.data[i][l] + jian1[l][m - 1]
        print("第 %d 次,结果是:%s" % (i, self.data[i]))
        print("所以预测下一次是:%s" % predict)
        print("真正下一次是:%s" % self.data[i + 1])
        print('*' * 50)
if __name__ == '__main__':
    # Scraping/plotting are one-off steps; uncomment when needed.
    # get_url()
    # huatu()
    predictor = preddd()
    predictor.predict(1916)
# 第二部分:简单用LSTM预测开奖号码 (Part 2: a simple LSTM prediction of the winning numbers)
import pandas as pd
import numpy as np
import os
# from keras.layers import LSTM, Dense
from keras import layers
from keras import Model
from keras import Input
from keras.optimizers import RMSprop
import matplotlib.pyplot as plt
class predit_rnn(object):
    """Toy LSTM regressor that tries to predict the next draw's 7 numbers.

    Inputs are the standardised current draw (one time step of 7 features);
    the 7 regression heads are trained against the raw, un-standardised
    values of the following draw.
    """

    def __init__(self):
        # Rows are draws; column 0 is the draw id, columns 1..7 the ball values.
        self.data = pd.read_csv(r'D:\PycharmProjects\pyQT_sports_lottery\simple_data.csv', sep=' ', header=None,
                                error_bad_lines=False).values
        # data = pd.read_csv(r"D:\PycharmProjects\pyQT_sports_lottery\data_recent.csv", sep=' ', header=None)
        self.data = self.data[np.argsort(self.data[:, 0])]  # sort by draw id so the series runs oldest-first
        # data = data.sort_index(ascending=False).values  # (old way of reversing the data)
        self.data = self.data[:, 1:]  # drop the draw-id column; keep the 7 ball values
        # Standardisation statistics from the first 1500 draws.
        # NOTE(review): this range overlaps the validation split (1400-1550),
        # so there is mild leakage — confirm if that matters here.
        mean = self.data[:1500].mean(axis=0)
        std = self.data[:1500].std(axis=0)
        # data = data.astype(np.float64)
        self.data1 = self.data.copy()
        self.data1 = self.data1.astype(np.float64)
        for i in range(len(self.data1)):
            # self.data1[i] = self.data1[i].astype(np.float64)
            self.data1[i] = self.data1[i] - mean
            self.data1[i] = self.data1[i] / std
        # data1 -= mean
        # data1 /= std
        # Train / validation / test splits; expand_dims adds a time axis of
        # length 1 so each sample is shaped (1, 7) for the LSTM input.
        self.train_data = self.data1[:1400]
        self.train_data = np.expand_dims(self.train_data, axis=1)
        self.val_data = self.data1[1400:1550]
        self.val_data = np.expand_dims(self.val_data, axis=1)
        self.test_data = self.data1[1550:len(self.data) - 1]
        self.test_data = np.expand_dims(self.test_data, axis=1)
        # One raw target series per ball position (5 red + 2 blue).
        red1_labels = self.data[:, 0]
        red2_labels = self.data[:, 1]
        red3_labels = self.data[:, 2]
        red4_labels = self.data[:, 3]
        red5_labels = self.data[:, 4]
        blue1_labels = self.data[:, 5]
        blue2_labels = self.data[:, 6]
        # Labels are offset by one draw relative to the inputs, so the sample
        # for draw t is trained to predict draw t+1.
        self.train_labels_1 = red1_labels[1:1401]
        self.train_labels_2 = red2_labels[1:1401]
        self.train_labels_3 = red3_labels[1:1401]
        self.train_labels_4 = red4_labels[1:1401]
        self.train_labels_5 = red5_labels[1:1401]
        self.train_labels_6 = blue1_labels[1:1401]
        self.train_labels_7 = blue2_labels[1:1401]
        self.val_labels_1 = red1_labels[1401:1551]
        self.val_labels_2 = red2_labels[1401:1551]
        self.val_labels_3 = red3_labels[1401:1551]
        self.val_labels_4 = red4_labels[1401:1551]
        self.val_labels_5 = red5_labels[1401:1551]
        self.val_labels_6 = blue1_labels[1401:1551]
        self.val_labels_7 = blue2_labels[1401:1551]
        self.test_labels_1 = red1_labels[1551:]
        self.test_labels_2 = red2_labels[1551:]
        self.test_labels_3 = red3_labels[1551:]
        self.test_labels_4 = red4_labels[1551:]
        self.test_labels_5 = red5_labels[1551:]
        self.test_labels_6 = blue1_labels[1551:]
        self.test_labels_7 = blue2_labels[1551:]

    def train_predict(self):
        """Build the 7-headed LSTM regression model and fit it for 50 epochs.

        Stores the compiled model on self.model and the training history on
        self.history for later plotting.
        """
        # Variable-length sequence of 7 features (in practice length 1 here).
        post_input = Input(shape=(None, 7), name='post_input')
        lstm = layers.LSTM(150, dropout=0.2, recurrent_dropout=0.2, activation='relu', return_sequences=True)(
            post_input)
        lstm1 = layers.LSTM(250, dropout=0.2, recurrent_dropout=0.2, activation='relu')(lstm)
        # Deep stack of dense layers shared by all 7 output heads.
        x = layers.Dense(360, activation='relu')(lstm1)
        x = layers.Dense(250, activation='relu')(x)
        x = layers.Dense(250, activation='relu')(x)
        x = layers.Dense(250, activation='relu')(x)
        x = layers.Dense(250, activation='relu')(x)
        x = layers.Dense(250, activation='relu')(x)
        x = layers.Dense(140, activation='relu')(x)
        x = layers.Dense(70, activation='relu')(x)
        # x = layers.Dropout(0.3)(x)
        # One scalar regression head per ball position.
        red1_predict = layers.Dense(1, name='red1')(x)
        red2_predict = layers.Dense(1, name='red2')(x)
        red3_predict = layers.Dense(1, name='red3')(x)
        red4_predict = layers.Dense(1, name='red4')(x)
        red5_predict = layers.Dense(1, name='red5')(x)
        blue1_predict = layers.Dense(1, name='blue1')(x)
        blue2_predict = layers.Dense(1, name='blue2')(x)
        self.model = Model(post_input, [red1_predict, red2_predict, red3_predict, red4_predict, red5_predict, blue1_predict,
                                        blue2_predict])
        self.model.compile(optimizer=RMSprop(1e-4), loss=['mse', 'mse', 'mse', 'mse', 'mse', 'mse', 'mse'],
                           metrics=['acc', 'acc', 'acc', 'acc', 'acc', 'acc', 'acc'])
        self.history = self.model.fit(self.train_data, [self.train_labels_1, self.train_labels_2, self.train_labels_3, self.train_labels_4, self.train_labels_5,
                                                        self.train_labels_6, self.train_labels_7],
                                      batch_size=20, epochs=50, validation_data=(
                self.val_data, [self.val_labels_1, self.val_labels_2, self.val_labels_3, self.val_labels_4, self.val_labels_5,
                                self.val_labels_6, self.val_labels_7]))

    def huatu(self):
        """Plot training vs validation loss from the stored fit history."""
        loss = self.history.history['loss']
        # Drop the first 3 epochs: the large initial losses squash the plot scale.
        loss = loss[3:]
        val_loss = self.history.history['val_loss']
        val_loss = val_loss[3:]
        epochs = range(1, len(loss) + 1)
        plt.figure()
        plt.plot(epochs, loss, 'b', color='r', label='Training loss')
        plt.plot(epochs, val_loss, 'b', label='Validation loss')
        plt.title('Training and validation loss')
        plt.legend()
        plt.show()
if __name__ == '__main__':
    # Train the LSTM, then predict the draw following index 1914 and compare
    # the printed prediction with the real draw 1915.
    rnn = predit_rnn()
    rnn.train_predict()
    sample = rnn.data1[1914]
    print(rnn.data[1915])
    sample = sample.reshape(1, 1, 7)  # (batch, time, features) for the model
    print(rnn.model.predict(sample))
    # rnn.huatu()