Python 爬取彩票大乐透历史数据 + 预测测试

转自:https://blog.csdn.net/weixin_42062762/article/details/87658044
若侵权请联系作者删除,谢谢~

原文有些许小错误,我修改了一下。

第一部分:用均值和方差预测开奖号码

from bs4 import BeautifulSoup as bs
import requests
import os
import pandas as pd
import numpy as np
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

def get_url(first_page=1, last_page=96, out_path='data_recent.csv'):
    """Download the lottery history pages and append their table rows to a file.

    Every ``<tr>`` element of every page is flattened to one line of text
    (carriage returns and tabs removed, newlines turned into spaces) and
    appended to ``out_path``.

    Args:
        first_page: Number of the first history page to fetch (inclusive).
        last_page: Number of the last history page to fetch (inclusive).
        out_path: File the rows are appended to; it is created if missing.

    Raises:
        requests.RequestException: If a page cannot be fetched in time.
    """
    # Open the output once instead of once per table row. The explicit
    # encoding keeps the result independent of the platform locale and
    # matches the default that pandas.read_csv uses when reading it back.
    with open(out_path, 'a', encoding='utf-8') as f:
        for i in range(first_page, last_page + 1):
            url = 'http://www.lottery.gov.cn/historykj/history_' + str(i) + '.jspx?_ltype=dlt'
            # A timeout stops one stalled connection from hanging the whole run.
            html = requests.get(url, timeout=30).text
            for content in bs(html, "html.parser").find_all('tr'):
                number = content.get_text().strip().replace('\r', '').replace('\t', '').replace('\n', ' ')
                f.write(number + '\n')
    # No explicit close(): the with-block has already closed the file.

def huatu(csv_path=r'D:\PycharmProjects\pyQT_sports_lottery\data_recent.csv'):
    """Draw the first seven value columns of the history file as 3-D lines.

    The file is read as space-separated text without a header and its first
    column is dropped. For each of the next seven columns, the first 100 rows
    (or fewer if the file is shorter) are plotted as one line: x is the row
    index, y is the 1-based column number and z is the value.

    Args:
        csv_path: Path or file-like object accepted by ``pandas.read_csv``.
    """
    # on_bad_lines='warn' is the replacement for error_bad_lines=False,
    # which pandas removed in version 2.0.
    data = pd.read_csv(csv_path, sep=' ', header=None, on_bad_lines='warn').values
    data = data[:, 1:]

    fig = plt.figure(figsize=(10, 10))
    # Figure.gca() no longer accepts projection=...; add_subplot does.
    ax = fig.add_subplot(111, projection='3d')

    # Clamp to the rows actually present so x, y and z always have equal length.
    count = min(100, len(data))
    x = np.arange(count)
    for i in range(1, 8):
        z = data[:count, i - 1]
        y = np.full(count, i)
        # A label per line gives ax.legend() something to show.
        ax.plot(x, y, z, label='ball %d' % i)
    ax.legend()
    plt.tight_layout()
    plt.show()


class preddd(object):
    """Predict the next draw from how each number moved in earlier draws.

    ``self.data`` holds one draw per row, sorted ascending by the first
    column of the input file (which is then dropped). Only the first seven
    remaining columns are used, and only values 1..35 are tracked.
    """

    def __init__(self, csv_path=r'D:\PycharmProjects\pyQT_sports_lottery\simple_data.csv'):
        """Load and sort the draw history.

        Args:
            csv_path: Path or file-like object accepted by
                ``pandas.read_csv``; space-separated, no header row.
        """
        # on_bad_lines='warn' is the replacement for error_bad_lines=False,
        # which pandas removed in version 2.0.
        self.data = pd.read_csv(csv_path, sep=' ', header=None, on_bad_lines='warn').values
        # Order the rows by the first column, ascending.
        self.data = self.data[np.argsort(self.data[:, 0])]
        # Drop the first column, leaving only the drawn numbers.
        self.data = self.data[:, 1:]

    def fengbu(self, i):
        """Summarise how each (column, number) pair changed in rows ``0..i-1``.

        For every row ``r`` in ``range(i - 1)`` and every column, the change
        ``data[r + 1][col] - data[r][col]`` is filed under the number that
        was drawn in row ``r``.

        Args:
            i: Number of leading rows to learn from; ``self.data`` must have
                at least ``i`` rows.

        Returns:
            Three ``(7, 35)`` arrays indexed ``[column][number - 1]``:

            * weighted sum of the positive changes, each weighted by its
              share among the positive changes (rounded to 4 places);
            * the same for the negative changes;
            * ``increases / (increases + decreases + 1)`` rounded to 4
              places (the ``+ 1`` keeps the denominator non-zero).
        """
        # One pass over the history instead of rescanning it for each of the
        # 7 * 35 keys; changes are still collected in row order.
        moves = {(col, num): [] for col in range(7) for num in range(1, 36)}
        for row in range(i - 1):
            for col in range(7):
                current = self.data[row][col]
                bucket = moves.get((col, current))
                if bucket is not None:  # values outside 1..35 are ignored
                    bucket.append(self.data[row + 1][col] - current)

        da1 = np.zeros((7, 35))
        jian1 = np.zeros((7, 35))
        dict21 = np.zeros((7, 35))
        for (col, num), deltas in moves.items():
            ups = sum(1 for d in deltas if d > 0)
            downs = sum(1 for d in deltas if d < 0)
            dict21[col, num - 1] = round(ups / (ups + downs + 1), 4)
            # Use the relative frequency of each distinct change as its
            # weight; the weighted sum is the expected step in that direction.
            # Unchanged numbers (d == 0) contribute nothing.
            for d in set(deltas):
                if d > 0:
                    da1[col, num - 1] += round(deltas.count(d) / ups, 4) * d
                elif d < 0:
                    jian1[col, num - 1] += round(deltas.count(d) / downs, 4) * d
        return da1, jian1, dict21

    def predict(self, i):
        """Print and return a prediction for the draw following row ``i``.

        For each of the seven columns the current number is moved by the
        expected increase when its increase probability is above 0.5, by the
        expected decrease when it is below 0.5, and left unchanged on an
        exact tie. A value outside 1..35 yields 0 for that column.

        Args:
            i: Row index of the current draw in ``self.data``.

        Returns:
            ``numpy.ndarray`` of seven floats, the predicted numbers.
        """
        for red in range(7):
            # Mean and standard deviation of the whole column.
            print(round(self.data[:, red].mean(), 4), round(self.data[:, red].std(), 4))
        da1, jian1, dict21 = self.fengbu(i)
        predicted = np.zeros(7)
        for l in range(7):
            current = self.data[i][l]
            if current not in range(1, 36):
                continue
            idx = int(current) - 1
            chance = dict21[l][idx]
            if chance > 0.5:
                # Printed: increase probability, weighted step, current number.
                print(chance, da1[l][idx], current)
                predicted[l] = current + da1[l][idx]
            elif chance < 0.5:
                print(chance, jian1[l][idx], current)
                predicted[l] = current + jian1[l][idx]
            else:
                # Exact tie: keep the current number instead of leaving 0,
                # which is not a valid ball.
                print(chance, 0.0, current)
                predicted[l] = current
        print("第 %d 次,结果是:%s" % (i, self.data[i]))
        print("所以预测下一次是:%s" % predicted)
        # The last row has no successor to compare against.
        if i + 1 < len(self.data):
            print("真正下一次是:%s" % self.data[i + 1])
        else:
            print("真正下一次是:%s" % "N/A")
        print('*' * 50)
        return predicted


# Script entry point for part one: build the predictor and run one prediction.
if __name__ == '__main__':
    # Disabled steps: get_url() downloads the history pages,
    # huatu() draws the 3-D plot.
    # get_url()
    # huatu()
    a = preddd()  # loads the history file
    a.predict(1916)  # treat row 1916 as the current draw and predict the next one

第二部分:简单用LSTM预测开奖号码

import pandas as pd
import numpy as np 
import os
# from keras.layers import LSTM, Dense
from keras import layers
from keras import Model
from keras import Input
from keras.optimizers import RMSprop
import matplotlib.pyplot as plt
class predit_rnn(object):
    """Train a stacked-LSTM model that regresses the next draw from the current one.

    Attributes set by ``__init__``:
        data: Raw draws, one per row, sorted by the file's first column
            (which is then dropped).
        data1: ``data`` as float64, standardised per column.
        train_data, val_data, test_data: Slices of ``data1`` with a
            length-1 time axis inserted, i.e. shape ``(rows, 1, columns)``.
        train_labels_1..7, val_labels_1..7, test_labels_1..7: Raw values of
            columns 0..6, shifted one row later than the matching inputs so
            that each input row is paired with the following draw.

    Attributes set by ``train_predict``:
        model: The compiled Keras model.
        history: The object returned by ``model.fit``.
    """

    def __init__(self, csv_path=r'D:\PycharmProjects\pyQT_sports_lottery\simple_data.csv'):
        """Load the history and build the train/validation/test splits.

        Args:
            csv_path: Path or file-like object accepted by
                ``pandas.read_csv``; space-separated, no header row.
        """
        # on_bad_lines='warn' is the replacement for error_bad_lines=False,
        # which pandas removed in version 2.0.
        self.data = pd.read_csv(csv_path, sep=' ', header=None, on_bad_lines='warn').values
        # Order the rows by the first column, ascending, then drop that column.
        self.data = self.data[np.argsort(self.data[:, 0])]
        self.data = self.data[:, 1:]

        # NOTE(review): the statistics use rows [:1500], which overlaps the
        # validation rows 1400..1499 - confirm this is intended.
        mean = self.data[:1500].mean(axis=0)
        std = self.data[:1500].std(axis=0)
        # Cast first: the raw array may be integer-typed, and in-place
        # float arithmetic on it would fail.
        self.data1 = (self.data.astype(np.float64) - mean) / std

        # Insert a time axis of length 1: (rows, columns) -> (rows, 1, columns).
        self.train_data = np.expand_dims(self.data1[:1400], axis=1)
        self.val_data = np.expand_dims(self.data1[1400:1550], axis=1)
        self.test_data = np.expand_dims(self.data1[1550:len(self.data) - 1], axis=1)

        # Labels are the raw numbers of the row after each input row.
        for k in range(7):
            column = self.data[:, k]
            setattr(self, 'train_labels_%d' % (k + 1), column[1:1401])
            setattr(self, 'val_labels_%d' % (k + 1), column[1401:1551])
            setattr(self, 'test_labels_%d' % (k + 1), column[1551:])

    def train_predict(self):
        """Build, compile and fit the model; store it and the fit history."""
        post_input = Input(shape=(None, 7), name='post_input')
        x = layers.LSTM(150, dropout=0.2, recurrent_dropout=0.2, activation='relu',
                        return_sequences=True)(post_input)
        x = layers.LSTM(250, dropout=0.2, recurrent_dropout=0.2, activation='relu')(x)
        for units in (360, 250, 250, 250, 250, 250, 140, 70):
            x = layers.Dense(units, activation='relu')(x)
        # One single-unit regression head per ball.
        head_names = ('red1', 'red2', 'red3', 'red4', 'red5', 'blue1', 'blue2')
        outputs = [layers.Dense(1, name=head)(x) for head in head_names]
        self.model = Model(post_input, outputs)
        # Mean absolute error replaces accuracy: the heads regress raw
        # numbers under an MSE loss, so exact-match accuracy says nothing.
        self.model.compile(optimizer=RMSprop(1e-4), loss=['mse'] * 7, metrics=['mae'] * 7)

        train_targets = [getattr(self, 'train_labels_%d' % k) for k in range(1, 8)]
        val_targets = [getattr(self, 'val_labels_%d' % k) for k in range(1, 8)]
        self.history = self.model.fit(self.train_data, train_targets,
                                      batch_size=20, epochs=50,
                                      validation_data=(self.val_data, val_targets))

    def huatu(self):
        """Plot training and validation loss, skipping the first three epochs.

        Requires ``train_predict`` to have been called first, because it
        reads ``self.history``.
        """
        skipped = 3
        loss = self.history.history['loss'][skipped:]
        val_loss = self.history.history['val_loss'][skipped:]
        # Number the x-axis with the real epoch numbers of the kept points.
        epochs = range(skipped + 1, skipped + len(loss) + 1)
        plt.figure()
        # A single colour per line; the old call passed both 'b' and color='r'.
        plt.plot(epochs, loss, 'r', label='Training loss')
        plt.plot(epochs, val_loss, 'b', label='Validation loss')
        plt.title('Training and validation loss')
        plt.legend()
        plt.show()



# Script entry point for part two: train the model, then predict one draw.
if __name__ == '__main__':
    a = predit_rnn()
    a.train_predict()
    # Standardised row 1914 is the model input ...
    predit_data = a.data1[1914]
    # ... and the raw row that follows it is printed for comparison.
    print(a.data[1915])
    # Shape (1, 1, 7): one sample, one time step, seven features, matching
    # the model's Input(shape=(None, 7)).
    predit_data = predit_data.reshape(1,1,7)
    predict = a.model.predict(predit_data)
    print(predict)
    # Disabled: plot the training/validation loss curves.
    # a.huatu()

你可能感兴趣的:(python,大乐透)