利用BP神经网络进行数据预测(附工程源码)

一、源码地址及使用说明

如果你是个非常暴躁的人,不想听任何废话,那么点下面的链接。

https://github.com/KANADEM/Data_Prediction

利用BP神经网络进行数据预测(附工程源码)_第1张图片

下载完成之后直接点击bat文件就可以看到你想要的结果。

如果你稍微有一些耐心的话,下面红框中的文件夹可以不用下载,直接用PyCharm打开项目即可,但相关的包需要你自己安装;或者你也可以将它一并下载,然后将工程的Interpreter指向这个文件夹下的python.exe。

利用BP神经网络进行数据预测(附工程源码)_第2张图片

如果你更有一些耐心,希望了解代码每一部分是做什么的,以及自己使用时应该如何修改,请往下看。

二、源码解析

工程目录是这样的:

../

    main.py                         ------程序入口

    DataLoad.py                 ------装载数据

    DataPrediction.py         ------数据预测

    ModelTrain.py               ------模型训练

接下来解释每个文件的内容。

main.py

import DataLoad as dataload
import ModelTrain as modeltrain
import DataPrediction as dataprediction

def main():
    """Run the demo end to end: train on the sample data, then predict once.

    Loads the training matrix, hands it to the training module, loads the
    sample to predict, and prints the prediction next to the reference value
    that is hard-coded in the message.
    """
    # Train the model on the training matrix.
    training_set = dataload.load_data_train()
    modeltrain.Train_Model(training_set)

    # Load the sample to predict and get the prediction result.
    sample = dataload.load_data_pre()
    prediction = dataprediction.Predict_Data(sample)

    # The "true value" shown here is a constant baked into the message.
    message = "真实值为 930291366.85 预测结果为:%f" % (prediction)
    print(message)
    
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

DataLoad.py

import numpy as np

def load_data_train():
    """Return the hard-coded training set as a ``numpy.matrix``.

    The data is declared inline rather than read from a file because it
    stands in for what a database read module would return; data read from a
    file only needs to follow the same layout. The number of rows and columns
    is not fixed: the last column must be the dependent variable to predict
    and every preceding column is a feature.

    Returns:
        numpy.matrix: 22 rows by 13 columns (12 features plus the target).
    """
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0; both
    # produce the same numpy.matrix object.
    data_train = np.asmatrix([
        [14703,250,66,19199.52226,7,3024,3023,13,6972,7857,0.020381,0.00012,325133301.2],
        [10730,216,61,8595.571496,9,2567,2567,8,4854,6077,0.020908,0.000186,246801419.4],
        [18045,299,67,8109.965457,10,3475,3474,13,8926,10570,0.019244,0.000221,471650020.4],
        [14800,287,66,18676.16881,8,3111,3111,8,8074,9260,0.020117,0.000216,384866969.4],
        [19048,331,73,6674.933471,9,4003,3999,14,8987,11094,0.018887,0.000292,402189511.2],
        [18770,297,70,10204.26802,9,3340,3340,13,9009,10746,0.015938,0.000229,412389219.4],
        [18860,291,68,6391.200317,9,3394,3394,13,8889,10515,0.015139,0.000144,706241564.2],
        [20465,296,67,4703.447989,10,2907,2907,12,9664,11540,0.013304,0.000114,448913353.3],
        [22049,318,74,6291.984947,10,3136,3134,10,10332,12972,0.014398,0.000101,859860672],
        [21420,289,67,4468.156265,9,3804,3804,12,9571,13134,0.015067,0.000038,702937200.9],
        [22769,332,71,5671.411026,8,4552,4552,13,11217,14300,0.01722,0.000032,891841206.9],
        [25555,352,77,14103.39354,9,4986,4986,15,11076,15361,0.016333,0.000025,807333345.8],
        [21660,348,72,6798.407224,8,4645,4644,13,11295,13824,0.018439,0.000136,1022155587],
        [12962,257,59,6968.417939,8,2692,2692,8,7840,8920,0.015465,0.000106,785414512.3],
        [19969,320,70,9668.555094,9,3561,3560,14,9958,11791,0.015304,0.000168,899059075.1],
        [21046,350,67,7905.089868,9,4548,4546,14,10547,12853,0.016932,0.000117,882063418.2],
        [23352,355,61,6198.590871,9,4853,4852,16,11313,13748,0.01391,0.00011,826347166.8],
        [20001,344,68,3635.853103,9,4012,4012,13,10726,12076,0.014101,0.000079,892400700.1],
        [23448,345,60,6139.800395,10,4721,4721,19,11114,13654,0.013637,0.000105,841292119.3],
        [25113,307,65,6252.929862,9,4508,4508,17,11421,14700,0.01278,0.000075,928412336.9],
        [22759,334,69,9013.093411,9,3382,3381,13,11012,14682,0.01101,0.000082,1061414667],
        [22858,333,68,8890.838623,9,3257,3256,18,10762,14731,0.010491,0.000114,738365009],
    ])
    return data_train

def load_data_pre(data_train=None):
    """Return the sample to predict, standardized with the training statistics.

    Like the training set, the sample is declared inline to stand in for the
    output of a database read module. Only one sample is handled here; to
    predict several, call the prediction in a loop.

    Features have very different value ranges, so each one is standardized to
    make indicators of different units or magnitudes comparable. The sample
    must go through the same transform the network was trained on, i.e. the
    per-feature mean and standard deviation of the training set. Standardizing
    it with the mean/std of its own values would put it on a different scale
    from the training inputs.

    Args:
        data_train: Optional training data (rows are samples, last column is
            the target) from which the feature statistics are taken. When
            omitted, ``load_data_train()`` is used.

    Returns:
        numpy.ndarray: array of shape (1, n_features) with standardized values.

    Raises:
        ValueError: if the training data does not have exactly one more
            column than the sample has features.
    """
    data_pre = np.array([[18599,324,65,13116,9,2829,2829,12,10020,12509,0.013007,0.000136]])
    if data_train is None:
        data_train = load_data_train()

    # Drop the target column; asarray also turns a numpy.matrix into a plain
    # 2-D array so the statistics come back as 1-D vectors.
    features = np.asarray(data_train, dtype=float)[:, :-1]
    if features.shape[1] != data_pre.shape[1]:
        raise ValueError(
            "training data has %d feature columns but the sample has %d"
            % (features.shape[1], data_pre.shape[1])
        )

    # Population std (ddof=0), matching np.std as used at training time.
    feature_mean = features.mean(axis=0)
    feature_std = features.std(axis=0)
    return (data_pre - feature_mean) / feature_std

ModelTrain.py

import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Activation
    #模型训练
def Train_Model(data_train):
    """Standardize the training data, fit the network, and persist the results.

    Every column (features and target) is standardized with its own mean and
    population standard deviation. A network with one ReLU hidden layer, as
    wide as the number of features, is trained with Adam on mean squared
    error, using the whole training set as a single batch.

    Two files are written to the current directory:
      * ``./modelweight``    - the trained weights only, so the same
        architecture has to be rebuilt before loading them.
      * ``./y_mean_std.txt`` - the target's mean and standard deviation,
        separated by one space, needed to turn predictions back into the
        original scale.

    Args:
        data_train: 2-D data whose last column is the dependent variable and
            whose other columns are features.
    """
    modelfile = './modelweight'  # where the model weights are stored
    y_mean_std = './y_mean_std.txt'  # target standardization parameters

    # Format the training set as a matrix and standardize it column-wise.
    data_train = np.matrix(data_train)
    data_mean = np.mean(data_train, axis=0)
    data_std = np.std(data_train, axis=0)
    data_train = (data_train - data_mean) / data_std

    # Split the independent variables from the dependent variable.
    x_train = data_train[:, :-1]
    y_train = data_train[:, -1]
    n_features = x_train.shape[1]

    # Define the model; input_dim is taken from the number of features.
    model = Sequential()
    model.add(Dense(n_features, input_dim=n_features, kernel_initializer="uniform"))
    model.add(Activation('relu'))
    model.add(Dense(1, input_dim=n_features))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(x_train, y_train, epochs=4000, batch_size=x_train.shape[0])

    # Only the weights are saved, so the model must be redefined before loading.
    model.save_weights(modelfile)

    # Take the target's statistics as plain Python floats so they can be
    # written directly, without parsing the printed form of a matrix.
    y_mean = float(data_mean[0, -1])
    y_std = float(data_std[0, -1])
    print("训练完毕")

    # The context manager guarantees the file is flushed and closed.
    # repr() keeps full float precision.
    with open(y_mean_std, "w") as f:
        f.write(repr(y_mean) + " " + repr(y_std))

DataPrediction.py

from keras.models import Sequential
from keras.layers.core import Dense, Activation
#读取因变量标准化过程中的参数
def load_y():
    """Read the target's standardization parameters saved during training.

    Returns:
        list[str]: the whitespace-separated tokens of ``./y_mean_std.txt``.
        Train_Model writes the target mean first and its standard deviation
        second.
    """
    # The context manager closes the file even if reading fails. Splitting on
    # any whitespace tolerates a trailing newline or repeated spaces, which
    # would otherwise produce empty or newline-tainted tokens.
    with open("./y_mean_std.txt", "r") as f:
        return f.read().split()

def Predict_Data(data_pre):
    """Predict the target for ``data_pre`` and map it back to the original scale.

    The network is rebuilt with the same layout used for training (only the
    weights are stored on disk), the weights are loaded from
    ``./modelweight``, and the model output is de-standardized with the
    target mean and standard deviation returned by ``load_y``.

    Args:
        data_pre: 2-D input whose second dimension is the number of features.

    Returns:
        The model's prediction multiplied by the saved standard deviation
        plus the saved mean.
    """
    n_features = data_pre.shape[1]

    # Define the model.
    network = Sequential()
    network.add(Dense(n_features, input_dim=n_features, kernel_initializer="uniform"))
    network.add(Activation('relu'))
    network.add(Dense(1, input_dim=n_features))
    network.compile(loss='mean_squared_error', optimizer='adam')

    # Load the model weight file.
    network.load_weights('./modelweight')

    # Restore the prediction using the parameters saved at standardization.
    target_stats = load_y()
    normalized = network.predict(data_pre)
    target_std = float(target_stats[1])
    target_mean = float(target_stats[0])
    return normalized * target_std + target_mean

到这里工程就完事了,其实总共不到100行代码,非常的简洁,只要更换掉矩阵数据就可以做你自己的预测了。

你可能感兴趣的:(01_人工智能)