财政收入影响因素分析和预测模型

描述:采用数据挖掘中的人工神经网络算法:首先用 Adaptive-Lasso 方法找出相关性最大的因素,然后根据现有的数据建立神经网络模型,最后对未知属性做出预测。

代码:

import pandas as pd

def Data_pro(feature, data, train_years=range(1994, 2014)):
    """Standardize the training rows and split them into features and label.

    Args:
        feature: List of column names used as model inputs.
        data: DataFrame whose index labels include every entry of
            ``train_years`` and whose columns include ``feature`` and ``'y'``.
        train_years: Index labels of the rows used for training. Defaults to
            1994 through 2013, the window the original script hard-coded.

    Returns:
        A tuple ``(data_mean, data_std, x_train, y_train)``: the per-column
        mean and standard deviation of the training rows (pandas Series), and
        the standardized feature matrix and label vector as NumPy arrays.
    """
    data_train = data.loc[list(train_years)].copy()  # copy so `data` is untouched
    data_mean = data_train.mean()
    data_std = data_train.std()
    data_train = (data_train - data_mean) / data_std  # z-score standardization
    # `.as_matrix()` was removed in pandas 1.0; `.values` works on every version.
    x_train = data_train[feature].values
    y_train = data_train['y'].values
    return data_mean, data_std, x_train, y_train

def Build_Net(data,data_mean,data_std,x_train,y_train,feature):
    """Train a one-hidden-layer network and add its predictions to ``data``.

    Args:
        data: DataFrame containing the ``feature`` columns; a ``'y_pred'``
            column is added to it in place.
        data_mean: Per-column means used to standardize the training data.
        data_std: Per-column standard deviations used for the same purpose.
        x_train: Standardized training feature matrix.
        y_train: Standardized training labels.
        feature: List of column names used as model inputs.

    Returns:
        The same ``data`` object, now carrying the ``'y_pred'`` column with
        predictions mapped back to the original scale of ``'y'``.
    """
    from keras.models import Sequential
    from keras.layers import Activation, Dense

    model = Sequential()
    # Size the input from the feature list rather than a hard-coded 6, and use
    # the positional `units` argument, which replaces the old `output_dim=`.
    model.add(Dense(12, input_shape=(len(feature),)))
    model.add(Activation('relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # `epochs` replaces the old `nb_epoch` keyword.
    model.fit(x_train, y_train, epochs=10000, batch_size=16)

    # Standardize every row with the training statistics, predict, then undo
    # the standardization of the label.
    # `.as_matrix()` was removed in pandas 1.0; `.values` works on every version.
    x = ((data[feature] - data_mean[feature]) / data_std[feature]).values
    # predict() yields one column per output unit; flatten it to one value per row.
    y_scaled = model.predict(x).ravel()
    data[u'y_pred'] = y_scaled * data_std['y'] + data_mean['y']
    print(data[u'y_pred'])
    return data
    
def Draw_result(data):
    """Plot the ``'y'`` and ``'y_pred'`` columns, one subplot each, and show the figure."""
    import matplotlib.pyplot as plt

    line_styles = ['b-o', 'r-*']  # one style string per plotted column
    series = data[['y', 'y_pred']]
    series.plot(subplots=True, style=line_styles)
    plt.show()

def main(inputfile=None):
    """Run the pipeline: load the data, train the network, plot the result.

    Args:
        inputfile: Path of the Excel file to read. When omitted, the path the
            original script hard-coded is used.
    """
    if inputfile is None:
        # Output of the grey-prediction (GM11) step, per the original comment.
        inputfile = 'F:/Python/IDLE--python/BigDataAnalyze/chapter13-data/data1_GM11.xls'
    feature = ['x1', 'x2', 'x3', 'x4', 'x5', 'x7']  # columns used as model inputs

    # NOTE(review): Data_pro selects training rows by the labels 1994-2013, so
    # the loaded frame's index presumably has to hold the year; confirm whether
    # the sheet needs `index_col=0` here.
    data = pd.read_excel(inputfile)

    # Standardize the training window and split it into features and label.
    data_mean, data_std, x_train, y_train = Data_pro(feature, data)

    # Train the network and attach the predictions as the 'y_pred' column.
    data = Build_Net(data, data_mean, data_std, x_train, y_train, feature)

    # Plot the actual and predicted values.
    Draw_result(data)

# Run the pipeline only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

你可能感兴趣的:(数据分析)