MLP回归预测

#!/usr/bin/env python

# coding: utf-8

In[41]:

import pandas as pd, numpy as np
from sklearn.metrics import mean_squared_error
from math import sqrt

In[7]:

# Load train/test data. NOTE(review): the scraped source used curly quotes;
# restored ASCII quoting. Train file is GBK-encoded; the test file is read with
# the default encoding as in the original — confirm it is not GBK as well.
df_train = pd.read_csv('train_data_regression.csv', encoding='gbk')
#df_test=pd.read_csv('test_data.csv')
df_test = pd.read_csv('test_result.csv')
#df_test_y=pd.read_csv('')

def clean_data(df):
    """Replace sentinel missing-value markers with NaN, cast feature columns
    to float, and mean-impute the remaining gaps.

    The identifier columns (SDATE, 小区名称, WeakCoverage) are left untouched.
    Returns a new DataFrame; the input is not mutated in place.
    """
    df = df.replace('NIL', np.nan)
    # NOTE(review): the next two sentinel strings were garbled in the scraped
    # source ('/0' and a spaced-out 'null'); confirm against the raw CSV.
    df = df.replace('/0', np.nan)
    df = df.replace('null', np.nan)
    for c in [c for c in df.columns if c not in ['SDATE', '小区名称', 'WeakCoverage']]:
        # np.float was removed in NumPy 1.20+; the builtin float is equivalent.
        df[c] = df[c].astype(float)
    try:
        display(df.isnull().sum().to_frame().T)  # rich preview under IPython
    except NameError:
        print(df.isnull().sum().to_frame().T)  # plain-script fallback
    # numeric_only=True: modern pandas raises if mean() hits string columns.
    df = df.fillna(df.mean(numeric_only=True))  # 使用均值填充 (mean imputation)
    return df

# Clean both frames, then pick feature columns: drop identifiers and the
# regression target (RRC连接态最大用户数(个数)).
df_train = clean_data(df_train)
df_test = clean_data(df_test)

X_columns = [c for c in df_train.columns
             if c not in ['SDATE', '小区名称', 'WeakCoverage', 'RRC连接态最大用户数(个数)']]
y_column = 'RRC连接态最大用户数(个数)'

In[8]:

# Quick look at raw values. NOTE(review): the shape comments were mangled by
# the markdown export (asterisks/letters eaten); reconstructed intent below.
x = df_train[['小区名称']].values  # x is an (n, 1) column vector
y = df_train['小区下行业务量(GB)'].values  # y is a 1-D array of shape (n,)
x

In[11]:

y

In[12]:

y[:, np.newaxis]

In[25]:

from sklearn.preprocessing import StandardScaler

# Separate scalers for features and target: ss_y is reused later to map the
# model's scaled predictions back to the original units.
ss_y = StandardScaler()
train_y_ss = ss_y.fit_transform(df_train[[y_column]]).flatten()
test_y_ss = ss_y.transform(df_test[[y_column]]).flatten()

# Fit the feature scaler on the training set only, then apply it to both sets.
ss_X = StandardScaler()
train_X_ss = ss_X.fit_transform(df_train[X_columns])
test_X_ss = ss_X.transform(df_test[X_columns])
#X_train_ss, X_test_ss, y_train, y_test=train_test_split(X_ss, df_train[y_column], test_size=0.25, random_state=1)

In[18]:

train_X_ss

In[19]:

train_y_ss

In[ ]:

df_train.head(3)

## MLP 多层感知机

In[20]:

import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
# The markdown export ate the double underscores: restore __version__.
print("TensorFlow:{}\tKeras:{}".format(tf.__version__, keras.__version__))

In[21]:

import matplotlib.pyplot as plt


def show_train_history(train_history, train, validation):
    """Plot a training metric against its validation counterpart per epoch.

    train_history: the History object returned by model.fit().
    train / validation: keys into train_history.history (e.g. 'loss', 'val_loss').
    """
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

In[22]:

def get_sequental(layer_units, input_dim, output_dim, drop_out=0, drop_out_layer=1):
    """Build a 3-hidden-layer MLP for regression.

    layer_units: width of each hidden layer.
    input_dim / output_dim: feature count and number of outputs.
    drop_out: dropout rate; when > 0 a Dropout is always added after the first
        hidden layer.
    drop_out_layer: 2 or 3 additionally adds Dropout after that hidden layer.
        NOTE(review): with drop_out_layer==1 only the first-layer dropout is
        applied (via the drop_out > 0 check) — confirm this asymmetry is intended.
    """
    model = Sequential()
    layer_1 = Dense(units=layer_units, activation='relu', input_dim=input_dim)  # other args use defaults
    layer_2 = Dense(units=output_dim)  # linear output, one value per target column
    model.add(layer_1)  # first hidden layer
    if drop_out > 0:
        model.add(Dropout(drop_out))
    model.add(Dense(units=layer_units, activation='relu'))  # second hidden layer
    if drop_out_layer == 2:
        model.add(Dropout(drop_out))
    model.add(Dense(units=layer_units, activation='relu'))  # third hidden layer
    if drop_out_layer == 3:
        model.add(Dropout(drop_out))
    model.add(layer_2)  # output layer

    return model

In[23]:

# Hidden width = 2x the feature count; single output for regression.
model = get_sequental(len(X_columns) * 2, len(X_columns), 1, drop_out=0.5, drop_out_layer=2)
model.summary()
#model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In[26]:

train_history=model.fit(train_X_ss,train_y_ss,validation_split=0.2, epochs=250, batch_size=100)

In[30]:

train_history.history

In[31]:

show_train_history(train_history,‘loss’,‘val_loss’)

In[32]:

show_train_history(train_history,‘mae’,‘val_mae’)

In[33]:

# evaluate() returns [loss, *metrics]; the model was compiled with MSE loss and
# an MAE metric, so score[1] is MAE — the original printout mislabeled it
# "accuracy", which does not apply to regression.
score = model.evaluate(test_X_ss, test_y_ss, verbose=1)
print("\nTest MSE:", score[0])
print("Test MAE:", score[1])

In[34]:

score

In[38]:

# Predictions are still in the standardized target space (ss_y units).
y_pred_ss=model.predict(test_X_ss)
y_pred_ss

In[39]:

# Map predictions back to the original target units via the fitted y-scaler.
y_pred = ss_y.inverse_transform(y_pred_ss)
y_pred

In[42]:

# Calculate RMSE in the original target units (the leading comment line lost
# its '#' in the blog export, which made this cell a SyntaxError).
rmse = sqrt(mean_squared_error(df_test[y_column], y_pred))
print('Test RMSE: %.3f' % rmse)

In[ ]:

In[ ]:

In[ ]:

你可能感兴趣的:(MLP回归预测)