Digit Recognizer是Kaggle的入门题,我尝试下训练一个卷积神经网络来解决该问题,主要记录下写代码中遇到的一些问题。
代码:
import tensorflow as tf
import pandas as pd
from keras.models import Sequential, load_model
from keras.layers import Dense, Conv2D, Activation, MaxPool2D, Flatten, Dropout
from keras.callbacks import EarlyStopping
from keras.optimizers import adam
import numpy as np
from matplotlib import pyplot as plt
def reading_test_data(path='training_data/test.csv'):
    """Load the test images and shape them for the network.

    Reads the CSV at ``path``, treats every column as pixel data, reshapes
    each row to ``(28, 28, 1)`` and divides by 255 so the inputs are on the
    same scale as the ones produced by ``reading_training_data``.

    Args:
        path: CSV file to read. Defaults to the original hard-coded location.

    Returns:
        Array of shape ``(n_rows, 28, 28, 1)``.
    """
    test_data = pd.read_csv(path)
    x_test_data = test_data.values[:, :]
    x_test_data = x_test_data.reshape(x_test_data.shape[0], 28, 28, 1)
    # The training inputs are divided by 255; without the same scaling here the
    # saved model would be evaluated on inputs it was never trained on.
    return x_test_data / 255
def reading_training_data(path='training_data/train.csv', num_classes=10):
    """Load the training images and one-hot encode their labels.

    The first CSV column is read as the label and the remaining columns as
    pixel data. Pixels are reshaped to ``(28, 28, 1)`` per row and divided
    by 255.

    Args:
        path: CSV file to read. Defaults to the original hard-coded location.
        num_classes: Width of the one-hot label matrix. Defaults to 10.

    Returns:
        Tuple ``(x_train_data, fix_y)`` where ``x_train_data`` has shape
        ``(n_rows, 28, 28, 1)`` and ``fix_y`` has shape
        ``(n_rows, num_classes)`` with a single 1 per row at the label index.
    """
    training_data = pd.read_csv(path).values
    labels = training_data[:, 0].astype(int)
    pixels = training_data[:, 1:]
    x_train_data = pixels.reshape(pixels.shape[0], 28, 28, 1) / 255
    fix_y = np.zeros((labels.shape[0], num_classes))
    # Set fix_y[i, labels[i]] = 1 for every row in one vectorised assignment
    # instead of a per-row Python loop.
    fix_y[np.arange(labels.shape[0]), labels] = 1
    return x_train_data, fix_y
def train(x_train_data, y_train_data):
    """Build, fit and save the convolutional classifier.

    The network is two 3x3 same-padded convolutions (32 filters each), a 2x2
    max-pool, dropout, a 128-unit dense layer, dropout, and a 10-way softmax.
    It is fitted with the Adam optimizer and categorical cross-entropy, and
    the fitted model is written to ``model/my_model.h5``.

    Args:
        x_train_data: Input images; the first layer declares an input shape
            of ``(28, 28, 1)`` per sample.
        y_train_data: One-hot label matrix with 10 columns, matching the
            final softmax layer.
    """
    import os

    model = Sequential()
    model.add(Conv2D(input_shape=(28, 28, 1), kernel_size=(3, 3), filters=32,
                     padding='same', activation='relu'))
    model.add(Conv2D(kernel_size=(3, 3), filters=32, padding='same',
                     activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss="categorical_crossentropy", optimizer='adam',
                  metrics=['accuracy'])

    # Stop on the held-out split created by validation_split below. Training
    # accuracy keeps rising while the model overfits, so monitoring it cannot
    # end training when generalisation degrades. 'val_loss' is also logged
    # under that name regardless of how the accuracy metric key is spelled
    # ('acc' vs 'accuracy') by the installed Keras version.
    early_stopping = EarlyStopping(monitor='val_loss', patience=2)
    model.fit(x_train_data, y_train_data, epochs=30, batch_size=3000,
              validation_split=0.2, callbacks=[early_stopping])

    # Make sure the target directory exists before saving into it.
    os.makedirs('model', exist_ok=True)
    model.save('model/my_model.h5')
def predict():
    """Predict a digit label for every test image with the saved model.

    Loads the test images via ``reading_test_data`` and the model from
    ``model/my_model.h5``, then converts each row of predicted class
    probabilities into a single class index.

    Returns:
        1-D integer array with one predicted class index per test row.
    """
    x_test_data = reading_test_data()
    model_predict = load_model('model/my_model.h5')
    result = model_predict.predict(x_test_data, batch_size=1000)
    # Take the most probable class. A fixed "> 0.5" threshold leaves the label
    # at its initial value 0 whenever no single class exceeds 0.5, silently
    # reporting those samples as digit 0.
    return np.argmax(result, axis=1)
def strat_train():
    """Load the training set and fit the model on it."""
    # reading_training_data() returns (inputs, labels), which is exactly the
    # positional argument order train() expects.
    train(*reading_training_data())
def save_result(final_result):
    """Write the predicted labels to ``result.csv``.

    Prints the number of predictions, then writes a two-column CSV:
    ``ImageId`` (1-based row number) and ``Label`` (the prediction cast to
    int), without the DataFrame index.

    Args:
        final_result: 1-D array of predicted labels.
    """
    count = final_result.shape[0]
    print(count)
    submission = pd.DataFrame({
        'ImageId': range(1, count + 1),
        'Label': final_result.astype(int),
    })
    submission.to_csv('result.csv', sep=',', index=False)
# Full pipeline: train and save the model, predict labels for the test set,
# write them to the submission file, then report completion.
strat_train()
save_result(predict())
print('finish')
正确率是98.614%。
主要记录下遇到的问题:
1、softmax函数输出的格式和最后提交的结果的格式不一样,需要自己转换下。
2、pandas.read_csv默认会把文件的第一行当作列名(表头),这一行不会作为数据被读入;如果文件没有表头、需要把第一行也当作数据读取,需要使用
pandas.read_csv("test.csv", header=None)
3、epochs 到30左右会有明显的过拟合现象,在测试集上的表现反而不如epochs为10的时候。
4、分类任务的loss function一般适合用cross_entropy,而回归问题用MSE更合适。