# Set TensorFlow's C++ log-level threshold to '2' to quiet its console output.
# This is done before the keras import below, presumably so that it is already
# in effect when the backend loads -- TODO confirm.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# 1. Load the IMDB dataset as (data, labels) pairs for the train and test splits.
# num_words = 10000 limits the vocabulary size (per the Keras imdb.load_data
# API this keeps the most frequent words); it matches the default `dimension`
# used when the sequences are vectorized later in this file.
from keras.datasets import imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words = 10000)
# 2.使用网络第一层 或者 编码方式进行张量化
# 此处使用了编码方式将训练数据的数字序列编码为二进制矩阵,即将整数序列转换为张量
import numpy as np
# 定义训练数据序列转换为二进制矩阵的函数
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode integer sequences into a 2-D float matrix.

    Args:
        sequences: A sized iterable of integer index sequences (for example
            lists of word indices). Every index must be smaller than
            ``dimension``.
        dimension: Number of columns of the result, i.e. the size of the
            index vocabulary.

    Returns:
        A NumPy array of shape ``(len(sequences), dimension)`` in which
        ``result[i, j]`` is 1.0 when index ``j`` occurs in ``sequences[i]``
        and 0.0 otherwise. Repeated indices are only marked once.

    Raises:
        IndexError: If a sequence contains an index outside
            ``[-dimension, dimension)``.
    """
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        # Fancy indexing sets every column listed in `sequence` in one step.
        results[i, sequence] = 1
    return results
# Multi-hot encode the training and test reviews.
x_train, x_test = (vectorize_sequences(split) for split in (train_data, test_data))
# Convert the training and test labels to float32 arrays.
y_train, y_test = (
    np.array(labels, dtype='float32') for labels in (train_labels, test_labels)
)
# 3.构建神经网络,选择神经网络模型,确定层数,并且确定使用哪种激活函数和哪种层
# 这里使用的是3个全连接层,激活函数分别是前两个"relu",最后一层使用"sigmoid"
from keras import models
from keras import layers
from keras import optimizers
from keras import regularizers
def build_model_1():
    """Build and compile the dropout-regularized binary classifier.

    The network is a stack of two 16-unit ReLU ``Dense`` layers, each followed
    by ``Dropout(0.5)``, and a single sigmoid output unit. It expects input
    vectors of length 10000.

    Returns:
        The compiled ``Sequential`` model, ready for ``fit``/``evaluate``.
    """
    model = models.Sequential()
    model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(1, activation='sigmoid'))
    # The model must be compiled before it can be trained. Binary
    # cross-entropy matches the single sigmoid output, and the 'accuracy'
    # metric produces the 'accuracy' / 'val_accuracy' history keys that the
    # plotting code in this file reads.
    # NOTE(review): 'rmsprop' is an assumed optimizer choice -- confirm.
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    # Callers use the return value (`model_1 = build_model_1()`), so the model
    # has to be returned explicitly.
    return model
# 4. Hold out the first 10000 training samples as a validation set.
x_val = x_train[:10000]
partial_x_train = x_train[10000:]
y_val = y_train[:10000]
partial_y_train = y_train[10000:]

# Train on the remaining samples and monitor the held-out split, so that the
# test set is not used to decide how many epochs to train for.
model_1 = build_model_1()
history_1 = model_1.fit(
    partial_x_train,
    partial_y_train,
    epochs=12,
    batch_size=512,
    validation_data=(x_val, y_val),
)
import matplotlib.pyplot as plt

# Plot the per-epoch training and validation loss recorded in `history_1`.
history_dict_1 = history_1.history
loss = history_dict_1['loss']
val_loss = history_dict_1['val_loss']
epochs = range(1, len(loss) + 1)
# The y-values must be the recorded loss series (one entry per epoch).
# matplotlib format strings: 'bo' = blue circle markers, 'b' = blue line.
# NOTE(review): 'Traing' in the labels looks like a typo for 'Training'; the
# strings are left unchanged so the rendered figure text stays the same.
plt.plot(epochs, loss, 'bo', label='Traing loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Traing and Smaller Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid()
plt.show()
# Clear the current figure so the next plot starts from an empty canvas.
plt.clf()
# Plot the per-epoch training and validation accuracy from the same history
# dict, against the same `epochs` axis as the loss plot.
acc = history_dict_1['accuracy']
val_acc = history_dict_1['val_accuracy']
# matplotlib format strings: 'bo' = blue circle markers, 'b' = blue line.
# NOTE(review): 'Traing' looks like a typo for 'Training'; it is runtime text
# and is left unchanged here.
plt.plot(epochs, acc, 'bo', label = 'Traing accuracy')
plt.plot(epochs, val_acc, 'b', label = 'Validation accuracy')
plt.title('Traing and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
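# As the plots show, the model starts to overfit after the 3rd epoch, so for the run on the test data, training can be stopped at the third epoch.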
# Build a fresh model and retrain it for only 3 epochs, this time on the full
# training set.
# NOTE(review): the test set is passed as validation data here, so the
# per-epoch validation metrics of this run are test-set metrics.
model_1 = build_model_1()
history_1 = model_1.fit(x_train,
y_train,
epochs= 3,
batch_size= 512,
validation_data= (x_test, y_test))
# Evaluate the retrained model on the test set and print the result.
results = model_1.evaluate(x_test, y_test)
print(results)
# Use the trained network to generate predictions (here on the test inputs).
predict = model_1.predict(x_test)
print(predict)
# The final accuracy reaches 88.8%; the model can be optimized further later on through hyperparameter tuning.