# IMDB binary-classification deep-learning model (IMDB二分类深度学习模型)

# IMDB互联网电影数据库

# 对电影评论做情感分析

from keras.datasets import imdb

import numpy

from keras import models

from keras import layers

from keras import optimizers

from keras import losses

from keras import metrics

import matplotlib.pyplot as plt

# Load the IMDB dataset (reviews pre-encoded as lists of word indices).
# Alternative: create a sibling "data" folder and let Keras download the four
# preprocessed archives automatically with the default call:
# (train_data, train_labels),(test_data, test_labels) = imdb.load_data(num_words=10000)
# `path` points at a local copy of the .npz archive; if the file does not
# exist, Keras downloads the dataset to that location.
# num_words=10000 keeps only the 10,000 most frequent words.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(path="/Users/2650523664/Desktop/idb.npz", num_words=10000)

# word -> integer index for the dataset's vocabulary.
word_index = imdb.get_word_index()

# Invert the mapping (integer index -> word) so reviews can be decoded.
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

# Decode the first training review back to text. Indices are shifted by 3
# because 0, 1, 2 are reserved ("padding", "start of sequence", "unknown");
# unknown indices render as '?'.
decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[0]])

#准备数据

def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode integer sequences.

    Returns a float array of shape (len(sequences), dimension) where
    row i has 1.0 at every column listed in sequences[i], 0.0 elsewhere.
    """
    encoded = numpy.zeros((len(sequences), dimension))
    for row, word_indices in enumerate(sequences):
        for idx in word_indices:
            encoded[row, idx] = 1
    return encoded

# Multi-hot encode the reviews into 10,000-dimensional vectors.
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

# Labels become float32 arrays of 0.0 / 1.0 (negative / positive review).
y_train = numpy.asarray(train_labels).astype('float32')
y_test = numpy.asarray(test_labels).astype('float32')

# Build the network: two 16-unit relu hidden layers on the 10,000-dim
# multi-hot input, plus a sigmoid output unit for binary classification.
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# One compile call replaces the original's three back-to-back calls
# (string shortcuts, explicit optimizer, explicit loss/metric objects):
# each compile overwrote the previous one, so only the last took effect.
# `learning_rate` replaces the deprecated `lr` keyword; 0.001 is RMSprop's
# documented default, matching the 'rmsprop' string shortcut.
model.compile(optimizer=optimizers.RMSprop(learning_rate=0.001),
              loss=losses.binary_crossentropy,
              metrics=[metrics.binary_accuracy])

# Hold out the first 10,000 training samples as a validation set.
x_val = x_train[:10000]
partial_x_train = x_train[10000:]
y_val = y_train[:10000]
partial_y_train = y_train[10000:]

# Train the model. This compile overrides any earlier configuration; the
# metric is recorded in the training history under the keys 'acc'/'val_acc',
# which the plotting code below reads.
model.compile(optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['acc'])

# 20 epochs deliberately overfits so the curves plotted below reveal
# where validation loss starts rising.
history = model.fit(partial_x_train,
                partial_y_train,
                epochs=20,
                batch_size=512,
                validation_data=(x_val, y_val))

# Plot training loss vs. validation loss per epoch.
history_dict = history.history
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
epochs = range(1, len(loss_values) + 1)

# 'bo' = blue dots (training); 'b' = solid blue line (validation).
plt.plot(epochs, loss_values, 'bo', label='Training loss')
plt.plot(epochs, val_loss_values, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Plot training accuracy vs. validation accuracy per epoch.
plt.clf()# clear the previous (loss) figure

# Keys 'acc'/'val_acc' match metrics=['acc'] used in the compile above.
acc = history_dict['acc']
val_acc = history_dict['val_acc']
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Retrain a fresh model from scratch for only 4 epochs (the validation
# curves above show overfitting begins after ~4 epochs), then evaluate
# on the held-out test set.
# NOTE: the original source compiled and fit the same model twice in a row,
# so its "train from scratch" section actually continued training an
# already-trained network for 4 extra epochs; a single build/compile/fit
# is what the comment intends.
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=4, batch_size=512)

# results = [test_loss, test_accuracy]
results = model.evaluate(x_test, y_test)
print(results)

# Per-review predicted probability of a positive sentiment.
print(model.predict(x_test))


# (scraped-page artifacts, commented out so the file stays valid Python)
# IMDB二分类深度学习模型_第1张图片  [image caption from the original article]
# 你可能感兴趣的:(IMDB二分类深度学习模型)  ["related posts" footer]