运行环境
- python 3.6.7
- visual studio code
- Windows 10
源代码
###层:深度学习的基础组件
'''from keras import layers
layer = layers.Dense(32,input_shape=(784,)) #有32个输出单元的密集层 将向量数据保存在2D张量中
from keras import models
from keras import layers
model=models.Sequential()
model.add(layers.Dense(32,input_shape=(784,)))
model.add(layers.Dense(32))''' #自动推导出输入形状为上一层的输出形状
###单一损失函数
'''from keras import optimizers #优化器
model.compile(optimizer=optimizers.RMSprop(lr=0.001),loss='mse',metrics=['accuracy'])
#编译 优化器 学习率 损失函数 指标
model.fit(input_tensor,target_tensor,batch_size=128,epochs=10)'''
#学习过程 拟合
### Load the IMDB dataset
from keras.datasets import imdb
# num_words=10000 restricts the vocabulary to 10000 word indices; the call
# returns one (data, labels) pair for the training split and one for the test split.
(train_data,train_labels),(test_data,test_labels) = imdb.load_data(num_words=10000)
###解码英文单词
'''word_index = imdb.get_word_index()
reverse_word_index = dict( [(value,key) for (key,value) in word_index.items() ] ) #键值颠倒,将整数索引映射为单词
decoded_review = ' '.join( [reverse_word_index.get(i - 3,'?') for i in train_data[0] ] ) #将评论解码 -3是因为0,1,2是‘padding’填充 ‘start of sequence’序列开始 ‘unknown’未知词 分别保留的索引
print(decoded_review)
'''
###将整数序列编码为二进制矩阵
import numpy as np
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode integer sequences into a 2D matrix.

    Args:
        sequences: A sized iterable whose items are sequences of integer
            indices. Every index must be a valid column index for a row
            of ``dimension`` entries.
        dimension: Number of columns in the output. It should match the
            ``num_words`` value used when the data was loaded.

    Returns:
        A float64 ``numpy.ndarray`` of shape ``(len(sequences), dimension)``
        where ``result[i, j]`` is 1.0 if index ``j`` occurs in
        ``sequences[i]`` and 0.0 otherwise. Repeated indices still yield 1.0.

    Raises:
        IndexError: If an index is out of bounds for ``dimension`` columns.
    """
    # Start from an all-zero matrix with one row per sequence.
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        # Fancy indexing sets every column listed in `sequence` at once.
        results[i, sequence] = 1.
    return results
# Multi-hot encode the training and test reviews, in that order. Per the
# original author's notes, each input is an object array of shape (25000,)
# and each result is a float64 matrix of shape (25000, 10000).
x_train, x_test = (
    vectorize_sequences(split) for split in (train_data, test_data)
)
# Convert the label arrays to float32 vectors (noted by the author as
# int64, shape (25000,), before conversion).
y_train, y_test = (
    np.asarray(labels).astype('float32')
    for labels in (train_labels, test_labels)
)
### Model definition
from keras import layers, losses, metrics, models, optimizers

# Two 16-unit ReLU hidden layers followed by a single sigmoid output unit;
# the input is a 10000-dimensional vector.
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# The three compile() calls below configure the same model in three
# equivalent-looking ways; each call is applied in turn to `model`.

# Compile the model using string shortcuts
model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Configure the optimizer explicitly
# NOTE(review): newer Keras releases spell this argument `learning_rate`;
# `lr` is kept as written for the Keras version this script targets - confirm.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.RMSprop(lr=0.001),
    metrics=['accuracy'],
)

# Pass the loss and metric as function objects instead of strings
model.compile(
    loss=losses.binary_crossentropy,
    optimizer=optimizers.RMSprop(lr=0.001),
    metrics=[metrics.binary_accuracy],
)
# Hold out the first 10000 samples as a validation set; train on the rest
x_val, partial_x_train = x_train[:10000], x_train[10000:]
y_val, partial_y_train = y_train[:10000], y_train[10000:]

# Train the model: the metric is registered under the name 'acc', and the
# held-out samples are evaluated alongside training via validation_data.
model.compile(
    loss='binary_crossentropy',
    metrics=['acc'],
    optimizer='rmsprop',
)
history = model.fit(
    partial_x_train,
    partial_y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_val, y_val),
)
# Plot the training and validation loss
import matplotlib.pyplot as plt
history_dict = history.history  # per-epoch metric lists keyed by metric name
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
epochs = range(1, len(loss_values) + 1)  # 1-based epoch numbers for the x axis
plt.plot(epochs, loss_values, 'bo', label='Training loss')  # 'bo' = blue dots
plt.plot(epochs, val_loss_values, 'b', label='Validation loss')  # 'b' = solid blue line
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
# Plot the training and validation accuracy
plt.clf()  # clear the current figure before drawing the second plot
# The 'acc' / 'val_acc' keys correspond to metrics=['acc'] in the compile()
# call that precedes model.fit().
acc = history_dict['acc']
val_acc = history_dict['val_acc']
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()  # must be called; a bare `plt.legend` draws nothing
plt.show()
# The plots indicate overfitting, so build a fresh model and train it for
# only 4 epochs on the full training set, then evaluate on the test set.
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer='rmsprop',
)
model.fit(x_train, y_train, batch_size=512, epochs=4)

# Evaluate on the test data and print whatever evaluate() returns
# (presumably the loss followed by the accuracy metric - confirm).
results = model.evaluate(x_test, y_test)
print(results)

# Print the model's raw outputs for every test sample
prediction = model.predict(x_test)
print(prediction)
控制台输出