Table of Contents
3.1 Neural Networks
3.4 Classifying Movie Reviews: A Binary Classification Problem
3.5 Classifying Newswires: A Multiclass Classification Problem
3.6 Predicting House Prices: A Regression Problem
Summary:
# coding=utf-8
"""
__project_ = 'Python深度学习'
__file_name__ = '3.4电影评论分类'
__author__ = 'WIN10'
__time__ = '2020/4/11 12:02'
__product_name = PyCharm
"""
from keras.datasets import imdb
import numpy as np
from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics
import matplotlib.pyplot as plt
# Load the data, keeping only the 10,000 most frequent words
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)
print(train_data[0])
print(train_labels[0])
word_index = imdb.get_word_index()
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
# Decode the first review; indices are offset by 3 because 0, 1, and 2
# are reserved for "padding", "start of sequence", and "unknown"
decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[0]])
print(decoded_review)
# Prepare the data: multi-hot encode the integer sequences into binary vectors
def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.  # set the indices present in this sequence to 1
    return results
x_train=vectorize_sequences(train_data)
x_test=vectorize_sequences(test_data)
print(x_train.shape)
y_train=np.asarray(train_labels).astype('float32')
y_test=np.asarray(test_labels).astype('float32')
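# Quick sanity check of the encoder on a hypothetical toy input: a list
# of word indices becomes a fixed-size binary vector.
print(vectorize_sequences([[1, 3]], dimension=5))  # [[0. 1. 0. 1. 0.]]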
# Set aside a validation set
x_val=x_train[:10000]
partial_x_train=x_train[10000:]
y_val=y_train[:10000]
partial_y_train=y_train[10000:]
# Build the network: two hidden layers of 16 relu units, and a sigmoid
# output that emits the probability that a review is positive.
# (Renamed the variable to `model` so it no longer shadows the keras.models module.)
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
# # Compile. Three arguments are needed: a loss function, an optimizer,
# # and the metrics to monitor during training and testing
# model.compile(optimizer='rmsprop',
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
#
# # Configuring the optimizer explicitly
# model.compile(optimizer=optimizers.RMSprop(lr=0.001),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
# Configuring the loss function and metrics explicitly
# (newer Keras versions use learning_rate instead of lr)
model.compile(optimizer=optimizers.RMSprop(lr=0.001),
              loss=losses.binary_crossentropy,
              metrics=[metrics.binary_accuracy])
# Train the model
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))
# Evaluate on the test set
results = model.evaluate(x_test, y_test)
print(results)
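# predict() returns the model's estimated probability that each review is
# positive; values near 0 or 1 indicate confident predictions.
print(model.predict(x_test))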
# Plot the training and validation loss
history_dict=history.history
loss_values=history_dict['loss']
val_loss_values=history_dict['val_loss']
epochs=range(1,len(loss_values)+1)
plt.plot(epochs,loss_values,'bo',label='Training loss')
plt.plot(epochs,val_loss_values,'b',label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
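# A companion plot for accuracy. With metrics=[metrics.binary_accuracy],
# standalone Keras records the values under 'binary_accuracy' and
# 'val_binary_accuracy'; the key names differ across Keras versions, so
# check history_dict.keys() if these raise a KeyError.
plt.clf()
acc_values = history_dict['binary_accuracy']
val_acc_values = history_dict['val_binary_accuracy']
plt.plot(epochs, acc_values, 'bo', label='Training acc')
plt.plot(epochs, val_acc_values, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()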
# coding=utf-8
"""
__project_ = 'Python深度学习'
__file_name__ = '3.5新闻分类'
__author__ = 'WIN10'
__time__ = '2020/4/11 13:16'
__product_name = PyCharm
"""
from keras.datasets import reuters
import numpy as np
from keras import models
from keras import layers
# Load the data, keeping only the 10,000 most frequent words
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)
# Prepare the data: multi-hot encode the integer sequences into binary vectors
def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results
x_train=vectorize_sequences(train_data)
x_test=vectorize_sequences(test_data)
def to_one_hot(labels, dimension=46):
    results = np.zeros((len(labels), dimension))
    for i, label in enumerate(labels):
        results[i, label] = 1.
    return results
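# Keras ships a built-in equivalent of to_one_hot:
# from keras.utils import to_categorical
# one_hot_train_labels = to_categorical(train_labels)
# one_hot_test_labels = to_categorical(test_labels)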
# Option 1: one-hot encode the labels; the matching loss is categorical_crossentropy
# one_hot_train_labels = to_one_hot(train_labels)
# one_hot_test_labels = to_one_hot(test_labels)
# Option 2: keep the labels as an integer tensor; the matching loss is sparse_categorical_crossentropy
y_train = np.array(train_labels)
y_test = np.array(test_labels)
# Set aside a validation set
x_val=x_train[:1000]
partial_x_train=x_train[1000:]
# y_val=one_hot_train_labels[:1000]
# partial_y_train=one_hot_train_labels[1000:]
y_val=y_train[:1000]
partial_y_train=y_train[1000:]
print(y_val)
# Build the network: the output layer is a 46-way softmax that produces
# a probability distribution over the 46 topics
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(46, activation='softmax'))
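# The hidden layers use 64 units rather than 16: with 46 classes to
# separate, intermediate layers much smaller than 46 would act as an
# information bottleneck and drop class-relevant information.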
# Compile
# # (for option 1, one-hot labels)
# model.compile(optimizer='rmsprop',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])
# (for option 2, integer labels)
model.compile(optimizer='rmsprop',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Train
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))
# Evaluate on the test set
results = model.evaluate(x_test, y_test)
print(results)
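# For scale: a purely random classifier on these 46 imbalanced classes
# reaches only about 18-19% accuracy, so the network's score is a large gain.
test_labels_copy = np.array(test_labels).copy()
np.random.shuffle(test_labels_copy)
print(np.mean(np.array(test_labels) == test_labels_copy))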
# coding=utf-8
"""
__project_ = 'Python深度学习'
__file_name__ = '预测房价'
__author__ = 'WIN10'
__time__ = '2020/4/11 13:53'
__product_name = PyCharm
"""
from keras.datasets import boston_housing
from keras import models
from keras import layers
import numpy as np
# Load the data
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()
# Standardize each feature to zero mean and unit variance. The mean and std
# are computed on the training data only and then applied to the test data;
# never compute such statistics on the test set.
mean=train_data.mean(axis=0)
train_data-=mean
std=train_data.std(axis=0)
train_data/=std
test_data-=mean
test_data/=std
# Build the network. Loss: mse (mean squared error); metric: mae (mean absolute error)
def build_model():
    # a fresh, compiled model is needed for every fold of the K-fold loop
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))  # no activation: a linear output for scalar regression
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model
# K-fold validation: with only ~400 training samples, a single validation
# split would be noisy, so train k models on k different splits
k = 4
num_val_samples = len(train_data) // k
num_epochs = 500
all_scores = []
all_mae_histories = []
for i in range(k):
    val_data = train_data[i * num_val_samples:(i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples:(i + 1) * num_val_samples]
    partial_train_data = np.concatenate([train_data[:i * num_val_samples], train_data[(i + 1) * num_val_samples:]], axis=0)
    partial_train_targets = np.concatenate([train_targets[:i * num_val_samples], train_targets[(i + 1) * num_val_samples:]], axis=0)
    model = build_model()
    history = model.fit(partial_train_data, partial_train_targets, epochs=num_epochs, batch_size=1, verbose=0, validation_data=(val_data, val_targets))
    # verbose=0 trains silently; verbose=1 would show a progress bar
    # val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    # all_scores.append(val_mae)
    mae_history = history.history['val_mean_absolute_error']  # key is 'val_mae' in newer tf.keras
    all_mae_histories.append(mae_history)
print(all_mae_histories)
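# Average the k per-fold validation MAE curves into one learning curve and
# plot it to see where validation MAE stops improving (a sketch; reuses
# matplotlib as in section 3.4).
import matplotlib.pyplot as plt
average_mae_history = [np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)]
plt.plot(range(1, len(average_mae_history) + 1), average_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()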
Regression problems use mean squared error (MSE) as the loss function.
A common regression metric is mean absolute error (MAE).
If the input features have different value ranges, preprocess the data and scale each feature.
When little data is available, K-fold validation helps to evaluate a model reliably.
With little training data, prefer a small network with few hidden layers to avoid severe overfitting.