Python Deep Learning Notes, Chapter 3

Table of Contents

3.1 Neural networks

3.4 Classifying movie reviews: a binary classification example

3.5 Classifying newswires: a multiclass classification example

3.6 Predicting house prices: a regression example

Summary:


3.1 Neural networks

[Figure 1: neural network overview]
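The pieces the figure depicts fit together as follows: layers are chained into a network, compile() ties the network to a loss function and an optimizer, and fit() runs the training loop. A minimal sketch of that anatomy (the layer sizes and input shape here are placeholders, not taken from the figure):

from keras import models, layers

# Layers are stacked into a network (a chain of data transformations)
network = models.Sequential()
network.add(layers.Dense(16, activation='relu', input_shape=(100,)))
network.add(layers.Dense(1, activation='sigmoid'))

# compile() wires in the loss function and the optimizer;
# fit() would then loop: predict -> measure loss -> update weights
network.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])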

3.4 Classifying movie reviews: a binary classification example

# coding=utf-8
"""
__project_ = 'Python深度学习'
__file_name__ = '3.4电影评论分类'
__author__ = 'WIN10'
__time__ = '2020/4/11 12:02'
__product_name = PyCharm

"""

from keras.datasets import imdb
import numpy as np
from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics
import matplotlib.pyplot as plt
# Load the data, keeping only the top 10,000 most frequent words
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

print(train_data[0])    # a review: a list of word indices
print(train_labels[0])  # its label: 0 = negative, 1 = positive

# Map word indices back to words; indices 0-2 are reserved for
# "padding", "start of sequence", and "unknown", hence the i - 3 offset
word_index = imdb.get_word_index()
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[0]])
print(decoded_review)

# Prepare the data: multi-hot encode the integer sequences into
# binary vectors of length `dimension`
def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.  # set the indices of this sequence to 1
    return results
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)
print(x_train.shape)

y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')
# Set aside a validation set
x_val = x_train[:10000]
partial_x_train = x_train[10000:]

y_val = y_train[:10000]
partial_y_train = y_train[10000:]
# Build the network (the instance is named `model` so it does not
# shadow the `models` module imported above)

model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# # Compile: takes three arguments -- an optimizer, a loss function,
# # and the metrics to monitor during training and testing
# model.compile(optimizer='rmsprop',
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
#
# # Configuring the optimizer explicitly
# model.compile(optimizer=optimizers.RMSprop(lr=0.001),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
# Configuring the loss function and metrics explicitly
# (newer Keras versions use `learning_rate` instead of `lr`)
model.compile(optimizer=optimizers.RMSprop(lr=0.001),
              loss=losses.binary_crossentropy,
              metrics=[metrics.binary_accuracy])

# Train the model
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))

# Evaluate on the test set
results = model.evaluate(x_test, y_test)
print(results)
# Plot the training and validation loss
history_dict = history.history
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']

epochs = range(1, len(loss_values) + 1)
plt.plot(epochs, loss_values, 'bo', label='Training loss')
plt.plot(epochs, val_loss_values, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()
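The loss plot usually shows the validation loss bottoming out after a few epochs while the training loss keeps falling, i.e. the model starts overfitting. A minimal follow-up sketch, assuming the turn happens around epoch 4 (read the actual epoch off your own plot): retrain a fresh network on the full training set for fewer epochs, then inspect its predictions.

# Retrain from scratch with fewer epochs to limit overfitting
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=4, batch_size=512)
print(model.evaluate(x_test, y_test))
print(model.predict(x_test))  # per-review probability of being positive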

3.5 Classifying newswires: a multiclass classification example

# coding=utf-8
"""
__project_ = 'Python深度学习'
__file_name__ = '3.5新闻分类'
__author__ = 'WIN10'
__time__ = '2020/4/11 13:16'
__product_name = PyCharm

"""

from keras.datasets import reuters
import numpy as np
from keras import models
from keras import layers

# Load the data, keeping only the top 10,000 most frequent words
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)

# Prepare the data: multi-hot encode the integer sequences
def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

# One-hot encode a list of labels into vectors of length `dimension`
def to_one_hot(labels, dimension=46):
    results = np.zeros((len(labels), dimension))
    for i, label in enumerate(labels):
        results[i, label] = 1.
    return results
# Option 1: one-hot encode the labels; the matching loss is categorical_crossentropy
# one_hot_train_labels = to_one_hot(train_labels)
# one_hot_test_labels = to_one_hot(test_labels)
# Option 2: keep the labels as an integer tensor; the matching loss is sparse_categorical_crossentropy
y_train = np.array(train_labels)
y_test = np.array(test_labels)
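# Note: Keras has a built-in equivalent of the manual to_one_hot helper
# above, keras.utils.to_categorical; with option 1 you could write:
# from keras.utils import to_categorical
# one_hot_train_labels = to_categorical(train_labels)
# one_hot_test_labels = to_categorical(test_labels)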
# Set aside a validation set
x_val = x_train[:1000]
partial_x_train = x_train[1000:]

# y_val = one_hot_train_labels[:1000]
# partial_y_train = one_hot_train_labels[1000:]
y_val = y_train[:1000]
partial_y_train = y_train[1000:]
print(y_val)
# Build the network; 46 output units, one per topic class
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(46, activation='softmax'))

# Compile
# model.compile(optimizer='rmsprop',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])
# With integer labels (option 2), use the sparse variant of the loss
model.compile(optimizer='rmsprop',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))

# Evaluate on the test set
results = model.evaluate(x_test, y_test)
print(results)
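Each prediction from the softmax layer is a probability distribution over the 46 topics. A quick sketch of how to read one (the variable name `predictions` is mine, not from the original notes):

predictions = model.predict(x_test)
print(predictions[0].shape)       # (46,): one probability per topic
print(np.sum(predictions[0]))     # sums to ~1.0, it is a distribution
print(np.argmax(predictions[0]))  # index of the most likely topic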

3.6 Predicting house prices: a regression example

# coding=utf-8
"""
__project_ = 'Python深度学习'
__file_name__ = '预测房价'
__author__ = 'WIN10'
__time__ = '2020/4/11 13:53'
__product_name = PyCharm

"""
from keras.datasets import boston_housing
from keras import models
from keras import layers
import numpy as np
# Load the data
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

# Normalize the data: for each feature, subtract the mean and divide by
# the standard deviation. The test data is scaled with statistics
# computed on the training data -- never with its own.
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std

test_data -= mean
test_data /= std

# Build the network; the loss is MSE (mean squared error) and the
# metric is MAE (mean absolute error)
def build_model():
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))  # no activation: a linear layer for scalar regression
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model

# K-fold validation
k = 4
num_val_samples = len(train_data) // k
num_epochs = 500
all_scores = []
all_mae_histories = []
for i in range(k):
    # Data and targets for the i-th validation fold
    val_data = train_data[i * num_val_samples:(i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples:(i + 1) * num_val_samples]

    # Training data: everything except the i-th fold
    partial_train_data = np.concatenate([train_data[:i * num_val_samples], train_data[(i + 1) * num_val_samples:]], axis=0)
    partial_train_targets = np.concatenate([train_targets[:i * num_val_samples], train_targets[(i + 1) * num_val_samples:]], axis=0)

    model = build_model()
    history = model.fit(partial_train_data, partial_train_targets, epochs=num_epochs, batch_size=1, verbose=0, validation_data=(val_data, val_targets))
    # verbose=1 would show a progress bar during training
    # val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    # all_scores.append(val_mae)
    # Note: newer Keras versions record this metric under the key 'val_mae'
    mae_history = history.history['val_mean_absolute_error']
    all_mae_histories.append(mae_history)

print(all_mae_histories)
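To see how the validation MAE evolves with training, the k per-fold histories can be averaged epoch by epoch and plotted; a minimal sketch (the matplotlib import is added here since this script does not import it above):

import matplotlib.pyplot as plt

# Epoch-wise mean of the validation MAE across the k folds
average_mae_history = [np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)]

plt.plot(range(1, len(average_mae_history) + 1), average_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()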

Summary:

The loss function for regression problems is mean squared error (MSE).

A common regression metric is mean absolute error (MAE).

If the input features have different value ranges, preprocess the data by scaling each feature independently.

When little data is available, K-fold validation gives a more reliable model evaluation.

When training data is scarce, prefer a small network with few hidden layers to avoid severe overfitting.

 
