# %%
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Generate data
# Simulate the line y = 2x + 6
originX = tf.convert_to_tensor(np.linspace(-3, 3, 500), dtype = tf.double)
# Add noise
originY = 2 * originX + 6 + tf.random.normal((originX.shape[0], ), 0, 0.5, dtype = tf.double)
# %%
X = tf.cast(originX, tf.float32)
y = tf.cast(originY, tf.float32)
# Pair features with labels and batch them
train = tf.data.Dataset.from_tensor_slices((X, y)).batch(32)
# Define the trainable parameters of the network
# [1, 1]: one input neuron and one output neuron (here the single layer is also the output layer)
# [1]: the bias
w = tf.Variable(tf.random.truncated_normal([1, 1], stddev = 0.1, seed = 1))
b = tf.Variable(tf.random.truncated_normal([1], stddev = 0.1, seed = 1))
# Define the learning rate and the number of epochs
learnRate = 0.01
epochs = 10
lossAll = 0
for epoch in range(epochs):
    for step, (xTrain, yTrain) in enumerate(train):
        # Reshape the batch into column vectors so the shapes match for matmul and the loss
        xTrain = tf.reshape(xTrain, (xTrain.shape[0], 1))
        yTrain = tf.reshape(yTrain, (yTrain.shape[0], 1))
        with tf.GradientTape() as tape:
            # Compute the prediction, yPred = wx + b (no activation function, so the output stays linear)
            yPred = tf.matmul(xTrain, w) + b
            # Compute the loss (mean squared error)
            loss = tf.reduce_mean(
                tf.square(yTrain - yPred)
            )
            lossAll += loss.numpy()
        # Differentiate the loss with respect to each parameter
        gradient = tape.gradient(loss, [w, b])
        # Gradient descent update
        w.assign_sub(learnRate * gradient[0])
        b.assign_sub(learnRate * gradient[1])
    print(f"Epoch: {epoch}, Loss: {lossAll / (step + 1)}")
    lossAll = 0
print(w)
print(b)
w = w.numpy()[0][0]
b = b.numpy()[0]
plt.plot(originX, originY)
plt.plot(originX, w * originX + b)
plt.show()
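As a quick sanity check (an addition, not part of the original notes), the learned parameters should land near the true slope 2 and intercept 6; an ordinary least-squares fit on the same data gives a reference point:
# Sanity check: compare the learned w, b against a closed-form least-squares fit.
# Assumes originX and originY from the cells above.
slope, intercept = np.polyfit(originX.numpy(), originY.numpy(), deg = 1)
print(f"polyfit slope = {slope:.3f}, intercept = {intercept:.3f}")  # expect values close to 2 and 6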
# Build a classification neural network (a binary cat-vs-dog classifier)
# Generate data
import pandas as pd
import numpy as np
np.random.seed(43)
df = pd.DataFrame({
    'color': ['black'] * 5 + ['white'] * 5,
    'age': np.random.rand(10) * 10,
    'weight': np.random.rand(10) * 100,
    'type': ['cat'] * 5 + ['dog'] * 5,
})
print(df[0:10])
color age weight type
0 black 1.150546 39.495002 cat
1 black 6.090665 80.204712 cat
2 black 1.333910 25.442113 cat
3 black 2.405896 5.688494 cat
4 black 3.271391 86.664864 cat
5 white 8.591375 22.102900 dog
6 white 6.660902 40.498945 dog
7 white 5.411622 31.609647 dog
8 white 0.290138 7.666270 dog
9 white 7.337483 84.322469 dog
Since a neural network stores everything as numbers, the color feature and the type label must be one-hot encoded.
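A tiny illustration of what pd.get_dummies produces (illustrative only, not part of the original data):
# pd.get_dummies creates one indicator column per category
# (printed as 0/1 or True/False depending on the pandas version).
print(pd.get_dummies(pd.Series(['black', 'white', 'black'])))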
# One-hot encode the categorical columns
oneHotColor = pd.get_dummies(df['color'])
oneHotType = pd.get_dummies(df['type'])
# Drop the color column; axis = 1 means columns
df = df.drop('color', axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
# Rename the column
df.rename(columns = {'black': 'color'}, inplace = True)
# Drop the type column; axis = 1 means columns
df = df.drop('type', axis = 1)
df = pd.concat([df, oneHotType['cat']], axis = 1)
df.rename(columns = {'cat': 'type'}, inplace = True)
print(df)
age weight color type
0 1.150546 39.495002 1 1
1 6.090665 80.204712 1 1
2 1.333910 25.442113 1 1
3 2.405896 5.688494 1 1
4 3.271391 86.664864 1 1
5 8.591375 22.102900 0 0
6 6.660902 40.498945 0 0
7 5.411622 31.609647 0 0
8 0.290138 7.666270 0 0
9 7.337483 84.322469 0 0
As the output shows, cat is encoded as 1 and dog as 0.
# Build the training set
X = df[['color', 'age', 'weight']].values
y = df[['type']].values
print(X.shape)
print(y.shape)
# Build the neural network
model = tf.keras.Sequential()
# Input layer, weights [3, 50]
model.add(tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'))
# Hidden layer, weights [50, 25]
model.add(tf.keras.layers.Dense(25, activation = 'relu'))
# Output layer, weights [25, 1]
model.add(tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid'))
# Show the model summary
model.summary()
(10, 3)
(10, 1)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_1 (Dense)              (None, 50)                200
dense_2 (Dense)              (None, 25)                1275
dense_3 (Dense)              (None, 1)                 26
=================================================================
Total params: 1,501
Trainable params: 1,501
Non-trainable params: 0
_________________________________________________________________
A Dense layer is a fully connected layer: every neuron is connected by a weight to every neuron in the adjacent layers.
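The Param # column can be verified by hand: a Dense layer with n inputs and m units has n * m weights plus m biases. A quick check of the arithmetic for the three layers above:
# Parameter counts for the summary above
print(3 * 50 + 50)    # 200:  3 input features -> 50 units
print(50 * 25 + 25)   # 1275: 50 -> 25
print(25 * 1 + 1)     # 26:   25 -> 1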
print(X.shape)
print(y.shape)
# Build the same network, this time passing the layers as a list
model = tf.keras.Sequential([
    # Input layer, weights [3, 50]
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    # Hidden layer, weights [50, 25]
    tf.keras.layers.Dense(25, activation = 'relu'),
    # Output layer, weights [25, 1]
    tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])
# Show the model summary
model.summary()
(10, 3)
(10, 1)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_1 (Dense)              (None, 50)                200
dense_2 (Dense)              (None, 25)                1275
dense_3 (Dense)              (None, 1)                 26
=================================================================
Total params: 1,501
Trainable params: 1,501
Non-trainable params: 0
_________________________________________________________________
# Configure the loss function and the optimizer
model.compile(
    loss = 'binary_crossentropy', optimizer = 'SGD'
)
model.fit(X, y, epochs = 500)
......
Epoch 489/500
1/1 [==============================] - 0s 0s/step - loss: 0.3395
Epoch 490/500
1/1 [==============================] - 0s 0s/step - loss: 0.3512
Epoch 491/500
1/1 [==============================] - 0s 10ms/step - loss: 0.3420
Epoch 492/500
1/1 [==============================] - 0s 0s/step - loss: 0.3536
Epoch 493/500
1/1 [==============================] - 0s 0s/step - loss: 0.3433
Epoch 494/500
1/1 [==============================] - 0s 0s/step - loss: 0.3570
Epoch 495/500
1/1 [==============================] - 0s 0s/step - loss: 0.3676
Epoch 496/500
1/1 [==============================] - 0s 0s/step - loss: 0.3716
Epoch 497/500
1/1 [==============================] - 0s 0s/step - loss: 0.3418
Epoch 498/500
1/1 [==============================] - 0s 0s/step - loss: 0.3489
Epoch 499/500
1/1 [==============================] - 0s 0s/step - loss: 0.3333
Epoch 500/500
1/1 [==============================] - 0s 10ms/step - loss: 0.3408
binary_crossentropy is binary cross-entropy, the loss function used for two-class classification.
SGD is stochastic gradient descent.
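To make the loss concrete: binary cross-entropy for one sample is -(y * log(p) + (1 - y) * log(1 - p)), where p is the predicted probability. A minimal check against the Keras implementation (an illustrative sketch with made-up numbers):
# Compare a manual binary cross-entropy computation with tf.keras
yTrue = np.array([1.0, 0.0])
yProb = np.array([0.9, 0.2])   # predicted probabilities from a sigmoid output
manual = -np.mean(yTrue * np.log(yProb) + (1 - yTrue) * np.log(1 - yProb))
kerasBce = tf.keras.losses.binary_crossentropy(yTrue, yProb).numpy()
print(manual, kerasBce)        # both come out around 0.164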
# Test
xTest = pd.DataFrame({
    'color': ['black'],
    'age': np.random.rand(1) * 10,
    'weight': np.random.rand(1) * 100
})
oneHotColor = pd.get_dummies(xTest['color'])
# Drop the color column; axis = 1 means columns
xTest = xTest.drop('color', axis = 1)
xTest = pd.concat([xTest, oneHotColor['black']], axis = 1)
# Rename the column
xTest.rename(columns = {'black': 'color'}, inplace = True)
print(xTest)
# Reorder the columns to match the training feature order before predicting
print(model.predict(xTest[['color', 'age', 'weight']].values))
age weight color
0 3.853769 95.448813 1
1/1 [==============================] - 0s 70ms/step
[[1.8637778e-09]]
The computed value is close to 0, so this sample is predicted to be a dog.
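Since the sigmoid output is the predicted probability that the sample is a cat (the class encoded as 1), a common convention is to threshold it at 0.5 (a small sketch, reusing the model and xTest defined above):
# Turn the predicted probability into a class label
prob = model.predict(xTest[['color', 'age', 'weight']].values)[0][0]
label = 'cat' if prob >= 0.5 else 'dog'
print(prob, label)   # here prob is close to 0, so the sample is classified as dog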
import pandas as pd
import numpy as np
import tensorflow as tf
np.random.seed(43)
df = pd.DataFrame({
    'color': ['black'] * 5 + ['white'] * 5,
    'age': np.random.rand(10) * 10,
    'weight': np.random.rand(10) * 100,
    'type': ['cat'] * 5 + ['dog'] * 5,
})
print(df[0:10])
# %%
# One-hot encode the categorical columns
oneHotColor = pd.get_dummies(df['color'])
oneHotType = pd.get_dummies(df['type'])
# Drop the color column; axis = 1 means columns
df = df.drop('color', axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
# Rename the column
df.rename(columns = {'black': 'color'}, inplace = True)
# Drop the type column; axis = 1 means columns
df = df.drop('type', axis = 1)
df = pd.concat([df, oneHotType['cat']], axis = 1)
df.rename(columns = {'cat': 'type'}, inplace = True)
print(df)
# %%
# Build the training set
X = df[['color', 'age', 'weight']].values
y = df[['type']].values
print(X)
print(y)
# %%
from tensorflow import keras
print(X.shape)
print(y.shape)
# Build the neural network
model = tf.keras.Sequential()
# Input layer
model.add(keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'))
# Hidden layer
model.add(keras.layers.Dense(25, activation = 'relu'))
# Output layer
model.add(keras.layers.Dense(y.shape[1], activation = 'sigmoid'))
# Show the model summary
model.summary()
# %%
print(X.shape)
print(y.shape)
# Build the same network, this time passing the layers as a list
model = tf.keras.Sequential([
    # Input layer, weights [3, 50]
    keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    # Hidden layer, weights [50, 25]
    keras.layers.Dense(25, activation = 'relu'),
    # Output layer, weights [25, 1]
    keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])
# Show the model summary
model.summary()
# %%
# Configure the loss function and the optimizer
model.compile(
    loss = 'binary_crossentropy', optimizer = 'SGD'
)
model.fit(X, y, epochs = 500)
# %%
# Test
xTest = pd.DataFrame({
    'color': ['black'],
    'age': np.random.rand(1) * 10,
    'weight': np.random.rand(1) * 100
})
oneHotColor = pd.get_dummies(xTest['color'])
# Drop the color column; axis = 1 means columns
xTest = xTest.drop('color', axis = 1)
xTest = pd.concat([xTest, oneHotColor['black']], axis = 1)
# Rename the column
xTest.rename(columns = {'black': 'color'}, inplace = True)
print(xTest)
# Reorder the columns to match the training feature order before predicting
print(model.predict(xTest[['color', 'age', 'weight']].values))
import tensorflow as tf
import numpy as np
import pandas as pd
df = pd.DataFrame({
    'color': ['black'] * 5 + ['white'] * 5,
    'age': np.random.rand(10) * 10,
    'weight': np.random.rand(10) * 100,
    'sleep_time': np.random.rand(10) * 24
})
oneHotColor = pd.get_dummies(df['color'])
print(oneHotColor)
df = df.drop('color', axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
df.rename(columns = {'black': 'color'}, inplace = True)
print(df)
X = df[['color', 'age', 'weight']].values
y = df['sleep_time']
model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(1)
])
model.summary()
model.compile(loss = tf.keras.losses.MeanSquaredError(),
              optimizer = tf.keras.optimizers.experimental.SGD())
model.fit(X, y, epochs = 500)
yPredict = []
for i in range(10):
    predict = model.predict(df[i:i+1][['color', 'age', 'weight']].values)
    yPredict.append(predict[0][0])
print(yPredict)
import matplotlib.pyplot as plt
origin = plt.plot([_ for _ in range(10)], df['sleep_time'].values)
predict = plt.plot([_ for _ in range(10)], yPredict)
plt.show()
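To put a number on the fit instead of eyeballing the plot, model.evaluate reports the configured loss (here the MSE) on the given data (a small addition, reusing the model, X and y above):
# Mean squared error of the trained model on the training data
mse = model.evaluate(X, y, verbose = 0)
print(f"training MSE: {mse:.3f}")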
When subclassing tf.keras.Model, the __init__() method defines the layers the model uses, and call() defines the forward pass of the network.
Take the regression network above as an example:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
class MyModel(tf.keras.Model):
    def __init__(self, X, y):
        super().__init__()
        self.d1 = tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu')
        self.d2 = tf.keras.layers.Dense(25, activation = 'relu')
        self.d3 = tf.keras.layers.Dense(1)
    def call(self, X):
        x = self.d1(X)
        x = self.d2(x)
        return self.d3(x)
df = pd.DataFrame({
    'color': ['black'] * 5 + ['white'] * 5,
    'age': np.random.rand(10) * 10,
    'weight': np.random.rand(10) * 100,
    'sleep_time': np.random.rand(10) * 24
})
oneHotColor = pd.get_dummies(df['color'])
print(oneHotColor)
df = df.drop('color', axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
df.rename(columns = {'black': 'color'}, inplace = True)
print(df)
X = df[['color', 'age', 'weight']].values
y = df['sleep_time']
model = MyModel(X, y)
model.compile(loss = tf.keras.losses.MeanSquaredError(),
              optimizer = tf.keras.optimizers.experimental.Adam())
model.fit(X, y, epochs = 500)
yPredict = []
for i in range(10):
    predict = model.predict(df[i:i+1][['color', 'age', 'weight']].values)
    yPredict.append(predict[0][0])
print(yPredict)
origin = plt.plot([_ for _ in range(10)], df['sleep_time'].values)
predict = plt.plot([_ for _ in range(10)], yPredict)
plt.show()
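One difference from the Sequential models above (an aside, not in the original notes): a subclassed model only knows its shapes after it has been called on data, so model.summary() works here only after fit() or after a manual call such as:
# Build the subclassed model on one sample so summary() can report shapes
_ = model(X[:1].astype('float32'))
model.summary()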
Dropout temporarily drops neurons with a given probability during the training of a deep neural network; the dropped neurons do not take part in that training pass, which reduces the number of effective parameters and helps mitigate overfitting.
Add a Dropout layer to the network:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(1)
])
Here 0.2 is the dropout rate: 20% of the neurons are deactivated.
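Note that Dropout is only active during training; at inference time Keras bypasses it automatically. A minimal illustration with a standalone Dropout layer:
# Dropout zeroes roughly 20% of the activations only when training=True,
# and scales the surviving ones by 1 / (1 - 0.2) to keep the expected value.
layer = tf.keras.layers.Dropout(0.2)
data = tf.ones((1, 10))
print(layer(data, training = True))    # some entries zeroed, the rest scaled to 1.25
print(layer(data, training = False))   # passes through unchanged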
EarlyStopping (early stopping) is usually regarded as a regularization technique that prevents the network from overfitting.
Idea: stop training before the model overfits, keeping the weights at their best state so far.
In TensorFlow this is implemented through the callbacks mechanism with the EarlyStopping callback, used together with a validation set.
import tensorflow as tf
import pandas as pd
import numpy as np
np.random.seed(43)
df = pd.DataFrame({
    'color': ['black'] * 10 + ['white'] * 10,
    'age': np.random.rand(20) * 10,
    'weight': np.random.rand(20) * 100,
    'type': ['cat'] * 10 + ['dog'] * 10,
})
oneHotColor = pd.get_dummies(df['color'])
oneHotType = pd.get_dummies(df['type'])
df = df.drop(['color'], axis = 1)
df = df.drop(['type'], axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
df = pd.concat([df, oneHotType['cat']], axis = 1)
df.rename(columns = {
    'black': 'color',
    'cat': 'type'
}, inplace = True)
X = df[['color', 'age', 'weight']].values
y = df[['type']].values
xTrain = X[:15]
yTrain = y[:15]
xValid = X[15:]
yValid = y[15:]
callback = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 4)
model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])
model.summary()
model.compile(loss = tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.keras.optimizers.experimental.Adam())
model.fit(xTrain, yTrain, epochs = 500, validation_data = (xValid, yValid), callbacks = [callback])
......
Epoch 22/500
1/1 [==============================] - 0s 22ms/step - loss: 2.4001 - val_loss: 0.1362
Epoch 23/500
1/1 [==============================] - 0s 21ms/step - loss: 2.5386 - val_loss: 0.1242
Epoch 24/500
1/1 [==============================] - 0s 20ms/step - loss: 1.5711 - val_loss: 0.1143
Epoch 25/500
1/1 [==============================] - 0s 20ms/step - loss: 2.0007 - val_loss: 0.1113
Epoch 26/500
1/1 [==============================] - 0s 20ms/step - loss: 1.5747 - val_loss: 0.1105
Epoch 27/500
1/1 [==============================] - 0s 22ms/step - loss: 2.8826 - val_loss: 0.1147
Epoch 28/500
1/1 [==============================] - 0s 23ms/step - loss: 1.8719 - val_loss: 0.1209
Epoch 29/500
1/1 [==============================] - 0s 24ms/step - loss: 2.5716 - val_loss: 0.1323
Epoch 30/500
1/1 [==============================] - 0s 26ms/step - loss: 2.2206 - val_loss: 0.1505
The output shows that training stopped after only 30 epochs.
Looking at val_loss, it keeps increasing toward the end, which indicates growing overfitting and is what triggered EarlyStopping.
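One detail worth adding (not in the original notes): by default EarlyStopping leaves the model with the weights from the last epoch, not the best one; passing restore_best_weights = True rolls the weights back to the epoch with the best monitored value:
# Stop after 4 epochs without improvement and roll back to the best weights
callback = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    patience = 4,
    restore_best_weights = True
)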
There are two ways to save a model: through the ModelCheckpoint callback, or by exporting it as an HDF5 (.h5) file.
Configure the ModelCheckpoint callback with the relevant parameters:
tf.keras.callbacks.ModelCheckpoint(
    filepath,                          # save path
    monitor: str = 'val_loss',         # quantity to monitor
    verbose: int = 0,                  # verbosity: 0 silent, 1 verbose
    save_best_only: bool = False,      # whether to keep only the best model
    save_weights_only: bool = False,   # save only the weights; if False, the whole model is saved
)
earlyStopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 4)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath = 'training_model/cp.ckpt',
    save_best_only = True,
    save_weights_only = True,
    verbose = 1
)
model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])
model.summary()
model.compile(loss = tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.keras.optimizers.experimental.Adam())
model.fit(xTrain, yTrain, epochs = 500, validation_data = (xValid, yValid), callbacks = [earlyStopping, checkpoint])
Use load_weights() to read the saved parameters back into a model:
# Rebuild the model and load the saved weights
model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])
model.summary()
model.compile(loss = tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.keras.optimizers.experimental.Adam())
model.load_weights('./training_model/cp.ckpt')
Save as an HDF5 file:
model.save('./training_model/mymodel.h5')
Load it back:
model = tf.keras.models.load_model('./training_model/mymodel.h5')
Note: the HDF5 format does not save the weights of optimizer_experimental.Optimizer optimizers.
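If the optimizer state matters (for example, to resume training later), the TensorFlow SavedModel format can be used instead of HDF5; a minimal sketch (the path is illustrative):
# Saving without an .h5 suffix writes a SavedModel directory instead of a single file
model.save('./training_model/mymodel')
model = tf.keras.models.load_model('./training_model/mymodel')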