from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
housing=fetch_california_housing()
scaler=StandardScaler()
x_data=scaler.fit_transform(housing.data)
x_train_full,x_test,y_train_full,y_test=train_test_split(x_data,housing.target)
x_train,x_valid,y_train,y_valid=train_test_split(x_train_full,y_train_full)
# Original model, kept as a baseline for comparison
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=tf.keras.layers.Dense(30,activation='elu',kernel_initializer='he_normal')(input_)
hidden2=tf.keras.layers.Dense(30,activation='elu',kernel_initializer='he_normal')(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=tf.keras.layers.Dense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=tf.keras.losses.mean_squared_error,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9))
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=20,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/20
363/363 [==============================] - 3s 3ms/step - loss: 0.9917 - val_loss: 0.5461
Epoch 2/20
363/363 [==============================] - 1s 3ms/step - loss: 0.5161 - val_loss: 0.4732
Epoch 3/20
363/363 [==============================] - 1s 3ms/step - loss: 0.4482 - val_loss: 0.4653
Epoch 4/20
363/363 [==============================] - 1s 3ms/step - loss: 0.4573 - val_loss: 0.5455
Epoch 5/20
363/363 [==============================] - 1s 3ms/step - loss: 0.4327 - val_loss: 0.4521
Epoch 6/20
363/363 [==============================] - 1s 3ms/step - loss: 0.4210 - val_loss: 0.4479
Epoch 7/20
363/363 [==============================] - 1s 4ms/step - loss: 0.4073 - val_loss: 0.4462
Epoch 8/20
363/363 [==============================] - 1s 3ms/step - loss: 0.3974 - val_loss: 0.4295
Epoch 9/20
363/363 [==============================] - 1s 3ms/step - loss: 0.4035 - val_loss: 0.6242
Epoch 10/20
363/363 [==============================] - 1s 3ms/step - loss: 0.3999 - val_loss: 0.4166
Epoch 11/20
363/363 [==============================] - 1s 3ms/step - loss: 0.4020 - val_loss: 0.4947
Epoch 12/20
363/363 [==============================] - 1s 3ms/step - loss: 0.3834 - val_loss: 0.4090
Epoch 13/20
363/363 [==============================] - 1s 3ms/step - loss: 0.3827 - val_loss: 0.4200
Epoch 14/20
363/363 [==============================] - 1s 3ms/step - loss: 0.3726 - val_loss: 0.4170
Epoch 15/20
363/363 [==============================] - 1s 4ms/step - loss: 0.3683 - val_loss: 0.4226
Epoch 16/20
363/363 [==============================] - 1s 3ms/step - loss: 0.3683 - val_loss: 0.4034
Epoch 17/20
363/363 [==============================] - 1s 4ms/step - loss: 0.3659 - val_loss: 0.4269
Epoch 18/20
363/363 [==============================] - 1s 4ms/step - loss: 0.3619 - val_loss: 0.4220
Epoch 19/20
363/363 [==============================] - 1s 4ms/step - loss: 0.3604 - val_loss: 0.4071
Epoch 20/20
363/363 [==============================] - 1s 3ms/step - loss: 0.3608 - val_loss: 0.4009
162/162 [==============================] - 0s 2ms/step - loss: 0.3803
0.3803137540817261
A custom loss function takes y_true and y_pred and returns a loss vector of the same shape (i.e., it keeps the loss of each instance):
def huber_fn(y_true, y_pred):
    error = y_true - y_pred
    is_small_error = tf.abs(error) < 1
    # Quadratic branch for errors with |error| < 1
    squared_error = tf.square(error) / 2
    # Linear branch for larger errors
    linear_error = tf.abs(error) - 0.5
    # The two branches meet at |error| = 1, so the loss is continuous and differentiable there.
    # Return the per-instance loss vector
    return tf.where(is_small_error, squared_error, linear_error)
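As a quick illustrative sanity check (the values here are made up for demonstration), the two branches agree at |error| = 1 with value 0.5 and slope 1:
y_true = tf.constant([[0.0], [0.0], [0.0]])
y_pred = tf.constant([[0.5], [1.0], [2.0]])
print(huber_fn(y_true, y_pred))  # expect [[0.125], [0.5], [1.5]]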
To use it, pass loss=huber_fn to compile().
# Use huber_fn as the loss and mean_squared_error as a metric; as the run below shows,
# metric functions take the same (y_true, y_pred) arguments as losses, but the class implementations differ.
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=tf.keras.layers.Dense(30,activation='elu',kernel_initializer='he_normal')(input_)
hidden2=tf.keras.layers.Dense(30,activation='elu',kernel_initializer='he_normal')(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=tf.keras.layers.Dense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=huber_fn,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9),metrics=[tf.keras.losses.mean_squared_error])
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=100,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/100
363/363 [==============================] - 2s 5ms/step - loss: 0.3725 - mean_squared_error: 1.1120 - val_loss: 0.2369 - val_mean_squared_error: 0.7345
Epoch 2/100
363/363 [==============================] - 1s 4ms/step - loss: 0.2110 - mean_squared_error: 0.5283 - val_loss: 0.2108 - val_mean_squared_error: 0.4929
Epoch 3/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1989 - mean_squared_error: 0.4733 - val_loss: 0.2000 - val_mean_squared_error: 0.4709
Epoch 4/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1929 - mean_squared_error: 0.4587 - val_loss: 0.1953 - val_mean_squared_error: 0.4573
Epoch 5/100
363/363 [==============================] - 2s 4ms/step - loss: 0.1890 - mean_squared_error: 0.4448 - val_loss: 0.1944 - val_mean_squared_error: 0.4508
Epoch 6/100
363/363 [==============================] - 2s 4ms/step - loss: 0.1869 - mean_squared_error: 0.4405 - val_loss: 0.1910 - val_mean_squared_error: 0.4483
Epoch 7/100
363/363 [==============================] - 2s 4ms/step - loss: 0.1843 - mean_squared_error: 0.4368 - val_loss: 0.1890 - val_mean_squared_error: 0.4375
Epoch 8/100
363/363 [==============================] - 2s 4ms/step - loss: 0.1816 - mean_squared_error: 0.4206 - val_loss: 0.1903 - val_mean_squared_error: 0.4504
Epoch 9/100
363/363 [==============================] - 2s 4ms/step - loss: 0.1802 - mean_squared_error: 0.4254 - val_loss: 0.1885 - val_mean_squared_error: 0.4476
Epoch 10/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1785 - mean_squared_error: 0.4116 - val_loss: 0.1878 - val_mean_squared_error: 0.4493
Epoch 11/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1778 - mean_squared_error: 0.4204 - val_loss: 0.1876 - val_mean_squared_error: 0.4723
Epoch 12/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1765 - mean_squared_error: 0.4049 - val_loss: 0.1824 - val_mean_squared_error: 0.4277
Epoch 13/100
363/363 [==============================] - 2s 4ms/step - loss: 0.1747 - mean_squared_error: 0.4039 - val_loss: 0.1861 - val_mean_squared_error: 0.4552
Epoch 14/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1734 - mean_squared_error: 0.3980 - val_loss: 0.1857 - val_mean_squared_error: 0.4394
Epoch 15/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1728 - mean_squared_error: 0.3960 - val_loss: 0.1829 - val_mean_squared_error: 0.4620
Epoch 16/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1720 - mean_squared_error: 0.3945 - val_loss: 0.1795 - val_mean_squared_error: 0.4203
Epoch 17/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1714 - mean_squared_error: 0.3930 - val_loss: 0.1883 - val_mean_squared_error: 0.4803
Epoch 18/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1712 - mean_squared_error: 0.3935 - val_loss: 0.1819 - val_mean_squared_error: 0.4347
Epoch 19/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1705 - mean_squared_error: 0.3939 - val_loss: 0.1774 - val_mean_squared_error: 0.4153
Epoch 20/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1704 - mean_squared_error: 0.3929 - val_loss: 0.1797 - val_mean_squared_error: 0.4532
Epoch 21/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1698 - mean_squared_error: 0.3970 - val_loss: 0.1773 - val_mean_squared_error: 0.4165
Epoch 22/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1693 - mean_squared_error: 0.3889 - val_loss: 0.1813 - val_mean_squared_error: 0.4764
Epoch 23/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1694 - mean_squared_error: 0.3906 - val_loss: 0.1778 - val_mean_squared_error: 0.4482
Epoch 24/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1678 - mean_squared_error: 0.3851 - val_loss: 0.1787 - val_mean_squared_error: 0.4560
Epoch 25/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1674 - mean_squared_error: 0.3844 - val_loss: 0.1782 - val_mean_squared_error: 0.4230
Epoch 26/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1666 - mean_squared_error: 0.3826 - val_loss: 0.1768 - val_mean_squared_error: 0.4268
Epoch 27/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1672 - mean_squared_error: 0.3877 - val_loss: 0.1742 - val_mean_squared_error: 0.4080
Epoch 28/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1656 - mean_squared_error: 0.3800 - val_loss: 0.1774 - val_mean_squared_error: 0.4743
Epoch 29/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1650 - mean_squared_error: 0.3787 - val_loss: 0.1754 - val_mean_squared_error: 0.4378
Epoch 30/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1650 - mean_squared_error: 0.3794 - val_loss: 0.1744 - val_mean_squared_error: 0.4084
Epoch 31/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1645 - mean_squared_error: 0.3772 - val_loss: 0.1739 - val_mean_squared_error: 0.4054
Epoch 32/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1642 - mean_squared_error: 0.3772 - val_loss: 0.1784 - val_mean_squared_error: 0.4508
Epoch 33/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1653 - mean_squared_error: 0.3838 - val_loss: 0.1736 - val_mean_squared_error: 0.4214
Epoch 34/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1633 - mean_squared_error: 0.3743 - val_loss: 0.1710 - val_mean_squared_error: 0.4026
Epoch 35/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1630 - mean_squared_error: 0.3746 - val_loss: 0.1738 - val_mean_squared_error: 0.4136
Epoch 36/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1629 - mean_squared_error: 0.3741 - val_loss: 0.1703 - val_mean_squared_error: 0.3968
Epoch 37/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1623 - mean_squared_error: 0.3717 - val_loss: 0.1752 - val_mean_squared_error: 0.4445
Epoch 38/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1627 - mean_squared_error: 0.3725 - val_loss: 0.1735 - val_mean_squared_error: 0.4195
Epoch 39/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1624 - mean_squared_error: 0.3735 - val_loss: 0.1739 - val_mean_squared_error: 0.4095
Epoch 40/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1618 - mean_squared_error: 0.3725 - val_loss: 0.1712 - val_mean_squared_error: 0.4125
Epoch 41/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1615 - mean_squared_error: 0.3710 - val_loss: 0.1694 - val_mean_squared_error: 0.3931
Epoch 42/100
363/363 [==============================] - 1s 4ms/step - loss: 0.1616 - mean_squared_error: 0.3701 - val_loss: 0.1734 - val_mean_squared_error: 0.4361
Epoch 43/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1610 - mean_squared_error: 0.3685 - val_loss: 0.1829 - val_mean_squared_error: 0.4518
Epoch 44/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1617 - mean_squared_error: 0.3782 - val_loss: 0.1686 - val_mean_squared_error: 0.3952
Epoch 45/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1591 - mean_squared_error: 0.3646 - val_loss: 0.1692 - val_mean_squared_error: 0.3987
Epoch 46/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1590 - mean_squared_error: 0.3654 - val_loss: 0.1673 - val_mean_squared_error: 0.3895
Epoch 47/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1593 - mean_squared_error: 0.3645 - val_loss: 0.1743 - val_mean_squared_error: 0.4340
Epoch 48/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1595 - mean_squared_error: 0.3647 - val_loss: 0.1684 - val_mean_squared_error: 0.3951
Epoch 49/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1592 - mean_squared_error: 0.3650 - val_loss: 0.1714 - val_mean_squared_error: 0.4149
Epoch 50/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1585 - mean_squared_error: 0.3629 - val_loss: 0.1675 - val_mean_squared_error: 0.3990
Epoch 51/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1585 - mean_squared_error: 0.3629 - val_loss: 0.1696 - val_mean_squared_error: 0.4041
162/162 [==============================] - 0s 3ms/step - loss: 0.1654 - mean_squared_error: 0.3914
[0.16541458666324615, 0.39139023423194885]
Saving the model saves neither the function's implementation nor any threshold; you only need to provide a dictionary at load time that maps the function's name to the function you actually wrote. If you created a nested factory that returns huber_fn, remember that the key passed to load_model must be 'huber_fn', not the name of the outer factory function.
model.save('定制模型_huber_fn.h5')
model=tf.keras.models.load_model('定制模型_huber_fn.h5',custom_objects={'huber_fn':huber_fn})
model.evaluate(x_test,y_test)
162/162 [==============================] - 1s 3ms/step - loss: 0.1654 - mean_squared_error: 0.3914
[0.16541458666324615, 0.39139023423194885]
A factory that returns a custom loss function with a specific configuration:
### Nested function: returns a Huber loss with the given threshold
def create_huber_fn(threshold=1.0):
    def huber_fn(y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < threshold
        # Quadratic branch for |error| < threshold
        squared_error = tf.square(error) / 2
        # Linear branch for |error| >= threshold
        linear_error = threshold * tf.abs(error) - threshold**2 / 2
        # The two branches meet at |error| = threshold, keeping the loss continuous and differentiable.
        # Return the per-instance loss vector
        return tf.where(is_small_error, squared_error, linear_error)
    return huber_fn
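A hypothetical usage sketch (the filename here is made up, not from the original run): compile with a specific threshold, and note that at load time the custom_objects key is the inner name 'huber_fn'. Since the threshold is not saved, you must recreate the function with the same threshold:
model.compile(loss=create_huber_fn(2.0), optimizer='nadam')
model.save('model_with_huber.h5')
# Keras saved the name 'huber_fn', so map that name back to a function with the same threshold:
model = tf.keras.models.load_model('model_with_huber.h5',
                                   custom_objects={'huber_fn': create_huber_fn(2.0)})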
To subclass tf.keras.losses.Loss instead, implement the call() method and add a get_config(self) method so the threshold can be saved with the model.
class HuberLoss(tf.keras.losses.Loss):
    def __init__(self, threshold=1.0, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold
    def call(self, y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) < self.threshold
        squared_error = tf.square(error) / 2
        linear_error = self.threshold * tf.abs(error) - self.threshold**2 / 2
        return tf.where(is_small_error, squared_error, linear_error)
    def get_config(self):
        base_config = super().get_config()
        return {**base_config, 'threshold': self.threshold}
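A minimal sketch of why get_config matters: the threshold survives a config round trip, which is exactly what saving and loading rely on.
loss = HuberLoss(2.0)
config = loss.get_config()
print(config['threshold'])                # 2.0
restored = HuberLoss.from_config(config)  # rebuilt with the same threshold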
To use it, pass loss=HuberLoss(threshold) to compile().
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=tf.keras.layers.Dense(30,activation='elu',kernel_initializer='he_normal')(input_)
hidden2=tf.keras.layers.Dense(30,activation='elu',kernel_initializer='he_normal')(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=tf.keras.layers.Dense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=HuberLoss(2.0),optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9),metrics=[tf.keras.losses.mean_squared_error])
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=100,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/100
363/363 [==============================] - 3s 6ms/step - loss: 0.4421 - mean_squared_error: 0.9953 - val_loss: 0.2567 - val_mean_squared_error: 0.5254
Epoch 2/100
363/363 [==============================] - 2s 5ms/step - loss: 0.2430 - mean_squared_error: 0.4974 - val_loss: 0.2331 - val_mean_squared_error: 0.4793
Epoch 3/100
363/363 [==============================] - 2s 5ms/step - loss: 0.2271 - mean_squared_error: 0.4648 - val_loss: 0.2284 - val_mean_squared_error: 0.4795
Epoch 4/100
363/363 [==============================] - 2s 5ms/step - loss: 0.2197 - mean_squared_error: 0.4493 - val_loss: 0.2221 - val_mean_squared_error: 0.4734
Epoch 5/100
363/363 [==============================] - 2s 5ms/step - loss: 0.2147 - mean_squared_error: 0.4399 - val_loss: 0.2191 - val_mean_squared_error: 0.4589
Epoch 6/100
363/363 [==============================] - 2s 5ms/step - loss: 0.2091 - mean_squared_error: 0.4400 - val_loss: 0.2125 - val_mean_squared_error: 0.4474
Epoch 7/100
363/363 [==============================] - 2s 5ms/step - loss: 0.2069 - mean_squared_error: 0.4240 - val_loss: 0.2078 - val_mean_squared_error: 0.4243
Epoch 8/100
363/363 [==============================] - 2s 5ms/step - loss: 0.2021 - mean_squared_error: 0.4129 - val_loss: 0.2163 - val_mean_squared_error: 0.4852
Epoch 9/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1983 - mean_squared_error: 0.4041 - val_loss: 0.2129 - val_mean_squared_error: 0.4604
Epoch 10/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1950 - mean_squared_error: 0.3971 - val_loss: 0.2086 - val_mean_squared_error: 0.4305
Epoch 11/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1977 - mean_squared_error: 0.4039 - val_loss: 0.2085 - val_mean_squared_error: 0.4574
Epoch 12/100
363/363 [==============================] - 2s 5ms/step - loss: 0.1962 - mean_squared_error: 0.4051 - val_loss: 0.2191 - val_mean_squared_error: 0.5494
162/162 [==============================] - 0s 3ms/step - loss: 0.2110 - mean_squared_error: 0.4350
[0.21103742718696594, 0.4350337088108063]
Again, provide the custom_objects dictionary when loading:
model.save('定制模型_HuberLoss.h5')
model=tf.keras.models.load_model('定制模型_HuberLoss.h5',custom_objects={'HuberLoss':HuberLoss})
model.evaluate(x_test,y_test)
162/162 [==============================] - 1s 3ms/step - loss: 0.2110 - mean_squared_error: 0.4350
[0.21103742718696594, 0.4350337088108063]
A custom activation function takes z and returns the transformed result; for example, the function below computes the same thing as tf.nn.softplus:
def my_softplus(z):
    return tf.math.log(tf.exp(z) + 1.0)
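A quick illustrative comparison against the built-in (note that tf.nn.softplus is numerically safer for large z, since tf.exp(z) can overflow):
z = tf.constant([-2.0, 0.0, 3.0])
print(my_softplus(z))
print(tf.nn.softplus(z))  # should match up to float precision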
To use it, pass activation=my_softplus to a layer.
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer='glorot_normal')(input_)
hidden2=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer='glorot_normal')(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=tf.keras.layers.Dense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=tf.keras.losses.mean_squared_error,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9))
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=100,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/100
363/363 [==============================] - 2s 5ms/step - loss: 0.8307 - val_loss: 0.7658
Epoch 2/100
363/363 [==============================] - 2s 5ms/step - loss: 0.6173 - val_loss: 0.6259
Epoch 3/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5485 - val_loss: 0.6258
Epoch 4/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5265 - val_loss: 0.6317
Epoch 5/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5401 - val_loss: 0.5721
Epoch 6/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5106 - val_loss: 0.5107
Epoch 7/100
363/363 [==============================] - 2s 5ms/step - loss: 0.4970 - val_loss: 0.5644
Epoch 8/100
363/363 [==============================] - 2s 4ms/step - loss: 0.4886 - val_loss: 0.5093
Epoch 9/100
363/363 [==============================] - 2s 5ms/step - loss: 0.4878 - val_loss: 0.5241
Epoch 10/100
363/363 [==============================] - 2s 5ms/step - loss: 0.4814 - val_loss: 0.6238
Epoch 11/100
363/363 [==============================] - 2s 5ms/step - loss: 0.4824 - val_loss: 0.4967
Epoch 12/100
363/363 [==============================] - 2s 5ms/step - loss: 0.4774 - val_loss: 0.5293
Epoch 13/100
363/363 [==============================] - 2s 4ms/step - loss: 0.4676 - val_loss: 0.4797
Epoch 14/100
363/363 [==============================] - 2s 5ms/step - loss: 0.4647 - val_loss: 0.4828
Epoch 15/100
363/363 [==============================] - 2s 4ms/step - loss: 0.4662 - val_loss: 0.6175
Epoch 16/100
363/363 [==============================] - 2s 4ms/step - loss: 0.4620 - val_loss: 0.6252
Epoch 17/100
363/363 [==============================] - 2s 4ms/step - loss: 0.4562 - val_loss: 0.5492
Epoch 18/100
363/363 [==============================] - 2s 5ms/step - loss: 0.4484 - val_loss: 0.4663
Epoch 19/100
363/363 [==============================] - 2s 4ms/step - loss: 0.4555 - val_loss: 0.6987
Epoch 20/100
363/363 [==============================] - 2s 4ms/step - loss: 0.4806 - val_loss: 0.7208
Epoch 21/100
363/363 [==============================] - 1s 4ms/step - loss: 0.4357 - val_loss: 0.4493
Epoch 22/100
363/363 [==============================] - 1s 4ms/step - loss: 0.4340 - val_loss: 0.4558
Epoch 23/100
363/363 [==============================] - 1s 4ms/step - loss: 0.4358 - val_loss: 0.5214
Epoch 24/100
363/363 [==============================] - 1s 4ms/step - loss: 0.4226 - val_loss: 0.5461
Epoch 25/100
363/363 [==============================] - 1s 4ms/step - loss: 0.4465 - val_loss: 0.4570
Epoch 26/100
363/363 [==============================] - 2s 4ms/step - loss: 0.4142 - val_loss: 0.4585
162/162 [==============================] - 0s 2ms/step - loss: 0.4493
0.4493423402309418
The subclass version implements the call() method. Since there are no hyperparameters to initialize, the class form adds little; a plain function works just as well.
class SoftplusActivation(tf.keras.layers.Layer):
    def call(self, z):
        return tf.math.log(tf.exp(z) + 1.0)
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=tf.keras.layers.Dense(30,activation=SoftplusActivation(),kernel_initializer='glorot_normal')(input_)
hidden2=tf.keras.layers.Dense(30,activation=SoftplusActivation(),kernel_initializer='glorot_normal')(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=tf.keras.layers.Dense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=tf.keras.losses.mean_squared_error,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9))
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=100,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/100
363/363 [==============================] - 2s 5ms/step - loss: 0.8111 - val_loss: 0.5742
Epoch 2/100
363/363 [==============================] - 2s 5ms/step - loss: 0.5767 - val_loss: 0.5442
Epoch 3/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5894 - val_loss: 0.6192
Epoch 4/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5381 - val_loss: 0.5797
Epoch 5/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5225 - val_loss: 0.7634
Epoch 6/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5202 - val_loss: 0.5477
Epoch 7/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5402 - val_loss: 1.1840
162/162 [==============================] - 0s 2ms/step - loss: 0.5282
0.5282357335090637
A custom initializer takes shape and dtype and returns a tensor of that shape and dtype:
def my_glorot_initializer(shape, dtype=tf.float32):
    # Standard deviation for Glorot scaling: sqrt(2 / (fan_in + fan_out))
    stddev = tf.sqrt(2. / (shape[0] + shape[1]))
    return tf.random.normal(shape, stddev=stddev, dtype=dtype)
To use it, pass kernel_initializer=my_glorot_initializer to a layer.
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer=my_glorot_initializer)(input_)
hidden2=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer=my_glorot_initializer)(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=tf.keras.layers.Dense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=tf.keras.losses.mean_squared_error,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9))
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=100,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/100
363/363 [==============================] - 2s 5ms/step - loss: 0.7316 - val_loss: 0.7510
Epoch 2/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5682 - val_loss: 0.9692
Epoch 3/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5320 - val_loss: 0.6974
Epoch 4/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5513 - val_loss: 0.9444
Epoch 5/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5751 - val_loss: 0.5435
Epoch 6/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5323 - val_loss: 0.5834
Epoch 7/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5143 - val_loss: 0.5247
Epoch 8/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5015 - val_loss: 0.7107
Epoch 9/100
363/363 [==============================] - 1s 4ms/step - loss: 0.4895 - val_loss: 0.4979
Epoch 10/100
363/363 [==============================] - 1s 4ms/step - loss: 0.4887 - val_loss: 1.1900
Epoch 11/100
363/363 [==============================] - 1s 4ms/step - loss: 0.4874 - val_loss: 0.5460
Epoch 12/100
363/363 [==============================] - 1s 4ms/step - loss: 0.4855 - val_loss: 0.8740
Epoch 13/100
363/363 [==============================] - 2s 4ms/step - loss: 0.4881 - val_loss: 0.7599
Epoch 14/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5121 - val_loss: 0.6577
162/162 [==============================] - 0s 2ms/step - loss: 0.4825
0.482530415058136
The subclass version implements __call__. Again, there are no hyperparameters to save, so the plain function is just as good.
class Glorot_Initializer(tf.keras.initializers.Initializer):
    def __call__(self, shape, dtype=tf.float32):
        stddev = tf.sqrt(2. / (shape[0] + shape[1]))
        return tf.random.normal(shape, stddev=stddev, dtype=dtype)
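An illustrative check that the sampled weights have roughly the intended scale (the shape values here are arbitrary):
w = Glorot_Initializer()(shape=(300, 100))
print(tf.math.reduce_std(w))  # ≈ sqrt(2 / (300 + 100)) ≈ 0.0707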
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer=Glorot_Initializer())(input_)
hidden2=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer=Glorot_Initializer())(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=tf.keras.layers.Dense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=tf.keras.losses.mean_squared_error,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9))
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=100,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/100
363/363 [==============================] - 2s 5ms/step - loss: 0.8588 - val_loss: 0.6394
Epoch 2/100
363/363 [==============================] - 2s 4ms/step - loss: 0.6588 - val_loss: 0.6844
Epoch 3/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5422 - val_loss: 0.5531
Epoch 4/100
363/363 [==============================] - 1s 4ms/step - loss: 0.6235 - val_loss: 0.5405
Epoch 5/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5336 - val_loss: 0.5264
Epoch 6/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5416 - val_loss: 0.5224
Epoch 7/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5639 - val_loss: 0.6617
Epoch 8/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5160 - val_loss: 0.5426
Epoch 9/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5421 - val_loss: 0.8940
Epoch 10/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5190 - val_loss: 0.5228
Epoch 11/100
363/363 [==============================] - 1s 4ms/step - loss: 0.4965 - val_loss: 0.9029
162/162 [==============================] - 0s 2ms/step - loss: 0.5111
0.5111328363418579
A custom regularizer takes weights and returns a scalar penalty computed from them; the example below is L1 regularization with a factor of 0.01:
def my_l1_regularizer(weights):
    return tf.reduce_sum(tf.abs(0.01 * weights))
To use it, pass kernel_regularizer=my_l1_regularizer to a layer.
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer=my_glorot_initializer,kernel_regularizer=my_l1_regularizer)(input_)
hidden2=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer=my_glorot_initializer,kernel_regularizer=my_l1_regularizer)(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=tf.keras.layers.Dense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=tf.keras.losses.mean_squared_error,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9))
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=100,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/100
363/363 [==============================] - 2s 5ms/step - loss: 2.2790 - val_loss: 2.0153
Epoch 2/100
363/363 [==============================] - 2s 4ms/step - loss: 1.7051 - val_loss: 1.5535
Epoch 3/100
363/363 [==============================] - 1s 4ms/step - loss: 1.5425 - val_loss: 1.3296
Epoch 4/100
363/363 [==============================] - 2s 4ms/step - loss: 1.2098 - val_loss: 1.1154
Epoch 5/100
363/363 [==============================] - 2s 4ms/step - loss: 1.0885 - val_loss: 1.0087
Epoch 6/100
363/363 [==============================] - 1s 4ms/step - loss: 0.8781 - val_loss: 0.8330
Epoch 7/100
363/363 [==============================] - 2s 4ms/step - loss: 0.7639 - val_loss: 0.7606
Epoch 8/100
363/363 [==============================] - 1s 4ms/step - loss: 0.7101 - val_loss: 0.6716
Epoch 9/100
363/363 [==============================] - 2s 4ms/step - loss: 0.6604 - val_loss: 0.6284
Epoch 10/100
363/363 [==============================] - 2s 4ms/step - loss: 0.6144 - val_loss: 0.6113
Epoch 11/100
363/363 [==============================] - 2s 4ms/step - loss: 0.6011 - val_loss: 0.5988
Epoch 12/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5927 - val_loss: 0.5646
Epoch 13/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5877 - val_loss: 0.8002
Epoch 14/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5553 - val_loss: 0.6087
Epoch 15/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5386 - val_loss: 0.5770
Epoch 16/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5414 - val_loss: 0.6216
Epoch 17/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5659 - val_loss: 0.5793
162/162 [==============================] - 0s 2ms/step - loss: 0.5535
0.5534846782684326
The subclass version implements __call__. This one has a hyperparameter that needs to be saved, so it also implements get_config().
Note that you must not call the parent's get_config(), because the Regularizer base class does not define one.
class L1_Regularizer(tf.keras.regularizers.Regularizer):
    def __init__(self, factor, **kwargs):
        super().__init__(**kwargs)
        self.factor = factor
    def __call__(self, weights):
        return tf.reduce_sum(tf.abs(self.factor * weights))
    def get_config(self):
        return {'factor': self.factor}
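A small illustrative check of the penalty value (the weight matrix is made up):
reg = L1_Regularizer(0.01)
w = tf.constant([[-1.0, 2.0], [3.0, -4.0]])
print(reg(w))  # 0.01 * (1 + 2 + 3 + 4) = 0.1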
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer=my_glorot_initializer,kernel_regularizer=L1_Regularizer(0.01))(input_)
hidden2=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer=my_glorot_initializer,kernel_regularizer=L1_Regularizer(0.01))(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=tf.keras.layers.Dense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=tf.keras.losses.mean_squared_error,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9))
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=100,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/100
363/363 [==============================] - 2s 4ms/step - loss: 2.8960 - val_loss: 1.9720
Epoch 2/100
363/363 [==============================] - 1s 4ms/step - loss: 1.7808 - val_loss: 1.6138
Epoch 3/100
363/363 [==============================] - 2s 4ms/step - loss: 1.4938 - val_loss: 1.3382
Epoch 4/100
363/363 [==============================] - 1s 4ms/step - loss: 1.2372 - val_loss: 1.1240
Epoch 5/100
363/363 [==============================] - 2s 4ms/step - loss: 1.0518 - val_loss: 0.9585
Epoch 6/100
363/363 [==============================] - 2s 4ms/step - loss: 0.9430 - val_loss: 0.8267
Epoch 7/100
363/363 [==============================] - 1s 4ms/step - loss: 0.9501 - val_loss: 0.7313
Epoch 8/100
363/363 [==============================] - 2s 4ms/step - loss: 0.7022 - val_loss: 0.7410
Epoch 9/100
363/363 [==============================] - 2s 5ms/step - loss: 0.8435 - val_loss: 0.6263
Epoch 10/100
363/363 [==============================] - 2s 4ms/step - loss: 0.6156 - val_loss: 0.5961
Epoch 11/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5689 - val_loss: 0.5740
Epoch 12/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5966 - val_loss: 0.7584
Epoch 13/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5621 - val_loss: 0.5883
Epoch 14/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5920 - val_loss: 0.6776
Epoch 15/100
363/363 [==============================] - 2s 4ms/step - loss: 1.3289 - val_loss: 0.5591
Epoch 16/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5470 - val_loss: 0.5728
Epoch 17/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5484 - val_loss: 0.5669
Epoch 18/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5622 - val_loss: 0.5838
Epoch 19/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5329 - val_loss: 0.5567
Epoch 20/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5396 - val_loss: 0.6199
Epoch 21/100
363/363 [==============================] - 1s 4ms/step - loss: 0.5478 - val_loss: 0.7298
Epoch 22/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5658 - val_loss: 0.6074
Epoch 23/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5839 - val_loss: 0.6167
Epoch 24/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5601 - val_loss: 0.6254
162/162 [==============================] - 0s 2ms/step - loss: 0.5267
0.5267248153686523
model.save('定制模型_my_softplus_my_glorot_initializer_L1_Regularizer.h5')
model=tf.keras.models.load_model('定制模型_my_softplus_my_glorot_initializer_L1_Regularizer.h5',custom_objects={'my_softplus':my_softplus,'my_glorot_initializer':my_glorot_initializer,'L1_Regularizer':L1_Regularizer})
model.evaluate(x_test,y_test)
162/162 [==============================] - 0s 2ms/step - loss: 0.5267
0.5267248153686523
A custom constraint function takes weights and returns the modified weights.
# If a weight is negative, return 0, otherwise return it unchanged; this is like relu,
# but a constraint is applied after the gradient-descent update, whereas an activation is applied before it, in the forward pass.
def my_positive_weights_constraint(weights):
    return tf.where(weights < 0, tf.zeros_like(weights), weights)
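A quick illustrative check (values arbitrary):
w = tf.constant([[-0.3, 0.8], [0.1, -0.2]])
print(my_positive_weights_constraint(w))  # negatives clamped to 0, positives unchanged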
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer=my_glorot_initializer,kernel_regularizer=my_l1_regularizer,kernel_constraint=my_positive_weights_constraint)(input_)
hidden2=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer=my_glorot_initializer,kernel_regularizer=my_l1_regularizer,kernel_constraint=my_positive_weights_constraint)(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=tf.keras.layers.Dense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=tf.keras.losses.mean_squared_error,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9))
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=100,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/100
363/363 [==============================] - 2s 5ms/step - loss: 1.6365 - val_loss: 1.3565
Epoch 2/100
363/363 [==============================] - 2s 4ms/step - loss: 1.7824 - val_loss: 1.1982
Epoch 3/100
363/363 [==============================] - 2s 4ms/step - loss: 0.9332 - val_loss: 0.9515
Epoch 4/100
363/363 [==============================] - 2s 5ms/step - loss: 0.8297 - val_loss: 0.7922
Epoch 5/100
363/363 [==============================] - 2s 5ms/step - loss: 0.7350 - val_loss: 0.7076
Epoch 6/100
363/363 [==============================] - 2s 5ms/step - loss: 0.6793 - val_loss: 0.6293
Epoch 7/100
363/363 [==============================] - 2s 5ms/step - loss: 0.6238 - val_loss: 0.6011
Epoch 8/100
363/363 [==============================] - 2s 5ms/step - loss: 0.5880 - val_loss: 0.5752
Epoch 9/100
363/363 [==============================] - 2s 5ms/step - loss: 0.5912 - val_loss: 0.5583
Epoch 10/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5718 - val_loss: 0.5616
Epoch 11/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5673 - val_loss: 0.6114
Epoch 12/100
363/363 [==============================] - 2s 5ms/step - loss: 0.5707 - val_loss: 0.5466
Epoch 13/100
363/363 [==============================] - 2s 5ms/step - loss: 0.5361 - val_loss: 0.6356
Epoch 14/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5755 - val_loss: 0.5403
Epoch 15/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5443 - val_loss: 0.5468
Epoch 16/100
363/363 [==============================] - 2s 4ms/step - loss: 0.6053 - val_loss: 0.5402
Epoch 17/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5579 - val_loss: 0.5374
Epoch 18/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5513 - val_loss: 0.5415
Epoch 19/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5361 - val_loss: 0.5442
Epoch 20/100
363/363 [==============================] - 2s 5ms/step - loss: 0.5651 - val_loss: 0.5544
Epoch 21/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5797 - val_loss: 0.5723
Epoch 22/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5483 - val_loss: 1.0739
162/162 [==============================] - 0s 2ms/step - loss: 0.5255
0.5254966616630554
The subclass version implements __call__. There are no hyperparameters here either, so a plain function would do.
class Positive_Constraint(tf.keras.constraints.Constraint):
    def __call__(self, weights):
        return tf.where(weights < 0, tf.zeros_like(weights), weights)
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer=my_glorot_initializer,kernel_regularizer=my_l1_regularizer,kernel_constraint=Positive_Constraint())(input_)
hidden2=tf.keras.layers.Dense(30,activation=my_softplus,kernel_initializer=my_glorot_initializer,kernel_regularizer=my_l1_regularizer,kernel_constraint=Positive_Constraint())(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=tf.keras.layers.Dense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=tf.keras.losses.mean_squared_error,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9))
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=100,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/100
363/363 [==============================] - 2s 5ms/step - loss: 1.5424 - val_loss: 1.2098
Epoch 2/100
363/363 [==============================] - 1s 4ms/step - loss: 1.1367 - val_loss: 1.0439
Epoch 3/100
363/363 [==============================] - 1s 4ms/step - loss: 0.9188 - val_loss: 0.8596
Epoch 4/100
363/363 [==============================] - 1s 4ms/step - loss: 0.8691 - val_loss: 0.8027
Epoch 5/100
363/363 [==============================] - 1s 4ms/step - loss: 0.7169 - val_loss: 0.7065
Epoch 6/100
363/363 [==============================] - 2s 4ms/step - loss: 0.6667 - val_loss: 0.7058
Epoch 7/100
363/363 [==============================] - 2s 4ms/step - loss: 0.6145 - val_loss: 0.6615
Epoch 8/100
363/363 [==============================] - 2s 4ms/step - loss: 0.7354 - val_loss: 0.5920
Epoch 9/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5654 - val_loss: 0.5650
Epoch 10/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5731 - val_loss: 0.5725
Epoch 11/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5497 - val_loss: 0.6181
Epoch 12/100
363/363 [==============================] - 2s 5ms/step - loss: 0.5965 - val_loss: 0.5522
Epoch 13/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5781 - val_loss: 0.5665
Epoch 14/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5323 - val_loss: 0.5506
Epoch 15/100
363/363 [==============================] - 2s 4ms/step - loss: 0.6170 - val_loss: 0.5729
Epoch 16/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5442 - val_loss: 0.5446
Epoch 17/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5459 - val_loss: 0.5470
Epoch 18/100
363/363 [==============================] - 2s 5ms/step - loss: 0.5621 - val_loss: 0.6127
Epoch 19/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5433 - val_loss: 0.5365
Epoch 20/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5717 - val_loss: 0.5698
Epoch 21/100
363/363 [==============================] - 2s 5ms/step - loss: 0.5663 - val_loss: 0.5762
Epoch 22/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5648 - val_loss: 0.5360
Epoch 23/100
363/363 [==============================] - 2s 5ms/step - loss: 0.5376 - val_loss: 0.5736
Epoch 24/100
363/363 [==============================] - 2s 5ms/step - loss: 0.5318 - val_loss: 0.5430
Epoch 25/100
363/363 [==============================] - 2s 5ms/step - loss: 0.5425 - val_loss: 0.5758
Epoch 26/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5325 - val_loss: 0.5499
Epoch 27/100
363/363 [==============================] - 2s 4ms/step - loss: 0.5604 - val_loss: 0.5750
162/162 [==============================] - 0s 2ms/step - loss: 0.5242
0.5242263674736023
A metric function takes y_true and y_pred, just like a loss function. The plain-function form is best suited to regression-style losses, not classification accuracy (a percentage): Keras automatically tracks each batch's value and reports the running mean (total loss over all instances divided by the number of instances), but accuracy cannot simply be averaged across batch means. Accuracy has to be a streaming metric: track the number of correct predictions and the total number of predictions across batches, compute the epoch's accuracy from those running totals, and reset the state with
reset_states()
after each epoch.
(For loss-like metrics, the function form behaves the same as a loss.) A streaming metric class implements update_state() (the counterpart of call()) and result() (which returns, for example, the running total loss divided by the total instance count).
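As a built-in illustration of the streaming pattern (the labels here are made up):
precision = tf.keras.metrics.Precision()
precision.update_state([0, 1, 1, 1], [1, 1, 0, 1])  # batch 1
print(precision.result())    # running precision so far: 2/3
precision.update_state([0, 1, 0, 0], [1, 0, 1, 0])  # batch 2
print(precision.result())    # now covers both batches: 2/5 = 0.4
precision.reset_states()     # reset at the end of each epoch
The HuberMetric class below follows the same streaming pattern for the Huber loss: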
class HuberMetric(tf.keras.metrics.Metric):
    def __init__(self, threshold=1.0, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold
        # Used to compute each batch's Huber loss
        self.huber_fn = create_huber_fn(threshold)
        # Initialize the state variables
        self.total = self.add_weight('total', initializer='zeros')
        self.count = self.add_weight('count', initializer='zeros')
    def update_state(self, y_true, y_pred, sample_weight=None):
        metric = self.huber_fn(y_true, y_pred)
        # Update the running totals
        self.total.assign_add(tf.reduce_sum(metric))
        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))
    def result(self):
        return self.total / self.count
    def get_config(self):
        base_config = super().get_config()
        return {**base_config, 'threshold': self.threshold}
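A standalone illustrative check, using an error exactly at the threshold, where both branches agree:
m = HuberMetric(2.0)
m.update_state(tf.constant([[2.0]]), tf.constant([[0.0]]))
print(m.result())  # |error| = 2 = threshold: 2*2 - 2**2/2 = 2.0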
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=tf.keras.layers.Dense(30,activation='elu',kernel_initializer='he_normal')(input_)
hidden2=tf.keras.layers.Dense(30,activation='elu',kernel_initializer='he_normal')(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=tf.keras.layers.Dense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=tf.keras.losses.mean_squared_error,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9),metrics=[HuberMetric()])
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=20,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/20
363/363 [==============================] - 2s 5ms/step - loss: 1.5173 - huber_metric: 0.3663 - val_loss: 1.0649 - val_huber_metric: 0.2390
Epoch 2/20
363/363 [==============================] - 2s 4ms/step - loss: 0.5028 - huber_metric: 0.2094 - val_loss: 1.0194 - val_huber_metric: 0.2288
Epoch 3/20
363/363 [==============================] - 2s 4ms/step - loss: 0.4874 - huber_metric: 0.2025 - val_loss: 0.5846 - val_huber_metric: 0.2068
Epoch 4/20
363/363 [==============================] - 1s 4ms/step - loss: 0.4573 - huber_metric: 0.1943 - val_loss: 0.5188 - val_huber_metric: 0.1988
Epoch 5/20
363/363 [==============================] - 1s 4ms/step - loss: 0.4318 - huber_metric: 0.1876 - val_loss: 0.4432 - val_huber_metric: 0.1932
Epoch 6/20
363/363 [==============================] - 2s 4ms/step - loss: 0.4174 - huber_metric: 0.1832 - val_loss: 0.4417 - val_huber_metric: 0.1893
Epoch 7/20
363/363 [==============================] - 2s 4ms/step - loss: 0.4055 - huber_metric: 0.1791 - val_loss: 0.4268 - val_huber_metric: 0.1871
Epoch 8/20
363/363 [==============================] - 2s 4ms/step - loss: 0.3993 - huber_metric: 0.1762 - val_loss: 0.4651 - val_huber_metric: 0.1868
Epoch 9/20
363/363 [==============================] - 2s 5ms/step - loss: 0.3933 - huber_metric: 0.1742 - val_loss: 0.4330 - val_huber_metric: 0.1875
Epoch 10/20
363/363 [==============================] - 2s 4ms/step - loss: 0.3948 - huber_metric: 0.1739 - val_loss: 0.4564 - val_huber_metric: 0.1840
Epoch 11/20
363/363 [==============================] - 2s 4ms/step - loss: 0.3973 - huber_metric: 0.1731 - val_loss: 0.4088 - val_huber_metric: 0.1772
Epoch 12/20
363/363 [==============================] - 2s 4ms/step - loss: 0.3825 - huber_metric: 0.1701 - val_loss: 0.4581 - val_huber_metric: 0.1801
Epoch 13/20
363/363 [==============================] - 2s 4ms/step - loss: 0.3815 - huber_metric: 0.1689 - val_loss: 0.4203 - val_huber_metric: 0.1819
Epoch 14/20
363/363 [==============================] - 2s 5ms/step - loss: 0.3812 - huber_metric: 0.1688 - val_loss: 0.4026 - val_huber_metric: 0.1747
Epoch 15/20
363/363 [==============================] - 2s 4ms/step - loss: 0.3710 - huber_metric: 0.1649 - val_loss: 0.4131 - val_huber_metric: 0.1804
Epoch 16/20
363/363 [==============================] - 2s 4ms/step - loss: 0.3718 - huber_metric: 0.1655 - val_loss: 0.4327 - val_huber_metric: 0.1791
Epoch 17/20
363/363 [==============================] - 1s 4ms/step - loss: 0.3677 - huber_metric: 0.1634 - val_loss: 0.3966 - val_huber_metric: 0.1730
Epoch 18/20
363/363 [==============================] - 1s 4ms/step - loss: 0.3650 - huber_metric: 0.1623 - val_loss: 0.4136 - val_huber_metric: 0.1705
Epoch 19/20
363/363 [==============================] - 2s 4ms/step - loss: 0.3620 - huber_metric: 0.1610 - val_loss: 0.3936 - val_huber_metric: 0.1709
Epoch 20/20
363/363 [==============================] - 2s 4ms/step - loss: 0.3629 - huber_metric: 0.1608 - val_loss: 0.4518 - val_huber_metric: 0.1769
162/162 [==============================] - 0s 2ms/step - loss: 0.3822 - huber_metric: 0.1660
[0.3821825385093689, 0.16596850752830505]
A custom layer generally implements the __init__, build, call, compute_output_shape, and get_config methods, although implementing only __init__ and call is also fine for simple cases. For a stateless transformation, a Lambda layer is the simplest option:
exponential_layer=tf.keras.layers.Lambda(lambda x:tf.exp(x))
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    exponential_layer,
    tf.keras.layers.Dense(30),
    tf.keras.layers.Dense(1)])
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten (Flatten) (None, 784) 0
lambda (Lambda) (None, 784) 0
dense_36 (Dense) (None, 30) 23550
dense_37 (Dense) (None, 1) 31
=================================================================
Total params: 23,581
Trainable params: 23,581
Non-trainable params: 0
_________________________________________________________________
class MyDense(tf.keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)  # call this first, before setting any attributes
        self.units = units
        self.activation = tf.keras.activations.get(activation)
    # Create this layer's weights
    def build(self, batch_input_shape):
        self.kernel = self.add_weight(name='kernel', shape=[batch_input_shape[-1], self.units],
                                      initializer='he_normal', trainable=True)
        self.bias = self.add_weight(name='bias', shape=[self.units],
                                    initializer='zeros', trainable=True)
        super().build(batch_input_shape)  # must be at the end
    def call(self, X):
        return self.activation(X @ self.kernel + self.bias)
    # Compute the output shape; this is what model.summary() displays
    def compute_output_shape(self, batch_input_shape):
        return tf.TensorShape(batch_input_shape[:-1] + [self.units])
    # Needed for model.save()
    def get_config(self):
        base_config = super().get_config()
        return {**base_config, 'units': self.units,
                'activation': tf.keras.activations.serialize(self.activation)}
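A quick illustrative check that build() runs on first call and the shapes come out as expected:
layer = MyDense(30, activation='elu')
out = layer(tf.zeros([2, 8]))                 # build() is triggered here; kernel shape is [8, 30]
print(out.shape)                              # (2, 30)
print(layer.compute_output_shape([None, 8]))  # (None, 30)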
For reasons I have not pinned down, training with MyDense() layers showed unstable (exploding) gradients, sometimes even NaNs. Adding nesterov=True eases the explosion, but some instability remained, so clipnorm=1 is also added for gradient clipping, which greatly reduces it.
input_=tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1=MyDense(30,'elu')(input_)
hidden2=MyDense(30,'elu')(hidden1)
concat=tf.keras.layers.Concatenate()([input_,hidden2])
output=MyDense(1)(concat)
model=tf.keras.Model(inputs=[input_],outputs=[output])
model.compile(loss=tf.keras.losses.mean_squared_error,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9,nesterov=True,clipnorm=1))
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=100,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/100
363/363 [==============================] - 3s 6ms/step - loss: 1.2321 - val_loss: 1.9838
Epoch 2/100
363/363 [==============================] - 2s 6ms/step - loss: 0.5371 - val_loss: 0.9565
Epoch 3/100
363/363 [==============================] - 2s 6ms/step - loss: 0.4700 - val_loss: 0.5568
Epoch 4/100
363/363 [==============================] - 2s 5ms/step - loss: 0.4389 - val_loss: 0.4537
Epoch 5/100
363/363 [==============================] - 2s 5ms/step - loss: 0.4225 - val_loss: 0.4333
Epoch 6/100
363/363 [==============================] - 2s 5ms/step - loss: 0.4106 - val_loss: 0.4228
Epoch 7/100
363/363 [==============================] - 2s 5ms/step - loss: 0.4061 - val_loss: 0.4193
Epoch 8/100
363/363 [==============================] - 2s 5ms/step - loss: 0.4002 - val_loss: 0.4205
Epoch 9/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3926 - val_loss: 0.4125
Epoch 10/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3884 - val_loss: 0.4073
Epoch 11/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3839 - val_loss: 0.4283
Epoch 12/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3811 - val_loss: 0.4070
Epoch 13/100
363/363 [==============================] - 2s 5ms/step - loss: 0.3780 - val_loss: 0.3909
Epoch 14/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3811 - val_loss: 0.4094
Epoch 15/100
363/363 [==============================] - 2s 5ms/step - loss: 0.3728 - val_loss: 0.3872
Epoch 16/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3705 - val_loss: 0.3980
Epoch 17/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3758 - val_loss: 0.3931
Epoch 18/100
363/363 [==============================] - 2s 5ms/step - loss: 0.3651 - val_loss: 0.4041
Epoch 19/100
363/363 [==============================] - 2s 5ms/step - loss: 0.3651 - val_loss: 0.3832
Epoch 20/100
363/363 [==============================] - 2s 5ms/step - loss: 0.3596 - val_loss: 0.3892
Epoch 21/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3560 - val_loss: 0.3818
Epoch 22/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3560 - val_loss: 0.3831
Epoch 23/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3525 - val_loss: 0.3790
Epoch 24/100
363/363 [==============================] - 2s 7ms/step - loss: 0.3535 - val_loss: 0.3757
Epoch 25/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3480 - val_loss: 0.3788
Epoch 26/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3494 - val_loss: 0.3787
Epoch 27/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3473 - val_loss: 0.3918
Epoch 28/100
363/363 [==============================] - 2s 7ms/step - loss: 0.3442 - val_loss: 0.3670
Epoch 29/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3418 - val_loss: 0.3664
Epoch 30/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3431 - val_loss: 0.3644
Epoch 31/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3390 - val_loss: 0.3684
Epoch 32/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3378 - val_loss: 0.3706
Epoch 33/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3395 - val_loss: 0.3717
Epoch 34/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3369 - val_loss: 0.3754
Epoch 35/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3337 - val_loss: 0.3576
Epoch 36/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3331 - val_loss: 0.3666
Epoch 37/100
363/363 [==============================] - 2s 7ms/step - loss: 0.3313 - val_loss: 0.3552
Epoch 38/100
363/363 [==============================] - 2s 7ms/step - loss: 0.3298 - val_loss: 0.3578
Epoch 39/100
363/363 [==============================] - 2s 7ms/step - loss: 0.3278 - val_loss: 0.3567
Epoch 40/100
363/363 [==============================] - 2s 7ms/step - loss: 0.3274 - val_loss: 0.3700
Epoch 41/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3274 - val_loss: 0.3566
Epoch 42/100
363/363 [==============================] - 2s 6ms/step - loss: 0.3263 - val_loss: 0.3606
162/162 [==============================] - 0s 2ms/step - loss: 0.3466
0.34662193059921265
Used by the custom model defined below.
class ResidualBlock(tf.keras.layers.Layer):
    def __init__(self, n_layers, n_neurons, **kwargs):
        super().__init__(**kwargs)
        self.n_layers = n_layers
        self.n_neurons = n_neurons
        self.hidden = [tf.keras.layers.Dense(self.n_neurons, activation='elu', kernel_initializer='he_normal')
                       for _ in range(self.n_layers)]
    def call(self, inputs):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        return Z + inputs  # add the block's input to its last layer's output (the skip connection)
    def compute_output_shape(self, batch_input_shape):
        return tf.TensorShape(batch_input_shape[:-1] + [self.n_neurons])
    def get_config(self):
        base_config = super().get_config()
        return {**base_config, 'n_layers': self.n_layers, 'n_neurons': self.n_neurons}
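A brief illustrative check; note that the skip connection requires the input's last dimension to equal n_neurons:
block = ResidualBlock(2, 30)
z = tf.zeros([4, 30])
print(block(z).shape)  # (4, 30); an input whose last dim is not 30 would fail at the add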
# A custom model that uses the ResidualBlock defined above
class ResidualRegressor(tf.keras.models.Model):
    def __init__(self, outputdim, **kwargs):
        super().__init__(**kwargs)
        self.outputdim = outputdim
        self.input_layer = tf.keras.layers.InputLayer(input_shape=[8])
        self.hidden1 = tf.keras.layers.Dense(30, activation='elu', kernel_initializer='he_normal')
        self.block1 = ResidualBlock(2, 30)
        self.block2 = ResidualBlock(2, 30)
        self.out = tf.keras.layers.Dense(self.outputdim)
    def call(self, inputs):
        input_output = self.input_layer(inputs)
        Z = input_output
        Z = self.hidden1(Z)
        Z = self.block1(Z)
        Z = self.block2(Z)
        # Extra merge step added here: concatenate the input with block2's output before the output layer.
        Z = self.out(tf.keras.layers.concatenate([Z, input_output]))
        return Z
    def get_config(self):
        base_config = super().get_config()
        return {**base_config, 'outputdim': self.outputdim}
The optimizer adds momentum, Nesterov accelerated gradients (which correct the update direction to point closer to the optimum), and
clipnorm=1
gradient clipping, a great tool against exploding gradients.
model=ResidualRegressor(1)
model.compile(loss=tf.keras.losses.mean_squared_error,optimizer=tf.keras.optimizers.SGD(learning_rate=0.001,momentum=0.9,nesterov=True,clipnorm=1))
earlystop=tf.keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
model.fit(x_train,y_train,epochs=100,validation_data=(x_valid,y_valid),callbacks=[earlystop])
model.evaluate(x_test,y_test)
Epoch 1/100
363/363 [==============================] - 4s 9ms/step - loss: 1.6555 - val_loss: 0.4865
Epoch 2/100
363/363 [==============================] - 3s 8ms/step - loss: 0.5679 - val_loss: 0.4573
Epoch 3/100
363/363 [==============================] - 3s 9ms/step - loss: 0.4531 - val_loss: 0.4170
Epoch 4/100
363/363 [==============================] - 3s 9ms/step - loss: 0.4166 - val_loss: 0.4995
Epoch 5/100
363/363 [==============================] - 3s 9ms/step - loss: 0.3985 - val_loss: 0.4959
Epoch 6/100
363/363 [==============================] - 3s 8ms/step - loss: 0.3783 - val_loss: 0.4981
Epoch 7/100
363/363 [==============================] - 3s 8ms/step - loss: 0.3749 - val_loss: 0.4566
Epoch 8/100
363/363 [==============================] - 3s 9ms/step - loss: 0.3668 - val_loss: 0.4933
162/162 [==============================] - 0s 3ms/step - loss: 0.4215
0.4215191900730133
In a custom training loop you have to control many things yourself, and if you want plots you must record each epoch's metrics on your own. The loop here only prints training progress and never evaluates on the validation set. Likewise, early stopping must be implemented by hand (see the sketch after the output below), which is easy to get wrong.
import numpy as np
# Sample a random batch of batch_size instances
def random_batch(X, y, batch_size=32):
    index = np.random.randint(len(X), size=batch_size)
    return X[index], y[index]
# Print the status of each step
def print_state(cur_num, total_num, mean_loss, metrics=None):
    metrics_string = '-'.join(["{}:{:.4f}".format(m.name, m.result())
                               for m in [mean_loss] + (metrics or [])])
    end = "" if cur_num < total_num else '\n'
    print("\r{}/{} - ".format(cur_num, total_num) + metrics_string, end=end)
# Set the hyperparameters
n_epochs = 20
batch_size = 32
n_steps = len(x_train) // batch_size
loss_fn = tf.keras.losses.mean_squared_error
mean_loss_metric = tf.keras.metrics.Mean()
optimizer = tf.keras.optimizers.Nadam()
metrics = [tf.keras.metrics.MeanAbsoluteError()]
# Build the model; no compile() or fit() needed
input_ = tf.keras.layers.Input(shape=x_train.shape[1:])
hidden1 = tf.keras.layers.Dense(30, activation='elu', kernel_initializer='he_normal', kernel_regularizer='l2')(input_)
hidden2 = tf.keras.layers.Dense(30, activation='elu', kernel_initializer='he_normal', kernel_regularizer='l2')(hidden1)
concat = tf.keras.layers.Concatenate()([input_, hidden2])
output = tf.keras.layers.Dense(1)(concat)
model = tf.keras.Model(inputs=[input_], outputs=[output])
# The manual training loop
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(x_train, y_train, 32)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch, training=True)  # training=True puts the model in training mode
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            loss = tf.add_n([main_loss] + model.losses)  # model.losses holds the regularization losses
        # Compute the gradients
        gradients = tape.gradient(loss, model.trainable_variables)
        # Apply them with the optimizer
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        # Record this step's loss for the epoch
        mean_loss_metric(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_state(step * batch_size, len(y_train), mean_loss_metric, metrics)
    # Print the epoch's final status
    print_state(len(y_train), len(y_train), mean_loss_metric, metrics)
    # Reset all metrics at the end of each epoch
    for m in [mean_loss_metric] + metrics:
        m.reset_states()
Epoch 1/20
11610/11610 - mean:2.8731-mean_absolute_error:0.9838
Epoch 2/20
11610/11610 - mean:2.2081-mean_absolute_error:0.8977
Epoch 3/20
11610/11610 - mean:1.9747-mean_absolute_error:0.8883
Epoch 4/20
11610/11610 - mean:1.8388-mean_absolute_error:0.8942
Epoch 5/20
11610/11610 - mean:1.7392-mean_absolute_error:0.8977
Epoch 6/20
11610/11610 - mean:1.6738-mean_absolute_error:0.9122
Epoch 7/20
11610/11610 - mean:1.6173-mean_absolute_error:0.9049
Epoch 8/20
11610/11610 - mean:1.5518-mean_absolute_error:0.8923
Epoch 9/20
11610/11610 - mean:1.4897-mean_absolute_error:0.8965
Epoch 10/20
11610/11610 - mean:1.4854-mean_absolute_error:0.8954
Epoch 11/20
11610/11610 - mean:1.4624-mean_absolute_error:0.9027
Epoch 12/20
11610/11610 - mean:1.4407-mean_absolute_error:0.8947
Epoch 13/20
11610/11610 - mean:1.4030-mean_absolute_error:0.8857
Epoch 14/20
11610/11610 - mean:1.4262-mean_absolute_error:0.8982
Epoch 15/20
11610/11610 - mean:1.3799-mean_absolute_error:0.8869
Epoch 16/20
11610/11610 - mean:1.4081-mean_absolute_error:0.9009
Epoch 17/20
11610/11610 - mean:1.3618-mean_absolute_error:0.8845
Epoch 18/20
11610/11610 - mean:1.3639-mean_absolute_error:0.8908
Epoch 19/20
11610/11610 - mean:1.3566-mean_absolute_error:0.8907
Epoch 20/20
11610/11610 - mean:1.3686-mean_absolute_error:0.8974
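As promised above, here is a minimal, illustrative sketch of manual validation and early stopping in a custom loop; the patience bookkeeping and the names (best_weights, wait) are my own, not from the original run:
best_val_loss = float('inf')
best_weights, patience, wait = None, 5, 0
for epoch in range(1, n_epochs + 1):
    ...  # run the training steps exactly as in the loop above
    val_pred = model(x_valid, training=False)
    val_loss = tf.reduce_mean(loss_fn(y_valid, val_pred)).numpy()
    if val_loss < best_val_loss:
        best_val_loss, best_weights, wait = val_loss, model.get_weights(), 0
    else:
        wait += 1
        if wait >= patience:
            model.set_weights(best_weights)  # like restore_best_weights=True
            break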