Parameter initialization matters in many problems. Common schemes include uniform initialization, normal (Gaussian) initialization, and Xavier initialization [1]. Xavier initialization is widely used; it was derived for symmetric, saturating activations such as tanh and sigmoid, whereas for ReLU activations the related He initialization is usually preferred.
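For reference, the Xavier scheme implemented later in m_xavier_init_weight draws each weight from a symmetric uniform interval whose half-width depends on the sizes of the two layers it connects, following [1]:

$$ W \sim U\!\left(-\sqrt{\frac{6}{n_j + n_{j+1}}},\ \sqrt{\frac{6}{n_j + n_{j+1}}}\right) $$

where $n_j$ and $n_{j+1}$ are the node counts of the layers on either side of the weight matrix; the thresholds (biases) start at zero.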
Normally distributed values can be generated with the Box–Muller transform.
For the truncated normal distribution, see reference [2]; code in several languages, including Fortran 90, can be downloaded from the related pages at Florida State University. A small sketch of both ideas follows below.
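As an illustration of the two remarks above, here is a minimal, self-contained sketch of Box–Muller sampling plus truncation by simple rejection. It reuses the PRECISION kind from mod_Precision; the module and routine names (mod_NormalSample, normal_sample, truncated_normal_sample) are illustrative assumptions and are not part of the initialization module below.
!* Illustrative sketch only: Box-Muller sampling of a standard normal,
!* plus truncation to [a, b] by simple rejection.
module mod_NormalSample
use mod_Precision
implicit none
public :: normal_sample, truncated_normal_sample
contains
!* One standard normal sample via the Box-Muller transform.
function normal_sample() result(z)
real(PRECISION) :: z
real(PRECISION) :: u1, u2
real(PRECISION), parameter :: PI = 3.14159265358979_PRECISION
call RANDOM_NUMBER(u1)
call RANDOM_NUMBER(u2)
!* Avoid LOG(0) when u1 happens to be exactly zero.
u1 = MAX(u1, TINY(u1))
z = SQRT(-2.0_PRECISION * LOG(u1)) * COS(2.0_PRECISION * PI * u2)
end function normal_sample
!* One truncated normal sample in [a, b]: resample until the value falls inside.
function truncated_normal_sample(a, b) result(z)
real(PRECISION), intent(in) :: a, b
real(PRECISION) :: z
do
z = normal_sample()
if (z >= a .AND. z <= b) exit
end do
end function truncated_normal_sample
end module mod_NormalSample
Rejection is only sensible when the interval [a, b] covers a reasonable amount of probability mass; the implementations referenced in [2] handle far-tail truncation more carefully.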
!* This module defines the initialization methods for the neural network weights and thresholds.
module mod_NNWeightThresholdInitMethods
use mod_Log
use mod_Precision
use mod_NNStructure
implicit none
public :: NN_weight_threshold_init_main
private :: m_zero_init
private :: m_one_init
private :: m_xavier_init
private :: m_xavier_init_weight
private :: m_weight_range_custom
private :: m_threshold_range_custom
contains
!* Entry point that dispatches the weight/threshold initialization of the network.
subroutine NN_weight_threshold_init_main( &
init_method_name, my_NNStructure )
implicit none
character(len=*), intent(in) :: init_method_name
class(NNStructure), intent(inout) :: my_NNStructure
logical :: init_status
call my_NNStructure % get_init_basic_status(init_status)
if (.NOT. init_status) then
call LogErr("mod_NNWeightThresholdInitMethods: &
SUBROUTINE NN_weight_threshold_init_main," )
call LogErr("my_NNStructure didn't init.")
stop
end if
call LogInfo("mod_NNWeightThresholdInitMethods: &
SUBROUTINE NN_weight_threshold_init_main")
!* Append: add new initialization methods here.
select case (TRIM(ADJUSTL(init_method_name)))
case ('')
call LogInfo("--> my_NNStructure init default.")
case('zero')
call m_zero_init(my_NNStructure)
call LogInfo("--> my_NNStructure init zero.")
case('one')
call m_one_init(my_NNStructure)
call LogInfo("--> my_NNStructure init [0,1].")
case ('xavier')
call m_xavier_init(my_NNStructure)
call LogInfo("--> my_NNStructure init xavier.")
case default
call LogErr("mod_NNWeightThresholdInitMethods: &
SUBROUTINE NN_weight_whreshold_init_main")
call LogErr("--> my_NNStructure didn't init.")
stop
end select
return
end subroutine
!====
!* Initialize all weights and thresholds to 0.
subroutine m_zero_init( my_NNStructure )
implicit none
class(NNStructure), intent(inout) :: my_NNStructure
integer :: l_count
integer :: layer_index
l_count = my_NNStructure % layers_count
do layer_index=1, l_count
associate ( &
W => my_NNStructure % pt_W( layer_index ) % W, &
Theta => my_NNStructure % pt_Theta( layer_index ) % Theta &
)
Theta = 0
W = 0
end associate
end do
call LogInfo("mod_NNWeightThresholdInitMethods: &
SUBROUTINE m_zero_init.")
return
end subroutine m_zero_init
!====
!* Initialize weights and thresholds uniformly in [0, 1).
subroutine m_one_init( my_NNStructure )
implicit none
class(NNStructure), intent(inout) :: my_NNStructure
call m_weight_range_custom( my_NNStructure, 0.0_PRECISION, 1.0_PRECISION )
call m_threshold_range_custom( my_NNStructure, 0.0_PRECISION, 1.0_PRECISION )
call LogDebug("mod_NNWeightThresholdInitMethods: &
SUBROUTINE m_one_init.")
return
end subroutine m_one_init
!====
!* Initialize the weights uniformly in a given range.
subroutine m_weight_range_custom( my_NNStructure, &
set_min, set_max )
implicit none
class(NNStructure), intent(inout) :: my_NNStructure
real(PRECISION), intent(in) :: set_min, set_max
integer :: l_count
integer :: layer_index
l_count = my_NNStructure % layers_count
do layer_index=1, l_count
associate ( &
W => my_NNStructure % pt_W( layer_index ) % W &
)
!* Draw uniform values in [0,1) and rescale them to [set_min, set_max).
call RANDOM_NUMBER(W)
W = (set_max - set_min) * W + set_min
end associate
end do
call LogDebug("mod_NNWeightThresholdInitMethods: &
SUBROUTINE m_weight_range_custom.")
return
end subroutine m_weight_range_custom
!====
!* Initialize the thresholds uniformly in a given range.
subroutine m_threshold_range_custom( my_NNStructure, &
set_min, set_max )
implicit none
class(NNStructure), intent(inout) :: my_NNStructure
real(PRECISION), intent(in) :: set_min, set_max
integer :: l_count
integer :: layer_index
l_count = my_NNStructure % layers_count
do layer_index=1, l_count
associate ( &
Theta => my_NNStructure % pt_Theta( layer_index ) % Theta &
)
!* Draw uniform values in [0,1) and rescale them to [set_min, set_max).
call RANDOM_NUMBER(Theta)
Theta = (set_max - set_min) * Theta + set_min
end associate
end do
call LogDebug("mod_NNWeightThresholdInitMethods: &
SUBROUTINE m_threshold_range_custom.")
return
end subroutine m_threshold_range_custom
!====
!* Xavier initialization of the weights and thresholds.
subroutine m_xavier_init( my_NNStructure )
implicit none
class(NNStructure), intent(inout) :: my_NNStructure
integer :: l_count, M, N
integer :: layer_index
l_count = my_NNStructure % layers_count
do layer_index=1, l_count
associate ( &
W => my_NNStructure % pt_W( layer_index ) % W, &
Theta => my_NNStructure % pt_Theta( layer_index ) % Theta &
)
!* M: node count of the previous layer (fan-in of this weight matrix);
!* N: node count of the current layer (fan-out).
M = my_NNStructure % layers_node_count(layer_index - 1)
N = my_NNStructure % layers_node_count(layer_index)
!* Thresholds (biases) start at zero under Xavier initialization.
Theta = 0
call m_xavier_init_weight(M, N, W)
end associate
end do
call LogDebug("mod_NNWeightThresholdInitMethods: &
SUBROUTINE m_xavier_init.")
return
end subroutine m_xavier_init
!====
!* Xavier initialization of the weights:
!* W ~ U(-x, x), a uniform distribution with x = sqrt(6 / (n_j + n_{j+1})),
!* where n_j and n_{j+1} are the node counts of the two layers W connects.
!* From the paper: Xavier Glorot, Yoshua Bengio,
!* "Understanding the difficulty of training deep feedforward neural networks".
subroutine m_xavier_init_weight( count_layer_node_present, &
count_layer_node_out, weight )
implicit none
integer, intent(in) :: count_layer_node_present, &
count_layer_node_out
real(PRECISION), dimension(:,:), intent(inout) :: weight
real(PRECISION) :: x
integer :: M, N
M = count_layer_node_present
N = count_layer_node_out
x = SQRT(6.0_PRECISION / (M + N))
!* Re-seed the intrinsic generator, draw uniform values in [0,1),
!* then map them onto the symmetric interval [-x, x).
call RANDOM_SEED()
call RANDOM_NUMBER(weight)
weight = 2.0_PRECISION * x * weight - x
return
end subroutine m_xavier_init_weight
!====
end module mod_NNWeightThresholdInitMethods
Appendix
Multilayer Neural Networks from Scratch (1): Reading the MNIST Dataset in Fortran
Multilayer Neural Networks from Scratch (2): Randomly Generating "Double Moon" Classification Data in Fortran
Multilayer Neural Networks from Scratch (3): A Detailed Derivation of the BP Neural Network Formulas
Multilayer Neural Networks from Scratch (4): The Matrix Form of the Multilayer BP Neural Network
Multilayer Neural Networks from Scratch (5): Defining the Data Structures
Multilayer Neural Networks from Scratch (6): Activation Functions
Multilayer Neural Networks from Scratch (7): Loss Functions
Multilayer Neural Networks from Scratch (8): Why Cross-Entropy Is Used as the Loss Function in Classification Problems
Multilayer Neural Networks from Scratch (9): Optimization Functions
Multilayer Neural Networks from Scratch (10): Parameter Initialization
Multilayer Neural Networks from Scratch (11): Implementing the Training Class
Multilayer Neural Networks from Scratch (12): Implementing the Example Class
Multilayer Neural Networks from Scratch (13): A Brief Discussion of Parallel Computing
[1] Xavier Glorot, Yoshua Bengio. Understanding the difficulty of training deep feedforward neural networks. 2010. http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf
[2] John Burkardt. The Truncated Normal Distribution. 2014. https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf