## Import the libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
## Building a spatial attention layer
class Spatial_Attention_layer(nn.Module):
    def __init__(self, DEVICE, in_channels, num_of_vertices, num_of_timesteps):
        super(Spatial_Attention_layer, self).__init__()
        self.W1 = nn.Parameter(torch.FloatTensor(num_of_timesteps).to(DEVICE))                      # (12)
        self.W2 = nn.Parameter(torch.FloatTensor(in_channels, num_of_timesteps).to(DEVICE))         # (1, 12)
        self.W3 = nn.Parameter(torch.FloatTensor(in_channels).to(DEVICE))                           # (1)
        self.bs = nn.Parameter(torch.FloatTensor(1, num_of_vertices, num_of_vertices).to(DEVICE))   # (1, 307, 307)
        self.Vs = nn.Parameter(torch.FloatTensor(num_of_vertices, num_of_vertices).to(DEVICE))      # (307, 307)

    def reset_parameters(self):
        # initialization method
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)  # Xavier initialization keeps the weight scale "just right"
            else:
                nn.init.uniform_(p)

    def forward(self, x):
        '''
        x (B,N,F,T) --(× W1 (T,))--> (B,N,F) --(× W2 (F,T))--> lhs (B,N,T)
        x (B,N,F,T) --(transpose)--> (B,T,N,F) --(× W3 (F,))--> rhs (B,T,N)
        product = lhs × rhs : (B,N,N)
        '''
        lhs = torch.matmul(torch.matmul(x, self.W1), self.W2)
        rhs = torch.matmul(self.W3, x).transpose(-1, -2)
        product = torch.matmul(lhs, rhs)
        S = torch.matmul(self.Vs, torch.sigmoid(product + self.bs))
        # normalization
        S_normalized = F.softmax(S, dim=1)  # (32, 307, 307)
        return S_normalized
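A minimal smoke test for the layer, assuming the shapes from the comments above (batch size 32, 307 vertices, 1 input channel, 12 time steps) and a CPU device; these values are only for illustration:

```python
# Hypothetical smoke test for Spatial_Attention_layer; shapes follow the comments above.
DEVICE = torch.device('cpu')           # assumption: run the example on CPU
sat = Spatial_Attention_layer(DEVICE, in_channels=1, num_of_vertices=307, num_of_timesteps=12)
sat.reset_parameters()                 # explicit initialization; it is not called automatically here
x = torch.randn(32, 307, 1, 12)        # (B, N, F, T)
S = sat(x)
print(S.shape)                         # torch.Size([32, 307, 307])
```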
.parameters() returns an iterator over the model's parameters, not an actual list. Parameters created automatically through nn.Parameter() are often not the values we want; for example, they can be vanishingly small. After running an explicit initialization, the parameters look reasonable.

[Figure: parameter values after initialization]

The article 《Python的torch.nn.Parameter参数初始化》 is worth reading for reference.
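A short illustration of the point above, using an nn.Linear module purely as an example:

```python
# .parameters() yields nn.Parameter objects lazily; wrap it in list() only for inspection.
layer = nn.Linear(3, 2)                # any nn.Module works; Linear is used just for illustration
params = layer.parameters()
print(type(params))                    # <class 'generator'>, not a list
for p in params:
    print(p.shape, p.requires_grad)    # torch.Size([2, 3]) True / torch.Size([2]) True
```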
import math
import numpy as np
import torch
import torch.nn as nn
# Parameter initialization
w = torch.nn.Parameter(torch.empty(2, 3))
# torch.nn.init.uniform_(tensor, a=0, b=1)
nn.init.uniform_(w)
# torch.nn.init.normal_(tensor, mean=0, std=1)
nn.init.normal_(w)
# torch.nn.init.constant_(tensor, value)
nn.init.constant_(w, 0.3)
# torch.nn.init.ones_(tensor)
torch.nn.init.ones_(w)
# torch.nn.init.zeros_(tensor)
torch.nn.init.zeros_(w)
# torch.nn.init.eye_(tensor)
nn.init.eye_(w)
# torch.nn.init.xavier_uniform_(tensor, gain=1)
# calculate_gain returns the recommended gain value for the given nonlinearity
a = nn.init.calculate_gain('relu')
# 1.414
nn.init.xavier_uniform_(w, gain=a)
# torch.nn.init.xavier_normal_(tensor, gain=1)
nn.init.xavier_normal_(w)  # gain defaults to 1
# torch.nn.init.kaiming_uniform_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu')
nn.init.kaiming_uniform_(w, mode='fan_in', nonlinearity='relu')
# torch.nn.init.kaiming_normal_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu')
nn.init.kaiming_normal_(w, mode='fan_out', nonlinearity='relu')
# torch.nn.init.orthogonal_(tensor, gain=1)
nn.init.orthogonal_(w)
# torch.nn.init.sparse_(tensor, sparsity, std=0.01)
# non-zero elements are initialized from a normal distribution with std=0.01
nn.init.sparse_(w, sparsity=0.1)
# Dirac delta initialization, only defined for 3-, 4- and 5-dimensional torch.Tensors
# torch.nn.init.dirac_(tensor)
b = torch.empty(3, 16, 5, 5)  # torch.empty builds a tensor from the given size without initializing it; dtype defaults to torch.float32
nn.init.dirac_(b)
c = torch.tensor(np.array([1, 2, 3.]))  # torch.tensor copies the input data into a new tensor; dtype follows the input's dtype
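A quick check of the dtype behaviour described in the two comments above:

```python
# torch.empty allocates without initializing and defaults to float32;
# torch.tensor copies its input and keeps the input's dtype.
print(b.dtype)   # torch.float32
print(c.dtype)   # torch.float64 (inherited from the NumPy float64 array)
```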
# Examples
# 1. Initialize each layer individually
# conv and bn
conv = nn.Conv2d(1, 3, kernel_size=1)
# init.kaiming_uniform_(self.weight, a=math.sqrt(5))  ## default weight initialization of nn.Conv2d()
# fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
# bound = 1 / math.sqrt(fan_in)
# init.uniform_(self.bias, -bound, bound)  ## default bias initialization of nn.Conv2d()
nn.init.kaiming_normal_(conv.weight, mode='fan_in')
nn.init.constant_(conv.bias, 0.)
bn = nn.BatchNorm2d(3)
# init.ones_(self.weight)  ## default weight initialization of nn.BatchNorm2d()
# init.zeros_(self.bias)  ## default bias initialization of nn.BatchNorm2d()
nn.init.normal_(bn.weight, mean=1., std=0.02)
nn.init.constant_(bn.bias, 0.)
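Besides initializing each layer by hand, the same per-type scheme can also be wrapped in a helper and applied recursively with Module.apply; a minimal sketch, where the function name init_weights is made up for this example:

```python
def init_weights(m):
    # called on every sub-module by Module.apply
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_in')
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.normal_(m.weight, mean=1., std=0.02)
        nn.init.constant_(m.bias, 0.)

model = nn.Sequential(nn.Conv2d(1, 3, kernel_size=1), nn.BatchNorm2d(3))
model.apply(init_weights)   # recursively visits every sub-module
```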
Example 1: with the class shown at the beginning, the parameters are not initialized automatically. To have the model initialize itself at instantiation time, add the statement self.reset_parameters() at the end of __init__(), as sketched below, and then inspect the parameters to check the result.
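A minimal sketch of that change, assuming everything else in the class stays the same (the parameter-creation lines are elided here):

```python
class Spatial_Attention_layer(nn.Module):
    def __init__(self, DEVICE, in_channels, num_of_vertices, num_of_timesteps):
        super().__init__()
        # ... create W1, W2, W3, bs and Vs exactly as in the class above ...
        self.reset_parameters()  # added: every parameter is initialized at construction time
```

The DoubleConv block below follows the same pattern: it defines its own initialize_weights() and calls it at the end of __init__().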
class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )
        self.initialize_weights()

    def initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def forward(self, x):
        return self.double_conv(x)
dc = DoubleConv(1, 4, 2)
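To verify that initialize_weights() ran during construction, one can inspect the BatchNorm parameters of the instance created above:

```python
# After construction, every BatchNorm2d weight should be all ones and every bias all zeros.
for m in dc.modules():
    if isinstance(m, nn.BatchNorm2d):
        print(m.weight.data)   # tensor([1., 1.]) for mid_channels=2, four ones for out_channels=4
        print(m.bias.data)     # zeros of the matching size
```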