torch.nn
class torch.nn.Parameter()
Parameters
是Tensor
的子类(在早期版本中是Variable的子类)。
参数说明:
- data(Tensor)->parameter tensor
- requires_grad(bool,optional)
Containers:
class torch.nn.Module
所有神经网络模块的基类
你的模型也应该继承这个类
Modules
还可以包含其他模块,允许将它们嵌套在树结构中.您可以将子模块分配为常规属性:
#导入nn包
import torch.nn as nn
#导入nn包的函数模块
import torch.nn.functional as F
class Model(nn.Module):
    """Two stacked 5x5 convolutions, each followed by a ReLU.

    Submodules assigned as plain attributes (``conv1``, ``conv2``) are
    registered on the module automatically.
    """

    def __init__(self):
        # Run the nn.Module constructor first so that the submodules assigned
        # below get registered. The explicit form nn.Module.__init__(self)
        # is equivalent.
        super(Model, self).__init__()
        # 1 input channel -> 20 output channels, 5x5 kernel.
        self.conv1 = nn.Conv2d(1, 20, 5)
        # 20 input channels -> 20 output channels, 5x5 kernel.
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        """Apply conv1 and conv2 in order, with a ReLU after each one."""
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))
以这种方式分配的子模块将被注册,并且在调用
.cuda()
等方法时,它们的参数也会一并被转换(例如转移到GPU)
import torch.nn as nn
class Model(nn.Module):
    """Module whose single convolution is registered through add_module()."""

    def __init__(self):
        # Explicit base-class call; equivalent to super(Model, self).__init__().
        nn.Module.__init__(self)
        # Same effect as the attribute form: self.conv = nn.Conv2d(10, 20, 4)
        self.add_module("conv", nn.Conv2d(10, 20, 4))
model = Model()
print(model.conv)
Conv2d(10, 20, kernel_size=(4, 4), stride=(1, 1))
CLASS torch.nn.Linear(in_features,out_features,bias=True)
对输入做线性变换 $y = xA^T + b$。此为线性回归公式,也为DNN网络的输出公式,或全连接层的连接公式
参数解释:
- in_features:假设输入尺寸为[rowsize,columnsize]则其为columnsize
- out_features:输出尺寸大小或者下一层神经元的个数
L = nn.Linear(2,2)
print(L.weight)
print(L.bias.shape)
Parameter containing:
tensor([[-0.2078, -0.3180],
[ 0.1222, 0.5744]], requires_grad=True)
torch.Size([2])
import torch
x = torch.randn(128, 20) # 输入的维度是(128,20)
m = torch.nn.Linear(20, 30) # 20,30是指维度
c = m(x)
print(m.weight.shape)
print(c.shape)
print(m.bias.shape)
#print(c.bias.shape)
#print(m.shape)
print(c)
#运行时错误(并非语法错误),out_features必须是整数: n = torch.nn.Linear(20,torch.tensor([20,20]))
#print(n.bias.shape)
torch.Size([30, 20])
torch.Size([128, 30])
torch.Size([30])
tensor([[-0.9075, 0.4658, -0.6769, ..., 0.8770, 0.1963, -0.1634],
[ 0.3249, -0.4842, 0.5415, ..., -0.6922, 0.3830, -0.4454],
[ 0.2934, -0.3098, 0.8307, ..., -0.2225, 0.1665, -0.7599],
...,
[ 1.3914, -0.4900, 1.7351, ..., -0.4768, 0.1307, -2.4975],
[-0.3248, 0.1314, 0.1383, ..., -0.5789, -0.3377, -1.0821],
[ 1.0866, 0.2304, 1.0595, ..., -0.0817, -0.9459, -0.2057]],
grad_fn=<AddmmBackward>)
apply(function)
递归调用函数应用到每个组成部分以及本身,典型应用为初始化模型参数
def init_weights(m):
    """Print module ``m`` and, if it is an nn.Linear, set its weights to 1.0.

    Intended to be passed to ``Module.apply``, which calls it on every
    submodule and on the module itself.
    """
    print(m)
    # isinstance() instead of an exact type comparison, so subclasses of
    # nn.Linear are initialised as well.
    if isinstance(m, nn.Linear):
        # In-place fill through nn.init rather than poking at ``.data``.
        nn.init.constant_(m.weight, 1.0)
        print(m.weight)
# Sequential中的单元按顺序执行
net = nn.Sequential(nn.Linear(2,2),nn.Linear(2,2))
net.apply(init_weights)
Linear(in_features=2, out_features=2, bias=True)
Parameter containing:
tensor([[1., 1.],
[1., 1.]], requires_grad=True)
Linear(in_features=2, out_features=2, bias=True)
Parameter containing:
tensor([[1., 1.],
[1., 1.]], requires_grad=True)
Sequential(
(0): Linear(in_features=2, out_features=2, bias=True)
(1): Linear(in_features=2, out_features=2, bias=True)
)
Sequential(
(0): Linear(in_features=2, out_features=2, bias=True)
(1): Linear(in_features=2, out_features=2, bias=True)
)
modules
返回网络中所有模块的迭代器(包括网络自身);重复的模块只迭代一次
# The same Linear instance is placed in the container twice.
l = nn.Linear(2, 2)
net = nn.Sequential(l, l)
# enumerate() pairs each module yielded by modules() with a running index.
for index, m in enumerate(net.modules()):
    print(index, '->', m)
0 -> Sequential(
(0): Linear(in_features=2, out_features=2, bias=True)
(1): Linear(in_features=2, out_features=2, bias=True)
)
1 -> Linear(in_features=2, out_features=2, bias=True)
named_children()->SonModuleIteration
返回子模块迭代器
# Walk the direct children of `model` as (name, module) pairs.
for name, module in model.named_children():
    # To restrict the output to certain children, filter on `name` here,
    # e.g. `if name in ['conv1', 'conv2']:`.
    print(module)
# named_children() returns an iterator, so printing the call itself shows
# the iterator object rather than the child modules.
print(model.named_children())
Conv2d(10, 20, kernel_size=(4, 4), stride=(1, 1))
named_modules()
返回网络中所有模块迭代器,同时产生模块的名称以及模块本身
重复的模块只返回一次
# The same Linear instance is placed in the container twice.
l = nn.Linear(2, 2)
net = nn.Sequential(l, l)
# Each item yielded by named_modules() is a (name, module) tuple.
for index, m in enumerate(net.named_modules()):
    print(index, '->', m)
0 -> ('', Sequential(
(0): Linear(in_features=2, out_features=2, bias=True)
(1): Linear(in_features=2, out_features=2, bias=True)
))
1 -> ('0', Linear(in_features=2, out_features=2, bias=True))
named_parameters(memo=None,prefix='')
返回模块参数的迭代器,同时产生参数的名称和参数本身
# Print every parameter of `l` together with its name.
for name, param in l.named_parameters():
    print(name, '->', param)
weight -> Parameter containing:
tensor([[ 0.3842, 0.3218],
[ 0.1494, -0.4355]], requires_grad=True)
bias -> Parameter containing:
tensor([0.3653, 0.6474], requires_grad=True)
parameters()->ModuleParamIteration
这通常被传递给优化器
# Print the type of the underlying data and the size of each parameter of `l`.
for param in l.parameters():
    print(type(param.data), param.size())
<class 'torch.Tensor'> torch.Size([2, 2])
<class 'torch.Tensor'> torch.Size([2])
register_backward_hook(hook)
在模块上注册一个向后的钩子(backward hook),用于记录反向传播时的梯度。
每当计算相对于模块输入的梯度时,将调用该钩子。钩子应
具有以下签名:
hook(module,grad_input,grad_output)->Variable or None
|-如果module有多个输入输出的话,那么grad_input grad_output将会是个tuple。 hook不应该修改它的arguments,但是它可以选择性的返回关于输入的梯度,这个返回的梯度在后续的计算中会替代grad_input。
这个函数返回一个句柄(handle)。它有一个方法 handle.remove(),可以用这个方法将hook从module移除
register_forward_hook(hook)
在模块上注册一个forward hook,每次forward()计算出输出之后,该hook都会被调用(它拿到的是前向传播的输入和输出,而不是梯度)
hook(module,input,output)->None or modified output
class torch.nn.Sequential(* args)
这是一个时序容器.Modules
会以他们传入的顺序添加到容器中,当然也可以传入
一个OrderedDict(有序字典)
# OrderedDict is needed by the second example below.
from collections import OrderedDict

# Layers passed positionally are named by their index ("0", "1", ...).
model = nn.Sequential(
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU(),
)
# Passing an OrderedDict instead gives every layer an explicit name.
model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('relu2', nn.ReLU()),
]))
class torch.nn.ModuleList(modules=None)
将submodules
保存在一个列表中
- modules(list,optional):要添加的模块列表
class MyModule(nn.Module):
    """Ten 10->10 linear layers held in an nn.ModuleList."""

    def __init__(self):
        super(MyModule, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(10, 10) for _ in range(10)])

    def forward(self, x):
        """Fold ``x`` through the list, combining layer ``i // 2`` with layer ``i``."""
        # ModuleList can act as an iterable, or be indexed using ints
        for i, layer in enumerate(self.linears):
            x = self.linears[i // 2](x) + layer(x)
        return x
append(module)
:追加一个给定模块
extend(modules)
:追加一个模块列表
class torch.nn.ParameterList(parameters=None)
在列表中保存参数
class MyModule(nn.Module):
    """Ten 10x10 parameters held in an nn.ParameterList."""

    def __init__(self):
        super(MyModule, self).__init__()
        self.params = nn.ParameterList(
            [nn.Parameter(torch.randn(10, 10)) for _ in range(10)]
        )
        # Show the registered parameters as soon as the module is built.
        print(self.params)

    def forward(self, x):
        """Fold ``x`` through the list, combining parameter ``i // 2`` with parameter ``i``."""
        # ParameterList can act as an iterable, or be indexed using ints
        for i, p in enumerate(self.params):
            x = self.params[i // 2].mm(x) + p.mm(x)
        return x
Lmodule = MyModule()
ParameterList(
(0): Parameter containing: [torch.FloatTensor of size 10x10]
(1): Parameter containing: [torch.FloatTensor of size 10x10]
(2): Parameter containing: [torch.FloatTensor of size 10x10]
(3): Parameter containing: [torch.FloatTensor of size 10x10]
(4): Parameter containing: [torch.FloatTensor of size 10x10]
(5): Parameter containing: [torch.FloatTensor of size 10x10]
(6): Parameter containing: [torch.FloatTensor of size 10x10]
(7): Parameter containing: [torch.FloatTensor of size 10x10]
(8): Parameter containing: [torch.FloatTensor of size 10x10]
(9): Parameter containing: [torch.FloatTensor of size 10x10]
)
class torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
二维卷积:
输入形式input[ batch_size, channels, height_1, width_1 ]
- batch_size 一个batch中样例的个数
- channels 通道数,也就是当前层的深度
- height_1, 图片的高
- width_1, 图片的宽
Conv2d参数:
- in_channels:输入通道数
- out_channels:输出feature map数即核的数量
- kernel_size: 核的尺寸大小,可以是元组
- stride:卷积移动步长
- padding: 填充数目
- dilation: 卷积核元素之间的间距(空洞卷积);dilation=1为普通卷积,dilation=d相当于在相邻的核元素之间插入d-1个0
- groups:
通道分组。比如说6个in_channels和6个out_channels(6个核,设每个核的大小为3x3)分成两组,记为in1,in2(都为三个通道),out1,out2(都为三个通道),(in1,out1)为一组,(in2,out2)为一组。这时候out1,out2的核分别只处理in1,in2的三个通道,也就是说out1,out2中的每个核的通道数为3(这时候的参数为6个核x每个核对应的三个通道滤波器x每个滤波器对应的尺寸(长x宽) = 6x3x3x3),这比原来分成1组的时候6x6x3x3减少了一半的参数。分组的前提是:
in_channels must be divisible by groups
out_channels must be divisible by groups
- bias:有无偏置量
conv = nn.Conv2d(in_channels=2, out_channels=6, kernel_size=1, groups=3)
conv.weight.data.size()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
in
----> 1 conv = nn.Conv2d(in_channels=2, out_channels=6, kernel_size=1, groups=3)
2 conv.weight.data.size()
D:\Anaconda\envs\pytorch_gpu\lib\site-packages\torch\nn\modules\conv.py in __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias, padding_mode)
325 super(Conv2d, self).__init__(
326 in_channels, out_channels, kernel_size, stride, padding, dilation,
--> 327 False, _pair(0), groups, bias, padding_mode)
328
329 @weak_script_method
D:\Anaconda\envs\pytorch_gpu\lib\site-packages\torch\nn\modules\conv.py in __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, transposed, output_padding, groups, bias, padding_mode)
20 super(_ConvNd, self).__init__()
21 if in_channels % groups != 0:
---> 22 raise ValueError('in_channels must be divisible by groups')
23 if out_channels % groups != 0:
24 raise ValueError('out_channels must be divisible by groups')
ValueError: in_channels must be divisible by groups
conv = nn.Conv2d(in_channels=6, out_channels=3, kernel_size=3, groups=2)
conv.weight.data.size()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
in
----> 1 conv = nn.Conv2d(in_channels=6, out_channels=3, kernel_size=3, groups=2)
2 conv.weight.data.size()
D:\Anaconda\envs\pytorch_gpu\lib\site-packages\torch\nn\modules\conv.py in __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias, padding_mode)
325 super(Conv2d, self).__init__(
326 in_channels, out_channels, kernel_size, stride, padding, dilation,
--> 327 False, _pair(0), groups, bias, padding_mode)
328
329 @weak_script_method
D:\Anaconda\envs\pytorch_gpu\lib\site-packages\torch\nn\modules\conv.py in __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, transposed, output_padding, groups, bias, padding_mode)
22 raise ValueError('in_channels must be divisible by groups')
23 if out_channels % groups != 0:
---> 24 raise ValueError('out_channels must be divisible by groups')
25 self.in_channels = in_channels
26 self.out_channels = out_channels
ValueError: out_channels must be divisible by groups
conv = nn.Conv2d(in_channels=6, out_channels=6, kernel_size=3, groups=2)
conv.weight.data.size()
torch.Size([6, 3, 3, 3])
conv = nn.Conv2d(in_channels=6, out_channels=6, kernel_size=3, groups=2)
conv.weight.data.size()
torch.Size([6, 3, 3, 3])
conv = nn.Conv2d(in_channels=6, out_channels=6, kernel_size=3, groups=3)
conv.weight.data.size()
torch.Size([6, 2, 3, 3])