from PIL import Image
from torch import nn
from torchvision import transforms
# Data preparation: load an image and turn it into a batched tensor.
transform = transforms.Compose(
    [transforms.ToTensor()],  # PIL image -> float tensor scaled to [0, 1]
)
image = Image.open("111.jpeg")
print("imagesize:",image.size)
x = transform(image)
print(x.size())
x = x.unsqueeze(0)  # add a leading batch dimension
print(x.size())  # N C H W
batch_size, channels, height, width = x.size()
# Backward-compatible alias: the last dimension used to be unpacked under the
# misspelled name `weight`.
weight = width

N_OUT_CHS = 32
KERNEL_SIZE = 5

# Convolution: with an odd kernel and stride 1, padding of KERNEL_SIZE // 2
# keeps the spatial size of the feature map unchanged.
conv2d_nn = nn.Conv2d(
    in_channels=channels,
    out_channels=N_OUT_CHS,
    kernel_size=KERNEL_SIZE,
    stride=1,
    padding=(KERNEL_SIZE // 2, KERNEL_SIZE // 2),
)
x_out = conv2d_nn(x)
print("conv2d_nn:",x_out.size())

# Pooling: the convolution output does not change between the pooling demos,
# so it is computed once above and reused instead of re-running the
# full-resolution convolution before every pooling layer.
x_conv2d_out = x_out
for label, pooling_layer in (
    # Max pooling: 2x2 window, stride 2 -> halves H and W.
    ("Max_Pooling:", nn.MaxPool2d(kernel_size=(2, 2), stride=2)),
    # Average pooling: 2x2 window, stride 2 -> halves H and W.
    ("Avg_pooling:", nn.AvgPool2d(kernel_size=(2, 2), stride=2)),
    # Adaptive average pooling: the output size is given directly.
    ("AdaptiveAvg_pooling:", nn.AdaptiveAvgPool2d(output_size=(178, 100))),
):
    print(x_conv2d_out.size())
    pool_out = pooling_layer(x_conv2d_out)
    print(label, pool_out.size())
输出:
imagesize: (1000, 1778)
torch.Size([3, 1778, 1000])
torch.Size([1, 3, 1778, 1000])
conv2d_nn: torch.Size([1, 32, 1778, 1000])
--------------池化--------------
torch.Size([1, 32, 1778, 1000])
Max_Pooling: torch.Size([1, 32, 889, 500])
torch.Size([1, 32, 1778, 1000])
Avg_pooling: torch.Size([1, 32, 889, 500])
torch.Size([1, 32, 1778, 1000])
AdaptiveAvg_pooling: torch.Size([1, 32, 178, 100])
# Helper that counts the parameters of a model.
def model_param_number_calc(model_):
    """Return the number of trainable scalar parameters of ``model_``.

    Only parameters whose ``requires_grad`` flag is set are counted; each one
    contributes its element count (``numel()``).
    """
    total = 0
    for param in model_.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
# Fully connected layer mapping a flattened 10x10x3 input to 10x10x30 outputs.
FC_IN_FEATURES = 10 * 10 * 3
FC_OUT_FEATURES = 10 * 10 * 30
model_fc = nn.Linear(FC_IN_FEATURES, FC_OUT_FEATURES)
fc_param_count = model_param_number_calc(model_fc)
print("fc", fc_param_count)  # trainable parameter count of the linear layer
输出:
fc 903000
# Standard convolution.
model_cnn = nn.Conv2d(
    in_channels=3,    # number of input channels
    out_channels=30,  # number of output channels (= number of kernels)
    kernel_size=10,   # kernel size
    stride=1,         # stride
    # Keep the output the same spatial size as the input. An even kernel
    # cannot be centred with symmetric integer padding (padding=10 // 2 gives
    # H + 1), so let PyTorch compute the asymmetric "same" padding.
    padding="same",
)
print("cnn:",model_param_number_calc(model_cnn))
输出:
cnn: 9030
# Dilated (atrous) convolution: enlarges the receptive field without adding
# parameters.
model_dilation = nn.Conv2d(
    in_channels=3,
    out_channels=30,
    kernel_size=10,
    stride=1,
    # Keep the output the same spatial size as the input. With dilation the
    # effective kernel extent is dilation * (kernel_size - 1) + 1 = 19, so the
    # padding per side must be dilation * (kernel_size - 1) // 2 = 9
    # (padding=10 // 2 would shrink the feature map by 8 pixels per axis).
    padding=(2 * (10 - 1) // 2, 2 * (10 - 1) // 2),
    dilation=(2, 2),  # dilation rate
)
print("dilation:",model_param_number_calc(model_dilation))
输出:
dilation: 9030
# Grouped convolution: each group of output channels only sees its own slice
# of the input channels, which reduces the number of weights.
model_groups = nn.Conv2d(
    in_channels=3,
    out_channels=30,
    kernel_size=10,
    stride=1,
    # Keep the output the same spatial size as the input. An even kernel
    # cannot be centred with symmetric integer padding (padding=10 // 2 gives
    # H + 1), so let PyTorch compute the asymmetric "same" padding.
    padding="same",
    groups=3,  # number of groups
)
print("groups:",model_param_number_calc(model_groups))
输出:
groups: 3030
# Pointwise convolution: 1x1 kernel that only mixes channels.
model_pointwise = nn.Conv2d(
    in_channels=3,
    out_channels=30,
    kernel_size=1,  # pointwise kernel
    stride=1,
    # A 1x1 kernel already preserves the spatial size, so no padding is
    # needed (padding=10 // 2 would grow the feature map by 10 pixels per
    # axis).
    padding=0,
)
print("pointwise:",model_param_number_calc(model_pointwise))
输出:
pointwise: 120
# Depthwise separable convolution = depthwise (grouped, groups == in_channels)
# convolution followed by a pointwise (1x1) convolution.
depth_con2d = nn.Conv2d(3, 3, 10, groups=3)
point_con2d = nn.Conv2d(3, 30, 1)
deep_sep_param_count = sum(
    model_param_number_calc(layer) for layer in (depth_con2d, point_con2d)
)
print("deep_sep:", deep_sep_param_count)
输出:
deep_sep: 423
# Transposed convolution ("deconvolution"), used for upsampling.
import torch
transpose_con2d = nn.ConvTranspose2d(in_channels=3, out_channels=30, kernel_size=10)
upsample_input = torch.randn(size=(1, 3, 10, 10))
upsampled = transpose_con2d(upsample_input)
print(upsampled.size())
输出:
torch.Size([1, 30, 19, 19])
from torch import nn
import torch.nn.functional as F
import torch
class ResidualBlock(nn.Module):
    """Two-convolution residual block with an identity shortcut.

    The block maps ``in_feature`` channels to ``hidden_chs`` channels with a
    3x3 convolution, back to ``in_feature`` channels with a 5x5 convolution,
    adds the input to the result and applies a final ReLU. Both convolutions
    are padded with ``kernel_size // 2`` so the spatial size is preserved and
    the output has exactly the same shape as the input.

    Args:
        in_feature: number of channels of the input (and output) tensor.
        hidden_chs: number of channels between the two convolutions.
            Defaults to 128, the value that used to be hard-coded.
    """

    def __init__(self, in_feature, hidden_chs=128):
        super().__init__()
        self.weight_layer1 = nn.Conv2d(
            in_channels=in_feature,
            out_channels=hidden_chs,
            kernel_size=3,
            padding=(3 // 2, 3 // 2),
        )
        self.weight_layer2 = nn.Conv2d(
            in_channels=hidden_chs,
            out_channels=in_feature,
            kernel_size=5,
            padding=(5 // 2, 5 // 2),
        )

    def forward(self, x):
        """Return ``relu(conv2(relu(conv1(x))) + x)``; same shape as ``x``."""
        layer1_out = F.relu(self.weight_layer1(x))
        layer2_out = self.weight_layer2(layer1_out)
        # Identity shortcut: shapes match because layer 2 restores in_feature
        # channels and both convolutions keep the spatial size.
        return F.relu(layer2_out + x)
输出:
final_res: torch.Size([1, 3, 224, 224])
import torch
from torch import nn
class VGG11(nn.Module):
    """VGG-11 style classifier for 224x224 inputs with 1000 output classes.

    The feature extractor consists of eight 3x3 convolutions (padding 1, so
    the spatial size is preserved), each followed by a ReLU, with five 2x2
    max-pooling layers that halve the resolution from 224 down to 7. The
    classifier is three fully connected layers (4096, 4096, 1000).

    NOTE: every convolution is followed by its own ReLU. An earlier revision
    stacked the two convolutions of stages 3-5 directly on top of each other,
    which makes the pair equivalent to a single linear map. Adding the
    missing activations shifts the indices of the later entries of
    ``conv2d_layers``, so state_dict keys saved from that revision do not
    match.

    Args:
        in_feature: number of channels of the input image (3 for RGB).
    """

    def __init__(self, in_feature):
        super().__init__()
        self.conv2d_layers = nn.Sequential(
            # Stage 1 (shapes shown for a 1 x in_feature x 224 x 224 input)
            nn.Conv2d(in_channels=in_feature, out_channels=64, kernel_size=3, padding=(3 // 2, 3 // 2)),  # 1,64,224,224
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),  # 1,64,112,112
            # Stage 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=(3 // 2, 3 // 2)),  # 1,128,112,112
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),  # 1,128,56,56
            # Stage 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=(3 // 2, 3 // 2)),  # 1,256,56,56
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=(3 // 2, 3 // 2)),  # 1,256,56,56
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),  # 1,256,28,28
            # Stage 4
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=(3 // 2, 3 // 2)),  # 1,512,28,28
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=(3 // 2, 3 // 2)),  # 1,512,28,28
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),  # 1,512,14,14
            # Stage 5
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=(3 // 2, 3 // 2)),  # 1,512,14,14
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=(3 // 2, 3 // 2)),  # 1,512,14,14
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),  # 1,512,7,7
        )
        self.FC_layers = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1000),
        )

    def forward(self, x):
        """Return class scores of shape ``(N, 1000)``.

        ``x`` must be ``(N, in_feature, 224, 224)``: the first linear layer
        expects exactly ``512 * 7 * 7`` flattened features.
        """
        out_conv2d = self.conv2d_layers(x)
        # Flatten everything except the batch dimension: N, 512*7*7
        out_conv2d_flatten = torch.flatten(out_conv2d, start_dim=1)
        return self.FC_layers(out_conv2d_flatten)
测试:
if __name__ == '__main__':
    # Smoke test: push a random batch of eight 3x224x224 images through the
    # network and report the shape of the result.
    dummy_batch = torch.randn(8, 3, 224, 224)
    model = VGG11(in_feature=3)
    logits = model(dummy_batch)
    print("model_output:", logits.size())
输出:
model_output: torch.Size([8, 1000])
import torchvision.transforms as T
from PIL import Image
# Augmentation pipeline applied to a single image.
trans = T.Compose(
    [
        T.ToTensor(),  # PIL image -> float tensor scaled to [0, 1]
        T.RandomAffine(90),  # random affine transform, rotation in [-90, 90] degrees
        T.RandomRotation(45),  # random rotation in [-45, 45] degrees
        # Per-channel standardisation; these mean/std values are the commonly
        # used ImageNet statistics.
        T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)
image = Image.open("111.jpeg")
# Print the (width, height) tuple rather than the PIL object repr, matching
# the recorded output of this snippet.
print(image.size)
t_out_image = trans(image)
print(t_out_image.size())
image.show()
input()  # wait for Enter before showing the cropped image
# RandomSizedCrop was only a deprecated alias of RandomResizedCrop and is no
# longer available in current torchvision releases.
T.RandomResizedCrop(400)(image).show()
输出:
(1000, 1778)
torch.Size([3, 1778, 1000])