import pandas as pd
import torch
import torch.nn as nn
import numpy as np
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data.dataloader import default_collate
from torchvision.datasets import ImageFolder
from torchvision.datasets.folder import default_loader
class SimpleCNN(nn.Module):
    """Small three-stage CNN followed by a three-layer classifier head.

    Every convolutional stage is ``Conv2d(3x3, stride 1, padding 1) -> ReLU ->
    MaxPool2d(2, 2)``: the convolution keeps the spatial size and the pooling
    halves it, while the channel count goes 3 -> 32 -> 64 -> 128.  The first
    ``Linear`` of the head expects 2048 flattened features, i.e.
    128 channels * 4 * 4, which is what a 3 x 32 x 32 input produces after
    three halvings.  The head ends in 10 output scores.
    """

    def __init__(self):
        super(SimpleCNN, self).__init__()

        # Stage 1: 3 -> 32 channels.
        layer1 = nn.Sequential()
        layer1.add_module('conv1', nn.Conv2d(3, 32, 3, 1, padding=1))
        layer1.add_module('relu1', nn.ReLU(True))
        layer1.add_module('pool1', nn.MaxPool2d(2, 2))
        self.layer1 = layer1

        # Stage 2: 32 -> 64 channels.  The pooling module keeps its original
        # name 'pool' so the printed structure stays the same.
        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(32, 64, 3, 1, padding=1))
        layer2.add_module('relu2', nn.ReLU(True))
        layer2.add_module('pool', nn.MaxPool2d(2, 2))
        self.layer2 = layer2

        # Stage 3: 64 -> 128 channels.
        layer3 = nn.Sequential()
        layer3.add_module('conv3', nn.Conv2d(64, 128, 3, 1, padding=1))
        layer3.add_module('relu3', nn.ReLU(True))
        layer3.add_module('pool3', nn.MaxPool2d(2, 2))
        self.layer3 = layer3

        # Classifier head: 2048 -> 512 -> 64 -> 10.
        layer4 = nn.Sequential()
        layer4.add_module('fc1', nn.Linear(2048, 512))
        layer4.add_module('fc_relu', nn.ReLU(True))
        layer4.add_module('fc2', nn.Linear(512, 64))
        layer4.add_module('fc_relu2', nn.ReLU(True))
        layer4.add_module('fc3', nn.Linear(64, 10))
        self.layer4 = layer4

    def forward(self, x):
        """Run the three conv stages and the head; return ``(batch, 10)`` scores.

        Args:
            x: input batch; the head's 2048-feature input implies
                ``(batch, 3, 32, 32)``.

        Returns:
            Tensor with one row of 10 scores per input sample.
        """
        conv1 = self.layer1(x)
        # Bug fix: this used to call ``self.layer``, which does not exist, so
        # every forward pass raised AttributeError.
        conv2 = self.layer2(conv1)
        conv3 = self.layer3(conv2)
        # Flatten everything except the batch dimension for the linear head.
        fc_input = conv3.view(conv3.size(0), -1)
        fc_out = self.layer4(fc_input)
        return fc_out
# Instantiate the demo CNN and print its module hierarchy.
model = SimpleCNN()
print(model)

# Wrap only the first two child containers of the model in a new Sequential
# and print the resulting sub-network.
new_model = nn.Sequential(*tuple(model.children())[:2])
print(new_model)

# named_parameters() yields (name, parameter) pairs; print just the names.
for param in model.named_parameters():
    print(param[0])
'''
总共有七层,两层卷积和两层池化交替出现
整体结构是
input layer
convolutional layer
pooling layer
activation function
convolutional layer
pooling layer
activation function
convolutional layer
fully connected layer
fully connected layer
output layer
1. input 层
输入为 32 * 32 pixel 的图像
2. C1 层
C1 层为卷积层,kernel size = 5 * 5, 步长为1,无填充,生成 6 个 feature map.
无填充,所以生成的 feature map 的长和宽为 28 * 28 ((32 - 5 + 0 * 2) / 1 + 1 = 28)
参数个数为 (5 * 5 + 1) * 6 = 156, 其中 5 * 5 对应 kernel size, 1 对应 bias, 6 是
feature map 的个数
连接数 156 * 28 * 28 = 122304
3. S2 层
S2 层为降采样层,kernel size 为 2 * 2,长和宽的步长都为2,无填充
S2 层其实相当于降采样 + 激活层,先是降采样,然后激活函数 sigmoid 非线性输出。
新生成的 feature map 的大小为 (28 /2) * (28 / 2) = 14 * 14
4. C3 层
C3 层为卷积层,kernel size 为 5 * 5,步长为1,生成 16 个 feature map,
5. S4 层
S4 层为降维采样,此层配置如同 S2 层,kernel size 为 2 * 2,长和宽的步长均为 2,无填充
新生成的 feature map 为 5 * 5
6. C5 层
C5 层为卷积层, kernel size 为 5 * 5,步长为 1,无填充,全连接生成 120 个 feature map
C5 层可以理解成两层,第一层是卷积层,第二层是全连接层
7. F6 层
F6 层是全连接层
8. output层
'''
class Lenet(nn.Module):
    """LeNet-style network: two conv/pool stages plus three linear layers.

    ``layer1`` and ``layer2`` each apply an unpadded 5x5 convolution followed
    by 2x2 max pooling (1 -> 6 -> 16 channels).  ``layer3`` maps the flattened
    400 features (16 * 5 * 5, which is what a 1 x 32 x 32 input yields) through
    120 and 84 units to 10 outputs.  No activation functions are applied
    between the layers.
    """

    def __init__(self):
        super().__init__()

        # (attribute name, ((sub-module name, module), ...)) for every stage.
        # Modules are instantiated in the order written, which is also the
        # order in which they are registered.
        stage_specs = (
            ('layer1', (
                ('conv1', nn.Conv2d(1, 6, 5)),
                ('pool1', nn.MaxPool2d(2, 2)),
            )),
            ('layer2', (
                ('conv2', nn.Conv2d(6, 16, 5)),
                ('pool2', nn.MaxPool2d(2, 2)),
            )),
            ('layer3', (
                ('fc1', nn.Linear(400, 120)),
                ('fc2', nn.Linear(120, 84)),
                ('fc3', nn.Linear(84, 10)),
            )),
        )
        for stage_name, named_modules in stage_specs:
            stage = nn.Sequential()
            for module_name, module in named_modules:
                stage.add_module(module_name, module)
            setattr(self, stage_name, stage)

    def forward(self, x):
        """Return ``(batch, 10)`` outputs for the input batch ``x``."""
        feature_maps = self.layer2(self.layer1(x))
        # Collapse channels and spatial dimensions into one feature axis.
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.layer3(flattened)
# Instantiate the LeNet model and print its module hierarchy.
model_LeNet = Lenet()
print(model_LeNet)
class AlexNet(nn.Module):
    """AlexNet-style classifier: five conv layers and a three-layer head.

    ``features`` is the convolutional stack and ends with 256 channels.
    ``classifier`` expects those maps flattened to 256 * 6 * 6 features (the
    size a 3 x 224 x 224 input produces) and outputs ``num_classes`` scores.

    Args:
        num_classes: width of the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()

        feature_layers = [
            # Stem: large strided kernel, then overlapping max pooling.
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Three size-preserving 3x3 convolutions, then a final pooling.
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*feature_layers)

        classifier_layers = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Return ``(batch, num_classes)`` scores for the input batch ``x``."""
        feature_maps = self.features(x)
        # The explicit target width makes view() fail if the feature maps are
        # not 256 x 6 x 6 per sample.
        flat_width = 256 * 6 * 6
        flattened = feature_maps.view(feature_maps.size(0), flat_width)
        return self.classifier(flattened)
# Build AlexNet with a 1000-way output layer and print its module hierarchy.
model_AlexNet = AlexNet(1000)
print(model_AlexNet)
class BasicConv2d(nn.Module):
    """Bias-free ``Conv2d`` followed by ``BatchNorm2d`` and a ReLU.

    Args:
        in_channels: number of input channels of the convolution.
        out_channels: number of output channels (also the BatchNorm width).
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (e.g. ``kernel_size``,
            ``stride``, ``padding``).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super(BasicConv2d, self).__init__()
        # The convolution has no bias; BatchNorm2d directly follows it.
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x):
        """Apply convolution, batch normalisation and ReLU to ``x``."""
        x = self.conv(x)
        x = self.bn(x)
        # Bug fix: ``nn.ReLU`` is a module class, so ``nn.ReLU(x, inplace=True)``
        # tried to construct a module with the tensor as its ``inplace``
        # argument and raised TypeError.  The functional form applies the
        # activation to the tensor.
        return nn.functional.relu(x, inplace=True)
class Inception(nn.Module):
    """Inception block with four parallel branches concatenated on channels.

    Branches (all preserve the spatial size of the input):
      * 1x1 convolution                      -> 64 channels
      * 1x1 then 5x5 convolution             -> 64 channels
      * 1x1 then two 3x3 convolutions        -> 96 channels
      * 3x3 average pooling, 1x1 convolution -> ``pool_features`` channels

    The output therefore has ``64 + 64 + 96 + pool_features`` channels.

    Args:
        in_channels: number of channels of the input tensor.
        pool_features: output channels of the pooling branch.
    """

    def __init__(self, in_channels, pool_features):
        super(Inception, self).__init__()
        self.branch1x1 = BasicConv2d(in_channels, 64, kernel_size=1)

        self.branch5x5_1 = BasicConv2d(in_channels, 48, kernel_size=1)
        # Bug fix: this was ``kernel_size=2`` without padding, which shrinks
        # the feature map by one pixel so the branches could not be
        # concatenated.  A 5x5 kernel with padding 2 keeps the spatial size
        # and matches the branch name.
        self.branch5x5_2 = BasicConv2d(48, 64, kernel_size=5, padding=2)

        self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1)
        self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1)
        self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, padding=1)

        self.branch_pool = BasicConv2d(in_channels, pool_features, kernel_size=1)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them along dim 1."""
        branch1x1 = self.branch1x1(x)

        branch5x5 = self.branch5x5_1(x)
        branch5x5 = self.branch5x5_2(branch5x5)

        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)

        # Bug fix: ``nn.AvgPool2d(x, kernel_size=3, ...)`` passed the tensor to
        # the module constructor (TypeError).  The functional form pools the
        # tensor; stride 1 with padding 1 keeps the spatial size.
        branch_pool = nn.functional.avg_pool2d(
            x, kernel_size=3, stride=1, padding=1
        )
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
        return torch.cat(outputs, 1)
# Build one Inception block (in_channels=100, pool_features=200) and print its
# module hierarchy.  Only construction happens here; forward() is not called.
model_googlenet = Inception(100, 200)
print(model_googlenet)
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (defaults to 1).

    Returns:
        The configured ``nn.Conv2d`` module.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + BatchNorm layers with a skip connection.

    Args:
        inplanes: channels of the block input.
        planes: channels produced by both convolutions.
        stride: stride of the first convolution (the second uses the
            ``conv3x3`` default).
        downsample: optional callable applied to the block input to produce
            the residual; when ``None`` the input itself is used.
    """

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.stride = stride

        # Sub-modules are assigned in the same order as before so that
        # registration order (printing, state_dict) is unchanged.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + residual)``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # The skip path is evaluated after the main path, and the sum is
        # accumulated in place into ``out``.
        residual = x if self.downsample is None else self.downsample(x)
        out += residual
        return self.relu(out)
'''
图像增强的方法:
torchvision.transforms 包括所有图像增强的方法
Scale 对图片的尺度进行缩小或放大
CenterCrop 对图像正中心进行给定大小的裁剪
RandomCrop 对图片进行给定大小的随机裁剪
RandomHorizontalFlip 对图片进行概率为 0.5 的随机水平翻转
RandomSizedCrop 首先对图片进行随机尺寸的裁剪,然后对裁剪的图片进行一个随机比例的缩放
最后将图片变成给定的大小
Pad 对图片进行边界零填充
'''
'''
CIFAR10 数据集有 60000 张图片,每张图片的大小都是 32 X 32 的三通道的彩色图,一共是
10 种类别,每种类别有 6000 张图片
'''