目录:
1、基于crnn图像序列预测-pytorch代码实现——加载自己的数据集
2、基于crnn图像序列预测-pytorch代码实现——模型介绍
3、基于crnn图像序列预测-pytorch代码实现——训练过程及常见错误
在这里介绍卷积循环网络模型,包括简单的一层卷积层的conv1_LSTM,以及卷积部分迁移学习VGG和RESNET模型的VGG_LSTM,RESNET_LSTM。
关键在于层与层之间的size match,将卷积部分提取的特征展平后作为LSTM的input_size(序列长度seq_len对应帧数)。需要注意的是,VGG网络特征层输出的尺寸都是512 * 7 * 7,所以LSTM的input_size=512 * 7 * 7
CONV_LSTM代码如下:
class ConvLSTM(nn.Module):
    """One conv layer + LSTM for image-sequence classification.

    Input:  x of shape [B, T, 3, 224, 224] (T RGB frames per sample;
            the original tutorial uses T=9, but any T works).
    Output: logits of shape [B, 3].

    Args:
        lstm_hidden_size: hidden units per LSTM direction.
        num_lstm_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM runs in both directions.
    """

    def __init__(self, lstm_hidden_size=256, num_lstm_layers=1, bidirectional=True):
        super(ConvLSTM, self).__init__()
        self.num_directions = 2 if bidirectional else 1
        self.num_lstm_layers = num_lstm_layers
        self.lstm_hidden_size = lstm_hidden_size
        # Per frame: [3, 224, 224] -> [16, 112, 112]
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        # NOTE(review): conv2 is never used in forward(); kept so existing
        # checkpoints (state_dicts) remain loadable.  Output would be
        # [B, 32, 56, 56] (the original comment said 56x112, which is wrong:
        # the second 2x2 pool halves both spatial dims).
        self.conv2 = nn.Sequential(nn.Conv2d(16, 32, 3, 1, 1), nn.ReLU(), nn.MaxPool2d(2))
        # BUG FIX: the original hard-coded bidirectional=True here, ignoring
        # the constructor argument; with bidirectional=False the hidden state
        # built by init_hidden then mismatched the LSTM and forward() crashed.
        # NOTE: dropout only takes effect when num_lstm_layers > 1
        # (PyTorch warns otherwise).
        self.lstm1 = nn.LSTM(input_size=16 * 112 * 112,
                             hidden_size=lstm_hidden_size,
                             num_layers=num_lstm_layers,
                             batch_first=True,
                             dropout=0.5,
                             bidirectional=bidirectional)
        self.linear1 = nn.Sequential(
            nn.Linear(lstm_hidden_size * self.num_directions * num_lstm_layers, 64),
            nn.ReLU(inplace=True))
        self.output_layer = nn.Linear(64, 3)

    def init_hidden(self, x):
        """Return zero (h, c) states shaped for the LSTM.

        Uses x.new_zeros so the states live on x's device/dtype — the
        original used the deprecated Variable and hard-coded .cuda(),
        which broke CPU execution.
        """
        batch_size = x.size(0)
        shape = (self.num_directions * self.num_lstm_layers,
                 batch_size, self.lstm_hidden_size)
        return x.new_zeros(shape), x.new_zeros(shape)

    def forward(self, x):
        # x: [B, T, 3, 224, 224] — fold time into the batch for the conv.
        B, T = x.size(0), x.size(1)
        x = x.view(B * T, 3, 224, 224)
        output = self.conv1(x)          # [B*T, 16, 112, 112]
        # Flatten conv features per frame. This is equivalent to the
        # original view/transpose/contiguous/view/permute chain, just direct.
        output = output.view(B, T, -1)  # [B, T, 16*112*112]
        h, c = self.init_hidden(output)
        output, (h, c) = self.lstm1(output, (h, c))  # h: [layers*dirs, B, hidden]
        h = h.transpose(0, 1).contiguous().view(B, -1)  # [B, layers*dirs*hidden]
        output = self.linear1(h)        # [B, 64]
        return self.output_layer(output)  # [B, 3]
VGG_LSTM代码如下:
class VGG_LSTM(nn.Module):
    """Pretrained VGG16 feature extractor + LSTM for image-sequence classification.

    Input:  x of shape [B, T, 3, 224, 224] (T RGB frames per sample;
            the original tutorial uses T=9, but any T works).
    Output: logits of shape [B, 3].

    Args:
        lstm_hidden_size: hidden units per LSTM direction.
        num_lstm_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM runs in both directions.
    """

    def __init__(self, lstm_hidden_size=256, num_lstm_layers=1, bidirectional=True):
        super(VGG_LSTM, self).__init__()
        net = models.vgg16(pretrained=True)
        # Drop the fully-connected head; the VGG forward then returns the
        # flattened 512*7*7 feature vector for each image.
        net.classifier = nn.Sequential()
        self.num_directions = 2 if bidirectional else 1
        self.num_lstm_layers = num_lstm_layers
        self.lstm_hidden_size = lstm_hidden_size
        # Per frame: [3, 224, 224] -> 512*7*7 features
        self.features = net
        # NOTE: dropout only takes effect when num_lstm_layers > 1
        # (PyTorch warns otherwise).
        self.lstm1 = nn.LSTM(input_size=512 * 7 * 7,
                             hidden_size=lstm_hidden_size,
                             num_layers=num_lstm_layers,
                             batch_first=True,
                             dropout=0.5,
                             bidirectional=bidirectional)
        self.linear1 = nn.Sequential(
            nn.Linear(lstm_hidden_size * self.num_directions * num_lstm_layers, 64),
            nn.ReLU(inplace=True))
        self.output_layer = nn.Linear(64, 3)

    def init_hidden(self, x):
        """Return zero (h, c) states shaped for the LSTM.

        Uses x.new_zeros so the states live on x's device/dtype — the
        original used the deprecated Variable and hard-coded .cuda(),
        which broke CPU execution.
        """
        batch_size = x.size(0)
        shape = (self.num_directions * self.num_lstm_layers,
                 batch_size, self.lstm_hidden_size)
        return x.new_zeros(shape), x.new_zeros(shape)

    def forward(self, x):
        # x: [B, T, 3, 224, 224] — fold time into the batch for the backbone.
        B, T = x.size(0), x.size(1)
        x = x.view(B * T, 3, 224, 224)
        output = self.features(x)       # [B*T, 512*7*7] (classifier removed)
        # One feature vector per frame. This is equivalent to the original
        # view/transpose/contiguous/view/permute chain, just direct.
        output = output.view(B, T, -1)  # [B, T, 512*7*7]
        h, c = self.init_hidden(output)
        output, (h, c) = self.lstm1(output, (h, c))  # h: [layers*dirs, B, hidden]
        h = h.transpose(0, 1).contiguous().view(B, -1)  # [B, layers*dirs*hidden]
        output = self.linear1(h)        # [B, 64]
        return self.output_layer(output)  # [B, 3]
RESNET_LSTM等,就只需改下代码 net = models.resnet18(pretrained=True)(注意torchvision中没有resnet8,常用的是resnet18/34/50等,且ResNet输出的特征尺寸与VGG不同,LSTM的input_size需相应调整),同样pytorch已有的模型都能迁移过来。