class Darknet(nn.Module):
    """Network assembled from a Darknet-style configuration file.

    ``parse_cfg`` turns the cfg file into ``self.blocks`` and
    ``create_modules`` returns ``self.net_info`` and ``self.module_list``.
    ``forward`` skips ``self.blocks[0]`` and assumes ``self.module_list[i]``
    corresponds to ``self.blocks[i + 1]``.
    """

    def __init__(self, cfgfile):
        super().__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_modules(self.blocks)

    def forward(self, x, CUDA=None):
        """Run the network on ``x`` and return all detections.

        Args:
            x: Input tensor handed to the first module.
            CUDA: Flag forwarded to ``predict_transform``. When omitted
                (``None``) it follows the input tensor (``x.is_cuda``).

        Returns:
            The ``predict_transform`` outputs of every "yolo" block,
            concatenated along dimension 1.

        Raises:
            ValueError: If the configuration contains no "yolo" block.
        """
        if CUDA is None:
            CUDA = x.is_cuda

        modules = self.blocks[1:]
        outputs = {}      # output of every block, keyed by block index
        detections = []   # one transformed map per "yolo" block

        for i, module in enumerate(modules):
            module_type = module["type"]

            if module_type in ("convolutional", "upsample"):
                x = self.module_list[i](x)

            elif module_type == "route":
                layers = module["layers"]
                if isinstance(layers, str):
                    # Accept the raw cfg value ("-1, 61") as well as a list.
                    layers = layers.split(",")
                # Negative entries are relative to the current block,
                # non-negative entries are absolute block indices.
                sources = [
                    idx if idx >= 0 else i + idx
                    for idx in (int(a) for a in layers)
                ]
                maps = [outputs[src] for src in sources]
                # Several sources are joined along the channel dimension.
                x = maps[0] if len(maps) == 1 else torch.cat(maps, 1)

            elif module_type == "shortcut":
                from_ = int(module["from"])
                x = outputs[i - 1] + outputs[i + from_]

            elif module_type == "yolo":
                anchors = self.module_list[i][0].anchors
                inp_dim = int(self.net_info["height"])
                num_classes = int(module["classes"])
                # predict_transform works in place, so hand it the raw
                # data rather than a tensor tracked by autograd.
                x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
                detections.append(x)

            outputs[i] = x

        if not detections:
            raise ValueError("the configuration contains no 'yolo' block")
        return torch.cat(detections, 1)
class EmptyLayer(nn.Module):
    """Module that registers no parameters or sub-modules of its own.

    Presumably used by ``create_modules`` as a placeholder for blocks such
    as "route" and "shortcut", which ``Darknet.forward`` evaluates inline
    without calling the module -- confirm against ``create_modules``.
    """

    def __init__(self):
        # Only the base-class initialisation is needed.
        super().__init__()
class DetectionLayer(nn.Module):
    """Module that only carries the anchors of a detection block.

    ``Darknet.forward`` reads the ``anchors`` attribute back for "yolo"
    blocks and passes it to ``predict_transform``.
    """

    def __init__(self, anchors):
        super().__init__()
        # Stored as given; no copy and no conversion is made.
        self.anchors = anchors
def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA=True):
    """Turn a raw detection feature map into box predictions.

    Args:
        prediction: Tensor indexed as (batch, channels, rows, cols) with
            ``len(anchors) * (5 + num_classes)`` channels. Only ``size(2)``
            is used as the grid size, so the map is assumed to be square.
        inp_dim: Side length of the network input; the stride is
            ``inp_dim // grid_size``.
        anchors: Sequence of two-element anchor sizes (presumably
            ``(width, height)``) in input-image units.
        num_classes: Number of class scores per box.
        CUDA: Kept for backward compatibility. Helper tensors are created
            on ``prediction.device``, so the flag can no longer disagree
            with the device of ``prediction``.

    Returns:
        Tensor of shape ``(batch, grid_size**2 * len(anchors),
        5 + num_classes)``. Per row: centre x, centre y, two box sizes,
        objectness score, class scores. The first four values are scaled
        by the stride, i.e. expressed in input-image units.
    """
    batch_size = prediction.size(0)
    grid_size = prediction.size(2)
    stride = inp_dim // grid_size
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)
    num_cells = grid_size * grid_size
    dtype, device = prediction.dtype, prediction.device

    prediction = prediction.view(batch_size, bbox_attrs * num_anchors, num_cells)
    prediction = prediction.transpose(1, 2).contiguous()
    prediction = prediction.view(batch_size, num_cells * num_anchors, bbox_attrs)
    # From here on every row is one box: rows are ordered by grid cell
    # (row-major) and, within a cell, by anchor.

    # Sigmoid on centre x, centre y and the objectness score. For x and y
    # this keeps the predicted offset within (0, 1), i.e. inside the grid
    # cell whose index is added below.
    prediction[:, :, 0] = torch.sigmoid(prediction[:, :, 0])
    prediction[:, :, 1] = torch.sigmoid(prediction[:, :, 1])
    prediction[:, :, 4] = torch.sigmoid(prediction[:, :, 4])

    # Column / row index of every cell in row-major order.
    grid = torch.arange(grid_size, dtype=dtype, device=device)
    x_offset = grid.repeat(grid_size, 1).view(-1, 1)
    y_offset = grid.view(-1, 1).repeat(1, grid_size).view(-1, 1)
    x_y_offset = (
        torch.cat((x_offset, y_offset), 1)
        .repeat(1, num_anchors)
        .view(-1, 2)
        .unsqueeze(0)
    )
    prediction[:, :, :2] += x_y_offset

    # Anchors are expressed in grid-cell units before scaling the sizes.
    anchors = torch.tensor(
        [(a[0] / stride, a[1] / stride) for a in anchors],
        dtype=dtype,
        device=device,
    )
    anchors = anchors.repeat(num_cells, 1).unsqueeze(0)
    prediction[:, :, 2:4] = torch.exp(prediction[:, :, 2:4]) * anchors

    prediction[:, :, 5:5 + num_classes] = torch.sigmoid(
        prediction[:, :, 5:5 + num_classes]
    )

    # Back from grid-cell units to input-image units.
    prediction[:, :, :4] *= stride
    return prediction
def get_test_input(image_path="C:\\Users\\86177\\Desktop\\dog-cycle-car.png",
                   inp_dim=608):
    """Load one image from disk and return it as a CUDA input batch.

    Args:
        image_path: Image file to read. Defaults to the path that used to
            be hard-coded here.
        inp_dim: Side length the image is resized to; this should match
            the input size declared in the network's cfg file.

    Returns:
        Float tensor of shape ``(1, 3, inp_dim, inp_dim)`` on the CUDA
        device, with channels reversed from OpenCV's BGR order and values
        divided by 255.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: {}".format(image_path))
    img = cv2.resize(img, (inp_dim, inp_dim))  # Resize to the input dimension
    img_ = img[:, :, ::-1].transpose((2, 0, 1))  # BGR -> RGB | H x W x C -> C x H x W
    img_ = img_[np.newaxis, :, :, :] / 255.0  # Add a batch axis at 0 | Normalise
    img_ = torch.from_numpy(img_).float().cuda()  # Convert to float, move to the GPU
    # Variable is kept for old PyTorch releases; from 0.4 on it simply
    # returns a tensor.
    img_ = Variable(img_)
    return img_
if __name__ == "__main__":
    # Smoke test: build the network from its cfg file and run one forward
    # pass on the test image.
    model = Darknet("yolo.cfg").cuda()
    inp = get_test_input()
    # Darknet.forward takes a CUDA flag as its second argument; derive it
    # from the input tensor so the two cannot disagree.
    pred = model(inp, inp.is_cuda)
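# NOTE: The test image should be resized to the input size declared in the cfg
# file. A different size still runs, because the convolution and upsample layers
# accept any spatial size (they only constrain the channel count). However,
# Darknet.forward always passes the cfg "height" to predict_transform as
# inp_dim, so the stride -- and therefore the scale of the predicted boxes --
# is then computed for the wrong input size.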