pytorch建立mobilenetV3-ssd网络并进行训练与预测方式

Step1：搭建mobilenetV3-ssd网络框架

它的网络原理很简单，就是把传统的ssd算法里面的VGG网络换成了mobilenetV3，其他的都一样。

需要提前准备的函数和类

在真的写网络框架之前，我们需要把网络中需要调用的一些激活函数和卷积块先写好。

先是mobilenetV3需要调用的两个激活函数，一个注意力模型SeModule,和卷积块Block。

class hswish(nn.Module):
    """Hard-swish activation: ``x * ReLU6(x + 3) / 6``."""

    def forward(self, x):
        # The in-place ReLU6 only overwrites the temporary ``x + 3``; ``x`` is untouched.
        gate = F.relu6(x + 3.0, inplace=True)
        return x * gate / 6.0
class hsigmoid(nn.Module):
    """Hard-sigmoid activation: ``ReLU6(x + 3) / 6``, a value in [0, 1]."""

    def forward(self, x):
        # The in-place ReLU6 only overwrites the temporary ``x + 3``; ``x`` is untouched.
        shifted = F.relu6(x + 3.0, inplace=True)
        return shifted / 6.0
class SeModule(nn.Module):
    """Squeeze-and-excitation channel attention.

    The input is global-average-pooled to one value per channel, passed
    through a 1x1 bottleneck (``in_size -> in_size // reduction -> in_size``)
    ending in a hard-sigmoid, and the resulting per-channel gate rescales the
    input.

    NOTE: earlier revisions created ``avg_pool`` but never applied it, so the
    gate was computed per pixel. Weights trained with that behaviour will give
    different activations now. ``avg_pool`` has no parameters, so state-dict
    keys are unchanged.

    NOTE(review): after pooling, the BatchNorm layers see a 1x1 map, so
    training presumably needs a batch size greater than 1 - confirm.

    :param in_size: number of input (and output) channels
    :param reduction: channel reduction factor of the bottleneck
    """

    def __init__(self, in_size, reduction=4):
        super(SeModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.se = nn.Sequential(
            nn.Conv2d(in_size, in_size // reduction, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(in_size // reduction),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_size // reduction, in_size, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(in_size),
            hsigmoid()
        )

    def forward(self, x):
        # Squeeze to (N, C, 1, 1) first; the gate then broadcasts over H x W.
        return x * self.se(self.avg_pool(x))
class Block(nn.Module):
    """MobileNetV3 bottleneck: 1x1 expand -> depthwise kxk -> 1x1 project.

    A residual connection is added when ``stride == 1``; if the channel counts
    differ, the shortcut is a 1x1 convolution followed by BatchNorm.

    The block created with ``kernel_size=5, in_size=160, expand_size=672``
    additionally returns its expanded activation, so ``forward`` yields
    ``(expand, out)`` for that block and ``out`` for every other one.

    :param kernel_size: depthwise kernel size
    :param in_size: input channels
    :param expand_size: channels after the expansion convolution
    :param out_size: output channels
    :param nolinear: activation module instance, shared by both activation slots
    :param semodule: optional attention module applied to the projected output, or None
    :param stride: stride of the depthwise convolution
    """

    def __init__(self, kernel_size, in_size, expand_size, out_size, nolinear, semodule, stride):
        super(Block, self).__init__()
        self.stride = stride
        self.se = semodule
        # Marks the single bottleneck whose expanded feature map is exported.
        self.output_status = kernel_size == 5 and in_size == 160 and expand_size == 672

        self.conv1 = nn.Conv2d(in_size, expand_size, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(expand_size)
        self.nolinear1 = nolinear
        self.conv2 = nn.Conv2d(expand_size, expand_size, kernel_size=kernel_size, stride=stride,
                               padding=kernel_size // 2, groups=expand_size, bias=False)
        self.bn2 = nn.BatchNorm2d(expand_size)
        self.nolinear2 = nolinear
        self.conv3 = nn.Conv2d(expand_size, out_size, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_size)

        self.shortcut = nn.Sequential()
        if stride == 1 and in_size != out_size:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_size, out_size, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_size),
            )

    def forward(self, x):
        out = self.nolinear1(self.bn1(self.conv1(x)))
        if self.output_status:
            # Keep the expanded activation before the (possibly strided) depthwise conv.
            expand = out
        out = self.nolinear2(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        if self.se is not None:
            out = self.se(out)
        if self.stride == 1:
            out = out + self.shortcut(x)

        if self.output_status:
            return (expand, out)
        return out

然后是ssd网络需要调用的卷积块。

def conv_bn(inp, oup, stride, groups=1, activation=nn.ReLU6):
    """Build a 3x3 convolution (padding 1, no bias) -> BatchNorm -> activation stack.

    :param inp: input channels
    :param oup: output channels
    :param stride: stride of the convolution
    :param groups: convolution groups (``groups=inp`` gives a depthwise conv)
    :param activation: activation class, instantiated with ``inplace=True``
    :return: an ``nn.Sequential`` with the three layers
    """
    conv = nn.Conv2d(inp, oup, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False)
    norm = nn.BatchNorm2d(oup)
    act = activation(inplace=True)
    return nn.Sequential(conv, norm, act)
def conv_1x1_bn(inp, oup, groups=1, activation=nn.ReLU6):
    """Build a 1x1 convolution (stride 1, no bias) -> BatchNorm -> activation stack.

    :param inp: input channels
    :param oup: output channels
    :param groups: convolution groups
    :param activation: activation class, instantiated with ``inplace=True``
    :return: an ``nn.Sequential`` with the three layers
    """
    pointwise = nn.Conv2d(inp, oup, kernel_size=1, stride=1, padding=0, groups=groups, bias=False)
    norm = nn.BatchNorm2d(oup)
    act = activation(inplace=True)
    return nn.Sequential(pointwise, norm, act)
    class AuxiliaryConvolutions(nn.Module):
    """
    辅助卷积层
    """
    def __init__(self):
        super(AuxiliaryConvolutions, self).__init__()
        self.extra_convs = []
    
        self.extra_convs.append(conv_1x1_bn(960, 256))
        self.extra_convs.append(conv_bn(256, 256, 2, groups=256))
        self.extra_convs.append(conv_1x1_bn(256, 512, groups=1))
    
        self.extra_convs.append(conv_1x1_bn(512, 128))
        self.extra_convs.append(conv_bn(128, 128, 2, groups=128))
        self.extra_convs.append(conv_1x1_bn(128, 256))
    
        self.extra_convs.append(conv_1x1_bn(256, 128))
        self.extra_convs.append(conv_bn(128, 128, 2, groups=128))
        self.extra_convs.append(conv_1x1_bn(128, 256))
    
        self.extra_convs.append(conv_1x1_bn(256, 64))
        self.extra_convs.append(conv_bn(64, 64, 2, groups=64))
        self.extra_convs.append(conv_1x1_bn(64, 128))
        self.extra_convs = nn.Sequential(*self.extra_convs)
        
        self.init_conv2d()
        
    def init_conv2d(self):
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    init.kaiming_normal_(m.weight, mode='fan_out')
                    if m.bias is not None:
                        init.constant_(m.bias, 0)
                elif isinstance(m, nn.BatchNorm2d):
                    init.constant_(m.weight, 1)
                    init.constant_(m.bias, 0)
                elif isinstance(m, nn.Linear):
                    init.normal_(m.weight, std=0.001)
                    if m.bias is not None:
                        init.constant_(m.bias, 0)
    def forward(self, conv7_feats):
        """
        Forward propagation.
        :param conv7_feats: lower-level conv7 feature map
        :return: higher-level feature maps conv8_2, conv9_2, conv10_2, and conv11_2
        """
 
        outs = []
        out=conv7_feats
        for i, conv in enumerate(self.extra_convs):   
            out = conv(out)
            if i % 3 == 2:
                outs.append(out)
                
        conv8_2_feats=outs[0]
        conv9_2_feats=outs[1]
        conv10_2_feats=outs[2]
        conv11_2_feats=outs[3]
        return conv8_2_feats, conv9_2_feats, conv10_2_feats, conv11_2_feats
class PredictionConvolutions(nn.Module):
    """
    Prediction convolutions.

    For each of the six feature maps there is one 3x3 localisation head
    (4 outputs per prior) and one 3x3 classification head (``n_classes``
    outputs per prior). The heads are stored as ``loc_<fmap>`` / ``cl_<fmap>``.
    """

    # (feature-map name, priors per location, input channels)
    _HEADS = (
        ('conv4_3', 4, 672),
        ('conv7', 6, 960),
        ('conv8_2', 6, 512),
        ('conv9_2', 6, 256),
        ('conv10_2', 6, 256),
        ('conv11_2', 6, 128),
    )

    def __init__(self, n_classes):
        """
        :param n_classes: number of scores predicted for every prior
        """
        super(PredictionConvolutions, self).__init__()
        self.n_classes = n_classes

        # All localisation heads are registered first, then all class heads.
        for prefix, per_prior in (('loc_', 4), ('cl_', n_classes)):
            for fmap, n_boxes, channels in self._HEADS:
                head = nn.Conv2d(channels, n_boxes * per_prior, kernel_size=3, padding=1)
                setattr(self, prefix + fmap, head)

        self.init_conv2d()

    def init_conv2d(self):
        """
        Initialize convolution parameters.
        """
        convs = (child for child in self.children() if isinstance(child, nn.Conv2d))
        for conv in convs:
            nn.init.xavier_uniform_(conv.weight)
            nn.init.constant_(conv.bias, 0.)

    def forward(self, conv4_3_feats, conv7_feats, conv8_2_feats, conv9_2_feats, conv10_2_feats, conv11_2_feats):
        """
        :return: ``locs`` of shape (N, total priors, 4) and ``classes_scores`` of
                 shape (N, total priors, n_classes), concatenated in feature-map order
        """
        batch_size = conv4_3_feats.size(0)
        feats = (conv4_3_feats, conv7_feats, conv8_2_feats, conv9_2_feats, conv10_2_feats, conv11_2_feats)
        names = [fmap for fmap, _, _ in self._HEADS]

        def predict(prefix, width):
            # Channels-last, then one row of `width` values per prior.
            pieces = []
            for fmap, feat in zip(names, feats):
                raw = getattr(self, prefix + fmap)(feat)
                raw = raw.permute(0, 2, 3, 1).contiguous()
                pieces.append(raw.view(batch_size, -1, width))
            return torch.cat(pieces, dim=1)

        locs = predict('loc_', 4)
        classes_scores = predict('cl_', self.n_classes)
        return locs, classes_scores

mobilenetV3_large

class MobileNetV3_Large(nn.Module):
    """MobileNetV3-Large backbone used as the SSD feature extractor.

    ``forward`` returns two feature maps: the expanded activation exported by
    the one bottleneck that returns a tuple, and the output of the final 1x1
    convolution (960 channels).

    ``num_classes`` is accepted but not used: the classifier layers
    (``linear3`` / ``bn3`` / ``hs3`` / ``linear4``) are created with fixed
    sizes and are never called in ``forward``.
    """

    def __init__(self, num_classes=1000):
        super(MobileNetV3_Large, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.hs1 = hswish()

        def relu():
            return nn.ReLU(inplace=True)

        # (kernel, in, expand, out, activation factory, use SE, stride)
        bneck_cfg = [
            (3, 16, 16, 16, relu, False, 1),
            (3, 16, 64, 24, relu, False, 2),
            (3, 24, 72, 24, relu, False, 1),
            (5, 24, 72, 40, relu, True, 2),
            (5, 40, 120, 40, relu, True, 1),
            (5, 40, 120, 40, relu, True, 1),
            (3, 40, 240, 80, hswish, False, 2),
            (3, 80, 200, 80, hswish, False, 1),
            (3, 80, 184, 80, hswish, False, 1),
            (3, 80, 184, 80, hswish, False, 1),
            (3, 80, 480, 112, hswish, True, 1),
            (3, 112, 672, 112, hswish, True, 1),
            (5, 112, 672, 160, hswish, True, 1),
            (5, 160, 672, 160, hswish, True, 2),
            (5, 160, 960, 160, hswish, True, 1),
        ]
        # Every block gets its own activation instance and, when enabled, its
        # own SE module sized to the block's output channels.
        self.bneck = nn.Sequential(*[
            Block(k, c_in, c_exp, c_out, act(), SeModule(c_out) if use_se else None, s)
            for k, c_in, c_exp, c_out, act, use_se, s in bneck_cfg
        ])

        self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(960)
        self.hs2 = hswish()
        self.linear3 = nn.Linear(960, 1280)
        self.bn3 = nn.BatchNorm1d(1280)
        self.hs3 = hswish()
        self.linear4 = nn.Linear(1280, 1000)
        # Called without a path, so the weights are randomly initialised here.
        self.init_weights()

    def init_weights(self, pretrained=None):
        """Load pretrained weights or initialise the layers.

        :param pretrained: path to a checkpoint containing a ``"state_dict"``
            entry, or None to initialise the weights randomly. Any other value
            leaves the weights untouched.
        """
        if isinstance(pretrained, str):
            state_dict = torch.load(pretrained, map_location='cpu')["state_dict"]
            # strict=False: keys missing on either side are tolerated.
            self.load_state_dict(state_dict, strict=False)
            for param in self.parameters():
                param.requires_grad = True  # set to False to freeze the backbone
            print("\nLoaded base model.\n")
            return

        if pretrained is not None:
            return

        print("\nNo loaded base model.\n")
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        out = self.hs1(self.bn1(self.conv1(x)))
        for block in self.bneck:
            out = block(out)
            if isinstance(out, tuple):
                # The exporting block returns (expanded activation, block output).
                conv4_3_feats, out = out
        conv7_feats = self.hs2(self.bn2(self.conv2(out)))
        return conv4_3_feats, conv7_feats

调用mobilenetV3的ssd网络

class SSD300(nn.Module):
    """
    The SSD300 network - encapsulates the base MobileNet network, auxiliary, and prediction convolutions.

    ``forward`` returns raw box offsets and class scores for every prior box;
    ``detect_objects`` decodes them and runs per-class non-maximum suppression.
    """

    def __init__(self, n_classes):
        """
        :param n_classes: number of classes; class 0 is skipped by ``detect_objects``
        """
        super(SSD300, self).__init__()
        self.n_classes = n_classes
        self.base = MobileNetV3_Large(num_classes=self.n_classes)
        self.aux_convs = AuxiliaryConvolutions()
        self.pred_convs = PredictionConvolutions(n_classes)

        # Learnable per-channel scale for the L2-normalised 672-channel feature map, starting at 20.
        self.rescale_factors = nn.Parameter(torch.FloatTensor(1, 672, 1, 1))
        nn.init.constant_(self.rescale_factors, 20)

        # Prior boxes are fixed, so they are built once at construction time.
        self.priors_cxcy = self.create_prior_boxes()

    def forward(self, image):
        """
        :param image: batch of images; the prior layout assumes 300x300 inputs - TODO confirm
        :return: predicted offsets (N, n_priors, 4) and class scores (N, n_priors, n_classes)
        """
        conv4_3_feats, conv7_feats = self.base(image)

        # L2-normalise the low-level map across channels, then rescale it.
        norm = conv4_3_feats.pow(2).sum(dim=1, keepdim=True).sqrt() + 1e-10
        conv4_3_feats = conv4_3_feats / norm
        conv4_3_feats = conv4_3_feats * self.rescale_factors

        conv8_2_feats, conv9_2_feats, conv10_2_feats, conv11_2_feats = self.aux_convs(conv7_feats)

        locs, classes_scores = self.pred_convs(conv4_3_feats, conv7_feats, conv8_2_feats, conv9_2_feats,
                                               conv10_2_feats, conv11_2_feats)
        return locs, classes_scores

    def create_prior_boxes(self):
        """
        Create the prior (default) boxes for all six feature maps.

        :return: tensor of shape (2278, 4) with one (cx, cy, w, h) row per prior,
                 clamped to [0, 1], on the module-level ``device``
        """
        fmap_dims = {'conv4_3': 19,
                     'conv7': 10,
                     'conv8_2': 5,
                     'conv9_2': 3,
                     'conv10_2': 2,
                     'conv11_2': 1}
        obj_scales = {'conv4_3': 0.1,
                      'conv7': 0.2,
                      'conv8_2': 0.375,
                      'conv9_2': 0.55,
                      'conv10_2': 0.725,
                      'conv11_2': 0.9}

        aspect_ratios = {'conv4_3': [1., 2., 0.5],
                         'conv7': [1., 2., 3., 0.5, .333],
                         'conv8_2': [1., 2., 3., 0.5, .333],
                         'conv9_2': [1., 2., 3., 0.5, .333],
                         'conv10_2': [1., 2., 3., 0.5, .333],
                         'conv11_2': [1., 2., 3., 0.5, .333]}

        fmaps = list(fmap_dims.keys())
        prior_boxes = []
        for k, fmap in enumerate(fmaps):
            scale = obj_scales[fmap]
            # Extra square prior: geometric mean of this scale and the next
            # one; the last feature map has no successor and uses 1.
            if k + 1 < len(fmaps):
                additional_scale = sqrt(scale * obj_scales[fmaps[k + 1]])
            else:
                additional_scale = 1.
            for i in range(fmap_dims[fmap]):
                for j in range(fmap_dims[fmap]):
                    cx = (j + 0.5) / fmap_dims[fmap]
                    cy = (i + 0.5) / fmap_dims[fmap]
                    for ratio in aspect_ratios[fmap]:
                        prior_boxes.append([cx, cy, scale * sqrt(ratio), scale / sqrt(ratio)])
                        if ratio == 1.:
                            prior_boxes.append([cx, cy, additional_scale, additional_scale])
        prior_boxes = torch.FloatTensor(prior_boxes).to(device)
        prior_boxes.clamp_(0, 1)
        return prior_boxes

    def detect_objects(self, predicted_locs, predicted_scores, min_score, max_overlap, top_k):
        """
        For each class, perform Non-Maximum Suppression (NMS) on boxes that are above a minimum threshold.

        :param predicted_locs: predicted offsets w.r.t. the prior boxes, (N, n_priors, 4)
        :param predicted_scores: raw class scores, (N, n_priors, n_classes)
        :param min_score: minimum threshold for a box to be considered a match for a certain class
        :param max_overlap: maximum overlap two boxes can have so that the one with the lower score is not suppressed via NMS
        :param top_k: if there are a lot of resulting detection across all classes, keep only the top 'k'
        :return: detections (boxes, labels, and scores), lists of length batch_size
        """
        batch_size = predicted_locs.size(0)
        n_priors = self.priors_cxcy.size(0)
        predicted_scores = F.softmax(predicted_scores, dim=2)

        all_images_boxes = list()
        all_images_labels = list()
        all_images_scores = list()
        assert n_priors == predicted_locs.size(1) == predicted_scores.size(1)
        for i in range(batch_size):
            # Offsets -> centre-size boxes -> corner boxes.
            decoded_locs = cxcy_to_xy(
                gcxgcy_to_cxcy(predicted_locs[i], self.priors_cxcy))

            image_boxes = list()
            image_labels = list()
            image_scores = list()

            # Class 0 is skipped.
            for c in range(1, self.n_classes):
                class_scores = predicted_scores[i][:, c]
                score_above_min_score = class_scores > min_score
                n_above_min_score = score_above_min_score.sum().item()
                if n_above_min_score == 0:
                    continue
                class_scores = class_scores[score_above_min_score]
                class_decoded_locs = decoded_locs[score_above_min_score]

                # Highest score first, so a box is only suppressed by a better one.
                class_scores, sort_ind = class_scores.sort(dim=0, descending=True)
                class_decoded_locs = class_decoded_locs[sort_ind]

                overlap = find_jaccard_overlap(class_decoded_locs, class_decoded_locs)

                # True marks a box that has been suppressed.
                suppress = torch.zeros(n_above_min_score, dtype=torch.bool, device=overlap.device)
                for box in range(class_decoded_locs.size(0)):
                    if suppress[box]:
                        continue
                    # Suppress everything overlapping this box too much...
                    suppress = suppress | (overlap[box] > max_overlap)
                    # ...except the box itself (its self-overlap is 1).
                    suppress[box] = False

                keep = ~suppress
                image_boxes.append(class_decoded_locs[keep])
                image_labels.append(torch.LongTensor(keep.sum().item() * [c]).to(device))
                image_scores.append(class_scores[keep])

            # Nothing found: emit one placeholder box with label 0 and score 0.
            if len(image_boxes) == 0:
                image_boxes.append(torch.FloatTensor([[0., 0., 1., 1.]]).to(device))
                image_labels.append(torch.LongTensor([0]).to(device))
                image_scores.append(torch.FloatTensor([0.]).to(device))

            image_boxes = torch.cat(image_boxes, dim=0)
            image_labels = torch.cat(image_labels, dim=0)
            image_scores = torch.cat(image_scores, dim=0)
            n_objects = image_scores.size(0)

            if n_objects > top_k:
                image_scores, sort_ind = image_scores.sort(dim=0, descending=True)
                image_scores = image_scores[:top_k]
                image_boxes = image_boxes[sort_ind][:top_k]
                image_labels = image_labels[sort_ind][:top_k]

            all_images_boxes.append(image_boxes)
            all_images_labels.append(image_labels)
            all_images_scores.append(image_scores)
        return all_images_boxes, all_images_labels, all_images_scores

Step2：训练

关键在于训练，这里会利用pytorch的语法规则进行训练。

训练数据预处理(VOC形式的bdd数据，即BDD100K数据集)

本来是想在这里写用VOC2007进行训练，但是后来想想，人总是要进步嘛，不能总是利用VOC官方给的数据训练吧，所以这里还是清楚地讲一下怎么将bdd数据转换成VOC格式，并且进行训练。

首先，去官网下载bdd数据。

然后，利用下面这个程序，将json格式的标注文件转换成xml格式的标注文件。

import os
from json import loads
from dicttoxml import dicttoxml
from xml.dom.minidom import parseString


def jsonToXml(json_path, xml_path):
    """Convert one JSON file into a pretty-printed XML file.

    The XML root element is ``Annotations``, type attributes are disabled, and
    ``Annotation`` is supplied to dicttoxml as the item tag name.

    :param json_path: complete path of the json file
    :param xml_path: complete path of the xml file
    """
    def annotation_tag(_parent):
        # dicttoxml asks this callback for the tag name of list items.
        return 'Annotation'

    with open(json_path, 'r', encoding='UTF-8') as src:
        payload = loads(src.read())

    raw_xml = dicttoxml(payload, custom_root='Annotations', item_func=annotation_tag, attr_type=False)
    dom = parseString(raw_xml)

    with open(xml_path, 'w', encoding='UTF-8') as dst:
        dst.write(dom.toprettyxml())
        
def json_to_xml(json_dir, xml_dir):
    """Convert every ``*.json`` file in ``json_dir`` into an XML file in ``xml_dir``.

    The output keeps the full base name (``a.b.json`` -> ``a.b.xml``); files
    whose extension is not ``.json`` are ignored.

    :param json_dir: folder containing the json files
    :param xml_dir: output folder; created if it does not exist
    """
    os.makedirs(xml_dir, exist_ok=True)
    converted = 0
    for file_name in os.listdir(json_dir):
        stem, ext = os.path.splitext(file_name)
        if ext != '.json':
            continue
        jsonToXml(os.path.join(json_dir, file_name), os.path.join(xml_dir, stem + '.xml'))
        converted += 1
        print('处理了第：', converted, '个')
if __name__ == '__main__':
    # Convert a whole folder of JSON annotation files.
    j_dir = "train"  # folder holding the json files
    x_dir = "train_xml"  # folder receiving the xml files; it does not need to contain anything
    json_to_xml(json_dir=j_dir, xml_dir=x_dir)

然后，利用下面这个程序，生成ImageSets/Main里面的trainval.txt、train.txt、val.txt和test.txt文件。

import os
import random

trainval_percent = 0.7  # fraction of all samples written to trainval (the rest goes to test); adjust as needed
train_percent = 0.8  # fraction of trainval written to train (the rest goes to val); adjust as needed

xmlfilepath = "Annotations"  # folder with the xml annotations; use your own path
txtsavepath = "ImageSets/Main"
# The four split files are written here, so the folder must exist.
os.makedirs(txtsavepath, exist_ok=True)

total_xml = os.listdir(xmlfilepath)
num = len(total_xml)
indices = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)
# Sets make the membership tests below O(1) instead of a list scan per file.
trainval_ids = set(trainval)
train_ids = set(train)

with open(os.path.join(txtsavepath, 'trainval.txt'), 'w') as ftrainval, \
        open(os.path.join(txtsavepath, 'test.txt'), 'w') as ftest, \
        open(os.path.join(txtsavepath, 'train.txt'), 'w') as ftrain, \
        open(os.path.join(txtsavepath, 'val.txt'), 'w') as fval:
    for i in indices:
        # One sample id per line: the annotation file name without its extension.
        name = os.path.splitext(total_xml[i])[0] + '\n'
        if i in trainval_ids:
            ftrainval.write(name)
            if i in train_ids:
                ftrain.write(name)
            else:
                fval.write(name)
        else:
            ftest.write(name)

print('Well finshed')

这样，就得到了一份标准VOC格式的bdd训练数据啦，简单不简单呀。

数据检测

注意，这一步一定不要省，不然你训练的时候很容易出问题。比如bdd数据里面有些目标框没标注好，标注成了一条直线，导致训练的loss值变成inf，你需要找出那些没标注好的图片然后把它们删掉。

我写的检查程序如下。注意，检查出来并删掉之后，要重新生成ImageSets/Main下的train.txt等文件。

import json

# Load the per-image annotations and the image list; entry i of one file is
# used together with entry i of the other.
# Forward slashes work on every platform and avoid the invalid "\T" escape.
with open('processed_data/TRAIN_objects.json', 'r') as obj:
    a = json.load(obj)
with open('processed_data/TRAIN_images.json', 'r') as obj:
    b = json.load(obj)

if len(a) != len(b):
    raise ValueError('TRAIN_objects.json and TRAIN_images.json have different lengths')

for objects, image in zip(a, b):
    for boxe in objects['boxes']:
        # A box whose opposite coordinates coincide has collapsed to a line
        # (presumably xmin == xmax or ymin == ymax - confirm the box layout).
        if boxe[0] == boxe[2] or boxe[1] == boxe[3]:
            print(image)

编写训练程序

import time
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
from model import SSD300, MultiBoxLoss
from datasets import PascalVOCDataset
from utils import *
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Data parameters
data_folder = 'processed_data'  # folder holding the data-list files; passed to PascalVOCDataset
keep_difficult = True  # passed to PascalVOCDataset; whether objects flagged "difficult" in the VOC annotations are used

# Model parameters
# Not too many here since the SSD300 has a very specific structure
n_classes = len(label_map)  # number of classes; label_map presumably comes from `from utils import *` (confirm)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # GPU when available, otherwise CPU

# Learning parameters
#checkpoint=None
checkpoint = 'weights/MobilenetV3_Large-ssd300.pth.tar' # checkpoint to resume from; set to None to train from scratch
batch_size = 16  # batch size 
# iterations = 120000  # number of iterations to train  120000
workers = 8  # number of DataLoader worker processes used to load data
print_freq = 10  # print training status every print_freq batches
lr =1e-3  # learning rate
#decay_lr_to = 0.1  # decay learning rate to this fraction of the existing learning rate
momentum = 0.9  # momentum
weight_decay = 5e-4  # weight decay passed to the SGD optimizer
grad_clip = None  # gradient clipping value (a guard against exploding gradients); None disables clipping

cudnn.benchmark = True # enables cuDNN benchmark mode; commonly switched on to speed up training


def main():
    """
    Training.

    Builds a fresh SSD300 with an SGD optimizer, or resumes model and optimizer
    from ``checkpoint``; then trains for a fixed number of epochs, stepping a
    ReduceLROnPlateau scheduler on the loss reported by ``train`` and saving a
    checkpoint after every epoch.
    """
    global start_epoch, label_map, epoch, checkpoint, decay_lr_at

    # Initialise the model, or load a checkpoint
    if checkpoint is None:
        print("checkpoint none")
        start_epoch = 0
        model = SSD300(n_classes=n_classes)

        # Biases and the other parameters go into separate parameter groups so
        # they can be given different learning rates (currently both use `lr`).
        biases = list()
        not_biases = list()
        for param_name, param in model.named_parameters():
            if param.requires_grad:
                if param_name.endswith('.bias'):
                    biases.append(param)
                else:
                    not_biases.append(param)

        optimizer = torch.optim.SGD(params=[{'params': biases, 'lr': lr}, {'params': not_biases}],
                                    lr=lr, momentum=momentum, weight_decay=weight_decay)

    else:
        print("checkpoint load")
        # Map onto the selected device so resuming also works without a GPU.
        # NOTE(review): the checkpoint stores whole pickled objects; torch.load
        # unpickles them, so only load files you trust. Recent PyTorch releases
        # may require weights_only=False for this kind of file - confirm.
        checkpoint = torch.load(checkpoint, map_location=device)
        start_epoch = checkpoint['epoch'] + 1  # continue after the last finished epoch
        print('\nLoaded checkpoint from epoch %d.\n' % start_epoch)
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    # Move to default device
    model = model.to(device)
    # The loss needs the model's prior boxes to match predictions to ground truth.
    criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy).to(device)

    # Custom dataloaders
    train_dataset = PascalVOCDataset(data_folder, split='train', keep_difficult=keep_difficult)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                               collate_fn=train_dataset.collate_fn, num_workers=workers,
                                               pin_memory=True)

    epochs = 800
    print("epochs:", epochs)

    # (Re)start every parameter group at the configured learning rate; this
    # matters when the optimizer comes from a checkpoint.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    print("learning rate.  The new LR is %f\n" % (lr,))

    # Reduce the learning rate by 10x when the loss has not improved for 15 epochs.
    scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.1, patience=15, verbose=True,
                                  threshold=0.00001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)

    for epoch in range(start_epoch, epochs):

        # One epoch's training
        train(train_loader=train_loader,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              epoch=epoch)
        # `train` publishes the loss through the global `train_loss`; convert
        # it to a plain float so no autograd graph is kept alive.
        epoch_loss = float(train_loss)
        print("epoch loss:", epoch_loss)
        scheduler.step(epoch_loss)

        # Save checkpoint
        save_checkpoint(epoch, model, optimizer)

def train(train_loader, model, criterion, optimizer, epoch):
    """
    One epoch's training.

    :param train_loader: DataLoader yielding (images, boxes, labels, difficulties) batches
    :param model: model
    :param criterion: loss taking (predicted_locs, predicted_scores, boxes, labels)
    :param optimizer: optimizer
    :param epoch: epoch number, used for logging only
    :return: the epoch's running-average loss as tracked by ``AverageMeter``;
             the same value is stored in the global ``train_loss`` for ``main``
    """
    global train_loss

    model.train()  # training mode (affects BatchNorm / Dropout layers)

    # Running statistics for the status line
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    start = time.time()
    # Batches
    for i, (images, boxes, labels, _) in enumerate(train_loader):
        data_time.update(time.time() - start)

        # Move to default device
        images = images.to(device)
        boxes = [b.to(device) for b in boxes]
        labels = [l.to(device) for l in labels]

        # Forward prop.
        predicted_locs, predicted_scores = model(images)

        # Loss
        loss = criterion(predicted_locs, predicted_scores, boxes, labels)  # scalar

        # Backward prop.
        optimizer.zero_grad()
        loss.backward()

        # Clip gradients, if necessary
        if grad_clip is not None:
            clip_gradient(optimizer, grad_clip)

        # Update model
        optimizer.step()

        losses.update(loss.item(), images.size(0))
        batch_time.update(time.time() - start)

        start = time.time()

        # Print status
        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}][{3}]\t'
                  'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(epoch, i, len(train_loader),optimizer.param_groups[1]['lr'],
                                                                  batch_time=batch_time,
                                                                  data_time=data_time, loss=losses))

    # Publish a plain float (the epoch average) rather than the last batch's
    # loss tensor: it is a steadier scheduler signal and keeps no autograd graph.
    train_loss = losses.avg
    return train_loss


def adjust_learning_rate_epoch(optimizer,cur_epoch):
    """
    Shrink the learning rate of every parameter group by a fixed factor of 0.1.

    :param optimizer: optimizer whose learning rate must be shrunk; only its
        ``param_groups`` (a list of dicts with an 'lr' entry) is read and updated in place.
    :param cur_epoch: current epoch index; accepted for call-site compatibility, not used here.
    """
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * 0.1
    # The report used to read group 1 unconditionally, which raises IndexError
    # when the optimizer has a single parameter group; fall back to group 0 then.
    report_idx = 1 if len(optimizer.param_groups) > 1 else 0
    print("DECAYING learning rate. The new LR is %f\n" % (optimizer.param_groups[report_idx]['lr'],))

#warmup ,how much learning rate.
def adjust_learning_rate_iter(optimizer,cur_epoch):
    """
    Warm-up step: while in epoch 0 or 1, raise every parameter group's learning rate by 0.0001.

    Outside those two epochs the optimizer is left untouched.

    :param optimizer: optimizer to adjust; only its ``param_groups`` (a list of
        dicts with an 'lr' entry) is read and updated in place.
    :param cur_epoch: current epoch index.
    """
    if not (cur_epoch == 0 or cur_epoch == 1):
        return

    # The report used to read group 1 unconditionally, which raises IndexError
    # when the optimizer has a single parameter group; fall back to group 0 then.
    report_idx = 1 if len(optimizer.param_groups) > 1 else 0
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] + 0.0001
        # NOTE(review): the message says "DECAYING" although the rate is being
        # increased, and it is emitted once per parameter group; text kept as-is.
        print("DECAYING learning rate iter.  The new LR is %f\n" % (optimizer.param_groups[report_idx]['lr'],))

      


# Call main() only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()

这个程序通过读取 json 格式的文件来获取训练数据和训练标签，所以在训练之前还需要先转换一下数据格式，代码如下。

#使用注意事项：使用时记得将 voc_labels 修改为你自己训练数据的标签
#from utils import create_data_lists
import os
import xml.etree.ElementTree as ET
import json

# Class names of the training set, listed in label-id order (ids start at 1).
# Replace this tuple with the classes of your own dataset, e.g. the 20 Pascal
# VOC class names or a reduced set such as ('bus', 'car').
voc_labels = (
    'bus',
    'traffic light',
    'traffic sign',
    'person',
    'bike',
    'truck',
    'motor',
    'car',
    'train',
    'rider',
)

# Class name -> integer id; id 0 is reserved for 'background' and is added last.
label_map = dict(zip(voc_labels, range(1, len(voc_labels) + 1)))
label_map['background'] = 0

# Integer id -> class name (inverse of label_map).
rev_label_map = dict((index, name) for name, index in label_map.items())

def parse_annotation(annotation_path):
    """
    Read one XML annotation file and collect its boxes, label ids and difficulty flags.

    Every <category> element whose lower-cased, stripped text is a key of
    ``label_map`` contributes one label id and one difficulty flag of 0.
    Every <box2d> element contributes one [x1, y1, x2, y2] box, with each
    coordinate parsed as a float and truncated to int.

    NOTE(review): categories and boxes are gathered in two independent passes,
    so a skipped category does not skip a box; the two lists only line up if
    every skipped category has no <box2d> of its own -- confirm against the
    annotation format in use.

    :param annotation_path: path of the XML annotation file
    :return: dict with the keys 'boxes', 'labels' and 'difficulties'
    """
    root = ET.parse(annotation_path).getroot()

    # First pass: label ids of the categories known to label_map.
    labels = []
    for category in root.iter('category'):
        name = category.text.lower().strip()
        if name in label_map:
            labels.append(label_map[name])

    # No difficulty information is read from the file; every kept label gets 0.
    difficulties = [0] * len(labels)

    # Second pass: one integer box per <box2d> element.
    boxes = [
        [int(float(box2d.find(corner).text)) for corner in ('x1', 'y1', 'x2', 'y2')]
        for box2d in root.iter('box2d')
    ]

    return {'boxes': boxes, 'labels': labels, 'difficulties': difficulties}


def _collect_split(dataset_root, split_file):
    """
    Read one image-id list and gather the image paths and parsed annotations it refers to.

    Images whose annotation yields no boxes are left out.

    :param dataset_root: dataset folder containing 'ImageSets/Main', 'Annotations' and 'JPEGImages'
    :param split_file: name of the id list inside 'ImageSets/Main', e.g. 'trainval.txt'
    :return: tuple (list of image paths, list of annotation dicts, total number of boxes)
    """
    with open(os.path.join(dataset_root, 'ImageSets', 'Main', split_file)) as f:
        image_ids = f.read().splitlines()

    images = list()
    objects_per_image = list()
    n_objects = 0
    for image_id in image_ids:
        # Parse annotation's XML file
        objects = parse_annotation(os.path.join(dataset_root, 'Annotations', image_id + '.xml'))
        # parse_annotation returns a dict with three keys, so len(objects) is
        # always 3; emptiness and the object count must come from the boxes.
        if len(objects['boxes']) == 0:
            continue
        n_objects += len(objects['boxes'])
        objects_per_image.append(objects)
        images.append(os.path.join(dataset_root, 'JPEGImages', image_id + '.jpg'))

    return images, objects_per_image, n_objects


def create_data_lists(voc07_path,output_folder):
    """
    Create lists of images, the bounding boxes and labels of the objects in these images, and save these to file.

    Writes TRAIN_images.json, TRAIN_objects.json, label_map.json, TEST_images.json
    and TEST_objects.json into ``output_folder``, creating the folder if needed.

    :param voc07_path: path to the dataset folder laid out like 'VOC2007'
        ('ImageSets/Main', 'Annotations', 'JPEGImages')
    :param output_folder: folder where the JSONs must be saved
    """
    voc07_path = os.path.abspath(voc07_path)

    # Training data
    print(voc07_path)
    train_images, train_objects, n_objects = _collect_split(voc07_path, 'trainval.txt')

    # Save to file; make sure the destination exists so open() does not fail
    os.makedirs(output_folder, exist_ok=True)
    with open(os.path.join(output_folder, 'TRAIN_images.json'), 'w') as j:  # training image paths
        json.dump(train_images, j)
    with open(os.path.join(output_folder, 'TRAIN_objects.json'), 'w') as j:  # training annotations
        json.dump(train_objects, j)
    with open(os.path.join(output_folder, 'label_map.json'), 'w') as j:  # class name -> id
        json.dump(label_map, j)  # save label map too

    print('\nThere are %d training images containing a total of %d objects. Files have been saved to %s.' % (
        len(train_images), n_objects, os.path.abspath(output_folder)))

    # Test data
    # NOTE(review): the test lists are built from the same 'trainval.txt' as the
    # training lists; presumably the dataset has no separate test split -- confirm.
    test_images, test_objects, n_objects = _collect_split(voc07_path, 'trainval.txt')

    # Save to file
    with open(os.path.join(output_folder, 'TEST_images.json'), 'w') as j:
        json.dump(test_images, j)
    with open(os.path.join(output_folder, 'TEST_objects.json'), 'w') as j:
        json.dump(test_objects, j)

    print('\nThere are %d test images containing a total of %d objects. Files have been saved to %s.' % (
        len(test_images), n_objects, os.path.abspath(output_folder)))

# Script entry point: convert the dataset at the hard-coded path below and
# write the JSON lists into 'processed_data'. Adjust voc07_path to your own dataset.
if __name__ == '__main__':
    create_data_lists(voc07_path='D:/study/internship/work_file/Dataset/bdd100k/bdd1k',output_folder='processed_data')

训练过程如下图所示。

step3：预测

终于到预测啦，享受革命成果的时候到了。

代码如下。注意：虽然预测程序里没有显式引入神经网络模型文件，但该模型文件必须放在程序能够导入的路径下（例如同一目录）才能运行。这是因为权重文件中保存的是整个模型对象，加载时需要依据其中记录的模型类定义来重建模型。

from torchvision import transforms
from utils import *
from PIL import Image, ImageDraw, ImageFont
import time

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load model checkpoint
checkpoint = 'checkpoint_ssd300.pth.tar'
# Map the stored tensors onto the device selected above; a hard-coded 'cuda:0'
# makes loading fail on machines without a GPU even though `device` is 'cpu'.
checkpoint = torch.load(checkpoint, map_location=device)
print(checkpoint)
start_epoch = checkpoint['epoch'] + 1
print('\nLoaded checkpoint from epoch %d.\n' % start_epoch)
# The checkpoint stores the model object itself under the 'model' key.
model = checkpoint['model']
model = model.to(device)
model.eval()  # evaluation mode for prediction; use model.train() when training


def _text_size(font, text):
    """
    Return (width, height) of ``text`` rendered with ``font``.

    Uses ``getbbox`` when the font object offers it, because ``getsize`` was
    removed in Pillow 10; otherwise falls back to the legacy ``getsize``.
    """
    if hasattr(font, 'getbbox'):
        left, _, right, bottom = font.getbbox(text)
        # Same numbers the legacy getsize() reported: the text width, and the
        # height including the vertical offset above the glyphs.
        return right - left, bottom
    return font.getsize(text)


def detect(original_image, min_score, max_overlap, top_k, suppress=None):
    """
    Detect objects in an image with a trained SSD300, and visualize the results.

    The boxes and labels are drawn directly onto ``original_image`` (no copy is
    made), and that same image object is returned.

    :param original_image: image, a PIL Image
    :param min_score: minimum threshold for a detected box to be considered a match for a certain class
    :param max_overlap: maximum overlap two boxes can have so that the one with the lower score is not suppressed via Non-Maximum Suppression (NMS)
    :param top_k: if there are a lot of resulting detection across all classes, keep only the top 'k'
    :param suppress: classes that you know for sure cannot be in the image or you do not want in the image, a list
    :return: annotated image, a PIL Image
    """

    # Transform: resize to the 300x300 network input, convert to a tensor and
    # normalize with the per-channel mean/std given below.
    resize = transforms.Resize((300, 300))
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    image = normalize(to_tensor(resize(original_image)))

    # Move to default device
    image = image.to(device)

    # Inference only: no gradients are needed, so skip building the autograd graph.
    with torch.no_grad():
        # Forward prop. (unsqueeze adds the batch dimension)
        predicted_locs, predicted_scores = model(image.unsqueeze(0))

        # Decode the raw SSD output into boxes, labels and scores
        det_boxes, det_labels, _ = model.detect_objects(predicted_locs, predicted_scores, min_score=min_score,
                                                        max_overlap=max_overlap, top_k=top_k)

    # Move detections to the CPU
    det_boxes = det_boxes[0].to('cpu')

    # Transform to original image dimensions
    original_dims = torch.FloatTensor(
        [original_image.width, original_image.height, original_image.width, original_image.height]).unsqueeze(0)
    det_boxes = det_boxes * original_dims

    # Decode class integer labels
    det_labels = [rev_label_map[l] for l in det_labels[0].to('cpu').tolist()]
    print(det_labels)
    # If no objects found, the detected labels will be set to ['0.'], i.e. ['background'] in SSD300.detect_objects() in model.py
    if det_labels == ['background']:
        # Just return original image
        return original_image

    # Annotate (draws in place on the caller's image)
    annotated_image = original_image
    draw = ImageDraw.Draw(annotated_image)
    font = ImageFont.truetype("simhei.ttf", 15)

    for i in range(det_boxes.size(0)):
        # Suppress specific classes, if needed
        if suppress is not None and det_labels[i] in suppress:
            continue

        color = label_color_map[det_labels[i]]
        caption = det_labels[i].upper()

        # Boxes
        box_location = det_boxes[i].tolist()
        draw.rectangle(xy=box_location, outline=color)
        # a second rectangle at an offset of 1 pixel to increase line thickness
        draw.rectangle(xy=[l + 1. for l in box_location], outline=color)

        # Text: a filled label box sitting on top of the detection box
        text_size = _text_size(font, caption)
        text_location = [box_location[0] + 2., box_location[1] - text_size[1]]
        textbox_location = [box_location[0], box_location[1] - text_size[1], box_location[0] + text_size[0] + 4.,
                            box_location[1]]
        draw.rectangle(xy=textbox_location, fill=color)
        draw.text(xy=text_location, text=caption, fill='white',
                  font=font)
    del draw

    return annotated_image


if __name__ == '__main__':
    # Demo run on a single local image file.
    img_path = 'feiji1.jpg'
    # Open read-only and convert to three-channel RGB before detection.
    original_image = Image.open(img_path, mode='r').convert('RGB')
    annotated = detect(original_image, min_score=0.2, max_overlap=0.5, top_k=200)
    # Display the annotated result.
    annotated.show()

总结

以上为个人经验，希望能给大家一个参考，也希望大家多多支持脚本之家。

OCS2 是一个针对切换系统最优控制（OCS2）的 C++工具箱十年一梦实验室 c++开发语言
https://github.com/leggedrobotics/ocs2我将详细介绍位于https://github.com/leggedrobotics/ocs2的OCS2项目，这是一个由leggedrobotics团队开发并维护的开源软件库，专注于开关系统的最优控制（OptimalControlforSwitchedSystems）。以下是对其背景、功能、特点、应用场景及使用方法的全面说明
开源应用驱动企业新质生产力：Websoft9以EPP+AI+知识库助您领跑未来开源
开源应用驱动企业新质生产力：Websoft9以EPP+AI+知识库助您领跑未来在数字化转型加速的今天，企业新质生产力的核心已从传统资源投入转向技术驱动的效率革命。开源应用凭借其灵活性、成本优势和技术创新力，成为企业实现这一目标的关键引擎。作为开源技术与行业场景化落地的领航者，Websoft9通过企业应用平台（EPP）、AI智能引擎与知识库系统三位一体的解决方案，助力企业快速构建新一代生产力工具，实
STL--list基本使用 csdnjiajiac C++学习语言学习笔记 c++开发语言
目录一.基本概念二.基本使用1.list构造2.list赋值与交换3.list的大小操作4.list插入和删除5.list数据存取6.list反转和排序一.基本概念**功能：**将数据进行链式存储**链表**（list）是一种物理存储单元上非连续的存储结构，数据元素的逻辑顺序是通过链表中的指针链接实现的由于链表的存储方式并不是连续的内存空间，因此链表list中的迭代器只支持前移和后移，属于**双向
69.Harmonyos NEXT图片预览组件应用实践（二）：电商、内容与办公场景 harmonyos-next
温馨提示：本篇博客的详细代码已发布到git:https://gitcode.com/nutpi/HarmonyosNext可以下载运行哦！HarmonyosNEXT图片预览组件应用实践（二）：电商、内容与办公场景效果预览一、电商应用最佳实践1.功能需求电商应用中的商品图片预览需求包括：支持商品多角度图片查看高清缩放查看商品细节商品参数标注和热点标记与商品信息面板的联动支持视频和图片混合展示2.实现
HarmonyOS NEXT 权限申请系统授权和用户授权权限架构教育
在鸿蒙原生开发中，我们normal级别的证书能够使用的权限分为系统授权权限和用户授权权限其中系统授权权限只需要在模块的module.json5中requestPermissions中添加name就可以了，不需要reason等信息描述，但用户授权就需要reason和使用方式，同时在使用时还需要向用户弹框申请；下面时定位权限和网络权限的申请"requestPermissions":[{"name":"
非对称加密：SSL/TLS握手的数学基石安全
1.密钥交换的密码学困局在未加密的HTTP通信中，攻击者可通过中间人攻击（MITM）窃听或篡改数据。SSL/TLS协议的核心挑战在于：如何在不安全的信道上建立安全通信？这本质上是一个“密钥分发问题”——若使用对称加密（如AES），双方需要共享同一密钥，但密钥本身如何安全传递？非对称加密的突破性在于公钥与私钥的分离。以RSA算法为例，其数学基础是大质数分解难题：选择两个大质数p和q（通常≥2048位
守护网站安全的隐形卫士——SSL证书全解析安全
在网络世界中，保护用户数据的安全至关重要。无论你是经营一家小型网店还是管理大型企业网站，确保客户信息的安全性都是不可忽视的任务。今天，我们就来揭开一个默默守护网站安全的重要角色——SSL证书的神秘面纱。什么是SSL证书？SSL（SecureSocketsLayer）证书是一种数字证书，用于加密客户端与服务器之间的通信，确保数据传输过程中的安全性。简单来说，当您访问一个启用SSL证书的网站时，您的浏
短视频时代，普通人如何保护个人隐私？——从SSL证书看数据安全安全
在短视频时代，每天数以亿计的用户上传内容、互动评论、甚至进行直播购物。然而，这些行为背后潜藏着隐私泄露的风险：账号密码被盗、支付信息被窃取、个人数据遭篡改……如何在这些场景中保护隐私？SSL证书作为互联网安全的基石，正扮演着关键角色。本文将从技术原理到实践建议，为你揭开SSL证书如何成为隐私保护的“隐形盾牌”。一、SSL证书：隐私保护的第一道防线SSL（SecureSocketsLayer）证书是
HarmonyOS NEXT 将ArrayBuffer压缩到指定大小并转化为base64返回架构教育
项目中有需求要对获取的图片进行压缩，并且是要压缩到固定大小，考虑到harmonyos中对图片质量压缩方式packing，压缩后要及时检查大小，就使用while循环一步步的压缩，直至压缩到目标值letbitmap:ArrayBuffer;//需要压缩的数据letcompressSize:number;//目标大小letconsiderBase64:boolean;//是否考虑base64算法把字节数
嵌入式AI必备技能2-模型的压缩与加速奥德彪123 嵌入式AI 人工智能嵌入式
嵌入式AI必备技能2-模型的压缩与加速引言随着嵌入式AI设备的广泛应用，模型的计算效率和存储需求成为核心挑战。由于嵌入式系统通常资源受限，传统的深度学习模型往往难以直接部署。因此，模型压缩和加速技术应运而生，旨在减少计算量、降低存储需求，同时尽可能保持模型的准确性。本文介绍几种常见的模型压缩与加速方法，包括剪枝、低秩分解、量化、权值共享、知识蒸馏等，并探讨如何综合应用这些技术来优化AI模型。1.常
sql语句编写逻辑賢843 mysql sql
一、SQL语句的「书写顺序」与「执行顺序」书写顺序（人类思维逻辑）SELECT字段列表--第3步：确定要输出的内容FROM表名--第1步：确定数据来源[JOIN表ON条件]--第2步：处理表连接关系WHERE筛选条件--第4步：过滤原始数据GROUPBY分组字段--第5步：数据分组HAVING分组后条件--第6步：过滤分组结果ORDERBY排序字段--第7步：排序结果LIMIT分页参数--第8步：
JVM性能监控与调优小码快撩 jvm
导语JVM性能监控与调优是一个涵盖多个层面的复杂任务，涉及对JVM内部工作原理的理解、性能指标的监控、问题定位与优化策略的实施。以下是学习JVM性能监控与调优时应关注的主要技术点1.JVM基础知识JVM性能监控与调优之JVM基础知识在进行JVM性能监控与调优之前，深入理解JVM的基本知识是至关重要的。以下概述了JVM性能监控与调优所需掌握的核心基础知识：1.JVM内存区域划分堆内存（Heap）：存
Android第二次面试总结（项目拷打实战）每次的天空 android
MVVM+Jetpack组件落地采用ViewModel+LiveData实现数据驱动开发，将UI逻辑与业务逻辑解耦，通过LiveData的生命周期感知能力避免内存泄漏。使用WorkManager替代传统Service处理后台任务（如数据同步），结合Room数据库实现任务持久化，确保应用被杀后仍能恢复任务。性能优化实战集成Glide加载国风插画，结合自定义三级缓存策略（内存LRU+磁盘缓存+本地资源
数据标注工具及其对预训练模型性能的影响 AGI大模型与大数据研究院 DeepSeek R1 &大数据AI人工智能计算科学神经计算深度学习神经网络大数据人工智能大型语言模型 AI AGI LLM Java Python 架构设计 Agent RPA
1.背景介绍1.1预训练模型的崛起近年来，预训练模型（Pre-trainedModels）在自然语言处理（NLP）领域取得了显著的成功。这些模型通过在大规模无标注文本数据集上进行预训练，学习到丰富的语言知识和语义表示，并在下游任务中展现出优异的性能。BERT、GPT-3等预训练模型的出现，标志着NLP领域进入了一个新的时代。1.2数据标注的重要性尽管预训练模型展现出强大的能力，但它们仍然需要针对特
数据标注质量对AI模型质量的影响分析自由鬼行业发展 IT应用探讨人工智能机器学习深度学习 AI
上、数据标注质量与AI模型的质量关系数据标注是AI最基础的工作，数据标注的质量决定了AI质量，影响数据标注质量的是数据标注的规则。1、数据标注是AI最基础的工作：数据标注是构建高质量AI模型的基石：数据标注尤其是在监督学习范式下，是AI领域最基础、最关键的工作之一。没有高质量的标注数据，就如同建造高楼大厦没有坚实的地基，AI模型就无法有效地学习和训练，最终的AI质量也就无从谈起。训练数据是AI模型
DeepSeek API 客户端使用文档老大白菜 python 人工智能数据库
1.简介deep.py是一个用于与DeepSeekAPI交互的Python客户端封装。它提供了简单易用的接口，支持对话历史管理、日志记录等功能，使得与DeepSeekAPI的交互更加便捷和可靠。2.功能特点简单的接口设计自动管理对话历史完整的日志记录灵活的配置选项异常处理机制3.安装依赖pipinstallopenai4.配置环境在项目根目录创建.env文件：#WindowssetDEEPSEEK
【JVM】性能监控与调优概述篇白晨并不是很能熬夜 JVM jvm 后端面试 java 经验分享求职招聘
大家好，我是白晨，一个不是很能熬夜，但是也想日更的人✈。如果喜欢这篇文章，点个赞，关注一下白晨吧！你的支持就是我最大的动力！文章目录JVM性能监控与调优概述篇背景说明生产环境中的问题为什么要调优不同阶段的考虑调优概述监控的依据调优的大方向性能优化的步骤第一步（发现问题）：性能监控第二步（排查问题）：性能分析第三步（解决问题）：性能调优性能评价/测试指标停顿时间（或响应时间）吞吐量并发数内存站用相互
信息检索系统评估指标的层级分析：从单点精确度到整体性能度量人工智能深度学习llm检索系统
在构建搜索引擎系统时，有效的评估机制是保证系统质量的关键环节。当用户输入查询词如"machinelearningtutorialspython"，系统返回结果列表后，如何客观评估这些结果的相关性和有效性？这正是信息检索评估指标的核心价值所在。分析用户与搜索引擎的交互模式，我们可以观察到以下行为特征：用户主要关注结果列表的前几项对顶部结果的关注度显著高于底部结果用户基于多次搜索体验形成对搜索系统整体
文献阅读 | PNAS | 经验和发育中的前额叶皮层程序员
:::block-1文献介绍文献题目：经验和发育中的前额叶皮层\研究团队：BryanKolb（加拿大莱斯布里奇大学）\发表时间：2012-10-08\发表期刊：PNAS\影响因子：9.4\DOI：10.1073/pnas.1121251109:::摘要前额叶皮层（PFC）接收来自所有其他皮层区域的输入，并负责规划和指导跨时间的运动、认知、情感和社会行为。它具有较长的发育过程，这使得它能够通过经验获
HarmonyOS NEXT 添加地理围栏架构教育
添加一个围栏，并订阅地理围栏事件，地理围栏就是虚拟地理边界，当设备进入、离开某个特定地理区域时，可以接收自动通知和警告目前仅支持圆形围栏，并且依赖GNSS芯片的地理围栏功能，仅在室外开阔区域才能准确识别用户进出围栏事件geofence:geoLocationManager.Geofence中的coordinateSystemType表示地理围栏圆心坐标的坐标系,APP应先使用getGeofence
HarmonyNext实战：基于ArkTS的高性能音视频处理应用开发 harmonyos-next
HarmonyNext实战：基于ArkTS的高性能音视频处理应用开发引言在音视频处理领域，实时性和性能是关键。随着HarmonyNext生态系统的不断发展，开发者可以利用ArkTS语言构建高性能的音视频处理应用。本文将深入探讨如何利用ArkTS开发一个音视频处理应用，重点介绍音频处理、视频编解码以及实时流媒体传输的实现。我们将从理论基础出发，逐步构建一个完整的应用，并通过优化技巧提升性能。1.音视
Qt+ffmpeg环境搭建技术不支持 qt ffmpeg 开发语言
Qt+ffmpeg环境搭建各平台常见视频开发库举例:iOS：AVFoundationAudioUnitAndroid：MediaPlayer，MediaCodecWindows：DirectShowLinux：GStreamerFFmpeg库是一个跨平台的视频开发库,还有libVLC也是一个跨平台的视频开发库掌握了其中一个库,也能很快上手其它库,因为音视频解码的原理类似引入FFmpeg库有两种方式
HarmonyNext实战：基于ArkTS的高性能图像处理应用开发 harmonyos-next
HarmonyNext实战：基于ArkTS的高性能图像处理应用开发引言在HarmonyNext生态系统中，图像处理是一个重要且具有挑战性的领域。本文将深入探讨如何利用ArkTS语言开发一个高性能的图像处理应用，重点介绍图像卷积、边缘检测等核心算法的实现。我们将从理论基础出发，逐步构建一个完整的图像处理应用，并通过优化技巧提升性能。1.图像处理基础1.1图像表示在数字图像处理中，图像通常被表示为一个
我的创作纪念日我爱学习_zwj 前端前端框架华为
机缘在前端开发的广袤天地中，我的创作之旅始于一次充满挑战与机遇的契机。初涉前端领域时，面对复杂多变的项目需求和飞速更新的技术栈，我深刻感受到知识的浩瀚无边，也意识到自身能力的不足。在参与一个大型项目时，我遭遇了许多棘手难题，像页面加载速度的优化、不同浏览器兼容性的处理等。那时，我便产生了一个想法：把在实战中积累的经验以及过去学习的知识记录下来，既能作为自己成长的见证，也能与同行交流分享，携手攻克技
Python通过SSH隧道访问数据库 Java菜鸟在北京 python sshtunnel paramiko SSH隧道访问数据库
本文介绍通过sshtunnel类库建立SSH隧道，使用paramiko通过SSH来访问数据库。实现了两种建立SSH方式：公私钥验证、密码验证。公私钥可读本地，也可读取AwsS3上的私钥文件。本质上就是在本机建立SSH隧道，然后将访问DB转发到本地SSH内去访问数据库。简单易懂，上代码：fromsshtunnelimportSSHTunnelForwarderfromsqlalchemyimport
android 新闻客户端和springboot后台开发-网络接口封装（三） mmsx android 作业源码分享 android spring boot
一、前言android新闻客户端和springboot后台开发（一）-CSDN博客android新闻客户端和springboot后台开发（二）-CSDN博客这篇接前面，写android客户端接口这样方面的实现。okhttp简易封装，方便使用。二、例如注册接口示例UsermUser=newUser(account,password,UserTypeEnum.User.getDesc());Okhtt
基于大模型的单纯性孔源性视网膜脱离预测及治疗方案研究报告 LCG元围术期危险因子预测模型研究人工智能
目录一、引言1.1研究背景与目的1.2国内外研究现状1.3研究方法与创新点二、单纯性孔源性视网膜脱离概述2.1发病机制2.2高危因素2.3临床表现与诊断方法三、大模型在术前预测中的应用3.1模型选择与数据收集3.2术前风险预测指标3.3预测结果分析与验证四、基于预测结果的手术方案制定4.1手术原则与目标4.2不同预测结果下的手术方式选择4.3手术案例分析五、麻醉方案的确定5.1麻醉方式的选择依据5
C语言编译与链接详解夜晟洛 c语言开发语言
C语言是一种强大且广泛使用的编程语言。理解其编译和链接过程对于编写高效和可靠的代码至关重要。本文将详细探讨C语言的编译和链接过程，帮助你更好地理解代码从源文件到可执行文件的转变过程。目录一、编译过程概述1.预处理2.编译3.汇编4.链接二、编译与链接示例三、常见问题与最佳实践1.头文件保护2.模块化编程3.静态库和动态库静态库动态库四、总结一、编译过程概述编译过程将C语言源代码转换为机器码，可以分
Java 入门指南：Java 8 新特性 —— Stream 流热带鱼Tech Java java 后端个人开发 java-ee
文章目录JavaStream操作类型操作过程创建流操作流遍历forEach过滤filter映射map匹配match归约reduce排序sorted去重distinct限制limit跳过skip转换流流操作的特性JavaStreamJavaStream是Java8引入的一个新的API，它提供了一种函数式编程的方式来处理集合数据。Stream可以看作是一系列支持高效的、函数式操作的元素序列。通过使用S
Dinky × Jiron：打造高效智能的数据处理平台 jiron开源平台开发 flink 大数据 hive 数据仓库 kafka etl工程师 clickhouse
Dinky×Jiron：打造高效智能的数据处理平台JironGitHub地址https://github.com/642933588/jiron-cloudhttps://gitee.com/642933588/jiron-cloud将基于ApacheFlink的实时计算平台Dinky成功集成至Jiron数据开发平台，以进一步增强平台的数据处理能力，提升数据处理效率与灵活性，同时优化用户体验并降低
PHP，安卓，UI，java，linux视频教程合集 cocos2d-x小菜 java UI PHP android linux
╔-----------------------------------╗┆
各表中的列名必须唯一。在表 'dbo.XXX' 中多次指定了列名 'XXX'。 bozch .net .net mvc
在.net mvc5中，在执行某一操作的时候，出现了如下错误：各表中的列名必须唯一。在表 'dbo.XXX' 中多次指定了列名 'XXX'。经查询当前的操作与错误内容无关，经过对错误信息的排查发现，事故出现在数据库迁移上。回想过去：在迁移之前已经对数据库进行了添加字段操作，再次进行迁移插入XXX字段的时候，就会提示如上错误。 &
Java 对象大小的计算 e200702084 java
Java对象的大小如何计算一个对象的大小呢？
Mybatis Spring 171815164 mybatis
ApplicationContext ac = new ClassPathXmlApplicationContext("applicationContext.xml"); CustomerService userService = (CustomerService) ac.getBean("customerService"); Customer cust
JVM 不稳定参数 g21121 jvm
-XX 参数被称为不稳定参数，之所以这么叫是因为此类参数的设置很容易引起JVM 性能上的差异，使JVM 存在极大的不稳定性。当然这是在非合理设置的前提下，如果此类参数设置合理讲大大提高JVM 的性能及稳定性。可以说“不稳定参数”
用户自动登录网站永夜-极光用户
1.目标:实现用户登录后,再次登录就自动登录,无需用户名和密码 2.思路:将用户的信息保存为cookie 每次用户访问网站,通过filter拦截所有请求,在filter中读取所有的cookie,如果找到了保存登录信息的cookie,那么在cookie中读取登录信息,然后直接
centos7 安装后失去win7的引导记录程序员是怎么炼成的操作系统
1.使用root身份(必须)打开 /boot/grub2/grub.cfg 2.找到 ### BEGIN /etc/grub.d/30_os-prober ### 在后面添加 menuentry "Windows 7 (loader) (on /dev/sda1)" {
Oracle 10g 官方中文安装帮助文档以及Oracle官方中文教程文档下载 aijuans oracle
Oracle 10g 官方中文安装帮助文档下载：http://download.csdn.net/tag/Oracle%E4%B8%AD%E6%96%87API%EF%BC%8COracle%E4%B8%AD%E6%96%87%E6%96%87%E6%A1%A3%EF%BC%8Coracle%E5%AD%A6%E4%B9%A0%E6%96%87%E6%A1%A3 Oracle 10g 官方中文教程
JavaEE开源快速开发平台G4Studio_V3.2发布了無為子 AOP oracle mysql javaee G4Studio
我非常高兴地宣布,今天我们最新的JavaEE开源快速开发平台G4Studio_V3.2版本已经正式发布。大家可以通过如下地址下载。访问G4Studio网站 http://www.g4it.org G4Studio_V3.2版本变更日志功能新增 (1).新增了系统右下角滑出提示窗口功能。 (2).新增了文件资源的Zip压缩和解压缩
Oracle常用的单行函数应用技巧总结百合不是茶日期函数转换函数(核心)数字函数通用函数(核心)字符函数
单行函数; 字符函数,数字函数,日期函数,转换函数(核心),通用函数(核心) 一:字符函数: .UPPER(字符串) 将字符串转为大写 .LOWER (字符串) 将字符串转为小写 .INITCAP(字符串) 将首字母大写 .LENGTH (字符串) 字符串的长度 .REPLACE(字符串,'A','_') 将字符串字符A转换成_
Mockito异常测试实例 bijian1013 java 单元测试 mockito
Mockito异常测试实例： package com.bijian.study; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import org.junit.Assert; import org.junit.Test; import org.mockito.
GA与量子恒道统计 Bill_chen JavaScript 浏览器百度 Google 防火墙
前一阵子，统计**网址时，Google Analytics（GA）和量子恒道统计（也称量子统计），数据有较大的偏差，仔细找相关资料研究了下，总结如下：为何GA和量子网站统计（量子统计前身为雅虎统计）结果不同？首先：没有一种网站统计工具能保证百分之百的准确出现该问题可能有以下几个原因：（1）不同的统计分析系统的算法机制不同；（2）统计代码放置的位置和前后
【Linux命令三】Top命令 bit1129 linux命令
Linux的Top命令类似于Windows的任务管理器，可以查看当前系统的运行情况，包括CPU、内存的使用情况等。如下是一个Top命令的执行结果： top - 21:22:04 up 1 day, 23:49, 1 user, load average: 1.10, 1.66, 1.99 Tasks: 202 total, 4 running, 198 sl
spring四种依赖注入方式白糖_ spring
平常的java开发中，程序员在某个类中需要依赖其它类的方法，则通常是new一个依赖类再调用类实例的方法，这种开发存在的问题是new的类实例不好统一管理，spring提出了依赖注入的思想，即依赖类不由程序员实例化，而是通过spring容器帮我们new指定实例并且将实例注入到需要该对象的类中。依赖注入的另一种说法是“控制反转”，通俗的理解是：平常我们new一个实例，这个实例的控制权是我
angular.injector boyitech AngularJS AngularJS API
angular.injector 描述: 创建一个injector对象, 调用injector对象的方法可以获得angular的service, 或者用来做依赖注入. 使用方法: angular.injector(modules, [strictDi]) 参数详解: Param Type Details mod
java-同步访问一个数组Integer[10]，生产者不断地往数组放入整数1000，数组满时等待；消费者不断地将数组里面的数置零，数组空时等待 bylijinnan Integer
public class PC { /** * 题目：生产者-消费者。 * 同步访问一个数组Integer[10]，生产者不断地往数组放入整数1000，数组满时等待；消费者不断地将数组里面的数置零，数组空时等待。 */ private static final Integer[] val=new Integer[10]; private static
使用Struts2.2.1配置 Chen.H apache spring Web xml struts
Struts2.2.1 需要如下 jar包: commons-fileupload-1.2.1.jar commons-io-1.3.2.jar commons-logging-1.0.4.jar freemarker-2.3.16.jar javassist-3.7.ga.jar ognl-3.0.jar spring.jar struts2-core-2.2.1.jar struts2-sp
[职业与教育]青春之歌 comsci 教育
每个人都有自己的青春之歌............但是我要说的却不是青春... 大家如果在自己的职业生涯没有给自己以后创业留一点点机会,仅仅凭学历和人脉关系,是难以在竞争激烈的市场中生存下去的.... &nbs
oracle连接(join)中使用using关键字 daizj JOIN oracle sql using
在oracle连接(join)中使用using关键字 34. View the Exhibit and examine the structure of the ORDERS and ORDER_ITEMS tables. Evaluate the following SQL statement: SELECT oi.order_id, product_id, order_date FRO
NIO示例 daysinsun nio
NIO服务端代码： public class NIOServer { private Selector selector; public void startServer(int port) throws IOException { ServerSocketChannel serverChannel = ServerSocketChannel.open(
C语言学习homework1 dcj3sjt126com c homework
0、课堂练习做完 1、使用sizeof计算出你所知道的所有的类型占用的空间。 int x; sizeof(x); sizeof(int); # include <stdio.h> int main(void) { int x1; char x2; double x3; float x4; printf(&quo
select in order by , mysql排序 dcj3sjt126com mysql
If i select like this: SELECT id FROM users WHERE id IN(3,4,8,1); This by default will select users in this order 1,3,4,8, I would like to select them in the same order that i put IN() values so:
页面校验-新建项目 fanxiaolong 页面校验
$(document).ready( function() { var flag = true; $('#changeform').submit(function() { var projectScValNull = true; var s =""; var parent_id = $("#parent_id").v
Ehcache（02）——ehcache.xml简介 234390216 ehcache ehcache.xml 简介
ehcache.xml简介 ehcache.xml文件是用来定义Ehcache的配置信息的，更准确的来说它是定义CacheManager的配置信息的。根据之前我们在《Ehcache简介》一文中对CacheManager的介绍我们知道一切Ehcache的应用都是从CacheManager开始的。在不指定配置信
junit 4.11中三个新功能 jackyrong java
junit 4.11中两个新增的功能，首先是注解中可以参数化，比如 import static org.junit.Assert.assertEquals; import java.util.Arrays; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runn
国外程序员爱用苹果Mac电脑的10大理由 php教程分享 windows PHP unix Microsoft perl
Mac 在国外很受欢迎，尤其是在设计/web开发/IT 人员圈子里。普通用户喜欢 Mac 可以理解，毕竟 Mac 设计美观，简单好用，没有病毒。那么为什么专业人士也对 Mac 情有独钟呢？从个人使用经验来看我想有下面几个原因： 1、Mac OS X 是基于 Unix 的这一点太重要了，尤其是对开发人员，至少对于我来说很重要，这意味着Unix 下一堆好用的工具都可以随手捡到。如果你是个 wi
位运算、异或的实际应用 wenjinglian 位运算
一．位操作基础，用一张表描述位操作符的应用规则并详细解释。二．常用位操作小技巧，有判断奇偶、交换两数、变换符号、求绝对值。三．位操作与空间压缩，针对筛素数进行空间压缩。 &n
weblogic部署项目出现的一些问题（持续补充中……） Everyday都不同 weblogic部署失败
好吧，weblogic的问题确实…… 问题一： org.springframework.beans.factory.BeanDefinitionStoreException: Failed to read candidate component class: URL [zip:E:/weblogic/user_projects/domains/base_domain/serve
tomcat7性能调优（01） toknowme tomcat7
Tomcat优化： 1、最大连接数最大线程等设置 <Connector port="8082" protocol="HTTP/1.1" useBodyEncodingForURI="t
PO VO DAO DTO BO TO概念与区别 xp9802 java DAO 设计模式 bean 领域模型
O/R Mapping 是 Object Relational Mapping（对象关系映射）的缩写。通俗点讲，就是将对象与关系数据库绑定，用对象来表示关系数据。在O/R Mapping的世界里，有两个基本的也是重要的东东需要了解，即VO，PO。它们的关系应该是相互独立的，一个VO可以只是PO的部分，也可以是多个PO构成，同样也可以等同于一个PO（指的是他们的属性）。这样，PO独立出来，数据持

按字母分类： A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 其他