Zain Lau

2020-722-CV-YOLOV4 day3-yolo_v4

model.py

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import math

class VGG(nn.Module):
    def __init__(self):
       super(VGG,self).__init__()
       # the vgg's layers
       #self.features = features
       cfg = [64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,'M']
       layers= []
       batch_norm = False
       in_channels = 3
       for v in cfg:
           if v == 'M':
               layers += [nn.MaxPool2d(kernel_size=2,stride = 2)]
           else:
               conv2d = nn.Conv2d(in_channels,v,kernel_size=3,padding = 1)
               if batch_norm:
                   layers += [conv2d,nn.Batchnorm2d(v),nn.ReLU(inplace=True)]
               else:
                   layers += [conv2d,nn.ReLU(inplace=True)]
               in_channels = v
       # use the vgg layers to get the feature
       self.features = nn.Sequential(*layers)
       # 全局池化
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：分类层
       self.classifier = nn.Sequential(
           nn.Linear(512*7*7,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,1000),
       )

       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)

    def forward(self,x):
         x = self.features(x)
         x_fea = x
         x = self.avgpool(x)
         x_avg = x
         x = x.view(x.size(0),-1)
         x = self.classifier(x)
         return x,x_fea,x_avg
    def extractor(self,x):
         x = self.features(x)
         return x

class YOLOV1(nn.Module):
    def __init__(self):
       super(YOLOV1,self).__init__()
       vgg = VGG()
       self.extractor = vgg.extractor
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：检测层
       self.detector = nn.Sequential(
          nn.Linear(512*7*7,4096),
          nn.ReLU(True),
          nn.Dropout(),
          nn.Linear(4096,1470),
       )
       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)
    def forward(self,x):
        x = self.extractor(x)
        import pdb
        pdb.set_trace()
        x = self.avgpool(x)
        x = x.view(x.size(0),-1)
        x = self.detector(x)
        b,_ = x.shape
        x = x.view(b,7,7,30)
        return x
     
if __name__ == '__main__':
    vgg = VGG()
    x  = torch.randn(1,3,512,512)
    feature,x_fea,x_avg = vgg(x)
    print(feature.shape)
    print(x_fea.shape)
    print(x_avg.shape)
 
    yolov1 = YOLOV1()
    feature = yolov1(x)
    # feature_size b*7*7*30
    print(feature.shape)

model_yolov1_xiangxi.py

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import math
import cv2




class VGG(nn.Module):
    def __init__(self):
       super(VGG,self).__init__()
       # the vgg's layers
       #self.features = features
       cfg = [64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,'M']
       # use the vgg layers to get the feature
       #import pdb
       #pdb.set_trace()
       self.conv2d_1 = nn.Conv2d(3,64,3,1)
       self.bn_1 = nn.BatchNorm2d(64)
       self.relu_1 = nn.ReLU(True)
       
       self.conv2d_2 = nn.Conv2d(64,64,3,1)
       self.bn_2 = nn.BatchNorm2d(64)
       self.relu_2 = nn.ReLU(True)
       
       self.pool_2 = nn.MaxPool2d(2,2)
       self.conv2d_3 = nn.Conv2d(64,128,3,1)
       self.bn_3 = nn.BatchNorm2d(128)
       self.relu_3 = nn.ReLU(True)
       
       self.conv2d_4 = nn.Conv2d(128,128,3,1)
       self.bn_4 = nn.BatchNorm2d(128)
       self.relu_4 = nn.ReLU(True)
       
       self.pool_4 = nn.MaxPool2d(2,2)
       self.conv2d_5 = nn.Conv2d(128,256,3,1)
       self.bn_5 = nn.BatchNorm2d(256)
       self.relu_5 = nn.ReLU(True)
       
       self.conv2d_6 = nn.Conv2d(256,256,3,1)
       self.bn_6 = nn.BatchNorm2d(256)
       self.relu_6 = nn.ReLU(True)
       
       self.conv2d_7 = nn.Conv2d(256,256,3,1)
       self.bn_7 = nn.BatchNorm2d(256)
       self.relu_7 = nn.ReLU(True)
       
       self.pool_7 = nn.MaxPool2d(2,2)
       self.conv2d_8 = nn.Conv2d(256,512,3,1)
       self.bn_8 = nn.BatchNorm2d(512)
       self.relu_8 = nn.ReLU(True)
       
       self.conv2d_9 = nn.Conv2d(512,512,3,1)
       self.bn_9 = nn.BatchNorm2d(512)
       self.relu_9 = nn.ReLU(True)
       
       self.conv2d_10 = nn.Conv2d(512,512,3,1)
       self.bn_10 = nn.BatchNorm2d(512)
       self.relu_10 = nn.ReLU(True)
       
       self.pool_10 = nn.MaxPool2d(2,2)
       self.conv2d_11 = nn.Conv2d(512,512,3,1)
       self.bn_11 = nn.BatchNorm2d(512)
       self.relu_11 = nn.ReLU(True)
       
       self.conv2d_12 = nn.Conv2d(512,512,3,1)
       self.bn_12 = nn.BatchNorm2d(512)
       self.relu_12 = nn.ReLU(True)
       
       self.conv2d_13 = nn.Conv2d(512,512,3,1)
       self.bn_13 = nn.BatchNorm2d(512)
       self.relu_13 = nn.ReLU(True)
       
       self.pool_13 = nn.MaxPool2d(2,2)
       # 全局池化
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：分类层
       self.classifier = nn.Sequential(
           nn.Linear(512*7*7,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,1000),
       )

       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)

    def forward(self,x):
         x_records=[]
         #x = self.features(x)
         x = self.conv2d_1(x)
         x_records.append(x)
         x = self.bn_1(x)
         x = self.relu_1(x)
         x_records.append(x)
         x = self.conv2d_2(x)
         x = self.bn_2(x)
         x = self.relu_2(x)
         x_records.append(x)
         x = self.pool_2(x)
         x = self.conv2d_3(x)
         x = self.bn_3(x)
         x = self.relu_3(x)
         x_records.append(x)
         x = self.conv2d_4(x)
         x = self.bn_4(x)
         x = self.relu_4(x)
         x_records.append(x)

         x = self.pool_4(x)
         x = self.conv2d_5(x)
         x = self.bn_5(x)
         x = self.relu_5(x)
         x_records.append(x)
         x = self.conv2d_6(x)
         x = self.bn_6(x)
         x = self.relu_6(x)
         x_records.append(x)
         x = self.conv2d_7(x)
         x = self.bn_7(x)
         x = self.relu_7(x)
         x_records.append(x)
         x = self.pool_7(x)
         x = self.conv2d_8(x)
         x = self.bn_8(x)
         x = self.relu_8(x)
         x_records.append(x)
         x = self.conv2d_9(x)
         x = self.bn_9(x)
         x = self.relu_9(x)
         x_records.append(x)
         x = self.conv2d_10(x)
         x = self.bn_10(x)
         x = self.relu_10(x)
         x = self.pool_10(x)
         x = self.conv2d_11(x)
         x = self.bn_11(x)
         x = self.relu_11(x)
         x = self.conv2d_12(x)
         x = self.bn_12(x)
         x = self.relu_12(x)
         x = self.conv2d_13(x)
         x = self.bn_13(x)
         x = self.relu_13(x)
         x = self.pool_13(x)
         x_records.append(x)
         x = self.avgpool(x)
         x_records.append(x)
         x = x.view(x.size(0),-1)
         x = self.classifier(x)
         return x,x_records


if __name__ == '__main__':
    vgg = VGG()
    x  = torch.randn(1,3,512,512)
    #img=torch.tensor(cv2.imread("../../model_test.png"))
    img=torch.tensor(cv2.imread("../../model_test2.png"))
    w,h,c = img.shape
    x = img.view(1,w,h,c).permute(0,3,1,2).contiguous()
    x = x/255.
    import pdb
    pdb.set_trace()
    feature,x_records = vgg(x)
    print(feature.shape)
    from show_featuremap import show_featuremap
    for fea in x_records:
        print(fea.shape)
        show_featuremap(fea.detach())

PennFudanDataset_main.py

import os
import numpy as np
import torch
from PIL import Image


class PennFudanDataset(object):
    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        # load images ad masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        # convert the PIL Image into a numpy array
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]

        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]

        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.imgs)
        import transforms as T
import transforms as T
def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

show_featuremap.py

import cv2
import torch
def show_featuremap(feature):
    b,c,w,h = feature.shape
    feature_show=torch.zeros(b*w,c*h)
    for bi in range(0,b):
        for ci in range(0,c):
            #import pdb
            #pdb.set_trace()
            feature_show[bi*w:(bi+1)*w,ci*h:(ci+1)*h]=feature[bi,ci,:,:]
    print(feature_show.shape)
    cv2.imwrite("feature_bw_ch.png",255*feature_show.float().numpy())
    import os
    os.system("open feature_bw_ch.png")
    return feature_show

if __name__=="__main__":
    feature = torch.randn(1,10,100,100)
    feature_show = show_featuremap(feature)
    print(feature_show.shape)
    cv2.imwrite("feature_bw_ch.png",255*feature_show.float().numpy())
    import os
    os.system("open feature_bw_ch.png")

transforms.py

import random
import torch

from torchvision.transforms import functional as F


def _flip_coco_person_keypoints(kps, width):
    flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
    flipped_data = kps[:, flip_inds]
    flipped_data[..., 0] = width - flipped_data[..., 0]
    # Maintain COCO convention that if visibility == 0, then x, y = 0
    inds = flipped_data[..., 2] == 0
    flipped_data[inds] = 0
    return flipped_data


class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image, target = t(image, target)
        return image, target


class RandomHorizontalFlip(object):
    def __init__(self, prob):
        self.prob = prob

    def __call__(self, image, target):
        if random.random() < self.prob:
            height, width = image.shape[-2:]
            image = image.flip(-1)
            bbox = target["boxes"]
            bbox[:, [0, 2]] = width - bbox[:, [2, 0]]
            target["boxes"] = bbox
            if "masks" in target:
                target["masks"] = target["masks"].flip(-1)
            if "keypoints" in target:
                keypoints = target["keypoints"]
                keypoints = _flip_coco_person_keypoints(keypoints, width)
                target["keypoints"] = keypoints
        return image, target


class ToTensor(object):
    def __call__(self, image, target):
        image = F.to_tensor(image)
        return image, target

v0yolo_model.py

#coding:utf-8
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import math

class VGG(nn.Module):
    def __init__(self):
       super(VGG,self).__init__()
       # the vgg's layers
       #self.features = features
       cfg = [64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,'M']
       layers= []
       batch_norm = False
       in_channels = 3
       for v in cfg:
           if v == 'M':
               layers += [nn.MaxPool2d(kernel_size=2,stride = 2)]
           else:
               conv2d = nn.Conv2d(in_channels,v,kernel_size=3,padding = 1)
               if batch_norm:
                   layers += [conv2d,nn.Batchnorm2d(v),nn.ReLU(inplace=True)]
               else:
                   layers += [conv2d,nn.ReLU(inplace=True)]
               in_channels = v
       # use the vgg layers to get the feature
       self.features = nn.Sequential(*layers)
       # 全局池化
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：分类层
       self.classifier = nn.Sequential(
           nn.Linear(512*7*7,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,1000),
       )

       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)

    def forward(self,x):
         x = self.features(x)
         x_fea = x
         x = self.avgpool(x)
         x_avg = x
         x = x.view(x.size(0),-1)
         x = self.classifier(x)
         return x,x_fea,x_avg
    def extractor(self,x):
         x = self.features(x)
         return x

class YOLOV0(nn.Module):
    def __init__(self):
       super(YOLOV0,self).__init__()
       vgg = VGG()
       self.extractor = vgg.extractor
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：检测层
       self.detector = nn.Sequential(
          nn.Linear(512*7*7,4096),
          nn.ReLU(True),
          nn.Dropout(),
          #nn.Linear(4096,1470),
          nn.Linear(4096,5),
       )
       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)
    def forward(self,x):
        x = self.extractor(x)
        #import pdb
        #pdb.set_trace()
        x = self.avgpool(x)
        x = x.view(x.size(0),-1)
        x = self.detector(x)
        b,_ = x.shape
        #x = x.view(b,7,7,30)
        x = x.view(b,1,1,5)
        return x
     
if __name__ == '__main__':
    vgg = VGG()
    x  = torch.randn(1,3,512,512)
    feature,x_fea,x_avg = vgg(x)
    print(feature.shape)
    print(x_fea.shape)
    print(x_avg.shape)
 
    yolov1 = YOLOV1()
    feature = yolov1(x)
    # feature_size b*7*7*30
    print(feature.shape)

v0yolotrain.py

#coding:utf-8
from PennFudanDataset_main import *

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader
from v0yolo_model import *
import cv2
import numpy as np
import time
import sys
import os


## 数据处理
#服务器上的地址 /data/2020-722-YOLOV4-Practical-datasets/PenFudanPed
# dataset地址：/Users/zhaomignming/Documents/mmteacher/datasets
#datapath='/Users/zhaomignming/Documents/mmteacher/datasets/PennFudanPed'
datapath='/Users/zhaomingming/data_sets/PennFudanPed'
dataset = PennFudanDataset(datapath, get_transform(train=False))
dataset_test = PennFudanDataset(datapath, get_transform(train=False))

indices = torch.randperm(len(dataset)).tolist()
#dataset = torch.utils.data.Subset(dataset, indices[:-50])
#dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])
#dataset = torch.utils.data.Subset(dataset, indices[0:1])
#import pdb
#pdb.set_trace()
#dataset = torch.utils.data.Subset(dataset, indices[0:1])
dataset = torch.utils.data.Subset(dataset, [0])
dataset_test = torch.utils.data.Subset(dataset_test, indices[0:2])

def collate_fn(batch):
    return tuple(zip(*batch))
# define training and validation data loaders
train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=1,
        collate_fn=collate_fn)

val_loader = torch.utils.data.DataLoader(
        dataset_test, batch_size=2, shuffle=False, num_workers=4,
        collate_fn=collate_fn)


def input_process(batch):
    #import pdb
    #pdb.set_trace()
    batch_size=len(batch[0])
    input_batch= torch.zeros(batch_size,3,448,448)
    for i in range(batch_size):
        inputs_tmp = Variable(batch[0][i])
        inputs_tmp1=cv2.resize(inputs_tmp.permute([1,2,0]).numpy(),(448,448))
        inputs_tmp2=torch.tensor(inputs_tmp1).permute([2,0,1])
        input_batch[i:i+1,:,:,:]= torch.unsqueeze(inputs_tmp2,0)
    return input_batch 

#batch[1][0]['boxes'][0]
def target_process(batch):
    batch_size=len(batch[0])
    target_batch= torch.zeros(batch_size,1,1,5)
    #import pdb
    #pdb.set_trace()
    for i in range(batch_size):
        #只处理batch中的第一张图片
        # batch[1]表示label
        # batch[0]表示image
        bbox=batch[1][i]['boxes'][0]
        _,hi,wi = batch[0][i].numpy().shape
        bbox = bbox/ torch.tensor([wi,hi,wi,hi])
        cbbox =  torch.cat([torch.ones(1),bbox])
        target_batch[i:i+1,:,:,:] = torch.unsqueeze(cbbox,0)
    return target_batch
    

num_classes = 2
n_class    = 2
batch_size = 6
epochs     = 500
lr         = 1e-3
momentum   = 0
w_decay    = 1e-5
step_size  = 50
gamma      = 0.5

# 定义模型
yolov0_model = YOLOV0()
import pdb
pdb.set_trace()
# 定义优化算法为sdg:随机梯度下降
optimizer = optim.SGD(yolov0_model.detector.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay)

# 定义学习率变化策略
# 每30个epoch 学习率乘以0.5
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)  # decay LR by a factor of 0.5 every 30 epochs

# 矩阵形式写法，写法简单，但是可读性不强
def lossfunc(outputs,labels):
    #import  pdb
    #pdb.set_trace()
    tmp = (outputs-labels)**2
    return torch.sum(tmp,0).view(1,5).mm(torch.tensor([10,0.0001,0.0001,0.0001,0.0001]).view(5,1))

# 定义直接拟合的学习率，可读性强
def lossfunc_details(outputs,labels):
    # 判断维度
    assert ( outputs.shape == labels.shape),"outputs shape[%s] not equal labels shape[%s]"%(outputs.shape,labels.shape)
    b,w,h,c = outputs.shape
    loss = 0
    for bi in range(b):
        for wi in range(w):
            for hi in range(h):
                #import pdb
                #pdb.set_trace()
                # detect_vector=[confidence,x,y,w,h]
                detect_vector = outputs[bi,wi,hi]
                gt_dv = labels[bi,wi,hi]
                conf_pred = detect_vector[0]
                conf_gt = gt_dv[0]
                x_pred = detect_vector[1]
                x_gt = gt_dv[1]
                y_pred = detect_vector[2]
                y_gt = gt_dv[2]
                w_pred = detect_vector[3]
                w_gt = gt_dv[3]
                h_pred = detect_vector[4]
                h_gt = gt_dv[4]
                loss_confidence = (conf_pred-conf_gt)**2 
                #loss_geo = (x_pred-x_gt)**2 + (y_pred-y_gt)**2 + (w_pred**0.5-w_gt**0.5)**2 + (h_pred**0.5-h_gt**0.5)**2
                loss_geo = (x_pred-x_gt)**2 + (y_pred-y_gt)**2 + (w_pred-w_gt)**2 + (h_pred-h_gt)**2
                loss_tmp = loss_confidence + 0.3*loss_geo
                #print("loss[%s,%s] = %s,%s"%(wi,hi,loss_confidence.item(),loss_geo.item()))
                loss += loss_tmp
    return loss
# train
def train():
    for epoch in range(epochs):
        ts = time.time()
        for iter, batch in enumerate(train_loader):
            optimizer.zero_grad()
            # 取图片
            inputs = input_process(batch)
            # 取标注
            labels = target_process(batch)
            
            # 获取得到输出
            outputs = yolov0_model(inputs)
            #import pdb
            #pdb.set_trace()
            #loss = criterion(outputs, labels)
            loss = lossfunc_details(outputs,labels)
            loss.backward()
            optimizer.step()
            #print(torch.cat([outputs.detach().view(1,5),labels.view(1,5)],0).view(2,5))
            if iter % 10 == 0:
            #    print(torch.cat([outputs.detach().view(1,5),labels.view(1,5)],0).view(2,5))
                print("epoch{}, iter{}, loss: {}, lr: {}".format(epoch, iter, loss.data.item(),optimizer.state_dict()['param_groups'][0]['lr']))
        
        #print("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))
        #print("*"*30)
        #val(epoch)
        scheduler.step()
# inference
def val(epoch):
    yolov0_model.eval()
    total_ious = []
    pixel_accs = []
    for iter, batch in enumerate(val_loader):
        inputs = input_process(batch)
        target,label= target_process(batch)

        output = yolov1_model(inputs)
        output = output.data.cpu().numpy()

        N, _, h, w = output.shape
        pred = output.transpose(0, 2, 3, 1).reshape(-1, n_class).argmax(axis=1).reshape(N, h, w)
        

if __name__ == "__main__":
    train()

v1yolomodel.py

#coding:utf-8
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import math

class VGG(nn.Module):
    def __init__(self):
       super(VGG,self).__init__()
       # the vgg's layers
       #self.features = features
       cfg = [64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,'M']
       layers= []
       batch_norm = False
       in_channels = 3
       for v in cfg:
           if v == 'M':
               layers += [nn.MaxPool2d(kernel_size=2,stride = 2)]
           else:
               conv2d = nn.Conv2d(in_channels,v,kernel_size=3,padding = 1)
               if batch_norm:
                   layers += [conv2d,nn.Batchnorm2d(v),nn.ReLU(inplace=True)]
               else:
                   layers += [conv2d,nn.ReLU(inplace=True)]
               in_channels = v
       # use the vgg layers to get the feature
       self.features = nn.Sequential(*layers)
       # 全局池化
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：分类层
       self.classifier = nn.Sequential(
           nn.Linear(512*7*7,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,1000),
       )

       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)

    def forward(self,x):
         x = self.features(x)
         x_fea = x
         x = self.avgpool(x)
         x_avg = x
         x = x.view(x.size(0),-1)
         x = self.classifier(x)
         return x,x_fea,x_avg
    def extractor(self,x):
         x = self.features(x)
         return x

class YOLOV1(nn.Module):
    def __init__(self):
       super(YOLOV1,self).__init__()
       vgg = VGG()
       self.extractor = vgg.extractor
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：检测层
       self.detector = nn.Sequential(
          nn.Linear(512*7*7,4096),
          nn.ReLU(True),
          nn.Dropout(),
          #nn.Linear(4096,1470),
          nn.Linear(4096,245),
          #nn.Linear(4096,5),
       )
       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)
    def forward(self,x):
        x = self.extractor(x)
        #import pdb
        #pdb.set_trace()
        x = self.avgpool(x)
        x = x.view(x.size(0),-1)
        x = self.detector(x)
        b,_ = x.shape
        #x = x.view(b,7,7,30)
        x = x.view(b,7,7,5)
        
        #x = x.view(b,1,1,5)
        return x
     
if __name__ == '__main__':
    vgg = VGG()
    x  = torch.randn(1,3,512,512)
    feature,x_fea,x_avg = vgg(x)
    print(feature.shape)
    print(x_fea.shape)
    print(x_avg.shape)
 
    yolov1 = YOLOV1()
    feature = yolov1(x)
    # feature_size b*7*7*30
    print(feature.shape)

v1yolotrain.py

#coding:utf-8
from PennFudanDataset_main import *

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader
from v1yolomodel import *
import cv2
import numpy as np
import time
import sys
import os


## 数据处理
#服务器上的地址 /data/2020-722-YOLOV4-Practical-datasets/PenFudanPed
# dataset地址：/Users/zhaomignming/Documents/mmteacher/datasets
#datapath='/Users/zhaomignming/Documents/mmteacher/datasets/PennFudanPed'
datapath='/Users/zhaomingming/data_sets/PennFudanPed'
dataset = PennFudanDataset(datapath, get_transform(train=False))
dataset_test = PennFudanDataset(datapath, get_transform(train=False))

indices = torch.randperm(len(dataset)).tolist()
#dataset = torch.utils.data.Subset(dataset, indices[:-50])
#dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])
#dataset = torch.utils.data.Subset(dataset, indices[0:1])
#import pdb
#pdb.set_trace()
#dataset = torch.utils.data.Subset(dataset, indices[0:1])
dataset = torch.utils.data.Subset(dataset, [0])
dataset_test = torch.utils.data.Subset(dataset_test, indices[0:2])

def collate_fn(batch):
    return tuple(zip(*batch))
# define training and validation data loaders
train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=1,
        collate_fn=collate_fn)

val_loader = torch.utils.data.DataLoader(
        dataset_test, batch_size=2, shuffle=False, num_workers=4,
        collate_fn=collate_fn)


def input_process(batch):
    #import pdb
    #pdb.set_trace()
    batch_size=len(batch[0])
    input_batch= torch.zeros(batch_size,3,448,448)
    for i in range(batch_size):
        inputs_tmp = Variable(batch[0][i])
        inputs_tmp1=cv2.resize(inputs_tmp.permute([1,2,0]).numpy(),(448,448))
        inputs_tmp2=torch.tensor(inputs_tmp1).permute([2,0,1])
        input_batch[i:i+1,:,:,:]= torch.unsqueeze(inputs_tmp2,0)
    return input_batch 

#batch[1][0]['boxes'][0]
def target_process(batch,grid_number=7):
    # batch[1]表示label
    # batch[0]表示image
    batch_size=len(batch[0])
    target_batch= torch.zeros(batch_size,grid_number,grid_number,5)
    #import pdb
    #pdb.set_trace()
    for i in range(batch_size):
        labels = batch[1]
        batch_labels = labels[i]
        #import pdb
        #pdb.set_trace()
        number_box = len(batch_labels['boxes'])
        for wi in range(grid_number):
            for hi in range(grid_number):
                # 便利每个标注的框
                for bi in range(number_box):
                    bbox=batch_labels['boxes'][bi]
                    _,himg,wimg = batch[0][i].numpy().shape
                    bbox = bbox/ torch.tensor([wimg,himg,wimg,himg])
                    #import pdb
                    #pdb.set_trace()
                    center_x= (bbox[0]+bbox[2])*0.5
                    center_y= (bbox[1]+bbox[3])*0.5
                    #print("[%s,%s,%s],[%s,%s,%s]"%(wi/grid_number,center_x,(wi+1)/grid_number,hi/grid_number,center_y,(hi+1)/grid_number))
                    if center_x<=(wi+1)/grid_number and center_x>=wi/grid_number and center_y<=(hi+1)/grid_number and center_y>= hi/grid_number:
                        #pdb.set_trace()
                        cbbox =  torch.cat([torch.ones(1),bbox])
                        # 中心点落在grid内，
                        target_batch[i:i+1,wi:wi+1,hi:hi+1,:] = torch.unsqueeze(cbbox,0)
                    #else:
                        #cbbox =  torch.cat([torch.zeros(1),bbox])
                #import pdb
                #pdb.set_trace()
                #rint(target_batch[i:i+1,wi:wi+1,hi:hi+1,:])
                #target_batch[i:i+1,wi:wi+1,hi:hi+1,:] = torch.unsqueeze(cbbox,0)
    return target_batch
    

num_classes = 2
n_class    = 2
batch_size = 6
epochs     = 500
lr         = 1e-3
momentum   = 0
w_decay    = 1e-5
step_size  = 50
gamma      = 0.5

# 定义模型
yolov1_model = YOLOV1()
import pdb
pdb.set_trace()
# 定义优化算法为sdg:随机梯度下降
optimizer = optim.SGD(yolov1_model.detector.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay)

# 定义学习率变化策略
# 每30个epoch 学习率乘以0.5
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)  # decay LR by a factor of 0.5 every 30 epochs

# 矩阵形式写法，写法简单，但是可读性不强
def lossfunc(outputs,labels):
    #import  pdb
    #pdb.set_trace()
    tmp = (outputs-labels)**2
    return torch.sum(tmp,0).view(1,5).mm(torch.tensor([10,0.0001,0.0001,0.0001,0.0001]).view(5,1))

# 定义直接拟合的学习率，可读性强
def lossfunc_details(outputs,labels):
    # 判断维度
    assert ( outputs.shape == labels.shape),"outputs shape[%s] not equal labels shape[%s]"%(outputs.shape,labels.shape)
    #import pdb
    #pdb.set_trace()
    b,w,h,c = outputs.shape
    loss = 0
    #import pdb
    #pdb.set_trace()
    conf_loss_matrix = torch.zeros(b,w,h)
    geo_loss_matrix = torch.zeros(b,w,h)
    loss_matrix = torch.zeros(b,w,h)
    
    for bi in range(b):
        for wi in range(w):
            for hi in range(h):
                #import pdb
                #pdb.set_trace()
                # detect_vector=[confidence,x,y,w,h]
                detect_vector = outputs[bi,wi,hi]
                gt_dv = labels[bi,wi,hi]
                conf_pred = detect_vector[0]
                conf_gt = gt_dv[0]
                x_pred = detect_vector[1]
                x_gt = gt_dv[1]
                y_pred = detect_vector[2]
                y_gt = gt_dv[2]
                w_pred = detect_vector[3]
                w_gt = gt_dv[3]
                h_pred = detect_vector[4]
                h_gt = gt_dv[4]
                loss_confidence = (conf_pred-conf_gt)**2 
                #loss_geo = (x_pred-x_gt)**2 + (y_pred-y_gt)**2 + (w_pred**0.5-w_gt**0.5)**2 + (h_pred**0.5-h_gt**0.5)**2
            
                loss_geo = (x_pred-x_gt)**2 + (y_pred-y_gt)**2 + (w_pred-w_gt)**2 + (h_pred-h_gt)**2
                loss_geo = conf_gt*loss_geo
                loss_tmp = loss_confidence + 0.3*loss_geo
                #print("loss[%s,%s] = %s,%s"%(wi,hi,loss_confidence.item(),loss_geo.item()))
                loss += loss_tmp
                conf_loss_matrix[bi,wi,hi]=loss_confidence
                geo_loss_matrix[bi,wi,hi]=loss_geo
                loss_matrix[bi,wi,hi]=loss_tmp
    #打印出batch中每张片的位置loss,和置信度输出
    print(geo_loss_matrix)
    print(outputs[0,:,:,0]>0.5)
    return loss,loss_matrix,geo_loss_matrix,conf_loss_matrix
# train
def train():
    for epoch in range(epochs):
        ts = time.time()
        for iter, batch in enumerate(train_loader):
            optimizer.zero_grad()
            # 取图片
            inputs = input_process(batch)
            # 取标注
            labels = target_process(batch)
            
            # 获取得到输出
            outputs = yolov1_model(inputs)
            #import pdb
            #pdb.set_trace()
            #loss = criterion(outputs, labels)
            loss,lm,glm,clm = lossfunc_details(outputs,labels)
            loss.backward()
            optimizer.step()
            #print(torch.cat([outputs.detach().view(1,5),labels.view(1,5)],0).view(2,5))
            if iter % 10 == 0:
            #    print(torch.cat([outputs.detach().view(1,5),labels.view(1,5)],0).view(2,5))
                print("epoch{}, iter{}, loss: {}, lr: {}".format(epoch, iter, loss.data.item(),optimizer.state_dict()['param_groups'][0]['lr']))
        
        #print("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))
        #print("*"*30)
        #val(epoch)
        scheduler.step()
# inference
def val(epoch):
    yolov1_model.eval()
    total_ious = []
    pixel_accs = []
    for iter, batch in enumerate(val_loader):
        inputs = input_process(batch)
        target,label= target_process(batch)

        output = yolov1_model(inputs)
        output = output.data.cpu().numpy()

        N, _, h, w = output.shape
        pred = output.transpose(0, 2, 3, 1).reshape(-1, n_class).argmax(axis=1).reshape(N, h, w)
        

if __name__ == "__main__":
    train()

数据集的路径：/data/2020-722-YOLOV4-Practical-datasets

你可能感兴趣的:(cv,深度学习)

机器学习与深度学习间关系与区别 ℒℴѵℯ心·动ꦿ໊ོ꫞ 人工智能学习深度学习 python
一、机器学习概述定义机器学习（MachineLearning,ML）是一种通过数据驱动的方法，利用统计学和计算算法来训练模型，使计算机能够从数据中学习并自动进行预测或决策。机器学习通过分析大量数据样本，识别其中的模式和规律，从而对新的数据进行判断。其核心在于通过训练过程，让模型不断优化和提升其预测准确性。主要类型1.监督学习（SupervisedLearning）监督学习是指在训练数据集中包含输入
LocalDateTime 转 String igotyback java 开发语言
importjava.time.LocalDateTime;importjava.time.format.DateTimeFormatter;publicclassMain{publicstaticvoidmain(String[]args){//获取当前时间LocalDateTimenow=LocalDateTime.now();//定义日期格式化器DateTimeFormatterformat
Linux下QT开发的动态库界面弹出操作（SDL2） 13jjyao QT类 qt 开发语言 sdl2 linux
需求：操作系统为linux，开发框架为qt，做成需带界面的qt动态库，调用方为java等非qt程序难点：调用方为java等非qt程序，也就是说调用方肯定不带QApplication::exec()，缺少了这个，QTimer等事件和QT创建的窗口将不能弹出(包括opencv也是不能弹出)；这与qt调用本身qt库是有本质的区别的思路：1.调用方缺QApplication::exec()，那么我们在接口
将cmd中命令输出保存为txt文本文件落难Coder Windows cmd window
最近深度学习本地的训练中我们常常要在命令行中运行自己的代码，无可厚非，我们有必要保存我们的炼丹结果，但是复制命令行输出到txt是非常麻烦的，其实Windows下的命令行为我们提供了相应的操作。其基本的调用格式就是：运行指令>输出到的文件名称或者具体保存路径测试下，我打开cmd并且ping一下百度：pingwww.baidu.com>./data.txt看下相同目录下data.txt的输出：如果你再
多线程之——ExecutorCompletionService 阿福德
在我们开发中，经常会遇到这种情况，我们起多个线程来执行，等所有的线程都执行完成后，我们需要得到个线程的执行结果来进行聚合处理。我在内部代码评审时，发现了不少这种情况。看很多同学都使用正确，但比较啰嗦，效率也不高。本文介绍一个简单处理这种情况的方法：直接上代码：publicclassExecutorCompletionServiceTest{@TestpublicvoidtestExecutorCo
tiff批量转png 诺有缸的高飞鸟 opencv 图像处理 python opencv 图像处理
目录写在前面代码完写在前面1、本文内容tiff批量转png2、平台/环境opencv,python3、转载请注明出处：https://blog.csdn.net/qq_41102371/article/details/132975023代码importnumpyasnpimportcv2importosdeffindAllFile(base):file_list=[]forroot,ds,fsin
遥感影像的切片处理 sand&wich 计算机视觉 python 图像处理
在遥感影像分析中，经常需要将大尺寸的影像切分成小片段，以便于进行详细的分析和处理。这种方法特别适用于机器学习和图像处理任务，如对象检测、图像分类等。以下是如何使用Python和OpenCV库来实现这一过程，同时确保每个影像片段保留正确的地理信息。准备环境首先，确保安装了必要的Python库，包括numpy、opencv-python和xml.etree.ElementTree。这些库将用于图像处理
windows下python opencv ffmpeg读取摄像头实现rtsp推流拉流图像处理大大大大大牛啊 opencv实战代码讲解视觉图像项目 windows python opencv
windows下pythonopencvffmpeg读取摄像头实现rtsp推流拉流整体流程1.下载所需文件1.1下载rtsp推流服务器1.2下载ffmpeg2.开启RTSP服务器3.opencv读取摄像头并调用ffmpeg进行推流4.opencv进行拉流5.opencv异步拉流整体流程1.下载所需文件1.1下载rtsp推流服务器下载RTSP服务器下载页面https://github.com/blu
c++ opencv4.3 sift匹配图像处理大大大大大牛啊图像处理 opencv实战代码讲解 opencv sift c++opencv4 特征点
c++opencv4.3sift匹配main.cppintmain(){vectorkeypoints1,keypoints2;Matimg1,img2,descriptors1,descriptors2;intnumF
推荐3家毕业AI论文可五分钟一键生成！文末附免费教程！小猪包333 写论文人工智能 AI写作深度学习计算机视觉
在当前的学术研究和写作领域，AI论文生成器已经成为许多研究人员和学生的重要工具。这些工具不仅能够帮助用户快速生成高质量的论文内容，还能进行内容优化、查重和排版等操作。以下是三款值得推荐的AI论文生成器：千笔-AIPassPaper、懒人论文以及AIPaperPass。千笔-AIPassPaper千笔-AIPassPaper是一款基于深度学习和自然语言处理技术的AI写作助手，旨在帮助用户快速生成高质
AI大模型的架构演进与最新发展季风泯灭的季节 AI大模型应用技术二人工智能架构
随着深度学习的发展，AI大模型（LargeLanguageModels,LLMs）在自然语言处理、计算机视觉等领域取得了革命性的进展。本文将详细探讨AI大模型的架构演进，包括从Transformer的提出到GPT、BERT、T5等模型的历史演变，并探讨这些模型的技术细节及其在现代人工智能中的核心作用。一、基础模型介绍：Transformer的核心原理Transformer架构的背景在Transfo
[实践应用] 深度学习之模型性能评估指标 YuanDaima2048 深度学习工具使用深度学习人工智能损失函数性能评估 pytorch python 机器学习
文章总览：YuanDaiMa2048博客文章总览深度学习之模型性能评估指标分类任务回归任务排序任务聚类任务生成任务其他介绍在机器学习和深度学习领域，评估模型性能是一项至关重要的任务。不同的学习任务需要不同的性能指标来衡量模型的有效性。以下是对一些常见任务及其相应的性能评估指标的详细解释和总结。分类任务分类任务是指模型需要将输入数据分配到预定义的类别或标签中。以下是分类任务中常用的性能指标：准确率(
[实践应用] 深度学习之优化器 YuanDaima2048 深度学习工具使用 pytorch 深度学习人工智能机器学习 python 优化器
文章总览：YuanDaiMa2048博客文章总览深度学习之优化器1.随机梯度下降（SGD）2.动量优化（Momentum）3.自适应梯度（Adagrad）4.自适应矩估计（Adam）5.RMSprop总结其他介绍在深度学习中，优化器用于更新模型的参数，以最小化损失函数。常见的优化函数有很多种，下面是几种主流的优化器及其特点、原理和PyTorch实现：1.随机梯度下降（SGD）原理:随机梯度下降通过
生成式地图制图 Bwywb_3 深度学习机器学习深度学习生成对抗网络
生成式地图制图（GenerativeCartography）是一种利用生成式算法和人工智能技术自动创建地图的技术。它结合了传统的地理信息系统（GIS）技术与现代生成模型（如深度学习、GANs等），能够根据输入的数据自动生成符合需求的地图。这种方法在城市规划、虚拟环境设计、游戏开发等多个领域具有应用前景。主要特点：自动化生成：通过算法和模型，系统能够根据输入的地理或空间数据自动生成地图，而无需人工逐
ubuntu安装opencv最快的方法 Derek重名了
最快方法，当然不能太多文字$sudoapt-getinstallpython-opencv借助python就可以把ubuntu的opencv环境搞起来，非常快非常容易参考：https://docs.opencv.org/trunk/d2/de6/tutorial_py_setup_in_ubuntu.html
代码的执行效果高天
packagecom20210409;publicclassdemo04{publicstaticvoidmain(String[]args){//////&&当前的条件不满足,则最后结果一定不满足,后面的条件不再执行////&不管条件是否满足所有条件均作判断//intx=1,y=1;//if(++y==2&&x++==2){//x=7;//}//System.out.println("x="+x
吴恩达深度学习笔记(30)-正则化的解释极客Array
正则化（Regularization）深度学习可能存在过拟合问题——高方差，有两个解决方法，一个是正则化，另一个是准备更多的数据，这是非常可靠的方法，但你可能无法时时刻刻准备足够多的训练数据或者获取更多数据的成本很高，但正则化通常有助于避免过拟合或减少你的网络误差。如果你怀疑神经网络过度拟合了数据，即存在高方差问题，那么最先想到的方法可能是正则化，另一个解决高方差的方法就是准备更多数据，这也是非常
个人学习笔记7-6：动手学深度学习pytorch版-李沐浪子L 深度学习深度学习笔记计算机视觉 python 人工智能神经网络 pytorch
#人工智能##深度学习##语义分割##计算机视觉##神经网络#计算机视觉13.11全卷积网络全卷积网络（fullyconvolutionalnetwork，FCN）采用卷积神经网络实现了从图像像素到像素类别的变换。引入l转置卷积（transposedconvolution）实现的，输出的类别预测与输入图像在像素级别上具有一一对应关系：通道维的输出即该位置对应像素的类别预测。13.11.1构造模型下
深度学习-点击率预估-研究论文2024-09-14速读 sp_fyf_2024 深度学习人工智能
深度学习-点击率预估-研究论文2024-09-14速读1.DeepTargetSessionInterestNetworkforClick-ThroughRatePredictionHZhong,JMa,XDuan,SGu,JYao-2024InternationalJointConferenceonNeuralNetworks,2024深度目标会话兴趣网络用于点击率预测摘要：这篇文章提出了一种新
使用Python和Playwright破解滑动验证码 asfdsgdf python 开发语言
滑动验证码是一种常见的验证码形式，通过拖动滑块将缺失的拼图块对准原图中的空缺位置来验证用户操作。本文将介绍如何使用Python中的OpenCV进行模板匹配，并结合Playwright实现自动化破解滑动验证码的过程。所需技术OpenCV模板匹配：用于识别滑块在背景图中的正确位置。Python：主要编程语言。Playwright：用于浏览器自动化，模拟用户操作。破解过程概述获取验证码图像：下载背景图和
OpenCV图像处理技术（Python）——入门森屿_ opencv
©FuXianjun.AllRightsReserved.OpenCV入门图像作为人类感知世界的视觉基础，是人类获取信息、表达信息的重要手段，OpenCV作为一个开源的计算机视觉库，它包括几百个易用的图像成像和视觉函数，既可以用于学术研究，也可用于工业邻域，它于1999年由因特尔的GaryBradski启动，OpenCV库主要由C和C++语言编写，它可以在多个操作系统上运行。1.1图像处理基本操作
损失函数与反向传播 Star_. PyTorch pytorch 深度学习 python
损失函数定义与作用损失函数(lossfunction)在深度学习领域是用来计算搭建模型预测的输出值和真实值之间的误差。1.损失函数越小越好2.计算实际输出与目标之间的差距3.为更新输出提供依据（反向传播)常见的损失函数回归常见的损失函数有：均方差（MeanSquaredError，MSE）、平均绝对误差（MeanAbsoluteErrorLoss，MAE）、HuberLoss是一种将MSE与MAE
opencv学习：图像旋转的两种方法，旋转后的图片进行模板匹配代码实现夜清寒风学习 opencv 机器学习人工智能计算机视觉
图像旋转在图像处理中，rotate和rot90是两种常见的图像旋转方法，它们在功能和使用上有一些区别。下面我将分别介绍这两种方法，并解释它们的主要区别rot90方法rot90方法是NumPy提供的一种数组旋转函数，它主要用于对二维数组（如图像）进行90度的旋转。这个方法比较简单，只支持90度的倍数旋转，不支持任意角度旋转。使用NumPy进行旋转使用NumPy的rot90函数对模板图像进行旋转操作。
探索创新科技： Lite-Mono - 简约高效的小型化Mono框架杭律沛Meris
探索创新科技：Lite-Mono-简约高效的小型化Mono框架Lite-Mono[CVPR2023]Lite-Mono:ALightweightCNNandTransformerArchitectureforSelf-SupervisedMonocularDepthEstimation项目地址:https://gitcode.com/gh_mirrors/li/Lite-Mono如果你在寻找一个轻
Python OpenCV图像处理：从基础到高级的全方位指南极客代码玩转Python 开发语言 python opencv 图像处理计算机视觉
目录第一部分：PythonOpenCV图像处理基础1.1OpenCV简介1.2PythonOpenCV安装1.3实战案例：图像显示与保存1.4注意事项第二部分：PythonOpenCV图像处理高级技巧2.1图像变换2.2图像增强2.3图像复原第三部分：PythonOpenCV图像处理实战项目3.1图像滤波3.2图像分割3.3图像特征提取第四部分：PythonOpenCV图像处理注意事项与优化策略4
C# 禁止程序重复启动 wiseyao1219 c#
修改：Program.cs[STAThread]staticvoidMain(){Mutexmutex=newMutex(true,"NewGuid123456",outboolisCreatedNew);if(!isCreatedNew){MessageBox.Show(Application.ProductName+"isrunning...");return;}Application.Ena
2018-08-16【Swift 4.1】关于Swift4.0以后调用MJExtension无法模型转换问题码农happy
1、本人使用swift4.1，弄了一晚上才弄好，结果还是一个小问题真是尴尬，要在model中每个属性前面加上@objcimportUIKitclassUserModel:NSObject{@objcvardix=String()}letdic=["dix":"ffffff"]asNSDictionaryletmodel=UserModel.mj_object(withKeyValues:dic)!
python图像匹配_opencvpython中的图像匹配 weixin_39585675 python图像匹配
我一直在做一个项目，用opencvpython识别相机中显示的标志。我已经尝试过使用surf、颜色直方图匹配和模板匹配。但在这3个问题中，它并不总是返回正确的答案。我现在想要的是，解决我这个问题的最好办法是什么。模板图像示例：以下是摄像头中显示的标志示例。如果这是我想要识别的图像，该怎么用？在更新matchTemplate中的代码flags=["Cambodia.jpg","Laos.jpg","
利用Python+OpenCV实现截图匹配图像，支持自适应缩放、灰度匹配、区域匹配、匹配多个结果 xu-jssy Python自动化脚本 python opencv 开发语言图像处理自动化
可以直接通过pip获取，无需手动安装其他依赖pipinstallxug示例：importxugxug.find_image_on_screen(,,,)=========================================================================一、依赖安装pipinstallopencv-pythonpipinstallpyautogui二、获
【深度学习】训练过程中一个OOM的问题，太难查了 weixin_40293999 深度学习深度学习人工智能
现象：各位大佬又遇到过ubuntu的这个问题么？现象是在训练过程中，ssh上不去了，能ping通，没死机，但是ubunutu的pc侧的显示器，鼠标啥都不好用了。只能重启。问题原因：OOM了95G，尼玛！！！！pytorch爆内存了，然后journald假死了，在journald被watchdog干掉之后，系统就崩溃了。这种规模的爆内存一般，即使被oomkill了，也要卡半天的，确实会这样，能不能配
JAVA中的Enum 周凡杨 java enum 枚举
Enum是计算机编程语言中的一种数据类型---枚举类型。在实际问题中，有些变量的取值被限定在一个有限的范围内。例如，一个星期内只有七天我们通常这样实现上面的定义： public String monday; public String tuesday; public String wensday; public String thursday
赶集网mysql开发36条军规 Bill_chen mysql 业务架构设计 mysql调优 mysql性能优化
(一)核心军规 (1)不在数据库做运算 cpu计算务必移至业务层； (2)控制单表数据量 int型不超过1000w，含char则不超过500w；合理分表；限制单库表数量在300以内； (3)控制列数量字段少而精，字段数建议在20以内
Shell test命令 daizj shell 字符串 test 数字文件比较
Shell test命令 Shell中的 test 命令用于检查某个条件是否成立，它可以进行数值、字符和文件三个方面的测试。数值测试参数说明 -eq 等于则为真 -ne 不等于则为真 -gt 大于则为真 -ge 大于等于则为真 -lt 小于则为真 -le 小于等于则为真实例演示： num1=100 num2=100if test $[num1]
XFire框架实现WebService(二) 周凡杨 java webservice
有了XFire框架实现WebService(一)，就可以继续开发WebService的简单应用。 Webservice的服务端(WEB工程)：两个java bean类： Course.java package cn.com.bean; public class Course { private
重绘之画图板朱辉辉33 画图板
上次博客讲的五子棋重绘比较简单，因为只要在重写系统重绘方法paint（）时加入棋盘和棋子的绘制。这次我想说说画图板的重绘。画图板重绘难在需要重绘的类型很多，比如说里面有矩形，园，直线之类的，所以我们要想办法将里面的图形加入一个队列中，这样在重绘时就
Java的IO流西蜀石兰 java
刚学Java的IO流时，被各种inputStream流弄的很迷糊，看老罗视频时说想象成插在文件上的一根管道，当初听时觉得自己很明白，可到自己用时，有不知道怎么代码了。。。每当遇到这种问题时，我习惯性的从头开始理逻辑，会问自己一些很简单的问题，把这些简单的问题想明白了，再看代码时才不会迷糊。 IO流作用是什么？答：实现对文件的读写，这里的文件是广义的； Java如何实现程序到文件
No matching PlatformTransactionManager bean found for qualifier 'add' - neither 林鹤霄
java.lang.IllegalStateException: No matching PlatformTransactionManager bean found for qualifier 'add' - neither qualifier match nor bean name match! 网上找了好多的资料没能解决，后来发现：项目中使用的是xml配置的方式配置事务，但是
Row size too large (> 8126). Changing some columns to TEXT or BLOB aigo column
原文：http://stackoverflow.com/questions/15585602/change-limit-for-mysql-row-size-too-large 异常信息： Row size too large (> 8126). Changing some columns to TEXT or BLOB or using ROW_FORMAT=DYNAM
JS 格式化时间 alxw4616 JavaScript
/** * 格式化时间 2013/6/13 by 半仙 [email protected] * 需要 pad 函数 * 接收可用的时间值. * 返回替换时间占位符后的字符串 * * 时间占位符:年 Y 月 M 日 D 小时 h 分 m 秒 s 重复次数表示占位数 * 如 YYYY 4占4位 YY 占2位<p></p> * MM DD hh mm
队列中数据的移除问题百合不是茶队列移除
队列的移除一般都是使用的remov();都可以移除的,但是在昨天做线程移除的时候出现了点问题,没有将遍历出来的全部移除, 代码如下; // package com.Thread0715.com; import java.util.ArrayList; public class Threa
Runnable接口使用实例 bijian1013 java thread Runnable java多线程
Runnable接口 a. 该接口只有一个方法：public void run(); b. 实现该接口的类必须覆盖该run方法 c. 实现了Runnable接口的类并不具有任何天
oracle里的extend详解 bijian1013 oracle 数据库 extend
扩展已知的数组空间，例： DECLARE TYPE CourseList IS TABLE OF VARCHAR2(10); courses CourseList; BEGIN -- 初始化数组元素，大小为3 courses := CourseList('Biol 4412 ', 'Psyc 3112 ', 'Anth 3001 '); --
【httpclient】httpclient发送表单POST请求 bit1129 httpclient
浏览器Form Post请求浏览器可以通过提交表单的方式向服务器发起POST请求，这种形式的POST请求不同于一般的POST请求 1. 一般的POST请求，将请求数据放置于请求体中，服务器端以二进制流的方式读取数据，HttpServletRequest.getInputStream()。这种方式的请求可以处理任意数据形式的POST请求，比如请求数据是字符串或者是二进制数据 2. Form
【Hive十三】Hive读写Avro格式的数据 bit1129 hive
1. 原始数据 hive> select * from word; OK 1 MSN 10 QQ 100 Gtalk 1000 Skype 2. 创建avro格式的数据表 hive> CREATE TABLE avro_table(age INT, name STRING)STORE
nginx+lua+redis自动识别封解禁频繁访问IP ronin47
在站点遇到攻击且无明显攻击特征，造成站点访问慢，nginx不断返回502等错误时，可利用nginx+lua+redis实现在指定的时间段内，若单IP的请求量达到指定的数量后对该IP进行封禁，nginx返回403禁止访问。利用redis的expire命令设置封禁IP的过期时间达到在指定的封禁时间后实行自动解封的目的。一、安装环境： CentOS x64 release 6.4(Fin
java-二叉树的遍历-先序、中序、后序（递归和非递归）、层次遍历 bylijinnan java
import java.util.LinkedList; import java.util.List; import java.util.Stack; public class BinTreeTraverse { //private int[] array={ 1, 2, 3, 4, 5, 6, 7, 8, 9 }; private int[] array={ 10,6,
Spring源码学习-XML 配置方式的IoC容器启动过程分析 bylijinnan java spring IOC
以FileSystemXmlApplicationContext为例，把Spring IoC容器的初始化流程走一遍： ApplicationContext context = new FileSystemXmlApplicationContext ("C:/Users/ZARA/workspace/HelloSpring/src/Beans.xml&q
[科研与项目]民营企业请慎重参与军事科技工程 comsci 企业
军事科研工程和项目并非要用最先进，最时髦的技术，而是要做到“万无一失” 而民营科技企业在搞科技创新工程的时候，往往考虑的是技术的先进性，而对先进技术带来的风险考虑得不够，在今天提倡军民融合发展的大环境下，这种“万无一失”和“时髦性”的矛盾会日益凸显。。。。。。所以请大家在参与任何重大的军事和政府项目之前，对
spring 定时器-两种方式 cuityang spring quartz 定时器
方式一：间隔一定时间运行 <bean id="updateSessionIdTask" class="com.yang.iprms.common.UpdateSessionTask" autowire="byName" /> <bean id="updateSessionIdSchedule
简述一下关于BroadView站点的相关设计 damoqiongqiu view
终于弄上线了，累趴，戳这里http://www.broadview.com.cn 简述一下相关的技术点前端：jQuery+BootStrap3.2+HandleBars，全站Ajax（貌似对SEO的影响很大啊！怎么破？），用Grunt对全部JS做了压缩处理，对部分JS和CSS做了合并（模块间存在很多依赖，全部合并比较繁琐，待完善）。后端：U
运维 PHP问题汇总 dcj3sjt126com windows2003
1、Dede(织梦)发表文章时,内容自动添加关键字显示空白页解决方法：后台>系统>系统基本参数>核心设置>关键字替换（是/否），这里选择“是”。后台>系统>系统基本参数>其他选项>自动提取关键字，这里选择“是”。 2、解决PHP168超级管理员上传图片提示你的空间不足网站是用PHP168做的，反映使用管理员在后台无法
mac 下安装php扩展 - mcrypt dcj3sjt126com PHP
MCrypt是一个功能强大的加密算法扩展库，它包括有22种算法，phpMyAdmin依赖这个PHP扩展，具体如下：下载并解压libmcrypt-2.5.8.tar.gz。在终端执行如下命令： tar zxvf libmcrypt-2.5.8.tar.gz cd libmcrypt-2.5.8/ ./configure --disable-posix-threads --
MongoDB更新文档 [四] eksliang mongodb Mongodb更新文档
MongoDB更新文档转载请出自出处：http://eksliang.iteye.com/blog/2174104 MongoDB对文档的CURD，前面的博客简单介绍了，但是对文档更新篇幅比较大，所以这里单独拿出来。语法结构如下： db.collection.update( criteria, objNew, upsert, multi) 参数含义参数
Linux下的解压，移除，复制，查看tomcat命令 y806839048 tomcat
重复myeclipse生成webservice有问题删除以前的，干净 1、先切换到：cd usr/local/tomcat5/logs 2、tail -f catalina.out 3、这样运行时就可以实时查看运行日志了 Ctrl+c 是退出tail命令。有问题不明的先注掉 cp /opt/tomcat-6.0.44/webapps/g
Spring之使用事务缘由(3-XML实现) ihuning spring
用事务通知声明式地管理事务事务管理是一种横切关注点。为了在 Spring 2.x 中启用声明式事务管理，可以通过 tx Schema 中定义的 <tx:advice> 元素声明事务通知，为此必须事先将这个 Schema 定义添加到 <beans> 根元素中去。声明了事务通知后，就需要将它与切入点关联起来。由于事务通知是在 <aop:
GCD使用经验与技巧浅谈啸笑天 GC
前言 GCD(Grand Central Dispatch)可以说是Mac、iOS开发中的一大“利器”，本文就总结一些有关使用GCD的经验与技巧。 dispatch_once_t必须是全局或static变量这一条算是“老生常谈”了，但我认为还是有必要强调一次，毕竟非全局或非static的dispatch_once_t变量在使用时会导致非常不好排查的bug，正确的如下： 1
linux（Ubuntu）下常用命令备忘录1 macroli linux 工作 ubuntu
在使用下面的命令是可以通过--help来获取更多的信息1,查询当前目录文件列表：ls ls命令默认状态下将按首字母升序列出你当前文件夹下面的所有内容，但这样直接运行所得到的信息也是比较少的，通常它可以结合以下这些参数运行以查询更多的信息： ls / 显示/.下的所有文件和目录 ls -l 给出文件或者文件夹的详细信息 ls -a 显示所有文件，包括隐藏文
nodejs同步操作mysql qiaolevip 学习永无止境每天进步一点点 mysql nodejs
// db-util.js var mysql = require('mysql'); var pool = mysql.createPool({ connectionLimit : 10, host: 'localhost', user: 'root', password: '', database: 'test', port: 3306 });
一起学Hive系列文章 superlxw1234 hive Hive入门
[一起学Hive]系列文章目录贴，入门Hive，持续更新中。 [一起学Hive]之一—Hive概述，Hive是什么 [一起学Hive]之二—Hive函数大全-完整版 [一起学Hive]之三—Hive中的数据库(Database)和表(Table) [一起学Hive]之四-Hive的安装配置 [一起学Hive]之五-Hive的视图和分区 [一起学Hive
Spring开发利器：Spring Tool Suite 3.7.0 发布 wiselyman spring
Spring Tool Suite(简称STS)是基于Eclipse，专门针对Spring开发者提供大量的便捷功能的优秀开发工具。在3.7.0版本主要做了如下的更新：将eclipse版本更新至Eclipse Mars 4.5 GA Spring Boot(JavaEE开发的颠覆者集大成者，推荐大家学习)的配置语言YAML编辑器的支持(包含自动提示，