matlab v4是什么意思,train.m · Song_Yuqi/yolov3-yolov4-matlab - Gitee.com

addpath('./CustomLayers/','./utils/')

%% 1、准备数据,适合yolov3,yolov4,无需VOC-xml格式

% 数据问题参考,满足以下其一即可:

% 1、matlab中标注参考 https://ww2.mathworks.cn/help/vision/ug/get-started-with-the-image-labeler.html?requestedDomain=cn

% 2、外部标注文件导入到matlab参考 https://blog.csdn.net/cuixing001/article/details/77092627

load gTruthPerson.mat % 自己bbox标注文件,格式参考上面链接,最终为table类型,看起来直观

cfg_file = './cfg/yolov3-tiny.cfg';

weight_file = './weights/yolov3-tiny.weights'; %预训练backbone权重,其他类型也OK

annotateImgHeight = 1024; % 自己标注的图像原始高度

annotateImgWeight = 1280; % 自己标注的图像原始宽度

% 类别名字和对应的ID序号

classesNames = gTruth.Properties.VariableNames(2:end);

classIDs = (0:length(classesNames)-1);% 从0开始标注,保持与darknet官网一致

numClasses = length(classesNames);

structNamesIDs = struct();

for i = 1:numClasses

structNamesIDs.(classesNames{i}) = classIDs(i);

end

% 创建可迭代的数据集

bldsTrain = boxLabelDatastore(gTruth(:, 2:end));

imdsTrain = imageDatastore(gTruth.imageFilename);

miniBatch = 16;

imdsTrain.ReadSize = miniBatch;

bldsTrain.ReadSize = miniBatch;

trainingData = combine(imdsTrain, bldsTrain);

%% 设定超参数,导入训练权重或者导入matlab其他官方预训练权重,这里以darknet中的".weight"二进制权重

[lgModel,hyperParams] = importDarknetWeights(cfg_file,weight_file);

% analyzeNetwork(lgModel);% 可视化导入网络

inputWeight = str2double(hyperParams.width);

inputHeight = str2double(hyperParams.height);

networkInputSize = [inputHeight inputWeight 3];

preprocessedTrainingData = transform(trainingData,@(data)preprocessTrainData(data,networkInputSize,structNamesIDs));

% 预览标注数据

for k = 1:1

data = read(preprocessedTrainingData);

I = data{1,1}{1};

bbox = data{1,2}{1};

annotatedImage = zeros(size(I),'like',I);

for i = 1:size(I,4)

annotatedImage(:,:,:,i) = insertShape(I(:,:,:,i),'Rectangle',bbox{i}(:,1:4));

end

annotatedImage = imresize(annotatedImage,2);

figure

montage(annotatedImage)

end

numAnchors = 9;

anchorBoxes = estimateAnchorBoxes(trainingData,numAnchors).*[inputHeight/annotateImgHeight,inputWeight/annotateImgWeight];% anchorBoxes是networkInputSize上的大小

area = anchorBoxes(:, 1).*anchorBoxes(:, 2);

[~, idx] = sort(area, 'descend');

anchorBoxes = anchorBoxes(idx, :);

anchorBoxes = round(anchorBoxes);

anchorBoxMasks = {[1,2,3],[4,5,6],[7,8,9]};% 面积大的anchor结合特征图较小的yolov3层,面积小的anchor结合特征图较大的yolov3层

%% 2,搭建darknet网络,加入yolov3Layer

anchorBoxes(:,[2,1]) = anchorBoxes(:,[1,2]);% anchorBoxes现在是宽高,与darknet官网保持一致

imageSize = lgModel.Layers(1).InputSize(1:2);

arc = 'default';

yoloModule1 = [convolution2dLayer(1,length(anchorBoxMasks{1})*(5+numClasses),'Name','yoloconv1');

yolov3Layer('yolov3layer1',anchorBoxes(anchorBoxMasks{1},:),numClasses,1,imageSize,arc)];

yoloModule2 = [convolution2dLayer(1,length(anchorBoxMasks{2})*(5+numClasses),'Name','yoloconv2');

yolov3Layer('yolov3layer2',anchorBoxes(anchorBoxMasks{2},:),numClasses,2,imageSize,arc)];

lgModel = removeLayers(lgModel,{'yolo_v3_id1','yolo_v3_id2'});

lgModel = replaceLayer(lgModel,'conv_17',yoloModule1);

lgModel = replaceLayer(lgModel,'conv_24',yoloModule2);

analyzeNetwork(lgModel);

yoloLayerNumber = [36,47];% 注意!!!!!yolov3或者yolov4层在layers数组中的位置,看模型图得出!!!!!

model = dlnetwork(lgModel);

%% 3,for loop循环迭代更新模型

% 训练选项

numIterations = 2000;

learningRate = 0.001;

warmupPeriod = 1000;

l2Regularization = 0.0005;

penaltyThreshold = 0.5;

velocity = [];

executionEnvironment = "auto";

figure;

ax1 = subplot(211);

ax2 = subplot(212);

lossPlotter = animatedline(ax1);

learningRatePlotter = animatedline(ax2);

nEpochs = 10;

allIteration = 1;

for numEpoch = 1:nEpochs

reset(preprocessedTrainingData);% Reset datastore.

iteration = 1;

while hasdata(preprocessedTrainingData)

t_start = tic;

% Custom training loop.

% Read batch of data and create batch of images and

% ground truths.

outDataTable = read(preprocessedTrainingData);

XTrain = outDataTable{1,1}{1};

YTrain = outDataTable{1,2}{1};

if isempty(YTrain)

continue;

end

% Convert mini-batch of data to dlarray.

XTrain = dlarray(single(XTrain),'SSCB');

% If training on a GPU, then convert data to gpuArray.

if (executionEnvironment == "auto" && canUseGPU) || executionEnvironment == "gpu"

XTrain = gpuArray(XTrain);

end

% Evaluate the model gradients and loss using dlfeval and the

% modelGradients function.

[gradients,loss,state] = dlfeval(@modelGradients, model, XTrain, YTrain,yoloLayerNumber);

% Apply L2 regularization.

gradients = dlupdate(@(g,w) g + l2Regularization*w, gradients, model.Learnables);

% Update the network learnable parameters using the SGDM optimizer.

[model, velocity] = sgdmupdate(model, gradients, velocity, learningRate);

% Update the state parameters of dlnetwork.

model.State = state;

% save model

if (mod(numEpoch,5)==0)&&(iteration==1) % 设置每5个epoch保存下权重

timeStr = datestr(now,'yyyy_mm_dd_HH_MM_SS');

matlabModel = fullfile('./save',[timeStr,'.mat']);

save(matlabModel,'model');

cfgFile = fullfile('./cfg',[timeStr,'.cfg']);

darknetModel = fullfile('./weights',[timeStr,'.weights']);

exportDarkNetNetwork(model,hyperParams,cfgFile,darknetModel);

end

fprintf('[%d][%d/%d]\t BatchTime:%.2f\n\n',numEpoch,iteration,...

floor(numpartitions(preprocessedTrainingData)/miniBatch),toc(t_start));

% Update training plot with new points.

addpoints(lossPlotter, allIteration, double(gather(extractdata(loss))));

% addpoints(learningRatePlotter, iteration, currentLR);

iteration = iteration +1;

allIteration = allIteration+1;

drawnow

end

end

%% yolov3/yolov4 损失函数

function [gradients, totalLoss, state] = modelGradients(net, XTrain, YTrain,yoloLayerNumber)

% 功能:计算模型梯度,求取损失

% allYoloLayers = net.Layers(yoloLayerNumber);

yolov3layerNames = net.OutputNames;

outFeatureMaps = cell(size(yolov3layerNames));

[outFeatureMaps{:},state] = forward(net,XTrain,'Outputs',yolov3layerNames);

boxLoss = dlarray(0);

objLoss = dlarray(0);

clsLoss = dlarray(0);

for i = 1:length(outFeatureMaps)

currentYOLOV3Layer = net.Layers(yoloLayerNumber(i));

currentFeatureMap = outFeatureMaps{i};

% 由于yolov3Layer类里面predict函数未改变类属性,故重新给属性赋值

currentYOLOV3Layer.numY = size(currentFeatureMap,1);

currentYOLOV3Layer.numX = size(currentFeatureMap,2);

currentYOLOV3Layer.stride = max(currentYOLOV3Layer.imageSize)./max(currentYOLOV3Layer.numX,...

currentYOLOV3Layer.numY);

% reshape currentFeatureMap到有意义的维度,h*w*c*bs --> h*w*(5+nc)*na*bs

% --> bs*na*h*w*(5+nc),最终的维度方式与darknet官网兼容

bs = size(currentFeatureMap,4);

h = currentYOLOV3Layer.numY;

w = currentYOLOV3Layer.numX;

na = currentYOLOV3Layer.nAnchors;

nc = currentYOLOV3Layer.nClasses;

arc = currentYOLOV3Layer.arc;

currentFeatureMap = reshape(currentFeatureMap,h,w,5+nc,na,bs);% h*w*(5+nc)*na*bs

currentFeatureMap = permute(currentFeatureMap,[5,4,1,2,3]);% bs*na*h*w*(5+nc)

% 构建目标值

[tcls,tbox,indices,anchor_grids] = buildTargets(currentYOLOV3Layer,YTrain);

N = size(tcls,1);% N<=YTrain中所有检测框的数量,其代表有效的数量

tobj = zeros(N,1);

featuresCh = zeros(N,(5+nc),'like',currentFeatureMap);

if N

b = indices(:,1); % N*1

a = indices(:,2); % N*1

gj = indices(:,3); % N*1

gi = indices(:,4); % N*1

for idx = 1:N

featuresChannels = currentFeatureMap(b(idx),a(idx),gj(idx),gi(idx),:);% 1*1*1*1*(5+nc)

featuresChannels = squeeze(featuresChannels);%(5+nc)*1

featuresChannels = featuresChannels';%1*(5+nc)

featuresCh(idx,:) = featuresChannels; % N*(5+nc)

tobj(idx) = 1.0;

end

% mse or GIoU loss

predictXY = sigmoid(featuresCh(:,1:2)); % 大小为N*2,预测对应xy

predictWH = exp(featuresCh(:,3:4)).*anchor_grids;% 大小为N*2

predictBboxs = cat(2,predictXY,predictWH);% 大小为N*4

isUseGIOU = 0;

if isUseGIOU

giouRatio = getGIOU(predictBboxs,tbox);%梯度需要计算,然而反向传播非常耗时

boxLoss = boxLoss+mean(1-giouRatio,'all');

else

boxLoss = mse(predictBboxs,tbox,'DataFormat','BC');

end

if strcmpi(arc,'default')&&(nc>1)

tcls_ = zeros('like',featuresCh(:,6:end));

for idx = 1:N

tcls_(idx,tcls+1) = 1.0;% 确保类别标签是从0开始标注的索引,否则这里会超出维度

end

clsLoss = clsLoss + crossentropy(sigmoid(featuresCh(:,6:end)),tcls_,...

'DataFormat','BC',...

'TargetCategories','independent');

end

else

end

if strcmpi(arc,'default')

if N

objLoss = objLoss+crossentropy(sigmoid(featuresCh(:,5)),tobj,...

'DataFormat','BC',...

'TargetCategories','independent');

end

elseif strcmpi(arc,'uCE')||strcmpi(arc,'uBCE') % obj和class当成一个类别统一计算损失

if N

b = indices(:,1); % N*1

a = indices(:,2); % N*1

gj = indices(:,3); % N*1

gi = indices(:,4); % N*1

tcls_ = zeros('like',featuresCh(:,5:end));

for idx = 1:N

featuresChannels = currentFeatureMap(b(idx),a(idx),gj(idx),gi(idx),:);% 1*1*1*1*(5+nc)

featuresChannels = squeeze(featuresChannels);%(5+nc)*1

featuresChannels = featuresChannels';%1*(5+nc)

featuresCh(idx,:) = featuresChannels; % N*(5+nc)

tcls_(idx,tcls+1) = 1.0;

end

clsLoss = clsLoss + crossentropy(featuresCh(:,5:end),tcls_,...

'DataFormat','BC',...

'TargetCategories','independent');

end

end

end

totalLoss = boxLoss+objLoss+clsLoss;

fprintf('boxLoss:%.2f, objLoss:%.2f, clsLoss:%.2f, totalLoss:%.2f\n',...

boxLoss,objLoss,clsLoss,totalLoss);

% Compute gradients of learnables with regard to loss.

gradients = dlgradient(totalLoss, net.Learnables);

end

function [tcls,tbox,indices,anchor_grids] = buildTargets(currentYOLOV3Layer,YTrain)

% 功能:构建目标值

% 输入:

% currentYOLOV3Layer:网络中yolo输出层之一

% YTrain:网络目标值,bs*1大小的cell类型,每个cell下包含Mi*[x,y,width,height,classID]大小的矩阵,Mi为第i张图片含有目标的检测数量,

% 注意其存储的坐标值是相对网络输入图像上的坐标,并无归一化

% 输出:

% tcls:目标真实类别classID,N*1大小,每一项存储classID,其中N<=sum(Mi),只输出有效数量的类别N

% tbox:目标的boundingBox,存储真实目标在特征图上的位置(除去x,y整数部分,保留小数),N*4大小,每项形式为[Xdecimal,Ydecimal,gw,gh]

% indices:目标检测框在高维数组中的位置,N*4大小,每一项存储检测框的位置,其形式为[bs,na,gy,gx],他们都是从1开始的索引,与Python不同

% anchor_grids:所用有效的在特征图上的anchor,N*2大小,每项形式为[anchor_w,anchor_h]

% 注意:

% 此函数是核心,用于产生各个yolov3损失类型的目标,输出每个参数维度都有意义,顺序要保持一致,总的高维顺序为bs*na*h*w*(5+nc),此顺序为darknet

% 官网的顺序一致,非matlab官方一致

%

% author:cuixingxing

% emalil:[email protected]

% 2020.4.25

%

h = currentYOLOV3Layer.numY;

w = currentYOLOV3Layer.numX;

stride = currentYOLOV3Layer.stride;

bs = size(YTrain,1);

% 把targets转换成nt*[imageIDs,classIDs,gx,gy,gw,gh]二维矩阵

scalex = w/currentYOLOV3Layer.imageSize(2);

scaley = h/currentYOLOV3Layer.imageSize(1);

gwh = currentYOLOV3Layer.anchors/stride; % 此处anchor_grids大小为na*2

targets = cat(1,YTrain{:}); % nt*5大小矩阵,nt为该bach下目标检测框数量

output = cellfun(@(x)size(x,1),YTrain);

imageIDs = repelem((1:bs)',output);

classIDs = targets(:,5);

targets = [imageIDs,classIDs,targets(:,1:end-1)];% nt*6大小,[imageIDs,classIDs,x,y,w,h]

% 计算目标检测框在特征图上的大小

targets(:,[3,5]) = targets(:,[3,5]).*scalex;% gx,gw

targets(:,[4,6]) = targets(:,[4,6]).*scaley;% nt*6大小,[imageIDs,classIDs,gx,gy,gw,gh]

% 分别获取每个anchor每个bbox的target

if ~isempty(targets)

iou = getMaxIOUPredictedWithGroundTruth(gwh,targets(:,5:6));

iouThresh = 0.2;

reject = true;

if reject

iou(iou

end

use_best_anchor = false;

if use_best_anchor

[iou,anchorsID] = max(iou,[],1);

[~,targetsID] = find(iou);

targets = targets(targetsID,:)

else % use all anchors

[anchorsID,targetsID] = find(iou);

targets = targets(targetsID,:); % N*6 ,[imageIDs,classIDs,gx,gy,gw,gh]

end

anchorsID = anchorsID(:); % N*1

end

gxy = targets(:,3:4);

targets(:,3:4) = gxy -floor(gxy);

% 返回targets值

tcls = targets(:,2);

if ~isempty(tcls)

assert(max(tcls)<=currentYOLOV3Layer.nClasses,'Target classes exceed model classes!');

end

tbox = targets(:,3:6);

xyPos = ceil(gxy);% 取ceil是因为matlab数组索引从1开始

indices = [targets(:,1),anchorsID,xyPos(:,2),xyPos(:,1)];

anchor_grids = gwh(anchorsID,:);

end

function iou = getMaxIOUPredictedWithGroundTruth(gwh,truth)

% getMaxIOUPredictedWithGroundTruth computes the maximum intersection over

% union scores for every pair of predictions and ground-truth boxes.

% 输入:

% gwh: 特征图上的anchor,大小为na*2

% truth:特征图上的目标真实检测框,只含有宽高,大小为nt*2

% 输出:

% iou:大小为na*nt,每一个值代表第i个anchor与第j个真值之间的交并比

%

% author:cuixingxing

% emalil:[email protected]

% 2020.4.25

%

bboxA = [ones(size(gwh)),gwh];

bboxB = [ones(size(truth)),truth];

iou = bboxOverlapRatio(bboxA,bboxB);

end

一键复制

编辑

Web IDE

原始数据

按行查看

历史

你可能感兴趣的:(matlab,v4是什么意思)