【更新完这个之后,不再更新yolo的东西,应该也不会再使用yolo框架】
一、添加LeakyLayer层
leaky_layer.hpp
template <typename Dtype>
class LeakyLayer : public NeuronLayer{
public:
explicit LeakyLayer(const LayerParameter& param)
: NeuronLayer(param) {}
virtual void LayerSetUp(const vector *>& bottom,
const vector *>& top);
virtual inline const char* type() const { return "Leaky"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual void Forward_cpu(const vector *>& bottom,
const vector *>& top);
virtual void Forward_gpu(const vector *>& bottom,
const vector *>& top);
virtual void Backward_cpu(const vector *>& top,
const vector<bool>& propagate_down, const vector *>& bottom){};//ÔÝʱ»¹Ã»ÊµÏÖ
virtual void Backward_gpu(const vector *>& top,
const vector<bool>& propagate_down, const vector *>& bottom);
};
leaky_layer.cpp
template <typename Dtype>
void LeakyLayer::LayerSetUp(const vector *>& bottom,
const vector *>& top) {
NeuronLayer::LayerSetUp(bottom, top);
CHECK_NE(top[0], bottom[0]) << this->type() << " Layer does not "
"allow in-place computation.";
}
template <typename Dtype>
void LeakyLayer::Forward_cpu(
const vector *>& bottom, const vector *>& top) {
const int count = top[0]->count();
Dtype* top_data = top[0]->mutable_cpu_data();
const Dtype* bottom_data = bottom[0]->cpu_data();
for(int i = 0; i < count; ++i){
if(bottom_data[i] > 0)
top_data[i] = bottom_data[i];
else
top_data[i] = 0.1*bottom_data[i];
//top_data[i] = bottom_data[i] > 0£¿ bottom_data[i]: (Dtype(0.1)*bottom_data[i]);
}
}
// In CPU-only builds, STUB_GPU supplies definitions for the GPU methods that
// LeakyLayer declares (presumably stubs that report missing GPU support;
// see Caffe's macro definition).
#ifdef CPU_ONLY
STUB_GPU(LeakyLayer);
#endif
// Instantiate the layer template and register it with Caffe's layer registry
// (presumably under the type name "Leaky", matching LeakyLayer::type()).
INSTANTIATE_CLASS(LeakyLayer);
REGISTER_LAYER_CLASS(Leaky);
leaky_layer.cu
// Element-wise leaky rectifier over n values: positive inputs are copied
// unchanged, all other inputs are multiplied by 0.1.
template <typename Dtype>
__global__ void LeakyForward(const int n, const Dtype* in, Dtype* out) {
  CUDA_KERNEL_LOOP(idx, n) {
    const Dtype value = in[idx];
    if (value > 0) {
      out[idx] = value;
    } else {
      out[idx] = value*0.1;
    }
  }
}
/**
 * @brief GPU forward pass: launches LeakyForward over every element of
 *        bottom[0] and writes the result to top[0].
 */
template <typename Dtype>
void LeakyLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  // NOLINT_NEXT_LINE(whitespace/operators)
  LeakyForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, top_data);
  CUDA_POST_KERNEL_CHECK;
}
// Element-wise gradient of the leaky rectifier over n values: the incoming
// gradient passes through where the forward input was positive and is
// multiplied by 0.1 everywhere else.
template <typename Dtype>
__global__ void LeakyBackward(const int n, const Dtype* bottom_data,
    Dtype* bottom_diff, const Dtype* top_diff) {
  CUDA_KERNEL_LOOP(idx, n) {
    const Dtype grad = top_diff[idx];
    if (bottom_data[idx] > 0) {
      bottom_diff[idx] = grad;
    } else {
      bottom_diff[idx] = grad*0.1;
    }
  }
}
/**
 * @brief GPU backward pass: launches LeakyBackward to fill bottom[0]'s diff
 *        from top[0]'s diff and bottom[0]'s data.
 *
 * Does nothing when propagate_down[0] is false.
 */
template <typename Dtype>
void LeakyLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  const int count = bottom[0]->count();
  // top's diff is only read here, so use the const accessor.
  const Dtype* top_diff = top[0]->gpu_diff();
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
  // NOLINT_NEXT_LINE(whitespace/operators)
  LeakyBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, bottom_diff, top_diff);
  CUDA_POST_KERNEL_CHECK;
}
// Instantiate LeakyLayer's GPU forward/backward methods (Caffe macro;
// presumably for float and double — confirm against its definition).
INSTANTIATE_LAYER_GPU_FUNCS(LeakyLayer);
二、添加detect层(loss)
detect_layer.hpp
template<typename Dtype>
class DetectLayer : public Layer{
public:
explicit DetectLayer(const LayerParameter& param);
virtual ~DetectLayer(){}
virtual void LayerSetUp(const vector *>& bottom,
const vector *>& top);
virtual void Reshape(const vector *>& bottom,
const vector *>& top);
virtual inline const char* type() const { return "Detect";}
virtual inline int ExactNumBottomBlobs() const {return 2;}
virtual inline int ExactNumTopBlobs() const { return 1;}
protected:
virtual void Forward_cpu(const vector *>& bottom,
const vector *>& top);
virtual void Backward_cpu(const vector *>& bottom,
const vector<bool>& propagate_down,const vector *>& top);
int classes;
int coords;
int rescore;
int side;
int num;
bool softmax;
bool sqrt;
float jiter;
float object_scale;
float noobject_scale;
float class_scale;
float coord_scale;
};
detect_layer.cpp
/**
 * @brief Length of the overlap between the 1-D intervals [x1_min, x1_max]
 *        and [x2_min, x2_max].
 *
 * @return The overlap length, or 0 when the intervals are disjoint or only
 *         touch. The result is never negative, even if an interval is
 *         inverted (max < min), which can happen when a predicted width or
 *         height is negative.
 */
template<typename Dtype>
Dtype lap(Dtype x1_min,Dtype x1_max,Dtype x2_min,Dtype x2_max){
  // The overlap spans from the larger lower bound to the smaller upper bound.
  const Dtype lo = x1_min > x2_min ? x1_min : x2_min;
  const Dtype hi = x1_max < x2_max ? x1_max : x2_max;
  return hi > lo ? hi - lo : Dtype(0);
}
/**
 * @brief Intersection-over-union of two boxes given as
 *        {center_x, center_y, width, height}.
 *
 * @return intersection / union, or 0 when the union area is below 1e-5
 *         (avoids dividing by a vanishing area).
 */
template<typename Dtype>
Dtype box_iou(const std::vector<Dtype> box1, const std::vector<Dtype> box2){
  // Per-axis overlap of the two boxes' extents (center -/+ half size).
  Dtype lap_x = lap(box1[0]-box1[2]/2,box1[0]+box1[2]/2,box2[0]-box2[2]/2,box2[0]+box2[2]/2);
  Dtype lap_y = lap(box1[1]-box1[3]/2,box1[1]+box1[3]/2,box2[1]-box2[3]/2,box2[1]+box2[3]/2);
  // Union area = area1 + area2 - intersection.
  Dtype are = box1[2]*box1[3]+box2[2]*box2[3]-lap_x*lap_y;
  if(are < 0.00001)
    return 0.0;
  else
    return (lap_x*lap_y)/are;
}
/**
 * @brief Copies the detection hyper-parameters from detect_param into members.
 *
 * Also appends propagate_down entries (true for bottom[0], false for
 * bottom[1]) to this layer's own copy of the layer parameter.
 */
template <typename Dtype>
DetectLayer<Dtype>::DetectLayer(const LayerParameter& param)
    : Layer<Dtype>(param) {
  this->layer_param_.add_propagate_down(true);
  this->layer_param_.add_propagate_down(false);
  const DetectParameter& detect_param = this->layer_param_.detect_param();
  classes = detect_param.classes();
  coords = detect_param.coords();
  rescore = detect_param.rescore();
  side = detect_param.side();
  num = detect_param.num();
  softmax = detect_param.softmax();
  sqrt = detect_param.sqrt();
  jiter = detect_param.jitter();
  object_scale = detect_param.object_scale();
  noobject_scale = detect_param.noobject_scale();
  class_scale = detect_param.class_scale();
  coord_scale = detect_param.coord_scale();
}
template <typename Dtype>
void DetectLayer::LayerSetUp(const vector *>& bottom,
const vector *>& top){
Layer::LayerSetUp(bottom, top);
this->layer_param_.add_loss_weight(Dtype(1));
int inputs = (side*side*(((1+coords)*num)+classes));
CHECK_EQ(inputs, bottom[0]->count(1)) << "input dimensions error";
CHECK_EQ(top.size(), 1) << "top size must be 1";
}
template <typename Dtype>
void DetectLayer::Reshape(const vector *>& bottom,
const vector *>& top){
vector<int> shape(0);
top[0]->Reshape(shape);
}
template <typename Dtype>
void DetectLayer::Forward_cpu(const vector *>& bottom,
const vector *>& top){
int input_num_each = side*side*(((1+coords)*num)+classes);
int batch = bottom[0]->num();
int locations = side*side;
const Dtype* truth = bottom[1]->cpu_data();
const Dtype* input = bottom[0]->cpu_data();
Dtype* delta = bottom[0]->mutable_cpu_diff();
Dtype& cost = top[0]->mutable_cpu_data()[0];
cost = Dtype(0.0);
for(int i = 0; i < bottom[0]->count(); ++i){
delta[i] = Dtype(0.0);
}
float avg_iou = 0;
float avg_cat = 0;
float avg_allcat = 0;
float avg_obj = 0;
float avg_anyobj = 0;
int count = 0;
for(int b = 0; b < batch; ++b){
int input_index = b*input_num_each;
for(int l = 0; l < locations; ++l){
int truth_index = (b*locations+l)*(1+coords+classes);
Dtype is_obj = truth[truth_index];
for(int n = 0; n < num;++n){
int delta_index = input_index + locations*classes + l*num + n;
delta[delta_index] = noobject_scale*(0 - input[delta_index]);
cost += noobject_scale*pow(input[delta_index],2);
avg_anyobj += input[delta_index];
}
int best_index = 0;
float best_iou = 0;
float best_rmse = 400;
if(is_obj < 0.0001) continue;
int class_index = input_index + l*classes;
for(int j = 0; j < classes; ++j){
delta[class_index+j]= class_scale * (truth[truth_index+1+j] - input[class_index+j]);
if(truth[truth_index+1+j]) avg_cat += input[class_index+j];
avg_allcat += input[class_index+j];
}//classes
vector<float> truth_box;
truth_box.push_back(float(truth[truth_index+1+classes]/side));
truth_box.push_back(float(truth[truth_index+1+classes+1]/side));
truth_box.push_back(float(truth[truth_index+1+classes+2]));
truth_box.push_back(float(truth[truth_index+1+classes+3]));
for(int n = 0; n < num; ++n){
int box_index = input_index + locations*(classes+num)+(l*num+n)*coords;
vector<float> out_box;
out_box.push_back(float(input[box_index]/side));
out_box.push_back(float(input[box_index+1]/side));
if(sqrt){
out_box.push_back(float(input[box_index+2]*input[box_index+2]));
out_box.push_back(float(input[box_index+3]*input[box_index+3]));
}else{
out_box.push_back(float(input[box_index+2]));
out_box.push_back(float(input[box_index+3]));
}
float iou = box_iou(truth_box, out_box);
float rmse = (pow(truth_box[0]-out_box[0],2)+pow(truth_box[1]-out_box[1],2)+pow(truth_box[2]-out_box[2],2)+pow(truth_box[3]-out_box[3],2));
if(best_iou > 0 || iou > 0){
if(iou > best_iou){
best_iou = iou;
best_index = n;
}
}else{
if(rmse < best_rmse){
best_rmse = rmse;
best_index = n;
}
}
}//for num
int box_index = input_index + locations*(classes+num)+(l*num+best_index)*coords;
int tbox_index = truth_index+1+classes;
vector<float> best_box;
best_box.push_back(float(input[box_index]/side));
best_box.push_back(float(input[box_index+1]/side));
if(sqrt){
best_box.push_back(float(input[box_index+2]*input[box_index+2]));
best_box.push_back(float(input[box_index+3]*input[box_index+3]));
}else{
best_box.push_back(float(input[box_index+2]));
best_box.push_back(float(input[box_index+3]));
}
int p_index = input_index + locations*classes + l*num + best_index;
cost -= noobject_scale*pow(input[p_index],2);
cost += object_scale*pow(1-input[p_index],2);
avg_obj += input[p_index];
delta[p_index] = object_scale*(1. - input[p_index]);
if(rescore){
//delta[p_index] = object_scale*(best_iou - input[p_index]);
}
delta[box_index] = coord_scale*(truth[tbox_index]-input[box_index]);
delta[box_index+1] = coord_scale*(truth[tbox_index+1]-input[box_index+1]);
delta[box_index+2] = coord_scale*(truth[tbox_index+2]-input[box_index+2]);
delta[box_index+3] = coord_scale*(truth[tbox_index+3]-input[box_index+3]);
if(sqrt) {
delta[box_index+2] = coord_scale*(std::sqrt(truth[tbox_index+2])-input[box_index+2]);
delta[box_index+3] = coord_scale*(std::sqrt(truth[tbox_index+3])-input[box_index+3]);
}
cost += pow(1-best_iou, 2);
avg_iou += best_iou;
++count;
}//locations
}//batch
for(int i = 0; i < bottom[0]->count(); ++i){
delta[i] = -delta[i];
}
//printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou, avg_cat, avg_allcat, avg_obj, avg_anyobj, count);
}
template <typename Dtype>
void DetectLayer::Backward_cpu(const vector *>& bottom,
const vector<bool>& propagate_down,const vector *>& top){
}
// Explicit instantiations of the overlap and IoU helpers for float and double.
template float lap(float x1_min,float x1_max,float x2_min,float x2_max);
template double lap(double x1_min,double x1_max,double x2_min,double x2_max);
template float box_iou(const vector<float> box1, const vector<float> box2);
template double box_iou(const vector<double> box1, const vector<double> box2);
// DetectLayer declares no Forward_gpu/Backward_gpu, so it must not be passed
// to STUB_GPU: that macro defines those two members, and defining members
// the class never declared breaks CPU_ONLY builds.
INSTANTIATE_CLASS(DetectLayer);
REGISTER_LAYER_CLASS(Detect);
三、train的prototxt
yolo-train.prototxt
name: "yolo"
layer {
name: "data"
type: "Data"
top: "data"
include {
phase: TRAIN
}
data_param {
source: ""
batch_size: 15
backend: LMDB
}
transform_param {
mean_value:127.5
mean_value:127.5
mean_value:127.5
scale: 0.00784
}
}
layer {
name: "data"
type: "Data"
top: "data"
include {
phase: TEST
}
data_param {
source: ""
batch_size: 15
backend: LMDB
}
transform_param {
mean_value:127.5
mean_value:127.5
mean_value:127.5
scale: 0.00784
}
}
layer {
name: "truth"
type: "HDF5Data"
top: "label"
include {
phase: TRAIN
}
hdf5_data_param {
source: ""
batch_size: 15
}
}
layer {
name: "truth"
type: "HDF5Data"
top: "label"
include {
phase: TEST
}
hdf5_data_param {
source: ""
batch_size: 15
}
}
layer{
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 64
kernel_size: 7
stride: 2
pad: 3
}
}
layer{
name: "Leaky1"
type: "Leaky"
bottom: "conv1"
top: "Leaky1"
}
layer{
name: "pool1"
type: "Pooling"
bottom: "Leaky1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 192
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky2"
type: "Leaky"
bottom: "conv2"
top: "Leaky2"
}
layer{
name: "pool2"
type: "Pooling"
bottom: "Leaky2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 128
kernel_size: 1
stride: 1
pad: 0
}
}
layer{
name: "Leaky3"
type: "Leaky"
bottom: "conv3"
top: "Leaky3"
}
layer{
name: "conv4"
type: "Convolution"
bottom: "Leaky3"
top: "conv4"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 256
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky4"
type: "Leaky"
bottom: "conv4"
top: "Leaky4"
}
layer{
name: "conv5"
type: "Convolution"
bottom: "Leaky4"
top: "conv5"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 256
kernel_size: 1
stride: 1
pad: 0
}
}
layer{
name: "Leaky5"
type: "Leaky"
bottom: "conv5"
top: "Leaky5"
}
layer{
name: "conv6"
type: "Convolution"
bottom: "Leaky5"
top: "conv6"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 512
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky6"
type: "Leaky"
bottom: "conv6"
top: "Leaky6"
}
layer{
name: "pool3"
type: "Pooling"
bottom: "Leaky6"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv7"
type: "Convolution"
bottom: "pool3"
top: "conv7"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 256
kernel_size: 1
stride: 1
pad: 0
}
}
layer{
name: "Leaky7"
type: "Leaky"
bottom: "conv7"
top: "Leaky7"
}
layer{
name: "conv8"
type: "Convolution"
bottom: "Leaky7"
top: "conv8"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 512
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky8"
type: "Leaky"
bottom: "conv8"
top: "Leaky8"
}
layer{
name: "conv9"
type: "Convolution"
bottom: "Leaky8"
top: "conv9"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 256
kernel_size: 1
stride: 1
pad: 0
}
}
layer{
name: "Leaky9"
type: "Leaky"
bottom: "conv9"
top: "Leaky9"
}
layer{
name: "conv10"
type: "Convolution"
bottom: "Leaky9"
top: "conv10"
convolution_param{
num_output: 512
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky10"
type: "Leaky"
bottom: "conv10"
top: "Leaky10"
}
layer{
name: "conv11"
type: "Convolution"
bottom: "Leaky10"
top: "conv11"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 256
kernel_size: 1
stride: 1
pad: 0
}
}
layer{
name: "Leaky11"
type: "Leaky"
bottom: "conv11"
top: "Leaky11"
}
layer{
name: "conv12"
type: "Convolution"
bottom: "Leaky11"
top: "conv12"
convolution_param{
num_output:512
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky12"
type: "Leaky"
bottom: "conv12"
top: "Leaky12"
}
layer{
name: "conv13"
type: "Convolution"
bottom: "Leaky12"
top: "conv13"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 256
kernel_size: 1
stride: 1
pad: 0
}
}
layer{
name: "Leaky13"
type: "Leaky"
bottom: "conv13"
top: "Leaky13"
}
layer{
name: "conv14"
type: "Convolution"
bottom: "Leaky13"
top: "conv14"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 512
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky14"
type: "Leaky"
bottom: "conv14"
top: "Leaky14"
}
layer{
name: "conv15"
type: "Convolution"
bottom: "Leaky14"
top: "conv15"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 512
kernel_size: 1
stride: 1
pad: 0
}
}
layer{
name: "Leaky15"
type: "Leaky"
bottom: "conv15"
top: "Leaky15"
}
layer{
name: "conv16"
type: "Convolution"
bottom: "Leaky15"
top: "conv16"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 1024
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky16"
type: "Leaky"
bottom: "conv16"
top: "Leaky16"
}
layer{
name: "pool4"
type: "Pooling"
bottom: "Leaky16"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer{
name: "conv17"
type: "Convolution"
bottom: "pool4"
top: "conv17"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 512
kernel_size: 1
stride: 1
pad: 0
}
}
layer{
name: "Leaky17"
type: "Leaky"
bottom: "conv17"
top: "Leaky17"
}
layer{
name: "conv18"
type: "Convolution"
bottom: "Leaky17"
top: "conv18"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 1024
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky18"
type: "Leaky"
bottom: "conv18"
top: "Leaky18"
}
layer{
name: "conv19"
type: "Convolution"
bottom: "Leaky18"
top: "conv19"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 512
kernel_size: 1
stride: 1
pad: 0
}
}
layer{
name: "Leaky19"
type: "Leaky"
bottom: "conv19"
top: "Leaky19"
}
layer{
name: "conv20"
type: "Convolution"
bottom: "Leaky19"
top: "conv20"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 1024
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky20"
type: "Leaky"
bottom: "conv20"
top: "Leaky20"
}
layer{
name: "conv21"
type: "Convolution"
bottom: "Leaky20"
top: "conv21"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 1024
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky21"
type: "Leaky"
bottom: "conv21"
top: "Leaky21"
}
layer{
name: "conv22"
type: "Convolution"
bottom: "Leaky21"
top: "conv22"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 1024
kernel_size: 3
stride: 2
pad: 1
}
}
layer{
name: "Leaky22"
type: "Leaky"
bottom: "conv22"
top: "Leaky22"
}
layer{
name: "conv23"
type: "Convolution"
bottom: "Leaky22"
top: "conv23"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 1024
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky23"
type: "Leaky"
bottom: "conv23"
top: "Leaky23"
}
layer{
name: "conv24"
type: "Convolution"
bottom: "Leaky23"
top: "conv24"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param{
num_output: 1024
kernel_size: 3
stride: 1
pad: 1
}
}
layer{
name: "Leaky24"
type: "Leaky"
bottom: "conv24"
top: "Leaky24"
}
layer {
name: "connect1"
type: "InnerProduct"
bottom: "Leaky24"
top: "connect1"
param {
lr_mult: 10
}
param {
lr_mult: 20
}
inner_product_param {
num_output: 4096
}
}
layer{
name: "Leaky25"
type: "Leaky"
bottom: "connect1"
top: "Leaky25"
}
# Final fully connected layer; its output is the prediction blob consumed by
# the "detect" layer.
layer {
name: "connect2_add"
type: "InnerProduct"
bottom: "Leaky25"
top: "connect2_add"
param {
lr_mult: 10
}
param {
lr_mult: 20
}
inner_product_param {
# Must equal side*side*((1+coords)*num + classes), which
# DetectLayer::LayerSetUp checks. With classes=3, 637 matches e.g.
# side=7, num=2, coords=4 (7*7*13) — confirm against the DetectParameter
# defaults.
num_output: 637
weight_filler {
type: "uniform"
max: 1
min: -1
}
bias_filler {
type: "constant"
}
}
}
# Detection loss: the first bottom is the prediction blob, the second is the
# ground-truth blob.
layer {
name: "detect"
type: "Detect"
top: "loss"
bottom: "connect2_add"
bottom: "label"
detect_param {
# Only classes is set here; side, num, coords and the scale factors come
# from the DetectParameter defaults (not shown in this file — verify).
classes: 3
}
}