MTCNN训练不收敛原因:
地址: https://github.com/dlunion/mtcnn
我们的训练数据标签格式:
wider face:
pos/001.jpg 1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
neg/001.jpg 0 -1 -1 -1 -1 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
celebA:
landmark/001.jpg -1 -1 -1 -1 -1 pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y
作者要求的训练数据标签格式:
pos/001.jpg 1 x1 y1 x2 y2 (x1 y1 x2 y2) pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y
part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2) pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y
neg/001.jpg 0 -1 -1 -1 -1 (x1 y1 x2 y2) pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y
在“pts_loss”层(type: "MTCNNEuclideanLoss")中,以"label"(分类的标签)来判断是否ignore。对于我们的训练数据标签格式:
class: ignore_label=-1, 可以正常分类;
bbox regression: ignore_label=0, 只会忽略neg样本;landmark样本(分类标签为-1)的bbox标签全为-1,却仍然参加计算,导致loss无法收敛;
landmark: ignore_label=0, 只会忽略neg样本;part样本(分类标签为-1)的landmark标签全为-1,却仍然参加计算(pos样本同理),导致loss无法收敛;
解决思路:
在做class,bbox regression,landmark任务时,判断标签值是否全部为-1,来作为ignore条件(下面的代码实际判断的是:只要该样本的回归标签中有一个值等于ignore_label,就忽略该样本)。
修改后"MTCNNEuclideanLoss.cpp"如下:
#include <vector>
#include "caffe/layers/mtcnn_euclidean_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"
#include <iostream>
using namespace std;
namespace caffe {
template
void MTCNNEuclideanLossLayer
const vector
LossLayer
CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
<< "Inputs must have the same dimension.";
int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
if (has_ignore_label)
CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but not input label";
if (!has_ignore_label)
CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch";
diff_.ReshapeLike(*bottom[0]);
}
template
void MTCNNEuclideanLossLayer
const vector
int count = bottom[0]->count();
int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
int ignore_label = has_ignore_label ? this->layer_param().loss_param().ignore_label() : -1;
if (has_ignore_label){
const Dtype* label = bottom[2]->cpu_data();
int countLabel = bottom[2]->num();
//label
Dtype* diff = diff_.mutable_cpu_data();
int channel = bottom[0]->channels();
//cout << "countLabel_forward: " << countLabel << endl;
//cout << "channel_forward: " << channel << endl;
//cout << "ignore_label_forward: " << ignore_label << endl;
memset(diff, 0, sizeof(Dtype)*count);
const Dtype* b0 = bottom[0]->cpu_data();
const Dtype* b1 = bottom[1]->cpu_data();
Dtype loss = 0;
// bbox regression
if (channel == 4)
{
for (int i = 0; i < countLabel; ++i)
{
//cout << "forware_b1_4: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << endl;
int dec = (b1[i*channel + 0] != ignore_label) && (b1[i*channel + 1] != ignore_label) && (b1[i*channel + 2] != ignore_label) && (b1[i*channel + 3] != ignore_label);
if ( dec==1 )
{
caffe_sub(
channel,
b0 + i * channel,
b1 + i * channel,
diff + i * channel);
Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel);
loss += dot / Dtype(2);
//cout << "forware_b1_4: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << endl;
}
}
}
// landmark
else if (channel == 10)
{
for (int i = 0; i < countLabel; ++i)
{
//cout << "forward_b1_10: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << " " << b1[i*channel + 4] << " ";
//cout << b1[i*channel + 5] << " " << b1[i*channel + 6] << " " << b1[i*channel + 7] << " " << b1[i*channel + 8] << " " << b1[i*channel + 9] << endl;
int dec1 = (b1[i*channel + 0] != ignore_label) && (b1[i*channel + 1] != ignore_label) && (b1[i*channel + 2] != ignore_label) && (b1[i*channel + 3] != ignore_label) && (b1[i*channel + 4] != ignore_label);
int dec2 = (b1[i*channel + 5] != ignore_label) && (b1[i*channel + 6] != ignore_label) && (b1[i*channel + 7] != ignore_label) && (b1[i*channel + 8] != ignore_label) && (b1[i*channel + 9] != ignore_label);
if (dec1==1 && dec2==1)
{
caffe_sub(
channel,
b0 + i * channel,
b1 + i * channel,
diff + i * channel);
Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel);
loss += dot / Dtype(2);
//cout << "forward_b1_10: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << " " << b1[i*channel + 4] << " ";
//cout << b1[i*channel + 5] << " " << b1[i*channel + 6] << " " << b1[i*channel + 7] << " " << b1[i*channel + 8] << " " << b1[i*channel + 9] << endl;
}
}
}
// ****************org data ********************
//for (int i = 0; i < countLabel; ++i){
// if (label[i] != ignore_label){
// caffe_sub(
// channel,
// b0 + i * channel,
// b1 + i * channel,
// diff + i * channel);
// Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel);
// loss += dot / Dtype(2);
// }
//}
// ***************** ********************
top[0]->mutable_cpu_data()[0] = loss;
}
else{
caffe_sub(
count,
bottom[0]->cpu_data(),
bottom[1]->cpu_data(),
diff_.mutable_cpu_data());
Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
Dtype loss = dot / bottom[0]->num() / Dtype(2);
top[0]->mutable_cpu_data()[0] = loss;
}
}
template
void MTCNNEuclideanLossLayer
const vector
int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
int ignore_label = has_ignore_label ? this->layer_param().loss_param().ignore_label() : -1;
if (has_ignore_label){
const Dtype* b1 = bottom[1]->cpu_data();
const Dtype* label = bottom[2]->cpu_data();
int countLabel = bottom[2]->num();
int channels = bottom[0]->channels();
//cout << "countLabel_backword: " << countLabel << endl;
//cout << "channels_backword: " << channels << endl;
//cout << "ignore_label_backword: " << ignore_label << endl;
for (int i = 0; i < 2; ++i) {
if (propagate_down[i]) {
memset(bottom[i]->mutable_cpu_diff(), 0, sizeof(Dtype)*bottom[i]->count());
const Dtype sign = (i == 0) ? 1 : -1;
const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
// bbox regression
if (channels == 4)
{
for (int j = 0; j < countLabel; ++j)
{
int dec = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label);
if (dec==1)
{
caffe_cpu_axpby(
channels, // count
alpha, // alpha
diff_.cpu_data() + channels * j, // a
Dtype(0), // beta
bottom[i]->mutable_cpu_diff() + channels * j); // b
}
}
}
// landmark
else if (channels == 10)
{
for (int j = 0; j < countLabel; ++j)
{
int dec1 = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label) && (b1[j*channels + 4] != ignore_label);
int dec2 = (b1[j*channels + 5] != ignore_label) && (b1[j*channels + 6] != ignore_label) && (b1[j*channels + 7] != ignore_label) && (b1[j*channels + 8] != ignore_label) && (b1[j*channels + 9] != ignore_label);
if (dec1 == 1 && dec2 == 1)
{
caffe_cpu_axpby(
channels, // count
alpha, // alpha
diff_.cpu_data() + channels * j, // a
Dtype(0), // beta
bottom[i]->mutable_cpu_diff() + channels * j); // b
}
}
}
// ***********************org data********************
//for (int j = 0; j < countLabel; ++j){
// if (label[j] != ignore_label){
// caffe_cpu_axpby(
// channels, // count
// alpha, // alpha
// diff_.cpu_data() + channels * j, // a
// Dtype(0), // beta
// bottom[i]->mutable_cpu_diff() + channels * j); // b
// }
//}
}
}
}
else{
for (int i = 0; i < 2; ++i) {
if (propagate_down[i]) {
const Dtype sign = (i == 0) ? 1 : -1;
const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
caffe_cpu_axpby(
bottom[i]->count(), // count
alpha, // alpha
diff_.cpu_data(), // a
Dtype(0), // beta
bottom[i]->mutable_cpu_diff()); // b
}
}
}
}
#ifdef CPU_ONLY
STUB_GPU(MTCNNEuclideanLossLayer);
#endif
INSTANTIATE_CLASS(MTCNNEuclideanLossLayer);
REGISTER_LAYER_CLASS(MTCNNEuclideanLoss);
} // namespace caffe
相应的"MTCNNEuclideanLoss.cu"如下:
#include <vector>
#include "caffe/layers/mtcnn_euclidean_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"
#include <iostream>
using namespace std;
namespace caffe {
template
void MTCNNEuclideanLossLayer
const vector
LossLayer
CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
<< "Inputs must have the same dimension.";
int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
if (has_ignore_label)
CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but not input label";
if (!has_ignore_label)
CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch";
diff_.ReshapeLike(*bottom[0]);
}
template
void MTCNNEuclideanLossLayer
const vector
int count = bottom[0]->count();
int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
int ignore_label = has_ignore_label ? this->layer_param().loss_param().ignore_label() : -1;
if (has_ignore_label){
//label
const Dtype* label = bottom[2]->cpu_data();
Dtype* diff = diff_.mutable_gpu_data();
int countLabel = bottom[2]->num();
int channel = bottom[0]->channels();
//cout << "ignore_label_forward: " << ignore_label << endl; //
caffe_gpu_memset(sizeof(Dtype)*count, 0, diff);
const Dtype* b0 = bottom[0]->gpu_data();
const Dtype* b1 = bottom[1]->gpu_data();
const Dtype* b1_cpu = bottom[1]->cpu_data();
Dtype loss = 0;
//cout << "channel_forward " << channel << endl;
// bbox regression
if (channel == 4)
{
for (int i = 0; i < countLabel; ++i)
{
//cout << "forware_b1_4: " << b1_cpu[i*channel + 0] << " " << b1_cpu[i*channel + 1] << " " << b1_cpu[i*channel + 2] << " " << b1_cpu[i*channel + 3] << endl;
int dec = (b1_cpu[i*channel + 0] != ignore_label) && (b1_cpu[i*channel + 1] != ignore_label) && (b1_cpu[i*channel + 2] != ignore_label) && (b1_cpu[i*channel + 3] != ignore_label);
if (dec == 1)
{
caffe_gpu_sub(
channel,
b0 + i * channel,
b1 + i * channel,
diff + i * channel);
Dtype dot;
caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot);
loss += dot / Dtype(2);
}
}
}
// landmark
else if (channel == 10)
{
for (int i = 0; i < countLabel; ++i)
{
int dec1 = (b1_cpu[i*channel + 0] != ignore_label) && (b1_cpu[i*channel + 1] != ignore_label) && (b1_cpu[i*channel + 2] != ignore_label) && (b1_cpu[i*channel + 3] != ignore_label) && (b1_cpu[i*channel + 4] != ignore_label);
int dec2 = (b1_cpu[i*channel + 5] != ignore_label) && (b1_cpu[i*channel + 6] != ignore_label) && (b1_cpu[i*channel + 7] != ignore_label) && (b1_cpu[i*channel + 8] != ignore_label) && (b1_cpu[i*channel + 9] != ignore_label);
if (dec1 == 1 && dec2 == 1)
{
caffe_gpu_sub(
channel,
b0 + i * channel,
b1 + i * channel,
diff + i * channel);
Dtype dot;
caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot);
loss += dot / Dtype(2);
}
}
}
// ***********************org data ********************
//for (int i = 0; i < countLabel; ++i){
// if (label[i] != ignore_label){
// caffe_gpu_sub(
// channel,
// b0 + i * channel,
// b1 + i * channel,
// diff + i * channel);
// Dtype dot;
// caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot);
// loss += dot / Dtype(2);
// }
//}
// **************************** **********************
top[0]->mutable_cpu_data()[0] = loss;
}
else{
int count = bottom[0]->count();
caffe_gpu_sub(
count,
bottom[0]->gpu_data(),
bottom[1]->gpu_data(),
diff_.mutable_gpu_data());
Dtype dot;
caffe_gpu_dot(count, diff_.gpu_data(), diff_.gpu_data(), &dot);
Dtype loss = dot / bottom[0]->num() / Dtype(2);
top[0]->mutable_cpu_data()[0] = loss;
}
}
template
void MTCNNEuclideanLossLayer
const vector
int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
int ignore_label = has_ignore_label ? this->layer_param().loss_param().ignore_label() : -1;
if (has_ignore_label){
const Dtype* b1 = bottom[1]->cpu_data();
const Dtype* label = bottom[2]->cpu_data();
int countLabel = bottom[2]->num();
int channels = bottom[0]->channels();
for (int i = 0; i < 2; ++i) {
if (propagate_down[i]) {
caffe_gpu_memset(sizeof(Dtype)*bottom[i]->count(), 0, bottom[i]->mutable_gpu_diff());
const Dtype sign = (i == 0) ? 1 : -1;
// bbox regression
if (channels == 4)
{
for (int j = 0; j < countLabel; ++j)
{
const Dtype alpha = sign * top[0]->cpu_diff()[0];
int dec = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label);
if (dec == 1)
{
caffe_gpu_axpby(
channels, // count
alpha, // alpha
diff_.gpu_data() + channels * j, // a
Dtype(0), // beta
bottom[i]->mutable_gpu_diff() + channels * j); // b
}
}
}
// landmark
else if (channels == 10)
{
for (int j = 0; j < countLabel; ++j)
{
const Dtype alpha = sign * top[0]->cpu_diff()[0];
int dec1 = (b1[j*channels + 0] != ignore_label) && (b1[j*channels + 1] != ignore_label) && (b1[j*channels + 2] != ignore_label) && (b1[j*channels + 3] != ignore_label) && (b1[j*channels + 4] != ignore_label);
int dec2 = (b1[j*channels + 5] != ignore_label) && (b1[j*channels + 6] != ignore_label) && (b1[j*channels + 7] != ignore_label) && (b1[j*channels + 8] != ignore_label) && (b1[j*channels + 9] != ignore_label);
if (dec1 == 1 && dec2 == 1)
{
caffe_gpu_axpby(
channels, // count
alpha, // alpha
diff_.gpu_data() + channels * j, // a
Dtype(0), // beta
bottom[i]->mutable_gpu_diff() + channels * j); // b
}
}
}
// ******************* org data**********************
//for (int j = 0; j < countLabel; ++j){
// const Dtype alpha = sign * top[0]->cpu_diff()[0];
// if (label[j] != ignore_label){
// caffe_gpu_axpby(
// channels, // count
// alpha, // alpha
// diff_.gpu_data() + channels * j, // a
// Dtype(0), // beta
// bottom[i]->mutable_gpu_diff() + channels * j); // b
// }
//}
}
}
}
else{
for (int i = 0; i < 2; ++i) {
if (propagate_down[i]) {
const Dtype sign = (i == 0) ? 1 : -1;
const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
caffe_gpu_axpby(
bottom[i]->count(), // count
alpha, // alpha
diff_.gpu_data(), // a
Dtype(0), // beta
bottom[i]->mutable_gpu_diff()); // b
}
}
}
}
INSTANTIATE_LAYER_GPU_FUNCS(MTCNNEuclideanLossLayer);
} // namespace caffe
小结:完成mtcnn_euclidean_loss_layer.cu的修改时发现,const Dtype* b1 = bottom[1]->gpu_data(); 得到的是显存(device)指针,不能在主机端代码中直接打印或取值;需要读取数值时改用cpu指针:const Dtype* b1_cpu = bottom[1]->cpu_data(); 就行了。