Modifying the Caffe source in MTCNN: when computing the loss, decide whether to ignore a sample by checking if its labels are "-1" (repost)

Why MTCNN training does not converge:

Repository: https://github.com/dlunion/mtcnn

Our training data label format:

WIDER FACE:

pos/001.jpg  1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

neg/001.jpg  0 -1 -1 -1 -1 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

 

CelebA:

landmark/001.jpg -1 -1 -1 -1 -1 pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y

 

Label format required by the author:

pos/001.jpg  1 x1 y1 x2 y2 (x1 y1 x2 y2) pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y

part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2) pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y

neg/001.jpg  0 -1 -1 -1 -1 (x1 y1 x2 y2) pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y

 

In the "pts_loss" layer (type: "MTCNNEuclideanLoss"), the classification "label" is used to decide whether a sample is ignored. With our training data label format, this plays out as follows (a layer-definition sketch follows the list):

class: ignore_label=-1, classification works normally;

bbox regression: ignore_label=0, so the -1 placeholders from landmark samples enter the computation and the loss cannot converge;

landmark: ignore_label=0, so the -1 placeholders from part (and pos) samples enter the computation and the loss cannot converge;
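
For reference, such a layer is wired up through Caffe's standard loss_param. The snippet below is only a sketch, not the repository's exact prototxt; the bottom blob names are assumptions:

layer {
  name: "pts_loss"
  type: "MTCNNEuclideanLoss"
  bottom: "conv6-3"   # predicted landmark offsets (assumed blob name)
  bottom: "pts"       # landmark targets (assumed blob name)
  bottom: "label"     # classification label, consulted by the ignore test
  top: "pts_loss"
  loss_param {
    ignore_label: 0   # as noted above, this alone cannot exclude the -1 placeholders
  }
}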

 

Solution:

For the class, bbox regression, and landmark tasks, use the target values themselves as the ignore condition: a sample is skipped when its regression targets are the -1 placeholders. (Concretely, the code below includes a sample only if none of its targets equals -1; since placeholder rows are filled entirely with -1, this is equivalent. A sketch of the test follows.)
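
As a standalone sketch, the per-sample test amounts to the following hypothetical helper (for illustration only; the actual patch below inlines this check):

// A sample takes part in the loss only if none of its regression targets
// equals ignore_label, so rows padded entirely with -1 are skipped.
template <typename Dtype>
bool sample_has_valid_targets(const Dtype* target, int channel,
                              int ignore_label) {
  for (int c = 0; c < channel; ++c) {
    if (target[c] == ignore_label) return false;
  }
  return true;
}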

 

修改后"MTCNNEuclideanLoss.cpp"如下:

 
#include <vector>
#include <cstring>  // for memset

#include "caffe/layers/mtcnn_euclidean_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

#include <iostream>
using namespace std;

namespace caffe {

template <typename Dtype>
void MTCNNEuclideanLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
      << "Inputs must have the same dimension.";

  int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
  if (has_ignore_label)
    CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but no label input";

  if (!has_ignore_label)
    CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch";

  diff_.ReshapeLike(*bottom[0]);
}

template <typename Dtype>
void MTCNNEuclideanLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
  int ignore_label = has_ignore_label ?
      this->layer_param().loss_param().ignore_label() : -1;

  if (has_ignore_label) {
    // classification labels; only used by the original (commented-out) code
    const Dtype* label = bottom[2]->cpu_data();
    int countLabel = bottom[2]->num();

    Dtype* diff = diff_.mutable_cpu_data();
    int channel = bottom[0]->channels();
    //cout << "countLabel_forward: " << countLabel << endl;
    //cout << "channel_forward: " << channel << endl;
    //cout << "ignore_label_forward: " << ignore_label << endl;
    memset(diff, 0, sizeof(Dtype) * count);

    const Dtype* b0 = bottom[0]->cpu_data();
    const Dtype* b1 = bottom[1]->cpu_data();
    Dtype loss = 0;

    // bbox regression: a sample contributes only if none of its 4 targets
    // equals ignore_label, i.e. the all -1 placeholder rows are skipped
    if (channel == 4) {
      for (int i = 0; i < countLabel; ++i) {
        //cout << "forward_b1_4: " << b1[i*channel + 0] << " " << b1[i*channel + 1] << " " << b1[i*channel + 2] << " " << b1[i*channel + 3] << endl;
        int dec = (b1[i*channel + 0] != ignore_label)
               && (b1[i*channel + 1] != ignore_label)
               && (b1[i*channel + 2] != ignore_label)
               && (b1[i*channel + 3] != ignore_label);
        if (dec == 1) {
          caffe_sub(
              channel,
              b0 + i * channel,
              b1 + i * channel,
              diff + i * channel);
          Dtype dot = caffe_cpu_dot(channel, diff + i * channel,
                                    diff + i * channel);
          loss += dot / Dtype(2);
        }
      }
    }
    // landmark: a sample contributes only if none of its 10 targets
    // equals ignore_label
    else if (channel == 10) {
      for (int i = 0; i < countLabel; ++i) {
        //cout << "forward_b1_10: " << b1[i*channel + 0] << " ... " << b1[i*channel + 9] << endl;
        int dec1 = (b1[i*channel + 0] != ignore_label)
                && (b1[i*channel + 1] != ignore_label)
                && (b1[i*channel + 2] != ignore_label)
                && (b1[i*channel + 3] != ignore_label)
                && (b1[i*channel + 4] != ignore_label);
        int dec2 = (b1[i*channel + 5] != ignore_label)
                && (b1[i*channel + 6] != ignore_label)
                && (b1[i*channel + 7] != ignore_label)
                && (b1[i*channel + 8] != ignore_label)
                && (b1[i*channel + 9] != ignore_label);
        if (dec1 == 1 && dec2 == 1) {
          caffe_sub(
              channel,
              b0 + i * channel,
              b1 + i * channel,
              diff + i * channel);
          Dtype dot = caffe_cpu_dot(channel, diff + i * channel,
                                    diff + i * channel);
          loss += dot / Dtype(2);
        }
      }
    }

    // **************** original code ****************
    // (the sample was ignored based on the classification label alone)
    //for (int i = 0; i < countLabel; ++i) {
    //  if (label[i] != ignore_label) {
    //    caffe_sub(
    //        channel,
    //        b0 + i * channel,
    //        b1 + i * channel,
    //        diff + i * channel);
    //    Dtype dot = caffe_cpu_dot(channel, diff + i * channel, diff + i * channel);
    //    loss += dot / Dtype(2);
    //  }
    //}
    // ***********************************************

    top[0]->mutable_cpu_data()[0] = loss;
  }
  else {
    caffe_sub(
        count,
        bottom[0]->cpu_data(),
        bottom[1]->cpu_data(),
        diff_.mutable_cpu_data());
    Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
    Dtype loss = dot / bottom[0]->num() / Dtype(2);
    top[0]->mutable_cpu_data()[0] = loss;
  }
}

template <typename Dtype>
void MTCNNEuclideanLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {

  int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
  int ignore_label = has_ignore_label ?
      this->layer_param().loss_param().ignore_label() : -1;

  if (has_ignore_label) {
    const Dtype* b1 = bottom[1]->cpu_data();
    const Dtype* label = bottom[2]->cpu_data();  // used only by the original code
    int countLabel = bottom[2]->num();
    int channels = bottom[0]->channels();
    //cout << "countLabel_backward: " << countLabel << endl;
    //cout << "channels_backward: " << channels << endl;
    //cout << "ignore_label_backward: " << ignore_label << endl;
    for (int i = 0; i < 2; ++i) {
      if (propagate_down[i]) {
        memset(bottom[i]->mutable_cpu_diff(), 0,
               sizeof(Dtype) * bottom[i]->count());

        const Dtype sign = (i == 0) ? 1 : -1;
        const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();

        // bbox regression: backpropagate only through valid samples
        if (channels == 4) {
          for (int j = 0; j < countLabel; ++j) {
            int dec = (b1[j*channels + 0] != ignore_label)
                   && (b1[j*channels + 1] != ignore_label)
                   && (b1[j*channels + 2] != ignore_label)
                   && (b1[j*channels + 3] != ignore_label);
            if (dec == 1) {
              caffe_cpu_axpby(
                  channels,                                      // count
                  alpha,                                         // alpha
                  diff_.cpu_data() + channels * j,               // a
                  Dtype(0),                                      // beta
                  bottom[i]->mutable_cpu_diff() + channels * j); // b
            }
          }
        }
        // landmark: backpropagate only through valid samples
        else if (channels == 10) {
          for (int j = 0; j < countLabel; ++j) {
            int dec1 = (b1[j*channels + 0] != ignore_label)
                    && (b1[j*channels + 1] != ignore_label)
                    && (b1[j*channels + 2] != ignore_label)
                    && (b1[j*channels + 3] != ignore_label)
                    && (b1[j*channels + 4] != ignore_label);
            int dec2 = (b1[j*channels + 5] != ignore_label)
                    && (b1[j*channels + 6] != ignore_label)
                    && (b1[j*channels + 7] != ignore_label)
                    && (b1[j*channels + 8] != ignore_label)
                    && (b1[j*channels + 9] != ignore_label);
            if (dec1 == 1 && dec2 == 1) {
              caffe_cpu_axpby(
                  channels,                                      // count
                  alpha,                                         // alpha
                  diff_.cpu_data() + channels * j,               // a
                  Dtype(0),                                      // beta
                  bottom[i]->mutable_cpu_diff() + channels * j); // b
            }
          }
        }

        // **************** original code ****************
        //for (int j = 0; j < countLabel; ++j) {
        //  if (label[j] != ignore_label) {
        //    caffe_cpu_axpby(
        //        channels,                                      // count
        //        alpha,                                         // alpha
        //        diff_.cpu_data() + channels * j,               // a
        //        Dtype(0),                                      // beta
        //        bottom[i]->mutable_cpu_diff() + channels * j); // b
        //  }
        //}
        // ***********************************************
      }
    }
  }
  else {
    for (int i = 0; i < 2; ++i) {
      if (propagate_down[i]) {
        const Dtype sign = (i == 0) ? 1 : -1;
        const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
        caffe_cpu_axpby(
            bottom[i]->count(),              // count
            alpha,                           // alpha
            diff_.cpu_data(),                // a
            Dtype(0),                        // beta
            bottom[i]->mutable_cpu_diff());  // b
      }
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(MTCNNEuclideanLossLayer);
#endif

INSTANTIATE_CLASS(MTCNNEuclideanLossLayer);
REGISTER_LAYER_CLASS(MTCNNEuclideanLoss);

}  // namespace caffe


相应的"MTCNNEuclideanLoss.cu"如下:

 
#include <vector>

#include "caffe/layers/mtcnn_euclidean_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

#include <iostream>
using namespace std;

namespace caffe {

template <typename Dtype>
void MTCNNEuclideanLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
      << "Inputs must have the same dimension.";

  int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
  if (has_ignore_label)
    CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but no label input";

  if (!has_ignore_label)
    CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch";

  diff_.ReshapeLike(*bottom[0]);
}

template <typename Dtype>
void MTCNNEuclideanLossLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
  int ignore_label = has_ignore_label ?
      this->layer_param().loss_param().ignore_label() : -1;

  if (has_ignore_label) {
    // classification labels; only used by the original (commented-out) code
    const Dtype* label = bottom[2]->cpu_data();
    Dtype* diff = diff_.mutable_gpu_data();
    int countLabel = bottom[2]->num();
    int channel = bottom[0]->channels();
    //cout << "ignore_label_forward: " << ignore_label << endl;
    caffe_gpu_memset(sizeof(Dtype) * count, 0, diff);

    const Dtype* b0 = bottom[0]->gpu_data();
    const Dtype* b1 = bottom[1]->gpu_data();
    // gpu_data() is a device pointer and cannot be read on the host;
    // fetch a CPU view of the targets for the ignore test (and debugging)
    const Dtype* b1_cpu = bottom[1]->cpu_data();
    Dtype loss = 0;

    // bbox regression
    if (channel == 4) {
      for (int i = 0; i < countLabel; ++i) {
        //cout << "forward_b1_4: " << b1_cpu[i*channel + 0] << " " << b1_cpu[i*channel + 1] << " " << b1_cpu[i*channel + 2] << " " << b1_cpu[i*channel + 3] << endl;
        int dec = (b1_cpu[i*channel + 0] != ignore_label)
               && (b1_cpu[i*channel + 1] != ignore_label)
               && (b1_cpu[i*channel + 2] != ignore_label)
               && (b1_cpu[i*channel + 3] != ignore_label);
        if (dec == 1) {
          caffe_gpu_sub(
              channel,
              b0 + i * channel,
              b1 + i * channel,
              diff + i * channel);
          Dtype dot;
          caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot);
          loss += dot / Dtype(2);
        }
      }
    }
    // landmark
    else if (channel == 10) {
      for (int i = 0; i < countLabel; ++i) {
        int dec1 = (b1_cpu[i*channel + 0] != ignore_label)
                && (b1_cpu[i*channel + 1] != ignore_label)
                && (b1_cpu[i*channel + 2] != ignore_label)
                && (b1_cpu[i*channel + 3] != ignore_label)
                && (b1_cpu[i*channel + 4] != ignore_label);
        int dec2 = (b1_cpu[i*channel + 5] != ignore_label)
                && (b1_cpu[i*channel + 6] != ignore_label)
                && (b1_cpu[i*channel + 7] != ignore_label)
                && (b1_cpu[i*channel + 8] != ignore_label)
                && (b1_cpu[i*channel + 9] != ignore_label);
        if (dec1 == 1 && dec2 == 1) {
          caffe_gpu_sub(
              channel,
              b0 + i * channel,
              b1 + i * channel,
              diff + i * channel);
          Dtype dot;
          caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot);
          loss += dot / Dtype(2);
        }
      }
    }

    // **************** original code ****************
    //for (int i = 0; i < countLabel; ++i) {
    //  if (label[i] != ignore_label) {
    //    caffe_gpu_sub(
    //        channel,
    //        b0 + i * channel,
    //        b1 + i * channel,
    //        diff + i * channel);
    //    Dtype dot;
    //    caffe_gpu_dot(channel, diff + i * channel, diff + i * channel, &dot);
    //    loss += dot / Dtype(2);
    //  }
    //}
    // ***********************************************

    top[0]->mutable_cpu_data()[0] = loss;
  }
  else {
    caffe_gpu_sub(
        count,
        bottom[0]->gpu_data(),
        bottom[1]->gpu_data(),
        diff_.mutable_gpu_data());
    Dtype dot;
    caffe_gpu_dot(count, diff_.gpu_data(), diff_.gpu_data(), &dot);
    Dtype loss = dot / bottom[0]->num() / Dtype(2);
    top[0]->mutable_cpu_data()[0] = loss;
  }
}

template <typename Dtype>
void MTCNNEuclideanLossLayer<Dtype>::Backward_gpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {

  int has_ignore_label = this->layer_param().loss_param().has_ignore_label();
  int ignore_label = has_ignore_label ?
      this->layer_param().loss_param().ignore_label() : -1;

  if (has_ignore_label) {
    const Dtype* b1 = bottom[1]->cpu_data();
    const Dtype* label = bottom[2]->cpu_data();  // used only by the original code
    int countLabel = bottom[2]->num();
    int channels = bottom[0]->channels();
    for (int i = 0; i < 2; ++i) {
      if (propagate_down[i]) {
        caffe_gpu_memset(sizeof(Dtype) * bottom[i]->count(), 0,
                         bottom[i]->mutable_gpu_diff());

        const Dtype sign = (i == 0) ? 1 : -1;
        // normalized by the batch size, consistent with Backward_cpu
        const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();

        // bbox regression
        if (channels == 4) {
          for (int j = 0; j < countLabel; ++j) {
            int dec = (b1[j*channels + 0] != ignore_label)
                   && (b1[j*channels + 1] != ignore_label)
                   && (b1[j*channels + 2] != ignore_label)
                   && (b1[j*channels + 3] != ignore_label);
            if (dec == 1) {
              caffe_gpu_axpby(
                  channels,                                      // count
                  alpha,                                         // alpha
                  diff_.gpu_data() + channels * j,               // a
                  Dtype(0),                                      // beta
                  bottom[i]->mutable_gpu_diff() + channels * j); // b
            }
          }
        }
        // landmark
        else if (channels == 10) {
          for (int j = 0; j < countLabel; ++j) {
            int dec1 = (b1[j*channels + 0] != ignore_label)
                    && (b1[j*channels + 1] != ignore_label)
                    && (b1[j*channels + 2] != ignore_label)
                    && (b1[j*channels + 3] != ignore_label)
                    && (b1[j*channels + 4] != ignore_label);
            int dec2 = (b1[j*channels + 5] != ignore_label)
                    && (b1[j*channels + 6] != ignore_label)
                    && (b1[j*channels + 7] != ignore_label)
                    && (b1[j*channels + 8] != ignore_label)
                    && (b1[j*channels + 9] != ignore_label);
            if (dec1 == 1 && dec2 == 1) {
              caffe_gpu_axpby(
                  channels,                                      // count
                  alpha,                                         // alpha
                  diff_.gpu_data() + channels * j,               // a
                  Dtype(0),                                      // beta
                  bottom[i]->mutable_gpu_diff() + channels * j); // b
            }
          }
        }

        // **************** original code ****************
        //for (int j = 0; j < countLabel; ++j) {
        //  if (label[j] != ignore_label) {
        //    caffe_gpu_axpby(
        //        channels,                                      // count
        //        alpha,                                         // alpha
        //        diff_.gpu_data() + channels * j,               // a
        //        Dtype(0),                                      // beta
        //        bottom[i]->mutable_gpu_diff() + channels * j); // b
        //  }
        //}
        // ***********************************************
      }
    }
  }
  else {
    for (int i = 0; i < 2; ++i) {
      if (propagate_down[i]) {
        const Dtype sign = (i == 0) ? 1 : -1;
        const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
        caffe_gpu_axpby(
            bottom[i]->count(),              // count
            alpha,                           // alpha
            diff_.gpu_data(),                // a
            Dtype(0),                        // beta
            bottom[i]->mutable_gpu_diff());  // b
      }
    }
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(MTCNNEuclideanLossLayer);

}  // namespace caffe


A closing note: while finishing the changes to mtcnn_euclidean_loss_layer.cu, I found that data obtained via const Dtype* b1 = bottom[1]->gpu_data(); cannot be printed or read out on the host, because gpu_data() returns a device pointer; switching to the CPU view with const Dtype* b1_cpu = bottom[1]->cpu_data(); fixes this.
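
A minimal sketch of that pattern (blob is an illustrative Blob<Dtype>* inside a layer; the variable names are ours):

// gpu_data() returns a device pointer; dereferencing it on the host is
// invalid. cpu_data() first syncs the blob back to host memory, so its
// values can be read and printed safely.
const Dtype* bad  = blob->gpu_data();  // device pointer: do not read on the host
const Dtype* good = blob->cpu_data();  // host pointer: safe to print
for (int k = 0; k < blob->count(); ++k) {
  std::cout << good[k] << " ";
}
std::cout << std::endl;

Note that cpu_data() triggers a device-to-host copy whenever the data currently lives on the GPU, so this is fine for debugging but adds synchronization overhead if left in the training path.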
