FCN源码解读之surgery.py

转载自 https://blog.csdn.net/qq_21368481/article/details/80289350

surgery.py是FCN中用于转换模型权重的python文件,在解读源码前,我们先来看一下VGG16网络的构架和FCN32s网络的构架两者之间的区别(此处参看VGG_ILSVRC_16_deploy.prototxt和FCN32s的deploy.prototxt,deploy.prototxt文件比train.prototxt文件少了输入层的数据路径和loss层的反向传播,其余网络结构两者是一样的),如下:
VGG16 FCN32s

        
          
          
          
          
  1. name: "VGG_ILSVRC_16_layers"
  2. input: "data"
  3. input_dim: 10
  4. input_dim: 3
  5. input_dim: 224
  6. input_dim: 224
  7. layers {
  8. bottom: "data"
  9. top: "conv1_1"
  10. name: "conv1_1"
  11. type: CONVOLUTION
  12. convolution_param {
  13. num_output: 64
  14. pad: 1
  15. kernel_size: 3
  16. }
  17. }
  18. layers {
  19. bottom: "conv1_1"
  20. top: "conv1_1"
  21. name: "relu1_1"
  22. type: RELU
  23. }
  24. layers {
  25. bottom: "conv1_1"
  26. top: "conv1_2"
  27. name: "conv1_2"
  28. type: CONVOLUTION
  29. convolution_param {
  30. num_output: 64
  31. pad: 1
  32. kernel_size: 3
  33. }
  34. }
  35. layers {
  36. bottom: "conv1_2"
  37. top: "conv1_2"
  38. name: "relu1_2"
  39. type: RELU
  40. }
  41. layers {
  42. bottom: "conv1_2"
  43. top: "pool1"
  44. name: "pool1"
  45. type: POOLING
  46. pooling_param {
  47. pool: MAX
  48. kernel_size: 2
  49. stride: 2
  50. }
  51. }
  52. layers {
  53. bottom: "pool1"
  54. top: "conv2_1"
  55. name: "conv2_1"
  56. type: CONVOLUTION
  57. convolution_param {
  58. num_output: 128
  59. pad: 1
  60. kernel_size: 3
  61. }
  62. }
  63. layers {
  64. bottom: "conv2_1"
  65. top: "conv2_1"
  66. name: "relu2_1"
  67. type: RELU
  68. }
  69. layers {
  70. bottom: "conv2_1"
  71. top: "conv2_2"
  72. name: "conv2_2"
  73. type: CONVOLUTION
  74. convolution_param {
  75. num_output: 128
  76. pad: 1
  77. kernel_size: 3
  78. }
  79. }
  80. layers {
  81. bottom: "conv2_2"
  82. top: "conv2_2"
  83. name: "relu2_2"
  84. type: RELU
  85. }
  86. layers {
  87. bottom: "conv2_2"
  88. top: "pool2"
  89. name: "pool2"
  90. type: POOLING
  91. pooling_param {
  92. pool: MAX
  93. kernel_size: 2
  94. stride: 2
  95. }
  96. }
  97. layers {
  98. bottom: "pool2"
  99. top: "conv3_1"
  100. name: "conv3_1"
  101. type: CONVOLUTION
  102. convolution_param {
  103. num_output: 256
  104. pad: 1
  105. kernel_size: 3
  106. }
  107. }
  108. layers {
  109. bottom: "conv3_1"
  110. top: "conv3_1"
  111. name: "relu3_1"
  112. type: RELU
  113. }
  114. layers {
  115. bottom: "conv3_1"
  116. top: "conv3_2"
  117. name: "conv3_2"
  118. type: CONVOLUTION
  119. convolution_param {
  120. num_output: 256
  121. pad: 1
  122. kernel_size: 3
  123. }
  124. }
  125. layers {
  126. bottom: "conv3_2"
  127. top: "conv3_2"
  128. name: "relu3_2"
  129. type: RELU
  130. }
  131. layers {
  132. bottom: "conv3_2"
  133. top: "conv3_3"
  134. name: "conv3_3"
  135. type: CONVOLUTION
  136. convolution_param {
  137. num_output: 256
  138. pad: 1
  139. kernel_size: 3
  140. }
  141. }
  142. layers {
  143. bottom: "conv3_3"
  144. top: "conv3_3"
  145. name: "relu3_3"
  146. type: RELU
  147. }
  148. layers {
  149. bottom: "conv3_3"
  150. top: "pool3"
  151. name: "pool3"
  152. type: POOLING
  153. pooling_param {
  154. pool: MAX
  155. kernel_size: 2
  156. stride: 2
  157. }
  158. }
  159. layers {
  160. bottom: "pool3"
  161. top: "conv4_1"
  162. name: "conv4_1"
  163. type: CONVOLUTION
  164. convolution_param {
  165. num_output: 512
  166. pad: 1
  167. kernel_size: 3
  168. }
  169. }
  170. layers {
  171. bottom: "conv4_1"
  172. top: "conv4_1"
  173. name: "relu4_1"
  174. type: RELU
  175. }
  176. layers {
  177. bottom: "conv4_1"
  178. top: "conv4_2"
  179. name: "conv4_2"
  180. type: CONVOLUTION
  181. convolution_param {
  182. num_output: 512
  183. pad: 1
  184. kernel_size: 3
  185. }
  186. }
  187. layers {
  188. bottom: "conv4_2"
  189. top: "conv4_2"
  190. name: "relu4_2"
  191. type: RELU
  192. }
  193. layers {
  194. bottom: "conv4_2"
  195. top: "conv4_3"
  196. name: "conv4_3"
  197. type: CONVOLUTION
  198. convolution_param {
  199. num_output: 512
  200. pad: 1
  201. kernel_size: 3
  202. }
  203. }
  204. layers {
  205. bottom: "conv4_3"
  206. top: "conv4_3"
  207. name: "relu4_3"
  208. type: RELU
  209. }
  210. layers {
  211. bottom: "conv4_3"
  212. top: "pool4"
  213. name: "pool4"
  214. type: POOLING
  215. pooling_param {
  216. pool: MAX
  217. kernel_size: 2
  218. stride: 2
  219. }
  220. }
  221. layers {
  222. bottom: "pool4"
  223. top: "conv5_1"
  224. name: "conv5_1"
  225. type: CONVOLUTION
  226. convolution_param {
  227. num_output: 512
  228. pad: 1
  229. kernel_size: 3
  230. }
  231. }
  232. layers {
  233. bottom: "conv5_1"
  234. top: "conv5_1"
  235. name: "relu5_1"
  236. type: RELU
  237. }
  238. layers {
  239. bottom: "conv5_1"
  240. top: "conv5_2"
  241. name: "conv5_2"
  242. type: CONVOLUTION
  243. convolution_param {
  244. num_output: 512
  245. pad: 1
  246. kernel_size: 3
  247. }
  248. }
  249. layers {
  250. bottom: "conv5_2"
  251. top: "conv5_2"
  252. name: "relu5_2"
  253. type: RELU
  254. }
  255. layers {
  256. bottom: "conv5_2"
  257. top: "conv5_3"
  258. name: "conv5_3"
  259. type: CONVOLUTION
  260. convolution_param {
  261. num_output: 512
  262. pad: 1
  263. kernel_size: 3
  264. }
  265. }
  266. layers {
  267. bottom: "conv5_3"
  268. top: "conv5_3"
  269. name: "relu5_3"
  270. type: RELU
  271. }
  272. layers {
  273. bottom: "conv5_3"
  274. top: "pool5"
  275. name: "pool5"
  276. type: POOLING
  277. pooling_param {
  278. pool: MAX
  279. kernel_size: 2
  280. stride: 2
  281. }
  282. }
  283. layers {
  284. bottom: "pool5"
  285. top: "fc6"
  286. name: "fc6"
  287. type: INNER_PRODUCT
  288. inner_product_param {
  289. num_output: 4096
  290. }
  291. }
  292. layers {
  293. bottom: "fc6"
  294. top: "fc6"
  295. name: "relu6"
  296. type: RELU
  297. }
  298. layers {
  299. bottom: "fc6"
  300. top: "fc6"
  301. name: "drop6"
  302. type: DROPOUT
  303. dropout_param {
  304. dropout_ratio: 0.5
  305. }
  306. }
  307. layers {
  308. bottom: "fc6"
  309. top: "fc7"
  310. name: "fc7"
  311. type: INNER_PRODUCT
  312. inner_product_param {
  313. num_output: 4096
  314. }
  315. }
  316. layers {
  317. bottom: "fc7"
  318. top: "fc7"
  319. name: "relu7"
  320. type: RELU
  321. }
  322. layers {
  323. bottom: "fc7"
  324. top: "fc7"
  325. name: "drop7"
  326. type: DROPOUT
  327. dropout_param {
  328. dropout_ratio: 0.5
  329. }
  330. }
  331. layers {
  332. bottom: "fc7"
  333. top: "fc8"
  334. name: "fc8"
  335. type: INNER_PRODUCT
  336. inner_product_param {
  337. num_output: 1000
  338. }
  339. }
  340. layers {
  341. bottom: "fc8"
  342. top: "prob"
  343. name: "prob"
  344. type: SOFTMAX
  345. }

        
          
          
          
          
  1. layer {
  2. name: "input"
  3. type: "Input"
  4. top: "data"
  5. input_param {
  6. # These dimensions are purely for sake of example;
  7. # see infer.py for how to reshape the net to the given input size.
  8. shape { dim: 1 dim: 3 dim: 500 dim: 500 }
  9. }
  10. }
  11. layer {
  12. name: "conv1_1"
  13. type: "Convolution"
  14. bottom: "data"
  15. top: "conv1_1"
  16. param {
  17. lr_mult: 1
  18. decay_mult: 1
  19. }
  20. param {
  21. lr_mult: 2
  22. decay_mult: 0
  23. }
  24. convolution_param {
  25. num_output: 64
  26. pad: 100
  27. kernel_size: 3
  28. stride: 1
  29. }
  30. }
  31. layer {
  32. name: "relu1_1"
  33. type: "ReLU"
  34. bottom: "conv1_1"
  35. top: "conv1_1"
  36. }
  37. layer {
  38. name: "conv1_2"
  39. type: "Convolution"
  40. bottom: "conv1_1"
  41. top: "conv1_2"
  42. param {
  43. lr_mult: 1
  44. decay_mult: 1
  45. }
  46. param {
  47. lr_mult: 2
  48. decay_mult: 0
  49. }
  50. convolution_param {
  51. num_output: 64
  52. pad: 1
  53. kernel_size: 3
  54. stride: 1
  55. }
  56. }
  57. layer {
  58. name: "relu1_2"
  59. type: "ReLU"
  60. bottom: "conv1_2"
  61. top: "conv1_2"
  62. }
  63. layer {
  64. name: "pool1"
  65. type: "Pooling"
  66. bottom: "conv1_2"
  67. top: "pool1"
  68. pooling_param {
  69. pool: MAX
  70. kernel_size: 2
  71. stride: 2
  72. }
  73. }
  74. layer {
  75. name: "conv2_1"
  76. type: "Convolution"
  77. bottom: "pool1"
  78. top: "conv2_1"
  79. param {
  80. lr_mult: 1
  81. decay_mult: 1
  82. }
  83. param {
  84. lr_mult: 2
  85. decay_mult: 0
  86. }
  87. convolution_param {
  88. num_output: 128
  89. pad: 1
  90. kernel_size: 3
  91. stride: 1
  92. }
  93. }
  94. layer {
  95. name: "relu2_1"
  96. type: "ReLU"
  97. bottom: "conv2_1"
  98. top: "conv2_1"
  99. }
  100. layer {
  101. name: "conv2_2"
  102. type: "Convolution"
  103. bottom: "conv2_1"
  104. top: "conv2_2"
  105. param {
  106. lr_mult: 1
  107. decay_mult: 1
  108. }
  109. param {
  110. lr_mult: 2
  111. decay_mult: 0
  112. }
  113. convolution_param {
  114. num_output: 128
  115. pad: 1
  116. kernel_size: 3
  117. stride: 1
  118. }
  119. }
  120. layer {
  121. name: "relu2_2"
  122. type: "ReLU"
  123. bottom: "conv2_2"
  124. top: "conv2_2"
  125. }
  126. layer {
  127. name: "pool2"
  128. type: "Pooling"
  129. bottom: "conv2_2"
  130. top: "pool2"
  131. pooling_param {
  132. pool: MAX
  133. kernel_size: 2
  134. stride: 2
  135. }
  136. }
  137. layer {
  138. name: "conv3_1"
  139. type: "Convolution"
  140. bottom: "pool2"
  141. top: "conv3_1"
  142. param {
  143. lr_mult: 1
  144. decay_mult: 1
  145. }
  146. param {
  147. lr_mult: 2
  148. decay_mult: 0
  149. }
  150. convolution_param {
  151. num_output: 256
  152. pad: 1
  153. kernel_size: 3
  154. stride: 1
  155. }
  156. }
  157. layer {
  158. name: "relu3_1"
  159. type: "ReLU"
  160. bottom: "conv3_1"
  161. top: "conv3_1"
  162. }
  163. layer {
  164. name: "conv3_2"
  165. type: "Convolution"
  166. bottom: "conv3_1"
  167. top: "conv3_2"
  168. param {
  169. lr_mult: 1
  170. decay_mult: 1
  171. }
  172. param {
  173. lr_mult: 2
  174. decay_mult: 0
  175. }
  176. convolution_param {
  177. num_output: 256
  178. pad: 1
  179. kernel_size: 3
  180. stride: 1
  181. }
  182. }
  183. layer {
  184. name: "relu3_2"
  185. type: "ReLU"
  186. bottom: "conv3_2"
  187. top: "conv3_2"
  188. }
  189. layer {
  190. name: "conv3_3"
  191. type: "Convolution"
  192. bottom: "conv3_2"
  193. top: "conv3_3"
  194. param {
  195. lr_mult: 1
  196. decay_mult: 1
  197. }
  198. param {
  199. lr_mult: 2
  200. decay_mult: 0
  201. }
  202. convolution_param {
  203. num_output: 256
  204. pad: 1
  205. kernel_size: 3
  206. stride: 1
  207. }
  208. }
  209. layer {
  210. name: "relu3_3"
  211. type: "ReLU"
  212. bottom: "conv3_3"
  213. top: "conv3_3"
  214. }
  215. layer {
  216. name: "pool3"
  217. type: "Pooling"
  218. bottom: "conv3_3"
  219. top: "pool3"
  220. pooling_param {
  221. pool: MAX
  222. kernel_size: 2
  223. stride: 2
  224. }
  225. }
  226. layer {
  227. name: "conv4_1"
  228. type: "Convolution"
  229. bottom: "pool3"
  230. top: "conv4_1"
  231. param {
  232. lr_mult: 1
  233. decay_mult: 1
  234. }
  235. param {
  236. lr_mult: 2
  237. decay_mult: 0
  238. }
  239. convolution_param {
  240. num_output: 512
  241. pad: 1
  242. kernel_size: 3
  243. stride: 1
  244. }
  245. }
  246. layer {
  247. name: "relu4_1"
  248. type: "ReLU"
  249. bottom: "conv4_1"
  250. top: "conv4_1"
  251. }
  252. layer {
  253. name: "conv4_2"
  254. type: "Convolution"
  255. bottom: "conv4_1"
  256. top: "conv4_2"
  257. param {
  258. lr_mult: 1
  259. decay_mult: 1
  260. }
  261. param {
  262. lr_mult: 2
  263. decay_mult: 0
  264. }
  265. convolution_param {
  266. num_output: 512
  267. pad: 1
  268. kernel_size: 3
  269. stride: 1
  270. }
  271. }
  272. layer {
  273. name: "relu4_2"
  274. type: "ReLU"
  275. bottom: "conv4_2"
  276. top: "conv4_2"
  277. }
  278. layer {
  279. name: "conv4_3"
  280. type: "Convolution"
  281. bottom: "conv4_2"
  282. top: "conv4_3"
  283. param {
  284. lr_mult: 1
  285. decay_mult: 1
  286. }
  287. param {
  288. lr_mult: 2
  289. decay_mult: 0
  290. }
  291. convolution_param {
  292. num_output: 512
  293. pad: 1
  294. kernel_size: 3
  295. stride: 1
  296. }
  297. }
  298. layer {
  299. name: "relu4_3"
  300. type: "ReLU"
  301. bottom: "conv4_3"
  302. top: "conv4_3"
  303. }
  304. layer {
  305. name: "pool4"
  306. type: "Pooling"
  307. bottom: "conv4_3"
  308. top: "pool4"
  309. pooling_param {
  310. pool: MAX
  311. kernel_size: 2
  312. stride: 2
  313. }
  314. }
  315. layer {
  316. name: "conv5_1"
  317. type: "Convolution"
  318. bottom: "pool4"
  319. top: "conv5_1"
  320. param {
  321. lr_mult: 1
  322. decay_mult: 1
  323. }
  324. param {
  325. lr_mult: 2
  326. decay_mult: 0
  327. }
  328. convolution_param {
  329. num_output: 512
  330. pad: 1
  331. kernel_size: 3
  332. stride: 1
  333. }
  334. }
  335. layer {
  336. name: "relu5_1"
  337. type: "ReLU"
  338. bottom: "conv5_1"
  339. top: "conv5_1"
  340. }
  341. layer {
  342. name: "conv5_2"
  343. type: "Convolution"
  344. bottom: "conv5_1"
  345. top: "conv5_2"
  346. param {
  347. lr_mult: 1
  348. decay_mult: 1
  349. }
  350. param {
  351. lr_mult: 2
  352. decay_mult: 0
  353. }
  354. convolution_param {
  355. num_output: 512
  356. pad: 1
  357. kernel_size: 3
  358. stride: 1
  359. }
  360. }
  361. layer {
  362. name: "relu5_2"
  363. type: "ReLU"
  364. bottom: "conv5_2"
  365. top: "conv5_2"
  366. }
  367. layer {
  368. name: "conv5_3"
  369. type: "Convolution"
  370. bottom: "conv5_2"
  371. top: "conv5_3"
  372. param {
  373. lr_mult: 1
  374. decay_mult: 1
  375. }
  376. param {
  377. lr_mult: 2
  378. decay_mult: 0
  379. }
  380. convolution_param {
  381. num_output: 512
  382. pad: 1
  383. kernel_size: 3
  384. stride: 1
  385. }
  386. }
  387. layer {
  388. name: "relu5_3"
  389. type: "ReLU"
  390. bottom: "conv5_3"
  391. top: "conv5_3"
  392. }
  393. layer {
  394. name: "pool5"
  395. type: "Pooling"
  396. bottom: "conv5_3"
  397. top: "pool5"
  398. pooling_param {
  399. pool: MAX
  400. kernel_size: 2
  401. stride: 2
  402. }
  403. }
  404. layer {
  405. name: "fc6"
  406. type: "Convolution"
  407. bottom: "pool5"
  408. top: "fc6"
  409. param {
  410. lr_mult: 1
  411. decay_mult: 1
  412. }
  413. param {
  414. lr_mult: 2
  415. decay_mult: 0
  416. }
  417. convolution_param {
  418. num_output: 4096
  419. pad: 0
  420. kernel_size: 7
  421. stride: 1
  422. }
  423. }
  424. layer {
  425. name: "relu6"
  426. type: "ReLU"
  427. bottom: "fc6"
  428. top: "fc6"
  429. }
  430. layer {
  431. name: "drop6"
  432. type: "Dropout"
  433. bottom: "fc6"
  434. top: "fc6"
  435. dropout_param {
  436. dropout_ratio: 0.5
  437. }
  438. }
  439. layer {
  440. name: "fc7"
  441. type: "Convolution"
  442. bottom: "fc6"
  443. top: "fc7"
  444. param {
  445. lr_mult: 1
  446. decay_mult: 1
  447. }
  448. param {
  449. lr_mult: 2
  450. decay_mult: 0
  451. }
  452. convolution_param {
  453. num_output: 4096
  454. pad: 0
  455. kernel_size: 1
  456. stride: 1
  457. }
  458. }
  459. layer {
  460. name: "relu7"
  461. type: "ReLU"
  462. bottom: "fc7"
  463. top: "fc7"
  464. }
  465. layer {
  466. name: "drop7"
  467. type: "Dropout"
  468. bottom: "fc7"
  469. top: "fc7"
  470. dropout_param {
  471. dropout_ratio: 0.5
  472. }
  473. }
  474. layer {
  475. name: "score_fr"
  476. type: "Convolution"
  477. bottom: "fc7"
  478. top: "score_fr"
  479. param {
  480. lr_mult: 1
  481. decay_mult: 1
  482. }
  483. param {
  484. lr_mult: 2
  485. decay_mult: 0
  486. }
  487. convolution_param {
  488. num_output: 21
  489. pad: 0
  490. kernel_size: 1
  491. }
  492. }
  493. layer {
  494. name: "upscore"
  495. type: "Deconvolution"
  496. bottom: "score_fr"
  497. top: "upscore"
  498. param {
  499. lr_mult: 0
  500. }
  501. convolution_param {
  502. num_output: 21
  503. bias_term: false
  504. kernel_size: 64
  505. stride: 32
  506. }
  507. }
  508. layer {
  509. name: "score"
  510. type: "Crop"
  511. bottom: "upscore"
  512. bottom: "data"
  513. top: "score"
  514. crop_param {
  515. axis: 2
  516. offset: 19
  517. }
  518. }

从上表两个网络的配置文件中可以看出,FCN32s的网络结构和VGG16的大部分是相同的,特别是前面的卷积层(均用ReLU激活函数)、池化层(均用最大池化),这些层的卷积核大小、步长和填充数(除了第一个卷积层的填充数不一样)都是一样的,不同的地方在于后面的fc6和fc7层,VGG16的这两层是全连接层,FCN32s的是卷积层,还有一点不同是FCN32s去掉了VGG16的fc8层。

但实际上网络卷积层的参数与卷积核的步长和填充数是没有关系的,只与卷积核的大小,以及该层的输入通道数和输出通道数(输出特征图的数目)有关,且最大池化层无需训练参数。所以可以采用直接将VGG16训练好的模型参数初始化FCN32s网络各层的权重和偏置,但由于VGG16的fc6和fc7是全连接层,其参数需要经过一定的排列才能赋值给FCN32s的fc6和fc7层(FCN32s这两层是卷积层)。由此需要surgery.py来实现VGG16参数赋值给FCN32s网络。

surgery.py的源代码如下:


   
     
     
     
     
  1. from __future__ import division
  2. import caffe
  3. import numpy as np
  4. def transplant(new_net, net, suffix=''):
  5. """
  6. Transfer weights by copying matching parameters, coercing parameters of
  7. incompatible shape, and dropping unmatched parameters.
  8. The coercion is useful to convert fully connected layers to their
  9. equivalent convolutional layers, since the weights are the same and only
  10. the shapes are different. In particular, equivalent fully connected and
  11. convolution layers have shapes O x I and O x I x H x W respectively for O
  12. outputs channels, I input channels, H kernel height, and W kernel width.
  13. Both `net` to `new_net` arguments must be instantiated `caffe.Net`s.
  14. """
  15. for p in net.params:
  16. p_new = p + suffix
  17. if p_new not in new_net.params:
  18. print 'dropping', p
  19. continue
  20. for i in range(len(net.params[p])):
  21. if i > (len(new_net.params[p_new]) - 1):
  22. print 'dropping', p, i
  23. break
  24. if net.params[p][i].data.shape != new_net.params[p_new][i].data.shape:
  25. print 'coercing', p, i, 'from', net.params[p][i].data.shape, 'to', new_net.params[p_new][i].data.shape
  26. else:
  27. print 'copying', p, ' -> ', p_new, i
  28. new_net.params[p_new][i].data.flat = net.params[p][i].data.flat
  29. def upsample_filt(size):
  30. """
  31. Make a 2D bilinear kernel suitable for upsampling of the given (h, w) size.
  32. """
  33. factor = (size + 1) // 2
  34. if size % 2 == 1:
  35. center = factor - 1
  36. else:
  37. center = factor - 0.5
  38. og = np.ogrid[:size, :size]
  39. return ( 1 - abs(og[ 0] - center) / factor) * \
  40. ( 1 - abs(og[ 1] - center) / factor)
  41. def interp(net, layers):
  42. """
  43. Set weights of each layer in layers to bilinear kernels for interpolation.
  44. """
  45. for l in layers:
  46. m, k, h, w = net.params[l][ 0].data.shape
  47. if m != k and k != 1:
  48. print 'input + output channels need to be the same or |output| == 1'
  49. raise
  50. if h != w:
  51. print 'filters need to be square'
  52. raise
  53. filt = upsample_filt(h)
  54. net.params[l][ 0].data[range(m), range(k), :, :] = filt
  55. def expand_score(new_net, new_layer, net, layer):
  56. """
  57. Transplant an old score layer's parameters, with k < k' classes, into a new
  58. score layer with k' classes s.t. the first k are the old classes.
  59. """
  60. old_cl = net.params[layer][ 0].num
  61. new_net.params[new_layer][ 0].data[:old_cl][...] = net.params[layer][ 0].data
  62. new_net.params[new_layer][ 1].data[ 0, 0, 0,:old_cl][...] = net.params[layer][ 1].data

源码解读如下:

(1)transplant()函数


   
     
     
     
     
  1. def transplant(new_net, net, suffix=''):
  2.     """
  3.     通过复制匹配的参数来传递权重,强制网络形状不兼容的参数以及丢弃不匹配的参数。
  4.     强制将全连接层转换为等效的卷积层是有用的,因为权重是相同的,只是网络的形状不同。特别地,等效的全连接层和卷积层分别具有形状O x I和O x I x H x W,其中O为输出通道数,I输入通道数,H为卷积核高度和W为卷积核宽度。
  5.     `net`到`new_net`参数都必须实例化`caffe.Net`s。
  6.     
  7.     Transfer weights by copying matching parameters, coercing parameters of
  8.     incompatible shape, and dropping unmatched parameters.
  9.     The coercion is useful to convert fully connected layers to their
  10.     equivalent convolutional layers, since the weights are the same and only
  11.     the shapes are different.  In particular, equivalent fully connected and
  12.     convolution layers have shapes O x I and O x I x H x W respectively for O
  13.     outputs channels, I input channels, H kernel height, and W kernel width.
  14.     Both  `net` to `new_net` arguments must be instantiated `caffe.Net`s.
  15.     """
  16.     for p in net.params:    #循环取出旧网络(例如VGG16网络)中每层的名字(用于两个网络间的匹配)
  17.     p_new = p + suffix  #将p+''赋值给p_new,目的是判断当前旧网络的该层是否也存在于新网络中
  18.         if p_new not in new_net.params:    #如果p所对应的层在新网络中没有,则直接丢弃
  19.             print 'dropping', p
  20.             continue
  21.         #net.params[p]一般有两项,且net.params[p][0]表示保存权重参数的数组,net.params[p][1]表示保存偏置的数组
  22.         for i in range(len(net.params[p])):  
  23.             if i > (len(new_net.params[p_new]) - 1): #丢弃旧网络多余的参数,并退出当前for循环
  24.                 print 'dropping', p, i
  25.                 break
  26.             #如果新网络该层的参数形状与旧网络不同(参数的数目相同),则打印“强制转换”该层;否则打印“复制”该层参数
  27.             if net.params[p][i].data.shape != new_net.params[p_new][i].data.shape:
  28.                 print 'coercing', p, i, 'from', net.params[p][i].data.shape, 'to', new_net.params[p_new][i].data.shape  #对于全连接层转换为全卷积层,需要进行参数强制转换
  29.             else:
  30.                 print 'copying', p, ' -> ', p_new, i
  31.             #无论是强制转换还是复制操作,因为参数数目相同,所以可以直接平铺进行参数的赋值
  32.             new_net.params[p_new][i].data.flat = net.params[p][i].data.flat

此函数的最后一行new_net.params[p_new][i].data.flat = net.params[p][i].data.flat的理解可参见以下例子:


   
     
     
     
     
  1. import numpy as np
  2. a = np.array([[[ 1, 2, 3],[ 4, 5, 6]],[[ 7, 8, 9],[ 10, 11, 12]],[[ 13, 14, 15],[ 16, 17, 18]]])
  3. b=np.zeros([ 6, 3])
  4. b.flat=a.flat
  5. print b

运行结果为:


   
     
     
     
     
  1. [[ 1. 2. 3.]
  2. [ 4. 5. 6.]
  3. [ 7. 8. 9.]
  4. [ 10. 11. 12.]
  5. [ 13. 14. 15.]
  6. [ 16. 17. 18.]]
也即python中的flat属性从a[0][0][0]开始到a[2][1][2]依次平铺展开成一行赋值给相应平铺展开的b,但平铺不会影响原来a和b的形状。

利用此函数将VGG16模型的参数复制给FCN32s网络结果如下:


   
     
     
     
     
  1. copying conv1_1 -> conv1_1 0
  2. copying conv1_1 -> conv1_1 1
  3. copying conv1_2 -> conv1_2 0
  4. copying conv1_2 -> conv1_2 1
  5. copying conv2_1 -> conv2_1 0
  6. copying conv2_1 -> conv2_1 1
  7. copying conv2_2 -> conv2_2 0
  8. copying conv2_2 -> conv2_2 1
  9. copying conv3_1 -> conv3_1 0
  10. copying conv3_1 -> conv3_1 1
  11. copying conv3_2 -> conv3_2 0
  12. copying conv3_2 -> conv3_2 1
  13. copying conv3_3 -> conv3_3 0
  14. copying conv3_3 -> conv3_3 1
  15. copying conv4_1 -> conv4_1 0
  16. copying conv4_1 -> conv4_1 1
  17. copying conv4_2 -> conv4_2 0
  18. copying conv4_2 -> conv4_2 1
  19. copying conv4_3 -> conv4_3 0
  20. copying conv4_3 -> conv4_3 1
  21. copying conv5_1 -> conv5_1 0
  22. copying conv5_1 -> conv5_1 1
  23. copying conv5_2 -> conv5_2 0
  24. copying conv5_2 -> conv5_2 1
  25. copying conv5_3 -> conv5_3 0
  26. copying conv5_3 -> conv5_3 1
  27. coercing fc6 0 from ( 4096, 25088) to ( 4096, 512, 7, 7)
  28. copying fc6 -> fc6 1
  29. coercing fc7 0 from ( 4096, 4096) to ( 4096, 4096, 1, 1)
  30. copying fc7 -> fc7 1
  31. dropping fc8

从中可以看出conv5_3之前的所有卷积层的权重和偏置参数是直接复制VGG16模型相对应层的参数的,而fc6和fc7层是进行了强制转化,将原来全连接层的权重参数强制赋值给了卷积层的卷积核参数,fc6和fc7的偏置仍旧采用直接复制的形式(其实强制转化和复制都是通过python的flat函数来完成的,本质是一样的,只是概念上加以区分而已)。

也可以从中看出为何FCN32s的fc6层的卷积核大小是7*7,为何fc7层卷积核的大小为1*1,目的就是为了使得强制转化前后参数的数目不变。

coercing fc6 0 from (4096, 25088) to (4096, 512, 7, 7)
   
     
     
     
     

其中,512*7*7=25088。

(2)upsample_filt()函数


   
     
     
     
     
  1. #用于产生双线性核,来初始化反卷积层的卷积核权重参数
  2. def upsample_filt(size):
  3. """
  4. Make a 2D bilinear kernel suitable for upsampling of the given (h, w) size.
  5. """
  6. factor = (size + 1) // 2 #//在python中是除号,但结果取整(向下取整)
  7. if size % 2 == 1:
  8. center = factor - 1
  9. else:
  10. center = factor - 0.5 #center为插值中心点
  11. og = np.ogrid[:size, :size] #ogrid用于产生从0~(size-1)的两个序列(前者为列向量,后者为行向量)
  12. #返回一个size×size大小的双线性核(除以factor是为了归一化,使得四个插值点的插值权重之和为1)
  13. return ( 1 - abs(og[ 0] - center) / factor) * \
  14. ( 1 - abs(og[ 1] - center) / factor)

拿size=4为例,该函数的运行结果如下:


   
     
     
     
     
  1. [[ 0.0625 0.1875 0.1875 0.0625]
  2. [ 0.1875 0.5625 0.5625 0.1875]
  3. [ 0.1875 0.5625 0.5625 0.1875]
  4. [ 0.0625 0.1875 0.1875 0.0625]]

双线性核是由双线性插值而来,但也不完全是双线性插值,在说明原理前,先放一张卷积及其反卷积的原理动图(上者为卷积过程,下者为对应的反卷积过程):

(注:双线性插值可参见https://blog.csdn.net/xbinworld/article/details/65660665)

FCN源码解读之surgery.py_第1张图片

FCN源码解读之surgery.py_第2张图片

从反卷积动图中可以看出,在反卷积前需要在输入图像(蓝色)的每一像素间进行补零,且补的零的维数是s-1(s为反卷积所对应的卷积过程的步长,即上者卷积动图中的步长为2,故下者反卷积过程每一像素间补了1维的0),且反卷积过程的步长是固定为1的。具体大家可以参见https://github.com/vdumoulin/conv_arithmetic

FCN中的upsample是通过反卷积实现的,假如上采样因子为2,即反卷积的步长为s=2(这个步长其实指的是反卷积所对应的逆过程即卷积过程中的步长,反卷积的步长固定为1),卷积核大小k=4。卷积核的权重初始化利用upsample_filt(k)得到,也即上述例子中的结果。大家可能会有疑惑,双线性插值是通过邻近的四个点进行插值的,但这卷积核有k*k个元素,即总共16个点进行插值(说白了就是16个点的加权求和),这也就是为何一开始要说明反卷积的实现过程了,从动图中可以看出输入图像每一像素间是有补零的,拿输入为3*3为例,其反卷积前需要进行补零,也即下图:

FCN源码解读之surgery.py_第3张图片

此时反卷积过程如下(蓝色的为原3*3的输入图像像素点,白色的为补零的点,灰色的为卷积核):

FCN源码解读之surgery.py_第4张图片

从上述反卷积过程中可以看出,卷积核内部最多只有输入图像的四个像素点(且这四个像素点所对应的卷积核所在位置的四个权重之和为1,也就对应了代码里的归一化操作),这也正是为何说双线性核是由双线性插值而来的,但又不完全是,原因在于其中的归一化操作,如下图所示:

FCN源码解读之surgery.py_第5张图片

其中,Center的坐标值即为upsample_filt()函数中的center;图中每个格中的白色数字即为每一格的坐标值,拿第二个格为例,其坐标为(0,1),则该位置所对应的权重为:

先从x轴方向开始:a=1-abs(1-center)/factor=1-abs(1-1.5)/2=0.75

再是y轴方向:b=1-abs(0-center)/factor=1-abs(0-1.5)/2=0.25

故最终的权重为:w=a*b=0.1875(也正是例子中第二个格所对应的权重)

此过程也即双线性插值过程,只是多了除以factor这一归一化操作,也就不是完全意义上的双线性插值了。

最后拿自己编写的C++程序来直观说明一下这个双线性核进行插值的效果,源代码如下:


   
     
     
     
     
  1. #include
  2. #include
  3. #include
  4. using namespace std;
  5. using namespace cv;
  6. Mat kernel(int size)
  7. {
  8. int factor = (size + 1) / 2;
  9. double center = factor - 0.5;
  10. if (size % 2 == 1)
  11. center = factor - 1;
  12. Mat tempk = Mat::zeros(size, 1,CV_32F);
  13. for ( int i = 0; i < size; i++)
  14. tempk.at< float>(i, 0) = 1 - abs(i - center)* 1.0 / factor;
  15. Mat ker = Mat::zeros(size, size, CV_32F);
  16. ker = tempk*tempk.t();
  17. return ker;
  18. }
  19. int main()
  20. {
  21. Mat src=imread( "image.jpg");
  22. int col = src.cols,row=src.rows;
  23. int k = 8, s = 4;
  24. int outh = (row - 1)*s + 4;
  25. int outw = (col - 1)*s + 4;
  26. Mat result = Mat::zeros(outh, outw, CV_8UC3);
  27. outh = outh - 1 + k;
  28. outw = outw - 1 + k;
  29. Mat dst = Mat::zeros(outh, outw, CV_8UC3);
  30. for ( int i = 1; i < outh; i += s)
  31. {
  32. Vec3b* data1 = dst.ptr(i);
  33. Vec3b* data2 = src.ptr(i/s);
  34. for ( int j = 1; j < outw; j += s)
  35. data1[j] = data2[j/s];
  36. }
  37. imwrite( "卷积补零图.jpg", dst);
  38. Mat mask = kernel(k); //调用kernel()函数生成双线性核
  39. for ( int i = 0; i < dst.rows-k+ 1; i++)
  40. {
  41. Vec3b* data3 = result.ptr(i);
  42. for ( int j = 0; j < dst.cols-k+ 1; j++)
  43. {
  44. for ( int s = 0; s < k; s++)
  45. for ( int t = 0; t < k; t++)
  46. {
  47. data3[j]( 0) += mask.at< float>(s, t)*dst.at(i + s, j + t)[ 0];
  48. data3[j]( 1) += mask.at< float>(s, t)*dst.at(i + s, j + t)[ 1];
  49. data3[j]( 2) += mask.at< float>(s, t)*dst.at(i + s, j + t)[ 2];
  50. }
  51. }
  52. }
  53. imwrite( "最终插值图.jpg", result);
  54. waitKey();
  55. return 0;
  56. }

当反卷积核大小为k=8,步长为s=4时,结果如下(左上为原图,右上为卷积补零图(为了和原图放在同一行,进行了缩放,实际大小为1343*2007),下中为插值后的图像(也进行了缩放,实际大小为1336*2000)):

FCN源码解读之surgery.py_第6张图片 FCN源码解读之surgery.py_第7张图片

FCN源码解读之surgery.py_第8张图片

从上图可以看出,在放大倍数不大的情况下,使用双线性核进行插值得到的效果还是挺好的。

(3)interp()函数


   
     
     
     
     
  1. #调用upsample_filt()函数进行反卷积核的权重赋值(初始化)
  2. def interp(net, layers):
  3. """
  4. Set weights of each layer in layers to bilinear kernels for interpolation.
  5. """
  6. for l in layers:
  7. m, k, h, w = net.params[l][ 0].data.shape #net.params[l][0]为卷积核权重(不是偏置)
  8. if m != k and k != 1: #反卷积层的输入和输出图像数目应该是相同的,或者输出图像数目为1
  9. print 'input + output channels need to be the same or |output| == 1'
  10. raise
  11. if h != w: #卷积核需要是方形的
  12. print 'filters need to be square'
  13. raise
  14. filt = upsample_filt(h) #计算卷积核权重参数
  15. #将filt赋值给每一卷积核,即反卷积层的每一卷积核的权重都一样,均为双线性插值初始化
  16. net.params[l][ 0].data[range(m), range(k), :, :] = filt

(4)expand_score()函数


   
     
     
     
     
  1. #拓展score层,即将旧网络该层的参数移植到新网络上(注:旧网络该层的分类数k<新网络该层的分类数k')
  2. def expand_score(new_net, new_layer, net, layer):
  3. """
  4. Transplant an old score layer's parameters, with k < k' classes, into a new
  5. score layer with k' classes s.t. the first k are the old classes.
  6. """
  7. old_cl = net.params[layer][ 0].num

你可能感兴趣的:(Deep,Learning,FCN,caffe)