训练好的模型文件参数权重可以保存在‘.pth’文件中,从该文件读取参数权重的数据:
import numpy as np
import torch

# Export the parameters stored in a PyTorch checkpoint to a .npy file.
premodel = 'xxxx.pth'
param_dict = {}
pretrained_dict = torch.load(premodel, map_location='cpu')
# Some checkpoints wrap the parameters under a 'state_dict' key. Test for the
# key itself: comparing the string 'state_dict' with the stored value is never
# true and raises KeyError when the key is absent.
if 'state_dict' in pretrained_dict:
    pretrained_dict = pretrained_dict['state_dict']
for layer, value in pretrained_dict.items():
    # Keys are normalised to str; detach() drops the autograd graph.
    param_dict[str(layer)] = value.detach()
# The dict is stored as a pickled object array, so it has to be read back with
# np.load(..., allow_pickle=True).tolist().
np.save('xxx.npy', param_dict)
推荐使用模型可视化软件 Netron 查看 caffe 模型的网络结构,便于对照结构进行修改。同时需要了解 caffe 的基本层类型以及 prototxt 文件的基本格式。
示例:
import caffe
from pylab import *
# `layers` and `params` are objects exported by the caffe package (from
# caffe.net_spec), not submodules, so `import caffe.layers as L` cannot work.
from caffe import layers as L
from caffe import params as P


def net(dbfile, batch_size):
    """Build a two-layer fully connected MNIST classifier with NetSpec.

    Args:
        dbfile: Path of the LMDB database read by the data layer.
        batch_size: Number of samples per batch produced by the data layer.

    Returns:
        The network definition returned by ``NetSpec.to_proto()``; ``str()``
        of it is the prototxt text.
    """
    n = caffe.NetSpec()
    # Data layer: two tops (data, label); inputs are scaled by 1/256.
    n.data, n.label = L.Data(source=dbfile, backend=P.Data.LMDB,
                             batch_size=batch_size, ntop=2,
                             transform_param=dict(scale=0.00390625))
    # Fully connected layer; "ip1" is the layer name.
    n.ip1 = L.InnerProduct(n.data, num_output=500,
                           weight_filler=dict(type='xavier'))
    n.relu1 = L.ReLU(n.ip1, in_place=True)
    n.ip2 = L.InnerProduct(n.relu1, num_output=10,
                           weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.ip2, n.label)
    # Accuracy is only evaluated in the TEST phase.
    n.accu = L.Accuracy(n.ip2, n.label, include={'phase': caffe.TEST})
    return n.to_proto()
# Write the train and test network definitions produced by net() to disk.
# Each entry is (output prototxt, LMDB source, batch size).
for proto_path, lmdb_path, batch in (
    ('auto_train00.prototxt',
     '/home/hbk/caffe/examples/mnist/mnist_train_lmdb', 64),
    ('auto_test00.prototxt',
     '/home/hbk/caffe/examples/mnist/mnist_test_lmdb', 100),
):
    with open(proto_path, 'w') as f:
        f.write(str(net(lmdb_path, batch)))
# Create the solver from its prototxt, which holds the training
# hyper-parameters (the class is SGDSolver; "SGDSovlver" does not exist).
solver = caffe.SGDSolver('hbk_mnist_solver_py.prototxt')
# One forward pass on the first test net.
solver.test_nets[0].forward()
# Advance training by a single iteration.
solver.step(1)
# Run the optimisation as configured in the solver file.
solver.solve()
solver.prototxt的代码示例。具体的参数说明可以自行搜索
# Paths of the train/test net definition files
train_net: "auto_train00.prototxt"
test_net: "auto_test00.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Number of training iterations between two test (validation) runs
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Number of iterations between two progress log outputs
display: 100
# The maximum number of iterations
max_iter: 10001
# Save intermediate results (snapshots)
snapshot: 5000
snapshot_prefix: "snapshot"
# solver mode: CPU or GPU
solver_mode: GPU
以数据层为例,和上面的Python代码相对应。
# Training data layer: active only in the TRAIN phase; produces the "data"
# and "label" blobs from the training LMDB.
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
# 0.00390625 = 1/256
scale: 0.00390625
}
data_param {
source: "/home/hbk/caffe/examples/mnist/mnist_train_lmdb"
batch_size: 64
backend: LMDB
}
}
# Test data layer: active only in the TEST phase; produces the "data" and
# "label" blobs from the test LMDB.
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
# 0.00390625 = 1/256
scale: 0.00390625
}
data_param {
source: "/home/hbk/caffe/examples/mnist/mnist_test_lmdb"
batch_size: 100
backend: LMDB
}
}
以inceptionv2网络为例,代码如下
# Load the parameter dict exported from the trained PyTorch model. The .npy
# file holds a pickled dict, so pickling must be allowed and the 0-d object
# array unwrapped with tolist() before it can be used as a dict.
weights = np.load('.npy', allow_pickle=True).tolist()
# Load the Caffe network definition.
net_file = '.prototxt'
caffemodel_file = '.caffemodel'
net = caffe.Net(net_file, caffe.TEST)
# Build the correspondence between the parameter names of the two frameworks
# (PyTorch parameter name -> Caffe layer name).
param_dict = {}
for key in net.params.keys():
    layer = key.replace('/', '.')
    # Rewrite the Caffe layer name into the matching PyTorch name, then use it
    # to look the value up in the weights dict.
    if len(layer.split('_')) > 2 and 'conv' in layer:
        layer = layer.replace('conv', 'module.inception_')
    # ... further model-specific renaming rules go here ...
    layer_t2 = layer + '.weight'
    # Copy the weight into the first blob of the layer.
    if layer_t2 in weights:
        param_dict[layer_t2] = key
        net.params[key][0].data[...] = weights[layer_t2].detach().reshape(
            net.params[key][0].data.shape)
    # ... repeat the same steps for bias, running_mean and running_var ...
# Check that nothing was left out.
mapped_caffe_layers = set(param_dict.values())
for key in weights:
    # param_dict is keyed by PyTorch names, so test against its keys.
    if key not in param_dict and 'tracked' not in key:
        print('miss', key)
for key in net.params.keys():
    if key not in mapped_caffe_layers:
        print('miss', key)
for i in param_dict.keys():
    print("pytorch name", i)
for j in param_dict.values():
    print("caffe name:", j)
net.save(caffemodel_file)
加载caffe模型,在相同的数据集进行测试,查看性能,保证两者差异不大。
注意:
1,caffe的标准化计算采用 (x-mean)*scale 的方式,pytorch 在训练时的预处理需要与之保持一致。
2,caffe默认使用的opencv读取图像,为BGR通道,建议pytorch保持同步。
1.保存pytorch参数
import torch
import numpy as np
import sys
import os
import os.path as osp
# Make the parent directory of this script importable.
cur_dir = osp.split(osp.realpath(__file__))[0]
src_dir = osp.join(cur_dir, '../')
if src_dir not in sys.path:
    sys.path.append(src_dir)
from collections import OrderedDict
parm_dict = {}


def main(
    premodel=r'/data1/yaoyixuan/model_analysis/video/4L_32F_Att5/26pth/checkpoint_E26.pth',
    layer_name_file='/data1/yaoyixuan/caffe_test/pytorch_layer_name.txt',
    output_file='/data1/yaoyixuan/caffe_test/inception_maxpooling_len8_pad3_nobn_all3d.npy',
):
    """Dump the parameters of a PyTorch checkpoint for later import into Caffe.

    Fills the module-level ``parm_dict`` with ``name -> detached tensor``,
    writes one (renamed) parameter name per line to ``layer_name_file`` and
    saves ``parm_dict`` to ``output_file`` with ``np.save``.

    Args:
        premodel: Path of the checkpoint read with ``torch.load``.
        layer_name_file: Text file receiving the parameter names.
        output_file: Destination ``.npy`` file for the parameter dict.
    """
    pretrained_dict = torch.load(premodel, map_location='cpu')
    # Unwrap checkpoints that keep the parameters under 'state_dict'.
    if 'state_dict' in pretrained_dict:
        pretrained_dict = pretrained_dict['state_dict']
    with open(layer_name_file, 'w+') as f:
        for layer, value in pretrained_dict.items():
            layer = str(layer)
            temp_name = layer
            if 'module.inception_' in temp_name:
                # str.replace returns a new string; the result must be kept.
                temp_name = temp_name.replace('module.inception_', 'conv')
            print(temp_name)
            f.write(temp_name + '\n')
            # The saved dict keeps the original PyTorch name as key.
            parm_dict[layer] = value.detach()
    np.save(output_file, parm_dict)
    print(parm_dict.keys())


if __name__ == '__main__':
    main()
2,加载进caffe模型中
import sys, os
import numpy as np
from PIL import Image
caffe_root ='/data1/darwincaffe2.0/'
sys.path.insert(0,caffe_root+'python')
import caffe
# Work from the Caffe root directory and run on GPU 0.
os.chdir(caffe_root)
caffe.set_mode_gpu()
caffe.set_device(0)
parm_dict = {}
net_file = '/data1/caffe_test/deploy_4L32FAtt5.prototxt'
caffemodel_file = '/data1/caffe_test/caffe_E2.caffemodel'
# Instantiate the network from its definition only (no weights file given).
net = caffe.Net(net_file, caffe.TEST)
# Print every Caffe layer that owns parameters and list them in a text file,
# one name per line.
with open('/data1/caffe_test/caffe_layer_name_E2.txt', 'w+') as f:
    for param_name in net.params.keys():
        print('param_name', param_name)
        f.write(param_name + '\n')
# Collect the current Caffe blobs into parm_dict under PyTorch-style names.
# The layer kind is inferred from the number of blobs and the layer name.
for param_name in net.params.keys():
    blobs = net.params[param_name]
    if len(blobs) == 2 and 'fc' not in param_name:
        # Two blobs, not an fc layer: stored as <name>.conv.weight / .conv.bias.
        print('param_name ll', param_name)
        parm_dict[param_name + '.conv.weight'] = blobs[0].data  # weight
        parm_dict[param_name + '.conv.bias'] = blobs[1].data  # bias
    elif len(blobs) == 4:
        # Four blobs: stored as <prefix>.bn.{weight,bias,running_mean,running_var},
        # where <prefix> is the part of the layer name before the first '/'.
        print('param_name ==4', param_name)
        prefix = param_name.split('/')[0]
        for index, suffix in enumerate(
                ('weight', 'bias', 'running_mean', 'running_var')):
            data = blobs[index].data
            # Flatten to a vector of length shape[1]; assumes every other
            # dimension of the blob is 1 -- TODO confirm.
            parm_dict[prefix + '.bn.' + suffix] = data.reshape(data.shape[1])
    elif len(blobs) == 2 and 'fc' in param_name:
        # Two blobs on an fc layer: stored as <name>.weight / <name>.bias.
        print('param_name ==fc', param_name)
        parm_dict[param_name + '.weight'] = blobs[0].data
        parm_dict[param_name + '.bias'] = blobs[1].data
    else:
        print("else", param_name)
# Load the parameter dict exported from PyTorch. It was written with np.save
# as a pickled object, which np.load only reads when allow_pickle=True;
# tolist() then unwraps the 0-d object array back into the dict.
# (alternative file: inception_maxpooling_len8_pad3_nobn_all3d_4L32FAtt5.npy)
weights = np.load('/data1/caffe_test/inception_E2.npy', allow_pickle=True)
weights = weights.tolist()
# print("weight ",weights)
# Maps every PyTorch parameter name that was copied to its Caffe layer name.
param_dict = {}

# (suffix appended to the PyTorch module name, index of the Caffe blob that
# receives the value), in the order the lookups are attempted.
_PARAM_SLOTS = (
    ('.conv.weight', 0),
    ('.weight', 0),
    ('.bias', 1),
    ('.running_mean', 2),
    ('.running_var', 3),
)


def _caffe_to_pytorch_name(key):
    """Translate a Caffe layer name into the matching PyTorch module name.

    NOTE(review): the source lost its indentation; the rules are applied as a
    flat sequence of independent ``if`` statements -- confirm against the
    original script.
    """
    layer = key.replace('/', '.')
    if len(layer.split('_')) > 2 and 'conv' in layer:
        layer = layer.replace('conv', 'module.inception_')
        layer = layer.replace('_t', '.t')
        layer = layer.replace('_s', '.s')
    if 'fc' in layer:
        layer = layer.replace('fc', 'module.fc')
    if 'attention' in layer:
        layer = 'module.' + layer.replace('on_', 'on.')
    if 'mask' in layer:
        layer = layer.replace('mask', 'conv_mask')
    # Everything else (including the plain 'conv1' layer) just gets the
    # 'module.' prefix.
    if 'module' not in layer:
        layer = 'module.' + layer
    if 'fc_spatial' in layer:
        layer = 'module.attention.fc_spatial'
    return layer


for key in net.params.keys():
    # caffe name => pytorch name
    layer = _caffe_to_pytorch_name(key)
    print("changename", layer)
    # NOTE: a disabled variant of this script copied 'module.conv1.conv.weight'
    # with input channels 0 and 2 swapped (weights[:, 2] -> data[:, 0], etc.),
    # presumably to switch between RGB and BGR input order -- confirm whether
    # the deployed model needs it.
    for suffix, blob_index in _PARAM_SLOTS:
        torch_name = layer + suffix
        if torch_name not in weights:
            continue
        param_dict[torch_name] = key
        blob = net.params[key][blob_index]
        # reshape (not view) so non-contiguous tensors are handled as well.
        blob.data[...] = weights[torch_name].detach().reshape(blob.data.shape)
# Report PyTorch parameters that were never copied. Names containing
# 'tracked' are skipped (presumably BatchNorm's num_batches_tracked -- confirm).
for key in weights:
    if key not in param_dict and 'tracked' not in key:
        print('miss', key)
# Report Caffe layers that received no PyTorch parameter at all. The set is
# built once so each membership test is O(1).
mapped_caffe_layers = set(param_dict.values())
for key in net.params.keys():
    if key not in mapped_caffe_layers:
        print('miss', key)
# List both sides of the final mapping.
for i in param_dict.keys():
    print("pytorchname", i)
for i in param_dict.values():
    print("caffe name", i)
# Write the populated network out as a .caffemodel file.
net.save(caffemodel_file)