PyTorch模型转caffe

PyTorch模型转caffe简单教程

    • 1.将Pytorch 模型参数名和对应权重保留,存成字典,存入npy文件
    • 2. 建立caffe的prototxt文件。对应pytorch的网络结构,参数名字要有对应规律,可以使用python接口写,然后自动生成。
      • 1.手写prototxt文件,根据pytorch的模型。
      • 2.采用python接口自动生成prototxt文件
      • 2,自己手写,注意网络名字,采用和pytorch形式匹配的形式
    • 3. 建立caffemodel文件,根据参数映射,将pytorch参数权重赋予caffe
    • 4.推测。对caffemodel进行测试,看看能不能得到对应结果
    • 完整代码如下

1.将Pytorch 模型参数名和对应权重保留,存成字典,存入npy文件

训练好的模型参数权重通常保存在‘.pth’文件中,可以从该文件中读取参数名及其对应的权重:

# Path of the trained PyTorch checkpoint.
premodel = 'xxxx.pth'
# Load on the CPU so that no GPU is required for the export.
pretrained_dict = torch.load(premodel, map_location='cpu')
# If the checkpoint keeps its weights under the 'state_dict' key, unwrap it;
# otherwise treat the loaded object as the state dict itself.
if 'state_dict' in pretrained_dict:
    pretrained_dict = pretrained_dict['state_dict']
# Parameter name -> detached tensor.
param_dict = {str(layer): value.detach() for layer, value in pretrained_dict.items()}

# np.save pickles the dict, so it has to be read back with
# np.load(..., allow_pickle=True).
np.save('xxx.npy', param_dict)

2. 建立caffe的prototxt文件。对应pytorch的网络结构,参数名字要有对应规律,可以使用python接口写,然后自动生成。

1.手写prototxt文件,根据pytorch的模型。

推荐使用模型可视化软件Netron查看caffe网络结构,便于对照pytorch模型进行修改。手写之前需要先了解caffe的基本层类型和prototxt的基本格式。

2.采用python接口自动生成prototxt文件

示例:

import caffe
from pylab import *
# `layers` and `params` are objects exported by the caffe package rather than
# submodules, so they are imported with `from caffe import ...`.
from caffe import layers as L
from caffe import params as P


def net(dbfile, batch_size):
    """Describe the example MNIST classifier and return its network definition.

    Args:
        dbfile: Path of the LMDB database the data layer reads from.
        batch_size: Batch size of the data layer.

    Returns:
        The result of ``NetSpec.to_proto()``; ``str()`` of it is the text
        written to the prototxt files below.
    """
    n = caffe.NetSpec()
    # Data layer with two tops (data, label); inputs are scaled by 1/256.
    n.data, n.label = L.Data(source=dbfile, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                             transform_param=dict(scale=0.00390625))
    # Fully connected layer; the attribute name (ip1) becomes the layer name.
    n.ip1 = L.InnerProduct(n.data, num_output=500, weight_filler=dict(type='xavier'))
    n.relu1 = L.ReLU(n.ip1, in_place=True)
    n.ip2 = L.InnerProduct(n.relu1, num_output=10, weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.ip2, n.label)
    n.accu = L.Accuracy(n.ip2, n.label, include={'phase': caffe.TEST})
    return n.to_proto()


with open('auto_train00.prototxt', 'w') as f:
    f.write(str(net('/home/hbk/caffe/examples/mnist/mnist_train_lmdb', 64)))
with open('auto_test00.prototxt', 'w') as f:
    f.write(str(net('/home/hbk/caffe/examples/mnist/mnist_test_lmdb', 100)))
# Create the solver from its prototxt file, which holds the training settings.
solver = caffe.SGDSolver('hbk_mnist_solver_py.prototxt')
solver.test_nets[0].forward()

solver.step(1)
solver.solve()
   

solver.prototxt的代码示例。具体的参数说明可以自行搜索

# Paths of the train/test net definition files
train_net: "auto_train00.prototxt"
test_net: "auto_test00.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100

# Run a test pass every test_interval training iterations
test_interval: 500

# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005

# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75

# Print progress information every `display` iterations
display: 100
# The maximum number of iterations
max_iter: 10001
# Save intermediate results (snapshots)
snapshot: 5000
snapshot_prefix: "snapshot"

# solver mode: CPU or GPU
solver_mode: GPU

2,自己手写,注意网络名字,采用和pytorch形式匹配的形式

以数据层为例,和上面的Python代码相对应。

# Training data (this layer is only included in the TRAIN phase)
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    scale: 0.00390625
  }
  data_param {
    source: "/home/hbk/caffe/examples/mnist/mnist_train_lmdb"
    batch_size: 64
    backend: LMDB
  }
}
# Test data (this layer is only included in the TEST phase)
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    scale: 0.00390625
  }
  data_param {
    source: "/home/hbk/caffe/examples/mnist/mnist_test_lmdb"
    batch_size: 100
    backend: LMDB
  }
}

3. 建立caffemodel文件,根据参数映射,将pytorch参数权重赋予caffe

以inceptionv2网络为例,代码如下

# Load the weights exported from the trained PyTorch model. The file is
# expected to hold a pickled dict (parameter name -> tensor), which needs
# allow_pickle=True; .item() unwraps the 0-d object array into the dict.
weights = np.load('.npy', allow_pickle=True).item()
# Load the caffe network definition.
net_file = '.prototxt'
caffemodel_file = '.caffemodel'
net = caffe.Net(net_file, caffe.TEST)

# Maps every matched PyTorch parameter name to the caffe layer that received it.
param_dict = {}
for key in net.params.keys():
    layer = key.replace('/', '.')
    # Rewrite the caffe layer name into the corresponding PyTorch name so the
    # weight dict can be queried with it.
    if len(layer.split('_')) > 2 and 'conv' in layer:
        # Model-specific renaming goes here, for example:
        #     layer = layer.replace('conv', 'module.inception_')
        pass
    # ... further model-specific renaming rules ...
    layer_t2 = layer + '.weight'
    # Copy the weight tensor into blob 0 of the caffe layer.
    if layer_t2 in weights:
        param_dict[layer_t2] = key
        net.params[key][0].data[...] = weights[layer_t2].detach().reshape(net.params[key][0].data.shape)
    # ... repeat the same pattern for '.bias' (blob 1), '.running_mean'
    # (blob 2) and '.running_var' (blob 3) ...

# Check that nothing was left out: first PyTorch parameters without a caffe
# layer, then caffe layers that received no parameter.
for key in weights:
    if key not in param_dict and 'tracked' not in key:
        print('miss', key)
for key in net.params.keys():
    if key not in param_dict.values():
        print('miss', key)
for i in param_dict.keys():
    print("pytorch name", i)
for j in param_dict.values():
    print("caffe name:", j)
net.save(caffemodel_file)

4.推测。对caffemodel进行测试,看看能不能得到对应结果

加载caffe模型,在相同的数据集上进行测试,对比性能,确保caffe模型与pytorch模型的结果差异不大。
注意:
1,caffe的标准化计算采用 (x-mean)*scale 的方式,pytorch在训练时的预处理要与之保持一致。
2,caffe默认使用opencv读取图像,通道顺序为BGR,建议pytorch训练时也采用相同的通道顺序。

完整代码如下

1.保存pytorch参数

import torch
import numpy as np
import sys
import os
import os.path as osp
# Directory that contains this script.
cur_dir = osp.dirname(osp.realpath(__file__))
# Put the parent directory on the import path, unless it is already there.
src_dir = osp.join(cur_dir, '../')
if not any(entry == src_dir for entry in sys.path):
    sys.path.append(src_dir)
from collections import OrderedDict

# Filled by main(): parameter name -> detached tensor.
parm_dict = dict()
def main():
    """Export the weights of a PyTorch checkpoint to a ``.npy`` file.

    Loads the checkpoint on the CPU, writes one parameter name per line to a
    text file (with ``module.inception_`` rewritten to ``conv``), stores the
    detached tensors in the module-level ``parm_dict`` under their original
    names, and saves that dict with ``np.save``.
    """
    premodel = r'/data1/yaoyixuan/model_analysis/video/4L_32F_Att5/26pth/checkpoint_E26.pth'
    pretrained_dict = torch.load(premodel, map_location='cpu')
    # If the checkpoint keeps its weights under 'state_dict', unwrap it;
    # otherwise treat the loaded object as the state dict itself.
    if 'state_dict' in pretrained_dict:
        pretrained_dict = pretrained_dict['state_dict']
    # The context manager closes the listing even if the export fails midway.
    with open('/data1/yaoyixuan/caffe_test/pytorch_layer_name.txt', 'w+') as f:
        for layer, value in pretrained_dict.items():
            layer = str(layer)
            temp_name = layer
            if 'module.inception_' in temp_name:
                # str.replace returns a new string, so the result must be kept.
                temp_name = temp_name.replace('module.inception_', 'conv')
                print(temp_name)
            f.write(temp_name + '\n')
            parm_dict[layer] = value.detach()
    # np.save pickles the dict; read it back with allow_pickle=True.
    np.save('/data1/yaoyixuan/caffe_test/inception_maxpooling_len8_pad3_nobn_all3d.npy', parm_dict)
    print(parm_dict.keys())

if __name__ == '__main__':

    main()

2,加载进caffe模型中



import sys, os

import numpy as np

from PIL import Image

# Root directory of the caffe build used by this script.
caffe_root ='/data1/darwincaffe2.0/'

# Put that build's Python bindings first on the import path so the
# `import caffe` below finds them.
sys.path.insert(0,caffe_root+'python')

import caffe

os.chdir(caffe_root)
caffe.set_mode_gpu()

caffe.set_device(0)

# Filled further down with caffe blob data keyed by PyTorch-style names.
parm_dict={}

# Network definition to load.
net_file = '/data1/caffe_test/deploy_4L32FAtt5.prototxt'

# Output path handed to net.save() at the end of the script.
caffemodel_file = '/data1/caffe_test/caffe_E2.caffemodel'

net = caffe.Net(net_file, caffe.TEST)

# Write the name of every caffe layer that owns parameters, one per line.
with open('/data1/caffe_test/caffe_layer_name_E2.txt', 'w+') as f:
    for param_name in net.params.keys():
        print('param_name', param_name)
        f.write(param_name + '\n')
# Collect the blobs of every caffe layer in parm_dict under PyTorch-style
# parameter names. The branch is chosen from the number of blobs and from
# whether the layer name contains 'fc'.
for param_name in net.params.keys():
    blobs = net.params[param_name]
    if len(blobs) == 2 and 'fc' not in param_name:
        # Two blobs on a non-fc layer: stored as conv weight and conv bias.
        print('param_name ll', param_name)
        parm_dict[param_name + '.conv.weight'] = blobs[0].data
        parm_dict[param_name + '.conv.bias'] = blobs[1].data
    elif len(blobs) == 4:
        # Four blobs: stored as bn weight, bias, running_mean, running_var,
        # keyed by the part of the layer name before the first '/'.
        print('param_name ==4', param_name)
        prefix = param_name.split('/')[0]
        bn_suffixes = ('.bn.weight', '.bn.bias', '.bn.running_mean', '.bn.running_var')
        for index, suffix in enumerate(bn_suffixes):
            data = blobs[index].data
            # Flatten to a vector whose length is the blob's second dimension.
            # NOTE(review): assumes every other dimension is 1 - confirm.
            parm_dict[prefix + suffix] = data.reshape(data.shape[1])
    elif len(blobs) == 2:
        # Only fc layers reach this branch; non-fc layers matched above.
        print('param_name ==fc', param_name)
        parm_dict[param_name + '.weight'] = blobs[0].data
        parm_dict[param_name + '.bias'] = blobs[1].data
    else:
        print("else", param_name)

# The .npy file is expected to hold a pickled dict (parameter name -> tensor),
# which np.load only reads with allow_pickle=True. Unpickling can execute
# arbitrary code, so load only files you created yourself.
# Alternative export: inception_maxpooling_len8_pad3_nobn_all3d_4L32FAtt5.npy
weights = np.load('/data1/caffe_test/inception_E2.npy', allow_pickle=True)

# tolist() on the 0-d object array returns the stored dict itself.
weights = weights.tolist()

# print("weight ",weights)

# Maps every matched PyTorch parameter name to the caffe layer that received it.
param_dict = {}


def _pytorch_layer_name(caffe_name):
    """Translate a caffe layer name into the prefix of its PyTorch parameter names."""
    layer = caffe_name.replace('/', '.')
    if len(layer.split('_')) > 2 and 'conv' in layer:
        layer = layer.replace('conv', 'module.inception_')
        layer = layer.replace('_t', '.t')
        layer = layer.replace('_s', '.s')
    if 'fc' in layer:
        layer = layer.replace('fc', 'module.fc')
    if 'attention' in layer:
        layer = 'module.' + layer.replace('on_', 'on.')
        if 'mask' in layer:
            layer = layer.replace('mask', 'conv_mask')
    # Any name that has not received the prefix yet (e.g. 'conv1') gets it here.
    if 'module' not in layer:
        layer = 'module.' + layer
    # The rules above mangle this name, so it is set explicitly.
    if 'fc_spatial' in layer:
        layer = 'module.attention.fc_spatial'
    return layer


# (suffix appended to the PyTorch prefix, index of the caffe blob that receives it)
_BLOB_FOR_SUFFIX = (
    ('.conv.weight', 0),
    ('.weight', 0),
    ('.bias', 1),
    ('.running_mean', 2),
    ('.running_var', 3),
)
# NOTE(review): '.conv.bias' is not copied; if the PyTorch convolutions have a
# bias it only shows up in the 'miss' report - confirm that this is intended.

# Optional, disabled in the original script: presumably an RGB<->BGR swap of the
# first convolution's input channels. For 'module.conv1.conv.weight' the plain
# copy would be replaced by
#     net.params[key][0].data[:, 0, :, :, :] = weights[name].detach()[:, 2, :, :, :]
#     net.params[key][0].data[:, 1, :, :, :] = weights[name].detach()[:, 1, :, :, :]
#     net.params[key][0].data[:, 2, :, :, :] = weights[name].detach()[:, 0, :, :, :]

for key in net.params.keys():
    # caffe name => pytorch name
    layer = _pytorch_layer_name(key)
    print("changename", layer)

    for suffix, blob_index in _BLOB_FOR_SUFFIX:
        name = layer + suffix
        if name not in weights:
            continue
        param_dict[name] = key
        # reshape (unlike view) also accepts non-contiguous tensors.
        net.params[key][blob_index].data[...] = weights[name].detach().reshape(
            net.params[key][blob_index].data.shape)

# Report every PyTorch parameter that no caffe layer consumed; names
# containing 'tracked' are skipped.
for name in weights:
    if name in param_dict or 'tracked' in name:
        continue
    print('miss', name)

# Report every caffe layer that received no PyTorch parameter.
matched_layers = set(param_dict.values())
for layer_name in net.params.keys():
    if layer_name not in matched_layers:
        print('miss', layer_name)

# List both sides of the mapping.
for pytorch_name in param_dict:
    print("pytorchname", pytorch_name)

for caffe_name in param_dict.values():
    print("caffe name", caffe_name)

# Write the populated network to the caffemodel file.
net.save(caffemodel_file)


你可能感兴趣的:(pytorch,python,入门,python,人工智能,深度学习,PyTorch入门)