import torch
import torch.nn as nn
import numpy as np
import torchvision
import cv2
from collections import OrderedDict
# The encoder network (the original comment called this the "decoder", but the
# class downsamples an image into a small feature map).
class Encoder(nn.Module):
    def __init__(self, isize=256, nc=3, ndf=192, ndf_ch_up=1.5, ngpu=1,
                 n_extra_layers=0, igs=4, add_final_conv=True):
        super(Encoder, self).__init__()
        self.ngpu = ngpu
        assert isize % 16 == 0, "isize has to be a multiple of 16"
        main = nn.Sequential()
        # input is nc x isize x isize
        main.add_module('initial-conv-{0}-{1}'.format(nc, ndf),
                        nn.Conv2d(nc, ndf, 4, 2, 1, bias=False))
        main.add_module('initial-relu-{0}'.format(ndf),
                        nn.LeakyReLU(0.2, inplace=True))
        csize, cndf = isize / 2, ndf
        # Extra layers
        for t in range(n_extra_layers):
            main.add_module('extra-layers-{0}-{1}-conv'.format(t, cndf),
                            nn.Conv2d(cndf, cndf, 3, 1, 1, bias=False))
            main.add_module('extra-layers-{0}-{1}-batchnorm'.format(t, cndf),
                            nn.BatchNorm2d(cndf))
            main.add_module('extra-layers-{0}-{1}-relu'.format(t, cndf),
                            nn.LeakyReLU(0.2, inplace=True))
        # Keep adding stride-2 conv layers until the feature map's grid size
        # shrinks to igs x igs; each layer multiplies the channel count by
        # ndf_ch_up. This automatically adapts the network depth to the input
        # image size.
        while csize > igs:
            in_feat = cndf
            out_feat = int(cndf * ndf_ch_up)
            main.add_module('pyramid-{0}-{1}-conv'.format(in_feat, out_feat),
                            nn.Conv2d(in_feat, out_feat, 4, 2, 1, bias=False))
            main.add_module('pyramid-{0}-batchnorm'.format(out_feat),
                            nn.BatchNorm2d(out_feat))
            main.add_module('pyramid-{0}-relu'.format(out_feat),
                            nn.LeakyReLU(0.2, inplace=True))
            cndf = int(cndf * ndf_ch_up)
            csize = csize / 2
        # State size: K x igs x igs, where K is the final cndf. The usual final
        # convolution (mapping to an nz x 1 x 1 latent) is disabled here:
        # if add_final_conv:
        #     main.add_module('final-{0}-{1}-conv'.format(cndf, 1),
        #                     nn.Conv2d(cndf, nz, igs, 1, 0, bias=False))
        self.main = main

    def forward(self, input):
        if self.ngpu > 1:
            output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        else:
            output = self.main(input)
        return output
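# --- Optional sanity check (illustrative, not part of the original script):
# with the defaults above (isize=256, ndf=192, ndf_ch_up=1.5, igs=4) the
# pyramid loop adds five stride-2 convs after the initial one, growing the
# channels 192 -> 288 -> 432 -> 648 -> 972 -> 1458 while the grid shrinks
# 128 -> 4, so a 256x256 image maps to a 1458 x 4 x 4 feature map.
_enc = Encoder()
with torch.no_grad():
    print(_enc(torch.randn(1, 3, 256, 256)).shape)  # torch.Size([1, 1458, 4, 4])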
class Decoder(nn.Module):
    def __init__(self, isize=256, nc=3, ndf=192, ndf_ch_up=1.5, ngf=192,
                 ngpu=1, n_extra_layers=0, igs=4):
        super(Decoder, self).__init__()
        self.ngpu = ngpu
        assert isize % 16 == 0, "isize has to be a multiple of 16"
        # Run the encoder's channel schedule forward from the igs x igs grid
        # to find the widest channel count cngf.
        cngf, tisize = int(ngf // 1.5), igs
        while tisize != isize:
            cngf = int(cngf * 1.5)
            tisize = tisize * 2
        ndf = int(ndf * ndf_ch_up)
        main = nn.Sequential()
        main.add_module('initial-{0}-{1}-convt'.format(int((ndf - 1) // ndf_ch_up) + 1, cngf),
                        nn.ConvTranspose2d(int((ndf - 1) // ndf_ch_up) + 1, cngf, 4, 2, 1, bias=False))
        main.add_module('initial-{0}-batchnorm'.format(cngf), nn.BatchNorm2d(cngf))
        main.add_module('initial-{0}-relu'.format(cngf), nn.ReLU(True))
        # Upsample with stride-2 transposed convs, dividing the channel count
        # by 1.5 at each stage.
        csize = igs
        while csize < isize // 4:
            main.add_module('pyramid-{0}-{1}-convt'.format(cngf, int(cngf // 1.5)),
                            nn.ConvTranspose2d(cngf, int(cngf // 1.5), 4, 2, 1, bias=False))
            main.add_module('pyramid-{0}-batchnorm'.format(int(cngf // 1.5)),
                            nn.BatchNorm2d(int(cngf // 1.5)))
            main.add_module('pyramid-{0}-relu'.format(int(cngf // 1.5)), nn.ReLU(True))
            cngf = int(cngf // 1.5)
            csize = csize * 2
        for t in range(n_extra_layers):
            main.add_module('extra-layers-{0}-{1}-conv'.format(t, cngf),
                            nn.Conv2d(cngf, cngf, 3, 1, 1, bias=False))
            main.add_module('extra-layers-{0}-{1}-batchnorm'.format(t, cngf),
                            nn.BatchNorm2d(cngf))
            main.add_module('extra-layers-{0}-{1}-relu'.format(t, cngf), nn.ReLU(True))
        main.add_module('final-{0}-{1}-convt'.format(cngf, nc),
                        nn.ConvTranspose2d(cngf, nc, 4, 2, 1, bias=False))
        main.add_module('final-{0}-tanh'.format(nc), nn.Tanh())
        self.main = main

    def forward(self, input):
        if self.ngpu > 1:
            output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        else:
            output = self.main(input)
        return output
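# --- Optional sanity check (illustrative, not part of the original script):
# with the defaults above, the first transposed conv expects
# int((int(192 * 1.5) - 1) // 1.5) + 1 = 192 input channels, and the stack of
# upsampling layers maps a 4x4 grid back to a 3 x 256 x 256 image.
_dec = Decoder()
with torch.no_grad():
    print(_dec(torch.randn(1, 192, 4, 4)).shape)  # torch.Size([1, 3, 256, 256])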
filename = "08600_netG.pth"
filename1 = filename.replace("netG", "netG_1").replace("pth", "pt")
filename2 = filename.replace("netG", "netG_2").replace("pth", "pt")
# Read the test image and turn it into a 1 x 3 x H x W float tensor in [0, 1].
img = cv2.imread("images_08600\\00001_real.bmp")
img = np.array([img])  # add a batch dimension
img = img / 255
img = torch.from_numpy(img).permute(0, 3, 1, 2).type(torch.FloatTensor)
print(img.shape)
# Load the checkpoint.
pretrained_net_d = torch.load(filename)
# Build the encoder network and move it to the GPU.
model_d = Encoder()
model_d.cuda()
# The network's own state_dict describes its layer structure.
model_dict_d = model_d.state_dict()
# Print the keys stored in the checkpoint.
for k, v in list(pretrained_net_d['state_dict'].items()):
    print(k)
# Printing reveals that the checkpoint keys do not match the encoder's keys
# (they carry an 'encoder1.' prefix), so loading them directly would raise an
# error. Rename the checkpoint keys to match the network:
for k, v in list(pretrained_net_d['state_dict'].items()):
    new_key = k.replace("encoder1.m", "m")
    pretrained_net_d['state_dict'][new_key] = v
    pretrained_net_d['state_dict'].pop(k)
# With the keys aligned, keep only the entries the network actually has.
pretrained_net_d['state_dict'] = {k: v for k, v in pretrained_net_d['state_dict'].items()
                                  if k in model_dict_d}
# (Uncomment to check which layers' keys were kept.)
# for k, v in pretrained_net_d['state_dict'].items():
#     print(k)
# Load the weights.
model_d.load_state_dict(pretrained_net_d['state_dict'])
# Switch to evaluation mode before tracing.
model_d.eval()
example_d2 = img
# Trace the encoder and run it once to capture its output.
traced_script_module_d = torch.jit.trace(model_d, example_d2.cuda())
print(traced_script_module_d)
output_d = traced_script_module_d(example_d2.cuda())
# Save in a format libtorch can load.
traced_script_module_d.save(filename1)
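# --- Optional round-trip check (not in the original script): reload the saved
# TorchScript file and confirm it reproduces the traced output before handing
# it to C++.
_reloaded = torch.jit.load(filename1)
with torch.no_grad():
    print(torch.allclose(_reloaded(img.cuda()), output_d))  # expect True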
# Repeat the same steps for the decoder, stripping the 'decoder.' prefix from
# the checkpoint keys this time.
pretrained_net = torch.load(filename)
model = Decoder()
model.cuda()
model_dict = model.state_dict()
for k, v in list(pretrained_net['state_dict'].items()):
    new_key = k.replace("decoder.m", "m")
    pretrained_net['state_dict'][new_key] = v
    pretrained_net['state_dict'].pop(k)
pretrained_net['state_dict'] = {k: v for k, v in pretrained_net['state_dict'].items()
                                if k in model_dict}
model.load_state_dict(pretrained_net['state_dict'])
model.eval()
# Trace the decoder with a dummy input shaped like the encoder's output.
example = torch.rand(output_d.shape).cuda()
traced_script_module = torch.jit.trace(model, example)
output = traced_script_module(torch.ones(output_d.shape).cuda())
traced_script_module.save(filename2)
# torchvision.utils.save_image(output,"1.bmp",normalize=False)
# print(output_d[0][0][0][0])
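# --- Optional (not in the original script): write the traced decoder's output
# to disk with OpenCV, roughly mirroring the per-channel rescale the C++ code
# below performs. 'output' is the tensor produced above.
_pic = output[0].detach().cpu().permute(1, 2, 0).numpy()
cv2.imwrite("reconstruction.bmp", np.clip(_pic * 255, 0, 255).astype(np.uint8))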
The C++ code that calls the two exported models:
#include <iostream>
#include <opencv2/opencv.hpp>
#include "torch/script.h"
#include "torch/torch.h"
using namespace std;
int main()
{
    torch::DeviceType device_type;  // choose the device type
    device_type = torch::kCUDA;     // torch::kCUDA or torch::kCPU
    torch::Device device(device_type, 0);
    // Read the image. The input size must match the training size; batch size is 1.
    cv::Mat img = cv::imread("D:/测试/OneClass_AE_Cam06_Small/00001_real.bmp");
    char model_name[100] = "D:/测试/OneClass_AE_Cam06_Small/40000_netG_1.pt";
    char model_name2[100];
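    // Derive the decoder's filename from the encoder's by replacing the '1'
    // that precedes ".pt" with '2' (e.g. ..._netG_1.pt -> ..._netG_2.pt).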
    for (int i = 0; i < 99; i++)
    {
        if (model_name[i] == '1' && model_name[i + 1] == '.')
        {
            model_name2[i] = '2';
        }
        else
        {
            model_name2[i] = model_name[i];
        }
    }
    // Scale the 8-bit image to floats in [0, 1], matching the Python preprocessing.
    img.convertTo(img, CV_32FC3, 1.0f / 255.0f);
    // Print the size for a quick check.
    cout << img.rows << " x " << img.cols << endl;
    // NHWC float image -> NCHW tensor (a standard from_blob conversion is
    // assumed here; adjust if your preprocessing differs).
    torch::Tensor img_var = torch::from_blob(img.data, {1, img.rows, img.cols, 3}).to(device);
    img_var = img_var.permute({0, 3, 1, 2});
    // Load the traced encoder.
    std::shared_ptr<torch::jit::script::Module> module = torch::jit::load(model_name);
    assert(module != nullptr);
    module->to(device);
    torch::cuda::is_available();  // check that GPU acceleration is available
    std::vector<torch::jit::IValue> inputs;
    inputs.push_back(img_var);
    at::Tensor output = module->forward(inputs).toTensor();
    // Load the traced decoder and feed it the encoder's output.
    std::shared_ptr<torch::jit::script::Module> module_d = torch::jit::load(model_name2);
    assert(module_d != nullptr);
    module_d->to(device);
    torch::cuda::is_available();  // check that GPU acceleration is available
    std::vector<torch::jit::IValue> inputs_d;
    inputs_d.push_back(output);
    at::Tensor result_d = module_d->forward(inputs_d).toTensor();
    result_d = result_d.to(torch::kCPU);
    // Copy each channel back into an OpenCV Mat (channels pushed in reverse
    // order), then rescale to 8-bit.
    std::vector<cv::Mat> mat_lists;
    for (int index = 2; index >= 0; index--)
    {   // note: the Mat size must match the network's output size
        cv::Mat channel(img.rows, img.cols, CV_32FC1, result_d[0][index].data<float>()), output_channel;
        channel.convertTo(output_channel, CV_8UC1, 255);
        mat_lists.push_back(output_channel);
    }
    cv::Mat ganpic;
    cv::merge(mat_lists, ganpic);
    //cv::imshow("1", ganpic);
    cv::imwrite("D:/64_f.jpg", ganpic);
    cv::waitKey(0);
    cv::destroyAllWindows();
    return 0;
}