Tested on Ubuntu 20.04 with a 1080 Ti GPU; both the current model conversion and the PC-side evaluation require a GPU.
Note: the test models used here can be downloaded from https://pan.baidu.com/s/18Je6lJPTXmxEzbE9pmtN7w?pwd=ylxv (extraction code: ylxv)
Download: https://github.com/Li-Chongyi/Zero-DCE
Test environment: Python 3.7, PyTorch 1.0.0, OpenCV, torchvision 0.2.1, CUDA 10.0
Installation command (for setting up the conda virtual environment, see https://pytorch.org/get-started/previous-versions/): conda install pytorch==1.0.0 torchvision==0.2.1 cuda100 -c pytorch
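For reference, a minimal sketch of creating such an environment from scratch (the environment name sci matches the one activated later; installing OpenCV via the opencv-python pip package is an assumption):
// create and activate the environment
conda create -n sci python=3.7
conda activate sci
// PyTorch 1.0.0 / torchvision 0.2.1 built against CUDA 10.0
conda install pytorch==1.0.0 torchvision==0.2.1 cuda100 -c pytorch
// OpenCV for Python (package choice is an assumption)
pip install opencv-python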
Modified test source (model.py):
import torch
import torch.nn as nn
from loss import LossFunction
from torch.autograd import Variable


class EnhanceNetwork(nn.Module):
    def __init__(self, layers, channels):
        super(EnhanceNetwork, self).__init__()

        kernel_size = 3
        dilation = 1
        padding = int((kernel_size - 1) / 2) * dilation

        self.in_conv = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=channels, kernel_size=kernel_size, stride=1, padding=padding),
            nn.ReLU()
        )

        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(channels),
            nn.ReLU()
        )

        self.blocks = nn.ModuleList()
        for i in range(layers):
            self.blocks.append(self.conv)

        self.out_conv = nn.Sequential(
            nn.Conv2d(in_channels=channels, out_channels=3, kernel_size=3, stride=1, padding=1),
            nn.Sigmoid()
        )

    def forward(self, input):
        fea = self.in_conv(input)
        for conv in self.blocks:
            fea = fea + conv(fea)
        fea = self.out_conv(fea)

        illu = fea + input
        illu = torch.clamp(illu, 0.0001, 1)

        return illu
class CalibrateNetwork(nn.Module):
    def __init__(self, layers, channels):
        super(CalibrateNetwork, self).__init__()
        kernel_size = 3
        dilation = 1
        padding = int((kernel_size - 1) / 2) * dilation
        self.layers = layers

        self.in_conv = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=channels, kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(channels),
            nn.ReLU()
        )

        self.convs = nn.Sequential(
            nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(channels),
            nn.ReLU(),
            nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(channels),
            nn.ReLU()
        )

        self.blocks = nn.ModuleList()
        for i in range(layers):
            self.blocks.append(self.convs)

        self.out_conv = nn.Sequential(
            nn.Conv2d(in_channels=channels, out_channels=3, kernel_size=3, stride=1, padding=1),
            nn.Sigmoid()
        )

    def forward(self, input):
        fea = self.in_conv(input)
        for conv in self.blocks:
            fea = fea + conv(fea)
        fea = self.out_conv(fea)

        delta = input - fea

        return delta
class Network(nn.Module):

    def __init__(self, stage=3):
        super(Network, self).__init__()
        self.stage = stage
        self.enhance = EnhanceNetwork(layers=1, channels=3)
        self.calibrate = CalibrateNetwork(layers=3, channels=16)
        self._criterion = LossFunction()

    def weights_init(self, m):
        if isinstance(m, nn.Conv2d):
            m.weight.data.normal_(0, 0.02)
            m.bias.data.zero_()

        if isinstance(m, nn.BatchNorm2d):
            m.weight.data.normal_(1., 0.02)

    def forward(self, input):
        ilist, rlist, inlist, attlist = [], [], [], []
        input_op = input
        for i in range(self.stage):
            inlist.append(input_op)
            i = self.enhance(input_op)
            r = input / i
            r = torch.clamp(r, 0, 1)
            att = self.calibrate(r)
            input_op = input + att
            ilist.append(i)
            rlist.append(r)
            attlist.append(torch.abs(att))

        return ilist, rlist, inlist, attlist

    def _loss(self, input):
        i_list, en_list, in_list, _ = self(input)
        loss = 0
        for i in range(self.stage):
            loss += self._criterion(in_list[i], i_list[i])
        return loss
class Finetunemodel(nn.Module):

    def __init__(self, weights):
        super(Finetunemodel, self).__init__()
        self.enhance = EnhanceNetwork(layers=1, channels=3)
        self._criterion = LossFunction()

        base_weights = torch.load(weights)
        # print(base_weights)
        pretrained_dict = base_weights
        model_dict = self.state_dict()
        # print(model_dict)
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        model_dict.update(pretrained_dict)
        self.load_state_dict(model_dict)
        # Modification: export the loaded model to ONNX with a fixed 1x3x300x300 input
        dummy_input = Variable(torch.randn(1, 3, 300, 300))
        torch.onnx.export(self, dummy_input, "onnx.onnx", verbose=True)

    def weights_init(self, m):
        if isinstance(m, nn.Conv2d):
            m.weight.data.normal_(0, 0.02)
            m.bias.data.zero_()

        if isinstance(m, nn.BatchNorm2d):
            m.weight.data.normal_(1., 0.02)
        # print(self)

    def forward(self, input):
        i = self.enhance(input)
        r = input / i
        r = torch.clamp(r, 0, 1)
        return i, r

    def _loss(self, input):
        i, r = self(input)
        loss = self._criterion(input, i)
        return loss
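Because the ONNX export was added to Finetunemodel.__init__, simply constructing the model during testing writes onnx.onnx into the working directory. A minimal sketch of how the model is driven (the weights path ./weights/medium.pt is an assumption; the repository's test.py drives the model in essentially this way):
import torch
from model import Finetunemodel

# Constructing Finetunemodel loads the weights and, with the modification above,
# immediately exports onnx.onnx with a fixed 1x3x300x300 input.
model = Finetunemodel('./weights/medium.pt')  # weights path is an assumption
model.eval()

with torch.no_grad():
    i, r = model(torch.randn(1, 3, 300, 300))  # i: illumination, r: enhanced image
print(i.shape, r.shape)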
Test commands
// connect to the server and enter the container
docker start wyj
docker attach wyj
// go to the project directory
cd SCI-main
// activate the virtual environment
conda activate sci
// run the test
python test.py
// exit the container
exit
// copy the exported model out of the container
docker cp wyj:/workspace/SCI-main/onnx.onnx /data/
Note: an error is reported when testing the medium model, but it does not affect the model conversion.
Enter the test environment described above; the source code needs to be modified as follows to implement model inference.
Test path
cd external/rknn-toolkit/examples/onnx/resnet50v2
Test environment
conda activate yolov5-ommx
Test command
python onnxtest.py
Source code
import torch
import torchvision
import onnx
import onnxruntime
import numpy as np
import os
from PIL import Image

# directory where pretrained models downloaded by pytorch are cached
os.environ["TORCH_HOME"] = "./pretrained_models"


def pytorch_2_onnx():
    """
    Export a pytorch model to ONNX. During export, pytorch uses trace or script
    to run one forward pass and record the resulting graph structure, so the
    model to be exported must be convertible by trace or script.
    :return:
    """
    # load a pretrained model
    model = torchvision.models.alexnet(pretrained=True)
    print(model)

    model_path = "alexnet.onnx"
    # trace/script needs one set of input data to run a forward pass that gets recorded
    dummy_input = torch.randn(4, 3, 224, 224, device="cpu")
    input_names = ["input_data"] + ["learned_%d" % i for i in range(16)]
    output_names = ["output_data"]
    torch.onnx.export(
        model,                 # pytorch network model
        dummy_input,           # dummy input for tracing
        model_path,            # output onnx file path
        export_params=True,    # export the trained parameters
        verbose=10,            # debug message
        training=torch.onnx.TrainingMode.EVAL,  # export in inference mode; dropout, BatchNorm, etc. are frozen
        input_names=input_names,    # aliases for the graph input nodes; input data is bound to these names at inference time
        output_names=output_names,  # aliases for the graph output nodes
        # Without dynamic_axes, a model exported with input shape [4, 3, 224, 224]
        # must always be fed exactly [4, 3, 224, 224] at inference time.
        # Here dimension 0 of the input is made dynamic so batch_size can vary later.
        dynamic_axes={
            # a dictionary to specify dynamic axes of input/output
            # each key must also be provided in input_names or output_names
            "input_data": {0: "batch_size"},
            "output_data": {0: "batch_size"}
        })
    return model_path


def onnx_check(model_path):
    """
    Verify that the exported model is well formed.
    :param model_path:
    :return:
    """
    onnx_model = onnx.load(model_path)
    onnx.checker.check_model(onnx_model)
    print(onnx.helper.printable_graph(onnx_model.graph))


def onnx_inference(model_path):
    """
    Model inference.
    :param model_path:
    :return:
    """
    # run inference on the GPU with onnxruntime-gpu
    session = onnxruntime.InferenceSession(model_path,
                                           providers=[
                                               ("CUDAExecutionProvider", {  # GPU inference
                                                   "device_id": 0,
                                                   "arena_extend_strategy": "kNextPowerOfTwo",
                                                   "gpu_mem_limit": 4 * 1024 * 1024 * 1024,
                                                   "cudnn_conv_algo_search": "EXHAUSTIVE",
                                                   "do_copy_in_default_stream": True,
                                                   # "cudnn_conv_use_max_workspace": "1"  # needs several GB of GPU memory at init time
                                               }),
                                               "CPUExecutionProvider"  # CPU fallback
                                           ])
    # session = onnxruntime.InferenceSession(model_path)

    # data = np.random.randn(2, 3, 224, 224).astype(np.float32)
    data = np.random.randn(1, 3, 300, 300).astype(np.float32)
    data_lowlight = Image.open("01.jpg")
    data_lowlight = (np.asarray(data_lowlight) / 255.0)
    data_lowlight = torch.from_numpy(data_lowlight).float()
    data_lowlight = data_lowlight.permute(2, 0, 1)
    data_lowlight = data_lowlight.unsqueeze(0)
    # print("data_lowlight : {}", data_lowlight)
    data = data_lowlight.numpy()
    # data = data_lowlight

    # get the names of the model's original input and output nodes
    input_name = session.get_inputs()[0].name
    output_name = session.get_outputs()[0].name
    # print("input name: {}".format(input_name))

    # feed the data into the model as a dictionary
    outputs = session.run([output_name], {input_name: data})
    # print("outputs name: {}", outputs)
    print(np.array(outputs[0]).shape)
    print(torch.Tensor(outputs[0]))
    torchvision.utils.save_image(torch.Tensor(outputs[0]), "1_result.jpg")


if __name__ == '__main__':
    # model_path = pytorch_2_onnx()
    # onnx_check(model_path)
    # onnx_check("Epoch99.onnx")
    # onnx_inference(model_path)
    onnx_inference("Epoch99.onnx")
Enter the test environment described above; the source code needs to be modified as follows to implement the model conversion.
Test path:
yolov5/yolov5-master/rknn_convert_tools_pt
Test virtual environment
conda activate rv1126
Test command
conda activate rv1126
Source code
import yaml
from rknn.api import RKNN
import cv2

_model_load_dict = {
    'caffe': 'load_caffe',
    'tensorflow': 'load_tensorflow',
    'tflite': 'load_tflite',
    'onnx': 'load_onnx',
    'darknet': 'load_darknet',
    'pytorch': 'load_pytorch',
    'mxnet': 'load_mxnet',
    'rknn': 'load_rknn',
}

yaml_file = './config.yaml'


def main():
    with open(yaml_file, 'r') as F:
        config = yaml.load(F)
    print('config is:')
    print(config)

    model_type = config['running']['model_type']
    print('model_type is {}'.format(model_type))

    rknn = RKNN(verbose=True)

    print('--> config model')
    rknn.config(**config['config'])
    print('done')

    print('--> Loading model')
    load_function = getattr(rknn, _model_load_dict[model_type])
    ret = load_function(**config['parameters'][model_type])
    if ret != 0:
        print('Load mobilenet_v2 failed! Ret = {}'.format(ret))
        exit(ret)
    print('done')

    ####
    # print('hybrid_quantization')
    # ret = rknn.hybrid_quantization_step1(dataset=config['build']['dataset'])

    if model_type != 'rknn':
        print('--> Building model')
        ret = rknn.build(**config['build'])
        if ret != 0:
            print('Build mobilenet_v2 failed!')
            exit(ret)
    else:
        print('--> skip Building model step, cause the model is already rknn')

    if config['running']['export'] is True:
        print('--> Export RKNN model')
        ret = rknn.export_rknn(**config['export_rknn'])
        if ret != 0:
            print('Init runtime environment failed')
            exit(ret)
    else:
        print('--> skip Export model')

    if (config['running']['inference'] is True) or (config['running']['eval_perf'] is True):
        print('--> Init runtime environment')
        ret = rknn.init_runtime(**config['init_runtime'])
        if ret != 0:
            print('Init runtime environment failed')
            exit(ret)

        print('--> load img')
        img = cv2.imread(config['img']['path'])
        print('img shape is {}'.format(img.shape))
        # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        inputs = [img]

        if config['running']['inference'] is True:
            print('--> Running model')
            config['inference']['inputs'] = inputs
            # print(config['inference'])
            outputs = rknn.inference(inputs)
            # outputs = rknn.inference(config['inference'])
            print('len of output {}'.format(len(outputs)))
            print('outputs[0] shape is {}'.format(outputs[0].shape))
            print(outputs[0][0][0:2])
        else:
            print('--> skip inference')

        if config['running']['eval_perf'] is True:
            print('--> Begin evaluate model performance')
            config['inference']['inputs'] = inputs
            perf_results = rknn.eval_perf(inputs=[img])
        else:
            print('--> skip eval_perf')
    else:
        print('--> skip inference')
        print('--> skip eval_perf')


if __name__ == '__main__':
    main()
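The conversion script above is driven entirely by config.yaml. The exact keys depend on the rknn-toolkit version and the model; the following is only a hedged sketch of what such a file might look like for this ONNX model (every value here, including the mean/channel settings, file names, target platform and the do_quantization/pre_compile flags, is an assumption to be adapted):
running:
  model_type: onnx
  export: True
  inference: False
  eval_perf: False
config:                        # forwarded to rknn.config(**...)
  channel_mean_value: '0 0 0 255'   # assumption: the model expects input scaled to 0..1
  reorder_channel: '0 1 2'
  target_platform: ['rv1126']
parameters:
  onnx:                        # forwarded to rknn.load_onnx(**...)
    model: ./Epoch99.onnx
build:                         # forwarded to rknn.build(**...)
  do_quantization: False
  dataset: ./dataset.txt
  pre_compile: True
export_rknn:                   # forwarded to rknn.export_rknn(**...)
  export_path: ./Epoch99.rknn
init_runtime:                  # forwarded to rknn.init_runtime(**...)
  target: rv1126
img:
  path: ./01.jpg
inference:                     # the script stores the loaded image into this section
  data_format: nhwc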
Enter the test environment described above; the source code needs to be modified to implement the board-side test of the model.
// use the virtual environment
conda activate rv1126
// go to the specified directory
cd external/rknpu/rknn/rknn_api/examples/rknn_ssd_demo
// run the build
./build.sh
Copy the generated executable and the corresponding configuration files to the target directory.
Connect to the board over adb and run the test commands:
// push the data to the board
.\adb push .\rknn_ssd_demo /userdata/rknn_ssd_demo
// open a shell on the board
.\adb shell
// enter the directory and fix permissions
cd userdata
cd rknn_ssd_demo
chmod 777 *
// run the test
./rknn_ssd_demo model/Epoch99.rknn model/01.bmp
// pull the output data back to the PC
.\adb pull /userdata/rknn_ssd_demo/outputs1.txt
.\adb pull /userdata/rknn_ssd_demo/outputs2.txt
Function for parsing the data output by the board:
float output[3 * 1280 * 720];

void CMFCApplication1Dlg::OnBnClickedButton19()
{
    // TODO: add control notification handler code here
    int w = 720;
    int h = 1280;
    IplImage *img = cvCreateImage(cvSize(w, h), 8, 3);
    char temp[255], ReadimgPath[255], SaveimgPath[255], imgName[255], imgType[255];

    // read the output data file
    FILE *fp = fopen("1280.txt", "r");
    // FILE *fp = fopen(ReadimgPath, "r");
    for (int i = 0; i < 3 * w * h; i++)
    {
        fscanf(fp, "%s\n", temp);
        output[i] = atof(temp);
    }
    fclose(fp);

    int ii = 0;
    for (int i = 0; i < w; i++)       // w
    {
        for (int j = 0; j < h; j++)   // h
        {
            img->imageData[j * img->widthStep + i * 3 + 0] = (char)(output[ii + w * h * 2] * 255.0);  // ii++;
            img->imageData[j * img->widthStep + i * 3 + 1] = (char)(output[ii + w * h] * 255.0);      // ii++;
            img->imageData[j * img->widthStep + i * 3 + 2] = (char)(output[ii] * 255.0);
            ii++;
        }
    }
    cvShowImage("img", img);
    cvSaveImage("1280_result.png", img);
    cvWaitKey(0);
}
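The same reconstruction can also be done on the PC in Python. The sketch below mirrors the C++ logic above and assumes outputs1.txt holds 3 * 1280 * 720 float values, one per line, in the same planar order the C++ loop indexes; unlike the C++ char cast, values are clipped to 0..255 instead of wrapping:
import numpy as np
import cv2

w, h = 720, 1280
# one float per line; three planes of w*h values each, indexed by the flat counter ii
data = np.loadtxt("outputs1.txt", dtype=np.float32)
planes = data.reshape(3, -1)

img = np.zeros((h, w, 3), dtype=np.float32)
ii = 0
for i in range(w):
    for j in range(h):
        img[j, i, 0] = planes[2, ii] * 255.0  # B, matches output[ii + w*h*2]
        img[j, i, 1] = planes[1, ii] * 255.0  # G, matches output[ii + w*h]
        img[j, i, 2] = planes[0, ii] * 255.0  # R, matches output[ii]
        ii += 1

cv2.imwrite("1280_result.png", np.clip(img, 0, 255).astype(np.uint8))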