目录
主题
pytorch 静态量化
对称量化
非对称量化
YoloV5_lite 量化
代码地址
量化代码
fuse_modules 模块提取代码
模型网络中增加量化Module
量化过程中出现的问题
silu 不支持量化
add 操作不支持量化操作
yolov detect 回归报错
结尾
参考文献
针对yolov5_lite 网络采用pytorch 进行训练后的静态量化,主要介绍量化的过程,并记录其中遇到的各种问题。
X 表示浮点值,Q 表示量化值。量化时一般先将浮点值截断到区间 [T2, T1],再线性映射为整数:$Q = \mathrm{round}(X / S) + Q_0$,反量化为 $X \approx S\,(Q - Q_0)$。
其中 $Q_0$ 表示量化的零点,S 表示量化的 scale。
当 $Q_0 = 0$ 且 $T_1 = -T_2$ 时就是对称量化,此时神经网络中的乘加计算如下:$\sum_i W_i X_i \approx S_W S_X \sum_i Q_{W,i}\, Q_{X,i}$
这样网络计算中大量的浮点乘法就被转换成了整数乘法,可以大幅减少计算时间。
当 $Q_0 \neq 0$、$T_1 \neq -T_2$ 时是非对称量化,量化损失更小,但计算量更大,如下:$\sum_i W_i X_i \approx S_W S_X \sum_i (Q_{W,i} - Q_{0,W})(Q_{X,i} - Q_{0,X})$
https://github.com/ppogg/YOLOv5-Lite.git
import tqdm

# Post-training static quantization of an already-trained model.
# Names expected from the surrounding script: `model`, `opt`, `device`,
# `LoadImages`, and `fuse_ops` (a list of module-name lists, e.g. the result
# of get_fuse_module(model)).

backend = "fbgemm"  # pick the backend that matches the target platform


def _to_input_batch(img, target_device):
    """Turn one image array from the dataset into a float batch in [0, 1]."""
    img = torch.from_numpy(img).to(target_device)
    img = img.float()  # uint8 to fp32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add the batch dimension
    return img


# Fusion (conv+bn folding) and calibration must both run in eval mode.
model.eval()
model.qconfig = torch.quantization.get_default_qconfig(backend)
# Fusing is optional; drop this line to quantize the layers unfused.
model = torch.quantization.fuse_modules(model, fuse_ops)
model_fp32_prepared = torch.quantization.prepare(model)

stride = int(model.stride.max())
dataset = LoadImages(opt.source, img_size=opt.img_size, stride=stride)

# Calibration: run representative images through the prepared model so the
# inserted observers can record activation ranges. No gradients are needed.
with torch.no_grad():
    for path, img, im0s, vid_cap in tqdm.tqdm(dataset):
        model_fp32_prepared(_to_input_batch(img, device), augment=opt.augment)

# The converted model is evaluated on CPU below, so convert it there too.
model_fp32_prepared = model_fp32_prepared.to("cpu")
model_int8 = torch.quantization.convert(model_fp32_prepared)

# Smoke test: make sure the int8 model runs end to end on the same images.
with torch.no_grad():
    for path, img, im0s, vid_cap in tqdm.tqdm(dataset):
        model_int8(_to_input_batch(img, "cpu"), augment=opt.augment)

torch.save(model_int8.state_dict(), "int_8_model.pt")
调用 fuse_modules 时,本文只用到 conv+bn、conv+bn+relu 这两种融合方式。fuse_modules 需要传入
待融合模块名称的 list,这里提供了查找这些模块名称 list 的代码:
def is_fused_itm(module):
    """Return True when ``module`` is a candidate for layer fusion.

    A module is a candidate when its class is defined in one of
    ``torch.nn.modules.conv``, ``torch.nn.modules.batchnorm`` or
    ``torch.nn.modules.activation`` (checked through ``module.__module__``).
    """
    return module.__module__ in (
        'torch.nn.modules.conv',
        'torch.nn.modules.batchnorm',
        'torch.nn.modules.activation',
    )


def get_fuse_module(model):
    """Build the list of module-name groups to hand to ``fuse_modules``.

    Candidate layers (see ``is_fused_itm``) are grouped by the dotted name of
    their direct parent, in ``model.named_modules()`` order. Inside each
    parent a group is opened by a conv layer and may then take at most one
    batch-norm and at most one ReLU, so every returned group is conv+bn,
    conv+relu or conv+bn+relu. Batch-norm / ReLU layers that have no conv in
    front of them, or that would repeat a kind already in the group, are
    left out instead of producing a list that cannot be fused. Activations
    other than ReLU are ignored.

    Args:
        model: object exposing ``named_modules()`` yielding ``(name, module)``.

    Returns:
        A list of lists of fully qualified module names; groups containing a
        single conv are not reported.
    """
    # Pass 1: bucket the candidate layers under their parent module's name.
    siblings = {}
    for name, module in model.named_modules():
        if is_fused_itm(module):
            print(name, module)
            parent = name.rpartition('.')[0]
            siblings.setdefault(parent, []).append((name, module))

    # Pass 2: inside every parent, cut the layers into conv-led groups.
    fuse_ops = []
    for members in siblings.values():
        group = []  # names collected for the group currently being built
        has_bn = False
        has_relu = False
        for name, module in members:
            kind = module.__module__
            if kind == 'torch.nn.modules.conv':
                # A new conv closes the previous group and opens a new one.
                if len(group) > 1:
                    fuse_ops.append(group)
                group = [name]
                has_bn = False
                has_relu = False
            elif not group:
                # Nothing to attach to: no conv has been seen in this parent.
                continue
            elif kind == 'torch.nn.modules.batchnorm':
                # Batch-norm is only valid directly in the conv -> bn slot.
                if not has_bn and not has_relu:
                    group.append(name)
                    has_bn = True
            elif (kind == 'torch.nn.modules.activation'
                  and type(module).__name__ == 'ReLU'):
                if not has_relu:
                    group.append(name)
                    has_relu = True
        # Flush what is left for this parent; a lone conv is not a fusion.
        if len(group) > 1:
            fuse_ops.append(group)
    return fuse_ops
在pytorch中实现量化需要在代码中添加量化和恢复浮点的模块如下:
# Entry stub: marks the point where the float input is converted to a
# quantized tensor once the model has been converted.
self.quant = torch.quantization.QuantStub()
# Exit stub: marks the point where quantized outputs are converted back to
# float.
self.dequant = torch.quantization.DeQuantStub()
这里为了尽量保持之前的代码不变的情况下可以采用如下的修改方式:
class QuantModel(Model):
    """``Model`` whose forward pass is wrapped in quant / dequant stubs."""

    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):
        """Build the underlying network with the same arguments as ``Model``."""
        super().__init__(cfg, ch, nc, anchors)

    def set_quant(self):
        """Attach the stubs that mark the quantized region's entry and exit."""
        logger.info("quant model need to set")
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x, augment=False, profile=False):
        """Quantize the input, run the parent forward, dequantize each output.

        Returns a list with one dequantized entry per item produced by the
        parent ``forward``.
        """
        quantized_input = self.quant(x)
        raw_outputs = super().forward(quantized_input, augment, profile)
        dequantized = []
        for item in raw_outputs:
            dequantized.append(self.dequant(item))
        return dequantized
这里的Model 表示的是原来的网络,set_quant在原有的Model 定义为空,并且在构造函数的开始调用, 如下所示:
# Excerpt of the original (float) network class; only the start of the
# constructor is shown here.
class Model(nn.Module):
# Quantization hook: in the base model it only logs, so the float network is
# unaffected. Subclasses may override it to register quantization stubs.
def set_quant(self):
logger.info("org model not need to set quant")
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
super(Model, self).__init__()
# Invoke the hook right after nn.Module initialisation, so that any modules
# an override assigns are registered on this instance.
self.set_quant()
这样原始的网络跟量化没有关系。
File "/home/kylin/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/kylin/YOLOv5-Lite/models/yolo.py", line 315, in forward
x = super(QuantModel, self).forward(x, augment, profile)
File "/home/kylin/YOLOv5-Lite/models/yolo.py", line 166, in forward
return self.forward_once(x, profile) # single-scale inference, train
File "/home/kylin/YOLOv5-Lite/models/yolo.py", line 182, in forward_once
x = m(x) # run
File "/home/kylin/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/kylin/YOLOv5-Lite/models/common.py", line 171, in forward
return self.act(self.bn(self.conv(x)))
File "/home/kylin/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/kylin/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/activation.py", line 395, in forward
return F.silu(input, inplace=self.inplace)
File "/home/kylin/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/functional.py", line 2059, in silu
return torch._C._nn.silu(input)
NotImplementedError: Could not run 'aten::silu.out' with arguments from the 'QuantizedCPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::silu.out' is only available for these backends: [CPU, CUDA, Meta, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMeta, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradNestedTensor, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PythonDispatcher].
PyTorch 的量化算子表不支持 SiLU 这种激活操作。目前的做法是直接把它换成 Identity 后重新训练,在自己的数据集上精度差别不大。修改位置在 common.py 的 Conv 类中,代码如下:
# Excerpt: only the constructor of the Conv block is shown here.
class Conv(nn.Module):
# Standard convolution
# `act` selects the activation: True -> nn.SiLU, an nn.Module instance -> that
# module, anything else -> nn.Identity.
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super(Conv, self).__init__()
# The convolution has no bias term; it is paired with the batch-norm layer
# created on the next line.
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
self.bn = nn.BatchNorm2d(c2)
# SiLU has no quantized CPU kernel (see the traceback above this snippet),
# so the quantized variant needs `act` to resolve to something else.
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
将act=True , 改成False, 这样默认就是 采用了Identity 操作了。 注意这里修改完需要重新训练一下。
File "/home/kylin/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/kylin/YOLOv5-Lite/models/yolo.py", line 315, in forward
x = super(QuantModel, self).forward(x, augment, profile)
File "/home/kylin/YOLOv5-Lite/models/yolo.py", line 166, in forward
return self.forward_once(x, profile) # single-scale inference, train
File "/home/kylin/YOLOv5-Lite/models/yolo.py", line 182, in forward_once
x = m(x) # run
File "/home/kylin/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/kylin/YOLOv5-Lite/models/common.py", line 826, in forward
return torch.add(x1, x2, alpha=self.a)
NotImplementedError: Could not run 'aten::add.out' with arguments from the 'QuantizedCPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::add.out' is only available for these backends: [CPU, CUDA, Meta, MkldnnCPU, SparseCPU, SparseCUDA, SparseCsrCPU, SparseCsrCUDA, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMeta, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradNestedTensor, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PythonDispatcher].
add、cat 这类多输入操作,由于各个输入的 scale 不一致,直接对整数值相加会出问题,因此量化后的张量不能直接调用 torch.add。PyTorch 提供了 torch.nn.quantized.FloatFunctional 来包装这类操作(convert 之后会被替换成对应的量化实现),修改位置在 common.py 的 ADD 模块中:
class ADD(nn.Module):
    """Weighted shortcut: returns ``x[0] + alpha * x[1]`` for a list ``x``.

    Both the scaling and the addition go through a ``FloatFunctional``
    instead of a direct ``torch.add`` call, which is what lets the module be
    used inside a quantized model.
    """

    def __init__(self, alpha=0.5):
        super().__init__()
        self.a = alpha  # weight applied to the second tensor
        self.ff = torch.nn.quantized.FloatFunctional()

    def forward(self, x):
        """Combine the first two entries of ``x``; further entries are ignored."""
        first = x[0]
        second = x[1]
        scaled_second = self.ff.mul_scalar(second, self.a)
        return self.ff.add(first, scaled_second)
这里通过 torch.nn.quantized.FloatFunctional() 把原来直接调用的 torch.add 换成了可以被量化流程处理的函数式模块。
可以参考 torch.ao.nn.quantized.modules.functional_modules — PyTorch 1.13 documentation
File "/home/kylin/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/kylin/YOLOv5-Lite/models/yolo.py", line 315, in forward
x = super(QuantModel, self).forward(x, augment, profile)
File "/home/kylin/YOLOv5-Lite/models/yolo.py", line 166, in forward
return self.forward_once(x, profile) # single-scale inference, train
File "/home/kylin/YOLOv5-Lite/models/yolo.py", line 182, in forward_once
x = m(x) # run
File "/home/kylin/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/home/kylin/YOLOv5-Lite/models/yolo.py", line 82, in forward
return x if self.training else (torch.cat(z, 1), torch.cat(logits_, 1), x)
RuntimeError: Tensors must have same number of dimensions: got 5 and 3
目前这块问题的解决方案只能采用将detect 的框回归的部分从网络中抽离出来,然后在量化计算完成后再直接进行相关的框的计算。修改是在yolo.py 中的Detect 模块。
def forward(self, x):
    """Apply the per-level detection convolutions and return the raw maps.

    Quantization variant of the detection head: only ``self.m[i]`` is applied
    to each of the first ``self.nl`` entries of ``x``. The reshape, sigmoid
    and grid / anchor box decoding of the original head, together with its
    ``(cat(z), cat(logits_), x)`` inference return value, are intentionally
    left out; that decoding has to be done by the caller on the returned
    maps.

    Args:
        x: mutable sequence of feature maps; entries ``0 .. self.nl - 1`` are
            overwritten in place.

    Returns:
        The same sequence ``x``, now holding the convolution outputs.
    """
    # Kept from the original head: export mode switches the module into
    # training mode.
    self.training |= self.export
    for i in range(self.nl):
        x[i] = self.m[i](x[i])  # conv
    return x
将原有的这部分代码注释掉就可以了。训练时仍然使用原始代码,只在量化时做上述修改。
目前可以将yolov5 的pytorch 的静态量化跑通, 性能和精度后续再补齐。
torch — PyTorch 1.13 documentation