注意:计算相交区域的宽和高时,一定要先与 0 取最大值;如果差值为负,说明两个框根本不相交,交集面积应记为 0。
import numpy as np
def ComputeIOU(boxA, boxB):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Each box is indexed as ``[x1, y1, x2, y2]``. Widths and heights are
    computed as ``x2 - x1 + 1`` / ``y2 - y1 + 1``, i.e. the coordinates are
    treated as inclusive.

    Args:
        boxA: First box, a 4-element sequence ``[x1, y1, x2, y2]``.
        boxB: Second box, same layout.

    Returns:
        The IoU as a float. ``0.0`` when the boxes do not overlap or when the
        union area is not positive (degenerate boxes), instead of dividing
        by zero.
    """
    # Corners of the intersection rectangle.
    x1 = max(boxA[0], boxB[0])
    y1 = max(boxA[1], boxB[1])
    x2 = min(boxA[2], boxB[2])
    y2 = min(boxA[3], boxB[3])

    # Clamp each side at 0: a negative extent means the boxes are disjoint.
    inter_area = max(x2 - x1 + 1, 0) * max(y2 - y1 + 1, 0)

    area_a = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    area_b = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    union_area = area_a + area_b - inter_area

    if union_area <= 0:
        return 0.0
    return float(inter_area / union_area)
# Example call on two overlapping boxes, each given as [x1, y1, x2, y2].
boxA = [1,1,3,3]
boxB = [2,2,4,4]
# The result is stored in IOU; it is not printed here.
IOU = ComputeIOU(boxA, boxB)
import numpy as np
def nms(dets, iou_thred, cfd_thred):
    """Greedy non-maximum suppression (straightforward version).

    Repeatedly takes the highest-scoring remaining box, keeps it, and drops
    every other remaining box whose IoU with it is ``>= iou_thred``. Stops
    as soon as the best remaining score falls below ``cfd_thred``.

    Args:
        dets: Sequence of rows ``[x1, y1, x2, y2, score, ...]``. Columns 0-3
            are the box corners and column 4 is the score; any further
            columns are carried through untouched.
        iou_thred: Boxes with IoU strictly below this value survive.
        cfd_thred: Minimum score for a box to be picked.

    Returns:
        ``[]`` when ``dets`` is empty; otherwise ``np.ndarray`` of the picked
        rows, in descending score order.
    """
    if len(dets) == 0:
        return []

    bboxes = np.array(dets)
    # Sort ascending by score so the best candidate is always the last row.
    bboxes = bboxes[np.argsort(bboxes[:, 4])]
    pick_bboxes = []

    # Column 4 is used for the threshold as well as for the sort; reading the
    # last column instead would be wrong for rows that carry extra fields.
    while bboxes.shape[0] and bboxes[-1, 4] >= cfd_thred:
        bbox = bboxes[-1]
        rest = bboxes[:-1]

        # Intersection of the picked box with every remaining box.
        x1 = np.maximum(bbox[0], rest[:, 0])
        y1 = np.maximum(bbox[1], rest[:, 1])
        x2 = np.minimum(bbox[2], rest[:, 2])
        y2 = np.minimum(bbox[3], rest[:, 3])
        # Clamp at 0: a negative extent means no overlap.
        inters = np.maximum(x2 - x1 + 1, 0) * np.maximum(y2 - y1 + 1, 0)

        bbox_area = (bbox[2] - bbox[0] + 1) * (bbox[3] - bbox[1] + 1)
        rest_areas = (rest[:, 2] - rest[:, 0] + 1) * (rest[:, 3] - rest[:, 1] + 1)
        ious = inters / (bbox_area + rest_areas - inters)

        # `rest` already excludes the picked box, so it can never keep itself.
        bboxes = rest[ious < iou_thred]
        pick_bboxes.append(bbox)

    return np.asarray(pick_bboxes)
### 肌肉记忆了
import numpy as np
def nms(preds, iou_thred=0.5, score_thred=0.5):
    """Greedy non-maximum suppression driven by an index array.

    Unlike the sibling implementations in this file, areas here are computed
    as ``(x2 - x1) * (y2 - y1)`` with no ``+ 1``.

    Args:
        preds: ``N x 5`` array-like of rows ``[x1, y1, x2, y2, score]``.
            Lists are accepted and converted with ``np.asarray``.
        iou_thred: Boxes with IoU strictly below this value survive.
        score_thred: Minimum score for a box to be picked.

    Returns:
        ``np.ndarray`` of the picked rows in descending score order; an empty
        array when nothing is picked or ``preds`` is empty.
    """
    preds = np.asarray(preds)
    if preds.size == 0:
        # Nothing to index; same result the loop yields when it never runs.
        return np.asarray([])

    # Ascending by score: the best remaining candidate is always orders[-1].
    orders = np.argsort(preds[:, 4])
    picked = []
    # Areas are loop-invariant, so compute them once.
    areas = (preds[:, 2] - preds[:, 0]) * (preds[:, 3] - preds[:, 1])

    while orders.shape[0] and preds[orders[-1], 4] >= score_thred:
        best = orders[-1]
        rest = orders[:-1]
        pick = preds[best]

        xx1 = np.maximum(pick[0], preds[rest, 0])
        yy1 = np.maximum(pick[1], preds[rest, 1])
        xx2 = np.minimum(pick[2], preds[rest, 2])
        yy2 = np.minimum(pick[3], preds[rest, 3])
        # Clamp at 0: a negative extent means no overlap.
        inters = np.maximum(xx2 - xx1, 0) * np.maximum(yy2 - yy1, 0)
        unions = areas[best] + areas[rest] - inters
        iou = inters / unions

        # Keep only the candidates that do not overlap the pick too much.
        orders = rest[iou < iou_thred]
        picked.append(pick)

    return np.asarray(picked)
# Example input: three detections as rows of [x1, y1, x2, y2, score].
dets = np.asarray([[187, 82, 337, 317, 0.9], [150, 67, 305, 282, 0.75], [246, 121, 368, 304, 0.8]])
# Call with the default thresholds; the return value is not stored.
nms(dets)
# Same detections as a plain nested list, this time with explicit thresholds.
dets = [[187, 82, 337
, 317, 0.9], [150, 67, 305, 282, 0.75], [246, 121, 368, 304, 0.8]]
dets_nms = nms(dets, 0.5, 0.3)
print(dets_nms)
始终维护orders,代表到原bboxes的映射(map)
优化1:仅维护orders,不改变原bboxes
优化2:提前计算好bboxes的面积,以免在循环中多次重复计算
import numpy as np
def nms(dets, iou_thred, cfd_thred):
    """Greedy non-maximum suppression (index-based, areas precomputed).

    The box array is never reordered or shrunk; only ``orders``, an array of
    row indices sorted by ascending score, is maintained. Box areas are
    computed once before the loop.

    Args:
        dets: Sequence of rows ``[x1, y1, x2, y2, score, ...]``. Columns 0-3
            are the box corners and column 4 is the score; any further
            columns are carried through untouched.
        iou_thred: Boxes with IoU strictly below this value survive.
        cfd_thred: Minimum score for a box to be picked.

    Returns:
        ``[]`` when ``dets`` is empty; otherwise ``np.ndarray`` of the picked
        rows, in descending score order.
    """
    if len(dets) == 0:
        return []

    bboxes = np.array(dets)
    # orders maps positions in the working set back to rows of bboxes.
    orders = np.argsort(bboxes[:, 4])
    pick_bboxes = []

    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    # Precompute areas so they are not recomputed on every iteration.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    # Column 4 is used for the threshold as well as for the sort; reading the
    # last column instead would be wrong for rows that carry extra fields.
    while orders.shape[0] and bboxes[orders[-1], 4] >= cfd_thred:
        best = orders[-1]
        rest = orders[:-1]
        bbox = bboxes[best]

        xx1 = np.maximum(bbox[0], x1[rest])
        yy1 = np.maximum(bbox[1], y1[rest])
        xx2 = np.minimum(bbox[2], x2[rest])
        yy2 = np.minimum(bbox[3], y2[rest])
        # Clamp at 0: a negative extent means no overlap.
        inters = np.maximum(xx2 - xx1 + 1, 0) * np.maximum(yy2 - yy1 + 1, 0)
        unions = areas[best] + areas[rest] - inters
        ious = inters / unions

        pick_bboxes.append(bbox)
        # `rest` already excludes the picked index.
        orders = rest[ious < iou_thred]

    return np.asarray(pick_bboxes)
# Example: three detections as rows of [x1, y1, x2, y2, score].
dets = [[187, 82, 337, 317, 0.9], [150, 67, 305, 282, 0.75], [246, 121, 368, 304, 0.8]]
# IoU threshold 0.5, score threshold 0.3.
dets_nms = nms(dets, 0.5, 0.3)
print(dets_nms)
torch官方的Conv2d需要传入的参数
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
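输入和输出的特征图尺寸大小关系:$n_{out} = \lfloor (n_{in} + 2p - k) / s \rfloor + 1$,其中 $n_{in}$ 为输入尺寸,$k$ 为卷积核大小,$p$ 为padding,$s$ 为stride。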
1.对于padding的处理:新开一个尺寸为填充后输入大小(w+2p, h+2p)的全零矩阵,然后给非padding区域赋值;或者直接用np.pad函数。
2.卷积通过逐元素相乘并求和实现,使用numpy的np.multiply和np.sum函数。在inputs上逐行、逐列滑动窗口,并把结果赋值到outputs中。np.multiply可以广播,所以可以同时对多个卷积核操作:例如形状为25×16×3×3的卷积核与形状为16×3×3的特征图区域经过multiply和sum后得到25×1的结果,就是输出特征图该像素点上的多通道特征。这样可以省去对各卷积核的一层循环。
3.直接利用range中的间隔模拟stride。每个方向上窗口起点的取值范围是0到n-k+2p,共n-k+2p+1个有效位置,因此range的上界(不含)是n-k+2p+1。
import numpy as np
def conv2d(inputs, kernels, padding, bias, stride):
    """Naive 2-D convolution (cross-correlation) over a single image.

    All kernels are applied at once per output position by broadcasting the
    4-D kernel tensor against the 3-D input window, so there is no explicit
    loop over kernels.

    Args:
        inputs: Array of shape ``(c, w, h)``.
        kernels: Array of shape ``(kernels_num, c, k_w, k_h)``.
        padding: Number of zero rows/columns added on every side of the two
            spatial axes.
        bias: Value added to each output position; it is broadcast against a
            vector of length ``kernels_num`` (scalar or one value per kernel).
        stride: Step between consecutive window positions on both axes.

    Returns:
        Float array of shape ``(kernels_num, out_w, out_h)`` with
        ``out = (n - k + 2 * padding) // stride + 1`` on each spatial axis.
    """
    c, w, h = inputs.shape
    kernels_num = kernels.shape[0]
    k_w, k_h = kernels.shape[2], kernels.shape[3]

    # Pad by the requested amount (not a fixed 1) so the input matches the
    # output size computed below.
    inputs = np.pad(inputs, ((0, 0), (padding, padding), (padding, padding)))

    out_w = (w - k_w + 2 * padding) // stride + 1
    out_h = (h - k_h + 2 * padding) // stride + 1
    outputs = np.empty((kernels_num, out_w, out_h))

    for out_i in range(out_w):
        i = out_i * stride
        for out_j in range(out_h):
            j = out_j * stride
            window = inputs[:, i:i + k_w, j:j + k_h]
            # (K, c, k_w, k_h) * (c, k_w, k_h) -> sum over all but K.
            outputs[:, out_i, out_j] = (
                np.sum(np.multiply(kernels, window), axis=(1, 2, 3)) + bias
            )
    return outputs
# Demo: 16-channel 9x9 input of ones, 25 kernels of shape 16x3x3 of ones.
inputs = np.ones((16,9,9))
kernels = np.ones((25,16,3,3))
# One distinct bias per kernel: 1, 2, ..., 25.
bias = np.arange(1,kernels.shape[0]+1)
stride = 2
padding = 1
outputs = conv2d(inputs, kernels, padding, bias, stride)
# Print the shapes involved, then the full output tensor.
print("input{}".format(inputs.shape))
print("kenerls{}, stride{}".format(kernels.shape, stride))
print("output{}".format(outputs.shape))
print(outputs)
torch官方的Pool2d需要传入的参数
nn.MaxPool2d(kernel_size=2, stride=(2, 1), padding=(0, 1))
下面的实现没有支持padding,stride在w和h方向也没有区分。
## 池化操作
def pooling(inputs, pool_size, stride, mode='max'):
    """Max or average pooling over a ``(c, w, h)`` array, without padding.

    The same square window size and the same stride are used on both spatial
    axes.

    Args:
        inputs: Array of shape ``(c, w, h)``.
        pool_size: Side length of the square pooling window.
        stride: Step between consecutive windows on both axes.
        mode: ``'max'`` or ``'avg'``.

    Returns:
        Float array of shape
        ``(c, (w - pool_size) // stride + 1, (h - pool_size) // stride + 1)``.

    Raises:
        ValueError: If ``mode`` is neither ``'max'`` nor ``'avg'``.
    """
    # One reducer per mode, so the window loop is written only once.
    reducers = {'max': np.max, 'avg': np.mean}
    if mode not in reducers:
        raise ValueError('not support this mode, choose "max" or "avg" ')
    reduce_window = reducers[mode]

    c, w, h = inputs.shape
    k = pool_size
    out_w = (w - k) // stride + 1
    out_h = (h - k) // stride + 1
    outputs = np.zeros((c, out_w, out_h))

    for out_i in range(out_w):
        i = out_i * stride
        for out_j in range(out_h):
            j = out_j * stride
            # Reduce over the two spatial axes, keeping the channel axis.
            outputs[:, out_i, out_j] = reduce_window(
                inputs[:, i:i + k, j:j + k], axis=(1, 2)
            )
    return outputs
# Demo: 2x2 max pooling with stride 2 on a 3x5x5 tensor holding 1..75.
pool_size = 2
stride = 2
mode = 'max'
inputs = np.arange(1,76).reshape((3,5,5))
print("inputs:{}".format(inputs.shape), '\n',inputs)
outputs = pooling(inputs, pool_size, stride, mode)
print("outputs:{}".format(outputs.shape), '\n',outputs)
#### 手撕 BN
import torch
from torch import nn
def batch_norm(X, parameters, moving_mean, moving_var, eps, momentum):
    """Functional batch normalization for 2-D or 4-D inputs.

    The mode is chosen from ``torch.is_grad_enabled()``: with gradients
    enabled the batch statistics are used and the moving statistics are
    updated; with gradients disabled the supplied moving statistics are used
    and returned unchanged.

    Args:
        X: Input of shape ``(N, F)`` or ``(N, C, H, W)`` (required when
            gradients are enabled).
        parameters: Mapping with keys ``'gamma'`` (scale) and ``'beta'``
            (shift), each broadcastable against ``X``.
        moving_mean: Running mean, broadcastable against ``X``.
        moving_var: Running variance, broadcastable against ``X``.
        eps: Constant added to the variance before the square root.
        momentum: Weight of the previous moving statistic in the update
            ``new = momentum * old + (1 - momentum) * batch``.

    Returns:
        Tuple ``(Y, moving_mean, moving_var)`` in both modes.
    """
    if not torch.is_grad_enabled():
        # Inference: normalize with the accumulated statistics.
        X_hat = (X - moving_mean) / torch.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4), "expected a 2-D or 4-D input"
        # Dense input: reduce over the batch axis.
        # Conv input: reduce over batch and both spatial axes, per channel.
        reduce_dims = 0 if len(X.shape) == 2 else (0, 2, 3)
        # keepdim=True keeps the statistics broadcastable along the
        # feature/channel axis rather than the trailing axis.
        mean = X.mean(dim=reduce_dims, keepdim=True)
        var = ((X - mean) ** 2).mean(dim=reduce_dims, keepdim=True)
        X_hat = (X - mean) / torch.sqrt(var + eps)
        # Detach so the running statistics do not hold on to the graph.
        moving_mean = momentum * moving_mean + (1 - momentum) * mean.detach()
        moving_var = momentum * moving_var + (1 - momentum) * var.detach()
    Y = parameters['gamma'] * X_hat + parameters['beta']
    return Y, moving_mean, moving_var
class BatchNorm(nn.Module):
    """Batch-normalization layer built on the module-level ``batch_norm``.

    The scale ``gamma`` and shift ``beta`` are registered ``nn.Parameter``s,
    so they appear in ``self.parameters()``. The moving mean and variance are
    registered buffers, so they follow ``.to(device)`` and are saved in the
    ``state_dict``.

    Args:
        num_features: Number of features (2-D input) or channels (4-D input).
        num_dims: ``2`` for inputs of shape ``(N, F)``; any other value
            selects the 4-D layout ``(N, C, H, W)``.
    """

    def __init__(self, num_features, num_dims):
        super().__init__()
        if num_dims == 2:
            shape = (1, num_features)
        else:
            shape = (1, num_features, 1, 1)
        # Identity transform at start: scale 1, shift 0.
        self.gamma = nn.Parameter(torch.ones(shape))
        self.beta = nn.Parameter(torch.zeros(shape))
        # Running statistics start at mean 0 / variance 1.
        self.register_buffer('moving_mean', torch.zeros(shape))
        self.register_buffer('moving_var', torch.ones(shape))

    def forward(self, X):
        """Normalize ``X`` and store the moving statistics that come back."""
        # batch_norm reads the scale/shift through the 'gamma'/'beta' keys.
        parameters = {'gamma': self.gamma, 'beta': self.beta}
        Y, self.moving_mean, self.moving_var = batch_norm(
            X, parameters, self.moving_mean, self.moving_var,
            eps=1e-5, momentum=0.9)
        return Y
import torch
import torch.nn as nn
import torch.nn.functional as F
class ResNetBlock(nn.Module):
    """Residual bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand, plus skip.

    The bottleneck path keeps the spatial size (``padding='same'``) and maps
    back to the input channel count, so its output can be added to the input.

    Args:
        channels: Number of input and output channels. Defaults to 256.
        bottleneck_channels: Channel width inside the bottleneck. Defaults
            to 64.
    """

    def __init__(self, channels=256, bottleneck_channels=64):
        super().__init__()
        self.bottleneck = nn.Sequential(
            # 1x1 conv reduces the channel count.
            nn.Conv2d(channels, bottleneck_channels, 1, padding='same'),
            nn.BatchNorm2d(bottleneck_channels),
            nn.ReLU(),
            # 3x3 conv at the reduced width.
            nn.Conv2d(bottleneck_channels, bottleneck_channels, 3, padding='same'),
            nn.BatchNorm2d(bottleneck_channels),
            nn.ReLU(),
            # 1x1 conv restores the original channel count.
            nn.Conv2d(bottleneck_channels, channels, 1, padding='same'),
        )

    def forward(self, x):
        """Return ``x`` plus the bottleneck path applied to ``x``."""
        residual = self.bottleneck(x)
        return x + residual
# Smoke test: push a random batch of shape (4, 256, 16, 16) through the block
# and print the output shape and values.
resnet = ResNetBlock()
inputs = torch.rand(4, 256, 16, 16)
outputs = resnet(inputs)
print(outputs.shape, outputs)
#### 手撕 torch神经网络
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt
### 定义模型
### N,1 -> N,10 -> N,10 -> N,1
class Net(nn.Module):
    """Fully connected network: input -> hidden -> hidden -> output.

    A ReLU follows each of the two hidden layers; the output layer is linear.
    """

    def __init__(self, n_input, n_hidden, n_output):
        super().__init__()
        self.dense1 = nn.Linear(n_input, n_hidden)
        self.dense2 = nn.Linear(n_hidden, n_hidden)
        self.out = nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Run ``x`` through both hidden layers and the output layer."""
        hidden = F.relu(self.dense1(x))
        hidden = F.relu(self.dense2(hidden))
        return self.out(hidden)
model = Net(1, 20, 1)
print(model)
### Prepare data: 100 points of y = x^3 plus Gaussian noise scaled by 0.1.
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)
y = x.pow(3) + 0.1 * torch.randn(x.size())
# Plain tensors are used directly: wrapping them in Variable has been
# unnecessary since tensors and Variables were merged.
plt.scatter(x.numpy(), y.numpy())
plt.show()
#### Training pipeline: SGD on the mean-squared error.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_func = torch.nn.MSELoss()
for t in range(500):
    predict = model(x)
    loss = loss_func(predict, y)
    # Clear old gradients, backpropagate, then take one optimizer step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if t % 5 == 0:
        # Redraw the data and the current fit every 5 iterations.
        plt.cla()
        plt.scatter(x.numpy(), y.numpy())
        # detach() drops the autograd graph before converting to NumPy.
        plt.plot(x.numpy(), predict.detach().numpy(), 'r-', lw=5)
        plt.text(0.5, 0, 'Loss = %.4f' % loss.item(), fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.05)
定义模型时也可以使用nn.Sequential()这种更简单的方式:
class Net(torch.nn.Module):
    """Fully connected network whose hidden stages are ``nn.Sequential``.

    Each hidden stage bundles a ``Linear`` layer and its ``ReLU`` into one
    named ``Sequential``, so ``forward`` only chains three calls.
    """

    def __init__(self, n_input, n_hidden, n_output):
        # Imported here because the visible file has no top-level import of it.
        from collections import OrderedDict

        super().__init__()
        self.dense1 = torch.nn.Sequential(
            OrderedDict([
                ("dense1", torch.nn.Linear(n_input, n_hidden)),
                ("relu1", torch.nn.ReLU()),
            ])
        )
        # The linear layer's key is "dense1" here too, exactly as in the
        # original; it determines the parameter names in the state_dict.
        self.dense2 = torch.nn.Sequential(
            OrderedDict([
                ("dense1", torch.nn.Linear(n_hidden, n_hidden)),
                ("relu2", torch.nn.ReLU()),
            ])
        )
        self.out = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Run ``x`` through both hidden stages and the output layer."""
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.out(x)
        return x
参考:
python实现NMS
面试问题总结——编程题关于IOU、NMS
Python手撕实现正向卷积操作
BatchNorm的最详细解释和手撕实现与应用代码
BatchNorm反向传播推导
Pytorch搭建简单神经网络(一)——回归
Pytorch之搭建神经网络的简化写法