Reshape the loaded image into a square by padding:
import numpy as np
import torch.nn.functional as F

def pad_to_square(img, pad_value):
    c, h, w = img.shape
    dim_diff = np.abs(h - w)
    # (upper / left) padding and (lower / right) padding
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # Determine padding: (left, right, top, bottom)
    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    # Add padding
    img = F.pad(img, pad, "constant", value=pad_value)
    return img, pad
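A minimal usage sketch (the file name is illustrative, not from the source): load an image as a CHW tensor with torchvision, pad it to a square, and inspect the returned pad tuple.

from PIL import Image
import torchvision.transforms as transforms

img = transforms.ToTensor()(Image.open("example.jpg").convert("RGB"))  # hypothetical file
print(img.shape)   # e.g. torch.Size([3, 576, 704])
img, pad = pad_to_square(img, 0)
print(img.shape)   # torch.Size([3, 704, 704]) -- now square
print(pad)         # (0, 0, 64, 64): (left, right, top, bottom)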
How the labels are processed:
- From the json annotation we get the box's top-left corner (x, y), the box width and height w, h, and the original image width and height w_factor, h_factor
- Convert the top-left corner to the center point (x1, y1)
- Scale the center point and the width/height by the image size, giving relative values (x2, y2, w2, h2) (see the sketch below)
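As a hedged sketch of that conversion (the function and variable names are my own, not from the source), turning a top-left (x, y, w, h) box in pixels into the normalized center format looks like this:

def to_yolo_label(x, y, w, h, w_factor, h_factor):
    # Illustrative helper: COCO-style box (top-left x, y, width, height in pixels)
    # -> normalized (x_center, y_center, w, h) as stored in the label .txt files.
    x_center = x + w / 2          # top-left corner -> center point
    y_center = y + h / 2
    return (x_center / w_factor,  # divide by the image size to get relative values
            y_center / h_factor,
            w / w_factor,
            h / h_factor)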
Data produced with the labelling tool labelImg is handled the same way; the following code verifies this:
import cv2
import numpy as np

img_path = 'F:/labelimg_test/COCO_train2014_000000000036.jpg'
label_path = 'F:/labelimg_test/COCO_train2014_000000000036.txt'
image = cv2.imread(img_path)
print(image.shape)  # (576, 704, 3) (H, W, C)
h_factor, w_factor = image.shape[:2]
print('original image size h:', h_factor, 'w:', w_factor)
boxes = np.loadtxt(label_path).reshape(-1, 5)  # each row: (class, x, y, w, h), normalized
for i in boxes:
    print('values stored in the label file:')
    print('x_label:', i[1], 'y_label:', i[2], 'w_label:', i[3], 'h_label:', i[4])
    x, y, w, h = i[1], i[2], i[3], i[4]
    # Scale back to pixel coordinates
    x = x * w_factor
    y = y * h_factor
    w = w * w_factor
    h = h * h_factor
    # Move from the center point back to the top-left corner
    x = x - w / 2
    y = y - h / 2
    print('label restored to the original image:')
    print('x:', x, 'y:', y, 'w:', w, 'h:', h)
    print('\n')
    # top-left corner (x, y), bottom-right corner (x+w, y+h)
    draw_1 = cv2.rectangle(image, (int(x), int(y)), (int(x + w), int(y + h)),
                           color=(0, 255, 0), thickness=2, lineType=8)
cv2.imwrite("vertical_flip.jpg", draw_1)  # save the image with the drawn boxes
cv2.imshow("draw_0", draw_1)  # show the image with the drawn boxes
cv2.waitKey(0)
cv2.destroyWindow("draw_0")
- Restore the label values (boxes[1], boxes[2], boxes[3], boxes[4]) to the real image, i.e. recover the pixel coordinates of the top-left corner and the width/height
- Also compute the bottom-right corner, giving the two corners (x1, y1, x2, y2)
- After the original image is padded, its coordinate system shifts, so the boxes must be offset by the same padding to keep their relative position unchanged
- Finally apply the label transformation described above: compute the center point and width/height, then divide by the padded width and height padded_w, padded_h to get relative values
# Extract coordinates for unpadded + unscaled image
x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
# Adjust for added padding
x1 += pad[0]
y1 += pad[2]
x2 += pad[1]
y2 += pad[3]
# Returns (x, y, w, h)
boxes[:, 1] = ((x1 + x2) / 2) / padded_w
boxes[:, 2] = ((y1 + y2) / 2) / padded_h
boxes[:, 3] *= w_factor / padded_w
boxes[:, 4] *= h_factor / padded_h
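To make the arithmetic concrete, here is a worked example using the 576x704 image from the verification code above; the box values themselves are made up for illustration.

w_factor, h_factor = 704, 576                 # original image size
padded_w = padded_h = 704                     # size after pad_to_square
pad = (0, 0, 64, 64)                          # (left, right, top, bottom)
x_c, y_c, bw, bh = 0.5, 0.5, 0.25, 0.25       # a normalized label on the 576x704 image

x1 = w_factor * (x_c - bw / 2) + pad[0]       # 264
y1 = h_factor * (y_c - bh / 2) + pad[2]       # 216 + 64 = 280
x2 = w_factor * (x_c + bw / 2) + pad[1]       # 440
y2 = h_factor * (y_c + bh / 2) + pad[3]       # 360 + 64 = 424

print(((x1 + x2) / 2) / padded_w)             # 0.5    -> new x_center
print(((y1 + y2) / 2) / padded_h)             # 0.5    -> new y_center
print(bw * w_factor / padded_w)               # 0.25   -> new w
print(bh * h_factor / padded_h)               # ~0.205 -> new h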
- Add a sample index to every box (filled in later by collate_fn)
- Multi-scale training: all images in a batch must have the same size, so they are resized to a common size; every 10 batches a new target size is picked at random from [min = 416 - 3x32, max = 416 + 3x32] in steps of 32
- Finally the images, labels, and paths are collected and returned:

paths = [path0, path1, ...]
imgs = [batch_size, 3, img_size, img_size]  (square after padding and resizing)
targets = [N, 6]

The default collate_fn returns the batch stacked into a single tensor; this custom collate_fn first receives the samples as a list.
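For illustration (hypothetical numbers), a batch of two images where image 0 has two boxes and image 1 has one box would give a targets tensor of shape [3, 6], with column 0 holding the sample index:

import torch

# Columns: sample_idx, class, x_center, y_center, w, h (relative to the padded image)
targets = torch.tensor([[0., 1., 0.50, 0.40, 0.20, 0.30],
                        [0., 3., 0.70, 0.60, 0.10, 0.10],
                        [1., 1., 0.25, 0.55, 0.15, 0.20]])
print(targets.shape)   # torch.Size([3, 6])

The custom collate_fn below is what fills in that first column: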
def collate_fn(self, batch):
    """
    :param batch: one batch of data, a list [(path, img, target), (path, img, target), ...];
                  after unzipping, targets is a tuple (target0, target1, ...)
    :return: paths, stacked imgs, concatenated targets
    """
    paths, imgs, targets = list(zip(*batch))  # zip(*batch) unzips the batch into tuples of paths, imgs, targets
    # Remove empty placeholder targets
    targets = [boxes for boxes in targets if boxes is not None]
    # Add sample index to targets
    for i, boxes in enumerate(targets):
        boxes[:, 0] = i
    targets = torch.cat(targets, 0)
    # print('targets after concatenation:', targets.shape)
    # Selects new image size every tenth batch
    if self.multiscale and self.batch_count % 10 == 0:
        self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32))
    # Resize images to input shape
    imgs = torch.stack([resize(img, self.img_size) for img in imgs])
    self.batch_count += 1
    return paths, imgs, targets
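The resize call used above is not defined in this snippet; in PyTorch-YOLOv3 it is a thin wrapper around F.interpolate, roughly as sketched below.

import torch.nn.functional as F

def resize(image, size):
    # Resize a single CHW image tensor to (size, size) with nearest-neighbour interpolation.
    return F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)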