The training and test images for this competition all come from general surveillance scenes, but cover a variety of viewpoints (e.g., low-angle, high-angle, fisheye), and the relative size of pedestrians also varies considerably across images. Part of the training data draws on public datasets (e.g., ShanghaiTech [1], UCF-CC-50 [2], WorldExpo'10 [3], Mall [4]).
The annotations are all provided in the corresponding JSON files; each training image is annotated in one of two ways:
(1) some of the data provide bounding-box annotations for the pedestrians in the image, in the format [x, y, w, h];
(2) the other images provide head point annotations, with coordinates in the format [x, y].
In addition, some images also carry ignore-region (ignore_region) annotations: polygons in the format [x_0, y_0, x_1, y_1, …, x_n, y_n] (note that a single image may have multiple ignore polygons). The parts of an image inside an ignore region take no part in training/testing.
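To make the two formats concrete, here is a hypothetical annotation entry reconstructed from how the reader code later in this post consumes `train.json`; every field value below is invented for illustration.

```python
# Hypothetical entry from train.json, inferred from the reader code below;
# all values are made up.
example_annotation = {
    "name": "stage1/train/xxx.jpg",  # image path; the reader swaps the "stage1" prefix for a local dir
    "num": 2,                        # number of people in the image
    "type": "bbox",                  # "bbox" or "dot"
    "annotation": [
        {"x": 100, "y": 80, "w": 30, "h": 90},
        {"x": 300, "y": 120, "w": 28, "h": 85},
    ],
    "ignore_region": [               # zero or more polygons, each a list of points
        [{"x": 0, "y": 0}, {"x": 50, "y": 0}, {"x": 50, "y": 40}],
    ],
}
```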
Although this is a competition, I still wanted to get familiar with the new dynamic graph (dygraph) mode in Paddle 1.7 by building the network myself, rather than doing it directly with PaddleDetection. ![CSRNet architecture diagram](https://img-blog.csdnimg.cn/20200410083311700.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2tlbnNoaW41MTYxNjM=,size_16,color_FFFFFF,t_70)
```python
import paddle.fluid as fluid

class CSRNet(fluid.dygraph.Layer):
    def __init__(self):
        super(CSRNet, self).__init__()
        # Front end: 10 convolution layers and 3 pooling layers (VGG-16 style)
        self.conv1 = fluid.dygraph.Conv2D(num_channels=3, num_filters=64, filter_size=3, stride=1, padding=1, act="relu")
        self.conv2 = fluid.dygraph.Conv2D(num_channels=64, num_filters=64, filter_size=3, stride=1, padding=1, act="relu")
        self.pool1 = fluid.dygraph.Pool2D(pool_size=2, pool_type="max", pool_stride=2, pool_padding=0)
        self.conv3 = fluid.dygraph.Conv2D(num_channels=64, num_filters=128, filter_size=3, stride=1, padding=1, act="relu")
        self.conv4 = fluid.dygraph.Conv2D(num_channels=128, num_filters=128, filter_size=3, stride=1, padding=1, act="relu")
        self.pool2 = fluid.dygraph.Pool2D(pool_size=2, pool_type="max", pool_stride=2, pool_padding=0)
        self.conv5 = fluid.dygraph.Conv2D(num_channels=128, num_filters=256, filter_size=3, stride=1, padding=1, act="relu")
        self.conv6 = fluid.dygraph.Conv2D(num_channels=256, num_filters=256, filter_size=3, stride=1, padding=1, act="relu")
        self.conv7 = fluid.dygraph.Conv2D(num_channels=256, num_filters=256, filter_size=3, stride=1, padding=1, act="relu")
        self.pool3 = fluid.dygraph.Pool2D(pool_size=2, pool_type="max", pool_stride=2, pool_padding=0)
        self.conv8 = fluid.dygraph.Conv2D(num_channels=256, num_filters=512, filter_size=3, stride=1, padding=1, act="relu")
        self.conv9 = fluid.dygraph.Conv2D(num_channels=512, num_filters=512, filter_size=3, stride=1, padding=1, act="relu")
        self.conv10 = fluid.dygraph.Conv2D(num_channels=512, num_filters=512, filter_size=3, stride=1, padding=1, act="relu")
        # Back end: 6 dilated convolution layers (dilation=2 with padding=2 keeps the spatial size)
        self.dilated_con1 = fluid.dygraph.Conv2D(num_channels=512, num_filters=512, filter_size=3, stride=1, padding=2, dilation=2, act="relu")
        self.dilated_con2 = fluid.dygraph.Conv2D(num_channels=512, num_filters=512, filter_size=3, stride=1, padding=2, dilation=2, act="relu")
        self.dilated_con3 = fluid.dygraph.Conv2D(num_channels=512, num_filters=512, filter_size=3, stride=1, padding=2, dilation=2, act="relu")
        self.dilated_con4 = fluid.dygraph.Conv2D(num_channels=512, num_filters=256, filter_size=3, stride=1, padding=2, dilation=2, act="relu")
        self.dilated_con5 = fluid.dygraph.Conv2D(num_channels=256, num_filters=128, filter_size=3, stride=1, padding=2, dilation=2, act="relu")
        self.dilated_con6 = fluid.dygraph.Conv2D(num_channels=128, num_filters=64, filter_size=3, stride=1, padding=2, dilation=2, act="relu")
        # 1x1 plain convolution producing the single-channel density map
        self.output1 = fluid.dygraph.Conv2D(num_channels=64, num_filters=1, filter_size=1, act=None)

    def forward(self, inputs, label=None):
        """Forward pass."""
        outputs = self.conv1(inputs)
        outputs = self.conv2(outputs)
        outputs = fluid.layers.dropout(outputs, 0.2)
        outputs = self.pool1(outputs)
        outputs = self.conv3(outputs)
        outputs = self.conv4(outputs)
        outputs = fluid.layers.dropout(outputs, 0.2)
        outputs = self.pool2(outputs)
        outputs = self.conv5(outputs)
        outputs = self.conv6(outputs)
        outputs = self.conv7(outputs)
        outputs = self.pool3(outputs)
        outputs = self.conv8(outputs)
        outputs = fluid.layers.dropout(outputs, 0.2)
        outputs = self.conv9(outputs)
        outputs = fluid.layers.dropout(outputs, 0.2)
        outputs = self.conv10(outputs)
        outputs = self.dilated_con1(outputs)
        outputs = self.dilated_con2(outputs)
        outputs = self.dilated_con3(outputs)
        outputs = self.dilated_con4(outputs)
        outputs = self.dilated_con5(outputs)
        outputs = self.dilated_con6(outputs)
        outputs = self.output1(outputs)
        return outputs
```
Written against Paddle 1.7's dygraph API, the code is very intuitive, and it is also easy to insert BN or dropout layers anywhere in the middle.
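As a quick sanity check (a minimal sketch of my own, assuming PaddlePaddle 1.7's dygraph API), you can push a random image through the network and confirm that the output density map is 1/8 of the input resolution, matching the three 2x2 max-pooling layers:

```python
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    model = CSRNet()
    # random 640x480 RGB input in NCHW layout
    x = fluid.dygraph.to_variable(np.random.rand(1, 3, 640, 480).astype('float32'))
    y = model(x)
    print(y.shape)  # [1, 1, 80, 60] -- 1/8 of the input in each spatial dimension
```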
```python
import json
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import re
import numpy as np
from matplotlib import cm as CM
import math
from scipy.ndimage.filters import gaussian_filter
import scipy
import scipy.spatial

def gaussian_filter_density(gt):
    """Geometry-adaptive kernels: each head's sigma is derived from the
    average distance to its three nearest annotated neighbours."""
    density = np.zeros(gt.shape, dtype=np.float32)
    gt_count = np.count_nonzero(gt)
    if gt_count == 0:
        return density
    # (x, y) coordinates of every annotated head
    pts = np.array(list(zip(np.nonzero(gt)[1].ravel(), np.nonzero(gt)[0].ravel())))
    leafsize = 2048
    # build kdtree
    tree = scipy.spatial.KDTree(pts.copy(), leafsize=leafsize)
    # query kdtree for each point's 3 nearest neighbours (k=4 includes the point itself)
    distances, locations = tree.query(pts, k=4)
    for i, pt in enumerate(pts):
        pt2d = np.zeros(gt.shape, dtype=np.float32)
        pt2d[pt[1], pt[0]] = 1.
        if gt_count > 1:
            sigma = (distances[i][1] + distances[i][2] + distances[i][3]) * 0.1
            if sigma == float("inf"):  # fewer than 3 neighbours exist
                sigma = 1
        else:
            # single point: fall back to a quarter of the average image side
            sigma = np.average(np.array(gt.shape)) / 2. / 2.
        density += scipy.ndimage.filters.gaussian_filter(pt2d, sigma, mode='constant')
    return density
```
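For reference, here is a small usage sketch for the geometry-adaptive function above (the reader below actually keeps this path commented out and uses `cv2.GaussianBlur` instead); the head coordinates are made up:

```python
# Hypothetical example: five head points on a 480x640 ground-truth map.
gt = np.zeros((480, 640), dtype=np.float32)
for x, y in [(100, 50), (320, 240), (600, 400), (150, 300), (500, 100)]:
    gt[y, x] = 1.0  # mark each annotated head with a single pixel
density = gaussian_filter_density(gt)
print(density.sum())  # ~5.0: blurring spreads each point but preserves the count
```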
```python
def train_data():
    def reader():
        json_info = json.load(open('/home/aistudio/work/train.json', encoding='utf-8'))
        home_url = "/home/aistudio/work/train_img"
        train_len = len(json_info['annotations'])
        # indices of low-quality training images to skip (check disabled below)
        ig_list = open('/home/aistudio/work/ig_train.txt', 'r', encoding='utf-8').read().split("\n")
        dic_ig = dict.fromkeys(ig_list)
        for i in range(train_len):
            #if str(i) in dic_ig:
            #    continue
            img = cv2.imread(re.sub("stage1", home_url, json_info['annotations'][i]['name']))
            num = json_info['annotations'][i]['num']
            k = 480 / 4  # head-to-shoulder ratio 1:3
            k /= math.sqrt(num)
            k /= 8
            k = int(k)
            if k % 2 == 0:  # GaussianBlur needs an odd kernel size
                k -= 1
            if k <= 0:
                k = 1
            # mask out the ignore_region polygons
            if json_info['annotations'][i]['ignore_region']:
                for ig in json_info['annotations'][i]['ignore_region']:
                    p_list = []
                    for p in ig:
                        p_list.append([p['x'], p['y']])
                    ig_np = np.array(p_list, np.int32)
                    img = cv2.fillPoly(img, [ig_np], (0, 0, 0), cv2.LINE_AA)
            img_y = img.shape[0]
            img_x = img.shape[1]
            # resize every image to a uniform 640x480
            img = cv2.resize(img, (640, 480), interpolation=cv2.INTER_AREA)
            # density map at 1/8 resolution to match the network output
            dens = np.zeros((int(img.shape[0] / 8), int(img.shape[1] / 8)))
            if json_info['annotations'][i]['type'] == 'bbox':
                for box in json_info['annotations'][i]['annotation']:
                    # approximate the head position from the bounding box
                    box_x = int(box['x']) + 0.5 * int(box['w'])
                    box_y = int(box['y']) + 0.2 * int(box['h'])
                    dens[int(box_y * 480 / img_y / 8), int(box_x * 640 / img_x / 8)] += 1
            elif json_info['annotations'][i]['type'] == 'dot':
                for dot in json_info['annotations'][i]['annotation']:
                    dens[int(dot['y'] * 480 / img_y / 8), int(dot['x'] * 640 / img_x / 8)] += 1
            dens = cv2.GaussianBlur(dens, (k, k), 0.3)
            dens = dens.transpose(1, 0)
            #dens = gaussian_filter_density(dens)
            dens = np.asarray(dens)
            dens = dens[np.newaxis, :]
            dens = dens.astype('float32')
            img = img.transpose(2, 1, 0).astype('float32')
            img /= 255.0
            yield img, dens
    return reader
```
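Below is a sketch of how this reader might be consumed in a training loop (my own assumptions, not from the original run: `paddle.batch` for batching, `batch_size=8`, Adam with `learning_rate=1e-4`, and a plain pixel-wise MSE loss between the predicted and ground-truth density maps):

```python
import paddle

train_reader = paddle.batch(train_data(), batch_size=8)  # batch_size is an assumption

with fluid.dygraph.guard():
    model = CSRNet()
    opt = fluid.optimizer.AdamOptimizer(learning_rate=1e-4,
                                        parameter_list=model.parameters())
    for batch in train_reader():
        imgs = fluid.dygraph.to_variable(np.array([b[0] for b in batch]))
        dens = fluid.dygraph.to_variable(np.array([b[1] for b in batch]))
        pred = model(imgs)
        # pixel-wise MSE between predicted and ground-truth density maps
        loss = fluid.layers.reduce_mean(fluid.layers.square(pred - dens))
        loss.backward()
        opt.minimize(loss)
        model.clear_gradients()
```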
Here I used an alternative, unofficial way of generating the density map:

```python
cv2.GaussianBlur(dens, (k, k), 0.3)
```

where k is estimated from the number of people annotated in the image combined with a head-to-shoulder ratio. This works well as long as a single image does not exhibit strong perspective distortion; in practice, most k values also fall between 1 and 3.
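Evaluating the kernel-size formula from the reader above for a few crowd counts shows why:

```python
import math

def kernel_size(num):
    # k = (480 / 4) / sqrt(num) / 8, floored, then forced odd and at least 1,
    # exactly as computed in the reader above
    k = int(480 / 4 / math.sqrt(num) / 8)
    if k % 2 == 0:
        k -= 1
    return max(k, 1)

print(kernel_size(10))    # 3
print(kernel_size(100))   # 1
print(kernel_size(1000))  # 1
```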
1) During the learning stage, hand-writing the network is still very worthwhile: it deepens understanding and avoids knowing how without knowing why. Working from the architecture diagram, implementing the network by hand is actually quite convenient.
2) In CV, data augmentation remains very important; the augmented dataset has a large impact on the final result. Of course, offline (static) augmentation costs time and effort, and online (dynamic) augmentation must be careful not to produce corrupted samples.
3) As for hyperparameter tuning and optimization, tuning from scratch really is manual labor; in practice, starting from a pretrained model is necessary. During tuning, observe, checkpoint, and interrupt runs promptly: letting a job run overnight only to find the loss has exploded, or the model stuck at a saddle point, wastes both time and compute.