Dataset URL: http://shuoyang1213.me/WIDERFACE/ . The following script converts the WIDER FACE annotations into PASCAL VOC format:
import os, cv2, sys, shutil
from xml.dom.minidom import Document

def writexml(filename, saveimg, bboxes, xmlpath):
    # Build a PASCAL VOC style XML annotation for one image.
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)
    folder = doc.createElement('folder')
    folder_name = doc.createTextNode('widerface')
    folder.appendChild(folder_name)
    annotation.appendChild(folder)
    filenamenode = doc.createElement('filename')
    filename_name = doc.createTextNode(filename)
    filenamenode.appendChild(filename_name)
    annotation.appendChild(filenamenode)
    source = doc.createElement('source')
    annotation.appendChild(source)
    database = doc.createElement('database')
    database.appendChild(doc.createTextNode('wider face Database'))
    source.appendChild(database)
    annotation_s = doc.createElement('annotation')
    annotation_s.appendChild(doc.createTextNode('PASCAL VOC2007'))
    source.appendChild(annotation_s)
    image = doc.createElement('image')
    image.appendChild(doc.createTextNode('flickr'))
    source.appendChild(image)
    flickrid = doc.createElement('flickrid')
    flickrid.appendChild(doc.createTextNode('-1'))
    source.appendChild(flickrid)
    owner = doc.createElement('owner')
    annotation.appendChild(owner)
    flickrid_o = doc.createElement('flickrid')
    flickrid_o.appendChild(doc.createTextNode('yanyu'))
    owner.appendChild(flickrid_o)
    name_o = doc.createElement('name')
    name_o.appendChild(doc.createTextNode('yanyu'))
    owner.appendChild(name_o)
    size = doc.createElement('size')
    annotation.appendChild(size)
    width = doc.createElement('width')
    width.appendChild(doc.createTextNode(str(saveimg.shape[1])))
    height = doc.createElement('height')
    height.appendChild(doc.createTextNode(str(saveimg.shape[0])))
    depth = doc.createElement('depth')
    depth.appendChild(doc.createTextNode(str(saveimg.shape[2])))
    size.appendChild(width)
    size.appendChild(height)
    size.appendChild(depth)
    segmented = doc.createElement('segmented')
    segmented.appendChild(doc.createTextNode('0'))
    annotation.appendChild(segmented)
    # One <object> element per face bounding box (x, y, w, h).
    for i in range(len(bboxes)):
        bbox = bboxes[i]
        objects = doc.createElement('object')
        annotation.appendChild(objects)
        object_name = doc.createElement('name')
        object_name.appendChild(doc.createTextNode('face'))
        objects.appendChild(object_name)
        pose = doc.createElement('pose')
        pose.appendChild(doc.createTextNode('Unspecified'))
        objects.appendChild(pose)
        truncated = doc.createElement('truncated')
        truncated.appendChild(doc.createTextNode('1'))
        objects.appendChild(truncated)
        difficult = doc.createElement('difficult')
        difficult.appendChild(doc.createTextNode('0'))
        objects.appendChild(difficult)
        bndbox = doc.createElement('bndbox')
        objects.appendChild(bndbox)
        xmin = doc.createElement('xmin')
        xmin.appendChild(doc.createTextNode(str(bbox[0])))
        bndbox.appendChild(xmin)
        ymin = doc.createElement('ymin')
        ymin.appendChild(doc.createTextNode(str(bbox[1])))
        bndbox.appendChild(ymin)
        xmax = doc.createElement('xmax')
        xmax.appendChild(doc.createTextNode(str(bbox[0] + bbox[2])))
        bndbox.appendChild(xmax)
        ymax = doc.createElement('ymax')
        ymax.appendChild(doc.createTextNode(str(bbox[1] + bbox[3])))
        bndbox.appendChild(ymax)
    f = open(xmlpath, "w")
    f.write(doc.toprettyxml(indent=''))
    f.close()
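# Quick sanity check (hypothetical values): calling
#   writexml("demo.jpg", numpy.zeros((300, 300, 3), numpy.uint8),
#            [(10, 20, 50, 60)], "demo.xml")
# produces one <object> with xmin=10, ymin=20, xmax=60, ymax=80.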
# Root directory of the original dataset
rootdir = "/home/zero/face_ws"

def convertimgset(img_set):
    # Root directory of the images
    imgdir = rootdir + "/WIDER_" + img_set + "/images"
    # gt.txt describing the image annotations
    gtfilepath = rootdir + "/wider_face_split/wider_face_" + img_set + "_bbx_gt.txt"
    # Create train.txt / val.txt for writing
    fwrite = open(rootdir + "/ImageSets/Main/" + img_set + ".txt", 'w')
    index = 0
    with open(gtfilepath, 'r') as gtfiles:
        while True:
            filename = gtfiles.readline().strip()
            if filename == "":
                break
            imgpath = imgdir + "/" + filename
            # Read the original image
            img = cv2.imread(imgpath)
            if img is None:
                break
            numbbox = int(gtfiles.readline())
            bboxes = []
            print(numbbox)
            for i in range(numbbox):
                line = gtfiles.readline()
                lines = line.split(" ")
                lines = lines[0:4]
                bbox = (int(lines[0]), int(lines[1]), int(lines[2]), int(lines[3]))
                # Skip faces smaller than 40x40 pixels
                if int(lines[2]) < 40 or int(lines[3]) < 40:
                    continue
                bboxes.append(bbox)
                # cv2.rectangle(img, (bbox[0],bbox[1]), (bbox[0]+bbox[2],bbox[1]+bbox[3]), color=(255,255,0), thickness=1)
            # Replace '/' in the filename with '_'
            filename = filename.replace("/", "_")
            if len(bboxes) == 0:
                print("no face")
                continue
            # cv2.imshow("img", img)
            # cv2.waitKey(0)
            # Copy the image into JPEGImages/
            cv2.imwrite("{}/JPEGImages/{}".format(rootdir, filename), img)
            # Write the filename (split on '.', keep the stem) into train.txt / val.txt
            fwrite.write(filename.split(".")[0] + "\n")
            # Path of the XML annotation file
            xmlpath = "{}/Annotations/{}.xml".format(rootdir, filename.split(".")[0])
            # Write the XML content
            writexml(filename, img, bboxes, xmlpath)
            print("success number is ", index)
            index += 1
    fwrite.close()

if __name__ == "__main__":
    img_sets = ["train", "val"]
    for img_set in img_sets:
        convertimgset(img_set)
    # Rename train.txt to trainval.txt and val.txt to test.txt
    shutil.move(rootdir + "/ImageSets/Main/" + "train.txt", rootdir + "/ImageSets/Main/" + "trainval.txt")
    shutil.move(rootdir + "/ImageSets/Main/" + "val.txt", rootdir + "/ImageSets/Main/" + "test.txt")
The following two annotation files in wider_face_split, as downloaded straight from the dataset's official site, can cause problems when generating the VOC data:
wider_face_train_bbx_gt.txt
wider_face_val_bbx_gt.txt
Delete the all-zero lines in them! (Images with no faces are recorded as a face count of 0 followed by a placeholder row of ten zeros; the conversion script above then misreads that placeholder row as a filename and stops early.)
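A minimal sketch that strips those placeholder rows in place (assuming the paths used above; back up the originals first). The bare "0" face-count line must be kept, so only multi-token all-zero rows are removed:

for split in ("train", "val"):
    path = "/home/zero/face_ws/wider_face_split/wider_face_{}_bbx_gt.txt".format(split)
    with open(path) as f:
        lines = f.readlines()
    cleaned = []
    for l in lines:
        toks = l.split()
        # Drop rows like "0 0 0 0 0 0 0 0 0 0" but keep the single "0" count line.
        if len(toks) > 1 and set(toks) == {"0"}:
            continue
        cleaned.append(l)
    with open(path, "w") as f:
        f.writelines(cleaned)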
We need three files:
create_list.sh
create_data.sh
labelmap_voc.prototxt
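labelmap_voc.prototxt maps label indices to class names. For this two-class (background + face) task a minimal sketch looks like the following (the display names are a free choice):

item {
  name: "none_of_the_above"
  label: 0
  display_name: "background"
}
item {
  name: "face"
  label: 1
  display_name: "face"
}

create_list.sh is as follows: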
#!/bin/bash
root_dir=/home/zero  # modify for your setup
sub_dir=ImageSets/Main
bash_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
for dataset in trainval test
do
  dst_file=$bash_dir/$dataset.txt
  if [ -f $dst_file ]
  then
    rm -f $dst_file
  fi
  for name in face_ws  # modify for your setup
  do
    if [[ $dataset == "test" && $name == "VOC2012" ]]
    then
      continue
    fi
    echo "Create list for $name $dataset..."
    dataset_file=$root_dir/$name/$sub_dir/$dataset.txt
    img_file=$bash_dir/$dataset"_img.txt"
    cp $dataset_file $img_file
    sed -i "s/^/$name\/JPEGImages\//g" $img_file
    sed -i "s/$/.jpg/g" $img_file
    label_file=$bash_dir/$dataset"_label.txt"
    cp $dataset_file $label_file
    sed -i "s/^/$name\/Annotations\//g" $label_file
    sed -i "s/$/.xml/g" $label_file
    paste -d' ' $img_file $label_file >> $dst_file
    rm -f $label_file
    rm -f $img_file
  done
  # Generate image name and size information.
  if [ $dataset == "test" ]
  then
    $bash_dir/../../build/tools/get_image_size $root_dir $dst_file $bash_dir/$dataset"_name_size.txt"
  fi
  # Shuffle trainval file.
  if [ $dataset == "trainval" ]
  then
    rand_file=$dst_file.random
    cat $dst_file | perl -MList::Util=shuffle -e 'print shuffle(<STDIN>);' > $rand_file
    mv $rand_file $dst_file
  fi
done
cd caffe
# Run the script
./data/widerface/create_list.sh
# This generates:
# test.txt
# test_name_size.txt
# trainval.txt
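Each line of the generated trainval.txt / test.txt pairs an image path with its annotation path, relative to root_dir, e.g. (hypothetical file stem):

face_ws/JPEGImages/0_Parade_marchingband_1_5.jpg face_ws/Annotations/0_Parade_marchingband_1_5.xml

create_data.sh is as follows: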
cur_dir=$(cd $( dirname ${BASH_SOURCE[0]} ) && pwd )
root_dir=$cur_dir/../..
cd $root_dir
redo=1
data_root_dir="/home/zero"
dataset_name="widerface"
mapfile="$root_dir/data/$dataset_name/labelmap_voc.prototxt"
anno_type="detection"
db="lmdb"
min_dim=0
max_dim=0
width=0
height=0
extra_cmd="--encode-type=jpg --encoded"
if [ $redo ]
then
  extra_cmd="$extra_cmd --redo"
fi
for subset in test trainval
do
  python $root_dir/scripts/create_annoset.py --anno-type=$anno_type --label-map-file=$mapfile --min-dim=$min_dim --max-dim=$max_dim --resize-width=$width --resize-height=$height --check-label $extra_cmd $data_root_dir $root_dir/data/$dataset_name/$subset.txt $data_root_dir/$dataset_name/$db/$dataset_name"_"$subset"_"$db examples/$dataset_name
done
# Uses the test.txt / trainval.txt generated in the previous step
cd caffe
# Run the script
./data/widerface/create_data.sh
This generates two lmdb databases under the data root directory and two lmdb symlinks under caffe/examples/widerface/.
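To verify the databases were written, a quick sketch using the lmdb Python package (paths follow the create_data.sh variables above):

import lmdb

for subset in ("trainval", "test"):
    path = "/home/zero/widerface/lmdb/widerface_{}_lmdb".format(subset)
    env = lmdb.open(path, readonly=True)
    with env.begin() as txn:
        # One entry per image/annotation pair.
        print(subset, txn.stat()["entries"])
    env.close()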
The training script ssd_pascal.py lives under examples/ssd/wider_face/.
ssd_pascal.py uses:
(1) test_name_size.txt, created by create_list.sh;
(2) labelmap_voc.prototxt;
(3) the lmdb symlink widerface_test_lmdb created by create_data.sh;
(4) the lmdb symlink widerface_trainval_lmdb.
In addition, the backbone network that ssd_pascal.py calls (defined in model_libs.py) was changed from VGGNetBody to VGGNetBody_small.
ssd_pascal.py generates:
(1) jobs/VGGNet/wider_face/*
(2) models/VGGNet/wider_face/*
ssd_pascal.py (excerpts):
# The database file for training data. Created by data/VOC0712/create_data.sh
train_data = "examples/widerface/widerface_trainval_lmdb"
# The database file for testing data. Created by data/VOC0712/create_data.sh
test_data = "examples/widerface/widerface_test_lmdb"
# If true, use batch norm for all newly added layers.
# Currently only the non batch norm version has been tested.
use_batchnorm = False
lr_mult = 1
# Use different initial learning rate.
if use_batchnorm:
    base_lr = 0.0004
else:
    # A learning rate for batch_size = 1, num_gpus = 1.
    base_lr = 0.00004
# Modify the job name if you want.
job_name = "SSD_{}".format(resize)
# The name of the model. Modify it if you want.
model_name = "VGG_wider_face_{}".format(job_name)
# Directory which stores the model .prototxt file.
save_dir = "models/VGGNet/wider_face/{}".format(job_name)
# Directory which stores the snapshot of models.
snapshot_dir = "models/VGGNet/wider_face/{}".format(job_name)
# Directory which stores the job script and log file.
job_dir = "jobs/VGGNet/wider_face/{}".format(job_name)
# Directory which stores the detection results.
output_result_dir = "{}/data/VOCdevkit/results/wider_face/{}/Main".format(os.environ['HOME'], job_name)
# model definition files.
train_net_file = "{}/train.prototxt".format(save_dir)
test_net_file = "{}/test.prototxt".format(save_dir)
deploy_net_file = "{}/deploy.prototxt".format(save_dir)
solver_file = "{}/solver.prototxt".format(save_dir)
# snapshot prefix.
snapshot_prefix = "{}/{}".format(snapshot_dir, model_name)
# job script path.
job_file = "{}/{}.sh".format(job_dir, model_name)
# Stores the test image names and sizes. Created by data/VOC0712/create_list.sh
name_size_file = "data/widerface/test_name_size.txt"
# The pretrained model. We use the Fully convolutional reduced (atrous) VGGNet.
pretrain_model = "models/VGGNet/VGG_ILSVRC_16_layers_fc_reduced.caffemodel"
# Stores LabelMapItem.
label_map_file = "data/widerface/labelmap_voc.prototxt"
# conv5_3 is used here in place of the original fc7
mbox_source_layers = ['conv4_3', 'conv5_3', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2'] # conv5_3
# in percent %
min_ratio = 20
max_ratio = 90
step = int(math.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - 2)))
min_sizes = []
max_sizes = []
for ratio in xrange(min_ratio, max_ratio + 1, step):
    min_sizes.append(min_dim * ratio / 100.)
    max_sizes.append(min_dim * (ratio + step) / 100.)
min_sizes = [min_dim * 10 / 100.] + min_sizes
max_sizes = [min_dim * 20 / 100.] + max_sizes
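# With min_dim = 300 this gives step = 17 and
# min_sizes = [30, 60, 111, 162, 213, 264],
# max_sizes = [60, 111, 162, 213, 264, 315]:
# one (min, max) prior-size pair per entry in mbox_source_layers.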
steps = [8, 16, 32, 64, 100, 300]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
# L2 normalize conv4_3.
normalizations = [20, -1, -1, -1, -1, -1]
# variance used to encode/decode prior bboxes.
if code_type == P.PriorBox.CENTER_SIZE:
    prior_variance = [0.1, 0.1, 0.2, 0.2]
else:
    prior_variance = [0.1]
flip = True
clip = False
# Solver parameters.
# Defining which GPUs to use.
gpus = "0"
gpulist = gpus.split(",")
num_gpus = len(gpulist)
# Divide the mini-batch to different GPUs.
# Adjust these to your GPU memory, otherwise training will error out.
batch_size = 16  # 32
accum_batch_size = 16  # 32
# If iter_size = 1, a backward pass follows every forward pass; if it is 2,
# one backward pass runs per two forward passes. This lowers the memory used
# per pass (some GPUs cannot train otherwise) at the cost of longer training
# time; the total number of iterations is unchanged.
iter_size = accum_batch_size / batch_size
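# e.g. accum_batch_size = 32 with batch_size = 16 gives iter_size = 2:
# gradients from two passes are accumulated before each weight update,
# emulating an effective batch of 32.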
solver_mode = P.Solver.CPU
device_id = 0
batch_size_per_device = batch_size
if num_gpus > 0:
    batch_size_per_device = int(math.ceil(float(batch_size) / num_gpus))
    iter_size = int(math.ceil(float(accum_batch_size) / (batch_size_per_device * num_gpus)))
    solver_mode = P.Solver.GPU
    device_id = int(gpulist[0])
if normalization_mode == P.Loss.NONE:
    base_lr /= batch_size_per_device
elif normalization_mode == P.Loss.VALID:
    base_lr *= 25. / loc_weight
elif normalization_mode == P.Loss.FULL:
    # Roughly there are 2000 prior bboxes per image.
    # TODO(weiliu89): Estimate the exact # of priors.
    base_lr *= 2000.
# Evaluate on whole test set.
num_test_image = 2580
test_batch_size = 8
# Ideally num_test_image should be divisible by test_batch_size,
# otherwise mAP will be slightly off the true value.
test_iter = int(math.ceil(float(num_test_image) / test_batch_size))
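# With num_test_image = 2580 and test_batch_size = 8,
# test_iter = ceil(2580 / 8) = 323, so one evaluation covers the whole test set.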
solver_param = {
    # Train parameters
    'base_lr': base_lr,  # base learning rate
    'weight_decay': 0.0005,  # weight decay, a regularizer against overfitting
    'lr_policy': "multistep",  # learning-rate policy; "multistep" requires the stepvalue list below
    'stepvalue': [2000, 5000, 10000],  # for a small dataset these can be lowered to cut the iteration count and see results sooner
    'gamma': 0.1,
    'momentum': 0.9,  # weight given to the previous gradient update
    'iter_size': iter_size,
    'max_iter': 20000,  # maximum number of iterations
    'snapshot': 100,  # save the model and solver state every `snapshot` iterations
    'display': 10,  # print progress every 10 iterations
    'average_loss': 10,  # report the loss averaged over the last 10 iterations
    'type': "SGD",
    'solver_mode': solver_mode,
    'device_id': device_id,
    'debug_info': False,
    'snapshot_after_train': True,
    # Test parameters
    'test_iter': [test_iter],
    'test_interval': 1000,  # run a test pass every 1000 training iterations
    'eval_type': "detection",
    'ap_version': "11point",
    'test_initialization': False,
}
# Build the backbone network
VGGNetBody_small(net, from_layer='data', fully_conv=True, reduced=True, dilated=True,
    dropout=False)
AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)
mbox_layers = CreateMultiBoxHead(net, data_layer='data', from_layers=mbox_source_layers,
    use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes,
    aspect_ratios=aspect_ratios, steps=steps, normalizations=normalizations,
    num_classes=num_classes, share_location=share_location, flip=flip, clip=clip,
    prior_variance=prior_variance, kernel_size=3, pad=1, lr_mult=lr_mult)
# Create the MultiBoxLossLayer.
name = "mbox_loss"
mbox_layers.append(net.label)
net[name] = L.MultiBoxLoss(*mbox_layers, multibox_loss_param=multibox_loss_param,
    loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
    propagate_down=[True, True, False, False])
with open(train_net_file, 'w') as f:
    print('name: "{}_train"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(train_net_file, job_dir)
# Create test net.
net = caffe.NetSpec()
net.data, net.label = CreateAnnotatedDataLayer(test_data, batch_size=test_batch_size,
    train=False, output_label=True, label_map_file=label_map_file,
    transform_param=test_transform_param)
VGGNetBody_small(net, from_layer='data', fully_conv=True, reduced=True, dilated=True,
    dropout=False)
# VGGNetBody_small is defined in caffe/python/caffe/model_libs.py
Testing requires four things:
1. a test image
2. a caffemodel
3. deploy.prototxt
4. the driver script test.py
test.py is as follows:
import os
import cv2
import sys
import numpy as np
caffe_root = "/home/zero/caffe"
os.chdir(caffe_root)
sys.path.insert(0,os.path.join(caffe_root, 'python'))
import caffe
caffe.set_device(0)
caffe.set_mode_gpu()
model_def = "/home/zero/caffe/models/VGGNet/wider_face/SSD_300x300/deploy.prototxt"
model_weight = "/home/zero/caffe/models/VGGNet/wider_face/SSD_300x300/VGG_wider_face_SSD_300x300_iter_1000.caffemodel"
img_path = "/home/zero/caffe/models/VGGNet/wider_face/SSD_300x300/30_Surgeons_Surgeons_30_90.jpg"
net = caffe.Net(model_def,model_weight,caffe.TEST)
image_data = caffe.io.load_image(img_path)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))
transformer.set_mean('data', np.array([104, 117, 123]))
transformer.set_raw_scale('data', 255)
transformed_image = transformer.preprocess('data', image_data)
net.blobs['data'].reshape(1, 3, 300, 300)
net.blobs['data'].data[...] = transformed_image
detect_out = net.forward()['detection_out']
print(detect_out)
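# detect_out has shape (1, 1, N, 7); each of the N rows is
# [image_id, label, confidence, xmin, ymin, xmax, ymax],
# with box coordinates normalized to [0, 1].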
det_label = detect_out[0,0,:,1]
det_conf = detect_out[0,0,:,2]
det_xmin = detect_out[0,0,:,3]
det_ymin = detect_out[0,0,:,4]
det_xmax = detect_out[0,0,:,5]
det_ymax = detect_out[0,0,:,6]
top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.1]
top_conf = det_conf[top_indices]
top_xmin = det_xmin[top_indices]
top_ymin = det_ymin[top_indices]
top_xmax = det_xmax[top_indices]
top_ymax = det_ymax[top_indices]
# caffe.io.load_image returns an RGB float image in [0, 1]; convert to
# 8-bit BGR so OpenCV drawing and display behave as expected.
draw_img = cv2.cvtColor((image_data * 255).astype(np.uint8), cv2.COLOR_RGB2BGR)
[height, width, _] = image_data.shape
for i in range(min(5, top_conf.shape[0])):
    xmin = int(top_xmin[i] * width)
    ymin = int(top_ymin[i] * height)
    xmax = int(top_xmax[i] * width)
    ymax = int(top_ymax[i] * height)
    cv2.rectangle(draw_img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 5)
cv2.imshow("face", draw_img)
cv2.waitKey(0)
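On a headless machine cv2.imshow will fail; writing the result to disk is a simple alternative (the output filename is a free choice):

cv2.imwrite("detections.jpg", draw_img)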