I have been studying SSD recently, and the example pipeline requires running two .sh scripts provided by the author to generate the lmdb files. This is a good opportunity to take a quick look at Linux shell scripting and at how the lmdb files are produced.
#!/bin/bash
# Tell the system to run this script with the bash interpreter
root_dir=$HOME/data/VOCdevkit/
# Define a path. $HOME expands to the current user's home directory; for me that is /home/zhangchen
# so this evaluates to root_dir=/home/zhangchen/data/VOCdevkit/
sub_dir=ImageSets/Main
bash_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# ${BASH_SOURCE[0]} is the path that was used to invoke the current script
# (unlike $0, it also works when the script is sourced instead of executed)
# If you run `sh create_list.sh`, ${BASH_SOURCE[0]} is create_list.sh,
# and dirname "${BASH_SOURCE[0]}" is the directory containing create_list.sh
# The surrounding cd ... && pwd converts it to an absolute path,
# so bash_dir ends up being the absolute path of the directory that holds create_list.sh
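# For example, if create_list.sh lives at
# /home/zhangchen/thirdparty-source/caffe/data/VOC0712/create_list.sh (the caffe
# checkout used elsewhere in this post), then bash_dir is
# /home/zhangchen/thirdparty-source/caffe/data/VOC0712, regardless of which
# directory you run the script from.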
for dataset in trainval test
# for ... in
# do
# done
# form a loop: the first iteration assigns trainval to dataset, the second assigns test
do
dst_file=$bash_dir/$dataset.txt
if [ -f $dst_file ]
# -f file tests whether file is a regular file and returns True if so; it returns False for directories and device files (see the small example after this block)
then
rm -f $dst_file
# rm -f: delete without prompting
# Here: if trainval.txt or test.txt already exists, delete it first
fi
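# A quick illustration of the -f test: [ -f /etc/passwd ] is true because it is a
# regular file, while [ -f /etc ] is false because /etc is a directory
# ([ -d /etc ] would be true).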
for name in VOC2007 VOC2012
do
if [[ $dataset == "test" && $name == "VOC2012" ]]
then
continue
# Skip the test + VOC2012 combination: following the instructions on the SSD GitHub page, only the 2007 trainval/test sets and the 2012 trainval set were downloaded
fi
echo "Create list for $name $dataset..."
# Prints progress to the terminal, e.g.: Create list for VOC2007 trainval...
dataset_file=$root_dir/$name/$sub_dir/$dataset.txt
# dataset_file=/home/zhangchen/data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt
img_file=$bash_dir/$dataset"_img.txt"
# img_file=<directory of create_list.sh>/trainval_img.txt
# Note that this file does not exist yet
cp $dataset_file $img_file
# Copy dataset_file to img_file
sed -i "s/^/$name\/JPEGImages\//g" $img_file
# sed -i: edit the file in place instead of printing the result to the terminal
# s/a/b/g replaces every occurrence of a with b
# Here we prepend $name/JPEGImages/ to every line (^ matches the start of a line)
sed -i "s/$/.jpg/g" $img_file
# Append .jpg to the end of every line ($ matches the end of a line)
# The result looks like: VOC2007/JPEGImages/009874.jpg
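# To check what the two sed commands do, you can run them on a single sample id in
# a pipeline instead of editing a file in place:
#   echo "009874" | sed "s/^/VOC2007\/JPEGImages\//g" | sed "s/$/.jpg/g"
#   # prints: VOC2007/JPEGImages/009874.jpg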
label_file=$bash_dir/$dataset"_label.txt"
cp $dataset_file $label_file
sed -i "s/^/$name\/Annotations\//g" $label_file
sed -i "s/$/.xml/g" $label_file
# trainval_label.txt now contains lines like: VOC2007/Annotations/009874.xml
paste -d' ' $img_file $label_file >> $dst_file
# paste joins files line by line; -d' ' sets the delimiter to a single space
# Each line of trainval_label.txt is glued after the corresponding line of trainval_img.txt
# and the combined lines are appended to trainval.txt
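# A minimal illustration of paste -d' ' with two throwaway one-line files:
#   printf 'VOC2007/JPEGImages/009874.jpg\n'  > /tmp/img.txt
#   printf 'VOC2007/Annotations/009874.xml\n' > /tmp/label.txt
#   paste -d' ' /tmp/img.txt /tmp/label.txt
#   # prints: VOC2007/JPEGImages/009874.jpg VOC2007/Annotations/009874.xml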
rm -f $label_file
rm -f $img_file
done
# Generate image name and size information.
if [ $dataset == "test" ]
then
$bash_dir/../../build/tools/get_image_size $root_dir $dst_file $bash_dir/$dataset"_name_size.txt"
fi
# If this is the test set, call get_image_size from build/tools to write each image's name and size into test_name_size.txt
# Shuffle trainval file.
if [ $dataset == "trainval" ]
then
rand_file=$dst_file.random
cat $dst_file | perl -MList::Util=shuffle -e 'print shuffle(<STDIN>);' > $rand_file
mv $rand_file $dst_file
fi
# If this is the trainval set, shuffle the lines
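# The perl one-liner reads every line from stdin, shuffles them with
# List::Util::shuffle and prints the result; on most Linux systems
# `shuf trainval.txt > trainval.txt.random` would do the same job.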
done
cur_dir=$(cd $( dirname ${BASH_SOURCE[0]} ) && pwd )
# Likewise, cur_dir is the directory that contains create_data.sh
root_dir=$cur_dir/../..
# root_dir is the caffe source root (two directories above create_data.sh)
cd $root_dir
redo=1
data_root_dir="$HOME/data/VOCdevkit"
dataset_name="VOC0712"
mapfile="$root_dir/data/$dataset_name/labelmap_voc.prototxt"
anno_type="detection"
db="lmdb"
min_dim=0
max_dim=0
width=0
height=0
extra_cmd="--encode-type=jpg --encoded"
if [ $redo ]
# redo is set to 1 above, so this branch is taken and --redo is appended to extra_cmd
then
extra_cmd="$extra_cmd --redo"
fi
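# Side note: [ $redo ] only tests that $redo expands to a non-empty string, so even
# redo=0 would take the branch above; to test the numeric value you would write
# [ "$redo" -eq 1 ] instead.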
for subset in test trainval
do
python $root_dir/scripts/create_annoset.py --anno-type=$anno_type --label-map-file=$mapfile --min-dim=$min_dim --max-dim=$max_dim --resize-width=$width --resize-height=$height --check-label $extra_cmd $data_root_dir $root_dir/data/$dataset_name/$subset.txt $data_root_dir/$dataset_name/$db/$dataset_name"_"$subset"_"$db examples/$dataset_name
done
# create_annoset.py is where the real work happens; the options passed to it are:
# --anno-type: detection
# --label-map-file: the labelmap_voc.prototxt file
# --min-dim:0
# --max-dim:0
# --resize-width:0
# --resize-height:0
# --check-label
# --redo
# --encode-type=jpg
# --encoded
import argparse
import os
import shutil
import subprocess
import sys
from caffe.proto import caffe_pb2
from google.protobuf import text_format
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Create AnnotatedDatum database")
parser.add_argument("root",
help="The root directory which contains the images and annotations.")
# root:/home/zhangchen/data/VOCdevkit
parser.add_argument("listfile",
help="The file which contains image paths and annotation info.")
# listfile:/home/zhangchen/thirdparty-source/caffe/data/VOC0712/trainval.txt
# listfile:/home/zhangchen/thirdparty-source/caffe/data/VOC0712/test.txt
parser.add_argument("outdir",
help="The output directory which stores the database file.")
# outdir:/home/zhangchen/data/VOCdevkit/VOC0712/lmdb/VOC0712_trainval_lmdb
# outdir:/home/zhangchen/data/VOCdevkit/VOC0712/lmdb/VOC0712_test_lmdb
parser.add_argument("exampledir",
help="The directory to store the link of the database files.")
# exampledir:examples/VOC0712
parser.add_argument("--redo", default = False, action = "store_true",
help="Recreate the database.")
# --redo: True for this run (create_data.sh passes --redo because redo=1)
parser.add_argument("--anno-type", default = "classification",
help="The type of annotation {classification, detection}.")
# --anno-type:detection
parser.add_argument("--label-type", default = "xml",
help="The type of label file format for detection {xml, json, txt}.")
# --label-type:xml
parser.add_argument("--backend", default = "lmdb",
help="The backend {lmdb, leveldb} for storing the result")
# --backend:lmdb
parser.add_argument("--check-size", default = False, action = "store_true",
help="Check that all the datum have the same size.")
# --check-size:false
parser.add_argument("--encode-type", default = "",
help="What type should we encode the image as ('png','jpg',...).")
# --encode-type:jpg
parser.add_argument("--encoded", default = False, action = "store_true",
help="The encoded image will be save in datum.")
# --encoded: True for this run (--encoded is passed)
parser.add_argument("--gray", default = False, action = "store_true",
help="Treat images as grayscale ones.")
# --gray:false
parser.add_argument("--label-map-file", default = "",
help="A file with LabelMap protobuf message.")
# --label-map-file: the labelmap_voc.prototxt file
parser.add_argument("--min-dim", default = 0, type = int,
help="Minimum dimension images are resized to.")
# --min-dim:0
parser.add_argument("--max-dim", default = 0, type = int,
help="Maximum dimension images are resized to.")
# --max-dim:0
parser.add_argument("--resize-height", default = 0, type = int,
help="Height images are resized to.")
# --resize-height:0
parser.add_argument("--resize-width", default = 0, type = int,
help="Width images are resized to.")
# --resize-width:0
parser.add_argument("--shuffle", default = False, action = "store_true",
help="Randomly shuffle the order of images and their labels.")
# --shuffle:false
parser.add_argument("--check-label", default = False, action = "store_true",
help="Check that there is no duplicated name/label.")
# --check-label: True for this run (--check-label is passed)
args = parser.parse_args()
root_dir = args.root
list_file = args.listfile
out_dir = args.outdir
example_dir = args.exampledir
redo = args.redo
anno_type = args.anno_type
label_type = args.label_type
backend = args.backend
check_size = args.check_size
encode_type = args.encode_type
encoded = args.encoded
gray = args.gray
label_map_file = args.label_map_file
min_dim = args.min_dim
max_dim = args.max_dim
resize_height = args.resize_height
resize_width = args.resize_width
shuffle = args.shuffle
check_label = args.check_label
# check if root directory exists
if not os.path.exists(root_dir):
print("root directory: {} does not exist".format(root_dir))
# If /home/zhangchen/data/VOCdevkit does not exist, exit
sys.exit()
# add "/" to root directory if needed
if root_dir[-1] != "/":
root_dir += "/"
# root_dir:/home/zhangchen/data/VOCdevkit/
# check if list file exists
if not os.path.exists(list_file):
print("list file: {} does not exist".format(list_file))
sys.exit()
# If /home/zhangchen/thirdparty-source/caffe/data/VOC0712/trainval.txt does not exist, exit
# check list file format is correct
with open(list_file, "r") as lf:
for line in lf.readlines():
img_file, anno = line.strip("\n").split(" ")
if not os.path.exists(root_dir + img_file):
# /home/zhangchen/data/VOCdevkit/VOC2012/JPEGImages/2009_005287.jpg
print("image file: {} does not exist".format(root_dir + img_file))
if anno_type == "classification":
if not anno.isdigit():
# .isdigit() returns True only when the string consists entirely of digits
print("annotation: {} is not an integer".format(anno))
elif anno_type == "detection":
if not os.path.exists(root_dir + anno):
# /home/zhangchen/data/VOCdevkit/VOC2012/Annotations/2009_005287.xml
print("annofation file: {} does not exist".format(root_dir + anno))
sys.exit()
# break
# The upstream script has a break here, which means only the first line of the list file is actually checked; I think it should be removed so that every line gets validated (please correct me if I am wrong)
# check if label map file exist
if anno_type == "detection":
if not os.path.exists(label_map_file):
print("label map file: {} does not exist".format(label_map_file))
sys.exit()
label_map = caffe_pb2.LabelMap()
lmf = open(label_map_file, "r")
try:
text_format.Merge(str(lmf.read()), label_map)
except:
print("Cannot parse label map file: {}".format(label_map_file))
sys.exit()
out_parent_dir = os.path.dirname(out_dir)
# out_parent_dir: /home/zhangchen/data/VOCdevkit/VOC0712/lmdb
if not os.path.exists(out_parent_dir):
os.makedirs(out_parent_dir)
if os.path.exists(out_dir) and not redo:
print("{} already exists and I do not hear redo".format(out_dir))
sys.exit()
if os.path.exists(out_dir):
shutil.rmtree(out_dir)
# shutil.rmtree recursively removes the directory together with all its subdirectories and files
# get caffe root directory
caffe_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
if anno_type == "detection":
cmd = "{}/build/tools/convert_annoset" \
" --anno_type={}" \
" --label_type={}" \
" --label_map_file={}" \
" --check_label={}" \
" --min_dim={}" \
" --max_dim={}" \
" --resize_height={}" \
" --resize_width={}" \
" --backend={}" \
" --shuffle={}" \
" --check_size={}" \
" --encode_type={}" \
" --encoded={}" \
" --gray={}" \
" {} {} {}" \
.format(caffe_root, anno_type, label_type, label_map_file, check_label,
min_dim, max_dim, resize_height, resize_width, backend, shuffle,
check_size, encode_type, encoded, gray, root_dir, list_file, out_dir)
elif anno_type == "classification":
cmd = "{}/build/tools/convert_annoset" \
" --anno_type={}" \
" --min_dim={}" \
" --max_dim={}" \
" --resize_height={}" \
" --resize_width={}" \
" --backend={}" \
" --shuffle={}" \
" --check_size={}" \
" --encode_type={}" \
" --encoded={}" \
" --gray={}" \
" {} {} {}" \
.format(caffe_root, anno_type, min_dim, max_dim, resize_height,
resize_width, backend, shuffle, check_size, encode_type, encoded,
gray, root_dir, list_file, out_dir)
print(cmd)
process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
output = process.communicate()[0]
if not os.path.exists(example_dir):
os.makedirs(example_dir)
link_dir = os.path.join(example_dir, os.path.basename(out_dir))
if os.path.exists(link_dir):
os.unlink(link_dir)
os.symlink(out_dir, link_dir)
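For reference, this is the labelmap_voc.prototxt passed via --label-map-file: label 0 is the background class, followed by the 20 PASCAL VOC object classes.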
item {
name: "none_of_the_above"
label: 0
display_name: "background"
}
item {
name: "aeroplane"
label: 1
display_name: "aeroplane"
}
item {
name: "bicycle"
label: 2
display_name: "bicycle"
}
item {
name: "bird"
label: 3
display_name: "bird"
}
item {
name: "boat"
label: 4
display_name: "boat"
}
item {
name: "bottle"
label: 5
display_name: "bottle"
}
item {
name: "bus"
label: 6
display_name: "bus"
}
item {
name: "car"
label: 7
display_name: "car"
}
item {
name: "cat"
label: 8
display_name: "cat"
}
item {
name: "chair"
label: 9
display_name: "chair"
}
item {
name: "cow"
label: 10
display_name: "cow"
}
item {
name: "diningtable"
label: 11
display_name: "diningtable"
}
item {
name: "dog"
label: 12
display_name: "dog"
}
item {
name: "horse"
label: 13
display_name: "horse"
}
item {
name: "motorbike"
label: 14
display_name: "motorbike"
}
item {
name: "person"
label: 15
display_name: "person"
}
item {
name: "pottedplant"
label: 16
display_name: "pottedplant"
}
item {
name: "sheep"
label: 17
display_name: "sheep"
}
item {
name: "sofa"
label: 18
display_name: "sofa"
}
item {
name: "train"
label: 19
display_name: "train"
}
item {
name: "tvmonitor"
label: 20
display_name: "tvmonitor"
}
# For anno_type == "detection", the command template is:
cmd = "{}/build/tools/convert_annoset" \
" --anno_type={}" \
" --label_type={}" \
" --label_map_file={}" \
" --check_label={}" \
" --min_dim={}" \
" --max_dim={}" \
" --resize_height={}" \
" --resize_width={}" \
" --backend={}" \
" --shuffle={}" \
" --check_size={}" \
" --encode_type={}" \
" --encoded={}" \
" --gray={}" \
" {} {} {}" \
.format(caffe_root, anno_type, label_type, label_map_file, check_label,
min_dim, max_dim, resize_height, resize_width, backend, shuffle,
check_size, encode_type, encoded, gray, root_dir, list_file, out_dir)
# The actual command that gets executed is:
/home/zhangchen/thirdparty-source/caffe/build/tools/convert_annoset --anno_type=detection --label_type=xml --label_map_file=/home/zhangchen/task/code/object_detection/SSD_ResNet-50/PASCAL-VOC/labelmap_voc.prototxt --check_label=True --min_dim=0 --max_dim=0 --resize_height=0 --resize_width=0 --backend=lmdb --shuffle=False --check_size=False --encode_type=jpg --encoded=True --gray=False /home/zhangchen/task/data/object_detection/PASCAL-VOC/VOCdevkit/ /home/zhangchen/thirdparty-source/caffe/data/VOC0712/trainval.txt /home/zhangchen/task/data/object_detection/PASCAL-VOC/VOCdevkit/VOC0712/lmdb/VOC0712_trainval_lmdb
# The arguments mean the following:
anno_type: The type of annotation {classification, detection}.
# whether this is a detection task or a classification task
label_type: The type of annotation file format.
# the format of the annotation files; the default is xml
label_map_file: A file with LabelMap protobuf message.
# the labelmap_voc.prototxt file
check_label: When this option is on, check that there is no duplicated name/label.
# when True, check that there are no duplicated names/labels
min_dim: Minimum dimension images are resized to (keep same aspect ratio)
# default 0; how it is used is explained below
max_dim: Maximum dimension images are resized to (keep same aspect ratio)
# default 0; how it is used is explained below
resize_height: Height images are resized to
# default 0; how it is used is explained below
resize_width: Width images are resized to
# default 0; how it is used is explained below
backend: The backend {lmdb, leveldb} for storing the result
# whether the converted data is stored as lmdb or leveldb
shuffle: Randomly shuffle the order of images and their labels
# shuffle the image order
check_size: When this option is on, check that all the datum have the same size
# when True, check that all datums have the same size
encode_type: Optional: What type should we encode the image as ('png','jpg',...).
# the image encoding format
encoded: When this option is on, the encoded image will be saved in datum
# when True, the encoded image bytes are stored in the datum
gray: When this option is on, treat images as grayscale ones
# when True, images are treated as grayscale
# followed by the three positional arguments: the data root directory, the list file, and the output directory
/home/zhangchen/task/data/object_detection/PASCAL-VOC/VOCdevkit/
/home/zhangchen/thirdparty-source/caffe/data/VOC0712/trainval.txt
/home/zhangchen/task/data/object_detection/PASCAL-VOC/VOCdevkit/VOC0712/lmdb/VOC0712_trainval_lmdb
# How an image file is read into a Datum (ReadImageToDatum):
bool ReadImageToDatum(const string& filename, const int label,
const int height, const int width, const int min_dim, const int max_dim,
const bool is_color, const std::string & encoding, Datum* datum) {
cv::Mat cv_img = ReadImageToCVMat(filename, height, width, min_dim, max_dim,
is_color);
if (cv_img.data) {
if (encoding.size()) {
if ( (cv_img.channels() == 3) == is_color && !height && !width &&
!min_dim && !max_dim && matchExt(filename, encoding) ) {
datum->set_channels(cv_img.channels());
datum->set_height(cv_img.rows);
datum->set_width(cv_img.cols);
return ReadFileToDatum(filename, label, datum);
}
EncodeCVMatToDatum(cv_img, encoding, datum);
datum->set_label(label);
return true;
}
CVMatToDatum(cv_img, datum);
datum->set_label(label);
return true;
} else {
return false;
}
}
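To summarize ReadImageToDatum: when an encoding is requested, no resizing is asked for, the channel count matches is_color, and the file extension already matches the requested encoding, the raw file bytes are copied into the datum via ReadFileToDatum; otherwise the decoded image is re-encoded with EncodeCVMatToDatum, and when no encoding is requested at all the raw pixels are stored with CVMatToDatum. The resizing parameters themselves take effect in ReadImageToCVMat, shown below.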
#ifdef USE_OPENCV
cv::Mat ReadImageToCVMat(const string& filename, const int height,
const int width, const int min_dim, const int max_dim,
const bool is_color) {
cv::Mat cv_img;
int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR :
CV_LOAD_IMAGE_GRAYSCALE);
cv::Mat cv_img_origin = cv::imread(filename, cv_read_flag);
// cv_img_origin is the original image as loaded from disk
if (!cv_img_origin.data) {
LOG(ERROR) << "Could not open or find file " << filename;
return cv_img_origin;
}
if (min_dim > 0 || max_dim > 0) {
int num_rows = cv_img_origin.rows;
int num_cols = cv_img_origin.cols;
int min_num = std::min(num_rows, num_cols);
int max_num = std::max(num_rows, num_cols);
float scale_factor = 1;
if (min_dim > 0 && min_num < min_dim) {
scale_factor = static_cast<float>(min_dim) / min_num;
}
if (max_dim > 0 && static_cast<int>(scale_factor * max_num) > max_dim) {
// Make sure the maximum dimension is less than max_dim.
scale_factor = static_cast<float>(max_dim) / max_num;
}
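    // Worked example with made-up numbers: for a 300x500 image with min_dim=400 and
    // max_dim=600, min_num=300 < 400 gives scale_factor = 400/300 ~= 1.33; since
    // 1.33 * 500 ~= 667 > 600, scale_factor is lowered to 600/500 = 1.2, and the
    // image is resized to about 360x600.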
if (scale_factor == 1) {
cv_img = cv_img_origin;
} else {
cv::resize(cv_img_origin, cv_img, cv::Size(0, 0),
scale_factor, scale_factor);
}
} else if (height > 0 && width > 0) {
cv::resize(cv_img_origin, cv_img, cv::Size(width, height));
} else {
cv_img = cv_img_origin;
// So when min_dim, max_dim, resize_height and resize_width are all zero, the original image is returned unchanged
}
return cv_img;
}
If you have corrections or questions, feel free to leave a comment or reach me by email.
Typing all of this out by hand takes real effort; if this article helped you, please credit the source when reposting.