This post documents an entry to a scene-classification competition. Dataset: http://ai.futurelab.tv/dataset/view (unzip code: QBRE; the dataset may not be used for commercial purposes). The task is 20-class scene classification, and the approach follows https://github.com/CSAILVision/places365: fine-tuning one of its pretrained models with Caffe.
Part 1: Building the dataset
Reference: Caffe learning series: training on your own image set (detailed tutorial).
1.1 Sorting the original data into classes
list.csv stores the scene annotation for each image, and categories.csv stores the scene category definitions; there are 20 classes in total.
1. Each class has 500-1500 images. First, using the csv files, sort the images into one folder per class (20 folders). The data lives under caffe/data; see category_classification.py (a sketch of the idea follows).
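category_classification.py is not reproduced in this post. Below is a minimal sketch of what it does, assuming list.csv pairs a FILE_ID with a CATEGORY_ID; the column names, the '.jpg' extension, and all paths here are assumptions, not taken from the competition files.
# Hypothetical sketch of category_classification.py: copy each image into a
# folder named after its class. Column names (FILE_ID, CATEGORY_ID) and the
# paths below are assumptions, not taken from the competition data.
import csv
import os
import shutil

src_dir = 'data/image_scene_training/data'   # assumed: flat folder of images
dst_root = 'data/testdata/train'             # output: one subfolder per class

with open('data/image_scene_training/list.csv') as f:
    for row in csv.DictReader(f):
        dst_dir = os.path.join(dst_root, row['CATEGORY_ID'])
        if not os.path.isdir(dst_dir):
            os.makedirs(dst_dir)
        shutil.copy(os.path.join(src_dir, row['FILE_ID'] + '.jpg'), dst_dir)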
2. Rename the files uniformly; see rename.py:
#!/usr/bin/python2.7
# -*- coding: utf8 -*-
import os

class BatchRename():
    '''Batch-rename the image files in one class folder to zero-padded numbers.'''
    def __init__(self):
        # hard-coded to a single class folder; run once per class
        self.path = '/home/ouc/workspace-sjh/caffe/data/testdata/train/3'

    def rename(self):
        filelist = os.listdir(self.path)
        total_num = len(filelist)
        i = 1
        for item in filelist:
            if item.endswith('.jpg'):
                src = os.path.join(os.path.abspath(self.path), item)
                dst = os.path.join(os.path.abspath(self.path), str(i).zfill(6) + '.jpg')
                try:
                    os.rename(src, dst)
                    print 'converting %s to %s ...' % (src, dst)
                    i = i + 1
                except OSError:
                    continue
        print 'total %d to rename & converted %d jpgs' % (total_num, i - 1)

if __name__ == '__main__':
    demo = BatchRename()
    demo.rename()
3. To improve the classifier, augment the data with Keras so that every class grows to 2000 images; see imgAug.py:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: yuanyong.name
import os
import random
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

num_wanted = 2000
# parent folder holding one subfolder per class
# (the original post pointed this at a single class folder)
target_path_dir = '/home/ouc/workspace-sjh/caffe/data/testdata/train'

datagen = ImageDataGenerator(
    rotation_range = 10,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    fill_mode = 'nearest')

# the immediate subdirectories of target_path_dir, one per class
sub_dirs = os.walk(target_path_dir).next()[1]
for k, sub_dir in enumerate(sub_dirs):
    sub_dir_full = os.path.join(target_path_dir, sub_dir)
    img_basenames = os.listdir(sub_dir_full)
    num_imgs = len(img_basenames)
    print 'to be processed: %s (%d/%d), contains images: %d' % (sub_dir, k + 1, len(sub_dirs), num_imgs)
    if num_imgs >= num_wanted:
        continue
    # augmented copies to generate per original image
    num_perAug = int(float(num_wanted) / float(num_imgs)) - 1
    num_total = 0
    for i, img_basename in enumerate(img_basenames):
        num_total = num_imgs + i * num_perAug
        if num_total >= num_wanted:
            break
        img_path = os.path.join(sub_dir_full, img_basename)
        img = load_img(img_path)  # a PIL image
        if img is None:
            continue
        try:
            x = img_to_array(img)          # Numpy array, e.g. shape (3, h, w)
            x = x.reshape((1,) + x.shape)  # add a batch dimension: (1, 3, h, w)
            j = 0
            for batch in datagen.flow(x, batch_size = 1, save_to_dir = sub_dir_full,
                                      save_prefix = 'aug', save_format = 'jpg'):
                j += 1
                if j >= num_perAug:
                    break  # otherwise the generator would loop indefinitely
        except:
            print "augmentation failed: %s" % img_path

# delete surplus augmented images so every class ends up with num_wanted
for sub_dir in sub_dirs:
    sub_dir_full = os.path.join(target_path_dir, sub_dir)
    img_basenames = os.listdir(sub_dir_full)
    num_imgs = len(img_basenames)
    if num_imgs <= num_wanted:
        continue
    aug_imgs = [b for b in img_basenames if b.startswith('aug_')]
    random.shuffle(aug_imgs)
    num_del = num_imgs - num_wanted
    for img_basename in aug_imgs[-num_del:]:
        os.remove(os.path.join(sub_dir_full, img_basename))
4. Split the data into a training set and a test set; here 10% of the images are held out as the test set. See select_test.py:
'''
select test set
2018-4-26
'''
import os
import shutil

path = 'F:\\工作\\比赛\\未来杯\\image_scene_training\\train_data\\basketball\\'
path_new = 'F:\\工作\\比赛\\未来杯\\image_scene_training\\test_data\\basketball_test\\'

filename = os.listdir(path)
for i in filename:
    # the 6-digit names were assigned by rename.py; dropping the last two
    # digits plus '.jpg' gives the image index divided by 100
    e = int(i[0:-6])
    if e % 10 == 0:  # move every 10th block of 100 images (~10%) to the test set
        oldname = path + i
        newname = path_new + i
        shutil.copyfile(oldname, newname)
        os.remove(oldname)
1.2 Create the image list file
Create a txt file that lists all images, one sample per line: the image name and its class label separated by a space (the script below assumes filenames of the form <name>_<label>.jpg, so a line looks like "000010_3.jpg 3"). This can be done in Python or with a shell script; see filename_list.py:
import os

def create_image_list(file_path, txtpath):
    '''
    :param file_path: path of the image folder
    :param txtpath: txt file used to save all image names and labels
    :return: none
    '''
    # remove the old list file
    if os.path.isfile(txtpath):
        os.remove(txtpath)
    # get the names of all images
    image_name_list = os.listdir(file_path)
    # save the name and label of every image to the txt file
    with open(txtpath, 'a') as f:
        print('saving to ' + txtpath + '...')
        for image_name in image_name_list:
            # assumes filenames of the form <name>_<label>.jpg
            image_label = image_name.split('_')[1].split('.')[0]
            image_data = image_name + ' ' + image_label
            f.write(image_data + '\n')
        print('Done')

if __name__ == "__main__":
    filepath = 'F:\\工作\\比赛\\未来杯\\image_scene_training\\test_data\\basketball_test'
    txtpath = 'F:\\工作\\比赛\\未来杯\\train_label\\test.txt'
    create_image_list(filepath, txtpath)
1.3 Generate the lmdb files
Reference: Caffe learning series (11): converting image data into db (leveldb/lmdb) files.
First create a folder myfile under examples to hold the data and scripts, then add a script create_imagenet.sh to build the lmdb files:
#!/usr/bin/env sh
# Create the imagenet lmdb inputs
# N.B. set the path to the imagenet train + val data dirs
# Run this from CAFFE_ROOT.
set -e

EXAMPLE=data/my_data/testdataNew
DATA=data/my_data/testdataNew
TOOLS=build/tools

TRAIN_DATA_ROOT=/home/b101/caffe/data/project/train
VAL_DATA_ROOT=/home/b101/caffe/data/project/val

# Set RESIZE=true to resize the images to 256x256. Leave as false if images have
# already been resized using another tool.
RESIZE=true
if $RESIZE; then
  RESIZE_HEIGHT=256
  RESIZE_WIDTH=256
else
  RESIZE_HEIGHT=0
  RESIZE_WIDTH=0
fi

if [ ! -d "$TRAIN_DATA_ROOT" ]; then
  echo "Error: TRAIN_DATA_ROOT is not a path to a directory: $TRAIN_DATA_ROOT"
  echo "Set the TRAIN_DATA_ROOT variable in create_imagenet.sh to the path" \
       "where the ImageNet training data is stored."
  exit 1
fi

if [ ! -d "$VAL_DATA_ROOT" ]; then
  echo "Error: VAL_DATA_ROOT is not a path to a directory: $VAL_DATA_ROOT"
  echo "Set the VAL_DATA_ROOT variable in create_imagenet.sh to the path" \
       "where the ImageNet validation data is stored."
  exit 1
fi

# train.txt / test.txt are the list files created in 1.2
echo "Creating train lmdb..."
GLOG_logtostderr=1 $TOOLS/convert_imageset \
    --resize_height=$RESIZE_HEIGHT \
    --resize_width=$RESIZE_WIDTH \
    --shuffle \
    $TRAIN_DATA_ROOT \
    $DATA/train.txt \
    $EXAMPLE/train_lmdb

echo "Creating val lmdb..."
GLOG_logtostderr=1 $TOOLS/convert_imageset \
    --resize_height=$RESIZE_HEIGHT \
    --resize_width=$RESIZE_WIDTH \
    --shuffle \
    $VAL_DATA_ROOT \
    $DATA/test.txt \
    $EXAMPLE/test_lmdb

echo "Done."
Then run make_imagenet_mean.sh to compute the mean image, which produces mean.binaryproto:
#!/usr/bin/env sh
# Compute the mean image from the imagenet training lmdb
# N.B. this is available in data/ilsvrc12
EXAMPLE=/home/b101/caffe/data/my_data/test_bad
DATA=/home/b101/caffe/data/my_data/test_bad
TOOLS=build/tools

$TOOLS/compute_image_mean $EXAMPLE/train_lmdb \
    $DATA/mean.binaryproto

echo "Done."
At this point the dataset is ready.

Part 2: Fine-tuning the network
2.1 Adjust the network layer parameters
1. In train_val.prototxt, change mean_file and source to your own paths, and set batch_size to whatever your GPU memory allows:
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    crop_size: 224
    mean_file: "examples/myfile/VGG16_new/mean.binaryproto"
    mirror: true
  }
  data_param {
    source: "examples/myfile/VGG16_new/train_lmdb"
    backend: LMDB
    batch_size: 48
  }
}
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    mirror: false
    crop_size: 224
    mean_file: "examples/myfile/VGG16_new/mean.binaryproto"
  }
  data_param {
    source: "examples/myfile/VGG16_new/test_lmdb"
    batch_size: 16
    backend: LMDB
  }
}
2. Rename the output layer fc8. When Caffe loads the pretrained model, weights are copied only into layers whose names match, so the renamed layer is trained from scratch, which is exactly what we want for the new 20-class output.
3. Adjust the learning rate. Because the last layer is learned from scratch, it needs a faster learning rate than the copied layers, so its weight and bias multipliers are raised to 10x and 20x the base rate (a quick verification sketch follows the layer definition below):
layer {
  name: "fc8a_new"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8a_new"
  param {
    lr_mult: 10
    decay_mult: 1.0
  }
  param {
    lr_mult: 20
    decay_mult: 0.0
  }
  inner_product_param {
    num_output: 20
  }
}
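Whether the rename had the intended effect can be checked from pycaffe: load the new train_val together with the pretrained weights and inspect each layer's parameters. A sketch, reusing the paths from this post (adjust to your layout, and run from CAFFE_ROOT so the lmdb paths resolve):
# Sketch: check which layers received pretrained weights. Layers whose names
# match the caffemodel keep its weights; the renamed fc8a_new does not.
import numpy as np
import caffe

net = caffe.Net('examples/myfile/VGG16_new/train_val_vgg16.prototxt',
                'data/my_data/test_bad/vgg16_places365.caffemodel',
                caffe.TEST)
for name in net.params:
    w = net.params[name][0].data
    print(name, w.shape, float(np.abs(w).mean()))
# copied layers show non-trivial statistics; fc8a_new prints 0.0 here, since
# the layer above specifies no weight_filler and Caffe's default filler is
# constant 0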
4. If you need top-3 accuracy, add an accuracy_param to the accuracy layer:
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc8a_new"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
  accuracy_param {
    top_k: 3
  }
}
2.2 Modify the solver file:
test_iter: 100
test_interval: 100
test_initialization: false
base_lr: 0.001
display: 20
max_iter: 10000
lr_policy: "step"
gamma: 0.1
momentum: 0.9
weight_decay: 0.0005
stepsize: 3000
snapshot: 1000
snapshot_prefix: "models/myfile/VGG_Places365_new/caffe_vgg16_train"
solver_mode: GPU
net: "examples/myfile/VGG16_new/train_val_vgg16.prototxt"
solver_type: SGD
With lr_policy "step", the learning rate is base_lr * gamma^floor(iter/stepsize): 0.001 for the first 3000 iterations, 1e-4 until 6000, and so on.
2.3 Download the pretrained caffemodel and deploy.prototxt from the places365 project page; I chose vgg16-places365.
2.4 Start training by running train_caffenet.sh:
#!/usr/bin/env sh
set -e
./build/tools/caffe train \
    --solver=data/my_data/test_bad/solver_vgg16.prototxt \
    -weights data/my_data/test_bad/vgg16_places365.caffemodel \
    -gpu 0
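The same run can also be driven from pycaffe, which makes it easier to add custom logging or evaluation between iterations. A minimal sketch equivalent to the command above:
# Sketch: pycaffe equivalent of train_caffenet.sh (same solver and weights).
import caffe

caffe.set_device(0)
caffe.set_mode_gpu()
solver = caffe.SGDSolver('data/my_data/test_bad/solver_vgg16.prototxt')
# copy the pretrained Places365 weights into all layers with matching names
solver.net.copy_from('data/my_data/test_bad/vgg16_places365.caffemodel')
solver.solve()  # trains until max_iter, snapshotting as configured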
Part 3: Testing
label.txt lists the 20 category IDs, one per line:
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
First convert the mean file to numpy format with convert_mean.py (usage: python convert_mean.py mean.binaryproto mymean.npy):
#!/usr/bin/env python
import numpy as np
import sys, caffe

if len(sys.argv) != 3:
    print "Usage: python convert_mean.py mean.binaryproto mean.npy"
    sys.exit()

blob = caffe.proto.caffe_pb2.BlobProto()
bin_mean = open(sys.argv[1], 'rb').read()
blob.ParseFromString(bin_mean)
arr = np.array(caffe.io.blobproto_to_array(blob))
npy_mean = arr[0]
np.save(sys.argv[2], npy_mean)
Then classify the test images with the fine-tuned model and write the top-3 predictions for each image to result.csv:
# -*- coding: utf-8 -*-
import caffe
import sys
import os
import numpy as np

def GetFileList(dir, filelist):
    # collect the full paths of all images under dir
    for fn in os.listdir(dir):
        filelist.append(os.path.join(dir, fn))
    return filelist

###############################################################
caffe_root = '/home/ouc/workspace-sjh/caffe/'
deploy = caffe_root + 'examples/myfile/VGG16/deploy_vgg16_places365.prototxt'
caffe_model = caffe_root + 'models/myfile/VGG_Places365_new/caffe_vgg16_train_iter_8000.caffemodel'
labels_name = caffe_root + 'examples/myfile/VGG16_new/label.txt'
mean_file = caffe_root + 'examples/myfile/VGG16_new/mymean.npy'
path = '/home/ouc/workspace-sjh/caffe/data/my_data/test_B/data/'
################################################################
net = caffe.Net(deploy, caffe_model, caffe.TEST)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))     # HWC -> CHW
transformer.set_mean('data', np.load(mean_file).mean(1).mean(1))
transformer.set_raw_scale('data', 255)           # [0,1] -> [0,255]
transformer.set_channel_swap('data', (2, 1, 0))  # RGB -> BGR
#################################################################
labels = np.loadtxt(labels_name, str, delimiter='\t')
MyPicList = GetFileList(path, [])
f = open('/home/ouc/workspace-sjh/caffe/examples/myfile/VGG16_new/test_B_result/result.csv', 'w')
f.write('FILE_ID,CATEGORY_ID0,CATEGORY_ID1,CATEGORY_ID2' + '\n')
for imgPath in MyPicList:
    img = caffe.io.load_image(imgPath)
    net.blobs['data'].data[...] = transformer.preprocess('data', img)
    out = net.forward()
    prob = net.blobs['prob'].data[0].flatten()
    top_k = prob.argsort()[-1:-4:-1]  # indices of the top-3 classes
    for i in np.arange(top_k.size):
        print (top_k[i], labels[top_k[i]], prob[top_k[i]])
    f.write(imgPath.split('/')[-1].split('.')[0] + ',' + labels[top_k[0]] + ',' +
            labels[top_k[1]] + ',' + labels[top_k[2]] + '\n')
f.close()
The final predictions are written to result.csv.
Finally, fix the class IDs: we mixed up the class order at the beginning, so we wrote a script that rewrites the IDs in result.csv. The mapping is stored in map_data.csv:
err_id,original_id
3,0
1,1
6,2
4,3
0,4
9,5
19,6
17,7
16,8
12,9
15,10
14,11
5,12
11,13
8,14
10,15
18,16
7,17
13,18
2,19
The conversion script, test.py:
# -*- coding: utf-8 -*-
"""
Created on 2018/5/13 10:43
@author: ring
Func:
"""
import pandas as pd

def change_id(original_csv, map_csv):
    """
    Convert the result csv file.
    :param original_csv: the result csv whose class IDs are scrambled
    :param map_csv: csv mapping the wrong IDs to the correct IDs, e.g.:
        err_id  original_id
        0       3
        5       7
    :return: none
    """
    map_dct = {}
    map_datas = pd.read_csv(map_csv).values
    for map_data in map_datas:
        map_dct[int(map_data[0])] = int(map_data[1])
    res_datas = pd.read_csv(original_csv).values
    new_res_datas = [[res_data[0]] + [map_dct[int(val)] for val in res_data[1:]]
                     for res_data in res_datas]
    pd.DataFrame(new_res_datas).to_csv('./new.csv', index=False,
                                       header=['FILE_ID', 'CATEGORY_ID0',
                                               'CATEGORY_ID1', 'CATEGORY_ID2'])

if __name__ == "__main__":
    original_csv = 'result.csv'
    map_csv = 'map_data.csv'
    change_id(original_csv, map_csv)