场景分类总结-caffe版

     参加一个场景分类的比赛。数据集:http://ai.futurelab.tv/dataset/view。解压码:QBRE,数据集不可以用作商业用途。这是一个20类的场景分类,用的方法是https://github.com/CSAILVision/places365。利用这里面的方法进行caffe微调。

一、制作数据集

    参考:caffe学习系列:训练自己的图片集(超详细教程)点击打开链接

1.1原数据集分类

list.csv中存放图片场景标注信息,categories.csv中存放场景分类信息,总共是20类。

1、每一类500-1500张图片,首先根据csv文件,将20类图片分到每个文件夹下。数据放在caffe/data下,程序见category_classification.py。

2、将数据统一命名,程序见rename.py

# -*- coding:utf8 -*-
#!/usr/bin/python2.7
import os

class BatchRename():
    '''
    Batch-rename the .jpg files of one folder to 000001.jpg, 000002.jpg, ...

    Files are processed in sorted name order. Entries whose name does not
    end in '.jpg' are left untouched.
    '''
    def __init__(self, path='/home/ouc/workspace-sjh/caffe/data/testdata/train/3'):
        # Folder whose images get renamed; the default is the class folder
        # the script was originally written for.
        self.path = path

    def rename(self):
        '''
        Rename every .jpg in self.path to a zero-padded six-digit sequence
        number and print a summary line.

        The work is done in two passes (source -> temporary name -> final
        name) so that a final name such as 000001.jpg can never overwrite a
        file that has not been renamed yet.
        '''
        folder = os.path.abspath(self.path)
        filelist = sorted(os.listdir(folder))
        total_num = len(filelist)

        # Pass 1: park every jpg under a temporary name.
        staged = []
        for item in filelist:
            if not item.endswith('.jpg'):
                continue
            src = os.path.join(folder, item)
            tmp = os.path.join(folder, '.renaming_' + item)
            try:
                os.rename(src, tmp)
            except OSError as err:
                print('skipping %s: %s' % (src, err))
                continue
            staged.append((src, tmp))

        # Pass 2: hand out the final sequence numbers.
        converted = 0
        number = 1
        for src, tmp in staged:
            dst = os.path.join(folder, str(number).zfill(6) + '.jpg')
            # A jpg that could not be staged in pass 1 may still own this
            # number; never overwrite it, take the next free one instead.
            while os.path.exists(dst):
                number += 1
                dst = os.path.join(folder, str(number).zfill(6) + '.jpg')
            try:
                os.rename(tmp, dst)
            except OSError as err:
                print('skipping %s: %s' % (src, err))
                continue
            print('converting %s to %s ...' % (src, dst))
            converted += 1
            number += 1
        print('total %d to rename & converted %d jpgs' % (total_num, converted))

if __name__ == '__main__':
    # Script entry point: rename the images of the configured folder.
    BatchRename().rename()

3、为了提高分类效果,将数据扩增,使用keras,将每一类的数据扩增到2000,程序见imgAug.py    

#!/usr/bin/env python
#-*- coding: utf-8 -*-
#Author: yuanyong.name

import os
import random
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

num_wanted = 2000

target_path_dir = '/home/ouc/workspace-sjh/caffe/data/testdata/train/19'


def list_class_dirs(root):
    """Return the folders to balance below *root*.

    These are the immediate sub-folders of *root* (sorted, as full paths).
    When *root* has no sub-folder it is treated as a class folder itself and
    returned as the only entry.
    """
    sub_dirs = sorted(
        os.path.join(root, name) for name in os.listdir(root)
        if os.path.isdir(os.path.join(root, name)))
    return sub_dirs or [root]


def augs_per_image(num_imgs, wanted):
    """Return how many augmented copies to create per source image.

    The value is ceil((wanted - num_imgs) / num_imgs), so that going through
    the source images once is enough to reach *wanted*. Returns 0 for an
    empty folder or one that already holds *wanted* entries.
    """
    missing = wanted - num_imgs
    if num_imgs <= 0 or missing <= 0:
        return 0
    return -(-missing // num_imgs)  # ceiling division on ints


def pick_surplus_augmented(img_basenames, wanted):
    """Return a random choice of 'aug_' files to delete to get down to *wanted*.

    Only names starting with 'aug_' are ever returned; an empty list is
    returned when the folder is not over *wanted*.
    """
    num_del = len(img_basenames) - wanted
    if num_del <= 0:
        return []
    aug_imgs = [name for name in img_basenames if name.startswith('aug_')]
    random.shuffle(aug_imgs)
    return aug_imgs[:num_del]


def augment_dir(datagen, sub_dir_full, wanted):
    """Write augmented copies into *sub_dir_full* until it holds about *wanted* files.

    Files that cannot be loaded or augmented are reported and skipped.
    """
    img_basenames = os.listdir(sub_dir_full)
    num_imgs = len(img_basenames)
    per_image = augs_per_image(num_imgs, wanted)
    if per_image == 0:
        return
    num_total = num_imgs
    for img_basename in img_basenames:
        if num_total >= wanted:
            break
        img_path = os.path.join(sub_dir_full, img_basename)
        produced = 0
        try:
            x = img_to_array(load_img(img_path))
            x = x.reshape((1,) + x.shape)  # add a leading batch axis of size 1
            for _ in datagen.flow(x, batch_size = 1, save_to_dir = sub_dir_full, save_prefix = 'aug', save_format = 'jpg'):
                produced += 1
                if produced >= per_image:
                    break  # otherwise the generator would loop indefinitely
        except Exception as err:
            print('%s: %s' % (img_path, err))
        num_total += produced


def trim_dir(sub_dir_full, wanted):
    """Delete surplus 'aug_' images so that *sub_dir_full* holds at most *wanted* files."""
    for img_basename in pick_surplus_augmented(os.listdir(sub_dir_full), wanted):
        os.remove(os.path.join(sub_dir_full, img_basename))


def main():
    """Augment every class folder below target_path_dir up to num_wanted images."""
    datagen = ImageDataGenerator(
            rotation_range = 10,
            width_shift_range = 0.2,
            height_shift_range = 0.2,
            shear_range = 0.2,
            zoom_range = 0.2,
            horizontal_flip = True,
            fill_mode = 'nearest')

    sub_dirs = list_class_dirs(target_path_dir)
    for sub_dir_full in sub_dirs:
        augment_dir(datagen, sub_dir_full, num_wanted)

    # delete extra aug images
    for sub_dir_full in sub_dirs:
        trim_dir(sub_dir_full, num_wanted)


if __name__ == '__main__':
    main()

4、从文件中挑选出训练集和测试集,这里挑选了10%的图片作为测试集,程序见select_test.py

'''
select test set
2018-4-26
'''

import os
import shutil
path = 'F:\\工作\\比赛\\未来杯\\image_scene_training\\train_data\\basketball\\'
path_new = 'F:\\工作\\比赛\\未来杯\\image_scene_training\\test_data\\basketball_test\\'


def select_test_files(filenames):
    """Return the names whose leading image number is a multiple of 10.

    The image number is the part of the file name before the first '_'
    (or before the extension when there is no '_'), e.g. 10 for both
    '0010_3.jpg' and '000010.jpg'. Names without such a number are ignored.
    """
    selected = []
    for name in filenames:
        stem = os.path.splitext(name)[0].split('_')[0]
        try:
            number = int(stem)
        except ValueError:
            continue
        if number % 10 == 0:
            selected.append(name)
    return selected


def main():
    """Move every 10th image from the training folder into the test folder."""
    if not os.path.isdir(path_new):
        os.makedirs(path_new)
    for name in select_test_files(os.listdir(path)):
        # Move rather than copy, so a test image does not also stay in the
        # training folder.
        shutil.move(os.path.join(path, name), os.path.join(path_new, name))


if __name__ == '__main__':
    main()

1.2 创建自己的lmdb文件

1、创建图片名列表清单

     创建一个txt文件来存放所有图片信息,文本中每行存放一个样本,内容包括图片名和类别标签信息,中间用空格分开。可以用python或者sh文件实现。程序见filename_list.py

import os
def create_image_list(file_path,txtpath):
    '''
    Write one "<image name> <label>" line per image in file_path to txtpath.

    The label is the text between the first '_' and the following '.' of the
    file name (e.g. 'a_3.jpg' -> '3'). Entries without a '_' carry no label
    and are skipped with a notice. Names are written in sorted order.

    :param file_path: path of image file
    :param txtpath:  be used to save all image names and labels
    :return: none
    '''
    # remove the old list file first, so it is never listed as an image
    # when it lives inside file_path
    if os.path.isfile(txtpath):
        os.remove(txtpath)
    # get the name list of all images
    image_name_list = sorted(os.listdir(file_path))

    # save the names and labels of all images to the list file
    with open(txtpath,'w') as f:
        print('saving to'+ txtpath + '...')
        for image_name in image_name_list:
            parts = image_name.split('_')
            if len(parts) < 2:
                print('skipping ' + image_name + ': no "_<label>" part in the name')
                continue
            image_label = parts[1].split('.')[0]
            f.write(image_name + ' ' + image_label + '\n')
        print('Done')
if __name__ == "__main__":
    # Build the list file for the images of the basketball test folder.
    create_image_list(
        'F:\\工作\\比赛\\未来杯\\image_scene_training\\test_data\\basketball_test',
        'F:\\工作\\比赛\\未来杯\\train_label\\test.txt')

2、生成lmdb文件

    参考:Caffe学习系列(11):图像数据转换成db(leveldb/lmdb)文件

    首先,在examples下面创建一个myfile的文件夹,用来存放文件和脚本文件,然后创建一个脚本create_imagenet.sh,用来生成lmdb文件,程序见create_imagenet.sh

    
#!/usr/bin/env sh
# Create the train and val lmdb inputs with Caffe's convert_imageset tool.
# N.B. set the paths to the train + val data dirs below.
set -e

EXAMPLE=data/my_data/testdataNew
DATA=data/my_data/testdataNew
TOOLS=build/tools

# convert_imageset builds each image path by appending the list-file entry
# directly to the root folder, so both roots must end with a slash.
# No whitespace is allowed after '=': "VAR=  /path" would set VAR to the
# empty string and try to execute /path as a command.
TRAIN_DATA_ROOT=/home/b101/caffe/data/project/train/
VAL_DATA_ROOT=/home/b101/caffe/data/project/val/

# Set RESIZE=true to resize the images to 256x256. Leave as false if images have
# already been resized using another tool.
RESIZE=true
if $RESIZE; then
  RESIZE_HEIGHT=256
  RESIZE_WIDTH=256
else
  RESIZE_HEIGHT=0
  RESIZE_WIDTH=0
fi

if [ ! -d "$TRAIN_DATA_ROOT" ]; then
  echo "Error: TRAIN_DATA_ROOT is not a path to a directory: $TRAIN_DATA_ROOT"
  echo "Set the TRAIN_DATA_ROOT variable in create_imagenet.sh to the path" \
       "where the ImageNet training data is stored."
  exit 1
fi

if [ ! -d "$VAL_DATA_ROOT" ]; then
  echo "Error: VAL_DATA_ROOT is not a path to a directory: $VAL_DATA_ROOT"
  echo "Set the VAL_DATA_ROOT variable in create_imagenet.sh to the path" \
       "where the ImageNet validation data is stored."
  exit 1
fi

echo "Creating train lmdb..."

GLOG_logtostderr=1 "$TOOLS/convert_imageset" \
    --resize_height="$RESIZE_HEIGHT" \
    --resize_width="$RESIZE_WIDTH" \
    --shuffle \
    "$TRAIN_DATA_ROOT" \
    "$DATA/train.txt" \
    "$EXAMPLE/train_lmdb"

echo "Creating val lmdb..."

GLOG_logtostderr=1 "$TOOLS/convert_imageset" \
    --resize_height="$RESIZE_HEIGHT" \
    --resize_width="$RESIZE_WIDTH" \
    --shuffle \
    "$VAL_DATA_ROOT" \
    "$DATA/test.txt" \
    "$EXAMPLE/test_lmdb"

echo "Done."

1.3 计算均值并保存

    执行make_imagenet_mean.sh文件,得到均值文件mean.binaryproto

#!/usr/bin/env sh
# Compute the mean image from the training lmdb and save it as
# mean.binaryproto.
# Stop at the first failing command, so that "Done." is only printed when
# the mean file was actually written.
set -e

EXAMPLE=/home/b101/caffe/data/my_data/test_bad
DATA=/home/b101/caffe/data/my_data/test_bad
TOOLS=build/tools

"$TOOLS/compute_image_mean" "$EXAMPLE/train_lmdb" \
  "$DATA/mean.binaryproto"

echo "Done."

    到此,数据集整理完毕

二、caffe微调

    参考:caffe训练和测试自己的数据集 点击打开链接;caffe初探4:对训练得到的模型进行测试 点击打开链接;caffe fine-tuning微调网络 点击打开链接
2.1 创建模型并编写配置文件

1、调整网络层参数

     1、修改train_val.prototxt的mean_file和source,根据配置修改batch_size    

# Data layer used in the TRAIN phase. mean_file, source and batch_size are
# the fields to adapt to your own data set and hardware.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    crop_size: 224
    # mean image file computed from the training lmdb
    mean_file: "examples/myfile/VGG16_new/mean.binaryproto"
    mirror: true
  }
  data_param {
    # training lmdb
    source: "examples/myfile/VGG16_new/train_lmdb"
    backend: LMDB
    batch_size: 48
  }
}
# Data layer used in the TEST phase: same crop size and mean file as the
# training layer, but mirroring is off and it reads the test lmdb.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    mirror: false
    crop_size: 224
    mean_file: "examples/myfile/VGG16_new/mean.binaryproto"
  }
  data_param {
    # test lmdb
    source: "examples/myfile/VGG16_new/test_lmdb"
    batch_size: 16
    backend: LMDB
  }
}

     2、修改输出层fc8的名字(这里改为fc8a_new),这样加载预训练模型时,该层会因为名字不匹配而不被赋值,从而重新训练,达到微调的目的

     3、调整学习率:最后一层是重新学习的,需要更快的学习率,因此将最后一层weights和bias的学习率加快10倍(对应下面的lr_mult: 10和lr_mult: 20)

# Output layer, renamed from fc8: because the name no longer matches the
# pre-trained model, its weights are not copied and the layer is trained
# from scratch.
layer {
  name: "fc8a_new"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8a_new"
  # weights: raised learning-rate multiplier, since this layer is re-learned
  param {
    lr_mult: 10
    decay_mult: 1.0
  }
  # bias: raised learning-rate multiplier, no weight decay
  param {
    lr_mult: 20
    decay_mult: 0.0
  }
  inner_product_param {
    # one output per scene class (20 classes in this data set)
    num_output: 20
  }
}

        4、如果需要top3的准确率,在accuracy层添加accuracy_param

# Accuracy layer, only active in the TEST phase; compares the fc8a_new
# output with the labels.
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc8a_new"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
  accuracy_param {
    # report top-3 accuracy
    top_k: 3
  }
}

2、修改solver文件

# Solver settings for fine-tuning the network named in `net` below.
# NOTE(review): presumably one test pass covers test_iter x TEST batch_size
# (100 x 16) images - confirm against the size of the test lmdb.
test_iter: 100
test_interval: 100
test_initialization: false
base_lr: 0.001
display: 20
max_iter: 10000
# "step" policy, configured by gamma and stepsize below
lr_policy: "step"
gamma: 0.1
momentum: 0.9
weight_decay: 0.0005
stepsize: 3000
snapshot: 1000
# Snapshots are written with this prefix; the classification script loads
# caffe_vgg16_train_iter_8000.caffemodel from the same folder.
snapshot_prefix: "models/myfile/VGG_Places365_new/caffe_vgg16_train"
solver_mode: GPU
net: "examples/myfile/VGG16_new/train_val_vgg16.prototxt"
solver_type: SGD

3、下载caffemodel和deploy.prototxt点击打开链接,我选择的是vgg16-places365

4、开始训练,运行train_caffenet.sh

#!/usr/bin/env sh
# Fine-tune from the pre-trained caffemodel with the solver below, on GPU 0.
set -e

SOLVER=data/my_data/test_bad/solver_vgg16.prototxt
WEIGHTS=data/my_data/test_bad/vgg16_places365.caffemodel

./build/tools/caffe train \
    --solver="$SOLVER" \
    -weights "$WEIGHTS" \
    -gpu 0

三、测试

参考:点击打开链接

3.1 生成类名文件label.txt

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19

3.2生成均值文件mean.npy,程序见create_mean

#!/usr/bin/env python
import numpy as np
import sys,caffe

def convert_mean(binaryproto_path, npy_path):
    """Convert a Caffe mean.binaryproto file into a .npy file.

    The BlobProto stored in *binaryproto_path* is parsed, converted to an
    array with caffe.io.blobproto_to_array, and the first entry along the
    leading axis is saved to *npy_path*.
    """
    blob = caffe.proto.caffe_pb2.BlobProto()
    # Read through a context manager so the file handle is always closed.
    with open(binaryproto_path, 'rb') as mean_file:
        blob.ParseFromString(mean_file.read())
    arr = np.array(caffe.io.blobproto_to_array(blob))
    np.save(npy_path, arr[0])


if __name__ == '__main__':
    if len(sys.argv) != 3:
        print("Usage: python convert_mean.py mean.binaryproto mean.npy")
        # A usage error must not look like success to the calling shell.
        sys.exit(1)
    convert_mean(sys.argv[1], sys.argv[2])

3.3 分类,程序见classification.py

# -*- coding:utf-8 -*-
import caffe
import sys
import os
import numpy as np
#import cv2

def GetFileList(dir,filelist):
    """Append the full path of every entry in `dir` to `filelist`.

    The list is extended in place, in the order given by os.listdir, and the
    same list object is returned.
    """
    filelist.extend(os.path.join(dir, entry) for entry in os.listdir(dir))
    return filelist

###############################################################
# Paths of the deploy net, the trained weights, the label list, the mean
# file, the folder of images to classify and the result file.
caffe_root='/home/ouc/workspace-sjh/caffe/'
deploy=caffe_root+'examples/myfile/VGG16/deploy_vgg16_places365.prototxt'
caffe_model=caffe_root+'models/myfile/VGG_Places365_new/caffe_vgg16_train_iter_8000.caffemodel'
labels_name=caffe_root+'examples/myfile/VGG16_new/label.txt'
mean_file=caffe_root+'examples/myfile/VGG16_new/mymean.npy'
path='/home/ouc/workspace-sjh/caffe/data/my_data/test_B/data/'
result_csv='/home/ouc/workspace-sjh/caffe/examples/myfile/VGG16_new/test_B_result/result.csv'


################################################################
# Load the net in TEST mode and set up the input preprocessing.
net=caffe.Net(deploy,caffe_model,caffe.TEST)
transformer=caffe.io.Transformer({'data':net.blobs['data'].data.shape})
transformer.set_transpose('data',(2,0,1))  # move the last image axis to the front
# average the stored mean array over its axes 1 and 2
# (presumably leaving one mean value per channel - confirm the .npy layout)
transformer.set_mean('data',np.load(mean_file).mean(1).mean(1))
transformer.set_raw_scale('data',255)
transformer.set_channel_swap('data',(2,1,0))  # reverse the channel order

#################################################################
# Classify every file in `path` and write its three most probable labels.
labels=np.loadtxt(labels_name,str,delimiter='\t')
MyPicList=GetFileList(path,[])
# The with-block closes the result file even if an image fails half-way.
with open(result_csv,'w') as f:
    f.write('FILE_ID,CATEGORY_ID0,CATEGORY_ID1,CATEGORY_ID2'+'\n')
    for imgPath in MyPicList:
        img=caffe.io.load_image(imgPath)

        net.blobs['data'].data[...]=transformer.preprocess('data',img)
        net.forward()
        prob=net.blobs['prob'].data[0].flatten()
        # indices of the three largest probabilities, best first
        top_k=prob.argsort()[-1:-4:-1]
        for idx in top_k:
            print (idx,labels[idx],prob[idx])
        # file name without directory and extension; splitext keeps any
        # further dots that are part of the name
        file_id=os.path.splitext(os.path.basename(imgPath))[0]
        f.write(file_id+','+','.join(labels[idx] for idx in top_k)+'\n')

最后生成的结果放在result.csv中

附件:

1、修改类名,我们当初把类的名字搞错了,就写了一个程序来修改result.csv的名字

比对关系见map_data.csv

err_id,original_id
3,0
1,1
6,2
4,3
0,4
9,5
19,6
17,7
16,8
12,9
15,10
14,11
5,12
11,13
8,14
10,15
18,16
7,17
13,18
2,19

变换程序test.py

# -*- coding: utf-8 -*-
"""
Created on 2018/5/13 10:43
@author: ring
Func: 
"""
import pandas as pd
def change_id(original_csv, map_csv, output_csv='./new.csv'):
    """
    Rewrite a result csv file, replacing wrong class ids by the correct ones.

    :param original_csv: result csv file with the mixed-up class ids; the
        first column is the file id, the remaining columns are class ids
    :param map_csv: csv file mapping each wrong id to the correct id, e.g.:
    err_id original_id
    0      3
    5      7
    :param output_csv: path of the corrected csv file to write
    :return: None
    :raises KeyError: if a class id in original_csv has no entry in map_csv
    """
    map_dct = {int(row[0]): int(row[1]) for row in pd.read_csv(map_csv).values}

    # Read everything as text so that file ids such as '000123' keep their
    # leading zeros; the class ids are converted explicitly below.
    res_datas = pd.read_csv(original_csv, dtype=str).values
    new_res_datas = []
    for res_data in res_datas:
        try:
            mapped = [map_dct[int(val)] for val in res_data[1:]]
        except KeyError as err:
            raise KeyError('class id %s of file %s has no entry in %s'
                           % (err, res_data[0], map_csv))
        new_res_datas.append([res_data[0]] + mapped)

    header = ['FILE_ID', 'CATEGORY_ID0', 'CATEGORY_ID1', 'CATEGORY_ID2']
    # Passing the names as columns also works for an empty result file,
    # which then yields a header-only csv.
    pd.DataFrame(new_res_datas, columns=header).to_csv(output_csv, index=False)
if __name__ == "__main__":
    # Remap the class ids of result.csv with the pairs listed in map_data.csv.
    change_id('result.csv', 'map_data.csv')

      

你可能感兴趣的:(场景分类总结-caffe版)