caffe在构造TrainNet时,首先要做的便是data和label的输入。数据层在Caffe的Data Layer中位于最底层,数据可以在高效的数据库中读取,也可以直接在内存中读取,或者从硬盘中直接读取
1. 数据层 LevedB 和 LMDB
层类型为data,主要分为LevedB 和 LMDB
其在*.prototxt文件中如下所示:
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform param{ #这个参数主要负责输入数据的预处理
scale = 0.00390625 #如减均值,尺度变换,随机裁剪或者镜像
}
data_param{
source: "example/mnist/mnist_train.lmdb"
}
batch_size:64
backend: LMDB
}
这里给出一种生成这种pycaffe的生成代码:
n.data, n.label = L.Data(name = 'MNIST', source = "example/mnist/mnist_train.lmdb", batch_size = 64, include = {'phase':0}, backend = P.Data.LMDB, ntop = 2, transform_param = dict(scale = 0.00390625))
2. 内存数据
layer {
name: "memory_data"
type: "MemoryData"
top: "data"
top: "label"
memory_data_param{
batch_size:2
height: 100
width: 100
channels: 1
}
transform param{
scale: 0.0078125
mean_file: "mean.proto"
mirror: false
}
}
3. HDF5数据
layer{
name: "data"
type: "HDF5Data"
top: "data"
top: "label"
hdf5_data_param{
source: "example/mnist/mnist_train.lmdb"
batch_size: 10
}
}
pycaffe接口:
n.data, n.label = L.HDF5Data(name = 'data', source = "example/mnist/mnist_train.lmdb", batch_size = 10, ntop = 2)
4. 图像数据Images
layer {
name: "data"
type: "ImageData"
top: "data"
top: "label"
image_data_param{
source: "example/mnist/mnist_train.lmdb"
batch_size:50
new_height: 256
new_width: 256
}
transform param{
mirror: false
crop_size: 227
mean_file : "mean.proto"
}
}
事实caffe可以自己来定义数据输入层,比如FCN代码的Python Layer形式,通过继承data layer的一些函数:
我直接把FCN中的PythonLayer放上来,这里仅仅作为事例:
import caffe
import numpy as np
from PIL import Image
import random
class VOCSegDataLayer(caffe.Layer):
"""
Load (input image, label image) pairs from PASCAL VOC
one-at-a-time while reshaping the net to preserve dimensions.
Use this to feed data to a fully convolutional network.
"""
def setup(self, bottom, top):
"""
Setup data layer according to parameters:
- voc_dir: path to PASCAL VOC year dir
- split: train / val / test
- mean: tuple of mean values to subtract
- randomize: load in random order (default: True)
- seed: seed for randomization (default: None / current time)
for PASCAL VOC semantic segmentation.
example
params = dict(voc_dir="/path/to/PASCAL/VOC2011",
mean=(104.00698793, 116.66876762, 122.67891434),
split="val")
"""
# config
params = eval(self.param_str)
self.voc_dir = params['voc_dir']
self.split = params['split']
self.mean = np.array(params['mean'])
self.random = params.get('randomize', True)
self.seed = params.get('seed', None)
# two tops: data and label
if len(top) != 2:
raise Exception("Need to define two tops: data and label.")
# data layers have no bottoms
if len(bottom) != 0:
raise Exception("Do not define a bottom.")
# load indices for images and labels
split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
self.split)
self.indices = open(split_f, 'r').read().splitlines()
self.idx = 0
# make eval deterministic
if 'train' not in self.split:
self.random = False
# randomization: seed and pick
if self.random:
random.seed(self.seed)
self.idx = random.randint(0, len(self.indices)-1)
def reshape(self, bottom, top):
# load image + label image pair
self.data = self.load_image(self.indices[self.idx])
self.label = self.load_label(self.indices[self.idx])
# reshape tops to fit (leading 1 is for batch dimension)
top[0].reshape(1, *self.data.shape)
top[1].reshape(1, *self.label.shape)
def forward(self, bottom, top):
# assign output
top[0].data[...] = self.data
top[1].data[...] = self.label
# pick next input
if self.random:
self.idx = random.randint(0, len(self.indices)-1)
else:
self.idx += 1
if self.idx == len(self.indices):
self.idx = 0
def backward(self, top, propagate_down, bottom):
pass
def load_image(self, idx):
"""
Load input image and preprocess for Caffe:
- cast to float
- switch channels RGB -> BGR
- subtract mean
- transpose to channel x height x width order
"""
im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
in_ = np.array(im, dtype=np.float32)
in_ = in_[:,:,::-1]
in_ -= self.mean
in_ = in_.transpose((2,0,1))
return in_
def load_label(self, idx):
"""
Load label image as 1 x height x width integer array of label indices.
The leading singleton dimension is required by the loss.
"""
im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
label = np.array(im, dtype=np.uint8)
label = label[np.newaxis, ...]
return label
class SBDDSegDataLayer(caffe.Layer):
"""
Load (input image, label image) pairs from the SBDD extended labeling
of PASCAL VOC for semantic segmentation
one-at-a-time while reshaping the net to preserve dimensions.
Use this to feed data to a fully convolutional network.
"""
def setup(self, bottom, top):
"""
Setup data layer according to parameters:
- sbdd_dir: path to SBDD `dataset` dir
- split: train / seg11valid
- mean: tuple of mean values to subtract
- randomize: load in random order (default: True)
- seed: seed for randomization (default: None / current time)
for SBDD semantic segmentation.
N.B.segv11alid is the set of segval11 that does not intersect with SBDD.
Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.
example
params = dict(sbdd_dir="/path/to/SBDD/dataset",
mean=(104.00698793, 116.66876762, 122.67891434),
split="valid")
"""
# config
params = eval(self.param_str)
self.sbdd_dir = params['sbdd_dir']
self.split = params['split']
self.mean = np.array(params['mean'])
self.random = params.get('randomize', True)
self.seed = params.get('seed', None)
# two tops: data and label
if len(top) != 2:
raise Exception("Need to define two tops: data and label.")
# data layers have no bottoms
if len(bottom) != 0:
raise Exception("Do not define a bottom.")
# load indices for images and labels
split_f = '{}/{}.txt'.format(self.sbdd_dir,
self.split)
self.indices = open(split_f, 'r').read().splitlines()
self.idx = 0
# make eval deterministic
if 'train' not in self.split:
self.random = False
# randomization: seed and pick
if self.random:
random.seed(self.seed)
self.idx = random.randint(0, len(self.indices)-1)
def reshape(self, bottom, top):
# load image + label image pair
self.data = self.load_image(self.indices[self.idx])
self.label = self.load_label(self.indices[self.idx])
# reshape tops to fit (leading 1 is for batch dimension)
top[0].reshape(1, *self.data.shape)
top[1].reshape(1, *self.label.shape)
def forward(self, bottom, top):
# assign output
top[0].data[...] = self.data
top[1].data[...] = self.label
# pick next input
if self.random:
self.idx = random.randint(0, len(self.indices)-1)
else:
self.idx += 1
if self.idx == len(self.indices):
self.idx = 0
def backward(self, top, propagate_down, bottom):
pass
def load_image(self, idx):
"""
Load input image and preprocess for Caffe:
- cast to float
- switch channels RGB -> BGR
- subtract mean
- transpose to channel x height x width order
"""
im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
in_ = np.array(im, dtype=np.float32)
in_ = in_[:,:,::-1]
in_ -= self.mean
in_ = in_.transpose((2,0,1))
return in_
def load_label(self, idx):
"""
Load label image as 1 x height x width integer array of label indices.
The leading singleton dimension is required by the loss.
"""
import scipy.io
mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
label = label[np.newaxis, ...]
return labe
在Pycaffe的Net生成中:
n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
ntop=2, param_str=str(pydata_params))
Python Layer还是比较好用的,可以随心所欲的定义自己的输入。还是要向大牛前辈学习一个