使用 Keras 读取数据并在训练时进行在线数据增强;此处数据以 pandas DataFrame 的形式组织。
用迭代器(生成器)生成数据:图像与标签
def make_image_gen(in_df, batch_size = BATCH_SIZE):
    """Endlessly yield shuffled ``(images, labels)`` batches read from disk.

    Every pass over the table visits the rows in a fresh random order.
    Samples left over at the end of a pass are not dropped; they are carried
    into the first batch of the next pass.

    Args:
        in_df: Table with a ``'filepath'`` column (paths handed to
            ``cv2.imread``) and a ``'label'`` column.
        batch_size: Number of samples per yielded batch.

    Yields:
        ``(images, labels)`` where ``images`` is the stacked image batch
        divided by 255.0 and ``labels`` holds one single-element row per
        sample. Images are returned exactly as ``cv2.imread`` loads them
        (OpenCV's default channel order is BGR, despite the ``out_rgb`` name).

    Raises:
        ValueError: If ``in_df`` has no rows, or an image cannot be read.
    """
    paths = in_df['filepath'].values
    labels = in_df['label'].values
    if len(paths) == 0:
        # Without this guard the loop below would spin forever and never yield.
        raise ValueError("make_image_gen: in_df contains no rows")
    out_rgb = []
    out_y = []
    while True:
        # Shuffle row indices instead of a stacked (path, label) array so the
        # labels are never coerced to the dtype of the paths.
        for idx in np.random.permutation(len(paths)):
            path = paths[idx]
            c_img = cv2.imread(path)
            if c_img is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise ValueError(
                    "make_image_gen: could not read image %r" % (path,))
            out_rgb.append(c_img)
            out_y.append([labels[idx]])
            if len(out_rgb) >= batch_size:
                yield np.stack(out_rgb, 0)/255.0, np.array(out_y)
                out_rgb, out_y = [], []
数据增强使用ImageDataGenerator
from keras.preprocessing.image import ImageDataGenerator
# Augmentation settings for the image/label pipeline: small rotations, shifts,
# shear and zoom plus random flips; pixels exposed by a transform are filled
# with the constant value 0.
dg_args = {
    'featurewise_center': False,
    'samplewise_center': False,
    'rotation_range': 5,
    'width_shift_range': 0.01,
    'height_shift_range': 0.01,
    'shear_range': 0.01,
    'zoom_range': [0.9, 1.25],
    'horizontal_flip': True,
    # NOTE(review): this option was specially marked in the original notes;
    # confirm that vertical flips are appropriate for the data.
    'vertical_flip': True,
    'fill_mode': "constant",
    'cval': 0,
    'data_format': 'channels_last',
}
image_gen = ImageDataGenerator(**dg_args)
def create_aug_gen(in_gen):
    """Wrap a batch generator so that every batch is randomly augmented.

    Args:
        in_gen: Iterable of ``(x, y)`` batches; ``x`` is multiplied by 255
            before augmentation and divided by 255.0 afterwards.

    Yields:
        ``(augmented_x, y)`` as produced by one draw from the module-level
        ``image_gen`` for each incoming batch.
    """
    for batch_x, batch_y in in_gen:
        n_samples = batch_x.shape[0]
        # A fresh flow per batch, sized to return the whole batch in one draw.
        flow = image_gen.flow(255*batch_x, batch_y, batch_size = n_samples)
        aug_x, aug_y = next(flow)
        yield aug_x/255.0, aug_y
训练
# Chain the disk reader into the augmenter, then train for a single step of a
# single epoch.
aug_gen = create_aug_gen(
    make_image_gen(data_train)
)
model.fit_generator(aug_gen, steps_per_epoch=1, epochs=1)
用迭代器(生成器)生成数据:图像与对应的掩码
def make_image_gen(in_df, batch_size = BATCH_SIZE):
    """Endlessly yield shuffled ``(images, masks)`` batches read from disk.

    Every pass over the table visits the rows in a fresh random order.
    Samples left over at the end of a pass are not dropped; they are carried
    into the first batch of the next pass.

    Args:
        in_df: Table with a ``'filepath'`` column (image paths) and a
            ``'maskpath'`` column (mask paths), both handed to ``cv2.imread``.
        batch_size: Number of samples per yielded batch.

    Yields:
        ``(images, masks)`` where ``images`` is the stacked image batch
        divided by 255.0 and ``masks`` is the stacked mask batch. Masks are
        read with flag ``0``, given a trailing axis, and are NOT rescaled.

    Raises:
        ValueError: If ``in_df`` has no rows, or an image or mask cannot be
            read.
    """
    img_paths = in_df['filepath'].values
    mask_paths = in_df['maskpath'].values
    if len(img_paths) == 0:
        # Without this guard the loop below would spin forever and never yield.
        raise ValueError("make_image_gen: in_df contains no rows")
    out_rgb = []
    out_mask = []
    while True:
        # Visit the rows in a new random order on every pass.
        for idx in np.random.permutation(len(img_paths)):
            img_path = img_paths[idx]
            mask_path = mask_paths[idx]
            c_img = cv2.imread(img_path)
            if c_img is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise ValueError(
                    "make_image_gen: could not read image %r" % (img_path,))
            c_mask = cv2.imread(mask_path, 0)
            if c_mask is None:
                # Must be caught here: np.expand_dims would accept None and
                # pass a meaningless object array on to the batch.
                raise ValueError(
                    "make_image_gen: could not read mask %r" % (mask_path,))
            c_mask = np.expand_dims(c_mask, axis=-1)
            out_rgb.append(c_img)
            out_mask.append(c_mask)
            if len(out_rgb) >= batch_size:
                yield np.stack(out_rgb, 0)/255.0, np.stack(out_mask, 0)
                out_rgb, out_mask = [], []
图像和掩码进行相同的数据增强
from keras.preprocessing.image import ImageDataGenerator
# Shared augmentation settings for images and masks: random rotation only.
# Two generators are built from the same settings, one for the images and one
# for the masks.
dg_args = {
    'featurewise_center': False,
    'samplewise_center': False,
    'rotation_range': 45,
    'data_format': 'channels_last',
}
image_gen = ImageDataGenerator(**dg_args)
label_gen = ImageDataGenerator(**dg_args)
def create_aug_gen(in_gen, seed = None):
    """Augment image and mask batches using the same random seed for both.

    For each incoming batch a new seed is drawn and passed to both the
    module-level ``image_gen`` and ``label_gen`` flows. The intent is that an
    image and its mask receive the same ordering and the same transform.

    Args:
        in_gen: Iterable of ``(x, y)`` batches; ``x`` is multiplied by 255
            before augmentation and divided by 255.0 afterwards, ``y`` is
            passed through unscaled.
        seed: Optional seed. When given, the global NumPy RNG is seeded with
            it and the per-batch seeds are derived from it. When ``None``,
            the global RNG is left untouched and the per-batch seeds come
            from a freshly initialised private stream.

    Yields:
        ``(augmented_x, augmented_y)`` for each incoming batch.
    """
    if seed is not None:
        # Honour an explicit request for reproducibility.
        np.random.seed(seed)
    # Draw per-batch seeds from a private stream so this generator neither
    # reseeds nor consumes the process-wide RNG merely to pick a seed.
    seed_stream = np.random.RandomState(seed)
    for in_x, in_y in in_gen:
        batch_seed = int(seed_stream.randint(9999))
        n_samples = in_x.shape[0]
        g_x = image_gen.flow(255*in_x,
                             batch_size = n_samples,
                             seed = batch_seed,
                             shuffle=True)
        g_y = label_gen.flow(in_y,
                             batch_size = n_samples,
                             seed = batch_seed,
                             shuffle=True)
        yield next(g_x)/255.0, next(g_y)
训练
# Pull a single one-sample batch to serve as the fixed validation data, then
# train on augmented batches, one step per epoch.
valid_x, valid_y = next(make_image_gen(data_val_mb, batch_size=1))
aug_gen = create_aug_gen(
    make_image_gen(data_train_mb)
)
model.fit_generator(
    aug_gen,
    validation_data=(valid_x, valid_y),
    epochs=NB_EPOCHS,
    steps_per_epoch=1,
)