h5disp(filename);
写入:
function [curr_dat_sz, curr_lab_sz] = store2hdf5(filename, data, labels, create, startloc, chunksz)
%filename:地址名
% *data* is W*H*C*N matrix of images should be normalized (e.g. to lie between 0 and 1) beforehand
% *label* is D*N matrix of labels (D labels per sample)
% *create* [0/1] specifies whether to create file newly or to append to previously created file, useful to store information in batches when a dataset is too big to be held in memory (default: 1) 即:是新建文件,还是将数据追加到已有文件的末尾
% *startloc* (point at which to start writing data, 即开始写入数据的位置). By default,
% if create=1 (create mode), startloc.dat=[1 1 1 1], and startloc.lab=[1 1];
% if create=0 (append mode), startloc.dat=[1 1 1 K+1], and startloc.lab = [1 K+1]; where K is the current number of samples stored in the HDF
% chunksz (used only in create mode), specifies number of samples to be stored per chunk (see HDF5 documentation on chunking) for creating HDF5 files with unbounded maximum size - TLDR; higher chunk sizes allow faster read-write operations 文件中每个chunk的大小
例:
% Generate the training data.
clear; close all;

%% settings
folder     = 'Train';     % directory that is searched for the source images
savepath   = 'train.h5';  % HDF5 file the samples are written to
size_input = 33;          % side length of each square input patch
size_label = 21;          % side length of each square label patch
scale      = 3;           % factor for modcrop and the down-/up-sampling step
stride     = 14;          % step between the origins of neighbouring patches

%% initialization
count   = 0;                                    % number of patches collected so far
padding = abs(size_input - size_label) / 2;     % border between input and label patch
data    = zeros(size_input, size_input, 1, 1);  % input patches, one per 4th-dim index
label   = zeros(size_label, size_label, 1, 1);  % label patches, one per 4th-dim index
%% generate data
% Cut every *.bmp image found in `folder` into aligned input/label patches
% and then shuffle the patches. (The original note reports 33*33*1*21884
% inputs and 21*21*1*21884 labels for its image set.)
filepaths = dir(fullfile(folder, '*.bmp'));   % every .bmp file in the folder

for i = 1 : length(filepaths)
    % Named `img` rather than `image` so the built-in image() is not shadowed.
    img = imread(fullfile(folder, filepaths(i).name));

    % rgb2ycbcr needs an M-by-N-by-3 RGB array. A single-channel (grayscale)
    % bitmap would make it fail, so such an image is used as-is.
    if size(img, 3) == 3
        img = rgb2ycbcr(img);
    end
    img = im2double(img(:, :, 1));            % first channel (Y) as double

    % modcrop is defined elsewhere; presumably it crops the image so that
    % both dimensions are divisible by `scale` -- confirm against the helper.
    im_label = modcrop(img, scale);
    [hei, wid] = size(im_label);

    % Degrade the label image: bicubic down-sampling by `scale`, followed by
    % bicubic up-sampling back to the original size.
    im_input = imresize(imresize(im_label, 1/scale, 'bicubic'), [hei, wid], 'bicubic');

    % Slide a size_input window over the image with step `stride`.
    for x = 1 : stride : hei - size_input + 1
        for y = 1 : stride : wid - size_input + 1
            % size_input x size_input patch of the degraded image
            subim_input = im_input(x : x + size_input - 1, y : y + size_input - 1);
            % size_label x size_label patch of the original image; shifting by
            % `padding` keeps it centred on the same point as subim_input
            subim_label = im_label(x + padding : x + padding + size_label - 1, ...
                                   y + padding : y + padding + size_label - 1);

            count = count + 1;
            data(:, :, 1, count)  = subim_input;
            label(:, :, 1, count) = subim_label;
        end
    end
end

% randperm(count) returns a random permutation of the integers 1..count;
% using it as an index shuffles inputs and labels in the same order.
order = randperm(count);
data  = data(:, :, 1, order);
label = label(:, :, 1, order);
%% writing to HDF5
% Write the samples to `savepath` in batches of `chunksz` samples each.
% Only floor(count/chunksz) full batches are written; any samples left over
% after the last full batch are not stored.
chunksz      = 128;
totalct      = 0;       % number of samples stored so far
created_flag = false;   % becomes true once the first batch has been written

num_batches = floor(count / chunksz);
for batchno = 1 : num_batches
    last_read = chunksz * (batchno - 1);
    batch_idx = last_read + (1 : chunksz);

    batchdata = data(:, :, 1, batch_idx);    % e.g. 33*33*1*128
    batchlabs = label(:, :, 1, batch_idx);   % e.g. 21*21*1*128

    % Both datasets are written starting at sample index totalct+1.
    write_pos = [1, 1, 1, totalct + 1];
    startloc  = struct('dat', write_pos, 'lab', write_pos);

    % ~created_flag is true only for the first batch, so the file is created
    % once and every later batch is appended.
    curr_dat_sz = store2hdf5(savepath, batchdata, batchlabs, ~created_flag, startloc, chunksz);

    created_flag = true;
    totalct      = curr_dat_sz(end);
end

h5disp(savepath);   % print the structure of the resulting file