【python】把大量csv文件写入h5文件制作数据集

背景

每一个样本的数据被写入了一个csv中,在制作数据集时,需要先将大量的csv文件写入到一个h5文件中

样例代码

import os
import sys
import h5py
import numpy as np

# Command-line arguments:
#   argv[1]: bins file folder path
#   argv[2]: x of shape
#   argv[3]: y of shape
#   argv[4]: h5 file name
USAGE = "usage: <script> <bins file folder path> <x of shape> <y of shape> <h5 file name>"


def list_sample_files(data_folder):
    """Return the paths of every regular file found under ``data_folder``.

    The folder is walked recursively, so both a flat folder of CSV files and
    a one-CSV-per-subfolder layout are supported. Directory and file names
    are sorted so that the sample order is deterministic across runs.
    """
    sample_files = []
    for dir_path, dir_names, file_names in os.walk(data_folder):
        # Sorting in place controls the order in which os.walk descends.
        dir_names.sort()
        for file_name in sorted(file_names):
            # os.walk yields a *list* of names per directory; each name must
            # be joined to the directory on its own.
            sample_files.append(os.path.join(dir_path, file_name))
    return sample_files


def load_sample(file_path, shape_x, shape_y):
    """Load one comma-separated file as a float32 array of (shape_x, shape_y).

    Raises:
        ValueError: if the file content does not have the expected shape.
    """
    expected = (shape_x, shape_y)
    data = np.genfromtxt(file_path, delimiter=",", dtype='float32')
    if data.shape == expected:
        return data
    # genfromtxt collapses single-row / single-column files to fewer than two
    # dimensions; restore the 2-D shape when the element count agrees.
    if data.ndim < 2 and data.size == shape_x * shape_y:
        return data.reshape(expected)
    raise ValueError(
        "%s: expected shape %s, got %s" % (file_path, expected, data.shape)
    )


def write_h5(h5_path, sample_files, shape_x, shape_y):
    """Write every sample file into the 'spectrum_bins' dataset of ``h5_path``.

    The dataset has shape (len(sample_files), shape_x, shape_y); sample ``i``
    is stored at index ``i`` along the first axis.
    """
    # The context manager closes the HDF5 file even when a sample fails to load.
    with h5py.File(h5_path, 'w') as h5f:
        dset = h5f.create_dataset(
            'spectrum_bins', (len(sample_files), shape_x, shape_y)
        )
        for index, file_path in enumerate(sample_files):
            print("open file ", file_path)
            dset[index, :, :] = load_sample(file_path, shape_x, shape_y)


def main(argv):
    """Run the conversion for ``argv`` and return the process exit status."""
    if len(argv) != 5:
        print(USAGE, file=sys.stderr)
        return -1

    data_folder = argv[1]
    shape_x = int(argv[2])
    shape_y = int(argv[3])
    h5_path = argv[4]

    # Collect the inputs before the output file is created so that the dataset
    # size always matches the number of files that will actually be written.
    sample_files = list_sample_files(data_folder)
    write_h5(h5_path, sample_files, shape_x, shape_y)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

你可能感兴趣的:(深度学习)