将txt转为hdf5文件

# -*- coding: utf-8 -*-
import h5py
import os
from os.path import join, getsize
import numpy as np

# Disabled one-off snippet kept inside a bare string literal, so it is never
# executed. If enabled it would create, for each entry of `folder_paths` (a
# name not defined in the visible code), a directory under E:\datadisk_hdf5
# named after that entry with its first 12 characters removed.
'''
for folder_path in folder_paths:
    os.makedirs('E:\datadisk_hdf5' + folder_path[12:])
'''

def GetDirSize(directory):
    """Return the combined size, in bytes, of all files below *directory*.

    The tree is traversed recursively with ``os.walk``; a directory that
    contains no files (or that ``os.walk`` cannot enter) contributes 0.
    """
    return sum(
        getsize(join(parent, filename))
        for parent, _subdirs, filenames in os.walk(directory)
        for filename in filenames
    )


def _pack_txt_dir(source_dir, hdf5_file_name, min_size):
    """Store the .txt files found under *source_dir* in one new HDF5 file.

    Files whose size is not greater than *min_size* bytes are skipped.
    Within each directory the remaining files are written largest first.
    """
    # The context manager closes the HDF5 file exactly once, after the whole
    # walk; closing inside the loop broke every directory with sub-directories.
    with h5py.File(hdf5_file_name, 'w') as f:
        for root, dirs, files in os.walk(source_dir):
            sized = [(name, getsize(join(root, name)))
                     for name in files if name.lower().endswith('.txt')]
            sized.sort(key=lambda item: item[1], reverse=True)

            # Files directly inside source_dir keep the bare stem as key;
            # files in sub-directories are placed in matching HDF5 groups so
            # equal file names in different directories cannot collide.
            rel_dir = os.path.relpath(root, source_dir)
            prefix = '' if rel_dir == os.curdir else rel_dir.replace(os.sep, '/') + '/'

            for name, size in sized:
                if size <= min_size:
                    continue
                with open(join(root, name)) as fdata:
                    data_wait = fdata.read()
                # np.string_ was removed in NumPy 2.0; np.bytes_ is the same
                # type. Encoding as UTF-8 matches the decode() in ReadHdf5.
                f.create_dataset(name=prefix + name[:-4], shape=(1, 1),
                                 data=np.bytes_(data_wait.encode('utf-8')),
                                 compression='gzip')


def GetHdf5(import_folder, export_folder, min_size=100):
    """Convert directories of .txt files into HDF5 files.

    For every directory found at any depth below each immediate
    sub-directory of *import_folder*, one file ``<relative path>.hdf5`` is
    created under *export_folder*. Each .txt file larger than *min_size*
    bytes becomes one gzip-compressed dataset of shape (1, 1) whose key is
    the file name without its extension. Output files that already exist
    are left untouched.

    Args:
        import_folder: Root directory holding the source tree.
        export_folder: Root directory receiving the .hdf5 files.
        min_size: Size threshold in bytes; only files strictly larger than
            this are stored. Defaults to 100.
    """
    # Only real directories are mirrored; plain files sitting directly in
    # import_folder used to get a spurious export directory of their own.
    child_names = [name for name in os.listdir(import_folder)
                   if os.path.isdir(join(import_folder, name))]
    for name in child_names:
        os.makedirs(join(export_folder, name), exist_ok=True)

    for name in child_names:
        folder_path = join(import_folder, name)
        print(folder_path)
        file_path = [join(root, sub_dir)
                     for root, dirs, files in os.walk(folder_path)
                     for sub_dir in dirs]
        print(file_path)
        for x in file_path:
            # Derive the output path from the path relative to import_folder
            # rather than from a slice tied to one particular folder length.
            hdf5_file_name = join(export_folder, os.path.relpath(x, import_folder)) + '.hdf5'
            if os.path.exists(hdf5_file_name):
                continue
            # Deeper levels of the tree have no export directory yet.
            os.makedirs(os.path.dirname(hdf5_file_name), exist_ok=True)
            print(hdf5_file_name)
            _pack_txt_dir(x, hdf5_file_name, min_size)


def ReadHdf5(read_file, item):
    """Return the text stored under key *item* in the HDF5 file *read_file*.

    The dataset is expected to hold a single byte string at position
    [0][0], which is decoded with the default codec (UTF-8).

    Raises:
        KeyError: if *item* is not present in the file.
    """
    # The context manager releases the file handle, which was previously leaked.
    with h5py.File(read_file, 'r') as fr:
        # Dataset.value was removed in h5py 3.0; indexing with an empty
        # tuple reads the whole dataset on old and new versions alike.
        return fr[item][()][0][0].decode()


if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal; sequences such as
    # "\d" and "\s" in a normal string are invalid escapes and trigger a
    # SyntaxWarning on recent Python versions.
    import_folder = r'D:\dataBYcpp\SH-2-0'
    export_folder = r'D:\datadisk2_hdf5\second'
    GetHdf5(import_folder, export_folder)
    # Example of reading a single entry back:
    # out = ReadHdf5(r'D:\datadisk2_hdf5\kline\20180327.hdf5', '000001SZ_20180327_kline')
    # print(out)

注释:
 ##GetHdf5函数是将一个目录下的所有txt文件存为一个hdf5文件,每个txt文件名(去掉.txt后缀)作为一个key,大小不超过100字节的文件会被跳过。SH-2-0目录下还有两级目录,例如是将SH-2-0\kline\20180327这个目录存为导出目录下的 kline\20180327.hdf5,25-30行的代码需要根据你们自己的需要修改成对应的文件目录
##ReadHdf5函数是读取hdf5文件,获取某个key对应的内容

你可能感兴趣的:(hdf5)