# Python读取tsv文件数据

#!/usr/bin/env python3
# -*- coding: utf-8 -*-


"""
create_author : 蛙鳜鸡鹳狸猿
create_time   : 2019-03-19
program       : *_* .tsv file handler *_*
"""

import codecs


class TSV(object):
    """
    Reader for tab-separated-values (.tsv) files.

    The first non-blank line of the file is taken as the column header;
    every following non-blank line becomes one record keyed by that header.
    """

    def __init__(self, file):
        """
        Remember which file to read; nothing is opened until tsv() is called.

        :param file: path of the .tsv file to handle.
        """
        self.file = file

    def __repr__(self):
        return "File {file} under handling......".format(file=self.file)

    def tsv(self):
        """
        Parse the file into a list of per-row dictionaries.

        Whitespace-only lines are skipped wherever they occur. Fields are
        split on tab characters only; no quoting or escaping is interpreted
        and values are returned as strings.

        A data row whose field count differs from the header is truncated to
        the shorter of the two (zip semantics). If a column name is repeated
        in the header, the last matching field wins.

        :return: List.
            one dict per data row mapping column name -> field text, in file
            order; an empty list when the file has no header line.
        """
        # Built-in open() is used instead of codecs.open() on purpose:
        #   * codecs' readline() breaks lines with str.splitlines(), which
        #     also splits on \x0c, \x1c-\x1e, \x85, \u2028 and \u2029, so a
        #     field containing one of those was cut into bogus extra rows;
        #   * text mode with universal newlines only treats \n, \r and \r\n
        #     as line endings, which keeps the cross-OS compatibility.
        # "utf-8-sig" drops a leading byte-order mark (otherwise it ends up
        # inside the first column name) and still reads BOM-less UTF-8.
        with open(self.file, 'r', encoding="utf-8-sig") as f:
            # Lazily turn every non-blank line into its list of fields.
            records = (
                line.rstrip("\r\n").split('\t')
                for line in f
                if not line.isspace()
            )
            head = next(records, None)
            if head is None:
                # Empty file, or nothing but blank lines.
                return []
            # Consume the remaining rows while the file is still open.
            return [dict(zip(head, body)) for body in records]


if __name__ == "__main__":
    # Self-test: write a small sample file named "tsv" into the working
    # directory, then parse it back and print the resulting records.
    # The literal is indented for readability only; every space is removed
    # before writing, leaving the blank lines and tab-separated rows.
    sample = """


        Id\tContent
        1\tContent1
        2\tContent2
        3\tContent3
        4\tContent4


        1024\tContent1024

        """
    payload = sample.replace(' ', '')
    with codecs.open("tsv", 'w', "utf-8") as handle:
        handle.write(payload)
    reader = TSV(file="tsv")
    print(reader.tsv())

 

# 你可能感兴趣的:(#,Python,Python,tsv文件,\t,Tab,读取)