CSV module in Python

https://docs.python.org/3/library/csv.html

这段代码批量给 .csv 文件加上 headers(thanks Dalao for the help)。但要注意:readlines() 读出的每一行都保留了行尾的换行符,如果再用 "\n".join() 拼接,每两行之间就会多出一个空行:

import os
# Directory that contains this script; every ``.csv`` file in it gets a header.
dir_name = os.path.dirname(os.path.realpath(__file__))
print('dir_name:', dir_name)

# Header line(s) to put at the top of each CSV file, one string per line.
columns = ["question, potentially_related_question, answer"]


def add_header(path, header_lines):
    """Rewrite the file at *path* with *header_lines* placed before its content.

    Args:
        path: Path of the text file to rewrite in place.
        header_lines: Iterable of strings; each is written as its own line
            (a trailing newline is appended to every entry).

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeError: If the file cannot be decoded with the default encoding.
    """
    with open(path) as f:
        body = f.read()
    with open(path, "w") as f:
        f.writelines(line + "\n" for line in header_lines)
        # The original content already carries its own line endings, so it is
        # written back verbatim; re-joining its lines with "\n" would insert a
        # blank line between every pair of rows.
        f.write(body)


for f_name in os.listdir(dir_name):
    if not f_name.endswith('.csv'):
        continue
    try:
        # os.listdir() returns bare names, so anchor them to dir_name rather
        # than relying on the current working directory.
        add_header(os.path.join(dir_name, f_name), columns)
    except (OSError, UnicodeError):
        # Best effort: report the file that could not be processed and go on.
        print(f_name)

来自段老板的建议:
办法好像效率不太高啊…
你可以试试 先把原来的第一行存下来 然后直接用columns把第一行覆盖掉 再把原来的第一行append到文件尾。
不只是速度…原来的方法相当于重写了整个文件…?直接readlines也很吃内存的。

打开读一行关上
打开写一行关上
打开append一行
最简单的

但是注意算一下字符数差值。覆盖要看长度的,你是把原来的第一行覆盖掉

修改之后的代码(把一个大的 CSV 文件按行数拆分成多个小文件,每个小文件都保留表头),非原创,改编自 StackOverflow 上的回答:

import os
import codecs

import csv

import sys
# import csv
# Raise the csv module's field size limit as far as this platform accepts:
# start from sys.maxsize and retry with a smaller value each time
# csv.field_size_limit() rejects the candidate with OverflowError.
maxInt = sys.maxsize
decrement = True

while decrement:
    try:
        csv.field_size_limit(maxInt)
    except OverflowError:
        # Candidate rejected: shrink it by a factor of 10 and try again.
        maxInt = int(maxInt/10)
    else:
        # Candidate accepted: stop looping.
        decrement = False

def split(filehandler, delimiter=',', row_limit=1000,
          output_name_template='tasks_%s.csv', output_path='.', keep_headers=True):
    """Split one CSV stream into numbered files of at most ``row_limit`` rows.

    Output files are named ``output_name_template % piece`` with ``piece``
    counting up from 1, and are created inside ``output_path``. The first
    piece is always created, even when the input has no rows.

    Args:
        filehandler: Iterable of text lines (e.g. an open text file) holding
            the CSV data to split.
        delimiter: Field delimiter used for both reading and writing.
        row_limit: Maximum number of data rows per output file; header rows
            are not counted.
        output_name_template: ``%``-format template that receives the piece
            number.
        output_path: Directory in which the output files are created.
        keep_headers: When true, the first input row is treated as a header
            and repeated at the top of every output file.

    Raises:
        OSError: If an output file cannot be created or written.
        csv.Error: If the input cannot be parsed as CSV.
    """
    reader = csv.reader(filehandler, delimiter=delimiter)
    # next(..., None) keeps an empty input from escaping as StopIteration.
    headers = next(reader, None) if keep_headers else None

    def open_piece(piece):
        """Open the output file for the given piece number."""
        path = os.path.join(output_path, output_name_template % piece)
        # newline='' lets the csv writer control row terminators itself, so
        # rows are not written as "\r\r\n" on platforms that translate "\n".
        return open(path, 'w', newline='')

    def start_writer(out_file):
        """Wrap ``out_file`` in a csv writer and emit the header, if any."""
        writer = csv.writer(out_file, delimiter=delimiter)
        if headers is not None:
            writer.writerow(headers)
        return writer

    current_piece = 1
    out_file = open_piece(current_piece)
    try:
        writer = start_writer(out_file)
        for i, row in enumerate(reader):
            # Row i (0-based) belongs to a new piece once the rows written so
            # far exceed what the pieces opened up to now can hold.
            if i + 1 > row_limit * current_piece:
                out_file.close()
                current_piece += 1
                out_file = open_piece(current_piece)
                writer = start_writer(out_file)
            writer.writerow(row)
    finally:
        # Always release the current output file so buffered rows are flushed.
        out_file.close()

# Example invocation: replace the placeholder path with the CSV file to split.
# The 'U' mode flag is rejected by open() on Python 3.11+, so the file is
# opened in plain text mode; newline='' is what the csv module recommends for
# files handed to csv.reader. The with-block closes the input when done.
with open('/your/path/here/ori_tasks.csv', newline='') as source_file:
    split(source_file)

你可能感兴趣的:(CSV module in Python)