from multiprocessing import Pool, cpu_count
from tqdm import tqdm
import math
import datetime
def prints(x):
    """Print ``x`` to stdout, prefixed with the current ``datetime.now()`` timestamp."""
    print(f'{datetime.datetime.now()} {x}')
def cost_time(func):
    """Decorator that reports how long each call to ``func`` takes.

    After ``func`` returns, the elapsed time (a ``datetime.timedelta``) is
    logged through ``prints`` as ``cost_time:<elapsed>``. The return value of
    ``func`` is passed through unchanged. Nothing is logged if ``func``
    raises; the exception simply propagates.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same call signature that keeps the name and
        docstring of ``func``.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import functools

    @functools.wraps(func)  # keep the wrapped function's __name__/__doc__
    def war(*args, **kwargs):
        start_time = datetime.datetime.now()
        res = func(*args, **kwargs)
        stop = datetime.datetime.now() - start_time
        prints(f'cost_time:{stop}')
        return res

    return war
def get_files():
    """Open one output file per ``max_file``-line chunk of ``inputfile``.

    Counts the lines in ``inputfile`` and opens
    ``ceil(line_count / max_file)`` files named ``outputfile_0``,
    ``outputfile_1``, ... in write mode. ``max_file`` is read from module
    scope.

    Returns:
        list: The opened file objects, where index ``i`` is ``outputfile_i``.
        The caller is responsible for closing them.

    Raises:
        OSError: If ``inputfile`` cannot be read or an output file cannot be
            opened. Output files opened so far are closed before re-raising.
    """
    # Stream the file to count lines instead of holding every line in memory.
    with open('inputfile', 'r') as rr:
        line_total = sum(1 for _ in rr)

    max_num = math.ceil(line_total / max_file)

    outputs = []
    try:
        for index in range(max_num):
            outputs.append(open(f'outputfile_{index}', 'w'))
    except OSError:
        # Do not leak the handles opened before the failure.
        for opened in outputs:
            opened.close()
        raise
    return outputs
def func(data):
    """Write one line to the output file selected by its chunk index.

    Args:
        data: A ``(count, line)`` pair; ``count`` indexes the module-level
            ``files`` list and ``line`` is written (via ``str``) to that file.
    """
    chunk_index, text = data
    files[chunk_index].write(str(text))
def func2(count, line):
    """Return ``count`` and ``line`` packed together as a two-element list."""
    pair = [count, line]
    return pair
def _call_func2(task):
    """Unpack one ``(count, line)`` task and forward it to ``func2``.

    ``Pool.imap`` passes a single argument per call, so the pair is unpacked
    here. Kept at module level so the pool can pickle it for the workers.
    """
    return func2(*task)


@cost_time
def run():
    """Distribute the lines of ``inputfile`` over the pre-opened output files.

    Each line is paired with its chunk index (``line_index // max_file``),
    sent through the worker pool via ``func2``, and the returned pair is
    handed to ``func``, which writes the line to ``files[count]``. Reads the
    module-level ``max_file`` and ``files``; every file in ``files`` is
    closed before returning, including when an error occurs.

    Raises:
        Exception: Anything raised while reading the input, inside a worker,
            or while writing is propagated to the caller.
    """
    try:
        # Pool is the inner context so it is shut down before the input file
        # is closed.
        with open('inputfile', 'r') as lines, Pool(cpu_count()) as pool:  # start the worker processes
            tasks = (
                (index // max_file, line)
                for index, line in enumerate(tqdm(lines))
            )
            # imap returns results in submission order (unlike per-task
            # completion callbacks), so lines keep their input order inside
            # each output file, and worker exceptions surface here.
            for result in pool.imap(_call_func2, tasks):  # dispatch to the workers
                func(result)
    finally:
        for file in files:
            file.close()
if __name__ == '__main__':
    # Number of input lines per output file; read as a module-level global by
    # get_files() and run(), so it must be set before either is called.
    max_file = 5000
    # Opened up front because func() and run() look up `files` as a
    # module-level global.
    files = get_files()
    run()