1. Pandas + glob：读取指定目录下的所有 .txt 文件并合并为一个 DataFrame
import pandas as pd
import glob
# Directory whose tab-separated *.txt files are merged into one DataFrame.
data_dir = "/public/data/"
# glob.glob() returns matches in arbitrary order; sorting keeps the row
# order of the merged frame reproducible between runs.
txt_files = sorted(glob.glob(data_dir + '*.txt'))
# pd.concat() raises ValueError on an empty sequence, so fall back to an
# empty DataFrame when no file matches the pattern.
df_all = (
    pd.concat([pd.read_csv(path, sep='\t') for path in txt_files])
    if txt_files
    else pd.DataFrame()
)
print(df_all)
2. 使用 enumerate 函数获取索引和值
# The first 11 uppercase letters: 'A' through 'K'.
letter = [chr(ord('A') + i) for i in range(0, 11)]
# enumerate() yields (index, value) pairs, so no manual counter is needed.
for idx, value in enumerate(letter):
    print(f"{idx}\t{value}")
3. 使用 zip 函数同时遍历多个列表
number = list(range(0, 11))
letter = [chr(ord('A') + i) for i in range(0, 11)]
# zip() pairs items positionally. The loop variables get their own names so
# the `number` and `letter` lists are not overwritten by the last pair, and
# the argument order matches the unpacking order so each line reads
# "<letter>: <number>".
for num, char in zip(number, letter):
    print(f"{char}: {num}")
4. 使用内置函数 map 转换数据、filter 过滤数据
number = list(range(0, 11))
# map() applies the function to every item; list() materialises the lazy
# iterator it returns.
squared_numbers = list(map(lambda x: x**2, number))
print(squared_numbers)
# filter() keeps only the items for which the predicate is true.
even_numbers = list(filter(lambda x: x % 2 == 0, number))
print(even_numbers)
5. 使用 concurrent.futures 模块实现循环的并发处理，提高执行效率（线程池适合 I/O 密集型任务；CPU 密集型计算受 GIL 限制，宜改用 ProcessPoolExecutor）
import concurrent.futures
def square(num):
    """Return ``num`` raised to the power of two."""
    return num ** 2
# Apply square() to every item of `number` on a thread pool.
# Executor.map() yields results in the order of the inputs, and leaving the
# `with` block shuts the pool down.
with concurrent.futures.ThreadPoolExecutor() as executor:
    res = list(executor.map(square, number))
print(res)
6. 使用asyncio模块实现异步处理,提高并发性能
import asyncio
import math
async def sqrt(num):
    """Coroutine that returns the square root of ``num`` via ``math.sqrt``.

    Raises:
        ValueError: If ``num`` is negative (raised by ``math.sqrt``).
    """
    return math.sqrt(num)
async def calculate():
    """Print the square roots of every item in the module-level ``number``.

    One ``sqrt`` coroutine is created per item and all of them are awaited
    together with ``asyncio.gather``, which returns the results in the same
    order as the coroutines were passed in.
    """
    run_tasks = [sqrt(num) for num in number]
    results = await asyncio.gather(*run_tasks)
    print(results)


# asyncio.run() starts an event loop, runs the coroutine to completion and
# closes the loop afterwards.
asyncio.run(calculate())
7. 程序运行时间分析装饰器
import time
def analysis_time(func):
    """Decorator that prints how long each call to ``func`` takes.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func``, prints the elapsed time in seconds, and returns ``func``'s
        result unchanged.
    """
    # Function-scope import so this snippet does not depend on an import
    # elsewhere in the file.
    import functools

    # functools.wraps copies func's __name__/__doc__ onto the wrapper so the
    # decorated function still identifies as the original.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic, so the interval cannot be skewed by
        # system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        # Keyword arguments must be unpacked with ``**``; a single ``*``
        # would pass the keyword *names* as extra positional arguments.
        res = func(*args, **kwargs)
        end_time = time.perf_counter()
        print(f"{func.__name__} program run time: {end_time - start_time}s")
        return res

    return wrapper
import concurrent.futures
def square(num):
    """Return the square of ``num``."""
    return num ** 2
@analysis_time
def calulate(number):
    """Square every item of ``number`` on a thread pool.

    Args:
        number: Iterable of values passed one by one to ``square``.

    Returns:
        A list of the squared values, in the same order as the input.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        res = list(executor.map(square, number))
    return res


# The decorator prints the elapsed time; this prints the returned list.
print(calulate(number))