Python 的多线程/多进程并发(concurrent.futures):
使用 as_completed() 可以优先处理已经完成的任务(按任务完成的先后顺序返回结果);若使用 pool.map(),任务同样并发执行,但结果按提交顺序返回,即使后面的任务先完成,也要等前面的任务结束后才能取到。
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor
import math
class Service:
    """Split a list of ids into blocks, process the blocks in a worker pool,
    and merge the per-block DataFrames into a single result."""

    @staticmethod
    def multi_classifier_predict(
        id_list,
        val,
        *,
        block_size=10,
        max_workers=4,
        executor_cls=ProcessPoolExecutor,
    ):
        """Run ``check_map`` over ``id_list`` in blocks and merge the results.

        Args:
            id_list: Iterable of hashable ids. Duplicates are dropped; the
                first-seen order is kept so the grouping is deterministic.
            val: Opaque value forwarded unchanged to every ``check_map`` call.
            block_size: Number of ids handed to one ``check_map`` call.
            max_workers: Size of the worker pool.
            executor_cls: ``concurrent.futures`` executor class to use. The
                default keeps the original process pool; pass
                ``ThreadPoolExecutor`` for I/O-bound work.

        Returns:
            ``{"data": records}`` where ``records`` is the merged DataFrame
            converted with ``to_dict(orient='records')`` (an empty list when
            no block produced any rows).

        Raises:
            ValueError: If ``block_size`` is smaller than 1.
            Exception: Whatever a ``check_map`` call raised; it is re-raised
                by ``future.result()``.
        """
        print("begin!")
        # dict.fromkeys de-duplicates like set() but preserves input order.
        unique_ids = list(dict.fromkeys(id_list))
        # Group the ids, block_size ids per group.
        id_blocks = Service.split_id_block_size(unique_ids, block_size=block_size)

        # Collect the non-empty frames and concatenate once at the end:
        # DataFrame.append was removed in pandas 2.0 and copied on every call.
        frames = []
        with executor_cls(max_workers=max_workers) as pool:
            tasks = [
                pool.submit(Service.check_map, block, val) for block in id_blocks
            ]
            # Handle results in completion order, not submission order.
            for future in as_completed(tasks):
                feat_df, block = future.result()
                print(f"id:{block}")
                if feat_df.empty:
                    continue
                frames.append(feat_df)

        df_group = pd.concat(frames) if frames else pd.DataFrame([])

        # When nothing came back there is no 'positionId' column at all;
        # treat that as "no id was found" instead of raising KeyError.
        if "positionId" in df_group.columns:
            found_ids = set(df_group["positionId"].values.tolist())
        else:
            found_ids = set()
        print(f'无效id为{set(unique_ids) - found_ids}')

        data = df_group.to_dict(orient='records')
        print("success!")
        return {"data": data}

    @staticmethod
    def split_id_block_size(pids, block_size):
        """Split ``pids`` into consecutive slices of at most ``block_size`` items.

        Args:
            pids: Sliceable sequence of ids.
            block_size: Maximum length of each slice; must be at least 1.

        Returns:
            A list of slices of ``pids``; empty when ``pids`` is empty. The
            last slice may be shorter than ``block_size``.

        Raises:
            ValueError: If ``block_size`` is smaller than 1 (a negative size
                would otherwise silently drop every id).
        """
        if block_size < 1:
            raise ValueError(f"block_size must be >= 1, got {block_size!r}")
        block_count = math.ceil(len(pids) / block_size)
        return [
            pids[index * block_size:(index + 1) * block_size]
            for index in range(block_count)
        ]

    @staticmethod
    def check_map(id, val):
        """Worker function executed in the pool for one block of ids.

        As written this is a stub: it ignores ``val`` and returns an empty
        DataFrame together with the ``id`` argument it was given.

        Args:
            id: The block of ids submitted by ``multi_classifier_predict``.
                The name shadows the builtin but is kept so keyword callers
                keep working.
            val: Extra value forwarded by the caller (unused here).

        Returns:
            A ``(DataFrame, id)`` tuple.
        """
        df_res = pd.DataFrame([])
        return df_res, id