Source code share: a high-performance, cross-platform C++ multithreaded parallel library built on a thread pool, with benchmarks!

Out of practical need, I implemented a cross-platform multithreaded parallel library to break free of Windows PPL while balancing efficiency and portability. Its main features:

  • Written in portable C++11, with scheduling performance close to the Windows PPL library;
  • Reuses a well-known thread-pool implementation borrowed from another author, so threads are recycled efficiently;
  • Targets for parallel loops can be passed as STL containers, C arrays, or raw pointers (see the sketch below);
  • The code is easy to extend with your own features;
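
To give a feel for the API before the numbers, here is a minimal usage sketch (illustrative only; usage_sketch is a made-up name) of the three ways a target can be passed. It assumes the CLParallel.h listed further below is on the include path:

#include <vector>
#include "CLParallel.h"

void usage_sketch() {
	std::vector<int> v(1000, 0);
	int a[1000] = {};
	// 1) STL iterator range
	parallel_iter(v.begin(), v.end(), [](int& x) { x += 1; });
	// 2) container plus index range
	parallel(v, 0, v.size(), [](int& x) { x += 1; });
	// 3) raw pointer to the first element of a C array
	parallel(&a[0], 0, 1000, [](int& x) { x += 1; });
}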

First, the benchmark results:
[Figure 1: benchmark timing results]

  • The loop body used in the benchmark is deliberately trivial ({ i += 1; }), so that what is compared is the threading and scheduling overhead rather than the work itself;
  • Because the data set is small, each pass is short, and the VC compiler on Windows optimizes the serial loop aggressively, the serial version still looks very fast (it is not directly comparable);
  • For contiguously stored containers (C arrays, vector) the code is slightly slower than the Windows PPL library, but the scheduling overhead is acceptable;
  • For non-contiguous containers (unordered_map in this test) the code is somewhat faster than the Windows PPL library;
  • Overall, weighing performance against cross-platform portability, the library's efficiency is up to standard.

Below is the concurrency check (30 tasks requested explicitly; they do run in parallel):
[Figure 2: output of 30 parallel tasks]
(Requesting as many tasks as the CPU has hardware threads also runs in parallel.)
[Figure 3: output with one task per hardware thread]
During the parallel run the CPU shows a sustained utilization spike (confirming parallel execution):
[Figure 4: CPU utilization spike during the parallel run]

Test code:

// Headers this test needs (assumed; the original listing omitted them):
#include <iostream>
#include <vector>
#include <unordered_map>
#include <map>
#include <mutex>
#include <string>
#include <chrono>
#include <ctime>
#include <cstdio>
#include <ppl.h>        // Windows PPL, used for comparison only
#include "CLParallel.h"
using namespace std;

int main() {
	std::mutex cs;
	const int sii = 50000;
	const int cycle = 5000;
	int con[sii] = { 2 };
	vector<int> con2(sii, 2);
	unordered_map<int, int> con3;
	map<int, int> con4;
	for (int i = 0; i < sii; ++i)con3[i] = 2, con4[i] = 2;
	auto incre = [&](int& i) {i += 1; };

	auto ttt0 = clock();
	for (size_t i = 0; i < cycle; i++)
		for (size_t j = 0; j < sii; j++)
			incre(con[j]);
	auto ttt1 = (clock() - ttt0) / double(CLOCKS_PER_SEC); ttt0 = clock();
	for (size_t i = 0; i < cycle; i++)
		parallel(&con[0],0, sii, incre); // pass a raw pointer to the first element
	auto ttt2 = (clock() - ttt0) / double(CLOCKS_PER_SEC);
	ttt0 = clock();
	for (size_t i = 0; i < cycle; i++)
		parallel_iter(con2.begin(), con2.end(), incre);
	auto ttt3 = (clock() - ttt0) / double(CLOCKS_PER_SEC); ttt0 = clock();
	for (size_t i = 0; i < cycle; i++)
		concurrency::parallel_for_each(con2.begin(), con2.end(), incre);
	auto ttt6 = (clock() - ttt0) / double(CLOCKS_PER_SEC); ttt0 = clock();
	for (size_t i = 0; i < cycle; i++)
		parallel_iter(con3.begin(), con3.end(), [](pair<const int, int>& i) {i.second += 1; });
	auto ttt4 = (clock() - ttt0) / double(CLOCKS_PER_SEC); ttt0 = clock();
	for (size_t i = 0; i < cycle; i++)
		concurrency::parallel_for_each(con3.begin(), con3.end(), [](pair<const int, int>& i) {i.second += 1; });
	auto ttt5 = (clock() - ttt0) / double(CLOCKS_PER_SEC); ttt0 = clock();
	for (size_t i = 0; i < cycle; i++)
		parallel_iter(con4.begin(), con4.end(), [](pair<const int, int>& i) {i.second += 1; });
	auto ttt7 = (clock() - ttt0) / double(CLOCKS_PER_SEC); ttt0 = clock();
	for (size_t i = 0; i < cycle; i++)
		concurrency::parallel_for_each(con4.begin(), con4.end(), [](pair<const int, int>& i) {i.second += 1; });
	auto ttt8 = (clock() - ttt0) / double(CLOCKS_PER_SEC); ttt0 = clock();
	cout << "\n[ data size " << sii << " ][ iterations " << cycle << " ] -> execution time summary ---------------"
		<< endl << "Serial                 : " << ttt1 << " s"
		<< endl << "C array                : " << ttt2 << " s"
		<< endl << "vector                 : " << ttt3 << " s"
		<< endl << "vector       (Win PPL) : " << ttt6 << " s"
		<< endl << "unordered_map          : " << ttt4 << " s"
		<< endl << "unordered_map(Win PPL) : " << ttt5 << " s"
		<< endl << "map                    : " << ttt7 << " s"
		<< endl << "map          (Win PPL) : " << ttt8 << " s";

	size_t tCounts2 = 30;
	auto pfun2 = [&cs, tCounts2](int i, int ni) {
		std::this_thread::sleep_for(std::chrono::milliseconds(1));
		cs.lock();
		cout << endl << "Running thread " << i + 1 << " / " << tCounts2;
		cs.unlock();
	};
	auto pfun3 = [&cs](int i, int ni) {
		std::this_thread::sleep_for(std::chrono::milliseconds(1));
		cs.lock();
		cout << endl << "Running thread " << i + 1 << " / " << parallel_helper_hardware_concurrency();
		cs.unlock();
	};
	cout << "\n\nParallel execution check (explicit task count) --------------";
	parallel_proc(tCounts2, pfun2);
	cout << "\n\nParallel execution check (task count = hardware threads) --------------";
	parallel_proc(pfun3);
	int ci = 0;
	auto ft = parallel_proc([&ci, &cs](int trdIndex, int trdIndex2)->std::string {
		std::this_thread::sleep_for(std::chrono::milliseconds(1));
		std::lock_guard<std::mutex> lock(cs);  // guard ci: the original incremented it without synchronization
		ci++;
		char buf[64];
		sprintf_s(buf, "\n%d got the right to modify ci, ci = %d", trdIndex, ci);
		return buf;
		});
	for (size_t i = 0; i < ft.size(); i++)
		cout << ft[i].get();
	return 1;
}

Header file:
CLParallel.h


#ifndef __CL_PARALLEL_H__
#define __CL_PARALLEL_H__

#include <thread>
#include <future>
#include <vector>
#include <iterator>
#include <type_traits>
#include <cstring>

//Slicing helper: splits a total of ssi items into sectionTotals slices by index; currentIndex is zero-based
inline void parallel_helper_getThreadSection(size_t ssi, size_t sectionTotals, size_t currentIndex, size_t& iStart, size_t& iEnd) {
	auto n = ssi / sectionTotals;
	auto yu = ssi % sectionTotals;
	if (currentIndex < yu)
	{
		iStart = (currentIndex)*n + (currentIndex);
		iEnd = iStart + n + 1;
	}
	else {
		iStart = (currentIndex)*n + yu;
		iEnd = iStart + n;
	}
}
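
// Illustrative example (not part of the library): splitting 10 items across 3 slices with
// the helper above yields [0,4), [4,7), [7,10) -- the ssi % sectionTotals leftover items go
// one apiece to the first slices:
//   size_t is, ie;
//   parallel_helper_getThreadSection(10, 3, 0, is, ie);   // is == 0, ie == 4
//   parallel_helper_getThreadSection(10, 3, 1, is, ie);   // is == 4, ie == 7
//   parallel_helper_getThreadSection(10, 3, 2, is, ie);   // is == 7, ie == 10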

//Number of hardware execution threads available
inline size_t parallel_helper_hardware_concurrency() {
	static auto _core = size_t(std::thread::hardware_concurrency());
	return _core;
}

#ifndef _Parallel_Helper_ThreadPool_
#define _Parallel_Helper_ThreadPool_
#include <queue>
#include <memory>
#include <mutex>
#include <condition_variable>
#include <functional>
#include <stdexcept>
class ThreadPool {
public:
	//Constructor: pushes worker threads into the worker vector with emplace_back(), each initialized with a lambda
	ThreadPool(size_t threads = parallel_helper_hardware_concurrency()) : stop(false) {
		for (size_t i = 0; i < threads; ++i)
			workers.emplace_back(
				[this]
				{
					for (;;)
					{
						// task is a function object that receives work from the task queue
						std::function<void()> task;
						{
							//Lock the mutex; it unlocks automatically when the lock object goes out of scope
							std::unique_lock<std::mutex> lock(this->queue_mutex);

							//(1) The thread blocks only while the predicate lambda returns false; the lock is released while blocked.
							//(2) When notified and the predicate returns true, the thread wakes up and re-acquires the lock.
							this->condition.wait(lock, [this] { return this->stop || !this->tasks.empty(); });

							if (this->stop && this->tasks.empty())
								return;

							//Take one task off the queue
							task = std::move(this->tasks.front());
							this->tasks.pop();
						}                            // unlocks automatically here
						task();                      // run the task
					}
				}
				);
	}
	//Member function template: enqueue a task
	//Adds a new task to the task queue and returns a future for its result
	template<class F, class... Args>
	auto enqueue(F&& f, Args&&... args)->std::future<typename std::result_of<F(Args...)>::type>
	{
		// Deduce the task's return type
		using return_type = typename std::result_of<F(Args...)>::type;

		// Wrap the bound call in a shared_ptr to a packaged_task
		auto task = std::make_shared< std::packaged_task<return_type()> >(
			std::bind(std::forward<F>(f), std::forward<Args>(args)...)
			);

		std::future<return_type> res = task->get_future();
		{
			std::unique_lock<std::mutex> lock(queue_mutex);  //lock the queue
			if (stop)
				throw std::runtime_error("enqueue on stopped ThreadPool");

			tasks.emplace([task]() { (*task)(); });         //push the task onto the queue
		}                                                   //unlocks automatically here
		condition.notify_one();                             //notify the condition variable to wake one worker
		return res;
	}
	// Destructor: stops and joins all worker threads
	~ThreadPool()
	{
		{
			std::unique_lock<std::mutex> lock(queue_mutex);
			stop = true;
		}
		condition.notify_all();
		for (std::thread& worker : workers)
			worker.join();
	}

private:
	std::vector< std::thread > workers;            //worker threads, one std::thread per element
	std::queue< std::function<void()> > tasks;     //task queue, one function object per element

	std::mutex queue_mutex;                        //mutex protecting the queue
	std::condition_variable condition;             //condition variable
	bool stop;                                     //stop flag
};
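
// Usage sketch for the pool itself (illustrative only -- the parallel_* wrappers below are
// the intended entry points):
//   ThreadPool pool(4);                                        // 4 idle workers
//   auto fut = pool.enqueue([](int a, int b) { return a + b; }, 2, 3);
//   int sum = fut.get();                                       // sum == 5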

//Returns the global parallel thread pool (initialized with one idle worker per available hardware thread)
inline ThreadPool& parallel_helper_taskPool_static() {
	static ThreadPool _globle_thread_pool(parallel_helper_hardware_concurrency());
	return _globle_thread_pool;
}

//Submits a task to the global pool's queue and returns its future
template<class F, class... Args>
auto parallel_helper_task(F&& f, Args&&... args)->std::future<typename std::result_of<F(Args...)>::type>
{
	return parallel_helper_taskPool_static().enqueue(std::forward<F>(f), std::forward<Args>(args)...);
}
//Wrapper for a private local thread pool (the pool is allocated with new and released in the destructor)
template<bool useStaticPool = false>struct _TaskPool {
	ThreadPool& pool;
	template<class F, class... Args>
	auto operator()(F&& f, Args&&... args)->std::future<typename std::result_of<F(Args...)>::type> {
		return pool.enqueue(std::forward<F>(f), std::forward<Args>(args)...);
	}
	_TaskPool(size_t trdCounts = parallel_helper_hardware_concurrency())
		:pool(*(new ThreadPool(trdCounts))) {}
	~_TaskPool() {
		delete& pool;
	}
};
//Specialization that forwards to the global thread pool
template<>struct _TaskPool<true> {
	ThreadPool& pool = parallel_helper_taskPool_static();
	template<class F, class... Args>
	auto operator()(F&& f, Args&&... args)->std::future<typename std::result_of<F(Args...)>::type>
	{
		return pool.enqueue(std::forward<F>(f), std::forward<Args>(args)...);
	}
};

//Globally shared thread pool; operator() submits a task that may take arguments
struct TaskPoolStatic :_TaskPool<true> {};
//Local thread pool; operator() submits a task that may take arguments
struct TaskPool :_TaskPool<> {
	//Local thread pool; trdCounts is the number of idle worker threads to create
	TaskPool(size_t trdCounts = parallel_helper_hardware_concurrency())
		:_TaskPool<>(trdCounts) {}
};
#endif
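
// Usage sketch (illustrative only): the _task parameter of the parallel_* functions below
// defaults to TaskPoolStatic (the shared global pool); a private pool can be passed instead:
//   TaskPool myPool(4);                               // local pool with 4 workers
//   std::vector<int> v(100, 0);
//   parallel(v, 0, v.size(), [](int& x) { ++x; }, myPool);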

//Control exception: continue with the next loop iteration
class parallel_exception_continue :public std::runtime_error {
public:	parallel_exception_continue() :std::runtime_error("Parallel control exception: Continue next loop.") {};
};
//Control exception: break out of the current worker's slice
class parallel_exception_break :public std::runtime_error {
public:	parallel_exception_break() :std::runtime_error("Parallel control exception: Break this thread.") {};
};
//Control exception: break out of the whole group of worker threads
class parallel_exception_break_all :public std::runtime_error {
public:	parallel_exception_break_all() :std::runtime_error("Parallel control exception: Break all threads.") {};
};
//Control function: continue with the next iteration (must be called inside a parallel body)
inline void parallel_control_continue() {
	throw parallel_exception_continue();
};
#define parallel_continue (parallel_control_continue()) //control: continue with the next iteration
//Control function: break out of the current worker (must be called inside a parallel body)
inline void parallel_control_break() {
	throw parallel_exception_break();
};
#define parallel_break (parallel_control_break()) //control: break out of the current worker
//Control function: break out of the whole thread group launched by this call (must be called inside a parallel body)
inline void parallel_control_break_all() {
	throw parallel_exception_break_all();
};
#define parallel_break_all (parallel_control_break_all()) //control: break out of the whole parallel thread group
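
// Usage sketch of the loop-control helpers inside a parallel body (illustrative only):
//   parallel_iter(v.begin(), v.end(), [](int& x) {
//       if (x < 0) parallel_continue;    // skip this element and keep looping
//       if (x > 99) parallel_break_all;  // ask every worker in this group to stop
//       x += 1;
//   });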

//do not use it!!!
template<class _iter, class _function, class _task = TaskPoolStatic>
void _parallel_iter(size_t nThreadsCounts, _iter startedIndex, _iter endIndex, _function&& func, _task&& task
	, std::random_access_iterator_tag) {
	auto pfunc = [](bool* bKeep,size_t i, size_t nThreadsCounts, _iter _iis, _iter _iie, _function&& func) {
		size_t is, ie;
		parallel_helper_getThreadSection(_iie - _iis, nThreadsCounts, i, is, ie); //compute this worker's slice
		auto iis = _iis + is, iie = _iis + ie;
	ag:
		try {
			for (; iis != iie; ++iis) {
				if (*bKeep)func(*iis);
				else break;
			}
		}
		catch (const parallel_exception_continue & e) {
			++iis;
			if (!strstr(e.what(), "Parallel"))
				throw std::runtime_error(e.what());
			goto ag;
		}
		catch (const parallel_exception_break & e) {
			if (!strstr(e.what(), "Parallel"))
				throw std::runtime_error(e.what());
		}
		catch (const parallel_exception_break_all & e) {
			if (!strstr(e.what(), "Parallel"))
				throw std::runtime_error(e.what());
			*bKeep = false;
		}
	};
	bool bKeep = true;
	//assemble the sequence of futures
	std::vector<std::future<void>> works(nThreadsCounts);
	for (size_t i = 0; i < nThreadsCounts; i++)
		works[i] = task(pfunc,&bKeep, i, nThreadsCounts, startedIndex, endIndex, func);//run
	//synchronize with the calling thread
	for (auto& i : works)i.wait();
}
//do not use it!!!
template<class _iter, class _function, class _task = TaskPoolStatic>
void _parallel_iter(size_t nThreadsCounts, _iter startedIndex, _iter endIndex, _function&& func, _task&& task
	, std::bidirectional_iterator_tag) {
	auto pfunc = [](bool* bKeep,_iter iis, _iter iie, _function&& func) {
	ag:
		try {
			for (; iis != iie; ++iis) {
				if (*bKeep)func(*iis);
				else break;
			}
		}
		catch (const parallel_exception_continue & e) {
			++iis;
			if (!strstr(e.what(), "Parallel"))
				throw std::runtime_error(e.what());
			goto ag;
		}
		catch (const parallel_exception_break & e) {
			if (!strstr(e.what(), "Parallel"))
				throw std::runtime_error(e.what());
		}
		catch (const parallel_exception_break_all & e) {
			if (!strstr(e.what(), "Parallel"))
				throw std::runtime_error(e.what());
			*bKeep = false;
		}
	};
	auto pAdv = [](_iter& iter, size_t d) {	while (d--) ++iter; };
	auto pDiff = [](_iter startedIndex, _iter endIndex)-> size_t {size_t si = 0; for (; startedIndex != endIndex; ++startedIndex)++si; return si; };
	size_t is, ie;
	size_t si = pDiff(startedIndex, endIndex);
	endIndex = startedIndex;
	//assemble the sequence of futures
	std::vector<std::future<void>> works(nThreadsCounts);
	bool bKeep = true;
	for (size_t i = 0; i < nThreadsCounts; i++) {
		parallel_helper_getThreadSection(si, nThreadsCounts, i, is, ie);//compute this worker's slice
		pAdv(endIndex, ie - is);
		works[i] = task(pfunc,&bKeep, startedIndex, endIndex, func);//run
		startedIndex = endIndex;
	}
	//synchronize with the calling thread
	for (auto& i : works)i.wait();
}

//Runs nThreadsCounts worker tasks, synchronized with the calling thread, over the iterator range [startedIndex, endIndex) of an STL container;
//the callback receives a reference to each element;
template<class _iter, class _function, class _task = TaskPoolStatic>
void parallel_iter(size_t nThreadsCounts, _iter&& startedIndex, _iter&& endIndex, _function&& func, _task&& task = _task()) {
	_parallel_iter(nThreadsCounts, startedIndex, endIndex, func, task
		, typename std::iterator_traits<typename std::decay<_iter>::type>::iterator_category()
		//, typename _get_fun_args_counts,std::param_type<_function>>::type()
	);
}
//Runs nThreadsCounts worker tasks, synchronized with the calling thread, over the iterator range [startedIndex, endIndex) of an STL container;
//the callback receives a reference to each element;
template<class _iter, class _function, class _task = TaskPoolStatic>
void parallel(size_t nThreadsCounts, _iter&& startedIndex, _iter&& endIndex, _function&& func, _task&& task = _task()) {
	_parallel_iter(nThreadsCounts, startedIndex, endIndex, func, task
		, typename std::iterator_traits<typename std::decay<_iter>::type>::iterator_category()
		//, typename _get_fun_args_counts>::type()
	);
}

//Runs one worker task per CPU hardware thread, synchronized with the calling thread, over the iterator range [startedIndex, endIndex) of an STL container;
//the callback receives a reference to each element;
template<class _iter, class _function, class _task = TaskPoolStatic>
void parallel_iter(_iter&& startedIndex, _iter&& endIndex, _function&& func, _task&& task = _task()) {
	parallel_iter(parallel_helper_hardware_concurrency(), startedIndex, endIndex, func, task);
}
//Runs one worker task per CPU hardware thread, synchronized with the calling thread, over the iterator range [startedIndex, endIndex) of an STL container;
//the callback receives a reference to each element;
template<class _iter, class _function, class _task = TaskPoolStatic>
void parallel(_iter&& startedIndex, _iter&& endIndex, _function&& func, _task&& task = _task()) {
	parallel_iter(parallel_helper_hardware_concurrency(), startedIndex, endIndex, func, task);
}

//Runs nThreadsCounts worker tasks, synchronized with the calling thread, over elements con[startedIndex] .. con[endIndex-1];
//the callback receives a reference to each element of the container or array;
template<class _contain, class _function, class _task = TaskPoolStatic>
void parallel(size_t nThreadsCounts, _contain& con, size_t startedIndex, size_t endIndex, _function&& func, _task&& task = _task()) {
	auto pfunc = [](bool* bKeep,size_t ni, size_t nThreadsCounts, _contain* con, size_t startedIndex, size_t endIndex, _function&& func) {
		size_t is, ie;
		parallel_helper_getThreadSection(endIndex - startedIndex, nThreadsCounts, ni, is, ie);
		is += startedIndex;
		ie += startedIndex;
		ag:
		try {
			for (; is < ie; ++is) {
				if (*bKeep)func((*con)[is]);
				else break;
			}
		}
		catch (const parallel_exception_continue & e) {
			++is;
			if (!strstr(e.what(), "Parallel"))
				throw std::runtime_error(e.what());
			goto ag;
		}
		catch (const parallel_exception_break & e) {
			if (!strstr(e.what(), "Parallel"))
				throw std::runtime_error(e.what());
		}
		catch (const parallel_exception_break_all & e) {
			if (!strstr(e.what(), "Parallel"))
				throw std::runtime_error(e.what());
			*bKeep = false;
		}
	};
	bool bKeep = true;
	//assemble the sequence of futures
	std::vector<std::future<void>> works(nThreadsCounts);
	for (size_t i = 0; i < nThreadsCounts; i++)
		works[i] = task(pfunc, &bKeep, i, nThreadsCounts, &con, startedIndex, endIndex, func);//run
	//synchronize with the calling thread
	for (auto& i : works)i.wait();
}

//Runs nThreadsCounts worker tasks, synchronized with the calling thread, over elements con[startedIndex] .. con[endIndex-1];
//the callback receives a reference to each element of the container or array;
template<class _contain, class _function, class _task = TaskPoolStatic>
void parallel(size_t nThreadsCounts, _contain&& con, size_t startedIndex, size_t endIndex, _function&& func, _task&& task = _task()) {
	parallel(nThreadsCounts, con, startedIndex, endIndex, func, task);
}

//Runs one worker task per CPU hardware thread, synchronized with the calling thread, over elements con[startedIndex] .. con[endIndex-1];
//the callback receives a reference to each element of the container or array;
template<class _contain, class _function, class _task = TaskPoolStatic>
void parallel(_contain&& con, size_t startedIndex, size_t endIndex, _function&& func, _task&& task = _task()) {
	parallel(parallel_helper_hardware_concurrency(), con, startedIndex, endIndex, func, task);
}

//Runs nThreadsCounts worker tasks, synchronized with the calling thread; the callback receives the current task index i (a zero-based index) and the total task count nThreadsCounts.
//Returns the tasks' futures;
template<class _function,class _task = TaskPoolStatic>
auto parallel_proc(size_t nThreadsCounts, _function&& func, _task&& task = _task())
	-> std::vector<std::future<typename std::result_of<_function(size_t, size_t)>::type>> {
	typedef typename std::result_of<_function(size_t, size_t)>::type key_type;
	//assemble the sequence of futures
	std::vector<std::future<key_type>> works(nThreadsCounts);
	for (size_t i = 0; i < nThreadsCounts; i++)
		works[i] = task(func, i, nThreadsCounts);//run
	//synchronize with the calling thread
	for (auto& i : works)i.wait();
	return works;
}

//Runs one worker task per CPU hardware thread, synchronized with the calling thread;
//the callback has the form R(T1 i, T2 nThreadsCounts):
//i is the current task index (a zero-based index) of type T1, nThreadsCounts is the total task count of type T2, and R is the return type;
//the function returns the tasks' futures;
template<class _function, class _task = TaskPoolStatic>
auto parallel_proc(_function&& func, _task&& task = _task())
	-> std::vector<std::future<typename std::result_of<_function(size_t, size_t)>::type>> {
	return parallel_proc(parallel_helper_hardware_concurrency(), func, task);
}
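
// Usage sketch (illustrative only): run one task per hardware thread and collect the results in order:
//   auto futs = parallel_proc([](int idx, int total) { return idx * 10; });
//   for (auto& f : futs)
//       std::cout << f.get() << " ";   // prints 0 10 20 ... (futs[i] belongs to task index i)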

#endif
