互斥 临界区 自旋锁 排队自旋锁 MCSLOCK 性能简单测试

VS2013


#include <iostream>
#include <windows.h>
#include <process.h>
using namespace std;
// Shared test counter; main() resets it to 0 before starting the workers.
// NOTE(review): presumably advanced by the worker threads under the lock —
// the code that uses it is garbled in this copy, confirm against the original.
DWORD dwTest;
// Limit that the counter is compared against.
const DWORD dwMax = 20000000;

// Lock selection. Only MY_CRITICAL and MY_MUTEX are tested by the #ifdef chain
// below; when neither is defined the #else (spin lock) branch is compiled, so
// MY_SPINLOCK acts as a label only.
//#define MY_CRITICAL
//#define MY_MUTEX
#define MY_SPINLOCK


// Exactly one lock object is declared, depending on the selection above.
#ifdef MY_CRITICAL
    CRITICAL_SECTION g_cs;
#elif defined MY_MUTEX
    HANDLE  g_mutex;
#else 
    SHORT   g_count;
#endif

int Init()
{
#ifdef MY_CRITICAL
	InitializeCriticalSection(&g_cs);
#elif defined MY_MUTEX
	g_mutex = CreateMutex(NULL,FALSE,NULL);
	if (g_mutex)
	{
		if (ERROR_ALREADY_EXISTS == GetLastError())
		{
			cout << "only one instance can run!" << endl;
			return 0;
		}
	}
	else{cout <<"mutex is null."<= dwMax)
		{
			UnLock();
			return 1;
		}
		UnLock();
	}

	return 0;
}

int main()
{
	if(!Init())
		return 0;
	dwTest = 0;
#define num 2

	HANDLE hThread[num]; int i = 0;
	DWORD begin = GetTickCount();
	hThread[i++] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, NULL, 0, NULL);
	hThread[i++] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, NULL, 0, NULL);
	WaitForMultipleObjects(num, hThread,0,INFINITE);
	DWORD end = GetTickCount();

	cout << "花费的时间为: "<



根据我的测试结果得出简单结论(不一定很准确):

耗时    互斥 > 临界 > 自旋锁

自旋锁采用Sleep(0)时

CPU  自旋锁  > 临界 > 互斥

采用Sleep(1)时,自旋锁的CPU占用较低。后面考虑用其它方法优化自旋锁,把空转和Sleep浪费的时间充分利用起来!



优化后代码如下:

#include <iostream>
#include <windows.h>
#include <process.h>
using namespace std;
// Shared counter that every worker thread increments while holding the lock.
DWORD dwTest;
// Value of dwTest at which the worker threads stop.
const DWORD dwMax = 200000000;

// Lock selection. Only MY_CRITICAL and MY_MUTEX are tested by the #ifdef
// chains in this program; when neither is defined the spin lock is compiled
// in, so MY_SPINLOCK acts as a label only.
#define MY_CRITICAL
//#define MY_MUTEX
//#define MY_SPINLOCK


// Exactly one lock object is declared, depending on the selection above.
// For the spin lock, g_count is the lock word: 0 = free, 1 = held.
#ifdef MY_CRITICAL
CRITICAL_SECTION g_cs;
#elif defined MY_MUTEX
HANDLE  g_mutex;
#else 
DWORD   g_count;
#endif

// Prepares the lock object selected at compile time.
// Returns 1 on success and 0 when the mutex could not be created or
// CreateMutex reported ERROR_ALREADY_EXISTS.
int Init()
{
#if defined(MY_CRITICAL)
	InitializeCriticalSection(&g_cs);
#elif defined(MY_MUTEX)
	g_mutex = CreateMutex(NULL, FALSE, NULL);
	if (!g_mutex)
	{
		cout << "mutex is null." << endl;
		return 0;
	}
	// Queried immediately after CreateMutex so no other call can overwrite it.
	if (GetLastError() == ERROR_ALREADY_EXISTS)
	{
		cout << "only one instance can run!" << endl;
		return 0;
	}
#else
	// Spin lock: 0 means "free".
	g_count = 0;
#endif
	return 1;
}

#ifdef MY_CRITICAL
#elif defined MY_MUTEX
#else 
void spin_lock()
{
	// 1 优点:比2指令更少
	//while (InterlockedExchange(&g_count, 1) == 1)
	//	Sleep(1);

	// 2 优点:资源被锁时,占用总线比1机会更少
	//while (InterlockedCompareExchange(&g_count, 1, 0) == 1)
	//	Sleep(1);

	// 3 结合1和2的优点(测试发现并不比1、2快。。)
	DWORD loop_count, yield_cnt, spin_count, pause_cnt;
	DWORD hold_cnt = 2;
	if (InterlockedExchange(&g_count, 1U) == 1U)
	{
		loop_count = 0;
		spin_count = 1;
		do {
			if (loop_count < hold_cnt) {
				for (pause_cnt = spin_count; pause_cnt > 0; --pause_cnt) {
					_mm_pause();        // 这是为支持超线程的 CPU 准备的切换提示
				}
				spin_count *= hold_cnt;
			}
			else {
				yield_cnt = loop_count - 1;
				if ((yield_cnt & 63) == 63) {
					Sleep(1);          // 真正的休眠, 转入内核态
				}
				else if ((yield_cnt & 3) == 3) {
					Sleep(0);          // 切换到优先级跟自己一样或更高的线程, 可以换到别的CPU核心上
				}
				else {
					if (!SwitchToThread()) {    // 让步给该线程所在的CPU核心上的别的线程,
						// 不能切换到别的CPU核心上等待的线程
						Sleep(0);      // 如果同核心上没有可切换的线程,
						// 则切到别的核心试试(只能切优先级跟自己相同或更好的)
					}
				}
			}
			loop_count++;
		} while (InterlockedCompareExchange(&g_count, 1U, 0U) == 1U);
	}
}
// Releases the spin lock by atomically storing 0 ("free") into g_count.
// The author also noted a compare-exchange from 1 to 0 as an alternative.
void spin_unlock()
{
	(void)InterlockedExchange(&g_count, 0);
}
#endif
// Acquires whichever lock was selected at compile time; blocks until it is held.
void Lock()
{
#if defined(MY_CRITICAL)
	EnterCriticalSection(&g_cs);
#elif defined(MY_MUTEX)
	WaitForSingleObject(g_mutex, INFINITE);
#else
	spin_lock();
#endif
}

// Releases the lock taken by Lock(), using the matching primitive.
void UnLock()
{
#if defined(MY_CRITICAL)
	LeaveCriticalSection(&g_cs);
#elif defined(MY_MUTEX)
	ReleaseMutex(g_mutex);
#else
	spin_unlock();
#endif
}

// Worker thread: repeatedly takes the lock and increments the shared counter
// until it reaches dwMax, printing progress at coarse intervals.
//
// The limit is checked BEFORE incrementing. Checking after the increment let
// every thread that arrived late bump the counter once more, so the run ended
// at dwMax + (threads - 1) instead of exactly dwMax.
//
// Returns 1 once the limit has been reached.
UINT WINAPI ThreadProc(void *)
{
	for (;;)
	{
		Lock();
		if (dwTest >= dwMax)
		{
			UnLock();
			return 1;
		}
		dwTest++;
		// Progress output: every 10,000 below 1e6, every 1,000,000 below 1e8,
		// and every 10,000,000 after that.
		if ((dwTest < 1000000 && dwTest % 10000 == 0) || (dwTest < 100000000 && dwTest % 1000000 == 0) || (dwTest % 10000000 == 0))
			cout << dwTest << endl;
		UnLock();
	}
}

int main()
{
	if (!Init())
		return 0;
	dwTest = 0;
#define num 4

	HANDLE hThread[num]; int i = 0;
	DWORD begin = GetTickCount();
	hThread[i++] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, NULL, 0, NULL);
	//SetThreadAffinityMask(hThread[i-1],i);
	hThread[i++] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, NULL, 0, NULL);
	//SetThreadAffinityMask(hThread[i - 1], i);
	hThread[i++] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, NULL, 0, NULL);
	//SetThreadAffinityMask(hThread[i - 1], i);
	hThread[i++] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, NULL, 0, NULL);
	//SetThreadAffinityMask(hThread[i - 1], i);
	WaitForMultipleObjects(num, hThread, 0, INFINITE);
	DWORD end = GetTickCount();

	cout << "花费的时间为: " << end - begin << " 毫秒" << endl;
	system("pause");
	return 0;
}


 
  

以上都是不公平锁:线程A锁住资源后,B先开始等待,然后C也开始等待;A释放资源后,B、C都有可能获得锁,而不一定是先等待的B。下面介绍一种公平锁MCSLOCK:

运行后发现效率很低。当然还是比互斥锁好些

#include <iostream>
#include <windows.h>
#include <process.h>
using namespace std;
// CAS(a, b, c): atomically stores c into *a if *a currently equals b, and
// evaluates to true when the swap happened (the value returned by
// InterlockedCompareExchangePointer is compared with b).
// The arguments are not parenthesised, so pass simple expressions only.
#define CAS(a,b,c) (InterlockedCompareExchangePointer((volatile PVOID*)a,(PVOID)c,(PVOID)b) == b)

// Shared counter incremented by the worker threads while holding the lock.
DWORD dwTest;
// Value of dwTest at which the worker threads stop.
const DWORD dwMax = 200000000;
// Decremented once per increment of dwTest; main() prints it and expects 0.
int   nTest = dwMax;
// Uses a fair queue lock (MCS lock).
// Per-thread queue node of the MCS lock.
//
// Both fields are written by one thread and polled in a wait loop by another
// (islock by the waiter, next by the releasing owner), so they are declared
// volatile: the compiler must re-read them on every iteration instead of
// keeping a cached copy. The constructor gives a freshly created node a
// defined state.
struct ThreadNode
{
	ThreadNode * volatile next;    // successor in the wait queue, nullptr if none has linked in yet
	volatile bool         islock;  // true while this node's owner must keep waiting

	ThreadNode() : next(nullptr), islock(false) {}
};

// The lock itself: one pointer to the most recently enqueued node
// (nullptr when the lock is free). It is updated with interlocked operations.
struct ThreadQueue
{
	ThreadNode *head;

	ThreadQueue() : head(nullptr) {}
};

// Allocates an empty lock queue and returns it through `queue`.
// Returns 1 on success, 0 if the allocation produced a null pointer.
// Plain `new` reports failure by throwing, so the null check is only a
// defensive guard; `queue` is cleared first so it is null if that happens.
int Init(ThreadQueue *&queue)
{
	queue = nullptr;
	ThreadQueue *fresh = new ThreadQueue;
	queue = fresh;
	if (fresh == nullptr)
		return 0;
	fresh->head = nullptr;   // empty queue: the lock is free
	return 1;
}

// Waits until node->islock becomes false, backing off progressively:
// first 10 rounds of busy-waiting with a doubling number of pause hints,
// then Sleep(0), then SwitchToThread() (falling back to Sleep(0)), and
// finally Sleep(1) on every further round.
void NodeIsLock(ThreadNode *node)
{
	const DWORD kSpinRounds = 10;    // rounds of pure busy-waiting
	const DWORD kSleep0Limit = 12;   // below this many yields: Sleep(0)
	const DWORD kYieldLimit = 64;    // below this many yields: SwitchToThread
	DWORD pauses = 1;
	DWORD yields = 0;

	for (DWORD round = 0; node->islock; ++round)
	{
		if (round < kSpinRounds)
		{
			for (DWORD n = 0; n < pauses; ++n)
				_mm_pause();
			pauses *= 2;
			continue;
		}

		++yields;
		if (yields < kSleep0Limit)
			Sleep(0);
		else if (yields >= kYieldLimit)
			Sleep(1);
		else if (!SwitchToThread())
			Sleep(0);
	}
}

// Waits until node->next becomes non-null, using the same progressive
// back-off as NodeIsLock: 10 busy-wait rounds with a doubling number of pause
// hints, then Sleep(0), then SwitchToThread() (falling back to Sleep(0)),
// and finally Sleep(1) on every further round.
void NodeNextIsNull(ThreadNode *node)
{
	const DWORD kSpinRounds = 10;    // rounds of pure busy-waiting
	const DWORD kSleep0Limit = 12;   // below this many yields: Sleep(0)
	const DWORD kYieldLimit = 64;    // below this many yields: SwitchToThread
	DWORD pauses = 1;
	DWORD yields = 0;

	for (DWORD round = 0; node->next == nullptr; ++round)
	{
		if (round < kSpinRounds)
		{
			for (DWORD n = 0; n < pauses; ++n)
				_mm_pause();
			pauses *= 2;
			continue;
		}

		++yields;
		if (yields < kSleep0Limit)
			Sleep(0);
		else if (yields >= kYieldLimit)
			Sleep(1);
		else if (!SwitchToThread())
			Sleep(0);
	}
}

// Acquires the MCS lock for the calling thread.
//
// queue - the lock; node - the caller's own queue node, which must stay valid
// until the matching UnLock() returns.
//
// The node is appended by atomically swapping it into queue->head. If there
// was no previous node the lock was free and is now held. Otherwise the node
// marks itself as waiting, links itself behind its predecessor and waits
// until the predecessor clears the flag.
void Lock(ThreadQueue *queue, ThreadNode *node)
{
	node->next = nullptr;
	ThreadNode *pre = (ThreadNode *)InterlockedExchangePointer((PVOID volatile *)&queue->head, (PVOID)node);
	if (pre == nullptr)      // lock was free
		return;

	node->islock = true;     // (a) must be visible before (b)
	// Keeps (a) ordered before (b): once the predecessor sees the link it may
	// clear islock at any moment. MemoryBarrier() replaces the former
	// `__asm{sfence}`, which only builds with the 32-bit x86 MSVC compiler.
	MemoryBarrier();
	pre->next = node;        // (b) publish this node to the predecessor

	// NodeIsLock(node) is the lower-CPU alternative to this plain yield loop.
	while (node->islock)
		Sleep(0);
}

// Releases the MCS lock held through `node`.
//
// If no successor has linked itself yet, try to reset queue->head from `node`
// back to null; success means nobody is waiting. If that fails, another thread
// has already swapped itself in, so wait for it to finish linking and then
// hand the lock over by clearing its flag.
void UnLock(ThreadQueue *queue, ThreadNode *node)
{
	ThreadNode *successor = node->next;
	if (successor == nullptr)
	{
		if (CAS(&queue->head, node, nullptr))
			return;

		// NodeNextIsNull(node) is the lower-CPU alternative to this yield loop.
		while ((successor = node->next) == nullptr)
			Sleep(0);
	}
	successor->islock = false;
}

// Worker thread for the MCS-lock benchmark.
//
// ptr is the shared ThreadQueue. Under the lock the thread increments dwTest
// and decrements nTest until dwTest reaches dwMax, printing progress at coarse
// intervals. Returns 1 once the limit has been reached.
//
// The queue node lives on this thread's stack instead of the heap: it is only
// needed while the thread runs, and once UnLock() has returned no other thread
// keeps a pointer to it, so no new/delete pair is required.
UINT WINAPI ThreadProc(void *ptr)
{
	ThreadQueue *queue = (ThreadQueue *)ptr;
	ThreadNode node;
	for (;;)
	{
		Lock(queue, &node);
		if (dwTest >= dwMax)
		{
			UnLock(queue, &node);
			return 1;
		}
		dwTest++;
		// Progress output: every 10,000 below 1e6, every 1,000,000 below 1e8,
		// and every 10,000,000 after that.
		if ((dwTest < 1000000 && dwTest % 10000 == 0) || (dwTest < 100000000 && dwTest % 1000000 == 0) || (dwTest % 10000000 == 0))
			cout << dwTest << endl;
		nTest--;

		UnLock(queue, &node);
	}
}

int main()
{
	ThreadQueue *queue = nullptr;
	if (!Init(queue))
		return 0;
	dwTest = 0;
#define num 4

	HANDLE hThread[num]; int i = 0;
	DWORD begin = GetTickCount();
	hThread[i++] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, queue, 0, NULL);
	//SetThreadAffinityMask(hThread[i-1],i);
	hThread[i++] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, queue, 0, NULL);
	//SetThreadAffinityMask(hThread[i - 1], i);
	hThread[i++] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, queue, 0, NULL);
	//SetThreadAffinityMask(hThread[i - 1], i);
	hThread[i++] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, queue, 0, NULL);
	//SetThreadAffinityMask(hThread[i - 1], i);
	WaitForMultipleObjects(num, hThread, 0, INFINITE);
	DWORD end = GetTickCount();

	cout << "nTest=0则锁操作是正确的.nTest=" << nTest << endl;
	cout << "花费的时间为: " << end - begin << " 毫秒" << endl;
	system("pause");
	return 0;
}


 
  

使用此锁,发现CPU超高!!!而在while处改成NodeIsLock和NodeNextIsNull后CPU虽然降下来了,但是运行速度很慢!!!

CLH LOCK也是排队自旋锁,有空可以实现一下C++版本

数组自旋锁:

#include <iostream>
#include <windows.h>
#include <process.h>
using namespace std;
// CAS(a, b, c): atomically stores c into *a if *a currently equals b, and
// evaluates to true when the swap happened. Not used by the visible code of
// this program.
#define CAS(a,b,c) (InterlockedCompareExchangePointer((volatile PVOID*)a,(PVOID)c,(PVOID)b) == b)
// Number of worker threads and of slots in the lock array.
#define num 4

// Array-based lock.
// Shared counter incremented by the worker threads while holding the lock.
DWORD dwTest;
// Value of dwTest at which the worker threads stop.
const DWORD dwMax = 2000000;// 200000000;
// Decremented once per increment of dwTest; main() prints it and expects 0.
int   nTest = dwMax;
// Only referenced by the commented-out ticket code in Lock().
DWORD _count = 0XFFFFFFFF;
// One flag per worker slot; a worker may enter only while its own flag is true.
bool  _lockarr[num] = {false};

// Waits until the caller's slot in _lockarr is set, yielding with Sleep(0)
// between checks. *ptr is the caller's fixed slot index.
//
// An earlier, disabled design drew a ticket instead: it incremented _count
// atomically, stored (ticket % num) into *ptr and issued a store fence before
// waiting.
void Lock(DWORD *ptr)
{
	for (;;)
	{
		if (_lockarr[*ptr])
			return;
		Sleep(0);
	}
}

// Releases the array lock: clears the caller's own slot and then hands the
// lock to the next slot in round-robin order. *ptr is the caller's slot index.
void UnLock(DWORD *ptr)
{
	_lockarr[*ptr] = false;
	// Keeps the two stores in order. MemoryBarrier() replaces the former
	// `__asm{sfence}`, which only builds with the 32-bit x86 MSVC compiler.
	MemoryBarrier();
	_lockarr[(*ptr + 1) % num] = true;
}

// Worker thread for the array-lock benchmark.
//
// ptr points to this worker's slot index. On each turn the worker increments
// dwTest and decrements nTest, printing progress at coarse intervals, until
// dwTest has reached dwMax. Returns 1 once the limit has been reached.
UINT WINAPI ThreadProc(void *ptr)
{
	DWORD *slot = (DWORD *)ptr;
	for (;;)
	{
		Lock(slot);
		const bool finished = (dwTest >= dwMax);
		if (!finished)
		{
			dwTest++;
			// Progress output: every 10,000 below 1e6, every 1,000,000 below
			// 1e8, and every 10,000,000 after that.
			if ((dwTest < 1000000 && dwTest % 10000 == 0) || (dwTest < 100000000 && dwTest % 1000000 == 0) || (dwTest % 10000000 == 0))
				cout << dwTest << endl;
			nTest--;
		}
		UnLock(slot);
		if (finished)
			return 1;
	}
}

int main()
{
	dwTest = 0;
	DWORD index[num];
	for (int i = 0; i < num; i++)
		index[i] = i;

	HANDLE hThread[num]; int i = 0;
	DWORD begin = GetTickCount();
	_lockarr[0] = true;
	hThread[i] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, &index[i], 0, NULL); i++;
	//SetThreadAffinityMask(hThread[i-1],i);
	hThread[i] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, &index[i], 0, NULL); i++;
	//SetThreadAffinityMask(hThread[i - 1], i);
	hThread[i] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, &index[i], 0, NULL); i++;
	//SetThreadAffinityMask(hThread[i - 1], i);
	hThread[i] = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, &index[i], 0, NULL); i++;
	//SetThreadAffinityMask(hThread[i - 1], i);
	WaitForMultipleObjects(num, hThread, 0, INFINITE);
	DWORD end = GetTickCount();

	cout << "nTest=0则锁操作是正确的.nTest=" << nTest << endl;
	cout << "花费的时间为: " << end - begin << " 毫秒" << endl;
	system("pause");
	return 0;
}


你可能感兴趣的:(无锁队列,服务器开发)