随想录(内存屏障示例代码)

【 声明:版权所有,欢迎转载,请勿用于商业用途。  联系信箱:feixiaoxing @163.com】  


    内存屏障,英文称之为memory barrier。需要使用内存屏障的原因很多,有的是因为编译器优化打乱了代码顺序,有的是因为cpu乱序执行,还有的是因为cpu cache没有按顺序同步。编译器优化造成的乱序很容易理解,但是cpu乱序为什么也需要内存屏障,却很少在网上看到相关的说明代码。今天,很偶然在网上看到网易的何登成同学写的《CPU Cache and Memory Ordering》,其中就包括了这一份代码,启发很大。大家可以下载下来,运行看看结果,思考思考。代码的编译环境是linux,这个需要注意一下。编译命令也一并包含在代码开头的注释中了。


// compile with: g++ -o ordering -O2 ordering.cpp -lpthread

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <stdlib.h>

// Experiment switches. Set either of these to 1 to prevent CPU reordering:
//  - USE_CPU_FENCE: the worker threads execute an `mfence` between their two
//    memory accesses instead of the compiler-only barrier.
//  - USE_SINGLE_HW_THREAD: main() pins both worker threads to CPU 0, and
//    <sched.h> is included for the affinity macros.
#define USE_CPU_FENCE              0
#define USE_SINGLE_HW_THREAD       0  // Supported on Linux, but not Cygwin or PS3

#if USE_SINGLE_HW_THREAD
#include <sched.h>
#endif


//-------------------------------------
//  MersenneTwister
//  Small Mersenne-Twister style pseudo-random number generator, used to
//  introduce random timing delays. All state lives in the object itself,
//  so it can be used from several threads as long as each thread owns
//  its own instance.
//-------------------------------------
#define MT_IA  397
#define MT_LEN 624

class MersenneTwister
{
    unsigned int m_buffer[MT_LEN];  // generator state words
    int m_index;                    // slot that the next call regenerates

public:
    // Fills the state with `seed` and then runs the generator to mix it.
    MersenneTwister(unsigned int seed);
    // Returns the next pseudo-random value.
    // Declared noinline so that the function call acts as a compiler barrier:
    unsigned int integer() __attribute__((noinline));
};

MersenneTwister::MersenneTwister(unsigned int seed)
    : m_index(0)
{
    // Every state word starts out equal to the seed...
    for (unsigned int *word = m_buffer; word != m_buffer + MT_LEN; ++word)
        *word = seed;

    // ...and the state is then shuffled by discarding MT_LEN * 100 outputs.
    int remaining = MT_LEN * 100;
    while (remaining-- > 0)
        integer();
}

unsigned int MersenneTwister::integer()
{
    // Slots involved in this step; each index wraps around at MT_LEN.
    const int current = m_index;
    int following = current + 1;
    if (following >= MT_LEN)
        following = 0;
    int partner = current + MT_IA;
    if (partner >= MT_LEN)
        partner -= MT_LEN;

    // Twist: top bit of the current word joined with the low 31 bits of the
    // following word, folded into the partner word.
    const unsigned int mixed =
        (m_buffer[current] & 0x80000000u) | (m_buffer[following] & 0x7fffffffu);
    unsigned int value = m_buffer[partner] ^ (mixed >> 1);
    if (mixed & 1u)
        value ^= 0x9908B0DFu;
    m_buffer[current] = value;
    m_index = following;

    // Swizzle (tempering) of the freshly generated word.
    value ^= value >> 11;
    value ^= (value << 7) & 0x9d2c5680u;
    value ^= (value << 15) & 0xefc60000u;
    value ^= value >> 18;
    return value;
}


//-------------------------------------
//  Main program, as described in the post
//-------------------------------------
// Semaphores that run the experiment in lock-step: main() posts beginSema1 and
// beginSema2 to start one round in each worker thread, and every worker posts
// endSema once its transaction is complete.
sem_t beginSema1;
sem_t beginSema2;
sem_t endSema;

// X and Y are the shared variables the workers store to and load from; main()
// resets both to 0 before each round. r1 and r2 receive the values the workers
// loaded and are inspected by main() after both workers have finished.
int X, Y;
int r1, r2;

/* thread1Func, thread2Func for StoreLoad */
/* thread3Func, thread4Func for StoreStore LoadLoad */
/* thread5Func, thread6Func for LoadStore */

void *thread1Func(void *param)
{
    MersenneTwister random(1);
    for (;;)
    {
        sem_wait(&beginSema1);  // Wait for signal
        while (random.integer() % 8 != 0) {}  // Random delay

        // ----- THE TRANSACTION! -----
        X = 1;
#if USE_CPU_FENCE
        asm volatile("mfence" ::: "memory");  // Prevent CPU reordering
#else
        asm volatile("" ::: "memory");  // Prevent compiler reordering
#endif
        r1 = Y;

        sem_post(&endSema);  // Notify transaction complete
    }
    return NULL;  // Never returns
};

void *thread2Func(void *param)
{
    MersenneTwister random(2);
    for (;;)
    {
        sem_wait(&beginSema2);  // Wait for signal
        while (random.integer() % 8 != 0) {}  // Random delay

        // ----- THE TRANSACTION! -----
        Y = 1;
#if USE_CPU_FENCE
        asm volatile("mfence" ::: "memory");  // Prevent CPU reordering
#else
        asm volatile("" ::: "memory");  // Prevent compiler reordering
#endif
        r2 = X;

        sem_post(&endSema);  // Notify transaction complete
    }
    return NULL;  // Never returns
};

void *thread3Func(void *param)
{
	MersenneTwister random(1);
	for (;;)
	{
		sem_wait(&beginSema1);  // Wait for signal
		while (random.integer() % 8 != 0) {}  // Random delay

		// ----- THE TRANSACTION! -----
		X = 1;
#if USE_CPU_FENCE
		asm volatile("mfence" ::: "memory");  // Prevent CPU reordering
#else
		asm volatile("" ::: "memory");  // Prevent compiler reordering only
#endif
		Y = 1;

		sem_post(&endSema);  // Notify transaction complete
	}
	return NULL;  // Never returns
};

void *thread4Func(void *param)
{
	MersenneTwister random(2);
	for (;;)
	{
		sem_wait(&beginSema2);  // Wait for signal
		while (random.integer() % 8 != 0) {}  // Random delay

		// ----- THE TRANSACTION! -----
		r1 = Y;
#if USE_CPU_FENCE
		asm volatile("mfence" ::: "memory");  // Prevent CPU reordering
#else
		asm volatile("" ::: "memory");  // Prevent compiler reordering only
#endif
		r2 = X;

		sem_post(&endSema); // Notify transaction complete
	}
	return NULL;  // Never returns
};

void *thread5Func(void *param)
{
	MersenneTwister random(1);
	for (;;)
	{
		sem_wait(&beginSema1);  // Wait for signal
		while (random.integer() % 8 != 0) {}  // Random delay

		// ----- THE TRANSACTION! -----
		r1 = X;
#if USE_CPU_FENCE
		asm volatile("mfence" ::: "memory");  // Prevent CPU reordering
#else
		asm volatile("" ::: "memory");  // Prevent compiler reordering only
#endif
		Y = 1;

		sem_post(&endSema);  // Notify transaction complete
	}
	return NULL;  // Never returns
};

void *thread6Func(void *param)
{
	MersenneTwister random(2);
	for (;;)
	{
		sem_wait(&beginSema2);  // Wait for signal
		while (random.integer() % 8 != 0) {}  // Random delay

		// ----- THE TRANSACTION! -----
		r2 = Y;
#if USE_CPU_FENCE
		asm volatile("mfence" ::: "memory");  // Prevent CPU reordering
#else
		asm volatile("" ::: "memory");  // Prevent compiler reordering only
#endif
		X = 1;

		sem_post(&endSema);  // Notify transaction complete
	}
	return NULL;  // Never returns
};

int main(int argc, char** argv)
{
	// Check Argument
	if (argc > 2)
	{
		printf("Too Many Arguments: Only Need One.\n");
		return 0;
	}
	
	if (argc == 1)
	{
		printf("You Should Give an Argument: 1 or 2 or 3.\n");
		return 0;
	}

	int i;

	i = atoi(argv[1]);

	if (i < 1 || i > 3)
	{
		printf("Wrong Argument: Only 1 or 2 or 3 Can be Input.\n");
		return 0;
	}
	
    // Initialize the semaphores
    sem_init(&beginSema1, 0, 0);
    sem_init(&beginSema2, 0, 0);
    sem_init(&endSema, 0, 0);

    // Spawn the threads
    pthread_t thread1, thread2;
	
	// 1. StoreLoad Reorder Tests
	if (i == 1) 
	{
		pthread_create(&thread1, NULL, thread1Func, NULL);
		pthread_create(&thread2, NULL, thread2Func, NULL);
	}
	// 2. LoadLoad && StoreStore Reorder Tests
	else if (i == 2)
	{
		pthread_create(&thread1, NULL, thread3Func, NULL);
		pthread_create(&thread2, NULL, thread4Func, NULL);
	}
	// 3. LoadStore Reorder Tests
	else
	{
		pthread_create(&thread1, NULL, thread5Func, NULL);
		pthread_create(&thread2, NULL, thread6Func, NULL);
	}

#if USE_SINGLE_HW_THREAD
    // Force thread affinities to the same cpu core.
    cpu_set_t cpus;
    CPU_ZERO(&cpus);
    CPU_SET(0, &cpus);
    pthread_setaffinity_np(thread1, sizeof(cpu_set_t), &cpus);
    pthread_setaffinity_np(thread2, sizeof(cpu_set_t), &cpus);
#endif

    // Repeat the experiment ad infinitum
    int detected = 0;
    for (int iterations = 1; ; iterations++)
    {
        // Reset X and Y
        X = 0;
        Y = 0;
        // Signal both threads
        sem_post(&beginSema1);
        sem_post(&beginSema2);
        // Wait for both threads
        sem_wait(&endSema);
        sem_wait(&endSema);
        // Check if there was a simultaneous reorder
        	// 1. StoreLoad Reorder
		if (i == 1)
		{
			if (r1 == 0 && r2 == 0)
			{
				detected++;
				printf("%d reorders detected after %d iterations\n", detected, iterations);
			}
		}
		// 2. LoadLoad && StoreStore Reorder
		else if (i == 2)
		{
			if (r1 == 1 && r2 == 0)
			{
				detected++;
				printf("%d reorders detected after %d iterations\n", detected, iterations);
			}
		}
		// 3. LoadStore Reorder
		else 
		{
			if (r1 == 1 && r2 == 1)
			{
				detected++;
				printf("%d reorders detected after %d iterations\n", detected, iterations);
			}
		}
    }
    return 0;  // Never returns
}


    有机会的话,同学们可以在网上找到这篇文档来看看。文中的一个观点,我特别同意:所有带LOCK prefix的指令,都会构成天生的memory barrier。

    btw:

    鉴于之前的版本只能在linux上运行,今天特地修改了一个可以在windows上运行的版本。

// compile with vc 6.0


#include <stdio.h>
#include <stdlib.h>
#include <windows.h>

// Experiment switches. Set either of these to 1 to prevent CPU reordering:
//  - USE_CPU_FENCE: the worker threads execute `cpuid` between their two
//    memory accesses instead of the empty __asm block.
//  - USE_SINGLE_HW_THREAD: main() restricts both worker threads to affinity
//    mask 0x1 via SetThreadAffinityMask.
#define USE_CPU_FENCE              0
#define USE_SINGLE_HW_THREAD       0  // Supported on windows


//-------------------------------------
//  MersenneTwister
//  Small Mersenne-Twister style pseudo-random number generator, used to
//  introduce random timing delays. All state lives in the object itself,
//  so it can be used from several threads as long as each thread owns
//  its own instance.
//-------------------------------------
#define MT_IA  397
#define MT_LEN 624

// Marks a function as never-inlined where the compiler offers a way to say so.
// __declspec(noinline) only exists from Visual C++ 7.0 (_MSC_VER 1300) on;
// for older compilers such as VC 6.0 the macro expands to nothing.
#if defined(_MSC_VER) && (_MSC_VER >= 1300)
#define MT_NOINLINE __declspec(noinline)
#elif defined(__GNUC__)
#define MT_NOINLINE __attribute__((noinline))
#else
#define MT_NOINLINE
#endif

class MersenneTwister
{
    unsigned int m_buffer[MT_LEN];  // generator state words
    int m_index;                    // slot that the next call regenerates

public:
    // Fills the state with `seed` and then runs the generator to mix it.
    MersenneTwister(unsigned int seed);
    // Returns the next pseudo-random value.
    // Declare noinline so that the function call acts as a compiler barrier:
    MT_NOINLINE unsigned int integer();
};

MersenneTwister::MersenneTwister(unsigned int seed)
{
    // Declared once up front: VC 6.0 leaks for-loop variables into the
    // enclosing scope, so two `for (int i ...)` loops would clash there.
    int i;

    // Initialize by filling with the seed, then iterating
    // the algorithm a bunch of times to shuffle things up.
    for (i = 0; i < MT_LEN; i++)
        m_buffer[i] = seed;
    m_index = 0;
    for (i = 0; i < MT_LEN * 100; i++)
        integer();
}

unsigned int MersenneTwister::integer()
{
    // Indices
    int i = m_index;
    int i2 = m_index + 1; if (i2 >= MT_LEN) i2 = 0; // wrap-around
    int j = m_index + MT_IA; if (j >= MT_LEN) j -= MT_LEN; // wrap-around

    // Twist: top bit of word i joined with the low 31 bits of word i2,
    // folded into word j.
    unsigned int s = (m_buffer[i] & 0x80000000) | (m_buffer[i2] & 0x7fffffff);
    unsigned int r = m_buffer[j] ^ (s >> 1) ^ ((s & 1) * 0x9908B0DF);
    m_buffer[m_index] = r;
    m_index = i2;

    // Swizzle
    r ^= (r >> 11);
    r ^= (r << 7) & 0x9d2c5680UL;
    r ^= (r << 15) & 0xefc60000UL;
    r ^= (r >> 18);
    return r;
}


//-------------------------------------
//  Main program, as described in the post
//-------------------------------------
// Semaphore handles that run the experiment in lock-step: main() releases
// beginSema1 and beginSema2 to start one round in each worker thread, and
// every worker releases endSema once its transaction is complete.
HANDLE beginSema1;
HANDLE beginSema2;
HANDLE endSema;

// X and Y are the shared variables the workers store to and load from; main()
// resets both to 0 before each round. r1 and r2 receive the values the workers
// loaded and are inspected by main() after both workers have finished.
int X, Y;
int r1, r2;

/* thread1Func, thread2Func for StoreLoad */
/* thread3Func, thread4Func for StoreStore LoadLoad */
/* thread5Func, thread6Func for LoadStore */

void *thread1Func(void *param)
{
    MersenneTwister random(1);
    for (;;)
    {
        WaitForSingleObject(beginSema1, INFINITE);  // Wait for signal
        while (random.integer() % 8 != 0) {}  // Random delay

        // ----- THE TRANSACTION! -----
        X = 1;
#if USE_CPU_FENCE
        __asm {cpuid};  // Prevent CPU reordering
#else
        __asm {};  // Prevent compiler reordering
#endif
        r1 = Y;

        ReleaseSemaphore(endSema, 1, 0);  // Notify transaction complete
    }
    return NULL;  // Never returns
};

void *thread2Func(void *param)
{
    MersenneTwister random(2);
    for (;;)
    {
        WaitForSingleObject(beginSema2, INFINITE);  // Wait for signal
        while (random.integer() % 8 != 0) {}  // Random delay

        // ----- THE TRANSACTION! -----
        Y = 1;
#if USE_CPU_FENCE
         __asm {cpuid};   // Prevent CPU reordering
#else
        _asm {};  // Prevent compiler reordering
#endif
        r2 = X;

        ReleaseSemaphore(endSema, 1, 0);  // Notify transaction complete
    }
    return NULL;  // Never returns
};

void *thread3Func(void *param)
{
	MersenneTwister random(1);
	for (;;)
	{
		WaitForSingleObject(beginSema1, INFINITE);  // Wait for signal
		while (random.integer() % 8 != 0) {}  // Random delay

		// ----- THE TRANSACTION! -----
		X = 1;
#if USE_CPU_FENCE
		 __asm {cpuid};   // Prevent CPU reordering
#else
		__asm {};  // Prevent compiler reordering only
#endif
		Y = 1;

		ReleaseSemaphore(endSema, 1, 0);  // Notify transaction complete
	}
	return NULL;  // Never returns
};

void *thread4Func(void *param)
{
	MersenneTwister random(2);
	for (;;)
	{
		WaitForSingleObject(beginSema2, INFINITE);  // Wait for signal
		while (random.integer() % 8 != 0) {}  // Random delay

		// ----- THE TRANSACTION! -----
		r1 = Y;
#if USE_CPU_FENCE
		 __asm {cpuid};   // Prevent CPU reordering
#else
		__asm {};  // Prevent compiler reordering only
#endif
		r2 = X;

		ReleaseSemaphore(endSema, 1, 0); // Notify transaction complete
	}
	return NULL;  // Never returns
};

void *thread5Func(void *param)
{
	MersenneTwister random(1);
	for (;;)
	{
		WaitForSingleObject(beginSema1, INFINITE);  // Wait for signal
		while (random.integer() % 8 != 0) {}  // Random delay

		// ----- THE TRANSACTION! -----
		r1 = X;
#if USE_CPU_FENCE
		 __asm {cpuid};   // Prevent CPU reordering
#else
		__asm {};  // Prevent compiler reordering only
#endif
		Y = 1;

		ReleaseSemaphore(endSema, 1, 0);  // Notify transaction complete
	}
	return NULL;  // Never returns
};

void *thread6Func(void *param)
{
	MersenneTwister random(2);
	for (;;)
	{
		WaitForSingleObject(beginSema2, INFINITE);  // Wait for signal
		while (random.integer() % 8 != 0) {}  // Random delay

		// ----- THE TRANSACTION! -----
		r2 = Y;
#if USE_CPU_FENCE
		 __asm {cpuid};   // Prevent CPU reordering
#else
		__asm {};  // Prevent compiler reordering only
#endif
		X = 1;

		ReleaseSemaphore(endSema, 1, 0);  // Notify transaction complete
	}
	return NULL;  // Never returns
};

int main(int argc, char** argv)
{
	// Check Argument
	if (argc > 2)
	{
		printf("Too Many Arguments: Only Need One.\n");
		return 0;
	}
	
	if (argc == 1)
	{
		printf("You Should Give an Argument: 1 or 2 or 3.\n");
		return 0;
	}

	int i;

	i = atoi(argv[1]);

	if (i < 1 || i > 3)
	{
		printf("Wrong Argument: Only 1 or 2 or 3 Can be Input.\n");
		return 0;
	}
	
    // Initialize the semaphores
    beginSema1 = CreateSemaphore(NULL, 0, 1, 0);
    beginSema2 = CreateSemaphore(NULL, 0, 1, 0);
    endSema = CreateSemaphore(NULL, 0, 2, 0);

    // Spawn the threads
    HANDLE thread1, thread2;
	
	// 1. StoreLoad Reorder Tests
	if (i == 1) 
	{
		thread1 = CreateThread(NULL, 0, ( LPTHREAD_START_ROUTINE ) thread1Func, 0, 0, NULL);
		thread2 = CreateThread(NULL, 0, ( LPTHREAD_START_ROUTINE ) thread2Func, 0, 0, NULL);
	}
	// 2. LoadLoad && StoreStore Reorder Tests
	else if (i == 2)
	{
		thread1 = CreateThread(NULL, 0, ( LPTHREAD_START_ROUTINE ) thread3Func, 0, 0, NULL);
		thread2 = CreateThread(NULL, 0, ( LPTHREAD_START_ROUTINE ) thread4Func, 0, 0, NULL);
	}
	// 3. LoadStore Reorder Tests
	else
	{
		thread1 = CreateThread(NULL, 0, ( LPTHREAD_START_ROUTINE ) thread5Func, 0, 0, NULL);
		thread2 = CreateThread(NULL, 0, ( LPTHREAD_START_ROUTINE ) thread6Func, 0, 0, NULL);
	}

#if USE_SINGLE_HW_THREAD
    // Force thread affinities to the same cpu core.
    SetThreadAffinityMask(thread1, 0x1 << 0);
	SetThreadAffinityMask(thread2, 0x1 << 0);
#endif

    // Repeat the experiment ad infinitum
    int detected = 0;
    for (int iterations = 1; ; iterations++)
    {
        // Reset X and Y
        X = 0;
        Y = 0;
        // Signal both threads
        ReleaseSemaphore(beginSema1, 1, 0);
        ReleaseSemaphore(beginSema2, 1, 0);
        // Wait for both threads
        WaitForSingleObject(endSema, INFINITE);
        WaitForSingleObject(endSema, INFINITE);
        // Check if there was a simultaneous reorder
        	// 1. StoreLoad Reorder
		if (i == 1)
		{
			if (r1 == 0 && r2 == 0)
			{
				detected++;
				printf("%d reorders detected after %d iterations\n", detected, iterations);
			}
		}
		// 2. LoadLoad && StoreStore Reorder
		else if (i == 2)
		{
			if (r1 == 1 && r2 == 0)
			{
				detected++;
				printf("%d reorders detected after %d iterations\n", detected, iterations);
			}
		}
		// 3. LoadStore Reorder
		else 
		{
			if (r1 == 1 && r2 == 1)
			{
				detected++;
				printf("%d reorders detected after %d iterations\n", detected, iterations);
			}
		}
    }
    return 0;  // Never returns
}




你可能感兴趣的:(随想录(内存屏障示例代码))