[菜鸟每天来段CUDA_C]CUDA实现简单热传导动态模拟

本文利用CUDA实现简单二维平面上的热传导模拟。假设有一个矩形房间,将其分成一个网格,在网格中随机散布一些热源,热源有不同的固定温度,然后就可以计算网格中每个单元的温度随时间的变化情况。本文将热传导模型做如下简化:某单元的新时刻温度等于原有温度加其与邻接单元的温差,邻接单元取上下左右四个单元。

温度更新算法:

Step1: 给定一个包含初始输入温度的网格,将其作为热源温度值复制到网格对应的单元;

Step2: 规定一个输入温度网格,根据热传导模型更新温度到输出网格;

Step3: 交换输入温度网格和输出温度网格的数据,转step1.


实验结果如下:下图显示按先后顺序的四个不同时刻的热量分布及变化情况。

[菜鸟每天来段CUDA_C]CUDA实现简单热传导动态模拟_第1张图片


主要程序代码如下:

/********************************************************************
*  heatTransfer.cu
*********************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cutil_inline.h>
#include "CPUAnim.h"

#define DIM 1024
#define MAX_TEMP 1.0f
#define MIN_TEMP 0.0001f
#define SPEED 0.25f

struct DataBlock 
{
	unsigned char	*outputBitmap;
	float			*dev_inSrc;
	float			*dev_outSrc;
	float			*dev_constSrc;
	CPUAnimBitmap	*bitmap;

	cudaEvent_t		start, stop;
	float			totalTime;
	float			frames;
};

template< typename T >
void swap( T& a, T& b ) {
	T t = a;
	a = b;
	b = t;
}


/************************************************************************/
/* Init CUDA                                                            */
/************************************************************************/
bool InitCUDA(void)
{
    ......
}

/************************************************************************/
__global__ void copyConstKernel(float *iPtr, const float *cPtr)
{
	int x = threadIdx.x + blockIdx.x + blockDim.x;
	int y = threadIdx.y + blockIdx.y * blockDim.y;
	int offset = x + y * blockDim.x * gridDim.x;

	if (cPtr[offset] != 0)
	{
		iPtr[offset] = cPtr[offset];
	}
}

__global__ void blendKernel(float* outSrc, const float* inSrc)
{
	int x = threadIdx.x + blockIdx.x * blockDim.x;
	int y = threadIdx.y + blockIdx.y * blockDim.y;
	int offset = x + y * blockDim.x * gridDim.x;

	int left = offset - 1;
	int right = offset + 1;
	if (x == 0) 
	{
		left++; 
	}
	if (x == DIM - 1)
	{
		right--;
	}

	int top = offset - DIM;
	int bottom = offset + DIM;
	if (y == 0)
	{
		top += DIM;
	}
	if (y == DIM -1)
	{
		bottom -= DIM;
	}

	outSrc[offset] = inSrc[offset] + SPEED * (inSrc[top] + inSrc[bottom] 
											+ inSrc[left] + inSrc[right] - 4 * inSrc[offset]);
}

__device__ unsigned char value( float n1, float n2, int hue )
{
      ......
}
__global__ void floatToColor( unsigned char *optr,
							   const float *outSrc ) 
{
    ......
}

void animGPU(DataBlock *d, int ticks)
{
	cutilSafeCall(cudaEventRecord(d->start, 0));
	dim3	blocks(DIM/16, DIM/16);
	dim3	threads(16, 16);
	CPUAnimBitmap	*bitmap = d->bitmap;

	for (int i=0; i<90; i++)
	{
		copyConstKernel<<<blocks, threads>>>(d->dev_inSrc, d->dev_constSrc);
		blendKernel<<<blocks, threads>>>(d->dev_outSrc, d->dev_inSrc);
		swap(d->dev_inSrc, d->dev_outSrc);
	}
	floatToColor<<<blocks, threads>>>(d->outputBitmap, d->dev_inSrc);

	cudaMemcpy(bitmap->get_ptr(), d->outputBitmap, bitmap->image_size(), cudaMemcpyDeviceToHost);
	cutilSafeCall(cudaEventRecord(d->stop, 0));
	cutilSafeCall(cudaEventSynchronize(d->stop));

	float elapsedTime;
	cutilSafeCall(cudaEventElapsedTime(&elapsedTime, d->start, d->stop));
	d->totalTime += elapsedTime;
	++d->frames;

	printf("Average Time per frame: %3.1f ms\n", d->totalTime/d->frames);
}


void animExit(DataBlock *d)
{
	cudaFree(d->dev_inSrc);
	cudaFree(d->dev_outSrc);
	cudaFree(d->dev_constSrc);

	cutilSafeCall(cudaEventDestroy(d->start));
	cutilSafeCall(cudaEventDestroy(d->stop));
}
/************************************************************************/

int main(int argc, char* argv[])
{

	if(!InitCUDA()) {
		return 0;
	}

	DataBlock	data;
	CPUAnimBitmap bitmap(DIM, DIM, &data);
	data.bitmap = &bitmap;
	data.totalTime = 0;
	data.frames = 0;

	cutilSafeCall(cudaEventCreate(&data.start));
	cutilSafeCall(cudaEventCreate(&data.stop));

	cutilSafeCall(cudaMalloc((void**)&data.outputBitmap, data.bitmap->image_size()));

	cutilSafeCall(cudaMalloc((void**)&data.dev_inSrc, data.bitmap->image_size()));
	cutilSafeCall(cudaMalloc((void**)&data.dev_outSrc, data.bitmap->image_size()));
	cutilSafeCall(cudaMalloc((void**)&data.dev_constSrc, data.bitmap->image_size()));

	float *temp = (float*)malloc(bitmap.image_size());
	for (int i=0; i<DIM*DIM; i++)
	{
		temp[i] = 0;
		int x = i % DIM;
		int y = i / DIM;

		if ((x>300) && (x<600) && (y>301) && (y<601))
		{
			temp[i] = MAX_TEMP;
		}
	}
	temp[DIM*100+100] = (MAX_TEMP + MIN_TEMP) / 2;
    temp[DIM*700+100] = MIN_TEMP;
	temp[DIM*300+300] = MIN_TEMP;
	temp[DIM*200+700] = MIN_TEMP;

	for (int y=800; y<900; y++)
	{
		for (int x=400; x<500; x++)
		{
			temp[x+y*DIM] = MIN_TEMP;
		}
	}

	cutilSafeCall(cudaMemcpy(data.dev_constSrc, temp, bitmap.image_size(), cudaMemcpyHostToDevice));
	
	for(int y=800; y<DIM; y++)
	{
		for (int x=0; x<200; x++)
		{
			temp[x+y*DIM] = MAX_TEMP;
		}
	}
	cutilSafeCall(cudaMemcpy(data.dev_inSrc, temp, bitmap.image_size(), cudaMemcpyHostToDevice));
	free(temp);
	bitmap.anim_and_exit((void(*)(void*, int))animGPU, (void(*)(void*))animExit);
	return 0;
}


你可能感兴趣的:(CUDA,热传导模拟)