上一篇《一步一步写一个简单通用的makefile(二)》里的 makefile 已经可以对一般的代码进行编译,这一篇我将对上一次的 makefile 做进一步的优化。
优化后的makefile:
#Hellomake
#Magnum, 2014-10-20
#
# Generic GNU makefile for a small C project.
# Every $(PROJECT_SRC_DIR)/*.$(FILE_TYPE) file is compiled to an object in
# $(PROJECT_OBJ_DIR) and all objects are linked into $(TARGET).

# Compiler and compile options
CC=gcc
CFLAGS=-Wall
# Have the compiler write a .d file next to each object so that editing a
# header rebuilds the objects that include it.
DEPFLAGS := -MMD -MP

# Libraries to link. Drop the "lib" prefix and the ".a" suffix:
# libm.a becomes -lm.
LIBS=-lm
# Header search path; searched before the system include directories.
IncludeDir = -I./include/
# Library search path(s), e.g. -L/path/to/lib (empty by default).
LinkDir =

# PROJECT_TOP_DIR may be the absolute working directory or "."
PROJECT_TOP_DIR := $(CURDIR)
PROJECT_BIN_DIR := $(PROJECT_TOP_DIR)/bin
PROJECT_SRC_DIR := $(PROJECT_TOP_DIR)/src
PROJECT_LIB_DIR := $(PROJECT_TOP_DIR)/lib
PROJECT_OBJ_DIR := $(PROJECT_TOP_DIR)/objs

# Short aliases. They point at the PROJECT_* directories so that the place
# the binary is written to is the same place that is created and cleaned.
OBJ_DIR = $(PROJECT_OBJ_DIR)
BIN_DIR = $(PROJECT_BIN_DIR)

MKDIR := mkdir -p

# Final executable
EXE_NAME=hellomake
TARGET=$(BIN_DIR)/$(EXE_NAME)

# Source file extension (c or cpp)
FILE_TYPE=c

# Source list and the object list derived from it (expanded once).
src := $(wildcard $(PROJECT_SRC_DIR)/*.$(FILE_TYPE))
dir := $(notdir $(src))
PROJECT_OBJ := $(patsubst %.$(FILE_TYPE),%.o,$(dir))
PROJECT_ALL_OBJS := $(addprefix $(PROJECT_OBJ_DIR)/,$(PROJECT_OBJ))

.PHONY: all chdir clean
# Remove a half-written target when its recipe fails.
.DELETE_ON_ERROR:

# Default goal: build the executable, then print the computed object lists.
all: chdir $(TARGET)
	@echo "magnum $(PROJECT_OBJ)"
	@echo "magnum $(PROJECT_OBJ_DIR)"
	@echo "magnum $(PROJECT_ALL_OBJS)"

# Link. The bin directory is an order-only prerequisite: it must exist, but
# its timestamp never forces a relink.
$(TARGET): $(PROJECT_ALL_OBJS) | $(PROJECT_BIN_DIR)
	$(CC) -o $@ $^ $(LinkDir) $(LIBS)

# Compile one source file into one object (plus its .d dependency file).
$(PROJECT_OBJ_DIR)/%.o: $(PROJECT_SRC_DIR)/%.$(FILE_TYPE) | $(PROJECT_OBJ_DIR)
	$(CC) $(CFLAGS) $(DEPFLAGS) $(IncludeDir) -o $@ -c $<

# Output directories, created on demand.
$(PROJECT_OBJ_DIR) $(PROJECT_BIN_DIR):
	$(MKDIR) $@

# Kept for compatibility: "make chdir" only creates the output directories.
chdir: $(PROJECT_OBJ_DIR) $(PROJECT_BIN_DIR)

clean:
	$(RM) -r $(PROJECT_BIN_DIR) $(PROJECT_OBJ_DIR)

# Header dependencies from previous builds; absent on the first build.
-include $(PROJECT_ALL_OBJS:.o=.d)
这个优化后的 makefile 对一般的小型测试程序都有一定的通用性。如果要用它来编译新的程序,只需要修改以下几处:
1. 编译器(CC):C 用 gcc,C++ 用 g++
2. 源文件的路径 PROJECT_SRC_DIR
3. 文件类型c 还是cpp
4. 还有这三个:
# Libraries to link. Drop the "lib" prefix and the ".a" suffix:
# libm.a becomes -lm.
LIBS=-lm
# Header search path; searched before the system include directories.
IncludeDir = -I./include/
# Library search path(s), e.g. -L/path/to/lib (empty by default).
LinkDir =
下面我就用这个模板去编译一个opencl的小程序。
文件树如下:
.
├── convolve.cl
├── convolve_cl.cpp
├── makefile
└── makefile~
很简单,只有3个文件:convolve.cl、convolve_cl.cpp 和 makefile(makefile~ 应该是编辑器留下的备份文件,可以忽略)。
convolve_cl.cpp源码:
// newTutorial1.cpp : Defines the entry point for the console application. // //#include "stdafx.h" #include <CL/cl.h> #include <stdio.h> #include <stdlib.h> #include <time.h> #include <iostream> #include <fstream> #include <string.h> #include <string> #include <pthread.h> using namespace std; #pragma comment (lib,"OpenCL.lib") #define WIDTH 1920 #define HEIGHT 1080 #define FRAMES 1 typedef unsigned char uint8_t; static double now_ms(void) { struct timespec res; clock_gettime(CLOCK_REALTIME, &res); return 1000.0 * res.tv_sec + (double) res.tv_nsec / 1e6; } static int filter0(uint8_t *src, uint8_t *dst, uint8_t *ocl, int w, int h) { printf("filter 0 ,w=%d, h=%d\n",w, h); double start, end; int i =1; int j =1; if(w > WIDTH || h > HEIGHT) return 1; //start = now_ms(); for (i = 1; i < h - 1; ++i) { for (j = 1; j < w - 1; ++j) { int index = j + i * w; uint8_t lu = src[index - 1 - w]; uint8_t lm = src[index - 1]; uint8_t ld = src[index - 1 + w]; uint8_t mu = src[index - w]; uint8_t mm = src[index]; uint8_t md = src[index + w]; uint8_t ru = src[index + 1 - w]; uint8_t rm = src[index + 1]; uint8_t rd = src[index + 1 + w]; int sum = lu + lm + ld + mu + mm + md + ru + rm + rd; // printf("%d, %d, %d, %d, %d, %d, %d %d, %d,\n",lu, lm, ld, mu, mm, md, ru, rm, rd); dst[index] = (uint8_t)sum / 9 + 1; // printf(" dst[%d] =%d",index, dst[index]); if(ocl[index] != dst[index]) printf("index[%d] differ \n", index); } // printf("\n"); } end = now_ms(); //printf("filter 0 %f \n", end - start); return 0; } //°ÑÎıŸÎÄŒþ¶ÁÈëÒ»žöstringÖÐ int convertToString(const char *filename, std::string& s) { size_t size; char* str; std::fstream f(filename, (std::fstream::in | std::fstream::binary)); if(f.is_open()) { size_t fileSize; f.seekg(0, std::fstream::end); size = fileSize = (size_t)f.tellg(); f.seekg(0, std::fstream::beg); str = new char[size+1]; if(!str) { f.close(); return NULL; } f.read(str, fileSize); f.close(); str[size] = '\0'; s = str; delete[] str; return 0; } printf("Error: Failed to 
open file %s\n", filename); return 1; } int main(int argc, char* argv[]) { int i, ret; double start, end; uint8_t * inputBuf; uint8_t * dstBuf1; uint8_t * dstBuf2; inputBuf =(uint8_t *)malloc(WIDTH * HEIGHT * sizeof(uint8_t)); dstBuf1 =(uint8_t *)malloc(WIDTH * HEIGHT * sizeof(uint8_t)); dstBuf2 =(uint8_t *)malloc(WIDTH * HEIGHT * sizeof(uint8_t)); memset(dstBuf1,0,WIDTH * HEIGHT * sizeof(uint8_t)); memset(dstBuf2,0,WIDTH * HEIGHT * sizeof(uint8_t)); srand( (unsigned)time( NULL ) ); for(i = 0; i < WIDTH * HEIGHT; i++) { inputBuf[i] = rand()%255; //printf("[%d] =%d\n", i, inputBuf[i]); } //return 0; cl_uint status; cl_platform_id platform; //ŽŽœšÆœÌš¶ÔÏó status = clGetPlatformIDs( 1, &platform, NULL ); cl_device_id device; //ŽŽœšGPUÉ豞 clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); cl_uint maxComputeUnits; status = clGetDeviceInfo(device,CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &maxComputeUnits, NULL); printf("maxComputeUnits = %d\n" ,maxComputeUnits); //ŽŽœšcontext cl_context context = clCreateContext( NULL, 1, &device, NULL, NULL, NULL); //ŽŽœšÃüÁî¶ÓÁÐ cl_command_queue queue = clCreateCommandQueue( context, device, CL_QUEUE_PROFILING_ENABLE, NULL ); //ŽŽœšÈýžöOpenCLÄÚŽæ¶ÔÏó cl_mem clinbuf = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, WIDTH*HEIGHT*sizeof(cl_uchar),inputBuf, NULL ); cl_mem cloutbuf = clCreateBuffer( context, CL_MEM_WRITE_ONLY, WIDTH*HEIGHT * sizeof(cl_uchar), NULL, NULL ); const char * filename = "convolve.cl"; std::string sourceStr; status = convertToString(filename, sourceStr); const char * source = sourceStr.c_str(); size_t sourceSize[] = { strlen(source) }; //ŽŽœš³ÌÐò¶ÔÏó cl_program program = clCreateProgramWithSource( context, 1, &source, sourceSize, NULL); //±àÒë³ÌÐò¶ÔÏó status = clBuildProgram( program, 1, &device, NULL, NULL, NULL ); if(status != 0) { printf("clBuild failed:%d\n", status); char tbuf[0x10000]; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf, NULL); 
printf("\n%s\n", tbuf); return -1; } cl_int dimx = WIDTH; cl_int dimy = HEIGHT; cl_event ev; cl_kernel kernel; cl_ulong startTime, endTime; cl_ulong kernelExecTimeNs; float *op_data = 0; #if 1 //ŽŽœšKernel¶ÔÏó kernel = clCreateKernel( program, "filter", NULL ); //ÉèÖÃKernel²ÎÊý clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&clinbuf); clSetKernelArg(kernel, 1, sizeof(cl_int), (void *)&dimx); clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&dimy); clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&cloutbuf); //Set local and global workgroup sizes size_t localws[2] = {1, 1} ; size_t globalws[2] = {WIDTH,HEIGHT}; //ÖŽÐÐkernel clEnqueueNDRangeKernel( queue ,kernel, 2, 0, globalws, NULL, 0, NULL, &ev); clFinish( queue ); //ŒÆËãkerenlÖŽÐÐʱŒä clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime, NULL); clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, NULL); kernelExecTimeNs = endTime-startTime; printf("kernal exec time :%8.6f ms\n ", kernelExecTimeNs*1e-6 ); //ÊýŸÝ¿œ»ØhostÄÚŽæ cl_uchar *ptr; ptr = (cl_uchar *) clEnqueueMapBuffer( queue, cloutbuf, CL_TRUE, CL_MAP_READ, 0, WIDTH*HEIGHT * sizeof(cl_uchar), 0, NULL, NULL, NULL ); //œá¹ûÑéÖ€£¬ºÍcpuŒÆËãµÄœá¹û±ÈœÏ start = now_ms(); for(i = 0; i< FRAMES; i++) { ret = filter0(inputBuf, dstBuf1, ptr,WIDTH, HEIGHT); if(ret) printf("filter Fail \n"); } end = now_ms(); printf("filter 0 %f \n", (end - start)/FRAMES); //ÑéÖ€GPUŒÆËãœá¹û /* for(i = 0; i < M*N; i++) { //printf("%d, %6.3f,%6.3f\n",i,outbuf[i],op_data[i]); if(abs(dstBuf1[i] - dstBuf2[i]) > 0.0001) { printf("check failed\n"); break; } } if(i == M*N) printf("check passed\n"); */ #endif if(inputBuf) free(inputBuf); if(dstBuf1) free(dstBuf1); if(dstBuf2) free(dstBuf2); //ÉŸ³ýOpenCL×ÊÔŽ¶ÔÏó clReleaseMemObject(clinbuf); clReleaseMemObject(cloutbuf); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); return 0; }
convolve.cl 源码:
// printf is only needed for ad-hoc debugging. The vendor extension is enabled
// only when the compiler advertises it, so the pragma is skipped elsewhere.
#ifdef cl_amd_printf
#pragma OPENCL EXTENSION cl_amd_printf : enable
#endif

// 3x3 neighbourhood filter over an 8-bit image stored row by row.
//   in     : input image, Width*Height bytes
//   Width  : image width in pixels
//   Height : image height in pixels
//   out    : output image, Width*Height bytes; only interior pixels are written
// One work-item handles one pixel: global id 0 runs along the width and
// global id 1 along the height.
__kernel void filter(__global const uchar* in, int Width, int Height, __global uchar* out)
{
    int x = get_global_id(0);   // position along the width
    int y = get_global_id(1);   // position along the height

    // Skip the one-pixel border, which has no full 3x3 neighbourhood. The >=
    // comparisons also skip work-items beyond the image if the global size is
    // larger than Width x Height.
    if (x <= 0 || y <= 0 || x >= Width - 1 || y >= Height - 1)
        return;

    int index = x + y * Width;

    uchar lu = in[index - 1 - Width];
    uchar lm = in[index - 1];
    uchar ld = in[index - 1 + Width];
    uchar mu = in[index - Width];
    uchar mm = in[index];
    uchar md = in[index + Width];
    uchar ru = in[index + 1 - Width];
    uchar rm = in[index + 1];
    uchar rd = in[index + 1 + Width];

    int sum = lu + lm + ld + mu + mm + md + ru + rm + rd;

    // The cast applies to sum before the division, so sum is reduced modulo
    // 256 first. The host reference (filter0) uses the identical expression
    // and compares its result with this one, so the two must stay in sync.
    // NOTE(review): if a true mean is intended, both places should become
    // (uchar)(sum / 9 + 1) - confirm.
    out[index] = (uchar)sum / 9 + 1;
}
下面是修改上面给出的模板makefile文件,来编译这个程序:
1. 因为是cpp,所以CC=g++, FILE_TYPE=cpp
2. 可执行文件的名字:EXE_NAME=convolve_cl
3. 链接的库:
LIBS= -lOpenCL -lfreeimage -lrt
IncludeDir = -I/opt/AMDAPP/include
LinkDir = -L/opt/AMDAPP/lib/x86_64
修改后的makefile如下:
#Hellomake
#Magnum, 2014-10-19
#
# GNU makefile for the convolve_cl OpenCL example.
# Every $(PROJECT_SRC_DIR)/*.$(FILE_TYPE) file is compiled to an object in
# $(PROJECT_OBJ_DIR) and all objects are linked into $(TARGET).

# Compiler and compile options (g++ because the sources are C++)
CC=g++
CFLAGS=-Wall
# Have the compiler write a .d file next to each object so that editing a
# header rebuilds the objects that include it.
DEPFLAGS := -MMD -MP

# Libraries to link. Drop the "lib" prefix and the ".a" suffix:
# libm.a becomes -lm.
# NOTE(review): convolve_cl.cpp as shown does not appear to call FreeImage;
# -lfreeimage can probably be dropped - confirm.
LIBS= -lOpenCL -lfreeimage -lrt
# Header search path; searched before the system include directories.
IncludeDir = -I/opt/AMDAPP/include
# Library search path
LinkDir = -L/opt/AMDAPP/lib/x86_64

# PROJECT_TOP_DIR may be the absolute working directory or "."
PROJECT_TOP_DIR := .
PROJECT_BIN_DIR := $(PROJECT_TOP_DIR)/bin
# The sources live directly in the top directory.
PROJECT_SRC_DIR := $(PROJECT_TOP_DIR)
PROJECT_LIB_DIR := $(PROJECT_TOP_DIR)/lib
PROJECT_OBJ_DIR := $(PROJECT_TOP_DIR)/objs

# Short aliases. They point at the PROJECT_* directories so that the place
# the binary is written to is the same place that is created and cleaned.
OBJ_DIR = $(PROJECT_OBJ_DIR)
BIN_DIR = $(PROJECT_BIN_DIR)

MKDIR := mkdir -p

# Final executable
EXE_NAME=convolve_cl
TARGET=$(BIN_DIR)/$(EXE_NAME)

# Source file extension (c or cpp)
FILE_TYPE=cpp

# Source list and the object list derived from it (expanded once).
src := $(wildcard $(PROJECT_SRC_DIR)/*.$(FILE_TYPE))
dir := $(notdir $(src))
PROJECT_OBJ := $(patsubst %.$(FILE_TYPE),%.o,$(dir))
PROJECT_ALL_OBJS := $(addprefix $(PROJECT_OBJ_DIR)/,$(PROJECT_OBJ))

.PHONY: all chdir clean
# Remove a half-written target when its recipe fails.
.DELETE_ON_ERROR:

# Default goal: build the executable, then print the computed object lists.
all: chdir $(TARGET)
	@echo "magnum $(PROJECT_OBJ)"
	@echo "magnum $(PROJECT_OBJ_DIR)"
	@echo "magnum $(PROJECT_ALL_OBJS)"

# Link. The bin directory is an order-only prerequisite: it must exist, but
# its timestamp never forces a relink.
$(TARGET): $(PROJECT_ALL_OBJS) | $(PROJECT_BIN_DIR)
	$(CC) -o $@ $^ $(LinkDir) $(LIBS)

# Compile one source file into one object (plus its .d dependency file).
$(PROJECT_OBJ_DIR)/%.o: $(PROJECT_SRC_DIR)/%.$(FILE_TYPE) | $(PROJECT_OBJ_DIR)
	$(CC) $(CFLAGS) $(DEPFLAGS) $(IncludeDir) -o $@ -c $<

# Output directories, created on demand.
$(PROJECT_OBJ_DIR) $(PROJECT_BIN_DIR):
	$(MKDIR) $@

# Kept for compatibility: "make chdir" only creates the output directories.
chdir: $(PROJECT_OBJ_DIR) $(PROJECT_BIN_DIR)

clean:
	$(RM) -r $(PROJECT_BIN_DIR) $(PROJECT_OBJ_DIR)

# Header dependencies from previous builds; absent on the first build.
-include $(PROJECT_ALL_OBJS:.o=.d)
可以看到,相比之前的 makefile,只需要修改几个变量即可。
我这里的环境是 Ubuntu + AMD 显卡的 OpenCL 程序。虽然你们的环境可能有些不同,但这个 makefile 模板对你们要编译的程序同样适用,只需按自己的环境修改上述几个变量。