1、MATLAB调用CUDA是通过:MATLAB通过mexFunction编译C,通过nvcc编译cu文件实现的,大致流程为:MATLAB调用C文件,C文件调用cu文件。
2、C调用cu之前,先使用system调用nvcc,将cu文件编译为.o文件,供C/C++文件链接使用,这样C就能调用cu了。而MATLAB通过mex编译包含mexFunction的文件,就能生成MATLAB可调用的mexa64/32函数文件。
3、Linux下的MATLAB调用CUDA举例:
(1)matlab下nvmex函数:
function nvmex(cuFileName)
%NVMEX Compile a CUDA source file with nvcc and wrap the object file with mex.
%   NVMEX(cuFileName) runs "nvcc --compile" on cuFileName to produce
%   <name>.o in the current folder, then runs "mex <name>.o" linked against
%   the CUDA runtime library (cudart).
%
%   The compiler and CUDA locations below are hard-coded per platform and
%   must be adapted to the local installation.
%
%   Raises an error if the platform architecture is not recognised or if
%   nvcc exits with a non-zero status.

if ispc % Windows
    Host_Compiler_Location = '-ccbin "C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin\x86_amd64"';
    CUDA_INC_Location = ['"' getenv('CUDA_PATH') '\include"'];
    CUDA_SAMPLES_Location = ['"' getenv('NVCUDASAMPLES5_5_ROOT') '\common\inc"'];
    PIC_Option = '';
    switch computer('arch')
        case 'win32'
            machine_str = ' --machine 32 ';
            CUDA_LIB_Location = ['"' getenv('CUDA_PATH') '\lib\Win32"'];
        case 'win64'
            machine_str = ' --machine 64 ';
            CUDA_LIB_Location = ['"' getenv('CUDA_PATH') '\lib\x64"'];
        otherwise
            error('nvmex:unsupportedArch', ...
                'Unsupported architecture "%s": no CUDA library path configured.', computer('arch'));
    end
else % Mac and Linux (assuming gcc is on the path)
    CUDA_INC_Location = '/usr/local/cuda/include';
    CUDA_SAMPLES_Location = '/usr/local/cuda/samples/common/inc';
    Host_Compiler_Location = ' ';
    PIC_Option = ' --compiler-options -fPIC ';
    machine_str = [];
    switch computer('arch')
        case 'glnxa32'
            CUDA_LIB_Location = '/usr/local/cuda/lib';
        case 'glnxa64'
            CUDA_LIB_Location = '/usr/local/cuda/lib64';
        otherwise
            % Previously this fell through and failed later with an
            % "undefined variable" error; fail here with a clear message.
            error('nvmex:unsupportedArch', ...
                'Unsupported architecture "%s": no CUDA library path configured.', computer('arch'));
    end
end
% The section above selects paths by operating system and by 32/64-bit
% architecture. Whatever the platform, the CUDA paths must match the local
% installation, otherwise the build fails.
% !!! End of things to modify !!!

[~, filename] = fileparts(cuFileName);

% Quote the MATLAB include directory without any padding inside the quotes:
% a trailing space would become part of the directory name.
matlabIncludeDir = fullfile(matlabroot, 'extern', 'include');

nvccCommandLine = [ ...
    'nvcc --compile ' Host_Compiler_Location ' ' ...
    '-o ' filename '.o ' ...
    machine_str PIC_Option ...
    ' -I"' matlabIncludeDir '"' ...
    ' -I' CUDA_INC_Location ' -I' CUDA_SAMPLES_Location ...
    ' "' cuFileName '" '
    ];
mexCommandLine = ['mex ' filename '.o' ' -L' CUDA_LIB_Location ' -lcudart'];

disp(nvccCommandLine);

% Silence warnings while nvcc runs, then restore the caller's exact warning
% state (a plain "warning on" would re-enable warnings the caller disabled).
previousWarningState = warning('off', 'all');
restoreWarnings = onCleanup(@() warning(previousWarningState));
status = system(nvccCommandLine); % produces <name>.o; status is the command's exit status
clear restoreWarnings;

% system returns 0 on success and a non-zero value on failure, so a
% "status < 0" test would miss ordinary compiler errors.
if status ~= 0
    error('nvmex:nvccFailed', 'Error invoking nvcc (exit status %d)', status);
end

disp(mexCommandLine);
eval(mexCommandLine); % link the object file into a MEX function callable from MATLAB
% If the CUDA code and the C++ gateway live in separate files, pass both to mex:
%   mexCommandLine = ['mex ' filename '.cpp ' filename '.o' ' -L' CUDA_LIB_Location ' -lcudart'];
% The base name used for the .cpp/.o files can also be chosen freely.
end
(2)nvmex的调用:
% Demo: build the MEX function from the CUDA source, then add two vectors.
clc; clear; close all
% mex names the generated binary after the source file, so the file name
% must match the function called below (AddVectors), including letter case.
nvmex('AddVectors.cu');
A = [1 2 3];
B = [4 5 6];
C = AddVectors(A, B) % no semicolon: display the result
(3)cu文件的内容:AddVectors.cu
// NOTE(review): the two system header names were missing from the original
// text (bare "#include"); <cuda_runtime.h> and <stdlib.h> are a reconstruction
// based on the CUDA runtime calls and malloc/free used below - confirm.
#include "AddVectors.h"
#include <cuda_runtime.h>
#include "mex.h"
#include <stdlib.h>
/*
 * Element-wise vector addition: devPtrC[i] = devPtrA[i] + devPtrB[i] for
 * every i in [0, size).
 *
 * Launch layout: 1-D grid of 1-D blocks of any size. Each thread starts at
 * its global index and advances by the total number of launched threads, so
 * the result is correct whether or not the grid covers `size` exactly.
 * No shared memory is used, so no block-level barrier is required.
 *
 * All three pointers must refer to device memory holding at least `size`
 * doubles.
 */
__global__ void addVectorsMask(double *devPtrA, double *devPtrB, double *devPtrC, int size)
{
    if (size <= 0)
        return;

    // Index in size_t so blockIdx.x * blockDim.x cannot overflow 32 bits.
    const size_t count  = (size_t)size;
    const size_t stride = (size_t)gridDim.x * blockDim.x;
    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < count; i += stride)
        devPtrC[i] = devPtrA[i] + devPtrB[i];
}

/*
 * Host-side entry point (C <-> CUDA interface): computes C = A + B on the GPU.
 *
 * A, B : host input arrays of `size` doubles.
 * C    : host output array of `size` doubles.
 * size : number of elements; values <= 0 are treated as "nothing to do".
 *
 * On any CUDA failure the device buffers are released first and the error is
 * then reported to MATLAB through mexErrMsgIdAndTxt, which does not return.
 */
void addVectors(double *A, double *B, double *C, int size)
{
    // A launch with zero threads is an invalid configuration, so bail out early.
    if (size <= 0)
        return;

    const size_t bytes = sizeof(double) * (size_t)size;
    double *devPtrA = 0, *devPtrB = 0, *devPtrC = 0;

    // Each step runs only if everything before it succeeded; the first
    // failure is kept in `err` and reported after cleanup.
    cudaError_t err = cudaMalloc(&devPtrA, bytes);
    if (err == cudaSuccess)
        err = cudaMalloc(&devPtrB, bytes);
    if (err == cudaSuccess)
        err = cudaMalloc(&devPtrC, bytes);
    if (err == cudaSuccess)
        err = cudaMemcpy(devPtrA, A, bytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess)
        err = cudaMemcpy(devPtrB, B, bytes, cudaMemcpyHostToDevice);

    if (err == cudaSuccess) {
        // A single block is limited to 1024 threads (fewer on old devices),
        // so split the work across blocks instead of launching <<<1, size>>>.
        const int threadsPerBlock = 256;
        // 65535 is the grid x-dimension limit on devices of compute
        // capability < 3.0; the kernel's stride loop covers any remainder.
        const int maxBlocks = 65535;
        int blocks = (size - 1) / threadsPerBlock + 1; // ceil-div without int overflow
        if (blocks > maxBlocks)
            blocks = maxBlocks;

        addVectorsMask<<<blocks, threadsPerBlock>>>(devPtrA, devPtrB, devPtrC, size);
        err = cudaGetLastError(); // launch-configuration errors surface here
    }

    // The blocking copy also waits for the kernel and reports execution errors.
    if (err == cudaSuccess)
        err = cudaMemcpy(C, devPtrC, bytes, cudaMemcpyDeviceToHost);

    // cudaFree on a null pointer is a no-op, so this is safe on every path.
    cudaFree(devPtrA);
    cudaFree(devPtrB);
    cudaFree(devPtrC);

    if (err != cudaSuccess)
        mexErrMsgIdAndTxt("AddVectors:cudaError", "CUDA error: %s", cudaGetErrorString(err));
}
/*
 * MEX gateway (C <-> MATLAB interface): C = AddVectors(A, B).
 *
 * nlhs / plhs : number of requested outputs and the output array slots.
 * nrhs / prhs : number of inputs and the (read-only) input arrays.
 *
 * Expects exactly two inputs that are real, full, double-precision vectors
 * of identical size (1-by-N or N-by-1). Produces one output of the same
 * shape containing the element-wise sum. Any violation is reported through
 * mexErrMsgTxt, which aborts the call and returns control to MATLAB.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    // Check the number of input and output arguments. nlhs == 0 is legal:
    // MATLAB then stores plhs[0] in `ans`.
    if (nrhs != 2)
        mexErrMsgTxt("Invalid number of input arguments");
    if (nlhs > 1)
        mexErrMsgTxt("Invalid number of outputs");

    // mxGetPr is only meaningful for real, full double arrays; reject
    // anything else before the data pointer is handed to the GPU code.
    for (int k = 0; k < 2; ++k) {
        if (!mxIsDouble(prhs[k]) || mxIsComplex(prhs[k]) || mxIsSparse(prhs[k]))
            mexErrMsgTxt("Input vectors must be real, full, double-precision arrays");
    }

    // mxGetM / mxGetN return the number of rows / columns of each input.
    const size_t numRowsA = mxGetM(prhs[0]);
    const size_t numColsA = mxGetN(prhs[0]);
    const size_t numRowsB = mxGetM(prhs[1]);
    const size_t numColsB = mxGetN(prhs[1]);

    // Both inputs must have the same shape.
    if (numRowsA != numRowsB || numColsA != numColsB)
        mexErrMsgTxt("Invalid size. The sizes of two vectors must be same");

    // A vector has exactly one dimension of length 1.
    const size_t minSize = (numRowsA < numColsA) ? numRowsA : numColsA;
    if (minSize != 1)
        mexErrMsgTxt("Invalid size. The vector must be one dimensional");

    // addVectors takes an int element count; refuse lengths that do not
    // survive the round trip through int instead of truncating silently.
    const size_t numElements = numRowsA * numColsA;
    const int size = (int)numElements;
    if (size < 0 || (size_t)size != numElements)
        mexErrMsgTxt("Invalid size. The vector has too many elements");

    // Pointers to the input data.
    double *A = mxGetPr(prhs[0]);
    double *B = mxGetPr(prhs[1]);

    // Allocate the output with the same shape as the inputs and get its data pointer.
    plhs[0] = mxCreateDoubleMatrix((mwSize)numRowsA, (mwSize)numColsA, mxREAL);
    double *C = mxGetPr(plhs[0]);

    addVectors(A, B, C, size);
}