Linux系统和Windows下MATLAB调用CUDA

1、MATLAB调用CUDA的原理:MATLAB通过mex编译带有mexFunction入口的C/C++文件,cu文件则通过nvcc编译;大致流程为:MATLAB调用C文件,C文件调用cu文件。


2、C调用cu之前,先使用system命令调用nvcc,把cu文件编译为.o文件(Windows下为.obj文件),以提供给cpp文件使用,这样C就能调用cu了。而MATLAB通过mex编译含有mexFunction的文件,就能生成MATLAB可调用的mexa64/32文件函数。


3、Linux下的MATLAB调用CUDA举例:


(1)matlab下nvmex函数:

function nvmex(cuFileName)
%NVMEX Compile a CUDA source file into a MATLAB MEX function.
%   NVMEX(CUFILENAME) runs nvcc on CUFILENAME to produce an object file
%   named <basename>.o in the current folder, then runs mex on that object
%   file, linking against the CUDA runtime (-lcudart).
%
%   The CUDA, CUDA-samples and host-compiler locations below are
%   installation specific and must be adjusted to the local machine.
%
%   Raises an error when the platform architecture is not recognised or
%   when nvcc exits with a non-zero status.

arch = computer('arch');

% Select toolchain paths for the current OS and word size.
if ispc % Windows
    Host_Compiler_Location = '-ccbin "C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin\x86_amd64"';
    CUDA_INC_Location = ['"' getenv('CUDA_PATH')  '\include"'];
    CUDA_SAMPLES_Location = ['"' getenv('NVCUDASAMPLES5_5_ROOT')  '\common\inc"'];
    PIC_Option = '';
    if strcmp(arch, 'win32')
        machine_str = ' --machine 32 ';
        CUDA_LIB_Location = ['"' getenv('CUDA_PATH')  '\lib\Win32"'];
    elseif strcmp(arch, 'win64')
        machine_str = ' --machine 64 ';
        CUDA_LIB_Location = ['"' getenv('CUDA_PATH')  '\lib\x64"'];
    else
        % Without this branch the variables above stay undefined and the
        % failure only shows up later as an "undefined variable" error.
        error('nvmex:unsupportedArch', 'Unsupported architecture: %s', arch);
    end
else % Mac and Linux (assuming gcc is on the path)
    CUDA_INC_Location = '/usr/local/cuda/include';
    CUDA_SAMPLES_Location = '/usr/local/cuda/samples/common/inc';
    Host_Compiler_Location = ' ';
    PIC_Option = ' --compiler-options -fPIC ';
    machine_str = [];
    if strcmp(arch, 'glnxa32')
        CUDA_LIB_Location = '/usr/local/cuda/lib';
    elseif strcmp(arch, 'glnxa64')
        CUDA_LIB_Location = '/usr/local/cuda/lib64';
    else
        error('nvmex:unsupportedArch', 'Unsupported architecture: %s', arch);
    end
end

% The block above detects Windows vs. Linux and 32 vs. 64 bit. Whatever the
% platform, the CUDA paths must match the local installation, otherwise the
% build fails.

% !!! End of things to modify !!!
[~, filename] = fileparts(cuFileName);

% Note: no trailing blank inside the quoted MATLAB include path; a quoted
% path ending in a space names a different (non-existent) directory.
nvccCommandLine = [ ...
    'nvcc --compile ' Host_Compiler_Location ' ' ...
    '-o '  filename '.o ' ...
    machine_str PIC_Option ...
    ' -I"' matlabroot '/extern/include"' ...
    ' -I' CUDA_INC_Location ' -I' CUDA_SAMPLES_Location ...
    ' "' cuFileName '" ' ...
    ];
mexCommandLine = ['mex ' filename  '.o'  ' -L' CUDA_LIB_Location  ' -lcudart'];

disp(nvccCommandLine);

% Silence warnings only for the duration of the nvcc call and restore the
% caller's warning state afterwards (a bare "warning on" would re-enable
% warnings the caller had deliberately switched off).
previousWarningState = warning('off', 'all');
status = system(nvccCommandLine);  % produces the .o object file
warning(previousWarningState);

% system returns the command's exit status: zero on success, non-zero
% (usually positive) on failure, so "status < 0" would miss real errors.
if status ~= 0
    error('nvmex:nvccFailed', 'Error invoking nvcc (exit status %d)', status);
end

disp(mexCommandLine);
% Run mex on the object file to build the MEX function callable from MATLAB.
eval(mexCommandLine);

% If the CUDA code and the C++ gateway live in separate files, add the .cpp
% file to the mex command next to the object file, e.g.
%   mexCommandLine = ['mex ' filename '.cpp ' filename '.o' ' -L' CUDA_LIB_Location ' -lcudart'];
% The names used there may also be chosen freely.

end


(2)nvmex的调用:

clc; clear; close all

% Build the MEX function from the CUDA source. mex names the resulting MEX
% function after the compiled file's base name, so the call below must use
% that same name (file names are case sensitive on Linux).
nvmex('addVectors.cu');

A = [1 2 3];
B = [4 5 6];
% No trailing semicolon: the element-wise sum is printed to the console.
C = addVectors(A, B)


(3)cu文件的编译:addVectors.cu

#include "AddVectors.h"
#include <cuda_runtime.h>
#include "mex.h"
#include <stdlib.h>
/*
 * Element-wise addition kernel: devPtrC[i] = devPtrA[i] + devPtrB[i]
 * for every i in [0, size).
 *
 * Launch layout: 1-D grid of 1-D blocks. Each thread starts at its global
 * index and advances by the total number of launched threads, so any grid
 * and block size produces a correct result and no thread touches an
 * element at or beyond `size`.
 * Shared memory: none. All three pointers must be device pointers to at
 * least `size` doubles.
 */
__global__ void addVectorsMask(double *devPtrA, double *devPtrB, double *devPtrC, int size)
{
    if (size <= 0)
        return;

    // 64-bit index arithmetic so blockIdx.x * blockDim.x cannot overflow.
    const size_t count  = (size_t)size;
    const size_t stride = (size_t)gridDim.x * blockDim.x;
    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < count; i += stride)
        devPtrC[i] = devPtrA[i] + devPtrB[i];
}


/*
 * Host-side bridge between the C/MEX code and the CUDA kernel.
 *
 * Copies the host vectors A and B (each `size` doubles) to the device, adds
 * them element-wise on the GPU and copies the result into the host buffer C
 * (also `size` doubles). Does nothing when size <= 0.
 *
 * On any CUDA failure the device buffers are released first and the CUDA
 * error string is then reported through mexErrMsgTxt.
 */
void addVectors(double *A, double *B, double *C, int size)
{
    if (size <= 0)
        return;

    const size_t bytes = sizeof(double) * (size_t)size;
    double *devPtrA = NULL;
    double *devPtrB = NULL;
    double *devPtrC = NULL;

    // Each step runs only if everything before it succeeded, so the first
    // failure is the one that gets reported.
    cudaError_t err = cudaMalloc(&devPtrA, bytes);
    if (err == cudaSuccess) err = cudaMalloc(&devPtrB, bytes);
    if (err == cudaSuccess) err = cudaMalloc(&devPtrC, bytes);

    if (err == cudaSuccess) err = cudaMemcpy(devPtrA, A, bytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess) err = cudaMemcpy(devPtrB, B, bytes, cudaMemcpyHostToDevice);

    if (err == cudaSuccess) {
        // A single block of `size` threads fails to launch once `size`
        // exceeds the per-block thread limit; use fixed-size blocks and
        // enough of them to cover the whole vector instead.
        const int threadsPerBlock = 256;
        const int blocks = (int)(((size_t)size + threadsPerBlock - 1) / threadsPerBlock);
        addVectorsMask<<<blocks, threadsPerBlock>>>(devPtrA, devPtrB, devPtrC, size);
        err = cudaGetLastError();   // launch-configuration errors
    }

    // Blocking copy: waits for the kernel and surfaces execution errors.
    if (err == cudaSuccess) err = cudaMemcpy(C, devPtrC, bytes, cudaMemcpyDeviceToHost);

    // Pointers that were never allocated are still NULL here.
    cudaFree(devPtrA);
    cudaFree(devPtrB);
    cudaFree(devPtrC);

    if (err != cudaSuccess)
        mexErrMsgTxt(cudaGetErrorString(err));
}

/*
 * MEX gateway: the interface between MATLAB and the C/CUDA code.
 *
 * MATLAB usage: C = f(A, B), where A and B are real, full, double-precision
 * vectors of identical size. The output has the same dimensions as A and
 * holds the element-wise sum computed by addVectors().
 *
 * nlhs / plhs: number of requested outputs and the output array slots.
 * nrhs / prhs: number of supplied inputs and the input arrays.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, mxArray *prhs[])
{
   // Check the number of input arguments.
   if (nrhs != 2)
       mexErrMsgTxt("Invalid number of input arguments");

   // Zero requested outputs is a legal call form (the result goes to
   // `ans`), so only more than one output is rejected.
   if (nlhs > 1)
       mexErrMsgTxt("Invalid number of outputs");

   // Check the input types: mxGetPr is only meaningful for real, full
   // double arrays.
   for (int k = 0; k < 2; ++k) {
       if (!mxIsDouble(prhs[k]) || mxIsComplex(prhs[k]) || mxIsSparse(prhs[k]))
           mexErrMsgTxt("Input vectors must be real, full, double-precision arrays");
   }

   // Input dimensions.
   // mxGetM: number of rows of the input matrix.
   // mxGetN: number of columns of the input matrix.
   int numRowsA = (int)mxGetM(prhs[0]);   // prhs[0] is the first input
   int numColsA = (int)mxGetN(prhs[0]);
   int numRowsB = (int)mxGetM(prhs[1]);   // prhs[1] is the second input
   int numColsB = (int)mxGetN(prhs[1]);

   // Both inputs must have the same dimensions.
   if (numRowsA != numRowsB || numColsA != numColsB)
       mexErrMsgTxt("Invalid size. The sizes of two vectors must be same");

   // A vector has exactly one dimension of length 1.
   int minSize = (numRowsA < numColsA) ? numRowsA : numColsA;
   if (minSize != 1)
       mexErrMsgTxt("Invalid size. The vector must be one dimensional");

   int size = numRowsA * numColsA;

   // Pointers to the input data.
   double *A = mxGetPr(prhs[0]);
   double *B = mxGetPr(prhs[1]);

   // Allocate the output with the same dimensions as the inputs.
   plhs[0] = mxCreateDoubleMatrix(numRowsA, numColsA, mxREAL);

   // Pointer to the output data.
   double *C = mxGetPr(plhs[0]);

   addVectors(A, B, C, size);
}





你可能感兴趣的:(CUDA,MATLAB,C/C++)