G++与NVCC混合编译

本文给出了一个使用 G++ 与 NVCC 混合编译的例子:主机端的 C++ 代码由 G++ 编译,CUDA 代码由 NVCC 编译,最后由 G++ 完成链接。该例子实现两个向量的相加。

CPP文件

/**
 * @file    vectorAdd.cpp
 * @date    Thu 22 Aug 2019 09:37:03 AM CST
 ******************************************************************************/

#include <iostream>
using namespace std;

// Number of elements in each of the vectors a, b and c used by main().
const int N = 1024;

// Declared with C linkage so the symbol name is not C++-mangled; the
// definition lives in vectorAdd.cu (also extern "C") and is resolved when the
// two object files are linked together.
extern "C"
void vectorAdd(int *a, int *b, int *c, const int n);

/**
 * Builds two N-element input vectors, adds them with vectorAdd() and prints
 * every element of the result followed by a tab, then a final newline.
 *
 * The command-line arguments are not used.
 */
int main(int argc, char const* argv[])
{
    (void)argc;
    (void)argv;

    int a[N], b[N], c[N];

    // a = 0, 2, 4, ...; b = all ones; c is cleared before the call.
    int k = 0;
    while (k < N) {
        c[k] = 0;
        b[k] = 1;
        a[k] = k + k;
        ++k;
    }

    vectorAdd(a, b, c, N);

    // Print the sums in index order, tab-separated.
    for (const int *p = c; p != c + N; ++p) {
        std::cout << *p << "\t";
    }
    std::cout << std::endl;

    return 0;
}

CU文件

/**
 * @file    vectorAdd.cu
 * @date    Thu 22 Aug 2019 09:36:58 AM CST
 ******************************************************************************/

#include <stddef.h>
#include <stdio.h>
#include <cuda_runtime.h>

/**
 * Element-wise integer addition: dc[i] = da[i] + db[i].
 *
 * Each thread handles at most one element, chosen by its flat 1D global
 * index (blockIdx.x * blockDim.x + threadIdx.x). Threads whose index is not
 * below n do nothing, so the launch may contain more threads than elements.
 *
 * @param da  first input vector (dereferenced on the device)
 * @param db  second input vector (dereferenced on the device)
 * @param dc  output vector (written on the device)
 * @param n   number of elements to process
 */
__global__ void vectorAddKernel(int *da, int *db, int *dc, const int n)
{
    const unsigned int gid = blockIdx.x * blockDim.x + threadIdx.x;

    // Surplus threads past the end of the data have nothing to do.
    if (gid >= n) {
        return;
    }

    dc[gid] = db[gid] + da[gid];
}

/**
 * Adds two host vectors on the GPU: c[i] = a[i] + b[i] for 0 <= i < n.
 *
 * Copies a and b into temporary device buffers, launches vectorAddKernel with
 * at least one thread per element, copies the result back into c and releases
 * the device buffers. Declared extern "C" so the symbol is not C++-mangled
 * and can be linked from a separately compiled translation unit.
 *
 * If any CUDA call fails, the error string is printed to stderr, the
 * remaining steps are skipped and c is left untouched.
 *
 * @param a  host pointer to the first input vector (n ints)
 * @param b  host pointer to the second input vector (n ints)
 * @param c  host pointer to the output vector (n ints)
 * @param n  number of elements; nothing is done when n <= 0
 */
extern "C"
void vectorAdd(int *a, int *b, int *c, const int n)
{
    // Nothing to add; also avoids a zero-block launch and a negative size.
    if (n <= 0) {
        return;
    }

    const size_t bytes = (size_t)n * sizeof(int);
    int *da = NULL, *db = NULL, *dc = NULL;

    // Each step only runs if everything before it succeeded.
    cudaError_t err = cudaMalloc((void**)(&da), bytes);
    if (err == cudaSuccess) {
        err = cudaMalloc((void**)(&db), bytes);
    }
    if (err == cudaSuccess) {
        err = cudaMalloc((void**)(&dc), bytes);
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(da, a, bytes, cudaMemcpyHostToDevice);
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(db, b, bytes, cudaMemcpyHostToDevice);
    }

    if (err == cudaSuccess) {
        const int nt = 256;                        // threads per block
        const int nb = n / nt + (n % nt != 0);     // ceil(n / nt) without int overflow

        // Execution configuration order is <<<blocks, threads-per-block>>>.
        vectorAddKernel<<<nb, nt>>>(da, db, dc, n);

        // A launch does not return a status; fetch launch errors explicitly.
        err = cudaGetLastError();
    }

    if (err == cudaSuccess) {
        // Blocking copy: waits for the kernel and reports its execution errors.
        err = cudaMemcpy(c, dc, bytes, cudaMemcpyDeviceToHost);
    }

    if (err != cudaSuccess) {
        fprintf(stderr, "vectorAdd: CUDA error: %s\n", cudaGetErrorString(err));
    }

    // cudaFree accepts a null pointer, so buffers that were never allocated
    // are safe to pass here.
    cudaFree(da);
    cudaFree(db);
    cudaFree(dc);
}

编译

# Compile the host-only C++ source with g++ (-c: compile to an object file, do not link).
g++ vectorAdd.cpp -c -o vectorAdd.cpp.o
# Compile the CUDA source with nvcc into its own object file.
nvcc vectorAdd.cu -c -o vectorAdd.cu.o
# Link both objects with g++ against the CUDA runtime library (libcudart);
# replace /path/to/cuda/lib64 with the library directory of the CUDA installation.
g++ -o vectorAdd vectorAdd.cpp.o vectorAdd.cu.o -lcudart -L/path/to/cuda/lib64

你可能感兴趣的:(C++,C++)