win+cuda +vs 新CUDA项目配置

step1:新建一个项目(可空)
step2:右键项目 → 属性 → 配置管理器 → 全改为“x64”
step3:右键项目 → 生成依赖项 → 生成自定义 → 勾选“CUDA 9.0XXX”
win+cuda +vs 新CUDA项目配置_第1张图片
step4:右键项目 → 属性 → C/C++ → 附加包含目录增加:
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\include
I:\cudnn\include

win+cuda +vs 新CUDA项目配置_第2张图片
step5:右键项目 → 属性 → 链接器 → 输入 → 附加依赖项增加:
cublas.lib
curand.lib
cudart.lib
测试代码:

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>

int main() {
    int deviceCount;
    cudaGetDeviceCount(&deviceCount);

    int dev;
    for (dev = 0; dev < deviceCount; dev++)
    {
        int driver_version(0), runtime_version(0);
        cudaDeviceProp deviceProp;
        cudaGetDeviceProperties(&deviceProp, dev);
        if (dev == 0)
            if (deviceProp.minor = 9999 && deviceProp.major == 9999)
                printf("\n");
        printf("\nDevice%d:\"%s\"\n", dev, deviceProp.name);
        cudaDriverGetVersion(&driver_version);
        printf("CUDA驱动版本:                                   %d.%d\n", driver_version / 1000, (driver_version % 1000) / 10);
        cudaRuntimeGetVersion(&runtime_version);
        printf("CUDA运行时版本:                                 %d.%d\n", runtime_version / 1000, (runtime_version % 1000) / 10);
        printf("设备计算能力:                                   %d.%d\n", deviceProp.major, deviceProp.minor);
        printf("Total amount of Global Memory:                  %u bytes\n", deviceProp.totalGlobalMem);
        printf("Number of SMs:                                  %d\n", deviceProp.multiProcessorCount);
        printf("Total amount of Constant Memory:                %u bytes\n", deviceProp.totalConstMem);
        printf("Total amount of Shared Memory per block:        %u bytes\n", deviceProp.sharedMemPerBlock);
        printf("Total number of registers available per block:  %d\n", deviceProp.regsPerBlock);
        printf("Warp size:                                      %d\n", deviceProp.warpSize);
        printf("Maximum number of threads per SM:               %d\n", deviceProp.maxThreadsPerMultiProcessor);
        printf("Maximum number of threads per block:            %d\n", deviceProp.maxThreadsPerBlock);
        printf("Maximum size of each dimension of a block:      %d x %d x %d\n", deviceProp.maxThreadsDim[0],
            deviceProp.maxThreadsDim[1],
            deviceProp.maxThreadsDim[2]);
        printf("Maximum size of each dimension of a grid:       %d x %d x %d\n", deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], deviceProp.maxGridSize[2]);
        printf("Maximum memory pitch:                           %u bytes\n", deviceProp.memPitch);
        printf("Texture alignmemt:                              %u bytes\n", deviceProp.texturePitchAlignment);
        printf("Clock rate:                                     %.2f GHz\n", deviceProp.clockRate * 1e-6f);
        printf("Memory Clock rate:                              %.0f MHz\n", deviceProp.memoryClockRate * 1e-3f);
        printf("Memory Bus Width:                               %d-bit\n", deviceProp.memoryBusWidth);
    }

    return 0;
}

win+cuda +vs 新CUDA项目配置_第3张图片

test2

#include "cuda_runtime.h" 
#include< stdio.h>
#include "device_launch_parameters.h" 

bool InitCUDA()

{

    int count;

    cudaGetDeviceCount(&count);

    if (count == 0)

    {

        fprintf(stderr, "There is no device.\n");

        return false;

    }

    int i;

    for (i = 0; i < count; i++)

    {

        cudaDeviceProp prop;

        if (cudaGetDeviceProperties(&prop, i) == cudaSuccess)

        {

            if (prop.major >= 1)

            {

                break;

            }

        }

    }

    if (i == count)

    {

        fprintf(stderr, "There is no device supporting CUDA 1.x.\n");

        return false;

    }

    cudaSetDevice(i);

    return true;

}



/**
 * Entry point of the second test: attempts CUDA initialization through
 * InitCUDA() and prints a greeting when it succeeds.
 *
 * The exit code is 0 whether or not initialization succeeded.
 */
int main()
{
    const bool cudaReady = InitCUDA();

    // Only greet once a device has been selected; otherwise nothing is printed here.
    if (cudaReady)
    {
        printf("HelloWorld, CUDA has been initialized.\n");
    }

    return 0;
}

你可能感兴趣的:(C++)