GPU编程 CUDA C++ 线性代数求解器 cuSolver库

cuSolver库较cuBLAS库更为高级,其能处理矩阵求逆,矩阵对角化,矩阵分解,特征值计算等问题。cuSolver库的实现是基于cuBLAS库和cuSPARSE库这两个基本库。cuSolver库的功能类似于Fortran中的LAPACK库:是Linear Algebra PACKage的简称。

以下以一个厄米矩阵的本征值(特征值)问题,代码示例cusolver.cu:

#include "error.cuh" 
#include 
#include 
#include    //必须要用的头文件

int main(void)
{
    int N = 2;
    int N2 = N * N;

    cuDoubleComplex *A_cpu = (cuDoubleComplex *)   //定义一个cuDoubleComplex类型的双精度复数的主机数组A_cpu
        malloc(sizeof(cuDoubleComplex) * N2);      //分配一个大小为 N*N 的一维主机线性内存
    for (int n = 0; n < N2; ++n) 
    {
        A_cpu[0].x = 0;
        A_cpu[1].x = 0;
        A_cpu[2].x = 0;
        A_cpu[3].x = 0;
        A_cpu[0].y = 0; 
        A_cpu[1].y = 1;    //复数元素0+i
        A_cpu[2].y = -1;   //复数元素0-i
        A_cpu[3].y = 0;
    }
    cuDoubleComplex *A;
    CHECK(cudaMalloc((void**)&A, sizeof(cuDoubleComplex) * N2));   //分配GPU显存
    CHECK(cudaMemcpy(A, A_cpu, sizeof(cuDoubleComplex) * N2,       //从主机A_cpu数组传到GPU数组A
        cudaMemcpyHostToDevice));

    double *W_cpu = (double*) malloc(sizeof(double) * N);    //定义保存矩阵本征值的主机数组W_cpu
    double *W; 
    CHECK(cudaMalloc((void**)&W, sizeof(double) * N));  //定义保存矩阵本征值的GPU数组W

    cusolverDnHandle_t handle = NULL;   //定义cusolverDnHandle类型的句柄
    cusolverDnCreate(&handle);     //初始化
    cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_NOVECTOR;   //仅计算本征值不计算本征矢
    cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;   //用下三角矩阵填充

    int lwork = 0;
    cusolverDnZheevd_bufferSize(handle, jobz, uplo,   //计算缓冲空间的大小
        N, A, N, W, &lwork);
    cuDoubleComplex* work;
    CHECK(cudaMalloc((void**)&work, 
        sizeof(cuDoubleComplex) * lwork));

    int* info;
    CHECK(cudaMalloc((void**)&info, sizeof(int)));
    cusolverDnZheevd(handle, jobz, uplo, N, A, N, W,   //计算矩阵A的本征值W
        work, lwork, info);
    cudaMemcpy(W_cpu, W, sizeof(double) * N,    //存到主机W_cpu中
        cudaMemcpyDeviceToHost);

    printf("Eigenvalues are:\n");   //打印结果
    for (int n = 0; n < N; ++n)
    {
        printf("%g\n", W_cpu[n]);
    }

    cusolverDnDestroy(handle);   //销毁句柄

    free(A_cpu);    //释放主机内存
    free(W_cpu);
    CHECK(cudaFree(A));   //释放GPU显存
    CHECK(cudaFree(W));
    CHECK(cudaFree(work));
    CHECK(cudaFree(info));

    return 0;
}

头文件error.cuh为错误检查宏CHECK函数:

#pragma once
#include 

#define CHECK(call)                                   \
{                                                     \
    const cudaError_t error_code = call;              \
    if (error_code != cudaSuccess)                    \
    {                                                 \
        printf("CUDA Error:\n");                      \
        printf("    File:       %s\n", __FILE__);     \
        printf("    Line:       %d\n", __LINE__);     \
        printf("    Error code: %d\n", error_code);   \
        printf("    Error text: %s\n",                \
            cudaGetErrorString(error_code));          \
        exit(1);                                      \
    }                                                 \
}

编译运行:

$ nvcc cusolver.cu -o cusolver
$ ./cusolver

你可能感兴趣的:(线性代数,矩阵,算法,c++,c语言)