Testing the GPU Version of FLANN

Please credit the source when reposting!

CUDA version: 10.2
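Build note: the code below assumes a FLANN build with CUDA support, with FLANN_USE_CUDA defined before including flann.hpp and the program linked against the flann_cuda library; because it uses thrust containers and make_float4, it is assumed to be compiled as a .cu file with nvcc.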

#define FLANN_USE_CUDA
#include <flann/flann.hpp>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <cstdio>
#include <vector>

#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/copy.h>
//
int main()
{
    int tmpDataSize = 1000;
    float* tmpData(new float[tmpDataSize * 3]);
    float* tmpDataq(new float[tmpDataSize * 3]);
    int tmpNN = 4;

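    // Test setup: the dataset holds tmpDataSize synthetic 3D points (i, i+1, i+2),
    // the query set holds points (i+1, i+1, i+1), and each query asks for tmpNN
    // nearest neighbors.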
    for (int iPt = 0; iPt < tmpDataSize; ++iPt)
    {
        tmpData[iPt * 3] = iPt;
        tmpData[iPt * 3 + 1] = iPt + 1;
        tmpData[iPt * 3 + 2] = iPt + 2;
    }
    for (int iPt = 0; iPt < tmpDataSize; ++iPt)
    {
        tmpDataq[iPt * 3] = iPt + 1;
        tmpDataq[iPt * 3 + 1] = iPt + 1;
        tmpDataq[iPt * 3 + 2] = iPt + 1;
    }

    std::cout << "tmpData 012: " << tmpData[0] << " " << tmpData[1] << " " << tmpData[2] << std::endl;
    flann::Matrix<float> dataset(tmpData, tmpDataSize, 3);
    flann::Matrix<float> query(tmpDataq, tmpDataSize, 3);
    std::cout << "dataset 012: " << dataset[0][0] << " " << dataset[0][1] << " " << dataset[0][2] << std::endl;

    thrust::host_vector<float4> data_host(tmpDataSize);

    std::cout << "dataset.rows: " << dataset.rows << std::endl;
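    // Pack each 3D point into a float4 (the w component is unused); the CUDA
    // kd-tree index works on float4-packed points, so the data is staged this
    // way before being copied to the GPU.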
    for (int i = 0; i < dataset.rows; i++)
    {

        data_host[i] = make_float4(dataset[i][0], dataset[i][1], dataset[i][2], 0);
        //std::cout << dataset[i][0]<<" "<< dataset[i][1]<<" "<< dataset[i][2] << std::endl;
    }
    thrust::device_vector<float4> data_device = data_host;
    float4 a = data_device[0];
    std::cout << "data:" << a.x << a.y << a.z << std::endl;

    thrust::host_vector<float4> query_host(dataset.rows);
    /* thrust::host_vector<float4> query_host;
     query_host.resize(dataset.rows);*/
    for (int i = 0; i < dataset.rows; i++)
    {
        query_host[i] = make_float4(query[i][0], query[i][1], query[i][2], 0);
       // std::cout << "query:" << query[i][0] << " " << query[i][1] << " " << query[i][2] << std::endl;
    }
    thrust::device_vector<float4> query_device = query_host;
    std::cout << "query:" << std::endl;
    float4 q = query_device[0];
    std::cout << q.x << q.y << q.z << q.w << std::endl;//0120

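    // Wrap the raw device pointers in flann::Matrix objects: 3 logical columns per
    // point with a stride of 4 * sizeof(float), because each point occupies a full
    // float4 on the device.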
    flann::Matrix<float> data_device_matrix((float*)thrust::raw_pointer_cast(&data_device[0]), tmpDataSize, 3/*3D*/, 4 * sizeof(float)/*stride*/);
    flann::Matrix<float> query_device_matrix((float*)thrust::raw_pointer_cast(&query_device[0]), tmpDataSize, 3, 4 * sizeof(float));
    if (query_device_matrix.ptr() == nullptr || data_device_matrix.ptr() == nullptr)
        std::cout << "NULL" << std::endl;
    else
        std::cout << "NOT NULL PTR" << std::endl;


    int* indices = new int[query.rows * tmpNN];
    float* dists = new float[query.rows * tmpNN];
    //malloc or new
    flann::Matrix<int> cpu_indices(indices, query.rows, tmpNN);
    flann::Matrix<float> cpu_dists(dists, query.rows, tmpNN);
    //
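    // Configure the CUDA 3D index; input_is_gpu_float4 indicates that the dataset
    // matrix wraps float4 data already resident in GPU memory, so the index is
    // built directly from the device buffer.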
    flann::KDTreeCuda3dIndexParams index_params;
    index_params["input_is_gpu_float4"] = true;
    /* flann::Index<flann::L2<float> > tmpGPUIndex(data_device_matrix, index_params); */
#ifdef UNEXPECTEDLY_FINISHED_CRASH
    flann::Index<flann::L2<float> > index(data_device_matrix, index_params);
#else
    flann::KDTreeCuda3dIndex<flann::L2<float> > GPUIndex(data_device_matrix, index_params);
#endif


    //Build KdTree
    GPUIndex.buildIndex();


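    // Output buffers for knnSearch, allocated directly in GPU memory.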
    thrust::device_vector<int> indices_device(query.rows * tmpNN);
    thrust::device_vector<float> dists_device(query.rows * tmpNN);



    flann::Matrix<int> indices_device_matrix((int*)thrust::raw_pointer_cast(&indices_device[0]), tmpDataSize, tmpNN);
    flann::Matrix<float> dists_device_matrix((float*)thrust::raw_pointer_cast(&dists_device[0]), tmpDataSize, tmpNN);

    cpu_indices.cols = tmpNN;
    cpu_dists.cols = tmpNN;
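    // matrices_in_gpu_ram tells knnSearch that the query/index/distance matrices
    // passed to it are device pointers, so the search reads and writes GPU memory
    // directly instead of copying through the host.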
    flann::SearchParams sp;
    sp.matrices_in_gpu_ram = true;

    GPUIndex.knnSearch(query_device_matrix, indices_device_matrix, dists_device_matrix, tmpNN, sp);


    /* int* indices = new int[tmpDataSize * tmpNN];
     float* dists = new float[tmpDataSize * tmpNN];*/

    flann::Matrix<int> indices_host(new int[query.rows * tmpNN], query.rows, tmpNN);
    flann::Matrix<float> dists_host(new float[query.rows * tmpNN], query.rows, tmpNN);

    printf("size: %zu\n", dists_device.size());
    std::cout << "dist:" << std::endl;
    float d0 = dists_device[tmpDataSize - 1];
    int i0 = indices_device[tmpDataSize - 1];
    printf("d0 i0: %f ,%d\n", d0, i0);
    std::cout << "size: " << dists_device.size() << std::endl;


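    // Copy the GPU results back to the host: into plain std::vectors for printing
    // and into the flann::Matrix host buffers.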
    std::vector<float> dis_vector(dists_device.size());
    std::vector<int> ind_vector(indices_device.size());
    thrust::copy(dists_device.begin(), dists_device.end(), dis_vector.begin());
    thrust::copy(indices_device.begin(), indices_device.end(), ind_vector.begin());


    thrust::copy(dists_device.begin(), dists_device.end(), dists_host.ptr());
    thrust::copy(indices_device.begin(), indices_device.end(), indices_host.ptr());


    for (int i = 0; i < tmpDataSize; i++)
        for (int j = 0; j < tmpNN; j++)
        {
            printf("dist_host: %f\n", dis_vector[tmpNN * i + j]);
            printf("indices_host: %d\n", ind_vector[tmpNN * i + j]);
            /*  std::cout << "dist_host[" << i << "]" << "[" << j << "]:" << dists_host[i][j] << std::endl;
              std::cout << "indices: " << indices_host[i][j] << std::endl;*/
        }
    // std::cout << "dist_vec size: " << dis_vector.size() << std::endl;

    delete[] indices_host.ptr();
    delete[] dists_host.ptr();
    delete[] indices;
    delete[] dists;
    delete[] tmpDataq;
    delete[] tmpData;

    return 0;
}
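For comparison, the FLANN manual also describes a simpler path where the CUDA 3D index is built from ordinary host-memory matrices and the library performs the host/device transfers itself. The sketch below illustrates that variant under the same assumptions as above (FLANN_USE_CUDA defined, flann_cuda linked); the function name knnHostPathSketch and its parameters are illustrative only, and this variant has not been benchmarked against the float4/device-pointer path used in this test.

// Minimal sketch of the host-memory path for the CUDA 3D kd-tree.
void knnHostPathSketch(float* points, float* queries, int n, int nn)
{
    flann::Matrix<float> dataset(points, n, 3);   // host data, n rows, 3 columns
    flann::Matrix<float> query(queries, n, 3);

    flann::Matrix<int>   indices(new int[n * nn], n, nn);
    flann::Matrix<float> dists(new float[n * nn], n, nn);

    // Build the index from host matrices; FLANN copies the data to the GPU internally.
    flann::KDTreeCuda3dIndex<flann::L2<float> > index(dataset, flann::KDTreeCuda3dIndexParams());
    index.buildIndex();

    // Default SearchParams: query and result matrices stay in host RAM.
    index.knnSearch(query, indices, dists, nn, flann::SearchParams());

    delete[] indices.ptr();
    delete[] dists.ptr();
}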

References:
FLANN fast approximate nearest neighbor algorithm, official guide (oxidane-lin's CSDN blog): https://blog.csdn.net/weixin_45687825/article/details/110881552
PCLCN forum thread: https://www.pclcn.org/bbs/forum.php?mod=viewthread&tid=2166
