CUDA version: 10.2
#define FLANN_USE_CUDA
#include <iostream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <flann/flann.hpp>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/copy.h>
#include <vector>
#include <cstdio>
//
int main()
{
int tmpDataSize = 1000;
float* tmpData(new float[tmpDataSize * 3]);
float* tmpDataq(new float[tmpDataSize * 3]);
int tmpNN = 4;
for (int iPt = 0; iPt < tmpDataSize; ++iPt)
{
tmpData[iPt * 3] = iPt;
tmpData[iPt * 3 + 1] = iPt + 1;
tmpData[iPt * 3 + 2] = iPt + 2;
}
for (int iPt = 0; iPt < tmpDataSize; ++iPt)
{
tmpDataq[iPt * 3] = iPt + 1;
tmpDataq[iPt * 3 + 1] = iPt + 1;
tmpDataq[iPt * 3 + 2] = iPt + 1;
}
std::cout << "tmpData 012: " << tmpData[0] << " " << tmpData[1] << " " << tmpData[2] << std::endl;
flann::Matrix<float> dataset(tmpData, tmpDataSize, 3);
flann::Matrix<float> query(tmpDataq, tmpDataSize, 3);
std::cout << "dataset 012: " << dataset[0][0] << " " << dataset[0][1] << " " << dataset[0][2] << std::endl;
thrust::host_vector<float4> data_host(tmpDataSize);
std::cout << "dataset.rows: " << dataset.rows << std::endl;
for (int i = 0; i < dataset.rows; i++)
{
data_host[i] = make_float4(dataset[i][0], dataset[i][1], dataset[i][2], 0);
//std::cout << dataset[i][0]<<" "<< dataset[i][1]<<" "<< dataset[i][2] << std::endl;
}
thrust::device_vector<float4> data_device = data_host;
float4 a = data_device[0];
std::cout<<"data:" << a.x << a.y << a.z < query_host(dataset.rows);
/* thrust::host_vector query_host;
query_host.resize(dataset.rows);*/
for (int i = 0; i < dataset.rows; i++)
{
query_host[i] = make_float4(query[i][0], query[i][1], query[i][2], 0);
// std::cout << "query:" << query[i][0] << " " << query[i][1] << " " << query[i][2] << std::endl;
}
thrust::device_vector<float4> query_device = query_host;
std::cout << "query:" << std::endl;
float4 q = query_device[0];
std::cout << q.x << q.y << q.z << q.w << std::endl;//0120
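// Wrap the raw device pointers in flann::Matrix<float> views: 3 logical columns, but a
// row stride of 4 * sizeof(float) because each point is stored as a padded float4 on the GPU.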
flann::Matrix<float> data_device_matrix((float*)thrust::raw_pointer_cast(&data_device[0]), tmpDataSize, 3/*3D*/, 4 * sizeof(float)/*stride*/);
flann::Matrix<float> query_device_matrix((float*)thrust::raw_pointer_cast(&query_device[0]), tmpDataSize, 3, 4 * sizeof(float));
if (query_device_matrix.ptr() == nullptr || data_device_matrix.ptr() == nullptr)
std::cout << "NULL" << std::endl;
else
std::cout << "NOT NULL PTR" << std::endl;
int* indices = new int[query.rows * tmpNN];
float* dists = new float[query.rows * tmpNN];
//malloc or new
flann::Matrix<int> cpu_indices(indices, query.rows, tmpNN);
flann::Matrix<float> cpu_dists(dists, query.rows, tmpNN);
//
flann::KDTreeCuda3dIndexParams index_params;
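// input_is_gpu_float4 tells the CUDA index that the dataset matrix already points to
// float4 data resident in GPU memory, so no host-to-device copy is made when the index is built.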
index_params["input_is_gpu_float4"] = true;
/*flann::Index<flann::L2<float> > tmpGPUIndex(data_device_matrix, index_params);*/
#ifdef UNEXPECTEDLY_FINISHED_CRASH
// building through the generic flann::Index wrapper reportedly crashed ("unexpectedly finished"), hence the macro guard
flann::Index<flann::L2<float> > index(data_device_matrix, index_params);
#else
flann::KDTreeCuda3dIndex<flann::L2<float> > GPUIndex(data_device_matrix, index_params);
#endif
//Build KdTree
GPUIndex.buildIndex();
thrust::device_vector<int> indices_device(query.rows * tmpNN);
thrust::device_vector<float> dists_device(query.rows * tmpNN);
flann::Matrix<int> indices_device_matrix((int*)thrust::raw_pointer_cast(&indices_device[0]), tmpDataSize, tmpNN);
flann::Matrix<float> dists_device_matrix((float*)thrust::raw_pointer_cast(&dists_device[0]), tmpDataSize, tmpNN);
cpu_indices.cols = tmpNN;
cpu_dists.cols = tmpNN;
flann::SearchParams sp;
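// matrices_in_gpu_ram signals that the query/index/distance matrices are device pointers,
// so knnSearch writes its results directly into GPU memory instead of copying them back to the host.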
sp.matrices_in_gpu_ram = true;
GPUIndex.knnSearch(query_device_matrix, indices_device_matrix, dists_device_matrix, tmpNN, sp);
/* int* indices = new int[tmpDataSize * tmpNN];
float* dists = new float[tmpDataSize * tmpNN];*/
flann::Matrix<int> indices_host(new int[query.rows * tmpNN], query.rows, tmpNN);
flann::Matrix<float> dists_host(new float[query.rows * tmpNN], query.rows, tmpNN);
printf("size: %d", dists_device.size());
std::cout << "dist:" << std::endl;
float d0 = dists_device[tmpDataSize - 1];
int i0 = indices_device[tmpDataSize - 1];
printf("d0 i0: %f ,%d\n", d0, i0);
std::cout << "size: " << dists_device.size() << std::endl;
std::vector<float> dis_vector(dists_device.size());
std::vector<int> ind_vector(indices_device.size());
thrust::copy(dists_device.begin(), dists_device.end(), dis_vector.begin());
thrust::copy(indices_device.begin(), indices_device.end(), ind_vector.begin());
thrust::copy(dists_device.begin(), dists_device.end(), dists_host.ptr());
thrust::copy(indices_device.begin(), indices_device.end(), indices_host.ptr());
for (int i = 0; i < tmpDataSize; i++)
for (int j = 0; j < tmpNN; j++)
{
printf("dist_host: %f\n", dis_vector[tmpNN * i + j]);
printf("indices_host: %d\n", ind_vector[tmpNN * i + j]);
/* std::cout << "dist_host[" << i << "]" << "[" << j << "]:" << dists_host[i][j] << std::endl;
std::cout << "indices: " << indices_host[i][j] << std::endl;*/
}
// std::cout << "dist_vec[0].size: " << dis_vector.size() << std::endl;
return 0;
}
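As a sanity check, the same neighbours can be computed on the CPU with FLANN's exact single kd-tree and compared with the GPU output. The following is only a sketch built on the same synthetic data as above, not part of the original program; the names (n, nn, cpuIndex, etc.) are placeholders. Note that flann::L2 reports squared Euclidean distances.

// cpu_crosscheck.cpp -- minimal CPU-only sketch for comparison
#include <flann/flann.hpp>
#include <iostream>
#include <vector>
int main()
{
    const int n = 1000, nn = 4;
    std::vector<float> data(n * 3), queries(n * 3);
    for (int i = 0; i < n; ++i)
    {
        // same synthetic cloud and query points as in the GPU example
        data[i * 3] = (float)i;          data[i * 3 + 1] = (float)(i + 1);   data[i * 3 + 2] = (float)(i + 2);
        queries[i * 3] = (float)(i + 1); queries[i * 3 + 1] = (float)(i + 1); queries[i * 3 + 2] = (float)(i + 1);
    }
    flann::Matrix<float> dataset(data.data(), n, 3);
    flann::Matrix<float> query(queries.data(), n, 3);
    std::vector<int> indices(n * nn);
    std::vector<float> dists(n * nn);
    flann::Matrix<int> indices_mat(indices.data(), n, nn);
    flann::Matrix<float> dists_mat(dists.data(), n, nn);
    // exact CPU kd-tree for low-dimensional data
    flann::Index<flann::L2<float> > cpuIndex(dataset, flann::KDTreeSingleIndexParams());
    cpuIndex.buildIndex();
    cpuIndex.knnSearch(query, indices_mat, dists_mat, nn, flann::SearchParams(-1)); // -1 = unlimited checks (exact)
    std::cout << "query 0: nearest index " << indices[0] << ", squared distance " << dists[0] << std::endl;
    return 0;
}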
References:
FLANN 快速近似最邻近算法官方指导文档 (FLANN official guide, CSDN blog by oxidane-lin): https://blog.csdn.net/weixin_45687825/article/details/110881552
https://www.pclcn.org/bbs/forum.php?mod=viewthread&tid=2166