static void HoughFitPlanGPU(float* pXData, float* pYData, float* pZData, int count, PlanParameters& planParam)
{
concurrency::array_view xdata(count, pXData);
concurrency::array_view ydata(count, pYData);
concurrency::array_view zdata(count, pZData);
float miminor = 0.08f;
int aN = planParam.aN, bN = planParam.bN, cN = planParam.cN, dN = planParam.dN;
// for the special demand
float* aParam = new float[aN];
CreateParam(aParam, aN, -PI_FIT/4.0f, PI_FIT/4.0f);
float* bParam = new float[bN];
CreateParam(bParam, bN, -PI_FIT/4.0f, PI_FIT/4.0f);
//std::vector aParam(aN, 1.0);
//CreateParam(aParam, aN, -PI_FIT/4.0f, PI_FIT/4.0f);
//std::vector bParam(bN, 0.5);
//CreateParam(bParam, bN, -PI_FIT/4.0f, PI_FIT/4.0f);
float* dParam = new float[dN];
for ( int i = 0; i < dN; i++)
dParam[i] = (float)(i-13);
concurrency::array_view vaParam(aN, aParam);
concurrency::array_view vbParam(bN, bParam);
concurrency::array_view vdParam(dN, dParam);
std::vector voteRes( aN*bN*dN, 0);
concurrency::array_view voteData(aN,bN,dN,voteRes); // 特别注意,不能是int型,否则会把shader设置成为int还是怎么,影响到参数空间的值dParam也会变成int值
voteData.discard_data();
concurrency::parallel_for_each(
voteData.extent,
[=](concurrency::index<3> idx) restrict(amp)
{
float a = vaParam(idx[0]);
float b = vbParam(idx[1]);
float c = 1.0f;
float d = vdParam[idx[2]];
voteData[idx] = 0;
for ( int m = 0; m < count; m++)
{
float value = a * xdata[m] + b * ydata[m] + zdata[m] + d;
value = (value > 0)? value: -value;
if(value < miminor)
//voteData[idx] += d;
voteData[idx]++;
}
}
);
float max = 0;
for ( int i = 0; i < aN; i++)
{
for ( int j = 0; j< bN; j++)
{
for (int k = 0; k < dN; k++)
{
if(voteData(i,j,k) > max)
{
max = voteData(i,j,k);
planParam.a = aParam[i];
planParam.b = bParam[j];
planParam.c = 1.0f;
planParam.d = dParam[k];
}
}
}
}
max = max;
std::cout << max << std::endl;
}
在C++ AMP中主要有array和array_view这两种数据容器。这两者主要的区别在于array类型的数据在创建时会在GPU显存上拥有一个备份,在GPU对该数据进行完运算之后,开发者必须手动将数据拷贝回CPU。与之相比,array_view其实是一个数据结构的封装,只有在它指向的数据被GPU调用时才会被拷贝到GPU上进行相应的计算。从下例中我们看到,声明array_view数据时需要提供两个模板参数:array_view元素的类型和数据结构的维度。因为pXData,pYData和pZData都是一维数组,因此我们在声明时传入const float和1两个参数。其他所遇到的情况以此类推即可。【说明摘自程序员杂志第四期】
参数空间的设计可以根据自己的需求而定,因为在我的试验里,发现在z轴上的分布为40 - 60cm,所以我考虑将z的参数定为单位值,参数空间为a、b、d(点到平面的距离),a、b为-1到1的等差采样,采样率可以根据精度需求进行手动指定(优化选择当然更好,如果要把这个功能集成到现在的项目里可能会考虑做这一步)。
为了和GPU进行检查和对比,实现了CPU进行平面检测,在鉴定GPU效果良好后,对其进行改进,采用了迭代的方法提高检测精度。
static void HoughFitPlanCPU(float* pXData, float* pYData, float* pZData, int count, PlanParameters& planParam)
{
float miminor = 0.01f;
int aN = planParam.aN, bN = planParam.bN, cN = planParam.cN, dN = planParam.dN;
// for the special demand
float band = 2.0f;
int iter = 6;
float* aParam = new float[aN];
float* bParam = new float[bN];
float* dParam = new float[dN];
for ( int i = 0; i < dN; i++)
dParam[i] = (float)(i-13);
while ( iter-- >0)
{
band = band / 2.0f;
CreateParam(aParam, aN, planParam.a - band, planParam.a + band);
CreateParam(bParam, bN, planParam.b - band, planParam.b + band);
//std::ofstream outfile("data3.txt");
float miniset = 10000.0f, dx, dy, dz;
std::vector voteRes( aN*bN*dN, 0);
//int*** vote = new int**[dN];
for ( int i = 0; i < aN; i++)
{
//vote[i] = new int*[bN];
for ( int j = 0; j < bN; j++)
{
//vote[i][j] = new int[dN];
for (int k = 0; k < dN; k++)
{
long idx = i*bN*dN + j*dN + k;
float a = aParam[i];
float b = bParam[j];
float c = 1.0f;
float d = dParam[k];
float normal = (a*a + b*b + c*c + d*d);
//vote[i][j][k] = 0;
voteRes[idx] = 0;
for (int m = 0; m < count; m++)
{
float x = pXData[m], y = pYData[m], z = pZData[m];
float value = a*x + b*y + c*z + d;
value = (value >0)?value:(-value);
//value = value*value / 10000.0f;// / (x*x + y*y + z*z)/ normal;
if( value < miminor)
//voteRes[idx] += d;
voteRes[idx]++;
//vote[i][j][k]++;
}
//outfile << vote[i][j][k] <<"\t";
}
//outfile << std::endl;
}
}
//outfile.close();
float max = -0.01;
for ( int i = 0; i < aN; i++)
{
for ( int j = 0; j < bN; j++)
{
for (int k = 0; k < dN; k++)
{
long idx = i*bN*dN + j*dN + k;
if ( voteRes[idx] > max) //vote[i][j][k] > max)
{
max = voteRes[idx]; //vote[i][j][k];
planParam.a = aParam[i];
planParam.b = bParam[j];
planParam.d = dParam[k];
}
}
}
}
std::cout << max << std::endl;
}
}
测试代码:
int _tmain(int argc, _TCHAR* argv[])
{
// 0.457x + 0.787y + z - 9 = 0;
int count = 80, rand_count = 10;
float* XData = new float[count];
float* YData = new float[count];
float* ZData = new float[count];
for ( int i = 0; i
CPU递归迭代能找到更好的逼近结果,可考虑换成GPU进行迭代以降低算法复杂度。
【由于这段时间事情太多,写得很粗糙,请多多见谅】
http://www.cnblogs.com/yxy8023ustc/archive/2012/12/09/2809404.html