openCL-矩阵相乘

这是AMD异构大赛所发书上的例子,我自己加了一些东西并实现了一下。不过我还没有学会如何分析运行时间,先把代码贴上来。

这是我第一次使用 AMD APP KernelAnalyzer,感觉还不错。

simpleMultiply.cl

// Enter your kernel in this window
__kernel                                         
void simpleMultiply(__global float* outPutC,             
int widthA,                                   
int heightA,                                     
int widthB ,                                     
int heightB ,                                    
 __global float* inputA ,                        
__global  float* inputB                          
 )                                               
{                                                
   int row = get_global_id(1);                    
   int col = get_global_id(0);                      
   float sum = 0.0f  ;                             
   for(int i=0;i

main.cpp

/*
   项目:openCL的矩阵相乘
   作者:刘荣
   时间:2012.11.20
*/
#include 
#include
#include  
#include
#include 
#include 
#include 
using namespace std;
/**
 * Loads the whole content of a file (normally the OpenCL ".cl" kernel source
 * written for the device) into a std::string.
 *
 * The file is opened in binary mode, so the bytes are returned exactly as
 * they are stored on disk.
 *
 * @param filename Path of the file to read.
 * @return The file content. An empty string is returned, after printing a
 *         diagnostic to stdout, when the file size cannot be determined.
 *
 * If the file cannot be opened, a message is printed to stdout and the
 * process is terminated with exit code 1.
 */
std::string
convertToString(const char *filename)
{
    std::fstream f(filename, (std::fstream::in | std::fstream::binary));

    if(!f.is_open())
    {
        std::cout << "\nFile containg the kernel code(\".cl\") not found. Please copy the required file in the folder containg the executable.\n";
        exit(1);
    }

    // Measure the file by seeking to its end. tellg() reports failure as -1,
    // which must be rejected before it is converted to an unsigned size.
    f.seekg(0, std::fstream::end);
    const std::streamoff length = f.tellg();
    f.seekg(0, std::fstream::beg);
    if(length < 0 || !f)
    {
        std::cout << "Failed to determine the size of the kernel file";
        return std::string();
    }

    // Read straight into the result string: no manual buffer to free and no
    // extra copy of the data.
    std::string s(static_cast<size_t>(length), '\0');
    if(!s.empty())
    {
        f.read(&s[0], static_cast<std::streamsize>(s.size()));
        // Keep only the bytes that were actually read in case of a short read.
        s.resize(static_cast<size_t>(f.gcount()));
    }
    return s;
}

int main()
{
	double start,end,time1,time2;
	//查询平台
	cl_int ciErrNum;
	cl_platform_id platform;
	ciErrNum = clGetPlatformIDs(1, &platform, NULL);
	if(ciErrNum != CL_SUCCESS)
	{
		cout<<"获取设备失败"<


 

你可能感兴趣的:(并行计算)