求解随机构建大数组的最大最小值

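问题描述:随机生成长度为 8192 的向量 R(每个元素为 0~40 的随机整数),计算矩阵 $B = R R^{T} / \lVert R \rVert$,即 $B_{ij} = R_i R_j / \lVert R \rVert$,然后求矩阵 B 中的最大值与最小值,并比较 CPU 与 GPU 两种实现的运行时间。(原文此处为图片:求解随机构建大数组的最大最小值_第1张图片)

一.cpu实现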

#include <stdio.h>
#include<iostream>
#include <time.h>
using namespace std;
 //求R的模长
/**
 * Computes the Euclidean norm of a vector.
 *
 * @param a  pointer to at least 8192 floats; exactly the first 8192 are read.
 * @return   the square root of the sum of the squared elements.
 */
float length(float *a)
{
	float squares = 0;
	const float *cursor = a;
	const float *const stop = a + 8192;
	// Walk front to back so the floating-point accumulation order is fixed.
	while (cursor != stop) {
		squares += (*cursor) * (*cursor);
		++cursor;
	}
	return sqrt(squares);
}
/**
 * Returns the largest element of a matrix stored as an array of row pointers.
 *
 * The running maximum is seeded from the first element rather than from 0,
 * so a matrix whose values are all negative is handled correctly.
 *
 * @param b     array of `rows` pointers, each to at least `cols` floats.
 * @param rows  number of rows to scan (defaults to 8192).
 * @param cols  number of columns to scan in each row (defaults to 8192).
 * @return      the maximum element, or 0 when there is nothing to scan.
 */
float qmax(float **b, int rows = 8192, int cols = 8192) {
	if (!b || rows <= 0 || cols <= 0)
		return 0;
	float best = b[0][0];
	for (int i = 0; i < rows; i++)
		for (int j = 0; j < cols; j++) {
			if (b[i][j] > best) best = b[i][j];
		}
	return best;
}
/**
 * Returns the smallest element of a matrix stored as an array of row pointers.
 *
 * The running minimum is seeded from the first element rather than from 0,
 * so a matrix whose values are all positive no longer reports 0.
 *
 * @param b     array of `rows` pointers, each to at least `cols` floats.
 * @param rows  number of rows to scan (defaults to 8192).
 * @param cols  number of columns to scan in each row (defaults to 8192).
 * @return      the minimum element, or 0 when there is nothing to scan.
 */
float qmin(float **b, int rows = 8192, int cols = 8192) {
	if (!b || rows <= 0 || cols <= 0)
		return 0;
	float best = b[0][0];
	for (int i = 0; i < rows; i++)
		for (int j = 0; j < cols; j++) {
			if (b[i][j] < best) best = b[i][j];
		}
	return best;
}
/**
 * CPU reference implementation.
 *
 * Builds a random vector of 8192 integers in [0, 40], forms the
 * 8192 x 8192 matrix b[i][j] = a[i] * a[j] / ||a||, prints its maximum and
 * minimum, and reports the elapsed processor time in seconds.
 *
 * @return 0 on success, 1 when a host allocation fails.
 */
int main() {
	const int N = 8192;
	float *a = NULL;
	float **b = NULL;
	int rowsReady = 0;   // number of rows of b that were successfully allocated
	int exitCode = 1;
	clock_t start, stop;
	double duration;
	start = clock();

	a = (float*)malloc(N * sizeof(float));
	b = (float**)malloc(N * sizeof(float*));
	bool ok = (a != NULL && b != NULL);
	for (; ok && rowsReady < N; ++rowsReady)
	{
		b[rowsReady] = (float *)malloc(sizeof(float) * N);
		if (b[rowsReady] == NULL) {
			// Leave rowsReady at the count of valid rows so cleanup frees only those.
			ok = false;
			break;
		}
	}

	if (!ok) {
		fprintf(stderr, "host malloc failed!\n");
	}
	else {
		for (int i = 0; i < N; i++)
		{
			a[i] = rand() % 41;
		}
		float len = length(a);
		printf("||R||=%f\n", len);

		// Outer product of the vector with itself, scaled by 1 / ||a||.
		for (int i = 0; i < N; i++)
			for (int j = 0; j < N; j++) {
				b[i][j] = (float)a[i] * a[j] / len;
			}

		// Maximum and minimum of the matrix.
		float max = qmax(b);
		float min = qmin(b);
		printf("----------------------------------------------\n");
		printf("最终的最大值=%f\n", max);
		printf("最终的最小值=%f\n", min);
		exitCode = 0;
	}

	// Release everything that was allocated, on success and on failure alike.
	for (int i = 0; i < rowsReady; ++i)
		free(b[i]);
	free(b);
	free(a);

	stop = clock();
	// CLOCKS_PER_SEC is the standard name; CLK_TCK is an obsolete alias that
	// is not available on every toolchain.
	duration = ((double)(stop - start)) / CLOCKS_PER_SEC;
	printf("cpu程序运行的时间=%lf", duration);
	return exitCode;
}

二.gpu实现

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/**
 * Fills the 8192 x 8192 row-major matrix CC with
 * CC[row * 8192 + col] = AA[row] * AA[col] / len.
 *
 * Launch layout: 2D grid of 2D blocks. Each thread writes one element; the
 * row comes from the y index and the column from the x index. Threads that
 * fall outside the matrix do nothing, so the grid may overshoot.
 *
 * @param AA   device pointer to at least 8192 floats (input vector).
 * @param CC   device pointer to at least 8192 * 8192 floats (output matrix).
 * @param len  divisor applied to every product. Taken as float so that the
 *             fractional part of the value passed by the caller is kept.
 */
__global__ void  calcSum(float* AA, float* CC, float len)
{
	const int Width = 8192;
	const int Row = blockIdx.y * blockDim.y + threadIdx.y;
	const int Col = blockIdx.x * blockDim.x + threadIdx.x;
	if (Row >= Width || Col >= Width)
		return;
	CC[(size_t)Row * Width + Col] = AA[Row] * AA[Col] / len;
}
/**
 * Atomically raises *addr to `value` when `value` is larger.
 * Implemented as a compare-and-swap loop on the float's bit pattern.
 */
static __device__ __forceinline__ void atomicMaxFloat(float* addr, float value)
{
	int* slot = reinterpret_cast<int*>(addr);
	int seen = *slot;
	while (__int_as_float(seen) < value) {
		const int expected = seen;
		seen = atomicCAS(slot, expected, __float_as_int(value));
		if (seen == expected)
			break;   // our value was stored
	}
}

/**
 * Reduces the 8192 * 8192 floats in CC to their maximum, left in CC[0].
 *
 * __syncthreads() only synchronizes the threads of one block, so it cannot
 * order an in-place reduction that spans the whole grid. Instead, each
 * thread scans elements 1..N-1 with a grid-wide stride, each block combines
 * its threads' results, and one thread per block folds the block result
 * into CC[0] atomically. CC[0] itself is never read non-atomically: the
 * atomic max already compares against its current value. All other
 * elements of CC are left untouched.
 *
 * Launch layout: 1D grid of 1D blocks; blockDim.x must be a multiple of 32
 * and at most 1024. Any grid size works.
 *
 * @param CC  device pointer to 8192 * 8192 floats; CC[0] receives the maximum.
 */
__global__ void  maxMin(float* CC)
{
	const size_t Total = (size_t)8192 * 8192;
	const unsigned int FullWarp = 0xffffffffu;
	const float NegInf = __uint_as_float(0xff800000u);   // identity for max

	__shared__ float warpBest[32];   // one slot per warp of the block

	const size_t span = (size_t)gridDim.x * blockDim.x;
	float best = NegInf;
	// Start at index 1: element 0 is the accumulator and is merged atomically.
	for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x + 1; i < Total; i += span)
		best = fmaxf(best, CC[i]);

	// Combine the 32 lanes of each warp.
	for (int offset = 16; offset > 0; offset >>= 1)
		best = fmaxf(best, __shfl_down_sync(FullWarp, best, offset));

	const unsigned int lane = threadIdx.x & 31u;
	const unsigned int warp = threadIdx.x >> 5;
	if (lane == 0)
		warpBest[warp] = best;
	__syncthreads();

	// The first warp combines the per-warp results of this block.
	if (warp == 0) {
		const unsigned int warpCount = (blockDim.x + 31u) >> 5;
		best = (lane < warpCount) ? warpBest[lane] : NegInf;
		for (int offset = 16; offset > 0; offset >>= 1)
			best = fmaxf(best, __shfl_down_sync(FullWarp, best, offset));
		if (lane == 0)
			atomicMaxFloat(&CC[0], best);
	}
}
/**
 * Atomically lowers *addr to `value` when `value` is smaller.
 * Implemented as a compare-and-swap loop on the float's bit pattern.
 */
static __device__ __forceinline__ void atomicMinFloat(float* addr, float value)
{
	int* slot = reinterpret_cast<int*>(addr);
	int seen = *slot;
	while (__int_as_float(seen) > value) {
		const int expected = seen;
		seen = atomicCAS(slot, expected, __float_as_int(value));
		if (seen == expected)
			break;   // our value was stored
	}
}

/**
 * Reduces the 8192 * 8192 floats in CC to their minimum, left in CC[0].
 *
 * __syncthreads() only synchronizes the threads of one block, so it cannot
 * order an in-place reduction that spans the whole grid. Instead, each
 * thread scans elements 1..N-1 with a grid-wide stride, each block combines
 * its threads' results, and one thread per block folds the block result
 * into CC[0] atomically. CC[0] itself is never read non-atomically: the
 * atomic min already compares against its current value. All other
 * elements of CC are left untouched.
 *
 * Launch layout: 1D grid of 1D blocks; blockDim.x must be a multiple of 32
 * and at most 1024. Any grid size works.
 *
 * @param CC  device pointer to 8192 * 8192 floats; CC[0] receives the minimum.
 */
__global__ void  maxMin1(float* CC)
{
	const size_t Total = (size_t)8192 * 8192;
	const unsigned int FullWarp = 0xffffffffu;
	const float PosInf = __uint_as_float(0x7f800000u);   // identity for min

	__shared__ float warpBest[32];   // one slot per warp of the block

	const size_t span = (size_t)gridDim.x * blockDim.x;
	float best = PosInf;
	// Start at index 1: element 0 is the accumulator and is merged atomically.
	for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x + 1; i < Total; i += span)
		best = fminf(best, CC[i]);

	// Combine the 32 lanes of each warp.
	for (int offset = 16; offset > 0; offset >>= 1)
		best = fminf(best, __shfl_down_sync(FullWarp, best, offset));

	const unsigned int lane = threadIdx.x & 31u;
	const unsigned int warp = threadIdx.x >> 5;
	if (lane == 0)
		warpBest[warp] = best;
	__syncthreads();

	// The first warp combines the per-warp results of this block.
	if (warp == 0) {
		const unsigned int warpCount = (blockDim.x + 31u) >> 5;
		best = (lane < warpCount) ? warpBest[lane] : PosInf;
		for (int offset = 16; offset > 0; offset >>= 1)
			best = fminf(best, __shfl_down_sync(FullWarp, best, offset));
		if (lane == 0)
			atomicMinFloat(&CC[0], best);
	}
}
/**
 * Euclidean norm of a host vector.
 *
 * @param a  pointer to at least 8192 floats; exactly the first 8192 are read.
 * @return   the square root of the sum of the squared elements.
 */
float length(float *a)
{
	float total = 0;
	int k = 0;
	// Accumulate in index order so the floating-point result is reproducible.
	while (k != 8192) {
		const float v = a[k];
		total += v * v;
		++k;
	}
	return sqrt(total);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess.
static bool cudaOk(cudaError_t status, const char* what)
{
	if (status == cudaSuccess)
		return true;
	fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
	return false;
}

// Runs the three kernels: builds the matrix in CC, copies it to C, then
// reduces it to its maximum and minimum. Returns false on the first CUDA error.
static bool runOnGpu(const float* A, float* C, float* AA, float* CC,
                     size_t bytesA, size_t bytesC, float len,
                     float* maxOut, float* minOut)
{
	if (!cudaOk(cudaMemcpy(AA, A, bytesA, cudaMemcpyHostToDevice), "cudaMemcpy A -> AA"))
		return false;

	// 256 x 256 blocks of 32 x 32 threads: one thread per matrix element.
	dim3 dimGrid(256, 256, 1);
	dim3 dimBlock(32, 32, 1);
	calcSum<<<dimGrid, dimBlock>>>(AA, CC, len);
	// A launch returns no status of its own; configuration errors are
	// reported by cudaGetLastError and execution errors by the next sync.
	if (!cudaOk(cudaGetLastError(), "calcSum launch"))
		return false;
	if (!cudaOk(cudaDeviceSynchronize(), "calcSum"))
		return false;
	if (!cudaOk(cudaMemcpy(C, CC, bytesC, cudaMemcpyDeviceToHost), "cudaMemcpy CC -> C"))
		return false;

	// Maximum: the kernel leaves its result in CC[0], so fetch only that float.
	// C keeps the unmodified matrix.
	dim3 dimGrid1 = 65536;
	dim3 dimBlock1 = 1024;
	maxMin<<<dimGrid1, dimBlock1>>>(CC);
	if (!cudaOk(cudaGetLastError(), "maxMin launch"))
		return false;
	if (!cudaOk(cudaMemcpy(maxOut, CC, sizeof(float), cudaMemcpyDeviceToHost), "cudaMemcpy max"))
		return false;

	// The max kernel writes into CC, so restore the original matrix before
	// the min kernel runs.
	if (!cudaOk(cudaMemcpy(CC, C, bytesC, cudaMemcpyHostToDevice), "cudaMemcpy C -> CC"))
		return false;
	dim3 dimGrid2 = 65536;
	dim3 dimBlock2 = 1024;
	maxMin1<<<dimGrid2, dimBlock2>>>(CC);
	if (!cudaOk(cudaGetLastError(), "maxMin1 launch"))
		return false;
	if (!cudaOk(cudaMemcpy(minOut, CC, sizeof(float), cudaMemcpyDeviceToHost), "cudaMemcpy min"))
		return false;

	return true;
}

/**
 * GPU implementation.
 *
 * Builds a random vector of 8192 integers in [0, 40], computes the
 * 8192 x 8192 matrix A[i] * A[j] / ||A|| on the device, prints its maximum
 * and minimum, and reports the elapsed processor time in seconds.
 *
 * @return 0 on success, 1 when a host allocation or a CUDA call fails.
 */
int main()
{
	const int szA = 8192;
	const size_t szC = (size_t)8192 * 8192;
	const size_t bytesA = szA * sizeof(float);
	const size_t bytesC = szC * sizeof(float);
	float* A = NULL, *AA = NULL;
	float* C = NULL, *CC = NULL;
	int exitCode = 1;
	clock_t start, stop;
	double duration;
	start = clock();

	A = (float*)malloc(bytesA);
	C = (float*)malloc(bytesC);
	if (A == NULL || C == NULL) {
		fprintf(stderr, "host malloc failed!\n");
	}
	else {
		for (int i = 0; i < szA; i++)
		{
			A[i] = rand() % 41;
		}

		printf("\nArray A的结果:\n");
		for (int i = 0; i < 8; i++) {
			printf("%f\t", A[i]);
			printf("\n");
		}

		if (cudaOk(cudaMalloc((void**)&AA, bytesA), "cudaMalloc AA") &&
		    cudaOk(cudaMalloc((void**)&CC, bytesC), "cudaMalloc CC")) {
			float len = length(A);
			printf("len值:%f", len);

			float maxValue = 0.0f;
			float minValue = 0.0f;
			if (runOnGpu(A, C, AA, CC, bytesA, bytesC, len, &maxValue, &minValue)) {
				printf("\nArray C的结果:\n");
				for (int i = 0; i < 8; i++) {
					printf("%f\t", C[i]);
					printf("\n");
				}
				printf("最大值\n%f\n", maxValue);
				printf("最小值\n%f\n", minValue);
				exitCode = 0;
			}
		}
	}

	// Single cleanup path; cudaFree and free both accept null pointers.
	cudaFree(AA);
	cudaFree(CC);
	free(A);
	free(C);

	stop = clock();
	// Convert clock ticks to seconds, as the CPU version does.
	duration = ((double)(stop - start)) / CLOCKS_PER_SEC;
	printf("gpu程序运行的时间=%lf", duration);
	return exitCode;
}

试验结果:
求解随机构建大数组的最大最小值_第2张图片

你可能感兴趣的:(cuda,数组,cpu)