使用CUDA判断是否为素数 代码稍有改动

使用CUDA判断是否为素数
下面是一个使用CUDA判断某个数是否为素数的程序:每个线程负责检查待测数能否被若干个候选除数整除,并将本线程的判断结果写入 shared memory;随后在线程块内对这些结果做归约统计,主机端再汇总各线程块的结果。如果所有候选除数都不能整除待测数,那么它就是素数。代码如下:

下面是一个使用CUDA判断某个数是否为素数的程序:每个线程负责检查待测数能否被若干个候选除数整除,并将本线程的判断结果写入 shared memory;随后在线程块内对这些结果做归约统计,主机端再汇总各线程块的结果。如果所有候选除数都不能整除待测数,那么它就是素数。代码如下:
 
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <cuda_runtime.h>
#include <vector_types.h>

/* Number tested for primality; also used as the element count of `data`
 * and of the device copy of it. */
#define    TEST_DATA        9999941
/* Not referenced anywhere in the visible code. */
#define DATA_SIZE         1048576
/* Microseconds per second, used when converting gettimeofday() deltas. */
#define MILLION            1000000
/* Divisor applied to the device cycle count when printing the GPU time.
 * Presumably the GPU clock rate in kHz (918 MHz) — TODO confirm for the
 * device this is run on. */
#define    GPU_FRQNC_K     918000
/* Threads per block used for the kernel launch and the in-block reduction. */
#define    THREAD_NUM        256
/* Number of blocks used for the kernel launch. */
#define    BLOCK_NUM        32

/* Host-side table of candidate divisors; filled by GenerateNumbers() and
 * copied to the device in main(). */
long data[TEST_DATA];

/*
 * Fill the first (size - 2) entries of `number` with the consecutive
 * integers 2, 3, ..., size - 1. The last two entries of a `size`-element
 * array are left untouched, and nothing is written when size <= 2.
 */
void GenerateNumbers(long *number, int size) {
    long *slot = number;
    for (long value = 2; value < size; ++value) {
        *slot++ = value;
    }
}

/*
 * Kernel: checks whether any entry of num[0 .. TEST_DATA-3] divides TEST_DATA.
 *
 * Launch requirements (as used by main): BLOCK_NUM blocks of THREAD_NUM
 * threads, with THREAD_NUM * sizeof(bool) bytes of dynamic shared memory.
 * The halving reduction below assumes THREAD_NUM is a power of two.
 *
 * num    - candidate divisors (device memory).
 * result - one flag per block: true when no candidate handled by that block
 *          divides TEST_DATA.
 * time   - 2 * BLOCK_NUM clock() samples: entry [b] is taken at the start of
 *          block b, entry [b + BLOCK_NUM] at its end.
 */
__global__ static void isPrime(long *num, bool *result, clock_t *time) {

    extern __shared__ bool shareMem[];
    const int tid = threadIdx.x;
    const int bid = blockIdx.x;
    const int stride = BLOCK_NUM * THREAD_NUM;

    // Thread 0 stamps the start time of its block.
    if (tid == 0) {
        time[bid] = clock();
    }

    // Each thread walks the candidate table in steps of `stride` and
    // remembers whether any of its candidates divides TEST_DATA.
    bool noDivisorFound = true;
    for (int idx = bid * THREAD_NUM + tid; idx < TEST_DATA - 2; idx += stride) {
        if ((TEST_DATA % num[idx]) == 0) {
            noDivisorFound = false;
        }
    }
    shareMem[tid] = noDivisorFound;
    __syncthreads();

    // Logical-AND reduction over the block's flags: the active half of the
    // threads folds in the flag of its partner, then the range is halved.
    for (int half = THREAD_NUM / 2; half > 0; half >>= 1) {
        if (tid < half) {
            const bool partner = shareMem[tid + half];
            if (!partner) {
                shareMem[tid] = false;
            }
        }
        // Reached by every thread, outside the divergent branch.
        __syncthreads();
    }

    // Thread 0 publishes the block's verdict and the end time.
    if (tid == 0) {
        result[bid] = shareMem[0];
        time[bid + BLOCK_NUM] = clock();
    }
}

/* Abort with a readable message when a CUDA runtime call fails. */
static void checkCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what,
                cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/* Wall-clock seconds elapsed between two gettimeofday() samples. */
static double elapsedSeconds(const struct timeval *begin,
        const struct timeval *end) {
    return ((end->tv_sec - begin->tv_sec) * MILLION
            + (end->tv_usec - begin->tv_usec)) / (double) MILLION;
}

/*
 * Tests TEST_DATA for primality twice - once on the GPU with the isPrime
 * kernel and once with a plain CPU loop - and prints both verdicts together
 * with their timings.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
 */
int main() {
    struct timeval tBegin;
    struct timeval tEnd;
    double ftimeused = 0.0;
    bool sum[BLOCK_NUM];
    bool final_sum = true;
    long *gpudata = NULL;
    bool *result = NULL;
    clock_t *dev_clock = NULL;
    clock_t time_used[BLOCK_NUM * 2];
    clock_t time_begin_min, time_end_max;
    clock_t time_gpu_used;
    int i, j;

    GenerateNumbers(data, TEST_DATA);

    checkCuda(cudaMalloc((void**) &gpudata, sizeof(long) * TEST_DATA),
            "cudaMalloc(gpudata)");
    checkCuda(cudaMalloc((void**) &result, sizeof(bool) * BLOCK_NUM),
            "cudaMalloc(result)");
    // Size the buffer by the element type (clock_t), not by the pointer.
    checkCuda(cudaMalloc((void**) &dev_clock, sizeof(clock_t) * BLOCK_NUM * 2),
            "cudaMalloc(dev_clock)");
    checkCuda(cudaMemcpy(gpudata, data, sizeof(long) * TEST_DATA,
            cudaMemcpyHostToDevice), "cudaMemcpy(data -> gpudata)");

    gettimeofday(&tBegin, NULL);

    isPrime<<<BLOCK_NUM, THREAD_NUM, sizeof(bool) * THREAD_NUM>>>(gpudata,
            result, dev_clock);
    // A kernel launch returns no status itself; bad launch configurations
    // are only visible through cudaGetLastError().
    checkCuda(cudaGetLastError(), "isPrime launch");
    // The blocking copies below also wait for the kernel to finish and
    // surface any fault raised while it ran.
    checkCuda(cudaMemcpy(sum, result, sizeof(bool) * BLOCK_NUM,
            cudaMemcpyDeviceToHost), "cudaMemcpy(result -> sum)");
    checkCuda(cudaMemcpy(time_used, dev_clock, sizeof(clock_t) * BLOCK_NUM * 2,
            cudaMemcpyDeviceToHost), "cudaMemcpy(dev_clock -> time_used)");

    // TEST_DATA is prime only if every block found no divisor.
    for (i = 0; i < BLOCK_NUM; i++) {
        final_sum &= sum[i];
    }

    gettimeofday(&tEnd, NULL);
    ftimeused = elapsedSeconds(&tBegin, &tEnd);

    checkCuda(cudaFree(gpudata), "cudaFree(gpudata)");
    checkCuda(cudaFree(result), "cudaFree(result)");
    checkCuda(cudaFree(dev_clock), "cudaFree(dev_clock)");

    // time_used[0 .. BLOCK_NUM-1] holds per-block start samples and
    // time_used[BLOCK_NUM .. 2*BLOCK_NUM-1] the matching end samples; the
    // GPU span is taken from the earliest start to the latest end.
    // NOTE(review): device clock() counters are per-multiprocessor, so
    // comparing samples from different blocks is only approximate - confirm
    // this is acceptable for the reported figure.
    time_begin_min = time_used[0];
    time_end_max = time_used[BLOCK_NUM];
    for (i = 0, j = BLOCK_NUM; i < BLOCK_NUM; i++, j++) {
        if (time_begin_min > time_used[i]) {
            time_begin_min = time_used[i];
        }
        if (time_end_max < time_used[j]) {
            time_end_max = time_used[j];
        }
    }
    time_gpu_used = time_end_max - time_begin_min;

    const char *str_result_gpu = final_sum ? "TRUE" : "FALSE";
    // GPU verdict, timed once with the device cycle counter and once with
    // the host wall clock.
    printf("return(GPU): %s  timeused=%f s  GPU count\n", str_result_gpu,
            (time_gpu_used / (double) GPU_FRQNC_K) / 1000);
    printf("return(GPU): %s  timeused=%f s  CPU count\n", str_result_gpu, ftimeused);

    // CPU reference: try the same divisor set the GPU uses, 2 .. TEST_DATA-1.
    final_sum = true;
    gettimeofday(&tBegin, NULL);
    for (int divisor = 2; divisor < TEST_DATA; divisor++) {
        if ((TEST_DATA % divisor) == 0) {
            final_sum = false;
        }
    }
    gettimeofday(&tEnd, NULL);
    const char *str_result_cpu = final_sum ? "TRUE" : "FALSE";
    ftimeused = elapsedSeconds(&tBegin, &tEnd);
    printf("return(CPU): %s  timeused=%f s CPU count\n", str_result_cpu, ftimeused);

    return 0;
}

你可能感兴趣的:(CUDA,职场,GPU,休闲)