Cuda束表决函数(warp vote)

CUDA束表决函数

束表决函数:简单的理解就是在一个warp内进行表决


__all(int predicate):指的是predicate与0进行比较,如果当前线程所在的Wrap所有线程对应predicate不为0,则返回1。

__any(int predicate):指的是predicate与0进行比较,如果当前线程所在的Wrap有一个线程对应的predicate值不为0,则返回1。

__ballot(int predicate):指的是当前线程所在的Wrap中第N个线程对应的predicate值不为0,则将整数0的第N位进行置位。

 

//
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h"
#include <stdio.h>
 
__global__ void vote_all(int *a, int *b, int n)
{
    int tid = threadIdx.x;
    if (tid > n)
    {
       return;
    }
    int temp = a[tid];
    b[tid] = __all(temp >100);
}
 
__global__ void vote_any(int *a, int *b, int n)
{
    int tid = threadIdx.x;
    if (tid > n)
    {
       return;
    }
    int temp = a[tid];
    b[tid] = __any(temp >100);
}
 
__global__ void vote_ballot(int *a, int *b, int n)
{
    int tid = threadIdx.x;
    if (tid > n)
    {
       return;
    }
    int temp = a[tid];
    b[tid] = __ballot(temp >100);
}
 
int main()
{
    int *h_a, *h_b, *d_a, *d_b;
    int n = 256, m = 10;
    int nsize = n * sizeof(int);
    h_a = (int *)malloc(nsize);
    h_b = (int *)malloc(nsize);
    for (int i = 0; i < n; ++i)
    {
       h_a[i] = i;
    }
    memset(h_b, 0, nsize);
    cudaMalloc(&d_a, nsize);
    cudaMalloc(&d_b, nsize);
    cudaMemcpy(d_a, h_a, nsize, cudaMemcpyHostToDevice);
    cudaMemset(d_b, 0, nsize);
    vote_all<< <1, 256 >> >(d_a, d_b, n);
    cudaMemcpy(h_b, d_b, nsize, cudaMemcpyDeviceToHost);
    printf("vote_all():");
    for (int i = 0; i < n; ++i)
    {
       if (!(i % m))
       {
           printf("\n");
       }
       printf("%d", h_b[i]);
    }
    printf("\n");
    vote_any<<<1, 256 >> >(d_a, d_b, n);
    cudaMemcpy(h_b, d_b, nsize, cudaMemcpyDeviceToHost);
    printf("vote_any():");
    for (int i = 0; i < n; ++i)
    {
       if (!(i % m))
       {
           printf("\n");
       }
       printf("%d", h_b[i]);
    }
    printf("\n");
    vote_ballot<< <1, 256 >> >(d_a, d_b, n);
    cudaMemcpy(h_b, d_b, nsize, cudaMemcpyDeviceToHost);
    printf("vote_ballot():");
    for (int i = 0; i < n; ++i)
    {
       if (!(i % m))
       {
           printf("\n");
       }
       printf("%d", h_b[i]);
    }
    printf("\n");
}


#include <iostream>
 
using namespace std;
 
int main()
{
    int state = 0;
    int start = 10;
    for (int i = start; i <32; ++i)
    {
       state |= (1<< i);
    }
    cout<< state<< endl;
}


 

置位可以用或操作符“|”实现:y = x | (1 << n)  对x的第n位进行置位

清楚可以用与操作符”&“实现:y = x & (~(1 << n))

取反可以用异或操作符”^“实现: y = x ^ (1 << n)

Bit提取操作: bit = (x | (1 << n)) >> n;

你可能感兴趣的:(线程,CUDA,vote,warp,warp)