CUDA束表决函数
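束表决函数(warp vote functions):简单地理解,就是在同一个 warp(线程束,32 个线程)内,对各线程给出的 predicate 进行表决。注意:从 CUDA 9.0 起,不带 _sync 后缀的 __all/__any/__ballot 已被弃用,在 Volta 及之后的架构上应改用带参与线程掩码的 __all_sync/__any_sync/__ballot_sync。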
__all(int predicate):将当前线程所在 warp 中每个线程的 predicate 与 0 进行比较,当且仅当所有线程的 predicate 都不为 0 时返回非零值,否则返回 0。
__any(int predicate):将当前线程所在 warp 中每个线程的 predicate 与 0 进行比较,只要有任意一个线程的 predicate 不为 0 就返回非零值,否则返回 0。
__ballot(int predicate):返回一个 32 位无符号整数;当且仅当当前线程所在 warp 中第 N 个线程的 predicate 不为 0 时,返回值的第 N 位被置 1,其余位为 0。
// Warp vote demo: fills an array with 0..n-1, then runs the three warp vote
// intrinsics on the predicate (a[i] > 100) and prints the per-thread results.
// Requires CUDA 9.0 or newer for the *_sync warp vote intrinsics.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h"  // legacy include kept from the original listing
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Aborts with file/line information when a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// Each in-range thread i writes b[i] = non-zero iff (a[i] > 100) holds for
// every in-range thread of its warp.
// Launch: 1D grid of 1D blocks; a and b must hold at least n ints.
__global__ void vote_all(int *a, int *b, int n)
{
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    // Build the participation mask while the whole warp is still converged,
    // so the vote below names exactly the lanes that execute it.
    unsigned mask = __ballot_sync(0xffffffffu, tid < n);
    if (tid >= n) {  // was `tid > n`, which let tid == n access out of range
        return;
    }
    int temp = a[tid];
    b[tid] = __all_sync(mask, temp > 100);
}

// Each in-range thread i writes b[i] = non-zero iff (a[i] > 100) holds for
// at least one in-range thread of its warp.
// Launch: 1D grid of 1D blocks; a and b must hold at least n ints.
__global__ void vote_any(int *a, int *b, int n)
{
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned mask = __ballot_sync(0xffffffffu, tid < n);
    if (tid >= n) {
        return;
    }
    int temp = a[tid];
    b[tid] = __any_sync(mask, temp > 100);
}

// Each in-range thread i writes to b[i] the warp's ballot: bit k is set iff
// lane k is in range and its a[] value is > 100. The unsigned ballot is stored
// into an int, so a ballot with bit 31 set reads back as a negative number.
// Launch: 1D grid of 1D blocks; a and b must hold at least n ints.
__global__ void vote_ballot(int *a, int *b, int n)
{
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned mask = __ballot_sync(0xffffffffu, tid < n);
    if (tid >= n) {
        return;
    }
    int temp = a[tid];
    b[tid] = static_cast<int>(__ballot_sync(mask, temp > 100));
}

// Prints `label`, then the n values with a line break before every
// `per_row`-th value (including the first) and a final newline.
static void print_results(const char *label, const int *values, int n, int per_row)
{
    printf("%s", label);
    for (int i = 0; i < n; ++i) {
        if (!(i % per_row)) {
            printf("\n");
        }
        printf("%d", values[i]);
    }
    printf("\n");
}

int main()
{
    const int n = 256, m = 10;
    const size_t nsize = n * sizeof(int);
    const int threads = 256;
    const int blocks = (n + threads - 1) / threads;  // 1 block for n = 256

    int *h_a = (int *)malloc(nsize);
    int *h_b = (int *)malloc(nsize);
    if (h_a == NULL || h_b == NULL) {
        fprintf(stderr, "host allocation failed\n");
        free(h_a);
        free(h_b);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < n; ++i) {
        h_a[i] = i;
    }
    memset(h_b, 0, nsize);

    int *d_a = NULL, *d_b = NULL;
    CUDA_CHECK(cudaMalloc(&d_a, nsize));
    CUDA_CHECK(cudaMalloc(&d_b, nsize));
    CUDA_CHECK(cudaMemcpy(d_a, h_a, nsize, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemset(d_b, 0, nsize));

    vote_all<<<blocks, threads>>>(d_a, d_b, n);
    CUDA_CHECK(cudaGetLastError());  // launch-configuration errors
    // The blocking copy also surfaces any fault raised while the kernel ran.
    CUDA_CHECK(cudaMemcpy(h_b, d_b, nsize, cudaMemcpyDeviceToHost));
    print_results("vote_all():", h_b, n, m);

    vote_any<<<blocks, threads>>>(d_a, d_b, n);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaMemcpy(h_b, d_b, nsize, cudaMemcpyDeviceToHost));
    print_results("vote_any():", h_b, n, m);

    vote_ballot<<<blocks, threads>>>(d_a, d_b, n);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaMemcpy(h_b, d_b, nsize, cudaMemcpyDeviceToHost));
    print_results("vote_ballot():", h_b, n, m);

    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    free(h_a);
    free(h_b);
    return 0;
}
#include <iostream>
using namespace std;

// Builds the 32-bit mask whose bits start..31 are set (the value a warp ballot
// yields when lanes start..31 vote true) and prints it as a signed int.
int main()
{
    const int start = 10;
    // Accumulate in unsigned: shifting 1 into bit 31 of a signed int is
    // undefined behaviour before C++14.
    unsigned int bits = 0u;
    for (int i = start; i < 32; ++i) {
        bits |= (1u << i);
    }
    // Convert once for printing so the output matches a ballot stored in an
    // int: -1024 with a 32-bit two's-complement int.
    int state = static_cast<int>(bits);
    cout << state << endl;
    return 0;
}
置位可以用或操作符“|”实现:y = x | (1 << n) 对x的第n位进行置位
清除可以用与操作符“&”实现:y = x & (~(1 << n)) 对x的第n位进行清零
取反可以用异或操作符“^”实现:y = x ^ (1 << n) 对x的第n位进行取反
Bit提取操作:bit = (x & (1 << n)) >> n; 即取出x的第n位(对无符号数,等价的常用写法是 bit = (x >> n) & 1)