MIC学习

头文件

编译指令:icc -o test test.c(icc 默认开启 offload 支持,生成的是带有 MIC 卸载代码的主机端程序;若要编译成直接在 MIC 卡上原生运行的程序,需加 -mmic 选项)

设置线程数量:

程序运行之前在命令行输入:

export OMP_NUM_THREADS=240

offload模式(主从模式):

1.变量/函数声明:

#pragma offload_attribute(push, target(mic))

......

#pragma offload_attribute(pop)

2.上传变量:

#pragma offload target(mic) in(......)out(...)

3.判断当前代码是否为MIC端代码(__MIC__ 宏只在编译MIC端代码时由编译器定义,属于编译期判断,并不能在运行时检测是否有MIC卡可用):

#ifdef  __MIC__

#else ...

#endif

4.在函数定义之前需要加上:

__attribute__((target(mic)))

以表明函数的类型。

向量化:

数据类型:__m512 Data Type

如:__m512i(int 32/64 vector), __m512(float 32 vector), __m512d(float 64 vector)

1.变量声明:__m512i   v1, v2, v3;

2.变量对齐:

windows:__declspec(align(n, off))

linux:__attribute__((align(n, off)))

如:__attribute__((align(64))),GCC 兼容写法为 __attribute__((aligned(64)))

permutevar函数:

 1 #include                                                                                                                                                     
  2 #include
  3 #include
  4 
  5 int main(void)
  6 {
  7     int a[16] = {1, 9, 2, 5, 2, 7, 8, 1, 10, 14, 15, 4, 3, 7, 12, 11};
  8     int b[16] = {3, 4, 6, 2, 5, 7, 9, 4, 13, 12, 7, 8, 15, 7, 2, 11};
  9     int c[16];
 10     int s[16] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
 11     int c1[16];
 12     #pragma offload target(mic)in(a, b)
 13     {
 14         __m512i va, vb, result, source;
 15         __mmask16 mask, fmask;
 16         va = _mm512_load_epi32(a);
 17         vb = _mm512_load_epi32(b);
 18         source = _mm512_load_epi32(s);
 19 
 20         mask = _mm512_cmp_epi32_mask(va, vb, _MM_CMPINT_LT);
 21         printf("%o\n", mask);
 22         fmask = _mm512_knot(mask);
 23         printf("%o\n", fmask);
 24         result = _mm512_set1_epi32(0);
 25         result = _mm512_mask_permutevar_epi32(result, mask, va, source);
 26         _mm512_store_epi32(c1, result);
 27         result = _mm512_mask_permutevar_epi32(result, fmask, vb, source);
 28         
 29         _mm512_store_epi32(c, result);
 30     }
 31     for(int i = 0; i < 16; i++)
 32     {
 33         printf("%d ", c1[i]);
 34     }
 35     printf("\n");
 36     for(int i = 0; i < 16; i++)
 37         printf("%d ", c[i]);
 38     return 0;
 39 }
 40 //输出结果:
 41 //mask打印:14725
 42 //转换成二进制:1100111010101
 43 //fmask打印:163052
 44 //转换成二进制:1110011000101010
 45 //
 46 //也就是说在将a, b的值load到va,vb中时,是按照从右到左的顺序。
 ./cmp 
17 0 18 0 18 0 24 17 26 0 0 20 19 0 0 0 
14725
163052
17 20 18 18 18 23 24 17 26 28 23 20 19 23 18 27


 
  


你可能感兴趣的:(高性能计算)